@@ -2978,7 +2978,7 @@ def parse_individuals(
2978
2978
if table is None :
2979
2979
table = tables .IndividualTable ()
2980
2980
# Read the header and find the indexes of the required fields.
2981
- header = source .readline ().strip ("\n " ).split (sep )
2981
+ header = source .readline ().rstrip ("\n " ).split (sep )
2982
2982
flags_index = header .index ("flags" )
2983
2983
location_index = None
2984
2984
parents_index = None
@@ -2996,7 +2996,7 @@ def parse_individuals(
2996
2996
except ValueError :
2997
2997
pass
2998
2998
for line in source :
2999
- tokens = line .split (sep )
2999
+ tokens = line .rstrip ( " \n " ). split (sep )
3000
3000
if len (tokens ) >= 1 :
3001
3001
flags = int (tokens [flags_index ])
3002
3002
location = ()
@@ -3047,7 +3047,7 @@ def parse_nodes(source, strict=True, encoding="utf8", base64_metadata=True, tabl
3047
3047
if table is None :
3048
3048
table = tables .NodeTable ()
3049
3049
# Read the header and find the indexes of the required fields.
3050
- header = source .readline ().strip ("\n " ).split (sep )
3050
+ header = source .readline ().rstrip ("\n " ).split (sep )
3051
3051
is_sample_index = header .index ("is_sample" )
3052
3052
time_index = header .index ("time" )
3053
3053
population_index = None
@@ -3066,7 +3066,7 @@ def parse_nodes(source, strict=True, encoding="utf8", base64_metadata=True, tabl
3066
3066
except ValueError :
3067
3067
pass
3068
3068
for line in source :
3069
- tokens = line .split (sep )
3069
+ tokens = line .rstrip ( " \n " ). split (sep )
3070
3070
if len (tokens ) >= 2 :
3071
3071
is_sample = int (tokens [is_sample_index ])
3072
3072
time = float (tokens [time_index ])
@@ -3116,13 +3116,13 @@ def parse_edges(source, strict=True, table=None):
3116
3116
sep = "\t "
3117
3117
if table is None :
3118
3118
table = tables .EdgeTable ()
3119
- header = source .readline ().strip ("\n " ).split (sep )
3119
+ header = source .readline ().rstrip ("\n " ).split (sep )
3120
3120
left_index = header .index ("left" )
3121
3121
right_index = header .index ("right" )
3122
3122
parent_index = header .index ("parent" )
3123
3123
children_index = header .index ("child" )
3124
3124
for line in source :
3125
- tokens = line .split (sep )
3125
+ tokens = line .rstrip ( " \n " ). split (sep )
3126
3126
if len (tokens ) >= 4 :
3127
3127
left = float (tokens [left_index ])
3128
3128
right = float (tokens [right_index ])
@@ -3159,7 +3159,7 @@ def parse_sites(source, strict=True, encoding="utf8", base64_metadata=True, tabl
3159
3159
sep = "\t "
3160
3160
if table is None :
3161
3161
table = tables .SiteTable ()
3162
- header = source .readline ().strip ("\n " ).split (sep )
3162
+ header = source .readline ().rstrip ("\n " ).split (sep )
3163
3163
position_index = header .index ("position" )
3164
3164
ancestral_state_index = header .index ("ancestral_state" )
3165
3165
metadata_index = None
@@ -3168,7 +3168,7 @@ def parse_sites(source, strict=True, encoding="utf8", base64_metadata=True, tabl
3168
3168
except ValueError :
3169
3169
pass
3170
3170
for line in source :
3171
- tokens = line .split (sep )
3171
+ tokens = line .rstrip ( " \n " ). split (sep )
3172
3172
if len (tokens ) >= 2 :
3173
3173
position = float (tokens [position_index ])
3174
3174
ancestral_state = tokens [ancestral_state_index ]
@@ -3212,7 +3212,7 @@ def parse_mutations(
3212
3212
sep = "\t "
3213
3213
if table is None :
3214
3214
table = tables .MutationTable ()
3215
- header = source .readline ().strip ("\n " ).split (sep )
3215
+ header = source .readline ().rstrip ("\n " ).split (sep )
3216
3216
site_index = header .index ("site" )
3217
3217
node_index = header .index ("node" )
3218
3218
try :
@@ -3232,7 +3232,7 @@ def parse_mutations(
3232
3232
except ValueError :
3233
3233
pass
3234
3234
for line in source :
3235
- tokens = line .split (sep )
3235
+ tokens = line .rstrip ( " \n " ). split (sep )
3236
3236
if len (tokens ) >= 3 :
3237
3237
site = int (tokens [site_index ])
3238
3238
node = int (tokens [node_index ])
@@ -3289,10 +3289,10 @@ def parse_populations(
3289
3289
if table is None :
3290
3290
table = tables .PopulationTable ()
3291
3291
# Read the header and find the indexes of the required fields.
3292
- header = source .readline ().strip ("\n " ).split (sep )
3292
+ header = source .readline ().rstrip ("\n " ).split (sep )
3293
3293
metadata_index = header .index ("metadata" )
3294
3294
for line in source :
3295
- tokens = line .split (sep )
3295
+ tokens = line .rstrip ( " \n " ). split (sep )
3296
3296
if len (tokens ) >= 1 :
3297
3297
metadata = tokens [metadata_index ].encode (encoding )
3298
3298
if base64_metadata :
@@ -3329,7 +3329,10 @@ def load_text(
3329
3329
:func:`parse_nodes` and :func:`parse_edges`, respectively. ``sites``,
3330
3330
``mutations``, ``individuals`` and ``populations`` are optional, and must
3331
3331
be parsable by :func:`parse_sites`, :func:`parse_individuals`,
3332
- :func:`parse_populations`, and :func:`parse_mutations`, respectively.
3332
+ :func:`parse_populations`, and :func:`parse_mutations`, respectively. For
3333
+ convenience, if the node table refers to populations, but the ``populations``
3334
+ parameter is not provided, a minimal set of rows is added to the
3335
+ population table, so that a valid tree sequence can be returned.
3333
3336
3334
3337
The ``sequence_length`` parameter determines the
3335
3338
:attr:`TreeSequence.sequence_length` of the returned tree sequence. If it
@@ -3394,12 +3397,6 @@ def load_text(
3394
3397
base64_metadata = base64_metadata ,
3395
3398
table = tc .nodes ,
3396
3399
)
3397
- # We need to add populations any referenced in the node table.
3398
- if len (tc .nodes ) > 0 :
3399
- max_population = tc .nodes .population .max ()
3400
- if max_population != NULL :
3401
- for _ in range (max_population + 1 ):
3402
- tc .populations .add_row ()
3403
3400
if sites is not None :
3404
3401
parse_sites (
3405
3402
sites ,
@@ -3424,7 +3421,14 @@ def load_text(
3424
3421
base64_metadata = base64_metadata ,
3425
3422
table = tc .individuals ,
3426
3423
)
3427
- if populations is not None :
3424
+ if populations is None :
3425
+ # As a convenience we add any populations referenced in the node table.
3426
+ if len (tc .nodes ) > 0 :
3427
+ max_population = tc .nodes .population .max ()
3428
+ if max_population != NULL :
3429
+ for _ in range (max_population + 1 ):
3430
+ tc .populations .add_row ()
3431
+ else :
3428
3432
parse_populations (
3429
3433
populations ,
3430
3434
strict = strict ,
0 commit comments