Skip to content

Commit 11a7cee

Browse files
committed
adding jerome's suggestions
1 parent 46731d8 commit 11a7cee

File tree

2 files changed

+62
-47
lines changed

2 files changed

+62
-47
lines changed

python/tests/tsutil.py

Lines changed: 51 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -138,26 +138,22 @@ def insert_branch_mutations(ts, mutations_per_branch=1):
138138

139139
def remove_mutation_times(ts):
    """
    Returns the tree sequence built from the tables of ``ts`` after every
    entry of the mutation table's time column has been overwritten with
    tskit.UNKNOWN_TIME.
    """
    tables = ts.tables
    # Same shape and dtype as the existing time column, filled with the
    # "unknown" marker value.
    unknown_times = np.full_like(tables.mutations.time, tskit.UNKNOWN_TIME)
    tables.mutations.time = unknown_times
    return tables.tree_sequence()
152143

153144

154-
def insert_discrete_time_mutations(ts, num_times=4):
145+
def insert_discrete_time_mutations(ts, num_times=4, num_sites=10):
146+
"""
147+
Inserts mutations in the tree sequence at num_sites regularly-spaced
148+
positions, at only a discrete set of times (the same for all trees): at
149+
num_times times evenly spaced between 0 and the maximum tree height.
150+
"""
155151
tables = ts.tables
156152
tables.sites.clear()
157153
tables.mutations.clear()
158154
height = max([t.time(t.roots[0]) for t in ts.trees()])
159-
for pos in range(int(tables.sequence_length)):
160-
anc = "X" * pos
155+
for j, pos in enumerate(np.linspace(0, tables.sequence_length, num_sites + 1)[:-1]):
156+
anc = "X" * j
161157
tables.sites.add_row(position=pos, ancestral_state=anc)
162158
t = ts.at(pos)
163159
for k, s in enumerate(np.linspace(0, height, num_times)):
@@ -166,11 +162,12 @@ def insert_discrete_time_mutations(ts, num_times=4):
166162
(t.parent(n) == tskit.NULL) or (t.time(t.parent(n)) > s)
167163
):
168164
tables.mutations.add_row(
169-
site=pos, node=n, derived_state=anc + str(k), time=s
165+
site=j, node=n, derived_state=anc + str(k), time=s
170166
)
171167
k += 1
172168
tables.sort()
173-
tables.compute_mutation_parents
169+
tables.build_index()
170+
tables.compute_mutation_parents()
174171
return tables.tree_sequence()
175172

176173

@@ -833,13 +830,14 @@ def shuffle_tables(
833830
shuffle_individuals=True,
834831
shuffle_sites=True,
835832
shuffle_mutations=True,
833+
shuffle_migrations=True,
836834
keep_mutation_parent_order=False,
837835
):
838836
"""
839-
Randomizes the order the Individual, Population, Edge, Site and Mutation Tables rows.
840-
Note that if mutations are completely shuffled, then TableCollection.sort() will not
841-
necessarily produce valid tables (unless all mutation times are present and
842-
distinct), since it only puts parent mutations before children if
837+
Randomizes the order of rows in (possibly) all tables except the Node table. Note
838+
that if mutations are completely shuffled, then TableCollection.sort() will
839+
not necessarily produce valid tables (unless all mutation times are present
840+
and distinct), since it only puts parent mutations before children if
843841
canonical=True. However, setting keep_mutation_parent_order to True will
844842
maintain the order of mutations within each site.
845843
@@ -885,6 +883,20 @@ def shuffle_tables(
885883
rng.shuffle(randomised_edges)
886884
for e in randomised_edges:
887885
tables.edges.add_row(e.left, e.right, e.parent, e.child, metadata=e.metadata)
886+
# migrations
887+
randomised_migrations = list(orig.migrations)
888+
if shuffle_migrations:
889+
rng.shuffle(randomised_migrations)
890+
for m in randomised_migrations:
891+
tables.migrations.add_row(
892+
m.left,
893+
m.right,
894+
m.node,
895+
pop_id_map[m.source],
896+
pop_id_map[m.dest],
897+
m.time,
898+
m.metadata,
899+
)
888900
# sites
889901
randomised_sites = list(enumerate(orig.sites))
890902
if shuffle_sites:
@@ -934,7 +946,9 @@ def orig_cmp_site(i, j, tables):
934946

935947

936948
def orig_cmp_mutation(i, j, tables, site_order, canonical=False, num_descendants=None):
937-
ret = site_order[tables.mutations.site[i]] - site_order[tables.mutations.site[j]]
949+
site_i = tables.mutations.site[i]
950+
site_j = tables.mutations.site[j]
951+
ret = site_order[site_i] - site_order[site_j]
938952
if (
939953
ret == 0
940954
and (not tskit.is_unknown_time(tables.mutations.time[i]))
@@ -1574,40 +1588,39 @@ def assert_table_collections_equal(t1, t2, ignore_provenance=False):
15741588
Checks for table collection equality, but step-by-step,
15751589
so it's easy to see what's different.
15761590
"""
1577-
if ignore_provenance:
1578-
t1.provenances.clear()
1579-
t2.provenances.clear()
15801591
assert_tables_equal(t1.populations, t2.populations, "populations")
15811592
assert_tables_equal(t1.individuals, t2.individuals, "individuals")
15821593
assert_tables_equal(t1.nodes, t2.nodes, "nodes")
15831594
assert_tables_equal(t1.edges, t2.edges, "edges")
15841595
assert_tables_equal(t1.sites, t2.sites, "sites")
15851596
assert_tables_equal(t1.mutations, t2.mutations, "mutations")
15861597
assert_tables_equal(t1.migrations, t2.migrations, "migrations")
1587-
assert_tables_equal(t1.provenances, t2.provenances, "provenances")
1598+
if not ignore_provenance:
1599+
assert_tables_equal(t1.provenances, t2.provenances, "provenances")
15881600
assert t1.metadata_schema == t2.metadata_schema
15891601
assert t1.metadata == t2.metadata
15901602
assert t1.metadata_bytes == t2.metadata_bytes
15911603
assert t1.sequence_length == t2.sequence_length
1592-
assert t1 == t2
1604+
assert t1.equals(t2, ignore_provenance=ignore_provenance)
15931605

15941606

15951607
def assert_tables_equal(t1, t2, label=""):
    """
    Checks that two tables are equal, raising an AssertionError whose message
    identifies the first difference found.

    The checks are made in this order: the metadata schemas (only if ``t1``
    has a ``metadata_schema`` attribute), then the rows pairwise over the
    rows the two tables have in common, then the number of rows, and finally
    a catch-all ``t1 == t2`` assertion.

    :param t1: The first table.
    :param t2: The second table.
    :param str label: Text prefixed to the error messages, used to say which
        table is being compared.
    :raises AssertionError: If any of the checks finds a difference.
    """
    if hasattr(t1, "metadata_schema"):
        if t1.metadata_schema != t2.metadata_schema:
            # Show *both* schemas, each under its own header line.
            msg = (
                f"{label} :::::::::: t1 ::::::::::::\n{t1.metadata_schema}\n"
                f"{label} :::::::::: t2 ::::::::::::\n{t2.metadata_schema}"
            )
            raise AssertionError(msg)
    # zip stops at the shorter table, so a difference in length is only
    # detected by the num_rows check below.
    for k, (e1, e2) in enumerate(zip(t1, t2)):
        if e1 != e2:
            msg = (
                f"{label} :::::::::: t1 (row {k}) ::::::::::::\n{e1}\n"
                f"{label} :::::::::: t2 (row {k}) ::::::::::::\n{e2}"
            )
            raise AssertionError(msg)
    if t1.num_rows != t2.num_rows:
        raise AssertionError(
            f"{label}: t1.num_rows {t1.num_rows} != {t2.num_rows} t2.num_rows"
        )
    # Catch anything not covered by the step-by-step checks above.
    assert t1 == t2

python/tskit/tables.py

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2603,7 +2603,7 @@ def sort(self, edge_start=0):
26032603

26042604
def canonicalise(self):
26052605
"""
2606-
This puts the tables in *canonical* order - to do this, the individual
2606+
This puts the tables in *canonical* form - to do this, the individual
26072607
and population tables are sorted by the first node that refers to each
26082608
(see :meth:`TreeSequence.subset`, and note that individuals and
26092609
populations not referred to by any nodes will be put at the end of the
@@ -2667,8 +2667,8 @@ def deduplicate_sites(self):
26672667
site), and renumbering the ``site`` column of the mutation table
26682668
appropriately. This requires the site table to be sorted by position.
26692669
2670-
This method does not sort the tables afterwards, so mutations may no longer
2671-
be sorted by time.
2670+
.. warning:: This method does not sort the tables afterwards, so
2671+
mutations may no longer be sorted by time.
26722672
"""
26732673
self._ll_tables.deduplicate_sites()
26742674
# TODO add provenance
@@ -2956,20 +2956,22 @@ def subset(
29562956
):
29572957
"""
29582958
Modifies the tables in place to contain only the entries referring to
2959-
the provided list of nodes, with nodes reordered according to the order
2960-
they appear in the list. See :meth:`TreeSequence.subset` for a more
2961-
detailed description.
2959+
the provided list of node IDs, with nodes reordered according to the
2960+
order they appear in the list. See :meth:`TreeSequence.subset` for a
2961+
more detailed description.
29622962
2963-
Note: the tables can be completely unsorted.
2963+
Note: there are no sortedness requirements on the tables.
29642964
29652965
:param list nodes: The list of nodes for which to retain information. This
29662966
may be a numpy array (or array-like) object (dtype=np.int32).
29672967
:param bool record_provenance: Whether to record a provenance entry
29682968
in the provenance table for this operation.
29692969
:param bool filter_populations: Whether to remove populations not referenced by
2970-
retained nodes. If False, the population table will remain unchanged.
2970+
retained nodes. If False, the population table will not be altered
2971+
in any way.
29712972
:param bool filter_individuals: Whether to remove individuals not referenced by
2972-
retained nodes. If False, the individuals table will remain unchanged.
2973+
retained nodes. If False, the individuals table will not be altered
2974+
in any way.
29732975
:param bool filter_sites: Whether to remove sites not referenced by
29742976
retained mutations. If False, the site table will remain unchanged.
29752977
:param bool canonicalise: If True, retains all unused entries, putting

0 commit comments

Comments
 (0)