Skip to content

Commit b324e54

Browse files
authored
Merge pull request #1826 from benjeffery/sort-options
Allow skipping of sites and mutations in sort
2 parents cc9e445 + ce64c23 commit b324e54

File tree

6 files changed

+51
-28
lines changed

6 files changed

+51
-28
lines changed

c/tests/test_tables.c

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -6297,10 +6297,11 @@ test_sort_tables_offsets(void)
62976297
CU_ASSERT_EQUAL_FATAL(ret, 0);
62986298
CU_ASSERT_TRUE(tsk_table_collection_equals(&tables, &copy, 0));
62996299

6300+
/* Individual bookmark ignored */
63006301
tsk_memset(&bookmark, 0, sizeof(bookmark));
63016302
bookmark.individuals = tables.individuals.num_rows - 1;
63026303
ret = tsk_table_collection_sort(&tables, &bookmark, 0);
6303-
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_SORT_OFFSET_NOT_SUPPORTED);
6304+
CU_ASSERT_EQUAL_FATAL(ret, 0);
63046305

63056306
tsk_table_collection_free(&tables);
63066307
tsk_table_collection_free(&copy);
@@ -6409,8 +6410,8 @@ test_sort_tables_errors(void)
64096410
tsk_table_collection_t tables;
64106411
tsk_bookmark_t pos;
64116412

6412-
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL, NULL,
6413-
NULL, NULL, NULL, 0);
6413+
tsk_treeseq_from_text(&ts, 1, single_tree_ex_nodes, single_tree_ex_edges, NULL,
6414+
single_tree_ex_sites, single_tree_ex_mutations, NULL, NULL, 0);
64146415
ret = tsk_treeseq_copy_tables(&ts, &tables, 0);
64156416
CU_ASSERT_EQUAL_FATAL(ret, 0);
64166417

@@ -6457,8 +6458,7 @@ test_sort_tables_errors(void)
64576458
ret = tsk_table_collection_sort(&tables, &pos, 0);
64586459
CU_ASSERT_EQUAL_FATAL(ret, 0);
64596460

6460-
/* Setting sites or mutations gives a BAD_PARAM. See
6461-
* github.com/tskit-dev/tskit/issues/101 */
6461+
/* Specifying only one of sites or mutations is an error */
64626462
tsk_memset(&pos, 0, sizeof(pos));
64636463
pos.sites = 1;
64646464
ret = tsk_table_collection_sort(&tables, &pos, 0);

c/tskit/tables.c

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -6348,14 +6348,6 @@ tsk_table_sorter_run(tsk_table_sorter_t *self, const tsk_bookmark_t *start)
63486348
ret = TSK_ERR_SORT_OFFSET_NOT_SUPPORTED;
63496349
goto out;
63506350
}
6351-
6352-
/* Individuals also must be all sorted or skipped entirely */
6353-
if (start->individuals == self->tables->individuals.num_rows) {
6354-
skip_individuals = true;
6355-
} else if (start->individuals != 0) {
6356-
ret = TSK_ERR_SORT_OFFSET_NOT_SUPPORTED;
6357-
goto out;
6358-
}
63596351
}
63606352
/* The indexes will be invalidated, so drop them */
63616353
ret = tsk_table_collection_drop_index(self->tables, 0);

python/CHANGELOG.rst

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -39,8 +39,11 @@
3939

4040
**Features**
4141

42+
- Allow skipping of site and mutation tables in ``TableCollection.sort``
43+
(:user:`benjeffery`, :issue:`1475`, :pr:`1826`).
44+
4245
- Add ``TableCollection.sort_individuals`` to sort the individuals as this is no longer done by the
43-
default sort. (:user:`benjeffery`, :issue:`1774`, :pr:`1789`)
46+
default sort (:user:`benjeffery`, :issue:`1774`, :pr:`1789`).
4447

4548
- Add ``__setitem__`` to all tables allowing single rows to be updated. For example
4649
``tables.nodes[0] = tables.nodes[0].replace(flags=tskit.NODE_IS_SAMPLE)``

python/_tskitmodule.c

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -6611,17 +6611,22 @@ TableCollection_sort(TableCollection *self, PyObject *args, PyObject *kwds)
66116611
int err;
66126612
PyObject *ret = NULL;
66136613
Py_ssize_t edge_start = 0;
6614+
Py_ssize_t site_start = 0;
6615+
Py_ssize_t mutation_start = 0;
66146616
tsk_bookmark_t start;
6615-
static char *kwlist[] = { "edge_start", NULL };
6617+
static char *kwlist[] = { "edge_start", "site_start", "mutation_start", NULL };
66166618

66176619
if (TableCollection_check_state(self) != 0) {
66186620
goto out;
66196621
}
6620-
if (!PyArg_ParseTupleAndKeywords(args, kwds, "|n", kwlist, &edge_start)) {
6622+
if (!PyArg_ParseTupleAndKeywords(
6623+
args, kwds, "|nnn", kwlist, &edge_start, &site_start, &mutation_start)) {
66216624
goto out;
66226625
}
66236626
memset(&start, 0, sizeof(start));
6624-
start.edges = edge_start;
6627+
start.edges = (tsk_size_t) edge_start;
6628+
start.sites = (tsk_size_t) site_start;
6629+
start.mutations = (tsk_size_t) mutation_start;
66256630
err = tsk_table_collection_sort(self->tables, &start, 0);
66266631
if (err != 0) {
66276632
handle_library_error(err);

python/tests/test_tables.py

Lines changed: 24 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2216,7 +2216,7 @@ def verify_sort(self, tables, seed):
22162216
self.verify_sort_mutation_consistency(tables, seed)
22172217
self.verify_randomise_tables(tables, seed)
22182218

2219-
def verify_edge_sort_offset(self, ts):
2219+
def verify_sort_offset(self, ts):
22202220
"""
22212221
Verifies the behaviour of the edge_start offset value.
22222222
"""
@@ -2225,7 +2225,6 @@ def verify_edge_sort_offset(self, ts):
22252225
starts = [0]
22262226
if len(edges) > 2:
22272227
starts = [0, 1, len(edges) // 2, len(edges) - 2]
2228-
random.seed(self.random_seed)
22292228
for start in starts:
22302229
# Unsort the edges starting from index start
22312230
all_edges = list(ts.edges())
@@ -2235,7 +2234,7 @@ def verify_edge_sort_offset(self, ts):
22352234
tables.edges.clear()
22362235
for e in all_edges:
22372236
tables.edges.append(e)
2238-
# Verify that import fails for randomised edges
2237+
# Verify that import fails for reversed edges
22392238
with pytest.raises(_tskit.LibraryError):
22402239
tables.tree_sequence()
22412240
# If we sort after the start value we should still fail.
@@ -2250,6 +2249,22 @@ def verify_edge_sort_offset(self, ts):
22502249
# Verify the new and old edges are equal.
22512250
assert edges == tables.edges
22522251

2252+
tables.tree_sequence()
2253+
if len(tables.mutations) > 2:
2254+
mutations = tables.mutations.copy()
2255+
tables.mutations.clear()
2256+
for m in mutations[::-1]:
2257+
tables.mutations.append(m)
2258+
with pytest.raises(_tskit.LibraryError):
2259+
tables.tree_sequence()
2260+
tables.sort(
2261+
0, site_start=len(tables.sites), mutation_start=len(tables.mutations)
2262+
)
2263+
with pytest.raises(_tskit.LibraryError):
2264+
tables.tree_sequence()
2265+
tables.sort(0)
2266+
tables.tree_sequence()
2267+
22532268
def get_wf_example(self, seed):
22542269
tables = wf.wf_sim(
22552270
6,
@@ -2277,7 +2292,7 @@ def test_wf_example(self):
22772292

22782293
def test_single_tree_no_mutations(self):
22792294
ts = msprime.simulate(10, random_seed=self.random_seed)
2280-
self.verify_edge_sort_offset(ts)
2295+
self.verify_sort_offset(ts)
22812296
self.verify_sort(ts.tables, 432)
22822297

22832298
def test_single_tree_no_mutations_metadata(self):
@@ -2288,13 +2303,13 @@ def test_single_tree_no_mutations_metadata(self):
22882303
def test_many_trees_no_mutations(self):
22892304
ts = msprime.simulate(10, recombination_rate=2, random_seed=self.random_seed)
22902305
assert ts.num_trees > 2
2291-
self.verify_edge_sort_offset(ts)
2306+
self.verify_sort_offset(ts)
22922307
self.verify_sort(ts.tables, 31)
22932308

22942309
def test_single_tree_mutations(self):
22952310
ts = msprime.simulate(10, mutation_rate=2, random_seed=self.random_seed)
22962311
assert ts.num_sites > 2
2297-
self.verify_edge_sort_offset(ts)
2312+
self.verify_sort_offset(ts)
22982313
self.verify_sort(ts.tables, 83)
22992314

23002315
def test_single_tree_mutations_metadata(self):
@@ -2320,7 +2335,7 @@ def test_many_trees_mutations(self):
23202335
)
23212336
assert ts.num_trees > 2
23222337
assert ts.num_sites > 2
2323-
self.verify_edge_sort_offset(ts)
2338+
self.verify_sort_offset(ts)
23242339
self.verify_sort(ts.tables, 173)
23252340

23262341
def test_many_trees_multichar_mutations(self):
@@ -2358,14 +2373,14 @@ def get_nonbinary_example(self, mutation_rate):
23582373

23592374
def test_nonbinary_trees(self):
23602375
ts = self.get_nonbinary_example(mutation_rate=0)
2361-
self.verify_edge_sort_offset(ts)
2376+
self.verify_sort_offset(ts)
23622377
self.verify_sort(ts.tables, 9182)
23632378

23642379
def test_nonbinary_trees_mutations(self):
23652380
ts = self.get_nonbinary_example(mutation_rate=2)
23662381
assert ts.num_trees > 2
23672382
assert ts.num_sites > 2
2368-
self.verify_edge_sort_offset(ts)
2383+
self.verify_sort_offset(ts)
23692384
self.verify_sort(ts.tables, 44)
23702385

23712386
def test_unknown_times(self):

python/tskit/tables.py

Lines changed: 10 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -3111,7 +3111,7 @@ def map_ancestors(self, *args, **kwargs):
31113111
# A deprecated alias for link_ancestors()
31123112
return self.link_ancestors(*args, **kwargs)
31133113

3114-
def sort(self, edge_start=0):
3114+
def sort(self, edge_start=0, *, site_start=0, mutation_start=0):
31153115
"""
31163116
Sorts the tables in place. This ensures that all tree sequence ordering
31173117
requirements listed in the
@@ -3124,6 +3124,10 @@ def sort(self, edge_start=0):
31243124
are not affected. This parameter is provided to allow for efficient sorting
31253125
when the user knows that the edges up to a given index are already sorted.
31263126
3127+
If both ``site_start`` and ``mutation_start`` are equal to the number of rows
3128+
in their retrospective tables then neither is sorted. Note that a partial
3129+
non-sorting is not possible, and both or neither must be skipped.
3130+
31273131
The node, population and provenance tables are not affected
31283132
by this method.
31293133
@@ -3160,8 +3164,12 @@ def sort(self, edge_start=0):
31603164
31613165
:param int edge_start: The index in the edge table where sorting starts
31623166
(default=0; must be <= len(edges)).
3167+
:param int site_start: The index in the site table where sorting starts
3168+
(default=0; must be one of [0, len(sites)]).
3169+
:param int mutation_start: The index in the mutation table where sorting starts
3170+
(default=0; must be one of [0, len(mutations)]).
31633171
"""
3164-
self._ll_tables.sort(edge_start)
3172+
self._ll_tables.sort(edge_start, site_start, mutation_start)
31653173
# TODO add provenance
31663174

31673175
def sort_individuals(self):

0 commit comments

Comments
 (0)