Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -925,6 +925,104 @@ verify_empty_tree_sequence(tsk_treeseq_t *ts, double sequence_length)
* Simplest test cases.
*======================================================*/

static void
test_simplest_discrete_genome(void)
{
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0";
const char *edges = "0 1 2 0,1\n";
tsk_treeseq_t ts;
tsk_table_collection_t tables;
tsk_id_t ret_id;
int ret;

tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));

ret = tsk_table_collection_copy(ts.tables, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);

tables.sequence_length = 1.001;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.sequence_length = 1;

ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);

tables.edges.right[0] = 0.999;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.edges.right[0] = 1.0;

tables.edges.left[0] = 0.999;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.edges.left[0] = 0;

ret_id = tsk_site_table_add_row(&tables.sites, 0, "A", 1, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);

tables.sites.position[0] = 0.001;
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.sites.position[0] = 0;

/* Need another population for a migration */
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 1);

ret_id
= tsk_migration_table_add_row(&tables.migrations, 0, 1, 0, 0, 1, 1.0, NULL, 0);
CU_ASSERT_EQUAL_FATAL(ret_id, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);

tables.migrations.left[0] = 0.001;
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.migrations.left[0] = 0;

tables.migrations.right[0] = 0.999;
CU_ASSERT_EQUAL_FATAL(ret, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);
tables.migrations.right[0] = 1;

/* An empty tree sequence is has a discrete genome. */
tsk_table_collection_clear(&tables, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_genome(&ts));
tsk_treeseq_free(&ts);

tsk_table_collection_free(&tables);
}

static void
test_simplest_records(void)
{
Expand Down Expand Up @@ -6558,6 +6656,7 @@ main(int argc, char **argv)
{
CU_TestInfo tests[] = {
/* simplest example tests */
{ "test_simplest_discrete_genome", test_simplest_discrete_genome },
{ "test_simplest_records", test_simplest_records },
{ "test_simplest_nonbinary_records", test_simplest_nonbinary_records },
{ "test_simplest_unary_records", test_simplest_unary_records },
Expand Down
39 changes: 39 additions & 0 deletions c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@

#include <tskit/trees.h>

static inline bool
is_discrete(double x)
{
return trunc(x) == x;
}

/* ======================================================== *
* tree sequence
* ======================================================== */
Expand Down Expand Up @@ -127,6 +133,8 @@ tsk_treeseq_init_sites(tsk_treeseq_t *self)
const tsk_size_t num_mutations = self->tables->mutations.num_rows;
const tsk_size_t num_sites = self->tables->sites.num_rows;
const tsk_id_t *restrict mutation_site = self->tables->mutations.site;
const double *restrict site_position = self->tables->sites.position;
bool discrete_sites = true;

self->site_mutations_mem
= tsk_malloc(num_mutations * sizeof(*self->site_mutations_mem));
Expand All @@ -148,6 +156,7 @@ tsk_treeseq_init_sites(tsk_treeseq_t *self)
}
k = 0;
for (j = 0; j < (tsk_id_t) num_sites; j++) {
discrete_sites = discrete_sites && is_discrete(site_position[j]);
self->site_mutations[j] = self->site_mutations_mem + offset;
self->site_mutations_length[j] = 0;
/* Go through all mutations for this site */
Expand All @@ -161,6 +170,7 @@ tsk_treeseq_init_sites(tsk_treeseq_t *self)
goto out;
}
}
self->discrete_genome = self->discrete_genome && discrete_sites;
out:
return ret;
}
Expand Down Expand Up @@ -243,6 +253,7 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
const double *restrict edge_right = self->tables->edges.right;
const double *restrict edge_left = self->tables->edges.left;
tsk_size_t num_trees_alloc = self->num_trees + 1;
bool discrete_breakpoints = true;

self->tree_sites_length
= tsk_malloc(num_trees_alloc * sizeof(*self->tree_sites_length));
Expand All @@ -264,6 +275,7 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
j = 0;
k = 0;
while (j < num_edges || tree_left < sequence_length) {
discrete_breakpoints = discrete_breakpoints && is_discrete(tree_left);
self->breakpoints[tree_index] = tree_left;
while (k < num_edges && edge_right[O[k]] == tree_left) {
k++;
Expand All @@ -289,11 +301,29 @@ tsk_treeseq_init_trees(tsk_treeseq_t *self)
tsk_bug_assert(site == num_sites);
tsk_bug_assert(tree_index == self->num_trees);
self->breakpoints[tree_index] = tree_right;
discrete_breakpoints = discrete_breakpoints && is_discrete(tree_right);
self->discrete_genome = self->discrete_genome && discrete_breakpoints;
ret = 0;
out:
return ret;
}

static void
tsk_treeseq_init_migrations(tsk_treeseq_t *self)
{
tsk_size_t j;
tsk_size_t num_migrations = self->tables->migrations.num_rows;
const double *restrict left = self->tables->migrations.left;
const double *restrict right = self->tables->migrations.right;
bool discrete_breakpoints = true;

for (j = 0; j < num_migrations; j++) {
discrete_breakpoints
= discrete_breakpoints && is_discrete(left[j]) && is_discrete(right[j]);
}
self->discrete_genome = self->discrete_genome && discrete_breakpoints;
}

static int
tsk_treeseq_init_nodes(tsk_treeseq_t *self)
{
Expand Down Expand Up @@ -400,6 +430,7 @@ tsk_treeseq_init(
if (ret != 0) {
goto out;
}
self->discrete_genome = true;
ret = tsk_treeseq_init_sites(self);
if (ret != 0) {
goto out;
Expand All @@ -412,6 +443,8 @@ tsk_treeseq_init(
if (ret != 0) {
goto out;
}
tsk_treeseq_init_migrations(self);

if (tsk_treeseq_get_time_units_length(self) == strlen(TSK_TIME_UNITS_UNCALIBRATED)
&& !strncmp(tsk_treeseq_get_time_units(self), TSK_TIME_UNITS_UNCALIBRATED,
strlen(TSK_TIME_UNITS_UNCALIBRATED))) {
Expand Down Expand Up @@ -630,6 +663,12 @@ tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u)
return ret;
}

bool
tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self)
{
return self->discrete_genome;
}

/* Stats functions */

#define GET_2D_ROW(array, row_len, row) (array + (((size_t)(row_len)) * (size_t) row))
Expand Down
3 changes: 3 additions & 0 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,8 @@ typedef struct {
tsk_id_t *samples;
/* Does this tree sequence have time_units == "uncalibrated" */
bool time_uncalibrated;
/* Are all genome coordinates discrete? */
bool discrete_genome;
/* Breakpoints along the sequence, including 0 and L. */
double *breakpoints;
/* If a node is a sample, map to its index in the samples list */
Expand Down Expand Up @@ -268,6 +270,7 @@ const double *tsk_treeseq_get_breakpoints(const tsk_treeseq_t *self);
const tsk_id_t *tsk_treeseq_get_samples(const tsk_treeseq_t *self);
const tsk_id_t *tsk_treeseq_get_sample_index_map(const tsk_treeseq_t *self);
bool tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u);
bool tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self);

int tsk_treeseq_get_node(const tsk_treeseq_t *self, tsk_id_t index, tsk_node_t *node);
int tsk_treeseq_get_edge(const tsk_treeseq_t *self, tsk_id_t index, tsk_edge_t *edge);
Expand Down
3 changes: 3 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,9 @@
- Substantial performance improvement for ``Tree.total_branch_length``
(:user:`jeromekelleher`, :issue:`1794` :pr:`1799`)

- Add the ``discrete_genome`` property to the TreeSequence class which is true if
all coordinates are discrete (:user:`jeromekelleher`, :issue:`1144`, :pr:`1819`)

--------------------
[0.3.7] - 2021-07-08
--------------------
Expand Down
17 changes: 17 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -7812,6 +7812,19 @@ TreeSequence_get_sequence_length(TreeSequence *self)
return ret;
}

static PyObject *
TreeSequence_get_discrete_genome(TreeSequence *self)
{
PyObject *ret = NULL;

if (TreeSequence_check_state(self) != 0) {
goto out;
}
ret = Py_BuildValue("i", tsk_treeseq_get_discrete_genome(self->tree_sequence));
out:
return ret;
}

static PyObject *
TreeSequence_get_breakpoints(TreeSequence *self)
{
Expand Down Expand Up @@ -9017,6 +9030,10 @@ static PyMethodDef TreeSequence_methods[] = {
.ml_meth = (PyCFunction) TreeSequence_get_sequence_length,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns the sequence length in bases." },
{ .ml_name = "get_discrete_genome",
.ml_meth = (PyCFunction) TreeSequence_get_discrete_genome,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns True if this TreeSequence has discrete coordinates" },
{ .ml_name = "get_breakpoints",
.ml_meth = (PyCFunction) TreeSequence_get_breakpoints,
.ml_flags = METH_NOARGS,
Expand Down
16 changes: 16 additions & 0 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1241,6 +1241,22 @@ class TestTreeSequence(HighLevelTestCase):
Tests for the tree sequence object.
"""

@pytest.mark.parametrize("ts", get_example_tree_sequences())
def test_discrete_genome(self, ts):
def is_discrete(a):
return np.all(np.floor(a) == a)

tables = ts.tables
discrete_genome = (
is_discrete([tables.sequence_length])
and is_discrete(tables.edges.left)
and is_discrete(tables.edges.right)
and is_discrete(tables.sites.position)
and is_discrete(tables.migrations.left)
and is_discrete(tables.migrations.right)
)
assert ts.discrete_genome == discrete_genome

def test_trees(self):
for ts in get_example_tree_sequences():
self.verify_trees(ts)
Expand Down
16 changes: 16 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -3842,6 +3842,22 @@ def get_sample_size(self):
def file_uuid(self):
return self._ll_tree_sequence.get_file_uuid()

@property
def discrete_genome(self):
"""
Returns True if all genome coordinates in this TreeSequence are
discrete integer values. This is true iff all the following are true:

- The sequence length is discrete
- All site positions are discrete
- All left and right edge coordinates are discrete
- All migration left and right coordinates are discrete

:return: True if this TreeSequence uses discrete genome coordinates.
:rtype: bool
"""
return bool(self._ll_tree_sequence.get_discrete_genome())

@property
def sequence_length(self):
"""
Expand Down