Skip to content

Add discrete time #1890

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Nov 8, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
76 changes: 76 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -1022,6 +1022,81 @@ test_simplest_discrete_genome(void)
tsk_table_collection_free(&tables);
}

static void
test_simplest_discrete_time(void)
{
int ret;
tsk_treeseq_t ts;
tsk_table_collection_t tables;
const char *nodes = "1 0 0\n"
"1 0 0\n"
"0 1 0\n"
"0 0 0\n"
"0 0 0";
const char *edges = "0 1 2 0,1,3,4\n";
const char *sites = "0.1 0\n"
"0.2 0\n"
"0.3 0\n"
"0.4 0\n";
const char *mutations = "0 0 1\n"
"1 1 1\n"
"2 3 1\n"
"3 4 1";
const char *migrations = "0 1 0 0 1 1";

tsk_treeseq_from_text(
&ts, 1, nodes, edges, migrations, sites, mutations, NULL, NULL, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));

ret = tsk_table_collection_copy(ts.tables, &tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tsk_treeseq_free(&ts);

ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);

tables.nodes.time[0] = 0.0001;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
tables.nodes.time[0] = 0;

tables.mutations.time[0] = 0.001;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
tables.mutations.time[0] = 0;

tables.migrations.time[0] = 0.001;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);
tables.migrations.time[0] = 0;

tables.mutations.time[0] = TSK_UNKNOWN_TIME;
tables.mutations.time[1] = TSK_UNKNOWN_TIME;
tables.mutations.time[2] = TSK_UNKNOWN_TIME;
tables.mutations.time[3] = TSK_UNKNOWN_TIME;
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);

/* An empty tree sequence is has a discrete time. */
tsk_table_collection_clear(&tables, 0);
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
CU_ASSERT_EQUAL_FATAL(ret, 0);
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
tsk_treeseq_free(&ts);

tsk_table_collection_free(&tables);
}

static void
test_simplest_records(void)
{
Expand Down Expand Up @@ -6746,6 +6821,7 @@ main(int argc, char **argv)
CU_TestInfo tests[] = {
/* simplest example tests */
{ "test_simplest_discrete_genome", test_simplest_discrete_genome },
{ "test_simplest_discrete_time", test_simplest_discrete_time },
{ "test_simplest_records", test_simplest_records },
{ "test_simplest_nonbinary_records", test_simplest_nonbinary_records },
{ "test_simplest_unary_records", test_simplest_unary_records },
Expand Down
64 changes: 64 additions & 0 deletions c/tests/testlib.c
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,63 @@ parse_edges(const char *text, tsk_edge_table_t *edge_table)
}
}

void
parse_migrations(const char *text, tsk_migration_table_t *migration_table)
{
tsk_id_t ret_id;
size_t c, k;
size_t MAX_LINE = 1024;
char line[MAX_LINE];
const char *whitespace = " \t";
char *p;
double left, right, time;
int node, source, dest;
char *metadata;

c = 0;
while (text[c] != '\0') {
/* Fill in the line */
k = 0;
while (text[c] != '\n' && text[c] != '\0') {
CU_ASSERT_FATAL(k < MAX_LINE - 1);
line[k] = text[c];
c++;
k++;
}
if (text[c] == '\n') {
c++;
}
line[k] = '\0';
p = strtok(line, whitespace);
CU_ASSERT_FATAL(p != NULL);
left = atof(p);
p = strtok(NULL, whitespace);
CU_ASSERT_FATAL(p != NULL);
right = atof(p);
p = strtok(NULL, whitespace);
CU_ASSERT_FATAL(p != NULL);
node = atoi(p);
p = strtok(NULL, whitespace);
CU_ASSERT_FATAL(p != NULL);
source = atoi(p);
p = strtok(NULL, whitespace);
CU_ASSERT_FATAL(p != NULL);
dest = atoi(p);
p = strtok(NULL, whitespace);
CU_ASSERT_FATAL(p != NULL);
time = atof(p);
p = strtok(NULL, whitespace);
if (p == NULL) {
metadata = "";
} else {
metadata = p;
}
ret_id = tsk_migration_table_add_row(migration_table, left, right, node, source,
dest, time, metadata, strlen(metadata));
CU_ASSERT_FATAL(ret_id >= 0);
}
}

void
parse_sites(const char *text, tsk_site_table_t *site_table)
{
Expand Down Expand Up @@ -706,11 +763,18 @@ tsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nod
if (individuals != NULL) {
parse_individuals(individuals, &tables.individuals);
}
if (migrations != NULL) {
parse_migrations(migrations, &tables.migrations);
}
/* We need to add in populations if they are referenced */
max_population_id = -1;
for (j = 0; j < tables.nodes.num_rows; j++) {
max_population_id = TSK_MAX(max_population_id, tables.nodes.population[j]);
}
for (j = 0; j < tables.migrations.num_rows; j++) {
max_population_id = TSK_MAX(max_population_id, tables.migrations.source[j]);
max_population_id = TSK_MAX(max_population_id, tables.migrations.dest[j]);
}
if (max_population_id >= 0) {
for (j = 0; j <= (tsk_size_t) max_population_id; j++) {
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);
Expand Down
38 changes: 37 additions & 1 deletion c/tskit/trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -315,13 +315,33 @@ tsk_treeseq_init_migrations(tsk_treeseq_t *self)
tsk_size_t num_migrations = self->tables->migrations.num_rows;
const double *restrict left = self->tables->migrations.left;
const double *restrict right = self->tables->migrations.right;
const double *restrict time = self->tables->migrations.time;
bool discrete_breakpoints = true;
bool discrete_times = true;

for (j = 0; j < num_migrations; j++) {
discrete_breakpoints
= discrete_breakpoints && is_discrete(left[j]) && is_discrete(right[j]);
discrete_times
= discrete_times && (is_discrete(time[j]) || tsk_is_unknown_time(time[j]));
}
self->discrete_genome = self->discrete_genome && discrete_breakpoints;
self->discrete_time = self->discrete_time && discrete_times;
}

static void
tsk_treeseq_init_mutations(tsk_treeseq_t *self)
{
tsk_size_t j;
tsk_size_t num_mutations = self->tables->mutations.num_rows;
const double *restrict time = self->tables->mutations.time;
bool discrete_times = true;

for (j = 0; j < num_mutations; j++) {
discrete_times
= discrete_times && (is_discrete(time[j]) || tsk_is_unknown_time(time[j]));
}
self->discrete_time = self->discrete_time && discrete_times;
}

static int
Expand All @@ -330,7 +350,9 @@ tsk_treeseq_init_nodes(tsk_treeseq_t *self)
tsk_size_t j, k;
tsk_size_t num_nodes = self->tables->nodes.num_rows;
const tsk_flags_t *restrict node_flags = self->tables->nodes.flags;
const double *restrict time = self->tables->nodes.time;
int ret = 0;
bool discrete_times = true;

/* Determine the sample size */
self->num_samples = 0;
Expand All @@ -356,6 +378,12 @@ tsk_treeseq_init_nodes(tsk_treeseq_t *self)
}
}
tsk_bug_assert(k == self->num_samples);

for (j = 0; j < num_nodes; j++) {
discrete_times
= discrete_times && (is_discrete(time[j]) || tsk_is_unknown_time(time[j]));
}
self->discrete_time = self->discrete_time && discrete_times;
out:
return ret;
}
Expand Down Expand Up @@ -426,11 +454,12 @@ tsk_treeseq_init(
tsk_memcpy(self->tables->file_uuid, tables->file_uuid, TSK_UUID_SIZE + 1);
}

self->discrete_genome = true;
self->discrete_time = true;
ret = tsk_treeseq_init_nodes(self);
if (ret != 0) {
goto out;
}
self->discrete_genome = true;
ret = tsk_treeseq_init_sites(self);
if (ret != 0) {
goto out;
Expand All @@ -444,6 +473,7 @@ tsk_treeseq_init(
goto out;
}
tsk_treeseq_init_migrations(self);
tsk_treeseq_init_mutations(self);

if (tsk_treeseq_get_time_units_length(self) == strlen(TSK_TIME_UNITS_UNCALIBRATED)
&& !strncmp(tsk_treeseq_get_time_units(self), TSK_TIME_UNITS_UNCALIBRATED,
Expand Down Expand Up @@ -669,6 +699,12 @@ tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self)
return self->discrete_genome;
}

bool
tsk_treeseq_get_discrete_time(const tsk_treeseq_t *self)
{
return self->discrete_time;
}

/* Stats functions */

#define GET_2D_ROW(array, row_len, row) (array + (((size_t)(row_len)) * (size_t) row))
Expand Down
3 changes: 3 additions & 0 deletions c/tskit/trees.h
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,8 @@ typedef struct {
bool time_uncalibrated;
/* Are all genome coordinates discrete? */
bool discrete_genome;
/* Are all time values discrete? */
bool discrete_time;
/* Breakpoints along the sequence, including 0 and L. */
double *breakpoints;
/* If a node is a sample, map to its index in the samples list */
Expand Down Expand Up @@ -266,6 +268,7 @@ const tsk_id_t *tsk_treeseq_get_samples(const tsk_treeseq_t *self);
const tsk_id_t *tsk_treeseq_get_sample_index_map(const tsk_treeseq_t *self);
bool tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u);
bool tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self);
bool tsk_treeseq_get_discrete_time(const tsk_treeseq_t *self);

int tsk_treeseq_get_node(const tsk_treeseq_t *self, tsk_id_t index, tsk_node_t *node);
int tsk_treeseq_get_edge(const tsk_treeseq_t *self, tsk_id_t index, tsk_edge_t *edge);
Expand Down
4 changes: 4 additions & 0 deletions python/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,9 @@

- Add the ``TreeSequence.alignments`` method. (user:`jeromekelleher`, :pr:`1825`)

- Add the ``discrete_time`` property to the TreeSequence class which is true if
all time coordinates are discrete or unknown (:user:`benjeffery`, :issue:`1839`, :pr:`1890`)

**Fixes**

- `dump_tables` omitted individual parents. (:user:`benjeffery`, :issue:`1828`, :pr:`1884`)
Expand All @@ -101,6 +104,7 @@
(:user:`jeetsukumaran`, :user:`jeromekelleher`, :issue:`1785`, :pr:`1835`,
:pr:`1836`, :pr:`1838`)


--------------------
[0.3.7] - 2021-07-08
--------------------
Expand Down
17 changes: 17 additions & 0 deletions python/_tskitmodule.c
Original file line number Diff line number Diff line change
Expand Up @@ -7830,6 +7830,19 @@ TreeSequence_get_discrete_genome(TreeSequence *self)
return ret;
}

static PyObject *
TreeSequence_get_discrete_time(TreeSequence *self)
{
PyObject *ret = NULL;

if (TreeSequence_check_state(self) != 0) {
goto out;
}
ret = Py_BuildValue("i", tsk_treeseq_get_discrete_time(self->tree_sequence));
out:
return ret;
}

static PyObject *
TreeSequence_get_breakpoints(TreeSequence *self)
{
Expand Down Expand Up @@ -9039,6 +9052,10 @@ static PyMethodDef TreeSequence_methods[] = {
.ml_meth = (PyCFunction) TreeSequence_get_discrete_genome,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns True if this TreeSequence has discrete coordinates" },
{ .ml_name = "get_discrete_time",
.ml_meth = (PyCFunction) TreeSequence_get_discrete_time,
.ml_flags = METH_NOARGS,
.ml_doc = "Returns True if this TreeSequence has discrete times" },
{ .ml_name = "get_breakpoints",
.ml_meth = (PyCFunction) TreeSequence_get_breakpoints,
.ml_flags = METH_NOARGS,
Expand Down
13 changes: 13 additions & 0 deletions python/tests/test_highlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1270,6 +1270,19 @@ def is_discrete(a):
)
assert ts.discrete_genome == discrete_genome

@pytest.mark.parametrize("ts", get_example_tree_sequences())
def test_discrete_time(self, ts):
def is_discrete(a):
return np.all(np.logical_or(np.floor(a) == a, tskit.is_unknown_time(a)))

tables = ts.tables
discrete_time = (
is_discrete(tables.nodes.time)
and is_discrete(tables.mutations.time)
and is_discrete(tables.migrations.time)
)
assert ts.discrete_time == discrete_time

def test_trees(self):
for ts in get_example_tree_sequences():
self.verify_trees(ts)
Expand Down
14 changes: 14 additions & 0 deletions python/tests/test_lowlevel.py
Original file line number Diff line number Diff line change
Expand Up @@ -1412,6 +1412,20 @@ def test_clear_table(self, ts_fixture):
assert tables.sequence_length == ts_fixture.tables.sequence_length
assert tables.time_units == ts_fixture.tables.time_units

def test_discrete_genome(self):
tables = _tskit.TableCollection(1)
tables.build_index()
ts = _tskit.TreeSequence()
ts.load_tables(tables)
assert ts.get_discrete_genome() == 1

def test_discrete_time(self):
tables = _tskit.TableCollection(1)
tables.build_index()
ts = _tskit.TreeSequence()
ts.load_tables(tables)
assert ts.get_discrete_time() == 1


class StatsInterfaceMixin:
"""
Expand Down
17 changes: 17 additions & 0 deletions python/tskit/trees.py
Original file line number Diff line number Diff line change
Expand Up @@ -3950,6 +3950,23 @@ def discrete_genome(self):
"""
return bool(self._ll_tree_sequence.get_discrete_genome())

@property
def discrete_time(self):
"""
Returns True if all time coordinates in this TreeSequence are
discrete integer values. This is true iff all the following are true:

- All node times are discrete
- All mutation times are discrete
- All migration times are discrete

Note that ``tskit.UNKNOWN_TIME`` counts as discrete.

:return: True if this TreeSequence uses discrete time coordinates.
:rtype: bool
"""
return bool(self._ll_tree_sequence.get_discrete_time())

@property
def sequence_length(self):
"""
Expand Down