Skip to content

Commit 89a2cb4

Browse files
committed
Add discrete time
1 parent ab99547 commit 89a2cb4

File tree

9 files changed

+243
-2
lines changed

9 files changed

+243
-2
lines changed

c/tests/test_trees.c

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1022,6 +1022,81 @@ test_simplest_discrete_genome(void)
10221022
tsk_table_collection_free(&tables);
10231023
}
10241024

1025+
static void
1026+
test_simplest_discrete_time(void)
1027+
{
1028+
int ret;
1029+
tsk_treeseq_t ts;
1030+
tsk_table_collection_t tables;
1031+
const char *nodes = "1 0 0\n"
1032+
"1 0 0\n"
1033+
"0 1 0\n"
1034+
"0 0 0\n"
1035+
"0 0 0";
1036+
const char *edges = "0 1 2 0,1,3,4\n";
1037+
const char *sites = "0.1 0\n"
1038+
"0.2 0\n"
1039+
"0.3 0\n"
1040+
"0.4 0\n";
1041+
const char *mutations = "0 0 1\n"
1042+
"1 1 1\n"
1043+
"2 3 1\n"
1044+
"3 4 1";
1045+
const char *migrations = "0 1 0 0 1 1";
1046+
1047+
tsk_treeseq_from_text(
1048+
&ts, 1, nodes, edges, migrations, sites, mutations, NULL, NULL, 0);
1049+
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
1050+
1051+
ret = tsk_table_collection_copy(ts.tables, &tables, 0);
1052+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1053+
tsk_treeseq_free(&ts);
1054+
1055+
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
1056+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1057+
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
1058+
tsk_treeseq_free(&ts);
1059+
1060+
tables.nodes.time[0] = 0.0001;
1061+
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
1062+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1063+
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
1064+
tsk_treeseq_free(&ts);
1065+
tables.nodes.time[0] = 0;
1066+
1067+
tables.mutations.time[0] = 0.001;
1068+
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
1069+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1070+
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
1071+
tsk_treeseq_free(&ts);
1072+
tables.mutations.time[0] = 0;
1073+
1074+
tables.migrations.time[0] = 0.001;
1075+
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
1076+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1077+
CU_ASSERT_FALSE(tsk_treeseq_get_discrete_time(&ts));
1078+
tsk_treeseq_free(&ts);
1079+
tables.migrations.time[0] = 0;
1080+
1081+
tables.mutations.time[0] = TSK_UNKNOWN_TIME;
1082+
tables.mutations.time[1] = TSK_UNKNOWN_TIME;
1083+
tables.mutations.time[2] = TSK_UNKNOWN_TIME;
1084+
tables.mutations.time[3] = TSK_UNKNOWN_TIME;
1085+
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
1086+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1087+
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
1088+
tsk_treeseq_free(&ts);
1089+
1090+
/* An empty tree sequence is has a discrete time. */
1091+
tsk_table_collection_clear(&tables, 0);
1092+
ret = tsk_treeseq_init(&ts, &tables, TSK_BUILD_INDEXES);
1093+
CU_ASSERT_EQUAL_FATAL(ret, 0);
1094+
CU_ASSERT_TRUE(tsk_treeseq_get_discrete_time(&ts));
1095+
tsk_treeseq_free(&ts);
1096+
1097+
tsk_table_collection_free(&tables);
1098+
}
1099+
10251100
static void
10261101
test_simplest_records(void)
10271102
{
@@ -6746,6 +6821,7 @@ main(int argc, char **argv)
67466821
CU_TestInfo tests[] = {
67476822
/* simplest example tests */
67486823
{ "test_simplest_discrete_genome", test_simplest_discrete_genome },
6824+
{ "test_simplest_discrete_time", test_simplest_discrete_time },
67496825
{ "test_simplest_records", test_simplest_records },
67506826
{ "test_simplest_nonbinary_records", test_simplest_nonbinary_records },
67516827
{ "test_simplest_unary_records", test_simplest_unary_records },

c/tests/testlib.c

Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,63 @@ parse_edges(const char *text, tsk_edge_table_t *edge_table)
493493
}
494494
}
495495

496+
void
497+
parse_migrations(const char *text, tsk_migration_table_t *migration_table)
498+
{
499+
tsk_id_t ret_id;
500+
size_t c, k;
501+
size_t MAX_LINE = 1024;
502+
char line[MAX_LINE];
503+
const char *whitespace = " \t";
504+
char *p;
505+
double left, right, time;
506+
int node, source, dest;
507+
char *metadata;
508+
509+
c = 0;
510+
while (text[c] != '\0') {
511+
/* Fill in the line */
512+
k = 0;
513+
while (text[c] != '\n' && text[c] != '\0') {
514+
CU_ASSERT_FATAL(k < MAX_LINE - 1);
515+
line[k] = text[c];
516+
c++;
517+
k++;
518+
}
519+
if (text[c] == '\n') {
520+
c++;
521+
}
522+
line[k] = '\0';
523+
p = strtok(line, whitespace);
524+
CU_ASSERT_FATAL(p != NULL);
525+
left = atof(p);
526+
p = strtok(NULL, whitespace);
527+
CU_ASSERT_FATAL(p != NULL);
528+
right = atof(p);
529+
p = strtok(NULL, whitespace);
530+
CU_ASSERT_FATAL(p != NULL);
531+
node = atoi(p);
532+
p = strtok(NULL, whitespace);
533+
CU_ASSERT_FATAL(p != NULL);
534+
source = atoi(p);
535+
p = strtok(NULL, whitespace);
536+
CU_ASSERT_FATAL(p != NULL);
537+
dest = atoi(p);
538+
p = strtok(NULL, whitespace);
539+
CU_ASSERT_FATAL(p != NULL);
540+
time = atof(p);
541+
p = strtok(NULL, whitespace);
542+
if (p == NULL) {
543+
metadata = "";
544+
} else {
545+
metadata = p;
546+
}
547+
ret_id = tsk_migration_table_add_row(migration_table, left, right, node, source,
548+
dest, time, metadata, strlen(metadata));
549+
CU_ASSERT_FATAL(ret_id >= 0);
550+
}
551+
}
552+
496553
void
497554
parse_sites(const char *text, tsk_site_table_t *site_table)
498555
{
@@ -706,11 +763,18 @@ tsk_treeseq_from_text(tsk_treeseq_t *ts, double sequence_length, const char *nod
706763
if (individuals != NULL) {
707764
parse_individuals(individuals, &tables.individuals);
708765
}
766+
if (migrations != NULL) {
767+
parse_migrations(migrations, &tables.migrations);
768+
}
709769
/* We need to add in populations if they are referenced */
710770
max_population_id = -1;
711771
for (j = 0; j < tables.nodes.num_rows; j++) {
712772
max_population_id = TSK_MAX(max_population_id, tables.nodes.population[j]);
713773
}
774+
for (j = 0; j < tables.migrations.num_rows; j++) {
775+
max_population_id = TSK_MAX(max_population_id, tables.migrations.source[j]);
776+
max_population_id = TSK_MAX(max_population_id, tables.migrations.dest[j]);
777+
}
714778
if (max_population_id >= 0) {
715779
for (j = 0; j <= (tsk_size_t) max_population_id; j++) {
716780
ret_id = tsk_population_table_add_row(&tables.populations, NULL, 0);

c/tskit/trees.c

Lines changed: 35 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
static inline bool
3636
is_discrete(double x)
3737
{
38-
return trunc(x) == x;
38+
return trunc(x) == x || tsk_is_unknown_time(x);
3939
}
4040

4141
/* ======================================================== *
@@ -315,13 +315,31 @@ tsk_treeseq_init_migrations(tsk_treeseq_t *self)
315315
tsk_size_t num_migrations = self->tables->migrations.num_rows;
316316
const double *restrict left = self->tables->migrations.left;
317317
const double *restrict right = self->tables->migrations.right;
318+
const double *restrict time = self->tables->migrations.time;
318319
bool discrete_breakpoints = true;
320+
bool discrete_times = true;
319321

320322
for (j = 0; j < num_migrations; j++) {
321323
discrete_breakpoints
322324
= discrete_breakpoints && is_discrete(left[j]) && is_discrete(right[j]);
325+
discrete_times = discrete_times && is_discrete(time[j]);
323326
}
324327
self->discrete_genome = self->discrete_genome && discrete_breakpoints;
328+
self->discrete_time = self->discrete_time && discrete_times;
329+
}
330+
331+
static void
332+
tsk_treeseq_init_mutations(tsk_treeseq_t *self)
333+
{
334+
tsk_size_t j;
335+
tsk_size_t num_mutations = self->tables->mutations.num_rows;
336+
const double *restrict time = self->tables->mutations.time;
337+
bool discrete_times = true;
338+
339+
for (j = 0; j < num_mutations; j++) {
340+
discrete_times = discrete_times && is_discrete(time[j]);
341+
}
342+
self->discrete_time = self->discrete_time && discrete_times;
325343
}
326344

327345
static int
@@ -330,7 +348,9 @@ tsk_treeseq_init_nodes(tsk_treeseq_t *self)
330348
tsk_size_t j, k;
331349
tsk_size_t num_nodes = self->tables->nodes.num_rows;
332350
const tsk_flags_t *restrict node_flags = self->tables->nodes.flags;
351+
const double *restrict time = self->tables->nodes.time;
333352
int ret = 0;
353+
bool discrete_times = true;
334354

335355
/* Determine the sample size */
336356
self->num_samples = 0;
@@ -356,6 +376,11 @@ tsk_treeseq_init_nodes(tsk_treeseq_t *self)
356376
}
357377
}
358378
tsk_bug_assert(k == self->num_samples);
379+
380+
for (j = 0; j < num_nodes; j++) {
381+
discrete_times = discrete_times && is_discrete(time[j]);
382+
}
383+
self->discrete_time = self->discrete_time && discrete_times;
359384
out:
360385
return ret;
361386
}
@@ -426,11 +451,12 @@ tsk_treeseq_init(
426451
tsk_memcpy(self->tables->file_uuid, tables->file_uuid, TSK_UUID_SIZE + 1);
427452
}
428453

454+
self->discrete_genome = true;
455+
self->discrete_time = true;
429456
ret = tsk_treeseq_init_nodes(self);
430457
if (ret != 0) {
431458
goto out;
432459
}
433-
self->discrete_genome = true;
434460
ret = tsk_treeseq_init_sites(self);
435461
if (ret != 0) {
436462
goto out;
@@ -444,6 +470,7 @@ tsk_treeseq_init(
444470
goto out;
445471
}
446472
tsk_treeseq_init_migrations(self);
473+
tsk_treeseq_init_mutations(self);
447474

448475
if (tsk_treeseq_get_time_units_length(self) == strlen(TSK_TIME_UNITS_UNCALIBRATED)
449476
&& !strncmp(tsk_treeseq_get_time_units(self), TSK_TIME_UNITS_UNCALIBRATED,
@@ -669,6 +696,12 @@ tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self)
669696
return self->discrete_genome;
670697
}
671698

699+
bool
700+
tsk_treeseq_get_discrete_time(const tsk_treeseq_t *self)
701+
{
702+
return self->discrete_time;
703+
}
704+
672705
/* Stats functions */
673706

674707
#define GET_2D_ROW(array, row_len, row) (array + (((size_t)(row_len)) * (size_t) row))

c/tskit/trees.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -70,6 +70,8 @@ typedef struct {
7070
bool time_uncalibrated;
7171
/* Are all genome coordinates discrete? */
7272
bool discrete_genome;
73+
/* Are all time values discrete? */
74+
bool discrete_time;
7375
/* Breakpoints along the sequence, including 0 and L. */
7476
double *breakpoints;
7577
/* If a node is a sample, map to its index in the samples list */
@@ -266,6 +268,7 @@ const tsk_id_t *tsk_treeseq_get_samples(const tsk_treeseq_t *self);
266268
const tsk_id_t *tsk_treeseq_get_sample_index_map(const tsk_treeseq_t *self);
267269
bool tsk_treeseq_is_sample(const tsk_treeseq_t *self, tsk_id_t u);
268270
bool tsk_treeseq_get_discrete_genome(const tsk_treeseq_t *self);
271+
bool tsk_treeseq_get_discrete_time(const tsk_treeseq_t *self);
269272

270273
int tsk_treeseq_get_node(const tsk_treeseq_t *self, tsk_id_t index, tsk_node_t *node);
271274
int tsk_treeseq_get_edge(const tsk_treeseq_t *self, tsk_id_t index, tsk_edge_t *edge);

python/CHANGELOG.rst

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -82,6 +82,9 @@
8282
- Add the ``discrete_genome`` property to the TreeSequence class which is true if
8383
all coordinates are discrete (:user:`jeromekelleher`, :issue:`1144`, :pr:`1819`)
8484

85+
- Add the ``discrete_time`` property to the TreeSequence class which is true if
86+
all time coordinates are discrete or unknown (:user:`benjeffery`, :issue:`1839`, :pr:`XXXX`)
87+
8588
**Fixes**
8689

8790
- `dump_tables` omitted individual parents. (:user:`benjeffery`, :issue:`1828`, :pr:`1884`)
@@ -97,6 +100,7 @@
97100
(:user:`jeetsukumaran`, :user:`jeromekelleher`, :issue:`1785`, :pr:`1835`,
98101
:pr:`1836`, :pr:`1838`)
99102

103+
100104
--------------------
101105
[0.3.7] - 2021-07-08
102106
--------------------

python/_tskitmodule.c

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7830,6 +7830,19 @@ TreeSequence_get_discrete_genome(TreeSequence *self)
78307830
return ret;
78317831
}
78327832

7833+
static PyObject *
7834+
TreeSequence_get_discrete_time(TreeSequence *self)
7835+
{
7836+
PyObject *ret = NULL;
7837+
7838+
if (TreeSequence_check_state(self) != 0) {
7839+
goto out;
7840+
}
7841+
ret = Py_BuildValue("i", tsk_treeseq_get_discrete_time(self->tree_sequence));
7842+
out:
7843+
return ret;
7844+
}
7845+
78337846
static PyObject *
78347847
TreeSequence_get_breakpoints(TreeSequence *self)
78357848
{
@@ -9039,6 +9052,10 @@ static PyMethodDef TreeSequence_methods[] = {
90399052
.ml_meth = (PyCFunction) TreeSequence_get_discrete_genome,
90409053
.ml_flags = METH_NOARGS,
90419054
.ml_doc = "Returns True if this TreeSequence has discrete coordinates" },
9055+
{ .ml_name = "get_discrete_time",
9056+
.ml_meth = (PyCFunction) TreeSequence_get_discrete_time,
9057+
.ml_flags = METH_NOARGS,
9058+
.ml_doc = "Returns True if this TreeSequence has discrete times" },
90429059
{ .ml_name = "get_breakpoints",
90439060
.ml_meth = (PyCFunction) TreeSequence_get_breakpoints,
90449061
.ml_flags = METH_NOARGS,

python/tests/test_highlevel.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1262,6 +1262,19 @@ def is_discrete(a):
12621262
)
12631263
assert ts.discrete_genome == discrete_genome
12641264

1265+
@pytest.mark.parametrize("ts", get_example_tree_sequences())
1266+
def test_discrete_time(self, ts):
1267+
def is_discrete(a):
1268+
return np.all(np.logical_or(np.floor(a) == a, tskit.is_unknown_time(a)))
1269+
1270+
tables = ts.tables
1271+
discrete_time = (
1272+
is_discrete(tables.nodes.time)
1273+
and is_discrete(tables.mutations.time)
1274+
and is_discrete(tables.migrations.time)
1275+
)
1276+
assert ts.discrete_time == discrete_time
1277+
12651278
def test_trees(self):
12661279
for ts in get_example_tree_sequences():
12671280
self.verify_trees(ts)

python/tests/test_lowlevel.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1412,6 +1412,20 @@ def test_clear_table(self, ts_fixture):
14121412
assert tables.sequence_length == ts_fixture.tables.sequence_length
14131413
assert tables.time_units == ts_fixture.tables.time_units
14141414

1415+
def test_discrete_genome(self):
1416+
tables = _tskit.TableCollection(1)
1417+
tables.build_index()
1418+
ts = _tskit.TreeSequence()
1419+
ts.load_tables(tables)
1420+
assert ts.get_discrete_genome() == 1
1421+
1422+
def test_discrete_time(self):
1423+
tables = _tskit.TableCollection(1)
1424+
tables.build_index()
1425+
ts = _tskit.TreeSequence()
1426+
ts.load_tables(tables)
1427+
assert ts.get_discrete_time() == 1
1428+
14151429

14161430
class StatsInterfaceMixin:
14171431
"""

0 commit comments

Comments
 (0)