Skip to content

Commit d71fac9

Browse files
committed
Method to form the union of TableCollections
1 parent 1487541 commit d71fac9

File tree

14 files changed

+1196
-93
lines changed

14 files changed

+1196
-93
lines changed

c/CHANGELOG.rst

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,12 @@ In development.
4444

4545
**New features**
4646

47+
- New methods to perform set operations on table collections.
48+
``tsk_table_collection_subset`` subsets and reorders table collections by nodes
49+
(:user:`mufernando`, :user:`petrelharp`, :pr:`663`, :pr:`690`).
50+
``tsk_table_collection_union`` forms the node-wise union of two table collections
51+
(:user:`mufernando`, :user:`petrelharp`, :issue:`381`, :pr:`623`).
52+
4753
- Mutations now have an optional double-precision floating-point ``time`` column.
4854
If not specified, this defaults to a particular NaN value (``TSK_UNKNOWN_TIME``)
4955
indicating that the time is unknown. For a tree sequence to be considered valid

c/tests/test_tables.c

Lines changed: 251 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3803,7 +3803,7 @@ test_table_collection_check_integrity_with_options(tsk_flags_t tc_options)
38033803
ret = tsk_mutation_table_clear(&tables.mutations);
38043804
CU_ASSERT_EQUAL_FATAL(ret, 0);
38053805
ret = tsk_mutation_table_add_row(
3806-
&tables.mutations, 0, 0, TSK_NULL, NAN, NULL, 0, NULL, 0);
3806+
&tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
38073807
CU_ASSERT_FATAL(ret >= 0);
38083808
ret = tsk_table_collection_check_integrity(&tables, 0);
38093809
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_TIME_NONFINITE);
@@ -3986,10 +3986,10 @@ test_table_collection_subset_with_options(tsk_flags_t options)
39863986
// four nodes from two diploids; the first is from pop 0
39873987
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
39883988
CU_ASSERT_FATAL(ret >= 0);
3989-
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
3989+
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0);
39903990
CU_ASSERT_FATAL(ret >= 0);
39913991
ret = tsk_node_table_add_row(
3992-
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);
3992+
&tables.nodes, TSK_NODE_IS_SAMPLE, 2.0, TSK_NULL, 1, NULL, 0);
39933993
CU_ASSERT_FATAL(ret >= 0);
39943994
ret = tsk_node_table_add_row(
39953995
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);
@@ -4009,13 +4009,16 @@ test_table_collection_subset_with_options(tsk_flags_t options)
40094009
ret = tsk_site_table_add_row(&tables.sites, 0.4, "A", 1, NULL, 0);
40104010
CU_ASSERT_FATAL(ret >= 0);
40114011
ret = tsk_mutation_table_add_row(
4012-
&tables.mutations, 0, 0, TSK_NULL, NAN, NULL, 0, NULL, 0);
4012+
&tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
40134013
CU_ASSERT_FATAL(ret >= 0);
4014-
ret = tsk_mutation_table_add_row(&tables.mutations, 0, 0, 0, NAN, NULL, 0, NULL, 0);
4014+
ret = tsk_mutation_table_add_row(
4015+
&tables.mutations, 0, 0, 0, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
40154016
CU_ASSERT_FATAL(ret >= 0);
40164017
ret = tsk_mutation_table_add_row(
4017-
&tables.mutations, 1, 1, TSK_NULL, NAN, NULL, 0, NULL, 0);
4018+
&tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
40184019
CU_ASSERT_FATAL(ret >= 0);
4020+
ret = tsk_table_collection_build_index(&tables, 0);
4021+
CU_ASSERT_EQUAL_FATAL(ret, 0);
40194022

40204023
// empty nodes should get empty tables
40214024
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT | options);
@@ -4069,16 +4072,17 @@ test_table_collection_subset_errors(void)
40694072

40704073
ret = tsk_table_collection_init(&tables, 0);
40714074
CU_ASSERT_EQUAL_FATAL(ret, 0);
4075+
tables.sequence_length = 1;
40724076
ret = tsk_table_collection_init(&tables_copy, 0);
40734077
CU_ASSERT_EQUAL_FATAL(ret, 0);
40744078

40754079
// four nodes from two diploids; the first is from pop 0
40764080
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
40774081
CU_ASSERT_FATAL(ret >= 0);
4078-
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
4082+
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 1.0, 0, 0, NULL, 0);
40794083
CU_ASSERT_FATAL(ret >= 0);
40804084
ret = tsk_node_table_add_row(
4081-
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);
4085+
&tables.nodes, TSK_NODE_IS_SAMPLE, 2.0, TSK_NULL, 1, NULL, 0);
40824086
CU_ASSERT_FATAL(ret >= 0);
40834087
ret = tsk_node_table_add_row(
40844088
&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, TSK_NULL, 1, NULL, 0);
@@ -4091,6 +4095,8 @@ test_table_collection_subset_errors(void)
40914095
CU_ASSERT_FATAL(ret >= 0);
40924096
ret = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);
40934097
CU_ASSERT_FATAL(ret >= 0);
4098+
ret = tsk_table_collection_build_index(&tables, 0);
4099+
CU_ASSERT_EQUAL_FATAL(ret, 0);
40944100

40954101
/* Migrations are not supported */
40964102
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
@@ -4101,15 +4107,248 @@ test_table_collection_subset_errors(void)
41014107
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
41024108

41034109
// test out of bounds nodes
4110+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4111+
CU_ASSERT_EQUAL_FATAL(ret, 0);
41044112
nodes[0] = -1;
4105-
ret = tsk_table_collection_subset(&tables, nodes, 4);
4113+
ret = tsk_table_collection_subset(&tables_copy, nodes, 4);
41064114
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
41074115
nodes[0] = 6;
4108-
ret = tsk_table_collection_subset(&tables, nodes, 4);
4116+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4117+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4118+
ret = tsk_table_collection_subset(&tables_copy, nodes, 4);
41094119
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
41104120

4121+
// check integrity
4122+
nodes[0] = 0;
4123+
nodes[1] = 1;
4124+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4125+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4126+
ret = tsk_node_table_truncate(&tables_copy.nodes, 3);
4127+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4128+
ret = tsk_node_table_add_row(
4129+
&tables_copy.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);
4130+
CU_ASSERT_FATAL(ret >= 0);
4131+
ret = tsk_table_collection_subset(&tables_copy, nodes, 4);
4132+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
4133+
4134+
tsk_table_collection_free(&tables);
4135+
tsk_table_collection_free(&tables_copy);
4136+
}
4137+
4138+
static void
4139+
test_table_collection_union(void)
4140+
{
4141+
int ret;
4142+
tsk_table_collection_t tables;
4143+
tsk_table_collection_t tables_empty;
4144+
tsk_table_collection_t tables_copy;
4145+
tsk_id_t node_mapping[3];
4146+
4147+
memset(node_mapping, 0xff, sizeof(node_mapping));
4148+
4149+
ret = tsk_table_collection_init(&tables, 0);
4150+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4151+
tables.sequence_length = 1;
4152+
ret = tsk_table_collection_init(&tables_empty, 0);
4153+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4154+
tables_empty.sequence_length = 1;
4155+
ret = tsk_table_collection_init(&tables_copy, 0);
4156+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4157+
4158+
// does not error on empty tables
4159+
ret = tsk_table_collection_union(
4160+
&tables, &tables_empty, node_mapping, TSK_UNION_NO_CHECK_SHARED);
4161+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4162+
4163+
// three nodes, two pop, three ind, two edge, two site, two mut
4164+
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
4165+
CU_ASSERT_FATAL(ret >= 0);
4166+
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 1, 1, NULL, 0);
4167+
CU_ASSERT_FATAL(ret >= 0);
4168+
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 2, NULL, 0);
4169+
CU_ASSERT_FATAL(ret >= 0);
4170+
ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0);
4171+
CU_ASSERT_FATAL(ret >= 0);
4172+
ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0);
4173+
CU_ASSERT_FATAL(ret >= 0);
4174+
ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0);
4175+
CU_ASSERT_FATAL(ret >= 0);
4176+
ret = tsk_population_table_add_row(&tables.populations, NULL, 0);
4177+
CU_ASSERT_FATAL(ret >= 0);
4178+
ret = tsk_population_table_add_row(&tables.populations, NULL, 0);
4179+
CU_ASSERT_FATAL(ret >= 0);
4180+
ret = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 0, NULL, 0);
4181+
CU_ASSERT_FATAL(ret >= 0);
4182+
ret = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 2, 1, NULL, 0);
4183+
CU_ASSERT_FATAL(ret >= 0);
4184+
ret = tsk_site_table_add_row(&tables.sites, 0.4, "T", 1, NULL, 0);
4185+
CU_ASSERT_FATAL(ret >= 0);
4186+
ret = tsk_site_table_add_row(&tables.sites, 0.2, "A", 1, NULL, 0);
4187+
CU_ASSERT_FATAL(ret >= 0);
4188+
ret = tsk_mutation_table_add_row(
4189+
&tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
4190+
ret = tsk_mutation_table_add_row(
4191+
&tables.mutations, 1, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
4192+
CU_ASSERT_FATAL(ret >= 0);
4193+
ret = tsk_table_collection_build_index(&tables, 0);
4194+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4195+
ret = tsk_table_collection_sort(&tables, NULL, 0);
4196+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4197+
4198+
// union with empty should not change
4199+
// other is empty
4200+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4201+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4202+
ret = tsk_table_collection_union(
4203+
&tables_copy, &tables_empty, node_mapping, TSK_UNION_NO_CHECK_SHARED);
4204+
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy));
4205+
// self is empty
4206+
ret = tsk_table_collection_clear(&tables_copy);
4207+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4208+
ret = tsk_table_collection_union(
4209+
&tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);
4210+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4211+
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy));
4212+
4213+
// union all shared nodes + subset original nodes = original table
4214+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4215+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4216+
ret = tsk_table_collection_union(
4217+
&tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);
4218+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4219+
node_mapping[0] = 0;
4220+
node_mapping[1] = 1;
4221+
node_mapping[2] = 2;
4222+
ret = tsk_table_collection_subset(&tables_copy, node_mapping, 3);
4223+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4224+
CU_ASSERT_FATAL(tsk_table_collection_equals(&tables, &tables_copy));
4225+
4226+
// union with one shared node
4227+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4228+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4229+
node_mapping[0] = TSK_NULL;
4230+
node_mapping[1] = TSK_NULL;
4231+
node_mapping[2] = 2;
4232+
ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);
4233+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4234+
CU_ASSERT_EQUAL_FATAL(
4235+
tables_copy.populations.num_rows, tables.populations.num_rows + 2);
4236+
CU_ASSERT_EQUAL_FATAL(
4237+
tables_copy.individuals.num_rows, tables.individuals.num_rows + 2);
4238+
CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, tables.nodes.num_rows + 2);
4239+
CU_ASSERT_EQUAL_FATAL(tables_copy.edges.num_rows, tables.edges.num_rows + 2);
4240+
CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, tables.sites.num_rows);
4241+
CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, tables.mutations.num_rows + 2);
4242+
4243+
// union with one shared node, but without adding new populations
4244+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4245+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4246+
node_mapping[0] = TSK_NULL;
4247+
node_mapping[1] = TSK_NULL;
4248+
node_mapping[2] = 2;
4249+
ret = tsk_table_collection_union(
4250+
&tables_copy, &tables, node_mapping, TSK_UNION_NO_ADD_POP);
4251+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4252+
CU_ASSERT_EQUAL_FATAL(tables_copy.populations.num_rows, tables.populations.num_rows);
4253+
CU_ASSERT_EQUAL_FATAL(
4254+
tables_copy.individuals.num_rows, tables.individuals.num_rows + 2);
4255+
CU_ASSERT_EQUAL_FATAL(tables_copy.nodes.num_rows, tables.nodes.num_rows + 2);
4256+
CU_ASSERT_EQUAL_FATAL(tables_copy.edges.num_rows, tables.edges.num_rows + 2);
4257+
CU_ASSERT_EQUAL_FATAL(tables_copy.sites.num_rows, tables.sites.num_rows);
4258+
CU_ASSERT_EQUAL_FATAL(tables_copy.mutations.num_rows, tables.mutations.num_rows + 2);
4259+
4260+
tsk_table_collection_free(&tables_copy);
4261+
tsk_table_collection_free(&tables_empty);
41114262
tsk_table_collection_free(&tables);
4263+
}
4264+
4265+
static void
4266+
test_table_collection_union_errors(void)
4267+
{
4268+
int ret;
4269+
tsk_table_collection_t tables;
4270+
tsk_table_collection_t tables_copy;
4271+
tsk_id_t node_mapping[] = { 0, 1 };
4272+
4273+
ret = tsk_table_collection_init(&tables, 0);
4274+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4275+
tables.sequence_length = 1;
4276+
ret = tsk_table_collection_init(&tables_copy, 0);
4277+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4278+
4279+
// two nodes, two pop, two ind, one edge, one site, one mut
4280+
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, 0, 0, NULL, 0);
4281+
CU_ASSERT_FATAL(ret >= 0);
4282+
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.5, 1, 1, NULL, 0);
4283+
CU_ASSERT_FATAL(ret >= 0);
4284+
ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0);
4285+
CU_ASSERT_FATAL(ret >= 0);
4286+
ret = tsk_individual_table_add_row(&tables.individuals, 0, NULL, 0, NULL, 0);
4287+
CU_ASSERT_FATAL(ret >= 0);
4288+
ret = tsk_population_table_add_row(&tables.populations, NULL, 0);
4289+
CU_ASSERT_FATAL(ret >= 0);
4290+
ret = tsk_population_table_add_row(&tables.populations, NULL, 0);
4291+
CU_ASSERT_FATAL(ret >= 0);
4292+
ret = tsk_edge_table_add_row(&tables.edges, 0.0, 1.0, 1, 0, NULL, 0);
4293+
CU_ASSERT_FATAL(ret >= 0);
4294+
ret = tsk_site_table_add_row(&tables.sites, 0.2, "A", 1, NULL, 0);
4295+
CU_ASSERT_FATAL(ret >= 0);
4296+
ret = tsk_mutation_table_add_row(
4297+
&tables.mutations, 0, 0, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
4298+
CU_ASSERT_FATAL(ret >= 0);
4299+
4300+
// trigger diff histories error
4301+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4302+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4303+
ret = tsk_mutation_table_add_row(
4304+
&tables_copy.mutations, 0, 1, TSK_NULL, TSK_UNKNOWN_TIME, NULL, 0, NULL, 0);
4305+
CU_ASSERT_FATAL(ret >= 0);
4306+
ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);
4307+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNION_DIFF_HISTORIES);
4308+
4309+
// Migrations are not supported
4310+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4311+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4312+
tsk_migration_table_add_row(&tables_copy.migrations, 0, 1, 0, 0, 0, 0, NULL, 0);
4313+
CU_ASSERT_EQUAL_FATAL(tables_copy.migrations.num_rows, 1);
4314+
ret = tsk_table_collection_union(
4315+
&tables_copy, &tables, node_mapping, TSK_UNION_NO_CHECK_SHARED);
4316+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_MIGRATIONS_NOT_SUPPORTED);
4317+
4318+
// unsupported union - child shared, parent not shared
4319+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4320+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4321+
node_mapping[0] = 0;
4322+
node_mapping[1] = TSK_NULL;
4323+
ret = tsk_table_collection_union(
4324+
&tables_copy, &tables, node_mapping, TSK_UNION_NO_ADD_POP);
4325+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNION_NOT_SUPPORTED);
4326+
4327+
// test out of bounds node_mapping
4328+
node_mapping[0] = -4;
4329+
node_mapping[1] = 6;
4330+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4331+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4332+
ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);
4333+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_UNION_BAD_MAP);
4334+
4335+
// check integrity
4336+
node_mapping[0] = 0;
4337+
node_mapping[1] = 1;
4338+
ret = tsk_node_table_add_row(
4339+
&tables_copy.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);
4340+
CU_ASSERT_FATAL(ret >= 0);
4341+
ret = tsk_table_collection_union(&tables_copy, &tables, node_mapping, 0);
4342+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
4343+
ret = tsk_table_collection_copy(&tables, &tables_copy, TSK_NO_INIT);
4344+
CU_ASSERT_EQUAL_FATAL(ret, 0);
4345+
ret = tsk_node_table_add_row(&tables.nodes, TSK_NODE_IS_SAMPLE, 0.0, -2, 0, NULL, 0);
4346+
CU_ASSERT_FATAL(ret >= 0);
4347+
ret = tsk_table_collection_union(&tables, &tables_copy, node_mapping, 0);
4348+
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_POPULATION_OUT_OF_BOUNDS);
4349+
41124350
tsk_table_collection_free(&tables_copy);
4351+
tsk_table_collection_free(&tables);
41134352
}
41144353

41154354
int
@@ -4168,6 +4407,8 @@ main(int argc, char **argv)
41684407
test_table_collection_check_integrity_no_populations },
41694408
{ "test_table_collection_subset", test_table_collection_subset },
41704409
{ "test_table_collection_subset_errors", test_table_collection_subset_errors },
4410+
{ "test_table_collection_union", test_table_collection_union },
4411+
{ "test_table_collection_union_errors", test_table_collection_union_errors },
41714412
{ NULL, NULL },
41724413
};
41734414

c/tskit/core.c

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -352,6 +352,10 @@ tsk_strerror_internal(int err)
352352
case TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED:
353353
ret = "Only binary mutations are supported for this operation";
354354
break;
355+
case TSK_ERR_UNION_NOT_SUPPORTED:
356+
ret = "Union is not supported for cases where there is non-shared "
357+
"history older than the shared history of the two Table Collections";
358+
break;
355359

356360
/* Stats errors */
357361
case TSK_ERR_BAD_NUM_WINDOWS:
@@ -441,6 +445,16 @@ tsk_strerror_internal(int err)
441445
case TSK_ERR_TOO_MANY_VALUES:
442446
ret = "Too many values to compress";
443447
break;
448+
449+
/* Union errors */
450+
case TSK_ERR_UNION_BAD_MAP:
451+
ret = "Node map contains an entry of a node not present in this table "
452+
"collection.";
453+
break;
454+
case TSK_ERR_UNION_DIFF_HISTORIES:
455+
// the histories could still be equivalent when this error is raised,
456+
// because subset does not reorder edges (if not sorted) or mutations.
457+
ret = "Shared portions of the tree sequences are not equal.";
444458
}
445459
return ret;
446460
}

c/tskit/core.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -267,6 +267,7 @@ not found in the file.
267267
#define TSK_ERR_SORT_OFFSET_NOT_SUPPORTED -803
268268
#define TSK_ERR_NONBINARY_MUTATIONS_UNSUPPORTED -804
269269
#define TSK_ERR_MIGRATIONS_NOT_SUPPORTED -805
270+
#define TSK_ERR_UNION_NOT_SUPPORTED -806
270271

271272
/* Stats errors */
272273
#define TSK_ERR_BAD_NUM_WINDOWS -900
@@ -303,6 +304,11 @@ not found in the file.
303304
#define TSK_ERR_MATCH_IMPOSSIBLE -1301
304305
#define TSK_ERR_BAD_COMPRESSED_MATRIX_NODE -1302
305306
#define TSK_ERR_TOO_MANY_VALUES -1303
307+
308+
/* Union errors */
309+
#define TSK_ERR_UNION_BAD_MAP -1400
310+
#define TSK_ERR_UNION_DIFF_HISTORIES -1401
311+
306312
// clang-format on
307313

308314
/* This bit is 0 for any errors originating from kastore */

0 commit comments

Comments
 (0)