Skip to content

Commit 85a566b

Browse files
committed
Add table append method
1 parent dc76c8c commit 85a566b

File tree

10 files changed

+212
-276
lines changed

10 files changed

+212
-276
lines changed

docs/conf.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -311,6 +311,7 @@ def handle_item(fieldarg, content):
311311
# TODO these have been triaged here to make the docs compile, but we should
312312
# sort them out properly. https://github.com/tskit-dev/tskit/issues/336
313313
("py:class", "array_like"),
314+
("py:class", "row-like"),
314315
("py:class", "array-like"),
315316
("py:class", "dtype=np.uint32"),
316317
("py:class", "dtype=np.uint32."),

python/CHANGELOG.rst

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,9 @@
99

1010
**Features**
1111

12+
- Add ``Table.append`` method for adding rows from classes such as ``SiteTableRow`` and
13+
``Site`` (:user:`benjeffery`, :issue:`1111`, :pr:`1254`).
14+
1215
- SVG visualization of a single tree allows all mutations on an edge to be plotted
1316
via the ``all_edge_mutations`` param (:user:`hyanwong`, :issue:`1253`, :pr:`1258`).
1417

python/tests/conftest.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -128,7 +128,7 @@ def ts_fixture():
128128
for name, table in tables.name_map.items():
129129
if name != "provenances":
130130
table.metadata_schema = tskit.MetadataSchema({"codec": "json"})
131-
metadatas = [f"n_{name}_{u}" for u in range(len(table))]
131+
metadatas = [f'{{"foo":"n_{name}_{u}"}}' for u in range(len(table))]
132132
metadata, metadata_offset = tskit.pack_strings(metadatas)
133133
table.set_columns(
134134
**{

python/tests/simplify.py

Lines changed: 15 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
Python implementation of the simplify algorithm.
2525
"""
2626
import sys
27+
from dataclasses import replace
2728

2829
import numpy as np
2930
import portion
@@ -158,13 +159,7 @@ def record_node(self, input_id, is_sample=False):
158159
flags &= ~tskit.NODE_IS_SAMPLE
159160
if is_sample:
160161
flags |= tskit.NODE_IS_SAMPLE
161-
output_id = self.tables.nodes.add_row(
162-
flags=flags,
163-
time=node.time,
164-
population=node.population,
165-
metadata=node.metadata,
166-
individual=node.individual,
167-
)
162+
output_id = self.tables.nodes.append(replace(node, flags=flags))
168163
self.node_id_map[input_id] = output_id
169164
return output_id
170165

@@ -186,9 +181,7 @@ def flush_edges(self):
186181
num_edges = 0
187182
for child in sorted(self.edge_buffer.keys()):
188183
for edge in self.edge_buffer[child]:
189-
self.tables.edges.add_row(
190-
edge.left, edge.right, edge.parent, edge.child
191-
)
184+
self.tables.edges.append(edge)
192185
num_edges += 1
193186
self.edge_buffer.clear()
194187
return num_edges
@@ -413,19 +406,15 @@ def finalise_sites(self):
413406
mapped_parent = -1
414407
if mut.parent != -1:
415408
mapped_parent = mutation_id_map[mut.parent]
416-
self.tables.mutations.add_row(
417-
site=len(self.tables.sites),
418-
node=self.mutation_node_map[mut.id],
419-
time=mut.time,
420-
parent=mapped_parent,
421-
derived_state=mut.derived_state,
422-
metadata=mut.metadata,
409+
self.tables.mutations.append(
410+
replace(
411+
mut,
412+
site=len(self.tables.sites),
413+
node=self.mutation_node_map[mut.id],
414+
parent=mapped_parent,
415+
)
423416
)
424-
self.tables.sites.add_row(
425-
position=site.position,
426-
ancestral_state=site.ancestral_state,
427-
metadata=site.metadata,
428-
)
417+
self.tables.sites.append(site)
429418

430419
def finalise_references(self):
431420
input_populations = self.ts.tables.populations
@@ -455,17 +444,12 @@ def finalise_references(self):
455444
for input_id, count in enumerate(population_ref_count):
456445
if count > 0:
457446
row = input_populations[input_id]
458-
output_id = self.tables.populations.add_row(metadata=row.metadata)
447+
output_id = self.tables.populations.append(row)
459448
population_id_map[input_id] = output_id
460449
for input_id, count in enumerate(individual_ref_count):
461450
if count > 0:
462451
row = input_individuals[input_id]
463-
output_id = self.tables.individuals.add_row(
464-
flags=row.flags,
465-
location=row.location,
466-
parents=row.parents,
467-
metadata=row.metadata,
468-
)
452+
output_id = self.tables.individuals.append(row)
469453
individual_id_map[input_id] = output_id
470454

471455
# Remap the population ID references for nodes.
@@ -489,12 +473,7 @@ def finalise_references(self):
489473
mapped_parents.append(-1)
490474
else:
491475
mapped_parents.append(individual_id_map[p])
492-
self.tables.individuals.add_row(
493-
flags=row.flags,
494-
location=row.location,
495-
parents=mapped_parents,
496-
metadata=row.metadata,
497-
)
476+
self.tables.individuals.append(replace(row, parents=mapped_parents))
498477

499478
# We don't support migrations for now. We'll need to remap these as well.
500479
assert self.ts.num_migrations == 0
@@ -710,7 +689,7 @@ def flush_edges(self):
710689
num_edges = 0
711690
for child in sorted(self.edge_buffer.keys()):
712691
for edge in self.edge_buffer[child]:
713-
self.table.add_row(edge.left, edge.right, edge.parent, edge.child)
692+
self.table.append(edge)
714693
num_edges += 1
715694
self.edge_buffer.clear()
716695
return num_edges

python/tests/test_highlevel.py

Lines changed: 28 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@
4040
import unittest
4141
import uuid as _uuid
4242
import warnings
43+
from dataclasses import replace
4344

4445
import kastore
4546
import msprime
@@ -2854,15 +2855,10 @@ def verify_random_permutation(self, ts):
28542855
inv_node_map = {v: k for k, v in node_map.items()}
28552856
for j in range(ts.num_nodes):
28562857
node = ts.node(inv_node_map[j])
2857-
other_tables.nodes.add_row(
2858-
flags=node.flags, time=node.time, population=node.population
2859-
)
2858+
other_tables.nodes.append(node)
28602859
for e in ts.edges():
2861-
other_tables.edges.add_row(
2862-
left=e.left,
2863-
right=e.right,
2864-
parent=node_map[e.parent],
2865-
child=node_map[e.child],
2860+
other_tables.edges.append(
2861+
replace(e, parent=node_map[e.parent], child=node_map[e.child])
28662862
)
28672863
for _ in range(ts.num_populations):
28682864
other_tables.populations.add_row()
@@ -3168,6 +3164,30 @@ def get_instances(self, n):
31683164
]
31693165

31703166

3167+
class TestContainersAppend:
3168+
def test_containers_append(self, ts_fixture):
3169+
"""
3170+
Test that the containers work with `Table.append`
3171+
"""
3172+
tables = ts_fixture.dump_tables()
3173+
tables.clear(clear_provenance=True)
3174+
for table_name in [
3175+
"individuals",
3176+
"nodes",
3177+
"edges",
3178+
"migrations",
3179+
"sites",
3180+
"mutations",
3181+
"populations",
3182+
"provenances",
3183+
]:
3184+
table = getattr(tables, table_name)
3185+
for i in range(len(getattr(ts_fixture.tables, table_name))):
3186+
table.append(getattr(ts_fixture, table_name[:-1])(i))
3187+
print(ts_fixture.tables, tables)
3188+
assert ts_fixture.tables == tables
3189+
3190+
31713191
class TestTskitConversionOutput(unittest.TestCase):
31723192
"""
31733193
Tests conversion output to ensure it is correct.

python/tests/test_parsimony.py

Lines changed: 11 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@
2424
"""
2525
import io
2626
import itertools
27+
from dataclasses import replace
2728

2829
import attr
2930
import Bio.Phylo.TreeConstruction
@@ -549,18 +550,16 @@ def verify(self, ts):
549550
ancestral_state, mutations = self.do_map_mutations(
550551
tree, G[site.id], alleles[site.id]
551552
)
552-
site_id = tables.sites.add_row(site.position, ancestral_state)
553+
site_id = tables.sites.append(
554+
replace(site, ancestral_state=ancestral_state)
555+
)
553556
parent_offset = len(tables.mutations)
554557
for mutation in mutations:
555558
parent = mutation.parent
556559
if parent != tskit.NULL:
557560
parent += parent_offset
558-
tables.mutations.add_row(
559-
site_id,
560-
node=mutation.node,
561-
time=mutation.time,
562-
parent=parent,
563-
derived_state=mutation.derived_state,
561+
tables.mutations.append(
562+
replace(mutation, site=site_id, parent=parent)
564563
)
565564
other_ts = tables.tree_sequence()
566565
for h1, h2 in zip(
@@ -715,19 +714,15 @@ def verify(self, ts):
715714
ancestral_state, mutations = self.do_map_mutations(
716715
tree, G[site.id], alleles[site.id]
717716
)
718-
site_id = tables.sites.add_row(site.position, ancestral_state)
717+
site_id = tables.sites.append(
718+
replace(site, ancestral_state=ancestral_state)
719+
)
719720
parent_offset = len(tables.mutations)
720721
for m in mutations:
721722
parent = m.parent
722723
if m.parent != tskit.NULL:
723724
parent = m.parent + parent_offset
724-
tables.mutations.add_row(
725-
site_id,
726-
node=m.node,
727-
time=m.time,
728-
parent=parent,
729-
derived_state=m.derived_state,
730-
)
725+
tables.mutations.append(replace(m, site=site_id, parent=parent))
731726
other_ts = tables.tree_sequence()
732727
assert ts.num_samples == other_ts.num_samples
733728
H1 = list(ts.haplotypes(isolated_as_missing=False))
@@ -1206,13 +1201,7 @@ def verify(self, ts, k):
12061201
parent = mutation.parent
12071202
if parent != tskit.NULL:
12081203
parent += parent_offset
1209-
tables.mutations.add_row(
1210-
j,
1211-
node=mutation.node,
1212-
time=mutation.time,
1213-
parent=parent,
1214-
derived_state=mutation.derived_state,
1215-
)
1204+
tables.mutations.append(replace(mutation, site=j, parent=parent))
12161205

12171206
ts2 = tables.tree_sequence()
12181207
G2 = np.zeros((m, n), dtype=np.int8)

python/tests/test_tables.py

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -386,6 +386,42 @@ def test_add_row_round_trip(self):
386386
t2.add_row(**dataclasses.asdict(row))
387387
assert t1 == t2
388388

389+
def test_append_row(self):
390+
for num_rows in [0, 10, 100]:
391+
table = self.table_class()
392+
for j, row in enumerate(self.make_transposed_input_data(num_rows)):
393+
k = table.append(table.row_class(**row))
394+
assert k == j
395+
for colname, input_array in self.make_input_data(num_rows).items():
396+
output_array = getattr(table, colname)
397+
assert input_array.shape == output_array.shape
398+
assert np.all(input_array == output_array)
399+
table.clear()
400+
assert table.num_rows == 0
401+
assert len(table) == 0
402+
403+
def test_append_duck_type(self):
404+
class Duck:
405+
pass
406+
407+
table = self.table_class()
408+
for j, row in enumerate(self.make_transposed_input_data(20)):
409+
duck = Duck()
410+
for k, v in row.items():
411+
setattr(duck, k, v)
412+
k = table.append(duck)
413+
assert k == j
414+
for colname, input_array in self.make_input_data(20).items():
415+
output_array = getattr(table, colname)
416+
assert np.array_equal(input_array, output_array)
417+
418+
def test_append_error(self):
419+
class NotADuck:
420+
pass
421+
422+
with pytest.raises(AttributeError, match="'NotADuck' object has no attribute"):
423+
self.table_class().append(NotADuck())
424+
389425
def test_set_columns_data(self):
390426
for num_rows in [0, 10, 100, 1000]:
391427
input_data = {col.name: col.get_input(num_rows) for col in self.columns}
@@ -1627,7 +1663,7 @@ def verify_edge_sort_offset(self, ts):
16271663
all_edges = keep + reversed_edges
16281664
tables.edges.clear()
16291665
for e in all_edges:
1630-
tables.edges.add_row(e.left, e.right, e.parent, e.child)
1666+
tables.edges.append(e)
16311667
# Verify that import fails for randomised edges
16321668
with pytest.raises(_tskit.LibraryError):
16331669
tables.tree_sequence()
@@ -1638,7 +1674,7 @@ def verify_edge_sort_offset(self, ts):
16381674
# Sorting from the correct index should give us back the original table.
16391675
tables.edges.clear()
16401676
for e in all_edges:
1641-
tables.edges.add_row(e.left, e.right, e.parent, e.child)
1677+
tables.edges.append(e)
16421678
tables.sort(edge_start=start)
16431679
# Verify the new and old edges are equal.
16441680
assert edges == tables.edges

0 commit comments

Comments
 (0)