Skip to content

Commit 8f71d39

Browse files
committed
Add table append method
1 parent 889330d commit 8f71d39

File tree

8 files changed: 217 additions and 269 deletions.

docs/conf.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -311,6 +311,7 @@ def handle_item(fieldarg, content):
311311
# TODO these have been triaged here to make the docs compile, but we should
312312
# sort them out properly. https://github.com/tskit-dev/tskit/issues/336
313313
("py:class", "array_like"),
314+
("py:class", "row-like"),
314315
("py:class", "array-like"),
315316
("py:class", "dtype=np.uint32"),
316317
("py:class", "dtype=np.uint32."),

python/tests/simplify.py

Lines changed: 23 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
Python implementation of the simplify algorithm.
2525
"""
2626
import sys
27+
from dataclasses import replace
2728

2829
import numpy as np
2930
import portion
@@ -158,12 +159,11 @@ def record_node(self, input_id, is_sample=False):
158159
flags &= ~tskit.NODE_IS_SAMPLE
159160
if is_sample:
160161
flags |= tskit.NODE_IS_SAMPLE
161-
output_id = self.tables.nodes.add_row(
162-
flags=flags,
163-
time=node.time,
164-
population=node.population,
165-
metadata=node.metadata,
166-
individual=node.individual,
162+
output_id = self.tables.nodes.append(
163+
replace(
164+
node,
165+
flags=flags,
166+
)
167167
)
168168
self.node_id_map[input_id] = output_id
169169
return output_id
@@ -186,9 +186,7 @@ def flush_edges(self):
186186
num_edges = 0
187187
for child in sorted(self.edge_buffer.keys()):
188188
for edge in self.edge_buffer[child]:
189-
self.tables.edges.add_row(
190-
edge.left, edge.right, edge.parent, edge.child
191-
)
189+
self.tables.edges.append(edge)
192190
num_edges += 1
193191
self.edge_buffer.clear()
194192
return num_edges
@@ -413,19 +411,15 @@ def finalise_sites(self):
413411
mapped_parent = -1
414412
if mut.parent != -1:
415413
mapped_parent = mutation_id_map[mut.parent]
416-
self.tables.mutations.add_row(
417-
site=len(self.tables.sites),
418-
node=self.mutation_node_map[mut.id],
419-
time=mut.time,
420-
parent=mapped_parent,
421-
derived_state=mut.derived_state,
422-
metadata=mut.metadata,
414+
self.tables.mutations.append(
415+
replace(
416+
mut,
417+
site=len(self.tables.sites),
418+
node=self.mutation_node_map[mut.id],
419+
parent=mapped_parent,
420+
)
423421
)
424-
self.tables.sites.add_row(
425-
position=site.position,
426-
ancestral_state=site.ancestral_state,
427-
metadata=site.metadata,
428-
)
422+
self.tables.sites.append(site)
429423

430424
def finalise_references(self):
431425
input_populations = self.ts.tables.populations
@@ -455,17 +449,12 @@ def finalise_references(self):
455449
for input_id, count in enumerate(population_ref_count):
456450
if count > 0:
457451
row = input_populations[input_id]
458-
output_id = self.tables.populations.add_row(metadata=row.metadata)
452+
output_id = self.tables.populations.append(row)
459453
population_id_map[input_id] = output_id
460454
for input_id, count in enumerate(individual_ref_count):
461455
if count > 0:
462456
row = input_individuals[input_id]
463-
output_id = self.tables.individuals.add_row(
464-
flags=row.flags,
465-
location=row.location,
466-
parents=row.parents,
467-
metadata=row.metadata,
468-
)
457+
output_id = self.tables.individuals.append(row)
469458
individual_id_map[input_id] = output_id
470459

471460
# Remap the population ID references for nodes.
@@ -489,11 +478,11 @@ def finalise_references(self):
489478
mapped_parents.append(-1)
490479
else:
491480
mapped_parents.append(individual_id_map[p])
492-
self.tables.individuals.add_row(
493-
flags=row.flags,
494-
location=row.location,
495-
parents=mapped_parents,
496-
metadata=row.metadata,
481+
self.tables.individuals.append(
482+
replace(
483+
row,
484+
parents=mapped_parents,
485+
)
497486
)
498487

499488
# We don't support migrations for now. We'll need to remap these as well.
@@ -710,7 +699,7 @@ def flush_edges(self):
710699
num_edges = 0
711700
for child in sorted(self.edge_buffer.keys()):
712701
for edge in self.edge_buffer[child]:
713-
self.table.add_row(edge.left, edge.right, edge.parent, edge.child)
702+
self.table.append(edge)
714703
num_edges += 1
715704
self.edge_buffer.clear()
716705
return num_edges

python/tests/test_highlevel.py

Lines changed: 8 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -40,6 +40,7 @@
4040
import unittest
4141
import uuid as _uuid
4242
import warnings
43+
from dataclasses import replace
4344

4445
import kastore
4546
import msprime
@@ -2854,15 +2855,14 @@ def verify_random_permutation(self, ts):
28542855
inv_node_map = {v: k for k, v in node_map.items()}
28552856
for j in range(ts.num_nodes):
28562857
node = ts.node(inv_node_map[j])
2857-
other_tables.nodes.add_row(
2858-
flags=node.flags, time=node.time, population=node.population
2859-
)
2858+
other_tables.nodes.append(node)
28602859
for e in ts.edges():
2861-
other_tables.edges.add_row(
2862-
left=e.left,
2863-
right=e.right,
2864-
parent=node_map[e.parent],
2865-
child=node_map[e.child],
2860+
other_tables.edges.append(
2861+
replace(
2862+
e,
2863+
parent=node_map[e.parent],
2864+
child=node_map[e.child],
2865+
)
28662866
)
28672867
for _ in range(ts.num_populations):
28682868
other_tables.populations.add_row()

python/tests/test_parsimony.py

Lines changed: 25 additions & 20 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,7 @@
2424
"""
2525
import io
2626
import itertools
27+
from dataclasses import replace
2728

2829
import attr
2930
import Bio.Phylo.TreeConstruction
@@ -549,18 +550,20 @@ def verify(self, ts):
549550
ancestral_state, mutations = self.do_map_mutations(
550551
tree, G[site.id], alleles[site.id]
551552
)
552-
site_id = tables.sites.add_row(site.position, ancestral_state)
553+
site_id = tables.sites.append(
554+
replace(site, ancestral_state=ancestral_state)
555+
)
553556
parent_offset = len(tables.mutations)
554557
for mutation in mutations:
555558
parent = mutation.parent
556559
if parent != tskit.NULL:
557560
parent += parent_offset
558-
tables.mutations.add_row(
559-
site_id,
560-
node=mutation.node,
561-
time=mutation.time,
562-
parent=parent,
563-
derived_state=mutation.derived_state,
561+
tables.mutations.append(
562+
replace(
563+
mutation,
564+
site=site_id,
565+
parent=parent,
566+
)
564567
)
565568
other_ts = tables.tree_sequence()
566569
for h1, h2 in zip(
@@ -715,18 +718,20 @@ def verify(self, ts):
715718
ancestral_state, mutations = self.do_map_mutations(
716719
tree, G[site.id], alleles[site.id]
717720
)
718-
site_id = tables.sites.add_row(site.position, ancestral_state)
721+
site_id = tables.sites.append(
722+
replace(site, ancestral_state=ancestral_state)
723+
)
719724
parent_offset = len(tables.mutations)
720725
for m in mutations:
721726
parent = m.parent
722727
if m.parent != tskit.NULL:
723728
parent = m.parent + parent_offset
724-
tables.mutations.add_row(
725-
site_id,
726-
node=m.node,
727-
time=m.time,
728-
parent=parent,
729-
derived_state=m.derived_state,
729+
tables.mutations.append(
730+
replace(
731+
m,
732+
site=site_id,
733+
parent=parent,
734+
)
730735
)
731736
other_ts = tables.tree_sequence()
732737
assert ts.num_samples == other_ts.num_samples
@@ -1206,12 +1211,12 @@ def verify(self, ts, k):
12061211
parent = mutation.parent
12071212
if parent != tskit.NULL:
12081213
parent += parent_offset
1209-
tables.mutations.add_row(
1210-
j,
1211-
node=mutation.node,
1212-
time=mutation.time,
1213-
parent=parent,
1214-
derived_state=mutation.derived_state,
1214+
tables.mutations.append(
1215+
replace(
1216+
mutation,
1217+
site=j,
1218+
parent=parent,
1219+
)
12151220
)
12161221

12171222
ts2 = tables.tree_sequence()

python/tests/test_tables.py

Lines changed: 16 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -386,6 +386,20 @@ def test_add_row_round_trip(self):
386386
t2.add_row(**dataclasses.asdict(row))
387387
assert t1 == t2
388388

389+
def test_append_row(self):
390+
for num_rows in [0, 10, 100]:
391+
table = self.table_class()
392+
for j, row in enumerate(self.make_transposed_input_data(num_rows)):
393+
k = table.append(table.row_class(**row))
394+
assert k == j
395+
for colname, input_array in self.make_input_data(num_rows).items():
396+
output_array = getattr(table, colname)
397+
assert input_array.shape == output_array.shape
398+
assert np.all(input_array == output_array)
399+
table.clear()
400+
assert table.num_rows == 0
401+
assert len(table) == 0
402+
389403
def test_set_columns_data(self):
390404
for num_rows in [0, 10, 100, 1000]:
391405
input_data = {col.name: col.get_input(num_rows) for col in self.columns}
@@ -1627,7 +1641,7 @@ def verify_edge_sort_offset(self, ts):
16271641
all_edges = keep + reversed_edges
16281642
tables.edges.clear()
16291643
for e in all_edges:
1630-
tables.edges.add_row(e.left, e.right, e.parent, e.child)
1644+
tables.edges.append(e)
16311645
# Verify that import fails for randomised edges
16321646
with pytest.raises(_tskit.LibraryError):
16331647
tables.tree_sequence()
@@ -1638,7 +1652,7 @@ def verify_edge_sort_offset(self, ts):
16381652
# Sorting from the correct index should give us back the original table.
16391653
tables.edges.clear()
16401654
for e in all_edges:
1641-
tables.edges.add_row(e.left, e.right, e.parent, e.child)
1655+
tables.edges.append(e)
16421656
tables.sort(edge_start=start)
16431657
# Verify the new and old edges are equal.
16441658
assert edges == tables.edges

python/tests/test_topology.py

Lines changed: 17 additions & 23 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
import random
3232
import sys
3333
import unittest
34+
from dataclasses import replace
3435

3536
import msprime
3637
import numpy as np
@@ -66,23 +67,18 @@ def simple_keep_intervals(tables, intervals, simplify=True, record_provenance=Tr
6667
if not (edge.right <= interval_left or edge.left >= interval_right):
6768
left = max(interval_left, edge.left)
6869
right = min(interval_right, edge.right)
69-
tables.edges.add_row(
70-
left, right, edge.parent, edge.child, edge.metadata
71-
)
70+
tables.edges.append(replace(edge, left=left, right=right))
7271
for site in ts.sites():
7372
for interval_left, interval_right in intervals:
7473
if interval_left <= site.position < interval_right:
75-
site_id = tables.sites.add_row(
76-
site.position, site.ancestral_state, site.metadata
77-
)
74+
site_id = tables.sites.append(site)
7875
for m in site.mutations:
79-
tables.mutations.add_row(
80-
site=site_id,
81-
node=m.node,
82-
derived_state=m.derived_state,
83-
parent=tskit.NULL,
84-
time=m.time,
85-
metadata=m.metadata,
76+
tables.mutations.append(
77+
replace(
78+
m,
79+
site=site_id,
80+
parent=tskit.NULL,
81+
)
8682
)
8783
tables.build_index()
8884
tables.compute_mutation_parents()
@@ -2392,9 +2388,7 @@ def verify_unary_tree_sequence(self, ts):
23922388
edges.sort(key=lambda e: node_times[e.parent])
23932389
tables.edges.reset()
23942390
for e in edges:
2395-
tables.edges.add_row(
2396-
left=e.left, right=e.right, child=e.child, parent=e.parent
2397-
)
2391+
tables.edges.append(e)
23982392
ts_new = tables.tree_sequence()
23992393
assert ts_new.num_edges > ts.num_edges
24002394
self.assert_haplotypes_equal(ts, ts_new)
@@ -3471,8 +3465,8 @@ def test_unary_non_sample_external_nodes(self):
34713465
next_node = ts.num_nodes
34723466
tables.edges.reset()
34733467
for e in ts.edges():
3474-
tables.edges.add_row(e.left, e.right, e.parent, e.child)
3475-
tables.edges.add_row(e.left, e.right, e.parent, next_node)
3468+
tables.edges.append(e)
3469+
tables.edges.append(replace(e, child=next_node))
34763470
tables.nodes.add_row(time=0)
34773471
next_node += 1
34783472
tables.sort()
@@ -6684,7 +6678,7 @@ def do_squash(self, ts, compare_lib=True):
66846678
squashed_list = squash_edges(ts)
66856679
squashed_py = tskit.EdgeTable()
66866680
for e in squashed_list:
6687-
squashed_py.add_row(e.left, e.right, e.parent, e.child)
6681+
squashed_py.append(e)
66886682
# Check the Python and C implementations produce the same output.
66896683
assert squashed_py == squashed
66906684
return squashed
@@ -6888,7 +6882,7 @@ def verify_slice_and_squash(self, ts):
68886882
random.shuffle(sliced_edges)
68896883
sliced_table = tskit.EdgeTable()
68906884
for e in sliced_edges:
6891-
sliced_table.add_row(e.left, e.right, e.parent, e.child)
6885+
sliced_table.append(e)
68926886

68936887
# Squash the edges and check against input table.
68946888
sliced_table.squash()
@@ -6949,7 +6943,7 @@ def add_edge(left, right, parent, child):
69496943
# Squash
69506944
edge.right = right
69516945
else:
6952-
tables.edges.add_row(edge.left, edge.right, edge.parent, edge.child)
6946+
tables.edges.append(edge)
69536947
edge_map[child] = new_edge
69546948

69556949
tables.edges.clear()
@@ -6981,7 +6975,7 @@ def add_edge(left, right, parent, child):
69816975
add_edge(left, tables.sequence_length, parent, child)
69826976
# Flush the remaining edges to the table
69836977
for edge in edge_map.values():
6984-
tables.edges.add_row(edge.left, edge.right, edge.parent, edge.child)
6978+
tables.edges.append(edge)
69856979
tables.sort()
69866980
ts = tables.tree_sequence()
69876981
# Now simplify to remove redundant nodes.
@@ -8057,7 +8051,7 @@ def ts_missing_middle(self):
80578051
missing_to = e.left
80588052
else:
80598053
continue # omit this edge => node is isolated
8060-
tables.edges.add_row(e.left, e.right, e.parent, e.child)
8054+
tables.edges.append(e)
80618055
# Check we have non-missing to L & R
80628056
assert 0.0 < missing_from < 1.0
80638057
assert 0.0 < missing_to < 1.0

0 commit comments

Comments
 (0)