TableCollection force_offset_64 #1602

Merged (2 commits, Aug 4, 2021)
48 changes: 48 additions & 0 deletions python/_tskitmodule.c
@@ -6428,6 +6428,46 @@ TableCollection_load(TableCollection *self, PyObject *args, PyObject *kwds)
    return ret;
}

static PyObject *
TableCollection_asdict(TableCollection *self, PyObject *args, PyObject *kwds)
{
    PyObject *ret = NULL;
    int force_offset_64 = 0;
    static char *kwlist[] = { "force_offset_64", NULL };

    if (TableCollection_check_state(self) != 0) {
        goto out;
    }
    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|i", kwlist, &force_offset_64)) {
        goto out;
    }
    /* Use the LWT tables code */
    ret = dump_tables_dict(self->tables, force_offset_64);
out:
    return ret;
}

static PyObject *
TableCollection_fromdict(TableCollection *self, PyObject *args)
{
    PyObject *ret = NULL;
    PyObject *dict = NULL;

    if (TableCollection_check_state(self) != 0) {
        goto out;
    }
    if (!PyArg_ParseTuple(args, "O!", &PyDict_Type, &dict)) {
        goto out;
    }
    /* Use the LWT tables code */
    if (parse_table_collection_dict(self->tables, dict) != 0) {
        goto out;
    }
    ret = Py_BuildValue("");
out:
    return ret;
}

static PyGetSetDef TableCollection_getsetters[] = {
{ .name = "individuals",
.get = (getter) TableCollection_get_individuals,
@@ -6548,6 +6588,14 @@ static PyMethodDef TableCollection_methods[] = {
        .ml_meth = (PyCFunction) TableCollection_load,
        .ml_flags = METH_VARARGS | METH_KEYWORDS,
        .ml_doc = "Loads the table collection out to the specified file." },
    { .ml_name = "asdict",
        .ml_meth = (PyCFunction) TableCollection_asdict,
        .ml_flags = METH_VARARGS | METH_KEYWORDS,
        .ml_doc = "Returns the table collection in dictionary encoding. " },
    { .ml_name = "fromdict",
        .ml_meth = (PyCFunction) TableCollection_fromdict,
        .ml_flags = METH_VARARGS,
        .ml_doc = "Sets the state of this table collection from the specified dict" },
    { NULL } /* Sentinel */
};
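For orientation, a minimal sketch of how these new low-level entry points are driven from Python, mirroring the tests added below (the empty table collection and the literal flag value are illustrative only):

import _tskit

tc = _tskit.TableCollection(sequence_length=1.0)
d = tc.asdict(force_offset_64=1)  # parsed with "|i": an optional int flag defaulting to 0
tc2 = _tskit.TableCollection(sequence_length=0)
tc2.fromdict(d)  # rebuilds the full state, including sequence_length, from the dict
assert tc.equals(tc2)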

31 changes: 31 additions & 0 deletions python/tests/test_lowlevel.py
@@ -408,6 +408,37 @@ def test_equals_bad_args(self):
with pytest.raises(TypeError):
tc.equals(tc, ignore_timestamps=bad_bool)

    def test_asdict(self):
        for ts in self.get_example_tree_sequences():
            tc = _tskit.TableCollection(sequence_length=ts.get_sequence_length())
            ts.dump_tables(tc)
            d = tc.asdict()
            # Method is tested extensively elsewhere, just basic sanity check here
            assert isinstance(d, dict)
            assert len(d) > 0

    def test_fromdict(self):
        for ts in self.get_example_tree_sequences():
            tc1 = _tskit.TableCollection(sequence_length=ts.get_sequence_length())
            ts.dump_tables(tc1)
            d = tc1.asdict()
            tc2 = _tskit.TableCollection(sequence_length=0)
            tc2.fromdict(d)
            assert tc1.equals(tc2)

    def test_asdict_bad_args(self):
        ts = msprime.simulate(10, random_seed=1242)
Review comment (Member): You could use simple_degree1_ts_fixture here, but I don't feel strongly about it.

Reply (Member, author): I was following the conventions in the rest of the class, so I don't think it's worth changing to a fixture here.

        tc = ts.tables._ll_tables
        for bad_type in [None, 0.1, "str"]:
            with pytest.raises(TypeError):
                tc.asdict(force_offset_64=bad_type)

    def test_fromdict_bad_args(self):
        tc = _tskit.TableCollection(0)
        for bad_type in [None, 0.1, "str"]:
            with pytest.raises(TypeError):
                tc.fromdict(bad_type)


class TestTableMethods:
"""
71 changes: 44 additions & 27 deletions python/tests/test_tables.py
@@ -3129,31 +3129,6 @@ class TestTableCollection:
Tests for the convenience wrapper around a collection of related tables.
"""

    def add_metadata(self, tc):
        tc.metadata_schema = tskit.MetadataSchema(
            {
                "codec": "struct",
                "type": "object",
                "properties": {"top-level": {"type": "string", "binaryFormat": "50p"}},
            }
        )
        tc.metadata = {"top-level": "top-level-metadata"}
        for table in tskit.TABLE_NAMES:
            t = getattr(tc, table)
            if hasattr(t, "metadata_schema"):
                t.packset_metadata(
                    [f"{table}-{i:10}".encode() for i in range(t.num_rows)]
                )
                t.metadata_schema = tskit.MetadataSchema(
                    {
                        "codec": "struct",
                        "type": "object",
                        "properties": {
                            table: {"type": "string", "binaryFormat": "16p"}
                        },
                    }
                )

def test_table_references(self):
ts = msprime.simulate(10, mutation_rate=2, random_seed=1)
tables = ts.tables
@@ -3229,6 +3204,26 @@ def test_asdict(self, ts_fixture):
assert t1.has_index()
assert t2.has_index()

    @pytest.mark.parametrize("force_offset_64", [True, False])
    def test_asdict_force_offset_64(self, ts_fixture, force_offset_64):
        tables = ts_fixture.dump_tables()
        d = tables.asdict(force_offset_64=force_offset_64)
        for table in tables.name_map:
            for name, column in d[table].items():
                if name.endswith("_offset"):
                    if force_offset_64:
                        assert column.dtype == np.uint64
                    else:
                        assert column.dtype == np.uint32

    def test_asdict_force_offset_64_default(self, ts_fixture):
        tables = ts_fixture.dump_tables()
        d = tables.asdict()
        for table in tables.name_map:
            for name, column in d[table].items():
                if name.endswith("_offset"):
                    assert column.dtype == np.uint32

def test_asdict_lifecycle(self, ts_fixture):
tables = ts_fixture.dump_tables()
tables_dict = tables.asdict()
@@ -3945,13 +3940,35 @@ def test_bad_metadata(self):
assert tc._ll_tables.metadata == b""


class TestTableCollectionPickle(TestTableCollection):
def add_table_collection_metadata(tc):
    tc.metadata_schema = tskit.MetadataSchema(
        {
            "codec": "struct",
            "type": "object",
            "properties": {"top-level": {"type": "string", "binaryFormat": "50p"}},
        }
    )
    tc.metadata = {"top-level": "top-level-metadata"}
    for table in tskit.TABLE_NAMES:
        t = getattr(tc, table)
        if hasattr(t, "metadata_schema"):
            t.packset_metadata([f"{table}-{i:10}".encode() for i in range(t.num_rows)])
            t.metadata_schema = tskit.MetadataSchema(
                {
                    "codec": "struct",
                    "type": "object",
                    "properties": {table: {"type": "string", "binaryFormat": "16p"}},
                }
            )


class TestTableCollectionPickle:
    """
    Tests that we can round-trip table collections through pickle.
    """

    def verify(self, tables):
        self.add_metadata(tables)
        add_table_collection_metadata(tables)
        other_tables = pickle.loads(pickle.dumps(tables))
        tables.assert_equals(other_tables)

71 changes: 16 additions & 55 deletions python/tskit/tables.py
@@ -1,6 +1,6 @@
# MIT License
#
# Copyright (c) 2018-2020 Tskit Developers
# Copyright (c) 2018-2021 Tskit Developers
# Copyright (c) 2017 University of Oxford
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
@@ -2461,29 +2461,22 @@ def metadata_bytes(self) -> Any:
"""
return self._ll_tables.metadata

    def asdict(self):
    def asdict(self, force_offset_64=False):
        """
        Returns a dictionary representation of this TableCollection.
        Returns the nested dictionary representation of this TableCollection
        used for interchange.

        Note: the semantics of this method changed at tskit 0.1.0. Previously a
        map of table names to the tables themselves was returned.

        :param bool force_offset_64: If True, all offset columns will have dtype
            np.uint64. If False (the default) the offset array columns will have
            a dtype of either np.uint32 or np.uint64, depending on the size of the
            corresponding data array.
        :return: The dictionary representation of this table collection.
        :rtype: dict
        """
        ret = {
            "encoding_version": (1, 3),
            "sequence_length": self.sequence_length,
            "metadata_schema": repr(self.metadata_schema),
            "metadata": self.metadata_schema.encode_row(self.metadata),
            "individuals": self.individuals.asdict(),
            "nodes": self.nodes.asdict(),
            "edges": self.edges.asdict(),
            "migrations": self.migrations.asdict(),
            "sites": self.sites.asdict(),
            "mutations": self.mutations.asdict(),
            "populations": self.populations.asdict(),
            "provenances": self.provenances.asdict(),
            "indexes": self.indexes.asdict(),
        }
        return ret
        return self._ll_tables.asdict(force_offset_64)

@property
def name_map(self):
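A hedged sketch of what the new force_offset_64 keyword does in practice, following the tests above; the simulated tree sequence here is only a stand-in for the ts_fixture used in the test suite, and nodes/metadata_offset is just one representative offset column:

import msprime
import numpy as np

tables = msprime.simulate(10, random_seed=42).dump_tables()
d64 = tables.asdict(force_offset_64=True)
d32 = tables.asdict()  # default: offset dtype chosen to fit the corresponding data column
assert d64["nodes"]["metadata_offset"].dtype == np.uint64
assert d32["nodes"]["metadata_offset"].dtype == np.uint32  # small tables stay 32-bit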
@@ -2701,45 +2694,13 @@ def dump(self, file_or_path):

    # Unpickle support
    def __setstate__(self, state):
        self.__init__(state["sequence_length"])
        self.metadata_schema = tskit.parse_metadata_schema(state["metadata_schema"])
        self.metadata = self.metadata_schema.decode_row(state["metadata"])
        self.individuals.set_columns(**state["individuals"])
        self.nodes.set_columns(**state["nodes"])
        self.edges.set_columns(**state["edges"])
        self.migrations.set_columns(**state["migrations"])
        self.sites.set_columns(**state["sites"])
        self.mutations.set_columns(**state["mutations"])
        self.populations.set_columns(**state["populations"])
        self.provenances.set_columns(**state["provenances"])
        self.__init__()
        self._ll_tables.fromdict(state)

    @classmethod
    def fromdict(self, tables_dict):
        tables = TableCollection(tables_dict["sequence_length"])
        try:
            tables.metadata_schema = tskit.parse_metadata_schema(
                tables_dict["metadata_schema"]
            )
        except KeyError:
            pass
        try:
            tables.metadata = tables.metadata_schema.decode_row(tables_dict["metadata"])
        except KeyError:
            pass
        tables.individuals.set_columns(**tables_dict["individuals"])
        tables.nodes.set_columns(**tables_dict["nodes"])
        tables.edges.set_columns(**tables_dict["edges"])
        tables.migrations.set_columns(**tables_dict["migrations"])
        tables.sites.set_columns(**tables_dict["sites"])
        tables.mutations.set_columns(**tables_dict["mutations"])
        tables.populations.set_columns(**tables_dict["populations"])
        tables.provenances.set_columns(**tables_dict["provenances"])

        # Indexes must be last as other wise the check for their consistency will fail
        try:
            tables.indexes = TableCollectionIndexes(**tables_dict["indexes"])
        except KeyError:
            pass
        tables = TableCollection()
        tables._ll_tables.fromdict(tables_dict)
        return tables

def copy(self):
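Since __setstate__ and TableCollection.fromdict now delegate to the low-level dict code, a pickle round trip exercises the whole path; a minimal sketch with an empty collection, purely for illustration:

import pickle

import tskit

tables = tskit.TableCollection(sequence_length=1.0)
restored = pickle.loads(pickle.dumps(tables))
tables.assert_equals(restored)  # the dict encoding carries the full table collection state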