From 42d6be7dfd3cfc5b1c98722dc969d1c85ce65def Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 15:13:14 -0500 Subject: [PATCH 01/11] added descendants property --- datatree/treenode.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/datatree/treenode.py b/datatree/treenode.py index 3ff2eb98..7b499710 100644 --- a/datatree/treenode.py +++ b/datatree/treenode.py @@ -238,7 +238,6 @@ def _post_attach_children(self: Tree, children: Mapping[str, Tree]) -> None: def iter_lineage(self: Tree) -> Iterator[Tree]: """Iterate up the tree, starting from the current node.""" - # TODO should this instead return an OrderedDict, so as to include node names? node: Tree | None = self while node is not None: yield node @@ -298,11 +297,30 @@ def subtree(self: Tree) -> Iterator[Tree]: An iterator over all nodes in this tree, including both self and all descendants. Iterates depth-first. + + See Also + -------- + DataTree.descendants """ from . import iterators return iterators.PreOrderIter(self) + @property + def descendants(self: Tree) -> Tuple[Tree]: + """ + Child nodes and all their child nodes. + + Returned in depth-first order. + + See Also + -------- + DataTree.subtree + """ + all_nodes = tuple(self.subtree) + this_node, *descendants = all_nodes + return tuple(descendants) # type: ignore[return-value] + def _pre_detach(self: Tree, parent: Tree) -> None: """Method call before detaching from `parent`.""" pass From 3555ec792d3facc27bca6b9a98648dc8583954f1 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 15:13:36 -0500 Subject: [PATCH 02/11] tests for descendants, lineage, ancestors, subtree --- datatree/tests/test_treenode.py | 91 +++++++++++++++++++++++++-------- 1 file changed, 70 insertions(+), 21 deletions(-) diff --git a/datatree/tests/test_treenode.py b/datatree/tests/test_treenode.py index 1805f038..aaa0362a 100644 --- a/datatree/tests/test_treenode.py +++ b/datatree/tests/test_treenode.py @@ -225,57 +225,106 @@ def test_del_child(self): def create_test_tree(): - f = NamedNode() + a = NamedNode(name="a") b = NamedNode() - a = NamedNode() - d = NamedNode() c = NamedNode() + d = NamedNode() e = NamedNode() + f = NamedNode() g = NamedNode() - i = NamedNode() h = NamedNode() + i = NamedNode() - f.children = {"b": b, "g": g} - b.children = {"a": a, "d": d} - d.children = {"c": c, "e": e} - g.children = {"i": i} - i.children = {"h": h} + a.children = {"b": b, "c": c} + b.children = {"d": d, "e": e} + e.children = {"f": f, "g": g} + c.children = {"h": h} + h.children = {"i": i} - return f + return a, f class TestIterators: def test_preorderiter(self): - tree = create_test_tree() - result = [node.name for node in PreOrderIter(tree)] + root, _ = create_test_tree() + result = [node.name for node in PreOrderIter(root)] expected = [ - None, # root Node is unnamed - "b", "a", + "b", "d", - "c", "e", + "f", "g", - "i", + "c", "h", + "i", ] assert result == expected def test_levelorderiter(self): - tree = create_test_tree() - result = [node.name for node in LevelOrderIter(tree)] + root, _ = create_test_tree() + result = [node.name for node in LevelOrderIter(root)] expected = [ - None, # root Node is unnamed + "a", # root Node is unnamed "b", + "c", + "d", + "e", + "h", + "f", "g", + "i", + ] + assert result == expected + + +class TestAncestry: + def test_lineage(self): + _, leaf = create_test_tree() + lineage = leaf.lineage + expected = ["f", "e", "b", "a"] + for node, expected_name in zip(lineage, expected): + assert node.name == expected_name + + def test_ancestors(self): + _, leaf = create_test_tree() + ancestors = leaf.ancestors + expected = ["a", "b", "e", "f"] + for node, expected_name in zip(ancestors, expected): + assert node.name == expected_name + + def test_subtree(self): + root, _ = create_test_tree() + subtree = root.subtree + expected = [ "a", + "b", "d", - "i", + "e", + "f", + "g", "c", + "h", + "i", + ] + for node, expected_name in zip(subtree, expected): + assert node.name == expected_name + + def test_descendants(self): + root, _ = create_test_tree() + descendants = root.descendants + expected = [ + "b", + "d", "e", + "f", + "g", + "c", "h", + "i", ] - assert result == expected + for node, expected_name in zip(descendants, expected): + assert node.name == expected_name class TestRenderTree: From c87ecfd4dd5d79c95ac976611a150716456c2327 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 15:16:16 -0500 Subject: [PATCH 03/11] added descendants to API docs --- docs/source/api.rst | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/source/api.rst b/docs/source/api.rst index 75c70584..751e5643 100644 --- a/docs/source/api.rst +++ b/docs/source/api.rst @@ -31,6 +31,7 @@ Attributes relating to the recursive tree-like structure of a ``DataTree``. DataTree.is_root DataTree.is_leaf DataTree.subtree + DataTree.descendants DataTree.siblings DataTree.lineage DataTree.ancestors From a543bb9ce9bda3ecbee20a2bbb09244ea0aa6c05 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 15:20:14 -0500 Subject: [PATCH 04/11] whatsnew --- docs/source/whats-new.rst | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/source/whats-new.rst b/docs/source/whats-new.rst index 71febcd9..44bfc9b0 100644 --- a/docs/source/whats-new.rst +++ b/docs/source/whats-new.rst @@ -28,6 +28,9 @@ New Features - Allow method chaining with a new :py:meth:`DataTree.pipe` method (:issue:`151`, :pull:`156`). By `Justus Magin `_. - New, more specific exception types for tree-related errors (:pull:`169`). + By `Tom Nicholas `_. +- Added a new :py:meth:`DataTree.descendants` property (:pull:`170`). + By `Tom Nicholas `_. Breaking changes ~~~~~~~~~~~~~~~~ From aa4e3af01103a16d1e9ce5789b521b5021206826 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 15:55:17 -0500 Subject: [PATCH 05/11] rerun tests From 4774f461cee93a2c6ef1d6e8dc54983a76aeaa30 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 16:30:52 -0500 Subject: [PATCH 06/11] rewrote copy method --- datatree/datatree.py | 67 +++++++++++++++++++++++++++++++++++++-- datatree/ops.py | 3 -- docs/source/whats-new.rst | 3 ++ 3 files changed, 68 insertions(+), 5 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 606c7935..bc238f54 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -628,6 +628,66 @@ def _replace( ) return obj + def copy( + self: DataTree, + deep: bool = False, + ) -> DataTree: + """ + Returns a copy of this subtree. + + Copies this node and all child nodes. + + If `deep=True`, a deep copy is made of each of the component variables. + Otherwise, a shallow copy of each of the component variable is made, so + that the underlying memory region of the new datatree is the same as in + the original datatree. + + Parameters + ---------- + deep : bool, default: False + Whether each component variable is loaded into memory and copied onto + the new object. Default is False. + + Returns + ------- + object : DataTree + New object with dimensions, attributes, coordinates, name, encoding, + and data of this node and all child nodes copied from original. + + See Also + -------- + xarray.Dataset.copy + pandas.DataFrame.copy + """ + return self._copy_subtree(deep=deep) + + def _copy_subtree( + self: DataTree, + deep: bool = False, + memo: dict[int, Any] | None = None, + ) -> DataTree: + """Copy entire subtree""" + new_tree = self._copy_node(deep=deep) + for node in self.descendants: + new_tree[node.path] = node._copy_node(deep=deep) + return new_tree + + def _copy_node( + self: DataTree, + deep: bool = False, + ) -> DataTree: + """Copy just one node of a tree""" + new_node: DataTree = DataTree() + new_node.name = self.name + new_node.ds = self.to_dataset().copy(deep=deep) + return new_node + + def __copy__(self: DataTree) -> DataTree: + return self._copy_subtree(deep=False) + + def __deepcopy__(self: DataTree, memo: dict[int, Any] | None = None) -> DataTree: + return self._copy_subtree(deep=True, memo=memo) + def get( self: DataTree, key: str, default: Optional[DataTree | DataArray] = None ) -> Optional[DataTree | DataArray]: @@ -694,8 +754,11 @@ def _set(self, key: str, val: DataTree | CoercibleValue) -> None: Counterpart to the public .get method, and also only works on the immediate node, not other nodes in the tree. """ if isinstance(val, DataTree): - val.name = key - val.parent = self + # TODO shallow copy here so as not to alter name of node in original tree? + # new_node = copy.copy(val, deep=False) + new_node = val + new_node.name = key + new_node.parent = self else: if not isinstance(val, (DataArray, Variable)): # accommodate other types that can be coerced into Variables diff --git a/datatree/ops.py b/datatree/ops.py index bdc931c9..eabc1faf 100644 --- a/datatree/ops.py +++ b/datatree/ops.py @@ -31,9 +31,6 @@ ] _DATASET_METHODS_TO_MAP = [ "as_numpy", - "copy", - "__copy__", - "__deepcopy__", "set_coords", "reset_coords", "info", diff --git a/docs/source/whats-new.rst b/docs/source/whats-new.rst index 44bfc9b0..95c4e3a8 100644 --- a/docs/source/whats-new.rst +++ b/docs/source/whats-new.rst @@ -35,6 +35,9 @@ New Features Breaking changes ~~~~~~~~~~~~~~~~ +- :py:meth:`DataTree.copy` copy method now only copies the subtree, not the parent nodes (:pull:`171`). + By `Tom Nicholas `_. + Deprecations ~~~~~~~~~~~~ From 6d63b438a6fa5e57ba848b61955b556661170c12 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 16:47:59 -0500 Subject: [PATCH 07/11] remove outdated mypy ignore error --- datatree/datatree.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index bc238f54..0696c90b 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -855,8 +855,7 @@ def from_dict( # Create and set new node node_name = NodePath(path).name if isinstance(data, cls): - # TODO ignoring type error only needed whilst .copy() method is copied from Dataset.copy(). - new_node = data.copy() # type: ignore[attr-defined] + new_node = data.copy() new_node.orphan() else: new_node = cls(name=node_name, data=data) From bdc710e751bf2c46d26a4349d2f674bc5109d5d4 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 17:42:52 -0500 Subject: [PATCH 08/11] changed tests to reflect new expected behaviour --- datatree/tests/test_datatree.py | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/datatree/tests/test_datatree.py b/datatree/tests/test_datatree.py index f64d5fae..1eafb3cf 100644 --- a/datatree/tests/test_datatree.py +++ b/datatree/tests/test_datatree.py @@ -68,12 +68,6 @@ def test_child_gets_named_on_attach(self): mary = DataTree(children={"Sue": sue}) # noqa assert sue.name == "Sue" - @pytest.mark.xfail(reason="requires refactoring to retain name") - def test_grafted_subtree_retains_name(self): - subtree = DataTree("original") - root = DataTree(children={"new_name": subtree}) # noqa - assert subtree.name == "original" - class TestPaths: def test_path_property(self): @@ -294,8 +288,11 @@ class TestSetItem: def test_setitem_new_child_node(self): john = DataTree(name="john") mary = DataTree(name="mary") - john["Mary"] = mary - assert john["Mary"] is mary + john["mary"] = mary + + grafted_mary = john["mary"] + assert grafted_mary.parent is john + assert grafted_mary.name == "mary" def test_setitem_unnamed_child_node_becomes_named(self): john2 = DataTree(name="john2") @@ -304,10 +301,19 @@ def test_setitem_unnamed_child_node_becomes_named(self): def test_setitem_new_grandchild_node(self): john = DataTree(name="john") - DataTree(name="mary", parent=john) + mary = DataTree(name="mary", parent=john) rose = DataTree(name="rose") - john["Mary/Rose"] = rose - assert john["Mary/Rose"] is rose + john["mary/rose"] = rose + + grafted_rose = john["mary/rose"] + assert grafted_rose.parent is mary + assert grafted_rose.name == "rose" + + def test_grafted_subtree_retains_name(self): + subtree = DataTree(name="original_subtree_name") + root = DataTree(name="root") + root["new_subtree_name"] = subtree # noqa + assert subtree.name == "original_subtree_name" def test_setitem_new_empty_node(self): john = DataTree(name="john") From 2bfce833abffef1c8c6a1f7b8ba75c6f2bf670d3 Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 17:43:16 -0500 Subject: [PATCH 09/11] shallow copy on insertion --- datatree/datatree.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/datatree/datatree.py b/datatree/datatree.py index 0696c90b..661da370 100644 --- a/datatree/datatree.py +++ b/datatree/datatree.py @@ -754,9 +754,8 @@ def _set(self, key: str, val: DataTree | CoercibleValue) -> None: Counterpart to the public .get method, and also only works on the immediate node, not other nodes in the tree. """ if isinstance(val, DataTree): - # TODO shallow copy here so as not to alter name of node in original tree? - # new_node = copy.copy(val, deep=False) - new_node = val + # create and assign a shallow copy here so as not to alter original name of node in grafted tree + new_node = val.copy(deep=False) new_node.name = key new_node.parent = self else: From e0422d805dbabe889415c542dbc018cfede076da Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 17:43:38 -0500 Subject: [PATCH 10/11] update test for checking isomorphism from root --- datatree/tests/test_mapping.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/datatree/tests/test_mapping.py b/datatree/tests/test_mapping.py index e0db6cb0..882217c9 100644 --- a/datatree/tests/test_mapping.py +++ b/datatree/tests/test_mapping.py @@ -68,8 +68,9 @@ def test_not_isomorphic_complex_tree(self, create_test_datatree): def test_checking_from_root(self, create_test_datatree): dt1 = create_test_datatree() dt2 = create_test_datatree() - real_root = DataTree() - real_root["fake_root"] = dt2 + real_root = DataTree(name="real root") + dt2.name = "not_real_root" + dt2.parent = real_root with pytest.raises(TreeIsomorphismError): check_isomorphic(dt1, dt2, check_from_root=True) From 1e11175e29549c792c390af7db6cb24799a1418e Mon Sep 17 00:00:00 2001 From: Thomas Nicholas Date: Fri, 30 Dec 2022 17:45:17 -0500 Subject: [PATCH 11/11] whatsnew --- docs/source/whats-new.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/docs/source/whats-new.rst b/docs/source/whats-new.rst index 95c4e3a8..c2d7f5a5 100644 --- a/docs/source/whats-new.rst +++ b/docs/source/whats-new.rst @@ -37,6 +37,8 @@ Breaking changes - :py:meth:`DataTree.copy` copy method now only copies the subtree, not the parent nodes (:pull:`171`). By `Tom Nicholas `_. +- Grafting a subtree onto another tree now leaves name of original subtree object unchanged (:issue:`116`, :pull:`172`). + By `Tom Nicholas `_. Deprecations ~~~~~~~~~~~~