From bebf63a6f323ef7a182bb214381f6313d2cee434 Mon Sep 17 00:00:00 2001 From: Matt McCormick Date: Sun, 15 May 2022 19:58:36 -0400 Subject: [PATCH 1/2] Do not call __exit__ on Zarr store when opening The `with` context when opening the zarr group will result in calling __exit__ on the store when the function completes. This calls `.close()` on ZipStores, which results in errors: ``` ValueError: Attempt to use ZIP archive that was already closed ``` --- datatree/io.py | 36 ++++++++++++++++++------------------ datatree/tests/test_io.py | 14 ++++++++++++++ 2 files changed, 32 insertions(+), 18 deletions(-) diff --git a/datatree/io.py b/datatree/io.py index 6cf56275..6236763d 100644 --- a/datatree/io.py +++ b/datatree/io.py @@ -84,24 +84,24 @@ def _open_datatree_netcdf(filename: str, **kwargs) -> DataTree: def _open_datatree_zarr(store, **kwargs) -> DataTree: import zarr # type: ignore - with zarr.open_group(store, mode="r") as zds: - ds = open_dataset(store, engine="zarr", **kwargs) - tree_root = DataTree.from_dict({"/": ds}) - for path in _iter_zarr_groups(zds): - try: - subgroup_ds = open_dataset(store, engine="zarr", group=path, **kwargs) - except zarr.errors.PathNotFoundError: - subgroup_ds = Dataset() - - # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again - node_name = NodePath(path).name - new_node: DataTree = DataTree(name=node_name, data=subgroup_ds) - tree_root._set_item( - path, - new_node, - allow_overwrite=False, - new_nodes_along_path=True, - ) + zds = zarr.open_group(store, mode="r") + ds = open_dataset(store, engine="zarr", **kwargs) + tree_root = DataTree.from_dict({"/": ds}) + for path in _iter_zarr_groups(zds): + try: + subgroup_ds = open_dataset(store, engine="zarr", group=path, **kwargs) + except zarr.errors.PathNotFoundError: + subgroup_ds = Dataset() + + # TODO refactor to use __setitem__ once creation of new nodes by assigning Dataset works again + node_name = NodePath(path).name + new_node: DataTree = 
DataTree(name=node_name, data=subgroup_ds) + tree_root._set_item( + path, + new_node, + allow_overwrite=False, + new_nodes_along_path=True, + ) return tree_root diff --git a/datatree/tests/test_io.py b/datatree/tests/test_io.py index 659f0c31..433a2e01 100644 --- a/datatree/tests/test_io.py +++ b/datatree/tests/test_io.py @@ -40,6 +40,20 @@ def test_to_zarr(self, tmpdir): roundtrip_dt = open_datatree(filepath, engine="zarr") assert_equal(original_dt, roundtrip_dt) + @requires_zarr + def test_to_zarr_zip_store(self, tmpdir): + from zarr.storage import ZipStore + filepath = str( + tmpdir / "test.zarr.zip" + ) # casting to str avoids a pathlib bug in xarray + original_dt = create_test_datatree() + store = ZipStore(filepath) + original_dt.to_zarr(store) + + roundtrip_dt = open_datatree(store, engine="zarr") + assert_equal(original_dt, roundtrip_dt) + + @requires_zarr def test_to_zarr_not_consolidated(self, tmpdir): filepath = tmpdir / "test.zarr" From 356daa3c0f0affa5ee07937fb50af33c8b4b1df6 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 16 May 2022 00:11:32 +0000 Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- datatree/tests/test_io.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/datatree/tests/test_io.py b/datatree/tests/test_io.py index 433a2e01..b7005471 100644 --- a/datatree/tests/test_io.py +++ b/datatree/tests/test_io.py @@ -43,6 +43,7 @@ def test_to_zarr(self, tmpdir): @requires_zarr def test_to_zarr_zip_store(self, tmpdir): from zarr.storage import ZipStore + filepath = str( tmpdir / "test.zarr.zip" ) # casting to str avoids a pathlib bug in xarray @@ -53,7 +54,6 @@ def test_to_zarr_zip_store(self, tmpdir): roundtrip_dt = open_datatree(store, engine="zarr") assert_equal(original_dt, roundtrip_dt) - @requires_zarr def test_to_zarr_not_consolidated(self, tmpdir): filepath = tmpdir / "test.zarr"