From 721aaf1f4dc88d8801717246941d9e63379c9aef Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 8 Apr 2024 10:05:44 -0700 Subject: [PATCH 1/8] Revert `.oindex` and `.vindex` additions in `_ElementwiseFunctionArray`, `NativeEndiannessArray`, and `BoolTypeArray` classes --- xarray/coding/variables.py | 18 ------------------ 1 file changed, 18 deletions(-) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index d31cb6e626a..94d100c6bb9 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -68,12 +68,6 @@ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): def dtype(self) -> np.dtype: return np.dtype(self._dtype) - def _oindex_get(self, key): - return type(self)(self.array.oindex[key], self.func, self.dtype) - - def _vindex_get(self, key): - return type(self)(self.array.vindex[key], self.func, self.dtype) - def __getitem__(self, key): return type(self)(self.array[key], self.func, self.dtype) @@ -113,12 +107,6 @@ def __init__(self, array) -> None: def dtype(self) -> np.dtype: return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize)) - def _oindex_get(self, key): - return np.asarray(self.array.oindex[key], dtype=self.dtype) - - def _vindex_get(self, key): - return np.asarray(self.array.vindex[key], dtype=self.dtype) - def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) @@ -151,12 +139,6 @@ def __init__(self, array) -> None: def dtype(self) -> np.dtype: return np.dtype("bool") - def _oindex_get(self, key): - return np.asarray(self.array.oindex[key], dtype=self.dtype) - - def _vindex_get(self, key): - return np.asarray(self.array.vindex[key], dtype=self.dtype) - def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) From 21cc8d11df602d264f36d46393adc8f8d1f691a7 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 9 Apr 2024 14:54:40 -0700 Subject: [PATCH 2/8] temporarily remove `oindex` and `vindex` 
support from StackedBytesArray --- xarray/coding/strings.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index db95286f6aa..3ecccd02858 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -249,12 +249,6 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): return f"{type(self).__name__}({self.array!r})" - def _vindex_get(self, key): - return _numpy_char_to_bytes(self.array.vindex[key]) - - def _oindex_get(self, key): - return _numpy_char_to_bytes(self.array.oindex[key]) - def __getitem__(self, key): # require slicing the last dimension completely key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) From 30993ff9498d0fc1e94649ff2822835d0254e0ce Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 9 Apr 2024 15:09:24 -0700 Subject: [PATCH 3/8] temporarily add isinstance check for unsupported ExplicitlyIndexedNDArrayMixin subclasses --- xarray/core/indexing.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/xarray/core/indexing.py b/xarray/core/indexing.py index e26c50c8b90..1542b1c899a 100644 --- a/xarray/core/indexing.py +++ b/xarray/core/indexing.py @@ -615,7 +615,18 @@ def shape(self) -> _Shape: return tuple(shape) def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): + from xarray.coding import strings, variables + + if isinstance(self.array, ExplicitlyIndexedNDArrayMixin) and not isinstance( + self.array, + ( + strings.StackedBytesArray, + variables._ElementwiseFunctionArray, + variables.BoolTypeArray, + variables.NativeEndiannessArray, + ), + ): + # TODO: Remove the isinstance check for variables.BoolTypeArray and variables.NativeEndiannessArray once the BackendArray is updated with oindex and vindex properties array = apply_indexer(self.array, self.key) else: # If the array is not an ExplicitlyIndexedNDArrayMixin, @@ -691,7 +702,18 @@ def shape(self) -> _Shape: return 
np.broadcast(*self.key.tuple).shape def get_duck_array(self): - if isinstance(self.array, ExplicitlyIndexedNDArrayMixin): + from xarray.coding import strings, variables + + if isinstance(self.array, ExplicitlyIndexedNDArrayMixin) and not isinstance( + self.array, + ( + strings.StackedBytesArray, + variables._ElementwiseFunctionArray, + variables.BoolTypeArray, + variables.NativeEndiannessArray, + ), + ): + # TODO: Remove the isinstance check for variables.BoolTypeArray and variables.NativeEndiannessArray once the BackendArray is updated with oindex and vindex properties array = apply_indexer(self.array, self.key) else: # If the array is not an ExplicitlyIndexedNDArrayMixin, From 89f542dd94cfad38104adfd106a76b66f847ed95 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 9 Apr 2024 15:17:39 -0700 Subject: [PATCH 4/8] fix test --- xarray/tests/test_coding_strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_strings.py b/xarray/tests/test_coding_strings.py index 51f63ea72dd..f1eca00f9a1 100644 --- a/xarray/tests/test_coding_strings.py +++ b/xarray/tests/test_coding_strings.py @@ -181,7 +181,7 @@ def test_StackedBytesArray_vectorized_indexing() -> None: V = IndexerMaker(indexing.VectorizedIndexer) indexer = V[np.array([[0, 1], [1, 0]])] - actual = stacked.vindex[indexer] + actual = stacked[indexer] assert_array_equal(actual, expected) From e6e5e479eb690a7ed66fffd790df150346457805 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 9 Apr 2024 15:50:50 -0700 Subject: [PATCH 5/8] Temporary fix for unsupported ExplicitlyIndexedNDArrayMixin subclasses --- xarray/coding/strings.py | 4 ++++ xarray/coding/variables.py | 12 ++++++++++++ 2 files changed, 16 insertions(+) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 3ecccd02858..c0efa60cb71 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -249,6 +249,10 @@ def shape(self) -> tuple[int, ...]: def __repr__(self): 
return f"{type(self).__name__}({self.array!r})" + def _check_and_raise_if_non_basic_indexer(self, indexer) -> None: + ... + # TODO: this is a temporary fix until BackendArray supports vindex and oindex + def __getitem__(self, key): # require slicing the last dimension completely key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 94d100c6bb9..fd677c8380c 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -68,6 +68,10 @@ def __init__(self, array, func: Callable, dtype: np.typing.DTypeLike): def dtype(self) -> np.dtype: return np.dtype(self._dtype) + def _check_and_raise_if_non_basic_indexer(self, indexer) -> None: + ... + # TODO: this is a temporary fix until BackendArray supports vindex and oindex + def __getitem__(self, key): return type(self)(self.array[key], self.func, self.dtype) @@ -107,6 +111,10 @@ def __init__(self, array) -> None: def dtype(self) -> np.dtype: return np.dtype(self.array.dtype.kind + str(self.array.dtype.itemsize)) + def _check_and_raise_if_non_basic_indexer(self, indexer) -> None: + ... + # TODO: this is a temporary fix until BackendArray supports vindex and oindex + def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) @@ -139,6 +147,10 @@ def __init__(self, array) -> None: def dtype(self) -> np.dtype: return np.dtype("bool") + def _check_and_raise_if_non_basic_indexer(self, indexer) -> None: + ... 
+ # TODO: this is a temporary fix until BackendArray supports vindex and oindex + def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) From e58ddb31abc1f622a3e5116adaa92801bfc81312 Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 9 Apr 2024 16:05:34 -0700 Subject: [PATCH 6/8] check for key type in __getitem__ --- xarray/coding/strings.py | 9 ++++++++- xarray/coding/variables.py | 27 ++++++++++++++++++++++++--- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index c0efa60cb71..1e6869cde01 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -258,4 +258,11 @@ def __getitem__(self, key): key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) if key.tuple[-1] != slice(None): raise IndexError("too many indices") - return _numpy_char_to_bytes(self.array[key]) + + if isinstance(key, indexing.OuterIndexer): + data = self.array.oindex[key] + elif isinstance(key, indexing.VectorizedIndexer): + data = self.array.vindex[key] + else: + data = self.array[key] + return _numpy_char_to_bytes(data) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index fd677c8380c..6fb00031bcc 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -73,7 +73,14 @@ def _check_and_raise_if_non_basic_indexer(self, indexer) -> None: # TODO: this is a temporary fix until BackendArray supports vindex and oindex def __getitem__(self, key): - return type(self)(self.array[key], self.func, self.dtype) + # TODO: this is a temporary fix until BackendArray supports vindex and oindex + if isinstance(key, indexing.OuterIndexer): + data = self.array.oindex[key] + elif isinstance(key, indexing.VectorizedIndexer): + data = self.array.vindex[key] + else: + data = self.array[key] + return type(self)(data, self.func, self.dtype) def get_duck_array(self): return self.func(self.array.get_duck_array()) @@ -116,7 +123,14 @@ def 
_check_and_raise_if_non_basic_indexer(self, indexer) -> None: # TODO: this is a temporary fix until BackendArray supports vindex and oindex def __getitem__(self, key) -> np.ndarray: - return np.asarray(self.array[key], dtype=self.dtype) + # TODO: this is a temporary fix until BackendArray supports vindex and oindex + if isinstance(key, indexing.OuterIndexer): + data = self.array.oindex[key] + elif isinstance(key, indexing.VectorizedIndexer): + data = self.array.vindex[key] + else: + data = self.array[key] + return np.asarray(data, dtype=self.dtype) class BoolTypeArray(indexing.ExplicitlyIndexedNDArrayMixin): @@ -152,7 +166,14 @@ def _check_and_raise_if_non_basic_indexer(self, indexer) -> None: # TODO: this is a temporary fix until BackendArray supports vindex and oindex def __getitem__(self, key) -> np.ndarray: - return np.asarray(self.array[key], dtype=self.dtype) + # TODO: this is a temporary fix until BackendArray supports vindex and oindex + if isinstance(key, indexing.OuterIndexer): + data = self.array.oindex[key] + elif isinstance(key, indexing.VectorizedIndexer): + data = self.array.vindex[key] + else: + data = self.array[key] + return np.asarray(data, dtype=self.dtype) def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): From 9a59d28d7a994283f237c8b34d29173cf92fdf2f Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Tue, 9 Apr 2024 16:05:49 -0700 Subject: [PATCH 7/8] add comment --- xarray/coding/strings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/strings.py b/xarray/coding/strings.py index 1e6869cde01..5612cdd11e9 100644 --- a/xarray/coding/strings.py +++ b/xarray/coding/strings.py @@ -258,7 +258,7 @@ def __getitem__(self, key): key = type(key)(indexing.expanded_indexer(key.tuple, self.array.ndim)) if key.tuple[-1] != slice(None): raise IndexError("too many indices") - + # TODO: this is a temporary fix until BackendArray supports vindex and oindex if isinstance(key, indexing.OuterIndexer): 
data = self.array.oindex[key] elif isinstance(key, indexing.VectorizedIndexer): From 3d399e1a32533fc8cd616772edd216859391eafd Mon Sep 17 00:00:00 2001 From: Anderson Banihirwe Date: Mon, 29 Apr 2024 23:49:40 -0700 Subject: [PATCH 8/8] add test --- xarray/tests/test_backends.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index fdf181b583a..43c27d88d9e 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -3211,6 +3211,15 @@ def test_bytes_pickle(self) -> None: unpickled = pickle.loads(pickle.dumps(ds)) assert_identical(unpickled, data) + def test_scipy_wrapper_array_oindex_vindex(self) -> None: + ds = xr.Dataset() + ds["A"] = xr.DataArray([[1, "a"], [2, "b"]], dims=["x", "y"]) + with create_tmp_file(allow_cleanup_failure=False) as path: + ds.to_netcdf(path, engine="scipy") + with xr.open_dataset(path, engine="scipy") as ds2: + with create_tmp_file(allow_cleanup_failure=False) as path2: + ds2.sel(y=[1]).to_netcdf(path2) + @requires_scipy class TestScipyFileObject(CFEncodedBase, NetCDF3Only):