Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 2 additions & 3 deletions ci/install-upstream-wheels.sh
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ $conda remove -y numba numbagg sparse
# temporarily remove numexpr
$conda remove -y numexpr
# temporarily remove backends
$conda remove -y cf_units hdf5 h5py netcdf4 pydap
$conda remove -y cf_units pydap
# forcibly remove packages to avoid artifacts
$conda remove -y --force \
numpy \
Expand All @@ -37,8 +37,7 @@ python -m pip install \
numpy \
scipy \
matplotlib \
pandas \
h5py
pandas
# for some reason pandas depends on pyarrow already.
# Remove once a `pyarrow` version compiled with `numpy>=2.0` is on `conda-forge`
python -m pip install \
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/all-but-dask.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ dependencies:
- netcdf4
- numba
- numbagg
- numpy<2
- numpy
- packaging
- pandas
- pint>=0.22
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment-windows.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ dependencies:
- netcdf4
- numba
- numbagg
- numpy<2
- numpy
- packaging
- pandas
# - pint>=0.22
Expand Down
2 changes: 1 addition & 1 deletion ci/requirements/environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ dependencies:
- numba
- numbagg
- numexpr
- numpy<2
- numpy
- opt_einsum
- packaging
- pandas
Expand Down
4 changes: 3 additions & 1 deletion doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,8 @@ Bug fixes
By `Pontus Lurcock <https://github.com/pont-us>`_.
- Allow diffing objects with array attributes on variables (:issue:`9153`, :pull:`9169`).
By `Justus Magin <https://github.com/keewis>`_.
- ``numpy>=2`` compatibility in the ``netcdf4`` backend (:pull:`9136`).
By `Justus Magin <https://github.com/keewis>`_ and `Kai Mühlbauer <https://github.com/kmuehlbauer>`_.
- Promote floating-point numeric datetimes before decoding (:issue:`9179`, :pull:`9182`).
By `Justus Magin <https://github.com/keewis>`_.
- Fiy static typing of tolerance arguments by allowing `str` type (:issue:`8892`, :pull:`9194`).
Expand All @@ -61,7 +63,7 @@ Documentation
- Adds a flow-chart diagram to help users navigate help resources (`Discussion #8990 <https://github.com/pydata/xarray/discussions/8990>`_).
By `Jessica Scheick <https://github.com/jessicas11>`_.
- Improvements to Zarr & chunking docs (:pull:`9139`, :pull:`9140`, :pull:`9132`)
By `Maximilian Roos <https://github.com/max-sixty>`_
By `Maximilian Roos <https://github.com/max-sixty>`_.


Internal Changes
Expand Down
16 changes: 10 additions & 6 deletions xarray/coding/variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -516,10 +516,13 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable:
dims, data, attrs, encoding = unpack_for_encoding(variable)

pop_to(encoding, attrs, "_Unsigned")
signed_dtype = np.dtype(f"i{data.dtype.itemsize}")
# we need the on-disk type here
# trying to get it from encoding, resort to an int with the same precision as data.dtype if not available
signed_dtype = np.dtype(encoding.get("dtype", f"i{data.dtype.itemsize}"))
if "_FillValue" in attrs:
new_fill = signed_dtype.type(attrs["_FillValue"])
attrs["_FillValue"] = new_fill
new_fill = np.array(attrs["_FillValue"])
# use view here to prevent OverflowError
attrs["_FillValue"] = new_fill.view(signed_dtype).item()
data = duck_array_ops.astype(duck_array_ops.around(data), signed_dtype)

return Variable(dims, data, attrs, encoding, fastpath=True)
Expand All @@ -535,10 +538,11 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
if unsigned == "true":
unsigned_dtype = np.dtype(f"u{data.dtype.itemsize}")
transform = partial(np.asarray, dtype=unsigned_dtype)
data = lazy_elemwise_func(data, transform, unsigned_dtype)
if "_FillValue" in attrs:
new_fill = unsigned_dtype.type(attrs["_FillValue"])
attrs["_FillValue"] = new_fill
new_fill = np.array(attrs["_FillValue"], dtype=data.dtype)
# use view here to prevent OverflowError
attrs["_FillValue"] = new_fill.view(unsigned_dtype).item()
data = lazy_elemwise_func(data, transform, unsigned_dtype)
elif data.dtype.kind == "u":
if unsigned == "false":
signed_dtype = np.dtype(f"i{data.dtype.itemsize}")
Expand Down
33 changes: 31 additions & 2 deletions xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def create_encoded_masked_and_scaled_data(dtype: np.dtype) -> Dataset:

def create_unsigned_masked_scaled_data(dtype: np.dtype) -> Dataset:
encoding = {
"_FillValue": 255,
"_FillValue": np.int8(-1),
"_Unsigned": "true",
"dtype": "i1",
"add_offset": dtype.type(10),
Expand Down Expand Up @@ -925,6 +925,35 @@ def test_roundtrip_mask_and_scale(self, decoded_fn, encoded_fn, dtype) -> None:
assert decoded.variables[k].dtype == actual.variables[k].dtype
assert_allclose(decoded, actual, decode_bytes=False)

@pytest.mark.parametrize("fillvalue", [np.int8(-1), np.uint8(255)])
def test_roundtrip_unsigned(self, fillvalue):
# regression/numpy2 test for
encoding = {
"_FillValue": fillvalue,
"_Unsigned": "true",
"dtype": "i1",
}
x = np.array([0, 1, 127, 128, 254, np.nan], dtype=np.float32)
decoded = Dataset({"x": ("t", x, {}, encoding)})

attributes = {
"_FillValue": fillvalue,
"_Unsigned": "true",
}
# Create unsigned data corresponding to [0, 1, 127, 128, 255] unsigned
sb = np.asarray([0, 1, 127, -128, -2, -1], dtype="i1")
encoded = Dataset({"x": ("t", sb, attributes)})

with self.roundtrip(decoded) as actual:
for k in decoded.variables:
assert decoded.variables[k].dtype == actual.variables[k].dtype
assert_allclose(decoded, actual, decode_bytes=False)

with self.roundtrip(decoded, open_kwargs=dict(decode_cf=False)) as actual:
for k in encoded.variables:
assert encoded.variables[k].dtype == actual.variables[k].dtype
assert_allclose(encoded, actual, decode_bytes=False)

@staticmethod
def _create_cf_dataset():
original = Dataset(
Expand Down Expand Up @@ -4285,7 +4314,7 @@ def test_roundtrip_coordinates_with_space(self) -> None:
def test_roundtrip_numpy_datetime_data(self) -> None:
# Override method in DatasetIOBase - remove not applicable
# save_kwargs
times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"])
times = pd.to_datetime(["2000-01-01", "2000-01-02", "NaT"], unit="ns")
expected = Dataset({"t": ("t", times), "t0": times[0]})
with self.roundtrip(expected) as actual:
assert_identical(expected, actual)
Expand Down