From 063437bce8b48fa323cc4bc75002789fa5d13d28 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 6 Mar 2025 08:07:27 -0500 Subject: [PATCH 01/33] Proof of concept literal timedelta64 coding --- xarray/coding/times.py | 15 ++++--- xarray/coding/variables.py | 75 +++++++++++++++++++++++++++++++ xarray/conventions.py | 2 + xarray/tests/test_coding_times.py | 21 ++++++++- 4 files changed, 104 insertions(+), 9 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 68369dac0d7..424a6fc6b95 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1368,13 +1368,14 @@ def __init__( def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) - - data, units = encode_cf_timedelta( - data, encoding.pop("units", None), encoding.get("dtype", None) - ) - safe_setitem(attrs, "units", units, name=name) - - return Variable(dims, data, attrs, encoding, fastpath=True) + if "units" in encoding: + data, units = encode_cf_timedelta( + data, encoding.pop("units"), encoding.get("dtype", None) + ) + safe_setitem(attrs, "units", units, name=name) + return Variable(dims, data, attrs, encoding, fastpath=True) + else: + return variable else: return variable diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 83112628dbb..fd8670fa759 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -2,6 +2,7 @@ from __future__ import annotations +import typing import warnings from collections.abc import Callable, Hashable, MutableMapping from functools import partial @@ -11,6 +12,7 @@ import pandas as pd from xarray.core import dtypes, duck_array_ops, indexing +from xarray.core.types import PDDatetimeUnitOptions from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array @@ -161,6 +163,45 @@ def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) +class Timedelta64TypeArray(indexing.ExplicitlyIndexedNDArrayMixin): + """Decode arrays on the fly from integer to np.timedelta64 datatype + + This is useful for decoding timedelta64 arrays from integer typed netCDF + variables. + + >>> x = np.array([1, 0, 1, 1, 0], dtype="int64") + + >>> x.dtype + dtype('int64') + + >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]")).dtype + dtype('timedelta64[ns]') + + >>> indexer = indexing.BasicIndexer((slice(None),)) + >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]"))[indexer].dtype + dtype('timedelta64[ns]') + """ + + __slots__ = ("_dtype", "array") + + def __init__(self, array, dtype: np.typing.DTypeLike) -> None: + self.array = indexing.as_indexable(array) + self._dtype = dtype + + @property + def dtype(self): + return np.dtype(self._dtype) + + def _oindex_get(self, key): + return np.asarray(self.array.oindex[key], dtype=self.dtype) + + def _vindex_get(self, key): + return np.asarray(self.array.vindex[key], dtype=self.dtype) + + def __getitem__(self, key) -> np.ndarray: + return np.asarray(self.array[key], dtype=self.dtype) + + def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): """Lazily apply an element-wise function to an array. Parameters @@ -738,3 +779,37 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: def decode(self, variable: Variable, name: T_Name = None) -> Variable: raise NotImplementedError() + + +class LiteralTimedelta64Coder(VariableCoder): + """Code np.timedelta64 values.""" + + def encode(self, variable: Variable, name: T_Name = None) -> Variable: + if np.issubdtype(variable.data.dtype, np.timedelta64): + dims, data, attrs, encoding = unpack_for_encoding(variable) + resolution, _ = np.datetime_data(variable.dtype) + attrs["dtype"] = f"timedelta64[{resolution}]" + data = duck_array_ops.astype(data, dtype=np.int64, copy=True) + return Variable(dims, data, attrs, encoding, fastpath=True) + else: + return variable + + def decode(self, variable: Variable, name: T_Name = None) -> Variable: + if variable.attrs.get("dtype", "").startswith("timedelta64"): + dims, data, attrs, encoding = unpack_for_decoding(variable) + # overwrite (!) dtype in encoding, and remove from attrs + # needed for correct subsequent encoding + encoding["dtype"] = attrs.pop("dtype") + dtype = np.dtype(encoding["dtype"]) + resolution, _ = np.datetime_data(dtype) + if resolution not in typing.get_args(PDDatetimeUnitOptions): + raise ValueError( + f"Following pandas, xarray only supports decoding to " + f"timedelta64 values with a resolution of 's', 'ms', " + f"'us', or 'ns'. Encoded values have a resolution of " + f"{resolution!r}." + ) + data = Timedelta64TypeArray(data, dtype) + return Variable(dims, data, attrs, encoding, fastpath=True) + else: + return variable diff --git a/xarray/conventions.py b/xarray/conventions.py index f67af95b4ce..05fc61e9210 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -92,6 +92,7 @@ def encode_cf_variable( for coder in [ CFDatetimeCoder(), CFTimedeltaCoder(), + variables.LiteralTimedelta64Coder(), variables.CFScaleOffsetCoder(), variables.CFMaskCoder(), variables.NativeEnumCoder(), @@ -238,6 +239,7 @@ def decode_cf_variable( original_dtype = var.dtype var = variables.BooleanCoder().decode(var) + var = variables.LiteralTimedelta64Coder().decode(var) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 1bc0037decc..b8140c421ba 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1512,7 +1512,7 @@ def test_roundtrip_timedelta64_nanosecond_precision( timedelta_values[2] = nat timedelta_values[4] = nat - encoding = dict(dtype=dtype, _FillValue=fill_value) + encoding = dict(dtype=dtype, _FillValue=fill_value, units="nanoseconds") var = Variable(["time"], timedelta_values, encoding=encoding) encoded_var = conventions.encode_cf_variable(var) @@ -1863,7 +1863,8 @@ def test_decode_timedelta( decode_times, decode_timedelta, expected_dtype, warns ) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) - var = Variable(["time"], timedeltas) + encoding = {"units": "days"} + var = Variable(["time"], timedeltas, encoding=encoding) encoded = conventions.encode_cf_variable(var) if warns: with pytest.warns(FutureWarning, match="decode_timedelta"): @@ -1907,3 +1908,19 @@ def test_lazy_decode_timedelta_error() -> None: ) with pytest.raises(OutOfBoundsTimedelta, match="overflow"): decoded.load() + + +def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions): + timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit) + variable = Variable(["time"], timedeltas) + encoded = conventions.encode_cf_variable(variable) + decoded = conventions.decode_cf_variable("timedeltas", encoded) + assert_identical(decoded, variable) + assert decoded.dtype == variable.dtype + + +def test_literal_timedelta_coding_resolution_error(): + attrs = {"dtype": "timedelta64[D]"} + encoded = Variable(["time"], [0, 1, 2], attrs=attrs) + with pytest.raises(ValueError, match="xarray only supports"): + conventions.decode_cf_variable("timedeltas", encoded) From 03f298860ab26352c5d5fe822e6b62305b5a88e7 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 6 Mar 2025 09:04:20 -0500 Subject: [PATCH 02/33] Ensure test_roundtrip_timedelta_data test uses old encoding pathway --- xarray/tests/test_backends.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/xarray/tests/test_backends.py b/xarray/tests/test_backends.py index a6df4d7b0cb..070496e656c 100644 --- a/xarray/tests/test_backends.py +++ b/xarray/tests/test_backends.py @@ -635,7 +635,10 @@ def test_roundtrip_timedelta_data(self) -> None: # though we cannot test that until we fix the timedelta decoding # to support large ranges time_deltas = pd.to_timedelta(["1h", "2h", "NaT"]).as_unit("s") # type: ignore[arg-type, unused-ignore] + encoding = {"units": "seconds"} expected = Dataset({"td": ("td", time_deltas), "td0": time_deltas[0]}) + expected["td"].encoding = encoding + expected["td0"].encoding = encoding with self.roundtrip( expected, open_kwargs={"decode_timedelta": CFTimedeltaCoder(time_unit="ns")} ) as actual: From bdb53d7ae1d53196eb2b21b89b4a413081d96a1a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Thu, 6 Mar 2025 19:45:35 -0500 Subject: [PATCH 03/33] Remove no longer relevant test --- xarray/tests/test_conventions.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 1d3a8bc809d..db75a754b52 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -120,7 +120,6 @@ def test_incompatible_attributes(self) -> None: Variable( ["t"], pd.date_range("2000-01-01", periods=3), {"units": "foobar"} ), - Variable(["t"], pd.to_timedelta(["1 day"]), {"units": "foobar"}), # type: ignore[arg-type, unused-ignore] Variable(["t"], [0, 1, 2], {"add_offset": 0}, {"add_offset": 2}), Variable(["t"], [0, 1, 2], {"_FillValue": 0}, {"_FillValue": 2}), ] From 00d9eaa3aba7b4c9c73f0bef6141f593bd15e9ab Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 09:47:52 -0500 Subject: [PATCH 04/33] Include units attribute --- xarray/coding/times.py | 13 ++++++++++-- xarray/coding/variables.py | 15 ++++++++------ xarray/conventions.py | 2 +- xarray/tests/test_coding_times.py | 33 ++++++++++++++++++++++++++++++- 4 files changed, 53 insertions(+), 10 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 424a6fc6b95..854a47433cc 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1349,6 +1349,11 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: return variable +def has_timedelta64_encoding_dtype(attrs_or_encoding: dict) -> bool: + dtype = attrs_or_encoding.get("dtype", None) + return isinstance(dtype, str) and dtype.startswith("timedelta64") + + class CFTimedeltaCoder(VariableCoder): """Coder for CF Timedelta coding. @@ -1368,7 +1373,7 @@ def __init__( def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) - if "units" in encoding: + if "units" in encoding and not has_timedelta64_encoding_dtype(encoding): data, units = encode_cf_timedelta( data, encoding.pop("units"), encoding.get("dtype", None) ) @@ -1381,7 +1386,11 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: def decode(self, variable: Variable, name: T_Name = None) -> Variable: units = variable.attrs.get("units", None) - if isinstance(units, str) and units in TIME_UNITS: + if ( + isinstance(units, str) + and units in TIME_UNITS + and not has_timedelta64_encoding_dtype(variable.attrs) + ): if self._emit_decode_timedelta_future_warning: emit_user_level_warning( "In a future version of xarray decode_timedelta will " diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index fd8670fa759..115bd91b89f 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -410,7 +410,6 @@ class CFMaskCoder(VariableCoder): def encode(self, variable: Variable, name: T_Name = None): dims, data, attrs, encoding = unpack_for_encoding(variable) - dtype = np.dtype(encoding.get("dtype", data.dtype)) # from netCDF best practices # https://docs.unidata.ucar.edu/nug/current/best_practices.html#bp_Unsigned-Data @@ -786,9 +785,14 @@ class LiteralTimedelta64Coder(VariableCoder): def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): + from xarray.coding.times import _numpy_to_netcdf_timeunit + dims, data, attrs, encoding = unpack_for_encoding(variable) resolution, _ = np.datetime_data(variable.dtype) - attrs["dtype"] = f"timedelta64[{resolution}]" + dtype = f"timedelta64[{resolution}]" + units = _numpy_to_netcdf_timeunit(resolution) + safe_setitem(attrs, "dtype", dtype, name=name) + safe_setitem(attrs, "units", units, name=name) data = duck_array_ops.astype(data, dtype=np.int64, copy=True) return Variable(dims, data, attrs, encoding, fastpath=True) else: @@ -797,10 +801,9 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: def decode(self, variable: Variable, name: T_Name = None) -> Variable: if variable.attrs.get("dtype", "").startswith("timedelta64"): dims, data, attrs, encoding = unpack_for_decoding(variable) - # overwrite (!) dtype in encoding, and remove from attrs - # needed for correct subsequent encoding - encoding["dtype"] = attrs.pop("dtype") - dtype = np.dtype(encoding["dtype"]) + dtype = pop_to(attrs, encoding, "dtype", name=name) + pop_to(attrs, encoding, "units", name=name) + dtype = np.dtype(dtype) resolution, _ = np.datetime_data(dtype) if resolution not in typing.get_args(PDDatetimeUnitOptions): raise ValueError( diff --git a/xarray/conventions.py b/xarray/conventions.py index 05fc61e9210..fa2af5e8442 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -92,13 +92,13 @@ def encode_cf_variable( for coder in [ CFDatetimeCoder(), CFTimedeltaCoder(), - variables.LiteralTimedelta64Coder(), variables.CFScaleOffsetCoder(), variables.CFMaskCoder(), variables.NativeEnumCoder(), variables.NonStringCoder(), variables.DefaultFillvalueCoder(), variables.BooleanCoder(), + variables.LiteralTimedelta64Coder(), ]: var = coder.encode(var, name=name) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index b8140c421ba..8609193fffc 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1913,14 +1913,45 @@ def test_lazy_decode_timedelta_error() -> None: def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions): timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit) variable = Variable(["time"], timedeltas) + expected_dtype = f"timedelta64[{time_unit}]" + expected_units = _numpy_to_netcdf_timeunit(time_unit) + encoded = conventions.encode_cf_variable(variable) + assert encoded.attrs["dtype"] == expected_dtype + assert encoded.attrs["units"] == expected_units + decoded = conventions.decode_cf_variable("timedeltas", encoded) + assert decoded.encoding["dtype"] == expected_dtype + assert decoded.encoding["units"] == expected_units + assert_identical(decoded, variable) assert decoded.dtype == variable.dtype + reencoded = conventions.encode_cf_variable(decoded) + assert_identical(reencoded, encoded) + assert reencoded.dtype == encoded.dtype + def test_literal_timedelta_coding_resolution_error(): - attrs = {"dtype": "timedelta64[D]"} + attrs = {"dtype": "timedelta64[D]", "units": "days"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs) with pytest.raises(ValueError, match="xarray only supports"): conventions.decode_cf_variable("timedeltas", encoded) + + +@pytest.mark.parametrize("attribute", ["dtype", "units"]) +def test_literal_timedelta_decode_invalid_encoding(attribute): + attrs = {"dtype": "timedelta64[s]", "units": "seconds"} + encoding = {attribute: "foo"} + encoded = Variable(["time"], [0, 1, 2], attrs=attrs, encoding=encoding) + with pytest.raises(ValueError, match="failed to prevent"): + conventions.decode_cf_variable("timedeltas", encoded) + + +@pytest.mark.parametrize("attribute", ["dtype", "units"]) +def test_literal_timedelta_encode_invalid_attribute(attribute): + timedeltas = pd.timedelta_range(0, freq="D", periods=3) + attrs = {attribute: "foo"} + variable = Variable(["time"], timedeltas, attrs=attrs) + with pytest.raises(ValueError, match="failed to prevent"): + conventions.encode_cf_variable(variable) From b043b45af5ba832eb9474bec9531e1882836f885 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 09:55:20 -0500 Subject: [PATCH 05/33] Move coder to times.py --- xarray/coding/times.py | 80 +++++++++++++++++++++++++++++++++++++- xarray/coding/variables.py | 79 ------------------------------------- xarray/conventions.py | 6 +-- 3 files changed, 82 insertions(+), 83 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 854a47433cc..8b1d66f9ee0 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1,6 +1,7 @@ from __future__ import annotations import re +import typing import warnings from collections.abc import Callable, Hashable from datetime import datetime, timedelta @@ -20,7 +21,7 @@ unpack_for_decoding, unpack_for_encoding, ) -from xarray.core import indexing +from xarray.core import duck_array_ops, indexing from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import array_all, array_any, asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item @@ -1411,3 +1412,80 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable + + +class Timedelta64TypeArray(indexing.ExplicitlyIndexedNDArrayMixin): + """Decode arrays on the fly from integer to np.timedelta64 datatype + + This is useful for decoding timedelta64 arrays from integer typed netCDF + variables. + + >>> x = np.array([1, 0, 1, 1, 0], dtype="int64") + + >>> x.dtype + dtype('int64') + + >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]")).dtype + dtype('timedelta64[ns]') + + >>> indexer = indexing.BasicIndexer((slice(None),)) + >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]"))[indexer].dtype + dtype('timedelta64[ns]') + """ + + __slots__ = ("_dtype", "array") + + def __init__(self, array, dtype: np.typing.DTypeLike) -> None: + self.array = indexing.as_indexable(array) + self._dtype = dtype + + @property + def dtype(self): + return np.dtype(self._dtype) + + def _oindex_get(self, key): + return np.asarray(self.array.oindex[key], dtype=self.dtype) + + def _vindex_get(self, key): + return np.asarray(self.array.vindex[key], dtype=self.dtype) + + def __getitem__(self, key) -> np.ndarray: + return np.asarray(self.array[key], dtype=self.dtype) + + +class LiteralTimedelta64Coder(VariableCoder): + """Code np.timedelta64 values.""" + + def encode(self, variable: Variable, name: T_Name = None) -> Variable: + if np.issubdtype(variable.data.dtype, np.timedelta64): + from xarray.coding.times import _numpy_to_netcdf_timeunit + + dims, data, attrs, encoding = unpack_for_encoding(variable) + resolution, _ = np.datetime_data(variable.dtype) + dtype = f"timedelta64[{resolution}]" + units = _numpy_to_netcdf_timeunit(resolution) + safe_setitem(attrs, "dtype", dtype, name=name) + safe_setitem(attrs, "units", units, name=name) + data = duck_array_ops.astype(data, dtype=np.int64, copy=True) + return Variable(dims, data, attrs, encoding, fastpath=True) + else: + return variable + + def decode(self, variable: Variable, name: T_Name = None) -> Variable: + if has_timedelta64_encoding_dtype(variable.attrs): + dims, data, attrs, encoding = unpack_for_decoding(variable) + dtype = pop_to(attrs, encoding, "dtype", name=name) + pop_to(attrs, encoding, "units", name=name) + dtype = np.dtype(dtype) + resolution, _ = np.datetime_data(dtype) + if resolution not in typing.get_args(PDDatetimeUnitOptions): + raise ValueError( + f"Following pandas, xarray only supports decoding to " + f"timedelta64 values with a resolution of 's', 'ms', " + f"'us', or 'ns'. Encoded values have a resolution of " + f"{resolution!r}." + ) + data = Timedelta64TypeArray(data, dtype) + return Variable(dims, data, attrs, encoding, fastpath=True) + else: + return variable diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 115bd91b89f..1002b9048c8 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -2,7 +2,6 @@ from __future__ import annotations -import typing import warnings from collections.abc import Callable, Hashable, MutableMapping from functools import partial @@ -12,7 +11,6 @@ import pandas as pd from xarray.core import dtypes, duck_array_ops, indexing -from xarray.core.types import PDDatetimeUnitOptions from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import get_chunked_array_type from xarray.namedarray.pycompat import is_chunked_array @@ -163,45 +161,6 @@ def __getitem__(self, key) -> np.ndarray: return np.asarray(self.array[key], dtype=self.dtype) -class Timedelta64TypeArray(indexing.ExplicitlyIndexedNDArrayMixin): - """Decode arrays on the fly from integer to np.timedelta64 datatype - - This is useful for decoding timedelta64 arrays from integer typed netCDF - variables. - - >>> x = np.array([1, 0, 1, 1, 0], dtype="int64") - - >>> x.dtype - dtype('int64') - - >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]")).dtype - dtype('timedelta64[ns]') - - >>> indexer = indexing.BasicIndexer((slice(None),)) - >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]"))[indexer].dtype - dtype('timedelta64[ns]') - """ - - __slots__ = ("_dtype", "array") - - def __init__(self, array, dtype: np.typing.DTypeLike) -> None: - self.array = indexing.as_indexable(array) - self._dtype = dtype - - @property - def dtype(self): - return np.dtype(self._dtype) - - def _oindex_get(self, key): - return np.asarray(self.array.oindex[key], dtype=self.dtype) - - def _vindex_get(self, key): - return np.asarray(self.array.vindex[key], dtype=self.dtype) - - def __getitem__(self, key) -> np.ndarray: - return np.asarray(self.array[key], dtype=self.dtype) - - def lazy_elemwise_func(array, func: Callable, dtype: np.typing.DTypeLike): """Lazily apply an element-wise function to an array. Parameters @@ -778,41 +737,3 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: def decode(self, variable: Variable, name: T_Name = None) -> Variable: raise NotImplementedError() - - -class LiteralTimedelta64Coder(VariableCoder): - """Code np.timedelta64 values.""" - - def encode(self, variable: Variable, name: T_Name = None) -> Variable: - if np.issubdtype(variable.data.dtype, np.timedelta64): - from xarray.coding.times import _numpy_to_netcdf_timeunit - - dims, data, attrs, encoding = unpack_for_encoding(variable) - resolution, _ = np.datetime_data(variable.dtype) - dtype = f"timedelta64[{resolution}]" - units = _numpy_to_netcdf_timeunit(resolution) - safe_setitem(attrs, "dtype", dtype, name=name) - safe_setitem(attrs, "units", units, name=name) - data = duck_array_ops.astype(data, dtype=np.int64, copy=True) - return Variable(dims, data, attrs, encoding, fastpath=True) - else: - return variable - - def decode(self, variable: Variable, name: T_Name = None) -> Variable: - if variable.attrs.get("dtype", "").startswith("timedelta64"): - dims, data, attrs, encoding = unpack_for_decoding(variable) - dtype = pop_to(attrs, encoding, "dtype", name=name) - pop_to(attrs, encoding, "units", name=name) - dtype = np.dtype(dtype) - resolution, _ = np.datetime_data(dtype) - if resolution not in typing.get_args(PDDatetimeUnitOptions): - raise ValueError( - f"Following pandas, xarray only supports decoding to " - f"timedelta64 values with a resolution of 's', 'ms', " - f"'us', or 'ns'. Encoded values have a resolution of " - f"{resolution!r}." - ) - data = Timedelta64TypeArray(data, dtype) - return Variable(dims, data, attrs, encoding, fastpath=True) - else: - return variable diff --git a/xarray/conventions.py b/xarray/conventions.py index fa2af5e8442..fa2888dede1 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -9,7 +9,7 @@ import numpy as np from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder -from xarray.coding import strings, variables +from xarray.coding import strings, times, variables from xarray.coding.variables import SerializationWarning, pop_to from xarray.core import indexing from xarray.core.common import ( @@ -98,7 +98,7 @@ def encode_cf_variable( variables.NonStringCoder(), variables.DefaultFillvalueCoder(), variables.BooleanCoder(), - variables.LiteralTimedelta64Coder(), + times.LiteralTimedelta64Coder(), ]: var = coder.encode(var, name=name) @@ -239,7 +239,7 @@ def decode_cf_variable( original_dtype = var.dtype var = variables.BooleanCoder().decode(var) - var = variables.LiteralTimedelta64Coder().decode(var) + var = times.LiteralTimedelta64Coder().decode(var) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) From 7f737537e2bf3d665446b4975e16cb34e17feab0 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 10:28:52 -0500 Subject: [PATCH 06/33] Add what's new entry --- doc/whats-new.rst | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 994fc70339c..fe54f55b6c9 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -34,6 +34,14 @@ New Features By `Benoit Bovy `_. - Support reading to `GPU memory with Zarr `_ (:pull:`10078`). By `Deepak Cherian `_. +- If not set to be encoded via the existing + :py:class:`coders.CFTimedeltaCoder`, automatically encode + :py:class:`numpy.timedelta64` values by converting to :py:class:`numpy.int64` + values and storing ``"dtype"`` and ``"units"`` attributes. Unlike those coded + through the :py:class:`coders.CFTimedeltaCoder`, these values will + always be decoded without a warning moving forward (:issue:`1621`, + :issue:`10099`, :pull:`10101`). By `Spencer Clark + `_. Breaking changes ~~~~~~~~~~~~~~~~ From 9ce2a244a3c837eb5731b8fbb9b92925a3b9d543 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 10:33:26 -0500 Subject: [PATCH 07/33] Restore test and reduce diff --- xarray/coding/variables.py | 1 + xarray/tests/test_conventions.py | 1 + 2 files changed, 2 insertions(+) diff --git a/xarray/coding/variables.py b/xarray/coding/variables.py index 9a68a88b11e..1b7bc95e2b4 100644 --- a/xarray/coding/variables.py +++ b/xarray/coding/variables.py @@ -266,6 +266,7 @@ def __init__( def encode(self, variable: Variable, name: T_Name = None): dims, data, attrs, encoding = unpack_for_encoding(variable) + dtype = np.dtype(encoding.get("dtype", data.dtype)) # from netCDF best practices # https://docs.unidata.ucar.edu/nug/current/best_practices.html#bp_Unsigned-Data diff --git a/xarray/tests/test_conventions.py b/xarray/tests/test_conventions.py index 7141fd5ab2b..961df78154e 100644 --- a/xarray/tests/test_conventions.py +++ b/xarray/tests/test_conventions.py @@ -120,6 +120,7 @@ def test_incompatible_attributes(self) -> None: Variable( ["t"], pd.date_range("2000-01-01", periods=3), {"units": "foobar"} ), + Variable(["t"], pd.to_timedelta(["1 day"]), {"units": "foobar"}), # type: ignore[arg-type, unused-ignore] Variable(["t"], [0, 1, 2], {"add_offset": 0}, {"add_offset": 2}), Variable(["t"], [0, 1, 2], {"_FillValue": 0}, {"_FillValue": 2}), ] From eb6e19a0359cc9ed8d2356a2f4dd6f93eb5e0bac Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 10:41:07 -0500 Subject: [PATCH 08/33] Fix typing --- xarray/coding/times.py | 2 +- xarray/tests/test_coding_times.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index affa1198b00..2d24be7c4fd 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1503,7 +1503,7 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) resolution, _ = np.datetime_data(variable.dtype) dtype = f"timedelta64[{resolution}]" - units = _numpy_to_netcdf_timeunit(resolution) + units = _numpy_dtype_to_netcdf_timeunit(variable.dtype) safe_setitem(attrs, "dtype", dtype, name=name) safe_setitem(attrs, "units", units, name=name) data = duck_array_ops.astype(data, dtype=np.int64, copy=True) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 1791f08a69e..52c9ee3572a 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1960,7 +1960,7 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None: def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions): - timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit) + timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit) # type: ignore[call-arg] variable = Variable(["time"], timedeltas) expected_dtype = f"timedelta64[{time_unit}]" expected_units = _numpy_to_netcdf_timeunit(time_unit) From 436e588f2a830b9b474bceb1f3c047ef251a43e1 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Sat, 8 Mar 2025 15:41:34 +0000 Subject: [PATCH 09/33] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/coding/times.py | 1 - 1 file changed, 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 2d24be7c4fd..ad50f9facad 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1498,7 +1498,6 @@ class LiteralTimedelta64Coder(VariableCoder): def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): - from xarray.coding.times import _numpy_to_netcdf_timeunit dims, data, attrs, encoding = unpack_for_encoding(variable) resolution, _ = np.datetime_data(variable.dtype) From a305238acce29a0b54599bb77aecad40ad6f50c1 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 10:45:06 -0500 Subject: [PATCH 10/33] Fix doctests --- xarray/coding/times.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index ad50f9facad..f3419e6646d 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1466,11 +1466,11 @@ class Timedelta64TypeArray(indexing.ExplicitlyIndexedNDArrayMixin): dtype('int64') >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]")).dtype - dtype('timedelta64[ns]') + dtype('>> indexer = indexing.BasicIndexer((slice(None),)) >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]"))[indexer].dtype - dtype('timedelta64[ns]') + dtype(' Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): - dims, data, attrs, encoding = unpack_for_encoding(variable) resolution, _ = np.datetime_data(variable.dtype) dtype = f"timedelta64[{resolution}]" From b406c642cfb8dd510a3792730b7644cd7175b870 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 11:25:03 -0500 Subject: [PATCH 11/33] Restore original order of encoders --- xarray/coding/times.py | 3 +++ xarray/conventions.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index f3419e6646d..f9d00e644a2 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1504,6 +1504,9 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: units = _numpy_dtype_to_netcdf_timeunit(variable.dtype) safe_setitem(attrs, "dtype", dtype, name=name) safe_setitem(attrs, "units", units, name=name) + # Remove dtype encoding if it exists to prevent it from interfering + # downstream in NonStringCoder. + encoding.pop("dtype", None) data = duck_array_ops.astype(data, dtype=np.int64, copy=True) return Variable(dims, data, attrs, encoding, fastpath=True) else: diff --git a/xarray/conventions.py b/xarray/conventions.py index d7bf646d808..30521138e80 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -92,13 +92,13 @@ def encode_cf_variable( for coder in [ CFDatetimeCoder(), CFTimedeltaCoder(), + times.LiteralTimedelta64Coder(), variables.CFScaleOffsetCoder(), variables.CFMaskCoder(), variables.NativeEnumCoder(), variables.NonStringCoder(), variables.DefaultFillvalueCoder(), variables.BooleanCoder(), - times.LiteralTimedelta64Coder(), ]: var = coder.encode(var, name=name) From a21b13753d1035117472056d5944a7c04b7ec181 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 17:24:08 -0500 Subject: [PATCH 12/33] Add return types to tests --- xarray/tests/test_coding_times.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 52c9ee3572a..950b1414466 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1959,7 +1959,7 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None: decoded.load() -def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions): +def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit) # type: ignore[call-arg] variable = Variable(["time"], timedeltas) expected_dtype = f"timedelta64[{time_unit}]" @@ -1981,7 +1981,7 @@ def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions): assert reencoded.dtype == encoded.dtype -def test_literal_timedelta_coding_resolution_error(): +def test_literal_timedelta_coding_resolution_error() -> None: attrs = {"dtype": "timedelta64[D]", "units": "days"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs) with pytest.raises(ValueError, match="xarray only supports"): @@ -1989,7 +1989,7 @@ def test_literal_timedelta_coding_resolution_error(): @pytest.mark.parametrize("attribute", ["dtype", "units"]) -def test_literal_timedelta_decode_invalid_encoding(attribute): +def test_literal_timedelta_decode_invalid_encoding(attribute) -> None: attrs = {"dtype": "timedelta64[s]", "units": "seconds"} encoding = {attribute: "foo"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs, encoding=encoding) @@ -1998,7 +1998,7 @@ def test_literal_timedelta_decode_invalid_encoding(attribute): @pytest.mark.parametrize("attribute", ["dtype", "units"]) -def test_literal_timedelta_encode_invalid_attribute(attribute): +def test_literal_timedelta_encode_invalid_attribute(attribute) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) attrs = {attribute: "foo"} variable = Variable(["time"], timedeltas, attrs=attrs) From 5108b02bc188a5fb04813b25c01de60328d4b56c Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 18:40:49 -0500 Subject: [PATCH 13/33] Move everything to CFTimedeltaCoder; reuse code where possible --- xarray/coding/times.py | 145 +++++++++++------------------------------ xarray/conventions.py | 4 +- 2 files changed, 39 insertions(+), 110 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index f9d00e644a2..163e503a524 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -21,7 +21,7 @@ unpack_for_decoding, unpack_for_encoding, ) -from xarray.core import duck_array_ops, indexing +from xarray.core import indexing from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import array_all, array_any, asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item @@ -1400,6 +1400,7 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: dims, data, attrs, encoding = unpack_for_encoding(variable) if "units" in encoding and not has_timedelta64_encoding_dtype(encoding): dtype = encoding.pop("dtype", None) + units = encoding.pop("units", None) # in the case of packed data we need to encode into # float first, the correct dtype will be established @@ -1409,124 +1410,54 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: set_dtype_encoding = dtype dtype = data.dtype if data.dtype.kind == "f" else "float64" - data, units = encode_cf_timedelta( - data, encoding.pop("units", None), dtype - ) - # retain dtype for packed data if set_dtype_encoding is not None: safe_setitem(encoding, "dtype", set_dtype_encoding, name=name) - - safe_setitem(attrs, "units", units, name=name) - - return Variable(dims, data, attrs, encoding, fastpath=True) else: - return variable + resolution, _ = np.datetime_data(variable.dtype) + dtype = np.int64 + attrs_dtype = f"timedelta64[{resolution}]" + units = _numpy_dtype_to_netcdf_timeunit(variable.dtype) + safe_setitem(attrs, "dtype", attrs_dtype, name=name) + # Remove dtype encoding if it exists to prevent it from + # interfering downstream in NonStringCoder. + encoding.pop("dtype", None) + data, units = encode_cf_timedelta(data, units, dtype) + safe_setitem(attrs, "units", units, name=name) + return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable def decode(self, variable: Variable, name: T_Name = None) -> Variable: units = variable.attrs.get("units", None) - if ( - isinstance(units, str) - and units in TIME_UNITS - and not has_timedelta64_encoding_dtype(variable.attrs) - ): - if self._emit_decode_timedelta_future_warning: - emit_user_level_warning( - "In a future version of xarray decode_timedelta will " - "default to False rather than None. To silence this " - "warning, set decode_timedelta to True, False, or a " - "'CFTimedeltaCoder' instance.", - FutureWarning, - ) + if isinstance(units, str) and units in TIME_UNITS: dims, data, attrs, encoding = unpack_for_decoding(variable) - units = pop_to(attrs, encoding, "units") - dtype = np.dtype(f"timedelta64[{self.time_unit}]") - transform = partial( - decode_cf_timedelta, units=units, time_unit=self.time_unit - ) + if has_timedelta64_encoding_dtype(variable.attrs): + dtype = pop_to(attrs, encoding, "dtype", name=name) + dtype = np.dtype(dtype) + resolution, _ = np.datetime_data(dtype) + if resolution not in typing.get_args(PDDatetimeUnitOptions): + raise ValueError( + f"Following pandas, xarray only supports decoding to " + f"timedelta64 values with a resolution of 's', 'ms', " + f"'us', or 'ns'. Encoded values have a resolution of " + f"{resolution!r}." + ) + time_unit = resolution + else: + if self._emit_decode_timedelta_future_warning: + emit_user_level_warning( + "In a future version of xarray decode_timedelta will " + "default to False rather than None. To silence this " + "warning, set decode_timedelta to True, False, or a " + "'CFTimedeltaCoder' instance.", + FutureWarning, + ) + dtype = np.dtype(f"timedelta64[{self.time_unit}]") + time_unit = self.time_unit + transform = partial(decode_cf_timedelta, units=units, time_unit=time_unit) data = lazy_elemwise_func(data, transform, dtype=dtype) - - return Variable(dims, data, attrs, encoding, fastpath=True) - else: - return variable - - -class Timedelta64TypeArray(indexing.ExplicitlyIndexedNDArrayMixin): - """Decode arrays on the fly from integer to np.timedelta64 datatype - - This is useful for decoding timedelta64 arrays from integer typed netCDF - variables. - - >>> x = np.array([1, 0, 1, 1, 0], dtype="int64") - - >>> x.dtype - dtype('int64') - - >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]")).dtype - dtype('>> indexer = indexing.BasicIndexer((slice(None),)) - >>> Timedelta64TypeArray(x, np.dtype("timedelta64[ns]"))[indexer].dtype - dtype(' None: - self.array = indexing.as_indexable(array) - self._dtype = dtype - - @property - def dtype(self): - return np.dtype(self._dtype) - - def _oindex_get(self, key): - return np.asarray(self.array.oindex[key], dtype=self.dtype) - - def _vindex_get(self, key): - return np.asarray(self.array.vindex[key], dtype=self.dtype) - - def __getitem__(self, key) -> np.ndarray: - return np.asarray(self.array[key], dtype=self.dtype) - - -class LiteralTimedelta64Coder(VariableCoder): - """Code np.timedelta64 values.""" - - def encode(self, variable: Variable, name: T_Name = None) -> Variable: - if np.issubdtype(variable.data.dtype, np.timedelta64): - dims, data, attrs, encoding = unpack_for_encoding(variable) - resolution, _ = np.datetime_data(variable.dtype) - dtype = f"timedelta64[{resolution}]" - units = _numpy_dtype_to_netcdf_timeunit(variable.dtype) - safe_setitem(attrs, "dtype", dtype, name=name) - safe_setitem(attrs, "units", units, name=name) - # Remove dtype encoding if it exists to prevent it from interfering - # downstream in NonStringCoder. - encoding.pop("dtype", None) - data = duck_array_ops.astype(data, dtype=np.int64, copy=True) - return Variable(dims, data, attrs, encoding, fastpath=True) - else: - return variable - - def decode(self, variable: Variable, name: T_Name = None) -> Variable: - if has_timedelta64_encoding_dtype(variable.attrs): - dims, data, attrs, encoding = unpack_for_decoding(variable) - dtype = pop_to(attrs, encoding, "dtype", name=name) - pop_to(attrs, encoding, "units", name=name) - dtype = np.dtype(dtype) - resolution, _ = np.datetime_data(dtype) - if resolution not in typing.get_args(PDDatetimeUnitOptions): - raise ValueError( - f"Following pandas, xarray only supports decoding to " - f"timedelta64 values with a resolution of 's', 'ms', " - f"'us', or 'ns'. Encoded values have a resolution of " - f"{resolution!r}." - ) - data = Timedelta64TypeArray(data, dtype) return Variable(dims, data, attrs, encoding, fastpath=True) else: return variable diff --git a/xarray/conventions.py b/xarray/conventions.py index 30521138e80..071dab43c28 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -9,7 +9,7 @@ import numpy as np from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder -from xarray.coding import strings, times, variables +from xarray.coding import strings, variables from xarray.coding.variables import SerializationWarning, pop_to from xarray.core import indexing from xarray.core.common import ( @@ -92,7 +92,6 @@ def encode_cf_variable( for coder in [ CFDatetimeCoder(), CFTimedeltaCoder(), - times.LiteralTimedelta64Coder(), variables.CFScaleOffsetCoder(), variables.CFMaskCoder(), variables.NativeEnumCoder(), @@ -243,7 +242,6 @@ def decode_cf_variable( original_dtype = var.dtype var = variables.BooleanCoder().decode(var) - var = times.LiteralTimedelta64Coder().decode(var) dimensions, data, attributes, encoding = variables.unpack_for_decoding(var) From 452968c81c878059c900461d9eaafd0ff9c98a7a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 19:00:57 -0500 Subject: [PATCH 14/33] Fix mypy --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 163e503a524..65c4f56fb1b 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1444,7 +1444,7 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: f"'us', or 'ns'. Encoded values have a resolution of " f"{resolution!r}." ) - time_unit = resolution + time_unit = cast(PDDatetimeUnitOptions, resolution) else: if self._emit_decode_timedelta_future_warning: emit_user_level_warning( From 503db4a6793c4ba3e72e3a157db33baa28c3b6a7 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 8 Mar 2025 19:21:18 -0500 Subject: [PATCH 15/33] Use Kai's offset and scale_factor logic for all encoding --- xarray/coding/times.py | 25 +++++++++++++------------ xarray/tests/test_coding_times.py | 14 ++++++++++++++ 2 files changed, 27 insertions(+), 12 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 65c4f56fb1b..5b355cc9b32 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1401,18 +1401,6 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: if "units" in encoding and not has_timedelta64_encoding_dtype(encoding): dtype = encoding.pop("dtype", None) units = encoding.pop("units", None) - - # in the case of packed data we need to encode into - # float first, the correct dtype will be established - # via CFScaleOffsetCoder/CFMaskCoder - set_dtype_encoding = None - if "add_offset" in encoding or "scale_factor" in encoding: - set_dtype_encoding = dtype - dtype = data.dtype if data.dtype.kind == "f" else "float64" - - # retain dtype for packed data - if set_dtype_encoding is not None: - safe_setitem(encoding, "dtype", set_dtype_encoding, name=name) else: resolution, _ = np.datetime_data(variable.dtype) dtype = np.int64 @@ -1422,6 +1410,19 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: # Remove dtype encoding if it exists to prevent it from # interfering downstream in NonStringCoder. encoding.pop("dtype", None) + + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + set_dtype_encoding = None + if "add_offset" in encoding or "scale_factor" in encoding: + set_dtype_encoding = dtype + dtype = data.dtype if data.dtype.kind == "f" else "float64" + + # retain dtype for packed data + if set_dtype_encoding is not None: + safe_setitem(encoding, "dtype", set_dtype_encoding, name=name) + data, units = encode_cf_timedelta(data, units, dtype) safe_setitem(attrs, "units", units, name=name) return Variable(dims, data, attrs, encoding, fastpath=True) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 950b1414466..387b1304aa3 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2004,3 +2004,17 @@ def test_literal_timedelta_encode_invalid_attribute(attribute) -> None: variable = Variable(["time"], timedeltas, attrs=attrs) with pytest.raises(ValueError, match="failed to prevent"): conventions.encode_cf_variable(variable) + + +def test_literal_timedelta_coding_mask_and_scale() -> None: + attrs = { + "units": "nanoseconds", + "dtype": "timedelta64[ns]", + "_FillValue": np.int16(-1), + "add_offset": 100000.0, + } + encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) + decoded = conventions.decode_cf_variable("foo", encoded) + result = conventions.encode_cf_variable(decoded, name="foo") + assert_identical(encoded, result) + assert encoded.dtype == result.dtype From 56f55e2662f1414ac956f18fde3714ec16c22def Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Fri, 21 Mar 2025 20:49:12 -0400 Subject: [PATCH 16/33] Fix bad merge --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index d667724af89..86ee2a10cbf 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1420,7 +1420,7 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) if "units" in encoding and not has_timedelta64_encoding_dtype(encoding): - dtype = encoding.pop("dtype", None) + dtype = encoding.get("dtype", None) units = encoding.pop("units", None) else: resolution, _ = np.datetime_data(variable.dtype) From c5e7de997d854b20127d92936c5c9f23df616942 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 10:33:41 -0400 Subject: [PATCH 17/33] Forbid mixing other encoding with literal timedelta64 encoding --- xarray/coding/times.py | 35 ++++++++++++++++++++++++++----- xarray/tests/test_coding_times.py | 20 +++++++----------- 2 files changed, 38 insertions(+), 17 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 86ee2a10cbf..134ace96b02 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -93,6 +93,14 @@ ) +_INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS = [ + "_FillValue", + "missing_value", + "add_offset", + "scale_factor", +] + + def _is_standard_calendar(calendar: str) -> bool: return calendar.lower() in _STANDARD_CALENDARS @@ -1422,6 +1430,13 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: if "units" in encoding and not has_timedelta64_encoding_dtype(encoding): dtype = encoding.get("dtype", None) units = encoding.pop("units", None) + + # in the case of packed data we need to encode into + # float first, the correct dtype will be established + # via CFScaleOffsetCoder/CFMaskCoder + if "add_offset" in encoding or "scale_factor" in encoding: + dtype = data.dtype if data.dtype.kind == "f" else "float64" + else: resolution, _ = np.datetime_data(variable.dtype) dtype = np.int64 @@ -1432,11 +1447,21 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: # interfering downstream in NonStringCoder. encoding.pop("dtype", None) - # in the case of packed data we need to encode into - # float first, the correct dtype will be established - # via CFScaleOffsetCoder/CFMaskCoder - if "add_offset" in encoding or "scale_factor" in encoding: - dtype = data.dtype if data.dtype.kind == "f" else "float64" + if any( + k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS + ): + raise ValueError( + f"Specifying '_FillValue', 'missing_value', " + f"'add_offset', or 'scale_factor' is not supported " + f"when literally encoding the np.timedelta64 values " + f"of variable {name!r}. To encode {name!r} with such " + f"encoding parameters, additionally set " + f"encoding['units'] to a unit of time, e.g. " + f"'seconds'. To proceed with literal np.timedelta64 " + f"encoding of {name!r}, remove any encoding entries " + f"for '_FillValue', 'missing_value', 'add_offset', " + f"or 'scale_factor'." + ) data, units = encode_cf_timedelta(data, units, dtype) safe_setitem(attrs, "units", units, name=name) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index b64255226a7..474322160e9 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -20,6 +20,7 @@ ) from xarray.coders import CFDatetimeCoder, CFTimedeltaCoder from xarray.coding.times import ( + _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS, _encode_datetime_with_cftime, _netcdf_to_numpy_timeunit, _numpy_to_netcdf_timeunit, @@ -2009,15 +2010,10 @@ def test_literal_timedelta_encode_invalid_attribute(attribute) -> None: conventions.encode_cf_variable(variable) -def test_literal_timedelta_coding_mask_and_scale() -> None: - attrs = { - "units": "nanoseconds", - "dtype": "timedelta64[ns]", - "_FillValue": np.int16(-1), - "add_offset": 100000.0, - } - encoded = Variable(["time"], np.array([0, -1, 1], "int16"), attrs=attrs) - decoded = conventions.decode_cf_variable("foo", encoded) - result = conventions.encode_cf_variable(decoded, name="foo") - assert_identical(encoded, result) - assert encoded.dtype == result.dtype +@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS) +def test_literal_timedelta_encoding_mask_and_scale_error(invalid_key) -> None: + encoding = {invalid_key: 1.0} + timedeltas = pd.timedelta_range(0, freq="D", periods=3) + variable = Variable(["time"], timedeltas, encoding=encoding) + with pytest.raises(ValueError, match=invalid_key): + conventions.encode_cf_variable(variable, name="foo") From d1744aff7ac1505e2c2a615fb5665522845e31b2 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 12:45:09 -0400 Subject: [PATCH 18/33] Expose fine-grained control over decoding pathways --- xarray/coding/times.py | 51 ++++++++++++++++++++++++++----- xarray/conventions.py | 6 ++-- xarray/tests/test_coding_times.py | 49 ++++++++++++++++++++++++++++- 3 files changed, 96 insertions(+), 10 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 134ace96b02..522d9330808 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1414,14 +1414,27 @@ class CFTimedeltaCoder(VariableCoder): Parameters ---------- time_unit : PDDatetimeUnitOptions - Target resolution when decoding timedeltas. Defaults to "ns". + Target resolution when decoding timedeltas via units. Defaults to "ns". + When decoding via dtype, the resolution is specified in the dtype + attribute, so this parameter is ignored. + decode_via_units : bool + Whether to decode timedeltas based on the presence of a timedelta-like + units attribute, e.g. "seconds". Defaults to True, but in the future + will default to False. + decode_via_dtype : bool + Whether to decode timedeltas based on the presence of a np.timedelta64 + dtype attribute, e.g. "np.datetime64[s]". Defaults to True. """ def __init__( self, time_unit: PDDatetimeUnitOptions = "ns", + decode_via_units: bool = True, + decode_via_dtype: bool = True, ) -> None: self.time_unit = time_unit + self.decode_via_units = decode_via_units + self.decode_via_dtype = decode_via_dtype self._emit_decode_timedelta_future_warning = False def encode(self, variable: Variable, name: T_Name = None) -> Variable: @@ -1471,10 +1484,25 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: def decode(self, variable: Variable, name: T_Name = None) -> Variable: units = variable.attrs.get("units", None) - if isinstance(units, str) and units in TIME_UNITS: + has_timedelta_units = isinstance(units, str) and units in TIME_UNITS + has_timedelta_dtype = has_timedelta64_encoding_dtype(variable.attrs) + is_dtype_decodable = has_timedelta_units and has_timedelta_dtype + is_units_decodable = has_timedelta_units + if (is_dtype_decodable and self.decode_via_dtype) or ( + is_units_decodable and self.decode_via_units + ): dims, data, attrs, encoding = unpack_for_decoding(variable) units = pop_to(attrs, encoding, "units") - if has_timedelta64_encoding_dtype(variable.attrs): + if is_dtype_decodable and self.decode_via_dtype: + if any( + k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS + ): + raise ValueError( + "Decoding np.timedelta64 values via dtype is not " + "supported when '_FillValue', 'missing_value', " + "'add_offset', or 'scale_factor' are present in " + "encoding." + ) dtype = pop_to(attrs, encoding, "dtype", name=name) dtype = np.dtype(dtype) resolution, _ = np.datetime_data(dtype) @@ -1486,12 +1514,21 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: f"{resolution!r}." ) time_unit = cast(PDDatetimeUnitOptions, resolution) - else: + elif self.decode_via_units: if self._emit_decode_timedelta_future_warning: emit_user_level_warning( - "In a future version of xarray decode_timedelta will " - "default to False rather than None. To silence this " - "warning, set decode_timedelta to True, False, or a " + "In a future version, xarray will not decode " + "timedelta values based on the presence of a " + "timedelta-like units attribute by default. Instead " + "it will rely on the presence of a np.timedelta64 " + "dtype attribute, which is now xarray's default way " + "of encoding np.timedelta64 values. To continue " + "decoding timedeltas based on the presence of a " + "timedelta-like units attribute, users will need to " + "explicitly opt-in by passing True or " + "CFTimedeltaCoder(decode_via_units=True) to " + "decode_timedelta. To silence this warning, set " + "decode_timedelta to True, False, or a " "'CFTimedeltaCoder' instance.", FutureWarning, ) diff --git a/xarray/conventions.py b/xarray/conventions.py index 071dab43c28..ab41804fd62 100644 --- a/xarray/conventions.py +++ b/xarray/conventions.py @@ -204,8 +204,10 @@ def decode_cf_variable( var = coder.decode(var, name=name) if decode_timedelta: - if not isinstance(decode_timedelta, CFTimedeltaCoder): - decode_timedelta = CFTimedeltaCoder() + if isinstance(decode_timedelta, bool): + decode_timedelta = CFTimedeltaCoder( + decode_via_units=decode_timedelta, decode_via_dtype=decode_timedelta + ) decode_timedelta._emit_decode_timedelta_future_warning = ( decode_timedelta_was_none ) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 474322160e9..b3c0c6f6ce9 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2016,4 +2016,51 @@ def test_literal_timedelta_encoding_mask_and_scale_error(invalid_key) -> None: timedeltas = pd.timedelta_range(0, freq="D", periods=3) variable = Variable(["time"], timedeltas, encoding=encoding) with pytest.raises(ValueError, match=invalid_key): - conventions.encode_cf_variable(variable, name="foo") + conventions.encode_cf_variable(variable) + + +@pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS) +def test_literal_timedelta_decoding_mask_and_scale_error(invalid_key) -> None: + attrs = {invalid_key: 1.0, "dtype": "timedelta64[s]", "units": "seconds"} + variable = Variable(["time"], [0, 1, 2], attrs=attrs) + with pytest.raises(ValueError, match=invalid_key): + conventions.decode_cf_variable("foo", variable) + + +@pytest.mark.parametrize( + ("decode_via_units", "decode_via_dtype", "attrs", "expect_timedelta64"), + [ + (True, True, {"units": "seconds"}, True), + (True, False, {"units": "seconds"}, True), + (False, True, {"units": "seconds"}, False), + (False, False, {"units": "seconds"}, False), + (True, True, {"dtype": "timedelta64[s]", "units": "seconds"}, True), + (True, False, {"dtype": "timedelta64[s]", "units": "seconds"}, True), + (False, True, {"dtype": "timedelta64[s]", "units": "seconds"}, True), + (False, False, {"dtype": "timedelta64[s]", "units": "seconds"}, False), + ], + ids=lambda x: f"{x!r}", +) +def test_timedelta_coding_options( + decode_via_units, decode_via_dtype, attrs, expect_timedelta64 +) -> None: + array = np.array([0, 1, 2], dtype=np.int64) + encoded = Variable(["time"], array, attrs=attrs) + + # Confirm we decode to the expected dtype. + decode_timedelta = CFTimedeltaCoder( + time_unit="s", + decode_via_units=decode_via_units, + decode_via_dtype=decode_via_dtype, + ) + decoded = conventions.decode_cf_variable( + "foo", encoded, decode_timedelta=decode_timedelta + ) + if expect_timedelta64: + assert decoded.dtype == np.dtype("timedelta64[s]") + else: + assert decoded.dtype == np.dtype("int64") + + # Confirm we exactly roundtrip. + reencoded = conventions.encode_cf_variable(decoded) + assert_identical(reencoded, encoded) From 7c7b07119bfc53624eb8b907794668542347ce2a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 12:46:17 -0400 Subject: [PATCH 19/33] Rename test --- xarray/tests/test_coding_times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index b3c0c6f6ce9..798c0a10e23 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2041,7 +2041,7 @@ def test_literal_timedelta_decoding_mask_and_scale_error(invalid_key) -> None: ], ids=lambda x: f"{x!r}", ) -def test_timedelta_coding_options( +def test_timedelta_decoding_options( decode_via_units, decode_via_dtype, attrs, expect_timedelta64 ) -> None: array = np.array([0, 1, 2], dtype=np.int64) From da1edc4f3c22995bd47ad10b3b22488859ecbfaf Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 12:47:17 -0400 Subject: [PATCH 20/33] Use consistent dtype spelling --- xarray/tests/test_coding_times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 798c0a10e23..b830a887fd2 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2044,7 +2044,7 @@ def test_literal_timedelta_decoding_mask_and_scale_error(invalid_key) -> None: def test_timedelta_decoding_options( decode_via_units, decode_via_dtype, attrs, expect_timedelta64 ) -> None: - array = np.array([0, 1, 2], dtype=np.int64) + array = np.array([0, 1, 2], dtype=np.dtype("int64")) encoded = Variable(["time"], array, attrs=attrs) # Confirm we decode to the expected dtype. From 2bb4b997a085ef42fb679471985a87cb73694fc9 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 13:15:08 -0400 Subject: [PATCH 21/33] Continue supporting non-timedelta dtype-only encoding --- xarray/coding/times.py | 3 ++- xarray/tests/test_coding_times.py | 17 +++++++++++++++++ 2 files changed, 19 insertions(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 522d9330808..aaab26702c5 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1440,7 +1440,8 @@ def __init__( def encode(self, variable: Variable, name: T_Name = None) -> Variable: if np.issubdtype(variable.data.dtype, np.timedelta64): dims, data, attrs, encoding = unpack_for_encoding(variable) - if "units" in encoding and not has_timedelta64_encoding_dtype(encoding): + has_timedelta_dtype = has_timedelta64_encoding_dtype(encoding) + if ("units" in encoding or "dtype" in encoding) and not has_timedelta_dtype: dtype = encoding.get("dtype", None) units = encoding.pop("units", None) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index b830a887fd2..b6a601cad68 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2064,3 +2064,20 @@ def test_timedelta_decoding_options( # Confirm we exactly roundtrip. reencoded = conventions.encode_cf_variable(decoded) assert_identical(reencoded, encoded) + + +def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None: + encoding = {"dtype": np.dtype("int32")} + timedeltas = pd.timedelta_range(0, freq="D", periods=3) + variable = Variable(["time"], timedeltas, encoding=encoding) + + encoded = conventions.encode_cf_variable(variable) + assert encoded.attrs["units"] == "days" + assert encoded.dtype == np.dtype("int32") + + with pytest.warns(FutureWarning, match="timedelta"): + decoded = conventions.decode_cf_variable("foo", encoded) + assert_identical(decoded, variable) + + reencoded = conventions.encode_cf_variable(decoded) + assert_identical(reencoded, encoded) From 0220ed5084a68d1d9969ed506a757b44a1e4d5fa Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 13:18:46 -0400 Subject: [PATCH 22/33] Fix example attribute in docstring --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index aaab26702c5..f9a3037e3d2 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1423,7 +1423,7 @@ class CFTimedeltaCoder(VariableCoder): will default to False. decode_via_dtype : bool Whether to decode timedeltas based on the presence of a np.timedelta64 - dtype attribute, e.g. "np.datetime64[s]". Defaults to True. + dtype attribute, e.g. "datetime64[s]". Defaults to True. """ def __init__( From c83fcb39325e6104551a60ce63ce7fca18bc0db7 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 13:34:54 -0400 Subject: [PATCH 23/33] Update what's new --- doc/whats-new.rst | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9034e42504d..5b912f81d6f 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -21,7 +21,16 @@ v2025.03.1 (unreleased) New Features ~~~~~~~~~~~~ - +- By default xarray now encodes :py:class:`numpy.timedelta64` values by + converting to :py:class:`numpy.int64` values and storing ``"dtype"`` and + ``"units"`` attributes consistent with the dtype of the in-memory + :py:class:`numpy.timedelta64` values, e.g. for ``"timedelta64[s]"`` and + ``"seconds"`` for second-resolution timedeltas. These values will always be + decoded to timedeltas without a warning moving forward. Timedeltas encoded + via the previous approach can still be roundtripped exactly, but in the + future will not be decoded by default (:issue:`1621`, + :issue:`10099`, :pull:`10101`). By `Spencer Clark + `_. Breaking changes ~~~~~~~~~~~~~~~~ @@ -79,14 +88,6 @@ New Features (:pull:`9498`). By `Spencer Clark `_. - Support reading to `GPU memory with Zarr `_ (:pull:`10078`). By `Deepak Cherian `_. -- If not set to be encoded via the existing - :py:class:`coders.CFTimedeltaCoder`, automatically encode - :py:class:`numpy.timedelta64` values by converting to :py:class:`numpy.int64` - values and storing ``"dtype"`` and ``"units"`` attributes. Unlike those coded - through the :py:class:`coders.CFTimedeltaCoder`, these values will - always be decoded without a warning moving forward (:issue:`1621`, - :issue:`10099`, :pull:`10101`). By `Spencer Clark - `_. Performance ~~~~~~~~~~~ From d1e8a5e1bb8111659df3cba047acfa2414a89a92 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 13:39:27 -0400 Subject: [PATCH 24/33] Fix typo --- doc/whats-new.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 5b912f81d6f..b952804025d 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -24,13 +24,12 @@ New Features - By default xarray now encodes :py:class:`numpy.timedelta64` values by converting to :py:class:`numpy.int64` values and storing ``"dtype"`` and ``"units"`` attributes consistent with the dtype of the in-memory - :py:class:`numpy.timedelta64` values, e.g. for ``"timedelta64[s]"`` and + :py:class:`numpy.timedelta64` values, e.g. ``"timedelta64[s]"`` and ``"seconds"`` for second-resolution timedeltas. These values will always be decoded to timedeltas without a warning moving forward. Timedeltas encoded via the previous approach can still be roundtripped exactly, but in the - future will not be decoded by default (:issue:`1621`, - :issue:`10099`, :pull:`10101`). By `Spencer Clark - `_. + future will not be decoded by default (:issue:`1621`, :issue:`10099`, + :pull:`10101`). By `Spencer Clark `_. Breaking changes ~~~~~~~~~~~~~~~~ From 7b94d35b8fcaee0a17c2d031eefb311fd262fb31 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 14:48:09 -0400 Subject: [PATCH 25/33] Complete test --- xarray/tests/test_coding_times.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index b6a601cad68..7e1d114ece6 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2081,3 +2081,5 @@ def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None: reencoded = conventions.encode_cf_variable(decoded) assert_identical(reencoded, encoded) + assert encoded.attrs["units"] == "days" + assert encoded.dtype == np.dtype("int32") From f269e68a2cf4afa357d40c943f8d71d35b6279ef Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sat, 22 Mar 2025 15:25:10 -0400 Subject: [PATCH 26/33] Fix docstring --- xarray/coding/times.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index f9a3037e3d2..60d9f307339 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1423,7 +1423,7 @@ class CFTimedeltaCoder(VariableCoder): will default to False. decode_via_dtype : bool Whether to decode timedeltas based on the presence of a np.timedelta64 - dtype attribute, e.g. "datetime64[s]". Defaults to True. + dtype attribute, e.g. "timedelta64[s]". Defaults to True. """ def __init__( From 46169ab7ee5cf5376a23f7cf1d5279fa8881dd61 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 6 Apr 2025 10:36:13 -0400 Subject: [PATCH 27/33] Support _FillValue or missing_value encoding --- xarray/coding/times.py | 27 +++++++++---------- xarray/tests/test_coding_times.py | 43 ++++++++++++++++++++++++++++--- 2 files changed, 52 insertions(+), 18 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 60d9f307339..c0839ff403f 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -94,8 +94,6 @@ _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS = [ - "_FillValue", - "missing_value", "add_offset", "scale_factor", ] @@ -1465,17 +1463,17 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS ): raise ValueError( - f"Specifying '_FillValue', 'missing_value', " - f"'add_offset', or 'scale_factor' is not supported " - f"when literally encoding the np.timedelta64 values " - f"of variable {name!r}. To encode {name!r} with such " - f"encoding parameters, additionally set " - f"encoding['units'] to a unit of time, e.g. " - f"'seconds'. To proceed with literal np.timedelta64 " - f"encoding of {name!r}, remove any encoding entries " - f"for '_FillValue', 'missing_value', 'add_offset', " - f"or 'scale_factor'." + f"Specifying 'add_offset' or 'scale_factor' is not " + f"supported when literally encoding the " + f"np.timedelta64 values of variable {name!r}. To " + f"encode {name!r} with such encoding parameters, " + f"additionally set encoding['units'] to a unit of " + f"time, e.g. 'seconds'. To proceed with literal " + f"np.timedelta64 encoding of {name!r}, remove any " + f"encoding entries for 'add_offset' or 'scale_factor'." ) + if "_FillValue" not in encoding and "missing_value" not in encoding: + encoding["_FillValue"] = np.iinfo(np.int64).min data, units = encode_cf_timedelta(data, units, dtype) safe_setitem(attrs, "units", units, name=name) @@ -1500,9 +1498,8 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: ): raise ValueError( "Decoding np.timedelta64 values via dtype is not " - "supported when '_FillValue', 'missing_value', " - "'add_offset', or 'scale_factor' are present in " - "encoding." + "supported when 'add_offset', or 'scale_factor' are " + "present in encoding." ) dtype = pop_to(attrs, encoding, "dtype", name=name) dtype = np.dtype(dtype) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 7e1d114ece6..4cacd7a834c 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1964,7 +1964,7 @@ def test_decode_floating_point_timedelta_no_serialization_warning() -> None: def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: - timedeltas = pd.timedelta_range(0, freq="D", periods=3, unit=time_unit) # type: ignore[call-arg] + timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]") variable = Variable(["time"], timedeltas) expected_dtype = f"timedelta64[{time_unit}]" expected_units = _numpy_to_netcdf_timeunit(time_unit) @@ -1972,6 +1972,7 @@ def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: encoded = conventions.encode_cf_variable(variable) assert encoded.attrs["dtype"] == expected_dtype assert encoded.attrs["units"] == expected_units + assert encoded.attrs["_FillValue"] == np.iinfo(np.int64).min decoded = conventions.decode_cf_variable("timedeltas", encoded) assert decoded.encoding["dtype"] == expected_dtype @@ -2011,7 +2012,7 @@ def test_literal_timedelta_encode_invalid_attribute(attribute) -> None: @pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS) -def test_literal_timedelta_encoding_mask_and_scale_error(invalid_key) -> None: +def test_literal_timedelta_encoding_invalid_key_error(invalid_key) -> None: encoding = {invalid_key: 1.0} timedeltas = pd.timedelta_range(0, freq="D", periods=3) variable = Variable(["time"], timedeltas, encoding=encoding) @@ -2020,7 +2021,7 @@ def test_literal_timedelta_encoding_mask_and_scale_error(invalid_key) -> None: @pytest.mark.parametrize("invalid_key", _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS) -def test_literal_timedelta_decoding_mask_and_scale_error(invalid_key) -> None: +def test_literal_timedelta_decoding_invalid_key_error(invalid_key) -> None: attrs = {invalid_key: 1.0, "dtype": "timedelta64[s]", "units": "seconds"} variable = Variable(["time"], [0, 1, 2], attrs=attrs) with pytest.raises(ValueError, match=invalid_key): @@ -2044,6 +2045,12 @@ def test_literal_timedelta_decoding_mask_and_scale_error(invalid_key) -> None: def test_timedelta_decoding_options( decode_via_units, decode_via_dtype, attrs, expect_timedelta64 ) -> None: + # Note with literal timedelta encoding, we always add a _FillValue, even + # if one is not present in the original encoding parameters, which is why + # we ensure one is defined here when "dtype" is present in attrs. + if "dtype" in attrs: + attrs["_FillValue"] = np.iinfo(np.int64).min + array = np.array([0, 1, 2], dtype=np.dtype("int64")) encoded = Variable(["time"], array, attrs=attrs) @@ -2083,3 +2090,33 @@ def test_timedelta_encoding_explicit_non_timedelta64_dtype() -> None: assert_identical(reencoded, encoded) assert encoded.attrs["units"] == "days" assert encoded.dtype == np.dtype("int32") + + +@pytest.mark.parametrize("mask_attribute", ["_FillValue", "missing_value"]) +def test_literal_timedelta64_coding_with_mask( + time_unit: PDDatetimeUnitOptions, mask_attribute: str +) -> None: + timedeltas = np.array([0, 1, "NaT"], dtype=f"timedelta64[{time_unit}]") + mask = 10 + variable = Variable(["time"], timedeltas, encoding={mask_attribute: mask}) + expected_dtype = f"timedelta64[{time_unit}]" + expected_units = _numpy_to_netcdf_timeunit(time_unit) + + encoded = conventions.encode_cf_variable(variable) + assert encoded.attrs["dtype"] == expected_dtype + assert encoded.attrs["units"] == expected_units + assert encoded.attrs[mask_attribute] == mask + assert encoded[-1] == mask + + decoded = conventions.decode_cf_variable("timedeltas", encoded) + assert decoded.encoding["dtype"] == expected_dtype + assert decoded.encoding["units"] == expected_units + assert decoded.encoding[mask_attribute] == mask + assert np.isnat(decoded[-1]) + + assert_identical(decoded, variable) + assert decoded.dtype == variable.dtype + + reencoded = conventions.encode_cf_variable(decoded) + assert_identical(reencoded, encoded) + assert reencoded.dtype == encoded.dtype From 0e67a04f51f08cc7a679c3e507eb9a67b55f1495 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 11 May 2025 12:00:04 -0400 Subject: [PATCH 28/33] Tweak errors and warnings; relax decoding dtype error --- xarray/coding/times.py | 72 +++++++++++++++++++++---------- xarray/tests/test_coding_times.py | 11 +++-- 2 files changed, 58 insertions(+), 25 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index c0839ff403f..62787557fab 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1,7 +1,6 @@ from __future__ import annotations import re -import typing import warnings from collections.abc import Callable, Hashable from datetime import datetime, timedelta @@ -1464,13 +1463,16 @@ def encode(self, variable: Variable, name: T_Name = None) -> Variable: ): raise ValueError( f"Specifying 'add_offset' or 'scale_factor' is not " - f"supported when literally encoding the " - f"np.timedelta64 values of variable {name!r}. To " - f"encode {name!r} with such encoding parameters, " - f"additionally set encoding['units'] to a unit of " - f"time, e.g. 'seconds'. To proceed with literal " - f"np.timedelta64 encoding of {name!r}, remove any " - f"encoding entries for 'add_offset' or 'scale_factor'." + f"supported when encoding the timedelta64 values of " + f"variable {name!r} with xarray's new default " + f"timedelta64 encoding approach. To encode {name!r} " + f"with xarray's previous timedelta64 encoding " + f"approach, which supports the 'add_offset' and " + f"'scale_factor' parameters, additionally set " + f"encoding['units'] to a unit of time, e.g. " + f"'seconds'. To proceed with encoding of {name!r} " + f"via xarray's new approach, remove any encoding " + f"entries for 'add_offset' or 'scale_factor'." ) if "_FillValue" not in encoding and "missing_value" not in encoding: encoding["_FillValue"] = np.iinfo(np.int64).min @@ -1497,33 +1499,59 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS ): raise ValueError( - "Decoding np.timedelta64 values via dtype is not " - "supported when 'add_offset', or 'scale_factor' are " - "present in encoding." + f"Decoding timedelta64 values via dtype is not " + f"supported when 'add_offset', or 'scale_factor' are " + f"present in encoding. Check the encoding parameters " + f"of variable {name!r}." ) dtype = pop_to(attrs, encoding, "dtype", name=name) dtype = np.dtype(dtype) resolution, _ = np.datetime_data(dtype) - if resolution not in typing.get_args(PDDatetimeUnitOptions): - raise ValueError( + if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): + time_unit = cast(PDDatetimeUnitOptions, "s") + dtype = np.dtype("timedelta64[s]") + message = ( f"Following pandas, xarray only supports decoding to " f"timedelta64 values with a resolution of 's', 'ms', " - f"'us', or 'ns'. Encoded values have a resolution of " - f"{resolution!r}." + f"'us', or 'ns'. Encoded values for variable {name!r} " + f"have a resolution of {resolution!r}. Attempting to " + f"decode to a resolution of 's'. Note, depending on " + f"the encoded values, this may lead to an " + f"OverflowError. Additionally, data will not be " + f"identically round tripped; xarray will choose an " + f"encoding dtype of 'timedelta64[s]' when re-encoding." ) - time_unit = cast(PDDatetimeUnitOptions, resolution) + emit_user_level_warning(message) + elif np.timedelta64(1, resolution) < np.timedelta64(1, "ns"): + time_unit = cast(PDDatetimeUnitOptions, "ns") + dtype = np.dtype("timedelta64[ns]") + message = ( + f"Following pandas, xarray only supports decoding to " + f"timedelta64 values with a resolution of 's', 'ms', " + f"'us', or 'ns'. Encoded values for variable {name!r} " + f"have a resolution of {resolution!r}. Attempting to " + f"decode to a resolution of 'ns'. Note, depending on " + f"the encoded values, this may lead to loss of " + f"precision. Additionally, data will not be " + f"identically round tripped; xarray will choose an " + f"encoding dtype of 'timedelta64[ns]' " + f"when re-encoding." + ) + emit_user_level_warning(message) + else: + time_unit = cast(PDDatetimeUnitOptions, resolution) elif self.decode_via_units: if self._emit_decode_timedelta_future_warning: emit_user_level_warning( "In a future version, xarray will not decode " "timedelta values based on the presence of a " "timedelta-like units attribute by default. Instead " - "it will rely on the presence of a np.timedelta64 " - "dtype attribute, which is now xarray's default way " - "of encoding np.timedelta64 values. To continue " - "decoding timedeltas based on the presence of a " - "timedelta-like units attribute, users will need to " - "explicitly opt-in by passing True or " + "it will rely on the presence of a timedelta64 dtype " + "attribute, which is now xarray's default way of " + "encoding timedelta64 values. To continue decoding " + "timedeltas based on the presence of a timedelta-like " + "units attribute, users will need to explicitly " + "opt-in by passing True or " "CFTimedeltaCoder(decode_via_units=True) to " "decode_timedelta. To silence this warning, set " "decode_timedelta to True, False, or a " diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 4cacd7a834c..369ec1b5588 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1986,11 +1986,16 @@ def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: assert reencoded.dtype == encoded.dtype -def test_literal_timedelta_coding_resolution_error() -> None: +def test_literal_timedelta_coding_non_pandas_resolution_warning() -> None: attrs = {"dtype": "timedelta64[D]", "units": "days"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs) - with pytest.raises(ValueError, match="xarray only supports"): - conventions.decode_cf_variable("timedeltas", encoded) + with pytest.warns(UserWarning, match="xarray only supports"): + decoded = conventions.decode_cf_variable("timedeltas", encoded) + expected_array = np.array([0, 1, 2], dtype="timedelta64[D]") + expected_array = expected_array.astype("timedelta64[s]") + expected = Variable(["time"], expected_array) + assert_identical(decoded, expected) + assert decoded.dtype == np.dtype("timedelta64[s]") @pytest.mark.parametrize("attribute", ["dtype", "units"]) From 8df198197e1e0a8b30096df4ffeb954367284abe Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 11 May 2025 12:11:55 -0400 Subject: [PATCH 29/33] Add xfail test for fine-resolution branch of non-pandas resolution code --- xarray/tests/test_coding_times.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 369ec1b5588..958dba76fcc 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -1986,7 +1986,7 @@ def test_literal_timedelta64_coding(time_unit: PDDatetimeUnitOptions) -> None: assert reencoded.dtype == encoded.dtype -def test_literal_timedelta_coding_non_pandas_resolution_warning() -> None: +def test_literal_timedelta_coding_non_pandas_coarse_resolution_warning() -> None: attrs = {"dtype": "timedelta64[D]", "units": "days"} encoded = Variable(["time"], [0, 1, 2], attrs=attrs) with pytest.warns(UserWarning, match="xarray only supports"): @@ -1998,6 +1998,19 @@ def test_literal_timedelta_coding_non_pandas_resolution_warning() -> None: assert decoded.dtype == np.dtype("timedelta64[s]") +@pytest.mark.xfail(reason="xarray does not recognize picoseconds as time-like") +def test_literal_timedelta_coding_non_pandas_fine_resolution_warning() -> None: + attrs = {"dtype": "timedelta64[ps]", "units": "picoseconds"} + encoded = Variable(["time"], [0, 1000, 2000], attrs=attrs) + with pytest.warns(UserWarning, match="xarray only supports"): + decoded = conventions.decode_cf_variable("timedeltas", encoded) + expected_array = np.array([0, 1000, 2000], dtype="timedelta64[ps]") + expected_array = expected_array.astype("timedelta64[ns]") + expected = Variable(["time"], expected_array) + assert_identical(decoded, expected) + assert decoded.dtype == np.dtype("timedelta64[ns]") + + @pytest.mark.parametrize("attribute", ["dtype", "units"]) def test_literal_timedelta_decode_invalid_encoding(attribute) -> None: attrs = {"dtype": "timedelta64[s]", "units": "seconds"} From 0929ec47e5439e64b1afb13b54bad6fcc7f39e9a Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 11 May 2025 13:36:31 -0400 Subject: [PATCH 30/33] Fix typing --- xarray/coding/times.py | 4 +++- xarray/core/dataarray.py | 10 +++++----- xarray/core/dataset.py | 10 +++++----- xarray/core/types.py | 2 +- 4 files changed, 14 insertions(+), 12 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 62787557fab..cf8a18f20cd 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -5,7 +5,7 @@ from collections.abc import Callable, Hashable from datetime import datetime, timedelta from functools import partial -from typing import TYPE_CHECKING, Union, cast +from typing import TYPE_CHECKING, Literal, Union, cast import numpy as np import pandas as pd @@ -25,6 +25,7 @@ from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import array_all, asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item +from xarray.core.types import DatetimeUnitOptions from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type @@ -1507,6 +1508,7 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: dtype = pop_to(attrs, encoding, "dtype", name=name) dtype = np.dtype(dtype) resolution, _ = np.datetime_data(dtype) + resolution = cast(Literal["Y", "M"] | DatetimeUnitOptions, resolution) if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): time_unit = cast(PDDatetimeUnitOptions, "s") dtype = np.dtype("timedelta64[s]") diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 1e7e1069076..079d72e32f2 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5372,7 +5372,7 @@ def differentiate( self, coord: Hashable, edge_order: Literal[1, 2] = 1, - datetime_unit: DatetimeUnitOptions = None, + datetime_unit: DatetimeUnitOptions | None = None, ) -> Self: """Differentiate the array with the second order accurate central differences. @@ -5434,7 +5434,7 @@ def differentiate( def integrate( self, coord: Hashable | Sequence[Hashable] = None, - datetime_unit: DatetimeUnitOptions = None, + datetime_unit: DatetimeUnitOptions | None = None, ) -> Self: """Integrate along the given coordinate using the trapezoidal rule. @@ -5446,7 +5446,7 @@ def integrate( ---------- coord : Hashable, or sequence of Hashable Coordinate(s) used for the integration. - datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as', None}, optional Specify the unit if a datetime coordinate is used. @@ -5488,7 +5488,7 @@ def integrate( def cumulative_integrate( self, coord: Hashable | Sequence[Hashable] = None, - datetime_unit: DatetimeUnitOptions = None, + datetime_unit: DatetimeUnitOptions | None = None, ) -> Self: """Integrate cumulatively along the given coordinate using the trapezoidal rule. @@ -5503,7 +5503,7 @@ def cumulative_integrate( ---------- coord : Hashable, or sequence of Hashable Coordinate(s) used for the integration. - datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as', None}, optional Specify the unit if a datetime coordinate is used. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index 5a7f757ba8a..fefaa0f41b9 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8247,7 +8247,7 @@ def differentiate( The coordinate to be used to compute the gradient. edge_order : {1, 2}, default: 1 N-th order accurate differences at the boundaries. - datetime_unit : None or {"Y", "M", "W", "D", "h", "m", "s", "ms", \ + datetime_unit : None or {"W", "D", "h", "m", "s", "ms", \ "us", "ns", "ps", "fs", "as", None}, default: None Unit to compute gradient. Only valid for datetime coordinate. @@ -8303,7 +8303,7 @@ def differentiate( def integrate( self, coord: Hashable | Sequence[Hashable], - datetime_unit: DatetimeUnitOptions = None, + datetime_unit: DatetimeUnitOptions | None = None, ) -> Self: """Integrate along the given coordinate using the trapezoidal rule. @@ -8315,7 +8315,7 @@ def integrate( ---------- coord : hashable, or sequence of hashable Coordinate(s) used for the integration. - datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as', None}, optional Specify the unit if datetime coordinate is used. @@ -8423,7 +8423,7 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): def cumulative_integrate( self, coord: Hashable | Sequence[Hashable], - datetime_unit: DatetimeUnitOptions = None, + datetime_unit: DatetimeUnitOptions | None = None, ) -> Self: """Integrate along the given coordinate using the trapezoidal rule. @@ -8439,7 +8439,7 @@ def cumulative_integrate( ---------- coord : hashable, or sequence of hashable Coordinate(s) used for the integration. - datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as', None}, optional Specify the unit if datetime coordinate is used. diff --git a/xarray/core/types.py b/xarray/core/types.py index dc95f3e2d69..b3f8baa8280 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -254,7 +254,7 @@ def copy( InterpOptions = Union[Interp1dOptions, InterpolantOptions, InterpnOptions] DatetimeUnitOptions = Literal[ - "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None + "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as" ] NPDatetimeUnitOptions = Literal["D", "h", "m", "s", "ms", "us", "ns"] PDDatetimeUnitOptions = Literal["s", "ms", "us", "ns"] From 191667f7325b08cab1a4eda2a42c931b33a394f0 Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 11 May 2025 13:40:19 -0400 Subject: [PATCH 31/33] Revert "Fix typing" This reverts commit 0929ec47e5439e64b1afb13b54bad6fcc7f39e9a. --- xarray/coding/times.py | 4 +--- xarray/core/dataarray.py | 10 +++++----- xarray/core/dataset.py | 10 +++++----- xarray/core/types.py | 2 +- 4 files changed, 12 insertions(+), 14 deletions(-) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index cf8a18f20cd..62787557fab 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -5,7 +5,7 @@ from collections.abc import Callable, Hashable from datetime import datetime, timedelta from functools import partial -from typing import TYPE_CHECKING, Literal, Union, cast +from typing import TYPE_CHECKING, Union, cast import numpy as np import pandas as pd @@ -25,7 +25,6 @@ from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like from xarray.core.duck_array_ops import array_all, asarray, ravel, reshape from xarray.core.formatting import first_n_items, format_timestamp, last_item -from xarray.core.types import DatetimeUnitOptions from xarray.core.utils import attempt_import, emit_user_level_warning from xarray.core.variable import Variable from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type @@ -1508,7 +1507,6 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: dtype = pop_to(attrs, encoding, "dtype", name=name) dtype = np.dtype(dtype) resolution, _ = np.datetime_data(dtype) - resolution = cast(Literal["Y", "M"] | DatetimeUnitOptions, resolution) if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): time_unit = cast(PDDatetimeUnitOptions, "s") dtype = np.dtype("timedelta64[s]") diff --git a/xarray/core/dataarray.py b/xarray/core/dataarray.py index 079d72e32f2..1e7e1069076 100644 --- a/xarray/core/dataarray.py +++ b/xarray/core/dataarray.py @@ -5372,7 +5372,7 @@ def differentiate( self, coord: Hashable, edge_order: Literal[1, 2] = 1, - datetime_unit: DatetimeUnitOptions | None = None, + datetime_unit: DatetimeUnitOptions = None, ) -> Self: """Differentiate the array with the second order accurate central differences. @@ -5434,7 +5434,7 @@ def differentiate( def integrate( self, coord: Hashable | Sequence[Hashable] = None, - datetime_unit: DatetimeUnitOptions | None = None, + datetime_unit: DatetimeUnitOptions = None, ) -> Self: """Integrate along the given coordinate using the trapezoidal rule. @@ -5446,7 +5446,7 @@ def integrate( ---------- coord : Hashable, or sequence of Hashable Coordinate(s) used for the integration. - datetime_unit : {'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as', None}, optional Specify the unit if a datetime coordinate is used. @@ -5488,7 +5488,7 @@ def integrate( def cumulative_integrate( self, coord: Hashable | Sequence[Hashable] = None, - datetime_unit: DatetimeUnitOptions | None = None, + datetime_unit: DatetimeUnitOptions = None, ) -> Self: """Integrate cumulatively along the given coordinate using the trapezoidal rule. @@ -5503,7 +5503,7 @@ def cumulative_integrate( ---------- coord : Hashable, or sequence of Hashable Coordinate(s) used for the integration. - datetime_unit : {'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as', None}, optional Specify the unit if a datetime coordinate is used. diff --git a/xarray/core/dataset.py b/xarray/core/dataset.py index fefaa0f41b9..5a7f757ba8a 100644 --- a/xarray/core/dataset.py +++ b/xarray/core/dataset.py @@ -8247,7 +8247,7 @@ def differentiate( The coordinate to be used to compute the gradient. edge_order : {1, 2}, default: 1 N-th order accurate differences at the boundaries. - datetime_unit : None or {"W", "D", "h", "m", "s", "ms", \ + datetime_unit : None or {"Y", "M", "W", "D", "h", "m", "s", "ms", \ "us", "ns", "ps", "fs", "as", None}, default: None Unit to compute gradient. Only valid for datetime coordinate. @@ -8303,7 +8303,7 @@ def differentiate( def integrate( self, coord: Hashable | Sequence[Hashable], - datetime_unit: DatetimeUnitOptions | None = None, + datetime_unit: DatetimeUnitOptions = None, ) -> Self: """Integrate along the given coordinate using the trapezoidal rule. @@ -8315,7 +8315,7 @@ def integrate( ---------- coord : hashable, or sequence of hashable Coordinate(s) used for the integration. - datetime_unit : {'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as', None}, optional Specify the unit if datetime coordinate is used. @@ -8423,7 +8423,7 @@ def _integrate_one(self, coord, datetime_unit=None, cumulative=False): def cumulative_integrate( self, coord: Hashable | Sequence[Hashable], - datetime_unit: DatetimeUnitOptions | None = None, + datetime_unit: DatetimeUnitOptions = None, ) -> Self: """Integrate along the given coordinate using the trapezoidal rule. @@ -8439,7 +8439,7 @@ def cumulative_integrate( ---------- coord : hashable, or sequence of hashable Coordinate(s) used for the integration. - datetime_unit : {'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ + datetime_unit : {'Y', 'M', 'W', 'D', 'h', 'm', 's', 'ms', 'us', 'ns', \ 'ps', 'fs', 'as', None}, optional Specify the unit if datetime coordinate is used. diff --git a/xarray/core/types.py b/xarray/core/types.py index b3f8baa8280..dc95f3e2d69 100644 --- a/xarray/core/types.py +++ b/xarray/core/types.py @@ -254,7 +254,7 @@ def copy( InterpOptions = Union[Interp1dOptions, InterpolantOptions, InterpnOptions] DatetimeUnitOptions = Literal[ - "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as" + "Y", "M", "W", "D", "h", "m", "s", "ms", "us", "μs", "ns", "ps", "fs", "as", None ] NPDatetimeUnitOptions = Literal["D", "h", "m", "s", "ms", "us", "ns"] PDDatetimeUnitOptions = Literal["s", "ms", "us", "ns"] From 52a725514fb99f69fdc322be999c86ddeaa958ef Mon Sep 17 00:00:00 2001 From: Spencer Clark Date: Sun, 11 May 2025 13:41:10 -0400 Subject: [PATCH 32/33] Use simpler typing fix for now --- xarray/coding/times.py | 1 + 1 file changed, 1 insertion(+) diff --git a/xarray/coding/times.py b/xarray/coding/times.py index 62787557fab..cb39518ceed 100644 --- a/xarray/coding/times.py +++ b/xarray/coding/times.py @@ -1507,6 +1507,7 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable: dtype = pop_to(attrs, encoding, "dtype", name=name) dtype = np.dtype(dtype) resolution, _ = np.datetime_data(dtype) + resolution = cast(NPDatetimeUnitOptions, resolution) if np.timedelta64(1, resolution) > np.timedelta64(1, "s"): time_unit = cast(PDDatetimeUnitOptions, "s") dtype = np.dtype("timedelta64[s]") From faac0977a81ac31dadfea8c65e23a8a05932a3f0 Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Fri, 23 May 2025 10:37:38 +0000 Subject: [PATCH 33/33] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- xarray/tests/test_coding_times.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/xarray/tests/test_coding_times.py b/xarray/tests/test_coding_times.py index 44b9f4eb8f2..3b85d395fdb 100644 --- a/xarray/tests/test_coding_times.py +++ b/xarray/tests/test_coding_times.py @@ -2060,8 +2060,6 @@ def test_literal_timedelta_decoding_invalid_key_error(invalid_key) -> None: ], ids=lambda x: f"{x!r}", ) - - def test_timedelta_decoding_options( decode_via_units, decode_via_dtype, attrs, expect_timedelta64 ) -> None: @@ -2141,7 +2139,7 @@ def test_literal_timedelta64_coding_with_mask( assert_identical(reencoded, encoded) assert reencoded.dtype == encoded.dtype - + def test_roundtrip_0size_timedelta(time_unit: PDDatetimeUnitOptions) -> None: # regression test for GitHub issue #10310 encoding = {"units": "days", "dtype": np.dtype("int64")}