diff --git a/doc/whats-new.rst b/doc/whats-new.rst index 9b40a323f39..8a9ba2d2ea3 100644 --- a/doc/whats-new.rst +++ b/doc/whats-new.rst @@ -50,7 +50,7 @@ eventually be deprecated. New Features ~~~~~~~~~~~~ -- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`). +- Relax nanosecond datetime restriction in CF time decoding (:issue:`7493`, :pull:`9618`, :pull:`9977`). By `Kai Mühlbauer `_ and `Spencer Clark `_. - Improve the error message raised when no key is matching the available variables in a dataset. (:pull:`9943`) By `Jimmy Westling `_. diff --git a/xarray/core/duck_array_ops.py b/xarray/core/duck_array_ops.py index 7e7333fd8ea..c3f1598050a 100644 --- a/xarray/core/duck_array_ops.py +++ b/xarray/core/duck_array_ops.py @@ -662,16 +662,10 @@ def _to_pytimedelta(array, unit="us"): def np_timedelta64_to_float(array, datetime_unit): - """Convert numpy.timedelta64 to float. - - Notes - ----- - The array is first converted to microseconds, which is less likely to - cause overflow errors. - """ - array = array.astype("timedelta64[ns]").astype(np.float64) - conversion_factor = np.timedelta64(1, "ns") / np.timedelta64(1, datetime_unit) - return conversion_factor * array + """Convert numpy.timedelta64 to float, possibly at a loss of resolution.""" + unit, _ = np.datetime_data(array.dtype) + conversion_factor = np.timedelta64(1, unit) / np.timedelta64(1, datetime_unit) + return conversion_factor * array.astype(np.float64) def pd_timedelta_to_float(value, datetime_unit): @@ -715,12 +709,15 @@ def mean(array, axis=None, skipna=None, **kwargs): if dtypes.is_datetime_like(array.dtype): offset = _datetime_nanmin(array) - # xarray always uses np.datetime64[ns] for np.datetime64 data - dtype = "timedelta64[ns]" + # From version 2025.01.2 xarray uses np.datetime64[unit], where unit + # is one of "s", "ms", "us", "ns". + # To not have to worry about the resolution, we just convert the output + # to "timedelta64" (without unit) and let the dtype of offset take precedence. + # This is fully backwards compatible with datetime64[ns]. return ( _mean( datetime_to_numeric(array, offset), axis=axis, skipna=skipna, **kwargs - ).astype(dtype) + ).astype("timedelta64") + offset ) elif _contains_cftime_datetimes(array): diff --git a/xarray/tests/test_duck_array_ops.py b/xarray/tests/test_duck_array_ops.py index e1306964757..9b8b50d99a1 100644 --- a/xarray/tests/test_duck_array_ops.py +++ b/xarray/tests/test_duck_array_ops.py @@ -9,6 +9,7 @@ from numpy import array, nan from xarray import DataArray, Dataset, cftime_range, concat +from xarray.coding.times import _NS_PER_TIME_DELTA from xarray.core import dtypes, duck_array_ops from xarray.core.duck_array_ops import ( array_notnull_equiv, @@ -28,6 +29,7 @@ where, ) from xarray.core.extension_array import PandasExtensionArray +from xarray.core.types import NPDatetimeUnitOptions, PDDatetimeUnitOptions from xarray.namedarray.pycompat import array_type from xarray.testing import assert_allclose, assert_equal, assert_identical from xarray.tests import ( @@ -411,10 +413,11 @@ def assert_dask_array(da, dask): @arm_xfail @pytest.mark.filterwarnings("ignore:All-NaN .* encountered:RuntimeWarning") @pytest.mark.parametrize("dask", [False, True] if has_dask else [False]) -def test_datetime_mean(dask: bool) -> None: +def test_datetime_mean(dask: bool, time_unit: PDDatetimeUnitOptions) -> None: # Note: only testing numpy, as dask is broken upstream + dtype = f"M8[{time_unit}]" da = DataArray( - np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype="M8[ns]"), + np.array(["2010-01-01", "NaT", "2010-01-03", "NaT", "NaT"], dtype=dtype), dims=["time"], ) if dask: @@ -846,11 +849,11 @@ def test_multiple_dims(dtype, dask, skipna, func): @pytest.mark.parametrize("dask", [True, False]) -def test_datetime_to_numeric_datetime64(dask): +def test_datetime_to_numeric_datetime64(dask, time_unit: PDDatetimeUnitOptions): if dask and not has_dask: pytest.skip("requires dask") - times = pd.date_range("2000", periods=5, freq="7D").values + times = pd.date_range("2000", periods=5, freq="7D").as_unit(time_unit).values if dask: import dask.array @@ -874,8 +877,8 @@ def test_datetime_to_numeric_datetime64(dask): result = duck_array_ops.datetime_to_numeric( times, datetime_unit="h", dtype=dtype ) - expected = 24 * np.arange(0, 35, 7).astype(dtype) - np.testing.assert_array_equal(result, expected) + expected2 = 24 * np.arange(0, 35, 7).astype(dtype) + np.testing.assert_array_equal(result, expected2) @requires_cftime @@ -923,15 +926,18 @@ def test_datetime_to_numeric_cftime(dask): @requires_cftime -def test_datetime_to_numeric_potential_overflow(): +def test_datetime_to_numeric_potential_overflow(time_unit: PDDatetimeUnitOptions): import cftime - times = pd.date_range("2000", periods=5, freq="7D").values.astype("datetime64[us]") + if time_unit == "ns": + pytest.skip("out-of-bounds datetime64 overflow") + dtype = f"M8[{time_unit}]" + times = pd.date_range("2000", periods=5, freq="7D").values.astype(dtype) cftimes = cftime_range( "2000", periods=5, freq="7D", calendar="proleptic_gregorian" ).values - offset = np.datetime64("0001-01-01") + offset = np.datetime64("0001-01-01", time_unit) cfoffset = cftime.DatetimeProlepticGregorian(1, 1, 1) result = duck_array_ops.datetime_to_numeric( @@ -957,24 +963,33 @@ def test_py_timedelta_to_float(): assert py_timedelta_to_float(dt.timedelta(days=1e6), "D") == 1e6 -@pytest.mark.parametrize( - "td, expected", - ([np.timedelta64(1, "D"), 86400 * 1e9], [np.timedelta64(1, "ns"), 1.0]), -) -def test_np_timedelta64_to_float(td, expected): - out = np_timedelta64_to_float(td, datetime_unit="ns") +@pytest.mark.parametrize("np_dt_unit", ["D", "h", "m", "s", "ms", "us", "ns"]) +def test_np_timedelta64_to_float( + np_dt_unit: NPDatetimeUnitOptions, time_unit: PDDatetimeUnitOptions +): + # tests any combination of source np.timedelta64 (NPDatetimeUnitOptions) with + # np_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions) + td = np.timedelta64(1, np_dt_unit) + expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit] + + out = np_timedelta64_to_float(td, datetime_unit=time_unit) np.testing.assert_allclose(out, expected) assert isinstance(out, float) - out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit="ns") + out = np_timedelta64_to_float(np.atleast_1d(td), datetime_unit=time_unit) np.testing.assert_allclose(out, expected) -@pytest.mark.parametrize( - "td, expected", ([pd.Timedelta(1, "D"), 86400 * 1e9], [pd.Timedelta(1, "ns"), 1.0]) -) -def test_pd_timedelta_to_float(td, expected): - out = pd_timedelta_to_float(td, datetime_unit="ns") +@pytest.mark.parametrize("np_dt_unit", ["D", "h", "m", "s", "ms", "us", "ns"]) +def test_pd_timedelta_to_float( + np_dt_unit: NPDatetimeUnitOptions, time_unit: PDDatetimeUnitOptions +): + # tests any combination of source pd.Timedelta (NPDatetimeUnitOptions) with + # np_timedelta_to_float with dedicated target unit (PDDatetimeUnitOptions) + td = pd.Timedelta(1, np_dt_unit) + expected = _NS_PER_TIME_DELTA[np_dt_unit] / _NS_PER_TIME_DELTA[time_unit] + + out = pd_timedelta_to_float(td, datetime_unit=time_unit) np.testing.assert_allclose(out, expected) assert isinstance(out, float) @@ -982,10 +997,11 @@ def test_pd_timedelta_to_float(td, expected): @pytest.mark.parametrize( "td", [dt.timedelta(days=1), np.timedelta64(1, "D"), pd.Timedelta(1, "D"), "1 day"] ) -def test_timedelta_to_numeric(td): +def test_timedelta_to_numeric(td, time_unit: PDDatetimeUnitOptions): # Scalar input - out = timedelta_to_numeric(td, "ns") - np.testing.assert_allclose(out, 86400 * 1e9) + out = timedelta_to_numeric(td, time_unit) + expected = _NS_PER_TIME_DELTA["D"] / _NS_PER_TIME_DELTA[time_unit] + np.testing.assert_allclose(out, expected) assert isinstance(out, float)