 from xarray.core.common import contains_cftime_datetimes, is_np_datetime_like
 from xarray.core.duck_array_ops import asarray, ravel, reshape
 from xarray.core.formatting import first_n_items, format_timestamp, last_item
-from xarray.core.pdcompat import nanosecond_precision_timestamp
+from xarray.core.pdcompat import nanosecond_precision_timestamp, timestamp_as_unit
 from xarray.core.utils import attempt_import, emit_user_level_warning
 from xarray.core.variable import Variable
 from xarray.namedarray.parallelcompat import T_ChunkedArray, get_chunked_array_type
 except ImportError:
     cftime = None

-from xarray.core.types import CFCalendar, NPDatetimeUnitOptions, T_DuckArray
+from xarray.core.types import (
+    CFCalendar,
+    NPDatetimeUnitOptions,
+    T_DuckArray,
+)

 T_Name = Union[Hashable, None]
@@ -259,18 +263,26 @@ def _parse_iso8601(date_type, timestr):
     return default.replace(**replace), resolution


-def _unpack_time_units_and_ref_date(units: str) -> tuple[str, pd.Timestamp]:
+def _maybe_strip_tz_from_timestamp(date: pd.Timestamp) -> pd.Timestamp:
+    # If the ref_date Timestamp is timezone-aware, convert to UTC and
+    # make it timezone-naive (GH 2649).
+    if date.tz is not None:
+        return date.tz_convert("UTC").tz_convert(None)
+    return date
+
+
+def _unpack_time_unit_and_ref_date(
+    units: str,
+) -> tuple[NPDatetimeUnitOptions, pd.Timestamp]:
     # same us _unpack_netcdf_time_units but finalizes ref_date for
     # processing in encode_cf_datetime
-    time_units, _ref_date = _unpack_netcdf_time_units(units)
+    time_unit, _ref_date = _unpack_netcdf_time_units(units)
+    time_unit = _netcdf_to_numpy_timeunit(time_unit)
     # TODO: the strict enforcement of nanosecond precision Timestamps can be
     # relaxed when addressing GitHub issue #7493.
     ref_date = nanosecond_precision_timestamp(_ref_date)
-    # If the ref_date Timestamp is timezone-aware, convert to UTC and
-    # make it timezone-naive (GH 2649).
-    if ref_date.tz is not None:
-        ref_date = ref_date.tz_convert(None)
-    return time_units, ref_date
+    ref_date = _maybe_strip_tz_from_timestamp(ref_date)
+    return time_unit, ref_date


 def _decode_cf_datetime_dtype(
@@ -317,6 +329,30 @@ def _decode_datetime_with_cftime(
     return np.array([], dtype=object)


+def _check_date_for_units_since_refdate(
+    date, unit: str, ref_date: pd.Timestamp
+) -> pd.Timestamp:
+    # check for out-of-bounds floats and raise
+    if date > np.iinfo("int64").max or date < np.iinfo("int64").min:
+        raise OutOfBoundsTimedelta(
+            f"Value {date} can't be represented as Datetime/Timedelta."
+        )
+    delta = date * np.timedelta64(1, unit)
+    if not np.isnan(delta):
+        # this will raise on dtype overflow for integer dtypes
+        if date.dtype.kind in "u" and not np.int64(delta) == date:
+            raise OutOfBoundsTimedelta(
+                "DType overflow in Datetime/Timedelta calculation."
+            )
+        # this will raise on overflow if ref_date + delta
+        # can't be represented in the current ref_date resolution
+        return timestamp_as_unit(ref_date + delta, ref_date.unit)
+    else:
+        # if date is exactly NaT (np.iinfo("int64").min) return NaT
+        # to make follow-up checks work
+        return pd.Timestamp("NaT")
+
+
 def _decode_datetime_with_pandas(
     flat_num_dates: np.ndarray, units: str, calendar: str
 ) -> np.ndarray:
@@ -335,12 +371,8 @@ def _decode_datetime_with_pandas(
     elif flat_num_dates.dtype.kind == "u":
         flat_num_dates = flat_num_dates.astype(np.uint64)

-    time_units, ref_date_str = _unpack_netcdf_time_units(units)
-    time_units = _netcdf_to_numpy_timeunit(time_units)
     try:
-        # TODO: the strict enforcement of nanosecond precision Timestamps can be
-        # relaxed when addressing GitHub issue #7493.
-        ref_date = nanosecond_precision_timestamp(ref_date_str)
+        time_unit, ref_date = _unpack_time_unit_and_ref_date(units)
     except ValueError as err:
         # ValueError is raised by pd.Timestamp for non-ISO timestamp
         # strings, in which case we fall back to using cftime
@@ -350,8 +382,12 @@ def _decode_datetime_with_pandas(
         warnings.filterwarnings("ignore", "invalid value encountered", RuntimeWarning)
         if flat_num_dates.size > 0:
             # avoid size 0 datetimes GH1329
-            pd.to_timedelta(flat_num_dates.min(), time_units) + ref_date
-            pd.to_timedelta(flat_num_dates.max(), time_units) + ref_date
+            _check_date_for_units_since_refdate(
+                flat_num_dates.min(), time_unit, ref_date
+            )
+            _check_date_for_units_since_refdate(
+                flat_num_dates.max(), time_unit, ref_date
+            )

     # To avoid integer overflow when converting to nanosecond units for integer
     # dtypes smaller than np.int64 cast all integer and unsigned integer dtype
@@ -364,20 +400,24 @@ def _decode_datetime_with_pandas(
     elif flat_num_dates.dtype.kind in "f":
         flat_num_dates = flat_num_dates.astype(np.float64)

-    # Cast input ordinals to integers of nanoseconds because pd.to_timedelta
-    # works much faster when dealing with integers (GH 1399).
-    # properly handle NaN/NaT to prevent casting NaN to int
+    # keep NaT/nan mask
     nan = np.isnan(flat_num_dates) | (flat_num_dates == np.iinfo(np.int64).min)
-    flat_num_dates = flat_num_dates * _NS_PER_TIME_DELTA[time_units]
-    flat_num_dates_ns_int = np.zeros_like(flat_num_dates, dtype=np.int64)
-    flat_num_dates_ns_int[nan] = np.iinfo(np.int64).min
-    flat_num_dates_ns_int[~nan] = flat_num_dates[~nan].astype(np.int64)
+    # in case we need to change the unit, we fix the numbers here
+    # this should be safe, as errors would have been raised above
+    ns_time_unit = _NS_PER_TIME_DELTA[time_unit]
+    ns_ref_date_unit = _NS_PER_TIME_DELTA[ref_date.unit]
+    if ns_time_unit > ns_ref_date_unit:
+        flat_num_dates *= np.int64(ns_time_unit / ns_ref_date_unit)
+        time_unit = ref_date.unit

-    # Use pd.to_timedelta to safely cast integer values to timedeltas,
-    # and add those to a Timestamp to safely produce a DatetimeIndex. This
-    # ensures that we do not encounter integer overflow at any point in the
-    # process without raising OutOfBoundsDatetime.
-    return (pd.to_timedelta(flat_num_dates_ns_int, "ns") + ref_date).values
+    # Cast input ordinals to integers and properly handle NaN/NaT
+    # to prevent casting NaN to int
+    flat_num_dates_int = np.zeros_like(flat_num_dates, dtype=np.int64)
+    flat_num_dates_int[nan] = np.iinfo(np.int64).min
+    flat_num_dates_int[~nan] = flat_num_dates[~nan].astype(np.int64)
+
+    # cast to timedelta64[time_unit] and add to ref_date
+    return ref_date + flat_num_dates_int.astype(f"timedelta64[{time_unit}]")


 def decode_cf_datetime(
@@ -409,11 +449,15 @@ def decode_cf_datetime(
             dates = _decode_datetime_with_cftime(
                 flat_num_dates.astype(float), units, calendar
             )
-
-            if (
-                dates[np.nanargmin(num_dates)].year < 1678
-                or dates[np.nanargmax(num_dates)].year >= 2262
-            ):
+            # retrieve cftype
+            dates_min = dates[np.nanargmin(num_dates)]
+            cftype = type(dates_min)
+            # "ns" borders
+            # between ['1677-09-21T00:12:43.145224193', '2262-04-11T23:47:16.854775807']
+            lower = cftype(1677, 9, 21, 0, 12, 43, 145224)
+            upper = cftype(2262, 4, 11, 23, 47, 16, 854775)
+
+            if dates_min < lower or dates[np.nanargmax(num_dates)] > upper:
                 if _is_standard_calendar(calendar):
                     warnings.warn(
                         "Unable to decode time axis into full "
@@ -833,8 +877,8 @@ def _eagerly_encode_cf_datetime(
             raise OutOfBoundsDatetime
         assert dates.dtype == "datetime64[ns]"

-        time_units, ref_date = _unpack_time_units_and_ref_date(units)
-        time_delta = _time_units_to_timedelta64(time_units)
+        time_unit, ref_date = _unpack_time_unit_and_ref_date(units)
+        time_delta = np.timedelta64(1, time_unit)

         # Wrap the dates in a DatetimeIndex to do the subtraction to ensure
         # an OverflowError is raised if the ref_date is too far away from
@@ -843,16 +887,17 @@ def _eagerly_encode_cf_datetime(
         time_deltas = dates_as_index - ref_date

         # retrieve needed units to faithfully encode to int64
-        needed_units, data_ref_date = _unpack_time_units_and_ref_date(data_units)
+        needed_unit, data_ref_date = _unpack_time_unit_and_ref_date(data_units)
+        needed_units = _numpy_to_netcdf_timeunit(needed_unit)
         if data_units != units:
             # this accounts for differences in the reference times
             ref_delta = abs(data_ref_date - ref_date).to_timedelta64()
-            data_delta = _time_units_to_timedelta64(needed_units)
+            data_delta = np.timedelta64(1, needed_unit)
             if (ref_delta % data_delta) > np.timedelta64(0, "ns"):
                 needed_units = _infer_time_units_from_diff(ref_delta)

         # needed time delta to encode faithfully to int64
-        needed_time_delta = _time_units_to_timedelta64(needed_units)
+        needed_time_delta = _unit_timedelta_numpy(needed_units)

         floor_division = np.issubdtype(dtype, np.integer) or dtype is None
         if time_delta > needed_time_delta:
@@ -865,6 +910,7 @@ def _eagerly_encode_cf_datetime(
                 f"Set encoding['dtype'] to floating point dtype to silence this warning."
             )
         elif np.issubdtype(dtype, np.integer) and allow_units_modification:
+            floor_division = True
             new_units = f"{needed_units} since {format_timestamp(ref_date)}"
             emit_user_level_warning(
                 f"Times can't be serialized faithfully to int64 with requested units {units!r}. "
@@ -874,9 +920,12 @@ def _eagerly_encode_cf_datetime(
             )
             units = new_units
             time_delta = needed_time_delta
-            floor_division = True

-        num = _division(time_deltas, time_delta, floor_division)
+        # get resolution of TimedeltaIndex and align time_delta
+        # todo: check, if this works in any case
+        num = _division(
+            time_deltas, time_delta.astype(f"=m8[{time_deltas.unit}]"), floor_division
+        )
         num = reshape(num.values, dates.shape)

     except (OutOfBoundsDatetime, OverflowError, ValueError):
0 commit comments