@@ -1410,6 +1410,43 @@ def has_timedelta64_encoding_dtype(attrs_or_encoding: dict) -> bool:
14101410 return isinstance (dtype , str ) and dtype .startswith ("timedelta64" )
14111411
14121412
1413+ def resolve_time_unit_from_attrs_dtype (
1414+ attrs_dtype : str , name : T_Name
1415+ ) -> PDDatetimeUnitOptions :
1416+ dtype = np .dtype (attrs_dtype )
1417+ resolution , _ = np .datetime_data (dtype )
1418+ resolution = cast (NPDatetimeUnitOptions , resolution )
1419+ if np .timedelta64 (1 , resolution ) > np .timedelta64 (1 , "s" ):
1420+ time_unit = cast (PDDatetimeUnitOptions , "s" )
1421+ message = (
1422+ f"Following pandas, xarray only supports decoding to timedelta64 "
1423+ f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded "
1424+ f"values for variable { name !r} have a resolution of "
1425+ f"{ resolution !r} . Attempting to decode to a resolution of 's'. "
1426+ f"Note, depending on the encoded values, this may lead to an "
1427+ f"OverflowError. Additionally, data will not be identically round "
1428+ f"tripped; xarray will choose an encoding dtype of "
1429+ f"'timedelta64[s]' when re-encoding."
1430+ )
1431+ emit_user_level_warning (message )
1432+ elif np .timedelta64 (1 , resolution ) < np .timedelta64 (1 , "ns" ):
1433+ time_unit = cast (PDDatetimeUnitOptions , "ns" )
1434+ message = (
1435+ f"Following pandas, xarray only supports decoding to timedelta64 "
1436+ f"values with a resolution of 's', 'ms', 'us', or 'ns'. Encoded "
1437+ f"values for variable { name !r} have a resolution of "
1438+ f"{ resolution !r} . Attempting to decode to a resolution of 'ns'. "
1439+ f"Note, depending on the encoded values, this may lead to loss of "
1440+ f"precision. Additionally, data will not be identically round "
1441+ f"tripped; xarray will choose an encoding dtype of "
1442+ f"'timedelta64[ns]' when re-encoding."
1443+ )
1444+ emit_user_level_warning (message )
1445+ else :
1446+ time_unit = cast (PDDatetimeUnitOptions , resolution )
1447+ return time_unit
1448+
1449+
14131450class CFTimedeltaCoder (VariableCoder ):
14141451 """Coder for CF Timedelta coding.
14151452
@@ -1430,7 +1467,7 @@ class CFTimedeltaCoder(VariableCoder):
14301467
14311468 def __init__ (
14321469 self ,
1433- time_unit : PDDatetimeUnitOptions = "ns" ,
1470+ time_unit : PDDatetimeUnitOptions | None = None ,
14341471 decode_via_units : bool = True ,
14351472 decode_via_dtype : bool = True ,
14361473 ) -> None :
@@ -1442,45 +1479,18 @@ def __init__(
14421479 def encode (self , variable : Variable , name : T_Name = None ) -> Variable :
14431480 if np .issubdtype (variable .data .dtype , np .timedelta64 ):
14441481 dims , data , attrs , encoding = unpack_for_encoding (variable )
1445- has_timedelta_dtype = has_timedelta64_encoding_dtype (encoding )
1446- if ("units" in encoding or "dtype" in encoding ) and not has_timedelta_dtype :
1447- dtype = encoding .get ("dtype" , None )
1448- units = encoding .pop ("units" , None )
1482+ dtype = encoding .get ("dtype" , None )
1483+ units = encoding .pop ("units" , None )
14491484
1450- # in the case of packed data we need to encode into
1451- # float first, the correct dtype will be established
1452- # via CFScaleOffsetCoder/CFMaskCoder
1453- if "add_offset" in encoding or "scale_factor" in encoding :
1454- dtype = data .dtype if data .dtype .kind == "f" else "float64"
1485+ # in the case of packed data we need to encode into
1486+ # float first, the correct dtype will be established
1487+ # via CFScaleOffsetCoder/CFMaskCoder
1488+ if "add_offset" in encoding or "scale_factor" in encoding :
1489+ dtype = data .dtype if data .dtype .kind == "f" else "float64"
14551490
1456- else :
1457- resolution , _ = np .datetime_data (variable .dtype )
1458- dtype = np .int64
1459- attrs_dtype = f"timedelta64[{ resolution } ]"
1460- units = _numpy_dtype_to_netcdf_timeunit (variable .dtype )
1461- safe_setitem (attrs , "dtype" , attrs_dtype , name = name )
1462- # Remove dtype encoding if it exists to prevent it from
1463- # interfering downstream in NonStringCoder.
1464- encoding .pop ("dtype" , None )
1465-
1466- if any (
1467- k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS
1468- ):
1469- raise ValueError (
1470- f"Specifying 'add_offset' or 'scale_factor' is not "
1471- f"supported when encoding the timedelta64 values of "
1472- f"variable { name !r} with xarray's new default "
1473- f"timedelta64 encoding approach. To encode { name !r} "
1474- f"with xarray's previous timedelta64 encoding "
1475- f"approach, which supports the 'add_offset' and "
1476- f"'scale_factor' parameters, additionally set "
1477- f"encoding['units'] to a unit of time, e.g. "
1478- f"'seconds'. To proceed with encoding of { name !r} "
1479- f"via xarray's new approach, remove any encoding "
1480- f"entries for 'add_offset' or 'scale_factor'."
1481- )
1482- if "_FillValue" not in encoding and "missing_value" not in encoding :
1483- encoding ["_FillValue" ] = np .iinfo (np .int64 ).min
1491+ resolution , _ = np .datetime_data (variable .dtype )
1492+ attrs_dtype = f"timedelta64[{ resolution } ]"
1493+ safe_setitem (attrs , "dtype" , attrs_dtype , name = name )
14841494
14851495 data , units = encode_cf_timedelta (data , units , dtype )
14861496 safe_setitem (attrs , "units" , units , name = name )
@@ -1499,54 +1509,13 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
14991509 ):
15001510 dims , data , attrs , encoding = unpack_for_decoding (variable )
15011511 units = pop_to (attrs , encoding , "units" )
1502- if is_dtype_decodable and self .decode_via_dtype :
1503- if any (
1504- k in encoding for k in _INVALID_LITERAL_TIMEDELTA64_ENCODING_KEYS
1505- ):
1506- raise ValueError (
1507- f"Decoding timedelta64 values via dtype is not "
1508- f"supported when 'add_offset', or 'scale_factor' are "
1509- f"present in encoding. Check the encoding parameters "
1510- f"of variable { name !r} ."
1511- )
1512- dtype = pop_to (attrs , encoding , "dtype" , name = name )
1513- dtype = np .dtype (dtype )
1514- resolution , _ = np .datetime_data (dtype )
1515- resolution = cast (NPDatetimeUnitOptions , resolution )
1516- if np .timedelta64 (1 , resolution ) > np .timedelta64 (1 , "s" ):
1517- time_unit = cast (PDDatetimeUnitOptions , "s" )
1518- dtype = np .dtype ("timedelta64[s]" )
1519- message = (
1520- f"Following pandas, xarray only supports decoding to "
1521- f"timedelta64 values with a resolution of 's', 'ms', "
1522- f"'us', or 'ns'. Encoded values for variable { name !r} "
1523- f"have a resolution of { resolution !r} . Attempting to "
1524- f"decode to a resolution of 's'. Note, depending on "
1525- f"the encoded values, this may lead to an "
1526- f"OverflowError. Additionally, data will not be "
1527- f"identically round tripped; xarray will choose an "
1528- f"encoding dtype of 'timedelta64[s]' when re-encoding."
1529- )
1530- emit_user_level_warning (message )
1531- elif np .timedelta64 (1 , resolution ) < np .timedelta64 (1 , "ns" ):
1532- time_unit = cast (PDDatetimeUnitOptions , "ns" )
1533- dtype = np .dtype ("timedelta64[ns]" )
1534- message = (
1535- f"Following pandas, xarray only supports decoding to "
1536- f"timedelta64 values with a resolution of 's', 'ms', "
1537- f"'us', or 'ns'. Encoded values for variable { name !r} "
1538- f"have a resolution of { resolution !r} . Attempting to "
1539- f"decode to a resolution of 'ns'. Note, depending on "
1540- f"the encoded values, this may lead to loss of "
1541- f"precision. Additionally, data will not be "
1542- f"identically round tripped; xarray will choose an "
1543- f"encoding dtype of 'timedelta64[ns]' "
1544- f"when re-encoding."
1545- )
1546- emit_user_level_warning (message )
1512+ if is_dtype_decodable :
1513+ attrs_dtype = attrs .pop ("dtype" )
1514+ if self .time_unit is None :
1515+ time_unit = resolve_time_unit_from_attrs_dtype (attrs_dtype , name )
15471516 else :
1548- time_unit = cast ( PDDatetimeUnitOptions , resolution )
1549- elif self . decode_via_units :
1517+ time_unit = self . time_unit
1518+ else :
15501519 if self ._emit_decode_timedelta_future_warning :
15511520 emit_user_level_warning (
15521521 "In a future version, xarray will not decode "
@@ -1564,8 +1533,19 @@ def decode(self, variable: Variable, name: T_Name = None) -> Variable:
15641533 "'CFTimedeltaCoder' instance." ,
15651534 FutureWarning ,
15661535 )
1567- dtype = np .dtype (f"timedelta64[{ self .time_unit } ]" )
1568- time_unit = self .time_unit
1536+ if self .time_unit is None :
1537+ time_unit = cast (PDDatetimeUnitOptions , "ns" )
1538+ else :
1539+ time_unit = self .time_unit
1540+
1541+ # Handle edge case that decode_via_dtype=False and
1542+ # decode_via_units=True, and timedeltas were encoded with a
1543+ # dtype attribute. We need to remove the dtype attribute
1544+ # to prevent an error during round tripping.
1545+ if has_timedelta_dtype :
1546+ attrs .pop ("dtype" )
1547+
1548+ dtype = np .dtype (f"timedelta64[{ time_unit } ]" )
15691549 transform = partial (decode_cf_timedelta , units = units , time_unit = time_unit )
15701550 data = lazy_elemwise_func (data , transform , dtype = dtype )
15711551 return Variable (dims , data , attrs , encoding , fastpath = True )
0 commit comments