diff --git a/asv_bench/benchmarks/categoricals.py b/asv_bench/benchmarks/categoricals.py index 7d5b250c7b157..09758a41250ac 100644 --- a/asv_bench/benchmarks/categoricals.py +++ b/asv_bench/benchmarks/categoricals.py @@ -79,7 +79,8 @@ def setup(self): "int": np.random.randint(2**16, size=154), "float": sys.maxsize * np.random.random((38,)), "timestamp": [ - pd.Timestamp(x, unit="s") for x in np.random.randint(2**18, size=578) + pd.Timestamp(x, input_unit="s") + for x in np.random.randint(2**18, size=578) ], } diff --git a/asv_bench/benchmarks/groupby.py b/asv_bench/benchmarks/groupby.py index 7c1d6457eea15..b9e0d9fa8e244 100644 --- a/asv_bench/benchmarks/groupby.py +++ b/asv_bench/benchmarks/groupby.py @@ -1134,7 +1134,7 @@ def setup(self): index = MultiIndex.from_product( [ np.arange(num_groups), - to_timedelta(np.arange(num_timedeltas), unit="s"), + to_timedelta(np.arange(num_timedeltas), input_unit="s"), ], names=["groups", "timedeltas"], ) diff --git a/asv_bench/benchmarks/inference.py b/asv_bench/benchmarks/inference.py index ce3935d2cd0ac..aea0772ff8a62 100644 --- a/asv_bench/benchmarks/inference.py +++ b/asv_bench/benchmarks/inference.py @@ -118,22 +118,22 @@ def setup(self): # speed of int64, uint64 and float64 paths should be comparable def time_nanosec_int64(self): - to_datetime(self.ts_nanosec, unit="ns") + to_datetime(self.ts_nanosec, input_unit="ns") def time_nanosec_uint64(self): - to_datetime(self.ts_nanosec_uint, unit="ns") + to_datetime(self.ts_nanosec_uint, input_unit="ns") def time_nanosec_float64(self): - to_datetime(self.ts_nanosec_float, unit="ns") + to_datetime(self.ts_nanosec_float, input_unit="ns") def time_sec_uint64(self): - to_datetime(self.ts_sec_uint, unit="s") + to_datetime(self.ts_sec_uint, input_unit="s") def time_sec_int64(self): - to_datetime(self.ts_sec, unit="s") + to_datetime(self.ts_sec, input_unit="s") def time_sec_float64(self): - to_datetime(self.ts_sec_float, unit="s") + to_datetime(self.ts_sec_float, input_unit="s") class 
ToDatetimeYYYYMMDD: @@ -250,10 +250,10 @@ def setup(self, cache): self.dup_string_with_tz = ["2000-02-11 15:00:00-0800"] * N def time_unique_seconds_and_unit(self, cache): - to_datetime(self.unique_numeric_seconds, unit="s", cache=cache) + to_datetime(self.unique_numeric_seconds, input_unit="s", cache=cache) def time_dup_seconds_and_unit(self, cache): - to_datetime(self.dup_numeric_seconds, unit="s", cache=cache) + to_datetime(self.dup_numeric_seconds, input_unit="s", cache=cache) def time_dup_string_dates(self, cache): to_datetime(self.dup_string_dates, cache=cache) @@ -275,7 +275,7 @@ def setup(self): self.str_seconds.append(f"00:00:{i:02d}") def time_convert_int(self): - to_timedelta(self.ints, unit="s") + to_timedelta(self.ints, input_unit="s") def time_convert_string_days(self): to_timedelta(self.str_days) diff --git a/asv_bench/benchmarks/tslibs/timedelta.py b/asv_bench/benchmarks/tslibs/timedelta.py index 9d9689fcfa94b..542b1df6f440a 100644 --- a/asv_bench/benchmarks/tslibs/timedelta.py +++ b/asv_bench/benchmarks/tslibs/timedelta.py @@ -14,13 +14,13 @@ class TimedeltaConstructor: def setup(self): self.nptimedelta64 = np.timedelta64(3600) self.dttimedelta = datetime.timedelta(seconds=3600) - self.td = Timedelta(3600, unit="s") + self.td = Timedelta(3600, input_unit="s") def time_from_int(self): Timedelta(123456789) def time_from_unit(self): - Timedelta(1, unit="D") + Timedelta(1, input_unit="D") def time_from_components(self): Timedelta( diff --git a/doc/source/user_guide/timedeltas.rst b/doc/source/user_guide/timedeltas.rst index 15b6de4b6a054..d516a69c02499 100644 --- a/doc/source/user_guide/timedeltas.rst +++ b/doc/source/user_guide/timedeltas.rst @@ -35,7 +35,7 @@ You can construct a ``Timedelta`` scalar through various arguments, including `I pd.Timedelta(days=1, seconds=1) # integers with a unit - pd.Timedelta(1, unit="D") + pd.Timedelta(1, input_unit="D") # from a datetime.timedelta/np.timedelta64 pd.Timedelta(datetime.timedelta(days=1, seconds=1)) @@ 
-93,8 +93,8 @@ is numeric: .. ipython:: python - pd.to_timedelta(np.arange(5), unit="s") - pd.to_timedelta(np.arange(5), unit="D") + pd.to_timedelta(np.arange(5), input_unit="s") + pd.to_timedelta(np.arange(5), input_unit="D") .. warning:: If a string or array of strings is passed as an input then the ``unit`` keyword @@ -199,7 +199,7 @@ You can fillna on timedeltas, passing a timedelta to get a particular value. .. ipython:: python y.fillna(pd.Timedelta(0)) - y.fillna(pd.Timedelta(10, unit="s")) + y.fillna(pd.Timedelta(10, input_unit="s")) y.fillna(pd.Timedelta("-1 days, 00:00:05")) You can also negate, multiply and use ``abs`` on ``Timedeltas``: diff --git a/doc/source/user_guide/timeseries.rst b/doc/source/user_guide/timeseries.rst index 6a66c30cffbf0..d6b3cb0064045 100644 --- a/doc/source/user_guide/timeseries.rst +++ b/doc/source/user_guide/timeseries.rst @@ -353,8 +353,8 @@ as timezone-naive timestamps and then localize to the appropriate timezone: .. ipython:: python - pd.to_datetime([1490195805.433, 1490195805.433502912], unit="s") - pd.to_datetime(1490195805433502912, unit="ns") + pd.to_datetime([1490195805.433, 1490195805.433502912], input_unit="s") + pd.to_datetime(1490195805433502912, input_unit="ns") .. seealso:: @@ -389,14 +389,14 @@ of a ``DatetimeIndex``. For example, to use 1960-01-01 as the starting date: .. ipython:: python - pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01")) + pd.to_datetime([1, 2, 3], input_unit="D", origin=pd.Timestamp("1960-01-01")) The default is set at ``origin='unix'``, which defaults to ``1970-01-01 00:00:00``. Commonly called 'unix epoch' or POSIX time. .. ipython:: python - pd.to_datetime([1, 2, 3], unit="D") + pd.to_datetime([1, 2, 3], input_unit="D") .. _timeseries.daterange: @@ -2633,7 +2633,7 @@ Transform nonexistent times to ``NaT`` or shift the times. 
dti dti.tz_localize("Europe/Warsaw", nonexistent="shift_forward") dti.tz_localize("Europe/Warsaw", nonexistent="shift_backward") - dti.tz_localize("Europe/Warsaw", nonexistent=pd.Timedelta(1, unit="h")) + dti.tz_localize("Europe/Warsaw", nonexistent=pd.Timedelta(1, input_unit="h")) dti.tz_localize("Europe/Warsaw", nonexistent="NaT") diff --git a/doc/source/whatsnew/v3.0.0.rst b/doc/source/whatsnew/v3.0.0.rst index 98b91bf4a152c..f7c93f70fa36e 100644 --- a/doc/source/whatsnew/v3.0.0.rst +++ b/doc/source/whatsnew/v3.0.0.rst @@ -716,6 +716,7 @@ Other Deprecations - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.unstack` and :meth:`DataFrame.unstack` (:issue:`12189`, :issue:`53868`) - Deprecated allowing ``fill_value`` that cannot be held in the original dtype (excepting NA values for integer and bool dtypes) in :meth:`Series.shift` and :meth:`DataFrame.shift` (:issue:`53802`) - Deprecated slicing on a :class:`Series` or :class:`DataFrame` with a :class:`DatetimeIndex` using a ``datetime.date`` object, explicitly cast to :class:`Timestamp` instead (:issue:`35830`) +- Deprecated the ``unit`` keyword in :meth:`to_datetime`, :meth:`to_timedelta`, :class:`Timestamp`, and :class:`Timedelta`, use ``input_unit`` instead (:issue:`62097`) .. --------------------------------------------------------------------------- .. _whatsnew_300.prior_deprecations: diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2a080bcb19ae9..abf041df7f9e3 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -117,7 +117,7 @@ def cast_from_unit_vectorized( # but not clear what 2.5 "M" corresponds to, so we will # disallow that case. 
raise ValueError( - f"Conversion of non-round float with unit={unit} " + f"Conversion of non-round float with input_unit={unit} " "is ambiguous" ) @@ -194,7 +194,7 @@ cdef int64_t cast_from_unit( # but not clear what 2.5 "M" corresponds to, so we will # disallow that case. raise ValueError( - f"Conversion of non-round float with unit={unit} " + f"Conversion of non-round float with input_unit={unit} " "is ambiguous" ) # GH#47266 go through np.datetime64 to avoid weird results e.g. with "Y" diff --git a/pandas/_libs/tslibs/offsets.pyx b/pandas/_libs/tslibs/offsets.pyx index dfc1fd0fe5630..941e3ce579ddc 100644 --- a/pandas/_libs/tslibs/offsets.pyx +++ b/pandas/_libs/tslibs/offsets.pyx @@ -5382,7 +5382,7 @@ cpdef to_offset(freq, bint is_period=False): # For these prefixes, we have something like "3h" or # "2.5min", so we can construct a Timedelta with the # matching unit and get our offset from delta_to_tick - td = Timedelta(1, unit=name) + td = Timedelta(1, input_unit=name) off = delta_to_tick(td) offset = off * float(stride) if n != 0: diff --git a/pandas/_libs/tslibs/timedeltas.pyi b/pandas/_libs/tslibs/timedeltas.pyi index 2200f9ebbbbb5..dd9db5095fd49 100644 --- a/pandas/_libs/tslibs/timedeltas.pyi +++ b/pandas/_libs/tslibs/timedeltas.pyi @@ -95,6 +95,8 @@ class Timedelta(timedelta): def __new__( # type: ignore[misc] cls: type[Self], value=..., + input_unit: str | None = ..., + *, unit: str | None = ..., **kwargs: float | np.integer | np.floating, ) -> Self | NaTType: ... 
diff --git a/pandas/_libs/tslibs/timedeltas.pyx b/pandas/_libs/tslibs/timedeltas.pyx index c13b0c4cd78a5..83243c8102350 100644 --- a/pandas/_libs/tslibs/timedeltas.pyx +++ b/pandas/_libs/tslibs/timedeltas.pyx @@ -1132,7 +1132,7 @@ cdef class _Timedelta(timedelta): Examples -------- - >>> pd.Timedelta(1, "us").value + >>> pd.Timedelta(1, input_unit="us").value 1000 """ try: @@ -1174,7 +1174,7 @@ cdef class _Timedelta(timedelta): Examples -------- - >>> td = pd.Timedelta(1, "d") + >>> td = pd.Timedelta(1, input_unit="d") >>> td.days 1 @@ -1216,7 +1216,7 @@ cdef class _Timedelta(timedelta): **Using integer input** - >>> td = pd.Timedelta(42, unit='s') + >>> td = pd.Timedelta(42, input_unit='s') >>> td.seconds 42 """ @@ -1256,7 +1256,7 @@ cdef class _Timedelta(timedelta): **Using integer input** - >>> td = pd.Timedelta(42, unit='us') + >>> td = pd.Timedelta(42, input_unit='us') >>> td.microseconds 42 """ @@ -1308,7 +1308,8 @@ cdef class _Timedelta(timedelta): Examples -------- - >>> td = pd.Timedelta(42, unit='us') + >>> td = pd.Timedelta(42, input_unit='us') + >>> td.unit 'ns' """ return npy_unit_to_abbrev(self._creso) @@ -1652,7 +1653,7 @@ cdef class _Timedelta(timedelta): >>> td.asm8 numpy.timedelta64(3005000,'ns') - >>> td = pd.Timedelta(42, unit='ns') + >>> td = pd.Timedelta(42, input_unit='ns') >>> td.asm8 numpy.timedelta64(42,'ns') """ @@ -1696,7 +1697,7 @@ cdef class _Timedelta(timedelta): >>> td.resolution_string 's' - >>> td = pd.Timedelta(36, unit='us') + >>> td = pd.Timedelta(36, input_unit='us') >>> td.resolution_string 'us' """ @@ -1743,7 +1744,7 @@ cdef class _Timedelta(timedelta): **Using integer input** - >>> td = pd.Timedelta(42, unit='ns') + >>> td = pd.Timedelta(42, input_unit='ns') >>> td.nanoseconds 42 """ @@ -1945,7 +1946,7 @@ class Timedelta(_Timedelta): ---------- value : Timedelta, timedelta, np.timedelta64, str, int or float Input value. 
- unit : str, default 'ns' + input_unit : str, default 'ns' If input is an integer, denote the unit of the input. If input is a float, denote the unit of the integer parts. The decimal parts with resolution lower than 1 nanosecond are ignored. @@ -1965,6 +1966,10 @@ class Timedelta(_Timedelta): Allowing the values `w`, `d`, `MIN`, `MS`, `US` and `NS` to denote units are deprecated in favour of the values `W`, `D`, `min`, `ms`, `us` and `ns`. + unit : str or None, default None + Use input_unit instead. + + .. deprecated:: 3.0.0 **kwargs Available kwargs: {days, seconds, microseconds, @@ -1995,7 +2000,7 @@ class Timedelta(_Timedelta): -------- Here we initialize Timedelta object with both value and unit - >>> td = pd.Timedelta(1, "D") + >>> td = pd.Timedelta(1, input_unit="D") >>> td Timedelta('1 days 00:00:00') @@ -2011,7 +2016,18 @@ class Timedelta(_Timedelta): _req_any_kwargs_new = {"weeks", "days", "hours", "minutes", "seconds", "milliseconds", "microseconds", "nanoseconds"} - def __new__(cls, object value=_no_input, unit=None, **kwargs): + def __new__(cls, object value=_no_input, input_unit=None, *, unit=None, **kwargs): + if unit is not None: + if input_unit is not None: + raise ValueError("Specify only 'input_unit', not 'unit'") + from pandas.errors import Pandas4Warning + warnings.warn( + "The 'unit' keyword is deprecated. Use 'input_unit' instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + input_unit = unit + unsupported_kwargs = set(kwargs) unsupported_kwargs.difference_update(cls._req_any_kwargs_new) if unsupported_kwargs or ( @@ -2066,12 +2082,12 @@ class Timedelta(_Timedelta): ) raise OutOfBoundsTimedelta(msg) from err - disallow_ambiguous_unit(unit) + disallow_ambiguous_unit(input_unit) # GH 30543 if pd.Timedelta already passed, return it # check that only value is passed if isinstance(value, _Timedelta): - # 'unit' is benign in this case, but e.g. days or seconds + # 'input_unit' is benign in this case, but e.g. 
days or seconds # doesn't make sense here. if len(kwargs): # GH#48898 @@ -2082,8 +2098,10 @@ class Timedelta(_Timedelta): ) return value elif isinstance(value, str): - if unit is not None: - raise ValueError("unit must not be specified if the value is a str") + if input_unit is not None: + raise ValueError( + "input_unit must not be specified if the value is a str" + ) if (len(value) > 0 and value[0] == "P") or ( len(value) > 1 and value[:2] == "-P" ): @@ -2140,8 +2158,8 @@ class Timedelta(_Timedelta): elif is_integer_object(value) or is_float_object(value): # unit=None is de-facto 'ns' - unit = parse_timedelta_unit(unit) - value = convert_to_timedelta64(value, unit) + input_unit = parse_timedelta_unit(input_unit) + value = convert_to_timedelta64(value, input_unit) elif checknull_with_nat_and_na(value): return NaT else: diff --git a/pandas/_libs/tslibs/timestamps.pyi b/pandas/_libs/tslibs/timestamps.pyi index 3195ce9641f2b..92f8e3f35bb46 100644 --- a/pandas/_libs/tslibs/timestamps.pyi +++ b/pandas/_libs/tslibs/timestamps.pyi @@ -51,8 +51,9 @@ class Timestamp(datetime): *, nanosecond: int | None = ..., tz: _TimeZones = ..., - unit: str | int | None = ..., + input_unit: str | None = ..., fold: int | None = ..., + unit: str | None = ..., ) -> Self | NaTType: ... @classmethod def _from_value_and_reso( diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 2f0c5fa9ef18e..bbc7b1d6b134b 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1804,18 +1804,22 @@ class Timestamp(_Timestamp): Value of nanosecond. tz : str, zoneinfo.ZoneInfo, pytz.timezone, dateutil.tz.tzfile or None Time zone for time which Timestamp will have. - unit : str - Unit used for conversion if ts_input is of type int or float. The - valid values are 'W', 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For - example, 's' means seconds and 'ms' means milliseconds. + unit : str or None, default None + Use input_unit instead. 
- For float inputs, the result will be stored in nanoseconds, and - the unit attribute will be set as ``'ns'``. + .. deprecated:: 3.0.0 fold : {0, 1}, default None, keyword-only Due to daylight saving time, one wall clock time can occur twice when shifting from summer to winter time; fold describes whether the datetime-like corresponds to the first (0) or the second time (1) the wall clock hits the ambiguous time. + input_unit : str + Unit used for conversion if ts_input is of type int or float. The + valid values are 'W', 'D', 'h', 'm', 's', 'ms', 'us', and 'ns'. For + example, 's' means seconds and 'ms' means milliseconds. + + For float inputs, the result will be stored in nanoseconds, and + the unit attribute will be set as ``'ns'``. See Also -------- @@ -1842,18 +1846,18 @@ class Timestamp(_Timestamp): This converts a float representing a Unix epoch in units of seconds - >>> pd.Timestamp(1513393355.5, unit='s') + >>> pd.Timestamp(1513393355.5, input_unit='s') Timestamp('2017-12-16 03:02:35.500000') This converts an int representing a Unix-epoch in units of weeks - >>> pd.Timestamp(1535, unit='W') + >>> pd.Timestamp(1535, input_unit='W') Timestamp('1999-06-03 00:00:00') This converts an int representing a Unix-epoch in units of seconds and for a particular timezone - >>> pd.Timestamp(1513393355, unit='s', tz='US/Pacific') + >>> pd.Timestamp(1513393355, input_unit='s', tz='US/Pacific') Timestamp('2017-12-15 19:02:35-0800', tz='US/Pacific') Using the other two forms that mimic the API for ``datetime.datetime``: @@ -2586,6 +2590,7 @@ class Timestamp(_Timestamp): tz=_no_input, unit=None, fold=None, + input_unit=None, ): # The parameter list folds together legacy parameter names (the first # four) and positional and keyword parameter names from pydatetime. 
@@ -2612,6 +2617,17 @@ class Timestamp(_Timestamp): _TSObject ts tzinfo_type tzobj + if unit is not None: + if input_unit is not None: + raise ValueError("Specify only 'input_unit', not 'unit'") + from pandas.errors import Pandas4Warning + warnings.warn( + "The 'unit' keyword is deprecated. Use 'input_unit' instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + input_unit = unit + _date_attributes = [year, month, day, hour, minute, second, microsecond, nanosecond] @@ -2659,7 +2675,7 @@ class Timestamp(_Timestamp): # checking verbosely, because cython doesn't optimize # list comprehensions (as of cython 0.29.x) if (isinstance(ts_input, _Timestamp) and - tz is None and unit is None and year is None and + tz is None and input_unit is None and year is None and month is None and day is None and hour is None and minute is None and second is None and microsecond is None and nanosecond is None and @@ -2701,7 +2717,7 @@ class Timestamp(_Timestamp): # microsecond[, tzinfo]]]]]) ts_input = datetime(ts_input, year, month, day or 0, hour or 0, minute or 0, second or 0, fold=fold or 0) - unit = None + input_unit = None if getattr(ts_input, "tzinfo", None) is not None and tz is not None: raise ValueError("Cannot pass a datetime or Timestamp with tzinfo with " @@ -2714,7 +2730,7 @@ class Timestamp(_Timestamp): elif not (999 >= nanosecond >= 0): raise ValueError("nanosecond must be in 0..999") - ts = convert_to_tsobject(ts_input, tzobj, unit, 0, 0, nanosecond) + ts = convert_to_tsobject(ts_input, tzobj, input_unit, 0, 0, nanosecond) if ts.value == NPY_NAT: return NaT @@ -3078,7 +3094,7 @@ timedelta}, default 'raise' Examples -------- - >>> ts = pd.Timestamp(1584226800, unit='s', tz='Europe/Stockholm') + >>> ts = pd.Timestamp(1584226800, input_unit='s', tz='Europe/Stockholm') >>> ts.tz zoneinfo.ZoneInfo(key='Europe/Stockholm') """ diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py index b8dd44a58e8ec..c1514ac27d9c6 100644 --- 
a/pandas/core/arrays/arrow/array.py +++ b/pandas/core/arrays/arrow/array.py @@ -608,7 +608,9 @@ def _box_pa_array( # Workaround https://github.com/apache/arrow/issues/37291 from pandas.core.tools.timedeltas import to_timedelta - value = to_timedelta(value, unit=pa_type.unit).as_unit(pa_type.unit) + value = to_timedelta(value, input_unit=pa_type.unit).as_unit( + pa_type.unit + ) value = value.to_numpy() if pa_type is not None and pa.types.is_timestamp(pa_type): @@ -786,7 +788,13 @@ def __getitem__(self, item: PositionalIndexer): return self._from_pyarrow_array(value) else: pa_type = self._pa_array.type - scalar = value.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + scalar = value.as_py() if scalar is None: return self._dtype.na_value elif pa.types.is_timestamp(pa_type) and pa_type.unit != "ns": @@ -807,16 +815,22 @@ def __iter__(self) -> Iterator[Any]: pa_type = self._pa_array.type box_timestamp = pa.types.is_timestamp(pa_type) and pa_type.unit != "ns" box_timedelta = pa.types.is_duration(pa_type) and pa_type.unit != "ns" - for value in self._pa_array: - val = value.as_py() - if val is None: - yield na_value - elif box_timestamp: - yield Timestamp(val).as_unit(pa_type.unit) - elif box_timedelta: - yield Timedelta(val).as_unit(pa_type.unit) - else: - yield val + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + for value in self._pa_array: + val = value.as_py() + if val is None: + yield na_value + elif box_timestamp: + yield Timestamp(val).as_unit(pa_type.unit) + elif box_timedelta: + yield Timedelta(val).as_unit(pa_type.unit) + else: + yield val def __arrow_array__(self, type=None): """Convert myself to a pyarrow ChunkedArray.""" @@ -2118,7 +2132,14 @@ def _reduce_calc( if keepdims: if isinstance(pa_result, pa.Scalar): - result = pa.array([pa_result.as_py()], type=pa_result.type) + with 
warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + item = pa_result.as_py() + result = pa.array([item], type=pa_result.type) else: result = pa.array( [pa_result], @@ -2129,7 +2150,13 @@ def _reduce_calc( if pc.is_null(pa_result).as_py(): return self.dtype.na_value elif isinstance(pa_result, pa.Scalar): - return pa_result.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + return pa_result.as_py() else: return pa_result @@ -2203,7 +2230,13 @@ def __setitem__(self, key, value) -> None: f"index {key} is out of bounds for axis 0 with size {n}" ) if isinstance(value, pa.Scalar): - value = value.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + value = value.as_py() elif is_list_like(value): raise ValueError("Length of indexer and values mismatch") chunks = [ @@ -2504,7 +2537,13 @@ def _to_numpy_and_type(value) -> tuple[np.ndarray, pa.DataType | None]: pa_type = value.type elif isinstance(value, pa.Scalar): pa_type = value.type - value = value.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + value = value.as_py() else: pa_type = None return np.array(value, dtype=object), pa_type @@ -2553,7 +2592,13 @@ def _replace_with_mask( if isinstance(replacements, pa.Array): replacements = np.array(replacements, dtype=object) elif isinstance(replacements, pa.Scalar): - replacements = replacements.as_py() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + replacements = replacements.as_py() result = np.array(values, dtype=object) result[mask] = replacements @@ -2878,7 +2923,13 @@ def _dt_nanoseconds(self) -> Self: ) def _dt_to_pytimedelta(self) -> 
np.ndarray: - data = self._pa_array.to_pylist() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + data = self._pa_array.to_pylist() if self._dtype.pyarrow_dtype.unit == "ns": data = [None if ts is None else ts.to_pytimedelta() for ts in data] return np.array(data, dtype=object) @@ -3141,7 +3192,13 @@ def _dt_to_pydatetime(self) -> Series: f"to_pydatetime cannot be called with {self.dtype.pyarrow_dtype} type. " "Convert to pyarrow timestamp type." ) - data = self._pa_array.to_pylist() + with warnings.catch_warnings(): + warnings.filterwarnings( + "ignore", + "The 'unit' keyword is deprecated", + Pandas4Warning, + ) + data = self._pa_array.to_pylist() if self._dtype.pyarrow_dtype.unit == "ns": data = [None if ts is None else ts.to_pydatetime(warn=False) for ts in data] return Series(data, dtype=object) diff --git a/pandas/core/arrays/datetimelike.py b/pandas/core/arrays/datetimelike.py index c68b329b00968..04d8b0ea263e3 100644 --- a/pandas/core/arrays/datetimelike.py +++ b/pandas/core/arrays/datetimelike.py @@ -1642,7 +1642,7 @@ def mean(self, *, skipna: bool = True, axis: AxisInt | None = 0): For :class:`pandas.TimedeltaIndex`: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None) diff --git a/pandas/core/arrays/period.py b/pandas/core/arrays/period.py index 180080da4cd00..1a8d905e64c4a 100644 --- a/pandas/core/arrays/period.py +++ b/pandas/core/arrays/period.py @@ -807,10 +807,10 @@ def to_timestamp(self, freq=None, how: str = "start") -> DatetimeArray: if end: if freq == "B" or self.freq == "B": # roll forward to ensure we land on B date - adjust = Timedelta(1, "D") - Timedelta(1, "ns") + adjust = Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") return self.to_timestamp(how="start") + adjust else: - adjust = 
Timedelta(1, "ns") + adjust = Timedelta(1, input_unit="ns") return (self + self.freq).to_timestamp(how="start") - adjust if freq is None: diff --git a/pandas/core/arrays/timedeltas.py b/pandas/core/arrays/timedeltas.py index 64c2e1779aba7..3b00111d5c362 100644 --- a/pandas/core/arrays/timedeltas.py +++ b/pandas/core/arrays/timedeltas.py @@ -779,7 +779,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: -------- **Series** - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="D")) + >>> s = pd.Series(pd.to_timedelta(np.arange(5), input_unit="D")) >>> s 0 0 days 1 1 days @@ -798,7 +798,7 @@ def total_seconds(self) -> npt.NDArray[np.float64]: **TimedeltaIndex** - >>> idx = pd.to_timedelta(np.arange(5), unit="D") + >>> idx = pd.to_timedelta(np.arange(5), input_unit="D") >>> idx TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) @@ -832,7 +832,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: Examples -------- - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit="D") + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit="D") >>> tdelta_idx TimedeltaIndex(['1 days', '2 days', '3 days'], dtype='timedelta64[ns]', freq=None) @@ -863,7 +863,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: -------- For Series: - >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='D')) + >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], input_unit='D')) >>> ser 0 1 days 1 2 days @@ -898,7 +898,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: -------- For Series: - >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='s')) + >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], input_unit='s')) >>> ser 0 0 days 00:00:01 1 0 days 00:00:02 @@ -912,7 +912,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: For TimedeltaIndex: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='s') + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit='s') >>> tdelta_idx TimedeltaIndex(['0 days 00:00:01', '0 days 00:00:02', '0 days 
00:00:03'], dtype='timedelta64[ns]', freq=None) @@ -938,7 +938,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: -------- For Series: - >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='us')) + >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], input_unit='us')) >>> ser 0 0 days 00:00:00.000001 1 0 days 00:00:00.000002 @@ -952,7 +952,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: For TimedeltaIndex: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='us') + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit='us') >>> tdelta_idx TimedeltaIndex(['0 days 00:00:00.000001', '0 days 00:00:00.000002', '0 days 00:00:00.000003'], @@ -978,7 +978,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: -------- For Series: - >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], unit='ns')) + >>> ser = pd.Series(pd.to_timedelta([1, 2, 3], input_unit='ns')) >>> ser 0 0 days 00:00:00.000000001 1 0 days 00:00:00.000000002 @@ -992,7 +992,7 @@ def to_pytimedelta(self) -> npt.NDArray[np.object_]: For TimedeltaIndex: - >>> tdelta_idx = pd.to_timedelta([1, 2, 3], unit='ns') + >>> tdelta_idx = pd.to_timedelta([1, 2, 3], input_unit='ns') >>> tdelta_idx TimedeltaIndex(['0 days 00:00:00.000000001', '0 days 00:00:00.000000002', '0 days 00:00:00.000000003'], diff --git a/pandas/core/computation/pytables.py b/pandas/core/computation/pytables.py index d176beb6cde00..ce38b2ff0f540 100644 --- a/pandas/core/computation/pytables.py +++ b/pandas/core/computation/pytables.py @@ -228,7 +228,7 @@ def stringify(value): if isinstance(conv_val, str): conv_val = Timedelta(conv_val) else: - conv_val = Timedelta(conv_val, unit="s") + conv_val = Timedelta(conv_val, input_unit="s") conv_val = conv_val.as_unit("ns")._value return TermValue(int(conv_val), conv_val, kind) elif meta == "category": diff --git a/pandas/core/indexes/accessors.py b/pandas/core/indexes/accessors.py index b9a0af6b0bc28..b343dfe23738b 100644 --- a/pandas/core/indexes/accessors.py +++ b/pandas/core/indexes/accessors.py 
@@ -482,7 +482,7 @@ def to_pytimedelta(self) -> np.ndarray: Examples -------- - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="D")) + >>> s = pd.Series(pd.to_timedelta(np.arange(5), input_unit="D")) >>> s 0 0 days 1 1 days @@ -528,7 +528,7 @@ def components(self) -> DataFrame: Examples -------- - >>> s = pd.Series(pd.to_timedelta(np.arange(5), unit="s")) + >>> s = pd.Series(pd.to_timedelta(np.arange(5), input_unit="s")) >>> s 0 0 days 00:00:00 1 0 days 00:00:01 diff --git a/pandas/core/indexes/datetimelike.py b/pandas/core/indexes/datetimelike.py index 7e6461f0fab5e..0f472a4ea243e 100644 --- a/pandas/core/indexes/datetimelike.py +++ b/pandas/core/indexes/datetimelike.py @@ -549,7 +549,7 @@ def _wrap_range_setop(self, other, res_i8) -> Self: new_freq = self.freq elif isinstance(res_i8, RangeIndex): new_freq = to_offset( - Timedelta(res_i8.step, unit=self.unit).as_unit(self.unit) + Timedelta(res_i8.step, input_unit=self.unit).as_unit(self.unit) ) # TODO(GH#41493): we cannot just do diff --git a/pandas/core/indexes/datetimes.py b/pandas/core/indexes/datetimes.py index f64152278a648..e7cf794f4d68b 100644 --- a/pandas/core/indexes/datetimes.py +++ b/pandas/core/indexes/datetimes.py @@ -1031,6 +1031,7 @@ def date_range( '2817-01-01', '2917-01-01'], dtype='datetime64[s]', freq='100YS-JAN') """ + # assert not kwargs, kwargs if freq is None and com.any_none(periods, start, end): freq = "D" diff --git a/pandas/core/indexes/timedeltas.py b/pandas/core/indexes/timedeltas.py index 84b5d9d262740..7a448bc64c4c8 100644 --- a/pandas/core/indexes/timedeltas.py +++ b/pandas/core/indexes/timedeltas.py @@ -230,7 +230,9 @@ def _parse_with_reso(self, label: str) -> tuple[Timedelta | NaTType, None]: # t def _parsed_string_to_bounds(self, reso, parsed: Timedelta): # reso is unused, included to match signature of DTI/PI lbound = parsed.round(parsed.resolution_string) - rbound = lbound + to_offset(parsed.resolution_string) - Timedelta(1, "ns") + rbound = ( + lbound + 
to_offset(parsed.resolution_string) - Timedelta(1, input_unit="ns") + ) return lbound, rbound # ------------------------------------------------------------------- diff --git a/pandas/core/resample.py b/pandas/core/resample.py index e8803b6f30fce..8629b633431e1 100644 --- a/pandas/core/resample.py +++ b/pandas/core/resample.py @@ -2387,8 +2387,10 @@ def _adjust_bin_edges( edges_dti = binner.tz_localize(None) edges_dti = ( edges_dti - + Timedelta(days=1, unit=edges_dti.unit).as_unit(edges_dti.unit) - - Timedelta(1, unit=edges_dti.unit).as_unit(edges_dti.unit) + + Timedelta(days=1, input_unit=edges_dti.unit).as_unit( + edges_dti.unit + ) + - Timedelta(1, input_unit=edges_dti.unit).as_unit(edges_dti.unit) ) bin_edges = edges_dti.tz_localize(binner.tz).asi8 else: @@ -2801,8 +2803,8 @@ def _adjust_dates_anchored( lresult_int = last._value + (freq_value - loffset) else: lresult_int = last._value + freq_value - fresult = Timestamp(fresult_int, unit=unit) - lresult = Timestamp(lresult_int, unit=unit) + fresult = Timestamp(fresult_int, input_unit=unit) + lresult = Timestamp(lresult_int, input_unit=unit) if first_tzinfo is not None: fresult = fresult.tz_localize("UTC").tz_convert(first_tzinfo) if last_tzinfo is not None: diff --git a/pandas/core/reshape/tile.py b/pandas/core/reshape/tile.py index b13da83084e5c..894b8856ed596 100644 --- a/pandas/core/reshape/tile.py +++ b/pandas/core/reshape/tile.py @@ -596,7 +596,7 @@ def _format_labels( # "dtype[Any] | ExtensionDtype"; expected "DatetimeTZDtype | dtype[Any]" unit = dtype_to_unit(bins.dtype) # type: ignore[arg-type] formatter = lambda x: x - adjust = lambda x: x - Timedelta(1, unit=unit).as_unit(unit) + adjust = lambda x: x - Timedelta(1, input_unit=unit).as_unit(unit) else: precision = _infer_precision(precision, bins) formatter = lambda x: _round_frac(x, precision) diff --git a/pandas/core/tools/datetimes.py b/pandas/core/tools/datetimes.py index f353a3f7dc2cb..18c0403eeba01 100644 --- a/pandas/core/tools/datetimes.py +++ 
b/pandas/core/tools/datetimes.py @@ -41,6 +41,7 @@ ArrayLike, DateTimeErrorChoices, ) +from pandas.errors import Pandas4Warning from pandas.util._decorators import set_module from pandas.util._exceptions import find_stack_level @@ -322,7 +323,7 @@ def _convert_listlike_datetimes( format: str | None, name: Hashable | None = None, utc: bool = False, - unit: str | None = None, + input_unit: str | None = None, errors: DateTimeErrorChoices = "raise", dayfirst: bool | None = None, yearfirst: bool | None = None, @@ -340,7 +341,7 @@ def _convert_listlike_datetimes( None or string for the Index name utc : bool Whether to convert/localize timestamps to UTC. - unit : str + input_unit : str None or string of the frequency of the passed data errors : str error handing behaviors from to_datetime, 'raise', 'coerce' @@ -406,10 +407,10 @@ def _convert_listlike_datetimes( return arg - elif unit is not None: + elif input_unit is not None: if format is not None: - raise ValueError("cannot specify both format and unit") - return _to_datetime_with_unit(arg, unit, name, utc, errors) + raise ValueError("cannot specify both format and input_unit") + return _to_datetime_with_unit(arg, input_unit, name, utc, errors) elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, datetime, list, tuple, 1-d array, or Series" @@ -481,16 +482,16 @@ def _array_strptime_with_fallback( return Index(result, dtype=result.dtype, name=name) -def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: +def _to_datetime_with_unit(arg, input_unit, name, utc: bool, errors: str) -> Index: """ - to_datetime specalized to the case where a 'unit' is passed. + to_datetime specalized to the case where a 'input_unit' is passed. 
""" arg = extract_array(arg, extract_numpy=True) # GH#30050 pass an ndarray to tslib.array_to_datetime # because it expects an ndarray argument if isinstance(arg, IntegerArray): - arr = arg.astype(f"datetime64[{unit}]") + arr = arg.astype(f"datetime64[{input_unit}]") tz_parsed = None else: arg = np.asarray(arg) @@ -498,27 +499,27 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: if arg.dtype.kind in "iu": # Note we can't do "f" here because that could induce unwanted # rounding GH#14156, GH#20445 - arr = arg.astype(f"datetime64[{unit}]", copy=False) + arr = arg.astype(f"datetime64[{input_unit}]", copy=False) try: arr = astype_overflowsafe(arr, np.dtype("M8[ns]"), copy=False) except OutOfBoundsDatetime: if errors == "raise": raise arg = arg.astype(object) - return _to_datetime_with_unit(arg, unit, name, utc, errors) + return _to_datetime_with_unit(arg, input_unit, name, utc, errors) tz_parsed = None elif arg.dtype.kind == "f": with np.errstate(over="raise"): try: - arr = cast_from_unit_vectorized(arg, unit=unit) + arr = cast_from_unit_vectorized(arg, unit=input_unit) except OutOfBoundsDatetime as err: if errors != "raise": return _to_datetime_with_unit( - arg.astype(object), unit, name, utc, errors + arg.astype(object), input_unit, name, utc, errors ) raise OutOfBoundsDatetime( - f"cannot convert input with unit '{unit}'" + f"cannot convert input with input_unit '{input_unit}'" ) from err arr = arr.view("M8[ns]") @@ -529,7 +530,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: arg, utc=utc, errors=errors, - unit_for_numerics=unit, + unit_for_numerics=input_unit, creso=cast(int, NpyDatetimeUnit.NPY_FR_ns.value), ) @@ -550,7 +551,7 @@ def _to_datetime_with_unit(arg, unit, name, utc: bool, errors: str) -> Index: return result -def _adjust_to_origin(arg, origin, unit): +def _adjust_to_origin(arg, origin, input_unit): """ Helper function for to_datetime. 
Adjust input argument to the specified origin @@ -561,8 +562,8 @@ def _adjust_to_origin(arg, origin, unit): date to be adjusted origin : 'julian' or Timestamp origin offset for the arg - unit : str - passed unit from to_datetime, must be 'D' + input_unit : str + passed input_unit from to_datetime, must be 'D' Returns ------- @@ -571,7 +572,7 @@ def _adjust_to_origin(arg, origin, unit): if origin == "julian": original = arg j0 = Timestamp(0).to_julian_date() - if unit != "D": + if input_unit != "D": raise ValueError("unit must be 'D' for origin='julian'") try: arg = arg - j0 @@ -594,12 +595,12 @@ def _adjust_to_origin(arg, origin, unit): ): raise ValueError( f"'{arg}' is not compatible with origin='{origin}'; " - "it must be numeric with a unit specified" + "it must be numeric with a input_unit specified" ) # we are going to offset back to unix / epoch time try: - offset = Timestamp(origin, unit=unit) + offset = Timestamp(origin, input_unit=input_unit) except OutOfBoundsDatetime as err: raise OutOfBoundsDatetime(f"origin {origin} is Out of Bounds") from err except ValueError as err: @@ -613,7 +614,7 @@ def _adjust_to_origin(arg, origin, unit): # convert the offset to the unit of the arg # this should be lossless in terms of precision - ioffset = td_offset // Timedelta(1, unit=unit) + ioffset = td_offset // Timedelta(1, input_unit=input_unit) # scalars & ndarray-like can handle the addition if is_list_like(arg) and not isinstance(arg, (ABCSeries, Index, np.ndarray)): @@ -634,6 +635,8 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., + *, + input_unit: str | None = ..., ) -> Timestamp: ... @@ -649,6 +652,8 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., + *, + input_unit: str | None = ..., ) -> Series: ... @@ -664,6 +669,8 @@ def to_datetime( unit: str | None = ..., origin=..., cache: bool = ..., + *, + input_unit: str | None = ..., ) -> DatetimeIndex: ... 
@@ -679,6 +686,8 @@ def to_datetime( unit: str | None = None, origin: str = "unix", cache: bool = True, + *, + input_unit: str | None = None, ) -> DatetimeIndex | Series | DatetimeScalar | NaTType: """ Convert argument to datetime. @@ -763,11 +772,10 @@ def to_datetime( string. Cannot be used alongside ``format='ISO8601'`` or ``format='mixed'``. - unit : str, default 'ns' - The unit of the arg (D,s,ms,us,ns) denote the unit, which is an - integer or float number. This will be based off the origin. - Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate - the number of milliseconds to the unix epoch start. + unit : str or None, default None + Use input_unit instead. + + .. deprecated:: 3.0.0 origin : scalar, default 'unix' Define the reference date. The numeric values would be parsed as number of units (defined by `unit`) since this reference date. @@ -787,6 +795,11 @@ def to_datetime( is only used when there are at least 50 values. The presence of out-of-bounds values will render the cache unusable and may slow down parsing. + input_unit : str, default 'ns' + The unit of the arg (D,s,ms,us,ns) denote the unit, which is an + integer or float number. This will be based off the origin. + Example, with ``unit='ms'`` and ``origin='unix'``, this would calculate + the number of milliseconds to the unix epoch start. Returns ------- @@ -885,9 +898,9 @@ def to_datetime( Using a unix epoch time - >>> pd.to_datetime(1490195805, unit="s") + >>> pd.to_datetime(1490195805, input_unit="s") Timestamp('2017-03-22 15:16:45') - >>> pd.to_datetime(1490195805433502912, unit="ns") + >>> pd.to_datetime(1490195805433502912, input_unit="ns") Timestamp('2017-03-22 15:16:45.433502912') .. warning:: For float arg, precision rounding might happen. 
To prevent @@ -895,7 +908,7 @@ def to_datetime( Using a non-unix epoch origin - >>> pd.to_datetime([1, 2, 3], unit="D", origin=pd.Timestamp("1960-01-01")) + >>> pd.to_datetime([1, 2, 3], input_unit="D", origin=pd.Timestamp("1960-01-01")) DatetimeIndex(['1960-01-02', '1960-01-03', '1960-01-04'], dtype='datetime64[ns]', freq=None) @@ -989,18 +1002,28 @@ def to_datetime( DatetimeIndex(['2018-10-26 12:00:00+00:00', '2020-01-01 18:00:00+00:00'], dtype='datetime64[us, UTC]', freq=None) """ + if unit is not None: + # GH#62097 + if input_unit is not None: + raise ValueError("Specify only 'input_unit', not 'unit'") + warnings.warn( + "The 'unit' keyword is deprecated. Use 'input_unit' instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + input_unit = unit if exact is not lib.no_default and format in {"mixed", "ISO8601"}: raise ValueError("Cannot use 'exact' when 'format' is 'mixed' or 'ISO8601'") if arg is None: return NaT if origin != "unix": - arg = _adjust_to_origin(arg, origin, unit) + arg = _adjust_to_origin(arg, origin, input_unit) convert_listlike = partial( _convert_listlike_datetimes, utc=utc, - unit=unit, + input_unit=input_unit, dayfirst=dayfirst, yearfirst=yearfirst, errors=errors, @@ -1181,7 +1204,7 @@ def coerce(values): value = unit_rev.get(u) if value is not None and value in arg: try: - values += to_timedelta(coerce(arg[value]), unit=u, errors=errors) + values += to_timedelta(coerce(arg[value]), input_unit=u, errors=errors) except (TypeError, ValueError) as err: raise ValueError( f"cannot assemble the datetimes [{value}]: {err}" diff --git a/pandas/core/tools/timedeltas.py b/pandas/core/tools/timedeltas.py index 2dc5e29308214..61437bd3c168f 100644 --- a/pandas/core/tools/timedeltas.py +++ b/pandas/core/tools/timedeltas.py @@ -9,6 +9,7 @@ Any, overload, ) +import warnings import numpy as np @@ -22,7 +23,9 @@ disallow_ambiguous_unit, parse_timedelta_unit, ) +from pandas.errors import Pandas4Warning from pandas.util._decorators import set_module 
+from pandas.util._exceptions import find_stack_level from pandas.core.dtypes.common import is_list_like from pandas.core.dtypes.dtypes import ArrowDtype @@ -55,6 +58,8 @@ def to_timedelta( arg: str | float | timedelta, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., + *, + input_unit: UnitChoices | None = ..., ) -> Timedelta: ... @@ -63,6 +68,8 @@ def to_timedelta( arg: Series, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., + *, + input_unit: UnitChoices | None = ..., ) -> Series: ... @@ -71,6 +78,8 @@ def to_timedelta( arg: list | tuple | range | ArrayLike | Index, unit: UnitChoices | None = ..., errors: DateTimeErrorChoices = ..., + *, + input_unit: UnitChoices | None = ..., ) -> TimedeltaIndex: ... @@ -88,6 +97,8 @@ def to_timedelta( | Series, unit: UnitChoices | None = None, errors: DateTimeErrorChoices = "raise", + *, + input_unit: UnitChoices | None = None, ) -> Timedelta | TimedeltaIndex | Series | NaTType | Any: """ Convert argument to timedelta. @@ -106,7 +117,15 @@ def to_timedelta( Strings with units 'M', 'Y' and 'y' do not represent unambiguous timedelta values and will raise an exception. - unit : str, optional + unit : str or None, default None + Use input_unit instead. + + .. deprecated:: 3.0.0 + + errors : {'raise', 'coerce'}, default 'raise' + - If 'raise', then invalid parsing will raise an exception. + - If 'coerce', then invalid parsing will be set as NaT. + input_unit : str, optional Denotes the unit of the arg for numeric `arg`. Defaults to ``"ns"``. Possible values: @@ -126,10 +145,6 @@ def to_timedelta( Units 'H'and 'S' are deprecated and will be removed in a future version. Please use 'h' and 's'. - errors : {'raise', 'coerce'}, default 'raise' - - If 'raise', then invalid parsing will raise an exception. - - If 'coerce', then invalid parsing will be set as NaT. 
- Returns ------- timedelta @@ -168,17 +183,27 @@ def to_timedelta( Converting numbers by specifying the `unit` keyword argument: - >>> pd.to_timedelta(np.arange(5), unit="s") + >>> pd.to_timedelta(np.arange(5), input_unit="s") TimedeltaIndex(['0 days 00:00:00', '0 days 00:00:01', '0 days 00:00:02', '0 days 00:00:03', '0 days 00:00:04'], dtype='timedelta64[ns]', freq=None) - >>> pd.to_timedelta(np.arange(5), unit="D") + >>> pd.to_timedelta(np.arange(5), input_unit="D") TimedeltaIndex(['0 days', '1 days', '2 days', '3 days', '4 days'], dtype='timedelta64[ns]', freq=None) """ if unit is not None: - unit = parse_timedelta_unit(unit) - disallow_ambiguous_unit(unit) + # GH#62097 + if input_unit is not None: + raise ValueError("Specify only 'input_unit', not 'unit'") + warnings.warn( + "The 'unit' keyword is deprecated. Use 'input_unit' instead.", + Pandas4Warning, + stacklevel=find_stack_level(), + ) + input_unit = unit + if input_unit is not None: + input_unit = parse_timedelta_unit(input_unit) + disallow_ambiguous_unit(input_unit) if errors not in ("raise", "coerce"): raise ValueError("errors must be one of 'raise', or 'coerce'.") @@ -186,10 +211,12 @@ def to_timedelta( if arg is None: return NaT elif isinstance(arg, ABCSeries): - values = _convert_listlike(arg._values, unit=unit, errors=errors) + values = _convert_listlike(arg._values, input_unit=input_unit, errors=errors) return arg._constructor(values, index=arg.index, name=arg.name) elif isinstance(arg, ABCIndex): - return _convert_listlike(arg, unit=unit, errors=errors, name=arg.name) + return _convert_listlike( + arg, input_unit=input_unit, errors=errors, name=arg.name + ) elif isinstance(arg, np.ndarray) and arg.ndim == 0: # extract array scalar and process below # error: Incompatible types in assignment (expression has type "object", @@ -198,27 +225,29 @@ def to_timedelta( # Series]]") [assignment] arg = lib.item_from_zerodim(arg) # type: ignore[assignment] elif is_list_like(arg) and getattr(arg, "ndim", 1) == 
1: - return _convert_listlike(arg, unit=unit, errors=errors) + return _convert_listlike(arg, input_unit=input_unit, errors=errors) elif getattr(arg, "ndim", 1) > 1: raise TypeError( "arg must be a string, timedelta, list, tuple, 1-d array, or Series" ) - if isinstance(arg, str) and unit is not None: - raise ValueError("unit must not be specified if the input is/contains a str") + if isinstance(arg, str) and input_unit is not None: + raise ValueError( + "input_unit must not be specified if the input is/contains a str" + ) # ...so it must be a scalar value. Return scalar. - return _coerce_scalar_to_timedelta_type(arg, unit=unit, errors=errors) + return _coerce_scalar_to_timedelta_type(arg, input_unit=input_unit, errors=errors) def _coerce_scalar_to_timedelta_type( - r, unit: UnitChoices | None = "ns", errors: DateTimeErrorChoices = "raise" + r, input_unit: UnitChoices | None = "ns", errors: DateTimeErrorChoices = "raise" ) -> Timedelta | NaTType: """Convert string 'r' to a timedelta object.""" result: Timedelta | NaTType try: - result = Timedelta(r, unit) + result = Timedelta(r, input_unit=input_unit) except ValueError: if errors == "raise": raise @@ -230,7 +259,7 @@ def _coerce_scalar_to_timedelta_type( def _convert_listlike( arg, - unit: UnitChoices | None = None, + input_unit: UnitChoices | None = None, errors: DateTimeErrorChoices = "raise", name: Hashable | None = None, ): @@ -241,7 +270,7 @@ def _convert_listlike( elif isinstance(arg_dtype, ArrowDtype) and arg_dtype.kind == "m": return arg - td64arr = sequence_to_td64ns(arg, unit=unit, errors=errors, copy=False)[0] + td64arr = sequence_to_td64ns(arg, unit=input_unit, errors=errors, copy=False)[0] from pandas import TimedeltaIndex diff --git a/pandas/io/json/_json.py b/pandas/io/json/_json.py index 32e932b70e761..0a022a48753b8 100644 --- a/pandas/io/json/_json.py +++ b/pandas/io/json/_json.py @@ -1314,7 +1314,7 @@ def _try_convert_to_date(self, data: Series) -> Series: date_units = (self.date_unit,) if 
self.date_unit else self._STAMP_UNITS for date_unit in date_units: try: - return to_datetime(new_data, errors="raise", unit=date_unit) + return to_datetime(new_data, errors="raise", input_unit=date_unit) except (ValueError, OverflowError, TypeError): continue return data diff --git a/pandas/io/sas/sas7bdat.py b/pandas/io/sas/sas7bdat.py index 089576e5680cb..58b77e4676243 100644 --- a/pandas/io/sas/sas7bdat.py +++ b/pandas/io/sas/sas7bdat.py @@ -280,11 +280,11 @@ def _get_properties(self) -> None: x = self._read_float( const.date_created_offset + align1, const.date_created_length ) - self.date_created = epoch + pd.to_timedelta(x, unit="s") + self.date_created = epoch + pd.to_timedelta(x, input_unit="s") x = self._read_float( const.date_modified_offset + align1, const.date_modified_length ) - self.date_modified = epoch + pd.to_timedelta(x, unit="s") + self.date_modified = epoch + pd.to_timedelta(x, input_unit="s") self.header_length = self._read_uint( const.header_size_offset + align1, const.header_size_length diff --git a/pandas/io/sql.py b/pandas/io/sql.py index 18129257af1c9..30346e2c9191e 100644 --- a/pandas/io/sql.py +++ b/pandas/io/sql.py @@ -123,7 +123,7 @@ def _handle_date_column( ): format = "s" if format in ["D", "d", "h", "m", "s", "ms", "us", "ns"]: - return to_datetime(col, errors="coerce", unit=format, utc=utc) + return to_datetime(col, errors="coerce", input_unit=format, utc=utc) elif isinstance(col.dtype, DatetimeTZDtype): # coerce to UTC timezone # GH11216 diff --git a/pandas/tests/apply/test_frame_apply.py b/pandas/tests/apply/test_frame_apply.py index a9afb5dbd11d7..9c9da02f7cf69 100644 --- a/pandas/tests/apply/test_frame_apply.py +++ b/pandas/tests/apply/test_frame_apply.py @@ -179,7 +179,7 @@ def test_apply_mixed_datetimelike(): expected = DataFrame( { "A": date_range("20130101", periods=3), - "B": pd.to_timedelta(np.arange(3), unit="s"), + "B": pd.to_timedelta(np.arange(3), input_unit="s"), } ) result = expected.apply(lambda x: x, axis=1) diff 
--git a/pandas/tests/arithmetic/test_timedelta64.py b/pandas/tests/arithmetic/test_timedelta64.py index 2361a353f3f8a..8d1024d03fcd0 100644 --- a/pandas/tests/arithmetic/test_timedelta64.py +++ b/pandas/tests/arithmetic/test_timedelta64.py @@ -696,16 +696,16 @@ def test_tdi_add_overflow(self): # preliminary test scalar analogue of vectorized tests below # TODO: Make raised error message more informative and test with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): - pd.to_timedelta(106580, "D") + Timestamp("2000") + pd.to_timedelta(106580, input_unit="D") + Timestamp("2000") with pytest.raises(OutOfBoundsDatetime, match="10155196800000000000"): - Timestamp("2000") + pd.to_timedelta(106580, "D") + Timestamp("2000") + pd.to_timedelta(106580, input_unit="D") _NaT = NaT._value + 1 msg = "Overflow in int64 addition" with pytest.raises(OverflowError, match=msg): - pd.to_timedelta([106580], "D") + Timestamp("2000") + pd.to_timedelta([106580], input_unit="D") + Timestamp("2000") with pytest.raises(OverflowError, match=msg): - Timestamp("2000") + pd.to_timedelta([106580], "D") + Timestamp("2000") + pd.to_timedelta([106580], input_unit="D") with pytest.raises(OverflowError, match=msg): pd.to_timedelta([_NaT]) - Timedelta("1 days") with pytest.raises(OverflowError, match=msg): diff --git a/pandas/tests/arrays/test_datetimelike.py b/pandas/tests/arrays/test_datetimelike.py index d1ef29b0bf8a0..82098d7e35634 100644 --- a/pandas/tests/arrays/test_datetimelike.py +++ b/pandas/tests/arrays/test_datetimelike.py @@ -1279,7 +1279,7 @@ def test_to_numpy_extra(arr): "values", [ pd.to_datetime(["2020-01-01", "2020-02-01"]), - pd.to_timedelta([1, 2], unit="D"), + pd.to_timedelta([1, 2], input_unit="D"), PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), ], ) @@ -1310,7 +1310,7 @@ def test_searchsorted_datetimelike_with_listlike(values, klass, as_index): "values", [ pd.to_datetime(["2020-01-01", "2020-02-01"]), - pd.to_timedelta([1, 2], unit="D"), + pd.to_timedelta([1, 
2], input_unit="D"), PeriodIndex(["2020-01-01", "2020-02-01"], freq="D"), ], ) diff --git a/pandas/tests/dtypes/cast/test_maybe_box_native.py b/pandas/tests/dtypes/cast/test_maybe_box_native.py index 151586962d517..2a08a34cf5803 100644 --- a/pandas/tests/dtypes/cast/test_maybe_box_native.py +++ b/pandas/tests/dtypes/cast/test_maybe_box_native.py @@ -29,7 +29,7 @@ (np.datetime64("2005-02-25"), Timestamp), (Timestamp("2005-02-25"), Timestamp), (np.timedelta64(1, "D"), Timedelta), - (Timedelta(1, "D"), Timedelta), + (Timedelta(1, input_unit="D"), Timedelta), (Interval(0, 1), Interval), (Period("4Q2005"), Period), ], diff --git a/pandas/tests/dtypes/test_generic.py b/pandas/tests/dtypes/test_generic.py index 2b90886a8d070..400c63cedc15c 100644 --- a/pandas/tests/dtypes/test_generic.py +++ b/pandas/tests/dtypes/test_generic.py @@ -13,7 +13,7 @@ class TestABCClasses: tuples = [[1, 2, 2], ["red", "blue", "red"]] multi_index = pd.MultiIndex.from_arrays(tuples, names=("number", "color")) datetime_index = pd.to_datetime(["2000/1/1", "2010/1/1"]) - timedelta_index = pd.to_timedelta(np.arange(5), unit="s") + timedelta_index = pd.to_timedelta(np.arange(5), input_unit="s") period_index = pd.period_range("2000/1/1", "2010/1/1/", freq="M") categorical = pd.Categorical([1, 2, 3], categories=[2, 3, 1]) categorical_df = pd.DataFrame({"values": [1, 2, 3]}, index=categorical) diff --git a/pandas/tests/extension/test_arrow.py b/pandas/tests/extension/test_arrow.py index d2d65c4b983a7..d3ee7d3b0eadd 100644 --- a/pandas/tests/extension/test_arrow.py +++ b/pandas/tests/extension/test_arrow.py @@ -3038,7 +3038,7 @@ def test_describe_timedelta_data(pa_type): data = pd.Series(range(1, 10), dtype=ArrowDtype(pa_type)) result = data.describe() expected = pd.Series( - [9] + pd.to_timedelta([5, 2, 1, 3, 5, 7, 9], unit=pa_type.unit).tolist(), + [9] + pd.to_timedelta([5, 2, 1, 3, 5, 7, 9], input_unit=pa_type.unit).tolist(), dtype=object, index=["count", "mean", "std", "min", "25%", "50%", "75%", 
"max"], ) @@ -3053,7 +3053,7 @@ def test_describe_datetime_data(pa_type): expected = pd.Series( [9] + [ - pd.Timestamp(v, tz=pa_type.tz, unit=pa_type.unit) + pd.Timestamp(v, tz=pa_type.tz, input_unit=pa_type.unit) for v in [5, 1, 3, 5, 7, 9] ], dtype=object, @@ -3114,9 +3114,9 @@ def test_from_sequence_temporal(pa_type): val = 3 unit = pa_type.unit if pa.types.is_duration(pa_type): - seq = [pd.Timedelta(val, unit=unit).as_unit(unit)] + seq = [pd.Timedelta(val, input_unit=unit).as_unit(unit)] else: - seq = [pd.Timestamp(val, unit=unit, tz=pa_type.tz).as_unit(unit)] + seq = [pd.Timestamp(val, input_unit=unit, tz=pa_type.tz).as_unit(unit)] result = ArrowExtensionArray._from_sequence(seq, dtype=pa_type) expected = ArrowExtensionArray(pa.array([val], type=pa_type)) @@ -3130,9 +3130,9 @@ def test_setitem_temporal(pa_type): # GH 53171 unit = pa_type.unit if pa.types.is_duration(pa_type): - val = pd.Timedelta(1, unit=unit).as_unit(unit) + val = pd.Timedelta(1, input_unit=unit).as_unit(unit) else: - val = pd.Timestamp(1, unit=unit, tz=pa_type.tz).as_unit(unit) + val = pd.Timestamp(1, input_unit=unit, tz=pa_type.tz).as_unit(unit) arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) @@ -3149,7 +3149,7 @@ def test_arithmetic_temporal(pa_type, request): # GH 53171 arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) unit = pa_type.unit - result = arr - pd.Timedelta(1, unit=unit).as_unit(unit) + result = arr - pd.Timedelta(1, input_unit=unit).as_unit(unit) expected = ArrowExtensionArray(pa.array([0, 1, 2], type=pa_type)) tm.assert_extension_array_equal(result, expected) @@ -3161,9 +3161,9 @@ def test_comparison_temporal(pa_type): # GH 53171 unit = pa_type.unit if pa.types.is_duration(pa_type): - val = pd.Timedelta(1, unit=unit).as_unit(unit) + val = pd.Timedelta(1, input_unit=unit).as_unit(unit) else: - val = pd.Timestamp(1, unit=unit, tz=pa_type.tz).as_unit(unit) + val = pd.Timestamp(1, input_unit=unit, tz=pa_type.tz).as_unit(unit) arr = 
ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) @@ -3180,10 +3180,10 @@ def test_getitem_temporal(pa_type): arr = ArrowExtensionArray(pa.array([1, 2, 3], type=pa_type)) result = arr[1] if pa.types.is_duration(pa_type): - expected = pd.Timedelta(2, unit=pa_type.unit).as_unit(pa_type.unit) + expected = pd.Timedelta(2, input_unit=pa_type.unit).as_unit(pa_type.unit) assert isinstance(result, pd.Timedelta) else: - expected = pd.Timestamp(2, unit=pa_type.unit, tz=pa_type.tz).as_unit( + expected = pd.Timestamp(2, input_unit=pa_type.unit, tz=pa_type.tz).as_unit( pa_type.unit ) assert isinstance(result, pd.Timestamp) @@ -3200,13 +3200,15 @@ def test_iter_temporal(pa_type): result = list(arr) if pa.types.is_duration(pa_type): expected = [ - pd.Timedelta(1, unit=pa_type.unit).as_unit(pa_type.unit), + pd.Timedelta(1, input_unit=pa_type.unit).as_unit(pa_type.unit), pd.NA, ] assert isinstance(result[0], pd.Timedelta) else: expected = [ - pd.Timestamp(1, unit=pa_type.unit, tz=pa_type.tz).as_unit(pa_type.unit), + pd.Timestamp(1, input_unit=pa_type.unit, tz=pa_type.tz).as_unit( + pa_type.unit + ), pd.NA, ] assert isinstance(result[0], pd.Timestamp) @@ -3232,9 +3234,11 @@ def test_to_numpy_temporal(pa_type, dtype): arr = ArrowExtensionArray(pa.array([1, None], type=pa_type)) result = arr.to_numpy(dtype=dtype) if pa.types.is_duration(pa_type): - value = pd.Timedelta(1, unit=pa_type.unit).as_unit(pa_type.unit) + value = pd.Timedelta(1, input_unit=pa_type.unit).as_unit(pa_type.unit) else: - value = pd.Timestamp(1, unit=pa_type.unit, tz=pa_type.tz).as_unit(pa_type.unit) + value = pd.Timestamp(1, input_unit=pa_type.unit, tz=pa_type.tz).as_unit( + pa_type.unit + ) if dtype == object or (pa.types.is_timestamp(pa_type) and pa_type.tz is not None): if dtype == object: diff --git a/pandas/tests/frame/indexing/test_mask.py b/pandas/tests/frame/indexing/test_mask.py index e4036efeab7ff..b7748722dd9b6 100644 --- a/pandas/tests/frame/indexing/test_mask.py +++ 
b/pandas/tests/frame/indexing/test_mask.py @@ -125,7 +125,7 @@ def test_mask_stringdtype(frame_or_series): def test_mask_where_dtype_timedelta(): # https://github.com/pandas-dev/pandas/issues/39548 - df = DataFrame([Timedelta(i, unit="D") for i in range(5)]) + df = DataFrame([Timedelta(i, input_unit="D") for i in range(5)]) expected = DataFrame(np.full(5, np.nan, dtype="timedelta64[ns]")) tm.assert_frame_equal(df.mask(df.notna()), expected) @@ -133,7 +133,7 @@ def test_mask_where_dtype_timedelta(): expected = DataFrame( [np.nan, np.nan, np.nan, Timedelta("3 day"), Timedelta("4 day")] ) - tm.assert_frame_equal(df.where(df > Timedelta(2, unit="D")), expected) + tm.assert_frame_equal(df.where(df > Timedelta(2, input_unit="D")), expected) def test_mask_return_dtype(): diff --git a/pandas/tests/frame/indexing/test_setitem.py b/pandas/tests/frame/indexing/test_setitem.py index c0fead4889932..6651be2e5062b 100644 --- a/pandas/tests/frame/indexing/test_setitem.py +++ b/pandas/tests/frame/indexing/test_setitem.py @@ -991,8 +991,8 @@ def test_setitem_frame_keep_ea_dtype(self, any_numeric_ea_dtype): def test_loc_expansion_with_timedelta_type(self): result = DataFrame(columns=list("abc")) result.loc[0] = { - "a": pd.to_timedelta(5, unit="s"), - "b": pd.to_timedelta(72, unit="s"), + "a": pd.to_timedelta(5, input_unit="s"), + "b": pd.to_timedelta(72, input_unit="s"), "c": "23", } expected = DataFrame( diff --git a/pandas/tests/frame/methods/test_astype.py b/pandas/tests/frame/methods/test_astype.py index 9fd1f3133c2f5..dd996d2f3adea 100644 --- a/pandas/tests/frame/methods/test_astype.py +++ b/pandas/tests/frame/methods/test_astype.py @@ -150,7 +150,7 @@ def test_astype_str(self): # see GH#9757 a = Series(date_range("2010-01-04", periods=5)) b = Series(date_range("3/6/2012 00:00", periods=5, tz="US/Eastern")) - c = Series([Timedelta(x, unit="D") for x in range(5)]) + c = Series([Timedelta(x, input_unit="D") for x in range(5)]) d = Series(range(5)) e = Series([0.0, 0.2, 0.4, 0.6, 
0.8]) @@ -442,9 +442,9 @@ def test_astype_from_datetimelike_to_object(self, dtype, unit): assert (result.dtypes == object).all() if dtype.startswith("M8"): - assert result.iloc[0, 0] == Timestamp(1, unit=unit) + assert result.iloc[0, 0] == Timestamp(1, input_unit=unit) else: - assert result.iloc[0, 0] == Timedelta(1, unit=unit) + assert result.iloc[0, 0] == Timedelta(1, input_unit=unit) @pytest.mark.parametrize("dtype", ["M8", "m8"]) @pytest.mark.parametrize("unit", ["ns", "us", "ms", "s", "h", "m", "D"]) diff --git a/pandas/tests/frame/methods/test_convert_dtypes.py b/pandas/tests/frame/methods/test_convert_dtypes.py index e90786a43c483..2ce7bd6aa2544 100644 --- a/pandas/tests/frame/methods/test_convert_dtypes.py +++ b/pandas/tests/frame/methods/test_convert_dtypes.py @@ -220,7 +220,7 @@ def test_convert_dtype_pyarrow_timezone_preserve(self): df = pd.DataFrame( { "timestamps": pd.Series( - pd.to_datetime(range(5), utc=True, unit="h"), + pd.to_datetime(range(5), utc=True, input_unit="h"), dtype="timestamp[ns, tz=UTC][pyarrow]", ) } diff --git a/pandas/tests/frame/methods/test_isin.py b/pandas/tests/frame/methods/test_isin.py index b4511aad27a93..9679e1fdcdd62 100644 --- a/pandas/tests/frame/methods/test_isin.py +++ b/pandas/tests/frame/methods/test_isin.py @@ -178,7 +178,9 @@ def test_isin_multiIndex(self): def test_isin_empty_datetimelike(self): # GH#15473 df1_ts = DataFrame({"date": pd.to_datetime(["2014-01-01", "2014-01-02"])}) - df1_td = DataFrame({"date": [pd.Timedelta(1, "s"), pd.Timedelta(2, "s")]}) + df1_td = DataFrame( + {"date": [pd.Timedelta(1, input_unit="s"), pd.Timedelta(2, input_unit="s")]} + ) df2 = DataFrame({"date": []}) df3 = DataFrame() diff --git a/pandas/tests/frame/methods/test_reset_index.py b/pandas/tests/frame/methods/test_reset_index.py index 80227c0462329..676f42403e46a 100644 --- a/pandas/tests/frame/methods/test_reset_index.py +++ b/pandas/tests/frame/methods/test_reset_index.py @@ -600,8 +600,12 @@ def test_reset_index_with_drop( 
{"a": [pd.NaT, Timestamp("2020-01-01")], "b": [1, 2], "x": [11, 12]}, ), ( - [(pd.NaT, 1), (pd.Timedelta(123, "D"), 2)], - {"a": [pd.NaT, pd.Timedelta(123, "D")], "b": [1, 2], "x": [11, 12]}, + [(pd.NaT, 1), (pd.Timedelta(123, input_unit="D"), 2)], + { + "a": [pd.NaT, pd.Timedelta(123, input_unit="D")], + "b": [1, 2], + "x": [11, 12], + }, ), ], ) diff --git a/pandas/tests/frame/methods/test_to_timestamp.py b/pandas/tests/frame/methods/test_to_timestamp.py index 0e7e1d595d6be..0e59a1c6f39ae 100644 --- a/pandas/tests/frame/methods/test_to_timestamp.py +++ b/pandas/tests/frame/methods/test_to_timestamp.py @@ -37,7 +37,9 @@ def test_to_timestamp(self, frame_or_series): obj = tm.get_obj(obj, frame_or_series) exp_index = date_range("1/1/2001", end="12/31/2009", freq="YE-DEC") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") + ) result = obj.to_timestamp("D", "end") tm.assert_index_equal(result.index, exp_index) tm.assert_numpy_array_equal(result.values, obj.values) @@ -54,19 +56,25 @@ def test_to_timestamp(self, frame_or_series): delta = timedelta(hours=23) result = obj.to_timestamp("H", "end") exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="h") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.index, exp_index) delta = timedelta(hours=23, minutes=59) result = obj.to_timestamp("T", "end") exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="m") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.index, exp_index) result = obj.to_timestamp("S", "end") delta = timedelta(hours=23, minutes=59, seconds=59) exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + exp_index = ( + exp_index + 
Timedelta(1, input_unit="s") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.index, exp_index) def test_to_timestamp_columns(self): @@ -83,7 +91,9 @@ def test_to_timestamp_columns(self): df = df.T exp_index = date_range("1/1/2001", end="12/31/2009", freq="YE-DEC") - exp_index = exp_index + Timedelta(1, "D") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") + ) result = df.to_timestamp("D", "end", axis=1) tm.assert_index_equal(result.columns, exp_index) tm.assert_numpy_array_equal(result.values, df.values) @@ -95,19 +105,25 @@ def test_to_timestamp_columns(self): delta = timedelta(hours=23) result = df.to_timestamp("H", "end", axis=1) exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "h") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="h") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.columns, exp_index) delta = timedelta(hours=23, minutes=59) result = df.to_timestamp("min", "end", axis=1) exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "m") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="m") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.columns, exp_index) result = df.to_timestamp("S", "end", axis=1) delta = timedelta(hours=23, minutes=59, seconds=59) exp_index = _get_with_delta(delta) - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="s") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.columns, exp_index) result1 = df.to_timestamp("5min", axis=1) @@ -139,7 +155,9 @@ def test_to_timestamp_hourly(self, frame_or_series): exp_index = date_range("1/1/2001 00:59:59", end="1/2/2001 00:59:59", freq="h") result = obj.to_timestamp(how="end") - exp_index = exp_index + Timedelta(1, "s") - Timedelta(1, "ns") + exp_index = ( + exp_index + Timedelta(1, input_unit="s") - 
Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result.index, exp_index) if frame_or_series is Series: assert result.name == "foo" diff --git a/pandas/tests/frame/test_constructors.py b/pandas/tests/frame/test_constructors.py index 264011edb65b5..ccde72836f7fa 100644 --- a/pandas/tests/frame/test_constructors.py +++ b/pandas/tests/frame/test_constructors.py @@ -892,8 +892,8 @@ def create_data(constructor): [ (lambda x: np.timedelta64(x, "D"), "timedelta64"), (lambda x: timedelta(days=x), "pytimedelta"), - (lambda x: Timedelta(x, "D"), "Timedelta[ns]"), - (lambda x: Timedelta(x, "D").as_unit("s"), "Timedelta[s]"), + (lambda x: Timedelta(x, input_unit="D"), "Timedelta[ns]"), + (lambda x: Timedelta(x, input_unit="D").as_unit("s"), "Timedelta[s]"), ], ) def test_constructor_dict_timedelta64_index(self, klass, name): @@ -909,7 +909,7 @@ def test_constructor_dict_timedelta64_index(self, klass, name): {0: None, 1: None, 2: 4, 3: None}, {0: None, 1: None, 2: None, 3: 6}, ], - index=[Timedelta(td, "D") for td in td_as_int], + index=[Timedelta(td, input_unit="D") for td in td_as_int], ) result = DataFrame(data) @@ -2079,8 +2079,8 @@ def test_constructor_timedelta_non_ns(self, order, unit): exp_dtype = np.dtype(f"m8[{exp_unit}]") expected = DataFrame( [ - [Timedelta(1, "D"), Timedelta(2, "D")], - [Timedelta(4, "D"), Timedelta(5, "D")], + [Timedelta(1, input_unit="D"), Timedelta(2, input_unit="D")], + [Timedelta(4, input_unit="D"), Timedelta(5, input_unit="D")], ], dtype=exp_dtype, ) diff --git a/pandas/tests/frame/test_reductions.py b/pandas/tests/frame/test_reductions.py index 7c4ce4c67f13d..638cc497f34b8 100644 --- a/pandas/tests/frame/test_reductions.py +++ b/pandas/tests/frame/test_reductions.py @@ -1682,7 +1682,9 @@ def test_frame_any_with_timedelta(self): df = DataFrame( { "a": Series([0, 0]), - "t": Series([to_timedelta(0, "s"), to_timedelta(1, "ms")]), + "t": Series( + [to_timedelta(0, input_unit="s"), to_timedelta(1, input_unit="ms")] + ), } ) diff --git 
a/pandas/tests/groupby/methods/test_nth.py b/pandas/tests/groupby/methods/test_nth.py index d9c8706ec9202..ccf11b4997627 100644 --- a/pandas/tests/groupby/methods/test_nth.py +++ b/pandas/tests/groupby/methods/test_nth.py @@ -443,7 +443,7 @@ def test_first_last_tz_multi_column(method, ts, alpha, unit): pd.array([True, False], dtype="boolean"), pd.array([1, 2], dtype="Int64"), pd.to_datetime(["2020-01-01", "2020-02-01"]), - pd.to_timedelta([1, 2], unit="D"), + pd.to_timedelta([1, 2], input_unit="D"), ], ) @pytest.mark.parametrize("function", ["first", "last", "min", "max"]) diff --git a/pandas/tests/groupby/methods/test_quantile.py b/pandas/tests/groupby/methods/test_quantile.py index 815513fe96009..496f73644d07b 100644 --- a/pandas/tests/groupby/methods/test_quantile.py +++ b/pandas/tests/groupby/methods/test_quantile.py @@ -364,7 +364,7 @@ def test_groupby_quantile_allNA_column(dtype): def test_groupby_timedelta_quantile(): # GH: 29485 df = DataFrame( - {"value": pd.to_timedelta(np.arange(4), unit="s"), "group": [1, 1, 2, 2]} + {"value": pd.to_timedelta(np.arange(4), input_unit="s"), "group": [1, 1, 2, 2]} ) result = df.groupby("group").quantile(0.99) expected = DataFrame( diff --git a/pandas/tests/groupby/methods/test_value_counts.py b/pandas/tests/groupby/methods/test_value_counts.py index 536e94483c36f..4c57f8035716b 100644 --- a/pandas/tests/groupby/methods/test_value_counts.py +++ b/pandas/tests/groupby/methods/test_value_counts.py @@ -132,7 +132,7 @@ def test_series_groupby_value_counts_with_grouper(utc): } ).drop([3]) - df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s") + df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, input_unit="s") dfg = df.groupby(Grouper(freq="1D", key="Datetime")) # have to sort on index because of unstable sort on values xref GH9212 @@ -1123,7 +1123,9 @@ def test_value_counts_time_grouper(utc, unit): } ).drop([3]) - df["Datetime"] = to_datetime(df["Timestamp"], utc=utc, unit="s").dt.as_unit(unit) + df["Datetime"] 
= to_datetime(df["Timestamp"], utc=utc, input_unit="s").dt.as_unit( + unit + ) gb = df.groupby(Grouper(freq="1D", key="Datetime")) result = gb.value_counts() dates = to_datetime( diff --git a/pandas/tests/groupby/test_apply.py b/pandas/tests/groupby/test_apply.py index 635393e41bd9d..2298f98d52c09 100644 --- a/pandas/tests/groupby/test_apply.py +++ b/pandas/tests/groupby/test_apply.py @@ -1273,14 +1273,14 @@ def test_apply_empty_string_nan_coerce_bug(): { "a": [1, 1, 2, 2], "b": ["", "", "", ""], - "c": pd.to_datetime([1, 2, 3, 4], unit="s"), + "c": pd.to_datetime([1, 2, 3, 4], input_unit="s"), } ) .groupby(["a", "b"]) .apply(lambda df: df.iloc[-1]) ) expected = DataFrame( - [[pd.to_datetime(2, unit="s")], [pd.to_datetime(4, unit="s")]], + [[pd.to_datetime(2, input_unit="s")], [pd.to_datetime(4, input_unit="s")]], columns=["c"], index=MultiIndex.from_tuples([(1, ""), (2, "")], names=["a", "b"]), ) diff --git a/pandas/tests/groupby/test_counting.py b/pandas/tests/groupby/test_counting.py index 679f7eb7f7f11..e4b252897577e 100644 --- a/pandas/tests/groupby/test_counting.py +++ b/pandas/tests/groupby/test_counting.py @@ -208,7 +208,7 @@ def test_ngroup_respects_groupby_order(self, sort): [Timestamp(f"2016-05-{i:02d} 20:09:25+00:00") for i in range(1, 4)], [Timestamp(f"2016-05-{i:02d} 20:09:25") for i in range(1, 4)], [Timestamp(f"2016-05-{i:02d} 20:09:25", tz="UTC") for i in range(1, 4)], - [Timedelta(x, unit="h") for x in range(1, 4)], + [Timedelta(x, input_unit="h") for x in range(1, 4)], [Period(freq="2W", year=2017, month=x) for x in range(1, 4)], ], ) diff --git a/pandas/tests/groupby/test_groupby.py b/pandas/tests/groupby/test_groupby.py index 4955b1fe0da54..023b21fb13467 100644 --- a/pandas/tests/groupby/test_groupby.py +++ b/pandas/tests/groupby/test_groupby.py @@ -2339,15 +2339,15 @@ def test_groupby_aggregation_multi_non_numeric_dtype(): df = DataFrame( { "x": [1, 0, 1, 1, 0], - "y": [Timedelta(i, "days") for i in range(1, 6)], - "z": [Timedelta(i * 10, 
"days") for i in range(1, 6)], + "y": [Timedelta(i, input_unit="days") for i in range(1, 6)], + "z": [Timedelta(i * 10, input_unit="days") for i in range(1, 6)], } ) expected = DataFrame( { - "y": [Timedelta(i, "days") for i in range(7, 9)], - "z": [Timedelta(i * 10, "days") for i in range(7, 9)], + "y": [Timedelta(i, input_unit="days") for i in range(7, 9)], + "z": [Timedelta(i * 10, input_unit="days") for i in range(7, 9)], }, index=Index([0, 1], dtype="int64", name="x"), ) @@ -2362,13 +2362,16 @@ def test_groupby_aggregation_numeric_with_non_numeric_dtype(): df = DataFrame( { "x": [1, 0, 1, 1, 0], - "y": [Timedelta(i, "days") for i in range(1, 6)], + "y": [Timedelta(i, input_unit="days") for i in range(1, 6)], "z": list(range(1, 6)), } ) expected = DataFrame( - {"y": [Timedelta(7, "days"), Timedelta(8, "days")], "z": [7, 8]}, + { + "y": [Timedelta(7, input_unit="days"), Timedelta(8, input_unit="days")], + "z": [7, 8], + }, index=Index([0, 1], dtype="int64", name="x"), ) diff --git a/pandas/tests/indexes/datetimes/test_constructors.py b/pandas/tests/indexes/datetimes/test_constructors.py index c418b2a18008b..e18b0c1ad26c9 100644 --- a/pandas/tests/indexes/datetimes/test_constructors.py +++ b/pandas/tests/indexes/datetimes/test_constructors.py @@ -1018,7 +1018,7 @@ def test_dti_constructor_with_non_nano_dtype(self, tz): pointwise = [ vals[0].tz_localize(tz), Timestamp(vals[1], tz=tz), - to_datetime(vals[2], unit="us", utc=True).tz_convert(tz), + to_datetime(vals[2], input_unit="us", utc=True).tz_convert(tz), ] exp_vals = [x.as_unit("us").asm8 for x in pointwise] exp_arr = np.array(exp_vals, dtype="M8[us]") diff --git a/pandas/tests/indexes/period/methods/test_to_timestamp.py b/pandas/tests/indexes/period/methods/test_to_timestamp.py index 4fe429ce71ee4..3b4e0d18d21ab 100644 --- a/pandas/tests/indexes/period/methods/test_to_timestamp.py +++ b/pandas/tests/indexes/period/methods/test_to_timestamp.py @@ -109,7 +109,9 @@ def test_to_timestamp_pi_mult(self): expected = 
DatetimeIndex( ["2011-02-28", "NaT", "2011-03-31"], dtype="M8[ns]", name="idx" ) - expected = expected + Timedelta(1, "D") - Timedelta(1, "ns") + expected = ( + expected + Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result, expected) def test_to_timestamp_pi_combined(self): @@ -125,14 +127,18 @@ def test_to_timestamp_pi_combined(self): expected = DatetimeIndex( ["2011-01-02 00:59:59", "2011-01-03 01:59:59"], name="idx", dtype="M8[ns]" ) - expected = expected + Timedelta(1, "s") - Timedelta(1, "ns") + expected = ( + expected + Timedelta(1, input_unit="s") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result, expected) result = idx.to_timestamp(how="E", freq="h") expected = DatetimeIndex( ["2011-01-02 00:00", "2011-01-03 01:00"], dtype="M8[ns]", name="idx" ) - expected = expected + Timedelta(1, "h") - Timedelta(1, "ns") + expected = ( + expected + Timedelta(1, input_unit="h") - Timedelta(1, input_unit="ns") + ) tm.assert_index_equal(result, expected) def test_to_timestamp_1703(self): diff --git a/pandas/tests/indexes/period/test_scalar_compat.py b/pandas/tests/indexes/period/test_scalar_compat.py index d8afd29ff31c5..a95e8d3b4bcbe 100644 --- a/pandas/tests/indexes/period/test_scalar_compat.py +++ b/pandas/tests/indexes/period/test_scalar_compat.py @@ -21,7 +21,7 @@ def test_end_time(self): # GH#17157 index = period_range(freq="M", start="2016-01-01", end="2016-05-31") expected_index = date_range("2016-01-01", end="2016-05-31", freq="ME") - expected_index += Timedelta(1, "D") - Timedelta(1, "ns") + expected_index += Timedelta(1, input_unit="D") - Timedelta(1, input_unit="ns") tm.assert_index_equal(index.end_time, expected_index) @pytest.mark.filterwarnings(r"ignore:PeriodDtype\[B\] is deprecated:FutureWarning") diff --git a/pandas/tests/indexes/test_datetimelike.py b/pandas/tests/indexes/test_datetimelike.py index e45d11e6286e2..c6a94f05319ec 100644 --- a/pandas/tests/indexes/test_datetimelike.py +++ 
b/pandas/tests/indexes/test_datetimelike.py @@ -164,7 +164,7 @@ def test_where_cast_str(self, simple_index): def test_diff(self, unit): # GH 55080 - dti = pd.to_datetime([10, 20, 30], unit=unit).as_unit(unit) + dti = pd.to_datetime([10, 20, 30], input_unit=unit).as_unit(unit) result = dti.diff(1) - expected = pd.to_timedelta([pd.NaT, 10, 10], unit=unit).as_unit(unit) + expected = pd.to_timedelta([pd.NaT, 10, 10], input_unit=unit).as_unit(unit) tm.assert_index_equal(result, expected) diff --git a/pandas/tests/indexes/timedeltas/methods/test_shift.py b/pandas/tests/indexes/timedeltas/methods/test_shift.py index 9bbf06dc51a0c..d06058b1d4c65 100644 --- a/pandas/tests/indexes/timedeltas/methods/test_shift.py +++ b/pandas/tests/indexes/timedeltas/methods/test_shift.py @@ -37,7 +37,7 @@ def test_tdi_shift_minutes(self): def test_tdi_shift_int(self): # GH#8083 - tdi = pd.to_timedelta(range(5), unit="D") + tdi = pd.to_timedelta(range(5), input_unit="D") trange = tdi._with_freq("infer") + pd.offsets.Hour(1) result = trange.shift(1) expected = TimedeltaIndex( @@ -54,7 +54,7 @@ def test_tdi_shift_int(self): def test_tdi_shift_nonstandard_freq(self): # GH#8083 - tdi = pd.to_timedelta(range(5), unit="D") + tdi = pd.to_timedelta(range(5), input_unit="D") trange = tdi._with_freq("infer") + pd.offsets.Hour(1) result = trange.shift(3, freq="2D 1s") expected = TimedeltaIndex( diff --git a/pandas/tests/indexes/timedeltas/test_constructors.py b/pandas/tests/indexes/timedeltas/test_constructors.py index 63d2161dcec09..6645994da4f94 100644 --- a/pandas/tests/indexes/timedeltas/test_constructors.py +++ b/pandas/tests/indexes/timedeltas/test_constructors.py @@ -103,7 +103,7 @@ def test_float64_ns_rounded(self): def test_float64_unit_conversion(self): # GH#23539 - tdi = to_timedelta([1.5, 2.25], unit="D") + tdi = to_timedelta([1.5, 2.25], input_unit="D") expected = TimedeltaIndex([Timedelta(days=1.5), Timedelta(days=2.25)]) tm.assert_index_equal(tdi, expected) @@ -170,7 +170,7 @@ def 
test_constructor_coverage(self): # NumPy string array strings = np.array(["1 days", "2 days", "3 days"]) result = TimedeltaIndex(strings) - expected = to_timedelta([1, 2, 3], unit="D") + expected = to_timedelta([1, 2, 3], input_unit="D") tm.assert_index_equal(result, expected) from_ints = TimedeltaIndex(expected.asi8) @@ -264,5 +264,5 @@ def test_unit_deprecated(self, unit, unit_depr): tm.assert_index_equal(result, expected) with tm.assert_produces_warning(Pandas4Warning, match=msg): - tdi = to_timedelta([1, 2], unit=unit_depr) + tdi = to_timedelta([1, 2], input_unit=unit_depr) tm.assert_index_equal(tdi, expected) diff --git a/pandas/tests/indexes/timedeltas/test_indexing.py b/pandas/tests/indexes/timedeltas/test_indexing.py index ca126a68cbd43..86a2a94c0c279 100644 --- a/pandas/tests/indexes/timedeltas/test_indexing.py +++ b/pandas/tests/indexes/timedeltas/test_indexing.py @@ -343,7 +343,7 @@ def test_contains(self): # GH#13603, GH#59051 msg = "'d' is deprecated and will be removed in a future version." 
with tm.assert_produces_warning(Pandas4Warning, match=msg): - td = to_timedelta(range(5), unit="d") + offsets.Hour(1) + td = to_timedelta(range(5), input_unit="d") + offsets.Hour(1) for v in [NaT, None, float("nan"), np.nan]: assert v not in td diff --git a/pandas/tests/indexes/timedeltas/test_timedelta_range.py b/pandas/tests/indexes/timedeltas/test_timedelta_range.py index 01ff536652d2f..eab5052b1fce4 100644 --- a/pandas/tests/indexes/timedeltas/test_timedelta_range.py +++ b/pandas/tests/indexes/timedeltas/test_timedelta_range.py @@ -24,23 +24,23 @@ def test_timedelta_range_unit(self): tm.assert_numpy_array_equal(tdi.to_numpy(), exp_arr) def test_timedelta_range(self): - expected = to_timedelta(np.arange(5), unit="D") + expected = to_timedelta(np.arange(5), input_unit="D") result = timedelta_range("0 days", periods=5, freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(11), unit="D") + expected = to_timedelta(np.arange(11), input_unit="D") result = timedelta_range("0 days", "10 days", freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(5), unit="D") + Second(2) + Day() + expected = to_timedelta(np.arange(5), input_unit="D") + Second(2) + Day() result = timedelta_range("1 days, 00:00:02", "5 days, 00:00:02", freq="D") tm.assert_index_equal(result, expected) - expected = to_timedelta([1, 3, 5, 7, 9], unit="D") + Second(2) + expected = to_timedelta([1, 3, 5, 7, 9], input_unit="D") + Second(2) result = timedelta_range("1 days, 00:00:02", periods=5, freq="2D") tm.assert_index_equal(result, expected) - expected = to_timedelta(np.arange(50), unit="min") * 30 + expected = to_timedelta(np.arange(50), input_unit="min") * 30 result = timedelta_range("0 days", freq="30min", periods=50) tm.assert_index_equal(result, expected) @@ -50,17 +50,17 @@ def test_timedelta_units_H_S_deprecated(self, depr_unit, unit): depr_msg = ( f"'{depr_unit}' is deprecated and will be removed in a future version." 
) - expected = to_timedelta(np.arange(5), unit=unit) + expected = to_timedelta(np.arange(5), input_unit=unit) with tm.assert_produces_warning(Pandas4Warning, match=depr_msg): - result = to_timedelta(np.arange(5), unit=depr_unit) - tm.assert_index_equal(result, expected) + result = to_timedelta(np.arange(5), input_unit=depr_unit) + tm.assert_index_equal(result, expected) @pytest.mark.parametrize("unit", ["T", "t", "L", "l", "U", "u", "N", "n"]) def test_timedelta_unit_T_L_U_N_raises(self, unit): msg = f"invalid unit abbreviation: {unit}" with pytest.raises(ValueError, match=msg): - to_timedelta(np.arange(5), unit=unit) + to_timedelta(np.arange(5), input_unit=unit) @pytest.mark.parametrize( "periods, freq", [(3, "2D"), (5, "D"), (6, "19h12min"), (7, "16h"), (9, "12h")] diff --git a/pandas/tests/indexing/test_categorical.py b/pandas/tests/indexing/test_categorical.py index a31f463d0b17e..0373f1d0073d5 100644 --- a/pandas/tests/indexing/test_categorical.py +++ b/pandas/tests/indexing/test_categorical.py @@ -511,7 +511,11 @@ def test_loc_and_at_with_categorical_index(self): # pandas scalars [Interval(1, 4), Interval(4, 6), Interval(6, 9)], [Timestamp(2019, 1, 1), Timestamp(2019, 2, 1), Timestamp(2019, 3, 1)], - [Timedelta(1, "D"), Timedelta(2, "D"), Timedelta(3, "D")], + [ + Timedelta(1, input_unit="D"), + Timedelta(2, input_unit="D"), + Timedelta(3, input_unit="D"), + ], # pandas Integer arrays *(pd.array([1, 2, 3], dtype=dtype) for dtype in tm.ALL_INT_EA_DTYPES), # other pandas arrays diff --git a/pandas/tests/indexing/test_loc.py b/pandas/tests/indexing/test_loc.py index 8e4845a72ec35..4585614e5a61a 100644 --- a/pandas/tests/indexing/test_loc.py +++ b/pandas/tests/indexing/test_loc.py @@ -702,7 +702,9 @@ def test_loc_modify_datetime(self): {"date": [1485264372711, 1485265925110, 1540215845888, 1540282121025]} ) - df["date_dt"] = to_datetime(df["date"], unit="ms", cache=True).dt.as_unit("ms") + df["date_dt"] = to_datetime(df["date"], input_unit="ms", 
cache=True).dt.as_unit( + "ms" + ) df.loc[:, "date_dt_cp"] = df.loc[:, "date_dt"] df.loc[[2, 3], "date_dt_cp"] = df.loc[[2, 3], "date_dt"] @@ -1396,7 +1398,7 @@ def test_loc_setitem_int_label_with_float_index(self, float_numpy_dtype): ) def test_loc_setitem_listlike_with_timedelta64index(self, indexer, expected): # GH#16637 - tdi = to_timedelta(range(10), unit="s") + tdi = to_timedelta(range(10), input_unit="s") df = DataFrame({"x": range(10)}, dtype="int64", index=tdi) df.loc[df.index[indexer], "x"] = 20 @@ -2371,7 +2373,7 @@ def test_loc_getitem_partial_string_slicing_with_timedeltaindex(self): def test_loc_getitem_str_timedeltaindex(self): # GH#16896 - df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), unit="days")) + df = DataFrame({"x": range(3)}, index=to_timedelta(range(3), input_unit="days")) expected = df.iloc[0] sliced = df.loc["0 days"] tm.assert_series_equal(sliced, expected) @@ -2578,7 +2580,7 @@ class TestLocBooleanMask: def test_loc_setitem_bool_mask_timedeltaindex(self): # GH#14946 df = DataFrame({"x": range(10)}) - df.index = to_timedelta(range(10), unit="s") + df.index = to_timedelta(range(10), input_unit="s") conditions = [df["x"] > 3, df["x"] == 3, df["x"] < 3] expected_data = [ [0, 1, 2, 3, 10, 10, 10, 10, 10, 10], @@ -2591,7 +2593,7 @@ def test_loc_setitem_bool_mask_timedeltaindex(self): expected = DataFrame( data, - index=to_timedelta(range(10), unit="s"), + index=to_timedelta(range(10), input_unit="s"), columns=["x"], dtype="int64", ) @@ -3304,10 +3306,12 @@ def test_loc_assign_dict_to_row(self, dtype): def test_loc_setitem_dict_timedelta_multiple_set(self): # GH 16309 result = DataFrame(columns=["time", "value"]) - result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} - result.loc[1] = {"time": Timedelta(6, unit="s"), "value": "foo"} + result.loc[1] = {"time": Timedelta(6, input_unit="s"), "value": "foo"} + result.loc[1] = {"time": Timedelta(6, input_unit="s"), "value": "foo"} expected = DataFrame( - [[Timedelta(6, 
unit="s"), "foo"]], columns=["time", "value"], index=[1] + [[Timedelta(6, input_unit="s"), "foo"]], + columns=["time", "value"], + index=[1], ) tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/interchange/test_impl.py b/pandas/tests/interchange/test_impl.py index 5b7564e77d0ab..9d1c6d261f95b 100644 --- a/pandas/tests/interchange/test_impl.py +++ b/pandas/tests/interchange/test_impl.py @@ -486,6 +486,9 @@ def test_non_str_names_w_duplicates(): ), ], ) +@pytest.mark.filterwarnings( + "ignore:The 'unit' keyword is deprecated:DeprecationWarning" +) def test_pandas_nullable_with_missing_values( data: list, dtype: str, expected_dtype: str ) -> None: @@ -553,6 +556,9 @@ def test_pandas_nullable_with_missing_values( ), ], ) +@pytest.mark.filterwarnings( + "ignore:The 'unit' keyword is deprecated:DeprecationWarning" +) def test_pandas_nullable_without_missing_values( data: list, dtype: str, expected_dtype: str ) -> None: diff --git a/pandas/tests/io/formats/test_format.py b/pandas/tests/io/formats/test_format.py index d2da38be865a0..e3c94e6387daa 100644 --- a/pandas/tests/io/formats/test_format.py +++ b/pandas/tests/io/formats/test_format.py @@ -2054,7 +2054,7 @@ def test_too_long(self): class TestTimedelta64Formatter: def test_days(self): - x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values + x = pd.to_timedelta(list(range(5)) + [NaT], input_unit="D")._values result = fmt._Timedelta64Formatter(x).get_result() assert result[0].strip() == "0 days" assert result[1].strip() == "1 days" @@ -2070,29 +2070,29 @@ def test_days(self): assert result[0].strip() == "1 days" def test_days_neg(self): - x = pd.to_timedelta(list(range(5)) + [NaT], unit="D")._values + x = pd.to_timedelta(list(range(5)) + [NaT], input_unit="D")._values result = fmt._Timedelta64Formatter(-x).get_result() assert result[0].strip() == "0 days" assert result[1].strip() == "-1 days" def test_subdays(self): - y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values + y = 
pd.to_timedelta(list(range(5)) + [NaT], input_unit="s")._values result = fmt._Timedelta64Formatter(y).get_result() assert result[0].strip() == "0 days 00:00:00" assert result[1].strip() == "0 days 00:00:01" def test_subdays_neg(self): - y = pd.to_timedelta(list(range(5)) + [NaT], unit="s")._values + y = pd.to_timedelta(list(range(5)) + [NaT], input_unit="s")._values result = fmt._Timedelta64Formatter(-y).get_result() assert result[0].strip() == "0 days 00:00:00" assert result[1].strip() == "-1 days +23:59:59" def test_zero(self): - x = pd.to_timedelta(list(range(1)) + [NaT], unit="D")._values + x = pd.to_timedelta(list(range(1)) + [NaT], input_unit="D")._values result = fmt._Timedelta64Formatter(x).get_result() assert result[0].strip() == "0 days" - x = pd.to_timedelta(list(range(1)), unit="D")._values + x = pd.to_timedelta(list(range(1)), input_unit="D")._values result = fmt._Timedelta64Formatter(x).get_result() assert result[0].strip() == "0 days" diff --git a/pandas/tests/io/json/test_json_table_schema.py b/pandas/tests/io/json/test_json_table_schema.py index e4d5eb4758303..485f7a6984bdb 100644 --- a/pandas/tests/io/json/test_json_table_schema.py +++ b/pandas/tests/io/json/test_json_table_schema.py @@ -646,7 +646,7 @@ def test_warns_non_roundtrippable_names(self, idx): def test_timestamp_in_columns(self): df = DataFrame( - [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, unit="s")] + [[1, 2]], columns=[pd.Timestamp("2016"), pd.Timedelta(10, input_unit="s")] ) result = df.to_json(orient="table") js = json.loads(result) diff --git a/pandas/tests/io/json/test_pandas.py b/pandas/tests/io/json/test_pandas.py index d67e725233127..c7db23561acf5 100644 --- a/pandas/tests/io/json/test_pandas.py +++ b/pandas/tests/io/json/test_pandas.py @@ -1002,7 +1002,10 @@ def test_date_unit(self, unit, datetime_frame): (DataFrame({"A": [True, False, False]}), None), ( DataFrame( - {"A": ["a", "b", "c"], "B": pd.to_timedelta(np.arange(3), unit="D")} + { + "A": ["a", "b", "c"], 
+ "B": pd.to_timedelta(np.arange(3), input_unit="D"), + } ), Pandas4Warning, ), @@ -1129,7 +1132,7 @@ def test_url(self, field, dtype, httpserver): assert result[field].dtype == dtype def test_timedelta(self): - converter = lambda x: pd.to_timedelta(x, unit="ms") + converter = lambda x: pd.to_timedelta(x, input_unit="ms") ser = Series([timedelta(23), timedelta(seconds=5)]) assert ser.dtype == "timedelta64[ns]" @@ -1170,7 +1173,7 @@ def test_timedelta2(self): with tm.assert_produces_warning(Pandas4Warning, match=msg): data = StringIO(frame.to_json(date_unit="ns")) result = read_json(data) - result["a"] = pd.to_timedelta(result.a, unit="ns") + result["a"] = pd.to_timedelta(result.a, input_unit="ns") result["c"] = pd.to_datetime(result.c) tm.assert_frame_equal(frame, result) diff --git a/pandas/tests/io/pytables/test_select.py b/pandas/tests/io/pytables/test_select.py index 0dffb284fa6d2..664bb35883a29 100644 --- a/pandas/tests/io/pytables/test_select.py +++ b/pandas/tests/io/pytables/test_select.py @@ -999,7 +999,12 @@ def test_query_compare_column_type(setup_path): for op in ["<", ">", "=="]: # non strings to string column always fail - for v in [2.1, True, Timestamp("2014-01-01"), pd.Timedelta(1, "s")]: + for v in [ + 2.1, + True, + Timestamp("2014-01-01"), + pd.Timedelta(1, input_unit="s"), + ]: query = f"date {op} v" msg = f"Cannot compare {v} of type {type(v)} to string column" with pytest.raises(TypeError, match=msg): diff --git a/pandas/tests/io/sas/test_sas7bdat.py b/pandas/tests/io/sas/test_sas7bdat.py index a17cd27f8284e..3ee18470a8441 100644 --- a/pandas/tests/io/sas/test_sas7bdat.py +++ b/pandas/tests/io/sas/test_sas7bdat.py @@ -30,9 +30,9 @@ def data_test_ix(request, dirpath): fname = os.path.join(dirpath, f"test_sas7bdat_{i}.csv") df = pd.read_csv(fname) epoch = datetime(1960, 1, 1) - t1 = pd.to_timedelta(df["Column4"], unit="D") + t1 = pd.to_timedelta(df["Column4"], input_unit="D") df["Column4"] = (epoch + t1).astype("M8[s]") - t2 = 
pd.to_timedelta(df["Column12"], unit="D") + t2 = pd.to_timedelta(df["Column12"], input_unit="D") df["Column12"] = (epoch + t2).astype("M8[s]") for k in range(df.shape[1]): col = df.iloc[:, k] diff --git a/pandas/tests/io/test_sql.py b/pandas/tests/io/test_sql.py index 1b9ae5d8e7209..dda5a6cb1c812 100644 --- a/pandas/tests/io/test_sql.py +++ b/pandas/tests/io/test_sql.py @@ -2985,7 +2985,9 @@ def test_date_parsing(conn, request): df = sql.read_sql_table("types", conn, parse_dates={"IntDateCol": "s"}) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) - df = sql.read_sql_table("types", conn, parse_dates={"IntDateCol": {"unit": "s"}}) + df = sql.read_sql_table( + "types", conn, parse_dates={"IntDateCol": {"input_unit": "s"}} + ) assert issubclass(df.IntDateCol.dtype.type, np.datetime64) diff --git a/pandas/tests/plotting/frame/test_frame_subplots.py b/pandas/tests/plotting/frame/test_frame_subplots.py index 7f4009bdb5e66..bad48af5f1de2 100644 --- a/pandas/tests/plotting/frame/test_frame_subplots.py +++ b/pandas/tests/plotting/frame/test_frame_subplots.py @@ -130,9 +130,9 @@ def test_subplots_timeseries_y_axis(self, col): data = { "numeric": np.array([1, 2, 5]), "timedelta": [ - pd.Timedelta(-10, unit="s"), - pd.Timedelta(10, unit="m"), - pd.Timedelta(10, unit="h"), + pd.Timedelta(-10, input_unit="s"), + pd.Timedelta(10, input_unit="m"), + pd.Timedelta(10, input_unit="h"), ], "datetime_no_tz": [ pd.to_datetime("2017-08-01 00:00:00"), diff --git a/pandas/tests/resample/test_datetime_index.py b/pandas/tests/resample/test_datetime_index.py index ab88d221864c0..e56ab6870cf82 100644 --- a/pandas/tests/resample/test_datetime_index.py +++ b/pandas/tests/resample/test_datetime_index.py @@ -1112,7 +1112,7 @@ def test_resample_anchored_intraday(unit): result = df.resample("ME").mean() expected = df.resample("ME").mean().to_period() expected = expected.to_timestamp(how="end") - expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index += Timedelta(1, 
input_unit="ns") - Timedelta(1, input_unit="D") expected.index = expected.index.as_unit(unit)._with_freq("infer") assert expected.index.freq == "ME" tm.assert_frame_equal(result, expected) @@ -1121,7 +1121,7 @@ def test_resample_anchored_intraday(unit): exp = df.shift(1, freq="D").resample("ME").mean().to_period() exp = exp.to_timestamp(how="end") - exp.index = exp.index + Timedelta(1, "ns") - Timedelta(1, "D") + exp.index = exp.index + Timedelta(1, input_unit="ns") - Timedelta(1, input_unit="D") exp.index = exp.index.as_unit(unit)._with_freq("infer") assert exp.index.freq == "ME" tm.assert_frame_equal(result, exp) @@ -1134,7 +1134,7 @@ def test_resample_anchored_intraday2(unit): result = df.resample("QE").mean() expected = df.resample("QE").mean().to_period() expected = expected.to_timestamp(how="end") - expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index += Timedelta(1, input_unit="ns") - Timedelta(1, input_unit="D") expected.index._data.freq = "QE" expected.index._freq = lib.no_default expected.index = expected.index.as_unit(unit) @@ -1144,7 +1144,7 @@ def test_resample_anchored_intraday2(unit): expected = df.shift(1, freq="D").resample("QE").mean() expected = expected.to_period() expected = expected.to_timestamp(how="end") - expected.index += Timedelta(1, "ns") - Timedelta(1, "D") + expected.index += Timedelta(1, input_unit="ns") - Timedelta(1, input_unit="D") expected.index._data.freq = "QE" expected.index._freq = lib.no_default expected.index = expected.index.as_unit(unit) @@ -1515,7 +1515,7 @@ def test_resample_across_dst(): # 2016-10-30 02:23:00+02:00, 2016-10-30 02:23:00+01:00 df1 = DataFrame([1477786980, 1477790580], columns=["ts"]) dti1 = DatetimeIndex( - pd.to_datetime(df1.ts, unit="s") + pd.to_datetime(df1.ts, input_unit="s") .dt.tz_localize("UTC") .dt.tz_convert("Europe/Madrid") ) @@ -1524,7 +1524,7 @@ def test_resample_across_dst(): # 2016-10-30 02:00:00+02:00, 2016-10-30 02:00:00+01:00 df2 = DataFrame([1477785600, 1477789200], 
columns=["ts"]) dti2 = DatetimeIndex( - pd.to_datetime(df2.ts, unit="s") + pd.to_datetime(df2.ts, input_unit="s") .dt.tz_localize("UTC") .dt.tz_convert("Europe/Madrid"), freq="h", diff --git a/pandas/tests/resample/test_resampler_grouper.py b/pandas/tests/resample/test_resampler_grouper.py index f3c52a674cf66..bd6bd7fca2592 100644 --- a/pandas/tests/resample/test_resampler_grouper.py +++ b/pandas/tests/resample/test_resampler_grouper.py @@ -502,7 +502,7 @@ def test_groupby_resample_empty_sum_string( result = gbrs.sum(min_count=min_count) index = pd.MultiIndex( - levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", unit="ns")]], + levels=[[1, 2, 3], [pd.to_datetime("2000-01-01", input_unit="ns")]], codes=[[0, 1, 2], [0, 0, 0]], names=["A", None], ) diff --git a/pandas/tests/resample/test_timedelta.py b/pandas/tests/resample/test_timedelta.py index 309810b656ed3..c3258be0f2863 100644 --- a/pandas/tests/resample/test_timedelta.py +++ b/pandas/tests/resample/test_timedelta.py @@ -53,7 +53,7 @@ def test_resample_with_timedeltas(): expected.index = timedelta_range("0 days", freq="30min", periods=50) df = DataFrame( - {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), unit="min") + {"A": np.arange(1480)}, index=pd.to_timedelta(np.arange(1480), input_unit="min") ) result = df.resample("30min").sum() @@ -97,7 +97,9 @@ def test_resample_offset_with_timedeltaindex(): def test_resample_categorical_data_with_timedeltaindex(): # GH #12169 - df = DataFrame({"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), unit="s")) + df = DataFrame( + {"Group_obj": "A"}, index=pd.to_timedelta(list(range(20)), input_unit="s") + ) df["Group"] = df["Group_obj"].astype("category") result = df.resample("10s").agg(lambda x: (x.value_counts().index[0])) exp_tdi = pd.TimedeltaIndex(np.array([0, 10], dtype="m8[s]"), freq="10s").as_unit( @@ -180,7 +182,7 @@ def test_resample_quantile_timedelta(unit): # GH: 29485 dtype = np.dtype(f"m8[{unit}]") df = DataFrame( - {"value": 
pd.to_timedelta(np.arange(4), unit="s").astype(dtype)}, + {"value": pd.to_timedelta(np.arange(4), input_unit="s").astype(dtype)}, index=pd.date_range("20200101", periods=4, tz="UTC"), ) result = df.resample("2D").quantile(0.99) diff --git a/pandas/tests/reshape/concat/test_datetimes.py b/pandas/tests/reshape/concat/test_datetimes.py index 0cf3192ea3a74..33231efea3fca 100644 --- a/pandas/tests/reshape/concat/test_datetimes.py +++ b/pandas/tests/reshape/concat/test_datetimes.py @@ -534,7 +534,7 @@ def test_concat_period_other_series3(self): def test_concat_timedelta64_block(): - rng = to_timedelta(np.arange(10), unit="s") + rng = to_timedelta(np.arange(10), input_unit="s") df = DataFrame({"time": rng}) diff --git a/pandas/tests/reshape/merge/test_merge_asof.py b/pandas/tests/reshape/merge/test_merge_asof.py index 0e5fcccd73e26..6e666b4a6dc4e 100644 --- a/pandas/tests/reshape/merge/test_merge_asof.py +++ b/pandas/tests/reshape/merge/test_merge_asof.py @@ -3253,14 +3253,18 @@ def test_timedelta_tolerance_nearest(self, unit): columns=["time", "left"], ) - left["time"] = pd.to_timedelta(left["time"], "ms").astype(f"m8[{unit}]") + left["time"] = pd.to_timedelta(left["time"], input_unit="ms").astype( + f"m8[{unit}]" + ) right = pd.DataFrame( list(zip([0, 3, 9, 12, 15, 18], [0, 1, 2, 3, 4, 5])), columns=["time", "right"], ) - right["time"] = pd.to_timedelta(right["time"], "ms").astype(f"m8[{unit}]") + right["time"] = pd.to_timedelta(right["time"], input_unit="ms").astype( + f"m8[{unit}]" + ) expected = pd.DataFrame( list( @@ -3273,7 +3277,9 @@ def test_timedelta_tolerance_nearest(self, unit): columns=["time", "left", "right"], ) - expected["time"] = pd.to_timedelta(expected["time"], "ms").astype(f"m8[{unit}]") + expected["time"] = pd.to_timedelta(expected["time"], input_unit="ms").astype( + f"m8[{unit}]" + ) result = merge_asof( left, right, on="time", tolerance=Timedelta("1ms"), direction="nearest" diff --git a/pandas/tests/reshape/test_qcut.py 
b/pandas/tests/reshape/test_qcut.py index b6d45aeab8a7b..a817a7f8750b0 100644 --- a/pandas/tests/reshape/test_qcut.py +++ b/pandas/tests/reshape/test_qcut.py @@ -206,7 +206,7 @@ def test_qcut_nat(ser, unit): # see gh-19768 ser = Series(ser) ser = ser.dt.as_unit(unit) - td = Timedelta(1, unit=unit).as_unit(unit) + td = Timedelta(1, input_unit=unit).as_unit(unit) left = Series([ser[0] - td, np.nan, ser[2] - Day()], dtype=ser.dtype) right = Series([ser[2] - Day(), np.nan, ser[2]], dtype=ser.dtype) diff --git a/pandas/tests/scalar/period/test_period.py b/pandas/tests/scalar/period/test_period.py index eff90335ebab1..8a8c47f4e3221 100644 --- a/pandas/tests/scalar/period/test_period.py +++ b/pandas/tests/scalar/period/test_period.py @@ -665,12 +665,12 @@ def test_hash(self): def test_to_timestamp_mult(self): p = Period("2011-01", freq="M") assert p.to_timestamp(how="S") == Timestamp("2011-01-01") - expected = Timestamp("2011-02-01") - Timedelta(1, "ns") + expected = Timestamp("2011-02-01") - Timedelta(1, input_unit="ns") assert p.to_timestamp(how="E") == expected p = Period("2011-01", freq="3M") assert p.to_timestamp(how="S") == Timestamp("2011-01-01") - expected = Timestamp("2011-04-01") - Timedelta(1, "ns") + expected = Timestamp("2011-04-01") - Timedelta(1, input_unit="ns") assert p.to_timestamp(how="E") == expected @pytest.mark.filterwarnings( @@ -712,19 +712,19 @@ def _ex(p): p = Period("1985", freq="Y") result = p.to_timestamp("h", how="end") - expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + expected = Timestamp(1986, 1, 1) - Timedelta(1, input_unit="ns") assert result == expected result = p.to_timestamp("3h", how="end") assert result == expected result = p.to_timestamp("min", how="end") - expected = Timestamp(1986, 1, 1) - Timedelta(1, "ns") + expected = Timestamp(1986, 1, 1) - Timedelta(1, input_unit="ns") assert result == expected result = p.to_timestamp("2min", how="end") assert result == expected result = p.to_timestamp(how="end") - expected = 
Timestamp(1986, 1, 1) - Timedelta(1, "ns") + expected = Timestamp(1986, 1, 1) - Timedelta(1, input_unit="ns") assert result == expected expected = datetime(1985, 1, 1) @@ -913,7 +913,7 @@ def test_inner_bounds_start_and_end_time(self, bound, offset, period_property): period = TestPeriodProperties._period_constructor(bound, -offset) expected = period.to_timestamp().round(freq="s") assert getattr(period, period_property).round(freq="s") == expected - expected = (bound - offset * Timedelta(1, unit="s")).floor("s") + expected = (bound - offset * Timedelta(1, input_unit="s")).floor("s") assert getattr(period, period_property).floor("s") == expected def test_start_time(self): diff --git a/pandas/tests/scalar/timedelta/test_arithmetic.py b/pandas/tests/scalar/timedelta/test_arithmetic.py index 9347784fa1ec3..e8af56841be7f 100644 --- a/pandas/tests/scalar/timedelta/test_arithmetic.py +++ b/pandas/tests/scalar/timedelta/test_arithmetic.py @@ -35,7 +35,7 @@ class TestTimedeltaAdditionSubtraction: @pytest.mark.parametrize( "ten_seconds", [ - Timedelta(10, unit="s"), + Timedelta(10, input_unit="s"), timedelta(seconds=10), np.timedelta64(10, "s"), np.timedelta64(10000000000, "ns"), @@ -79,7 +79,7 @@ def test_td_add_sub_one_day_ten_seconds(self, one_day_ten_secs): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_datetimelike_scalar(self, op): # GH#19738 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, datetime(2016, 1, 1)) if op is operator.add: @@ -103,7 +103,7 @@ def test_td_add_timestamp_overflow(self): ts = Timestamp("1700-01-01").as_unit("ns") msg = "Cannot cast 259987 from D to 'ns' without overflow." 
with pytest.raises(OutOfBoundsTimedelta, match=msg): - ts + Timedelta(13 * 19999, unit="D") + ts + Timedelta(13 * 19999, input_unit="D") msg = "Cannot cast 259987 days 00:00:00 to unit='ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): @@ -111,7 +111,7 @@ def test_td_add_timestamp_overflow(self): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_td(self, op): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, Timedelta(days=10)) assert isinstance(result, Timedelta) @@ -119,36 +119,36 @@ def test_td_add_td(self, op): @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_pytimedelta(self, op): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, timedelta(days=9)) assert isinstance(result, Timedelta) assert result == Timedelta(days=19) @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_timedelta64(self, op): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, np.timedelta64(-4, "D")) assert isinstance(result, Timedelta) assert result == Timedelta(days=6) @pytest.mark.parametrize("op", [operator.add, ops.radd]) def test_td_add_offset(self, op): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, offsets.Hour(6)) assert isinstance(result, Timedelta) assert result == Timedelta(days=10, hours=6) def test_td_sub_td(self): - td = Timedelta(10, unit="D") - expected = Timedelta(0, unit="ns") + td = Timedelta(10, input_unit="D") + expected = Timedelta(0, input_unit="ns") result = td - td assert isinstance(result, Timedelta) assert result == expected def test_td_sub_pytimedelta(self): - td = Timedelta(10, unit="D") - expected = Timedelta(0, unit="ns") + td = Timedelta(10, input_unit="D") + expected = Timedelta(0, input_unit="ns") result = td - td.to_pytimedelta() assert isinstance(result, Timedelta) @@ -159,8 +159,8 @@ def test_td_sub_pytimedelta(self): assert 
result == expected def test_td_sub_timedelta64(self): - td = Timedelta(10, unit="D") - expected = Timedelta(0, unit="ns") + td = Timedelta(10, input_unit="D") + expected = Timedelta(0, input_unit="ns") result = td - td.to_timedelta64() assert isinstance(result, Timedelta) @@ -172,12 +172,12 @@ def test_td_sub_timedelta64(self): def test_td_sub_nat(self): # In this context pd.NaT is treated as timedelta-like - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td - NaT assert result is NaT def test_td_sub_td64_nat(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") td_nat = np.timedelta64("NaT") result = td - td_nat @@ -187,13 +187,13 @@ def test_td_sub_td64_nat(self): assert result is NaT def test_td_sub_offset(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td - offsets.Hour(1) assert isinstance(result, Timedelta) - assert result == Timedelta(239, unit="h") + assert result == Timedelta(239, input_unit="h") def test_td_add_sub_numeric_raises(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") msg = "unsupported operand type" for other in [2, 2.0, np.int64(2), np.float64(2)]: with pytest.raises(TypeError, match=msg): @@ -234,7 +234,7 @@ def test_td_add_sub_int_ndarray(self): other - td def test_td_rsub_nat(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = NaT - td assert result is NaT @@ -242,9 +242,9 @@ def test_td_rsub_nat(self): assert result is NaT def test_td_rsub_offset(self): - result = offsets.Hour(1) - Timedelta(10, unit="D") + result = offsets.Hour(1) - Timedelta(10, input_unit="D") assert isinstance(result, Timedelta) - assert result == Timedelta(-239, unit="h") + assert result == Timedelta(-239, input_unit="h") def test_td_sub_timedeltalike_object_dtype_array(self): # GH#21980 @@ -362,7 +362,7 @@ class TestTimedeltaMultiplicationDivision: @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) def 
test_td_mul_nat(self, op, td_nat): # GH#19819 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") typs = "|".join(["numpy.timedelta64", "NaTType", "Timedelta"]) msg = "|".join( [ @@ -377,7 +377,7 @@ def test_td_mul_nat(self, op, td_nat): @pytest.mark.parametrize("op", [operator.mul, ops.rmul]) def test_td_mul_nan(self, op, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = op(td, nan) assert result is NaT @@ -449,7 +449,7 @@ def test_td_mul_td64_ndarray_invalid(self): def test_td_div_timedeltalike_scalar(self): # GH#19738 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td / offsets.Hour(1) assert result == 240 @@ -480,7 +480,7 @@ def test_td_div_td64_non_nano(self): def test_td_div_numeric_scalar(self): # GH#19738 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td / 2 assert isinstance(result, Timedelta) @@ -500,7 +500,7 @@ def test_td_div_numeric_scalar(self): ) def test_td_div_nan(self, nan): # np.float64('NaN') has a 'dtype' attr, avoid treating as array - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = td / nan assert result is NaT @@ -532,7 +532,7 @@ def test_td_div_ndarray_0d(self): def test_td_rdiv_timedeltalike_scalar(self): # GH#19738 - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = offsets.Hour(1) / td assert result == 1 / 240.0 @@ -540,7 +540,7 @@ def test_td_rdiv_timedeltalike_scalar(self): def test_td_rdiv_na_scalar(self): # GH#31869 None gets cast to NaT - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") result = NaT / td assert np.isnan(result) @@ -560,7 +560,7 @@ def test_td_rdiv_na_scalar(self): np.nan / td def test_td_rdiv_ndarray(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") arr = np.array([td], dtype=object) result = arr / td @@ -583,7 +583,7 @@ def test_td_rdiv_ndarray(self): 
arr / td def test_td_rdiv_ndarray_0d(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") arr = np.array(td.asm8) @@ -750,7 +750,7 @@ def test_td_rfloordiv_intarray(self): msg = "Invalid dtype" with pytest.raises(TypeError, match=msg): - ints // Timedelta(1, unit="s") + ints // Timedelta(1, input_unit="s") def test_td_rfloordiv_numeric_series(self): # GH#18846 @@ -874,7 +874,7 @@ def test_divmod_numeric(self): td = Timedelta(days=2, hours=6) result = divmod(td, 53 * 3600 * 1e9) - assert result[0] == Timedelta(1, unit="ns") + assert result[0] == Timedelta(1, input_unit="ns") assert isinstance(result[1], Timedelta) assert result[1] == Timedelta(hours=1) diff --git a/pandas/tests/scalar/timedelta/test_constructors.py b/pandas/tests/scalar/timedelta/test_constructors.py index c9904a318e22d..7ce3eac642463 100644 --- a/pandas/tests/scalar/timedelta/test_constructors.py +++ b/pandas/tests/scalar/timedelta/test_constructors.py @@ -25,13 +25,13 @@ def test_unit_m_y_raises(self, unit): msg = "Units 'M', 'Y', and 'y' are no longer supported" with pytest.raises(ValueError, match=msg): - Timedelta(10, unit) + Timedelta(10, input_unit=unit) with pytest.raises(ValueError, match=msg): - to_timedelta(10, unit) + to_timedelta(10, input_unit=unit) with pytest.raises(ValueError, match=msg): - to_timedelta([1, 2], unit) + to_timedelta([1, 2], input_unit=unit) @pytest.mark.parametrize( "unit,unit_depr", @@ -49,9 +49,10 @@ def test_unit_deprecated(self, unit, unit_depr): # GH#52536 msg = f"'{unit_depr}' is deprecated and will be removed in a future version." 
- expected = Timedelta(1, unit=unit) + expected = Timedelta(1, input_unit=unit) with tm.assert_produces_warning(Pandas4Warning, match=msg): - result = Timedelta(1, unit=unit_depr) + result = Timedelta(1, input_unit=unit_depr) + tm.assert_equal(result, expected) @pytest.mark.parametrize( @@ -135,7 +136,7 @@ def test_unit_parser(self, unit, np_unit, wrapper): ) # TODO(2.0): the desired output dtype may have non-nano resolution - result = to_timedelta(wrapper(range(5)), unit=unit) + result = to_timedelta(wrapper(range(5)), input_unit=unit) tm.assert_index_equal(result, expected) str_repr = [f"{x}{unit}" for x in np.arange(5)] @@ -146,9 +147,9 @@ def test_unit_parser(self, unit, np_unit, wrapper): # scalar expected = Timedelta(np.timedelta64(2, np_unit).astype("timedelta64[ns]")) - result = to_timedelta(2, unit=unit) + result = to_timedelta(2, input_unit=unit) assert result == expected - result = Timedelta(2, unit=unit) + result = Timedelta(2, input_unit=unit) assert result == expected result = to_timedelta(f"2{unit}") @@ -160,13 +161,13 @@ def test_unit_parser(self, unit, np_unit, wrapper): def test_unit_T_L_N_U_raises(self, unit): msg = f"invalid unit abbreviation: {unit}" with pytest.raises(ValueError, match=msg): - Timedelta(1, unit=unit) + Timedelta(1, input_unit=unit) with pytest.raises(ValueError, match=msg): - to_timedelta(10, unit) + to_timedelta(10, input_unit=unit) with pytest.raises(ValueError, match=msg): - to_timedelta([1, 2], unit) + to_timedelta([1, 2], input_unit=unit) def test_construct_from_kwargs_overflow(): @@ -182,10 +183,10 @@ def test_construct_from_kwargs_overflow(): def test_construct_with_weeks_unit_overflow(): # GH#47268 don't silently wrap around with pytest.raises(OutOfBoundsTimedelta, match="without overflow"): - Timedelta(1000000000000000000, unit="W") + Timedelta(1000000000000000000, input_unit="W") with pytest.raises(OutOfBoundsTimedelta, match="without overflow"): - Timedelta(1000000000000000000.0, unit="W") + 
Timedelta(1000000000000000000.0, input_unit="W") def test_construct_from_td64_with_unit(): @@ -194,10 +195,10 @@ def test_construct_from_td64_with_unit(): obj = np.timedelta64(123456789000000000, "h") with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): - Timedelta(obj, unit="ps") + Timedelta(obj, input_unit="ps") with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): - Timedelta(obj, unit="ns") + Timedelta(obj, input_unit="ns") with pytest.raises(OutOfBoundsTimedelta, match="123456789000000000 hours"): Timedelta(obj) @@ -265,8 +266,8 @@ def test_from_tick_reso(): def test_construction(): expected = np.timedelta64(10, "D").astype("m8[ns]").view("i8") - assert Timedelta(10, unit="D")._value == expected - assert Timedelta(10.0, unit="D")._value == expected + assert Timedelta(10, input_unit="D")._value == expected + assert Timedelta(10.0, input_unit="D")._value == expected assert Timedelta("10 days")._value == expected assert Timedelta(days=10)._value == expected assert Timedelta(days=10.0)._value == expected @@ -369,7 +370,7 @@ def test_construction(): expected = np.timedelta64(10, "s").astype("m8[ns]").view("i8") + np.timedelta64( 500, "ms" ).astype("m8[ns]").view("i8") - assert Timedelta(10.5, unit="s")._value == expected + assert Timedelta(10.5, input_unit="s")._value == expected # offset assert to_timedelta(offsets.Hour(2)) == Timedelta(hours=2) @@ -447,7 +448,7 @@ def test_overflow_on_construction(): # xref GH#17637 msg = "Cannot cast 139993 from D to 'ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(7 * 19999, unit="D") + Timedelta(7 * 19999, input_unit="D") # used to overflow before non-ns support td = Timedelta(timedelta(days=13 * 19999)) @@ -475,7 +476,7 @@ def test_construction_out_of_bounds_td64ns(val, unit): td = Timedelta(td64) if unit != "M": - # with unit="M" the conversion to "s" is poorly defined + # with input_unit="M" the conversion to "s" is poorly defined # (and 
numpy issues DeprecationWarning) assert td.asm8 == td64 assert td.asm8.dtype == "m8[s]" @@ -643,7 +644,7 @@ def test_timedelta_pass_td_and_kwargs_raises(): ) def test_string_with_unit(constructor, value, unit): with pytest.raises(ValueError, match="unit must not be specified"): - constructor(value, unit=unit) + constructor(value, input_unit=unit) @pytest.mark.parametrize( @@ -682,7 +683,7 @@ class MyCustomTimedelta(Timedelta): def test_non_nano_value(): # https://github.com/pandas-dev/pandas/issues/49076 - result = Timedelta(10, unit="D").as_unit("s").value + result = Timedelta(10, input_unit="D").as_unit("s").value # `.value` shows nanoseconds, even though unit is 's' assert result == 864000000000000 @@ -692,7 +693,7 @@ def test_non_nano_value(): r"Use `.asm8.view\('i8'\)` to cast represent Timedelta in its " r"own unit \(here, s\).$" ) - td = Timedelta(1_000, "D").as_unit("s") * 1_000 + td = Timedelta(1_000, input_unit="D").as_unit("s") * 1_000 with pytest.raises(OverflowError, match=msg): td.value # check that the suggested workaround actually works diff --git a/pandas/tests/scalar/timedelta/test_formats.py b/pandas/tests/scalar/timedelta/test_formats.py index 1aafeec2ceed5..24031679005ca 100644 --- a/pandas/tests/scalar/timedelta/test_formats.py +++ b/pandas/tests/scalar/timedelta/test_formats.py @@ -6,10 +6,10 @@ @pytest.mark.parametrize( "td, expected_repr", [ - (Timedelta(10, unit="D"), "Timedelta('10 days 00:00:00')"), - (Timedelta(10, unit="s"), "Timedelta('0 days 00:00:10')"), - (Timedelta(10, unit="ms"), "Timedelta('0 days 00:00:00.010000')"), - (Timedelta(-10, unit="ms"), "Timedelta('-1 days +23:59:59.990000')"), + (Timedelta(10, input_unit="D"), "Timedelta('10 days 00:00:00')"), + (Timedelta(10, input_unit="s"), "Timedelta('0 days 00:00:10')"), + (Timedelta(10, input_unit="ms"), "Timedelta('0 days 00:00:00.010000')"), + (Timedelta(-10, input_unit="ms"), "Timedelta('-1 days +23:59:59.990000')"), ], ) def test_repr(td, expected_repr): @@ -46,10 +46,10 @@ 
def test_isoformat(td, expected_iso): class TestReprBase: def test_none(self): - delta_1d = Timedelta(1, unit="D") - delta_0d = Timedelta(0, unit="D") - delta_1s = Timedelta(1, unit="s") - delta_500ms = Timedelta(500, unit="ms") + delta_1d = Timedelta(1, input_unit="D") + delta_0d = Timedelta(0, input_unit="D") + delta_1s = Timedelta(1, input_unit="s") + delta_500ms = Timedelta(500, input_unit="ms") drepr = lambda x: x._repr_base() assert drepr(delta_1d) == "1 days" @@ -63,10 +63,10 @@ def test_none(self): assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" def test_sub_day(self): - delta_1d = Timedelta(1, unit="D") - delta_0d = Timedelta(0, unit="D") - delta_1s = Timedelta(1, unit="s") - delta_500ms = Timedelta(500, unit="ms") + delta_1d = Timedelta(1, input_unit="D") + delta_0d = Timedelta(0, input_unit="D") + delta_1s = Timedelta(1, input_unit="s") + delta_500ms = Timedelta(500, input_unit="ms") drepr = lambda x: x._repr_base(format="sub_day") assert drepr(delta_1d) == "1 days" @@ -80,10 +80,10 @@ def test_sub_day(self): assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" def test_long(self): - delta_1d = Timedelta(1, unit="D") - delta_0d = Timedelta(0, unit="D") - delta_1s = Timedelta(1, unit="s") - delta_500ms = Timedelta(500, unit="ms") + delta_1d = Timedelta(1, input_unit="D") + delta_0d = Timedelta(0, input_unit="D") + delta_1s = Timedelta(1, input_unit="s") + delta_500ms = Timedelta(500, input_unit="ms") drepr = lambda x: x._repr_base(format="long") assert drepr(delta_1d) == "1 days 00:00:00" @@ -97,9 +97,9 @@ def test_long(self): assert drepr(-delta_1d + delta_500ms) == "-1 days +00:00:00.500000" def test_all(self): - delta_1d = Timedelta(1, unit="D") - delta_0d = Timedelta(0, unit="D") - delta_1ns = Timedelta(1, unit="ns") + delta_1d = Timedelta(1, input_unit="D") + delta_0d = Timedelta(0, input_unit="D") + delta_1ns = Timedelta(1, input_unit="ns") drepr = lambda x: x._repr_base(format="all") assert drepr(delta_1d) == "1 
days 00:00:00.000000000" diff --git a/pandas/tests/scalar/timedelta/test_timedelta.py b/pandas/tests/scalar/timedelta/test_timedelta.py index da22e49b24e54..b4bd51757db3a 100644 --- a/pandas/tests/scalar/timedelta/test_timedelta.py +++ b/pandas/tests/scalar/timedelta/test_timedelta.py @@ -83,7 +83,7 @@ def test_mul_preserves_reso(self, td, unit): def test_cmp_cross_reso(self, td): # numpy gets this wrong because of silent overflow - other = Timedelta(days=106751, unit="ns") + other = Timedelta(days=106751, input_unit="ns") assert other < td assert td > other assert not other == td @@ -283,7 +283,7 @@ def test_timedelta_class_min_max_resolution(): class TestTimedeltaUnaryOps: def test_invert(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") msg = "bad operand type for unary ~" with pytest.raises(TypeError, match=msg): @@ -298,12 +298,12 @@ def test_invert(self): ~(td.to_timedelta64()) def test_unary_ops(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") # __neg__, __pos__ - assert -td == Timedelta(-10, unit="D") + assert -td == Timedelta(-10, input_unit="D") assert -td == Timedelta("-10D") - assert +td == Timedelta(10, unit="D") + assert +td == Timedelta(10, input_unit="D") # __abs__, __abs__(__neg__) assert abs(td) == td @@ -322,7 +322,7 @@ class TestTimedeltas: ) def test_rounding_on_int_unit_construction(self, unit, value, expected): # GH 12690 - result = Timedelta(value, unit=unit) + result = Timedelta(value, input_unit=unit) assert result._value == expected result = Timedelta(str(value) + unit) assert result._value == expected @@ -337,7 +337,7 @@ def test_total_seconds_scalar(self): assert np.isnan(rng.total_seconds()) def test_conversion(self): - for td in [Timedelta(10, unit="D"), Timedelta("1 days, 10:11:12.012345")]: + for td in [Timedelta(10, input_unit="D"), Timedelta("1 days, 10:11:12.012345")]: pydt = td.to_pytimedelta() assert td == Timedelta(pydt) assert td == pydt @@ -402,7 +402,7 @@ def check(value): 
rng.milliseconds # components - tup = to_timedelta(-1, "us").components + tup = to_timedelta(-1, input_unit="us").components assert tup.days == -1 assert tup.hours == 23 assert tup.minutes == 59 @@ -432,7 +432,7 @@ def check(value): # TODO: this is a test of to_timedelta string parsing def test_iso_conversion(self): # GH #21877 - expected = Timedelta(1, unit="s") + expected = Timedelta(1, input_unit="s") assert to_timedelta("P0DT0H0M1S") == expected # TODO: this is a test of to_timedelta returning NaT @@ -448,12 +448,12 @@ def test_nat_converters(self): def test_numeric_conversions(self): assert Timedelta(0) == np.timedelta64(0, "ns") assert Timedelta(10) == np.timedelta64(10, "ns") - assert Timedelta(10, unit="ns") == np.timedelta64(10, "ns") + assert Timedelta(10, input_unit="ns") == np.timedelta64(10, "ns") - assert Timedelta(10, unit="us") == np.timedelta64(10, "us") - assert Timedelta(10, unit="ms") == np.timedelta64(10, "ms") - assert Timedelta(10, unit="s") == np.timedelta64(10, "s") - assert Timedelta(10, unit="D") == np.timedelta64(10, "D") + assert Timedelta(10, input_unit="us") == np.timedelta64(10, "us") + assert Timedelta(10, input_unit="ms") == np.timedelta64(10, "ms") + assert Timedelta(10, input_unit="s") == np.timedelta64(10, "s") + assert Timedelta(10, input_unit="D") == np.timedelta64(10, "D") def test_timedelta_conversions(self): assert Timedelta(timedelta(seconds=1)) == np.timedelta64(1, "s").astype( @@ -477,7 +477,7 @@ def test_to_numpy_alias(self): td.to_numpy(copy=True) def test_identity(self): - td = Timedelta(10, unit="D") + td = Timedelta(10, input_unit="D") assert isinstance(td, Timedelta) assert isinstance(td, timedelta) @@ -573,7 +573,7 @@ def test_pickle(self): def test_timedelta_hash_equality(self): # GH 11129 - v = Timedelta(1, "D") + v = Timedelta(1, input_unit="D") td = timedelta(days=1) assert hash(v) == hash(td) @@ -584,7 +584,7 @@ def test_timedelta_hash_equality(self): assert all(hash(td) == hash(td.to_pytimedelta()) for td in 
tds) # python timedeltas drop ns resolution - ns_td = Timedelta(1, "ns") + ns_td = Timedelta(1, input_unit="ns") assert hash(ns_td) != hash(ns_td.to_pytimedelta()) @pytest.mark.slow @@ -621,26 +621,26 @@ def test_implementation_limits(self): assert max_td._value == lib.i8max # Beyond lower limit, a NAT before the Overflow - assert (min_td - Timedelta(1, "ns")) is NaT + assert (min_td - Timedelta(1, input_unit="ns")) is NaT msg = "int too (large|big) to convert" with pytest.raises(OverflowError, match=msg): - min_td - Timedelta(2, "ns") + min_td - Timedelta(2, input_unit="ns") with pytest.raises(OverflowError, match=msg): - max_td + Timedelta(1, "ns") + max_td + Timedelta(1, input_unit="ns") # Same tests using the internal nanosecond values - td = Timedelta(min_td._value - 1, "ns") + td = Timedelta(min_td._value - 1, input_unit="ns") assert td is NaT msg = "Cannot cast -9223372036854775809 from ns to 'ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(min_td._value - 2, "ns") + Timedelta(min_td._value - 2, input_unit="ns") msg = "Cannot cast 9223372036854775808 from ns to 'ns' without overflow" with pytest.raises(OutOfBoundsTimedelta, match=msg): - Timedelta(max_td._value + 1, "ns") + Timedelta(max_td._value + 1, input_unit="ns") def test_total_seconds_precision(self): # GH 19458 @@ -688,8 +688,8 @@ def test_unit_deprecated(self, unit, unit_depr): msg = f"'{unit_depr}' is deprecated and will be removed in a future version." 
with tm.assert_produces_warning(Pandas4Warning, match=msg): - result = Timedelta(1, unit_depr) - assert result == Timedelta(1, unit) + result = Timedelta(1, input_unit=unit_depr) + assert result == Timedelta(1, input_unit=unit) @pytest.mark.parametrize( @@ -697,9 +697,9 @@ def test_unit_deprecated(self, unit, unit_depr): [ (Timedelta("10s"), True), (Timedelta("-10s"), True), - (Timedelta(10, unit="ns"), True), - (Timedelta(0, unit="ns"), False), - (Timedelta(-10, unit="ns"), True), + (Timedelta(10, input_unit="ns"), True), + (Timedelta(0, input_unit="ns"), False), + (Timedelta(-10, input_unit="ns"), True), (Timedelta(None), True), (NaT, True), ], @@ -711,7 +711,7 @@ def test_truthiness(value, expected): def test_timedelta_attribute_precision(): # GH 31354 - td = Timedelta(1552211999999999872, unit="ns") + td = Timedelta(1552211999999999872, input_unit="ns") result = td.days * 86400 result += td.seconds result *= 1000000 @@ -723,7 +723,7 @@ def test_timedelta_attribute_precision(): def test_to_pytimedelta_large_values(): - td = Timedelta(1152921504609987375, unit="ns") + td = Timedelta(1152921504609987375, input_unit="ns") result = td.to_pytimedelta() expected = timedelta(days=13343, seconds=86304, microseconds=609987) assert result == expected diff --git a/pandas/tests/scalar/timestamp/test_constructors.py b/pandas/tests/scalar/timestamp/test_constructors.py index 912875f818eb6..edd58915c4ac7 100644 --- a/pandas/tests/scalar/timestamp/test_constructors.py +++ b/pandas/tests/scalar/timestamp/test_constructors.py @@ -39,11 +39,11 @@ def test_constructor_int_float_with_YM_unit(self, typ): # GH#47266 avoid the conversions in cast_from_unit val = typ(150) - ts = Timestamp(val, unit="Y") + ts = Timestamp(val, input_unit="Y") expected = Timestamp("2120-01-01") assert ts == expected - ts = Timestamp(val, unit="M") + ts = Timestamp(val, input_unit="M") expected = Timestamp("1982-07-01") assert ts == expected @@ -54,17 +54,17 @@ def 
test_construct_from_int_float_with_unit_out_of_bound_raises(self, typ): msg = f"cannot convert input {val} with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): - Timestamp(val, unit="D") + Timestamp(val, input_unit="D") def test_constructor_float_not_round_with_YM_unit_raises(self): # GH#47267 avoid the conversions in cast_from-unit - msg = "Conversion of non-round float with unit=[MY] is ambiguous" + msg = "Conversion of non-round float with input_unit=[MY] is ambiguous" with pytest.raises(ValueError, match=msg): - Timestamp(150.5, unit="Y") + Timestamp(150.5, input_unit="Y") with pytest.raises(ValueError, match=msg): - Timestamp(150.5, unit="M") + Timestamp(150.5, input_unit="M") @pytest.mark.parametrize( "value, check_kwargs", @@ -95,7 +95,7 @@ def test_constructor_float_not_round_with_YM_unit_raises(self): ) def test_construct_with_unit(self, value, check_kwargs): def check(value, unit=None, h=1, s=1, us=0, ns=0): - stamp = Timestamp(value, unit=unit) + stamp = Timestamp(value, input_unit=unit) assert stamp.year == 2000 assert stamp.month == 1 assert stamp.day == 1 @@ -1076,7 +1076,7 @@ def test_timestamp_nano_range(nano): def test_non_nano_value(): # https://github.com/pandas-dev/pandas/issues/49076 - result = Timestamp("1800-01-01", unit="s").value + result = Timestamp("1800-01-01", input_unit="s").value # `.value` shows nanoseconds, even though unit is 's' assert result == -5364662400000000000 diff --git a/pandas/tests/scalar/timestamp/test_timestamp.py b/pandas/tests/scalar/timestamp/test_timestamp.py index 8e153827ad086..de93cfc7d2a5e 100644 --- a/pandas/tests/scalar/timestamp/test_timestamp.py +++ b/pandas/tests/scalar/timestamp/test_timestamp.py @@ -869,7 +869,7 @@ def test_resolution(self, ts): def test_out_of_ns_bounds(self): # https://github.com/pandas-dev/pandas/issues/51060 - result = Timestamp(-52700112000, unit="s") + result = Timestamp(-52700112000, input_unit="s") assert result == Timestamp("0300-01-01") assert 
result.to_numpy() == np.datetime64("0300-01-01T00:00:00", "s") diff --git a/pandas/tests/series/methods/test_astype.py b/pandas/tests/series/methods/test_astype.py index aa38e63c826f6..ba44af98b01dc 100644 --- a/pandas/tests/series/methods/test_astype.py +++ b/pandas/tests/series/methods/test_astype.py @@ -126,7 +126,7 @@ def test_astype_object_to_dt64_non_nano(self, tz): pointwise = [ vals[0].tz_localize(tz), Timestamp(vals[1], tz=tz), - to_datetime(vals[2], unit="us", utc=True).tz_convert(tz), + to_datetime(vals[2], input_unit="us", utc=True).tz_convert(tz), ] exp_vals = [x.as_unit("us").asm8 for x in pointwise] exp_arr = np.array(exp_vals, dtype="M8[us]") @@ -303,7 +303,7 @@ def test_astype_str_cast_dt64(self): def test_astype_str_cast_td64(self): # see GH#9757 - td = Series([Timedelta(1, unit="D")]) + td = Series([Timedelta(1, input_unit="D")]) ser = td.astype(str) expected = Series(["1 days"], dtype="str") diff --git a/pandas/tests/series/methods/test_clip.py b/pandas/tests/series/methods/test_clip.py index c1ee7f8c9e008..ffc480d097455 100644 --- a/pandas/tests/series/methods/test_clip.py +++ b/pandas/tests/series/methods/test_clip.py @@ -31,7 +31,7 @@ def test_clip_types_and_nulls(self): sers = [ Series([np.nan, 1.0, 2.0, 3.0]), Series([None, "a", "b", "c"]), - Series(pd.to_datetime([np.nan, 1, 2, 3], unit="D")), + Series(pd.to_datetime([np.nan, 1, 2, 3], input_unit="D")), ] for s in sers: diff --git a/pandas/tests/series/methods/test_convert_dtypes.py b/pandas/tests/series/methods/test_convert_dtypes.py index ef034e62bb764..e5416f047e423 100644 --- a/pandas/tests/series/methods/test_convert_dtypes.py +++ b/pandas/tests/series/methods/test_convert_dtypes.py @@ -323,7 +323,7 @@ def test_convert_dtype_pyarrow_timezone_preserve(self): # GH 60237 pytest.importorskip("pyarrow") ser = pd.Series( - pd.to_datetime(range(5), utc=True, unit="h"), + pd.to_datetime(range(5), utc=True, input_unit="h"), dtype="timestamp[ns, tz=UTC][pyarrow]", ) result = 
ser.convert_dtypes(dtype_backend="pyarrow") diff --git a/pandas/tests/series/methods/test_isin.py b/pandas/tests/series/methods/test_isin.py index 4f8484252ba8f..c470d4ffb2161 100644 --- a/pandas/tests/series/methods/test_isin.py +++ b/pandas/tests/series/methods/test_isin.py @@ -92,7 +92,7 @@ def test_isin_with_i8(self): tm.assert_series_equal(result, expected) # timedelta64[ns] - s = Series(pd.to_timedelta(range(5), unit="D")) + s = Series(pd.to_timedelta(range(5), input_unit="D")) result = s.isin(s[0:2]) tm.assert_series_equal(result, expected) diff --git a/pandas/tests/series/test_constructors.py b/pandas/tests/series/test_constructors.py index 3c3a06583883f..7f00460aba8b9 100644 --- a/pandas/tests/series/test_constructors.py +++ b/pandas/tests/series/test_constructors.py @@ -1910,16 +1910,16 @@ def test_constructor_dict_timedelta_index(self): # construct Series from dict as data and TimedeltaIndex as index # will result NaN in result Series data expected = Series( - data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], unit="s") + data=["A", "B", "C"], index=pd.to_timedelta([0, 10, 20], input_unit="s") ) result = Series( data={ - pd.to_timedelta(0, unit="s"): "A", - pd.to_timedelta(10, unit="s"): "B", - pd.to_timedelta(20, unit="s"): "C", + pd.to_timedelta(0, input_unit="s"): "A", + pd.to_timedelta(10, input_unit="s"): "B", + pd.to_timedelta(20, input_unit="s"): "C", }, - index=pd.to_timedelta([0, 10, 20], unit="s"), + index=pd.to_timedelta([0, 10, 20], input_unit="s"), ) tm.assert_series_equal(result, expected) @@ -1979,7 +1979,7 @@ def test_constructor_raise_on_lossy_conversion_of_strings(self): def test_constructor_dtype_timedelta_alternative_construct(self): # GH#35465 result = Series([1000000, 200000, 3000000], dtype="timedelta64[ns]") - expected = Series(pd.to_timedelta([1000000, 200000, 3000000], unit="ns")) + expected = Series(pd.to_timedelta([1000000, 200000, 3000000], input_unit="ns")) tm.assert_series_equal(result, expected) @pytest.mark.xfail( 
diff --git a/pandas/tests/test_downstream.py b/pandas/tests/test_downstream.py index 02ead5bddec70..a0b63b07dbaee 100644 --- a/pandas/tests/test_downstream.py +++ b/pandas/tests/test_downstream.py @@ -289,7 +289,7 @@ def test_xarray_coerce_unit(): xr = pytest.importorskip("xarray") arr = xr.DataArray([1, 2, 3]) - result = pd.to_datetime(arr, unit="ns") + result = pd.to_datetime(arr, input_unit="ns") expected = DatetimeIndex( [ "1970-01-01 00:00:00.000000001", diff --git a/pandas/tests/tools/test_to_datetime.py b/pandas/tests/tools/test_to_datetime.py index eddfeb80967ef..bc815c15e8dc1 100644 --- a/pandas/tests/tools/test_to_datetime.py +++ b/pandas/tests/tools/test_to_datetime.py @@ -28,6 +28,7 @@ from pandas.errors import ( OutOfBoundsDatetime, OutOfBoundsTimedelta, + Pandas4Warning, ) import pandas.util._test_decorators as td @@ -548,6 +549,15 @@ def test_to_datetime_none(self): # GH#23055 assert to_datetime(None) is NaT + def test_to_datetime_unit_deprecated(self): + msg = "The 'unit' keyword is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + to_datetime([1], unit="s") + + msg2 = "Specify only 'input_unit', not 'unit'" + with pytest.raises(ValueError, match=msg2): + to_datetime([1], unit="s", input_unit="s") + @pytest.mark.filterwarnings("ignore:Could not infer format") def test_to_datetime_overflow(self): # we should get an OutOfBoundsDatetime, NOT OverflowError @@ -1759,20 +1769,22 @@ class TestToDatetimeUnit: @pytest.mark.parametrize("item", [150, float(150)]) def test_to_datetime_month_or_year_unit_int(self, cache, unit, item, request): # GH#50870 Note we have separate tests that pd.Timestamp gets these right - ts = Timestamp(item, unit=unit) + ts = Timestamp(item, input_unit=unit) expected = DatetimeIndex([ts], dtype="M8[ns]") - result = to_datetime([item], unit=unit, cache=cache) + result = to_datetime([item], input_unit=unit, cache=cache) tm.assert_index_equal(result, expected) - result = to_datetime(np.array([item], 
dtype=object), unit=unit, cache=cache) + result = to_datetime( + np.array([item], dtype=object), input_unit=unit, cache=cache + ) tm.assert_index_equal(result, expected) - result = to_datetime(np.array([item]), unit=unit, cache=cache) + result = to_datetime(np.array([item]), input_unit=unit, cache=cache) tm.assert_index_equal(result, expected) # with a nan! - result = to_datetime(np.array([item, np.nan]), unit=unit, cache=cache) + result = to_datetime(np.array([item, np.nan]), input_unit=unit, cache=cache) assert result.isna()[1] tm.assert_index_equal(result[:1], expected) @@ -1781,42 +1793,42 @@ def test_to_datetime_month_or_year_unit_non_round_float(self, cache, unit): # GH#50301 # Match Timestamp behavior in disallowing non-round floats with # Y or M unit - msg = f"Conversion of non-round float with unit={unit} is ambiguous" + msg = f"Conversion of non-round float with input_unit={unit} is ambiguous" with pytest.raises(ValueError, match=msg): - to_datetime([1.5], unit=unit, errors="raise") + to_datetime([1.5], input_unit=unit, errors="raise") with pytest.raises(ValueError, match=msg): - to_datetime(np.array([1.5]), unit=unit, errors="raise") + to_datetime(np.array([1.5]), input_unit=unit, errors="raise") msg = r"Given date string \"1.5\" not likely a datetime" with pytest.raises(ValueError, match=msg): - to_datetime(["1.5"], unit=unit, errors="raise") + to_datetime(["1.5"], input_unit=unit, errors="raise") - res = to_datetime([1.5], unit=unit, errors="coerce") + res = to_datetime([1.5], input_unit=unit, errors="coerce") expected = Index([NaT], dtype="M8[ns]") tm.assert_index_equal(res, expected) # In 3.0, the string "1.5" is parsed as as it would be without unit, # which fails. With errors="coerce" this becomes NaT. 
- res = to_datetime(["1.5"], unit=unit, errors="coerce") + res = to_datetime(["1.5"], input_unit=unit, errors="coerce") expected = to_datetime([NaT]).as_unit("ns") tm.assert_index_equal(res, expected) # round floats are OK - res = to_datetime([1.0], unit=unit) - expected = to_datetime([1], unit=unit) + res = to_datetime([1.0], input_unit=unit) + expected = to_datetime([1], input_unit=unit) tm.assert_index_equal(res, expected) def test_unit(self, cache): # GH 11758 # test proper behavior with errors - msg = "cannot specify both format and unit" + msg = "cannot specify both format and input_unit" with pytest.raises(ValueError, match=msg): - to_datetime([1], unit="D", format="%Y%m%d", cache=cache) + to_datetime([1], input_unit="D", format="%Y%m%d", cache=cache) def test_unit_array_mixed_nans(self, cache): values = [11111111111111111, 1, 1.0, iNaT, NaT, np.nan, "NaT", ""] - result = to_datetime(values, unit="D", errors="coerce", cache=cache) + result = to_datetime(values, input_unit="D", errors="coerce", cache=cache) expected = DatetimeIndex( ["NaT", "1970-01-02", "1970-01-02", "NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]", @@ -1825,31 +1837,31 @@ def test_unit_array_mixed_nans(self, cache): msg = "cannot convert input 11111111111111111 with the unit 'D'" with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(values, unit="D", errors="raise", cache=cache) + to_datetime(values, input_unit="D", errors="raise", cache=cache) def test_unit_array_mixed_nans_large_int(self, cache): values = [1420043460000000000000000, iNaT, NaT, np.nan, "NaT"] - result = to_datetime(values, errors="coerce", unit="s", cache=cache) + result = to_datetime(values, errors="coerce", input_unit="s", cache=cache) expected = DatetimeIndex(["NaT", "NaT", "NaT", "NaT", "NaT"], dtype="M8[ns]") tm.assert_index_equal(result, expected) msg = "cannot convert input 1420043460000000000000000 with the unit 's'" with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(values, 
errors="raise", unit="s", cache=cache) + to_datetime(values, errors="raise", input_unit="s", cache=cache) def test_to_datetime_invalid_str_not_out_of_bounds_valuerror(self, cache): # if we have a string, then we raise a ValueError # and NOT an OutOfBoundsDatetime msg = "Unknown datetime string format, unable to parse: foo" with pytest.raises(ValueError, match=msg): - to_datetime("foo", errors="raise", unit="s", cache=cache) + to_datetime("foo", errors="raise", input_unit="s", cache=cache) @pytest.mark.parametrize("error", ["raise", "coerce"]) def test_unit_consistency(self, cache, error): # consistency of conversions expected = Timestamp("1970-05-09 14:25:11") - result = to_datetime(11111111, unit="s", errors=error, cache=cache) + result = to_datetime(11111111, input_unit="s", errors=error, cache=cache) assert result == expected assert isinstance(result, Timestamp) @@ -1916,18 +1928,18 @@ def test_unit_rounding(self, cache): # GH 14156 & GH 20445: argument will incur floating point errors # but no premature rounding value = 1434743731.8770001 - result = to_datetime(value, unit="s", cache=cache) + result = to_datetime(value, input_unit="s", cache=cache) expected = Timestamp("2015-06-19 19:55:31.877000093") assert result == expected - alt = Timestamp(value, unit="s") + alt = Timestamp(value, input_unit="s") assert alt == result @pytest.mark.parametrize("dtype", [int, float]) def test_to_datetime_unit(self, dtype): epoch = 1370745748 ser = Series([epoch + t for t in range(20)]).astype(dtype) - result = to_datetime(ser, unit="s") + result = to_datetime(ser, input_unit="s") expected = Series( [ Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) @@ -1941,7 +1953,7 @@ def test_to_datetime_unit(self, dtype): def test_to_datetime_unit_with_nulls(self, null): epoch = 1370745748 ser = Series([epoch + t for t in range(20)] + [null]) - result = to_datetime(ser, unit="s") + result = to_datetime(ser, input_unit="s") expected = Series( [Timestamp("2013-06-09 02:42:28") + 
timedelta(seconds=t) for t in range(20)] + [NaT], @@ -1953,7 +1965,7 @@ def test_to_datetime_unit_fractional_seconds(self): # GH13834 epoch = 1370745748 ser = Series([epoch + t for t in np.arange(0, 2, 0.25)] + [iNaT]).astype(float) - result = to_datetime(ser, unit="s") + result = to_datetime(ser, input_unit="s") expected = Series( [ Timestamp("2013-06-09 02:42:28") + timedelta(seconds=t) @@ -1967,7 +1979,7 @@ def test_to_datetime_unit_fractional_seconds(self): tm.assert_series_equal(result, expected) def test_to_datetime_unit_na_values(self): - result = to_datetime([1, 2, "NaT", NaT, np.nan], unit="D") + result = to_datetime([1, 2, "NaT", NaT, np.nan], input_unit="D") expected = DatetimeIndex( [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 3, dtype="M8[ns]", @@ -1981,7 +1993,7 @@ def test_to_datetime_unit_invalid(self, bad_val): else: msg = "cannot convert input 111111111 with the unit 'D'" with pytest.raises(ValueError, match=msg): - to_datetime([1, 2, bad_val], unit="D") + to_datetime([1, 2, bad_val], input_unit="D") @pytest.mark.parametrize("bad_val", ["foo", 111111111]) def test_to_timestamp_unit_coerce(self, bad_val): @@ -1990,12 +2002,12 @@ def test_to_timestamp_unit_coerce(self, bad_val): [Timestamp("1970-01-02"), Timestamp("1970-01-03")] + ["NaT"] * 1, dtype="M8[ns]", ) - result = to_datetime([1, 2, bad_val], unit="D", errors="coerce") + result = to_datetime([1, 2, bad_val], input_unit="D", errors="coerce") tm.assert_index_equal(result, expected) def test_float_to_datetime_raise_near_bounds(self): # GH50183 - msg = "cannot convert input with unit 'D'" + msg = "cannot convert input with input_unit 'D'" oneday_in_ns = 1e9 * 60 * 60 * 24 tsmax_in_days = 2**63 / oneday_in_ns # 2**63 ns, in days # just in bounds @@ -2004,7 +2016,7 @@ def test_float_to_datetime_raise_near_bounds(self): ) expected = (should_succeed * oneday_in_ns).astype(np.int64) for error_mode in ["raise", "coerce"]: - result1 = to_datetime(should_succeed, unit="D", 
errors=error_mode) + result1 = to_datetime(should_succeed, input_unit="D", errors=error_mode) # Cast to `np.float64` so that `rtol` and inexact checking kick in # (`check_exact` doesn't take place for integer dtypes) tm.assert_almost_equal( @@ -2016,9 +2028,9 @@ def test_float_to_datetime_raise_near_bounds(self): should_fail1 = Series([0, tsmax_in_days + 0.005], dtype=float) should_fail2 = Series([0, -tsmax_in_days - 0.005], dtype=float) with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(should_fail1, unit="D", errors="raise") + to_datetime(should_fail1, input_unit="D", errors="raise") with pytest.raises(OutOfBoundsDatetime, match=msg): - to_datetime(should_fail2, unit="D", errors="raise") + to_datetime(should_fail2, input_unit="D", errors="raise") class TestToDatetimeDataFrame: @@ -2541,7 +2553,7 @@ def test_to_datetime_overflow(self): def test_to_datetime_float_with_nans_floating_point_error(self): # GH#58419 ser = Series([np.nan] * 1000 + [1712219033.0], dtype=np.float64) - result = to_datetime(ser, unit="s", errors="coerce") + result = to_datetime(ser, input_unit="s", errors="coerce") expected = Series( [NaT] * 1000 + [Timestamp("2024-04-04 08:23:53")], dtype="datetime64[ns]" ) @@ -3183,11 +3195,11 @@ def julian_dates(): class TestOrigin: def test_origin_and_unit(self): # GH#42624 - ts = to_datetime(1, unit="s", origin=1) + ts = to_datetime(1, input_unit="s", origin=1) expected = Timestamp("1970-01-01 00:00:02") assert ts == expected - ts = to_datetime(1, unit="s", origin=1_000_000_000) + ts = to_datetime(1, input_unit="s", origin=1_000_000_000) expected = Timestamp("2001-09-09 01:46:41") assert ts == expected @@ -3195,14 +3207,14 @@ def test_julian(self, julian_dates): # gh-11276, gh-11745 # for origin as julian - result = Series(to_datetime(julian_dates, unit="D", origin="julian")) + result = Series(to_datetime(julian_dates, input_unit="D", origin="julian")) expected = Series( - to_datetime(julian_dates - Timestamp(0).to_julian_date(), 
unit="D") + to_datetime(julian_dates - Timestamp(0).to_julian_date(), input_unit="D") ) tm.assert_series_equal(result, expected) def test_unix(self): - result = Series(to_datetime([0, 1, 2], unit="D", origin="unix")) + result = Series(to_datetime([0, 1, 2], input_unit="D", origin="unix")) expected = Series( [Timestamp("1970-01-01"), Timestamp("1970-01-02"), Timestamp("1970-01-03")], dtype="M8[ns]", @@ -3210,27 +3222,27 @@ def test_unix(self): tm.assert_series_equal(result, expected) def test_julian_round_trip(self): - result = to_datetime(2456658, origin="julian", unit="D") + result = to_datetime(2456658, origin="julian", input_unit="D") assert result.to_julian_date() == 2456658 # out-of-bounds msg = "1 is Out of Bounds for origin='julian'" with pytest.raises(ValueError, match=msg): - to_datetime(1, origin="julian", unit="D") + to_datetime(1, origin="julian", input_unit="D") def test_invalid_unit(self, units, julian_dates): # checking for invalid combination of origin='julian' and unit != D if units != "D": msg = "unit must be 'D' for origin='julian'" with pytest.raises(ValueError, match=msg): - to_datetime(julian_dates, unit=units, origin="julian") + to_datetime(julian_dates, input_unit=units, origin="julian") @pytest.mark.parametrize("unit", ["ns", "D"]) def test_invalid_origin(self, unit): # need to have a numeric specified - msg = "it must be numeric with a unit specified" + msg = "it must be numeric with a input_unit specified" with pytest.raises(ValueError, match=msg): - to_datetime("2005-01-01", origin="1960-01-01", unit=unit) + to_datetime("2005-01-01", origin="1960-01-01", input_unit=unit) @pytest.mark.parametrize( "epochs", @@ -3245,10 +3257,10 @@ def test_epoch(self, units, epochs): epoch_1960 = Timestamp(1960, 1, 1) units_from_epochs = np.arange(5, dtype=np.int64) expected = Series( - [pd.Timedelta(x, unit=units) + epoch_1960 for x in units_from_epochs] + [pd.Timedelta(x, input_unit=units) + epoch_1960 for x in units_from_epochs] ) - result = 
Series(to_datetime(units_from_epochs, unit=units, origin=epochs)) + result = Series(to_datetime(units_from_epochs, input_unit=units, origin=epochs)) tm.assert_series_equal(result, expected) @pytest.mark.parametrize( @@ -3269,12 +3281,14 @@ def test_invalid_origins(self, origin, exc, units): ] ) with pytest.raises(exc, match=msg): - to_datetime(list(range(5)), unit=units, origin=origin) + to_datetime(list(range(5)), input_unit=units, origin=origin) def test_invalid_origins_tzinfo(self): # GH16842 with pytest.raises(ValueError, match="must be tz-naive"): - to_datetime(1, unit="D", origin=datetime(2000, 1, 1, tzinfo=timezone.utc)) + to_datetime( + 1, input_unit="D", origin=datetime(2000, 1, 1, tzinfo=timezone.utc) + ) def test_incorrect_value_exception(self): # GH47495 @@ -3315,15 +3329,15 @@ def test_processing_order(self, arg, origin, expected_str): # make sure we handle out-of-bounds *before* # constructing the dates - result = to_datetime(arg, unit="D", origin=origin) + result = to_datetime(arg, input_unit="D", origin=origin) expected = Timestamp(expected_str) assert result == expected - result = to_datetime(200 * 365, unit="D", origin="1870-01-01") + result = to_datetime(200 * 365, input_unit="D", origin="1870-01-01") expected = Timestamp("2069-11-13 00:00:00") assert result == expected - result = to_datetime(300 * 365, unit="D", origin="1870-01-01") + result = to_datetime(300 * 365, input_unit="D", origin="1870-01-01") expected = Timestamp("2169-10-20 00:00:00") assert result == expected @@ -3339,7 +3353,7 @@ def test_processing_order(self, arg, origin, expected_str): def test_arg_tz_ns_unit(self, offset, utc, exp): # GH 25546 arg = "2019-01-01T00:00:00.000" + offset - result = to_datetime([arg], unit="ns", utc=utc) + result = to_datetime([arg], input_unit="ns", utc=utc) expected = to_datetime([exp]).as_unit("ns") tm.assert_index_equal(result, expected) @@ -3389,7 +3403,7 @@ def test_nullable_integer_to_datetime(): ser = Series([1, 2, None, 2**61, None], 
dtype="Int64") ser_copy = ser.copy() - res = to_datetime(ser, unit="ns") + res = to_datetime(ser, input_unit="ns") expected = Series( [ @@ -3439,12 +3453,12 @@ def test_empty_string_datetime(errors, args, format): def test_empty_string_datetime_coerce__unit(): # GH13044 # coerce empty string to pd.NaT - result = to_datetime([1, ""], unit="s", errors="coerce") + result = to_datetime([1, ""], input_unit="s", errors="coerce") expected = DatetimeIndex(["1970-01-01 00:00:01", "NaT"], dtype="datetime64[ns]") tm.assert_index_equal(expected, result) # verify that no exception is raised even when errors='raise' is set - result = to_datetime([1, ""], unit="s", errors="raise") + result = to_datetime([1, ""], input_unit="s", errors="raise") tm.assert_index_equal(expected, result) diff --git a/pandas/tests/tools/test_to_numeric.py b/pandas/tests/tools/test_to_numeric.py index 12e6be18244e1..4d6e2652ef210 100644 --- a/pandas/tests/tools/test_to_numeric.py +++ b/pandas/tests/tools/test_to_numeric.py @@ -387,7 +387,7 @@ def test_timedelta(transform_assert_equal): @pytest.mark.parametrize( "scalar", [ - pd.Timedelta(1, "D"), + pd.Timedelta(1, input_unit="D"), pd.Timestamp("2017-01-01T12"), pd.Timestamp("2017-01-01T12", tz="US/Pacific"), ], diff --git a/pandas/tests/tools/test_to_timedelta.py b/pandas/tests/tools/test_to_timedelta.py index 08ad7b7fb1b93..69b68edd67c27 100644 --- a/pandas/tests/tools/test_to_timedelta.py +++ b/pandas/tests/tools/test_to_timedelta.py @@ -31,6 +31,15 @@ def test_to_timedelta_none(self): # GH#23055 assert to_timedelta(None) is pd.NaT + def test_to_timedelta_unit_deprecated(self): + msg = "The 'unit' keyword is deprecated" + with tm.assert_produces_warning(Pandas4Warning, match=msg): + to_timedelta([1], unit="s") + + msg2 = "Specify only 'input_unit', not 'unit'" + with pytest.raises(ValueError, match=msg2): + to_timedelta([1], unit="s", input_unit="s") + def test_to_timedelta_dt64_raises(self): # Passing datetime64-dtype data to TimedeltaIndex is no 
longer # supported GH#29794 @@ -74,7 +83,7 @@ def test_to_timedelta_units(self): result = TimedeltaIndex( [np.timedelta64(0, "ns"), np.timedelta64(10, "s").astype("m8[ns]")] ) - expected = to_timedelta([0, 10], unit="s") + expected = to_timedelta([0, 10], input_unit="s") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -90,7 +99,7 @@ def test_to_timedelta_units(self): def test_to_timedelta_units_dtypes(self, dtype, unit): # arrays of various dtypes arr = np.array([1] * 5, dtype=dtype) - result = to_timedelta(arr, unit=unit) + result = to_timedelta(arr, input_unit=unit) exp_dtype = "m8[ns]" if dtype == "int64" else "m8[s]" expected = TimedeltaIndex([np.timedelta64(1, unit)] * 5, dtype=exp_dtype) tm.assert_index_equal(result, expected) @@ -130,7 +139,7 @@ def test_to_timedelta_invalid_unit(self, arg): # these will error msg = "invalid unit abbreviation: foo" with pytest.raises(ValueError, match=msg): - to_timedelta(arg, unit="foo") + to_timedelta(arg, input_unit="foo") def test_to_timedelta_time(self): # time not supported ATM @@ -232,14 +241,14 @@ def test_to_timedelta_on_missing_values_list(self, val): def test_to_timedelta_float(self): # https://github.com/pandas-dev/pandas/issues/25077 arr = np.arange(0, 1, 1e-6)[-10:] - result = to_timedelta(arr, unit="s") + result = to_timedelta(arr, input_unit="s") expected_asi8 = np.arange(999990000, 10**9, 1000, dtype="int64") tm.assert_numpy_array_equal(result.asi8, expected_asi8) def test_to_timedelta_coerce_strings_unit(self): arr = np.array([1, 2, "error"], dtype=object) - result = to_timedelta(arr, unit="ns", errors="coerce") - expected = to_timedelta([1, 2, pd.NaT], unit="ns") + result = to_timedelta(arr, input_unit="ns", errors="coerce") + expected = to_timedelta([1, 2, pd.NaT], input_unit="ns") tm.assert_index_equal(result, expected) @pytest.mark.parametrize( @@ -248,7 +257,7 @@ def test_to_timedelta_coerce_strings_unit(self): def test_to_timedelta_nullable_int64_dtype(self, expected_val, 
result_val): # GH 35574 expected = Series([timedelta(days=1), expected_val]) - result = to_timedelta(Series([1, result_val], dtype="Int64"), unit="days") + result = to_timedelta(Series([1, result_val], dtype="Int64"), input_unit="days") tm.assert_series_equal(result, expected) @@ -290,11 +299,11 @@ def test_to_timedelta_numeric_ea(self, any_numeric_ea_dtype): # GH#48796 ser = Series([1, pd.NA], dtype=any_numeric_ea_dtype) result = to_timedelta(ser) - expected = Series([pd.Timedelta(1, unit="ns"), pd.NaT]) + expected = Series([pd.Timedelta(1, input_unit="ns"), pd.NaT]) tm.assert_series_equal(result, expected) def test_to_timedelta_fraction(self): - result = to_timedelta(1.0 / 3, unit="h") + result = to_timedelta(1.0 / 3, input_unit="h") expected = pd.Timedelta("0 days 00:19:59.999999998") assert result == expected diff --git a/pandas/tests/tseries/offsets/test_business_hour.py b/pandas/tests/tseries/offsets/test_business_hour.py index 1b488dc9a47d4..9f0a33be149ea 100644 --- a/pandas/tests/tseries/offsets/test_business_hour.py +++ b/pandas/tests/tseries/offsets/test_business_hour.py @@ -957,7 +957,7 @@ def test_apply_nanoseconds(self): def test_bday_ignores_timedeltas(self, unit, td_unit): # GH#55608 idx = date_range("2010/02/01", "2010/02/10", freq="12h", unit=unit) - td = Timedelta(3, unit="h").as_unit(td_unit) + td = Timedelta(3, input_unit="h").as_unit(td_unit) off = BDay(offset=td) t1 = idx + off @@ -996,7 +996,7 @@ def test_bday_ignores_timedeltas(self, unit, td_unit): def test_add_bday_offset_nanos(self): # GH#55608 idx = date_range("2010/02/01", "2010/02/10", freq="12h", unit="ns") - off = BDay(offset=Timedelta(3, unit="ns")) + off = BDay(offset=Timedelta(3, input_unit="ns")) result = idx + off expected = DatetimeIndex([x + off for x in idx]) diff --git a/pandas/tseries/frequencies.py b/pandas/tseries/frequencies.py index 88ea1bfa3c6ed..b8cb525be8f3d 100644 --- a/pandas/tseries/frequencies.py +++ b/pandas/tseries/frequencies.py @@ -284,7 +284,7 @@ def 
fields(self) -> np.ndarray: # structured array of fields @cache_readonly def rep_stamp(self) -> Timestamp: - return Timestamp(self.i8values[0], unit=self.index.unit) + return Timestamp(self.i8values[0], input_unit=self.index.unit) def month_position_check(self) -> str | None: return month_position_check(self.fields, self.index.dayofweek)