From 1b995d2bbb46c0a019a815a100129e7f543a5f60 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 5 Jun 2022 10:52:06 -0700 Subject: [PATCH 1/2] ENH: support non-nano in Localizer --- pandas/_libs/tslibs/conversion.pyx | 4 +- pandas/_libs/tslibs/timestamps.pyx | 7 +-- pandas/_libs/tslibs/tzconversion.pxd | 11 ++++- pandas/_libs/tslibs/tzconversion.pyi | 5 +- pandas/_libs/tslibs/tzconversion.pyx | 72 +++++++++++++++++++++------- pandas/_libs/tslibs/vectorized.pyx | 12 ++--- 6 files changed, 81 insertions(+), 30 deletions(-) diff --git a/pandas/_libs/tslibs/conversion.pyx b/pandas/_libs/tslibs/conversion.pyx index 2ed1930b01555..e42ebe5ee561b 100644 --- a/pandas/_libs/tslibs/conversion.pyx +++ b/pandas/_libs/tslibs/conversion.pyx @@ -416,7 +416,7 @@ cdef _TSObject _create_tsobject_tz_using_offset(npy_datetimestruct dts, return obj cdef: - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, NPY_FR_ns) # Infer fold from offset-adjusted obj.value # see PEP 495 https://www.python.org/dev/peps/pep-0495/#the-fold-attribute @@ -584,7 +584,7 @@ cdef inline void _localize_tso(_TSObject obj, tzinfo tz): cdef: int64_t local_val Py_ssize_t outpos = -1 - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, NPY_FR_ns) assert obj.tzinfo is None diff --git a/pandas/_libs/tslibs/timestamps.pyx b/pandas/_libs/tslibs/timestamps.pyx index 8706d59b084b9..0f60548ec75dc 100644 --- a/pandas/_libs/tslibs/timestamps.pyx +++ b/pandas/_libs/tslibs/timestamps.pyx @@ -1988,10 +1988,11 @@ default 'raise' ambiguous = [ambiguous] value = tz_localize_to_utc_single(self.value, tz, ambiguous=ambiguous, - nonexistent=nonexistent) + nonexistent=nonexistent, + reso=self._reso) elif tz is None: # reset tz - value = tz_convert_from_utc_single(self.value, self.tz) + value = tz_convert_from_utc_single(self.value, self.tz, reso=self._reso) else: raise TypeError( @@ -2139,7 +2140,7 @@ default 'raise' fold = self.fold if tzobj is not None: - value = tz_convert_from_utc_single(value, tzobj) + value = tz_convert_from_utc_single(value, tzobj, reso=self._reso) # setup components dt64_to_dtstruct(value, &dts) diff --git a/pandas/_libs/tslibs/tzconversion.pxd b/pandas/_libs/tslibs/tzconversion.pxd index 2acad9ea34062..13735fb5945a4 100644 --- a/pandas/_libs/tslibs/tzconversion.pxd +++ b/pandas/_libs/tslibs/tzconversion.pxd @@ -5,18 +5,25 @@ from numpy cimport ( ndarray, ) +from pandas._libs.tslibs.np_datetime cimport NPY_DATETIMEUNIT + cpdef int64_t tz_convert_from_utc_single( - int64_t utc_val, tzinfo tz + int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=* ) except? -1 cdef int64_t tz_localize_to_utc_single( - int64_t val, tzinfo tz, object ambiguous=*, object nonexistent=* + int64_t val, + tzinfo tz, + object ambiguous=*, + object nonexistent=*, + NPY_DATETIMEUNIT reso=*, ) except? -1 cdef class Localizer: cdef: tzinfo tz + NPY_DATETIMEUNIT _reso bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz ndarray trans Py_ssize_t ntrans diff --git a/pandas/_libs/tslibs/tzconversion.pyi b/pandas/_libs/tslibs/tzconversion.pyi index 2531383b658fc..fab73f96b0dfb 100644 --- a/pandas/_libs/tslibs/tzconversion.pyi +++ b/pandas/_libs/tslibs/tzconversion.pyi @@ -9,10 +9,13 @@ import numpy as np from pandas._typing import npt # tz_convert_from_utc_single exposed for testing -def tz_convert_from_utc_single(val: np.int64, tz: tzinfo) -> np.int64: ... +def tz_convert_from_utc_single( + val: np.int64, tz: tzinfo, reso: int = ... +) -> np.int64: ... def tz_localize_to_utc( vals: npt.NDArray[np.int64], tz: tzinfo | None, ambiguous: str | bool | Iterable[bool] | None = ..., nonexistent: str | timedelta | np.timedelta64 | None = ..., + reso: int = ..., # NPY_DATETIMEUNIT ) -> npt.NDArray[np.int64]: ... diff --git a/pandas/_libs/tslibs/tzconversion.pyx b/pandas/_libs/tslibs/tzconversion.pyx index 0cdc7b777f45f..86cda289c80e6 100644 --- a/pandas/_libs/tslibs/tzconversion.pyx +++ b/pandas/_libs/tslibs/tzconversion.pyx @@ -31,10 +31,12 @@ from pandas._libs.tslibs.ccalendar cimport ( DAY_NANOS, HOUR_NANOS, ) +from pandas._libs.tslibs.dtypes cimport periods_per_second from pandas._libs.tslibs.nattype cimport NPY_NAT from pandas._libs.tslibs.np_datetime cimport ( - dt64_to_dtstruct, + NPY_DATETIMEUNIT, npy_datetimestruct, + pandas_datetime_to_datetimestruct, ) from pandas._libs.tslibs.timezones cimport ( get_dst_info, @@ -54,6 +56,7 @@ cdef const int64_t[::1] _deltas_placeholder = np.array([], dtype=np.int64) cdef class Localizer: # cdef: # tzinfo tz + # NPY_DATETIMEUNIT _reso # bint use_utc, use_fixed, use_tzlocal, use_dst, use_pytz # ndarray trans # Py_ssize_t ntrans @@ -63,8 +66,9 @@ cdef class Localizer: @cython.initializedcheck(False) @cython.boundscheck(False) - def __cinit__(self, tzinfo tz): + def __cinit__(self, tzinfo tz, NPY_DATETIMEUNIT reso): self.tz = tz + self._reso = reso self.use_utc = self.use_tzlocal = self.use_fixed = False self.use_dst = self.use_pytz = False self.ntrans = -1 # placeholder @@ -80,6 +84,22 @@ cdef class Localizer: else: trans, deltas, typ = get_dst_info(tz) + if reso != NPY_DATETIMEUNIT.NPY_FR_ns: + # NB: using floordiv here is implicitly assuming we will + # never see trans or deltas that are not an integer number + # of seconds. + if reso == NPY_DATETIMEUNIT.NPY_FR_us: + trans = trans // 1_000 + deltas = deltas // 1_000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_ms: + trans = trans // 1_000_000 + deltas = deltas // 1_000_000 + elif reso == NPY_DATETIMEUNIT.NPY_FR_s: + trans = trans // 1_000_000_000 + deltas = deltas // 1_000_000_000 + else: + raise NotImplementedError(reso) + self.trans = trans self.ntrans = self.trans.shape[0] self.deltas = deltas @@ -87,12 +107,12 @@ cdef class Localizer: if typ != "pytz" and typ != "dateutil": # static/fixed; in this case we know that len(delta) == 1 self.use_fixed = True - self.delta = self.deltas[0] + self.delta = deltas[0] else: self.use_dst = True if typ == "pytz": self.use_pytz = True - self.tdata = cnp.PyArray_DATA(self.trans) + self.tdata = cnp.PyArray_DATA(trans) @cython.boundscheck(False) cdef inline int64_t utc_val_to_local_val( @@ -102,7 +122,7 @@ cdef class Localizer: return utc_val elif self.use_tzlocal: return utc_val + _tz_localize_using_tzinfo_api( - utc_val, self.tz, to_utc=False, fold=fold + utc_val, self.tz, to_utc=False, reso=self._reso, fold=fold ) elif self.use_fixed: return utc_val + self.delta @@ -117,7 +137,11 @@ cdef class Localizer: cdef int64_t tz_localize_to_utc_single( - int64_t val, tzinfo tz, object ambiguous=None, object nonexistent=None, + int64_t val, + tzinfo tz, + object ambiguous=None, + object nonexistent=None, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, ) except? -1: """See tz_localize_to_utc.__doc__""" cdef: @@ -131,7 +155,7 @@ cdef int64_t tz_localize_to_utc_single( return val elif is_tzlocal(tz) or is_zoneinfo(tz): - return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True) + return val - _tz_localize_using_tzinfo_api(val, tz, to_utc=True, reso=reso) elif is_fixed_offset(tz): _, deltas, _ = get_dst_info(tz) @@ -144,13 +168,19 @@ cdef int64_t tz_localize_to_utc_single( tz, ambiguous=ambiguous, nonexistent=nonexistent, + reso=reso, )[0] @cython.boundscheck(False) @cython.wraparound(False) -def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None, - object nonexistent=None): +def tz_localize_to_utc( + ndarray[int64_t] vals, + tzinfo tz, + object ambiguous=None, + object nonexistent=None, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, +): """ Localize tzinfo-naive i8 to given time zone (using pytz). If there are ambiguities in the values, raise AmbiguousTimeError. @@ -177,6 +207,7 @@ def tz_localize_to_utc(ndarray[int64_t] vals, tzinfo tz, object ambiguous=None, nonexistent : {None, "NaT", "shift_forward", "shift_backward", "raise", \ timedelta-like} How to handle non-existent times when converting wall times to UTC + reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- @@ -196,7 +227,7 @@ timedelta-like} bint shift_forward = False, shift_backward = False bint fill_nonexist = False str stamp - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, reso=reso) # Vectorized version of DstTzInfo.localize if info.use_utc: @@ -210,7 +241,7 @@ timedelta-like} if v == NPY_NAT: result[i] = NPY_NAT else: - result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True) + result[i] = v - _tz_localize_using_tzinfo_api(v, tz, to_utc=True, reso=reso) return result.base # to return underlying ndarray elif info.use_fixed: @@ -512,7 +543,9 @@ cdef ndarray[int64_t] _get_dst_hours( # ---------------------------------------------------------------------- # Timezone Conversion -cpdef int64_t tz_convert_from_utc_single(int64_t utc_val, tzinfo tz) except? -1: +cpdef int64_t tz_convert_from_utc_single( + int64_t utc_val, tzinfo tz, NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns +) except? -1: """ Convert the val (in i8) from UTC to tz @@ -522,13 +555,14 @@ cpdef int64_t tz_convert_from_utc_single(int64_t utc_val, tzinfo tz) except? -1: ---------- utc_val : int64 tz : tzinfo + reso : NPY_DATETIMEUNIT, default NPY_FR_ns Returns ------- converted: int64 """ cdef: - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, reso=reso) Py_ssize_t pos # Note: caller is responsible for ensuring utc_val != NPY_NAT @@ -538,7 +572,11 @@ cpdef int64_t tz_convert_from_utc_single(int64_t utc_val, tzinfo tz) except? -1: # OSError may be thrown by tzlocal on windows at or close to 1970-01-01 # see https://github.com/pandas-dev/pandas/pull/37591#issuecomment-720628241 cdef int64_t _tz_localize_using_tzinfo_api( - int64_t val, tzinfo tz, bint to_utc=True, bint* fold=NULL + int64_t val, + tzinfo tz, + bint to_utc=True, + NPY_DATETIMEUNIT reso=NPY_DATETIMEUNIT.NPY_FR_ns, + bint* fold=NULL, ) except? -1: """ Convert the i8 representation of a datetime from a general-case timezone to @@ -552,6 +590,7 @@ cdef int64_t _tz_localize_using_tzinfo_api( tz : tzinfo to_utc : bint True if converting _to_ UTC, False if going the other direction. + reso : NPY_DATETIMEUNIT fold : bint*, default NULL pointer to fold: whether datetime ends up in a fold or not after adjustment. @@ -571,8 +610,9 @@ cdef int64_t _tz_localize_using_tzinfo_api( datetime dt int64_t delta timedelta td + int64_t pps = periods_per_second(reso) - dt64_to_dtstruct(val, &dts) + pandas_datetime_to_datetimestruct(val, reso, &dts) # datetime_new is cython-optimized constructor if not to_utc: @@ -590,7 +630,7 @@ cdef int64_t _tz_localize_using_tzinfo_api( dts.min, dts.sec, dts.us, None) td = tz.utcoffset(dt) - delta = int(td.total_seconds() * 1_000_000_000) + delta = int(td.total_seconds() * pps) return delta diff --git a/pandas/_libs/tslibs/vectorized.pyx b/pandas/_libs/tslibs/vectorized.pyx index 8e0d5ece0e155..a52823681def6 100644 --- a/pandas/_libs/tslibs/vectorized.pyx +++ b/pandas/_libs/tslibs/vectorized.pyx @@ -58,7 +58,7 @@ def tz_convert_from_utc(ndarray stamps, tzinfo tz): ndarray[int64] """ cdef: - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, reso=NPY_FR_ns) int64_t utc_val, local_val Py_ssize_t pos, i, n = stamps.size @@ -130,7 +130,7 @@ def ints_to_pydatetime( ndarray[object] of type specified by box """ cdef: - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, reso=NPY_FR_ns) int64_t utc_val, local_val Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning @@ -229,7 +229,7 @@ cdef inline c_Resolution _reso_stamp(npy_datetimestruct *dts): def get_resolution(ndarray stamps, tzinfo tz=None) -> Resolution: # stamps is int64_t, any ndim cdef: - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, reso=NPY_FR_ns) int64_t utc_val, local_val Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning @@ -281,7 +281,7 @@ cpdef ndarray normalize_i8_timestamps(ndarray stamps, tzinfo tz, NPY_DATETIMEUNI result : int64 ndarray of converted of normalized nanosecond timestamps """ cdef: - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, reso=reso) int64_t utc_val, local_val, res_val Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning @@ -328,7 +328,7 @@ def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) - is_normalized : bool True if all stamps are normalized """ cdef: - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, reso=reso) int64_t utc_val, local_val Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning @@ -357,7 +357,7 @@ def is_date_array_normalized(ndarray stamps, tzinfo tz, NPY_DATETIMEUNIT reso) - def dt64arr_to_periodarr(ndarray stamps, int freq, tzinfo tz): # stamps is int64_t, arbitrary ndim cdef: - Localizer info = Localizer(tz) + Localizer info = Localizer(tz, reso=NPY_FR_ns) Py_ssize_t i, n = stamps.size Py_ssize_t pos = -1 # unused, avoid not-initialized warning int64_t utc_val, local_val, res_val From dbe134b30703ec66b434fe55af4233fef528f035 Mon Sep 17 00:00:00 2001 From: Brock Date: Sun, 5 Jun 2022 12:19:26 -0700 Subject: [PATCH 2/2] update setup.py --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 54aba599526d2..cb713e6d74392 100755 --- a/setup.py +++ b/setup.py @@ -549,6 +549,7 @@ def srcpath(name=None, suffix=".pyx", subdir="src"): "_libs.tslibs.tzconversion": { "pyxfile": "_libs/tslibs/tzconversion", "depends": tseries_depends, + "sources": ["pandas/_libs/tslibs/src/datetime/np_datetime.c"], }, "_libs.tslibs.vectorized": {"pyxfile": "_libs/tslibs/vectorized"}, "_libs.testing": {"pyxfile": "_libs/testing"},