- 
          
- 
                Notifications
    You must be signed in to change notification settings 
- Fork 19.2k
ENH: implement DatetimeLikeArray #19902
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
df9f894
              004f137
              db72494
              80b525e
              54009d4
              65dc829
              4c5a05c
              b91ddac
              080e477
              e19f70a
              47d365e
              3a67bce
              7fc73eb
              9edd9bc
              1236273
              1ab6263
              9a28188
              6b17031
              b03689a
              a055d40
              d1faeb6
              fcb8d6a
              8cee92c
              375329e
              71dfe08
              59c60a2
              9db2b78
              308c25b
              94bdfcb
              d589e2a
              0d4f48a
              1b910c7
              cece116
              828022a
              ed83046
              c1934db
              a684c2d
              File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -1,3 +1,6 @@ | ||
| from .base import (ExtensionArray, # noqa | ||
| ExtensionScalarOpsMixin) | ||
| from .categorical import Categorical # noqa | ||
| from .datetimes import DatetimeArrayMixin # noqa | ||
| from .period import PeriodArrayMixin # noqa | ||
| from .timedelta import TimedeltaArrayMixin # noqa | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,169 @@ | ||
| # -*- coding: utf-8 -*- | ||
|  | ||
| import numpy as np | ||
|  | ||
| from pandas._libs import iNaT | ||
| from pandas._libs.tslibs.timedeltas import delta_to_nanoseconds | ||
|  | ||
| from pandas.tseries import frequencies | ||
|  | ||
| import pandas.core.common as com | ||
| from pandas.core.algorithms import checked_add_with_arr | ||
|  | ||
|  | ||
| class DatetimeLikeArrayMixin(object): | ||
| """ | ||
| Shared Base/Mixin class for DatetimeArray, TimedeltaArray, PeriodArray | ||
|  | ||
| Assumes that __new__/__init__ defines: | ||
| _data | ||
| _freq | ||
|  | ||
| and that the inheriting class has methods: | ||
| _validate_frequency | ||
| """ | ||
|  | ||
| @property | ||
| def _box_func(self): | ||
| """ | ||
| box function to get object from internal representation | ||
| """ | ||
| raise com.AbstractMethodError(self) | ||
|  | ||
| def __iter__(self): | ||
| return (self._box_func(v) for v in self.asi8) | ||
|  | ||
| @property | ||
| def values(self): | ||
| """ return the underlying data as an ndarray """ | ||
| return self._data.view(np.ndarray) | ||
|  | ||
| @property | ||
| def asi8(self): | ||
| # do not cache or you'll create a memory leak | ||
| return self.values.view('i8') | ||
|  | ||
| # ------------------------------------------------------------------ | ||
| # Null Handling | ||
|  | ||
| @property # NB: override with cache_readonly in immutable subclasses | ||
| def _isnan(self): | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. this should be  There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I'm open to the name-change. This is the existing name. | ||
| """ return if each value is nan""" | ||
| return (self.asi8 == iNaT) | ||
|  | ||
| @property # NB: override with cache_readonly in immutable subclasses | ||
| def hasnans(self): | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. same: is this needed on the arrays? | ||
| """ return if I have any nans; enables various perf speedups """ | ||
| return self._isnan.any() | ||
|  | ||
| def _maybe_mask_results(self, result, fill_value=None, convert=None): | ||
| There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. how is this different from  | ||
| """ | ||
| Parameters | ||
| ---------- | ||
| result : a ndarray | ||
| convert : string/dtype or None | ||
|  | ||
| Returns | ||
| ------- | ||
| result : ndarray with values replaced by the fill_value | ||
|  | ||
| mask the result if needed, convert to the provided dtype if it is not | ||
| None | ||
|  | ||
| This is an internal routine | ||
| """ | ||
|  | ||
| if self.hasnans: | ||
| if convert: | ||
| result = result.astype(convert) | ||
| if fill_value is None: | ||
| fill_value = np.nan | ||
| result[self._isnan] = fill_value | ||
| return result | ||
|  | ||
| # ------------------------------------------------------------------ | ||
| # Frequency Properties/Methods | ||
|  | ||
| @property | ||
| def freq(self): | ||
| """Return the frequency object if it is set, otherwise None""" | ||
| return self._freq | ||
|  | ||
| @freq.setter | ||
| def freq(self, value): | ||
| if value is not None: | ||
| value = frequencies.to_offset(value) | ||
| self._validate_frequency(self, value) | ||
|  | ||
| self._freq = value | ||
|  | ||
| @property | ||
| def freqstr(self): | ||
| """ | ||
| Return the frequency object as a string if it is set, otherwise None | ||
| """ | ||
| if self.freq is None: | ||
| return None | ||
| return self.freq.freqstr | ||
|  | ||
| @property # NB: override with cache_readonly in immutable subclasses | ||
| def inferred_freq(self): | ||
| """ | ||
| Tries to return a string representing a frequency guess, | ||
| generated by infer_freq. Returns None if it can't autodetect the | ||
| frequency. | ||
| """ | ||
| try: | ||
| return frequencies.infer_freq(self) | ||
| except ValueError: | ||
| return None | ||
|  | ||
| # ------------------------------------------------------------------ | ||
| # Arithmetic Methods | ||
|  | ||
| def _add_datelike(self, other): | ||
| raise TypeError("cannot add {cls} and {typ}" | ||
| .format(cls=type(self).__name__, | ||
| typ=type(other).__name__)) | ||
|  | ||
| def _sub_datelike(self, other): | ||
| raise com.AbstractMethodError(self) | ||
|  | ||
| def _sub_period(self, other): | ||
| return NotImplemented | ||
|  | ||
| def _add_offset(self, offset): | ||
| raise com.AbstractMethodError(self) | ||
|  | ||
| def _add_delta(self, other): | ||
| return NotImplemented | ||
|  | ||
| def _add_delta_td(self, other): | ||
| """ | ||
| Add a delta of a timedeltalike | ||
| return the i8 result view | ||
| """ | ||
| inc = delta_to_nanoseconds(other) | ||
| new_values = checked_add_with_arr(self.asi8, inc, | ||
| arr_mask=self._isnan).view('i8') | ||
| if self.hasnans: | ||
| new_values[self._isnan] = iNaT | ||
| return new_values.view('i8') | ||
|  | ||
| def _add_delta_tdi(self, other): | ||
| """ | ||
| Add a delta of a TimedeltaIndex | ||
| return the i8 result view | ||
| """ | ||
| if not len(self) == len(other): | ||
| raise ValueError("cannot add indices of unequal length") | ||
|  | ||
| self_i8 = self.asi8 | ||
| other_i8 = other.asi8 | ||
| new_values = checked_add_with_arr(self_i8, other_i8, | ||
| arr_mask=self._isnan, | ||
| b_mask=other._isnan) | ||
| if self.hasnans or other.hasnans: | ||
| mask = (self._isnan) | (other._isnan) | ||
| new_values[mask] = iNaT | ||
| return new_values.view('i8') | ||
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,110 @@ | ||
| # -*- coding: utf-8 -*- | ||
| import warnings | ||
|  | ||
| import numpy as np | ||
|  | ||
| from pandas._libs.tslib import Timestamp, NaT, iNaT | ||
| from pandas._libs.tslibs import timezones | ||
|  | ||
| from pandas.util._decorators import cache_readonly | ||
|  | ||
| from pandas.core.dtypes.common import _NS_DTYPE, is_datetime64tz_dtype | ||
| from pandas.core.dtypes.dtypes import DatetimeTZDtype | ||
|  | ||
| from .datetimelike import DatetimeLikeArrayMixin | ||
|  | ||
|  | ||
| class DatetimeArrayMixin(DatetimeLikeArrayMixin): | ||
| """ | ||
| Assumes that subclass __new__/__init__ defines: | ||
| tz | ||
| _freq | ||
| _data | ||
| """ | ||
|  | ||
| # ----------------------------------------------------------------- | ||
| # Descriptive Properties | ||
|  | ||
| @property | ||
| def _box_func(self): | ||
| return lambda x: Timestamp(x, freq=self.freq, tz=self.tz) | ||
|  | ||
| @cache_readonly | ||
| def dtype(self): | ||
| if self.tz is None: | ||
| return _NS_DTYPE | ||
| return DatetimeTZDtype('ns', self.tz) | ||
|  | ||
| @property | ||
| def tzinfo(self): | ||
| """ | ||
| Alias for tz attribute | ||
| """ | ||
| return self.tz | ||
|  | ||
| @property # NB: override with cache_readonly in immutable subclasses | ||
| def _timezone(self): | ||
| """ Comparable timezone both for pytz / dateutil""" | ||
| return timezones.get_timezone(self.tzinfo) | ||
|  | ||
| @property | ||
| def offset(self): | ||
| """get/set the frequency of the instance""" | ||
| msg = ('DatetimeIndex.offset has been deprecated and will be removed ' | ||
| 'in a future version; use DatetimeIndex.freq instead.') | ||
| warnings.warn(msg, FutureWarning, stacklevel=2) | ||
| return self.freq | ||
|  | ||
| @offset.setter | ||
| def offset(self, value): | ||
| """get/set the frequency of the instance""" | ||
| msg = ('DatetimeIndex.offset has been deprecated and will be removed ' | ||
| 'in a future version; use DatetimeIndex.freq instead.') | ||
| warnings.warn(msg, FutureWarning, stacklevel=2) | ||
| self.freq = value | ||
|  | ||
| # ----------------------------------------------------------------- | ||
| # Comparison Methods | ||
|  | ||
| def _has_same_tz(self, other): | ||
| zzone = self._timezone | ||
|  | ||
| # vzone shouldn't be None if value is non-datetime like | ||
| if isinstance(other, np.datetime64): | ||
| # convert to Timestamp as np.datetime64 doesn't have tz attr | ||
| other = Timestamp(other) | ||
| vzone = timezones.get_timezone(getattr(other, 'tzinfo', '__no_tz__')) | ||
| return zzone == vzone | ||
|  | ||
| def _assert_tzawareness_compat(self, other): | ||
| # adapted from _Timestamp._assert_tzawareness_compat | ||
| other_tz = getattr(other, 'tzinfo', None) | ||
| if is_datetime64tz_dtype(other): | ||
| # Get tzinfo from Series dtype | ||
| other_tz = other.dtype.tz | ||
| if other is NaT: | ||
| # pd.NaT quacks both aware and naive | ||
| pass | ||
| elif self.tz is None: | ||
| if other_tz is not None: | ||
| raise TypeError('Cannot compare tz-naive and tz-aware ' | ||
| 'datetime-like objects.') | ||
| elif other_tz is None: | ||
| raise TypeError('Cannot compare tz-naive and tz-aware ' | ||
| 'datetime-like objects') | ||
|  | ||
| # ----------------------------------------------------------------- | ||
| # Arithmetic Methods | ||
|  | ||
| def _sub_datelike_dti(self, other): | ||
| """subtraction of two DatetimeIndexes""" | ||
| if not len(self) == len(other): | ||
| raise ValueError("cannot add indices of unequal length") | ||
|  | ||
| self_i8 = self.asi8 | ||
| other_i8 = other.asi8 | ||
| new_values = self_i8 - other_i8 | ||
| if self.hasnans or other.hasnans: | ||
| mask = (self._isnan) | (other._isnan) | ||
| new_values[mask] = iNaT | ||
| return new_values.view('timedelta64[ns]') | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,28 @@ | ||
| # -*- coding: utf-8 -*- | ||
|  | ||
| from pandas._libs.tslibs.period import Period | ||
|  | ||
| from pandas.util._decorators import cache_readonly | ||
|  | ||
| from pandas.core.dtypes.dtypes import PeriodDtype | ||
|  | ||
| from .datetimelike import DatetimeLikeArrayMixin | ||
|  | ||
|  | ||
| class PeriodArrayMixin(DatetimeLikeArrayMixin): | ||
| @property | ||
| def _box_func(self): | ||
| return lambda x: Period._from_ordinal(ordinal=x, freq=self.freq) | ||
|  | ||
| @cache_readonly | ||
| def dtype(self): | ||
| return PeriodDtype.construct_from_string(self.freq) | ||
|  | ||
| @property | ||
| def _ndarray_values(self): | ||
| # Ordinals | ||
| return self._data | ||
|  | ||
| @property | ||
| def asi8(self): | ||
| return self._ndarray_values.view('i8') | 
| Original file line number | Diff line number | Diff line change | 
|---|---|---|
| @@ -0,0 +1,17 @@ | ||
| # -*- coding: utf-8 -*- | ||
|  | ||
| from pandas._libs.tslib import Timedelta | ||
|  | ||
| from pandas.core.dtypes.common import _TD_DTYPE | ||
|  | ||
| from .datetimelike import DatetimeLikeArrayMixin | ||
|  | ||
|  | ||
| class TimedeltaArrayMixin(DatetimeLikeArrayMixin): | ||
| @property | ||
| def _box_func(self): | ||
| return lambda x: Timedelta(x, unit='ns') | ||
|  | ||
| @property | ||
| def dtype(self): | ||
| return _TD_DTYPE | 
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Do we want this for our arrays?
I am not sure this should be called
`.values`. And we already have
`_ndarray_values`?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is a good question, and I think it relates to the inheritance vs composition question. As with several other comments, the narrow-context answer is that this PR is designed to involve essentially zero changes to behavior; it is just moving methods/properties from their existing locations in the index classes.
As for questions about whether a method/property is needed in the array classes, these are all going to be needed by the arithmetic/comparison methods (ported in a later pass).