From b29dfc60dde5399c982542e409cb9a5a76309dce Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 16 Aug 2018 10:45:38 -0500 Subject: [PATCH 1/6] Support NDFrame.shift with EAs Uses take internally. Closes https://github.com/pandas-dev/pandas/issues/22386 --- doc/source/whatsnew/v0.24.0.txt | 1 + pandas/core/internals/blocks.py | 18 ++++++++++++++++++ pandas/tests/extension/base/methods.py | 25 +++++++++++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index cf12759c051fc..ed40589acff34 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -444,6 +444,7 @@ ExtensionType Changes the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) +- :meth:`~Series.shift` now works with extension arrays, rather than raising an AttributeError (:isseu:`22386`) - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) - diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index f0635014b166b..603367976f771 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2061,6 +2061,24 @@ def interpolate(self, method='pad', axis=0, inplace=False, limit=None, limit=limit), placement=self.mgr_locs) + def shift(self, periods, axis=0, mgr=None): + # type: (int, int, Optional[BlockPlacement]) -> List[ExtensionBlock] + indexer = np.roll(np.arange(len(self)), periods) + + if periods > 0: + indexer[:periods] = -1 + else: + indexer[periods:] = -1 + + new_values = self.values.take(indexer, allow_fill=True) + return [self.make_block_same_class(new_values, + placement=self.mgr_locs, + ndim=self.ndim)] + + @property + def _ftype(self): + return getattr(self.values, '_pandas_ftype', Block._ftype) + class NumericBlock(Block): __slots__ = () diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index c660687f16590..faceac7a7c289 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -138,3 +138,28 @@ def test_combine_add(self, data_repeated): expected = pd.Series( orig_data1._from_sequence([a + val for a in list(orig_data1)])) self.assert_series_equal(result, expected) + + @pytest.mark.parametrize('frame', [True, False]) + @pytest.mark.parametrize('periods, indices', [ + (-2, [2, 3, 4, -1, -1]), + (0, [0, 1, 2, 3, 4]), + (2, [-1, -1, 0, 1, 2]), + ]) + def test_container_shift_negative(self, data, frame, periods, indices): + # https://github.com/pandas-dev/pandas/issues/22386 + subset = data[:5] + data = pd.Series(subset, name='A') + expected = pd.Series(subset.take(indices, allow_fill=True), name='A') + + if frame: + result = data.to_frame(name='A').assign(B=1).shift(periods) + expected = pd.concat([ + expected, + pd.Series([1] * 5, name='B').shift(periods) + ], axis=1) + compare = tm.assert_frame_equal + else: + result = data.shift(periods) + compare = tm.assert_series_equal + + compare(result, expected) From c9800359696e6497b1c22b12a416d00afa768dd3 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 16 Aug 2018 14:20:21 -0500 Subject: [PATCH 2/6] Updated --- doc/source/whatsnew/v0.24.0.txt | 2 +- pandas/core/arrays/base.py | 27 ++++++++++++++++++++++++++ pandas/core/internals/blocks.py | 20 ++----------------- pandas/tests/extension/base/methods.py | 4 ++-- 4 files changed, 32 insertions(+), 21 deletions(-) diff --git a/doc/source/whatsnew/v0.24.0.txt b/doc/source/whatsnew/v0.24.0.txt index ed40589acff34..2ec7d94f3ef58 100644 --- a/doc/source/whatsnew/v0.24.0.txt +++ b/doc/source/whatsnew/v0.24.0.txt @@ -444,7 +444,7 @@ ExtensionType Changes the ``ExtensionDtype`` has gained the method ``construct_array_type`` (:issue:`21185`) - The ``ExtensionArray`` constructor, ``_from_sequence`` now take the keyword arg ``copy=False`` (:issue:`21185`) - Bug in :meth:`Series.get` for ``Series`` using ``ExtensionArray`` and integer index (:issue:`21257`) -- :meth:`~Series.shift` now works with extension arrays, rather than raising an AttributeError (:isseu:`22386`) +- :meth:`~Series.shift` now dispatches to :meth:`ExtensionArray.shift` (:issue:`22386`) - :meth:`Series.combine()` works correctly with :class:`~pandas.api.extensions.ExtensionArray` inside of :class:`Series` (:issue:`20825`) - :meth:`Series.combine()` with scalar argument now works for any function type (:issue:`21248`) - diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index cb82625e818a1..e224ff12b6085 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -400,6 +400,33 @@ def dropna(self): return self[~self.isna()] + def shift(self, periods=1): + # type: (int) -> ExtensionArray + """ + Shift values by desired number. + + Newly introduced missing values are filled with + ``self.dtype.na_value``. + + Parameters + ---------- + periods : int, default 1 + The number of periods to shift. Negative values are allowed + for shifting backwards. + + Returns + ------- + shifted : ExtensionArray + """ + indexer = np.roll(np.arange(len(self)), periods) + + if periods > 0: + indexer[:periods] = -1 + else: + indexer[periods:] = -1 + + return self.take(indexer, allow_fill=True) + def unique(self): """Compute the ExtensionArray of unique values. diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 603367976f771..0ef22e65b672f 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2062,23 +2062,11 @@ def interpolate(self, method='pad', axis=0, inplace=False, limit=None, placement=self.mgr_locs) def shift(self, periods, axis=0, mgr=None): - # type: (int, int, Optional[BlockPlacement]) -> List[ExtensionBlock] - indexer = np.roll(np.arange(len(self)), periods) - - if periods > 0: - indexer[:periods] = -1 - else: - indexer[periods:] = -1 - - new_values = self.values.take(indexer, allow_fill=True) - return [self.make_block_same_class(new_values, + # type: (int, Optional[BlockPlacement]) -> List[ExtensionBlock] + return [self.make_block_same_class(self.values.shift(periods=periods), placement=self.mgr_locs, ndim=self.ndim)] - @property - def _ftype(self): - return getattr(self.values, '_pandas_ftype', Block._ftype) - class NumericBlock(Block): __slots__ = () @@ -2702,10 +2690,6 @@ def _try_coerce_result(self, result): return result - def shift(self, periods, axis=0, mgr=None): - return self.make_block_same_class(values=self.values.shift(periods), - placement=self.mgr_locs) - def to_dense(self): # Categorical.get_values returns a DatetimeIndex for datetime # categories, so we can't simply use `np.asarray(self.values)` like diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index faceac7a7c289..1819c0e40ce69 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -157,9 +157,9 @@ def test_container_shift_negative(self, data, frame, periods, indices): expected, pd.Series([1] * 5, name='B').shift(periods) ], axis=1) - compare = tm.assert_frame_equal + compare = self.assert_frame_equal else: result = data.shift(periods) - compare = tm.assert_series_equal + compare = self.assert_series_equal compare(result, expected) From c4b0b9736e93f0ae1e397c0217281594dfa814cc Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Thu, 16 Aug 2018 14:36:39 -0500 Subject: [PATCH 3/6] Slice based --- pandas/core/arrays/base.py | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e224ff12b6085..e85e019003fde 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -418,14 +418,17 @@ def shift(self, periods=1): ------- shifted : ExtensionArray """ - indexer = np.roll(np.arange(len(self)), periods) - + if periods == 0: + return self.copy() + empty = self._from_sequence([self.dtype.na_value] * abs(periods), + dtype=self.dtype) if periods > 0: - indexer[:periods] = -1 + a = empty + b = self[:-periods] else: - indexer[periods:] = -1 - - return self.take(indexer, allow_fill=True) + a = self[abs(periods):] + b = empty + return self._concat_same_type([a, b]) def unique(self): """Compute the ExtensionArray of unique values. From 8d404bce9bfbd111ec0fe7156fec30c5ea868336 Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 20 Aug 2018 07:38:52 -0500 Subject: [PATCH 4/6] update name --- pandas/tests/extension/base/methods.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/extension/base/methods.py b/pandas/tests/extension/base/methods.py index 1819c0e40ce69..c8656808739c4 100644 --- a/pandas/tests/extension/base/methods.py +++ b/pandas/tests/extension/base/methods.py @@ -145,7 +145,7 @@ def test_combine_add(self, data_repeated): (0, [0, 1, 2, 3, 4]), (2, [-1, -1, 0, 1, 2]), ]) - def test_container_shift_negative(self, data, frame, periods, indices): + def test_container_shift(self, data, frame, periods, indices): # https://github.com/pandas-dev/pandas/issues/22386 subset = data[:5] data = pd.Series(subset, name='A') From 64f51f4991c55930490c9cde04020b055b79b14b Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Mon, 20 Aug 2018 08:02:40 -0500 Subject: [PATCH 5/6] Doc notes --- pandas/core/arrays/base.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index e85e019003fde..8c09b352c01d9 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -59,6 +59,10 @@ class ExtensionArray(object): * factorize / _values_for_factorize * argsort / _values_for_argsort + The remaining methods implemented on this class should be performant, + as they only compose abstract methods. Still, a more efficient + implementation may be available, and these methods can be overridden. + This class does not inherit from 'abc.ABCMeta' for performance reasons. Methods and properties required by the interface raise ``pandas.errors.AbstractMethodError`` and no ``register`` method is @@ -418,6 +422,8 @@ def shift(self, periods=1): ------- shifted : ExtensionArray """ + # Note: this implementation assumes that `self.dtype.na_value` can be + # stored in an instance of your ExtensionArray with `self.dtype`. if periods == 0: return self.copy() empty = self._from_sequence([self.dtype.na_value] * abs(periods), From c5b556d77466ae317c4772628c1f5ef4392b1bad Mon Sep 17 00:00:00 2001 From: Tom Augspurger Date: Wed, 22 Aug 2018 07:30:18 -0500 Subject: [PATCH 6/6] Added versionadded [ci skip] [ci skip] --- pandas/core/arrays/base.py | 2 ++ pandas/core/internals/blocks.py | 6 ++++++ 2 files changed, 8 insertions(+) diff --git a/pandas/core/arrays/base.py b/pandas/core/arrays/base.py index 8c09b352c01d9..7bf13fb2fecc0 100644 --- a/pandas/core/arrays/base.py +++ b/pandas/core/arrays/base.py @@ -412,6 +412,8 @@ def shift(self, periods=1): Newly introduced missing values are filled with ``self.dtype.na_value``. + .. versionadded:: 0.24.0 + Parameters ---------- periods : int, default 1 diff --git a/pandas/core/internals/blocks.py b/pandas/core/internals/blocks.py index 6e857719d0247..e735b35653cd4 100644 --- a/pandas/core/internals/blocks.py +++ b/pandas/core/internals/blocks.py @@ -2069,6 +2069,12 @@ def interpolate(self, method='pad', axis=0, inplace=False, limit=None, placement=self.mgr_locs) def shift(self, periods, axis=0, mgr=None): + """ + Shift the block by `periods`. + + Dispatches to underlying ExtensionArray and re-boxes in an + ExtensionBlock. + """ # type: (int, Optional[BlockPlacement]) -> List[ExtensionBlock] return [self.make_block_same_class(self.values.shift(periods=periods), placement=self.mgr_locs,