From 7b928b2417b2ee41ccb386552ad931b57c22c1d1 Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 3 Apr 2021 19:30:35 -0700 Subject: [PATCH 1/5] REF: move Series-specific methods from NDFrame --- pandas/core/frame.py | 36 ++++++++++++++++++ pandas/core/generic.py | 84 ++---------------------------------------- pandas/core/series.py | 72 ++++++++++++++++++++++++++++++++++++ 3 files changed, 111 insertions(+), 81 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 484b01f2c04f0..babf7bfefbf5e 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3813,6 +3813,42 @@ def _box_col_values(self, values, loc: int) -> Series: klass = self._constructor_sliced return klass(values, index=self.index, name=name, fastpath=True) + # ---------------------------------------------------------------------- + # Lookup Caching + + def _clear_item_cache(self) -> None: + self._item_cache.clear() + + def _get_item_cache(self, item: Hashable) -> Series: + """Return the cached item, item represents a label indexer.""" + cache = self._item_cache + res = cache.get(item) + if res is None: + # All places that call _get_item_cache have unique columns, + # pending resolution of GH#33047 + + loc = self.columns.get_loc(item) + values = self._mgr.iget(loc) + res = self._box_col_values(values, loc).__finalize__(self) + + cache[item] = res + res._set_as_cached(item, self) + + # for a chain + res._is_copy = self._is_copy + return res + + def _reset_cacher(self) -> None: + # no-op for DataFrame + pass + + def _maybe_cache_changed(self, item, value) -> None: + """ + The object has called back to us saying maybe it has changed. + """ + loc = self._info_axis.get_loc(item) + self._mgr.iset(loc, value) + # ---------------------------------------------------------------------- # Unsorted diff --git a/pandas/core/generic.py b/pandas/core/generic.py index 6b4e3c7caef50..b9895bcab58d9 100644 --- a/pandas/core/generic.py +++ b/pandas/core/generic.py @@ -876,8 +876,6 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries: def pop(self, item: Hashable) -> Union[Series, Any]: result = self[item] del self[item] - if self.ndim == 2: - result._reset_cacher() return result @@ -3519,45 +3517,12 @@ def to_csv( # ---------------------------------------------------------------------- # Lookup Caching - @final - def _set_as_cached(self, item, cacher) -> None: - """ - Set the _cacher attribute on the calling object with a weakref to - cacher. - """ - self._cacher = (item, weakref.ref(cacher)) - - @final def _reset_cacher(self) -> None: """ Reset the cacher. """ - if hasattr(self, "_cacher"): - del self._cacher - - @final - def _maybe_cache_changed(self, item, value) -> None: - """ - The object has called back to us saying maybe it has changed. - """ - loc = self._info_axis.get_loc(item) - self._mgr.iset(loc, value) - - @final - @property - def _is_cached(self) -> bool_t: - """Return boolean indicating if self is cached or not.""" - return getattr(self, "_cacher", None) is not None - - @final - def _get_cacher(self): - """return my cacher or None""" - cacher = getattr(self, "_cacher", None) - if cacher is not None: - cacher = cacher[1]() - return cacher + raise AbstractMethodError(self) - @final def _maybe_update_cacher( self, clear: bool_t = False, verify_is_copy: bool_t = True ) -> None: @@ -3572,22 +3537,6 @@ def _maybe_update_cacher( verify_is_copy : bool, default True Provide is_copy checks. """ - cacher = getattr(self, "_cacher", None) - if cacher is not None: - ref = cacher[1]() - - # we are trying to reference a dead referent, hence - # a copy - if ref is None: - del self._cacher - else: - if len(self) == len(ref): - # otherwise, either self or ref has swapped in new arrays - ref._maybe_cache_changed(cacher[0], self) - else: - # GH#33675 we have swapped in a new array, so parent - # reference to self is now invalid - ref._item_cache.pop(cacher[0], None) if verify_is_copy: self._check_setitem_copy(stacklevel=5, t="referent") @@ -3595,9 +3544,8 @@ def _maybe_update_cacher( if clear: self._clear_item_cache() - @final def _clear_item_cache(self) -> None: - self._item_cache.clear() + raise AbstractMethodError(self) # ---------------------------------------------------------------------- # Indexing Methods @@ -3893,26 +3841,6 @@ class animal locomotion def __getitem__(self, item): raise AbstractMethodError(self) - @final - def _get_item_cache(self, item): - """Return the cached item, item represents a label indexer.""" - cache = self._item_cache - res = cache.get(item) - if res is None: - # All places that call _get_item_cache have unique columns, - # pending resolution of GH#33047 - - loc = self.columns.get_loc(item) - values = self._mgr.iget(loc) - res = self._box_col_values(values, loc).__finalize__(self) - - cache[item] = res - res._set_as_cached(item, self) - - # for a chain - res._is_copy = self._is_copy - return res - def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries: """ Construct a slice of this container. @@ -3938,7 +3866,6 @@ def _set_is_copy(self, ref: FrameOrSeries, copy: bool_t = True) -> None: assert ref is not None self._is_copy = weakref.ref(ref) - @final def _check_is_chained_assignment_possible(self) -> bool_t: """ Check if we are a view, have a cacher, and are of mixed type. @@ -3950,12 +3877,7 @@ def _check_is_chained_assignment_possible(self) -> bool_t: single-dtype meaning that the cacher should be updated following setting. """ - if self._is_view and self._is_cached: - ref = self._get_cacher() - if ref is not None and ref._is_mixed_type: - self._check_setitem_copy(stacklevel=4, t="referent", force=True) - return True - elif self._is_copy: + if self._is_copy: self._check_setitem_copy(stacklevel=4, t="referent") return False diff --git a/pandas/core/series.py b/pandas/core/series.py index 4ade9992e9e3e..acac600ca1a07 100644 --- a/pandas/core/series.py +++ b/pandas/core/series.py @@ -23,6 +23,7 @@ overload, ) import warnings +import weakref import numpy as np @@ -1145,6 +1146,77 @@ def _set_value(self, label, value, takeable: bool = False): self._set_values(loc, value) + # ---------------------------------------------------------------------- + # Lookup Caching + + @property + def _is_cached(self) -> bool: + """Return boolean indicating if self is cached or not.""" + return getattr(self, "_cacher", None) is not None + + def _get_cacher(self): + """return my cacher or None""" + cacher = getattr(self, "_cacher", None) + if cacher is not None: + cacher = cacher[1]() + return cacher + + def _reset_cacher(self) -> None: + """ + Reset the cacher. + """ + if hasattr(self, "_cacher"): + # should only get here with self.ndim == 1 + del self._cacher + + def _set_as_cached(self, item, cacher) -> None: + """ + Set the _cacher attribute on the calling object with a weakref to + cacher. + """ + self._cacher = (item, weakref.ref(cacher)) + + def _clear_item_cache(self) -> None: + # no-op for Series + pass + + def _check_is_chained_assignment_possible(self) -> bool: + """ + See NDFrame._check_is_chained_assignment_possible.__doc__ + """ + if self._is_view and self._is_cached: + ref = self._get_cacher() + if ref is not None and ref._is_mixed_type: + self._check_setitem_copy(stacklevel=4, t="referent", force=True) + return True + return super()._check_is_chained_assignment_possible() + + def _maybe_update_cacher( + self, clear: bool = False, verify_is_copy: bool = True + ) -> None: + """ + See NDFrame._maybe_update_cacher.__doc__ + """ + cacher = getattr(self, "_cacher", None) + if cacher is not None: + assert self.ndim == 1 + ref: DataFrame = cacher[1]() + + # we are trying to reference a dead referent, hence + # a copy + if ref is None: + del self._cacher + else: + if len(self) == len(ref): + # otherwise, either self or ref has swapped in new arrays + ref._maybe_cache_changed(cacher[0], self) + else: + # GH#33675 we have swapped in a new array, so parent + # reference to self is now invalid + ref._item_cache.pop(cacher[0], None) + + super()._maybe_update_cacher(clear=clear, verify_is_copy=verify_is_copy) + # ---------------------------------------------------------------------- # Unsorted From 124021d54fef185b505ecf59a1d39997afc056bd Mon Sep 17 00:00:00 2001 From: Brock Date: Sat, 3 Apr 2021 20:28:43 -0700 Subject: [PATCH 2/5] mypy fixup --- pandas/core/frame.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index babf7bfefbf5e..86d4297364161 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3428,7 +3428,7 @@ def __getitem__(self, key): # - we have a MultiIndex on columns (test on self.columns, #21309) if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex): # GH#26490 using data[key] can cause RecursionError - data = data._get_item_cache(key) + return data._get_item_cache(key) return data From 0e466ddd078b622ed64dfc0a3263b82f410850d2 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 5 Apr 2021 11:13:23 -0700 Subject: [PATCH 3/5] restore test removed 40578 --- pandas/tests/extension/test_numpy.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index 35e5abe9ce4e7..e11e74f16030c 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -330,6 +330,17 @@ def test_fillna_frame(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_frame(data_missing) + def test_fillna_fill_other(self, data_missing): + # Same as the parent class test, but with PandasDtype for expected["B"] + # instead of equivalent numpy dtype + data = data_missing + result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0}) + + expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)}) + expected["B"] = expected["B"].astype(PandasDtype(expected["B"].dtype)) + + self.assert_frame_equal(result, expected) + class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): @pytest.mark.skip(reason="Incorrect expected.") From 38ed4b59a3fa7f8482e59eeb7451f528ab1977e6 Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 5 Apr 2021 12:11:54 -0700 Subject: [PATCH 4/5] post-merge fixup --- pandas/core/frame.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index d830f67495c1b..c9869a7799437 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -3836,12 +3836,13 @@ def _reset_cacher(self) -> None: # no-op for DataFrame pass - def _maybe_cache_changed(self, item, value) -> None: + def _maybe_cache_changed(self, item, value: Series) -> None: """ The object has called back to us saying maybe it has changed. """ loc = self._info_axis.get_loc(item) - self._mgr.iset(loc, value) + arraylike = value._values + self._mgr.iset(loc, arraylike) # ---------------------------------------------------------------------- # Unsorted From 7f325461d3d3e47dfb45c9361f7c25f3d594c22e Mon Sep 17 00:00:00 2001 From: Brock Date: Mon, 5 Apr 2021 13:28:16 -0700 Subject: [PATCH 5/5] un-revert --- pandas/tests/extension/test_numpy.py | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/pandas/tests/extension/test_numpy.py b/pandas/tests/extension/test_numpy.py index e11e74f16030c..35e5abe9ce4e7 100644 --- a/pandas/tests/extension/test_numpy.py +++ b/pandas/tests/extension/test_numpy.py @@ -330,17 +330,6 @@ def test_fillna_frame(self, data_missing): # Non-scalar "scalar" values. super().test_fillna_frame(data_missing) - def test_fillna_fill_other(self, data_missing): - # Same as the parent class test, but with PandasDtype for expected["B"] - # instead of equivalent numpy dtype - data = data_missing - result = pd.DataFrame({"A": data, "B": [np.nan] * len(data)}).fillna({"B": 0.0}) - - expected = pd.DataFrame({"A": data, "B": [0.0] * len(result)}) - expected["B"] = expected["B"].astype(PandasDtype(expected["B"].dtype)) - - self.assert_frame_equal(result, expected) - class TestReshaping(BaseNumPyTests, base.BaseReshapingTests): @pytest.mark.skip(reason="Incorrect expected.")