Move the lookup-caching helpers out of NDFrame: DataFrame owns the item
cache, Series owns the cacher back-reference, and NDFrame keeps only the
shared hooks.

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index d1d1993931062..c9869a7799437 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -3424,7 +3424,7 @@ def __getitem__(self, key):
             # - we have a MultiIndex on columns (test on self.columns, #21309)
             if data.shape[1] == 1 and not isinstance(self.columns, MultiIndex):
                 # GH#26490 using data[key] can cause RecursionError
-                data = data._get_item_cache(key)
+                return data._get_item_cache(key)
 
         return data
 
@@ -3807,6 +3807,43 @@ def _box_col_values(self, values, loc: int) -> Series:
         klass = self._constructor_sliced
         return klass(values, index=self.index, name=name, fastpath=True)
 
+    # ----------------------------------------------------------------------
+    # Lookup Caching
+
+    def _clear_item_cache(self) -> None:
+        self._item_cache.clear()
+
+    def _get_item_cache(self, item: Hashable) -> Series:
+        """Return the cached item, item represents a label indexer."""
+        cache = self._item_cache
+        res = cache.get(item)
+        if res is None:
+            # All places that call _get_item_cache have unique columns,
+            # pending resolution of GH#33047
+
+            loc = self.columns.get_loc(item)
+            values = self._mgr.iget(loc)
+            res = self._box_col_values(values, loc).__finalize__(self)
+
+            cache[item] = res
+            res._set_as_cached(item, self)
+
+            # for a chain
+            res._is_copy = self._is_copy
+        return res
+
+    def _reset_cacher(self) -> None:
+        # no-op for DataFrame
+        pass
+
+    def _maybe_cache_changed(self, item, value: Series) -> None:
+        """
+        The object has called back to us saying maybe it has changed.
+        """
+        loc = self._info_axis.get_loc(item)
+        arraylike = value._values
+        self._mgr.iset(loc, arraylike)
+
     # ----------------------------------------------------------------------
     # Unsorted
 
diff --git a/pandas/core/generic.py b/pandas/core/generic.py
index 25c10c215e8cc..b9895bcab58d9 100644
--- a/pandas/core/generic.py
+++ b/pandas/core/generic.py
@@ -876,8 +876,6 @@ def droplevel(self: FrameOrSeries, level, axis=0) -> FrameOrSeries:
     def pop(self, item: Hashable) -> Union[Series, Any]:
         result = self[item]
         del self[item]
-        if self.ndim == 2:
-            result._reset_cacher()
 
         return result
 
@@ -3519,46 +3517,12 @@ def to_csv(
     # ----------------------------------------------------------------------
     # Lookup Caching
 
-    @final
-    def _set_as_cached(self, item, cacher) -> None:
-        """
-        Set the _cacher attribute on the calling object with a weakref to
-        cacher.
-        """
-        self._cacher = (item, weakref.ref(cacher))
-
-    @final
     def _reset_cacher(self) -> None:
         """
         Reset the cacher.
         """
-        if hasattr(self, "_cacher"):
-            del self._cacher
-
-    @final
-    def _maybe_cache_changed(self, item, value) -> None:
-        """
-        The object has called back to us saying maybe it has changed.
-        """
-        loc = self._info_axis.get_loc(item)
-        arraylike = value._values
-        self._mgr.iset(loc, arraylike)
-
-    @final
-    @property
-    def _is_cached(self) -> bool_t:
-        """Return boolean indicating if self is cached or not."""
-        return getattr(self, "_cacher", None) is not None
-
-    @final
-    def _get_cacher(self):
-        """return my cacher or None"""
-        cacher = getattr(self, "_cacher", None)
-        if cacher is not None:
-            cacher = cacher[1]()
-        return cacher
+        raise AbstractMethodError(self)
 
-    @final
     def _maybe_update_cacher(
         self, clear: bool_t = False, verify_is_copy: bool_t = True
     ) -> None:
@@ -3573,22 +3537,6 @@ def _maybe_update_cacher(
         verify_is_copy : bool, default True
             Provide is_copy checks.
         """
-        cacher = getattr(self, "_cacher", None)
-        if cacher is not None:
-            ref = cacher[1]()
-
-            # we are trying to reference a dead referent, hence
-            # a copy
-            if ref is None:
-                del self._cacher
-            else:
-                if len(self) == len(ref):
-                    # otherwise, either self or ref has swapped in new arrays
-                    ref._maybe_cache_changed(cacher[0], self)
-                else:
-                    # GH#33675 we have swapped in a new array, so parent
-                    # reference to self is now invalid
-                    ref._item_cache.pop(cacher[0], None)
 
         if verify_is_copy:
             self._check_setitem_copy(stacklevel=5, t="referent")
@@ -3596,9 +3544,8 @@ def _maybe_update_cacher(
         if clear:
             self._clear_item_cache()
 
-    @final
     def _clear_item_cache(self) -> None:
-        self._item_cache.clear()
+        raise AbstractMethodError(self)
 
     # ----------------------------------------------------------------------
     # Indexing Methods
@@ -3894,26 +3841,6 @@ class animal locomotion
     def __getitem__(self, item):
         raise AbstractMethodError(self)
 
-    @final
-    def _get_item_cache(self, item):
-        """Return the cached item, item represents a label indexer."""
-        cache = self._item_cache
-        res = cache.get(item)
-        if res is None:
-            # All places that call _get_item_cache have unique columns,
-            # pending resolution of GH#33047
-
-            loc = self.columns.get_loc(item)
-            values = self._mgr.iget(loc)
-            res = self._box_col_values(values, loc).__finalize__(self)
-
-            cache[item] = res
-            res._set_as_cached(item, self)
-
-            # for a chain
-            res._is_copy = self._is_copy
-        return res
-
     def _slice(self: FrameOrSeries, slobj: slice, axis=0) -> FrameOrSeries:
         """
         Construct a slice of this container.
@@ -3939,7 +3866,6 @@ def _set_is_copy(self, ref: FrameOrSeries, copy: bool_t = True) -> None:
             assert ref is not None
             self._is_copy = weakref.ref(ref)
 
-    @final
     def _check_is_chained_assignment_possible(self) -> bool_t:
         """
         Check if we are a view, have a cacher, and are of mixed type.
@@ -3951,12 +3877,7 @@ def _check_is_chained_assignment_possible(self) -> bool_t:
         single-dtype meaning that the cacher should be updated following
         setting.
         """
-        if self._is_view and self._is_cached:
-            ref = self._get_cacher()
-            if ref is not None and ref._is_mixed_type:
-                self._check_setitem_copy(stacklevel=4, t="referent", force=True)
-            return True
-        elif self._is_copy:
+        if self._is_copy:
             self._check_setitem_copy(stacklevel=4, t="referent")
         return False
 
diff --git a/pandas/core/series.py b/pandas/core/series.py
index 4ade9992e9e3e..acac600ca1a07 100644
--- a/pandas/core/series.py
+++ b/pandas/core/series.py
@@ -23,6 +23,7 @@
     overload,
 )
 import warnings
+import weakref
 
 import numpy as np
 
@@ -1145,6 +1146,80 @@ def _set_value(self, label, value, takeable: bool = False):
 
         self._set_values(loc, value)
 
+    # ----------------------------------------------------------------------
+    # Lookup Caching
+
+    @property
+    def _is_cached(self) -> bool:
+        """Return boolean indicating if self is cached or not."""
+        return getattr(self, "_cacher", None) is not None
+
+    def _get_cacher(self):
+        """return my cacher or None"""
+        cacher = getattr(self, "_cacher", None)
+        if cacher is not None:
+            cacher = cacher[1]()
+        return cacher
+
+    def _reset_cacher(self) -> None:
+        """
+        Reset the cacher.
+        """
+        if hasattr(self, "_cacher"):
+            # should only get here with self.ndim == 1
+            del self._cacher
+
+    def _set_as_cached(self, item, cacher) -> None:
+        """
+        Set the _cacher attribute on the calling object with a weakref to
+        cacher.
+        """
+        self._cacher = (item, weakref.ref(cacher))
+
+    def _clear_item_cache(self) -> None:
+        # no-op for Series
+        pass
+
+    def _check_is_chained_assignment_possible(self) -> bool:
+        """
+        See NDFrame._check_is_chained_assignment_possible.__doc__
+        """
+        if self._is_view and self._is_cached:
+            ref = self._get_cacher()
+            if ref is not None and ref._is_mixed_type:
+                self._check_setitem_copy(stacklevel=4, t="referent", force=True)
+            return True
+        return super()._check_is_chained_assignment_possible()
+
+    def _maybe_update_cacher(
+        self, clear: bool = False, verify_is_copy: bool = True
+    ) -> None:
+        """
+        See NDFrame._maybe_update_cacher.__doc__
+        """
+        cacher = getattr(self, "_cacher", None)
+        if cacher is not None:
+            assert self.ndim == 1
+            ref: DataFrame = cacher[1]()
+
+            # we are trying to reference a dead referent, hence
+            # a copy
+            if ref is None:
+                del self._cacher
+            else:
+                # Write back only while the column is still present in ref:
+                # pop() no longer resets the cacher, so self may outlive its
+                # column, and _maybe_cache_changed would then fail in get_loc
+                if len(self) == len(ref) and cacher[0] in ref.columns:
+                    # otherwise, either self or ref has swapped in new arrays
+                    ref._maybe_cache_changed(cacher[0], self)
+                else:
+                    # GH#33675 we have swapped in a new array, so parent
+                    # reference to self is now invalid
+                    ref._item_cache.pop(cacher[0], None)
+
+        super()._maybe_update_cacher(clear=clear, verify_is_copy=verify_is_copy)
+
     # ----------------------------------------------------------------------
     # Unsorted
 