Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -381,6 +381,7 @@ Other Removals
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting frequencies in :class:`Minute`, :class:`Milli`, :class:`Micro`, :class:`Nano` (:issue:`57627`)
- Enforced deprecation of strings ``T``, ``L``, ``U``, and ``N`` denoting units in :class:`Timedelta` (:issue:`57627`)
- Enforced deprecation of the behavior of :func:`concat` when ``len(keys) != len(objs)`` would truncate to the shorter of the two. Now this raises a ``ValueError`` (:issue:`43485`)
- Enforced deprecation of the behavior of :meth:`DataFrame.replace` and :meth:`Series.replace` with :class:`CategoricalDtype` that would introduce new categories (:issue:`58270`)
- Enforced deprecation of values "pad", "ffill", "bfill", and "backfill" for :meth:`Series.interpolate` and :meth:`DataFrame.interpolate` (:issue:`57869`)
- Enforced deprecation removing :meth:`Categorical.to_list`, use ``obj.tolist()`` instead (:issue:`51254`)
- Enforced silent-downcasting deprecation for :ref:`all relevant methods <whatsnew_220.silent_downcasting>` (:issue:`54710`)
Expand Down
58 changes: 0 additions & 58 deletions pandas/core/arrays/categorical.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
cast,
overload,
)
import warnings

import numpy as np

Expand All @@ -23,7 +22,6 @@
)
from pandas._libs.arrays import NDArrayBacked
from pandas.compat.numpy import function as nv
from pandas.util._exceptions import find_stack_level
from pandas.util._validators import validate_bool_kwarg

from pandas.core.dtypes.cast import (
Expand Down Expand Up @@ -2673,62 +2671,6 @@ def isin(self, values: ArrayLike) -> npt.NDArray[np.bool_]:
code_values = code_values[null_mask | (code_values >= 0)]
return algorithms.isin(self.codes, code_values)

# Typing overloads: with inplace=False (the default) the call returns a new
# object of the same type; with inplace=True it returns None.
@overload
def _replace(self, *, to_replace, value, inplace: Literal[False] = ...) -> Self: ...

@overload
def _replace(self, *, to_replace, value, inplace: Literal[True]) -> None: ...

# Replace values by rewriting the *categories* rather than the individual
# elements: the categories are passed through Series.replace, de-duplicated,
# and the existing codes are recoded against the resulting categories.
#
# Parameters (all keyword-only):
#   to_replace, value : forwarded unchanged to Series.replace on the categories.
#   inplace : if True, mutate ``self`` and return None; otherwise work on a copy
#       and return it.
#
# Emits a FutureWarning when the resulting dtype differs from the dtype the
# array had on entry.
def _replace(self, *, to_replace, value, inplace: bool = False) -> Self | None:
# NOTE(review): function-scope import, presumably to avoid a circular import
# at module load time -- confirm.
from pandas import Index

# Keep the dtype seen on entry so a change can be detected at the end.
orig_dtype = self.dtype

inplace = validate_bool_kwarg(inplace, "inplace")
cat = self if inplace else self.copy()

# Replacement values that are NA are handled first: the corresponding
# ``to_replace`` entries that are existing categories are removed from the
# categories outright instead of being renamed.
mask = isna(np.asarray(value))
if mask.any():
removals = np.asarray(to_replace)[mask]
removals = cat.categories[cat.categories.isin(removals)]
new_cat = cat.remove_categories(removals)
# Re-initialise ``cat`` in place with the reduced codes/dtype.
NDArrayBacked.__init__(cat, new_cat.codes, new_cat.dtype)

# Apply the replacement to the categories themselves (as a Series).
ser = cat.categories.to_series()
ser = ser.replace(to_replace=to_replace, value=value)

# One entry per current category, holding its post-replacement value
# (may contain duplicates when several categories map to the same value).
all_values = Index(ser)

# GH51016: maintain order of existing categories
# A replaced value that already exists as a category (idxr != -1) is keyed by
# that existing position; any other value keeps its own current position.
idxr = cat.categories.get_indexer_for(all_values)
locs = np.arange(len(ser))
locs = np.where(idxr == -1, locs, idxr)
locs = locs.argsort()

# Reorder by the keys computed above, then collapse duplicates so that each
# resulting category appears once (first occurrence wins).
new_categories = ser.take(locs)
new_categories = new_categories.drop_duplicates(keep="first")
index_categories = Index(new_categories)
# NOTE(review): presumably maps codes that index into ``all_values`` onto
# positions in ``index_categories`` -- confirm against recode_for_categories.
new_codes = recode_for_categories(
cat._codes, all_values, index_categories, copy=False
)
# The ``ordered`` flag is carried over from the original dtype.
new_dtype = CategoricalDtype(index_categories, ordered=self.dtype.ordered)
NDArrayBacked.__init__(cat, new_codes, new_dtype)

# Warn only when the replacement actually changed the dtype.
if new_dtype != orig_dtype:
warnings.warn(
# GH#55147
"The behavior of Series.replace (and DataFrame.replace) with "
"CategoricalDtype is deprecated. In a future version, replace "
"will only be used for cases that preserve the categories. "
"To change the categories, use ser.cat.rename_categories "
"instead.",
FutureWarning,
stacklevel=find_stack_level(),
)
# Return the modified copy only when not operating in place.
if not inplace:
return cat
return None

# ------------------------------------------------------------------------
# String methods interface
def _str_map(
Expand Down
17 changes: 0 additions & 17 deletions pandas/core/internals/blocks.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,6 @@
)
from pandas.core.array_algos.transforms import shift
from pandas.core.arrays import (
Categorical,
DatetimeArray,
ExtensionArray,
IntervalArray,
Expand Down Expand Up @@ -696,14 +695,6 @@ def replace(
# go through replace_list
values = self.values

if isinstance(values, Categorical):
# TODO: avoid special-casing
# GH49404
blk = self._maybe_copy(inplace)
values = cast(Categorical, blk.values)
values._replace(to_replace=to_replace, value=value, inplace=True)
return [blk]

if not self._can_hold_element(to_replace):
# We cannot hold `to_replace`, so we know immediately that
# replacing it is a no-op.
Expand Down Expand Up @@ -803,14 +794,6 @@ def replace_list(
"""
values = self.values

if isinstance(values, Categorical):
# TODO: avoid special-casing
# GH49404
blk = self._maybe_copy(inplace)
values = cast(Categorical, blk.values)
values._replace(to_replace=src_list, value=dest_list, inplace=True)
return [blk]

# Exclude anything that we know we won't contain
pairs = [
(x, y) for x, y in zip(src_list, dest_list) if self._can_hold_element(x)
Expand Down
118 changes: 39 additions & 79 deletions pandas/tests/arrays/categorical/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,106 +6,66 @@


@pytest.mark.parametrize(
"to_replace,value,expected,flip_categories",
"to_replace,value,expected",
[
# one-to-one
(1, 2, [2, 2, 3], False),
(1, 4, [4, 2, 3], False),
(4, 1, [1, 2, 3], False),
(5, 6, [1, 2, 3], False),
(4, 1, [1, 2, 3]),
(3, 1, [1, 2, 1]),
# many-to-one
([1], 2, [2, 2, 3], False),
([1, 2], 3, [3, 3, 3], False),
([1, 2], 4, [4, 4, 3], False),
((1, 2, 4), 5, [5, 5, 3], False),
((5, 6), 2, [1, 2, 3], False),
([1], [2], [2, 2, 3], False),
([1, 4], [5, 2], [5, 2, 3], False),
# GH49404: overlap between to_replace and value
([1, 2, 3], [2, 3, 4], [2, 3, 4], False),
# GH50872, GH46884: replace with null
(1, None, [None, 2, 3], False),
(1, pd.NA, [None, 2, 3], False),
# check_categorical sorts categories, which crashes on mixed dtypes
(3, "4", [1, 2, "4"], False),
([1, 2, "3"], "5", ["5", "5", 3], True),
((5, 6), 2, [1, 2, 3]),
((3, 2), 1, [1, 1, 1]),
],
)
@pytest.mark.filterwarnings(
"ignore:.*with CategoricalDtype is deprecated:FutureWarning"
)
def test_replace_categorical_series(to_replace, value, expected, flip_categories):
def test_replace_categorical_series(to_replace, value, expected):
# GH 31720

ser = pd.Series([1, 2, 3], dtype="category")
result = ser.replace(to_replace, value)
expected = pd.Series(expected, dtype="category")
ser.replace(to_replace, value, inplace=True)

if flip_categories:
expected = expected.cat.set_categories(expected.cat.categories[::-1])

tm.assert_series_equal(expected, result, check_category_order=False)
tm.assert_series_equal(expected, ser, check_category_order=False)
expected = pd.Series(Categorical(expected, categories=[1, 2, 3]))
tm.assert_series_equal(result, expected)


@pytest.mark.parametrize(
"to_replace, value, result, expected_error_msg",
"to_replace,value",
[
("b", "c", ["a", "c"], "Categorical.categories are different"),
("c", "d", ["a", "b"], None),
# https://github.com/pandas-dev/pandas/issues/33288
("a", "a", ["a", "b"], None),
("b", None, ["a", None], "Categorical.categories length are different"),
# one-to-one
(3, 5),
# many-to-one
((3, 2), 5),
],
)
def test_replace_categorical(to_replace, value, result, expected_error_msg):
# GH#26988
cat = Categorical(["a", "b"])
expected = Categorical(result)
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if expected_error_msg is not None else None
with tm.assert_produces_warning(warn, match=msg):
result = pd.Series(cat, copy=False).replace(to_replace, value)._values
def test_replace_categorical_series_new_category_raises(to_replace, value):
# GH 31720
ser = pd.Series([1, 2, 3], dtype="category")
with pytest.raises(
TypeError, match="Cannot setitem on a Categorical with a new category"
):
ser.replace(to_replace, value)

tm.assert_categorical_equal(result, expected)
if to_replace == "b": # the "c" test is supposed to be unchanged
with pytest.raises(AssertionError, match=expected_error_msg):
# ensure non-inplace call does not affect original
tm.assert_categorical_equal(cat, expected)

ser = pd.Series(cat, copy=False)
with tm.assert_produces_warning(warn, match=msg):
ser.replace(to_replace, value, inplace=True)
tm.assert_categorical_equal(cat, expected)
def test_replace_maintain_ordering():
# GH51016
# Replacing one existing category (0) with another existing category (2) on an
# ordered categorical Series: the expected result reuses the very same dtype,
# i.e. the categories [0, 1, 2] and their order are left untouched.
dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
ser = pd.Series([0, 1, 2], dtype=dtype)
result = ser.replace(0, 2)
expected = pd.Series([2, 1, 2], dtype=dtype)
# check_category_order=True makes the comparison sensitive to category order.
tm.assert_series_equal(expected, result, check_category_order=True)


def test_replace_categorical_ea_dtype():
# GH49404
cat = Categorical(pd.array(["a", "b"], dtype="string"))
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
cat = Categorical(pd.array(["a", "b", "c"], dtype="string"))
result = pd.Series(cat).replace(["a", "b"], ["c", "c"])._values
expected = Categorical(
pd.array(["c"] * 3, dtype="string"),
categories=pd.array(["a", "b", "c"], dtype="string"),
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = pd.Series(cat).replace(["a", "b"], ["c", pd.NA])._values
expected = Categorical(pd.array(["c", pd.NA], dtype="string"))
tm.assert_categorical_equal(result, expected)


def test_replace_maintain_ordering():
# GH51016
# Replacing category 0 with existing category 2 on an ordered categorical
# Series: this version expects a FutureWarning and a result dtype from which
# category 0 has been dropped, with the remaining categories kept in order.
dtype = pd.CategoricalDtype([0, 1, 2], ordered=True)
ser = pd.Series([0, 1, 2], dtype=dtype)
# Regex matched against the emitted warning message.
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
result = ser.replace(0, 2)
# Expected categories no longer contain 0; ordered flag is preserved.
expected_dtype = pd.CategoricalDtype([1, 2], ordered=True)
expected = pd.Series([2, 1, 2], dtype=expected_dtype)
# check_category_order=True makes the comparison sensitive to category order.
tm.assert_series_equal(expected, result, check_category_order=True)
def test_replace_categorical_ea_dtype_different_cats_raises():
# GH49404
# The Categorical is built from the string-dtype values "a" and "b" only, so
# the replacement value "c" is not one of its categories; the replace call is
# expected to raise TypeError instead of adding a category.
cat = Categorical(pd.array(["a", "b"], dtype="string"))
with pytest.raises(
TypeError, match="Cannot setitem on a Categorical with a new category"
):
pd.Series(cat).replace(["a", "b"], ["c", pd.NA])
56 changes: 12 additions & 44 deletions pandas/tests/copy_view/test_replace.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,18 +129,14 @@ def test_replace_to_replace_wrong_dtype():
def test_replace_list_categorical():
df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
arr = get_array(df, "a")
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
df.replace(["c"], value="a", inplace=True)

df.replace(["c"], value="a", inplace=True)
assert np.shares_memory(arr.codes, get_array(df, "a").codes)
assert df._mgr._has_no_reference(0)

df_orig = df.copy()
with tm.assert_produces_warning(FutureWarning, match=msg):
df2 = df.replace(["b"], value="a")
df.replace(["b"], value="a")
df2 = df.apply(lambda x: x.cat.rename_categories({"b": "d"}))
assert not np.shares_memory(arr.codes, get_array(df2, "a").codes)

tm.assert_frame_equal(df, df_orig)
Expand All @@ -150,13 +146,7 @@ def test_replace_list_inplace_refs_categorical():
df = DataFrame({"a": ["a", "b", "c"]}, dtype="category")
view = df[:]
df_orig = df.copy()
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
with tm.assert_produces_warning(FutureWarning, match=msg):
df.replace(["c"], value="a", inplace=True)
assert not np.shares_memory(get_array(view, "a").codes, get_array(df, "a").codes)
df.replace(["c"], value="a", inplace=True)
tm.assert_frame_equal(df_orig, view)


Expand Down Expand Up @@ -195,56 +185,34 @@ def test_replace_inplace_reference_no_op(to_replace):


@pytest.mark.parametrize("to_replace", [1, [1]])
@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace_reference(val, to_replace):
def test_replace_categorical_inplace_reference(to_replace):
df = DataFrame({"a": Categorical([1, 2, 3])})
df_orig = df.copy()
arr_a = get_array(df, "a")
view = df[:]
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):
df.replace(to_replace=to_replace, value=val, inplace=True)

df.replace(to_replace=to_replace, value=1, inplace=True)
assert not np.shares_memory(get_array(df, "a").codes, arr_a.codes)
assert df._mgr._has_no_reference(0)
assert view._mgr._has_no_reference(0)
tm.assert_frame_equal(view, df_orig)


@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical_inplace(val):
def test_replace_categorical_inplace():
df = DataFrame({"a": Categorical([1, 2, 3])})
arr_a = get_array(df, "a")
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):
df.replace(to_replace=1, value=val, inplace=True)
df.replace(to_replace=1, value=1, inplace=True)

assert np.shares_memory(get_array(df, "a").codes, arr_a.codes)
assert df._mgr._has_no_reference(0)

expected = DataFrame({"a": Categorical([val, 2, 3])})
expected = DataFrame({"a": Categorical([1, 2, 3])})
tm.assert_frame_equal(df, expected)


@pytest.mark.parametrize("val", [1, 1.5])
def test_replace_categorical(val):
def test_replace_categorical():
df = DataFrame({"a": Categorical([1, 2, 3])})
df_orig = df.copy()
msg = (
r"The behavior of Series\.replace \(and DataFrame.replace\) "
"with CategoricalDtype"
)
warn = FutureWarning if val == 1.5 else None
with tm.assert_produces_warning(warn, match=msg):
df2 = df.replace(to_replace=1, value=val)
df2 = df.replace(to_replace=1, value=1)

assert df._mgr._has_no_reference(0)
assert df2._mgr._has_no_reference(0)
Expand Down
Loading