From e81867ab466535af64c8d3a0d5bb22b4429bd293 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Sun, 21 Sep 2025 16:07:09 +0200 Subject: [PATCH 1/2] BUG: limit select_dtypes(object) back compat fix to default str dtype --- pandas/core/frame.py | 8 ++++++- .../tests/frame/methods/test_select_dtypes.py | 24 +++++++++++++++++++ 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/pandas/core/frame.py b/pandas/core/frame.py index 414f02f41e9f0..110bb8d89cddd 100644 --- a/pandas/core/frame.py +++ b/pandas/core/frame.py @@ -144,6 +144,7 @@ TimedeltaArray, ) from pandas.core.arrays.sparse import SparseFrameAccessor +from pandas.core.arrays.string_ import StringDtype from pandas.core.construction import ( ensure_wrapped_if_datetimelike, sanitize_array, @@ -5157,7 +5158,12 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool: and getattr(dtype, "_is_numeric", False) and not is_bool_dtype(dtype) ) - or (dtype.type is str and np.object_ in dtypes_set) + # backwards compat for the default `str` dtype being selected by object + or ( + isinstance(dtype, StringDtype) + and dtype.na_value is np.nan + and np.object_ in dtypes_set + ) ) def predicate(arr: ArrayLike) -> bool: diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index d3e28d328c8fd..19b4448521c62 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -485,3 +485,27 @@ def test_select_dtypes_no_view(self): result = df.select_dtypes(include=["number"]) result.iloc[0, 0] = 0 tm.assert_frame_equal(df, df_orig) + + def test_select_dtype_object_and_str(self, using_infer_string): + # https://github.com/pandas-dev/pandas/issues/61916 + df = DataFrame( + { + "a": ["a", "b", "c"], + "b": [1, 2, 3], + "c": pd.array(["a", "b", "c"], dtype="string"), + } + ) + + # with "object" -> only select the object or default str dtype column + result = df.select_dtypes(include=["object"]) + 
expected = df[["a"]] + tm.assert_frame_equal(result, expected) + + # with "string" -> select both the default 'str' and the nullable 'string' + result = df.select_dtypes(include=["string"]) + if using_infer_string: + expected = df[["a", "c"]] + else: + expected = df[["c"]] + expected = df[["a", "c"]] + tm.assert_frame_equal(result, expected) From 2d5b9d2b784d6342b5e18b8d6200f12c14e9b266 Mon Sep 17 00:00:00 2001 From: Joris Van den Bossche Date: Mon, 22 Sep 2025 08:52:41 +0200 Subject: [PATCH 2/2] TST: drop leftover unconditional `expected` override in test_select_dtype_object_and_str --- pandas/tests/frame/methods/test_select_dtypes.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py index 19b4448521c62..1ba6b9c437726 100644 --- a/pandas/tests/frame/methods/test_select_dtypes.py +++ b/pandas/tests/frame/methods/test_select_dtypes.py @@ -507,5 +507,4 @@ def test_select_dtype_object_and_str(self, using_infer_string): expected = df[["a", "c"]] else: expected = df[["c"]] - expected = df[["a", "c"]] tm.assert_frame_equal(result, expected)