diff --git a/pandas/core/frame.py b/pandas/core/frame.py
index 414f02f41e9f0..110bb8d89cddd 100644
--- a/pandas/core/frame.py
+++ b/pandas/core/frame.py
@@ -144,6 +144,7 @@
     TimedeltaArray,
 )
 from pandas.core.arrays.sparse import SparseFrameAccessor
+from pandas.core.arrays.string_ import StringDtype
 from pandas.core.construction import (
     ensure_wrapped_if_datetimelike,
     sanitize_array,
@@ -5157,7 +5158,12 @@ def dtype_predicate(dtype: DtypeObj, dtypes_set) -> bool:
                     and getattr(dtype, "_is_numeric", False)
                     and not is_bool_dtype(dtype)
                 )
-                or (dtype.type is str and np.object_ in dtypes_set)
+                # backwards compat for the default `str` dtype being selected by object
+                or (
+                    isinstance(dtype, StringDtype)
+                    and dtype.na_value is np.nan
+                    and np.object_ in dtypes_set
+                )
             )
 
         def predicate(arr: ArrayLike) -> bool:
diff --git a/pandas/tests/frame/methods/test_select_dtypes.py b/pandas/tests/frame/methods/test_select_dtypes.py
index d3e28d328c8fd..1ba6b9c437726 100644
--- a/pandas/tests/frame/methods/test_select_dtypes.py
+++ b/pandas/tests/frame/methods/test_select_dtypes.py
@@ -485,3 +485,26 @@ def test_select_dtypes_no_view(self):
         result = df.select_dtypes(include=["number"])
         result.iloc[0, 0] = 0
         tm.assert_frame_equal(df, df_orig)
+
+    def test_select_dtype_object_and_str(self, using_infer_string):
+        # https://github.com/pandas-dev/pandas/issues/61916
+        df = DataFrame(
+            {
+                "a": ["a", "b", "c"],
+                "b": [1, 2, 3],
+                "c": pd.array(["a", "b", "c"], dtype="string"),
+            }
+        )
+
+        # with "object" -> only select the object or default str dtype column
+        result = df.select_dtypes(include=["object"])
+        expected = df[["a"]]
+        tm.assert_frame_equal(result, expected)
+
+        # with "string" -> the nullable 'string' column, plus default 'str' if inferred
+        result = df.select_dtypes(include=["string"])
+        if using_infer_string:
+            expected = df[["a", "c"]]
+        else:
+            expected = df[["c"]]
+        tm.assert_frame_equal(result, expected)