From 6ef4dff64fd94cb26645ba30217202de17805962 Mon Sep 17 00:00:00 2001
From: jbrockmendel
Date: Mon, 29 Sep 2025 04:36:06 -0700
Subject: [PATCH] BUG: String[pyarrow] comparison with mixed object (#62424)

Co-authored-by: Joris Van den Bossche
---
 doc/source/whatsnew/v2.3.3.rst        |  2 +-
 pandas/core/arrays/arrow/array.py     | 18 ++++++++++++------
 pandas/tests/extension/test_string.py | 16 ++++++++++++++++
 3 files changed, 29 insertions(+), 7 deletions(-)

diff --git a/doc/source/whatsnew/v2.3.3.rst b/doc/source/whatsnew/v2.3.3.rst
index bc5a4c5b27a90..c00717e09c702 100644
--- a/doc/source/whatsnew/v2.3.3.rst
+++ b/doc/source/whatsnew/v2.3.3.rst
@@ -47,7 +47,7 @@ Bug fixes
 - Fix regression in ``~Series.str.contains``, ``~Series.str.match`` and ``~Series.str.fullmatch``
   with a compiled regex and custom flags (:issue:`62240`)
 - Fix :meth:`Series.str.match` and :meth:`Series.str.fullmatch` not matching patterns with groups correctly for the Arrow-backed string dtype (:issue:`61072`)
-
+- Fix comparing a :class:`StringDtype` Series with mixed objects raising an error (:issue:`60228`)
 
 Improvements and fixes for Copy-on-Write
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
diff --git a/pandas/core/arrays/arrow/array.py b/pandas/core/arrays/arrow/array.py
index 52177300cc3ea..e16a126ac10ee 100644
--- a/pandas/core/arrays/arrow/array.py
+++ b/pandas/core/arrays/arrow/array.py
@@ -727,12 +727,18 @@ def _cmp_method(self, other, op):
         pc_func = ARROW_CMP_FUNCS[op.__name__]
         if isinstance(other, (ExtensionArray, np.ndarray, list)):
             try:
-                result = pc_func(self._pa_array, self._box_pa(other))
-            except pa.ArrowNotImplementedError:
-                # TODO: could this be wrong if other is object dtype?
-                # in which case we need to operate pointwise?
-                result = ops.invalid_comparison(self, other, op)
-                result = pa.array(result, type=pa.bool_())
+                boxed = self._box_pa(other)
+            except pa.lib.ArrowInvalid:
+                # e.g. GH#60228 [1, "b"] we have to operate pointwise
+                res_values = [op(x, y) for x, y in zip(self, other)]
+                result = pa.array(res_values, type=pa.bool_(), from_pandas=True)
+            else:
+                try:
+                    result = pc_func(self._pa_array, boxed)
+                except pa.ArrowNotImplementedError:
+                    result = ops.invalid_comparison(self, other, op)
+                    result = pa.array(result, type=pa.bool_())
+
         elif is_scalar(other):
             try:
                 result = pc_func(self._pa_array, self._box_pa(other))
diff --git a/pandas/tests/extension/test_string.py b/pandas/tests/extension/test_string.py
index a4f568d63a0a3..71d4f7cc5c4bf 100644
--- a/pandas/tests/extension/test_string.py
+++ b/pandas/tests/extension/test_string.py
@@ -275,3 +275,19 @@ def test_searchsorted_with_na_raises(data_for_sorting, as_series):
     )
     with pytest.raises(ValueError, match=msg):
         arr.searchsorted(b)
+
+
+def test_mixed_object_comparison(dtype):
+    # GH#60228
+    ser = pd.Series(["a", "b"], dtype=dtype)
+
+    mixed = pd.Series([1, "b"], dtype=object)
+
+    result = ser == mixed
+    expected = pd.Series([False, True], dtype=bool)
+    if dtype.storage == "python" and dtype.na_value is pd.NA:
+        expected = expected.astype("boolean")
+    elif dtype.storage == "pyarrow" and dtype.na_value is pd.NA:
+        expected = expected.astype("bool[pyarrow]")
+
+    tm.assert_series_equal(result, expected)