From e0824bc5199c33065eb2950767073535e4da819c Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sun, 10 Dec 2023 19:42:57 -0500 Subject: [PATCH 1/2] join with empty not respecting sort param --- pandas/core/indexes/base.py | 36 ++++++++++++------- .../tests/frame/methods/test_combine_first.py | 2 +- pandas/tests/frame/test_arithmetic.py | 6 ++-- pandas/tests/reshape/merge/test_join.py | 2 ++ pandas/tests/series/test_logical_ops.py | 8 ++--- 5 files changed, 34 insertions(+), 20 deletions(-) diff --git a/pandas/core/indexes/base.py b/pandas/core/indexes/base.py index 9d998b46dbeed..5dc4a85ba9792 100644 --- a/pandas/core/indexes/base.py +++ b/pandas/core/indexes/base.py @@ -4576,9 +4576,6 @@ def join( pother, how=how, level=level, return_indexers=True, sort=sort ) - lindexer: np.ndarray | None - rindexer: np.ndarray | None - # try to figure out the join level # GH3662 if level is None and (self._is_multi or other._is_multi): @@ -4592,25 +4589,38 @@ def join( if level is not None and (self._is_multi or other._is_multi): return self._join_level(other, level, how=how) + lidx: np.ndarray | None + ridx: np.ndarray | None + if len(other) == 0: if how in ("left", "outer"): - join_index = self._view() - rindexer = np.broadcast_to(np.intp(-1), len(join_index)) - return join_index, None, rindexer + if sort and not self.is_monotonic_increasing: + lidx = self.argsort() + join_index = self.take(lidx) + else: + lidx = None + join_index = self._view() + ridx = np.broadcast_to(np.intp(-1), len(join_index)) + return join_index, lidx, ridx elif how in ("right", "inner", "cross"): join_index = other._view() - lindexer = np.array([]) - return join_index, lindexer, None + lidx = np.array([], dtype=np.intp) + return join_index, lidx, None if len(self) == 0: if how in ("right", "outer"): - join_index = other._view() - lindexer = np.broadcast_to(np.intp(-1), len(join_index)) - return join_index, lindexer, None + if sort and not other.is_monotonic_increasing: + ridx = other.argsort() + join_index = other.take(ridx) + else: + ridx = None + join_index = other._view() + lidx = np.broadcast_to(np.intp(-1), len(join_index)) + return join_index, lidx, ridx elif how in ("left", "inner", "cross"): join_index = self._view() - rindexer = np.array([]) - return join_index, None, rindexer + ridx = np.array([], dtype=np.intp) + return join_index, None, ridx if self.dtype != other.dtype: dtype = self._find_common_type_compat(other) diff --git a/pandas/tests/frame/methods/test_combine_first.py b/pandas/tests/frame/methods/test_combine_first.py index 941e4c03464ea..8aeab5dacd8b4 100644 --- a/pandas/tests/frame/methods/test_combine_first.py +++ b/pandas/tests/frame/methods/test_combine_first.py @@ -82,7 +82,7 @@ def test_combine_first(self, float_frame, using_infer_string): tm.assert_frame_equal(comb, float_frame) comb = DataFrame().combine_first(float_frame) - tm.assert_frame_equal(comb, float_frame) + tm.assert_frame_equal(comb, float_frame.sort_index()) comb = float_frame.combine_first(DataFrame(index=["faz", "boo"])) assert "faz" in comb.index diff --git a/pandas/tests/frame/test_arithmetic.py b/pandas/tests/frame/test_arithmetic.py index ec3222efab5a8..42ce658701355 100644 --- a/pandas/tests/frame/test_arithmetic.py +++ b/pandas/tests/frame/test_arithmetic.py @@ -629,10 +629,12 @@ def test_arith_flex_frame_corner(self, float_frame): # corner cases result = float_frame.add(float_frame[:0]) - tm.assert_frame_equal(result, float_frame * np.nan) + expected = float_frame.sort_index() * np.nan + tm.assert_frame_equal(result, expected) result = float_frame[:0].add(float_frame) - tm.assert_frame_equal(result, float_frame * np.nan) + expected = float_frame.sort_index() * np.nan + tm.assert_frame_equal(result, expected) with pytest.raises(NotImplementedError, match="fill_value"): float_frame.add(float_frame.iloc[0], fill_value=3) diff --git a/pandas/tests/reshape/merge/test_join.py b/pandas/tests/reshape/merge/test_join.py index 3408e6e4731bd..4cc887c32b585 100644 --- a/pandas/tests/reshape/merge/test_join.py +++ b/pandas/tests/reshape/merge/test_join.py @@ -1023,6 +1023,8 @@ def test_join_empty(left_empty, how, exp): expected = DataFrame(columns=["B", "C"], dtype="int64") if how != "cross": expected = expected.rename_axis("A") + if how == "outer": + expected = expected.sort_index() tm.assert_frame_equal(result, expected) diff --git a/pandas/tests/series/test_logical_ops.py b/pandas/tests/series/test_logical_ops.py index 166f52181fed4..153b4bfaaf444 100644 --- a/pandas/tests/series/test_logical_ops.py +++ b/pandas/tests/series/test_logical_ops.py @@ -40,11 +40,11 @@ def test_logical_operators_bool_dtype_with_empty(self): s_empty = Series([], dtype=object) res = s_tft & s_empty - expected = s_fff + expected = s_fff.sort_index() tm.assert_series_equal(res, expected) res = s_tft | s_empty - expected = s_tft + expected = s_tft.sort_index() tm.assert_series_equal(res, expected) def test_logical_operators_int_dtype_with_int_dtype(self): @@ -397,11 +397,11 @@ def test_logical_ops_label_based(self, using_infer_string): empty = Series([], dtype=object) result = a & empty.copy() - expected = Series([False, False, False], list("bca")) + expected = Series([False, False, False], list("abc")) tm.assert_series_equal(result, expected) result = a | empty.copy() - expected = Series([True, False, True], list("bca")) + expected = Series([True, True, False], list("abc")) tm.assert_series_equal(result, expected) # vs non-matching From 77cf1261bec858201e14922a1ebdf85513a75a9f Mon Sep 17 00:00:00 2001 From: Luke Manley Date: Sun, 10 Dec 2023 19:45:45 -0500 Subject: [PATCH 2/2] whatsnew --- doc/source/whatsnew/v2.2.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v2.2.0.rst b/doc/source/whatsnew/v2.2.0.rst index acec8379ee5b3..711f0420aa8e7 100644 --- a/doc/source/whatsnew/v2.2.0.rst +++ b/doc/source/whatsnew/v2.2.0.rst @@ -247,7 +247,7 @@ These are bug fixes that might have notable behavior changes. In previous versions of pandas, :func:`merge` and :meth:`DataFrame.join` did not always return a result that followed the documented sort behavior. pandas now -follows the documented sort behavior in merge and join operations (:issue:`54611`, :issue:`56426`). +follows the documented sort behavior in merge and join operations (:issue:`54611`, :issue:`56426`, :issue:`56443`). As documented, ``sort=True`` sorts the join keys lexicographically in the resulting :class:`DataFrame`. With ``sort=False``, the order of the join keys depends on the