From 603f499c1bbb8868aaf7abca2346bbebc4dd50fc Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Tue, 27 Jul 2021 18:36:37 -0400 Subject: [PATCH 01/10] Test centered rolling window with offset (#42753). --- pandas/tests/window/test_rolling.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 77ca482936298..3075170f0784d 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -219,6 +219,20 @@ def test_datetimelike_centered_selections( tm.assert_frame_equal(result, expected, check_dtype=False) +@pytest.mark.parametrize("closed", ["right", "both", "left", "neither"]) +def test_datetimelike_centered_offset_covers_all( + closed): + # GH 42753 + + window = '3s' + index=[Timestamp("20130101 09:00:01"), Timestamp("20130101 09:00:02")] + df = DataFrame({"x": 1}, index=index) + + result = df.rolling(window, closed=closed, center=True).sum() + + expected = DataFrame({"x": [2.0, 2.0]}, index=index) + + tm.assert_frame_equal(result, expected, check_dtype=False) def test_even_number_window_alignment(): # see discussion in GH 38780 From aa8f6f1d38aac4dd98d6425ad394cc78e03d466a Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Wed, 28 Jul 2021 09:35:45 -0400 Subject: [PATCH 02/10] Improved test. --- pandas/tests/window/test_rolling.py | 25 ++++++++++++++++++++----- 1 file changed, 20 insertions(+), 5 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 3075170f0784d..b2b6c398588c5 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -219,21 +219,36 @@ def test_datetimelike_centered_selections( tm.assert_frame_equal(result, expected, check_dtype=False) -@pytest.mark.parametrize("closed", ["right", "both", "left", "neither"]) + +@pytest.mark.parametrize( + "window,closed,expected", + [ + ('3s', 'right', [3.0, 3.0, 3.0]), + ('3s', 'both', [3.0, 3.0, 3.0]), + ('3s', 'left', [3.0, 3.0, 3.0]), + ('3s', 'neither', [3.0, 3.0, 3.0]), + ('2s', 'right', [3.0, 2.0, 2.0]), + ('2s', 'both', [3.0, 3.0, 3.0]), + ('2s', 'left', [1.0, 3.0, 3.0]), + ('2s', 'neither', [1.0, 2.0, 2.0]) + ] +) def test_datetimelike_centered_offset_covers_all( - closed): + window, closed, expected): # GH 42753 - window = '3s' - index=[Timestamp("20130101 09:00:01"), Timestamp("20130101 09:00:02")] + index=[Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:02")] df = DataFrame({"x": 1}, index=index) result = df.rolling(window, closed=closed, center=True).sum() - expected = DataFrame({"x": [2.0, 2.0]}, index=index) + expected = DataFrame({"x": expected}, index=index) tm.assert_frame_equal(result, expected, check_dtype=False) + def test_even_number_window_alignment(): # see discussion in GH 38780 s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3)) From 1a4271ae3f4c48dd8d5458c3037ae7365a22b514 Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Tue, 27 Jul 2021 21:39:47 -0400 Subject: [PATCH 03/10] Fixed calculate_variable_window_bounds. --- pandas/_libs/window/indexers.pyx | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index d188770576e05..ff86557fff2e8 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -79,11 +79,11 @@ def calculate_variable_window_bounds( else: end[0] = 0 if center: - for j in range(0, num_values + 1): - if (index[j] == index[0] + index_growth_sign * window_size / 2 and - right_closed): + for j in range(0, num_values): + if ((index[j] < index[0] + index_growth_sign * window_size / 2) or + (index[j] == index[0] + index_growth_sign * window_size / 2 and + right_closed)): end[0] = j + 1 - break elif index[j] >= index[0] + index_growth_sign * window_size / 2: end[0] = j break From 2170a7e00c941b495fa49bad9608431164e5a8a8 Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Wed, 28 Jul 2021 13:31:27 -0400 Subject: [PATCH 04/10] Fixed PEP8 issues. --- pandas/tests/window/test_rolling.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index b2b6c398588c5..b78c3b88580ff 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -233,13 +233,12 @@ def test_datetimelike_centered_selections( ('2s', 'neither', [1.0, 2.0, 2.0]) ] ) -def test_datetimelike_centered_offset_covers_all( - window, closed, expected): +def test_datetimelike_centered_offset_covers_all(window, closed, expected): # GH 42753 - index=[Timestamp("20130101 09:00:01"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:02")] + index = [Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:02")] df = DataFrame({"x": 1}, index=index) result = df.rolling(window, closed=closed, center=True).sum() From 3e5a36fadb54a99490a370ca9aba6cb13708f3ea Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Wed, 28 Jul 2021 15:45:26 -0400 Subject: [PATCH 05/10] Code standard compliance. --- pandas/_libs/window/indexers.pyx | 2 +- pandas/tests/window/test_rolling.py | 26 ++++++++++++++------------ 2 files changed, 15 insertions(+), 13 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index ff86557fff2e8..f38f80b6b6f58 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -82,7 +82,7 @@ def calculate_variable_window_bounds( for j in range(0, num_values): if ((index[j] < index[0] + index_growth_sign * window_size / 2) or (index[j] == index[0] + index_growth_sign * window_size / 2 and - right_closed)): + right_closed)): end[0] = j + 1 elif index[j] >= index[0] + index_growth_sign * window_size / 2: end[0] = j diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index b78c3b88580ff..4732d592d3388 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -223,22 +223,24 @@ def test_datetimelike_centered_selections( @pytest.mark.parametrize( "window,closed,expected", [ - ('3s', 'right', [3.0, 3.0, 3.0]), - ('3s', 'both', [3.0, 3.0, 3.0]), - ('3s', 'left', [3.0, 3.0, 3.0]), - ('3s', 'neither', [3.0, 3.0, 3.0]), - ('2s', 'right', [3.0, 2.0, 2.0]), - ('2s', 'both', [3.0, 3.0, 3.0]), - ('2s', 'left', [1.0, 3.0, 3.0]), - ('2s', 'neither', [1.0, 2.0, 2.0]) - ] + ("3s", "right", [3.0, 3.0, 3.0]), + ("3s", "both", [3.0, 3.0, 3.0]), + ("3s", "left", [3.0, 3.0, 3.0]), + ("3s", "neither", [3.0, 3.0, 3.0]), + ("2s", "right", [3.0, 2.0, 2.0]), + ("2s", "both", [3.0, 3.0, 3.0]), + ("2s", "left", [1.0, 3.0, 3.0]), + ("2s", "neither", [1.0, 2.0, 2.0]), + ], ) def test_datetimelike_centered_offset_covers_all(window, closed, expected): # GH 42753 - index = [Timestamp("20130101 09:00:01"), - Timestamp("20130101 09:00:02"), - Timestamp("20130101 09:00:02")] + index = [ + Timestamp("20130101 09:00:01"), + Timestamp("20130101 09:00:02"), + Timestamp("20130101 09:00:02"), + ] df = DataFrame({"x": 1}, index=index) result = df.rolling(window, closed=closed, center=True).sum() From f6371d8e6edbbc97b9e9c188e7afa4fd5733d2e6 Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Wed, 28 Jul 2021 19:01:53 -0400 Subject: [PATCH 06/10] Parametrize test for DF and Series. --- pandas/tests/window/test_rolling.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 4732d592d3388..448fbd3244997 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -233,7 +233,9 @@ def test_datetimelike_centered_selections( ("2s", "neither", [1.0, 2.0, 2.0]), ], ) -def test_datetimelike_centered_offset_covers_all(window, closed, expected): +def test_datetimelike_centered_offset_covers_all( + window, closed, expected, frame_or_series +): # GH 42753 index = [ @@ -241,13 +243,11 @@ def test_datetimelike_centered_offset_covers_all(window, closed, expected): Timestamp("20130101 09:00:02"), Timestamp("20130101 09:00:02"), ] - df = DataFrame({"x": 1}, index=index) + df = frame_or_series([1, 1, 1], index=index) result = df.rolling(window, closed=closed, center=True).sum() - - expected = DataFrame({"x": expected}, index=index) - - tm.assert_frame_equal(result, expected, check_dtype=False) + expected = frame_or_series(expected, index=index) + tm.assert_equal(result, expected, check_dtype=False) def test_even_number_window_alignment(): From 178117837173c931aa8bf446975013d53d65ca42 Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Wed, 28 Jul 2021 19:39:18 -0400 Subject: [PATCH 07/10] Added bugfix to whatsnew 1.3.2. --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 84e2f1ad33809..136b366758c0d 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -25,7 +25,7 @@ Fixed regressions Bug fixes ~~~~~~~~~ -- +- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when window is an offset that covers all the rows (:issue:`42753`) - .. --------------------------------------------------------------------------- From be0ffffc2c515ecabfd130faa39bdfff6c81bd33 Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Wed, 4 Aug 2021 20:36:47 -0400 Subject: [PATCH 08/10] Use temporary variable in loop. --- pandas/_libs/window/indexers.pyx | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index f38f80b6b6f58..197345b3ce6ac 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -79,12 +79,11 @@ def calculate_variable_window_bounds( else: end[0] = 0 if center: + end_bound = index[0] + index_growth_sign * window_size / 2 for j in range(0, num_values): - if ((index[j] < index[0] + index_growth_sign * window_size / 2) or - (index[j] == index[0] + index_growth_sign * window_size / 2 and - right_closed)): + if (index[j] < end_bound) or (index[j] == end_bound and right_closed): end[0] = j + 1 - elif index[j] >= index[0] + index_growth_sign * window_size / 2: + elif index[j] >= end_bound: end[0] = j break From 3882651af8028e17a6f3ce48eee0035b81826fe8 Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Fri, 6 Aug 2021 07:37:10 -0400 Subject: [PATCH 09/10] Improved test. --- pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 7e2b114aa18f9..c49871bf3e142 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -247,7 +247,7 @@ def test_datetimelike_centered_offset_covers_all( result = df.rolling(window, closed=closed, center=True).sum() expected = frame_or_series(expected, index=index) - tm.assert_equal(result, expected, check_dtype=False) + tm.assert_equal(result, expected) def test_even_number_window_alignment(): From fd4af1457da5bcc68828167756ae249bbccfdecc Mon Sep 17 00:00:00 2001 From: Jaime Di Cristina <74217512+dicristina@users.noreply.github.com> Date: Fri, 6 Aug 2021 12:35:21 -0400 Subject: [PATCH 10/10] Clarification in whatsnew. --- doc/source/whatsnew/v1.3.2.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.2.rst b/doc/source/whatsnew/v1.3.2.rst index 4ed3e181ce0f2..70fff75f84dc3 100644 --- a/doc/source/whatsnew/v1.3.2.rst +++ b/doc/source/whatsnew/v1.3.2.rst @@ -32,7 +32,7 @@ Bug fixes ~~~~~~~~~ - Bug in :meth:`pandas.read_excel` modifies the dtypes dictionary when reading a file with duplicate columns (:issue:`42462`) - 1D slices over extension types turn into N-dimensional slices over ExtensionArrays (:issue:`42430`) -- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when window is an offset that covers all the rows (:issue:`42753`) +- Fixed bug in :meth:`Series.rolling` and :meth:`DataFrame.rolling` not calculating window bounds correctly for the first row when ``center=True`` and ``window`` is an offset that covers all the rows (:issue:`42753`) - :meth:`.Styler.hide_columns` now hides the index name header row as well as column headers (:issue:`42101`) - Bug in de-serializing datetime indexes in PYTHONOPTIMIZED mode (:issue:`42866`) -