From 11c1bb70cdb549f4ede323eb4f4e0f8d942519a1 Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 3 Sep 2020 09:40:57 -0500 Subject: [PATCH 01/54] update syntax for pandas style --- pandas/_libs/window/indexers.pyx | 41 +++++++++++++++++++++++++++----- pandas/core/window/rolling.py | 26 +++++++++++++------- 2 files changed, 52 insertions(+), 15 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 9af1159a805ec..ae67f5909eb56 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -11,7 +11,7 @@ def calculate_variable_window_bounds( int64_t num_values, int64_t window_size, object min_periods, # unused but here to match get_window_bounds signature - object center, # unused but here to match get_window_bounds signature + object center, object closed, const int64_t[:] index ): @@ -30,7 +30,7 @@ def calculate_variable_window_bounds( ignored, exists for compatibility center : object - ignored, exists for compatibility + center the rolling window on the current observation closed : str string of side of the window that should be closed @@ -45,6 +45,7 @@ def calculate_variable_window_bounds( cdef: bint left_closed = False bint right_closed = False + bint center_window = False int index_growth_sign = 1 ndarray[int64_t, ndim=1] start, end int64_t start_bound, end_bound @@ -62,6 +63,8 @@ def calculate_variable_window_bounds( if index[num_values - 1] < index[0]: index_growth_sign = -1 + if center: + center_window = True start = np.empty(num_values, dtype='int64') start.fill(-1) @@ -76,14 +79,27 @@ def calculate_variable_window_bounds( # right endpoint is open else: end[0] = 0 + if center_window: + for j in range(0, num_values+1): + if (index[j] == index[0] + index_growth_sign*window_size/2 and + right_closed): + end[0] = j+1 + break + elif index[j] >= index[0] + index_growth_sign * window_size/2: + end[0] = j + break with nogil: # start is start of slice interval (including) # end is end of slice interval (not including) for i in range(1, num_values): - end_bound = index[i] - start_bound = index[i] - index_growth_sign * window_size + if center_window: + end_bound = index[i] + index_growth_sign * window_size/2 + start_bound = index[i] - index_growth_sign * window_size/2 + else: + end_bound = index[i] + start_bound = index[i] - index_growth_sign * window_size # left endpoint is closed if left_closed: @@ -97,14 +113,27 @@ def calculate_variable_window_bounds( start[i] = j break + # for centered window advance the end bound until we are + # outside the constraint + if center_window: + for j in range(end[i - 1], num_values+1): + if j == num_values: + end[i] = j + elif ((index[j] - end_bound) * index_growth_sign == 0 and + right_closed): + end[i] = j+1 + break + elif (index[j] - end_bound) * index_growth_sign >= 0: + end[i] = j + break # end bound is previous end # or current index - if (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: + elif (index[end[i - 1]] - end_bound) * index_growth_sign <= 0: end[i] = i + 1 else: end[i] = end[i - 1] # right endpoint is open - if not right_closed: + if not right_closed and not center_window: end[i] -= 1 return start, end diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index a3f60c0bc5098..9326da3757122 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -462,7 +462,9 @@ def _get_window_indexer(self, window: int) -> BaseIndexer: if isinstance(self.window, BaseIndexer): return self.window if self.is_freq_type: - return VariableWindowIndexer(index_array=self._on.asi8, window_size=window) + return VariableWindowIndexer( + index_array=self._on.asi8, window_size=window, center=self.center + ) return FixedWindowIndexer(window_size=window) def _apply_series(self, homogeneous_func: Callable[..., ArrayLike]) -> "Series": @@ -470,7 +472,6 @@ def _apply_series(self, homogeneous_func: Callable[..., ArrayLike]) -> "Series": Series version of _apply_blockwise """ _, obj = self._create_blocks(self._selected_obj) - try: values = self._prep_values(obj.values) except (TypeError, NotImplementedError) as err: @@ -567,7 +568,14 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() - offset = calculate_center_offset(window) if center else 0 + offset = ( + calculate_center_offset(window) + if center + and not isinstance( + self._get_window_indexer(window), VariableWindowIndexer + ) + else 0 + ) additional_nans = np.array([np.nan] * offset) if not is_weighted: @@ -610,7 +618,9 @@ def calc(x): if use_numba_cache: NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func - if center: + if center and not isinstance( + self._get_window_indexer(window), VariableWindowIndexer + ): result = self._center_window(result, window) return result @@ -1963,15 +1973,13 @@ def validate(self): if (self.obj.empty or self.is_datetimelike) and isinstance( self.window, (str, BaseOffset, timedelta) ): - self._validate_monotonic() freq = self._validate_freq() - # we don't allow center - if self.center: + # we don't allow center for offset based windows + if self.center and self.obj.empty: raise NotImplementedError( - "center is not implemented for " - "datetimelike and offset based windows" + "center is not implemented for " "offset based windows" ) # this will raise ValueError on non-fixed freqs From c73ffc4e56f43687842a25c299edaa365808ef2a Mon Sep 17 00:00:00 2001 From: Adam Miller Date: Thu, 3 Sep 2020 12:39:07 -0500 Subject: [PATCH 02/54] fix syntax error --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 757a7220561bd..0b087bebe4e32 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1966,7 +1966,7 @@ def validate(self): # we don't allow center for offset based windows if self.center and self.obj.empty: raise NotImplementedError( - "center is not implemented for " "offset based windows" + "center is not implemented for offset based windows" ) # this will raise ValueError on non-fixed freqs From dca9d04a34df149abfa6712247bcd9d3e15e952e Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Tue, 29 Dec 2020 15:56:24 +0100 Subject: [PATCH 03/54] reintroduce calculate_center_offset as private function --- pandas/core/window/rolling.py | 29 +++++++++++++++++++++++------ 1 file changed, 23 insertions(+), 6 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index d5e0bfbbf2d07..b868938c6d244 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -457,11 +457,28 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() + def _calculate_center_offset(window) -> int: + """ + Calculate an offset necessary to have the window label to be centered. + + Parameters + ---------- + window: ndarray or int + window weights or window + + Returns + ------- + int + """ + if not is_integer(window): + window = len(window) + return int((window - 1) / 2.0) + offset = ( - calculate_center_offset(window) - if center + _calculate_center_offset(self.window) + if self.center and not isinstance( - self._get_window_indexer(window), VariableWindowIndexer + self._get_window_indexer(self.window), VariableWindowIndexer ) else 0 ) @@ -492,10 +509,10 @@ def calc(x): # if use_numba_cache: # NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func - if center and not isinstance( - self._get_window_indexer(window), VariableWindowIndexer + if self.center and not isinstance( + self._get_window_indexer(), VariableWindowIndexer ): - result = self._center_window(result, window) + result = self._center_window(result, self.window) return result From 37cb6fa40d03c7376c20ccb70b30cb6e05faddd9 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Tue, 29 Dec 2020 17:02:05 +0100 Subject: [PATCH 04/54] fix double declaration of index_growth_sign --- pandas/_libs/window/indexers.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index da10e618a55c6..ad51af42763d5 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -46,7 +46,6 @@ def calculate_variable_window_bounds( bint left_closed = False bint right_closed = False bint center_window = False - int index_growth_sign = 1 ndarray[int64_t, ndim=1] start, end int64_t start_bound, end_bound, index_growth_sign = 1 Py_ssize_t i, j From 81e0e4e7626c0dc136d1fc7c6b660f131ebcd551 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Wed, 30 Dec 2020 13:48:58 +0100 Subject: [PATCH 05/54] apply review suggestions --- pandas/_libs/window/indexers.pyx | 2 +- pandas/core/window/rolling.py | 14 ++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index ad51af42763d5..c7d6721b417c3 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -11,7 +11,7 @@ def calculate_variable_window_bounds( int64_t num_values, int64_t window_size, object min_periods, # unused but here to match get_window_bounds signature - object center, + bint center, object closed, const int64_t[:] index ): diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index b868938c6d244..acdf1edd7b9be 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -335,15 +335,11 @@ def _get_window_indexer(self) -> BaseIndexer: if isinstance(self.window, BaseIndexer): return self.window - if self.is_freq_type: - return VariableWindowIndexer( - index_array=self._on.asi8, window_size=self.window, center=self.center - ) - - # TODO: is this replacing above if clause or adding another case? if self._win_freq_i8 is not None: return VariableWindowIndexer( - index_array=self._index_array, window_size=self._win_freq_i8 + index_array=self._index_array, + window_size=self._win_freq_i8, + center=self.center, ) return FixedWindowIndexer(window_size=self.window) @@ -477,9 +473,7 @@ def _calculate_center_offset(window) -> int: offset = ( _calculate_center_offset(self.window) if self.center - and not isinstance( - self._get_window_indexer(self.window), VariableWindowIndexer - ) + and not isinstance(self._get_window_indexer(), VariableWindowIndexer) else 0 ) additional_nans = np.array([np.nan] * offset) From 463c7f055fce4bc2784dbaf60a45d6b46d2f844f Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Wed, 30 Dec 2020 14:24:32 +0100 Subject: [PATCH 06/54] remove unneccessary exception --- pandas/core/window/rolling.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index acdf1edd7b9be..1277c4e0872ed 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1927,12 +1927,6 @@ def validate(self): self._validate_monotonic() - # we don't allow center for offset based windows - if self.center and self.obj.empty: - raise NotImplementedError( - "center is not implemented for offset based windows" - ) - # this will raise ValueError on non-fixed freqs try: freq = to_offset(self.window) From 321f07cbd644245b97f9d90f642afdaf8b63fdc7 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Tue, 5 Jan 2021 17:00:34 +0100 Subject: [PATCH 07/54] add method "_center_window" to class BaseWindow --- pandas/core/window/rolling.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 1277c4e0872ed..4a390840ff691 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -415,6 +415,19 @@ def _apply_tablewise( self._insert_on_column(out, obj) return out + def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: + """ + Center the result in the window for weighted rolling aggregations. + """ + if self.axis > result.ndim - 1: + raise ValueError("Requested axis is larger then no. of argument dimensions") + + if offset > 0: + lead_indexer = [slice(None)] * result.ndim + lead_indexer[self.axis] = slice(offset, None) + result = np.copy(result[tuple(lead_indexer)]) + return result + def _apply( self, func: Callable[..., Any], From 9270bab437b15068cb14c91262675277d51b1c87 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Tue, 5 Jan 2021 18:26:52 +0100 Subject: [PATCH 08/54] use spaces around operators --- pandas/_libs/window/indexers.pyx | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index c7d6721b417c3..b2421236e14b2 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -80,11 +80,11 @@ def calculate_variable_window_bounds( end[0] = 0 if center_window: for j in range(0, num_values+1): - if (index[j] == index[0] + index_growth_sign*window_size/2 and + if (index[j] == index[0] + index_growth_sign * window_size / 2 and right_closed): - end[0] = j+1 + end[0] = j + 1 break - elif index[j] >= index[0] + index_growth_sign * window_size/2: + elif index[j] >= index[0] + index_growth_sign * window_size / 2: end[0] = j break @@ -94,8 +94,8 @@ def calculate_variable_window_bounds( # end is end of slice interval (not including) for i in range(1, num_values): if center_window: - end_bound = index[i] + index_growth_sign * window_size/2 - start_bound = index[i] - index_growth_sign * window_size/2 + end_bound = index[i] + index_growth_sign * window_size / 2 + start_bound = index[i] - index_growth_sign * window_size / 2 else: end_bound = index[i] start_bound = index[i] - index_growth_sign * window_size @@ -115,12 +115,12 @@ def calculate_variable_window_bounds( # for centered window advance the end bound until we are # outside the constraint if center_window: - for j in range(end[i - 1], num_values+1): + for j in range(end[i - 1], num_values + 1): if j == num_values: end[i] = j elif ((index[j] - end_bound) * index_growth_sign == 0 and right_closed): - end[i] = j+1 + end[i] = j + 1 break elif (index[j] - end_bound) * index_growth_sign >= 0: end[i] = j From dd33e328284a28d1071a03c1d786a5ae70415815 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Wed, 6 Jan 2021 13:22:09 +0100 Subject: [PATCH 09/54] remove unneccessary calculations from rolling.py, tests work again --- pandas/core/window/rolling.py | 48 ----------------------------------- 1 file changed, 48 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 4a390840ff691..edb912ba243b1 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -415,19 +415,6 @@ def _apply_tablewise( self._insert_on_column(out, obj) return out - def _center_window(self, result: np.ndarray, offset: int) -> np.ndarray: - """ - Center the result in the window for weighted rolling aggregations. - """ - if self.axis > result.ndim - 1: - raise ValueError("Requested axis is larger then no. of argument dimensions") - - if offset > 0: - lead_indexer = [slice(None)] * result.ndim - lead_indexer[self.axis] = slice(offset, None) - result = np.copy(result[tuple(lead_indexer)]) - return result - def _apply( self, func: Callable[..., Any], @@ -466,33 +453,7 @@ def homogeneous_func(values: np.ndarray): if values.size == 0: return values.copy() - def _calculate_center_offset(window) -> int: - """ - Calculate an offset necessary to have the window label to be centered. - - Parameters - ---------- - window: ndarray or int - window weights or window - - Returns - ------- - int - """ - if not is_integer(window): - window = len(window) - return int((window - 1) / 2.0) - - offset = ( - _calculate_center_offset(self.window) - if self.center - and not isinstance(self._get_window_indexer(), VariableWindowIndexer) - else 0 - ) - additional_nans = np.array([np.nan] * offset) - def calc(x): - x = np.concatenate((x, additional_nans)) start, end = window_indexer.get_window_bounds( num_values=len(x), @@ -512,15 +473,6 @@ def calc(x): if numba_cache_key is not None: NUMBA_FUNC_CACHE[numba_cache_key] = func - # TODO: is this equivalent to the above? - # if use_numba_cache: - # NUMBA_FUNC_CACHE[(kwargs["original_func"], "rolling_apply")] = func - - if self.center and not isinstance( - self._get_window_indexer(), VariableWindowIndexer - ): - result = self._center_window(result, self.window) - return result if self.method == "single": From 6e4da84702c8ce11108ea33dadbede0701aa5aac Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Thu, 7 Jan 2021 11:22:45 +0100 Subject: [PATCH 10/54] remove "test_invalid_center_datetimelike" --- pandas/tests/window/test_timeseries_window.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/pandas/tests/window/test_timeseries_window.py b/pandas/tests/window/test_timeseries_window.py index 0782ef2f4ce7b..7cd319480083b 100644 --- a/pandas/tests/window/test_timeseries_window.py +++ b/pandas/tests/window/test_timeseries_window.py @@ -83,12 +83,6 @@ def test_invalid_minp(self, minp): with pytest.raises(ValueError, match=msg): self.regular.rolling(window="1D", min_periods=minp) - def test_invalid_center_datetimelike(self): - # center is not implemented - msg = "center is not implemented for datetimelike and offset based windows" - with pytest.raises(NotImplementedError, match=msg): - self.regular.rolling(window="1D", center=True) - def test_on(self): df = self.regular From e0966e8ca391a3ec59f2c930986c8ccb8b3d2179 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Thu, 7 Jan 2021 11:31:46 +0100 Subject: [PATCH 11/54] remove white spaces and TODO --- pandas/core/window/rolling.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index edb912ba243b1..3f15eea76d3dd 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -334,7 +334,6 @@ def _get_window_indexer(self) -> BaseIndexer: """ if isinstance(self.window, BaseIndexer): return self.window - if self._win_freq_i8 is not None: return VariableWindowIndexer( index_array=self._index_array, @@ -454,7 +453,6 @@ def homogeneous_func(values: np.ndarray): return values.copy() def calc(x): - start, end = window_indexer.get_window_bounds( num_values=len(x), min_periods=min_periods, @@ -1884,7 +1882,6 @@ def validate(self): # we allow rolling on a datetimelike index if ( self.obj.empty - # TODO: add "or self.is_datetimelike"? or isinstance( self._on, (ABCDatetimeIndex, ABCTimedeltaIndex, ABCPeriodIndex) ) From b82f5146a60ce9b7c281c841a3d903a175d21211 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Fri, 29 Jan 2021 09:25:59 +0100 Subject: [PATCH 12/54] remove unnecessary lines --- pandas/_libs/window/indexers.pyx | 2 -- 1 file changed, 2 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index b2421236e14b2..41c5e2f229778 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -62,8 +62,6 @@ def calculate_variable_window_bounds( if index[num_values - 1] < index[0]: index_growth_sign = -1 - if center: - center_window = True start = np.empty(num_values, dtype='int64') start.fill(-1) From 9c4cc58bf0156dd96fbf7e864f5f5e05ee652e1a Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Fri, 29 Jan 2021 10:01:32 +0100 Subject: [PATCH 13/54] change existing test to cover new case --- pandas/tests/window/test_rolling.py | 32 ++++++++++++++++++++++------- 1 file changed, 25 insertions(+), 7 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index b275b64ff706b..9a7d1f0a71b05 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -121,6 +121,7 @@ def test_numpy_compat(method): getattr(r, method)(dtype=np.float64) +# maybe this one too? def test_closed_fixed(closed, arithmetic_win_operators): # GH 34315 func_name = arithmetic_win_operators @@ -135,7 +136,28 @@ def test_closed_fixed(closed, arithmetic_win_operators): tm.assert_frame_equal(result, expected) -def test_closed_fixed_binary_col(): +@pytest.mark.parametrize( + "center, expected", + [ + ( + False, + DataFrame( + [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], + columns=["binary_col"], + index=date_range(start="2020-01-01", freq="min", periods=8), + ), + ), + ( + True, + DataFrame( + [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], + columns=["binary_col"], + index=date_range(start="2020-01-01", freq="min", periods=8), + ), + ), + ], +) +def test_closed_fixed_binary_col(center, expected): # GH 34315 data = [0, 1, 1, 0, 0, 1, 0, 1] df = DataFrame( @@ -143,13 +165,8 @@ def test_closed_fixed_binary_col(): index=date_range(start="2020-01-01", freq="min", periods=len(data)), ) - rolling = df.rolling(window=len(df), closed="left", min_periods=1) + rolling = df.rolling(window=len(df), closed="left", min_periods=1, center=center) result = rolling.mean() - expected = DataFrame( - [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], - columns=["binary_col"], - index=date_range(start="2020-01-01", freq="min", periods=len(data)), - ) tm.assert_frame_equal(result, expected) @@ -394,6 +411,7 @@ def test_rolling_datetime(axis_frame, tz_naive_fixture): tm.assert_frame_equal(result, expected) +# maybe this? center true/false def test_rolling_window_as_string(): # see gh-22590 date_today = datetime.now() From c27f50e3ba7b400ceaf26c899151b0049d98f14d Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Fri, 29 Jan 2021 14:54:34 +0100 Subject: [PATCH 14/54] adapt test parameters --- pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 9a7d1f0a71b05..0ad45ac8c3f18 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -150,7 +150,7 @@ def test_closed_fixed(closed, arithmetic_win_operators): ( True, DataFrame( - [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], + [2 / 3, 0.5, 0.4, 0.5, 0.428571, 0.5, 0.571429, 0.5], columns=["binary_col"], index=date_range(start="2020-01-01", freq="min", periods=8), ), From abaa43bb9625401db5b7c37edc675bc370bf5714 Mon Sep 17 00:00:00 2001 From: sevberg Date: Fri, 29 Jan 2021 16:12:00 +0100 Subject: [PATCH 15/54] add center testing to test_closed_fixed --- pandas/tests/window/test_rolling.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 0ad45ac8c3f18..14421e50a64d1 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -122,16 +122,25 @@ def test_numpy_compat(method): # maybe this one too? -def test_closed_fixed(closed, arithmetic_win_operators): +@pytest.mark.parametrize( + "center", + [ + False, + True, + ], +) +def test_closed_fixed(closed, arithmetic_win_operators, center): # GH 34315 func_name = arithmetic_win_operators df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) - result = getattr(df_fixed.rolling(2, closed=closed, min_periods=1), func_name)() - expected = getattr(df_time.rolling("2D", closed=closed), func_name)().reset_index( - drop=True - ) + result = getattr( + df_fixed.rolling(2, closed=closed, min_periods=1, center=center), func_name + )() + expected = getattr( + df_time.rolling("2D", closed=closed, center=center), func_name + )().reset_index(drop=True) tm.assert_frame_equal(result, expected) From dc046da627cc44e45e2d3e1b4de9f1db211ec9a9 Mon Sep 17 00:00:00 2001 From: sevberg Date: Fri, 29 Jan 2021 16:14:06 +0100 Subject: [PATCH 16/54] clean test_closed_fixed_binary_col --- pandas/tests/window/test_rolling.py | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 14421e50a64d1..19250628e47aa 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -146,33 +146,30 @@ def test_closed_fixed(closed, arithmetic_win_operators, center): @pytest.mark.parametrize( - "center, expected", + "center, expected_data", [ ( False, - DataFrame( - [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], - columns=["binary_col"], - index=date_range(start="2020-01-01", freq="min", periods=8), - ), + [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], ), ( True, - DataFrame( - [2 / 3, 0.5, 0.4, 0.5, 0.428571, 0.5, 0.571429, 0.5], - columns=["binary_col"], - index=date_range(start="2020-01-01", freq="min", periods=8), - ), + [2 / 3, 0.5, 0.4, 0.5, 0.428571, 0.5, 0.571429, 0.5], ), ], ) -def test_closed_fixed_binary_col(center, expected): +def test_closed_fixed_binary_col(center, expected_data): # GH 34315 data = [0, 1, 1, 0, 0, 1, 0, 1] df = DataFrame( {"binary_col": data}, index=date_range(start="2020-01-01", freq="min", periods=len(data)), ) + expected = DataFrame( + expected_data, + columns=["binary_col"], + index=date_range(start="2020-01-01", freq="min", periods=8), + ) rolling = df.rolling(window=len(df), closed="left", min_periods=1, center=center) result = rolling.mean() From 95e3f26bfb83b861ad4d48c97820439a23e69dbb Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Mon, 1 Feb 2021 09:41:42 +0100 Subject: [PATCH 17/54] fix formatting and rename `center_window` to `center` --- pandas/_libs/window/indexers.pyx | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 41c5e2f229778..a92f752f992a4 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -45,7 +45,7 @@ def calculate_variable_window_bounds( cdef: bint left_closed = False bint right_closed = False - bint center_window = False + bint center = False ndarray[int64_t, ndim=1] start, end int64_t start_bound, end_bound, index_growth_sign = 1 Py_ssize_t i, j @@ -76,10 +76,9 @@ def calculate_variable_window_bounds( # right endpoint is open else: end[0] = 0 - if center_window: - for j in range(0, num_values+1): - if (index[j] == index[0] + index_growth_sign * window_size / 2 and - right_closed): + if center: + for j in range(0, num_values + 1): + if (index[j] == index[0] + index_growth_sign * window_size / 2 and right_closed): end[0] = j + 1 break elif index[j] >= index[0] + index_growth_sign * window_size / 2: @@ -91,7 +90,7 @@ def calculate_variable_window_bounds( # start is start of slice interval (including) # end is end of slice interval (not including) for i in range(1, num_values): - if center_window: + if center: end_bound = index[i] + index_growth_sign * window_size / 2 start_bound = index[i] - index_growth_sign * window_size / 2 else: @@ -112,7 +111,7 @@ def calculate_variable_window_bounds( # for centered window advance the end bound until we are # outside the constraint - if center_window: + if center: for j in range(end[i - 1], num_values + 1): if j == num_values: end[i] = j @@ -131,6 +130,6 @@ def calculate_variable_window_bounds( end[i] = end[i - 1] # right endpoint is open - if not right_closed and not center_window: + if not right_closed and not center: end[i] -= 1 return start, end From 8d582a12db7a732b0c0e777cff799e3460d696bf Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Mon, 1 Feb 2021 09:42:13 +0100 Subject: [PATCH 18/54] define len of test data dynamically --- pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 19250628e47aa..515a67ce9a120 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -168,7 +168,7 @@ def test_closed_fixed_binary_col(center, expected_data): expected = DataFrame( expected_data, columns=["binary_col"], - index=date_range(start="2020-01-01", freq="min", periods=8), + index=date_range(start="2020-01-01", freq="min", periods=len(expected_data)), ) rolling = df.rolling(window=len(df), closed="left", min_periods=1, center=center) From 8d5a55c4b9e655d10232c07262eda495a9cb1dcf Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Mon, 1 Feb 2021 10:07:07 +0100 Subject: [PATCH 19/54] move if-statement back into two lines (failed before) --- pandas/_libs/window/indexers.pyx | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index a92f752f992a4..962fbf9003675 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -78,7 +78,8 @@ def calculate_variable_window_bounds( end[0] = 0 if center: for j in range(0, num_values + 1): - if (index[j] == index[0] + index_growth_sign * window_size / 2 and right_closed): + if (index[j] == index[0] + index_growth_sign * window_size / 2 and + right_closed): end[0] = j + 1 break elif index[j] >= index[0] + index_growth_sign * window_size / 2: From 525cc69ff9cfe56bec07f8e69c82a89626a52b19 Mon Sep 17 00:00:00 2001 From: sevberg Date: Mon, 1 Feb 2021 11:32:07 +0100 Subject: [PATCH 20/54] remove hard-coded center --- pandas/_libs/window/indexers.pyx | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index 962fbf9003675..dcb5176c9f440 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -45,7 +45,6 @@ def calculate_variable_window_bounds( cdef: bint left_closed = False bint right_closed = False - bint center = False ndarray[int64_t, ndim=1] start, end int64_t start_bound, end_bound, index_growth_sign = 1 Py_ssize_t i, j From 6ac79b9739f7c8307550178491d58a3b085bb6c1 Mon Sep 17 00:00:00 2001 From: sevberg Date: Mon, 1 Feb 2021 12:31:50 +0100 Subject: [PATCH 21/54] remove fixture --- pandas/tests/window/test_rolling.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 515a67ce9a120..4a3b65f73ed7d 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -121,14 +121,6 @@ def test_numpy_compat(method): getattr(r, method)(dtype=np.float64) -# maybe this one too? -@pytest.mark.parametrize( - "center", - [ - False, - True, - ], -) def test_closed_fixed(closed, arithmetic_win_operators, center): # GH 34315 func_name = arithmetic_win_operators From 9bf6ce340ae1933c1c04979acba99e90ab1c852b Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Mon, 1 Feb 2021 13:22:51 +0100 Subject: [PATCH 22/54] use `center` fixture for `test_closed_fixed_binary_col` --- pandas/tests/window/test_rolling.py | 21 +++++++-------------- 1 file changed, 7 insertions(+), 14 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 4a3b65f73ed7d..d748ba403298a 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -137,26 +137,19 @@ def test_closed_fixed(closed, arithmetic_win_operators, center): tm.assert_frame_equal(result, expected) -@pytest.mark.parametrize( - "center, expected_data", - [ - ( - False, - [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571], - ), - ( - True, - [2 / 3, 0.5, 0.4, 0.5, 0.428571, 0.5, 0.571429, 0.5], - ), - ], -) -def test_closed_fixed_binary_col(center, expected_data): +def test_closed_fixed_binary_col(center): # GH 34315 data = [0, 1, 1, 0, 0, 1, 0, 1] df = DataFrame( {"binary_col": data}, index=date_range(start="2020-01-01", freq="min", periods=len(data)), ) + + if center: + expected_data = [2 / 3, 0.5, 0.4, 0.5, 0.428571, 0.5, 0.571429, 0.5] + else: + expected_data = [np.nan, 0, 0.5, 2 / 3, 0.5, 0.4, 0.5, 0.428571] + expected = DataFrame( expected_data, columns=["binary_col"], From 4f98fc505ee9a09f03148b812433b23f134802ef Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Mon, 1 Feb 2021 17:19:55 +0100 Subject: [PATCH 23/54] add `center` testing to `test_rolling_window_as_string` --- pandas/tests/window/test_rolling.py | 132 +++++++++++++++++++--------- 1 file changed, 89 insertions(+), 43 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index d748ba403298a..dcbddad44004a 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -403,7 +403,7 @@ def test_rolling_datetime(axis_frame, tz_naive_fixture): # maybe this? center true/false -def test_rolling_window_as_string(): +def test_rolling_window_as_string(center): # see gh-22590 date_today = datetime.now() days = date_range(date_today, date_today + timedelta(365), freq="D") @@ -414,50 +414,96 @@ def test_rolling_window_as_string(): df = DataFrame({"DateCol": days, "metric": data}) df.set_index("DateCol", inplace=True) - result = df.rolling(window="21D", min_periods=2, closed="left")["metric"].agg("max") - - expData = ( - [np.nan] * 2 - + [88.0] * 16 - + [97.0] * 9 - + [98.0] - + [99.0] * 21 - + [95.0] * 16 - + [93.0] * 5 - + [89.0] * 5 - + [96.0] * 21 - + [94.0] * 14 - + [90.0] * 13 - + [88.0] * 2 - + [90.0] * 9 - + [96.0] * 21 - + [95.0] * 6 - + [91.0] - + [87.0] * 6 - + [92.0] * 21 - + [83.0] * 2 - + [86.0] * 10 - + [87.0] * 5 - + [98.0] * 21 - + [97.0] * 14 - + [93.0] * 7 - + [87.0] * 4 - + [86.0] * 4 - + [95.0] * 21 - + [85.0] * 14 - + [83.0] * 2 - + [76.0] * 5 - + [81.0] * 2 - + [98.0] * 21 - + [95.0] * 14 - + [91.0] * 7 - + [86.0] - + [93.0] * 3 - + [95.0] * 20 - ) + result = df.rolling(window="21D", min_periods=2, closed="left", center=center)[ + "metric" + ].agg("max") + + if center: + print(f"center: {center}: inside IF") + expected_data = ( + [np.nan] * 2 + + [88.0] * 16 + + [97.0] * 9 + + [98.0] + + [99.0] * 21 + + [95.0] * 16 + + [93.0] * 5 + + [89.0] * 5 + + [96.0] * 21 # 21 + + [94.0] * 14 # 14 + + [90.0] * 13 + + [88.0] * 2 + + [90.0] * 9 + + [96.0] * 21 # 21 + + [95.0] * 6 + + [91.0] + + [87.0] * 6 # + + [92.0] * 21 # + + [83.0] * 2 # + + [86.0] * 10 # + + [87.0] * 5 # + + [98.0] * 21 # + + [97.0] * 14 # + + [93.0] * 7 # + + [87.0] * 4 # + + [86.0] * 4 # + + [95.0] * 21 # + + [85.0] * 14 # + + [83.0] * 2 # + + [76.0] * 5 # + + [81.0] * 2 # + + [98.0] * 21 # + + [95.0] * 14 # + + [91.0] * 7 + + [86.0] + + [93.0] * 3 + + [95.0] * 20 + ) + + else: + print(f"center: {center}: inside ELSE") + expected_data = ( + [np.nan] * 2 + + [88.0] * 16 + + [97.0] * 9 + + [98.0] + + [99.0] * 21 + + [95.0] * 16 + + [93.0] * 5 + + [89.0] * 5 + + [96.0] * 21 + + [94.0] * 14 + + [90.0] * 13 + + [88.0] * 2 + + [90.0] * 9 + + [96.0] * 21 + + [95.0] * 6 + + [91.0] + + [87.0] * 6 + + [92.0] * 21 + + [83.0] * 2 + + [86.0] * 10 + + [87.0] * 5 + + [98.0] * 21 + + [97.0] * 14 + + [93.0] * 7 + + [87.0] * 4 + + [86.0] * 4 + + [95.0] * 21 + + [85.0] * 14 + + [83.0] * 2 + + [76.0] * 5 + + [81.0] * 2 + + [98.0] * 21 + + [95.0] * 14 + + [91.0] * 7 + + [86.0] + + [93.0] * 3 + + [95.0] * 20 + ) expected = Series( - expData, index=days.rename("DateCol")._with_freq(None), name="metric" + expected_data, index=days.rename("DateCol")._with_freq(None), name="metric" ) tm.assert_series_equal(result, expected) From e5ae3b2510fa50bbeb735b70757abaedd177b268 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Tue, 2 Feb 2021 10:15:27 +0100 Subject: [PATCH 24/54] correct expected data and remove debug prints --- pandas/tests/window/test_rolling.py | 49 ++++++++++++++--------------- 1 file changed, 23 insertions(+), 26 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index dcbddad44004a..fbf77478a3c2a 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -402,7 +402,6 @@ def test_rolling_datetime(axis_frame, tz_naive_fixture): tm.assert_frame_equal(result, expected) -# maybe this? center true/false def test_rolling_window_as_string(center): # see gh-22590 date_today = datetime.now() @@ -419,49 +418,47 @@ def test_rolling_window_as_string(center): ].agg("max") if center: - print(f"center: {center}: inside IF") expected_data = ( - [np.nan] * 2 - + [88.0] * 16 + [88.0] * 7 + [97.0] * 9 + [98.0] + [99.0] * 21 + [95.0] * 16 + [93.0] * 5 + [89.0] * 5 - + [96.0] * 21 # 21 - + [94.0] * 14 # 14 + + [96.0] * 21 + + [94.0] * 14 + [90.0] * 13 + [88.0] * 2 + [90.0] * 9 - + [96.0] * 21 # 21 + + [96.0] * 21 + [95.0] * 6 + [91.0] - + [87.0] * 6 # - + [92.0] * 21 # - + [83.0] * 2 # - + [86.0] * 10 # - + [87.0] * 5 # - + [98.0] * 21 # - + [97.0] * 14 # - + [93.0] * 7 # - + [87.0] * 4 # - + [86.0] * 4 # - + [95.0] * 21 # - + [85.0] * 14 # - + [83.0] * 2 # - + [76.0] * 5 # - + [81.0] * 2 # - + [98.0] * 21 # - + [95.0] * 14 # + + [87.0] * 6 + + [92.0] * 21 + + [83.0] * 2 + + [86.0] * 10 + + [87.0] * 5 + + [98.0] * 21 + + [97.0] * 14 + + [93.0] * 7 + + [87.0] * 4 + + [86.0] * 4 + + [95.0] * 21 + + [85.0] * 14 + + [83.0] * 2 + + [76.0] * 5 + + [81.0] * 2 + + [98.0] * 21 + + [95.0] * 14 + [91.0] * 7 + [86.0] + [93.0] * 3 - + [95.0] * 20 + + [95.0] * 29 + + [77.0] * 2 ) else: - print(f"center: {center}: inside ELSE") expected_data = ( [np.nan] * 2 + [88.0] * 16 From d106940c78dfd61a2dee3676ed195c0e9ca26fc7 Mon Sep 17 00:00:00 2001 From: sevberg Date: Tue, 2 Feb 2021 20:44:27 +0100 Subject: [PATCH 25/54] align ddof usage of rolling sem with nanops nansem --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 683da7dc1f9f4..69e4e0eb56caf 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2208,7 +2208,7 @@ def skew(self, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["sem"]) def sem(self, ddof=1, *args, **kwargs): - return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) + return self.std(ddof=ddof, *args, **kwargs) / self.count().pow(0.5) _agg_doc = dedent( """ From d4f6d225300fa81476e39255c6b963416c3d8486 Mon Sep 17 00:00:00 2001 From: sevberg Date: Tue, 2 Feb 2021 20:46:02 +0100 Subject: [PATCH 26/54] explicitly test centered datetimelike windows --- pandas/tests/window/test_rolling.py | 58 +++++++++++++++++++++++++++-- 1 file changed, 55 insertions(+), 3 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index fbf77478a3c2a..1ed50ea848d60 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -121,22 +121,74 @@ def test_numpy_compat(method): getattr(r, method)(dtype=np.float64) -def test_closed_fixed(closed, arithmetic_win_operators, center): +def test_closed_fixed(closed, arithmetic_win_operators): # GH 34315 func_name = arithmetic_win_operators df_fixed = DataFrame({"A": [0, 1, 2, 3, 4]}) df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) result = getattr( - df_fixed.rolling(2, closed=closed, min_periods=1, center=center), func_name + df_fixed.rolling(2, closed=closed, min_periods=1, center=False), func_name )() expected = getattr( - df_time.rolling("2D", closed=closed, center=center), func_name + df_time.rolling("2D", closed=closed, min_periods=1, center=False), func_name )().reset_index(drop=True) tm.assert_frame_equal(result, expected) +def test_datetimelike_centered_selections(closed, arithmetic_win_operators): + # GH 34315 + func_name = arithmetic_win_operators + df_time = DataFrame( + {"A": [0.0, 1.0, 2.0, 3.0, 4.0]}, index=date_range("2020", periods=5) + ) + + if closed == "both": + window_selections = [ + [True, True, False, False, False], + [True, True, True, False, False], + [False, True, True, True, False], + [False, False, True, True, True], + [False, False, False, True, True], + ] + elif closed == "left": + window_selections = [ + [True, False, False, False, False], + [True, True, False, False, False], + [False, True, True, False, False], + [False, False, True, True, False], + [False, False, False, True, True], + ] + elif closed == "right": + window_selections = [ + [True, True, False, False, False], + [False, True, True, False, False], + [False, False, True, True, False], + [False, False, False, True, True], + [False, False, False, False, True], + ] + else: # closed=="neither" + window_selections = [ + [True, False, False, False, False], + [False, True, False, False, False], + [False, False, True, False, False], + [False, False, False, True, False], + [False, False, False, False, True], + ] + + expected = DataFrame( + {"A": [getattr(df_time["A"].iloc[s], func_name)() for s in window_selections]}, + index=date_range("2020", periods=5), + ) + + result = getattr( + df_time.rolling("2D", closed=closed, min_periods=1, center=True), func_name + )() + + tm.assert_frame_equal(result, expected, check_dtype=False) + + def test_closed_fixed_binary_col(center): # GH 34315 data = [0, 1, 1, 0, 0, 1, 0, 1] From c2a733390df3ff3ca9ddafe453e2710888c7a626 Mon Sep 17 00:00:00 2001 From: sevberg Date: Tue, 2 Feb 2021 20:48:04 +0100 Subject: [PATCH 27/54] correct sem test --- pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 1ed50ea848d60..4329d18117c5c 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -991,7 +991,7 @@ def test_rolling_sem(frame_or_series): result = obj.rolling(2, min_periods=1).sem() if isinstance(result, DataFrame): result = Series(result[0].values) - expected = Series([np.nan] + [0.707107] * 2) + expected = Series([np.nan] + [0.5] * 2) tm.assert_series_equal(result, expected) From 73313e610a486bdecd940af96eb86d8139a7391a Mon Sep 17 00:00:00 2001 From: sevberg Date: Tue, 2 Feb 2021 23:25:31 +0100 Subject: [PATCH 28/54] align rolling.sem with nanops.nansem --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 69e4e0eb56caf..88148f0f54878 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1663,7 +1663,7 @@ def skew(self, **kwargs): ) def sem(self, ddof: int = 1, *args, **kwargs): - return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) + return self.std(*args, **kwargs) / self.count().pow(0.5) _shared_docs["sem"] = dedent( """ From 92f89928c515403f61aa80fa06b73a082872b0c4 Mon Sep 17 00:00:00 2001 From: sevberg Date: Tue, 2 Feb 2021 23:25:40 +0100 Subject: [PATCH 29/54] fix test --- pandas/tests/window/test_groupby.py | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index b89fb35ac3a70..413a9aa44204c 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -522,17 +522,28 @@ def test_groupby_rolling_count_closed_on(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - ("func", "kwargs"), - [("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})], + ("func", "kwargs", "expected_data"), + [ + ( + "rolling", + {"window": 2, "min_periods": 1}, + {"a": [np.nan] * 5, "b": [np.nan, 0.5, np.nan, 0.5, 0.5]}, + ), + ( + "expanding", + {}, + {"a": [np.nan] * 5, "b": [np.nan, 0.5, np.nan, 0.5, 0.577350]}, + ), + ], ) - def test_groupby_rolling_sem(self, func, kwargs): + def test_groupby_rolling_sem(self, func, kwargs, expected_data): # GH: 26476 df = DataFrame( [["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"] ) result = getattr(df.groupby("a"), func)(**kwargs).sem() expected = DataFrame( - {"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]}, + expected_data, index=MultiIndex.from_tuples( [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None] ), From 648d2d372d2e615bd1fb86ccd9e6836c4d36ff33 Mon Sep 17 00:00:00 2001 From: sevberg Date: Wed, 3 Feb 2021 14:23:05 +0100 Subject: [PATCH 30/54] revert ddof behavior --- pandas/core/window/rolling.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index 88148f0f54878..ed159b7e0108c 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -1663,7 +1663,7 @@ def skew(self, **kwargs): ) def sem(self, ddof: int = 1, *args, **kwargs): - return self.std(*args, **kwargs) / self.count().pow(0.5) + return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) _shared_docs["sem"] = dedent( """ @@ -2051,8 +2051,7 @@ def validate(self): freq = to_offset(self.window) except (TypeError, ValueError) as err: raise ValueError( - f"passed window {self.window} is not " - "compatible with a datetimelike index" + f"passed window {self.window} is not " "compatible with a d index" ) from err if isinstance(self._on, ABCPeriodIndex): self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) @@ -2208,7 +2207,7 @@ def skew(self, **kwargs): @Substitution(name="rolling") @Appender(_shared_docs["sem"]) def sem(self, ddof=1, *args, **kwargs): - return self.std(ddof=ddof, *args, **kwargs) / self.count().pow(0.5) + return self.std(*args, **kwargs) / (self.count() - ddof).pow(0.5) _agg_doc = dedent( """ From 278d33f0e18016064574402be6177b584bcf38a0 Mon Sep 17 00:00:00 2001 From: sevberg Date: Wed, 3 Feb 2021 14:23:17 +0100 Subject: [PATCH 31/54] revert sem test --- pandas/tests/window/test_groupby.py | 19 ++++--------------- 1 file changed, 4 insertions(+), 15 deletions(-) diff --git a/pandas/tests/window/test_groupby.py b/pandas/tests/window/test_groupby.py index 413a9aa44204c..b89fb35ac3a70 100644 --- a/pandas/tests/window/test_groupby.py +++ b/pandas/tests/window/test_groupby.py @@ -522,28 +522,17 @@ def test_groupby_rolling_count_closed_on(self): tm.assert_series_equal(result, expected) @pytest.mark.parametrize( - ("func", "kwargs", "expected_data"), - [ - ( - "rolling", - {"window": 2, "min_periods": 1}, - {"a": [np.nan] * 5, "b": [np.nan, 0.5, np.nan, 0.5, 0.5]}, - ), - ( - "expanding", - {}, - {"a": [np.nan] * 5, "b": [np.nan, 0.5, np.nan, 0.5, 0.577350]}, - ), - ], + ("func", "kwargs"), + [("rolling", {"window": 2, "min_periods": 1}), ("expanding", {})], ) - def test_groupby_rolling_sem(self, func, kwargs, expected_data): + def test_groupby_rolling_sem(self, func, kwargs): # GH: 26476 df = DataFrame( [["a", 1], ["a", 2], ["b", 1], ["b", 2], ["b", 3]], columns=["a", "b"] ) result = getattr(df.groupby("a"), func)(**kwargs).sem() expected = DataFrame( - expected_data, + {"a": [np.nan] * 5, "b": [np.nan, 0.70711, np.nan, 0.70711, 0.70711]}, index=MultiIndex.from_tuples( [("a", 0), ("a", 1), ("b", 2), ("b", 3), ("b", 4)], names=["a", None] ), From 5e50f36ccbcb648568dbac21a7b6358eca476bb4 Mon Sep 17 00:00:00 2001 From: sevberg Date: Wed, 3 Feb 2021 14:23:29 +0100 Subject: [PATCH 32/54] side-step ddof bug --- pandas/tests/window/test_rolling.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 4329d18117c5c..1bf5884cd372b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -182,9 +182,14 @@ def test_datetimelike_centered_selections(closed, arithmetic_win_operators): index=date_range("2020", periods=5), ) + if func_name == "sem": + kwargs = {"ddof": 0} + else: + kwargs = {} + result = getattr( df_time.rolling("2D", closed=closed, min_periods=1, center=True), func_name - )() + )(**kwargs) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -991,7 +996,7 @@ def test_rolling_sem(frame_or_series): result = obj.rolling(2, min_periods=1).sem() if isinstance(result, DataFrame): result = Series(result[0].values) - expected = Series([np.nan] + [0.5] * 2) + expected = Series([np.nan] + [0.7071067811865476] * 2) tm.assert_series_equal(result, expected) From c11cf15bfd67aa8c3d0283f46e6ecef1a6e8f814 Mon Sep 17 00:00:00 2001 From: sevberg Date: Wed, 3 Feb 2021 14:33:11 +0100 Subject: [PATCH 33/54] fix black failure --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index ed159b7e0108c..cac7bc623d8dd 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2051,7 +2051,7 @@ def validate(self): freq = to_offset(self.window) except (TypeError, ValueError) as err: raise ValueError( - f"passed window {self.window} is not " "compatible with a d index" + f"passed window {self.window} is not compatible with a d index" ) from err if isinstance(self._on, ABCPeriodIndex): self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) From 2e3f875c5361ce0480279a4f17661e0b91f6d941 Mon Sep 17 00:00:00 2001 From: sevberg Date: Wed, 3 Feb 2021 15:08:26 +0100 Subject: [PATCH 34/54] disable black --- pandas/core/window/rolling.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index cac7bc623d8dd..cdb16c5b94088 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2051,7 +2051,8 @@ def validate(self): freq = to_offset(self.window) except (TypeError, ValueError) as err: raise ValueError( - f"passed window {self.window} is not compatible with a d index" + f"passed window {self.window} is not " + "compatible with a d index" ) from err if isinstance(self._on, ABCPeriodIndex): self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) From f63309b163d55f59b65bab0ca1ff3c5d46130aba Mon Sep 17 00:00:00 2001 From: sevberg Date: Wed, 3 Feb 2021 15:18:54 +0100 Subject: [PATCH 35/54] fix missing datetimelike word --- pandas/core/window/rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/core/window/rolling.py b/pandas/core/window/rolling.py index cdb16c5b94088..683da7dc1f9f4 100644 --- a/pandas/core/window/rolling.py +++ b/pandas/core/window/rolling.py @@ -2052,7 +2052,7 @@ def validate(self): except (TypeError, ValueError) as err: raise ValueError( f"passed window {self.window} is not " - "compatible with a d index" + "compatible with a datetimelike index" ) from err if isinstance(self._on, ABCPeriodIndex): self._win_freq_i8 = freq.nanos / (self._on.freq.nanos / self._on.freq.n) From 9f76a418f982ee5fb51e9e28564b12e1a85b28d8 Mon Sep 17 00:00:00 2001 From: sevberg Date: Wed, 3 Feb 2021 18:15:23 +0100 Subject: [PATCH 36/54] update whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 7fe0b53d7d2ff..be45752e47ca6 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -55,7 +55,7 @@ Other enhancements - :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`) - :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes. - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) - +- :class:`Rolling` now support centered datetime-like windows (:issue:`38780`) .. --------------------------------------------------------------------------- .. _whatsnew_130.notable_bug_fixes: From f05ed61571b897e896db9b1720fce956d125792a Mon Sep 17 00:00:00 2001 From: sevberg Date: Wed, 3 Feb 2021 18:32:10 +0100 Subject: [PATCH 37/54] add to enhancements --- doc/source/whatsnew/v1.3.0.rst | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index be45752e47ca6..e132034ca4b70 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -33,6 +33,30 @@ For example: storage_options=headers ) +Centered Datetime-Like Rolling Windows +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing rolling calculations on :class:`DataFrame` and :class:`Series` +objects with a datetime-like index, a centered datetime-like window can now be +used (:issue:`38780`). +For example: + +.. code-block:: ipython + + In [1]: df = DataFrame( + {"A": [0, 1, 2, 3, 4]}, + index=date_range("2020", periods=5, freq="1D") + ) + + In [2]: df.rolling("2D", center=True).mean() + Out [2]: + A + 2020-01-01 0.5 + 2020-01-02 1.5 + 2020-01-03 2.5 + 2020-01-04 3.5 + 2020-01-05 4.0 + .. _whatsnew_130.enhancements.other: Other enhancements @@ -55,7 +79,6 @@ Other enhancements - :meth:`DataFrame.plot.scatter` can now accept a categorical column as the argument to ``c`` (:issue:`12380`, :issue:`31357`) - :meth:`.Styler.set_tooltips` allows on hover tooltips to be added to styled HTML dataframes. - :meth:`Series.loc.__getitem__` and :meth:`Series.loc.__setitem__` with :class:`MultiIndex` now raising helpful error message when indexer has too many dimensions (:issue:`35349`) -- :class:`Rolling` now support centered datetime-like windows (:issue:`38780`) .. --------------------------------------------------------------------------- .. _whatsnew_130.notable_bug_fixes: From 0520e182bd25817286da8a6a8a62b91add3b1d23 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Thu, 25 Feb 2021 15:51:20 +0100 Subject: [PATCH 38/54] trim trailing whitespaces --- doc/source/whatsnew/v1.3.0.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index f75b291aaef26..1c500b58ddf9f 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -37,19 +37,19 @@ Centered Datetime-Like Rolling Windows ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ When performing rolling calculations on :class:`DataFrame` and :class:`Series` -objects with a datetime-like index, a centered datetime-like window can now be +objects with a datetime-like index, a centered datetime-like window can now be used (:issue:`38780`). For example: .. code-block:: ipython In [1]: df = DataFrame( - {"A": [0, 1, 2, 3, 4]}, + {"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5, freq="1D") ) In [2]: df.rolling("2D", center=True).mean() - Out [2]: + Out [2]: A 2020-01-01 0.5 2020-01-02 1.5 From 5b9b8ff4331b09db3ae954f1d97a2fd05291aa47 Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Mon, 29 Mar 2021 14:19:44 +0200 Subject: [PATCH 39/54] correct `too many blank lines` --- pandas/tests/window/test_rolling.py | 1 - 1 file changed, 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 962775a09011f..68a3cbec9a519 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -462,7 +462,6 @@ def test_rolling_datetime(axis_frame, tz_naive_fixture): tm.assert_frame_equal(result, expected) - def test_rolling_window_as_string(using_array_manager, center): # see gh-22590 date_today = datetime.now() From fca3b4da9dc17926444a06ea0e6d6854d2f3d71c Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Mon, 29 Mar 2021 14:32:09 +0200 Subject: [PATCH 40/54] fix wrong var name after merge --- pandas/tests/window/test_rolling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 68a3cbec9a519..c567b8175bc6e 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -563,7 +563,7 @@ def test_rolling_window_as_string(using_array_manager, center): if not using_array_manager: # INFO(ArrayManager) preserves the frequence of the index index = index._with_freq(None) - expected = Series(expData, index=index, name="metric") + expected = Series(expected_data, index=index, name="metric") tm.assert_series_equal(result, expected) From 6c1c58a7f4603194034c338a6363b2d15b9cc13c Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Wed, 31 Mar 2021 14:13:51 +0200 Subject: [PATCH 41/54] remove unused `type: ignore` --- pandas/core/missing.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/pandas/core/missing.py b/pandas/core/missing.py index 41d7fed66469d..9409f236f3f72 100644 --- a/pandas/core/missing.py +++ b/pandas/core/missing.py @@ -862,6 +862,4 @@ def _rolling_window(a: np.ndarray, window: int): shape = a.shape[:-1] + (a.shape[-1] - window + 1, window) strides = a.strides + (a.strides[-1],) # error: Module has no attribute "stride_tricks" - return np.lib.stride_tricks.as_strided( # type: ignore[attr-defined] - a, shape=shape, strides=strides - ) + return np.lib.stride_tricks.as_strided(a, shape=shape, strides=strides) From f44c6e694408158b2af99327cb50d7e51a98852b Mon Sep 17 00:00:00 2001 From: "lucas.loltz" Date: Tue, 6 Apr 2021 09:56:16 +0200 Subject: [PATCH 42/54] fix datatype in docstring (now bool) --- pandas/_libs/window/indexers.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pandas/_libs/window/indexers.pyx b/pandas/_libs/window/indexers.pyx index eded5ca0169ef..5e2b137db64a6 100644 --- a/pandas/_libs/window/indexers.pyx +++ b/pandas/_libs/window/indexers.pyx @@ -32,7 +32,7 @@ def calculate_variable_window_bounds( min_periods : object ignored, exists for compatibility - center : object + center : bint center the rolling window on the current observation closed : str From 3dcad64cc2a15b111c750eb15c331c601eaf25e4 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 7 Apr 2021 11:22:48 +0200 Subject: [PATCH 43/54] dd parametrize for window_selections --- pandas/tests/window/test_rolling.py | 82 +++++++++++++++++------------ 1 file changed, 48 insertions(+), 34 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 30a3dfe281aec..84f497950b765 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -140,46 +140,60 @@ def test_closed_fixed(closed, arithmetic_win_operators): tm.assert_frame_equal(result, expected) -def test_datetimelike_centered_selections(closed, arithmetic_win_operators): +@pytest.mark.parametrize( + "closed, window_selections", + [ + ( + "both", + [ + [True, True, False, False, False], + [True, True, True, False, False], + [False, True, True, True, False], + [False, False, True, True, True], + [False, False, False, True, True], + ], + ), + ( + "left", + [ + [True, False, False, False, False], + [True, True, False, False, False], + [False, True, True, False, False], + [False, False, True, True, False], + [False, False, False, True, True], + ], + ), + ( + "right", + [ + [True, True, False, False, False], + [False, True, True, False, False], + [False, False, True, True, False], + [False, False, False, True, True], + [False, False, False, False, True], + ], + ), + ( + "neither", + [ + [True, False, False, False, False], + [False, True, False, False, False], + [False, False, True, False, False], + [False, False, False, True, False], + [False, False, False, False, True], + ], + ), + ], +) +def test_datetimelike_centered_selections( + closed, window_selections, arithmetic_win_operators +): # GH 34315 func_name = arithmetic_win_operators df_time = DataFrame( {"A": [0.0, 1.0, 2.0, 3.0, 4.0]}, index=date_range("2020", periods=5) ) - if closed == "both": - window_selections = [ - [True, True, False, False, False], - [True, True, True, False, False], - [False, True, True, True, False], - [False, False, True, True, True], - [False, False, False, True, True], - ] - elif closed == "left": - window_selections = [ - [True, False, False, False, False], - [True, True, False, False, False], - [False, True, True, False, False], - [False, False, True, True, False], - [False, False, False, True, True], - ] - elif closed == "right": - window_selections = [ - [True, True, False, False, False], - [False, True, True, False, False], - [False, False, True, True, False], - [False, False, False, True, True], - [False, False, False, False, True], - ] - else: # closed=="neither" - window_selections = [ - [True, False, False, False, False], - [False, True, False, False, False], - [False, False, True, False, False], - [False, False, False, True, False], - [False, False, False, False, True], - ] - expected = DataFrame( {"A": [getattr(df_time["A"].iloc[s], func_name)() for s in window_selections]}, index=date_range("2020", periods=5), From 0f9f6dfc3459d5cac549847d6da99bdcc00c44a1 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 7 Apr 2021 12:22:25 +0200 Subject: [PATCH 44/54] black formatting --- pandas/tests/window/test_rolling.py | 66 +++++++++++++++++++++++------ 1 file changed, 52 insertions(+), 14 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 84f497950b765..bcf12c29cf46d 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -80,7 +80,8 @@ def test_constructor_with_timedelta_window(window): # GH 15440 n = 10 df = DataFrame( - {"value": np.arange(n)}, index=date_range("2015-12-24", periods=n, freq="D") + {"value": np.arange(n)}, + index=date_range("2015-12-24", periods=n, freq="D"), ) expected_data = np.append([0.0, 1.0], np.arange(3.0, 27.0, 3)) @@ -99,7 +100,8 @@ def test_constructor_timedelta_window_and_minperiods(window, raw): # GH 15305 n = 10 df = DataFrame( - {"value": np.arange(n)}, index=date_range("2017-08-08", periods=n, freq="D") + {"value": np.arange(n)}, + index=date_range("2017-08-08", periods=n, freq="D"), ) expected = DataFrame( {"value": np.append([np.NaN, 1.0], np.arange(3.0, 27.0, 3))}, @@ -131,10 +133,12 @@ def test_closed_fixed(closed, arithmetic_win_operators): df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) result = getattr( - df_fixed.rolling(2, closed=closed, min_periods=1, center=False), func_name + df_fixed.rolling(2, closed=closed, min_periods=1, center=False), + func_name, )() expected = getattr( - df_time.rolling("2D", closed=closed, min_periods=1, center=False), func_name + df_time.rolling("2D", closed=closed, min_periods=1, center=False), + func_name, )().reset_index(drop=True) tm.assert_frame_equal(result, expected) @@ -205,7 +209,8 @@ def test_datetimelike_centered_selections( kwargs = {} result = getattr( - df_time.rolling("2D", closed=closed, min_periods=1, center=True), func_name + df_time.rolling("2D", closed=closed, min_periods=1, center=True), + func_name, )(**kwargs) tm.assert_frame_equal(result, expected, check_dtype=False) @@ -259,7 +264,8 @@ def test_closed_one_entry(func): def test_closed_one_entry_groupby(func): # GH24718 ser = DataFrame( - data={"A": [1, 1, 2], "B": [3, 2, 1]}, index=date_range("2000", periods=3) + data={"A": [1, 1, 2], "B": [3, 2, 1]}, + index=date_range("2000", periods=3), ) result = getattr( ser.groupby("A", sort=False)["B"].rolling("10D", closed="left"), func @@ -286,7 +292,8 @@ def test_closed_one_entry_groupby(func): def test_closed_min_max_datetime(input_dtype, func, closed, expected): # see gh-21704 ser = Series( - data=np.arange(10).astype(input_dtype), index=date_range("2000", periods=10) + data=np.arange(10).astype(input_dtype), + index=date_range("2000", periods=10), ) result = getattr(ser.rolling("3D", closed=closed), func)() @@ -745,8 +752,18 @@ def test_iter_rolling_on_dataframe(expected, window): 3, 1, ), - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 1), - (Series([1, 2, 3]), [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], 2, 2), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], + 2, + 1, + ), + ( + Series([1, 2, 3]), + [([1], [0]), ([1, 2], [0, 1]), ([2, 3], [1, 2])], + 2, + 2, + ), (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 0), (Series([1, 2, 3]), [([1], [0]), ([2], [1]), ([3], [2])], 1, 1), (Series([1, 2]), [([1], [0]), ([1, 2], [0, 1])], 2, 0), @@ -918,7 +935,18 @@ def test_rolling_numerical_too_large_numbers(): ds[2] = -9e33 result = ds.rolling(5).mean() expected = Series( - [np.nan, np.nan, np.nan, np.nan, -1.8e33, -1.8e33, -1.8e33, 5.0, 6.0, 7.0], + [ + np.nan, + np.nan, + np.nan, + np.nan, + -1.8e33, + -1.8e33, + -1.8e33, + 5.0, + 6.0, + 7.0, + ], index=dates, ) tm.assert_series_equal(result, expected) @@ -934,7 +962,8 @@ def test_rolling_mixed_dtypes_axis_1(func, value): df["c"] = 1.0 result = getattr(df.rolling(window=2, min_periods=1, axis=1), func)() expected = DataFrame( - {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, index=[1, 2] + {"a": [1.0, 1.0], "b": [value, value], "c": [value, value]}, + index=[1, 2], ) tm.assert_frame_equal(result, expected) @@ -1132,8 +1161,14 @@ def test_rolling_decreasing_indices(method): 318.0, ], ), - ("mean", [float("nan"), 7.5, float("nan"), 21.5, 6.0, 9.166667, 13.0, 17.5]), - ("sum", [float("nan"), 30.0, float("nan"), 86.0, 30.0, 55.0, 91.0, 140.0]), + ( + "mean", + [float("nan"), 7.5, float("nan"), 21.5, 6.0, 9.166667, 13.0, 17.5], + ), + ( + "sum", + [float("nan"), 30.0, float("nan"), 86.0, 30.0, 55.0, 91.0, 140.0], + ), ( "skew", [ @@ -1197,7 +1232,10 @@ def get_window_bounds(self, num_values, min_periods, center, closed): @pytest.mark.parametrize( ("index", "window"), - [([0, 1, 2, 3, 4], 2), (date_range("2001-01-01", freq="D", periods=5), "2D")], + [ + ([0, 1, 2, 3, 4], 2), + (date_range("2001-01-01", freq="D", periods=5), "2D"), + ], ) def test_rolling_corr_timedelta_index(index, window): # GH: 31286 From 315b320a51bbbf9b2bd2e2ca1dfc548ef00692a0 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 7 Apr 2021 12:31:02 +0200 Subject: [PATCH 45/54] parametrize "test_rolling_window_as_string" outside of function --- pandas/tests/window/test_rolling.py | 173 +++++++++++++++------------- 1 file changed, 90 insertions(+), 83 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index bcf12c29cf46d..b25baf29cf90b 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -483,7 +483,96 @@ def test_rolling_datetime(axis_frame, tz_naive_fixture): tm.assert_frame_equal(result, expected) -def test_rolling_window_as_string(using_array_manager, center): +@pytest.mark.parametrize( + "center, expected_data", + [ + ( + True, + ( + [88.0] * 7 + + [97.0] * 9 + + [98.0] + + [99.0] * 21 + + [95.0] * 16 + + [93.0] * 5 + + [89.0] * 5 + + [96.0] * 21 + + [94.0] * 14 + + [90.0] * 13 + + [88.0] * 2 + + [90.0] * 9 + + [96.0] * 21 + + [95.0] * 6 + + [91.0] + + [87.0] * 6 + + [92.0] * 21 + + [83.0] * 2 + + [86.0] * 10 + + [87.0] * 5 + + [98.0] * 21 + + [97.0] * 14 + + [93.0] * 7 + + [87.0] * 4 + + [86.0] * 4 + + [95.0] * 21 + + [85.0] * 14 + + [83.0] * 2 + + [76.0] * 5 + + [81.0] * 2 + + [98.0] * 21 + + [95.0] * 14 + + [91.0] * 7 + + [86.0] + + [93.0] * 3 + + [95.0] * 29 + + [77.0] * 2 + ), + ), + ( + False, + ( + [np.nan] * 2 + + [88.0] * 16 + + [97.0] * 9 + + [98.0] + + [99.0] * 21 + + [95.0] * 16 + + [93.0] * 5 + + [89.0] * 5 + + [96.0] * 21 + + [94.0] * 14 + + [90.0] * 13 + + [88.0] * 2 + + [90.0] * 9 + + [96.0] * 21 + + [95.0] * 6 + + [91.0] + + [87.0] * 6 + + [92.0] * 21 + + [83.0] * 2 + + [86.0] * 10 + + [87.0] * 5 + + [98.0] * 21 + + [97.0] * 14 + + [93.0] * 7 + + [87.0] * 4 + + [86.0] * 4 + + [95.0] * 21 + + [85.0] * 14 + + [83.0] * 2 + + [76.0] * 5 + + [81.0] * 2 + + [98.0] * 21 + + [95.0] * 14 + + [91.0] * 7 + + [86.0] + + [93.0] * 3 + + [95.0] * 20 + ), + ), + ], +) +def test_rolling_window_as_string(center, expected_data, using_array_manager): # see gh-22590 date_today = datetime.now() days = date_range(date_today, date_today + timedelta(365), freq="D") @@ -498,88 +587,6 @@ def test_rolling_window_as_string(using_array_manager, center): "metric" ].agg("max") - if center: - expected_data = ( - [88.0] * 7 - + [97.0] * 9 - + [98.0] - + [99.0] * 21 - + [95.0] * 16 - + [93.0] * 5 - + [89.0] * 5 - + [96.0] * 21 - + [94.0] * 14 - + [90.0] * 13 - + [88.0] * 2 - + [90.0] * 9 - + [96.0] * 21 - + [95.0] * 6 - + [91.0] - + [87.0] * 6 - + [92.0] * 21 - + [83.0] * 2 - + [86.0] * 10 - + [87.0] * 5 - + [98.0] * 21 - + [97.0] * 14 - + [93.0] * 7 - + [87.0] * 4 - + [86.0] * 4 - + [95.0] * 21 - + [85.0] * 14 - + [83.0] * 2 - + [76.0] * 5 - + [81.0] * 2 - + [98.0] * 21 - + [95.0] * 14 - + [91.0] * 7 - + [86.0] - + [93.0] * 3 - + [95.0] * 29 - + [77.0] * 2 - ) - - else: - expected_data = ( - [np.nan] * 2 - + [88.0] * 16 - + [97.0] * 9 - + [98.0] - + [99.0] * 21 - + [95.0] * 16 - + [93.0] * 5 - + [89.0] * 5 - + [96.0] * 21 - + [94.0] * 14 - + [90.0] * 13 - + [88.0] * 2 - + [90.0] * 9 - + [96.0] * 21 - + [95.0] * 6 - + [91.0] - + [87.0] * 6 - + [92.0] * 21 - + [83.0] * 2 - + [86.0] * 10 - + [87.0] * 5 - + [98.0] * 21 - + [97.0] * 14 - + [93.0] * 7 - + [87.0] * 4 - + [86.0] * 4 - + [95.0] * 21 - + [85.0] * 14 - + [83.0] * 2 - + [76.0] * 5 - + [81.0] * 2 - + [98.0] * 21 - + [95.0] * 14 - + [91.0] * 7 - + [86.0] - + [93.0] * 3 - + [95.0] * 20 - ) - index = days.rename("DateCol") if not using_array_manager: # INFO(ArrayManager) preserves the frequence of the index From f7d111001eaf44483f5dac4154a42c3b048677ad Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 7 Apr 2021 12:40:19 +0200 Subject: [PATCH 46/54] add whatsnew note --- doc/source/whatsnew/v1.3.0.rst | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 63902b53ea36d..b226ba426eedd 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -134,6 +134,30 @@ a copy will no longer be made (:issue:`32960`) The default behavior when not passing ``copy`` will remain unchanged, i.e. a copy will be made. +Centered Datetime-Like Rolling Windows +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +When performing rolling calculations on :class:`DataFrame` and :class:`Series` +objects with a datetime-like index, a centered datetime-like window can now be +used (:issue:`38780`). +For example: + +.. code-block:: ipython + + In [1]: df = DataFrame( + {"A": [0, 1, 2, 3, 4]}, + index=date_range("2020", periods=5, freq="1D") + ) + + In [2]: df.rolling("2D", center=True).mean() + Out [2]: + A + 2020-01-01 0.5 + 2020-01-02 1.5 + 2020-01-03 2.5 + 2020-01-04 3.5 + 2020-01-05 4.0 + .. _whatsnew_130.enhancements.other: Other enhancements From fc88ae4c4ca2887d8a23ff45c26060635379c557 Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 7 Apr 2021 17:13:45 +0200 Subject: [PATCH 47/54] remove center=False, is already default --- pandas/tests/window/test_rolling.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index b25baf29cf90b..7cfe8f32b7e8a 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -133,11 +133,11 @@ def test_closed_fixed(closed, arithmetic_win_operators): df_time = DataFrame({"A": [0, 1, 2, 3, 4]}, index=date_range("2020", periods=5)) result = getattr( - df_fixed.rolling(2, closed=closed, min_periods=1, center=False), + df_fixed.rolling(2, closed=closed, min_periods=1), func_name, )() expected = getattr( - df_time.rolling("2D", closed=closed, min_periods=1, center=False), + df_time.rolling("2D", closed=closed, min_periods=1), func_name, )().reset_index(drop=True) From e07a1f2474befcb81b09bcbf0fad13d004c0a03a Mon Sep 17 00:00:00 2001 From: Lucas Date: Wed, 7 Apr 2021 17:14:39 +0200 Subject: [PATCH 48/54] remove prompts and output from whatsnew --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index b226ba426eedd..ce5ec56138ffb 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -142,7 +142,7 @@ objects with a datetime-like index, a centered datetime-like window can now be used (:issue:`38780`). For example: -.. code-block:: ipython +.. ipython:: python In [1]: df = DataFrame( {"A": [0, 1, 2, 3, 4]}, From cefbb1628ac764fe1fe391e8f378ac7c0c0ea98f Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 8 Apr 2021 09:48:51 +0200 Subject: [PATCH 49/54] remove `in` and `out` annotations --- doc/source/whatsnew/v1.3.0.rst | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index ce5ec56138ffb..40450e89bbc19 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -144,19 +144,12 @@ For example: .. ipython:: python - In [1]: df = DataFrame( - {"A": [0, 1, 2, 3, 4]}, - index=date_range("2020", periods=5, freq="1D") - ) + df = pd.DataFrame( + {"A": [0, 1, 2, 3, 4]}, index=pd.date_range("2020", periods=5, freq="1D") + ) + + df.rolling("2D", center=True).mean() - In [2]: df.rolling("2D", center=True).mean() - Out [2]: - A - 2020-01-01 0.5 - 2020-01-02 1.5 - 2020-01-03 2.5 - 2020-01-04 3.5 - 2020-01-05 4.0 .. _whatsnew_130.enhancements.other: From edbfd21a5f48009aa19e010c2a22f2dcfa68e083 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 8 Apr 2021 09:54:02 +0200 Subject: [PATCH 50/54] add datetime-like center example --- doc/source/user_guide/window.rst | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index be9c04ae5d4f3..6f7d5b6ce242d 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -156,6 +156,16 @@ By default the labels are set to the right edge of the window, but a s.rolling(window=5).mean() s.rolling(window=5, center=True).mean() +This can also be applied to datetime-like indices. + +.. ipython:: python + + df = pd.DataFrame( + {"A": [0, 1, 2, 3, 4]}, index=pd.date_range("2020", periods=5, freq="1D") + ) + + df.rolling("2D", center=True).mean() + .. _window.endpoints: From bfc0f0d8130d49d37337df186561da4a403613e3 Mon Sep 17 00:00:00 2001 From: Lucas Date: Thu, 8 Apr 2021 11:31:59 +0200 Subject: [PATCH 51/54] add test for different behavior of window alignment --- pandas/tests/window/test_rolling.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/pandas/tests/window/test_rolling.py b/pandas/tests/window/test_rolling.py index 7cfe8f32b7e8a..9abae632e5da3 100644 --- a/pandas/tests/window/test_rolling.py +++ b/pandas/tests/window/test_rolling.py @@ -216,6 +216,20 @@ def test_datetimelike_centered_selections( tm.assert_frame_equal(result, expected, check_dtype=False) +def test_even_number_window_alignment(): + # see discussion in GH 38780 + s = Series(range(3), index=date_range(start="2020-01-01", freq="D", periods=3)) + + # behavior of index- and datetime-based windows differs here! + # s.rolling(window=2, min_periods=1, center=True).mean() + + result = s.rolling(window="2D", min_periods=1, center=True).mean() + + expected = Series([0.5, 1.5, 2], index=s.index) + + tm.assert_series_equal(result, expected) + + def test_closed_fixed_binary_col(center): # GH 34315 data = [0, 1, 1, 0, 0, 1, 0, 1] From 43e04ed581bb86d58af0cf2e651bfb6f5523bc5c Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 9 Apr 2021 13:55:28 +0200 Subject: [PATCH 52/54] additional output in centering example --- doc/source/whatsnew/v1.3.0.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/source/whatsnew/v1.3.0.rst b/doc/source/whatsnew/v1.3.0.rst index 40450e89bbc19..44266c55c70eb 100644 --- a/doc/source/whatsnew/v1.3.0.rst +++ b/doc/source/whatsnew/v1.3.0.rst @@ -147,7 +147,7 @@ For example: df = pd.DataFrame( {"A": [0, 1, 2, 3, 4]}, index=pd.date_range("2020", periods=5, freq="1D") ) - + df df.rolling("2D", center=True).mean() From 1e724dcd382a3d481789704d70d6466e5f2866df Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 9 Apr 2021 13:56:24 +0200 Subject: [PATCH 53/54] additional output in centering example and comparison --- doc/source/user_guide/window.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 6f7d5b6ce242d..0674d2f0c018e 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -163,7 +163,8 @@ This can also be applied to datetime-like indices. df = pd.DataFrame( {"A": [0, 1, 2, 3, 4]}, index=pd.date_range("2020", periods=5, freq="1D") ) - + df + df.rolling("2D", center=False).mean() df.rolling("2D", center=True).mean() From 47a3b141b196b3a2acbb65a2db3e208bcd863801 Mon Sep 17 00:00:00 2001 From: Lucas Date: Fri, 9 Apr 2021 14:14:36 +0200 Subject: [PATCH 54/54] add version tag 1.3 --- doc/source/user_guide/window.rst | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/source/user_guide/window.rst b/doc/source/user_guide/window.rst index 0674d2f0c018e..5efb3f40f5018 100644 --- a/doc/source/user_guide/window.rst +++ b/doc/source/user_guide/window.rst @@ -156,8 +156,9 @@ By default the labels are set to the right edge of the window, but a s.rolling(window=5).mean() s.rolling(window=5, center=True).mean() -This can also be applied to datetime-like indices. +This can also be applied to datetime-like indices. +.. versionadded:: 1.3 .. ipython:: python df = pd.DataFrame(