Skip to content
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ Bug fixes
- Fixed bug in :meth:`Series.diff` allowing non-integer values for the ``periods`` argument. (:issue:`56607`)
- Fixed bug in :meth:`Series.rank` that doesn't preserve missing values for nullable integers when ``na_option='keep'``. (:issue:`56976`)
- Fixed bug in :meth:`Series.replace` and :meth:`DataFrame.replace` inconsistently replacing matching instances when ``regex=True`` and missing values are present. (:issue:`56599`)
- Fixed bug in :meth:`Series.kurt` with low variance arrays getting zeroed out even when numerically stable. (:issue:`57972`)

Categorical
^^^^^^^^^^^
Expand Down
16 changes: 10 additions & 6 deletions pandas/core/nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1351,9 +1351,13 @@ def nankurt(
# floating point error
#
# #18044 in _libs/windows.pyx calc_kurt follow this behavior
# to fix the fperr to treat denom <1e-14 as zero
numerator = _zero_out_fperr(numerator)
denominator = _zero_out_fperr(denominator)
# to fix the fperr to treat denom <1e-14 as zero (default cutoff)
# GH-57972 set cutoff lower for low variance arrays to prevent cutoff of otherwise
# numerically stable values. Scipy.kurtosis and this implementation start
# diverging for examples with cutoffs below e-281
cutoff = 1e-281
numerator = _zero_out_fperr(numerator, cutoff)
denominator = _zero_out_fperr(denominator, cutoff)

if not isinstance(denominator, np.ndarray):
# if ``denom`` is a scalar, check these corner cases first before
Expand Down Expand Up @@ -1565,12 +1569,12 @@ def check_below_min_count(
return False


def _zero_out_fperr(arg, cutoff=1e-14):
    """
    Replace values whose magnitude is below ``cutoff`` with zero.

    Parameters
    ----------
    arg : np.ndarray or numpy scalar
        Value(s) to clean up. The scalar branch relies on ``arg.dtype``.
    cutoff : float, default 1e-14
        Values with ``abs(value) < cutoff`` are treated as floating point
        error and replaced by zero. Callers may pass a smaller cutoff so that
        tiny-but-meaningful values are not zeroed out.

    Returns
    -------
    np.ndarray or numpy scalar
        For an ndarray, the result of ``np.where`` with sub-cutoff entries set
        to 0. For a scalar, a zero built from ``arg.dtype.type`` when below
        the cutoff, otherwise ``arg`` itself.
    """
    # #18044 reference this behavior to fix rolling skew/kurt issue
    if isinstance(arg, np.ndarray):
        return np.where(np.abs(arg) < cutoff, 0, arg)
    # Scalar path: build the zero from the scalar's own dtype type.
    return arg.dtype.type(0) if np.abs(arg) < cutoff else arg


@disallow("M8", "m8")
Expand Down
15 changes: 15 additions & 0 deletions pandas/tests/test_nanops.py
Original file line number Diff line number Diff line change
Expand Up @@ -1105,6 +1105,21 @@ def test_nans_skipna(self, samples, actual_kurt):
kurt = nanops.nankurt(samples, skipna=True)
tm.assert_almost_equal(kurt, actual_kurt)

def test_arrays_with_low_variance(self):
    # GH-57972
    # Low-variance samples reach the breakdown of numerical stability at a
    # much smaller magnitude, so nankurt must still return a usable value.
    #
    # scipy's kurtosis is nan at e-81, and the two implementations start
    # diverging from each other around e-76.
    scale = 1e-72
    n_zeros = 10_000
    n_repeats = 10
    values = (
        [-2.3 * scale] * n_repeats
        + [-4.1 * scale] * n_repeats
        + [0.0] * n_zeros
    )
    low_var = np.array(values)
    # reference value computed with scipy.stats.kurtosis(low_var, bias=False)
    expected = 632.556235239126
    result = nanops.nankurt(low_var)
    tm.assert_almost_equal(result, expected)

@property
def prng(self):
return np.random.default_rng(2)
Expand Down