Skip to content
Merged
1 change: 1 addition & 0 deletions doc/source/whatsnew/v3.0.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -673,6 +673,7 @@ Timezones

Numeric
^^^^^^^
- Bug in :meth:`DataFrame.corr` where numerical precision errors resulted in correlations above ``1.0`` (:issue:`61120`)
- Bug in :meth:`DataFrame.quantile` where the column type was not preserved when ``numeric_only=True`` with a list-like ``q`` produced an empty result (:issue:`59035`)
- Bug in ``np.matmul`` with :class:`Index` inputs raising a ``TypeError`` (:issue:`57079`)

Expand Down
11 changes: 8 additions & 3 deletions pandas/_libs/algos.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -353,10 +353,9 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
float64_t[:, ::1] result
uint8_t[:, :] mask
int64_t nobs = 0
float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy
float64_t vx, vy, dx, dy, meanx, meany, divisor, ssqdmx, ssqdmy, covxy, val

N, K = (<object>mat).shape

if minp is None:
minpv = 1
else:
Expand Down Expand Up @@ -389,8 +388,14 @@ def nancorr(const float64_t[:, :] mat, bint cov=False, minp=None):
else:
divisor = (nobs - 1.0) if cov else sqrt(ssqdmx * ssqdmy)

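# clip `covxy / divisor` so the correlation coefficient stays within [-1.0, 1.0]
# NOTE(review): the clip below is not guarded by `cov`; when cov=True the
# divisor is `nobs - 1.0` and `covxy / divisor` is a covariance, which is
# not bounded by [-1.0, 1.0] -- confirm the clip should only run for corr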
if divisor != 0:
result[xi, yi] = result[yi, xi] = covxy / divisor
val = covxy / divisor
if val > 1.0:
val = 1.0
elif val < -1.0:
val = -1.0
result[xi, yi] = result[yi, xi] = val
else:
result[xi, yi] = result[yi, xi] = NaN

Expand Down
12 changes: 12 additions & 0 deletions pandas/tests/frame/methods/test_cov_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -485,3 +485,15 @@ def test_corrwith_min_periods_boolean(self):
result = df_bool.corrwith(ser_bool, min_periods=3)
expected = Series([0.57735, 0.57735], index=["A", "B"])
tm.assert_series_equal(result, expected)

def test_corr_within_bounds(self):
df1 = DataFrame({"x": [0, 1], "y": [1.35951, 1.3595100000000007]})
result1 = df1.corr().max().max()
expected1 = 1.0
tm.assert_equal(result1, expected1)

rng = np.random.default_rng(seed=42)
df2 = DataFrame(rng.random((100, 4)))
corr_matrix = df2.corr()
assert corr_matrix.min().min() >= -1.0
assert corr_matrix.max().max() <= 1.0
Loading