Skip to content
Merged
Show file tree
Hide file tree
Changes from 8 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion doc/source/whatsnew/v1.1.0.rst
Original file line number Diff line number Diff line change
Expand Up @@ -569,7 +569,7 @@ Numeric
- Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`)
- Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`)
- Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`)
-
- Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`)

Conversion
^^^^^^^^^^
Expand Down
23 changes: 11 additions & 12 deletions pandas/core/frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,6 @@
validate_numeric_casting,
)
from pandas.core.dtypes.common import (
ensure_float64,
ensure_int64,
ensure_platform_int,
infer_dtype_from_object,
Expand Down Expand Up @@ -7871,16 +7870,16 @@ def corr(self, method="pearson", min_periods=1) -> "DataFrame":
numeric_df = self._get_numeric_data()
cols = numeric_df.columns
idx = cols.copy()
mat = numeric_df.values
mat = numeric_df.astype(float).to_numpy()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add copy=False here


if method == "pearson":
correl = libalgos.nancorr(ensure_float64(mat), minp=min_periods)
correl = libalgos.nancorr(mat, minp=min_periods)
elif method == "spearman":
correl = libalgos.nancorr_spearman(ensure_float64(mat), minp=min_periods)
correl = libalgos.nancorr_spearman(mat, minp=min_periods)
elif method == "kendall" or callable(method):
if min_periods is None:
min_periods = 1
mat = ensure_float64(mat).T
mat = mat.T
corrf = nanops.get_corr_func(method)
K = len(cols)
correl = np.empty((K, K), dtype=float)
Expand Down Expand Up @@ -8006,19 +8005,19 @@ def cov(self, min_periods=None) -> "DataFrame":
numeric_df = self._get_numeric_data()
cols = numeric_df.columns
idx = cols.copy()
mat = numeric_df.values
mat = numeric_df.astype(float).to_numpy()

if notna(mat).all():
if min_periods is not None and min_periods > len(mat):
baseCov = np.empty((mat.shape[1], mat.shape[1]))
baseCov.fill(np.nan)
base_cov = np.empty((mat.shape[1], mat.shape[1]))
base_cov.fill(np.nan)
else:
baseCov = np.cov(mat.T)
baseCov = baseCov.reshape((len(cols), len(cols)))
base_cov = np.cov(mat.T)
base_cov = base_cov.reshape((len(cols), len(cols)))
else:
baseCov = libalgos.nancorr(ensure_float64(mat), cov=True, minp=min_periods)
base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods)

return self._constructor(baseCov, index=idx, columns=cols)
return self._constructor(base_cov, index=idx, columns=cols)

def corrwith(self, other, axis=0, drop=False, method="pearson") -> Series:
"""
Expand Down
27 changes: 27 additions & 0 deletions pandas/tests/frame/methods/test_cov_corr.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,17 @@ def test_cov(self, float_frame, float_string_frame):
)
tm.assert_frame_equal(result, expected)

@pytest.mark.parametrize(
"other_column", [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0])]
)
def test_cov_nullable_integer(self, other_column):
# https://github.com/pandas-dev/pandas/issues/33803
data = pd.DataFrame({"a": pd.array([1, 2, None]), "b": other_column})
result = data.cov()
arr = np.array([[0.5, 0.5], [0.5, 1.0]])
expected = pd.DataFrame(arr, columns=["a", "b"], index=["a", "b"])
tm.assert_frame_equal(result, expected)


class TestDataFrameCorr:
# DataFrame.corr(), as opposed to DataFrame.corrwith
Expand Down Expand Up @@ -153,6 +164,22 @@ def test_corr_int(self):
df3.cov()
df3.corr()

@td.skip_if_no_scipy
@pytest.mark.parametrize(
"nullable_column", [pd.array([1, 2, 3]), pd.array([1, 2, None])]
)
@pytest.mark.parametrize(
"other_column",
[pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, np.nan])],
)
@pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"])
def test_corr_nullable_integer(self, nullable_column, other_column, method):
# https://github.com/pandas-dev/pandas/issues/33803
data = pd.DataFrame({"a": nullable_column, "b": other_column})
result = data.corr(method=method)
expected = pd.DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
tm.assert_frame_equal(result, expected)


class TestDataFrameCorrWith:
def test_corrwith(self, datetime_frame):
Expand Down