pandas-dev · jreback · Apr 27, 2020 · Apr 26, 2020 · Apr 26, 2020 · Apr 26, 2020
diff --git a/doc/source/whatsnew/v1.1.0.rst b/doc/source/whatsnew/v1.1.0.rst
@@ -569,7 +569,7 @@ Numeric
 - Bug in :meth:`DataFrame.mean` with ``numeric_only=False`` and either ``datetime64`` dtype or ``PeriodDtype`` column incorrectly raising ``TypeError`` (:issue:`32426`)
 - Bug in :meth:`DataFrame.count` with ``level="foo"`` and index level ``"foo"`` containing NaNs causes segmentation fault (:issue:`21824`)
 - Bug in :meth:`DataFrame.diff` with ``axis=1`` returning incorrect results with mixed dtypes (:issue:`32995`)
--
+- Bug in :meth:`DataFrame.corr` and :meth:`DataFrame.cov` raising when handling nullable integer columns with ``pandas.NA`` (:issue:`33803`)
 
 Conversion
 ^^^^^^^^^^

diff --git a/pandas/core/frame.py b/pandas/core/frame.py
@@ -84,7 +84,6 @@
     validate_numeric_casting,
 )
 from pandas.core.dtypes.common import (
-    ensure_float64,
     ensure_int64,
     ensure_platform_int,
     infer_dtype_from_object,
@@ -7871,16 +7870,16 @@ def corr(self, method="pearson", min_periods=1) -> "DataFrame":
         numeric_df = self._get_numeric_data()
         cols = numeric_df.columns
         idx = cols.copy()
-        mat = numeric_df.values
+        mat = numeric_df.astype(float).to_numpy()
 
         if method == "pearson":
-            correl = libalgos.nancorr(ensure_float64(mat), minp=min_periods)
+            correl = libalgos.nancorr(mat, minp=min_periods)
         elif method == "spearman":
-            correl = libalgos.nancorr_spearman(ensure_float64(mat), minp=min_periods)
+            correl = libalgos.nancorr_spearman(mat, minp=min_periods)
         elif method == "kendall" or callable(method):
             if min_periods is None:
                 min_periods = 1
-            mat = ensure_float64(mat).T
+            mat = mat.T
             corrf = nanops.get_corr_func(method)
             K = len(cols)
             correl = np.empty((K, K), dtype=float)
@@ -8006,19 +8005,19 @@ def cov(self, min_periods=None) -> "DataFrame":
         numeric_df = self._get_numeric_data()
         cols = numeric_df.columns
         idx = cols.copy()
-        mat = numeric_df.values
+        mat = numeric_df.astype(float).to_numpy()
 
         if notna(mat).all():
             if min_periods is not None and min_periods > len(mat):
-                baseCov = np.empty((mat.shape[1], mat.shape[1]))
-                baseCov.fill(np.nan)
+                base_cov = np.empty((mat.shape[1], mat.shape[1]))
+                base_cov.fill(np.nan)
             else:
-                baseCov = np.cov(mat.T)
-            baseCov = baseCov.reshape((len(cols), len(cols)))
+                base_cov = np.cov(mat.T)
+            base_cov = base_cov.reshape((len(cols), len(cols)))
         else:
-            baseCov = libalgos.nancorr(ensure_float64(mat), cov=True, minp=min_periods)
+            base_cov = libalgos.nancorr(mat, cov=True, minp=min_periods)
 
-        return self._constructor(baseCov, index=idx, columns=cols)
+        return self._constructor(base_cov, index=idx, columns=cols)
 
     def corrwith(self, other, axis=0, drop=False, method="pearson") -> Series:
         """

diff --git a/pandas/tests/frame/methods/test_cov_corr.py b/pandas/tests/frame/methods/test_cov_corr.py
@@ -58,6 +58,17 @@ def test_cov(self, float_frame, float_string_frame):
         )
         tm.assert_frame_equal(result, expected)
 
+    @pytest.mark.parametrize(
+        "other_column", [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0])]
+    )
+    def test_cov_nullable_integer(self, other_column):
+        # GH#33803: pd.NA in a nullable integer column is skipped, not an error
+        data = pd.DataFrame({"a": pd.array([1, 2, None]), "b": other_column})
+        result = data.cov()
+        arr = np.array([[0.5, 0.5], [0.5, 1.0]])
+        expected = pd.DataFrame(arr, columns=["a", "b"], index=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDataFrameCorr:
     # DataFrame.corr(), as opposed to DataFrame.corrwith
@@ -153,6 +164,22 @@ def test_corr_int(self):
         df3.cov()
         df3.corr()
 
+    @td.skip_if_no_scipy
+    @pytest.mark.parametrize(
+        "nullable_column", [pd.array([1, 2, 3]), pd.array([1, 2, None])]
+    )
+    @pytest.mark.parametrize(
+        "other_column",
+        [pd.array([1, 2, 3]), np.array([1.0, 2.0, 3.0]), np.array([1.0, 2.0, np.nan])],
+    )
+    @pytest.mark.parametrize("method", ["pearson", "spearman", "kendall"])
+    def test_corr_nullable_integer(self, nullable_column, other_column, method):
+        # GH#33803: corr() accepts nullable integer columns, with or without pd.NA
+        data = pd.DataFrame({"a": nullable_column, "b": other_column})
+        result = data.corr(method=method)
+        expected = pd.DataFrame(np.ones((2, 2)), columns=["a", "b"], index=["a", "b"])
+        tm.assert_frame_equal(result, expected)
+
 
 class TestDataFrameCorrWith:
     def test_corrwith(self, datetime_frame):