rapidsai · rapids-bot · Sep 17, 2024 · Sep 10, 2024 · Sep 11, 2024 · Sep 11, 2024
@@ -49,8 +49,24 @@ def to_dask_dataframe(self, **kwargs):
 
         return self.to_backend("pandas", **kwargs)
 
+    def _prepare_cov_corr(self, min_periods, numeric_only):
+        # Upstream defaults min_periods to 2 (not supported by cudf), so this
+        # version returns it unchanged; the cov call on _meta discards its
+        # result (presumably an early check of min_periods -- TODO confirm).
+        # TODO: Remove when cudf supports both min_periods and numeric_only
+        # See: https://github.com/rapidsai/cudf/issues/12626
+        # See: https://github.com/rapidsai/cudf/issues/9009
+        self._meta.cov(min_periods=min_periods)
+
+        frame = self  # default: no column selection
+        if numeric_only:
+            numerics = self._meta._get_numeric_data()
+            if len(numerics.columns) != len(self.columns):
+                frame = frame[list(numerics.columns)]
+        return frame, min_periods
+
     # var can be removed if cudf#15179 is addressed.
-    # See: https://github.com/rapidsai/cudf/issues/15179
+    # See: https://github.com/rapidsai/cudf/issues/14935
     def var(
         self,
         axis=0,

@@ -1007,3 +1007,20 @@ def test_to_backend_simplify():
         df2 = df.to_backend("cudf")[["y"]].simplify()
         df3 = df[["y"]].to_backend("cudf").to_backend("cudf").simplify()
         assert df2._name == df3._name
+
+
+@pytest.mark.parametrize("numeric_only", [True, False])
+@pytest.mark.parametrize("op", ["corr", "cov"])
+def test_cov_corr(op, numeric_only):  # compare corr/cov against pandas
+    df = cudf.DataFrame.from_dict(
+        {
+            "x": np.random.randint(0, 5, size=10),  # integer column
+            "y": np.random.normal(size=10),  # float column
+        }
+    )
+    ddf = dd.from_pandas(df, npartitions=2)  # two partitions
+    res = getattr(ddf, op)(numeric_only=numeric_only)
+    # Build the expected result with pandas (to_pandas) until cudf supports
+    # numeric_only (See: https://github.com/rapidsai/cudf/issues/12626)
+    expect = getattr(df.to_pandas(), op)(numeric_only=numeric_only)
+    dd.assert_eq(res, expect)