diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index babead9ca97..58e2241e810 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -1096,12 +1096,12 @@ def difference(self, other, sort=None): other = cudf.Index(other, name=getattr(other, "name", self.name)) if not len(other): - res = self._get_reconciled_name_object(other) + res = self._get_reconciled_name_object(other).unique() if sort: return res.sort_values() return res elif self.equals(other): - res = self[:0]._get_reconciled_name_object(other) + res = self[:0]._get_reconciled_name_object(other).unique() if sort: return res.sort_values() return res diff --git a/python/cudf/cudf/tests/test_index.py b/python/cudf/cudf/tests/test_index.py index 7a190fb428a..3cbfea8063f 100644 --- a/python/cudf/cudf/tests/test_index.py +++ b/python/cudf/cudf/tests/test_index.py @@ -15,7 +15,7 @@ import cudf from cudf.api.extensions import no_default from cudf.api.types import is_bool_dtype -from cudf.core._compat import PANDAS_GE_200 +from cudf.core._compat import PANDAS_GE_200, PANDAS_GE_220 from cudf.core.index import ( CategoricalIndex, DatetimeIndex, @@ -797,9 +797,26 @@ def test_index_to_series(data): "name_data,name_other", [("abc", "c"), (None, "abc"), ("abc", pd.NA), ("abc", "abc")], ) -def test_index_difference(data, other, sort, name_data, name_other): +def test_index_difference(request, data, other, sort, name_data, name_other): pd_data = pd.Index(data, name=name_data) pd_other = pd.Index(other, name=name_other) + request.applymarker( + pytest.mark.xfail( + condition=PANDAS_GE_220 + and isinstance(pd_data.dtype, pd.CategoricalDtype) + and not isinstance(pd_other.dtype, pd.CategoricalDtype) + and pd_other.isnull().any(), + reason="https://github.com/pandas-dev/pandas/issues/57318", + ) + ) + request.applymarker( + pytest.mark.xfail( + condition=not PANDAS_GE_220 + and len(pd_other) == 0 + and len(pd_data) != len(pd_data.unique()), + reason="Bug fixed in pandas-2.2+", + ) + ) gd_data = cudf.from_pandas(pd_data) gd_other = cudf.from_pandas(pd_other)