Skip to content

Commit

Permalink
Align Index APIs with pandas 2.x (#16361)
Browse files Browse the repository at this point in the history
Similar to #16310, the following APIs have been modified to adjust/add parameters

* `to_flat_index`
* `isin`
* `unique`
* `transpose`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16361
  • Loading branch information
mroeschke authored Jul 29, 2024
1 parent 743e164 commit f8eb63e
Show file tree
Hide file tree
Showing 7 changed files with 78 additions and 16 deletions.
5 changes: 5 additions & 0 deletions docs/cudf/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,11 @@ def on_missing_reference(app, env, node, contnode):
("py:class", "ScalarLike"),
("py:class", "ParentType"),
("py:class", "ColumnLike"),
("py:class", "ColumnLike"),
("py:obj", "cudf.Index.transpose"),
("py:obj", "cudf.Index.T"),
("py:obj", "cudf.Index.to_flat_index"),
("py:obj", "cudf.MultiIndex.to_flat_index"),
# TODO: Remove this when we figure out why typing_extensions doesn't seem
# to map types correctly for intersphinx
("py:class", "typing_extensions.Self"),
Expand Down
25 changes: 23 additions & 2 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,24 @@ def to_numpy(self):
"""Convert to a numpy array."""
raise NotImplementedError

def to_flat_index(self) -> Self:
    """
    Identity method.

    This is implemented for compatibility with subclass implementations
    when chaining.

    Returns
    -------
    Index
        Caller.

    See Also
    --------
    MultiIndex.to_flat_index : Subclass implementation.
    """
    # No copy is made: the very same object is handed back to the caller.
    return self

def any(self):
"""
Return whether any elements is True in Index.
Expand Down Expand Up @@ -945,7 +963,7 @@ def to_pandas(self, *, nullable: bool = False, arrow_type: bool = False):
"""
raise NotImplementedError

def isin(self, values):
def isin(self, values, level=None):
"""Return a boolean array where the index values are in values.
Compute boolean array of whether each index value is found in
Expand All @@ -956,6 +974,9 @@ def isin(self, values):
----------
values : set, list-like, Index
Sought values.
level : str or int, optional
Name or position of the index level to use (if the index is a
`MultiIndex`).
Returns
-------
Expand All @@ -979,7 +1000,7 @@ def isin(self, values):
# ColumnBase.isin).
raise NotImplementedError

def unique(self):
def unique(self, level: int | None = None):
"""
Return unique values in the index.
Expand Down
24 changes: 20 additions & 4 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,12 @@ def memory_usage(self, deep: bool = False) -> int:
)
return 0

def unique(self) -> Self:
def unique(self, level: int | None = None) -> Self:
    """
    Return the unique values of the index.

    Parameters
    ----------
    level : int or hashable, optional
        Only accepted for signature compatibility with ``MultiIndex``.
        For this single-level index the only valid values are ``None``,
        the positions ``0`` / ``-1``, or the name of the index.

    Returns
    -------
    Self
        A copy of the caller.

    Raises
    ------
    IndexError
        If ``level`` is an integer position other than ``0`` or ``-1``.
    KeyError
        If ``level`` is not an integer and does not match the index name.
    """
    # Function-scope import so the block does not rely on a module-level one.
    import numbers

    if level is not None:
        if isinstance(level, numbers.Integral):
            if level > 0:
                raise IndexError(
                    f"Too many levels: Index has only 1 level, not {level + 1}"
                )
            if level < -1:
                # -1 addresses the single existing level; anything further
                # back does not exist.
                raise IndexError(
                    "Too many levels: Index has only 1 level, "
                    f"{level} is not a valid level number"
                )
        elif level != self.name:
            # A non-integer level is a level *name*; comparing it with
            # ``> 0`` would raise an unrelated TypeError.
            raise KeyError(
                f"Requested level ({level}) does not match index name "
                f"({self.name})"
            )
    # RangeIndex always has unique values
    return self.copy()

@_performance_tracking
Expand Down Expand Up @@ -964,7 +968,11 @@ def _indices_of(self, value) -> cudf.core.column.NumericalColumn:
i = []
return as_column(i, dtype=size_type_dtype)

def isin(self, values):
def isin(self, values, level=None):
if level is not None and level > 0:
raise IndexError(
f"Too many levels: Index has only 1 level, not {level + 1}"
)
if is_scalar(values):
raise TypeError(
"only list-like objects are allowed to be passed "
Expand Down Expand Up @@ -1616,12 +1624,20 @@ def append(self, other):

return self._concat(to_concat)

def unique(self):
def unique(self, level: int | None = None) -> Self:
    """
    Return the unique values of the index.

    Parameters
    ----------
    level : int or hashable, optional
        Only accepted for signature compatibility with ``MultiIndex``.
        For this single-level index the only valid values are ``None``,
        the positions ``0`` / ``-1``, or the name of the index.

    Returns
    -------
    Self
        A new index built from the unique values of the underlying
        column, carrying the same name as the caller.

    Raises
    ------
    IndexError
        If ``level`` is an integer position other than ``0`` or ``-1``.
    KeyError
        If ``level`` is not an integer and does not match the index name.
    """
    # Function-scope import so the block does not rely on a module-level one.
    import numbers

    if level is not None:
        if isinstance(level, numbers.Integral):
            if level > 0:
                raise IndexError(
                    f"Too many levels: Index has only 1 level, not {level + 1}"
                )
            if level < -1:
                # -1 addresses the single existing level; anything further
                # back does not exist.
                raise IndexError(
                    "Too many levels: Index has only 1 level, "
                    f"{level} is not a valid level number"
                )
        elif level != self.name:
            # A non-integer level is a level *name*; comparing it with
            # ``> 0`` would raise an unrelated TypeError.
            raise KeyError(
                f"Requested level ({level}) does not match index name "
                f"({self.name})"
            )
    return cudf.core.index._index_from_data(
        {self.name: self._values.unique()}, name=self.name
    )

def isin(self, values):
def isin(self, values, level=None):
if level is not None and level > 0:
raise IndexError(
f"Too many levels: Index has only 1 level, not {level + 1}"
)
if is_scalar(values):
raise TypeError(
"only list-like objects are allowed to be passed "
Expand Down
16 changes: 14 additions & 2 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,15 @@ def from_tuples(cls, tuples, sortorder: int | None = None, names=None):
def to_numpy(self):
return self.values_host

def to_flat_index(self):
    """
    Convert a MultiIndex to an Index of Tuples containing the level values.

    This is not currently implemented.

    Raises
    ------
    NotImplementedError
        Always; the method exists only so that the call fails with an
        explicit message.
    """
    # TODO: Could implement as Index of ListDtype?
    raise NotImplementedError("to_flat_index is not currently supported.")

@property # type: ignore
@_performance_tracking
def values_host(self):
Expand Down Expand Up @@ -1734,8 +1743,11 @@ def fillna(self, value):
return super().fillna(value=value)

@_performance_tracking
def unique(self):
return self.drop_duplicates(keep="first")
def unique(self, level: int | None = None) -> Self | cudf.Index:
    """
    Return the unique entries of the index.

    Parameters
    ----------
    level : int, optional
        When given, only the values of that level are considered and the
        result is whatever ``get_level_values(level).unique()`` returns.
        When omitted, duplicates are dropped across all levels, keeping
        the first occurrence.

    Returns
    -------
    MultiIndex or Index
        De-duplicated index (all levels) or the unique values of the
        requested level.
    """
    if level is not None:
        # Single-level request: delegate to that level's own ``unique``.
        return self.get_level_values(level).unique()
    return self.drop_duplicates(keep="first")

@_performance_tracking
def nunique(self, dropna: bool = True) -> int:
Expand Down
8 changes: 0 additions & 8 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2775,14 +2775,6 @@ def cov(self, other, min_periods=None, ddof: int | None = None):
f"{other.dtype}"
)

@_performance_tracking
def transpose(self):
"""Return the transpose, which is by definition self."""

return self

T = property(transpose, doc=transpose.__doc__)

@_performance_tracking
def duplicated(self, keep="first"):
"""
Expand Down
7 changes: 7 additions & 0 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,10 @@ def where(self, cond, other=None, inplace=False):
result = cudf._lib.copying.copy_if_else(input_col, other, cond)

return _make_categorical_like(result, self_column)

@_performance_tracking
def transpose(self):
    """Return the transpose, which is by definition self."""
    # Nothing is copied or rearranged: the caller itself is returned.
    return self

# Property alias for ``transpose`` that reuses its docstring.
T = property(transpose, doc=transpose.__doc__)
9 changes: 9 additions & 0 deletions python/cudf/cudf/tests/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2170,3 +2170,12 @@ def test_bool_raises():
lfunc_args_and_kwargs=[[cudf.MultiIndex.from_arrays([range(1)])]],
rfunc_args_and_kwargs=[[pd.MultiIndex.from_arrays([range(1)])]],
)


def test_unique_level():
    """``MultiIndex.unique(level=1)`` agrees between pandas and cudf."""
    pandas_index = pd.MultiIndex.from_arrays([[1, 1, 2], [3, 3, 2]])
    gpu_index = cudf.MultiIndex.from_pandas(pandas_index)

    # pandas provides the reference value; cudf is the implementation
    # under test.
    expected = pandas_index.unique(level=1)
    result = gpu_index.unique(level=1)
    assert_eq(expected, result)

0 comments on commit f8eb63e

Please sign in to comment.