From 6e90ea585b840d7954c1ed76a3df684e0f91af28 Mon Sep 17 00:00:00 2001
From: Matthew Murray
Date: Tue, 13 Aug 2024 16:29:57 -0700
Subject: [PATCH] Initial commit

---
 python/cudf/cudf/__init__.py        |  2 +-
 python/cudf/cudf/core/algorithms.py | 54 +++++++++++++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py
index e14815a1b0d..a77ca32908e 100644
--- a/python/cudf/cudf/__init__.py
+++ b/python/cudf/cudf/__init__.py
@@ -24,7 +24,7 @@
     register_series_accessor,
 )
 from cudf.api.types import dtype
-from cudf.core.algorithms import factorize
+from cudf.core.algorithms import factorize, unique
 from cudf.core.cut import cut
 from cudf.core.dataframe import DataFrame, from_dataframe, from_pandas, merge
 from cudf.core.dtypes import (
diff --git a/python/cudf/cudf/core/algorithms.py b/python/cudf/cudf/core/algorithms.py
index 6c69fbd2637..87e4e87f554 100644
--- a/python/cudf/cudf/core/algorithms.py
+++ b/python/cudf/cudf/core/algorithms.py
@@ -143,3 +143,57 @@ def _interpolation(column: ColumnBase, index: BaseIndex) -> ColumnBase:
     first_nan_idx = valid_locs.values.argmax().item()
     result[:first_nan_idx] = np.nan
     return as_column(result)
+
+
+def unique(values):
+    """
+    Return unique values from array-like
+
+    Thin wrapper that looks up ``values.unique`` and calls it, so the
+    result type is decided by the input object.
+
+    Parameters
+    ----------
+    values : 1d array-like
+        Object exposing a ``unique()`` method.
+
+    Returns
+    -------
+    object
+        Whatever ``values.unique()`` returns. The return can be:
+
+        * Index : when the input is an Index
+        * Categorical : when the input is a Categorical dtype
+        * cudf.Series : when the input is a Series
+
+        For a pandas input the pandas result (numpy.ndarray or
+        ExtensionArray) is returned as-is.
+
+    Raises
+    ------
+    TypeError
+        If ``values`` has no ``unique`` attribute.
+
+    See Also
+    --------
+    Index.unique : Return unique values from an Index.
+    Series.unique : Return unique values of Series object.
+
+    Examples
+    --------
+    >>> cudf.unique(cudf.Series([2, 1, 3, 3]))
+    Series([2, 1, 3])
+
+    >>> cudf.unique(pd.Series([2] + [1] * 5))
+    array([2, 1])
+    """
+    try:
+        unique_method = values.unique
+    except AttributeError as err:
+        # Only a missing ``unique`` attribute means the type is
+        # unsupported; errors raised by ``unique()`` itself are left
+        # to propagate instead of being masked as a TypeError.
+        raise TypeError(
+            f"Cannot call unique on type {type(values)}"
+        ) from err
+    return unique_method()