Skip to content

Commit

Permalink
Return Interval object in pandas compat mode for IntervalIndex reduct…
Browse files Browse the repository at this point in the history
…ions (#16523)

xref #16507

Outside pandas-compatible mode, I think it still makes sense to return a `dict`, since that is the "scalar" type of a cudf struct/interval type; in pandas-compatible mode, however, we should match pandas and return an `Interval`.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: #16523
  • Loading branch information
mroeschke authored Aug 15, 2024
1 parent f4a9b1c commit 1e220b7
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 1 deletion.
6 changes: 5 additions & 1 deletion python/cudf/cudf/_lib/reduce.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -61,7 +61,11 @@ def reduce(reduction_op, Column incol, dtype=None, **kwargs):
result,
dtype=col_dtype.__class__(precision, scale),
).value
return DeviceScalar.from_pylibcudf(result).value
scalar = DeviceScalar.from_pylibcudf(result).value
if isinstance(col_dtype, cudf.StructDtype):
# TODO: Utilize column_metadata in libcudf to maintain field labels
return dict(zip(col_dtype.fields.keys(), scalar.values()))
return scalar


@acquire_spill_lock()
Expand Down
14 changes: 14 additions & 0 deletions python/cudf/cudf/core/column/interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
from cudf.core.dtypes import IntervalDtype

if TYPE_CHECKING:
from cudf._typing import ScalarLike
from cudf.core.column import ColumnBase


Expand Down Expand Up @@ -186,3 +187,16 @@ def element_indexing(self, index: int):
if cudf.get_option("mode.pandas_compatible"):
return pd.Interval(**result, closed=self.dtype.closed)
return result

def _reduce(
    self,
    op: str,
    skipna: bool | None = None,
    min_count: int = 0,
    *args,
    **kwargs,
) -> ScalarLike:
    """Reduce the column with ``op``, honoring pandas-compatible mode.

    Every argument is forwarded unchanged to the parent class's
    ``_reduce``.

    Returns
    -------
    ScalarLike
        The parent's result. When the ``mode.pandas_compatible`` option
        is enabled and that result is a ``dict``, it is converted into a
        ``pd.Interval`` whose ``closed`` side is taken from this column's
        dtype.
    """
    result = super()._reduce(op, skipna, min_count, *args, **kwargs)
    compat = cudf.get_option("mode.pandas_compatible")
    # Only a dict can be unpacked into pd.Interval's keyword arguments.
    # Any other result (presumably a null placeholder for an empty or
    # all-null column -- TODO confirm against the parent implementation)
    # is passed through untouched instead of raising TypeError on
    # ``**result``.
    if compat and isinstance(result, dict):
        return pd.Interval(**result, closed=self.dtype.closed)
    return result
11 changes: 11 additions & 0 deletions python/cudf/cudf/tests/test_interval.py
Original file line number Diff line number Diff line change
Expand Up @@ -194,3 +194,14 @@ def test_intervaldtype_eq_string_with_attributes():
dtype = cudf.IntervalDtype("int64", closed="left")
assert dtype == "interval"
assert dtype == "interval[int64, left]"


def test_reduction_return_interval_pandas_compatible():
    """A cudf IntervalIndex reduction matches pandas in compat mode."""
    pandas_index = pd.IntervalIndex.from_tuples(
        [("2017-01-03", "2017-01-04")],
        dtype="interval[datetime64[ns], right]",
    )
    # The pandas reference value does not depend on any cudf option, so it
    # is computed up front.
    expected = pandas_index.min()
    gpu_index = cudf.IntervalIndex.from_pandas(pandas_index)
    with cudf.option_context("mode.pandas_compatible", True):
        result = gpu_index.min()
        assert result == expected

0 comments on commit 1e220b7

Please sign in to comment.