From 39b42f98da532dad54b76ed62d936eb8ead3594d Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 1 Sep 2023 14:34:42 -0700 Subject: [PATCH] Raise NotImplementedError for Categoricals with timezones --- python/cudf/cudf/core/column/column.py | 31 ++++++++++++++++++++----- python/cudf/cudf/tests/test_datetime.py | 2 ++ python/cudf/cudf/tests/test_interval.py | 11 +++++---- 3 files changed, 33 insertions(+), 11 deletions(-) diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index ad761ea8d18..9dde17a1045 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2033,9 +2033,19 @@ def as_column( f"{arbitrary.dtype} is not supported. Convert first to " f"{arbitrary.dtype.subtype}." ) - if is_categorical_dtype(arbitrary): + if is_categorical_dtype(arbitrary.dtype): + if isinstance( + arbitrary.dtype.categories.dtype, pd.DatetimeTZDtype + ): + raise NotImplementedError( + "cuDF does not yet support timezone-aware datetimes" + ) data = as_column(pa.array(arbitrary, from_pandas=True)) elif is_interval_dtype(arbitrary.dtype): + if isinstance(arbitrary.dtype.subtype, pd.DatetimeTZDtype): + raise NotImplementedError( + "cuDF does not yet support timezone-aware datetimes" + ) data = as_column(pa.array(arbitrary, from_pandas=True)) elif arbitrary.dtype == np.bool_: data = as_column(cupy.asarray(arbitrary), dtype=arbitrary.dtype) @@ -2262,11 +2272,20 @@ def as_column( elif isinstance(arbitrary, pd.core.arrays.masked.BaseMaskedArray): data = as_column(pa.Array.from_pandas(arbitrary), dtype=dtype) elif ( - isinstance(arbitrary, pd.DatetimeIndex) - and isinstance(arbitrary.dtype, pd.DatetimeTZDtype) - ) or ( - isinstance(arbitrary, pd.IntervalIndex) - and is_datetime64tz_dtype(arbitrary.dtype.subtype) + ( + isinstance(arbitrary, pd.DatetimeIndex) + and isinstance(arbitrary.dtype, pd.DatetimeTZDtype) + ) + or ( + isinstance(arbitrary, pd.IntervalIndex) + and is_datetime64tz_dtype(arbitrary.dtype.subtype) + ) + or ( + isinstance(arbitrary, pd.CategoricalIndex) + and isinstance( + arbitrary.dtype.categories.dtype, pd.DatetimeTZDtype + ) + ) ): raise NotImplementedError( "cuDF does not yet support timezone-aware datetimes" diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index abcc057f823..b1685950241 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -2095,6 +2095,8 @@ def test_construction_from_tz_timestamps(data): _ = cudf.Index(data) with pytest.raises(NotImplementedError): _ = cudf.DatetimeIndex(data) + with pytest.raises(NotImplementedError): + cudf.CategoricalIndex(data) @pytest.mark.parametrize("op", _cmpops) diff --git a/python/cudf/cudf/tests/test_interval.py b/python/cudf/cudf/tests/test_interval.py index 9704be44b95..a27de60c2c5 100644 --- a/python/cudf/cudf/tests/test_interval.py +++ b/python/cudf/cudf/tests/test_interval.py @@ -167,17 +167,18 @@ def test_interval_index_unique(): assert_eq(expected, actual) +@pytest.mark.parametrize("box", [pd.Series, pd.IntervalIndex]) @pytest.mark.parametrize("tz", ["US/Eastern", None]) -def test_interval_with_datetime(tz): +def test_interval_with_datetime(tz, box): dti = pd.date_range( start=pd.Timestamp("20180101", tz=tz), end=pd.Timestamp("20181231", tz=tz), freq="M", ) - pidx = pd.IntervalIndex.from_breaks(dti) + pobj = box(pd.IntervalIndex.from_breaks(dti)) if tz is None: - gidx = cudf.from_pandas(pidx) - assert_eq(pidx, gidx) + gobj = cudf.from_pandas(pobj) + assert_eq(pobj, gobj) else: with pytest.raises(NotImplementedError): - cudf.from_pandas(pidx) + cudf.from_pandas(pobj)