From 5681c840c05f53425c4f181a9d7a813aa5196190 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 31 Jul 2024 11:29:25 -0700 Subject: [PATCH 01/12] Initial commit --- python/cudf/cudf/__init__.py | 3 +- python/cudf/cudf/core/timestamp.py | 67 +++++++++++++++++++++ python/cudf/cudf/pandas/_wrappers/pandas.py | 6 +- 3 files changed, 72 insertions(+), 4 deletions(-) create mode 100644 python/cudf/cudf/core/timestamp.py diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index e14815a1b0d..8b1e91f20d4 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -60,6 +60,7 @@ ) from cudf.core.scalar import Scalar from cudf.core.series import Series, isclose +from cudf.core.timestamp import Timestamp from cudf.core.tools.datetimes import DateOffset, date_range, to_datetime from cudf.core.tools.numeric import to_numeric from cudf.io import ( @@ -110,7 +111,7 @@ "Series", "StructDtype", "TimedeltaIndex", - "api", + "Timestamp" "api", "concat", "crosstab", "cut", diff --git a/python/cudf/cudf/core/timestamp.py b/python/cudf/cudf/core/timestamp.py new file mode 100644 index 00000000000..6a9f1e178c1 --- /dev/null +++ b/python/cudf/cudf/core/timestamp.py @@ -0,0 +1,67 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import numpy as np +import pandas as pd + +from cudf.core.scalar import Scalar + + +class Timestamp: + def __init__( + self, + *args, + **kwargs, + ): + pd_ts_kwargs = {k: v for k, v in kwargs.items() if k != "dtype"} + ts = pd.Timestamp(*args, **pd_ts_kwargs) + self._scalar = Scalar(ts, dtype=kwargs.get("dtype")) + + @property + def value(self) -> int: + return pd.Timestamp(self._scalar.value).value + + @property + def year(self) -> int: + return pd.Timestamp(self._scalar.value).year + + @property + def month(self) -> int: + return pd.Timestamp(self._scalar.value).month + + @property + def day(self) -> int: + return pd.Timestamp(self._scalar.value).month + + @property + def hour(self) -> int: + return pd.Timestamp(self._scalar.value).hour + + @property + def minute(self) -> int: + return pd.Timestamp(self._scalar.value).minute + + @property + def second(self) -> int: + return pd.Timestamp(self._scalar.value).second + + @property + def microsecond(self) -> int: + return pd.Timestamp(self._scalar.value).microsecond + + @property + def nanosecond(self) -> int: + return pd.Timestamp(self._scalar.value).nanosecond + + def __repr__(self): + return pd.Timestamp(self._scalar._host_value).__repr__() + + @property + def asm8(self) -> np.datetime64: + return self._scalar.value + + def to_pandas(self): + return pd.Timestamp(self._scalar.value) + + @classmethod + def from_pandas(cls, obj: pd.Timestamp): + return cls(obj) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 478108f36f1..e96f96c4cb4 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -154,10 +154,10 @@ def Timestamp_Timedelta__new__(cls, *args, **kwargs): Timestamp = make_final_proxy_type( "Timestamp", - _Unusable, + cudf.Timestamp, pd.Timestamp, - fast_to_slow=_Unusable(), - slow_to_fast=_Unusable(), + fast_to_slow=lambda fast: fast.to_pandas(), + slow_to_fast=cudf.from_pandas, 
additional_attributes={ "__hash__": _FastSlowAttribute("__hash__"), "__new__": Timestamp_Timedelta__new__, From 48bbdbb08d820e1eafeb0e76fc289023cc8c96a0 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 31 Jul 2024 11:36:22 -0700 Subject: [PATCH 02/12] Fix typo --- python/cudf/cudf/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/__init__.py b/python/cudf/cudf/__init__.py index 8b1e91f20d4..0196dd72c04 100644 --- a/python/cudf/cudf/__init__.py +++ b/python/cudf/cudf/__init__.py @@ -111,7 +111,8 @@ "Series", "StructDtype", "TimedeltaIndex", - "Timestamp" "api", + "Timestamp", + "api", "concat", "crosstab", "cut", From f88bae77d40a79e937c1f937f713570b5fabae45 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 1 Aug 2024 05:46:12 -0700 Subject: [PATCH 03/12] Inherit from Scalar --- python/cudf/cudf/core/scalar.py | 8 +++--- python/cudf/cudf/core/timestamp.py | 28 +++++++++---------- .../cudf/cudf_pandas_tests/test_profiler.py | 2 +- 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index f6331aa1f49..2300beb0658 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -42,11 +42,11 @@ def __init__(self, names, bases, attrs, maxsize=128): self.__maxsize = maxsize self.__instances = OrderedDict() - def __call__(self, value, dtype=None): + def __call__(self, *args, **kwargs): # the cache key is constructed from the arguments, and also # the _types_ of the arguments, since objects of different # types can compare equal - cache_key = (value, type(value), dtype, type(dtype)) + cache_key = (args, kwargs) try: # try retrieving an instance from the cache: self.__instances.move_to_end(cache_key) @@ -54,7 +54,7 @@ def __call__(self, value, dtype=None): except KeyError: # if an instance couldn't be found in the cache, # construct it and add to cache: - obj = super().__call__(value, dtype=dtype) + obj = 
super().__call__(*args, **kwargs) try: self.__instances[cache_key] = obj except TypeError: @@ -65,7 +65,7 @@ def __call__(self, value, dtype=None): return obj except TypeError: # couldn't hash the arguments, don't cache: - return super().__call__(value, dtype=dtype) + return super().__call__(*args, **kwargs) def _clear_instance_cache(self): self.__instances.clear() diff --git a/python/cudf/cudf/core/timestamp.py b/python/cudf/cudf/core/timestamp.py index 6a9f1e178c1..1cdc6f65029 100644 --- a/python/cudf/cudf/core/timestamp.py +++ b/python/cudf/cudf/core/timestamp.py @@ -6,7 +6,7 @@ from cudf.core.scalar import Scalar -class Timestamp: +class Timestamp(Scalar): def __init__( self, *args, @@ -14,53 +14,53 @@ def __init__( ): pd_ts_kwargs = {k: v for k, v in kwargs.items() if k != "dtype"} ts = pd.Timestamp(*args, **pd_ts_kwargs) - self._scalar = Scalar(ts, dtype=kwargs.get("dtype")) + super().__init__(ts) @property def value(self) -> int: - return pd.Timestamp(self._scalar.value).value + return pd.Timestamp(super().value).value @property def year(self) -> int: - return pd.Timestamp(self._scalar.value).year + return pd.Timestamp(super().value).year @property def month(self) -> int: - return pd.Timestamp(self._scalar.value).month + return pd.Timestamp(super().value).month @property def day(self) -> int: - return pd.Timestamp(self._scalar.value).month + return pd.Timestamp(super().value).day @property def hour(self) -> int: - return pd.Timestamp(self._scalar.value).hour + return pd.Timestamp(super().value).hour @property def minute(self) -> int: - return pd.Timestamp(self._scalar.value).minute + return pd.Timestamp(super().value).minute @property def second(self) -> int: - return pd.Timestamp(self._scalar.value).second + return pd.Timestamp(super().value).second @property def microsecond(self) -> int: - return pd.Timestamp(self._scalar.value).microsecond + return pd.Timestamp(super().value).microsecond @property def nanosecond(self) -> int: - return 
pd.Timestamp(self._scalar.value).nanosecond + return pd.Timestamp(super().value).nanosecond def __repr__(self): - return pd.Timestamp(self._scalar._host_value).__repr__() + return pd.Timestamp(self.value).__repr__() @property def asm8(self) -> np.datetime64: - return self._scalar.value + return super().value def to_pandas(self): - return pd.Timestamp(self._scalar.value) + return pd.Timestamp(super().value) @classmethod def from_pandas(cls, obj: pd.Timestamp): diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index 588398265f2..d3516a379e5 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -42,7 +42,7 @@ def test_profiler(): for name, func in per_function_stats.items(): assert ( len(func["cpu"]) == 0 - if "Time" not in name + if "Timedelta" not in name else len(func["gpu"]) == 0 ) From e34bf9a63cd428911b3a550c085ea1a9bc5ea8eb Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 7 Aug 2024 15:48:05 -0700 Subject: [PATCH 04/12] Add some more methods --- python/cudf/cudf/core/timestamp.py | 35 ++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/timestamp.py b/python/cudf/cudf/core/timestamp.py index 1cdc6f65029..6c0b568defb 100644 --- a/python/cudf/cudf/core/timestamp.py +++ b/python/cudf/cudf/core/timestamp.py @@ -1,5 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
+from __future__ import annotations + +from datetime import datetime, timedelta + import numpy as np import pandas as pd @@ -17,8 +21,8 @@ def __init__( super().__init__(ts) @property - def value(self) -> int: - return pd.Timestamp(super().value).value + def value(self) -> np.datetime64: + return super().value @property def year(self) -> int: @@ -65,3 +69,30 @@ def to_pandas(self): @classmethod def from_pandas(cls, obj: pd.Timestamp): return cls(obj) + + @classmethod + def from_scalar(cls, obj: Scalar): + return cls(obj.value) + + def _to_scalar(self): + return Scalar(self.value) + + def __add__(self, other: timedelta | np.timedelta64): + return self.from_scalar(self._to_scalar() + other) + + def __radd__(self, other: timedelta): + return self + other + + def __sub__( + self, other: datetime | timedelta | np.timedelta64 + ) -> pd.Timedelta: + if isinstance(other, datetime): + return pd.Timedelta(self.value - other) + elif isinstance(other, self.__class__): + return pd.Timedelta(self.value - other.value) + elif isinstance(other, (timedelta, np.timedelta64)): + return self.from_scalar(self._to_scalar() - other) + else: + raise TypeError( + f"Subtraction not supported between types {type(self)} and {type(other)}" + ) From 9f083bea6a9cf7e7158e5183d9fc71d41ebba613 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 14 Aug 2024 20:37:01 -0700 Subject: [PATCH 05/12] create new cache key --- python/cudf/cudf/core/scalar.py | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index 2300beb0658..4cb35001cba 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -18,6 +18,19 @@ ) +def generate_arg_type_tuple(args, kwargs): + result = [] + if args != (): + for arg in args: + result.extend([arg, type(arg)]) + + if kwargs != {}: + for kwarg, value in kwargs.items(): + result.extend([value, type(value)]) + + return tuple(result) + + # Note that the 
metaclass below can easily be generalized for use with # other classes, if needed in the future. Simply replace the arguments # of the `__call__` method with `*args` and `**kwargs`. This will @@ -46,7 +59,7 @@ def __call__(self, *args, **kwargs): # the cache key is constructed from the arguments, and also # the _types_ of the arguments, since objects of different # types can compare equal - cache_key = (args, kwargs) + cache_key = generate_arg_type_tuple(args, kwargs) try: # try retrieving an instance from the cache: self.__instances.move_to_end(cache_key) From 1c85ad9830909394807217d632c06f3c715a3209 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 15 Aug 2024 06:46:17 -0700 Subject: [PATCH 06/12] Add missing attrs and methods to timestamp --- python/cudf/cudf/core/scalar.py | 11 +- python/cudf/cudf/core/timestamp.py | 291 ++++++++++++++++++ .../cudf/cudf_pandas_tests/test_profiler.py | 2 +- 3 files changed, 298 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/core/scalar.py b/python/cudf/cudf/core/scalar.py index 4cb35001cba..a4942440735 100644 --- a/python/cudf/cudf/core/scalar.py +++ b/python/cudf/cudf/core/scalar.py @@ -16,6 +16,7 @@ get_allowed_combinations_for_operator, to_cudf_compatible_scalar, ) +from cudf.utils.performance_tracking import _performance_tracking def generate_arg_type_tuple(args, kwargs): @@ -31,11 +32,10 @@ def generate_arg_type_tuple(args, kwargs): return tuple(result) -# Note that the metaclass below can easily be generalized for use with -# other classes, if needed in the future. Simply replace the arguments -# of the `__call__` method with `*args` and `**kwargs`. This will -# result in additional overhead when constructing the cache key, as -# unpacking *args and **kwargs is not cheap. See the discussion in +# The cache key is generated from args and kwargs so that the cache can support +# objects that inherit from Scalar. 
This will result in additional overhead +# when constructing the cache key, as unpacking *args and **kwargs is not cheap. +# For now, we'll track the performance of __call__. See the discussion in # https://github.com/rapidsai/cudf/pull/11246#discussion_r955843532 # for details. class CachedScalarInstanceMeta(type): @@ -55,6 +55,7 @@ def __init__(self, names, bases, attrs, maxsize=128): self.__maxsize = maxsize self.__instances = OrderedDict() + @_performance_tracking def __call__(self, *args, **kwargs): # the cache key is constructed from the arguments, and also # the _types_ of the arguments, since objects of different diff --git a/python/cudf/cudf/core/timestamp.py b/python/cudf/cudf/core/timestamp.py index 6c0b568defb..57b99c9b655 100644 --- a/python/cudf/cudf/core/timestamp.py +++ b/python/cudf/cudf/core/timestamp.py @@ -96,3 +96,294 @@ def __sub__( raise TypeError( f"Subtraction not supported between types {type(self)} and {type(other)}" ) + + @property + def as_unit(self): + raise NotImplementedError( + "The attribute 'as_unit' is not implemented." + ) + + @property + def day_of_week(self): + raise NotImplementedError( + "The attribute 'day_of_week' is not implemented." + ) + + @property + def day_of_year(self): + raise NotImplementedError( + "The attribute 'day_of_year' is not implemented." + ) + + @property + def dayofweek(self): + raise NotImplementedError( + "The attribute 'dayofweek' is not implemented." + ) + + @property + def dayofyear(self): + raise NotImplementedError( + "The attribute 'dayofyear' is not implemented." + ) + + @property + def days_in_month(self): + raise NotImplementedError( + "The attribute 'days_in_month' is not implemented." + ) + + @property + def daysinmonth(self): + raise NotImplementedError( + "The attribute 'daysinmonth' is not implemented." 
+ ) + + @property + def fold(self): + raise NotImplementedError("The attribute 'fold' is not implemented.") + + @property + def is_leap_year(self): + raise NotImplementedError( + "The attribute 'is_leap_year' is not implemented." + ) + + @property + def is_month_end(self): + raise NotImplementedError( + "The attribute 'is_month_end' is not implemented." + ) + + @property + def is_month_start(self): + raise NotImplementedError( + "The attribute 'is_month_start' is not implemented." + ) + + @property + def is_quarter_end(self): + raise NotImplementedError( + "The attribute 'is_quarter_end' is not implemented." + ) + + @property + def is_quarter_start(self): + raise NotImplementedError( + "The attribute 'is_quarter_start' is not implemented." + ) + + @property + def is_year_end(self): + raise NotImplementedError( + "The attribute 'is_year_end' is not implemented." + ) + + @property + def is_year_start(self): + raise NotImplementedError( + "The attribute 'is_year_start' is not implemented." + ) + + @property + def min(self): + raise NotImplementedError("The attribute 'min' is not implemented.") + + @property + def quarter(self): + raise NotImplementedError( + "The attribute 'quarter' is not implemented." + ) + + @property + def resolution(self): + raise NotImplementedError( + "The attribute 'resolution' is not implemented." + ) + + @property + def timestamp(self): + raise NotImplementedError( + "The attribute 'timestamp' is not implemented." + ) + + @property + def tz(self): + raise NotImplementedError("The attribute 'tz' is not implemented.") + + @property + def tzinfo(self): + raise NotImplementedError("The attribute 'tzinfo' is not implemented.") + + @property + def unit(self): + raise NotImplementedError("The attribute 'unit' is not implemented.") + + @property + def week(self): + raise NotImplementedError("The attribute 'week' is not implemented.") + + @property + def weekday(self): + raise NotImplementedError( + "The attribute 'weekday' is not implemented." 
+ ) + + @property + def weekofyear(self): + raise NotImplementedError( + "The attribute 'weekofyear' is not implemented." + ) + + def astimezone(self, tz=None): + raise NotImplementedError( + "The method 'astimezone' is not implemented." + ) + + def ceil(self, freq=None): + raise NotImplementedError("The method 'ceil' is not implemented.") + + @classmethod + def combine(cls, date, time): + raise NotImplementedError("The method 'combine' is not implemented.") + + def ctime(self): + raise NotImplementedError("The method 'ctime' is not implemented.") + + def date(self): + raise NotImplementedError("The method 'date' is not implemented.") + + def day_name(self): + raise NotImplementedError("The method 'day_name' is not implemented.") + + def dst(self): + raise NotImplementedError("The method 'dst' is not implemented.") + + def floor(self, freq=None): + raise NotImplementedError("The method 'floor' is not implemented.") + + @classmethod + def fromisoformat(cls, date_string): + raise NotImplementedError( + "The method 'fromisoformat' is not implemented." + ) + + @classmethod + def fromordinal(cls, n): + raise NotImplementedError( + "The method 'fromordinal' is not implemented." + ) + + @classmethod + def fromtimestamp(cls, timestamp, tz=None): + raise NotImplementedError( + "The method 'fromtimestamp' is not implemented." + ) + + def isocalendar(self): + raise NotImplementedError( + "The method 'isocalendar' is not implemented." + ) + + def isoformat(self, sep="T"): + raise NotImplementedError("The method 'isoformat' is not implemented.") + + def isoweekday(self): + raise NotImplementedError( + "The method 'isoweekday' is not implemented." + ) + + def max(self): + raise NotImplementedError("The method 'max' is not implemented.") + + def month_name(self): + raise NotImplementedError( + "The method 'month_name' is not implemented." 
+ ) + + def normalize(self): + raise NotImplementedError("The method 'normalize' is not implemented.") + + @classmethod + def now(cls, tz=None): + raise NotImplementedError("The method 'now' is not implemented.") + + def replace(self, **kwargs): + raise NotImplementedError("The method 'replace' is not implemented.") + + def round(self, freq=None): + raise NotImplementedError("The method 'round' is not implemented.") + + def strftime(self, format): + raise NotImplementedError("The method 'strftime' is not implemented.") + + @classmethod + def strptime(cls, date_string, format): + raise NotImplementedError("The method 'strptime' is not implemented.") + + def time(self): + raise NotImplementedError("The method 'time' is not implemented.") + + def timetuple(self): + raise NotImplementedError("The method 'timetuple' is not implemented.") + + def timetz(self): + raise NotImplementedError("The method 'timetz' is not implemented.") + + def to_datetime64(self): + raise NotImplementedError( + "The method 'to_datetime64' is not implemented." + ) + + def to_julian_date(self): + raise NotImplementedError( + "The method 'to_julian_date' is not implemented." + ) + + def to_numpy(self): + raise NotImplementedError("The method 'to_numpy' is not implemented.") + + def to_period(self, freq=None): + raise NotImplementedError("The method 'to_period' is not implemented.") + + def to_pydatetime(self): + raise NotImplementedError( + "The method 'to_pydatetime' is not implemented." + ) + + def today(self): + raise NotImplementedError("The method 'today' is not implemented.") + + def toordinal(self): + raise NotImplementedError("The method 'toordinal' is not implemented.") + + def tz_convert(self, tz): + raise NotImplementedError( + "The method 'tz_convert' is not implemented." + ) + + def tz_localize(self, tz): + raise NotImplementedError( + "The method 'tz_localize' is not implemented." 
+ ) + + def tzname(self): + raise NotImplementedError("The method 'tzname' is not implemented.") + + @classmethod + def utcfromtimestamp(cls, timestamp): + raise NotImplementedError( + "The method 'utcfromtimestamp' is not implemented." + ) + + @classmethod + def utcnow(cls): + raise NotImplementedError("The method 'utcnow' is not implemented.") + + def utcoffset(self): + raise NotImplementedError("The method 'utcoffset' is not implemented.") + + def utctimetuple(self): + raise NotImplementedError( + "The method 'utctimetuple' is not implemented." + ) diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index d3516a379e5..e05e580cbcb 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -37,7 +37,7 @@ def test_profiler(): "DataFrame.sum", "Series.__getitem__", "Timedelta", - "_Timestamp.__add__", + "Timestamp.__add__", } for name, func in per_function_stats.items(): assert ( From 6c09ff9b9062a729d929cc1788d5633e375b3119 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 15 Aug 2024 12:28:00 -0700 Subject: [PATCH 07/12] Fix profile test --- python/cudf/cudf/pandas/fast_slow_proxy.py | 1 + python/cudf/cudf_pandas_tests/test_profiler.py | 6 ++++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index bb678fd1efe..f15ab89d735 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -931,6 +931,7 @@ def _fast_slow_function_call( f"The exception was {e}." 
) except Exception as err: + print(err) with nvtx.annotate( "EXECUTE_SLOW", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index e05e580cbcb..22b73fe85bb 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -37,12 +37,14 @@ def test_profiler(): "DataFrame.sum", "Series.__getitem__", "Timedelta", - "Timestamp.__add__", + "_Timestamp.__add__", } + # TODO: Swap _Timestamp.__add__ for Timestamp.__add__ + # when cudf.Timedelta support is added for name, func in per_function_stats.items(): assert ( len(func["cpu"]) == 0 - if "Timedelta" not in name + if "Timedelta" not in name or "_Time" not in name else len(func["gpu"]) == 0 ) From 8e24ce12aaf8426a3bebc933b772cb782a2c1c92 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 5 Sep 2024 11:16:02 -0700 Subject: [PATCH 08/12] refactor and add tests --- python/cudf/cudf/core/dataframe.py | 2 + python/cudf/cudf/core/timestamp.py | 38 +++++---- python/cudf/cudf/tests/test_timestamp.py | 85 +++++++++++++++++++ .../cudf/cudf_pandas_tests/test_profiler.py | 6 +- 4 files changed, 114 insertions(+), 17 deletions(-) create mode 100644 python/cudf/cudf/tests/test_timestamp.py diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 7a171fe9e05..99729adfcc4 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -8261,6 +8261,8 @@ def from_pandas(obj, nan_as_null=no_default): return cudf.CategoricalDtype.from_pandas(obj) elif isinstance(obj, pd.IntervalDtype): return cudf.IntervalDtype.from_pandas(obj) + elif isinstance(obj, pd.Timestamp): + return cudf.Timestamp.from_pandas(obj) else: raise TypeError( f"from_pandas unsupported for object of type {type(obj).__name__}" diff --git a/python/cudf/cudf/core/timestamp.py b/python/cudf/cudf/core/timestamp.py index 57b99c9b655..3aff57557c6 100644 --- 
a/python/cudf/cudf/core/timestamp.py +++ b/python/cudf/cudf/core/timestamp.py @@ -10,6 +10,8 @@ from cudf.core.scalar import Scalar +# TODO: Use cupy when it supports timestamps and timedeltas. See https://github.com/cupy/cupy/issues/2622 +# TODO: Replace with cudf.Timedelta. See https://github.com/rapidsai/cudf/issues/5882 class Timestamp(Scalar): def __init__( self, @@ -21,50 +23,54 @@ def __init__( super().__init__(ts) @property - def value(self) -> np.datetime64: + def value(self) -> int: + return self.to_pandas().value + + @property + def _scalar_value(self) -> Scalar: return super().value @property def year(self) -> int: - return pd.Timestamp(super().value).year + return self.to_pandas().year @property def month(self) -> int: - return pd.Timestamp(super().value).month + return self.to_pandas().month @property def day(self) -> int: - return pd.Timestamp(super().value).day + return self.to_pandas().day @property def hour(self) -> int: - return pd.Timestamp(super().value).hour + return self.to_pandas().hour @property def minute(self) -> int: - return pd.Timestamp(super().value).minute + return self.to_pandas().minute @property def second(self) -> int: - return pd.Timestamp(super().value).second + return self.to_pandas().second @property def microsecond(self) -> int: - return pd.Timestamp(super().value).microsecond + return self.to_pandas().microsecond @property def nanosecond(self) -> int: - return pd.Timestamp(super().value).nanosecond + return self.to_pandas().nanosecond def __repr__(self): return pd.Timestamp(self.value).__repr__() @property def asm8(self) -> np.datetime64: - return super().value + return self._scalar_value def to_pandas(self): - return pd.Timestamp(super().value) + return pd.Timestamp(self._scalar_value) @classmethod def from_pandas(cls, obj: pd.Timestamp): @@ -74,11 +80,11 @@ def from_pandas(cls, obj: pd.Timestamp): def from_scalar(cls, obj: Scalar): return cls(obj.value) - def _to_scalar(self): - return Scalar(self.value) + def 
to_scalar(self): + return Scalar(self._scalar_value) def __add__(self, other: timedelta | np.timedelta64): - return self.from_scalar(self._to_scalar() + other) + return self.from_scalar(self.to_scalar() + other) def __radd__(self, other: timedelta): return self + other @@ -87,11 +93,11 @@ def __sub__( self, other: datetime | timedelta | np.timedelta64 ) -> pd.Timedelta: if isinstance(other, datetime): - return pd.Timedelta(self.value - other) + return pd.Timedelta(self.to_pandas() - other) elif isinstance(other, self.__class__): return pd.Timedelta(self.value - other.value) elif isinstance(other, (timedelta, np.timedelta64)): - return self.from_scalar(self._to_scalar() - other) + return self.from_scalar(self.to_scalar() - other) else: raise TypeError( f"Subtraction not supported between types {type(self)} and {type(other)}" diff --git a/python/cudf/cudf/tests/test_timestamp.py b/python/cudf/cudf/tests/test_timestamp.py new file mode 100644 index 00000000000..3b7264fd5d6 --- /dev/null +++ b/python/cudf/cudf/tests/test_timestamp.py @@ -0,0 +1,85 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from datetime import datetime + +import numpy as np +import pandas as pd +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.fixture +def ts(): + pts = pd.Timestamp("2024-08-31 12:34:56.789123456") + gts = cudf.from_pandas(pts) + return pts, gts + + +@pytest.mark.parametrize( + "attr", + [ + "value", + "year", + "month", + "day", + "hour", + "minute", + "second", + "microsecond", + "nanosecond", + ], +) +def test_timestamp_properties(ts, attr): + pts, gts = ts + res = getattr(pts, attr) + expect = getattr(gts, attr) + + assert_eq(res, expect) + + +def test_timestamp_to_scalar(ts): + pts, gts = ts + + res = gts.to_scalar() + expect = cudf.Scalar(pts) + + assert_eq(res, expect) + + +def test_timestamp_from_scalar(ts): + pts, gts = ts + s = cudf.Scalar(pts) + + res = cudf.Timestamp.from_scalar(s) + expect = gts + + assert_eq(res, expect) + + +def test_add_timestamp_timedelta(ts): + pts, gts = ts + ptd = pd.Timedelta(1) + + res = gts + ptd + expect = pts + ptd + + assert_eq(res, expect) + + +@pytest.mark.parametrize( + "lhs", + [ + pd.Timedelta(1), + datetime(2024, 9, 5, 1, 1, 1, 1), + np.timedelta64(5, "D"), + ], +) +def test_subtract_timestamp_timedelta(ts, lhs): + pts, gts = ts + + res = gts - lhs + expect = pts - lhs + + assert_eq(res, expect) diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index d33d2336b4b..313683fe125 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -50,9 +50,13 @@ def test_profiler(): # TODO: Swap _Timestamp.__add__ for Timestamp.__add__ # when cudf.Timedelta support is added for name, func in per_function_stats.items(): + if name == "Timestamp": + assert len(func["cpu"]) != 0 + assert len(func["gpu"]) != 0 + continue assert ( len(func["cpu"]) == 0 - if "Timedelta" not in name or "_Time" not in name + if "Time" not in name else len(func["gpu"]) == 0 ) From 
9302b78d43b4becc06894e6cb301251790e055b5 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 6 Sep 2024 08:07:08 -0700 Subject: [PATCH 09/12] remove print --- python/cudf/cudf/pandas/fast_slow_proxy.py | 1 - 1 file changed, 1 deletion(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 708e5e5fdfa..afa1ce5f86c 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -942,7 +942,6 @@ def _fast_slow_function_call( f"The exception was {e}." ) except Exception as err: - print(err) with nvtx.annotate( "EXECUTE_SLOW", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], From 083856a0e070f8877ae4b485f52eea1587282cfc Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 9 Sep 2024 08:50:36 -0700 Subject: [PATCH 10/12] revert cudf.pandas changes --- python/cudf/cudf/pandas/_wrappers/pandas.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 30e9932f241..6d03063fa27 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -160,10 +160,10 @@ def Timestamp_Timedelta__new__(cls, *args, **kwargs): Timestamp = make_final_proxy_type( "Timestamp", - cudf.Timestamp, + _Unusable, pd.Timestamp, - fast_to_slow=lambda fast: fast.to_pandas(), - slow_to_fast=cudf.from_pandas, + fast_to_slow=_Unusable(), + slow_to_fast=_Unusable(), additional_attributes={ "__hash__": _FastSlowAttribute("__hash__"), "__new__": Timestamp_Timedelta__new__, From 653e65a77977fc79dd845859478eb62bd085b05a Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 11 Sep 2024 19:25:05 -0700 Subject: [PATCH 11/12] make proxy Timestamp usable --- python/cudf/cudf/core/timestamp.py | 2 +- python/cudf/cudf/pandas/_wrappers/pandas.py | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/timestamp.py 
b/python/cudf/cudf/core/timestamp.py index 3aff57557c6..5ac60058add 100644 --- a/python/cudf/cudf/core/timestamp.py +++ b/python/cudf/cudf/core/timestamp.py @@ -63,7 +63,7 @@ def nanosecond(self) -> int: return self.to_pandas().nanosecond def __repr__(self): - return pd.Timestamp(self.value).__repr__() + return self.to_pandas().__repr__() @property def asm8(self) -> np.datetime64: diff --git a/python/cudf/cudf/pandas/_wrappers/pandas.py b/python/cudf/cudf/pandas/_wrappers/pandas.py index 6d03063fa27..30e9932f241 100644 --- a/python/cudf/cudf/pandas/_wrappers/pandas.py +++ b/python/cudf/cudf/pandas/_wrappers/pandas.py @@ -160,10 +160,10 @@ def Timestamp_Timedelta__new__(cls, *args, **kwargs): Timestamp = make_final_proxy_type( "Timestamp", - _Unusable, + cudf.Timestamp, pd.Timestamp, - fast_to_slow=_Unusable(), - slow_to_fast=_Unusable(), + fast_to_slow=lambda fast: fast.to_pandas(), + slow_to_fast=cudf.from_pandas, additional_attributes={ "__hash__": _FastSlowAttribute("__hash__"), "__new__": Timestamp_Timedelta__new__, From 7d9586cf29a64dcd5ae9b6dc6fb32f35212e8d89 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 16 Sep 2024 11:41:58 -0700 Subject: [PATCH 12/12] fix test_profile --- python/cudf/cudf_pandas_tests/test_profiler.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/cudf/cudf_pandas_tests/test_profiler.py b/python/cudf/cudf_pandas_tests/test_profiler.py index 313683fe125..28566963d0a 100644 --- a/python/cudf/cudf_pandas_tests/test_profiler.py +++ b/python/cudf/cudf_pandas_tests/test_profiler.py @@ -51,8 +51,7 @@ def test_profiler(): # when cudf.Timedelta support is added for name, func in per_function_stats.items(): if name == "Timestamp": - assert len(func["cpu"]) != 0 - assert len(func["gpu"]) != 0 + assert len(func["cpu"]) == 0 continue assert ( len(func["cpu"]) == 0