Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[FEA] Add support for cudf.Timestamp #16450

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
21 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions python/cudf/cudf/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -70,6 +70,7 @@
)
from cudf.core.scalar import Scalar
from cudf.core.series import Series, isclose
from cudf.core.timestamp import Timestamp
from cudf.core.tools.datetimes import DateOffset, date_range, to_datetime
from cudf.core.tools.numeric import to_numeric
from cudf.io import (
Expand Down Expand Up @@ -121,6 +122,7 @@
"Series",
"StructDtype",
"TimedeltaIndex",
"Timestamp",
"api",
"concat",
"crosstab",
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -8261,6 +8261,8 @@ def from_pandas(obj, nan_as_null=no_default):
return cudf.CategoricalDtype.from_pandas(obj)
elif isinstance(obj, pd.IntervalDtype):
return cudf.IntervalDtype.from_pandas(obj)
elif isinstance(obj, pd.Timestamp):
return cudf.Timestamp.from_pandas(obj)
else:
raise TypeError(
f"from_pandas unsupported for object of type {type(obj).__name__}"
Expand Down
32 changes: 23 additions & 9 deletions python/cudf/cudf/core/scalar.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,26 @@
get_allowed_combinations_for_operator,
to_cudf_compatible_scalar,
)
from cudf.utils.performance_tracking import _performance_tracking


# Note that the metaclass below can easily be generalized for use with
# other classes, if needed in the future. Simply replace the arguments
# of the `__call__` method with `*args` and `**kwargs`. This will
# result in additional overhead when constructing the cache key, as
# unpacking *args and **kwargs is not cheap. See the discussion in
def generate_arg_type_tuple(args, kwargs):
    """Build a flat tuple usable as a cache key for a call's arguments.

    Each positional argument contributes ``(value, type(value))`` and each
    keyword argument contributes ``(name, value, type(value))``.  Types are
    included because objects of different types can compare equal (for
    example ``1 == 1.0``) yet must not share a cache entry.

    Parameters
    ----------
    args : tuple
        Positional arguments of the call.
    kwargs : dict
        Keyword arguments of the call.

    Returns
    -------
    tuple
        The flattened key.  It is only hashable if every argument value is
        hashable; callers are expected to handle ``TypeError`` on lookup.
    """
    key = []
    for arg in args:
        key.extend((arg, type(arg)))

    # The keyword *name* is part of the key: without it, f(a=1) and f(b=1)
    # would collide.  Iterating in sorted name order makes the key
    # independent of the order in which keywords were written at the call
    # site, so f(a=1, b=2) and f(b=2, a=1) share one entry.
    for name in sorted(kwargs):
        value = kwargs[name]
        key.extend((name, value, type(value)))

    return tuple(key)


# The cache key is generated from args and kwargs so that the cache can support
# objects that inherit from Scalar. This will result in additional overhead
# when constructing the cache key, as unpacking *args and **kwargs is not cheap.
# For now, we'll track the performance of __call__. See the discussion in
# https://github.com/rapidsai/cudf/pull/11246#discussion_r955843532
# for details.
class CachedScalarInstanceMeta(type):
Expand All @@ -42,19 +55,20 @@ def __init__(self, names, bases, attrs, maxsize=128):
self.__maxsize = maxsize
self.__instances = OrderedDict()

def __call__(self, value, dtype=None):
@_performance_tracking
def __call__(self, *args, **kwargs):
# the cache key is constructed from the arguments, and also
# the _types_ of the arguments, since objects of different
# types can compare equal
cache_key = (value, type(value), dtype, type(dtype))
cache_key = generate_arg_type_tuple(args, kwargs)
try:
# try retrieving an instance from the cache:
self.__instances.move_to_end(cache_key)
return self.__instances[cache_key]
except KeyError:
# if an instance couldn't be found in the cache,
# construct it and add to cache:
obj = super().__call__(value, dtype=dtype)
obj = super().__call__(*args, **kwargs)
try:
self.__instances[cache_key] = obj
except TypeError:
Expand All @@ -65,7 +79,7 @@ def __call__(self, value, dtype=None):
return obj
except TypeError:
# couldn't hash the arguments, don't cache:
return super().__call__(value, dtype=dtype)
return super().__call__(*args, **kwargs)

def _clear_instance_cache(self):
    """Remove every entry from this class's cached-instance mapping."""
    self.__instances.clear()
Expand Down
Loading
Loading