Skip to content

Commit

Permalink
add tests
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar committed Aug 27, 2024
1 parent f447534 commit 501e92f
Show file tree
Hide file tree
Showing 3 changed files with 37 additions and 0 deletions.
5 changes: 5 additions & 0 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -1443,6 +1443,11 @@ def __repr__(self):
output[:break_idx].replace("'", "") + output[break_idx:]
)
else:
# Too many non-unique categories will cause
# the output to take too long. In this case, we
# split the index into its data and its categories,
# generate the repr of each separately, and then
# merge the two.
pd_cats = pd.Categorical(
preprocess.astype(preprocess.categories.dtype).to_pandas()
)
Expand Down
21 changes: 21 additions & 0 deletions python/cudf/cudf/testing/_utils.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
# Copyright (c) 2020-2024, NVIDIA CORPORATION.

import itertools
import signal
import string
from collections import abc
from contextlib import contextmanager
Expand Down Expand Up @@ -368,3 +369,23 @@ def sv_to_udf_str_testing_lowering(context, builder, sig, args):
return cast_string_view_to_udf_string(
context, builder, sig.args[0], sig.return_type, args[0]
)


class cudf_timeout:
    """
    Context manager to raise a TimeoutError after a specified number of seconds.

    The timeout is implemented with ``signal.alarm`` and a ``SIGALRM``
    handler, so it is only usable where ``signal.SIGALRM`` exists and
    where ``signal.signal`` is permitted to install handlers.

    Parameters
    ----------
    seconds : int or float
        Time limit for the body of the ``with`` block. The value is
        truncated with ``int()``; a truncated value of ``0`` schedules
        no alarm at all.
    timeout_message : str, optional
        Message carried by the ``TimeoutError`` raised on expiry.

    Raises
    ------
    TimeoutError
        Raised from the signal handler if the alarm fires while the
        ``with`` block is still running.
    """

    def __init__(self, seconds, *, timeout_message=""):
        self.seconds = int(seconds)
        self.timeout_message = timeout_message
        # Handler that was active before ``__enter__``; restored on exit.
        self._previous_handler = None

    def _timeout_handler(self, signum, frame):
        """Signal handler: abort the guarded block with a TimeoutError."""
        raise TimeoutError(self.timeout_message)

    def __enter__(self):
        # ``signal.signal`` returns the handler it replaces; keep it so
        # that leaving the block does not leak our handler into
        # unrelated code that also relies on SIGALRM.
        self._previous_handler = signal.signal(
            signal.SIGALRM, self._timeout_handler
        )
        signal.alarm(self.seconds)
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # Cancel any pending alarm first so it cannot fire while the
        # handler is being swapped back.
        signal.alarm(0)
        # ``None`` means the previous handler was not installed from
        # Python and therefore cannot be re-installed via signal.signal.
        if self._previous_handler is not None:
            signal.signal(signal.SIGALRM, self._previous_handler)
            self._previous_handler = None
11 changes: 11 additions & 0 deletions python/cudf/cudf/tests/test_repr.py
Original file line number Diff line number Diff line change
Expand Up @@ -1480,3 +1480,14 @@ def test_interval_index_repr():
gi = cudf.from_pandas(pi)

assert repr(pi) == repr(gi)


def test_large_unique_categories_repr():
    """Check that the repr of a cudf CategoricalIndex built from
    ``range(100_000_000)`` matches the pandas repr and is produced
    inside the ``cudf_timeout`` window."""
    # Unfortunately, this is a long running test (takes about 1 minute)
    # and there is no way we can reduce the time
    num_categories = 100_000_000
    pandas_index = pd.CategoricalIndex(range(num_categories))
    cudf_index = cudf.CategoricalIndex(range(num_categories))
    expected_repr = repr(pandas_index)
    # Only the cudf repr call is placed under the time limit.
    with utils.cudf_timeout(2, timeout_message="Failed to repr fast enough"):
        actual_repr = repr(cudf_index)
    assert expected_repr == actual_repr

0 comments on commit 501e92f

Please sign in to comment.