From 76a5e3251fb8821d7a9d07a6c1af85b4179f1fbe Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Tue, 12 Nov 2024 22:18:53 -0500
Subject: [PATCH] Raise errors on specific types of fallback in `cudf.pandas` (#17268)

Closes #14975

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/17268
---
Review notes (not part of the commit message):
  - The fallback message is built from adjacent string literals. The
    backslash continuation inside the f-string embedded the next line's
    indentation in every error message.
  - Exception docstrings read "Raised when ..." (one was missing "when").
  - The called function's name is looked up with getattr() so that a
    callee without ``__name__`` cannot mask the fallback error.
  - The test no longer wraps the raising call in an assert that can never
    be evaluated.
  - Hunk headers and the diffstat were recomputed for these amendments;
    the blob ids on the "index" lines are those of the original commit.

 python/cudf/cudf/pandas/fast_slow_proxy.py    | 64 ++++++++++++++---
 .../cudf_pandas_tests/test_cudf_pandas.py     | 71 ++++++++++++++++++-
 2 files changed, 127 insertions(+), 8 deletions(-)

diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index 9768a6c4a2f..40893ee2614 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -16,6 +16,8 @@
 
 import numpy as np
 
+from rmm import RMMError
+
 from ..options import _env_get_bool
 from ..testing import assert_eq
 from .annotation import nvtx
@@ -899,12 +901,58 @@ def _assert_fast_slow_eq(left, right):
         assert_eq(left, right)
 
 
-class ProxyFallbackError(Exception):
-    """Raised when fallback occurs"""
+class FallbackError(Exception):
+    """Raised when fallback occurs"""
+
+    pass
+
+
+class OOMFallbackError(FallbackError):
+    """Raised when cuDF produces a MemoryError or an rmm.RMMError"""
+
+    pass
+
+
+class NotImplementedFallbackError(FallbackError):
+    """Raised when cuDF produces a NotImplementedError"""
+
+    pass
+
+
+class AttributeFallbackError(FallbackError):
+    """Raised when cuDF produces an AttributeError"""
+
+    pass
+
+
+class TypeFallbackError(FallbackError):
+    """Raised when cuDF produces a TypeError"""
 
     pass
 
 
+def _raise_fallback_error(err, name):
+    """Raise the FallbackError subclass corresponding to ``err``.
+
+    ``name`` (the function that was called) is reported in the message,
+    and the raised error is chained to ``err``.
+    """
+    err_message = (
+        f"Falling back to the slow path. The exception was {err}. "
+        f"The function called was {name}."
+    )
+    exception_map = {
+        (RMMError, MemoryError): OOMFallbackError,
+        NotImplementedError: NotImplementedFallbackError,
+        AttributeError: AttributeFallbackError,
+        TypeError: TypeFallbackError,
+    }
+    for err_type, fallback_err_type in exception_map.items():
+        if isinstance(err, err_type):
+            raise fallback_err_type(err_message) from err
+    raise FallbackError(err_message) from err
+
+
 def _fast_function_call():
     """
     Placeholder fast function for pytest profiling purposes.
@@ -981,16 +1029,20 @@ def _fast_slow_function_call(
                             f"The exception was {e}."
                         )
     except Exception as err:
-        if _env_get_bool("CUDF_PANDAS_FAIL_ON_FALLBACK", False):
-            raise ProxyFallbackError(
-                f"The operation failed with cuDF, the reason was {type(err)}: {err}"
-            ) from err
         with nvtx.annotate(
             "EXECUTE_SLOW",
             color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"],
             domain="cudf_pandas",
         ):
             slow_args, slow_kwargs = _slow_arg(args), _slow_arg(kwargs)
+            if _env_get_bool("CUDF_PANDAS_FAIL_ON_FALLBACK", False):
+                # The callee may not define ``__name__``; report its type
+                # name then, so that the fallback error is not masked by
+                # an AttributeError raised here.
+                called = slow_args[0]
+                _raise_fallback_error(
+                    err, getattr(called, "__name__", type(called).__name__)
+                )
             if _env_get_bool("LOG_FAST_FALLBACK", False):
                 from ._logger import log_fallback
 
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index d48fbad0ec3..4473a0e6f12 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -31,10 +31,16 @@
 from packaging import version
 from pytz import utc
 
+from rmm import RMMError
+
 from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220, PANDAS_VERSION
 from cudf.pandas import LOADED, Profiler
 from cudf.pandas.fast_slow_proxy import (
-    ProxyFallbackError,
+    AttributeFallbackError,
+    FallbackError,
+    NotImplementedFallbackError,
+    OOMFallbackError,
+    TypeFallbackError,
     _Unusable,
     is_proxy_object,
 )
@@ -1758,10 +1764,71 @@ def add_one_ufunc(a):
 def test_fallback_raises_error(monkeypatch):
     with monkeypatch.context() as monkeycontext:
         monkeycontext.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")
-        with pytest.raises(ProxyFallbackError):
+        with pytest.raises(FallbackError):
             pd.Series(range(2)).astype(object)
 
 
+def mock_mean_memory_error(self, *args, **kwargs):
+    raise MemoryError()
+
+
+def mock_mean_rmm_error(self, *args, **kwargs):
+    raise RMMError(1, "error")
+
+
+def mock_mean_not_impl_error(self, *args, **kwargs):
+    raise NotImplementedError()
+
+
+def mock_mean_attr_error(self, *args, **kwargs):
+    raise AttributeError()
+
+
+def mock_mean_type_error(self, *args, **kwargs):
+    raise TypeError()
+
+
+@pytest.mark.parametrize(
+    "mock_mean, err",
+    [
+        (
+            mock_mean_memory_error,
+            OOMFallbackError,
+        ),
+        (
+            mock_mean_rmm_error,
+            OOMFallbackError,
+        ),
+        (
+            mock_mean_not_impl_error,
+            NotImplementedFallbackError,
+        ),
+        (
+            mock_mean_attr_error,
+            AttributeFallbackError,
+        ),
+        (
+            mock_mean_type_error,
+            TypeFallbackError,
+        ),
+    ],
+)
+def test_fallback_raises_specific_error(
+    monkeypatch,
+    mock_mean,
+    err,
+):
+    with monkeypatch.context() as monkeycontext:
+        monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", mock_mean)
+        monkeycontext.setenv("CUDF_PANDAS_FAIL_ON_FALLBACK", "True")
+        s = xpd.Series([1, 2])
+        with pytest.raises(err, match="Falling back to the slow path"):
+            s.mean()
+
+    # Must explicitly undo the patch. Proxy dispatch doesn't work with monkeypatch contexts.
+    monkeypatch.setattr(xpd.Series.mean, "_fsproxy_fast", cudf.Series.mean)
+
+
 @pytest.mark.parametrize(
     "attrs",
     [