From 21350fc2ac070315d110fca55cb6781ed7905596 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 19 Apr 2024 07:17:09 -1000 Subject: [PATCH] Allow apply udf to reference global modules in cudf.pandas (#15569) closes #15548 `_replace_closurevars` creates a new function by replacing objects with their fast versions. When creating the new function, it populates `globals` from the result of `inspect.getclosurevars`, but I don't think it comprehensively returns _all_ the globals accessible to the function (`function.__globals__`). To minimize the change, the "fast globals" are still sourced from `inspect.getclosurevars`, and those update the `old_function.__globals__` when creating a new function. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/15569 --- python/cudf/cudf/pandas/fast_slow_proxy.py | 9 ++++++--- python/cudf/cudf_pandas_tests/test_cudf_pandas.py | 12 ++++++++++++ 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index e811ba1351a..9d8c174b297 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -1108,7 +1108,7 @@ def _replace_closurevars( if any(c == types.CellType() for c in f.__closure__): return f - f_nonlocals, f_globals, f_builtins, _ = inspect.getclosurevars(f) + f_nonlocals, f_globals, _, _ = inspect.getclosurevars(f) g_globals = _transform_arg(f_globals, attribute_name, seen) g_nonlocals = _transform_arg(f_nonlocals, attribute_name, seen) @@ -1121,11 +1121,14 @@ def _replace_closurevars( return f g_closure = tuple(types.CellType(val) for val in g_nonlocals.values()) - g_globals["__builtins__"] = f_builtins + + # https://github.com/rapidsai/cudf/issues/15548 + new_g_globals = f.__globals__.copy() + 
new_g_globals.update(g_globals) g = types.FunctionType( f.__code__, - g_globals, + new_g_globals, name=f.__name__, argdefs=f.__defaults__, closure=g_closure, diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index f017b46866f..90356a01404 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1208,3 +1208,15 @@ def test_pickle_groupby(dataframe): def test_isinstance_base_offset(): offset = xpd.tseries.frequencies.to_offset("1s") assert isinstance(offset, xpd.tseries.offsets.BaseOffset) + + +def test_apply_slow_path_udf_references_global_module(): + def my_apply(df, unused): + # `datetime` Raised `KeyError: __import__` + datetime.datetime.strptime(df["Minute"], "%H:%M:%S") + return pd.to_numeric(1) + + df = xpd.DataFrame({"Minute": ["09:00:00"]}) + result = df.apply(my_apply, axis=1, unused=True) + expected = xpd.Series([1]) + tm.assert_series_equal(result, expected)