From 7136fb957ee9384626b345b204497e5d57ef15f4 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 19 Aug 2024 11:54:01 -0700 Subject: [PATCH 01/12] Make isinstance check pass for proxy ndarrays --- python/cudf/cudf/pandas/_wrappers/numpy.py | 3 +++ python/cudf/cudf/pandas/fast_slow_proxy.py | 20 ++++++++++++++++++- python/cudf/cudf/pandas/proxy_base.py | 12 +++++++++++ .../cudf_pandas_tests/test_cudf_pandas.py | 8 ++++++++ 4 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 python/cudf/cudf/pandas/proxy_base.py diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 90ac5198270..bc8145d1e01 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -14,6 +14,7 @@ make_final_proxy_type, make_intermediate_proxy_type, ) +from ..proxy_base import ProxyNDarrayBase from .common import ( array_interface, array_method, @@ -111,12 +112,14 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): numpy.ndarray, fast_to_slow=cupy.ndarray.get, slow_to_fast=cupy.asarray, + bases=(ProxyNDarrayBase,), additional_attributes={ "__array__": array_method, # So that pa.array(wrapped-numpy-array) works "__arrow_array__": arrow_array_method, "__cuda_array_interface__": cuda_array_interface, "__array_interface__": array_interface, + "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), # ndarrays are unhashable "__hash__": None, # iter(cupy-array) produces an iterable of zero-dim device diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index bb678fd1efe..61aa6310082 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -19,6 +19,7 @@ from ..options import _env_get_bool from ..testing import assert_eq from .annotation import nvtx +from .proxy_base import ProxyNDarrayBase def call_operator(fn, args, kwargs): @@ -564,7 +565,11 @@ def _fsproxy_wrap(cls, value, func): _FinalProxy subclasses can override this classmethod if they need particular behaviour when wrapped up. """ - proxy = object.__new__(cls) + base_class = _get_proxy_base_class(cls) + if base_class is object: + proxy = base_class.__new__(cls) + else: + proxy = base_class.__new__(cls, value) proxy._fsproxy_wrapped = value return proxy @@ -1193,6 +1198,19 @@ def is_proxy_object(obj: Any) -> bool: return False +def _get_proxy_base_class(cls): + """Returns the proxy base class if one exists""" + for proxy_class in PROXY_BASE_CLASSES: + if proxy_class in cls.__mro__: + return proxy_class + return object + + +PROXY_BASE_CLASSES: set[type] = { + ProxyNDarrayBase, +} + + NUMPY_TYPES: set[str] = set(np.sctypeDict.values()) diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py new file mode 100644 index 00000000000..adcc654a759 --- /dev/null +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -0,0 +1,12 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import numpy as np + + +class ProxyNDarrayBase(np.ndarray): + def __new__(cls, input_array, *args, **kwargs): + obj = super().__new__(cls, shape=(0,)) + obj._fsproxy_wrapped = input_array + return np.asarray(obj).view(cls) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 6292022d8e4..e5483fff913 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1632,3 +1632,11 @@ def test_change_index_name(index): assert s.index.name == name assert df.index.name == name + + +def test_numpy_ndarray_isinstancecheck(series): + s1, s2 = series + arr1 = s1.values + arr2 = s2.values + assert isinstance(arr1, np.ndarray) + assert isinstance(arr2, np.ndarray) From dcc806f9c8dc2e446d421fb461dd0d2a4e2967a4 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 20 Aug 2024 09:58:49 -0700 Subject: [PATCH 02/12] make asarray use wrapped array --- python/cudf/cudf/pandas/_wrappers/numpy.py | 13 +++++++++++++ python/cudf/cudf/pandas/proxy_base.py | 15 ++++++++++++--- .../cudf/cudf_pandas_tests/test_cudf_pandas.py | 16 ++++++++++++---- 3 files changed, 37 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index bc8145d1e01..3d83f44f8de 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -11,6 +11,7 @@ from ..fast_slow_proxy import ( _FastSlowAttribute, + is_proxy_object, make_final_proxy_type, make_intermediate_proxy_type, ) @@ -106,6 +107,18 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): return super(cls, cls)._fsproxy_wrap(arr, constructor) +numpy_asarray = numpy.asarray + + +def asarray(*args, **kwargs): + if is_proxy_object(args[0]): + return numpy_asarray(args[0]._fsproxy_slow, *args[1:], **kwargs) + return numpy_asarray(*args, **kwargs) + + +numpy.asarray = asarray + + ndarray = make_final_proxy_type( "ndarray", cupy.ndarray, diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py index adcc654a759..23bd5f141ca 100644 --- a/python/cudf/cudf/pandas/proxy_base.py +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -4,9 +4,18 @@ import numpy as np +numpy_asarray = np.asarray + class ProxyNDarrayBase(np.ndarray): def __new__(cls, input_array, *args, **kwargs): - obj = super().__new__(cls, shape=(0,)) - obj._fsproxy_wrapped = input_array - return np.asarray(obj).view(cls) + if isinstance(input_array, np.ndarray): + obj = input_array + else: + obj = super().__new__( + cls, shape=input_array.shape, dtype=input_array.dtype + ) + view = numpy_asarray(obj).view(cls) + view._fsproxy_wrapped = input_array + + return view diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index e5483fff913..979ccf5ca5e 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1634,9 +1634,17 @@ def test_change_index_name(index): assert df.index.name == name -def test_numpy_ndarray_isinstancecheck(series): - s1, s2 = series - arr1 = s1.values - arr2 = s2.values +def test_numpy_ndarray_isinstancecheck(array): + arr1, arr2 = array assert isinstance(arr1, np.ndarray) assert isinstance(arr2, np.ndarray) + + +def test_numpy_ndarray_np_ufunc(array): + arr1, arr2 = array + + @np.vectorize + def add_one_ufunc(arr): + return arr + 1 + + tm.assert_almost_equal(add_one_ufunc(arr1), add_one_ufunc(arr2)) From e7b89489023a65c8f095a6fe1c4847d8ae12a055 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 21 Aug 2024 16:48:08 -0700 Subject: [PATCH 03/12] tackle ufuncs --- python/cudf/cudf/pandas/_wrappers/numpy.py | 41 ++++++++++++++++++- python/cudf/cudf/pandas/proxy_base.py | 20 +++++---- .../cudf_pandas_tests/test_cudf_pandas.py | 36 +++++++++++++++- 3 files changed, 86 insertions(+), 11 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 3d83f44f8de..cf8c7d1939a 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -6,6 +6,7 @@ import cupy import cupy._core.flags +import numba import numpy from packaging import version @@ -119,6 +120,44 @@ def asarray(*args, **kwargs): numpy.asarray = asarray +def convert_args_to_slow(inputs): + new_inputs = [] + for x in inputs: + if isinstance(x, ProxyNDarrayBase): + if hasattr(x._fsproxy_wrapped, "get"): + new_inputs.append(numpy.asarray(x._fsproxy_wrapped.get())) + else: + new_inputs.append(numpy.asarray(x._fsproxy_wrapped)) + else: + new_inputs.append(x) + inputs = tuple(new_inputs) + return inputs + + +def convert_args_to_fast(inputs): + new_inputs = [] + for x in inputs: + if isinstance(x, ProxyNDarrayBase): + new_inputs.append(cupy.asarray(x._fsproxy_wrapped)) + else: + new_inputs.append(x) + inputs = tuple(new_inputs) + return inputs + + +def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if isinstance(ufunc, numba.np.ufunc.dufunc.DUFunc): + inputs = convert_args_to_slow(inputs) + elif isinstance(ufunc, (numpy.ufunc, numpy.vectorize)): + inputs = convert_args_to_slow(inputs) + elif isinstance(ufunc, (cupy.ufunc, cupy.vectorize)): + inputs = convert_args_to_fast(inputs) + else: + raise TypeError(f"Unrecognized ufunc of type {type(ufunc)}") + result = getattr(ufunc, method)(*inputs, **kwargs) + return result + + ndarray = make_final_proxy_type( "ndarray", cupy.ndarray, @@ -132,7 +171,7 @@ def asarray(*args, **kwargs): "__arrow_array__": arrow_array_method, "__cuda_array_interface__": cuda_array_interface, "__array_interface__": array_interface, - "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), + "__array_ufunc__": ndarray__array_ufunc__, # ndarrays are unhashable "__hash__": None, # iter(cupy-array) produces an iterable of zero-dim device diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py index 23bd5f141ca..a8fc25101ef 100644 --- a/python/cudf/cudf/pandas/proxy_base.py +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -9,13 +9,17 @@ class ProxyNDarrayBase(np.ndarray): def __new__(cls, input_array, *args, **kwargs): - if isinstance(input_array, np.ndarray): - obj = input_array - else: - obj = super().__new__( - cls, shape=input_array.shape, dtype=input_array.dtype - ) - view = numpy_asarray(obj).view(cls) - view._fsproxy_wrapped = input_array + obj = super().__new__( + cls, shape=input_array.shape, dtype=input_array.dtype + ) + view = numpy_asarray(obj, dtype=input_array.dtype).view(cls) return view + + def __array_finalize__(self, obj): + if obj is None: + return + for attr in list(self.__dict__.keys()): + if attr not in {"__new__", "__array_finalize__"}: + delattr(self, attr) + self._fsproxy_wrapped = obj diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 979ccf5ca5e..2468c816146 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -14,14 +14,16 @@ import types from io import BytesIO, StringIO +import cupy as cp import numpy as np import pyarrow as pa import pytest -from numba import NumbaDeprecationWarning +from numba import NumbaDeprecationWarning, vectorize from pytz import utc from cudf.pandas import LOADED, Profiler from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object +from cudf.testing import assert_eq if not LOADED: raise ImportError("These tests must be run with cudf.pandas loaded") @@ -1647,4 +1649,34 @@ def test_numpy_ndarray_np_ufunc(array): def add_one_ufunc(arr): return arr + 1 - tm.assert_almost_equal(add_one_ufunc(arr1), add_one_ufunc(arr2)) + assert_eq(add_one_ufunc(arr1), add_one_ufunc(arr2)) + + +def test_numpy_ndarray_cp_ufunc(array): + arr1, arr2 = array + + @cp.vectorize + def add_one_ufunc(arr): + return arr + 1 + + assert_eq(add_one_ufunc(cp.asarray(arr1)), add_one_ufunc(arr2)) + + +def test_numpy_ndarray_numba_ufunc(array): + arr1, arr2 = array + + @vectorize + def add_one_ufunc(arr): + return arr + 1 + + assert_eq(add_one_ufunc(arr1), add_one_ufunc(arr2)) + + +def test_numpy_ndarray_numba_cuda_ufunc(array): + arr1, arr2 = array + + @vectorize(["int64(int64)"], target="cuda") + def add_one_ufunc(a): + return a + 1 + + assert_eq(cp.asarray(add_one_ufunc(arr1)), cp.asarray(add_one_ufunc(arr2))) From a1bee535e01ac2bb0b85442c5d7270e246d8c1df Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 21 Aug 2024 17:27:42 -0700 Subject: [PATCH 04/12] refactor --- python/cudf/cudf/pandas/_wrappers/numpy.py | 6 +++--- python/cudf/cudf/pandas/proxy_base.py | 3 --- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index cf8c7d1939a..dacb41dfec6 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -146,9 +146,9 @@ def convert_args_to_fast(inputs): def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs): - if isinstance(ufunc, numba.np.ufunc.dufunc.DUFunc): - inputs = convert_args_to_slow(inputs) - elif isinstance(ufunc, (numpy.ufunc, numpy.vectorize)): + if isinstance( + ufunc, (numpy.ufunc, numpy.vectorize, numba.np.ufunc.dufunc.DUFunc) + ): inputs = convert_args_to_slow(inputs) elif isinstance(ufunc, (cupy.ufunc, cupy.vectorize)): inputs = convert_args_to_fast(inputs) diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py index a8fc25101ef..a4d6f903bcc 100644 --- a/python/cudf/cudf/pandas/proxy_base.py +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -19,7 +19,4 @@ def __new__(cls, input_array, *args, **kwargs): def __array_finalize__(self, obj): if obj is None: return - for attr in list(self.__dict__.keys()): - if attr not in {"__new__", "__array_finalize__"}: - delattr(self, attr) self._fsproxy_wrapped = obj From 40f3e149a7d3e9c4d144b0a4bf0896fcd51099d8 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 26 Aug 2024 15:39:16 -0700 Subject: [PATCH 05/12] monkeypatch np.dot --- python/cudf/cudf/pandas/_wrappers/numpy.py | 29 +++++++++++++++++++--- 1 file changed, 25 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index dacb41dfec6..8754864b6ca 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -111,13 +111,34 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): numpy_asarray = numpy.asarray -def asarray(*args, **kwargs): - if is_proxy_object(args[0]): - return numpy_asarray(args[0]._fsproxy_slow, *args[1:], **kwargs) - return numpy_asarray(*args, **kwargs) +def asarray(a, dtype=None, order=None, *, device=None, copy=None, like=None): + if is_proxy_object(a): + return numpy_asarray( + a._fsproxy_slow, dtype, order, device=None, copy=None, like=None + ) + return numpy_asarray(a, dtype, order, device=None, copy=None, like=None) numpy.asarray = asarray +numpy.asarray.__doc__ = numpy_asarray.__doc__ +numpy.asarray.__module__ = numpy_asarray.__module__ + +numpy_dot = numpy.dot + + +def dot(a, b, out=None): + if is_proxy_object(a) and is_proxy_object(b): + return numpy_dot(a._fsproxy_slow, b._fsproxy_slow, out=out) + elif is_proxy_object(a): + return numpy_dot(a._fsproxy_slow, b, out=out) + elif is_proxy_object(b): + return numpy_dot(a, b._fsproxy_slow, out=out) + return numpy_dot(a, b, out=out) + + +numpy.dot = dot +numpy.dot.__doc__ = numpy_dot.__doc__ +numpy.dot.__module__ = numpy_dot.__module__ def convert_args_to_slow(inputs): From 216aeb1fc04b708cd0f3f17d2a19a7ac65156447 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 27 Aug 2024 07:38:48 -0700 Subject: [PATCH 06/12] device is a kwarg --- python/cudf/cudf/pandas/_wrappers/numpy.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 8754864b6ca..83d5cbff94e 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -114,9 +114,9 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): def asarray(a, dtype=None, order=None, *, device=None, copy=None, like=None): if is_proxy_object(a): return numpy_asarray( - a._fsproxy_slow, dtype, order, device=None, copy=None, like=None + a._fsproxy_slow, dtype, order, device=device, copy=copy, like=like ) - return numpy_asarray(a, dtype, order, device=None, copy=None, like=None) + return numpy_asarray(a, dtype, order, device=device, copy=copy, like=like) numpy.asarray = asarray From 1fb43f59ee22f0478bf11e6d8b5b327fce4975c8 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 27 Aug 2024 17:51:18 -0700 Subject: [PATCH 07/12] DtoH on instance creation --- python/cudf/cudf/pandas/_wrappers/numpy.py | 81 ++++------------------ python/cudf/cudf/pandas/proxy_base.py | 21 +++--- 2 files changed, 26 insertions(+), 76 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 83d5cbff94e..c366334b1f5 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -6,11 +6,11 @@ import cupy import cupy._core.flags -import numba import numpy from packaging import version from ..fast_slow_proxy import ( + _fast_slow_function_call, _FastSlowAttribute, is_proxy_object, make_final_proxy_type, @@ -108,74 +108,21 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): return super(cls, cls)._fsproxy_wrap(arr, constructor) -numpy_asarray = numpy.asarray - - -def asarray(a, dtype=None, order=None, *, device=None, copy=None, like=None): - if is_proxy_object(a): - return numpy_asarray( - a._fsproxy_slow, dtype, order, device=device, copy=copy, like=like - ) - return numpy_asarray(a, dtype, order, device=device, copy=copy, like=like) - - -numpy.asarray = asarray -numpy.asarray.__doc__ = numpy_asarray.__doc__ -numpy.asarray.__module__ = numpy_asarray.__module__ - -numpy_dot = numpy.dot - - -def dot(a, b, out=None): - if is_proxy_object(a) and is_proxy_object(b): - return numpy_dot(a._fsproxy_slow, b._fsproxy_slow, out=out) - elif is_proxy_object(a): - return numpy_dot(a._fsproxy_slow, b, out=out) - elif is_proxy_object(b): - return numpy_dot(a, b._fsproxy_slow, out=out) - return numpy_dot(a, b, out=out) - - -numpy.dot = dot -numpy.dot.__doc__ = numpy_dot.__doc__ -numpy.dot.__module__ = numpy_dot.__module__ - - -def convert_args_to_slow(inputs): - new_inputs = [] - for x in inputs: - if isinstance(x, ProxyNDarrayBase): - if hasattr(x._fsproxy_wrapped, "get"): - new_inputs.append(numpy.asarray(x._fsproxy_wrapped.get())) - else: - new_inputs.append(numpy.asarray(x._fsproxy_wrapped)) - else: - new_inputs.append(x) - inputs = tuple(new_inputs) - return inputs - - -def convert_args_to_fast(inputs): - new_inputs = [] - for x in inputs: - if isinstance(x, ProxyNDarrayBase): - new_inputs.append(cupy.asarray(x._fsproxy_wrapped)) - else: - new_inputs.append(x) - inputs = tuple(new_inputs) - return inputs - - def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs): - if isinstance( - ufunc, (numpy.ufunc, numpy.vectorize, numba.np.ufunc.dufunc.DUFunc) + result, _ = _fast_slow_function_call( + getattr(ufunc, method), + *inputs, + **kwargs, + ) + if isinstance(result, tuple): + if is_proxy_object(result[0]) and isinstance( + result[0]._fsproxy_wrapped, numpy.ndarray + ): + return tuple([numpy.asarray(x) for x in result]) + elif is_proxy_object(result) and isinstance( + result._fsproxy_wrapped, numpy.ndarray ): - inputs = convert_args_to_slow(inputs) - elif isinstance(ufunc, (cupy.ufunc, cupy.vectorize)): - inputs = convert_args_to_fast(inputs) - else: - raise TypeError(f"Unrecognized ufunc of type {type(ufunc)}") - result = getattr(ufunc, method)(*inputs, **kwargs) + return numpy.asarray(result) return result diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py index a4d6f903bcc..c0f7f8233e4 100644 --- a/python/cudf/cudf/pandas/proxy_base.py +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -2,19 +2,22 @@ # All rights reserved. # SPDX-License-Identifier: Apache-2.0 +import cupy as cp import numpy as np -numpy_asarray = np.asarray - class ProxyNDarrayBase(np.ndarray): - def __new__(cls, input_array, *args, **kwargs): - obj = super().__new__( - cls, shape=input_array.shape, dtype=input_array.dtype - ) - view = numpy_asarray(obj, dtype=input_array.dtype).view(cls) - - return view + def __new__(cls, arr): + if isinstance(arr, cp.ndarray): + obj = np.asarray(arr.get(), dtype=arr.dtype).view(cls) + return obj + elif isinstance(arr, np.ndarray): + obj = np.asarray(arr, dtype=arr.dtype).view(cls) + return obj + else: + raise TypeError( + "Unsupported array type. Must be numpy.ndarray or cupy.ndarray" + ) def __array_finalize__(self, obj): if obj is None: From 67f28c07f2b969f924036e10febfde9b60281978 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 29 Aug 2024 14:19:17 -0700 Subject: [PATCH 08/12] address review --- python/cudf/cudf/pandas/_wrappers/numpy.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index c366334b1f5..d5e669cb58f 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -118,7 +118,7 @@ def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs): if is_proxy_object(result[0]) and isinstance( result[0]._fsproxy_wrapped, numpy.ndarray ): - return tuple([numpy.asarray(x) for x in result]) + return tuple(numpy.asarray(x) for x in result) elif is_proxy_object(result) and isinstance( result._fsproxy_wrapped, numpy.ndarray ): From 78bc30a1c1d1accaa584e30aa9b9c15cf06ade22 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 3 Sep 2024 13:38:17 -0700 Subject: [PATCH 09/12] cleanup --- python/cudf/cudf/pandas/_wrappers/numpy.py | 20 +++++++++++--------- python/cudf/cudf/pandas/fast_slow_proxy.py | 1 + python/cudf/cudf/pandas/proxy_base.py | 6 +++--- 3 files changed, 15 insertions(+), 12 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index d5e669cb58f..2e02a33c349 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -109,20 +109,22 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs): + if is_proxy_object(inputs[0]) and isinstance(ufunc, numpy.ufunc): + ndarray_type = type(inputs[0]._fsproxy_slow) + inputs = tuple( + x._fsproxy_slow + if is_proxy_object(x) and isinstance(x, numpy.ndarray) + else x + for x in inputs + ) + return ndarray_type.__array_ufunc__( + self, ufunc, method, *inputs, **kwargs + ) result, _ = _fast_slow_function_call( getattr(ufunc, method), *inputs, **kwargs, ) - if isinstance(result, tuple): - if is_proxy_object(result[0]) and isinstance( - result[0]._fsproxy_wrapped, numpy.ndarray - ): - return tuple(numpy.asarray(x) for x in result) - elif is_proxy_object(result) and isinstance( - result._fsproxy_wrapped, numpy.ndarray - ): - return numpy.asarray(result) return result diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index a671d8eb947..2579c92e04b 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -936,6 +936,7 @@ def _fast_slow_function_call( f"The exception was {e}." ) except Exception as err: + print(err) with nvtx.annotate( "EXECUTE_SLOW", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py index c0f7f8233e4..dd5eb9663cd 100644 --- a/python/cudf/cudf/pandas/proxy_base.py +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -9,10 +9,10 @@ class ProxyNDarrayBase(np.ndarray): def __new__(cls, arr): if isinstance(arr, cp.ndarray): - obj = np.asarray(arr.get(), dtype=arr.dtype).view(cls) + obj = arr.get().view(cls) return obj elif isinstance(arr, np.ndarray): - obj = np.asarray(arr, dtype=arr.dtype).view(cls) + obj = arr.view(cls) return obj else: raise TypeError( @@ -22,4 +22,4 @@ def __new__(cls, arr): def __array_finalize__(self, obj): if obj is None: return - self._fsproxy_wrapped = obj + self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", obj) From 8c8bc3e391f31ea4e926d38925d2ab745ce81685 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 4 Sep 2024 07:12:22 -0700 Subject: [PATCH 10/12] address review --- python/cudf/cudf/pandas/_wrappers/numpy.py | 20 +++++++++----------- python/cudf/cudf/pandas/fast_slow_proxy.py | 9 +++++++-- python/cudf/cudf/pandas/proxy_base.py | 9 +++------ 3 files changed, 19 insertions(+), 19 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 2e02a33c349..d5e669cb58f 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -109,22 +109,20 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs): - if is_proxy_object(inputs[0]) and isinstance(ufunc, numpy.ufunc): - ndarray_type = type(inputs[0]._fsproxy_slow) - inputs = tuple( - x._fsproxy_slow - if is_proxy_object(x) and isinstance(x, numpy.ndarray) - else x - for x in inputs - ) - return ndarray_type.__array_ufunc__( - self, ufunc, method, *inputs, **kwargs - ) result, _ = _fast_slow_function_call( getattr(ufunc, method), *inputs, **kwargs, ) + if isinstance(result, tuple): + if is_proxy_object(result[0]) and isinstance( + result[0]._fsproxy_wrapped, numpy.ndarray + ): + return tuple(numpy.asarray(x) for x in result) + elif is_proxy_object(result) and isinstance( + result._fsproxy_wrapped, numpy.ndarray + ): + return numpy.asarray(result) return result diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 2579c92e04b..afa1ce5f86c 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -565,11 +565,17 @@ def _fsproxy_wrap(cls, value, func): _FinalProxy subclasses can override this classmethod if they need particular behaviour when wrapped up. """ + # TODO: Replace the if-elif-else using singledispatch helper function base_class = _get_proxy_base_class(cls) if base_class is object: proxy = base_class.__new__(cls) - else: + elif base_class is ProxyNDarrayBase: proxy = base_class.__new__(cls, value) + else: + raise TypeError( + f"Cannot create an proxy instance of {cls.__name__} using base class {base_class.__name__}. " + f"Expected either 'object' or another type in 'PROXY_BASE_CLASSES'" + ) proxy._fsproxy_wrapped = value return proxy @@ -936,7 +942,6 @@ def _fast_slow_function_call( f"The exception was {e}." ) except Exception as err: - print(err) with nvtx.annotate( "EXECUTE_SLOW", color=_CUDF_PANDAS_NVTX_COLORS["EXECUTE_SLOW"], diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py index dd5eb9663cd..6f732834e94 100644 --- a/python/cudf/cudf/pandas/proxy_base.py +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -9,15 +9,12 @@ class ProxyNDarrayBase(np.ndarray): def __new__(cls, arr): if isinstance(arr, cp.ndarray): - obj = arr.get().view(cls) - return obj - elif isinstance(arr, np.ndarray): - obj = arr.view(cls) - return obj - else: + arr = arr.get() + if not isinstance(arr, np.ndarray): raise TypeError( "Unsupported array type. Must be numpy.ndarray or cupy.ndarray" ) + return np.asarray(arr, dtype=arr.dtype).view(cls) def __array_finalize__(self, obj): if obj is None: From 9e7e3de66e258f86dddfa35f58cd970493f3a7a9 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 4 Sep 2024 15:22:55 -0700 Subject: [PATCH 11/12] test third-party integration tests --- .github/workflows/pr.yaml | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 8730804e8b6..fe07b61d39e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -36,6 +36,7 @@ jobs: - unit-tests-cudf-pandas - pandas-tests - pandas-tests-diff + - third-party-integration-tests-cudf-pandas secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 if: always() @@ -304,3 +305,14 @@ jobs: node_type: cpu4 build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" + third-party-integration-tests-cudf-pandas: + needs: wheel-build-cudf + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 + with: + build_type: pull-request + node_type: "gpu-v100-latest-1" + arch: "amd64" + container_image: "rapidsai/ci-conda:latest" + run_script: | + ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml From e2047e09a0cb965da6450f724b25e6798bec139c Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 4 Sep 2024 18:20:44 -0700 Subject: [PATCH 12/12] remove pr job --- .github/workflows/pr.yaml | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index fe07b61d39e..8730804e8b6 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -36,7 +36,6 @@ jobs: - unit-tests-cudf-pandas - pandas-tests - pandas-tests-diff - - third-party-integration-tests-cudf-pandas secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.10 if: always() @@ -305,14 +304,3 @@ jobs: node_type: cpu4 build_type: pull-request run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh" - third-party-integration-tests-cudf-pandas: - needs: wheel-build-cudf - secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.10 - with: - build_type: pull-request - node_type: "gpu-v100-latest-1" - arch: "amd64" - container_image: "rapidsai/ci-conda:latest" - run_script: | - ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml