From 365e1b0bab714f9045dce0d4c4ac7166eebc5be2 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 16 Jul 2024 09:19:24 -0700 Subject: [PATCH 1/6] merge conflict --- python/cudf/cudf/pandas/_wrappers/numpy.py | 2 ++ python/cudf/cudf/pandas/fast_slow_proxy.py | 23 ++++++++++++++++++++-- 2 files changed, 23 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 3b012169676..051ed76a0fb 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -10,6 +10,7 @@ import numpy.core.multiarray from ..fast_slow_proxy import ( + ProxyNDarray, _FastSlowAttribute, make_final_proxy_type, make_intermediate_proxy_type, @@ -111,6 +112,7 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): numpy.ndarray, fast_to_slow=cupy.ndarray.get, slow_to_fast=cupy.asarray, + bases=(ProxyNDarray,), additional_attributes={ "__array__": array_method, # So that pa.array(wrapped-numpy-array) works diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index dfb729cae6b..1b5ffb0447c 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -14,6 +14,7 @@ from enum import IntEnum from typing import Any, Callable, Literal, Mapping +import cupy as cp import numpy as np from ..options import _env_get_bool @@ -47,6 +48,20 @@ def call_operator(fn, args, kwargs): ) +class ProxyNDarray(np.ndarray): + def __new__(cls, arr): + if isinstance(arr, cp.ndarray): + obj = np.asarray(arr.get()).view(cls) + return obj + elif isinstance(arr, np.ndarray): + obj = np.asarray(arr).view(cls) + return obj + else: + raise TypeError( + "Unsupported array type. Must be numpy.ndarray or cupy.ndarray" + ) + + def callers_module_name(): # Call f_back twice since this function adds an extra frame return inspect.currentframe().f_back.f_back.f_globals["__name__"] @@ -564,8 +579,12 @@ def _fsproxy_wrap(cls, value, func): _FinalProxy subclasses can override this classmethod if they need particular behaviour when wrapped up. """ - proxy = object.__new__(cls) - proxy._fsproxy_wrapped = value + if np.ndarray in cls.__mro__: + proxy = ProxyNDarray.__new__(cls, value) + proxy._fsproxy_wrapped = value + else: + proxy = object.__new__(cls) + proxy._fsproxy_wrapped = value return proxy def __reduce__(self): From b21a47aa0fd9569aeed5bf1614a22213cd76ef69 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 16 Jul 2024 16:19:22 -0700 Subject: [PATCH 2/6] Add __array_finalize__ to ProxyNDarrayBase --- python/cudf/cudf/pandas/_wrappers/numpy.py | 4 ++-- python/cudf/cudf/pandas/fast_slow_proxy.py | 7 +++++-- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index 051ed76a0fb..a720c20122e 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -10,7 +10,7 @@ import numpy.core.multiarray from ..fast_slow_proxy import ( - ProxyNDarray, + ProxyNDarrayBase, _FastSlowAttribute, make_final_proxy_type, make_intermediate_proxy_type, @@ -112,7 +112,7 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): numpy.ndarray, fast_to_slow=cupy.ndarray.get, slow_to_fast=cupy.asarray, - bases=(ProxyNDarray,), + bases=(ProxyNDarrayBase,), additional_attributes={ "__array__": array_method, # So that pa.array(wrapped-numpy-array) works diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 1b5ffb0447c..d03492564dc 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -48,7 +48,7 @@ def call_operator(fn, args, kwargs): ) -class ProxyNDarray(np.ndarray): +class ProxyNDarrayBase(np.ndarray): def __new__(cls, arr): if isinstance(arr, cp.ndarray): obj = np.asarray(arr.get()).view(cls) @@ -61,6 +61,9 @@ def __new__(cls, arr): "Unsupported array type. Must be numpy.ndarray or cupy.ndarray" ) + def __array_finalize__(self, obj): + self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", None) + def callers_module_name(): # Call f_back twice since this function adds an extra frame @@ -580,7 +583,7 @@ def _fsproxy_wrap(cls, value, func): need particular behaviour when wrapped up. """ if np.ndarray in cls.__mro__: - proxy = ProxyNDarray.__new__(cls, value) + proxy = ProxyNDarrayBase.__new__(cls, value) proxy._fsproxy_wrapped = value else: proxy = object.__new__(cls) From f1e6670996f519ccc6d0caa57480e7466235dc0e Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Wed, 17 Jul 2024 14:23:08 -0700 Subject: [PATCH 3/6] Add __array_ufunc__ to proxy array --- python/cudf/cudf/pandas/_wrappers/numpy.py | 3 ++- python/cudf/cudf/pandas/fast_slow_proxy.py | 19 +----------------- python/cudf/cudf/pandas/proxy_base.py | 23 ++++++++++++++++++++++ 3 files changed, 26 insertions(+), 19 deletions(-) create mode 100644 python/cudf/cudf/pandas/proxy_base.py diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index a720c20122e..eabea9713f1 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -10,11 +10,11 @@ import numpy.core.multiarray from ..fast_slow_proxy import ( - ProxyNDarrayBase, _FastSlowAttribute, make_final_proxy_type, make_intermediate_proxy_type, ) +from ..proxy_base import ProxyNDarrayBase from .common import ( array_interface, array_method, @@ -119,6 +119,7 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): "__arrow_array__": arrow_array_method, "__cuda_array_interface__": cuda_array_interface, "__array_interface__": array_interface, + "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), # ndarrays are unhashable "__hash__": None, # iter(cupy-array) produces an iterable of zero-dim device diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index d03492564dc..6e1693bf083 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -14,12 +14,12 @@ from enum import IntEnum from typing import Any, Callable, Literal, Mapping -import cupy as cp import numpy as np from ..options import _env_get_bool from ..testing import assert_eq from .annotation import nvtx +from .proxy_base import ProxyNDarrayBase def call_operator(fn, args, kwargs): @@ -48,23 +48,6 @@ def call_operator(fn, args, kwargs): ) -class ProxyNDarrayBase(np.ndarray): - def __new__(cls, arr): - if isinstance(arr, cp.ndarray): - obj = np.asarray(arr.get()).view(cls) - return obj - elif isinstance(arr, np.ndarray): - obj = np.asarray(arr).view(cls) - return obj - else: - raise TypeError( - "Unsupported array type. Must be numpy.ndarray or cupy.ndarray" - ) - - def __array_finalize__(self, obj): - self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", None) - - def callers_module_name(): # Call f_back twice since this function adds an extra frame return inspect.currentframe().f_back.f_back.f_globals["__name__"] diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py new file mode 100644 index 00000000000..61d9cde127c --- /dev/null +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -0,0 +1,23 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES. +# All rights reserved. +# SPDX-License-Identifier: Apache-2.0 + +import cupy as cp +import numpy as np + + +class ProxyNDarrayBase(np.ndarray): + def __new__(cls, arr): + if isinstance(arr, cp.ndarray): + obj = np.asarray(arr.get()).view(cls) + return obj + elif isinstance(arr, np.ndarray): + obj = np.asarray(arr).view(cls) + return obj + else: + raise TypeError( + "Unsupported array type. Must be numpy.ndarray or cupy.ndarray" + ) + + def __array_finalize__(self, obj): + self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", None) From 108895b4f88b34911f5ae9a277ec11dc400c07d3 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 19 Jul 2024 12:19:37 -0700 Subject: [PATCH 4/6] ensure __array_ufunc__ returns a real numpy array --- python/cudf/cudf/pandas/_wrappers/numpy.py | 1 - python/cudf/cudf/pandas/proxy_base.py | 4 ++++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index eabea9713f1..f8254109bd0 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -119,7 +119,6 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): "__arrow_array__": arrow_array_method, "__cuda_array_interface__": cuda_array_interface, "__array_interface__": array_interface, - "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), # ndarrays are unhashable "__hash__": None, # iter(cupy-array) produces an iterable of zero-dim device diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py index 61d9cde127c..6af37c4d1ba 100644 --- a/python/cudf/cudf/pandas/proxy_base.py +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -21,3 +21,7 @@ def __new__(cls, arr): def __array_finalize__(self, obj): self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", None) + + def __array_ufunc__(self, *args, **kwargs): + args = (args[0], args[1], np.asarray(args[2]), np.asarray(args[3])) + return super().__array_ufunc__(*args, **kwargs) From 4bc0d950f20bcf5554647614b0b2ba271a91fe40 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 13 Aug 2024 06:38:10 -0700 Subject: [PATCH 5/6] Add a test and TODO --- python/cudf/cudf/pandas/fast_slow_proxy.py | 18 ++++++++++++++++++ .../cudf/cudf_pandas_tests/test_cudf_pandas.py | 8 ++++++++ 2 files changed, 26 insertions(+) diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 53b0d8bf720..1bea9b91a7f 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -565,6 +565,7 @@ def _fsproxy_wrap(cls, value, func): _FinalProxy subclasses can override this classmethod if they need particular behaviour when wrapped up. """ + # TODO: Use _has_proxy_base_class to perform the check if np.ndarray in cls.__mro__: proxy = ProxyNDarrayBase.__new__(cls, value) proxy._fsproxy_wrapped = value @@ -1198,6 +1199,23 @@ def is_proxy_object(obj: Any) -> bool: return False +def _has_proxy_base_class(cls): + """Determine if an object is proxy object + + Parameters + ---------- + cls : type + The type to check. + + """ + return any(base in cls.__mro__ for base in PROXY_BASE_CLASSES) + + +PROXY_BASE_CLASSES: set[type] = { + ProxyNDarrayBase, +} + + NUMPY_TYPES: set[str] = set(np.sctypeDict.values()) diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 6292022d8e4..e5483fff913 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -1632,3 +1632,11 @@ def test_change_index_name(index): assert s.index.name == name assert df.index.name == name + + +def test_numpy_ndarray_isinstancecheck(series): + s1, s2 = series + arr1 = s1.values + arr2 = s2.values + assert isinstance(arr1, np.ndarray) + assert isinstance(arr2, np.ndarray) From 23365d24c855a0fb419b8e8ad6fe6873fd914107 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Thu, 15 Aug 2024 17:17:46 -0700 Subject: [PATCH 6/6] Address review --- python/cudf/cudf/pandas/_wrappers/numpy.py | 1 + python/cudf/cudf/pandas/fast_slow_proxy.py | 27 +++++++++------------- python/cudf/cudf/pandas/proxy_base.py | 4 ---- 3 files changed, 12 insertions(+), 20 deletions(-) diff --git a/python/cudf/cudf/pandas/_wrappers/numpy.py b/python/cudf/cudf/pandas/_wrappers/numpy.py index f8254109bd0..eabea9713f1 100644 --- a/python/cudf/cudf/pandas/_wrappers/numpy.py +++ b/python/cudf/cudf/pandas/_wrappers/numpy.py @@ -119,6 +119,7 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor): "__arrow_array__": arrow_array_method, "__cuda_array_interface__": cuda_array_interface, "__array_interface__": array_interface, + "__array_ufunc__": _FastSlowAttribute("__array_ufunc__"), # ndarrays are unhashable "__hash__": None, # iter(cupy-array) produces an iterable of zero-dim device diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py index 1bea9b91a7f..61aa6310082 100644 --- a/python/cudf/cudf/pandas/fast_slow_proxy.py +++ b/python/cudf/cudf/pandas/fast_slow_proxy.py @@ -565,13 +565,12 @@ def _fsproxy_wrap(cls, value, func): _FinalProxy subclasses can override this classmethod if they need particular behaviour when wrapped up. """ - # TODO: Use _has_proxy_base_class to perform the check - if np.ndarray in cls.__mro__: - proxy = ProxyNDarrayBase.__new__(cls, value) - proxy._fsproxy_wrapped = value + base_class = _get_proxy_base_class(cls) + if base_class is object: + proxy = base_class.__new__(cls) else: - proxy = object.__new__(cls) - proxy._fsproxy_wrapped = value + proxy = base_class.__new__(cls, value) + proxy._fsproxy_wrapped = value return proxy def __reduce__(self): @@ -1199,16 +1198,12 @@ def is_proxy_object(obj: Any) -> bool: return False -def _has_proxy_base_class(cls): - """Determine if an object is proxy object - - Parameters - ---------- - cls : type - The type to check. - - """ - return any(base in cls.__mro__ for base in PROXY_BASE_CLASSES) +def _get_proxy_base_class(cls): + """Returns the proxy base class if one exists""" + for proxy_class in PROXY_BASE_CLASSES: + if proxy_class in cls.__mro__: + return proxy_class + return object PROXY_BASE_CLASSES: set[type] = { diff --git a/python/cudf/cudf/pandas/proxy_base.py b/python/cudf/cudf/pandas/proxy_base.py index 6af37c4d1ba..61d9cde127c 100644 --- a/python/cudf/cudf/pandas/proxy_base.py +++ b/python/cudf/cudf/pandas/proxy_base.py @@ -21,7 +21,3 @@ def __new__(cls, arr): def __array_finalize__(self, obj): self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", None) - - def __array_ufunc__(self, *args, **kwargs): - args = (args[0], args[1], np.asarray(args[2]), np.asarray(args[3])) - return super().__array_ufunc__(*args, **kwargs)