Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Make isinstance check pass for proxy ndarrays #16601

Merged
merged 25 commits into from
Sep 5, 2024
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
7136fb9
Make isinstance check pass for proxy ndarrays
Matt711 Aug 19, 2024
dcc806f
make asarray use wrapped array
Matt711 Aug 20, 2024
e7b8948
tackle ufuncs
Matt711 Aug 21, 2024
a1bee53
refactor
Matt711 Aug 22, 2024
9a05ab1
Merge branch 'feat/ndarray-instance-check' of github.com:Matt711/cudf…
Matt711 Aug 22, 2024
59adfe7
Merge branch 'branch-24.10' into feat/ndarray-instance-check
Matt711 Aug 22, 2024
f0c33f9
Merge branch 'branch-24.10' into feat/ndarray-instance-check
Matt711 Aug 26, 2024
9a33199
Merge branch 'feat/ndarray-instance-check' of github.com:Matt711/cudf…
Matt711 Aug 26, 2024
40f3e14
monkeypatch np.dot
Matt711 Aug 26, 2024
216aeb1
device is a kwarg
Matt711 Aug 27, 2024
9134944
Merge branch 'branch-24.10' of github.com:rapidsai/cudf into feat/nda…
Matt711 Aug 27, 2024
1fb43f5
DtoH on instance creation
Matt711 Aug 28, 2024
e78adb0
merge conflicts
Matt711 Aug 29, 2024
ff73c61
Merge branch 'branch-24.10' of github.com:rapidsai/cudf into feat/nda…
Matt711 Aug 29, 2024
67f28c0
address review
Matt711 Aug 29, 2024
4ef0abb
Merge branch 'branch-24.10' of github.com:rapidsai/cudf into feat/nda…
Matt711 Aug 30, 2024
78bc30a
cleanup
Matt711 Sep 3, 2024
a4892be
Merge branch 'branch-24.10' of github.com:rapidsai/cudf into feat/nda…
Matt711 Sep 3, 2024
8c8bc3e
address review
Matt711 Sep 4, 2024
232c9c3
Merge branch 'branch-24.10' into feat/ndarray-instance-check
Matt711 Sep 4, 2024
9e7e3de
test third-party integration tests
Matt711 Sep 4, 2024
9eb8297
Merge branch 'feat/ndarray-instance-check' of github.com:Matt711/cudf…
Matt711 Sep 4, 2024
3eee8d1
Merge branch 'branch-24.10' into feat/ndarray-instance-check
galipremsagar Sep 4, 2024
e2047e0
remove pr job
Matt711 Sep 5, 2024
2ec3ad1
Merge branch 'branch-24.10' into feat/ndarray-instance-check
Matt711 Sep 5, 2024
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions .github/workflows/pr.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ jobs:
- unit-tests-cudf-pandas
- pandas-tests
- pandas-tests-diff
- third-party-integration-tests-cudf-pandas
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
if: always()
Expand Down Expand Up @@ -304,3 +305,14 @@ jobs:
node_type: cpu4
build_type: pull-request
run_script: "ci/cudf_pandas_scripts/pandas-tests/diff.sh"
third-party-integration-tests-cudf-pandas:
Matt711 marked this conversation as resolved.
Show resolved Hide resolved
needs: wheel-build-cudf
secrets: inherit
uses: rapidsai/shared-workflows/.github/workflows/[email protected]
with:
build_type: pull-request
node_type: "gpu-v100-latest-1"
arch: "amd64"
container_image: "rapidsai/ci-conda:latest"
run_script: |
ci/cudf_pandas_scripts/third-party-integration/test.sh python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml
23 changes: 23 additions & 0 deletions python/cudf/cudf/pandas/_wrappers/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,13 @@
from packaging import version

from ..fast_slow_proxy import (
_fast_slow_function_call,
_FastSlowAttribute,
is_proxy_object,
make_final_proxy_type,
make_intermediate_proxy_type,
)
from ..proxy_base import ProxyNDarrayBase
from .common import (
array_interface,
array_method,
Expand Down Expand Up @@ -105,18 +108,38 @@ def wrap_ndarray(cls, arr: cupy.ndarray | numpy.ndarray, constructor):
return super(cls, cls)._fsproxy_wrap(arr, constructor)


def ndarray__array_ufunc__(self, ufunc, method, *inputs, **kwargs):
    """Implement ``__array_ufunc__`` for the proxy ``ndarray`` type.

    The requested ufunc method (``getattr(ufunc, method)``) is invoked
    through ``_fast_slow_function_call`` with the given inputs and keyword
    arguments.

    Returns
    -------
    * If the outcome is a tuple whose first element is a proxy object
      wrapping a ``numpy.ndarray``: a tuple with every element passed
      through ``numpy.asarray``.
    * If the outcome is itself a proxy object wrapping a
      ``numpy.ndarray``: ``numpy.asarray`` of it.
    * Otherwise: the outcome, unchanged.
    """

    def _wraps_numpy_array(candidate):
        # True only for proxy objects whose wrapped value is a numpy array.
        return is_proxy_object(candidate) and isinstance(
            candidate._fsproxy_wrapped, numpy.ndarray
        )

    bound_method = getattr(ufunc, method)
    result, _unused = _fast_slow_function_call(
        bound_method, *inputs, **kwargs
    )

    if isinstance(result, tuple):
        # Only the first element decides whether the tuple is converted.
        if _wraps_numpy_array(result[0]):
            return tuple(numpy.asarray(item) for item in result)
        return result

    if _wraps_numpy_array(result):
        return numpy.asarray(result)
    return result


ndarray = make_final_proxy_type(
"ndarray",
cupy.ndarray,
numpy.ndarray,
fast_to_slow=cupy.ndarray.get,
slow_to_fast=cupy.asarray,
bases=(ProxyNDarrayBase,),
additional_attributes={
"__array__": array_method,
# So that pa.array(wrapped-numpy-array) works
"__arrow_array__": arrow_array_method,
"__cuda_array_interface__": cuda_array_interface,
"__array_interface__": array_interface,
"__array_ufunc__": ndarray__array_ufunc__,
# ndarrays are unhashable
"__hash__": None,
# iter(cupy-array) produces an iterable of zero-dim device
Expand Down
26 changes: 25 additions & 1 deletion python/cudf/cudf/pandas/fast_slow_proxy.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from ..options import _env_get_bool
from ..testing import assert_eq
from .annotation import nvtx
from .proxy_base import ProxyNDarrayBase


def call_operator(fn, args, kwargs):
Expand Down Expand Up @@ -564,7 +565,17 @@ def _fsproxy_wrap(cls, value, func):
_FinalProxy subclasses can override this classmethod if they
need particular behaviour when wrapped up.
"""
proxy = object.__new__(cls)
# TODO: Replace the if-elif-else using singledispatch helper function
base_class = _get_proxy_base_class(cls)
if base_class is object:
proxy = base_class.__new__(cls)
elif base_class is ProxyNDarrayBase:
proxy = base_class.__new__(cls, value)
else:
raise TypeError(
f"Cannot create an proxy instance of {cls.__name__} using base class {base_class.__name__}. "
f"Expected either 'object' or another type in 'PROXY_BASE_CLASSES'"
)
proxy._fsproxy_wrapped = value
return proxy

Expand Down Expand Up @@ -1193,6 +1204,19 @@ def is_proxy_object(obj: Any) -> bool:
return False


def _get_proxy_base_class(cls):
    """Return the entry of ``PROXY_BASE_CLASSES`` found in ``cls.__mro__``.

    Falls back to ``object`` when none of the registered proxy base
    classes appears in the method resolution order of ``cls``.
    """
    matches = (
        candidate
        for candidate in PROXY_BASE_CLASSES
        if candidate in cls.__mro__
    )
    return next(matches, object)


# Registry of base classes that proxy types may inherit from.  It is
# scanned by ``_get_proxy_base_class`` against a class's ``__mro__``.
PROXY_BASE_CLASSES: set[type] = {
ProxyNDarrayBase,
}


# Set built from the values of numpy's ``sctypeDict`` mapping.
# NOTE(review): annotated as ``set[str]``, but the values of
# ``np.sctypeDict`` look like scalar type objects rather than strings --
# confirm and adjust the annotation if so.
NUMPY_TYPES: set[str] = set(np.sctypeDict.values())


Expand Down
22 changes: 22 additions & 0 deletions python/cudf/cudf/pandas/proxy_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# SPDX-FileCopyrightText: Copyright (c) 2024, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

import cupy as cp
import numpy as np


class ProxyNDarrayBase(np.ndarray):
    """``numpy.ndarray`` subclass intended as a base class for array proxies.

    Instances are views of this class over a host (numpy) array and carry a
    ``_fsproxy_wrapped`` attribute that ``__array_finalize__`` propagates.
    """

    def __new__(cls, arr):
        """Build a ``cls`` view over ``arr``.

        Parameters
        ----------
        arr : numpy.ndarray or cupy.ndarray
            A ``cupy.ndarray`` is first fetched with ``.get()``.

        Raises
        ------
        TypeError
            If ``arr`` is neither a ``cupy.ndarray`` nor a ``numpy.ndarray``.
        """
        host_array = arr.get() if isinstance(arr, cp.ndarray) else arr
        if isinstance(host_array, np.ndarray):
            return np.asarray(host_array, dtype=host_array.dtype).view(cls)
        raise TypeError(
            "Unsupported array type. Must be numpy.ndarray or cupy.ndarray"
        )

    def __array_finalize__(self, obj):
        """Record the wrapped object on newly created views.

        When ``obj`` is not ``None``, ``_fsproxy_wrapped`` is taken from
        ``obj`` if it has that attribute; otherwise ``obj`` itself is stored.
        """
        if obj is not None:
            self._fsproxy_wrapped = getattr(obj, "_fsproxy_wrapped", obj)
50 changes: 49 additions & 1 deletion python/cudf/cudf_pandas_tests/test_cudf_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,19 @@
import types
from io import BytesIO, StringIO

import cupy as cp
import jupyter_client
import nbformat
import numpy as np
import pyarrow as pa
import pytest
from nbconvert.preprocessors import ExecutePreprocessor
from numba import NumbaDeprecationWarning
from numba import NumbaDeprecationWarning, vectorize
from pytz import utc

from cudf.pandas import LOADED, Profiler
from cudf.pandas.fast_slow_proxy import _Unusable, is_proxy_object
from cudf.testing import assert_eq

if not LOADED:
raise ImportError("These tests must be run with cudf.pandas loaded")
Expand Down Expand Up @@ -1686,3 +1688,49 @@ def test_notebook_slow_repr():
assert (
string in html_result
), f"Expected string {string} not found in the output"


def test_numpy_ndarray_isinstancecheck(array):
    """Both arrays from the ``array`` fixture pass ``isinstance(..., np.ndarray)``."""
    first, second = array
    for candidate in (first, second):
        assert isinstance(candidate, np.ndarray)


def test_numpy_ndarray_np_ufunc(array):
    """A ``np.vectorize``-d function gives equal results for both fixture arrays."""
    first, second = array

    @np.vectorize
    def add_one_ufunc(arr):
        return arr + 1

    result_first = add_one_ufunc(first)
    result_second = add_one_ufunc(second)
    assert_eq(result_first, result_second)


def test_numpy_ndarray_cp_ufunc(array):
    """A ``cp.vectorize``-d function gives equal results for both fixture arrays.

    The first array is passed through ``cp.asarray`` before the call; the
    second is passed to the vectorized function directly.
    """
    first, second = array

    @cp.vectorize
    def add_one_ufunc(arr):
        return arr + 1

    first_on_cupy = cp.asarray(first)
    result_first = add_one_ufunc(first_on_cupy)
    result_second = add_one_ufunc(second)
    assert_eq(result_first, result_second)


def test_numpy_ndarray_numba_ufunc(array):
    """A numba ``vectorize``-d function gives equal results for both fixture arrays."""
    first, second = array

    @vectorize
    def add_one_ufunc(arr):
        return arr + 1

    result_first = add_one_ufunc(first)
    result_second = add_one_ufunc(second)
    assert_eq(result_first, result_second)


def test_numpy_ndarray_numba_cuda_ufunc(array):
    """A numba ``vectorize`` function with ``target="cuda"`` agrees on both arrays.

    Each result is passed through ``cp.asarray`` before the comparison.
    """
    first, second = array

    @vectorize(["int64(int64)"], target="cuda")
    def add_one_ufunc(a):
        return a + 1

    result_first = cp.asarray(add_one_ufunc(first))
    result_second = cp.asarray(add_one_ufunc(second))
    assert_eq(result_first, result_second)
Matt711 marked this conversation as resolved.
Show resolved Hide resolved
Loading