Skip to content

Commit

Permalink
Remove cuDF dependency from pylibcudf column from_device tests (#16441)
Browse files Browse the repository at this point in the history
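This removes the need to `import cudf` in `test_column_from_device`, and removes a runtime dependency on numpy from the associated pylibcudf `Column` method, which now obtains the element size from the new `size_of` function instead of `np.dtype(typestr).itemsize`.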

Authors:
  - https://github.com/brandon-b-miller
  - Thomas Li (https://github.com/lithomas1)

Approvers:
  - Thomas Li (https://github.com/lithomas1)
  - Lawrence Mitchell (https://github.com/wence-)

URL: #16441
  • Loading branch information
brandon-b-miller authored Jul 31, 2024
1 parent dab8660 commit be84225
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 14 deletions.
9 changes: 4 additions & 5 deletions python/cudf/cudf/_lib/pylibcudf/column.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,11 @@ from cudf._lib.pylibcudf.libcudf.types cimport size_type

from .gpumemoryview cimport gpumemoryview
from .scalar cimport Scalar
from .types cimport DataType, type_id
from .types cimport DataType, size_of, type_id
from .utils cimport int_to_bitmask_ptr, int_to_void_ptr

import functools

import numpy as np


cdef class Column:
"""A container of nullable device data as a column of elements.
Expand Down Expand Up @@ -303,14 +301,15 @@ cdef class Column:
raise ValueError("mask not yet supported.")

typestr = iface['typestr'][1:]
data_type = _datatype_from_dtype_desc(typestr)

if not is_c_contiguous(
iface['shape'],
iface['strides'],
np.dtype(typestr).itemsize
size_of(data_type)
):
raise ValueError("Data must be C-contiguous")

data_type = _datatype_from_dtype_desc(typestr)
size = iface['shape'][0]
return Column(
data_type,
Expand Down
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/libcudf/types.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -98,3 +98,5 @@ cdef extern from "cudf/types.hpp" namespace "cudf" nogil:
HIGHER
MIDPOINT
NEAREST

cdef size_type size_of(data_type t) except +
2 changes: 2 additions & 0 deletions python/cudf/cudf/_lib/pylibcudf/types.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -27,3 +27,5 @@ cdef class DataType:

@staticmethod
cdef DataType from_libcudf(data_type dt)

cpdef size_type size_of(DataType t)
16 changes: 15 additions & 1 deletion python/cudf/cudf/_lib/pylibcudf/types.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,12 @@

from libc.stdint cimport int32_t

from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type, type_id
from cudf._lib.pylibcudf.libcudf.types cimport (
data_type,
size_of as cpp_size_of,
size_type,
type_id,
)
from cudf._lib.pylibcudf.libcudf.utilities.type_dispatcher cimport type_to_id

from cudf._lib.pylibcudf.libcudf.types import type_id as TypeId # no-cython-lint, isort:skip
Expand Down Expand Up @@ -69,6 +74,15 @@ cdef class DataType:
ret.c_obj = dt
return ret

cpdef size_type size_of(DataType t):
    """Return the number of bytes occupied by one element of type ``t``.

    Only fixed-width types are supported.

    This forwards ``t.c_obj`` to the libcudf function; for details, see
    :cpp:func:`size_of`.
    """
    cdef size_type element_size
    # The libcudf call does not touch Python objects, so it runs without the
    # GIL.
    with nogil:
        element_size = cpp_size_of(t.c_obj)
    return element_size

SIZE_TYPE = DataType(type_to_id[size_type]())
SIZE_TYPE_ID = SIZE_TYPE.id()
39 changes: 31 additions & 8 deletions python/cudf/cudf/pylibcudf_tests/test_column_from_device.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@
import pytest
from utils import assert_column_eq

import cudf
import rmm

from cudf._lib import pylibcudf as plc

VALID_TYPES = [
Expand Down Expand Up @@ -35,17 +36,39 @@ def valid_type(request):
return request.param


class DataBuffer:
    """Device buffer wrapper that exposes ``__cuda_array_interface__``.

    The host data in ``obj`` is copied to device memory with
    ``rmm.DeviceBuffer.to_device``. ``dtype`` describes how the resulting
    bytes are presented as a one-dimensional array of elements.

    Parameters
    ----------
    obj
        Host data accepted by ``rmm.DeviceBuffer.to_device``.
    dtype
        Object providing ``itemsize`` (bytes per element) and ``str`` (the
        type string reported through the interface), e.g. a numpy dtype.
    """

    def __init__(self, obj, dtype):
        self.obj = rmm.DeviceBuffer.to_device(obj)
        self.dtype = dtype
        # Floor division keeps the element count an exact integer instead of
        # going through a float as ``int(a / b)`` does.
        self.shape = (len(self.obj) // self.dtype.itemsize,)
        self.strides = (self.dtype.itemsize,)
        self.typestr = self.dtype.str

    @property
    def __cuda_array_interface__(self):
        """Interface dict built from the device buffer and the stored layout.

        ``data`` is taken from the underlying device buffer's own interface;
        ``shape``, ``strides`` and ``typestr`` are the values computed in
        ``__init__``.
        """
        return {
            "data": self.obj.__cuda_array_interface__["data"],
            "shape": self.shape,
            "strides": self.strides,
            "typestr": self.typestr,
            "version": 0,
        }


@pytest.fixture
def valid_column(valid_type):
def input_column(valid_type):
if valid_type == pa.bool_():
return pa.array([True, False, True], type=valid_type)
return pa.array([1, 2, 3], type=valid_type)


def test_from_cuda_array_interface(valid_column):
col = plc.column.Column.from_cuda_array_interface_obj(
cudf.Series(valid_column)
)
expect = valid_column
@pytest.fixture
def iface_obj(input_column):
    """Wrap the values of ``input_column`` in a ``DataBuffer``.

    The column is converted to a numpy array, viewed as ``uint8``, and handed
    to ``DataBuffer`` together with the array's original dtype.
    """
    host_values = input_column.to_numpy(zero_copy_only=False)
    raw_bytes = host_values.view("uint8")
    return DataBuffer(raw_bytes, host_values.dtype)


def test_from_cuda_array_interface(input_column, iface_obj):
col = plc.column.Column.from_cuda_array_interface_obj(iface_obj)

assert_column_eq(expect, col)
assert_column_eq(input_column, col)

0 comments on commit be84225

Please sign in to comment.