Skip to content

Commit

Permalink
unpack_from_memoryviews
Browse files Browse the repository at this point in the history
  • Loading branch information
madsbk committed Oct 9, 2024
1 parent e7140fb commit 851e725
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 1 deletion.
3 changes: 3 additions & 0 deletions python/pylibcudf/pylibcudf/contiguous_split.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
from libcpp.memory cimport unique_ptr
from pylibcudf.libcudf.contiguous_split cimport packed_columns

from .gpumemoryview cimport gpumemoryview
from .table cimport Table


Expand All @@ -15,3 +16,5 @@ cdef class PackedColumns:
cpdef PackedColumns pack(Table input)

cpdef Table unpack(PackedColumns input)

cpdef Table unpack_from_memoryviews(memoryview metadata, gpumemoryview gpu_data)
39 changes: 38 additions & 1 deletion python/pylibcudf/pylibcudf/contiguous_split.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view

from .gpumemoryview cimport gpumemoryview
from .table cimport Table
from .utils cimport int_to_void_ptr

from types import SimpleNamespace

Expand Down Expand Up @@ -123,5 +124,41 @@ cpdef Table unpack(PackedColumns input):
Copy of the packed columns.
"""
cdef table_view v = cpp_unpack(dereference(input.c_obj))
cdef unique_ptr[table] t = make_unique[table](v)
cdef unique_ptr[table] t = make_unique[table](v) # Copy
return Table.from_libcudf(move(t))


cpdef Table unpack_from_memoryviews(memoryview metadata, gpumemoryview gpu_data):
    """Deserialize the result of `pack`.

    Copies the result of a serialized table into a table.
    Contrary to the libcudf C++ function, the returned table is a copy
    of the serialized data.

    For details, see :cpp:func:`cudf::unpack`.

    Parameters
    ----------
    metadata : memoryview
        The packed metadata to unpack.
    gpu_data : gpumemoryview
        The packed gpu_data to unpack.

    Returns
    -------
    Table
        Copy of the packed columns.

    Raises
    ------
    ValueError
        If ``metadata`` is empty but ``gpu_data`` reports a non-null
        device pointer.
    """
    if metadata.nbytes == 0:
        # Empty metadata is treated as an empty table. Handle it up front:
        # taking the address of element 0 below needs at least one byte.
        if gpu_data.__cuda_array_interface__["data"][0] != 0:
            raise ValueError(
                "Expected an empty gpu_data when unpacking an empty table"
            )
        return Table.from_libcudf(make_unique[table](table_view()))

    # Extract the raw data pointers
    cdef const uint8_t[::1] _metadata = metadata
    cdef const uint8_t* metadata_ptr = &_metadata[0]
    cdef const uint8_t* gpu_data_ptr = <uint8_t*>int_to_void_ptr(gpu_data.ptr)

    # `cpp_unpack` returns a view; constructing a `table` from it makes the
    # copy that the returned Table owns.
    cdef table_view v = cpp_unpack(metadata_ptr, gpu_data_ptr)
    cdef unique_ptr[table] t = make_unique[table](v)  # Copy
    return Table.from_libcudf(move(t))
5 changes: 5 additions & 0 deletions python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -26,3 +26,8 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil:
cdef packed_columns pack (const table_view& input) except +

cdef table_view unpack (const packed_columns& input) except +

cdef table_view unpack (
const uint8_t* metadata,
const uint8_t* gpu_data
) except +
9 changes: 9 additions & 0 deletions python/pylibcudf/pylibcudf/tests/test_contiguous_split.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

import cupy
import pyarrow as pa
import pylibcudf as plc
import pytest
Expand All @@ -19,5 +20,13 @@
def test_pack_and_unpack(arrow_tbl):
    """Round-trip a table through ``pack`` and both unpack entry points."""
    source_tbl = plc.interop.from_arrow(arrow_tbl)
    packed = plc.contiguous_split.pack(source_tbl)

    # Duplicate both buffers so the unpack below works on independent
    # copies, standing in for data that has travelled through IO.
    host_metadata = memoryview(bytes(packed.metadata))
    device_copy = cupy.array(packed.gpu_data, copy=True)
    from_views = plc.contiguous_split.unpack_from_memoryviews(
        host_metadata, plc.gpumemoryview(device_copy)
    )
    assert_table_eq(arrow_tbl, from_views)

    # The original packed object must still unpack to the same table.
    from_packed = plc.contiguous_split.unpack(packed)
    assert_table_eq(arrow_tbl, from_packed)

0 comments on commit 851e725

Please sign in to comment.