From 9d5491b667e9ed96b19fc38c4014f995f064fff5 Mon Sep 17 00:00:00 2001 From: "Mads R. B. Kristensen" Date: Wed, 9 Oct 2024 13:02:45 +0200 Subject: [PATCH] unpack_from_memoryviews --- .../pylibcudf/pylibcudf/contiguous_split.pxd | 3 ++ .../pylibcudf/pylibcudf/contiguous_split.pyx | 39 ++++++++++++++++++- .../pylibcudf/libcudf/contiguous_split.pxd | 5 +++ .../pylibcudf/tests/test_contiguous_split.py | 9 +++++ 4 files changed, 55 insertions(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/contiguous_split.pxd index 27453126ca8..2a10cb5b3d5 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/contiguous_split.pxd @@ -3,6 +3,7 @@ from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.contiguous_split cimport packed_columns +from .gpumemoryview cimport gpumemoryview from .table cimport Table @@ -15,3 +16,5 @@ cdef class PackedColumns: cpdef PackedColumns pack(Table input) cpdef Table unpack(PackedColumns input) + +cpdef Table unpack_from_memoryviews(memoryview metadata, gpumemoryview gpu_data) diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx index 779c085a577..767f9a7490a 100644 --- a/python/pylibcudf/pylibcudf/contiguous_split.pyx +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx @@ -14,6 +14,7 @@ from pylibcudf.libcudf.table.table_view cimport table_view from .gpumemoryview cimport gpumemoryview from .table cimport Table +from .utils cimport int_to_void_ptr from types import SimpleNamespace @@ -123,5 +124,41 @@ cpdef Table unpack(PackedColumns input): Copy of the packed columns. 
""" cdef table_view v = cpp_unpack(dereference(input.c_obj)) - cdef unique_ptr[table] t = make_unique[table](v) + cdef unique_ptr[table] t = make_unique[table](v) # Copy + return Table.from_libcudf(move(t)) + +
+cpdef Table unpack_from_memoryviews(memoryview metadata, gpumemoryview gpu_data):
+    """Deserialize the result of `pack`.
+
+    Copies the serialized table into a new table. Contrary to the libcudf
+    C++ function, the returned table is a copy of the serialized data.
+    Raises ValueError if `metadata` is empty but `gpu_data` is non-null.
+
+    For details, see :cpp:func:`cudf::unpack`.
+
+    Parameters
+    ----------
+    metadata : memoryview
+        The packed metadata to unpack.
+    gpu_data : gpumemoryview
+        The packed gpu_data to unpack.
+
+    Returns
+    -------
+    Table
+        Copy of the packed columns.
+    """
+    if metadata.nbytes == 0:
+        if gpu_data.__cuda_array_interface__["data"][0] != 0:
+            raise ValueError("expect an empty gpu_data when unpacking an empty table")
+        return Table.from_libcudf(make_unique[table](table_view()))
+
+    # Raw pointers for cudf::unpack (metadata is non-empty here, so [0] is valid)
+    cdef const uint8_t[::1] _metadata = metadata
+    cdef const uint8_t* metadata_ptr = &_metadata[0]
+    cdef const uint8_t* gpu_data_ptr = <const uint8_t*>int_to_void_ptr(gpu_data.ptr)
+
+    cdef table_view v = cpp_unpack(metadata_ptr, gpu_data_ptr)
+    cdef unique_ptr[table] t = make_unique[table](v)  # Copy
     return Table.from_libcudf(move(t))
diff --git a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd index cadac6a0022..e19e7fff334 100644 --- a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd @@ -26,3 +26,8 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil: cdef packed_columns pack (const table_view& input) except + cdef table_view unpack (const packed_columns& input) except + + + cdef table_view unpack ( + const uint8_t* metadata, + const uint8_t* gpu_data + ) except + diff --git a/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py 
b/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py index f5746dea640..864189de093 100644 --- a/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py +++ b/python/pylibcudf/pylibcudf/tests/test_contiguous_split.py @@ -1,5 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +import cupy import pyarrow as pa import pylibcudf as plc import pytest @@ -19,5 +20,13 @@ def test_pack_and_unpack(arrow_tbl): plc_tbl = plc.interop.from_arrow(arrow_tbl) packed = plc.contiguous_split.pack(plc_tbl) + res = plc.contiguous_split.unpack(packed) assert_table_eq(arrow_tbl, res) + + # Copy the buffers to simulate IO + metadata = memoryview(bytes(packed.metadata)) + gpu_data = plc.gpumemoryview(cupy.array(packed.gpu_data, copy=True)) + + res = plc.contiguous_split.unpack_from_memoryviews(metadata, gpu_data) + assert_table_eq(arrow_tbl, res)