From 5d8f067a5492c216d1de1894f8e6524dcec9bb99 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 18 Oct 2024 09:23:59 -0700 Subject: [PATCH] migrate PackedColumns --- python/cudf/cudf/_lib/copying.pxd | 6 ++-- python/cudf/cudf/_lib/copying.pyx | 35 +++++++++++++------ .../pylibcudf/pylibcudf/contiguous_split.pyx | 10 +++++- .../pylibcudf/libcudf/contiguous_split.pxd | 1 + 4 files changed, 37 insertions(+), 15 deletions(-) diff --git a/python/cudf/cudf/_lib/copying.pxd b/python/cudf/cudf/_lib/copying.pxd index 14c7d2066d8..f3450fa31a4 100644 --- a/python/cudf/cudf/_lib/copying.pxd +++ b/python/cudf/cudf/_lib/copying.pxd @@ -1,10 +1,10 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. -from pylibcudf.libcudf.contiguous_split cimport packed_columns - +# from pylibcudf.libcudf.contiguous_split cimport packed_columns +cimport pylibcudf as plc cdef class _CPackedColumns: - cdef packed_columns c_obj + cdef plc.contiguous_split.PackedColumns c_obj cdef object column_names cdef object column_dtypes cdef object index_names diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 30353c4be6c..265b92ff645 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -38,6 +38,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport size_type from cudf._lib.utils cimport columns_from_pylibcudf_table, data_from_table_view +import pylibcudf as plc # workaround for https://github.com/cython/cython/issues/3885 ctypedef const scalar constscalar @@ -376,11 +377,13 @@ cdef class _CPackedColumns: or input_table.index.stop != len(input_table) or input_table.index.step != 1 ): - input_table_view = table_view_from_table(input_table) + # input_table_view = table_view_from_table(input_table) + columns = input_table._index._columns + input_table._columns p.index_names = input_table._index_names else: - input_table_view = table_view_from_table( - input_table, ignore_index=True) + # 
input_table_view = table_view_from_table( + # input_table, ignore_index=True) + columns = input_table._columns p.column_names = input_table._column_names p.column_dtypes = {} @@ -388,17 +391,24 @@ cdef class _CPackedColumns: if isinstance(col.dtype, cudf.core.dtypes._BaseDtype): p.column_dtypes[name] = col.dtype - p.c_obj = move(cpp_contiguous_split.pack(input_table_view)) + # p.c_obj = move(cpp_contiguous_split.pack(input_table_view)) + p.c_obj = plc.contiguous_split.pack( + pylibcudf.Table( + [ + col.to_pylibcudf(mode="read") for col in columns + ] + ) + ) return p @property def gpu_data_ptr(self): - return int(self.c_obj.gpu_data.get()[0].data()) + return self.c_obj.gpu_data_ptr @property def gpu_data_size(self): - return int(self.c_obj.gpu_data.get()[0].size()) + return self.c_obj.gpu_data_size def serialize(self): header = {} @@ -416,10 +426,10 @@ cdef class _CPackedColumns: header["column-names"] = self.column_names header["index-names"] = self.index_names - if self.c_obj.metadata.get()[0].data() != NULL: + if self.c_obj.c_obj.get().metadata.get()[0].data() != NULL: header["metadata"] = list( - - self.c_obj.metadata.get()[0].data() + + self.c_obj.c_obj.get().metadata.get()[0].data() ) column_dtypes = {} @@ -453,7 +463,10 @@ cdef class _CPackedColumns: ) data.gpu_data = move(dbuf.c_obj) - p.c_obj = move(data) + # p.c_obj = move(data) + p.c_obj = plc.contiguous_split.PackedColumns.from_libcudf( + move(unique_ptr[cpp_contiguous_split.packed_columns](&data)) + ) p.column_names = header["column-names"] p.index_names = header["index-names"] @@ -469,7 +482,7 @@ cdef class _CPackedColumns: def unpack(self): output_table = cudf.DataFrame._from_data(*data_from_table_view( - cpp_contiguous_split.unpack(self.c_obj), + plc.contiguous_split.unpack(self.c_obj).view(), self, self.column_names, self.index_names diff --git a/python/pylibcudf/pylibcudf/contiguous_split.pyx b/python/pylibcudf/pylibcudf/contiguous_split.pyx index ed926a3fcc0..f213b9fce48 100644 --- 
a/python/pylibcudf/pylibcudf/contiguous_split.pyx +++ b/python/pylibcudf/pylibcudf/contiguous_split.pyx @@ -1,7 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from cython.operator cimport dereference -from libc.stdint cimport uint8_t +from libc.stdint cimport uint8_t, uintptr_t from libcpp.memory cimport make_unique, unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector @@ -100,6 +100,14 @@ cdef class PackedColumns: DeviceBuffer.c_from_unique_ptr(move(dereference(self.c_obj).gpu_data)) ) ) + + @property + def gpu_data_ptr(self): + return int(self.c_obj.get().gpu_data.get()[0].data()) + + @property + def gpu_data_size(self): + return int(self.c_obj.get().gpu_data.get()[0].size()) cpdef PackedColumns pack(Table input): diff --git a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd index 9bb5e0725d1..dec26cd7090 100644 --- a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd @@ -2,6 +2,7 @@ from libc.stdint cimport uint8_t from libcpp.memory cimport unique_ptr +from libcpp cimport bool from libcpp.vector cimport vector from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type