Skip to content

Commit

Permalink
migrate PackedColumns
Browse files Browse the repository at this point in the history
  • Loading branch information
Matt711 committed Oct 18, 2024
1 parent 62e8247 commit 5d8f067
Show file tree
Hide file tree
Showing 4 changed files with 37 additions and 15 deletions.
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/copying.pxd
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from pylibcudf.libcudf.contiguous_split cimport packed_columns

# from pylibcudf.libcudf.contiguous_split cimport packed_columns
cimport pylibcudf as plc

cdef class _CPackedColumns:
cdef packed_columns c_obj
cdef plc.contiguous_split.PackedColumns c_obj
cdef object column_names
cdef object column_dtypes
cdef object index_names
35 changes: 24 additions & 11 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type

from cudf._lib.utils cimport columns_from_pylibcudf_table, data_from_table_view
import pylibcudf as plc

# workaround for https://github.com/cython/cython/issues/3885
ctypedef const scalar constscalar
Expand Down Expand Up @@ -376,29 +377,38 @@ cdef class _CPackedColumns:
or input_table.index.stop != len(input_table)
or input_table.index.step != 1
):
input_table_view = table_view_from_table(input_table)
# input_table_view = table_view_from_table(input_table)
columns = input_table._index._columns + input_table._columns
p.index_names = input_table._index_names
else:
input_table_view = table_view_from_table(
input_table, ignore_index=True)
# input_table_view = table_view_from_table(
# input_table, ignore_index=True)
columns = input_table._columns

p.column_names = input_table._column_names
p.column_dtypes = {}
for name, col in input_table._column_labels_and_values:
if isinstance(col.dtype, cudf.core.dtypes._BaseDtype):
p.column_dtypes[name] = col.dtype

p.c_obj = move(cpp_contiguous_split.pack(input_table_view))
# p.c_obj = move(cpp_contiguous_split.pack(input_table_view))
p.c_obj = plc.contiguous_split.pack(
pylibcudf.Table(
[
col.to_pylibcudf(mode="read") for col in columns
]
)
)

return p

@property
def gpu_data_ptr(self):
return int(<uintptr_t>self.c_obj.gpu_data.get()[0].data())
return self.c_obj.gpu_data_ptr

@property
def gpu_data_size(self):
return int(<size_t>self.c_obj.gpu_data.get()[0].size())
return self.c_obj.gpu_data_size

def serialize(self):
header = {}
Expand All @@ -416,10 +426,10 @@ cdef class _CPackedColumns:

header["column-names"] = self.column_names
header["index-names"] = self.index_names
if self.c_obj.metadata.get()[0].data() != NULL:
if self.c_obj.c_obj.get().metadata.get()[0].data() != NULL:
header["metadata"] = list(
<uint8_t[:self.c_obj.metadata.get()[0].size()]>
self.c_obj.metadata.get()[0].data()
<uint8_t[:self.c_obj.c_obj.get().metadata.get()[0].size()]>
self.c_obj.c_obj.get().metadata.get()[0].data()
)

column_dtypes = {}
Expand Down Expand Up @@ -453,7 +463,10 @@ cdef class _CPackedColumns:
)
data.gpu_data = move(dbuf.c_obj)

p.c_obj = move(data)
# p.c_obj = move(data)
p.c_obj = plc.contiguous_split.PackedColumns.from_libcudf(
move(unique_ptr[cpp_contiguous_split.packed_columns](&data))
)
p.column_names = header["column-names"]
p.index_names = header["index-names"]

Expand All @@ -469,7 +482,7 @@ cdef class _CPackedColumns:

def unpack(self):
output_table = cudf.DataFrame._from_data(*data_from_table_view(
cpp_contiguous_split.unpack(self.c_obj),
plc.contiguous_split.unpack(self.c_obj).view(),
self,
self.column_names,
self.index_names
Expand Down
10 changes: 9 additions & 1 deletion python/pylibcudf/pylibcudf/contiguous_split.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libc.stdint cimport uint8_t
from libc.stdint cimport uint8_t, uintptr_t
from libcpp.memory cimport make_unique, unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector
Expand Down Expand Up @@ -100,6 +100,14 @@ cdef class PackedColumns:
DeviceBuffer.c_from_unique_ptr(move(dereference(self.c_obj).gpu_data))
)
)

@property
def gpu_data_ptr(self):
    """Address of the packed GPU data buffer, as a Python ``int``.

    The value is the pointer returned by ``data()`` on the ``gpu_data``
    member of the wrapped ``packed_columns`` object.
    """
    # NOTE(review): elsewhere in this class `gpu_data` is moved out of
    # `c_obj`; presumably this must not be accessed afterwards -- confirm.
    return int(
        <uintptr_t>dereference(dereference(self.c_obj).gpu_data).data()
    )

@property
def gpu_data_size(self):
    """Size of the packed GPU data buffer, as a Python ``int``.

    The value is what ``size()`` reports for the ``gpu_data`` member of
    the wrapped ``packed_columns`` object.
    """
    # NOTE(review): elsewhere in this class `gpu_data` is moved out of
    # `c_obj`; presumably this must not be accessed afterwards -- confirm.
    return int(
        <size_t>dereference(dereference(self.c_obj).gpu_data).size()
    )


cpdef PackedColumns pack(Table input):
Expand Down
1 change: 1 addition & 0 deletions python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from libc.stdint cimport uint8_t
from libcpp.memory cimport unique_ptr
from libcpp cimport bool
from libcpp.vector cimport vector
from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport size_type
Expand Down

0 comments on commit 5d8f067

Please sign in to comment.