Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] Implement contiguous_split in pylibcudf #16953

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions python/cudf/cudf/_lib/copying.pxd
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
# Copyright (c) 2021-2024, NVIDIA CORPORATION.

from pylibcudf.libcudf.contiguous_split cimport packed_columns

# from pylibcudf.libcudf.contiguous_split cimport packed_columns
cimport pylibcudf as plc

cdef class _CPackedColumns:
cdef packed_columns c_obj
cdef plc.contiguous_split.PackedColumns c_obj
cdef object column_names
cdef object column_dtypes
cdef object index_names
35 changes: 24 additions & 11 deletions python/cudf/cudf/_lib/copying.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar
from pylibcudf.libcudf.types cimport size_type

from cudf._lib.utils cimport columns_from_pylibcudf_table, data_from_table_view
import pylibcudf as plc

# workaround for https://github.com/cython/cython/issues/3885
ctypedef const scalar constscalar
Expand Down Expand Up @@ -352,29 +353,38 @@ cdef class _CPackedColumns:
or input_table.index.stop != len(input_table)
or input_table.index.step != 1
):
input_table_view = table_view_from_table(input_table)
# input_table_view = table_view_from_table(input_table)
columns = input_table._index._columns + input_table._columns
p.index_names = input_table._index_names
else:
input_table_view = table_view_from_table(
input_table, ignore_index=True)
# input_table_view = table_view_from_table(
# input_table, ignore_index=True)
columns = input_table._columns

p.column_names = input_table._column_names
p.column_dtypes = {}
for name, col in input_table._column_labels_and_values:
if isinstance(col.dtype, cudf.core.dtypes._BaseDtype):
p.column_dtypes[name] = col.dtype

p.c_obj = move(cpp_contiguous_split.pack(input_table_view))
# p.c_obj = move(cpp_contiguous_split.pack(input_table_view))
# Pack through pylibcudf: wrap the selected columns (index columns first
# when the index was included above) in a plc.Table and pack that.
p.c_obj = plc.contiguous_split.pack(
    plc.Table(
        [col.to_pylibcudf(mode="read") for col in columns]
    )
)

return p

@property
def gpu_data_ptr(self):
return int(<uintptr_t>self.c_obj.gpu_data.get()[0].data())
return self.c_obj.gpu_data_ptr

@property
def gpu_data_size(self):
return int(<size_t>self.c_obj.gpu_data.get()[0].size())
return self.c_obj.gpu_data_size

def serialize(self):
header = {}
Expand All @@ -392,10 +402,10 @@ cdef class _CPackedColumns:

header["column-names"] = self.column_names
header["index-names"] = self.index_names
if self.c_obj.metadata.get()[0].data() != NULL:
if self.c_obj.c_obj.get().metadata.get()[0].data() != NULL:
header["metadata"] = list(
<uint8_t[:self.c_obj.metadata.get()[0].size()]>
self.c_obj.metadata.get()[0].data()
<uint8_t[:self.c_obj.c_obj.get().metadata.get()[0].size()]>
self.c_obj.c_obj.get().metadata.get()[0].data()
)

column_dtypes = {}
Expand Down Expand Up @@ -429,7 +439,10 @@ cdef class _CPackedColumns:
)
data.gpu_data = move(dbuf.c_obj)

p.c_obj = move(data)
# p.c_obj = move(data)
# Hand the rebuilt packed_columns over to a pylibcudf PackedColumns wrapper.
# NOTE(review): `data` is used as a plain value just above
# (`data.gpu_data = ...`, `move(data)`), so `&data` looks like the address of
# a local. A unique_ptr constructed from it would presumably try to delete
# memory it does not own -- confirm, and consider
# make_unique[cpp_contiguous_split.packed_columns](move(data)) instead.
p.c_obj = plc.contiguous_split.PackedColumns.from_libcudf(
move(unique_ptr[cpp_contiguous_split.packed_columns](&data))
)
p.column_names = header["column-names"]
p.index_names = header["index-names"]

Expand All @@ -445,7 +458,7 @@ cdef class _CPackedColumns:

def unpack(self):
output_table = cudf.DataFrame._from_data(*data_from_table_view(
cpp_contiguous_split.unpack(self.c_obj),
# Unpack via pylibcudf and pass the resulting table's view along.
plc.contiguous_split.unpack(self.c_obj).view(),
self,
self.column_names,
self.index_names
Expand Down
10 changes: 9 additions & 1 deletion python/pylibcudf/pylibcudf/contiguous_split.pyx
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# Copyright (c) 2024, NVIDIA CORPORATION.

from cython.operator cimport dereference
from libc.stdint cimport uint8_t
from libc.stdint cimport uint8_t, uintptr_t
from libcpp.memory cimport make_unique, unique_ptr
from libcpp.utility cimport move
from libcpp.vector cimport vector
Expand Down Expand Up @@ -100,6 +100,14 @@ cdef class PackedColumns:
DeviceBuffer.c_from_unique_ptr(move(dereference(self.c_obj).gpu_data))
)
)

@property
def gpu_data_ptr(self):
    """Address of the packed device buffer, as a Python int.

    NOTE(review): elsewhere in this class ``gpu_data`` is moved out of
    ``c_obj`` (see the ``DeviceBuffer.c_from_unique_ptr(move(...))`` call);
    presumably this property must not be read after that -- confirm.
    """
    return int(
        <uintptr_t>dereference(dereference(self.c_obj).gpu_data).data()
    )

@property
def gpu_data_size(self):
    """Size reported by the packed device buffer, as a Python int.

    NOTE(review): elsewhere in this class ``gpu_data`` is moved out of
    ``c_obj`` (see the ``DeviceBuffer.c_from_unique_ptr(move(...))`` call);
    presumably this property must not be read after that -- confirm.
    """
    return int(
        <size_t>dereference(dereference(self.c_obj).gpu_data).size()
    )


cpdef PackedColumns pack(Table input):
Expand Down
18 changes: 15 additions & 3 deletions python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

from libc.stdint cimport uint8_t
from libcpp.memory cimport unique_ptr
from libcpp cimport bool
from libcpp.vector cimport vector
from pylibcudf.libcudf.table.table_view cimport table_view
from pylibcudf.libcudf.types cimport size_type
Expand All @@ -14,11 +15,22 @@ cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil:
unique_ptr[vector[uint8_t]] metadata
unique_ptr[device_buffer] gpu_data

cdef struct contiguous_split_result:
cdef cppclass packed_table:
table_view table
vector[device_buffer] all_data
packed_columns data

cdef vector[contiguous_split_result] contiguous_split (
cdef cppclass chunked_pack:
    # Declaration of cudf::chunked_pack from cudf/contiguous_split.hpp.
    # Byte counts are declared as size_t to match the std::size_t used by
    # the C++ header (size_type would narrow them).
    size_t get_total_contiguous_size() except +
    bool has_next() except +
    unique_ptr[vector[uint8_t]] build_metadata() except +

    # The C++ factory returns one owning pointer to a chunked_pack
    # (std::unique_ptr<chunked_pack>), not a vector of them.
    @staticmethod
    unique_ptr[chunked_pack] create(
        table_view table,
        size_t buffer_size,
    ) except +

cdef vector[packed_table] contiguous_split (
table_view input_table,
vector[size_type] splits
) except +
Expand Down
Loading