diff --git a/python/pylibcudf/pylibcudf/io/parquet.pxd b/python/pylibcudf/pylibcudf/io/parquet.pxd index 3f95e90e204..e19d61e4d1d 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pxd +++ b/python/pylibcudf/pylibcudf/io/parquet.pxd @@ -55,15 +55,15 @@ cdef class ParquetWriterOptions: cpdef void set_column_chunks_file_paths(self, list file_paths) - cpdef void set_row_group_size_bytes(self, int size_bytes) + cpdef void set_row_group_size_bytes(self, size_t size_bytes) - cpdef void set_row_group_size_rows(self, int size_rows) + cpdef void set_row_group_size_rows(self, size_type size_rows) - cpdef void set_max_page_size_bytes(self, int size_bytes) + cpdef void set_max_page_size_bytes(self, size_t size_bytes) - cpdef void set_max_page_size_rows(self, int size_rows) + cpdef void set_max_page_size_rows(self, size_type size_rows) - cpdef void set_max_dictionary_size(self, int size_rows) + cpdef void set_max_dictionary_size(self, size_t size_rows) cdef class ParquetWriterOptionsBuilder: cdef parquet_writer_options_builder c_obj diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx index 7218eb4ab71..18ab3429431 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyx +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -310,7 +310,7 @@ cdef class ParquetWriterOptions: """ self.c_obj.set_column_chunks_file_paths([fp.encode() for fp in file_paths]) - cpdef void set_row_group_size_bytes(self, int size_bytes): + cpdef void set_row_group_size_bytes(self, size_t size_bytes): """ Sets the maximum row group size, in bytes. @@ -325,7 +325,7 @@ cdef class ParquetWriterOptions: """ self.c_obj.set_row_group_size_bytes(size_bytes) - cpdef void set_row_group_size_rows(self, int size_rows): + cpdef void set_row_group_size_rows(self, size_type size_rows): """ Sets the maximum row group size, in rows. @@ -340,7 +340,7 @@ cdef class ParquetWriterOptions: """ self.c_obj.set_row_group_size_rows(size_rows) - cpdef void set_max_page_size_bytes(self, int size_bytes): + cpdef void set_max_page_size_bytes(self, size_t size_bytes): """ Sets the maximum uncompressed page size, in bytes. @@ -355,7 +355,7 @@ cdef class ParquetWriterOptions: """ self.c_obj.set_max_page_size_bytes(size_bytes) - cpdef void set_max_page_size_rows(self, int size_rows): + cpdef void set_max_page_size_rows(self, size_type size_rows): """ Sets the maximum page size, in rows. @@ -370,7 +370,7 @@ cdef class ParquetWriterOptions: """ self.c_obj.set_max_page_size_rows(size_rows) - cpdef void set_max_dictionary_size(self, int size_rows): + cpdef void set_max_dictionary_size(self, size_t size_rows): """ Sets the maximum dictionary size, in bytes. @@ -539,7 +539,7 @@ cdef class ParquetWriterOptionsBuilder: cpdef ParquetWriterOptions build(self): """ - Options member once it's built + Create a ParquetWriterOptions from the set options. Returns ------- diff --git a/python/pylibcudf/pylibcudf/io/types.pxd b/python/pylibcudf/pylibcudf/io/types.pxd index 7340697e1e7..90b43cf0ff5 100644 --- a/python/pylibcudf/pylibcudf/io/types.pxd +++ b/python/pylibcudf/pylibcudf/io/types.pxd @@ -1,4 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from libc.stdint cimport uint8_t, int32_t from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.vector cimport vector @@ -19,6 +20,7 @@ from pylibcudf.libcudf.io.types cimport ( table_metadata, table_with_metadata, ) +from pylibcudf.libcudf.types cimport size_type from pylibcudf.table cimport Table @@ -41,13 +43,13 @@ cdef class ColumnInMetadata: cpdef ColumnInMetadata set_int96_timestamps(self, bool req) - cpdef ColumnInMetadata set_decimal_precision(self, int req) + cpdef ColumnInMetadata set_decimal_precision(self, uint8_t req) - cpdef ColumnInMetadata child(self, int i) + cpdef ColumnInMetadata child(self, size_type i) cpdef ColumnInMetadata set_output_as_binary(self, bool binary) - cpdef ColumnInMetadata set_type_length(self, int type_length) + cpdef ColumnInMetadata set_type_length(self, int32_t type_length) cpdef ColumnInMetadata set_skip_compression(self, bool skip) diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi index ebaaf054e66..04f276cfeee 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyi +++ b/python/pylibcudf/pylibcudf/io/types.pyi @@ -65,6 +65,19 @@ class PartitionInfo: class TableInputMetadata: def __init__(self, table: Table): ... +class ColumnInMetadata: + def set_name(self, name: str) -> ColumnInMetadata: ... + def set_nullability(self, nullable: bool) -> ColumnInMetadata: ... + def set_list_column_as_map(self) -> ColumnInMetadata: ... + def set_int96_timestamps(self, req: bool) -> ColumnInMetadata: ... + def set_decimal_precision(self, precision: int) -> ColumnInMetadata: ... + def child(self, i: int) -> ColumnInMetadata: ... + def set_output_as_binary(self, binary: bool) -> ColumnInMetadata: ... + def set_type_length(self, type_length: int) -> ColumnInMetadata: ... + def set_skip_compression(self, skip: bool) -> ColumnInMetadata: ... + def set_encoding(self, encoding: ColumnEncoding) -> ColumnInMetadata: ... + def get_name(self) -> str: ... + class TableWithMetadata: tbl: Table def __init__( diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 80542059aed..f4bff285d68 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -2,6 +2,7 @@ from cpython.buffer cimport PyBUF_READ from cpython.memoryview cimport PyMemoryView_FromMemory +from libc.stdint cimport uint8_t, int32_t from libcpp cimport bool from libcpp.memory cimport unique_ptr from libcpp.string cimport string @@ -20,6 +21,7 @@ from pylibcudf.libcudf.io.types cimport ( table_input_metadata, table_with_metadata, ) +from pylibcudf.libcudf.types cimport size_type import codecs import errno @@ -63,7 +65,7 @@ cdef class PartitionInfo: num_rows : int The number of rows in the partition. """ - def __init__(self, int start_row, int num_rows): + def __init__(self, size_type start_row, size_type num_rows): self.c_obj = partition_info(start_row, num_rows) @@ -146,7 +148,7 @@ cdef class ColumnInMetadata: self.c_obj.set_int96_timestamps(req) return self - cpdef ColumnInMetadata set_decimal_precision(self, int precision): + cpdef ColumnInMetadata set_decimal_precision(self, uint8_t precision): """ Set the decimal precision of this column. Only valid if this column is a decimal (fixed-point) type. @@ -163,7 +165,7 @@ cdef class ColumnInMetadata: self.c_obj.set_decimal_precision(precision) return self - cpdef ColumnInMetadata child(self, int i): + cpdef ColumnInMetadata child(self, size_type i): """ Get reference to a child of this column. @@ -194,7 +196,7 @@ cdef class ColumnInMetadata: self.c_obj.set_output_as_binary(binary) return self - cpdef ColumnInMetadata set_type_length(self, int type_length): + cpdef ColumnInMetadata set_type_length(self, int32_t type_length): """ Sets the length of fixed length data.