From 010c1da8a2a3314b3700c3a314a1aeac6c84136c Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Tue, 19 Nov 2024 15:29:25 +0000
Subject: [PATCH] Add type stub information

Extend the pylibcudf type stubs and exports:

* io/parquet.pyi: declare ParquetWriterOptions,
  ParquetWriterOptionsBuilder and write_parquet, and import the
  names their signatures use.
* io/types.pyi: declare PartitionInfo and TableInputMetadata.
* io/types.pyx: add PartitionInfo and TableInputMetadata to __all__.
---
 python/pylibcudf/pylibcudf/io/parquet.pyi | 46 ++++++++++++++++++++++-
 python/pylibcudf/pylibcudf/io/types.pyi   |  6 +++
 python/pylibcudf/pylibcudf/io/types.pyx   |  2 +
 3 files changed, 53 insertions(+), 1 deletion(-)

diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyi b/python/pylibcudf/pylibcudf/io/parquet.pyi
index bcf1d1cce09..fa34f2b938a 100644
--- a/python/pylibcudf/pylibcudf/io/parquet.pyi
+++ b/python/pylibcudf/pylibcudf/io/parquet.pyi
@@ -1,7 +1,20 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
+from collections.abc import Mapping
+from typing import Self
+
 from pylibcudf.expressions import Expression
-from pylibcudf.io.types import SourceInfo, TableWithMetadata
+from pylibcudf.io.types import (
+    CompressionType,
+    DictionaryPolicy,
+    PartitionInfo,
+    SinkInfo,
+    SourceInfo,
+    StatisticsFreq,
+    TableInputMetadata,
+    TableWithMetadata,
+)
+from pylibcudf.table import Table
 
 class ChunkedParquetReader:
     def __init__(
@@ -34,3 +47,34 @@ def read_parquet(
     # reader_column_schema: ReaderColumnSchema = *,
     # timestamp_type: DataType = *
 ) -> TableWithMetadata: ...
+
+class ParquetWriterOptions:
+    def __init__(self): ...
+    @staticmethod
+    def builder(
+        sink: SinkInfo, table: Table
+    ) -> ParquetWriterOptionsBuilder: ...
+    def set_partitions(self, partitions: list[PartitionInfo]) -> None: ...
+    def set_column_chunks_file_paths(self, file_paths: list[str]) -> None: ...
+    def set_row_group_size_bytes(self, size_bytes: int) -> None: ...
+    def set_row_group_size_rows(self, size_rows: int) -> None: ...
+    def set_max_page_size_bytes(self, size_bytes: int) -> None: ...
+    def set_max_page_size_rows(self, size_rows: int) -> None: ...
+    def set_max_dictionary_size(self, size_rows: int) -> None: ...
+
+class ParquetWriterOptionsBuilder:
+    def __init__(self): ...
+    def metadata(self, metadata: TableInputMetadata) -> Self: ...
+    def key_value_metadata(
+        self, metadata: list[Mapping[str, str]]
+    ) -> Self: ...
+    def compression(self, compression: CompressionType) -> Self: ...
+    def stats_level(self, sf: StatisticsFreq) -> Self: ...
+    def int96_timestamps(self, enabled: bool) -> Self: ...
+    def write_v2_headers(self, enabled: bool) -> Self: ...
+    def dictionary_policy(self, val: DictionaryPolicy) -> Self: ...
+    def utc_timestamps(self, enabled: bool) -> Self: ...
+    def write_arrow_schema(self, enabled: bool) -> Self: ...
+    def build(self) -> ParquetWriterOptions: ...
+
+def write_parquet(options: ParquetWriterOptions) -> memoryview: ...
diff --git a/python/pylibcudf/pylibcudf/io/types.pyi b/python/pylibcudf/pylibcudf/io/types.pyi
index a4f4fc13bdc..ebaaf054e66 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyi
+++ b/python/pylibcudf/pylibcudf/io/types.pyi
@@ -59,6 +59,12 @@ class QuoteStyle(IntEnum):
 ColumnNameSpec: TypeAlias = tuple[str, list[ColumnNameSpec]]
 ChildNameSpec: TypeAlias = Mapping[str, ChildNameSpec]
 
+class PartitionInfo:
+    def __init__(self, start_row: int, num_rows: int): ...
+
+class TableInputMetadata:
+    def __init__(self, table: Table): ...
+
 class TableWithMetadata:
     tbl: Table
     def __init__(
diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx
index 1ccb6145aff..80542059aed 100644
--- a/python/pylibcudf/pylibcudf/io/types.pyx
+++ b/python/pylibcudf/pylibcudf/io/types.pyx
@@ -42,10 +42,12 @@ __all__ = [
     "CompressionType",
     "DictionaryPolicy",
     "JSONRecoveryMode",
+    "PartitionInfo",
     "QuoteStyle",
     "SinkInfo",
     "SourceInfo",
     "StatisticsFreq",
+    "TableInputMetadata",
     "TableWithMetadata",
 ]
 