From b554b999f7f4b4e542985a1f3a0a4b19680f502f Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 16 Sep 2024 16:14:22 -0700 Subject: [PATCH 1/5] improve pylibcudf docs --- python/pylibcudf/pylibcudf/binaryop.pyx | 2 +- .../pylibcudf/pylibcudf/column_factories.pyx | 18 ++++++++++++++++++ python/pylibcudf/pylibcudf/lists.pyx | 12 ++++++++++++ 3 files changed, 31 insertions(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/binaryop.pyx b/python/pylibcudf/pylibcudf/binaryop.pyx index 5a67f4d6cdb..5f9d145139a 100644 --- a/python/pylibcudf/pylibcudf/binaryop.pyx +++ b/python/pylibcudf/pylibcudf/binaryop.pyx @@ -94,7 +94,7 @@ cpdef bool is_supported_operation( ): """Check if an operation is supported for the given data types. - For details, see :cpp:func::is_supported_operation`. + For details, see :cpp:func::`is_supported_operation`. Parameters ---------- diff --git a/python/pylibcudf/pylibcudf/column_factories.pyx b/python/pylibcudf/pylibcudf/column_factories.pyx index 4601cba515a..e9085e3ea02 100644 --- a/python/pylibcudf/pylibcudf/column_factories.pyx +++ b/python/pylibcudf/pylibcudf/column_factories.pyx @@ -18,6 +18,20 @@ from .types import MaskState, TypeId cpdef Column make_empty_column(MakeEmptyColumnOperand type_or_id): + """Creates an empty column of the specified type. + + For details, see :cpp:func::`make_empty_column`. + + Parameters + ---------- + type_or_id : Union[DataType, type_id, object] + The column data type. + + Returns + ------- + Column + An empty Column + """ cdef unique_ptr[column] result cdef type_id id @@ -60,7 +74,11 @@ cpdef Column make_numeric_column( size_type size, MaskArg mstate ): + """Creates an empty numeric column. + + For details, see :cpp:func::`make_numeric_column`. + """ cdef unique_ptr[column] result cdef mask_state state diff --git a/python/pylibcudf/pylibcudf/lists.pyx b/python/pylibcudf/pylibcudf/lists.pyx index 947caddc485..6f82124d06e 100644 --- a/python/pylibcudf/pylibcudf/lists.pyx +++ b/python/pylibcudf/pylibcudf/lists.pyx @@ -52,6 +52,8 @@ cpdef Table explode_outer(Table input, size_type explode_column_idx): All other columns will be duplicated for each element in the list. + For details, see :cpp:func:`explode_outer`. + Parameters ---------- input : Table @@ -75,6 +77,8 @@ cpdef Table explode_outer(Table input, size_type explode_column_idx): cpdef Column concatenate_rows(Table input): """Concatenate multiple lists columns into a single lists column row-wise. + For details, see :cpp:func:`concatenate_list_elements`. + Parameters ---------- input : Table @@ -96,6 +100,8 @@ cpdef Column concatenate_rows(Table input): cpdef Column concatenate_list_elements(Column input, bool dropna): """Concatenate multiple lists on the same row into a single list. + For details, see :cpp:func:`concatenate_list_elements`. + Parameters ---------- input : Column @@ -168,6 +174,8 @@ cpdef Column contains_nulls(Column input): """Create a column of bool values indicating whether each row in the lists column contains a null value. + For details, see :cpp:func:`contains_nulls`. + Parameters ---------- input : Column @@ -290,6 +298,8 @@ cpdef Column segmented_gather(Column input, Column gather_map_list): cpdef Column extract_list_element(Column input, ColumnOrSizeType index): """Create a column of extracted list elements. + For details, see :cpp:func:`extract_list_element`. + Parameters ---------- input : Column @@ -318,6 +328,8 @@ cpdef Column count_elements(Column input): list element in the given lists column. For details, see :cpp:func:`count_elements`. + For details, see :cpp:func:`count_elements`. + Parameters ---------- input : Column From da11ff3b9933d0dd930fb18d5e8f5837f422daa2 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Fri, 20 Sep 2024 13:09:22 -0700 Subject: [PATCH 2/5] add missing libcudf doc links --- python/pylibcudf/pylibcudf/expressions.pyx | 4 ++-- python/pylibcudf/pylibcudf/filling.pyx | 2 +- python/pylibcudf/pylibcudf/groupby.pyx | 2 +- python/pylibcudf/pylibcudf/io/avro.pyx | 2 ++ python/pylibcudf/pylibcudf/io/parquet.pyx | 4 ++++ python/pylibcudf/pylibcudf/labeling.pyx | 2 ++ python/pylibcudf/pylibcudf/merge.pyx | 2 ++ python/pylibcudf/pylibcudf/quantiles.pyx | 4 ++++ python/pylibcudf/pylibcudf/reshape.pyx | 4 ++++ python/pylibcudf/pylibcudf/search.pyx | 6 ++++++ python/pylibcudf/pylibcudf/sorting.pyx | 20 +++++++++++++++++++ .../pylibcudf/pylibcudf/stream_compaction.pyx | 18 +++++++++++++++++ python/pylibcudf/pylibcudf/transform.pyx | 2 ++ 13 files changed, 68 insertions(+), 4 deletions(-) diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index a44c9e25987..caf57128c86 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -198,8 +198,8 @@ cdef class ColumnNameReference(Expression): Name of this column in the table metadata (provided when the expression is evaluated). """ - def __cinit__(self, str name): + def __cinit__(self, str colmun_name): self.c_obj = \ move(make_unique[libcudf_exp.column_name_reference]( - (name.encode("utf-8")) + (column_name.encode("utf-8")) )) diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx index 61b430e64aa..b4623ee90a1 100644 --- a/python/pylibcudf/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -89,7 +89,7 @@ cpdef void fill_in_place( dereference(value.c_obj) ) -cpdef Column sequence(size_type size, Scalar init, Scalar step): +cpdef Column sequence(int size, Scalar init, Scalar step): """Create a sequence column of size ``size`` with initial value ``init`` and step ``step``. diff --git a/python/pylibcudf/pylibcudf/groupby.pyx b/python/pylibcudf/pylibcudf/groupby.pyx index ae5d33aaa46..afb95dba5b3 100644 --- a/python/pylibcudf/pylibcudf/groupby.pyx +++ b/python/pylibcudf/pylibcudf/groupby.pyx @@ -286,7 +286,7 @@ cdef class GroupBy: Returns ------- - Tuple[List[int], Table, Table]] + Tuple[List[int], Table, Table] A tuple of tables containing three items: - A list of integer offsets into the group keys/values - A table of group keys diff --git a/python/pylibcudf/pylibcudf/io/avro.pyx b/python/pylibcudf/pylibcudf/io/avro.pyx index 667c67f4c36..438b0ff1634 100644 --- a/python/pylibcudf/pylibcudf/io/avro.pyx +++ b/python/pylibcudf/pylibcudf/io/avro.pyx @@ -20,6 +20,8 @@ cpdef TableWithMetadata read_avro( """ Reads an Avro dataset into a :py:class:`~.types.TableWithMetadata`. + For details, see :cpp:func:`read_avro`. + Parameters ---------- source_info: SourceInfo diff --git a/python/pylibcudf/pylibcudf/io/parquet.pyx b/python/pylibcudf/pylibcudf/io/parquet.pyx index df1f1b14247..981ca7b8159 100644 --- a/python/pylibcudf/pylibcudf/io/parquet.pyx +++ b/python/pylibcudf/pylibcudf/io/parquet.pyx @@ -59,6 +59,8 @@ cdef class ChunkedParquetReader: """ Reads chunks of a Parquet file into a :py:class:`~.types.TableWithMetadata`. + For details, see :cpp:class:`chunked_parquet_reader`. + Parameters ---------- source_info : SourceInfo @@ -167,6 +169,8 @@ cpdef read_parquet( ): """Reads an Parquet file into a :py:class:`~.types.TableWithMetadata`. + For details, see :cpp:func:`read_parquet`. + Parameters ---------- source_info : SourceInfo diff --git a/python/pylibcudf/pylibcudf/labeling.pyx b/python/pylibcudf/pylibcudf/labeling.pyx index b5a7445df36..b3f6a92d85c 100644 --- a/python/pylibcudf/pylibcudf/labeling.pyx +++ b/python/pylibcudf/pylibcudf/labeling.pyx @@ -20,6 +20,8 @@ cpdef Column label_bins( ): """Labels elements based on membership in the specified bins. + For details see :cpp:func:`label_bins`. + Parameters ---------- input : Column diff --git a/python/pylibcudf/pylibcudf/merge.pyx b/python/pylibcudf/pylibcudf/merge.pyx index a7d43c9d158..6d707b67449 100644 --- a/python/pylibcudf/pylibcudf/merge.pyx +++ b/python/pylibcudf/pylibcudf/merge.pyx @@ -19,6 +19,8 @@ cpdef Table merge ( ): """Merge a set of sorted tables. + For details see :cpp:func:`merge`. + Parameters ---------- tables_to_merge : list diff --git a/python/pylibcudf/pylibcudf/quantiles.pyx b/python/pylibcudf/pylibcudf/quantiles.pyx index b847ade774d..3a771fbe7ef 100644 --- a/python/pylibcudf/pylibcudf/quantiles.pyx +++ b/python/pylibcudf/pylibcudf/quantiles.pyx @@ -30,6 +30,8 @@ cpdef Column quantile( Computes the specified quantiles by interpolating values between which they lie, using the interpolation strategy specified in interp. + For details see :cpp:func:`quantile`. + Parameters ---------- input: Column @@ -91,6 +93,8 @@ cpdef Table quantiles( specified quantiles. In the event a quantile lies in between rows, the specified interpolation strategy is used to pick between the rows. + For details see :cpp:func:`quantiles`. + Parameters ---------- input: Table diff --git a/python/pylibcudf/pylibcudf/reshape.pyx b/python/pylibcudf/pylibcudf/reshape.pyx index a99145be900..eb1499ebbea 100644 --- a/python/pylibcudf/pylibcudf/reshape.pyx +++ b/python/pylibcudf/pylibcudf/reshape.pyx @@ -23,6 +23,8 @@ cpdef Column interleave_columns(Table source_table): in = [[A1, A2, A3], [B1, B2, B3]] return = [A1, B1, A2, B2, A3, B3] + For details, see :cpp:func:`interleave_columns`. + Parameters ---------- source_table: Table @@ -44,6 +46,8 @@ cpdef Column interleave_columns(Table source_table): cpdef Table tile(Table source_table, size_type count): """Repeats the rows from input table count times to form a new table. + For details, see :cpp:func:`tile`. + Parameters ---------- source_table: Table diff --git a/python/pylibcudf/pylibcudf/search.pyx b/python/pylibcudf/pylibcudf/search.pyx index ff2468f3f9c..814bc6553d8 100644 --- a/python/pylibcudf/pylibcudf/search.pyx +++ b/python/pylibcudf/pylibcudf/search.pyx @@ -19,6 +19,8 @@ cpdef Column lower_bound( ): """Find smallest indices in haystack where needles may be inserted to retain order. + For details, see :cpp:func:`lower_bound`. + Parameters ---------- haystack : Table @@ -58,6 +60,8 @@ cpdef Column upper_bound( ): """Find largest indices in haystack where needles may be inserted to retain order. + For details, see :cpp:func:`upper_bound`. + Parameters ---------- haystack : Table @@ -92,6 +96,8 @@ cpdef Column upper_bound( cpdef Column contains(Column haystack, Column needles): """Check whether needles are present in haystack. + For details, see :cpp:func:`contains`. + Parameters ---------- haystack : Table diff --git a/python/pylibcudf/pylibcudf/sorting.pyx b/python/pylibcudf/pylibcudf/sorting.pyx index bd173eebacb..42289d54bca 100644 --- a/python/pylibcudf/pylibcudf/sorting.pyx +++ b/python/pylibcudf/pylibcudf/sorting.pyx @@ -16,6 +16,8 @@ from .table cimport Table cpdef Column sorted_order(Table source_table, list column_order, list null_precedence): """Computes the row indices required to sort the table. + For details, see :cpp:func:`sorted_order`. + Parameters ---------- source_table : Table @@ -52,6 +54,8 @@ cpdef Column stable_sorted_order( """Computes the row indices required to sort the table, preserving order of equal elements. + For details, see :cpp:func:`stable_sorted_order`. + Parameters ---------- source_table : Table @@ -90,6 +94,8 @@ cpdef Column rank( ): """Computes the rank of each element in the column. + For details, see :cpp:func:`rank`. + Parameters ---------- input_view : Column @@ -128,6 +134,8 @@ cpdef Column rank( cpdef bool is_sorted(Table tbl, list column_order, list null_precedence): """Checks if the table is sorted. + For details, see :cpp:func:`is_sorted`. + Parameters ---------- tbl : Table @@ -165,6 +173,8 @@ cpdef Table segmented_sort_by_key( ): """Sorts the table by key, within segments. + For details, see :cpp:func:`segmented_sort_by_key`. + Parameters ---------- values : Table @@ -209,6 +219,8 @@ cpdef Table stable_segmented_sort_by_key( """Sorts the table by key preserving order of equal elements, within segments. + For details, see :cpp:func:`stable_segmented_sort_by_key`. + Parameters ---------- values : Table @@ -251,6 +263,8 @@ cpdef Table sort_by_key( ): """Sorts the table by key. + For details, see :cpp:func:`sort_by_key`. + Parameters ---------- values : Table @@ -290,6 +304,8 @@ cpdef Table stable_sort_by_key( ): """Sorts the table by key preserving order of equal elements. + For details, see :cpp:func:`stable_sort_by_key`. + Parameters ---------- values : Table @@ -324,6 +340,8 @@ cpdef Table stable_sort_by_key( cpdef Table sort(Table source_table, list column_order, list null_precedence): """Sorts the table. + For details, see :cpp:func:`sort`. + Parameters ---------- source_table : Table @@ -355,6 +373,8 @@ cpdef Table sort(Table source_table, list column_order, list null_precedence): cpdef Table stable_sort(Table source_table, list column_order, list null_precedence): """Sorts the table preserving order of equal elements. + For details, see :cpp:func:`stable_sort`. + Parameters ---------- source_table : Table diff --git a/python/pylibcudf/pylibcudf/stream_compaction.pyx b/python/pylibcudf/pylibcudf/stream_compaction.pyx index b574bfa9fa2..d5475ea79d5 100644 --- a/python/pylibcudf/pylibcudf/stream_compaction.pyx +++ b/python/pylibcudf/pylibcudf/stream_compaction.pyx @@ -25,6 +25,8 @@ from .table cimport Table cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold): """Filters out rows from the input table based on the presence of nulls. + For details, see :cpp:func:`drop_nulls`. + Parameters ---------- source_table : Table @@ -53,6 +55,8 @@ cpdef Table drop_nulls(Table source_table, list keys, size_type keep_threshold): cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold): """Filters out rows from the input table based on the presence of NaNs. + For details, see :cpp:func:`drop_nans`. + Parameters ---------- source_table : Table @@ -81,6 +85,8 @@ cpdef Table drop_nans(Table source_table, list keys, size_type keep_threshold): cpdef Table apply_boolean_mask(Table source_table, Column boolean_mask): """Filters out rows from the input table based on a boolean mask. + For details, see :cpp:func:`apply_boolean_mask`. + Parameters ---------- source_table : Table @@ -111,6 +117,8 @@ cpdef Table unique( ): """Filter duplicate consecutive rows from the input table. + For details, see :cpp:func:`unique`. + Parameters ---------- input : Table @@ -153,6 +161,8 @@ cpdef Table distinct( ): """Get the distinct rows from the input table. + For details, see :cpp:func:`distinct`. + Parameters ---------- input : Table @@ -191,6 +201,8 @@ cpdef Column distinct_indices( ): """Get the indices of the distinct rows from the input table. + For details, see :cpp:func:`distinct_indices`. + Parameters ---------- input : Table @@ -226,6 +238,8 @@ cpdef Table stable_distinct( ): """Get the distinct rows from the input table, preserving input order. + For details, see :cpp:func:`stable_distinct`. + Parameters ---------- input : Table @@ -263,6 +277,8 @@ cpdef size_type unique_count( ): """Returns the number of unique consecutive elements in the input column. + For details, see :cpp:func:`unique_count`. + Parameters ---------- source : Column @@ -294,6 +310,8 @@ cpdef size_type distinct_count( ): """Returns the number of distinct elements in the input column. + For details, see :cpp:func:`distinct_count`. + Parameters ---------- source : Column diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index 100ccb580ce..45558fe2b2f 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -14,6 +14,8 @@ from .gpumemoryview cimport gpumemoryview cpdef tuple[gpumemoryview, int] nans_to_nulls(Column input): """Create a null mask preserving existing nulls and converting nans to null. + For details, see :cpp:func:`nans_to_nulls`. + Parameters ---------- input : Column From d75df82a141c6f26b565106be73837a0b1c9a1b3 Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Sat, 21 Sep 2024 06:55:29 -0700 Subject: [PATCH 3/5] fix declaration, revert name change --- python/pylibcudf/pylibcudf/expressions.pyx | 4 ++-- python/pylibcudf/pylibcudf/filling.pxd | 2 +- python/pylibcudf/pylibcudf/filling.pyx | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/python/pylibcudf/pylibcudf/expressions.pyx b/python/pylibcudf/pylibcudf/expressions.pyx index caf57128c86..a44c9e25987 100644 --- a/python/pylibcudf/pylibcudf/expressions.pyx +++ b/python/pylibcudf/pylibcudf/expressions.pyx @@ -198,8 +198,8 @@ cdef class ColumnNameReference(Expression): Name of this column in the table metadata (provided when the expression is evaluated). """ - def __cinit__(self, str colmun_name): + def __cinit__(self, str name): self.c_obj = \ move(make_unique[libcudf_exp.column_name_reference]( - (column_name.encode("utf-8")) + (name.encode("utf-8")) )) diff --git a/python/pylibcudf/pylibcudf/filling.pxd b/python/pylibcudf/pylibcudf/filling.pxd index b9345f8cd42..301b652b6ac 100644 --- a/python/pylibcudf/pylibcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/filling.pxd @@ -24,7 +24,7 @@ cpdef void fill_in_place( ) cpdef Column sequence( - size_type size, + int size, Scalar init, Scalar step, ) diff --git a/python/pylibcudf/pylibcudf/filling.pyx b/python/pylibcudf/pylibcudf/filling.pyx index b4623ee90a1..61b430e64aa 100644 --- a/python/pylibcudf/pylibcudf/filling.pyx +++ b/python/pylibcudf/pylibcudf/filling.pyx @@ -89,7 +89,7 @@ cpdef void fill_in_place( dereference(value.c_obj) ) -cpdef Column sequence(int size, Scalar init, Scalar step): +cpdef Column sequence(size_type size, Scalar init, Scalar step): """Create a sequence column of size ``size`` with initial value ``init`` and step ``step``. From 6941df606044293a5d1580a01b4faad21084b0fd Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Mon, 23 Sep 2024 05:25:12 -0700 Subject: [PATCH 4/5] update declaration --- python/pylibcudf/pylibcudf/filling.pxd | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/filling.pxd b/python/pylibcudf/pylibcudf/filling.pxd index 301b652b6ac..b9345f8cd42 100644 --- a/python/pylibcudf/pylibcudf/filling.pxd +++ b/python/pylibcudf/pylibcudf/filling.pxd @@ -24,7 +24,7 @@ cpdef void fill_in_place( ) cpdef Column sequence( - int size, + size_type size, Scalar init, Scalar step, ) From ca82992d91333c8b6fc356c2dcca06bf86fa4a7e Mon Sep 17 00:00:00 2001 From: Matthew Murray Date: Tue, 24 Sep 2024 21:04:41 -0700 Subject: [PATCH 5/5] update branch, fix typo in doc string --- python/pylibcudf/pylibcudf/strings/findall.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 03ecb13a50e..3a6b87504b3 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -13,7 +13,7 @@ cpdef Column findall(Column input, RegexProgram pattern): Returns a lists column of strings for each matching occurrence using the regex_program pattern within each string. - For details, see For details, see :cpp:func:`cudf::strings::findall`. + For details, see :cpp:func:`cudf::strings::findall`. Parameters ----------