-
Notifications
You must be signed in to change notification settings - Fork 912
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add string.convert.convert_lists APIs to pylibcudf (#16997)
Contributes to #15162 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: #16997
- Loading branch information
Showing
19 changed files
with
187 additions
and
32 deletions.
There are no files selected for viewing
6 changes: 6 additions & 0 deletions
6
.../cudf/source/user_guide/api_docs/pylibcudf/strings/convert/convert_booleans.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
================ | ||
convert_booleans | ||
================ | ||
|
||
.. automodule:: pylibcudf.strings.convert.convert_booleans | ||
:members: |
6 changes: 6 additions & 0 deletions
6
.../cudf/source/user_guide/api_docs/pylibcudf/strings/convert/convert_datetime.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
================ | ||
convert_datetime | ||
================ | ||
|
||
.. automodule:: pylibcudf.strings.convert.convert_datetime | ||
:members: |
6 changes: 6 additions & 0 deletions
6
...cudf/source/user_guide/api_docs/pylibcudf/strings/convert/convert_durations.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
================= | ||
convert_durations | ||
================= | ||
|
||
.. automodule:: pylibcudf.strings.convert.convert_durations | ||
:members: |
6 changes: 6 additions & 0 deletions
6
...df/source/user_guide/api_docs/pylibcudf/strings/convert/convert_fixed_point.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
=================== | ||
convert_fixed_point | ||
=================== | ||
|
||
.. automodule:: pylibcudf.strings.convert.convert_fixed_point | ||
:members: |
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/convert/convert_floats.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
============== | ||
convert_floats | ||
============== | ||
|
||
.. automodule:: pylibcudf.strings.convert.convert_floats | ||
:members: |
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/convert/convert_ipv4.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
============ | ||
convert_ipv4 | ||
============ | ||
|
||
.. automodule:: pylibcudf.strings.convert.convert_ipv4 | ||
:members: |
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/convert/convert_lists.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
============= | ||
convert_lists | ||
============= | ||
|
||
.. automodule:: pylibcudf.strings.convert.convert_lists | ||
:members: |
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/convert/convert_urls.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
============ | ||
convert_urls | ||
============ | ||
|
||
.. automodule:: pylibcudf.strings.convert.convert_urls | ||
:members: |
14 changes: 14 additions & 0 deletions
14
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/convert/index.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
convert | ||
======= | ||
|
||
.. toctree:: | ||
:maxdepth: 1 | ||
|
||
convert_booleans | ||
convert_datetime | ||
convert_durations | ||
convert_fixed_point | ||
convert_floats | ||
convert_ipv4 | ||
convert_lists | ||
convert_urls |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -21,3 +21,9 @@ strings | |
split | ||
strip | ||
wrap | ||
|
||
.. toctree:: | ||
:maxdepth: 2 | ||
:caption: Subpackages | ||
|
||
convert/index.rst |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,5 +6,6 @@ from . cimport ( | |
convert_fixed_point, | ||
convert_floats, | ||
convert_ipv4, | ||
convert_lists, | ||
convert_urls, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -6,5 +6,6 @@ | |
convert_fixed_point, | ||
convert_floats, | ||
convert_ipv4, | ||
convert_lists, | ||
convert_urls, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
11 changes: 11 additions & 0 deletions
11
python/pylibcudf/pylibcudf/strings/convert/convert_lists.pxd
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,11 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from pylibcudf.column cimport Column | ||
from pylibcudf.scalar cimport Scalar | ||
|
||
|
||
# Declaration of the list-to-string formatter implemented in
# convert_lists.pyx. ``=*`` marks an optional argument whose default value
# is supplied by the implementation rather than by this declaration.
cpdef Column format_list_column(
    Column input, Scalar na_rep=*, Column separators=*
)
72 changes: 72 additions & 0 deletions
72
python/pylibcudf/pylibcudf/strings/convert/convert_lists.pyx
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,72 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
from pylibcudf.column cimport Column | ||
from pylibcudf.column_factories cimport make_empty_column | ||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.scalar.scalar cimport string_scalar | ||
from pylibcudf.libcudf.scalar.scalar_factories cimport ( | ||
make_string_scalar as cpp_make_string_scalar, | ||
) | ||
from pylibcudf.libcudf.strings.convert cimport ( | ||
convert_lists as cpp_convert_lists, | ||
) | ||
from pylibcudf.scalar cimport Scalar | ||
from pylibcudf.types cimport type_id | ||
|
||
from cython.operator import dereference | ||
|
||
|
||
cpdef Column format_list_column(
    Column input,
    Scalar na_rep=None,
    Column separators=None
):
    """
    Convert a list column of strings into a formatted strings column.

    For details, see :cpp:func:`cudf::strings::format_list_column`

    Parameters
    ----------
    input : Column
        Lists column to format

    na_rep : Scalar, optional
        Replacement string for null elements.
        Default, empty string

    separators : Column, optional
        Strings to use for enclosing list components and separating elements.
        Default, ``,``, ``[``, ``]``

    Returns
    -------
    Column
        New strings column

    Raises
    ------
    TypeError
        If ``na_rep`` is provided but is not a string scalar.
    """
    cdef unique_ptr[column] c_result

    if na_rep is None:
        # No replacement given: build an empty-string scalar as the default.
        na_rep = Scalar.from_libcudf(
            cpp_make_string_scalar("".encode())
        )
    elif na_rep.type().id() != type_id.STRING:
        # The pointer below is reinterpreted as a string_scalar, so any other
        # scalar type must be rejected before the cast.
        raise TypeError("na_rep must be a string scalar")

    cdef const string_scalar* c_na_rep = <const string_scalar*>(
        na_rep.c_obj.get()
    )

    if separators is None:
        # An empty strings column stands in for "no separators given".
        separators = make_empty_column(type_id.STRING)

    # Release the GIL for the duration of the libcudf call.
    with nogil:
        c_result = move(
            cpp_convert_lists.format_list_column(
                input.view(),
                dereference(c_na_rep),
                separators.view()
            )
        )

    return Column.from_libcudf(move(c_result))
21 changes: 21 additions & 0 deletions
21
python/pylibcudf/pylibcudf/tests/test_string_convert_lists.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
import pyarrow as pa | ||
import pylibcudf as plc | ||
import pytest | ||
from utils import assert_column_eq | ||
|
||
|
||
@pytest.mark.parametrize("na_rep", [None, pa.scalar("")])
@pytest.mark.parametrize("separators", [None, pa.array([",", "[", "]"])])
def test_format_list_column(na_rep, separators):
    """Format a small lists column with omitted and explicit arguments.

    Each parameter is either ``None`` (left for the function to default) or
    a pyarrow value that is converted to its pylibcudf equivalent first.
    Every combination is expected to produce the same formatted strings.
    """
    plc_input = plc.interop.from_arrow(pa.array([["1", "A"], None]))

    # Pass None through untouched so the function's own defaults are used.
    plc_na_rep = None if na_rep is None else plc.interop.from_arrow(na_rep)
    plc_separators = (
        None if separators is None else plc.interop.from_arrow(separators)
    )

    result = plc.strings.convert.convert_lists.format_list_column(
        plc_input, plc_na_rep, plc_separators
    )

    # The null list row is rendered as the (empty) null replacement string.
    assert_column_eq(result, pa.array(["[1,A]", ""]))