-
Notifications
You must be signed in to change notification settings - Fork 912
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
9 changed files
with
106 additions
and
33 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,5 +10,6 @@ strings | |
find | ||
regex_flags | ||
regex_program | ||
repeat | ||
replace | ||
slice |
6 changes: 6 additions & 0 deletions
6
docs/cudf/source/user_guide/api_docs/pylibcudf/strings/repeat.rst
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
======
repeat
======

.. automodule:: pylibcudf.strings.repeat
    :members:
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -8,6 +8,7 @@ | |
find, | ||
regex_flags, | ||
regex_program, | ||
repeat, | ||
replace, | ||
slice, | ||
) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,10 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.types cimport size_type | ||
|
||
# Fused type for the ``repeat_times`` argument of ``repeat_strings``:
# either a Column (used through ``.view()`` by the implementation) or a
# single ``size_type`` count.
ctypedef fused ColumnorSizeType:
    Column
    size_type

# Declaration only; the implementation lives in the matching .pyx file.
cpdef Column repeat_strings(Column input, ColumnorSizeType repeat_times)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
from libcpp.memory cimport unique_ptr | ||
from libcpp.utility cimport move | ||
from pylibcudf.column cimport Column | ||
from pylibcudf.libcudf.column.column cimport column | ||
from pylibcudf.libcudf.strings cimport repeat as cpp_repeat | ||
from pylibcudf.libcudf.types cimport size_type | ||
|
||
|
||
cpdef Column repeat_strings(Column input, ColumnorSizeType repeat_times):
    """
    Repeat each string in the given strings column.

    ``repeat_times`` is either a single count or a column of counts;
    each form is forwarded to the matching libcudf ``repeat_strings``
    overload.

    For details, see :cpp:func:`cudf::strings::repeat_strings`.

    Parameters
    ----------
    input : Column
        The column containing strings to repeat.
    repeat_times : Column or int
        Number(s) of times that the corresponding input strings
        for each row are repeated.

    Returns
    -------
    Column
        New column containing the repeated strings.

    Raises
    ------
    ValueError
        If ``repeat_times`` is neither a Column nor an integer.
    """
    cdef unique_ptr[column] c_result

    # ColumnorSizeType is a fused type: each specialization of this
    # function keeps only the branch matching its concrete type.
    if ColumnorSizeType is Column:
        with nogil:
            c_result = move(
                cpp_repeat.repeat_strings(
                    input.view(),
                    repeat_times.view()
                )
            )
    elif ColumnorSizeType is size_type:
        with nogil:
            c_result = move(
                cpp_repeat.repeat_strings(
                    input.view(),
                    repeat_times
                )
            )
    else:
        # Defensive fallback; the fused type only lists the two cases above.
        raise ValueError("repeat_times must be a Column or an integer")

    return Column.from_libcudf(move(c_result))
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
# Copyright (c) 2024, NVIDIA CORPORATION. | ||
|
||
import pyarrow as pa | ||
import pyarrow.compute as pc | ||
import pylibcudf as plc | ||
import pytest | ||
|
||
|
||
@pytest.mark.parametrize("repeats", [pa.array([2, 2]), 2])
def test_repeat_strings(repeats):
    """Compare ``repeat_strings`` with ``pyarrow.compute.binary_repeat``.

    Parametrized over both accepted forms of the repeat count: an arrow
    array of counts and a plain Python int.
    """
    arr = pa.array(["1", None])
    plc_input = plc.interop.from_arrow(arr)

    # An int count is passed through unchanged; an arrow array of counts
    # is converted with from_arrow before the call.
    if isinstance(repeats, int):
        plc_repeats = repeats
    else:
        plc_repeats = plc.interop.from_arrow(repeats)

    plc_result = plc.strings.repeat.repeat_strings(plc_input, plc_repeats)
    result = plc.interop.to_arrow(plc_result)

    # The expected value is wrapped in a chunked array before comparing
    # it with the to_arrow output.
    expected = pa.chunked_array(pc.binary_repeat(arr, repeats))
    assert result.equals(expected)