From 664edcb600022f61936bae1030278e97ee93adbe Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 4 Sep 2024 13:43:25 -0400 Subject: [PATCH 01/12] Add cudf::strings::find_re API --- cpp/doxygen/regex.md | 1 + cpp/include/cudf/strings/findall.hpp | 29 +++++++++++ cpp/src/strings/search/findall.cu | 49 +++++++++++++++++++ cpp/tests/strings/findall_tests.cpp | 17 +++++++ python/cudf/cudf/_lib/strings/__init__.py | 4 +- python/cudf/cudf/_lib/strings/findall.pyx | 28 ++++++++++- python/cudf/cudf/core/column/string.py | 29 +++++++++++ python/cudf/cudf/tests/test_string.py | 20 ++++++++ .../pylibcudf/libcudf/strings/findall.pxd | 4 ++ 9 files changed, 178 insertions(+), 3 deletions(-) diff --git a/cpp/doxygen/regex.md b/cpp/doxygen/regex.md index 8d206f245dc..926b2f785c9 100644 --- a/cpp/doxygen/regex.md +++ b/cpp/doxygen/regex.md @@ -8,6 +8,7 @@ This page specifies which regular expression (regex) features are currently supp - cudf::strings::extract() - cudf::strings::extract_all_record() - cudf::strings::findall() +- cudf::strings::find_re() - cudf::strings::replace_re() - cudf::strings::replace_with_backrefs() - cudf::strings::split_re() diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp index 26249b6842c..876c81ebe0c 100644 --- a/cpp/include/cudf/strings/findall.hpp +++ b/cpp/include/cudf/strings/findall.hpp @@ -68,6 +68,35 @@ std::unique_ptr findall( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); +/** + * @brief Returns the starting character index of the first match for the given pattern + * in each row of the input column + * + * @code{.pseudo} + * Example: + * s = ["bunny", "rabbit", "hare", "dog"] + * p = regex_program::create("[be]") + * r = find_re(s, p) + * r is now [0, 2, 3, -1] + * @endcode + * + * A null output row occurs if the corresponding input row is null. + * A -1 is returned for rows that do not contain a match. + * + * See the @ref md_regex "Regex Features" page for details on patterns supported by this API. + * + * @param input Strings instance for this operation + * @param prog Regex program instance + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the returned column's device memory + * @return New column of integers + */ +std::unique_ptr find_re( + strings_column_view const& input, + regex_program const& prog, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + /** @} */ // end of doxygen group } // namespace strings } // namespace CUDF_EXPORT cudf diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index 2f7e7352458..9ce0fbdcf8a 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -122,6 +122,46 @@ std::unique_ptr findall(strings_column_view const& input, mr); } +namespace { +struct find_re_fn { + column_device_view d_strings; + + __device__ size_type operator()(size_type const idx, + reprog_device const prog, + int32_t const thread_idx) const + { + if (d_strings.is_null(idx)) { return 0; } + auto const d_str = d_strings.element(idx); + + auto const result = prog.find(thread_idx, d_str, d_str.begin()); + if (!result.has_value()) { return -1; } + return result.value().first; + } +}; +} // namespace + +std::unique_ptr find_re(strings_column_view const& input, + regex_program const& prog, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + auto results = make_numeric_column(data_type{type_to_id()}, + input.size(), + cudf::detail::copy_bitmask(input.parent(), stream, mr), + input.null_count(), + stream, + mr); + if (input.is_empty()) { return results; } + + auto d_results = results->mutable_view().data(); + + auto d_prog = regex_device_builder::create_prog_device(prog, stream); + + auto const d_strings = column_device_view::create(input.parent(), stream); + launch_transform_kernel(find_re_fn{*d_strings}, *d_prog, d_results, input.size(), stream); + + return results; +} } // namespace detail // external API @@ -135,5 +175,14 @@ std::unique_ptr findall(strings_column_view const& input, return detail::findall(input, prog, stream, mr); } +std::unique_ptr find_re(strings_column_view const& input, + regex_program const& prog, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::find_re(input, prog, stream, mr); +} + } // namespace strings } // namespace cudf diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp index 4582dcb1e38..b29f02b7420 100644 --- a/cpp/tests/strings/findall_tests.cpp +++ b/cpp/tests/strings/findall_tests.cpp @@ -17,6 +17,7 @@ #include #include #include +#include #include #include @@ -120,3 +121,19 @@ TEST_F(StringsFindallTests, LargeRegex) LCW expected({LCW{large_regex.c_str()}, LCW{}, LCW{}}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } + +TEST_F(StringsFindallTests, FindTest) +{ + auto const valids = cudf::test::iterators::null_at(5); + cudf::test::strings_column_wrapper input( + {"3A", "May4", "Jan2021", "March", "A9BC", "", "", "abcdef ghijklm 12345"}, valids); + auto sv = cudf::strings_column_view(input); + + auto pattern = std::string("\\d+"); + + auto prog = cudf::strings::regex_program::create(pattern); + auto results = cudf::strings::find_re(sv, *prog); + auto expected = + cudf::test::fixed_width_column_wrapper({0, 3, 3, -1, 1, 0, -1, 15}, valids); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); +} diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py index 47a194c4fda..2b4b32c08bd 100644 --- a/python/cudf/cudf/_lib/strings/__init__.py +++ b/python/cudf/cudf/_lib/strings/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. from cudf._lib.nvtext.edit_distance import edit_distance, edit_distance_matrix from cudf._lib.nvtext.generate_ngrams import ( generate_character_ngrams, @@ -66,7 +66,7 @@ startswith_multiple, ) from cudf._lib.strings.find_multiple import find_multiple -from cudf._lib.strings.findall import findall +from cudf._lib.strings.findall import find_re, findall from cudf._lib.strings.json import GetJsonObjectOptions, get_json_object from cudf._lib.strings.padding import ( SideType, diff --git a/python/cudf/cudf/_lib/strings/findall.pyx b/python/cudf/cudf/_lib/strings/findall.pyx index 3cf2084e30a..1db0fc89490 100644 --- a/python/cudf/cudf/_lib/strings/findall.pyx +++ b/python/cudf/cudf/_lib/strings/findall.pyx @@ -10,7 +10,10 @@ from cudf.core.buffer import acquire_spill_lock from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.strings.findall cimport findall as cpp_findall +from pylibcudf.libcudf.strings.findall cimport ( + find_re as cpp_find_re, + findall as cpp_findall, +) from pylibcudf.libcudf.strings.regex_flags cimport regex_flags from pylibcudf.libcudf.strings.regex_program cimport regex_program @@ -38,3 +41,26 @@ def findall(Column source_strings, object pattern, uint32_t flags): )) return Column.from_unique_ptr(move(c_result)) + + +@acquire_spill_lock() +def find_re(Column source_strings, object pattern, uint32_t flags): + """ + Returns character positions where the pattern first matches + the elements in source_strings. + """ + cdef unique_ptr[column] c_result + cdef column_view source_view = source_strings.view() + + cdef string pattern_string = str(pattern).encode() + cdef regex_flags c_flags = flags + cdef unique_ptr[regex_program] c_prog + + with nogil: + c_prog = move(regex_program.create(pattern_string, c_flags)) + c_result = move(cpp_find_re( + source_view, + dereference(c_prog) + )) + + return Column.from_unique_ptr(move(c_result)) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 16e6908f308..3cbe6e8a89a 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -3626,6 +3626,35 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex: data = libstrings.findall(self._column, pat, flags) return self._return_or_inplace(data) + def find_re(self, pat: str, flags: int = 0) -> SeriesOrIndex: + """ + Find first occurrence of pattern or regular expression in the + Series/Index. + + Parameters + ---------- + pat : str + Pattern or regular expression. + flags : int, default 0 (no flags) + Flags to pass through to the regex engine (e.g. re.MULTILINE) + + Returns + ------- + Series + A Series of position values where the pattern first matches + each string. + """ + if isinstance(pat, re.Pattern): + flags = pat.flags & ~re.U + pat = pat.pattern + if not _is_supported_regex_flags(flags): + raise NotImplementedError( + "unsupported value for `flags` parameter" + ) + + data = libstrings.find_re(self._column, pat, flags) + return self._return_or_inplace(data) + def find_multiple(self, patterns: SeriesOrIndex) -> cudf.Series: """ Find all first occurrences of patterns in the Series/Index. diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py index cc88cc79769..45143211a11 100644 --- a/python/cudf/cudf/tests/test_string.py +++ b/python/cudf/cudf/tests/test_string.py @@ -1899,6 +1899,26 @@ def test_string_findall(pat, flags): assert_eq(expected, actual) +@pytest.mark.parametrize( + "pat, flags, pos", + [ + ("Monkey", 0, [-1, 0, -1, -1]), + ("on", 0, [2, 1, -1, 1]), + ("bit", 0, [-1, -1, 3, -1]), + ("on$", 0, [2, -1, -1, -1]), + ("on$", re.MULTILINE, [2, -1, -1, 1]), + ("o.*k", re.DOTALL, [-1, 1, -1, 1]), + ], +) +def test_string_find_re(pat, flags, pos): + test_data = ["Lion", "Monkey", "Rabbit", "Don\nkey"] + gs = cudf.Series(test_data) + + expected = pd.Series(pos, dtype=np.int32) + actual = gs.str.find_re(pat, flags) + assert_eq(expected, actual) + + def test_string_replace_multi(): ps = pd.Series(["hello", "goodbye"]) gs = cudf.Series(["hello", "goodbye"]) diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd index b25724586e1..99eafd80473 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd @@ -11,3 +11,7 @@ cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] findall( column_view source_strings, regex_program) except + + + cdef unique_ptr[column] find_re( + column_view source_strings, + regex_program) except + From 96933c965cb26cf5f9dc8360637f8cf5ac97ee67 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 11 Sep 2024 10:59:23 -0400 Subject: [PATCH 02/12] fix memory-resource-ref parameter --- cpp/include/cudf/strings/findall.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp index f64ecee340d..867764b6d9a 100644 --- a/cpp/include/cudf/strings/findall.hpp +++ b/cpp/include/cudf/strings/findall.hpp @@ -93,7 +93,7 @@ std::unique_ptr find_re( strings_column_view const& input, regex_program const& prog, rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** @} */ // end of doxygen group } // namespace strings From 99651c1e29918aa7e8eb73b9cf1e571fdc879483 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 25 Sep 2024 13:31:23 -0400 Subject: [PATCH 03/12] fix imports --- python/cudf/cudf/_lib/strings/findall.pyx | 12 ------------ 1 file changed, 12 deletions(-) diff --git a/python/cudf/cudf/_lib/strings/findall.pyx b/python/cudf/cudf/_lib/strings/findall.pyx index c1125d1ebb7..3e7a504d535 100644 --- a/python/cudf/cudf/_lib/strings/findall.pyx +++ b/python/cudf/cudf/_lib/strings/findall.pyx @@ -4,18 +4,6 @@ from libc.stdint cimport uint32_t from cudf.core.buffer import acquire_spill_lock -<<<<<<< HEAD -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.strings.findall cimport ( - find_re as cpp_find_re, - findall as cpp_findall, -) -from pylibcudf.libcudf.strings.regex_flags cimport regex_flags -from pylibcudf.libcudf.strings.regex_program cimport regex_program - -======= ->>>>>>> branch-24.12 from cudf._lib.column cimport Column import pylibcudf as plc From 3caea9d16513f789f8ec8d8847dc8c0d0cb7f632 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 25 Sep 2024 17:35:43 -0400 Subject: [PATCH 04/12] fix pylibcudf declarations --- .../pylibcudf/libcudf/strings/findall.pxd | 8 ++--- .../pylibcudf/pylibcudf/strings/findall.pxd | 1 + .../pylibcudf/pylibcudf/strings/findall.pyx | 30 +++++++++++++++++++ 3 files changed, 35 insertions(+), 4 deletions(-) diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd index 99eafd80473..0d286c36446 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd @@ -9,9 +9,9 @@ from pylibcudf.libcudf.strings.regex_program cimport regex_program cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil: cdef unique_ptr[column] findall( - column_view source_strings, - regex_program) except + + column_view input, + regex_program prog) except + cdef unique_ptr[column] find_re( - column_view source_strings, - regex_program) except + + column_view input, + regex_program prog) except + diff --git a/python/pylibcudf/pylibcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/strings/findall.pxd index 54afa088141..3c35a9c9aa9 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pxd +++ b/python/pylibcudf/pylibcudf/strings/findall.pxd @@ -4,4 +4,5 @@ from pylibcudf.column cimport Column from pylibcudf.strings.regex_program cimport RegexProgram +cpdef Column find_re(Column input, RegexProgram pattern) cpdef Column findall(Column input, RegexProgram pattern) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 03ecb13a50e..021607cda67 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -38,3 +38,33 @@ cpdef Column findall(Column input, RegexProgram pattern): ) return Column.from_libcudf(move(c_result)) + + +cpdef Column find_re(Column input, RegexProgram pattern): + """ + Returns character positions where the pattern first matches + the elements in source_strings. + + For details, see For details, see :cpp:func:`cudf::strings::find_re`. + Parameters + ---------- + input : Column + Strings instance for this operation + pattern : RegexProgram + Regex pattern + Returns + ------- + Column + New column of integers + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_findall.find_re( + input.view(), + pattern.c_obj.get()[0] + ) + ) + + return Column.from_libcudf(move(c_result)) From 3fdcfd233fa6960f86bf501ba48533ab3d8d834e Mon Sep 17 00:00:00 2001 From: David Wendt Date: Wed, 25 Sep 2024 21:12:57 -0400 Subject: [PATCH 05/12] add blank lines to docstring --- python/pylibcudf/pylibcudf/strings/findall.pyx | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 021607cda67..b2ed5683529 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -46,12 +46,14 @@ cpdef Column find_re(Column input, RegexProgram pattern): the elements in source_strings. For details, see For details, see :cpp:func:`cudf::strings::find_re`. + Parameters ---------- input : Column Strings instance for this operation pattern : RegexProgram Regex pattern + Returns ------- Column From bd088649049da3de08b8ad2cdf3f4b9cd94944a0 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 26 Sep 2024 08:24:44 -0400 Subject: [PATCH 06/12] add empty test --- cpp/src/strings/search/findall.cu | 9 +++------ cpp/tests/strings/findall_tests.cpp | 13 +++++++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index e6300ab63ff..0c1633dc3c9 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -134,8 +134,7 @@ struct find_re_fn { auto const d_str = d_strings.element(idx); auto const result = prog.find(thread_idx, d_str, d_str.begin()); - if (!result.has_value()) { return -1; } - return result.value().first; + return result.has_value() ? result.value().first : -1; } }; } // namespace @@ -153,10 +152,8 @@ std::unique_ptr find_re(strings_column_view const& input, mr); if (input.is_empty()) { return results; } - auto d_results = results->mutable_view().data(); - - auto d_prog = regex_device_builder::create_prog_device(prog, stream); - + auto d_results = results->mutable_view().data(); + auto d_prog = regex_device_builder::create_prog_device(prog, stream); auto const d_strings = column_device_view::create(input.parent(), stream); launch_transform_kernel(find_re_fn{*d_strings}, *d_prog, d_results, input.size(), stream); diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp index 767ba05f158..ae423a81e7e 100644 --- a/cpp/tests/strings/findall_tests.cpp +++ b/cpp/tests/strings/findall_tests.cpp @@ -165,3 +165,16 @@ TEST_F(StringsFindallTests, FindTest) cudf::test::fixed_width_column_wrapper({0, 3, 3, -1, 1, 0, -1, 15}, valids); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } + +TEST_F(StringsFindallTests, EmptyTest) +{ + cudf::test::strings_column_wrapper input; + auto sv = cudf::strings_column_view(input); + + auto pattern = std::string("\\w+"); + + auto prog = cudf::strings::regex_program::create(pattern); + auto results = cudf::strings::find_re(sv, *prog); + auto expected = cudf::test::fixed_width_column_wrapper(); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); +} From 5041c42af1042b517f35e986608ebfcdb7358ebf Mon Sep 17 00:00:00 2001 From: David Wendt Date: Thu, 26 Sep 2024 15:59:18 -0400 Subject: [PATCH 07/12] add find_re cpp reference --- python/cudf/cudf/core/column/string.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index dfdf6dfa0ba..e72e763b913 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -3629,6 +3629,8 @@ def find_re(self, pat: str, flags: int = 0) -> SeriesOrIndex: Find first occurrence of pattern or regular expression in the Series/Index. + For details, see :cpp:func::`find_re`. + Parameters ---------- pat : str @@ -3647,7 +3649,7 @@ def find_re(self, pat: str, flags: int = 0) -> SeriesOrIndex: pat = pat.pattern if not _is_supported_regex_flags(flags): raise NotImplementedError( - "unsupported value for `flags` parameter" + "Unsupported value for `flags` parameter" ) data = libstrings.find_re(self._column, pat, flags) From a77047501d278e7ab9ad1c1f8407e2fc3ecb3d72 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Fri, 27 Sep 2024 07:19:02 -0400 Subject: [PATCH 08/12] add pylibcudf pytest --- .../pylibcudf/tests/test_string_findall.py | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_findall.py b/python/pylibcudf/pylibcudf/tests/test_string_findall.py index 994552fa276..debfad92d00 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_findall.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_findall.py @@ -21,3 +21,20 @@ def test_findall(): type=pa_result.type, ) assert_column_eq(result, expected) + + +def test_find_re(): + arr = pa.array(["bunny", "rabbit", "hare", "dog"]) + pattern = "[eb]" + result = plc.strings.findall.find_re( + plc.interop.from_arrow(arr), + plc.strings.regex_program.RegexProgram.create( + pattern, plc.strings.regex_flags.RegexFlags.DEFAULT + ), + ) + pa_result = plc.interop.to_arrow(result) + expected = pa.array( + [0, 2, 3, -1], + type=pa_result.type, + ) + assert_column_eq(result, expected) From 2305d437980fa7d4eb90c4ed2c3650709d06530d Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 30 Sep 2024 11:40:54 -0400 Subject: [PATCH 09/12] add stream test for find_re --- cpp/tests/streams/strings/find_test.cpp | 1 + 1 file changed, 1 insertion(+) diff --git a/cpp/tests/streams/strings/find_test.cpp b/cpp/tests/streams/strings/find_test.cpp index 52839c6fc9f..e5a1ee0988c 100644 --- a/cpp/tests/streams/strings/find_test.cpp +++ b/cpp/tests/streams/strings/find_test.cpp @@ -46,4 +46,5 @@ TEST_F(StringsFindTest, Find) auto const pattern = std::string("[a-z]"); auto const prog = cudf::strings::regex_program::create(pattern); cudf::strings::findall(view, *prog, cudf::test::get_default_stream()); + cudf::strings::find_re(view, *prog, cudf::test::get_default_stream()); } From 1afe3caf75da324bfc38e0f578a305c49f520e12 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Mon, 30 Sep 2024 14:51:06 -0400 Subject: [PATCH 10/12] add example to docstring --- python/cudf/cudf/core/column/string.py | 13 +++++++++++-- python/pylibcudf/pylibcudf/strings/findall.pyx | 4 ++-- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index e72e763b913..69e42e58cd0 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -3629,8 +3629,6 @@ def find_re(self, pat: str, flags: int = 0) -> SeriesOrIndex: Find first occurrence of pattern or regular expression in the Series/Index. - For details, see :cpp:func::`find_re`. - Parameters ---------- pat : str @@ -3643,6 +3641,17 @@ def find_re(self, pat: str, flags: int = 0) -> SeriesOrIndex: Series A Series of position values where the pattern first matches each string. + + Examples + -------- + >>> import cudf + >>> s = cudf.Series(['Lion', 'Monkey', 'Rabbit', 'Cat']) + >>> s.str.find_re('[ti]') + 0 1 + 1 -1 + 2 4 + 3 2 + dtype: int32 """ if isinstance(pat, re.Pattern): flags = pat.flags & ~re.U diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 14cf1f8e7ec..57cc0bb94c7 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -43,9 +43,9 @@ cpdef Column findall(Column input, RegexProgram pattern): cpdef Column find_re(Column input, RegexProgram pattern): """ Returns character positions where the pattern first matches - the elements in source_strings. + the elements in input strings. - For details, see For details, see :cpp:func:`cudf::strings::find_re`. + For details, see :cpp_func:`find_re` Parameters ---------- From 01dc7225e14a225b74ba5b97422cc4d1874d3ef1 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 1 Oct 2024 10:11:40 -0400 Subject: [PATCH 11/12] fix docstring --- python/pylibcudf/pylibcudf/strings/findall.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 57cc0bb94c7..01833308ebe 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -45,7 +45,7 @@ cpdef Column find_re(Column input, RegexProgram pattern): Returns character positions where the pattern first matches the elements in input strings. - For details, see :cpp_func:`find_re` + For details, see :cpp_func:`cudf::strings::find_re` Parameters ---------- From e7590059da9c1d02c6d15aeb2b0b2deceb7d1640 Mon Sep 17 00:00:00 2001 From: David Wendt Date: Tue, 1 Oct 2024 12:49:09 -0400 Subject: [PATCH 12/12] fix docstring again --- python/pylibcudf/pylibcudf/strings/findall.pyx | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx index 01833308ebe..5212dc4594d 100644 --- a/python/pylibcudf/pylibcudf/strings/findall.pyx +++ b/python/pylibcudf/pylibcudf/strings/findall.pyx @@ -45,7 +45,7 @@ cpdef Column find_re(Column input, RegexProgram pattern): Returns character positions where the pattern first matches the elements in input strings. - For details, see :cpp_func:`cudf::strings::find_re` + For details, see :cpp:func:`cudf::strings::find_re` Parameters ----------