address review

rapidsai · Sep 25, 2024 · 0059b38 · 0059b38
1 parent b274381
commit 0059b38
Show file tree

Hide file tree

Showing 3 changed files with 36 additions and 34 deletions.
diff --git a/cpp/include/cudf/strings/char_types/char_types.hpp b/cpp/include/cudf/strings/char_types/char_types.hpp
@@ -30,7 +30,7 @@ namespace strings {
  */
 
 /**
- * @brief Returns a boolean column identifying strings entries in which all
+ * @brief Returns a boolean column identifying string entries where all
  * characters are of the type specified.
  *
  * The output row entry will be set to false if the corresponding string element
@@ -105,7 +105,8 @@ std::unique_ptr<column> all_characters_of_type(
  *        `types_to_remove` will be filtered.
  * @param mr Device memory resource used to allocate the returned column's device memory
  * @param stream CUDA stream used for device memory operations and kernel launches
- * @return New column of boolean results for each string
+ * @return New strings column with the specified characters filtered out and replaced with the
+ * specified replacement string.
  */
 std::unique_ptr<column> filter_characters_of_type(
   strings_column_view const& input,

diff --git a/python/cudf/cudf/_lib/strings/char_types.pyx b/python/cudf/cudf/_lib/strings/char_types.pyx
@@ -6,21 +6,21 @@ from cudf.core.buffer import acquire_spill_lock
 
 from cudf._lib.column cimport Column
 
-import pylibcudf as plc
+from pylibcudf.strings import char_types
 
 
 @acquire_spill_lock()
 def filter_alphanum(Column source_strings, object py_repl, bool keep=True):
     """
     Returns a Column of strings keeping only alphanumeric character types.
     """
-    plc_column = plc.strings.char_types.filter_characters_of_type(
+    plc_column = char_types.filter_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.ALL_TYPES if keep
-        else plc.strings.char_types.StringCharacterTypes.ALPHANUM,
+        char_types.StringCharacterTypes.ALL_TYPES if keep
+        else char_types.StringCharacterTypes.ALPHANUM,
         py_repl.device_value.c_value,
-        plc.strings.char_types.StringCharacterTypes.ALPHANUM if keep
-        else plc.strings.char_types.StringCharacterTypes.ALL_TYPES
+        char_types.StringCharacterTypes.ALPHANUM if keep
+        else char_types.StringCharacterTypes.ALL_TYPES
     )
     return Column.from_pylibcudf(plc_column)
 
@@ -32,10 +32,10 @@ def is_decimal(Column source_strings):
     that contain only decimal characters -- those that can be used
     to extract base10 numbers.
     """
-    plc_column = plc.strings.char_types.all_characters_of_type(
+    plc_column = char_types.all_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.DECIMAL,
-        plc.strings.char_types.StringCharacterTypes.ALL_TYPES
+        char_types.StringCharacterTypes.DECIMAL,
+        char_types.StringCharacterTypes.ALL_TYPES
     )
     return Column.from_pylibcudf(plc_column)
 
@@ -48,10 +48,10 @@ def is_alnum(Column source_strings):
 
     Equivalent to: is_alpha() or is_digit() or is_numeric() or is_decimal()
     """
-    plc_column = plc.strings.char_types.all_characters_of_type(
+    plc_column = char_types.all_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.ALPHANUM,
-        plc.strings.char_types.StringCharacterTypes.ALL_TYPES
+        char_types.StringCharacterTypes.ALPHANUM,
+        char_types.StringCharacterTypes.ALL_TYPES
     )
     return Column.from_pylibcudf(plc_column)
 
@@ -62,10 +62,10 @@ def is_alpha(Column source_strings):
     Returns a Column of boolean values with True for `source_strings`
     that contain only alphabetic characters.
     """
-    plc_column = plc.strings.char_types.all_characters_of_type(
+    plc_column = char_types.all_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.ALPHA,
-        plc.strings.char_types.StringCharacterTypes.ALL_TYPES
+        char_types.StringCharacterTypes.ALPHA,
+        char_types.StringCharacterTypes.ALL_TYPES
     )
     return Column.from_pylibcudf(plc_column)
 
@@ -76,10 +76,10 @@ def is_digit(Column source_strings):
     Returns a Column of boolean values with True for `source_strings`
     that contain only decimal and digit characters.
     """
-    plc_column = plc.strings.char_types.all_characters_of_type(
+    plc_column = char_types.all_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.DIGIT,
-        plc.strings.char_types.StringCharacterTypes.ALL_TYPES
+        char_types.StringCharacterTypes.DIGIT,
+        char_types.StringCharacterTypes.ALL_TYPES
     )
     return Column.from_pylibcudf(plc_column)
 
@@ -91,10 +91,10 @@ def is_numeric(Column source_strings):
     that contain only numeric characters. These include digit and
     numeric characters.
     """
-    plc_column = plc.strings.char_types.all_characters_of_type(
+    plc_column = char_types.all_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.NUMERIC,
-        plc.strings.char_types.StringCharacterTypes.ALL_TYPES
+        char_types.StringCharacterTypes.NUMERIC,
+        char_types.StringCharacterTypes.ALL_TYPES
     )
     return Column.from_pylibcudf(plc_column)
 
@@ -105,10 +105,10 @@ def is_upper(Column source_strings):
     Returns a Column of boolean values with True for `source_strings`
     that contain only upper-case characters.
     """
-    plc_column = plc.strings.char_types.all_characters_of_type(
+    plc_column = char_types.all_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.UPPER,
-        plc.strings.char_types.StringCharacterTypes.CASE_TYPES
+        char_types.StringCharacterTypes.UPPER,
+        char_types.StringCharacterTypes.CASE_TYPES
     )
     return Column.from_pylibcudf(plc_column)
 
@@ -119,10 +119,10 @@ def is_lower(Column source_strings):
     Returns a Column of boolean values with True for `source_strings`
     that contain only lower-case characters.
     """
-    plc_column = plc.strings.char_types.all_characters_of_type(
+    plc_column = char_types.all_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.LOWER,
-        plc.strings.char_types.StringCharacterTypes.CASE_TYPES
+        char_types.StringCharacterTypes.LOWER,
+        char_types.StringCharacterTypes.CASE_TYPES
     )
     return Column.from_pylibcudf(plc_column)
 
@@ -133,9 +133,9 @@ def is_space(Column source_strings):
     Returns a Column of boolean values with True for `source_strings`
     that contains all characters which are spaces only.
     """
-    plc_column = plc.strings.char_types.all_characters_of_type(
+    plc_column = char_types.all_characters_of_type(
         source_strings.to_pylibcudf(mode="read"),
-        plc.strings.char_types.StringCharacterTypes.SPACE,
-        plc.strings.char_types.StringCharacterTypes.ALL_TYPES
+        char_types.StringCharacterTypes.SPACE,
+        char_types.StringCharacterTypes.ALL_TYPES
     )
     return Column.from_pylibcudf(plc_column)
diff --git a/python/pylibcudf/pylibcudf/strings/char_types.pyx b/python/pylibcudf/pylibcudf/strings/char_types.pyx
@@ -19,7 +19,7 @@ cpdef Column all_characters_of_type(
     string_character_types verify_types
 ):
     """
-    Filter specific character types from a column of strings.
+    Identifies strings where all characters match the specified type.
 
     Parameters
     ----------
@@ -72,7 +72,8 @@ cpdef Column filter_characters_of_type(
     Returns
     -------
     Column
-        New column of boolean results for each string
+        New column with the specified characters filtered out and
+        replaced with the specified replacement string.
     """
     cdef const string_scalar* c_replacement = <const string_scalar*>(
         replacement.c_obj.get()