From 63a5d2e708fffde63891d3f4767d444748d8e1dd Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 2 Oct 2024 07:24:20 -1000
Subject: [PATCH 01/25] Add string.wrap APIs to pylibcudf (#16935)

Contributes to https://github.com/rapidsai/cudf/issues/15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16935
---
 .../api_docs/pylibcudf/strings/index.rst      |  1 +
 .../api_docs/pylibcudf/strings/wrap.rst       |  6 +++
 python/cudf/cudf/_lib/strings/wrap.pyx        | 24 ++++-------
 .../pylibcudf/libcudf/strings/wrap.pxd        |  2 +-
 .../pylibcudf/strings/CMakeLists.txt          |  1 +
 .../pylibcudf/pylibcudf/strings/__init__.pxd  |  2 +
 .../pylibcudf/pylibcudf/strings/__init__.py   |  2 +
 python/pylibcudf/pylibcudf/strings/wrap.pxd   |  7 ++++
 python/pylibcudf/pylibcudf/strings/wrap.pyx   | 42 +++++++++++++++++++
 .../pylibcudf/tests/test_string_wrap.py       | 24 +++++++++++
 10 files changed, 93 insertions(+), 18 deletions(-)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/strings/wrap.rst
 create mode 100644 python/pylibcudf/pylibcudf/strings/wrap.pxd
 create mode 100644 python/pylibcudf/pylibcudf/strings/wrap.pyx
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_wrap.py

diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
index e73ea3370ec..5a06adf6a11 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
@@ -18,3 +18,4 @@ strings
     slice
     split
     strip
+    wrap
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/wrap.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/wrap.rst
new file mode 100644
index 00000000000..bd825f78568
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/wrap.rst
@@ -0,0 +1,6 @@
+====
+wrap
+====
+
+.. automodule:: pylibcudf.strings.wrap
+   :members:
diff --git a/python/cudf/cudf/_lib/strings/wrap.pyx b/python/cudf/cudf/_lib/strings/wrap.pyx
index eed5cf33b10..2b40f01f818 100644
--- a/python/cudf/cudf/_lib/strings/wrap.pyx
+++ b/python/cudf/cudf/_lib/strings/wrap.pyx
@@ -1,17 +1,13 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
-from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
-
 from cudf.core.buffer import acquire_spill_lock
 
-from pylibcudf.libcudf.column.column cimport column
-from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.strings.wrap cimport wrap as cpp_wrap
 from pylibcudf.libcudf.types cimport size_type
 
 from cudf._lib.column cimport Column
 
+import pylibcudf as plc
+
 
 @acquire_spill_lock()
 def wrap(Column source_strings,
@@ -21,14 +17,8 @@ def wrap(Column source_strings,
     in the Column to be formatted in paragraphs
     with length less than a given `width`.
     """
-
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    with nogil:
-        c_result = move(cpp_wrap(
-            source_view,
-            width
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_result = plc.strings.wrap.wrap(
+        source_strings.to_pylibcudf(mode="read"),
+        width
+    )
+    return Column.from_pylibcudf(plc_result)
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
index c0053391328..abc1bd43ad2 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/wrap.pxd
@@ -9,5 +9,5 @@ from pylibcudf.libcudf.types cimport size_type
 cdef extern from "cudf/strings/wrap.hpp" namespace "cudf::strings" nogil:
 
     cdef unique_ptr[column] wrap(
-        column_view source_strings,
+        column_view input,
         size_type width) except +
diff --git a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt
index d92f806efbe..e3343b38740 100644
--- a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt
@@ -30,6 +30,7 @@ set(cython_sources
     slice.pyx
     strip.pyx
     translate.pyx
+    wrap.pyx
 )
 
 set(linked_libraries cudf::cudf)
diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pxd b/python/pylibcudf/pylibcudf/strings/__init__.pxd
index 788e2c99ab1..a61c98fe77c 100644
--- a/python/pylibcudf/pylibcudf/strings/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/strings/__init__.pxd
@@ -18,6 +18,7 @@ from . cimport (
     split,
     strip,
     translate,
+    wrap,
 )
 from .side_type cimport side_type
 
@@ -39,4 +40,5 @@ __all__ = [
     "split",
     "side_type",
     "translate",
+    "wrap",
 ]
diff --git a/python/pylibcudf/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py
index bcaeb073d0b..ab3ad971db6 100644
--- a/python/pylibcudf/pylibcudf/strings/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/__init__.py
@@ -19,6 +19,7 @@
     split,
     strip,
     translate,
+    wrap,
 )
 from .side_type import SideType
 
@@ -40,4 +41,5 @@
     "split",
     "SideType",
     "translate",
+    "wrap",
 ]
diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pxd b/python/pylibcudf/pylibcudf/strings/wrap.pxd
new file mode 100644
index 00000000000..fcc86650acf
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/wrap.pxd
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.types cimport size_type
+
+
+cpdef Column wrap(Column input, size_type width)
diff --git a/python/pylibcudf/pylibcudf/strings/wrap.pyx b/python/pylibcudf/pylibcudf/strings/wrap.pyx
new file mode 100644
index 00000000000..11e31f54eee
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/wrap.pyx
@@ -0,0 +1,42 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.strings cimport wrap as cpp_wrap
+from pylibcudf.libcudf.types cimport size_type
+
+
+cpdef Column wrap(Column input, size_type width):
+    """
+    Wraps strings onto multiple lines shorter than `width` by
+    replacing appropriate white space with
+    new-line characters (ASCII 0x0A).
+
+    For details, see :cpp:func:`cudf::strings::wrap`.
+
+    Parameters
+    ----------
+    input : Column
+        String column
+
+    width : int
+        Maximum character width of a line within each string
+
+    Returns
+    -------
+    Column
+        Column of wrapped strings
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_wrap.wrap(
+                input.view(),
+                width,
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py
new file mode 100644
index 00000000000..85abd3a2bae
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py
@@ -0,0 +1,24 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import textwrap
+
+import pyarrow as pa
+import pylibcudf as plc
+from utils import assert_column_eq
+
+
+def test_wrap():
+    pa_array = pa.array(
+        [
+            "the quick brown fox jumped over the lazy brown dog",
+            "hello, world",
+            None,
+        ]
+    )
+    result = plc.strings.wrap.wrap(plc.interop.from_arrow(pa_array), 12)
+    expected = pa.array(
+        [
+            textwrap.fill(val, 12) if isinstance(val, str) else val
+            for val in pa_array.to_pylist()
+        ]
+    )
+    assert_column_eq(expected, result)

From 6af1d2294075e4ef6e5a77a52cdadf341a31b1a3 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 2 Oct 2024 08:47:02 -1000
Subject: [PATCH 02/25] Add string padding and side_type APIs to pylibcudf
 (#16833)

Contributes to https://github.com/rapidsai/cudf/issues/15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16833
---
 .../api_docs/pylibcudf/strings/index.rst      |   2 +
 .../api_docs/pylibcudf/strings/padding.rst    |   6 +
 .../api_docs/pylibcudf/strings/side_type.rst  |   6 +
 python/cudf/cudf/_lib/strings/__init__.py     |   9 +-
 python/cudf/cudf/_lib/strings/padding.pyx     | 112 +++---------------
 python/cudf/cudf/_lib/strings/strip.pyx       |  67 +++--------
 python/cudf/cudf/core/column/string.py        |   4 +-
 .../pylibcudf/libcudf/strings/padding.pxd     |   4 +-
 .../pylibcudf/libcudf/strings/side_type.pxd   |  12 +-
 .../pylibcudf/libcudf/strings/strip.pxd       |   4 +-
 .../pylibcudf/strings/CMakeLists.txt          |   1 +
 .../pylibcudf/pylibcudf/strings/__init__.pxd  |   2 +
 .../pylibcudf/pylibcudf/strings/__init__.py   |   2 +
 .../pylibcudf/pylibcudf/strings/padding.pxd   |  11 ++
 .../pylibcudf/pylibcudf/strings/padding.pyx   |  75 ++++++++++++
 .../pylibcudf/pylibcudf/strings/side_type.pxd |   1 -
 .../pylibcudf/pylibcudf/strings/side_type.pyx |   1 -
 .../pylibcudf/tests/test_string_padding.py    |  26 ++++
 18 files changed, 175 insertions(+), 170 deletions(-)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/strings/padding.rst
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/strings/side_type.rst
 create mode 100644 python/pylibcudf/pylibcudf/strings/padding.pxd
 create mode 100644 python/pylibcudf/pylibcudf/strings/padding.pyx
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_padding.py

diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
index 5a06adf6a11..48dc8a13c3e 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/index.rst
@@ -11,10 +11,12 @@ strings
     find
     find_multiple
     findall
+    padding
     regex_flags
     regex_program
     repeat
     replace
+    side_type
     slice
     split
     strip
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/padding.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/padding.rst
new file mode 100644
index 00000000000..5b417024fd5
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/padding.rst
@@ -0,0 +1,6 @@
+=======
+padding
+=======
+
+.. automodule:: pylibcudf.strings.padding
+   :members:
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/side_type.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/side_type.rst
new file mode 100644
index 00000000000..d5aef9c4f75
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/strings/side_type.rst
@@ -0,0 +1,6 @@
+=========
+side_type
+=========
+
+.. automodule:: pylibcudf.strings.side_type
+   :members:
diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py
index 4bf8a9b1a8f..049dbab4851 100644
--- a/python/cudf/cudf/_lib/strings/__init__.py
+++ b/python/cudf/cudf/_lib/strings/__init__.py
@@ -73,14 +73,7 @@
 from cudf._lib.strings.find_multiple import find_multiple
 from cudf._lib.strings.findall import findall
 from cudf._lib.strings.json import GetJsonObjectOptions, get_json_object
-from cudf._lib.strings.padding import (
-    SideType,
-    center,
-    ljust,
-    pad,
-    rjust,
-    zfill,
-)
+from cudf._lib.strings.padding import center, ljust, pad, rjust, zfill
 from cudf._lib.strings.repeat import repeat_scalar, repeat_sequence
 from cudf._lib.strings.replace import (
     insert,
diff --git a/python/cudf/cudf/_lib/strings/padding.pyx b/python/cudf/cudf/_lib/strings/padding.pyx
index d0239e91ec3..015a2ebab8a 100644
--- a/python/cudf/cudf/_lib/strings/padding.pyx
+++ b/python/cudf/cudf/_lib/strings/padding.pyx
@@ -1,64 +1,31 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
-
-from libcpp.memory cimport unique_ptr
-from libcpp.string cimport string
-from libcpp.utility cimport move
-
 from cudf.core.buffer import acquire_spill_lock
 
-from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.types cimport size_type
 
 from cudf._lib.column cimport Column
 
-from enum import IntEnum
-
-from pylibcudf.libcudf.column.column cimport column
-from pylibcudf.libcudf.strings.padding cimport (
-    pad as cpp_pad,
-    zfill as cpp_zfill,
-)
-from pylibcudf.libcudf.strings.side_type cimport (
-    side_type,
-    underlying_type_t_side_type,
-)
-
-
-class SideType(IntEnum):
-    LEFT = <underlying_type_t_side_type> side_type.LEFT
-    RIGHT = <underlying_type_t_side_type> side_type.RIGHT
-    BOTH = <underlying_type_t_side_type> side_type.BOTH
+import pylibcudf as plc
 
 
 @acquire_spill_lock()
 def pad(Column source_strings,
         size_type width,
         fill_char,
-        side=SideType.LEFT):
+        side=plc.strings.side_type.SideType.LEFT):
     """
     Returns a Column by padding strings in `source_strings`
     up to the given `width`. Direction of padding is to be specified by `side`.
     The additional characters being filled can be changed by specifying
     `fill_char`.
     """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    cdef string f_char = <string>str(fill_char).encode()
-
-    cdef side_type pad_direction = <side_type>(
-        <underlying_type_t_side_type> side
+    plc_result = plc.strings.padding.pad(
+        source_strings.to_pylibcudf(mode="read"),
+        width,
+        side,
+        fill_char,
     )
-
-    with nogil:
-        c_result = move(cpp_pad(
-            source_view,
-            width,
-            pad_direction,
-            f_char
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    return Column.from_pylibcudf(plc_result)
 
 
 @acquire_spill_lock()
@@ -68,19 +35,13 @@ def zfill(Column source_strings,
     Returns a Column by prepending strings in `source_strings`
     with '0' characters up to the given `width`.
     """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    with nogil:
-        c_result = move(cpp_zfill(
-            source_view,
-            width
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_result = plc.strings.padding.zfill(
+        source_strings.to_pylibcudf(mode="read"),
+        width
+    )
+    return Column.from_pylibcudf(plc_result)
 
 
-@acquire_spill_lock()
 def center(Column source_strings,
            size_type width,
            fill_char):
@@ -89,23 +50,9 @@ def center(Column source_strings,
     in `source_strings` with additional character, `fill_char`
     up to the given `width`.
     """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    cdef string f_char = <string>str(fill_char).encode()
-
-    with nogil:
-        c_result = move(cpp_pad(
-            source_view,
-            width,
-            side_type.BOTH,
-            f_char
-        ))
+    return pad(source_strings, width, fill_char, plc.strings.side_type.SideType.BOTH)
 
-    return Column.from_unique_ptr(move(c_result))
 
-
-@acquire_spill_lock()
 def ljust(Column source_strings,
           size_type width,
           fill_char):
@@ -113,23 +60,9 @@ def ljust(Column source_strings,
     Returns a Column by filling right side of strings in `source_strings`
     with additional character, `fill_char` up to the given `width`.
     """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    cdef string f_char = <string>str(fill_char).encode()
+    return pad(source_strings, width, fill_char, plc.strings.side_type.SideType.RIGHT)
 
-    with nogil:
-        c_result = move(cpp_pad(
-            source_view,
-            width,
-            side_type.RIGHT,
-            f_char
-        ))
 
-    return Column.from_unique_ptr(move(c_result))
-
-
-@acquire_spill_lock()
 def rjust(Column source_strings,
           size_type width,
           fill_char):
@@ -137,17 +70,4 @@ def rjust(Column source_strings,
     Returns a Column by filling left side of strings in `source_strings`
     with additional character, `fill_char` up to the given `width`.
     """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    cdef string f_char = <string>str(fill_char).encode()
-
-    with nogil:
-        c_result = move(cpp_pad(
-            source_view,
-            width,
-            side_type.LEFT,
-            f_char
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    return pad(source_strings, width, fill_char, plc.strings.side_type.SideType.LEFT)
diff --git a/python/cudf/cudf/_lib/strings/strip.pyx b/python/cudf/cudf/_lib/strings/strip.pyx
index 38ecb21a94c..982c5a600e7 100644
--- a/python/cudf/cudf/_lib/strings/strip.pyx
+++ b/python/cudf/cudf/_lib/strings/strip.pyx
@@ -1,18 +1,8 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
-from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
-
 from cudf.core.buffer import acquire_spill_lock
 
-from pylibcudf.libcudf.column.column cimport column
-from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from pylibcudf.libcudf.strings.side_type cimport side_type
-from pylibcudf.libcudf.strings.strip cimport strip as cpp_strip
-
 from cudf._lib.column cimport Column
-from cudf._lib.scalar cimport DeviceScalar
 import pylibcudf as plc
 
 
@@ -24,15 +14,12 @@ def strip(Column source_strings,
     The set of characters need be stripped from left and right side
     can be specified by `py_repl`.
     """
-
-    cdef DeviceScalar repl = py_repl.device_value
-    return Column.from_pylibcudf(
-        plc.strings.strip.strip(
-            source_strings.to_pylibcudf(mode="read"),
-            plc.strings.SideType.BOTH,
-            repl.c_value
-        )
+    plc_result = plc.strings.strip.strip(
+        source_strings.to_pylibcudf(mode="read"),
+        plc.strings.side_type.SideType.BOTH,
+        py_repl.device_value.c_value,
     )
+    return Column.from_pylibcudf(plc_result)
 
 
 @acquire_spill_lock()
@@ -43,24 +30,12 @@ def lstrip(Column source_strings,
     The set of characters need be stripped from left side can
     be specified by `py_repl`.
     """
-
-    cdef DeviceScalar repl = py_repl.device_value
-
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    cdef const string_scalar* scalar_str = <const string_scalar*>(
-        repl.get_raw_ptr()
+    plc_result = plc.strings.strip.strip(
+        source_strings.to_pylibcudf(mode="read"),
+        plc.strings.side_type.SideType.LEFT,
+        py_repl.device_value.c_value,
     )
-
-    with nogil:
-        c_result = move(cpp_strip(
-            source_view,
-            side_type.LEFT,
-            scalar_str[0]
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    return Column.from_pylibcudf(plc_result)
 
 
 @acquire_spill_lock()
@@ -71,21 +46,9 @@ def rstrip(Column source_strings,
     The set of characters need be stripped from right side can
     be specified by `py_repl`.
     """
-
-    cdef DeviceScalar repl = py_repl.device_value
-
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    cdef const string_scalar* scalar_str = <const string_scalar*>(
-        repl.get_raw_ptr()
+    plc_result = plc.strings.strip.strip(
+        source_strings.to_pylibcudf(mode="read"),
+        plc.strings.side_type.SideType.RIGHT,
+        py_repl.device_value.c_value,
     )
-
-    with nogil:
-        c_result = move(cpp_strip(
-            source_view,
-            side_type.RIGHT,
-            scalar_str[0]
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    return Column.from_pylibcudf(plc_result)
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index da422db5eae..88df57b1b3b 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -11,6 +11,8 @@
 import pandas as pd
 import pyarrow as pa
 
+import pylibcudf as plc
+
 import cudf
 import cudf.api.types
 from cudf import _lib as libcudf
@@ -2966,7 +2968,7 @@ def pad(
             raise TypeError(msg)
 
         try:
-            side = libstrings.SideType[side.upper()]
+            side = plc.strings.side_type.SideType[side.upper()]
         except KeyError:
             raise ValueError(
                 "side has to be either one of {'left', 'right', 'both'}"
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
index 657fe61eb14..875f8cafd14 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/padding.pxd
@@ -12,11 +12,11 @@ from pylibcudf.libcudf.types cimport size_type
 cdef extern from "cudf/strings/padding.hpp" namespace "cudf::strings" nogil:
 
     cdef unique_ptr[column] pad(
-        column_view source_strings,
+        column_view input,
         size_type width,
         side_type side,
         string fill_char) except +
 
     cdef unique_ptr[column] zfill(
-        column_view source_strings,
+        column_view input,
         size_type width) except +
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd
index 019ff3f17ba..e92c5dc1d66 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/side_type.pxd
@@ -1,12 +1,10 @@
 # Copyright (c) 2022-2024, NVIDIA CORPORATION.
-from libc.stdint cimport int32_t
+from libcpp cimport int
 
 
 cdef extern from "cudf/strings/side_type.hpp" namespace "cudf::strings" nogil:
 
-    cpdef enum class side_type(int32_t):
-        LEFT 'cudf::strings::side_type::LEFT'
-        RIGHT 'cudf::strings::side_type::RIGHT'
-        BOTH 'cudf::strings::side_type::BOTH'
-
-ctypedef int32_t underlying_type_t_side_type
+    cpdef enum class side_type(int):
+        LEFT
+        RIGHT
+        BOTH
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
index b0ca771762d..dd527a78e7f 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/strip.pxd
@@ -10,6 +10,6 @@ from pylibcudf.libcudf.strings.side_type cimport side_type
 cdef extern from "cudf/strings/strip.hpp" namespace "cudf::strings" nogil:
 
     cdef unique_ptr[column] strip(
-        column_view source_strings,
-        side_type stype,
+        column_view input,
+        side_type side,
         string_scalar to_strip) except +
diff --git a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt
index e3343b38740..eeb44d19333 100644
--- a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt
@@ -22,6 +22,7 @@ set(cython_sources
     find.pyx
     find_multiple.pyx
     findall.pyx
+    padding.pyx
     regex_flags.pyx
     regex_program.pyx
     repeat.pyx
diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pxd b/python/pylibcudf/pylibcudf/strings/__init__.pxd
index a61c98fe77c..187ef113073 100644
--- a/python/pylibcudf/pylibcudf/strings/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/strings/__init__.pxd
@@ -11,9 +11,11 @@ from . cimport (
     find,
     find_multiple,
     findall,
+    padding,
     regex_flags,
     regex_program,
     replace,
+    side_type,
     slice,
     split,
     strip,
diff --git a/python/pylibcudf/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py
index ab3ad971db6..6033cea0625 100644
--- a/python/pylibcudf/pylibcudf/strings/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/__init__.py
@@ -11,10 +11,12 @@
     find,
     find_multiple,
     findall,
+    padding,
     regex_flags,
     regex_program,
     repeat,
     replace,
+    side_type,
     slice,
     split,
     strip,
diff --git a/python/pylibcudf/pylibcudf/strings/padding.pxd b/python/pylibcudf/pylibcudf/strings/padding.pxd
new file mode 100644
index 00000000000..a035a5ad187
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/padding.pxd
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.string cimport string
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.strings.side_type cimport side_type
+from pylibcudf.libcudf.types cimport size_type
+
+
+cpdef Column pad(Column input, size_type width, side_type side, str fill_char)
+
+cpdef Column zfill(Column input, size_type width)
diff --git a/python/pylibcudf/pylibcudf/strings/padding.pyx b/python/pylibcudf/pylibcudf/strings/padding.pyx
new file mode 100644
index 00000000000..24daaaa3838
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/padding.pyx
@@ -0,0 +1,75 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.strings cimport padding as cpp_padding
+from pylibcudf.libcudf.strings.side_type cimport side_type
+
+
+cpdef Column pad(Column input, size_type width, side_type side, str fill_char):
+    """
+    Add padding to each string using a provided character.
+
+    For details, see :cpp:func:`cudf::strings::pad`.
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation
+    width : int
+        The minimum number of characters for each string.
+    side : SideType
+        Where to place the padding characters.
+    fill_char : str
+        Single UTF-8 character to use for padding
+
+    Returns
+    -------
+    Column
+        New column with padded strings.
+    """
+    cdef unique_ptr[column] c_result
+    cdef string c_fill_char = fill_char.encode("utf-8")
+
+    with nogil:
+        c_result = move(
+            cpp_padding.pad(
+                input.view(),
+                width,
+                side,
+                c_fill_char,
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+cpdef Column zfill(Column input, size_type width):
+    """
+    Add '0' as padding to the left of each string.
+
+    For details, see :cpp:func:`cudf::strings::zfill`.
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation
+    width : int
+        The minimum number of characters for each string.
+
+    Returns
+    -------
+    Column
+        New column of strings.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_padding.zfill(
+                input.view(),
+                width,
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/strings/side_type.pxd b/python/pylibcudf/pylibcudf/strings/side_type.pxd
index 34b7a580380..34b03e9bc27 100644
--- a/python/pylibcudf/pylibcudf/strings/side_type.pxd
+++ b/python/pylibcudf/pylibcudf/strings/side_type.pxd
@@ -1,3 +1,2 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-
 from pylibcudf.libcudf.strings.side_type cimport side_type
diff --git a/python/pylibcudf/pylibcudf/strings/side_type.pyx b/python/pylibcudf/pylibcudf/strings/side_type.pyx
index acdc7d6ff1f..cf0c770cc11 100644
--- a/python/pylibcudf/pylibcudf/strings/side_type.pyx
+++ b/python/pylibcudf/pylibcudf/strings/side_type.pyx
@@ -1,4 +1,3 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-
 from pylibcudf.libcudf.strings.side_type import \
     side_type as SideType  # no-cython-lint
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_padding.py b/python/pylibcudf/pylibcudf/tests/test_string_padding.py
new file mode 100644
index 00000000000..2ba775d17ae
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_padding.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pyarrow.compute as pc
+import pylibcudf as plc
+
+
+def test_pad():
+    arr = pa.array(["a", "1", None])
+    plc_result = plc.strings.padding.pad(
+        plc.interop.from_arrow(arr),
+        2,
+        plc.strings.side_type.SideType.LEFT,
+        "!",
+    )
+    result = plc.interop.to_arrow(plc_result)
+    expected = pa.chunked_array(pc.utf8_lpad(arr, 2, padding="!"))
+    assert result.equals(expected)
+
+
+def test_zfill():
+    arr = pa.array(["a", "1", None])
+    plc_result = plc.strings.padding.zfill(plc.interop.from_arrow(arr), 2)
+    result = plc.interop.to_arrow(plc_result)
+    expected = pa.chunked_array(pc.utf8_lpad(arr, 2, padding="0"))
+    assert result.equals(expected)

From 8a9df040e18b2f54df67ad6fde94969990e61b7f Mon Sep 17 00:00:00 2001
From: Ray Douglass <3107146+raydouglass@users.noreply.github.com>
Date: Wed, 2 Oct 2024 14:59:00 -0400
Subject: [PATCH 03/25] Add license to the pylibcudf wheel (#16976)

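Add a `LICENSE` symlink under `python/pylibcudf` (pointing to the repository-root `LICENSE`) so that the license file is included in the `pylibcudf` wheels.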
---
 python/pylibcudf/LICENSE | 1 +
 1 file changed, 1 insertion(+)
 create mode 120000 python/pylibcudf/LICENSE

diff --git a/python/pylibcudf/LICENSE b/python/pylibcudf/LICENSE
new file mode 120000
index 00000000000..30cff7403da
--- /dev/null
+++ b/python/pylibcudf/LICENSE
@@ -0,0 +1 @@
+../../LICENSE
\ No newline at end of file

From 466e37973d3b9aef4d14a7aa0cd48df0b886300d Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Wed, 2 Oct 2024 20:09:21 -0400
Subject: [PATCH 04/25] Fix performance regression for
 generate_character_ngrams (#16849)

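Fixes a performance regression in `nvtext::generate_character_ngrams` introduced in #16212. A thread-per-row kernel is faster for smaller strings, so `count_char_ngrams_kernel` now takes a `tile_size` argument and processes one string per thread when `tile_size` is 1.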

Authors:
  - David Wendt (https://github.com/davidwendt)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Bradley Dice (https://github.com/bdice)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/16849
---
 cpp/src/text/generate_ngrams.cu | 50 ++++++++++++++++++++++-----------
 1 file changed, 34 insertions(+), 16 deletions(-)

diff --git a/cpp/src/text/generate_ngrams.cu b/cpp/src/text/generate_ngrams.cu
index a87ecb81b9d..997b0278fe2 100644
--- a/cpp/src/text/generate_ngrams.cu
+++ b/cpp/src/text/generate_ngrams.cu
@@ -22,6 +22,7 @@
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/sizes_to_offsets_iterator.cuh>
 #include <cudf/hashing/detail/murmurhash3_x86_32.cuh>
+#include <cudf/lists/detail/lists_column_factories.hpp>
 #include <cudf/strings/detail/strings_children.cuh>
 #include <cudf/strings/detail/utilities.cuh>
 #include <cudf/strings/string_view.cuh>
@@ -48,6 +49,9 @@
 namespace nvtext {
 namespace detail {
 namespace {
+// long strings threshold found with benchmarking
+constexpr cudf::size_type AVG_CHAR_BYTES_THRESHOLD = 64;
+
 /**
  * @brief Generate ngrams from strings column.
  *
@@ -173,33 +177,39 @@ constexpr cudf::thread_index_type bytes_per_thread = 4;
 /**
  * @brief Counts the number of ngrams in each row of the given strings column
  *
- * Each warp processes a single string.
+ * Each warp/thread processes a single string.
  * Formula is `count = max(0,str.length() - ngrams + 1)`
  * If a string has less than ngrams characters, its count is 0.
  */
 CUDF_KERNEL void count_char_ngrams_kernel(cudf::column_device_view const d_strings,
                                           cudf::size_type ngrams,
+                                          cudf::size_type tile_size,
                                           cudf::size_type* d_counts)
 {
   auto const idx = cudf::detail::grid_1d::global_thread_id();
 
-  auto const str_idx = idx / cudf::detail::warp_size;
+  auto const str_idx = idx / tile_size;
   if (str_idx >= d_strings.size()) { return; }
   if (d_strings.is_null(str_idx)) {
     d_counts[str_idx] = 0;
     return;
   }
 
+  auto const d_str = d_strings.element<cudf::string_view>(str_idx);
+  if (tile_size == 1) {
+    d_counts[str_idx] = cuda::std::max(0, (d_str.length() + 1 - ngrams));
+    return;
+  }
+
   namespace cg    = cooperative_groups;
   auto const warp = cg::tiled_partition<cudf::detail::warp_size>(cg::this_thread_block());
 
-  auto const d_str = d_strings.element<cudf::string_view>(str_idx);
-  auto const end   = d_str.data() + d_str.size_bytes();
+  auto const end = d_str.data() + d_str.size_bytes();
 
   auto const lane_idx   = warp.thread_rank();
   cudf::size_type count = 0;
   for (auto itr = d_str.data() + (lane_idx * bytes_per_thread); itr < end;
-       itr += cudf::detail::warp_size * bytes_per_thread) {
+       itr += tile_size * bytes_per_thread) {
     for (auto s = itr; (s < (itr + bytes_per_thread)) && (s < end); ++s) {
       count += static_cast<cudf::size_type>(cudf::strings::detail::is_begin_utf8_char(*s));
     }
@@ -256,19 +266,27 @@ std::unique_ptr<cudf::column> generate_character_ngrams(cudf::strings_column_vie
                "Parameter ngrams should be an integer value of 2 or greater",
                std::invalid_argument);
 
-  auto const strings_count = input.size();
-  if (strings_count == 0) {  // if no strings, return an empty column
-    return cudf::make_empty_column(cudf::data_type{cudf::type_id::STRING});
+  if (input.is_empty()) {  // if no strings, return an empty column
+    return cudf::lists::detail::make_empty_lists_column(
+      cudf::data_type{cudf::type_id::STRING}, stream, mr);
+  }
+  if (input.size() == input.null_count()) {
+    return cudf::lists::detail::make_all_nulls_lists_column(
+      input.size(), cudf::data_type{cudf::type_id::STRING}, stream, mr);
   }
 
   auto const d_strings = cudf::column_device_view::create(input.parent(), stream);
 
   auto [offsets, total_ngrams] = [&] {
-    auto counts           = rmm::device_uvector<cudf::size_type>(input.size(), stream);
-    auto const num_blocks = cudf::util::div_rounding_up_safe(
-      static_cast<cudf::thread_index_type>(input.size()) * cudf::detail::warp_size, block_size);
-    count_char_ngrams_kernel<<<num_blocks, block_size, 0, stream.value()>>>(
-      *d_strings, ngrams, counts.data());
+    auto counts               = rmm::device_uvector<cudf::size_type>(input.size(), stream);
+    auto const avg_char_bytes = (input.chars_size(stream) / (input.size() - input.null_count()));
+    auto const tile_size      = (avg_char_bytes < AVG_CHAR_BYTES_THRESHOLD)
+                                  ? 1                         // thread per row
+                                  : cudf::detail::warp_size;  // warp per row
+    auto const grid           = cudf::detail::grid_1d(
+      static_cast<cudf::thread_index_type>(input.size()) * tile_size, block_size);
+    count_char_ngrams_kernel<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
+      *d_strings, ngrams, tile_size, counts.data());
     return cudf::detail::make_offsets_child_column(counts.begin(), counts.end(), stream, mr);
   }();
   auto d_offsets = offsets->view().data<cudf::size_type>();
@@ -277,8 +295,8 @@ std::unique_ptr<cudf::column> generate_character_ngrams(cudf::strings_column_vie
                "Insufficient number of characters in each string to generate ngrams");
 
   character_ngram_generator_fn generator{*d_strings, ngrams, d_offsets};
-  auto [offsets_column, chars] = cudf::strings::detail::make_strings_children(
-    generator, strings_count, total_ngrams, stream, mr);
+  auto [offsets_column, chars] =
+    cudf::strings::detail::make_strings_children(generator, input.size(), total_ngrams, stream, mr);
 
   auto output = cudf::make_strings_column(
     total_ngrams, std::move(offsets_column), chars.release(), 0, rmm::device_buffer{});
@@ -368,7 +386,7 @@ std::unique_ptr<cudf::column> hash_character_ngrams(cudf::strings_column_view co
   auto [offsets, total_ngrams] = [&] {
     auto counts = rmm::device_uvector<cudf::size_type>(input.size(), stream);
     count_char_ngrams_kernel<<<grid.num_blocks, grid.num_threads_per_block, 0, stream.value()>>>(
-      *d_strings, ngrams, counts.data());
+      *d_strings, ngrams, cudf::detail::warp_size, counts.data());
     return cudf::detail::make_offsets_child_column(counts.begin(), counts.end(), stream, mr);
   }();
   auto d_offsets = offsets->view().data<cudf::size_type>();

From 7ae536031effd31d1c7aab63d1af812b0fc2a291 Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Wed, 2 Oct 2024 20:26:17 -0700
Subject: [PATCH 05/25] Batch memcpy the last offsets for output buffers of str
 and list cols in PQ reader (#16905)

This PR adds the capability to batch memcpy the last offsets for the output buffers of string and list columns in PQ reader. This reduces the overhead from several `cudaMemcpyAsync` calls when reading wide tables with string and/or list columns. This optimization was identified, and the ORC changes were contributed, by @vuule. See this [comment](https://github.com/rapidsai/cudf/pull/16905#issuecomment-2375532577) for performance improvement data and discussion.

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16905
---
 cpp/benchmarks/CMakeLists.txt                 |   5 -
 .../io/utilities/batched_memset_bench.cpp     | 101 -------------
 .../cudf/detail/utilities/batched_memcpy.hpp  |  67 +++++++++
 .../utilities}/batched_memset.hpp             |   4 +-
 cpp/src/io/orc/stripe_enc.cu                  |  64 +++++---
 cpp/src/io/parquet/page_data.cu               |  26 ++++
 cpp/src/io/parquet/parquet_gpu.hpp            |  12 ++
 cpp/src/io/parquet/reader_impl.cpp            |  24 ++-
 cpp/src/io/parquet/reader_impl_preprocess.cu  |   6 +-
 cpp/tests/CMakeLists.txt                      |   3 +-
 .../utilities_tests/batched_memcpy_tests.cu   | 139 ++++++++++++++++++
 .../utilities_tests/batched_memset_tests.cu   |   4 +-
 12 files changed, 308 insertions(+), 147 deletions(-)
 delete mode 100644 cpp/benchmarks/io/utilities/batched_memset_bench.cpp
 create mode 100644 cpp/include/cudf/detail/utilities/batched_memcpy.hpp
 rename cpp/include/cudf/{io/detail => detail/utilities}/batched_memset.hpp (98%)
 create mode 100644 cpp/tests/utilities_tests/batched_memcpy_tests.cu

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 4113e38dcf4..110b4557840 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -392,11 +392,6 @@ ConfigureNVBench(JSON_READER_NVBENCH io/json/nested_json.cpp io/json/json_reader
 ConfigureNVBench(JSON_READER_OPTION_NVBENCH io/json/json_reader_option.cpp)
 ConfigureNVBench(JSON_WRITER_NVBENCH io/json/json_writer.cpp)
 
-# ##################################################################################################
-# * multi buffer memset benchmark
-# ----------------------------------------------------------------------
-ConfigureNVBench(BATCHED_MEMSET_BENCH io/utilities/batched_memset_bench.cpp)
-
 # ##################################################################################################
 # * io benchmark ---------------------------------------------------------------------
 ConfigureNVBench(MULTIBYTE_SPLIT_NVBENCH io/text/multibyte_split.cpp)
diff --git a/cpp/benchmarks/io/utilities/batched_memset_bench.cpp b/cpp/benchmarks/io/utilities/batched_memset_bench.cpp
deleted file mode 100644
index 2905895a63b..00000000000
--- a/cpp/benchmarks/io/utilities/batched_memset_bench.cpp
+++ /dev/null
@@ -1,101 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/io/cuio_common.hpp>
-#include <benchmarks/io/nvbench_helpers.hpp>
-
-#include <cudf/io/parquet.hpp>
-#include <cudf/utilities/default_stream.hpp>
-
-#include <nvbench/nvbench.cuh>
-
-// Size of the data in the benchmark dataframe; chosen to be low enough to allow benchmarks to
-// run on most GPUs, but large enough to allow highest throughput
-constexpr size_t data_size = 512 << 20;
-
-void parquet_read_common(cudf::size_type num_rows_to_read,
-                         cudf::size_type num_cols_to_read,
-                         cuio_source_sink_pair& source_sink,
-                         nvbench::state& state)
-{
-  cudf::io::parquet_reader_options read_opts =
-    cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
-
-  auto mem_stats_logger = cudf::memory_stats_logger();
-  state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value()));
-  state.exec(
-    nvbench::exec_tag::sync | nvbench::exec_tag::timer, [&](nvbench::launch& launch, auto& timer) {
-      try_drop_l3_cache();
-
-      timer.start();
-      auto const result = cudf::io::read_parquet(read_opts);
-      timer.stop();
-
-      CUDF_EXPECTS(result.tbl->num_columns() == num_cols_to_read, "Unexpected number of columns");
-      CUDF_EXPECTS(result.tbl->num_rows() == num_rows_to_read, "Unexpected number of rows");
-    });
-
-  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
-  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
-  state.add_buffer_size(
-    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
-  state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
-}
-
-template <data_type DataType>
-void bench_batched_memset(nvbench::state& state, nvbench::type_list<nvbench::enum_type<DataType>>)
-{
-  auto const d_type      = get_type_or_group(static_cast<int32_t>(DataType));
-  auto const num_cols    = static_cast<cudf::size_type>(state.get_int64("num_cols"));
-  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
-  auto const run_length  = static_cast<cudf::size_type>(state.get_int64("run_length"));
-  auto const source_type = retrieve_io_type_enum(state.get_string("io_type"));
-  auto const compression = cudf::io::compression_type::NONE;
-  cuio_source_sink_pair source_sink(source_type);
-  auto const tbl =
-    create_random_table(cycle_dtypes(d_type, num_cols),
-                        table_size_bytes{data_size},
-                        data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
-  auto const view = tbl->view();
-
-  cudf::io::parquet_writer_options write_opts =
-    cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
-      .compression(compression);
-  cudf::io::write_parquet(write_opts);
-  auto const num_rows = view.num_rows();
-
-  parquet_read_common(num_rows, num_cols, source_sink, state);
-}
-
-using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
-                                            data_type::FLOAT,
-                                            data_type::DECIMAL,
-                                            data_type::TIMESTAMP,
-                                            data_type::DURATION,
-                                            data_type::STRING,
-                                            data_type::LIST,
-                                            data_type::STRUCT>;
-
-NVBENCH_BENCH_TYPES(bench_batched_memset, NVBENCH_TYPE_AXES(d_type_list))
-  .set_name("batched_memset")
-  .set_type_axes_names({"data_type"})
-  .add_int64_axis("num_cols", {1000})
-  .add_string_axis("io_type", {"DEVICE_BUFFER"})
-  .set_min_samples(4)
-  .add_int64_axis("cardinality", {0, 1000})
-  .add_int64_axis("run_length", {1, 32});
diff --git a/cpp/include/cudf/detail/utilities/batched_memcpy.hpp b/cpp/include/cudf/detail/utilities/batched_memcpy.hpp
new file mode 100644
index 00000000000..ed0ab9e6e5b
--- /dev/null
+++ b/cpp/include/cudf/detail/utilities/batched_memcpy.hpp
@@ -0,0 +1,67 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf/detail/iterator.cuh>
+#include <cudf/utilities/memory_resource.hpp>
+
+#include <rmm/cuda_stream_view.hpp>
+#include <rmm/device_buffer.hpp>
+
+#include <cub/device/device_memcpy.cuh>
+#include <cuda/functional>
+#include <thrust/iterator/constant_iterator.h>
+
+namespace CUDF_EXPORT cudf {
+namespace detail {
+
+/**
+ * @brief A helper function that copies multiple buffers from their source addresses to their
+ * destination addresses in a batched manner.
+ *
+ * @tparam SrcIterator **[inferred]** The type of device-accessible source addresses iterator
+ * @tparam DstIterator **[inferred]** The type of device-accessible destination address iterator
+ * @tparam SizeIterator **[inferred]** The type of device-accessible buffer size iterator
+ *
+ * @param src_iter Device-accessible iterator to source addresses
+ * @param dst_iter Device-accessible iterator to destination addresses
+ * @param size_iter Device-accessible iterator to the buffer sizes (in bytes)
+ * @param num_buffs Number of buffers to be copied
+ * @param stream CUDA stream to use
+ */
+template <typename SrcIterator, typename DstIterator, typename SizeIterator>
+void batched_memcpy_async(SrcIterator src_iter,
+                          DstIterator dst_iter,
+                          SizeIterator size_iter,
+                          size_t num_buffs,
+                          rmm::cuda_stream_view stream)
+{
+  size_t temp_storage_bytes = 0;
+  cub::DeviceMemcpy::Batched(
+    nullptr, temp_storage_bytes, src_iter, dst_iter, size_iter, num_buffs, stream.value());
+
+  rmm::device_buffer d_temp_storage{temp_storage_bytes, stream.value()};
+
+  cub::DeviceMemcpy::Batched(d_temp_storage.data(),
+                             temp_storage_bytes,
+                             src_iter,
+                             dst_iter,
+                             size_iter,
+                             num_buffs,
+                             stream.value());
+}
+
+}  // namespace detail
+}  // namespace CUDF_EXPORT cudf
diff --git a/cpp/include/cudf/io/detail/batched_memset.hpp b/cpp/include/cudf/detail/utilities/batched_memset.hpp
similarity index 98%
rename from cpp/include/cudf/io/detail/batched_memset.hpp
rename to cpp/include/cudf/detail/utilities/batched_memset.hpp
index 1c74be4a9fe..75f738f7529 100644
--- a/cpp/include/cudf/io/detail/batched_memset.hpp
+++ b/cpp/include/cudf/detail/utilities/batched_memset.hpp
@@ -28,7 +28,7 @@
 #include <thrust/transform.h>
 
 namespace CUDF_EXPORT cudf {
-namespace io::detail {
+namespace detail {
 
 /**
  * @brief A helper function that takes in a vector of device spans and memsets them to the
@@ -78,5 +78,5 @@ void batched_memset(std::vector<cudf::device_span<T>> const& bufs,
     d_temp_storage.data(), temp_storage_bytes, iter_in, iter_out, sizes, num_bufs, stream);
 }
 
-}  // namespace io::detail
+}  // namespace detail
 }  // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/io/orc/stripe_enc.cu b/cpp/src/io/orc/stripe_enc.cu
index 5c70e35fd2e..ed0b6969154 100644
--- a/cpp/src/io/orc/stripe_enc.cu
+++ b/cpp/src/io/orc/stripe_enc.cu
@@ -20,6 +20,8 @@
 #include "orc_gpu.hpp"
 
 #include <cudf/column/column_device_view.cuh>
+#include <cudf/detail/utilities/batched_memcpy.hpp>
+#include <cudf/detail/utilities/cuda.cuh>
 #include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/detail/utilities/logger.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
@@ -1087,37 +1089,42 @@ CUDF_KERNEL void __launch_bounds__(block_size)
 /**
  * @brief Merge chunked column data into a single contiguous stream
  *
- * @param[in,out] strm_desc StripeStream device array [stripe][stream]
- * @param[in,out] streams List of encoder chunk streams [column][rowgroup]
+ * @param[in] strm_desc StripeStream device array [stripe][stream]
+ * @param[in] streams List of encoder chunk streams [column][rowgroup]
+ * @param[out] srcs  List of source encoder chunk stream data addresses
+ * @param[out] dsts List of destination StripeStream data addresses
+ * @param[out] sizes List of stream sizes in bytes
  */
 // blockDim {compact_streams_block_size,1,1}
 CUDF_KERNEL void __launch_bounds__(compact_streams_block_size)
-  gpuCompactOrcDataStreams(device_2dspan<StripeStream> strm_desc,
-                           device_2dspan<encoder_chunk_streams> streams)
+  gpuInitBatchedMemcpy(device_2dspan<StripeStream const> strm_desc,
+                       device_2dspan<encoder_chunk_streams> streams,
+                       device_span<uint8_t*> srcs,
+                       device_span<uint8_t*> dsts,
+                       device_span<size_t> sizes)
 {
-  __shared__ __align__(16) StripeStream ss;
-
-  auto const stripe_id = blockIdx.x;
+  auto const stripe_id = cudf::detail::grid_1d::global_thread_id();
   auto const stream_id = blockIdx.y;
-  auto const t         = threadIdx.x;
+  if (stripe_id >= strm_desc.size().first) { return; }
 
-  if (t == 0) { ss = strm_desc[stripe_id][stream_id]; }
-  __syncthreads();
+  auto const out_id = stream_id * strm_desc.size().first + stripe_id;
+  StripeStream ss   = strm_desc[stripe_id][stream_id];
 
   if (ss.data_ptr == nullptr) { return; }
 
   auto const cid = ss.stream_type;
   auto dst_ptr   = ss.data_ptr;
   for (auto group = ss.first_chunk_id; group < ss.first_chunk_id + ss.num_chunks; ++group) {
+    auto const out_id = stream_id * streams.size().second + group;
+    srcs[out_id]      = streams[ss.column_id][group].data_ptrs[cid];
+    dsts[out_id]      = dst_ptr;
+
+    // Also update the stream here, data will be copied in a separate kernel
+    streams[ss.column_id][group].data_ptrs[cid] = dst_ptr;
+
     auto const len = streams[ss.column_id][group].lengths[cid];
-    if (len > 0) {
-      auto const src_ptr = streams[ss.column_id][group].data_ptrs[cid];
-      for (uint32_t i = t; i < len; i += blockDim.x) {
-        dst_ptr[i] = src_ptr[i];
-      }
-      __syncthreads();
-    }
-    if (t == 0) { streams[ss.column_id][group].data_ptrs[cid] = dst_ptr; }
+    // len is the size (in bytes) of the current stream.
+    sizes[out_id] = len;
     dst_ptr += len;
   }
 }
@@ -1325,9 +1332,26 @@ void CompactOrcDataStreams(device_2dspan<StripeStream> strm_desc,
                            device_2dspan<encoder_chunk_streams> enc_streams,
                            rmm::cuda_stream_view stream)
 {
+  auto const num_rowgroups = enc_streams.size().second;
+  auto const num_streams   = strm_desc.size().second;
+  auto const num_stripes   = strm_desc.size().first;
+  auto const num_chunks    = num_rowgroups * num_streams;
+  auto srcs                = cudf::detail::make_zeroed_device_uvector_async<uint8_t*>(
+    num_chunks, stream, rmm::mr::get_current_device_resource());
+  auto dsts = cudf::detail::make_zeroed_device_uvector_async<uint8_t*>(
+    num_chunks, stream, rmm::mr::get_current_device_resource());
+  auto lengths = cudf::detail::make_zeroed_device_uvector_async<size_t>(
+    num_chunks, stream, rmm::mr::get_current_device_resource());
+
   dim3 dim_block(compact_streams_block_size, 1);
-  dim3 dim_grid(strm_desc.size().first, strm_desc.size().second);
-  gpuCompactOrcDataStreams<<<dim_grid, dim_block, 0, stream.value()>>>(strm_desc, enc_streams);
+  dim3 dim_grid(cudf::util::div_rounding_up_unsafe(num_stripes, compact_streams_block_size),
+                strm_desc.size().second);
+  gpuInitBatchedMemcpy<<<dim_grid, dim_block, 0, stream.value()>>>(
+    strm_desc, enc_streams, srcs, dsts, lengths);
+
+  // Copy streams in a batched manner.
+  cudf::detail::batched_memcpy_async(
+    srcs.begin(), dsts.begin(), lengths.begin(), lengths.size(), stream);
 }
 
 std::optional<writer_compression_statistics> CompressOrcDataStreams(
diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu
index e0d50d7ccf9..b3276c81c1f 100644
--- a/cpp/src/io/parquet/page_data.cu
+++ b/cpp/src/io/parquet/page_data.cu
@@ -17,6 +17,8 @@
 #include "page_data.cuh"
 #include "page_decode.cuh"
 
+#include <cudf/detail/utilities/batched_memcpy.hpp>
+
 #include <rmm/exec_policy.hpp>
 
 #include <thrust/reduce.h>
@@ -466,4 +468,28 @@ void __host__ DecodeSplitPageData(cudf::detail::hostdevice_span<PageInfo> pages,
   }
 }
 
+void WriteFinalOffsets(host_span<size_type const> offsets,
+                       host_span<size_type* const> buff_addrs,
+                       rmm::cuda_stream_view stream)
+{
+  // Copy offsets to device and create an iterator
+  auto d_src_data = cudf::detail::make_device_uvector_async(
+    offsets, stream, cudf::get_current_device_resource_ref());
+  // Iterator for the source (scalar) data
+  auto src_iter = cudf::detail::make_counting_transform_iterator(
+    static_cast<std::size_t>(0),
+    cuda::proclaim_return_type<size_type*>(
+      [src = d_src_data.begin()] __device__(std::size_t i) { return src + i; }));
+
+  // Copy buffer addresses to device and create an iterator
+  auto d_dst_addrs = cudf::detail::make_device_uvector_async(
+    buff_addrs, stream, cudf::get_current_device_resource_ref());
+  // size_iter is simply a constant iterator of sizeof(size_type) bytes.
+  auto size_iter = thrust::make_constant_iterator(sizeof(size_type));
+
+  // Copy offsets to buffers in batched manner.
+  cudf::detail::batched_memcpy_async(
+    src_iter, d_dst_addrs.begin(), size_iter, offsets.size(), stream);
+}
+
 }  // namespace cudf::io::parquet::detail
diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp
index e631e12119d..a8ba3a969ce 100644
--- a/cpp/src/io/parquet/parquet_gpu.hpp
+++ b/cpp/src/io/parquet/parquet_gpu.hpp
@@ -797,6 +797,18 @@ void DecodeSplitPageData(cudf::detail::hostdevice_span<PageInfo> pages,
                          kernel_error::pointer error_code,
                          rmm::cuda_stream_view stream);
 
+/**
+ * @brief Writes the final offsets to the corresponding list and string buffer end addresses in a
+ * batched manner.
+ *
+ * @param offsets Host span of final offsets
+ * @param buff_addrs Host span of corresponding output col buffer end addresses
+ * @param stream CUDA stream to use
+ */
+void WriteFinalOffsets(host_span<size_type const> offsets,
+                       host_span<size_type* const> buff_addrs,
+                       rmm::cuda_stream_view stream);
+
 /**
  * @brief Launches kernel for reading the string column data stored in the pages
  *
diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp
index 7d817bde7af..1b69ccb7742 100644
--- a/cpp/src/io/parquet/reader_impl.cpp
+++ b/cpp/src/io/parquet/reader_impl.cpp
@@ -371,13 +371,15 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num
     CUDF_FAIL("Parquet data decode failed with code(s) " + kernel_error::to_string(error));
   }
 
-  // for list columns, add the final offset to every offset buffer.
-  // TODO : make this happen in more efficiently. Maybe use thrust::for_each
-  // on each buffer.
+  // For list and string columns, add the final offset to every offset buffer.
   // Note : the reason we are doing this here instead of in the decode kernel is
   // that it is difficult/impossible for a given page to know that it is writing the very
   // last value that should then be followed by a terminator (because rows can span
   // page boundaries).
+  std::vector<size_type*> out_buffers;
+  std::vector<size_type> final_offsets;
+  out_buffers.reserve(_input_columns.size());
+  final_offsets.reserve(_input_columns.size());
   for (size_t idx = 0; idx < _input_columns.size(); idx++) {
     input_column_info const& input_col = _input_columns[idx];
 
@@ -393,25 +395,21 @@ void reader::impl::decode_page_data(read_mode mode, size_t skip_rows, size_t num
 
         // the final offset for a list at level N is the size of it's child
         size_type const offset = child.type.id() == type_id::LIST ? child.size - 1 : child.size;
-        CUDF_CUDA_TRY(cudaMemcpyAsync(static_cast<size_type*>(out_buf.data()) + (out_buf.size - 1),
-                                      &offset,
-                                      sizeof(size_type),
-                                      cudaMemcpyDefault,
-                                      _stream.value()));
+        out_buffers.emplace_back(static_cast<size_type*>(out_buf.data()) + (out_buf.size - 1));
+        final_offsets.emplace_back(offset);
         out_buf.user_data |= PARQUET_COLUMN_BUFFER_FLAG_LIST_TERMINATED;
       } else if (out_buf.type.id() == type_id::STRING) {
         // need to cap off the string offsets column
         auto const sz = static_cast<size_type>(col_string_sizes[idx]);
         if (sz <= strings::detail::get_offset64_threshold()) {
-          CUDF_CUDA_TRY(cudaMemcpyAsync(static_cast<size_type*>(out_buf.data()) + out_buf.size,
-                                        &sz,
-                                        sizeof(size_type),
-                                        cudaMemcpyDefault,
-                                        _stream.value()));
+          out_buffers.emplace_back(static_cast<size_type*>(out_buf.data()) + out_buf.size);
+          final_offsets.emplace_back(sz);
         }
       }
     }
   }
+  // Write the final offsets for list and string columns in a batched manner
+  WriteFinalOffsets(final_offsets, out_buffers, _stream);
 
   // update null counts in the final column buffers
   for (size_t idx = 0; idx < subpass.pages.size(); idx++) {
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu
index 3763c2e8e6d..8cab68ea721 100644
--- a/cpp/src/io/parquet/reader_impl_preprocess.cu
+++ b/cpp/src/io/parquet/reader_impl_preprocess.cu
@@ -19,9 +19,9 @@
 
 #include <cudf/detail/iterator.cuh>
 #include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/batched_memset.hpp>
 #include <cudf/detail/utilities/integer_utils.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/io/detail/batched_memset.hpp>
 #include <cudf/utilities/memory_resource.hpp>
 
 #include <rmm/exec_policy.hpp>
@@ -1656,9 +1656,9 @@ void reader::impl::allocate_columns(read_mode mode, size_t skip_rows, size_t num
     }
   }
 
-  cudf::io::detail::batched_memset(memset_bufs, static_cast<std::byte>(0), _stream);
+  cudf::detail::batched_memset(memset_bufs, static_cast<std::byte>(0), _stream);
   // Need to set null mask bufs to all high bits
-  cudf::io::detail::batched_memset(
+  cudf::detail::batched_memset(
     nullmask_bufs, std::numeric_limits<cudf::bitmask_type>::max(), _stream);
 }
 
diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt
index b67d922d377..4596ec65ce7 100644
--- a/cpp/tests/CMakeLists.txt
+++ b/cpp/tests/CMakeLists.txt
@@ -385,6 +385,8 @@ ConfigureTest(
 # * utilities tests -------------------------------------------------------------------------------
 ConfigureTest(
   UTILITIES_TEST
+  utilities_tests/batched_memcpy_tests.cu
+  utilities_tests/batched_memset_tests.cu
   utilities_tests/column_debug_tests.cpp
   utilities_tests/column_utilities_tests.cpp
   utilities_tests/column_wrapper_tests.cpp
@@ -395,7 +397,6 @@ ConfigureTest(
   utilities_tests/pinned_memory_tests.cpp
   utilities_tests/type_check_tests.cpp
   utilities_tests/type_list_tests.cpp
-  utilities_tests/batched_memset_tests.cu
 )
 
 # ##################################################################################################
diff --git a/cpp/tests/utilities_tests/batched_memcpy_tests.cu b/cpp/tests/utilities_tests/batched_memcpy_tests.cu
new file mode 100644
index 00000000000..98657f8e224
--- /dev/null
+++ b/cpp/tests/utilities_tests/batched_memcpy_tests.cu
@@ -0,0 +1,139 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <cudf_test/base_fixture.hpp>
+#include <cudf_test/cudf_gtest.hpp>
+#include <cudf_test/type_lists.hpp>
+
+#include <cudf/detail/utilities/batched_memcpy.hpp>
+#include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/io/parquet.hpp>
+#include <cudf/utilities/memory_resource.hpp>
+#include <cudf/utilities/span.hpp>
+
+#include <rmm/device_uvector.hpp>
+#include <rmm/device_vector.hpp>
+
+#include <thrust/iterator/counting_iterator.h>
+#include <thrust/iterator/transform_iterator.h>
+#include <thrust/tuple.h>
+
+#include <iterator>
+#include <numeric>
+#include <random>
+#include <type_traits>
+
+template <typename T>
+struct BatchedMemcpyTest : public cudf::test::BaseFixture {};
+
+TEST(BatchedMemcpyTest, BasicTest)
+{
+  using T1 = int64_t;
+
+  // Device init
+  auto stream = cudf::get_default_stream();
+  auto mr     = cudf::get_current_device_resource_ref();
+
+  // Buffer lengths (in number of elements)
+  std::vector<size_t> const h_lens{
+    50000, 4, 1000, 0, 250000, 1, 100, 8000, 0, 1, 100, 1000, 10000, 100000, 0, 1, 100000};
+
+  // Total number of buffers
+  auto const num_buffs = h_lens.size();
+
+  // Exclusive sum of buffer lengths, used as element offsets into the destination buffer
+  std::vector<size_t> h_lens_excl_sum(num_buffs);
+  std::exclusive_scan(h_lens.begin(), h_lens.end(), h_lens_excl_sum.begin(), size_t{0});
+
+  // Corresponding buffer sizes (in bytes)
+  std::vector<size_t> h_sizes_bytes;
+  h_sizes_bytes.reserve(num_buffs);
+  std::transform(
+    h_lens.cbegin(), h_lens.cend(), std::back_inserter(h_sizes_bytes), [&](auto& size) {
+      return size * sizeof(T1);
+    });
+
+  // Initialize random engine
+  auto constexpr seed = 0xcead;
+  std::mt19937 engine{seed};
+  using uniform_distribution =
+    typename std::conditional_t<std::is_same_v<T1, bool>,
+                                std::bernoulli_distribution,
+                                std::conditional_t<std::is_floating_point_v<T1>,
+                                                   std::uniform_real_distribution<T1>,
+                                                   std::uniform_int_distribution<T1>>>;
+  uniform_distribution dist{};
+
+  // Generate a src vector of random data vectors
+  std::vector<std::vector<T1>> h_sources;
+  h_sources.reserve(num_buffs);
+  std::transform(h_lens.begin(), h_lens.end(), std::back_inserter(h_sources), [&](auto size) {
+    std::vector<T1> data(size);
+    std::generate_n(data.begin(), size, [&]() { return T1{dist(engine)}; });
+    return data;
+  });
+  // Copy the vectors to device
+  std::vector<rmm::device_uvector<T1>> h_device_vecs;
+  h_device_vecs.reserve(h_sources.size());
+  std::transform(
+    h_sources.begin(), h_sources.end(), std::back_inserter(h_device_vecs), [stream, mr](auto& vec) {
+      return cudf::detail::make_device_uvector_async(vec, stream, mr);
+    });
+  // Pointers to the source vectors
+  std::vector<T1*> h_src_ptrs;
+  h_src_ptrs.reserve(h_sources.size());
+  std::transform(
+    h_device_vecs.begin(), h_device_vecs.end(), std::back_inserter(h_src_ptrs), [](auto& vec) {
+      return static_cast<T1*>(vec.data());
+    });
+  // Copy the source data pointers to device
+  auto d_src_ptrs = cudf::detail::make_device_uvector_async(h_src_ptrs, stream, mr);
+
+  // Total number of elements in all buffers (size_t init keeps the sum from narrowing to int)
+  auto const total_buff_len = std::accumulate(h_lens.cbegin(), h_lens.cend(), size_t{0});
+
+  // Create one giant buffer for destination
+  auto d_dst_data = cudf::detail::make_zeroed_device_uvector_async<T1>(total_buff_len, stream, mr);
+  // Pointers to destination buffers within the giant destination buffer
+  std::vector<T1*> h_dst_ptrs(num_buffs);
+  std::for_each(thrust::make_counting_iterator(static_cast<size_t>(0)),
+                thrust::make_counting_iterator(num_buffs),
+                [&](auto i) { h_dst_ptrs[i] = d_dst_data.data() + h_lens_excl_sum[i]; });
+  // Copy destination data pointers to device
+  auto d_dst_ptrs = cudf::detail::make_device_uvector_async(h_dst_ptrs, stream, mr);
+
+  // Copy buffer sizes (in bytes) to device
+  auto d_sizes_bytes = cudf::detail::make_device_uvector_async(h_sizes_bytes, stream, mr);
+
+  // Run the batched memcpy
+  cudf::detail::batched_memcpy_async(
+    d_src_ptrs.begin(), d_dst_ptrs.begin(), d_sizes_bytes.begin(), num_buffs, stream);
+
+  // Expected giant destination buffer after the memcpy
+  std::vector<T1> expected_buffer;
+  expected_buffer.reserve(total_buff_len);
+  std::for_each(h_sources.cbegin(), h_sources.cend(), [&expected_buffer](auto& source) {
+    expected_buffer.insert(expected_buffer.end(), source.begin(), source.end());
+  });
+
+  // Copy over the result destination buffer to host and synchronize the stream
+  auto result_dst_buffer =
+    cudf::detail::make_std_vector_sync<T1>(cudf::device_span<T1>(d_dst_data), stream);
+
+  // Check if both vectors are equal
+  EXPECT_TRUE(
+    std::equal(expected_buffer.begin(), expected_buffer.end(), result_dst_buffer.begin()));
+}
diff --git a/cpp/tests/utilities_tests/batched_memset_tests.cu b/cpp/tests/utilities_tests/batched_memset_tests.cu
index bed0f40d70e..0eeb7b95318 100644
--- a/cpp/tests/utilities_tests/batched_memset_tests.cu
+++ b/cpp/tests/utilities_tests/batched_memset_tests.cu
@@ -18,8 +18,8 @@
 #include <cudf_test/cudf_gtest.hpp>
 #include <cudf_test/type_lists.hpp>
 
+#include <cudf/detail/utilities/batched_memset.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
-#include <cudf/io/detail/batched_memset.hpp>
 #include <cudf/io/parquet.hpp>
 #include <cudf/utilities/memory_resource.hpp>
 #include <cudf/utilities/span.hpp>
@@ -78,7 +78,7 @@ TEST(MultiBufferTestIntegral, BasicTest1)
     });
 
   // Function Call
-  cudf::io::detail::batched_memset(memset_bufs, uint64_t{0}, stream);
+  cudf::detail::batched_memset(memset_bufs, uint64_t{0}, stream);
 
   // Set all buffer regions to 0 for expected comparison
   std::for_each(

From 2ec6cb32d825d2ef255d0e56497c20be30713d32 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Wed, 2 Oct 2024 18:07:52 -1000
Subject: [PATCH 06/25] Fix astype from tz-aware type to tz-aware type (#16980)

closes https://github.com/rapidsai/cudf/issues/16973

Also makes astype from a tz-naive type to a tz-aware type raise a TypeError, matching pandas

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/16980
---
 python/cudf/cudf/core/column/datetime.py      | 15 +++++++++++++
 .../cudf/tests/series/test_datetimelike.py    | 22 +++++++++++++++++++
 2 files changed, 37 insertions(+)

diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index d0ea4612a1b..2c9b0baa9b6 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -480,6 +480,11 @@ def normalize_binop_value(self, other: DatetimeLikeScalar) -> ScalarLike:
     def as_datetime_column(self, dtype: Dtype) -> DatetimeColumn:
         if dtype == self.dtype:
             return self
+        elif isinstance(dtype, pd.DatetimeTZDtype):
+            raise TypeError(
+                "Cannot use .astype to convert from timezone-naive dtype to "
+                "timezone-aware dtype. Use tz_localize instead."
+            )
         return libcudf.unary.cast(self, dtype=dtype)
 
     def as_timedelta_column(self, dtype: Dtype) -> None:  # type: ignore[override]
@@ -940,6 +945,16 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn:
     def as_string_column(self) -> cudf.core.column.StringColumn:
         return self._local_time.as_string_column()
 
+    def as_datetime_column(self, dtype: Dtype) -> DatetimeColumn:
+        if isinstance(dtype, pd.DatetimeTZDtype) and dtype != self.dtype:
+            if dtype.unit != self.time_unit:
+                # TODO: Doesn't check that new unit is valid.
+                casted = self._with_type_metadata(dtype)
+            else:
+                casted = self
+            return casted.tz_convert(str(dtype.tz))
+        return super().as_datetime_column(dtype)
+
     def get_dt_field(self, field: str) -> ColumnBase:
         return libcudf.datetime.extract_datetime_component(
             self._local_time, field
diff --git a/python/cudf/cudf/tests/series/test_datetimelike.py b/python/cudf/cudf/tests/series/test_datetimelike.py
index cea86a5499e..691da224f44 100644
--- a/python/cudf/cudf/tests/series/test_datetimelike.py
+++ b/python/cudf/cudf/tests/series/test_datetimelike.py
@@ -266,3 +266,25 @@ def test_pandas_compatible_non_zoneinfo_raises(klass):
     with cudf.option_context("mode.pandas_compatible", True):
         with pytest.raises(NotImplementedError):
             cudf.from_pandas(pandas_obj)
+
+
+def test_astype_naive_to_aware_raises():
+    ser = cudf.Series([datetime.datetime(2020, 1, 1)])
+    with pytest.raises(TypeError):
+        ser.astype("datetime64[ns, UTC]")
+    with pytest.raises(TypeError):
+        ser.to_pandas().astype("datetime64[ns, UTC]")
+
+
+@pytest.mark.parametrize("unit", ["ns", "us"])
+def test_astype_aware_to_aware(unit):
+    ser = cudf.Series(
+        [datetime.datetime(2020, 1, 1, tzinfo=datetime.timezone.utc)]
+    )
+    result = ser.astype(f"datetime64[{unit}, US/Pacific]")
+    expected = ser.to_pandas().astype(f"datetime64[{unit}, US/Pacific]")
+    zoneinfo_type = pd.DatetimeTZDtype(
+        expected.dtype.unit, zoneinfo.ZoneInfo(str(expected.dtype.tz))
+    )
+    expected = ser.astype(zoneinfo_type)
+    assert_eq(result, expected)

From 3faa3ee8b869a8450f6352c7770fb155b321d926 Mon Sep 17 00:00:00 2001
From: David Wendt <45795991+davidwendt@users.noreply.github.com>
Date: Thu, 3 Oct 2024 08:53:08 -0400
Subject: [PATCH 07/25] Add cudf::strings::find_re API (#16742)

Adds the `cudf::strings::find_re` and `str.find_re` API to libcudf/pylibcudf/cudf. This function returns the character position where the pattern first matches in each row of the input column. If a match is not found, -1 is returned for that corresponding row.

Closes #16729

Authors:
  - David Wendt (https://github.com/davidwendt)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Matthew Murray (https://github.com/Matt711)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16742
---
 cpp/doxygen/regex.md                          |  1 +
 cpp/include/cudf/strings/findall.hpp          | 29 ++++++++++++
 cpp/src/strings/search/findall.cu             | 46 +++++++++++++++++++
 cpp/tests/streams/strings/find_test.cpp       |  1 +
 cpp/tests/strings/findall_tests.cpp           | 35 +++++++++++---
 python/cudf/cudf/_lib/strings/__init__.py     |  2 +-
 python/cudf/cudf/_lib/strings/findall.pyx     | 16 +++++++
 python/cudf/cudf/core/column/string.py        | 40 ++++++++++++++++
 python/cudf/cudf/tests/test_string.py         | 20 ++++++++
 .../pylibcudf/libcudf/strings/findall.pxd     |  4 ++
 .../pylibcudf/pylibcudf/strings/findall.pxd   |  1 +
 .../pylibcudf/pylibcudf/strings/findall.pyx   | 32 +++++++++++++
 .../pylibcudf/tests/test_string_findall.py    | 17 +++++++
 13 files changed, 237 insertions(+), 7 deletions(-)

diff --git a/cpp/doxygen/regex.md b/cpp/doxygen/regex.md
index 6d1c91a5752..6902b1948bd 100644
--- a/cpp/doxygen/regex.md
+++ b/cpp/doxygen/regex.md
@@ -8,6 +8,7 @@ This page specifies which regular expression (regex) features are currently supp
 - cudf::strings::extract()
 - cudf::strings::extract_all_record()
 - cudf::strings::findall()
+- cudf::strings::find_re()
 - cudf::strings::replace_re()
 - cudf::strings::replace_with_backrefs()
 - cudf::strings::split_re()
diff --git a/cpp/include/cudf/strings/findall.hpp b/cpp/include/cudf/strings/findall.hpp
index c6b9bc7e58a..867764b6d9a 100644
--- a/cpp/include/cudf/strings/findall.hpp
+++ b/cpp/include/cudf/strings/findall.hpp
@@ -66,6 +66,35 @@ std::unique_ptr<column> findall(
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
+/**
+ * @brief Returns the starting character index of the first match for the given pattern
+ * in each row of the input column
+ *
+ * @code{.pseudo}
+ * Example:
+ * s = ["bunny", "rabbit", "hare", "dog"]
+ * p = regex_program::create("[be]")
+ * r = find_re(s, p)
+ * r is now [0, 2, 3, -1]
+ * @endcode
+ *
+ * A null output row occurs if the corresponding input row is null.
+ * A -1 is returned for rows that do not contain a match.
+ *
+ * See the @ref md_regex "Regex Features" page for details on patterns supported by this API.
+ *
+ * @param input Strings instance for this operation
+ * @param prog Regex program instance
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate the returned column's device memory
+ * @return New column of integers
+ */
+std::unique_ptr<column> find_re(
+  strings_column_view const& input,
+  regex_program const& prog,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
 /** @} */  // end of doxygen group
 }  // namespace strings
 }  // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu
index d8c1b50a94b..21708e48a25 100644
--- a/cpp/src/strings/search/findall.cu
+++ b/cpp/src/strings/search/findall.cu
@@ -126,6 +126,43 @@ std::unique_ptr<column> findall(strings_column_view const& input,
                            mr);
 }
 
+namespace {
+struct find_re_fn {
+  column_device_view d_strings;
+
+  __device__ size_type operator()(size_type const idx,
+                                  reprog_device const prog,
+                                  int32_t const thread_idx) const
+  {
+    if (d_strings.is_null(idx)) { return 0; }
+    auto const d_str = d_strings.element<string_view>(idx);
+
+    auto const result = prog.find(thread_idx, d_str, d_str.begin());
+    return result.has_value() ? result.value().first : -1;
+  }
+};
+}  // namespace
+
+std::unique_ptr<column> find_re(strings_column_view const& input,
+                                regex_program const& prog,
+                                rmm::cuda_stream_view stream,
+                                rmm::device_async_resource_ref mr)
+{
+  auto results = make_numeric_column(data_type{type_to_id<size_type>()},
+                                     input.size(),
+                                     cudf::detail::copy_bitmask(input.parent(), stream, mr),
+                                     input.null_count(),
+                                     stream,
+                                     mr);
+  if (input.is_empty()) { return results; }
+
+  auto d_results       = results->mutable_view().data<size_type>();
+  auto d_prog          = regex_device_builder::create_prog_device(prog, stream);
+  auto const d_strings = column_device_view::create(input.parent(), stream);
+  launch_transform_kernel(find_re_fn{*d_strings}, *d_prog, d_results, input.size(), stream);
+
+  return results;
+}
 }  // namespace detail
 
 // external API
@@ -139,5 +176,14 @@ std::unique_ptr<column> findall(strings_column_view const& input,
   return detail::findall(input, prog, stream, mr);
 }
 
+std::unique_ptr<column> find_re(strings_column_view const& input,
+                                regex_program const& prog,
+                                rmm::cuda_stream_view stream,
+                                rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::find_re(input, prog, stream, mr);
+}
+
 }  // namespace strings
 }  // namespace cudf
diff --git a/cpp/tests/streams/strings/find_test.cpp b/cpp/tests/streams/strings/find_test.cpp
index 52839c6fc9f..e5a1ee0988c 100644
--- a/cpp/tests/streams/strings/find_test.cpp
+++ b/cpp/tests/streams/strings/find_test.cpp
@@ -46,4 +46,5 @@ TEST_F(StringsFindTest, Find)
   auto const pattern = std::string("[a-z]");
   auto const prog    = cudf::strings::regex_program::create(pattern);
   cudf::strings::findall(view, *prog, cudf::test::get_default_stream());
+  cudf::strings::find_re(view, *prog, cudf::test::get_default_stream());
 }
diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp
index 73da4d081e2..4821a7fa999 100644
--- a/cpp/tests/strings/findall_tests.cpp
+++ b/cpp/tests/strings/findall_tests.cpp
@@ -19,6 +19,7 @@
 #include <cudf_test/base_fixture.hpp>
 #include <cudf_test/column_utilities.hpp>
 #include <cudf_test/column_wrapper.hpp>
+#include <cudf_test/iterator_utilities.hpp>
 #include <cudf_test/table_utilities.hpp>
 
 #include <cudf/strings/findall.hpp>
@@ -149,6 +150,22 @@ TEST_F(StringsFindallTests, LargeRegex)
   CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
 }
 
+TEST_F(StringsFindallTests, FindTest)
+{
+  auto const valids = cudf::test::iterators::null_at(5);
+  cudf::test::strings_column_wrapper input(
+    {"3A", "May4", "Jan2021", "March", "A9BC", "", "", "abcdef ghijklm 12345"}, valids);
+  auto sv = cudf::strings_column_view(input);
+
+  auto pattern = std::string("\\d+");
+
+  auto prog    = cudf::strings::regex_program::create(pattern);
+  auto results = cudf::strings::find_re(sv, *prog);
+  auto expected =
+    cudf::test::fixed_width_column_wrapper<cudf::size_type>({0, 3, 3, -1, 1, 0, -1, 15}, valids);
+  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+}
+
 TEST_F(StringsFindallTests, NoMatches)
 {
   cudf::test::strings_column_wrapper input({"abc\nfff\nabc", "fff\nabc\nlll", "abc", "", "abc\n"});
@@ -169,10 +186,16 @@ TEST_F(StringsFindallTests, EmptyTest)
   auto prog = cudf::strings::regex_program::create(pattern);
 
   cudf::test::strings_column_wrapper input;
-  auto sv      = cudf::strings_column_view(input);
-  auto results = cudf::strings::findall(sv, *prog);
-
-  using LCW = cudf::test::lists_column_wrapper<cudf::string_view>;
-  LCW expected;
-  CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+  auto sv = cudf::strings_column_view(input);
+  {
+    auto results = cudf::strings::findall(sv, *prog);
+    using LCW    = cudf::test::lists_column_wrapper<cudf::string_view>;
+    LCW expected;
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+  }
+  {
+    auto results  = cudf::strings::find_re(sv, *prog);
+    auto expected = cudf::test::fixed_width_column_wrapper<cudf::size_type>{};
+    CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected);
+  }
 }
diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py
index 049dbab4851..e712937f816 100644
--- a/python/cudf/cudf/_lib/strings/__init__.py
+++ b/python/cudf/cudf/_lib/strings/__init__.py
@@ -71,7 +71,7 @@
     startswith_multiple,
 )
 from cudf._lib.strings.find_multiple import find_multiple
-from cudf._lib.strings.findall import findall
+from cudf._lib.strings.findall import find_re, findall
 from cudf._lib.strings.json import GetJsonObjectOptions, get_json_object
 from cudf._lib.strings.padding import center, ljust, pad, rjust, zfill
 from cudf._lib.strings.repeat import repeat_scalar, repeat_sequence
diff --git a/python/cudf/cudf/_lib/strings/findall.pyx b/python/cudf/cudf/_lib/strings/findall.pyx
index 0e758d5b322..3e7a504d535 100644
--- a/python/cudf/cudf/_lib/strings/findall.pyx
+++ b/python/cudf/cudf/_lib/strings/findall.pyx
@@ -23,3 +23,19 @@ def findall(Column source_strings, object pattern, uint32_t flags):
         prog,
     )
     return Column.from_pylibcudf(plc_result)
+
+
+@acquire_spill_lock()
+def find_re(Column source_strings, object pattern, uint32_t flags):
+    """
+    Returns character positions where the pattern first matches
+    the elements in source_strings.
+    """
+    prog = plc.strings.regex_program.RegexProgram.create(
+        str(pattern), flags
+    )
+    plc_result = plc.strings.findall.find_re(
+        source_strings.to_pylibcudf(mode="read"),
+        prog,
+    )
+    return Column.from_pylibcudf(plc_result)
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 88df57b1b3b..b50e23bd52e 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -3626,6 +3626,46 @@ def findall(self, pat: str, flags: int = 0) -> SeriesOrIndex:
         data = libstrings.findall(self._column, pat, flags)
         return self._return_or_inplace(data)
 
+    def find_re(self, pat: str, flags: int = 0) -> SeriesOrIndex:
+        """
+        Find first occurrence of pattern or regular expression in the
+        Series/Index.
+
+        Parameters
+        ----------
+        pat : str or compiled regex (re.Pattern)
+            Pattern or regular expression.
+        flags : int, default 0 (no flags)
+            Flags to pass through to the regex engine (e.g. re.MULTILINE)
+
+        Returns
+        -------
+        Series or Index
+            Character position of the first match in each string, or -1
+            where the pattern does not match.
+
+        Examples
+        --------
+        >>> import cudf
+        >>> s = cudf.Series(['Lion', 'Monkey', 'Rabbit', 'Cat'])
+        >>> s.str.find_re('[ti]')
+        0    1
+        1   -1
+        2    4
+        3    2
+        dtype: int32
+        """
+        if isinstance(pat, re.Pattern):
+            flags = pat.flags & ~re.U
+            pat = pat.pattern
+        if not _is_supported_regex_flags(flags):
+            raise NotImplementedError(
+                "Unsupported value for `flags` parameter"
+            )
+
+        data = libstrings.find_re(self._column, pat, flags)
+        return self._return_or_inplace(data)
+
     def find_multiple(self, patterns: SeriesOrIndex) -> cudf.Series:
         """
         Find all first occurrences of patterns in the Series/Index.
diff --git a/python/cudf/cudf/tests/test_string.py b/python/cudf/cudf/tests/test_string.py
index cc88cc79769..45143211a11 100644
--- a/python/cudf/cudf/tests/test_string.py
+++ b/python/cudf/cudf/tests/test_string.py
@@ -1899,6 +1899,26 @@ def test_string_findall(pat, flags):
     assert_eq(expected, actual)
 
 
+@pytest.mark.parametrize(
+    "pat, flags, pos",
+    [
+        ("Monkey", 0, [-1, 0, -1, -1]),
+        ("on", 0, [2, 1, -1, 1]),
+        ("bit", 0, [-1, -1, 3, -1]),
+        ("on$", 0, [2, -1, -1, -1]),
+        ("on$", re.MULTILINE, [2, -1, -1, 1]),
+        ("o.*k", re.DOTALL, [-1, 1, -1, 1]),
+    ],
+)
+def test_string_find_re(pat, flags, pos):
+    test_data = ["Lion", "Monkey", "Rabbit", "Don\nkey"]
+    gs = cudf.Series(test_data)
+
+    expected = pd.Series(pos, dtype=np.int32)
+    actual = gs.str.find_re(pat, flags)
+    assert_eq(expected, actual)
+
+
 def test_string_replace_multi():
     ps = pd.Series(["hello", "goodbye"])
     gs = cudf.Series(["hello", "goodbye"])
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
index e0a8b776465..0d286c36446 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/findall.pxd
@@ -11,3 +11,7 @@ cdef extern from "cudf/strings/findall.hpp" namespace "cudf::strings" nogil:
     cdef unique_ptr[column] findall(
         column_view input,
         regex_program prog) except +
+
+    cdef unique_ptr[column] find_re(
+        column_view input,
+        regex_program prog) except +
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pxd b/python/pylibcudf/pylibcudf/strings/findall.pxd
index 54afa088141..3c35a9c9aa9 100644
--- a/python/pylibcudf/pylibcudf/strings/findall.pxd
+++ b/python/pylibcudf/pylibcudf/strings/findall.pxd
@@ -4,4 +4,5 @@ from pylibcudf.column cimport Column
 from pylibcudf.strings.regex_program cimport RegexProgram
 
 
+cpdef Column find_re(Column input, RegexProgram pattern)
 cpdef Column findall(Column input, RegexProgram pattern)
diff --git a/python/pylibcudf/pylibcudf/strings/findall.pyx b/python/pylibcudf/pylibcudf/strings/findall.pyx
index 3a6b87504b3..5212dc4594d 100644
--- a/python/pylibcudf/pylibcudf/strings/findall.pyx
+++ b/python/pylibcudf/pylibcudf/strings/findall.pyx
@@ -38,3 +38,35 @@ cpdef Column findall(Column input, RegexProgram pattern):
         )
 
     return Column.from_libcudf(move(c_result))
+
+
+cpdef Column find_re(Column input, RegexProgram pattern):
+    """
+    Returns character positions where the pattern first matches
+    the elements in input strings.
+
+    For details, see :cpp:func:`cudf::strings::find_re`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation
+    pattern : RegexProgram
+        Regex pattern
+
+    Returns
+    -------
+    Column
+        New column of integers
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_findall.find_re(
+                input.view(),
+                pattern.c_obj.get()[0]
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_findall.py b/python/pylibcudf/pylibcudf/tests/test_string_findall.py
index 994552fa276..debfad92d00 100644
--- a/python/pylibcudf/pylibcudf/tests/test_string_findall.py
+++ b/python/pylibcudf/pylibcudf/tests/test_string_findall.py
@@ -21,3 +21,20 @@ def test_findall():
         type=pa_result.type,
     )
     assert_column_eq(result, expected)
+
+
+def test_find_re():
+    arr = pa.array(["bunny", "rabbit", "hare", "dog"])
+    pattern = "[eb]"
+    result = plc.strings.findall.find_re(
+        plc.interop.from_arrow(arr),
+        plc.strings.regex_program.RegexProgram.create(
+            pattern, plc.strings.regex_flags.RegexFlags.DEFAULT
+        ),
+    )
+    pa_result = plc.interop.to_arrow(result)
+    expected = pa.array(
+        [0, 2, 3, -1],
+        type=pa_result.type,
+    )
+    assert_column_eq(result, expected)

From bd3b3327a6326ffea4658d682b8b9087e32da98a Mon Sep 17 00:00:00 2001
From: Kyle Edwards <kyedwards@nvidia.com>
Date: Thu, 3 Oct 2024 16:25:09 -0400
Subject: [PATCH 08/25] Restore export of nvcomp outside of wheel builds
 (#16988)

Fixes https://github.com/rapidsai/cudf/issues/16986

Authors:
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16988
---
 cpp/CMakeLists.txt                    | 1 +
 cpp/cmake/thirdparty/get_nvcomp.cmake | 6 +++++-
 python/libcudf/CMakeLists.txt         | 3 +++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 136f43ee706..f7a5dd2f2fb 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -52,6 +52,7 @@ option(JITIFY_USE_CACHE "Use a file cache for JIT compiled kernels" ON)
 option(CUDF_BUILD_TESTUTIL "Whether to build the test utilities contained in libcudf" ON)
 mark_as_advanced(CUDF_BUILD_TESTUTIL)
 option(CUDF_USE_PROPRIETARY_NVCOMP "Download and use NVCOMP with proprietary extensions" ON)
+option(CUDF_EXPORT_NVCOMP "Export NVCOMP as a dependency" ON)
 option(CUDF_LARGE_STRINGS_DISABLED "Build with large string support disabled" OFF)
 mark_as_advanced(CUDF_LARGE_STRINGS_DISABLED)
 option(
diff --git a/cpp/cmake/thirdparty/get_nvcomp.cmake b/cpp/cmake/thirdparty/get_nvcomp.cmake
index 1b6a1730161..33b1b45fb44 100644
--- a/cpp/cmake/thirdparty/get_nvcomp.cmake
+++ b/cpp/cmake/thirdparty/get_nvcomp.cmake
@@ -16,7 +16,11 @@
 function(find_and_configure_nvcomp)
 
   include(${rapids-cmake-dir}/cpm/nvcomp.cmake)
-  rapids_cpm_nvcomp(USE_PROPRIETARY_BINARY ${CUDF_USE_PROPRIETARY_NVCOMP})
+  set(export_args)
+  if(CUDF_EXPORT_NVCOMP)
+    set(export_args BUILD_EXPORT_SET cudf-exports INSTALL_EXPORT_SET cudf-exports)
+  endif()
+  rapids_cpm_nvcomp(${export_args} USE_PROPRIETARY_BINARY ${CUDF_USE_PROPRIETARY_NVCOMP})
 
   # Per-thread default stream
   if(TARGET nvcomp AND CUDF_USE_PER_THREAD_DEFAULT_STREAM)
diff --git a/python/libcudf/CMakeLists.txt b/python/libcudf/CMakeLists.txt
index 2b208e2e021..5f9a04d3cee 100644
--- a/python/libcudf/CMakeLists.txt
+++ b/python/libcudf/CMakeLists.txt
@@ -41,6 +41,9 @@ set(BUILD_TESTS OFF)
 set(BUILD_BENCHMARKS OFF)
 set(CUDF_BUILD_TESTUTIL OFF)
 set(CUDF_BUILD_STREAMS_TEST_UTIL OFF)
+if(USE_NVCOMP_RUNTIME_WHEEL)
+  set(CUDF_EXPORT_NVCOMP OFF)
+endif()
 set(CUDA_STATIC_RUNTIME ON)
 
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}/lib)

From 010839172ecb5a99609044a98031ff5b7578cd64 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Thu, 3 Oct 2024 19:44:20 -0500
Subject: [PATCH 09/25] Use `libcudf` wheel from PR rather than nightly for
 `polars-polars` CI test job (#16975)

This PR fixes an issue where one `cudf-polars` CI job uses the `pylibcudf` wheel generated from the branch being tested, but pulls a libcudf nightly, which can cause issues when introducing Cython/C++ changes simultaneously.

Authors:
  - https://github.com/brandon-b-miller

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16975
---
 ci/test_cudf_polars_polars_tests.sh | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/ci/test_cudf_polars_polars_tests.sh b/ci/test_cudf_polars_polars_tests.sh
index 55399d0371a..f5bcdc62604 100755
--- a/ci/test_cudf_polars_polars_tests.sh
+++ b/ci/test_cudf_polars_polars_tests.sh
@@ -24,14 +24,17 @@ rapids-logger "Download wheels"
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
 
-# Download the pylibcudf built in the previous step
-RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-pylibcudf-dep
+# Download libcudf and pylibcudf built in the previous step
+RAPIDS_PY_WHEEL_NAME="libcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 cpp ./local-libcudf-dep
+RAPIDS_PY_WHEEL_NAME="pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 python ./local-pylibcudf-dep
 
-rapids-logger "Install pylibcudf"
-python -m pip install ./local-pylibcudf-dep/pylibcudf*.whl
+rapids-logger "Install libcudf, pylibcudf and cudf_polars"
+python -m pip install \
+    -v \
+    "$(echo ./dist/cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}*.whl)[test]" \
+    "$(echo ./local-libcudf-dep/libcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)" \
+    "$(echo ./local-pylibcudf-dep/pylibcudf_${RAPIDS_PY_CUDA_SUFFIX}*.whl)"
 
-rapids-logger "Install cudf_polars"
-python -m pip install $(echo ./dist/cudf_polars*.whl)
 
 TAG=$(python -c 'import polars; print(f"py-{polars.__version__}")')
 rapids-logger "Clone polars to ${TAG}"

From 2fa2e6a554096181b0a625cdc50368893dbaaa1f Mon Sep 17 00:00:00 2001
From: Basit Ayantunde <rlamarrr@gmail.com>
Date: Fri, 4 Oct 2024 16:08:37 +0100
Subject: [PATCH 10/25] Switched AST benchmarks from GoogleBench to NVBench
 (#16952)

This merge request switches the Benchmarking solution for the AST benchmark from GoogleBench to NVBench. ~It also refactors the L2 cache flushing functionality of `cuda_event_timer` into a separate function `flush_L2_device_cache`, since NVBench already performs the timing, synchronization, and timer setup necessary.~

Authors:
  - Basit Ayantunde (https://github.com/lamarrr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Yunsong Wang (https://github.com/PointKernel)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/16952
---
 cpp/benchmarks/CMakeLists.txt    |  2 +-
 cpp/benchmarks/ast/transform.cpp | 51 +++++++++++---------------------
 2 files changed, 18 insertions(+), 35 deletions(-)

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 110b4557840..1e13bf176c1 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -330,7 +330,7 @@ ConfigureNVBench(CSV_WRITER_NVBENCH io/csv/csv_writer.cpp)
 
 # ##################################################################################################
 # * ast benchmark ---------------------------------------------------------------------------------
-ConfigureBench(AST_BENCH ast/transform.cpp)
+ConfigureNVBench(AST_NVBENCH ast/transform.cpp)
 
 # ##################################################################################################
 # * binaryop benchmark ----------------------------------------------------------------------------
diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp
index 65a44532cf1..f44f26e4d2c 100644
--- a/cpp/benchmarks/ast/transform.cpp
+++ b/cpp/benchmarks/ast/transform.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,14 +15,16 @@
  */
 
 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/transform.hpp>
 #include <cudf/types.hpp>
 
+#include <rmm/cuda_stream_view.hpp>
+
 #include <thrust/iterator/counting_iterator.h>
 
+#include <nvbench/nvbench.cuh>
+
 #include <algorithm>
 #include <list>
 #include <memory>
@@ -35,13 +37,10 @@ enum class TreeType {
 };
 
 template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
-class AST : public cudf::benchmark {};
-
-template <typename key_type, TreeType tree_type, bool reuse_columns, bool Nullable>
-static void BM_ast_transform(benchmark::State& state)
+static void BM_ast_transform(nvbench::state& state)
 {
-  auto const table_size{static_cast<cudf::size_type>(state.range(0))};
-  auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};
+  auto const table_size  = static_cast<cudf::size_type>(state.get_int64("table_size"));
+  auto const tree_levels = static_cast<cudf::size_type>(state.get_int64("tree_levels"));
 
   // Create table data
   auto const n_cols = reuse_columns ? 1 : tree_levels + 1;
@@ -86,38 +85,22 @@ static void BM_ast_transform(benchmark::State& state)
 
   auto const& expression_tree_root = expressions.back();
 
-  // Execute benchmark
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
-    cudf::compute_column(table, expression_tree_root);
-  }
-
   // Use the number of bytes read from global memory
-  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0) *
-                          (tree_levels + 1) * sizeof(key_type));
-}
+  state.add_global_memory_reads<key_type>(table_size * (tree_levels + 1));
 
-static void CustomRanges(benchmark::internal::Benchmark* b)
-{
-  auto row_counts       = std::vector<cudf::size_type>{100'000, 1'000'000, 10'000'000, 100'000'000};
-  auto operation_counts = std::vector<cudf::size_type>{1, 5, 10};
-  for (auto const& row_count : row_counts) {
-    for (auto const& operation_count : operation_counts) {
-      b->Args({row_count, operation_count});
-    }
-  }
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); });
 }
 
 #define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \
-  BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable)     \
-  (::benchmark::State & st)                                                                \
+  static void name(::nvbench::state& st)                                                   \
   {                                                                                        \
-    BM_ast_transform<key_type, tree_type, reuse_columns, nullable>(st);                    \
+    ::BM_ast_transform<key_type, tree_type, reuse_columns, nullable>(st);                  \
   }                                                                                        \
-  BENCHMARK_REGISTER_F(AST, name)                                                          \
-    ->Apply(CustomRanges)                                                                  \
-    ->Unit(benchmark::kMillisecond)                                                        \
-    ->UseManualTime();
+  NVBENCH_BENCH(name)                                                                      \
+    .set_name(#name)                                                                       \
+    .add_int64_axis("tree_levels", {1, 5, 10})                                             \
+    .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
 
 AST_TRANSFORM_BENCHMARK_DEFINE(
   ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false);

From a78432184f20f7acf493eaa8d1928cfee29d1771 Mon Sep 17 00:00:00 2001
From: Basit Ayantunde <rlamarrr@gmail.com>
Date: Fri, 4 Oct 2024 16:19:37 +0100
Subject: [PATCH 11/25] Switched BINARY_OP Benchmarks from GoogleBench to
 NVBench (#16963)

This merge request switches the benchmarking solution for the BINARY_OP benchmarks from GoogleBench to NVBench.

Authors:
  - Basit Ayantunde (https://github.com/lamarrr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - Tianyu Liu (https://github.com/kingcrimsontianyu)

URL: https://github.com/rapidsai/cudf/pull/16963
---
 cpp/benchmarks/CMakeLists.txt                 |  2 +-
 cpp/benchmarks/binaryop/binaryop.cpp          | 65 ++++++-------------
 cpp/benchmarks/binaryop/compiled_binaryop.cpp | 47 ++++++--------
 3 files changed, 40 insertions(+), 74 deletions(-)

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 1e13bf176c1..b8a53cd8bd9 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -334,7 +334,7 @@ ConfigureNVBench(AST_NVBENCH ast/transform.cpp)
 
 # ##################################################################################################
 # * binaryop benchmark ----------------------------------------------------------------------------
-ConfigureBench(BINARYOP_BENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp)
+ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp)
 
 # ##################################################################################################
 # * nvtext benchmark -------------------------------------------------------------------
diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp
index fa98d9e601a..7d267a88764 100644
--- a/cpp/benchmarks/binaryop/binaryop.cpp
+++ b/cpp/benchmarks/binaryop/binaryop.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -15,15 +15,14 @@
  */
 
 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/binaryop.hpp>
 #include <cudf/table/table_view.hpp>
 #include <cudf/types.hpp>
 
+#include <nvbench/nvbench.cuh>
+
 #include <algorithm>
-#include <vector>
 
 // This set of benchmarks is designed to be a comparison for the AST benchmarks
 
@@ -33,13 +32,10 @@ enum class TreeType {
 };
 
 template <typename key_type, TreeType tree_type, bool reuse_columns>
-class BINARYOP : public cudf::benchmark {};
-
-template <typename key_type, TreeType tree_type, bool reuse_columns>
-static void BM_binaryop_transform(benchmark::State& state)
+static void BM_binaryop_transform(nvbench::state& state)
 {
-  auto const table_size{static_cast<cudf::size_type>(state.range(0))};
-  auto const tree_levels{static_cast<cudf::size_type>(state.range(1))};
+  auto const table_size{static_cast<cudf::size_type>(state.get_int64("table_size"))};
+  auto const tree_levels{static_cast<cudf::size_type>(state.get_int64("tree_levels"))};
 
   // Create table data
   auto const n_cols       = reuse_columns ? 1 : tree_levels + 1;
@@ -47,9 +43,10 @@ static void BM_binaryop_transform(benchmark::State& state)
     cycle_dtypes({cudf::type_to_id<key_type>()}, n_cols), row_count{table_size});
   cudf::table_view table{*source_table};
 
-  // Execute benchmark
-  for (auto _ : state) {
-    cuda_event_timer raii(state, true);  // flush_l2_cache = true, stream = 0
+  // Use the number of bytes read from global memory
+  state.add_global_memory_reads<key_type>(table_size * (tree_levels + 1));
+
+  state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) {
     // Execute tree that chains additions like (((a + b) + c) + d)
     auto const op               = cudf::binary_operator::ADD;
     auto const result_data_type = cudf::data_type(cudf::type_to_id<key_type>());
@@ -64,16 +61,18 @@ static void BM_binaryop_transform(benchmark::State& state)
         result = cudf::binary_operation(result->view(), col, op, result_data_type);
       });
     }
-  }
-
-  // Use the number of bytes read from global memory
-  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0) *
-                          (tree_levels + 1) * sizeof(key_type));
+  });
 }
 
 #define BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns) \
-  BENCHMARK_TEMPLATE_DEFINE_F(BINARYOP, name, key_type, tree_type, reuse_columns)     \
-  (::benchmark::State & st) { BM_binaryop_transform<key_type, tree_type, reuse_columns>(st); }
+                                                                                      \
+  static void name(::nvbench::state& st)                                              \
+  {                                                                                   \
+    BM_binaryop_transform<key_type, tree_type, reuse_columns>(st);                    \
+  }                                                                                   \
+  NVBENCH_BENCH(name)                                                                 \
+    .add_int64_axis("tree_levels", {1, 2, 5, 10})                                     \
+    .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000})
 
 BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique,
                                     int32_t,
@@ -87,29 +86,3 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique,
                                     double,
                                     TreeType::IMBALANCED_LEFT,
                                     false);
-
-static void CustomRanges(benchmark::internal::Benchmark* b)
-{
-  auto row_counts       = std::vector<cudf::size_type>{100'000, 1'000'000, 10'000'000, 100'000'000};
-  auto operation_counts = std::vector<cudf::size_type>{1, 2, 5, 10};
-  for (auto const& row_count : row_counts) {
-    for (auto const& operation_count : operation_counts) {
-      b->Args({row_count, operation_count});
-    }
-  }
-}
-
-BENCHMARK_REGISTER_F(BINARYOP, binaryop_int32_imbalanced_unique)
-  ->Apply(CustomRanges)
-  ->Unit(benchmark::kMillisecond)
-  ->UseManualTime();
-
-BENCHMARK_REGISTER_F(BINARYOP, binaryop_int32_imbalanced_reuse)
-  ->Apply(CustomRanges)
-  ->Unit(benchmark::kMillisecond)
-  ->UseManualTime();
-
-BENCHMARK_REGISTER_F(BINARYOP, binaryop_double_imbalanced_unique)
-  ->Apply(CustomRanges)
-  ->Unit(benchmark::kMillisecond)
-  ->UseManualTime();
diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp b/cpp/benchmarks/binaryop/compiled_binaryop.cpp
index 7086a61c7c5..bc0ff69bce9 100644
--- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp
+++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp
@@ -15,20 +15,18 @@
  */
 
 #include <benchmarks/common/generate_input.hpp>
-#include <benchmarks/fixture/benchmark_fixture.hpp>
-#include <benchmarks/synchronization/synchronization.hpp>
 
 #include <cudf/binaryop.hpp>
 
-class COMPILED_BINARYOP : public cudf::benchmark {};
+#include <nvbench/nvbench.cuh>
 
 template <typename TypeLhs, typename TypeRhs, typename TypeOut>
-void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
+void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop)
 {
-  auto const column_size{static_cast<cudf::size_type>(state.range(0))};
+  auto const table_size = static_cast<cudf::size_type>(state.get_int64("table_size"));
 
   auto const source_table = create_random_table(
-    {cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{column_size});
+    {cudf::type_to_id<TypeLhs>(), cudf::type_to_id<TypeRhs>()}, row_count{table_size});
 
   auto lhs = cudf::column_view(source_table->get_column(0));
   auto rhs = cudf::column_view(source_table->get_column(1));
@@ -38,31 +36,26 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop)
   // Call once for hot cache.
   cudf::binary_operation(lhs, rhs, binop, output_dtype);
 
-  for (auto _ : state) {
-    cuda_event_timer timer(state, true);
-    cudf::binary_operation(lhs, rhs, binop, output_dtype);
-  }
-
   // use number of bytes read and written to global memory
-  state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * column_size *
-                          (sizeof(TypeLhs) + sizeof(TypeRhs) + sizeof(TypeOut)));
+  state.add_global_memory_reads<TypeLhs>(table_size);
+  state.add_global_memory_reads<TypeRhs>(table_size);
+  state.add_global_memory_reads<TypeOut>(table_size);
+
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); });
 }
 
+#define BM_STRINGIFY(a) #a
+
 // TODO tparam boolean for null.
-#define BM_BINARYOP_BENCHMARK_DEFINE(name, lhs, rhs, bop, tout)           \
-  BENCHMARK_DEFINE_F(COMPILED_BINARYOP, name)                             \
-  (::benchmark::State & st)                                               \
-  {                                                                       \
-    BM_compiled_binaryop<lhs, rhs, tout>(st, cudf::binary_operator::bop); \
-  }                                                                       \
-  BENCHMARK_REGISTER_F(COMPILED_BINARYOP, name)                           \
-    ->Unit(benchmark::kMicrosecond)                                       \
-    ->UseManualTime()                                                     \
-    ->Arg(10000)      /* 10k */                                           \
-    ->Arg(100000)     /* 100k */                                          \
-    ->Arg(1000000)    /* 1M */                                            \
-    ->Arg(10000000)   /* 10M */                                           \
-    ->Arg(100000000); /* 100M */
+#define BM_BINARYOP_BENCHMARK_DEFINE(name, lhs, rhs, bop, tout)               \
+  static void name(::nvbench::state& st)                                      \
+  {                                                                           \
+    ::BM_compiled_binaryop<lhs, rhs, tout>(st, ::cudf::binary_operator::bop); \
+  }                                                                           \
+  NVBENCH_BENCH(name)                                                         \
+    .set_name("compiled_binary_op_" BM_STRINGIFY(name))                       \
+    .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000})
 
 #define build_name(a, b, c, d) a##_##b##_##c##_##d
 

From 39342b8762c734aa2a94b94815bef75869a4e59c Mon Sep 17 00:00:00 2001
From: Vukasin Milovanovic <vmilovanovic@nvidia.com>
Date: Fri, 4 Oct 2024 09:39:20 -0700
Subject: [PATCH 12/25] Properly handle the mapped and registered regions in
 `memory_mapped_source` (#16865)

Depends on https://github.com/rapidsai/cudf/pull/16826

Set of fixes that improve robustness on the non-GDS file input:

1. Avoid registering beyond the byte range - addresses problems when reading adjacent byte ranges from multiple threads (GH only).
2. Allow reading data outside of the memory mapped region. This prevents issues with very long rows in CSV or JSON input.
3. Copy host data when the range being read is only partially registered. This avoids errors when trying to copy the host data range to the device (GH only).

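Modifies the datasource class hierarchy to allow reuse of the direct file `host_read`s: they now live in the `file_source` base class, so `memory_mapped_source` can fall back to them.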

Authors:
  - Vukasin Milovanovic (https://github.com/vuule)

Approvers:
  - Basit Ayantunde (https://github.com/lamarrr)
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16865
---
 cpp/include/cudf/io/datasource.hpp  |  22 +++-
 cpp/src/io/functions.cpp            |  14 ++-
 cpp/src/io/utilities/datasource.cpp | 157 +++++++++++++++++-----------
 cpp/tests/io/csv_test.cpp           |  35 +++++++
 4 files changed, 158 insertions(+), 70 deletions(-)

diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp
index b12fbe39a57..dc14802adc1 100644
--- a/cpp/include/cudf/io/datasource.hpp
+++ b/cpp/include/cudf/io/datasource.hpp
@@ -86,14 +86,28 @@ class datasource {
   /**
    * @brief Creates a source from a file path.
    *
+   * @note Parameters `offset`, `max_size_estimate` and `min_size_estimate` are hints to the
+   * `datasource` implementation about the expected range of the data that will be read. The
+   * implementation may use these hints to optimize the read operation. These parameters are usually
+   * based on the byte range option. In this case, `min_size_estimate` should be no greater than the
+   * byte range to avoid potential issues when reading adjacent ranges. `max_size_estimate` can
+   * include padding after the byte range, to include additional data that may be needed for
+   * processing.
+   *
+   * @throws cudf::logic_error if `min_size_estimate` is greater than a nonzero `max_size_estimate`
+   *
    * @param[in] filepath Path to the file to use
-   * @param[in] offset Bytes from the start of the file (the default is zero)
-   * @param[in] size Bytes from the offset; use zero for entire file (the default is zero)
+   * @param[in] offset Starting byte offset from which data will be read (the default is zero)
+   * @param[in] max_size_estimate Upper estimate of the data range that will be read (the default is
+   * zero, which means the whole file after `offset`)
+   * @param[in] min_size_estimate Lower estimate of the data range that will be read (the default is
+   * zero, which means the whole file after `offset`)
    * @return Constructed datasource object
    */
   static std::unique_ptr<datasource> create(std::string const& filepath,
-                                            size_t offset = 0,
-                                            size_t size   = 0);
+                                            size_t offset            = 0,
+                                            size_t max_size_estimate = 0,
+                                            size_t min_size_estimate = 0);
 
   /**
    * @brief Creates a source from a host memory buffer.
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index de8eea9e99b..5a060902eb2 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -122,14 +122,16 @@ chunked_parquet_writer_options_builder chunked_parquet_writer_options::builder(
 namespace {
 
 std::vector<std::unique_ptr<cudf::io::datasource>> make_datasources(source_info const& info,
-                                                                    size_t range_offset = 0,
-                                                                    size_t range_size   = 0)
+                                                                    size_t offset            = 0,
+                                                                    size_t max_size_estimate = 0,
+                                                                    size_t min_size_estimate = 0)
 {
   switch (info.type()) {
     case io_type::FILEPATH: {
       auto sources = std::vector<std::unique_ptr<cudf::io::datasource>>();
       for (auto const& filepath : info.filepaths()) {
-        sources.emplace_back(cudf::io::datasource::create(filepath, range_offset, range_size));
+        sources.emplace_back(
+          cudf::io::datasource::create(filepath, offset, max_size_estimate, min_size_estimate));
       }
       return sources;
     }
@@ -211,7 +213,8 @@ table_with_metadata read_json(json_reader_options options,
 
   auto datasources = make_datasources(options.get_source(),
                                       options.get_byte_range_offset(),
-                                      options.get_byte_range_size_with_padding());
+                                      options.get_byte_range_size_with_padding(),
+                                      options.get_byte_range_size());
 
   return json::detail::read_json(datasources, options, stream, mr);
 }
@@ -238,7 +241,8 @@ table_with_metadata read_csv(csv_reader_options options,
 
   auto datasources = make_datasources(options.get_source(),
                                       options.get_byte_range_offset(),
-                                      options.get_byte_range_size_with_padding());
+                                      options.get_byte_range_size_with_padding(),
+                                      options.get_byte_range_size());
 
   CUDF_EXPECTS(datasources.size() == 1, "Only a single source is currently supported.");
 
diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp
index e4313eba454..0be976b6144 100644
--- a/cpp/src/io/utilities/datasource.cpp
+++ b/cpp/src/io/utilities/datasource.cpp
@@ -32,6 +32,7 @@
 #include <unistd.h>
 
 #include <unordered_map>
+#include <vector>
 
 namespace cudf {
 namespace io {
@@ -54,6 +55,30 @@ class file_source : public datasource {
     }
   }
 
+  std::unique_ptr<buffer> host_read(size_t offset, size_t size) override
+  {
+    lseek(_file.desc(), offset, SEEK_SET);
+
+    // Clamp length to available data
+    ssize_t const read_size = std::min(size, _file.size() - offset);
+
+    std::vector<uint8_t> v(read_size);
+    CUDF_EXPECTS(read(_file.desc(), v.data(), read_size) == read_size, "read failed");
+    return buffer::create(std::move(v));
+  }
+
+  size_t host_read(size_t offset, size_t size, uint8_t* dst) override
+  {
+    lseek(_file.desc(), offset, SEEK_SET);
+
+    // Clamp length to available data
+    auto const read_size = std::min(size, _file.size() - offset);
+
+    CUDF_EXPECTS(read(_file.desc(), dst, read_size) == static_cast<ssize_t>(read_size),
+                 "read failed");
+    return read_size;
+  }
+
   ~file_source() override = default;
 
   [[nodiscard]] bool supports_device_read() const override
@@ -138,40 +163,63 @@ class file_source : public datasource {
  */
 class memory_mapped_source : public file_source {
  public:
-  explicit memory_mapped_source(char const* filepath, size_t offset, size_t size)
+  explicit memory_mapped_source(char const* filepath,
+                                size_t offset,
+                                size_t max_size_estimate,
+                                size_t min_size_estimate)
     : file_source(filepath)
   {
     if (_file.size() != 0) {
-      map(_file.desc(), offset, size);
-      register_mmap_buffer();
+      // Memory mapping is not exclusive, so we can include the whole region we expect to read
+      map(_file.desc(), offset, max_size_estimate);
+      // Buffer registration is exclusive (can't overlap with other registered buffers) so we
+      // register the lower estimate; this avoids issues when reading adjacent ranges from the same
+      // file from multiple threads
+      register_mmap_buffer(offset, min_size_estimate);
     }
   }
 
   ~memory_mapped_source() override
   {
     if (_map_addr != nullptr) {
-      munmap(_map_addr, _map_size);
+      unmap();
       unregister_mmap_buffer();
     }
   }
 
   std::unique_ptr<buffer> host_read(size_t offset, size_t size) override
   {
-    CUDF_EXPECTS(offset >= _map_offset, "Requested offset is outside mapping");
+    // Clamp length to available data
+    auto const read_size = std::min(size, +_file.size() - offset);
+
+    // If the requested range is outside of the mapped region, read from the file
+    if (offset < _map_offset or offset + read_size > (_map_offset + _map_size)) {
+      return file_source::host_read(offset, read_size);
+    }
 
-    // Clamp length to available data in the mapped region
-    auto const read_size = std::min(size, _map_size - (offset - _map_offset));
+    // If the requested range is only partially within the registered region, copy to a new
+    // host buffer to make the data safe to copy to the device
+    if (_reg_addr != nullptr and
+        (offset < _reg_offset or offset + read_size > (_reg_offset + _reg_size))) {
+      auto const src = static_cast<uint8_t*>(_map_addr) + (offset - _map_offset);
+
+      return std::make_unique<owning_buffer<std::vector<uint8_t>>>(
+        std::vector<uint8_t>(src, src + read_size));
+    }
 
     return std::make_unique<non_owning_buffer>(
-      static_cast<uint8_t*>(_map_addr) + (offset - _map_offset), read_size);
+      static_cast<uint8_t*>(_map_addr) + offset - _map_offset, read_size);
   }
 
   size_t host_read(size_t offset, size_t size, uint8_t* dst) override
   {
-    CUDF_EXPECTS(offset >= _map_offset, "Requested offset is outside mapping");
+    // Clamp length to available data
+    auto const read_size = std::min(size, +_file.size() - offset);
 
-    // Clamp length to available data in the mapped region
-    auto const read_size = std::min(size, _map_size - (offset - _map_offset));
+    // If the requested range is outside of the mapped region, read from the file
+    if (offset < _map_offset or offset + read_size > (_map_offset + _map_size)) {
+      return file_source::host_read(offset, read_size, dst);
+    }
 
     auto const src = static_cast<uint8_t*>(_map_addr) + (offset - _map_offset);
     std::memcpy(dst, src, read_size);
@@ -184,16 +232,18 @@ class memory_mapped_source : public file_source {
    *
    * Fixes nvbugs/4215160
    */
-  void register_mmap_buffer()
+  void register_mmap_buffer(size_t offset, size_t size)
   {
-    if (_map_addr == nullptr or _map_size == 0 or not pageableMemoryAccessUsesHostPageTables()) {
-      return;
-    }
+    if (_map_addr == nullptr or not pageableMemoryAccessUsesHostPageTables()) { return; }
 
-    auto const result = cudaHostRegister(_map_addr, _map_size, cudaHostRegisterDefault);
-    if (result == cudaSuccess) {
-      _is_map_registered = true;
-    } else {
+    // Registered region must be within the mapped region
+    _reg_offset = std::max(offset, _map_offset);
+    _reg_size   = std::min(size != 0 ? size : _map_size, (_map_offset + _map_size) - _reg_offset);
+
+    _reg_addr         = static_cast<std::byte*>(_map_addr) - _map_offset + _reg_offset;
+    auto const result = cudaHostRegister(_reg_addr, _reg_size, cudaHostRegisterReadOnly);
+    if (result != cudaSuccess) {
+      _reg_addr = nullptr;
       CUDF_LOG_WARN("cudaHostRegister failed with {} ({})",
                     static_cast<int>(result),
                     cudaGetErrorString(result));
@@ -205,10 +255,12 @@ class memory_mapped_source : public file_source {
    */
   void unregister_mmap_buffer()
   {
-    if (not _is_map_registered) { return; }
+    if (_reg_addr == nullptr) { return; }
 
-    auto const result = cudaHostUnregister(_map_addr);
-    if (result != cudaSuccess) {
+    auto const result = cudaHostUnregister(_reg_addr);
+    if (result == cudaSuccess) {
+      _reg_addr = nullptr;
+    } else {
       CUDF_LOG_WARN("cudaHostUnregister failed with {} ({})",
                     static_cast<int>(result),
                     cudaGetErrorString(result));
@@ -226,52 +278,30 @@ class memory_mapped_source : public file_source {
 
     // Size for `mmap()` needs to include the page padding
     _map_size = size + (offset - _map_offset);
+    if (_map_size == 0) { return; }
 
     // Check if accessing a region within already mapped area
     _map_addr = mmap(nullptr, _map_size, PROT_READ, MAP_PRIVATE, fd, _map_offset);
     CUDF_EXPECTS(_map_addr != MAP_FAILED, "Cannot create memory mapping");
   }
 
- private:
-  size_t _map_size        = 0;
-  size_t _map_offset      = 0;
-  void* _map_addr         = nullptr;
-  bool _is_map_registered = false;
-};
-
-/**
- * @brief Implementation class for reading from a file using `read` calls
- *
- * Potentially faster than `memory_mapped_source` when only a small portion of the file is read
- * through the host.
- */
-class direct_read_source : public file_source {
- public:
-  explicit direct_read_source(char const* filepath) : file_source(filepath) {}
-
-  std::unique_ptr<buffer> host_read(size_t offset, size_t size) override
+  void unmap()
   {
-    lseek(_file.desc(), offset, SEEK_SET);
-
-    // Clamp length to available data
-    ssize_t const read_size = std::min(size, _file.size() - offset);
-
-    std::vector<uint8_t> v(read_size);
-    CUDF_EXPECTS(read(_file.desc(), v.data(), read_size) == read_size, "read failed");
-    return buffer::create(std::move(v));
+    if (_map_addr != nullptr) {
+      auto const result = munmap(_map_addr, _map_size);
+      if (result != 0) { CUDF_LOG_WARN("munmap failed with {}", result); }
+      _map_addr = nullptr;
+    }
   }
 
-  size_t host_read(size_t offset, size_t size, uint8_t* dst) override
-  {
-    lseek(_file.desc(), offset, SEEK_SET);
-
-    // Clamp length to available data
-    auto const read_size = std::min(size, _file.size() - offset);
+ private:
+  size_t _map_offset = 0;
+  size_t _map_size   = 0;
+  void* _map_addr    = nullptr;
 
-    CUDF_EXPECTS(read(_file.desc(), dst, read_size) == static_cast<ssize_t>(read_size),
-                 "read failed");
-    return read_size;
-  }
+  size_t _reg_offset = 0;
+  size_t _reg_size   = 0;
+  void* _reg_addr    = nullptr;
 };
 
 /**
@@ -431,16 +461,21 @@ class user_datasource_wrapper : public datasource {
 
 std::unique_ptr<datasource> datasource::create(std::string const& filepath,
                                                size_t offset,
-                                               size_t size)
+                                               size_t max_size_estimate,
+                                               size_t min_size_estimate)
 {
+  CUDF_EXPECTS(max_size_estimate == 0 or min_size_estimate <= max_size_estimate,
+               "Invalid min/max size estimates for datasource creation");
+
 #ifdef CUFILE_FOUND
   if (cufile_integration::is_always_enabled()) {
     // avoid mmap as GDS is expected to be used for most reads
-    return std::make_unique<direct_read_source>(filepath.c_str());
+    return std::make_unique<file_source>(filepath.c_str());
   }
 #endif
   // Use our own memory mapping implementation for direct file reads
-  return std::make_unique<memory_mapped_source>(filepath.c_str(), offset, size);
+  return std::make_unique<memory_mapped_source>(
+    filepath.c_str(), offset, max_size_estimate, min_size_estimate);
 }
 
 std::unique_ptr<datasource> datasource::create(host_buffer const& buffer)
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index dc14824d834..0028dd946e3 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -2516,4 +2516,39 @@ TEST_F(CsvReaderTest, UTF8BOM)
   CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result_view, expected);
 }
 
+void expect_buffers_equal(cudf::io::datasource::buffer* lhs, cudf::io::datasource::buffer* rhs)
+{
+  ASSERT_EQ(lhs->size(), rhs->size());
+  EXPECT_EQ(0, std::memcmp(lhs->data(), rhs->data(), lhs->size()));
+}
+
+TEST_F(CsvReaderTest, OutOfMapBoundsReads)
+{
+  // write a lot of data into a file
+  auto filepath        = temp_env->get_temp_dir() + "OutOfMapBoundsReads.csv";
+  auto const num_rows  = 1 << 20;
+  auto const row       = std::string{"0,1,2,3,4,5,6,7,8,9\n"};
+  auto const file_size = num_rows * row.size();
+  {
+    std::ofstream outfile(filepath, std::ofstream::out);
+    for (size_t i = 0; i < num_rows; ++i) {
+      outfile << row;
+    }
+  }
+
+  // Only memory map the middle of the file
+  auto source         = cudf::io::datasource::create(filepath, file_size / 2, file_size / 4);
+  auto full_source    = cudf::io::datasource::create(filepath);
+  auto const all_data = source->host_read(0, file_size);
+  auto ref_data       = full_source->host_read(0, file_size);
+  expect_buffers_equal(ref_data.get(), all_data.get());
+
+  auto const start_data = source->host_read(file_size / 2, file_size / 2);
+  expect_buffers_equal(full_source->host_read(file_size / 2, file_size / 2).get(),
+                       start_data.get());
+
+  auto const end_data = source->host_read(0, file_size / 2 + 512);
+  expect_buffers_equal(full_source->host_read(0, file_size / 2 + 512).get(), end_data.get());
+}
+
 CUDF_TEST_PROGRAM_MAIN()

From d15bbfdded7181fdc23d33fa5efae181b4af2e2b Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 4 Oct 2024 07:45:54 -1000
Subject: [PATCH 13/25] Allow melt(var_name=) to be a falsy label (#16981)

closes https://github.com/rapidsai/cudf/issues/16972

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16981
---
 python/cudf/cudf/core/reshape.py       | 2 +-
 python/cudf/cudf/tests/test_reshape.py | 9 +++++++++
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 6e5abb2b82b..3d132c92d54 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -681,7 +681,7 @@ def _tile(A, reps):
     nval = len(value_vars)
     dtype = min_unsigned_type(nval)
 
-    if not var_name:
+    if var_name is None:
         var_name = "variable"
 
     if not value_vars:
diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py
index 4235affd4d1..3adbe1d2a74 100644
--- a/python/cudf/cudf/tests/test_reshape.py
+++ b/python/cudf/cudf/tests/test_reshape.py
@@ -119,6 +119,15 @@ def test_melt_str_scalar_id_var():
     assert_eq(result, expected)
 
 
+def test_melt_falsy_var_name():
+    df = cudf.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]})
+    result = cudf.melt(df, id_vars=["A"], value_vars=["B"], var_name="")
+    expected = pd.melt(
+        df.to_pandas(), id_vars=["A"], value_vars=["B"], var_name=""
+    )
+    assert_eq(result, expected)
+
+
 @pytest.mark.parametrize("num_cols", [1, 2, 10])
 @pytest.mark.parametrize("num_rows", [1, 2, 1000])
 @pytest.mark.parametrize(

From 04c17ded6563f4caaeeb51319672c10587401e33 Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Fri, 4 Oct 2024 14:06:23 -0400
Subject: [PATCH 14/25] [FEA]  Migrate nvtext/edit_distance APIs to pylibcudf
 (#16957)

Part of #15162. This PR migrates `edit_distance.pxd` to pylibcudf.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Matthew Roeschke (https://github.com/mroeschke)
  - Yunsong Wang (https://github.com/PointKernel)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/16957
---
 cpp/include/nvtext/edit_distance.hpp          |  2 +-
 .../user_guide/api_docs/pylibcudf/index.rst   |  1 +
 .../pylibcudf/nvtext/edit_distance.rst        |  6 ++
 .../api_docs/pylibcudf/nvtext/index.rst       |  7 +++
 .../cudf/cudf/_lib/nvtext/edit_distance.pyx   | 34 +++-------
 python/pylibcudf/pylibcudf/CMakeLists.txt     |  1 +
 python/pylibcudf/pylibcudf/__init__.pxd       |  2 +
 python/pylibcudf/pylibcudf/__init__.py        |  2 +
 .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 22 +++++++
 .../pylibcudf/pylibcudf/nvtext/__init__.pxd   |  7 +++
 python/pylibcudf/pylibcudf/nvtext/__init__.py |  7 +++
 .../pylibcudf/nvtext/edit_distance.pxd        |  8 +++
 .../pylibcudf/nvtext/edit_distance.pyx        | 63 +++++++++++++++++++
 .../tests/test_nvtext_edit_distance.py        | 34 ++++++++++
 14 files changed, 171 insertions(+), 25 deletions(-)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/edit_distance.rst
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/__init__.pxd
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/__init__.py
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py

diff --git a/cpp/include/nvtext/edit_distance.hpp b/cpp/include/nvtext/edit_distance.hpp
index 723ba310a1e..dca590baebf 100644
--- a/cpp/include/nvtext/edit_distance.hpp
+++ b/cpp/include/nvtext/edit_distance.hpp
@@ -57,7 +57,7 @@ namespace CUDF_EXPORT nvtext {
  * @param targets Strings to compute edit distance against `input`
  * @param stream CUDA stream used for device memory operations and kernel launches
  * @param mr Device memory resource used to allocate the returned column's device memory
- * @return New strings columns of with replaced strings
+ * @return New lists column of edit distance values
  */
 std::unique_ptr<cudf::column> edit_distance(
   cudf::strings_column_view const& input,
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
index e21536e2e97..052479d6720 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -49,3 +49,4 @@ This page provides API documentation for pylibcudf.
 
     io/index.rst
     strings/index.rst
+    nvtext/index.rst
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/edit_distance.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/edit_distance.rst
new file mode 100644
index 00000000000..abb45e426a8
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/edit_distance.rst
@@ -0,0 +1,6 @@
+=============
+edit_distance
+=============
+
+.. automodule:: pylibcudf.nvtext.edit_distance
+   :members:
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst
new file mode 100644
index 00000000000..b5cd5ee42c3
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst
@@ -0,0 +1,7 @@
+nvtext
+======
+
+.. toctree::
+    :maxdepth: 1
+
+    edit_distance
diff --git a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
index e3c2273345a..3dd99c42d76 100644
--- a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
+++ b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx
@@ -2,37 +2,23 @@
 
 from cudf.core.buffer import acquire_spill_lock
 
-from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
-
-from pylibcudf.libcudf.column.column cimport column
-from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.nvtext.edit_distance cimport (
-    edit_distance as cpp_edit_distance,
-    edit_distance_matrix as cpp_edit_distance_matrix,
-)
+from pylibcudf cimport nvtext
 
 from cudf._lib.column cimport Column
 
 
 @acquire_spill_lock()
 def edit_distance(Column strings, Column targets):
-    cdef column_view c_strings = strings.view()
-    cdef column_view c_targets = targets.view()
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(cpp_edit_distance(c_strings, c_targets))
-
-    return Column.from_unique_ptr(move(c_result))
+    result = nvtext.edit_distance.edit_distance(
+        strings.to_pylibcudf(mode="read"),
+        targets.to_pylibcudf(mode="read")
+    )
+    return Column.from_pylibcudf(result)
 
 
 @acquire_spill_lock()
 def edit_distance_matrix(Column strings):
-    cdef column_view c_strings = strings.view()
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(cpp_edit_distance_matrix(c_strings))
-
-    return Column.from_unique_ptr(move(c_result))
+    result = nvtext.edit_distance.edit_distance_matrix(
+        strings.to_pylibcudf(mode="read")
+    )
+    return Column.from_pylibcudf(result)
diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt
index a7cb66d7b16..1d72eacac12 100644
--- a/python/pylibcudf/pylibcudf/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/CMakeLists.txt
@@ -66,3 +66,4 @@ target_link_libraries(pylibcudf_interop PUBLIC nanoarrow)
 add_subdirectory(libcudf)
 add_subdirectory(strings)
 add_subdirectory(io)
+add_subdirectory(nvtext)
diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd
index a384edd456d..b98b37fe0fd 100644
--- a/python/pylibcudf/pylibcudf/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/__init__.pxd
@@ -17,6 +17,7 @@ from . cimport (
     lists,
     merge,
     null_mask,
+    nvtext,
     partitioning,
     quantiles,
     reduce,
@@ -78,4 +79,5 @@ __all__ = [
     "transpose",
     "types",
     "unary",
+    "nvtext",
 ]
diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py
index 2a5365e8fad..304f27be340 100644
--- a/python/pylibcudf/pylibcudf/__init__.py
+++ b/python/pylibcudf/pylibcudf/__init__.py
@@ -28,6 +28,7 @@
     lists,
     merge,
     null_mask,
+    nvtext,
     partitioning,
     quantiles,
     reduce,
@@ -92,4 +93,5 @@
     "transpose",
     "types",
     "unary",
+    "nvtext",
 ]
diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
new file mode 100644
index 00000000000..ebe1fda1f12
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
@@ -0,0 +1,22 @@
+# =============================================================================
+# Copyright (c) 2024, NVIDIA CORPORATION.
+#
+# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
+# in compliance with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software distributed under the License
+# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
+# or implied. See the License for the specific language governing permissions and limitations under
+# the License.
+# =============================================================================
+
+set(cython_sources edit_distance.pyx)
+
+set(linked_libraries cudf::cudf)
+rapids_cython_create_modules(
+  CXX
+  SOURCE_FILES "${cython_sources}"
+  LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_nvtext_ ASSOCIATED_TARGETS cudf
+)
diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd
new file mode 100644
index 00000000000..82f7c425b1d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from . cimport edit_distance
+
+__all__ = [
+    "edit_distance",
+]
diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py
new file mode 100644
index 00000000000..986652a241f
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py
@@ -0,0 +1,7 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from . import edit_distance
+
+__all__ = [
+    "edit_distance",
+]
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd
new file mode 100644
index 00000000000..446b95afabb
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column cimport Column
+
+
+cpdef Column edit_distance(Column input, Column targets)
+
+cpdef Column edit_distance_matrix(Column input)
diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
new file mode 100644
index 00000000000..fc98ccbc50c
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx
@@ -0,0 +1,63 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.edit_distance cimport (
+    edit_distance as cpp_edit_distance,
+    edit_distance_matrix as cpp_edit_distance_matrix,
+)
+
+
+cpdef Column edit_distance(Column input, Column targets):
+    """
+    Returns the edit distance between individual strings in two strings columns
+
+    For details, see :cpp:func:`edit_distance`
+
+    Parameters
+    ----------
+    input : Column
+        Input strings
+    targets : Column
+        Strings to compute edit distance against
+
+    Returns
+    -------
+    Column
+        New column of edit distance values
+    """
+    cdef column_view c_strings = input.view()
+    cdef column_view c_targets = targets.view()
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(cpp_edit_distance(c_strings, c_targets))
+
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column edit_distance_matrix(Column input):
+    """
+    Returns the edit distance between all strings in the input strings column
+
+    For details, see :cpp:func:`edit_distance_matrix`
+
+    Parameters
+    ----------
+    input : Column
+        Input strings
+
+    Returns
+    -------
+    Column
+        New column of edit distance values
+    """
+    cdef column_view c_strings = input.view()
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(cpp_edit_distance_matrix(c_strings))
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py
new file mode 100644
index 00000000000..7d93c471cc4
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+from utils import assert_column_eq
+
+
+@pytest.fixture(scope="module")
+def edit_distance_data():
+    arr1 = ["hallo", "goodbye", "world"]
+    arr2 = ["hello", "", "world"]
+    return pa.array(arr1), pa.array(arr2)
+
+
+def test_edit_distance(edit_distance_data):
+    input_col, targets = edit_distance_data
+    result = plc.nvtext.edit_distance.edit_distance(
+        plc.interop.from_arrow(input_col),
+        plc.interop.from_arrow(targets),
+    )
+    expected = pa.array([1, 7, 0], type=pa.int32())
+    assert_column_eq(result, expected)
+
+
+def test_edit_distance_matrix(edit_distance_data):
+    input_col, _ = edit_distance_data
+    result = plc.nvtext.edit_distance.edit_distance_matrix(
+        plc.interop.from_arrow(input_col)
+    )
+    expected = pa.array(
+        [[0, 7, 4], [7, 0, 6], [4, 6, 0]], type=pa.list_(pa.int32())
+    )
+    assert_column_eq(expected, result)

From efaa0b50c6ffd15c6506847987cb531e5f6ba955 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 4 Oct 2024 08:20:34 -1000
Subject: [PATCH 15/25] Add string.convert.convert_datetime/convert_booleans
 APIs to pylibcudf (#16971)

Contributes to https://github.com/rapidsai/cudf/issues/15162

Also addresses a review comment in https://github.com/rapidsai/cudf/pull/16935#discussion_r1783726477

This also modifies some `format` arguments in `convert_datetime.pyx` to accept `str` instead of `bytes` (`const string&`) to align better with Python. Let me know if you would prefer to change this back.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16971
---
 python/cudf/cudf/_lib/string_casting.pyx      | 110 +++---------------
 python/cudf_polars/cudf_polars/dsl/expr.py    |   4 +-
 .../strings/convert/convert_booleans.pxd      |   4 +-
 .../strings/convert/convert_datetime.pxd      |   6 +-
 .../pylibcudf/strings/convert/CMakeLists.txt  |   2 +-
 .../pylibcudf/strings/convert/__init__.pxd    |   2 +-
 .../pylibcudf/strings/convert/__init__.py     |   2 +-
 .../strings/convert/convert_booleans.pxd      |   9 ++
 .../strings/convert/convert_booleans.pyx      |  91 +++++++++++++++
 .../strings/convert/convert_datetime.pxd      |  11 +-
 .../strings/convert/convert_datetime.pyx      |  82 +++++++++++--
 .../pylibcudf/tests/test_string_convert.py    |   2 +-
 .../tests/test_string_convert_booleans.py     |  26 +++++
 .../tests/test_string_convert_datetime.py     |  46 ++++++++
 .../pylibcudf/tests/test_string_wrap.py       |   5 +-
 15 files changed, 286 insertions(+), 116 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py

diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index 60a6795a402..55ff38f472d 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -3,9 +3,6 @@
 from cudf._lib.column cimport Column
 
 from cudf._lib.scalar import as_device_scalar
-
-from cudf._lib.scalar cimport DeviceScalar
-
 from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
 
 from libcpp.memory cimport unique_ptr
@@ -14,14 +11,6 @@ from libcpp.utility cimport move
 
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from pylibcudf.libcudf.strings.convert.convert_booleans cimport (
-    from_booleans as cpp_from_booleans,
-    to_booleans as cpp_to_booleans,
-)
-from pylibcudf.libcudf.strings.convert.convert_datetime cimport (
-    is_timestamp as cpp_is_timestamp,
-)
 from pylibcudf.libcudf.strings.convert.convert_floats cimport (
     from_floats as cpp_from_floats,
     to_floats as cpp_to_floats,
@@ -427,77 +416,21 @@ def stoul(Column input_col):
     return string_to_integer(input_col, cudf.dtype("uint64"))
 
 
-def _to_booleans(Column input_col, object string_true="True"):
-    """
-    Converting/Casting input column of type string to boolean column
-
-    Parameters
-    ----------
-    input_col : input column of type string
-    string_true : string that represents True
-
-    Returns
-    -------
-    A Column with string values cast to boolean
-    """
-
-    cdef DeviceScalar str_true = as_device_scalar(string_true)
-    cdef column_view input_column_view = input_col.view()
-    cdef const string_scalar* string_scalar_true = <const string_scalar*>(
-        str_true.get_raw_ptr())
-    cdef unique_ptr[column] c_result
-    with nogil:
-        c_result = move(
-            cpp_to_booleans(
-                input_column_view,
-                string_scalar_true[0]))
-
-    return Column.from_unique_ptr(move(c_result))
-
-
 def to_booleans(Column input_col):
-
-    return _to_booleans(input_col)
-
-
-def _from_booleans(
-        Column input_col,
-        object string_true="True",
-        object string_false="False"):
-    """
-    Converting/Casting input column of type boolean to string column
-
-    Parameters
-    ----------
-    input_col : input column of type boolean
-    string_true : string that represents True
-    string_false : string that represents False
-
-    Returns
-    -------
-    A Column with boolean values cast to string
-    """
-
-    cdef DeviceScalar str_true = as_device_scalar(string_true)
-    cdef DeviceScalar str_false = as_device_scalar(string_false)
-    cdef column_view input_column_view = input_col.view()
-    cdef const string_scalar* string_scalar_true = <const string_scalar*>(
-        str_true.get_raw_ptr())
-    cdef const string_scalar* string_scalar_false = <const string_scalar*>(
-        str_false.get_raw_ptr())
-    cdef unique_ptr[column] c_result
-    with nogil:
-        c_result = move(
-            cpp_from_booleans(
-                input_column_view,
-                string_scalar_true[0],
-                string_scalar_false[0]))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_column = plc.strings.convert.convert_booleans.to_booleans(
+        input_col.to_pylibcudf(mode="read"),
+        as_device_scalar("True").c_value,
+    )
+    return Column.from_pylibcudf(plc_column)
 
 
 def from_booleans(Column input_col):
-    return _from_booleans(input_col)
+    plc_column = plc.strings.convert.convert_booleans.from_booleans(
+        input_col.to_pylibcudf(mode="read"),
+        as_device_scalar("True").c_value,
+        as_device_scalar("False").c_value,
+    )
+    return Column.from_pylibcudf(plc_column)
 
 
 def int2timestamp(
@@ -520,11 +453,10 @@ def int2timestamp(
     A Column with date-time represented in string format
 
     """
-    cdef string c_timestamp_format = format.encode("UTF-8")
     return Column.from_pylibcudf(
         plc.strings.convert.convert_datetime.from_timestamps(
             input_col.to_pylibcudf(mode="read"),
-            c_timestamp_format,
+            format,
             names.to_pylibcudf(mode="read")
         )
     )
@@ -545,12 +477,11 @@ def timestamp2int(Column input_col, dtype, format):
 
     """
     dtype = dtype_to_pylibcudf_type(dtype)
-    cdef string c_timestamp_format = format.encode('UTF-8')
     return Column.from_pylibcudf(
         plc.strings.convert.convert_datetime.to_timestamps(
             input_col.to_pylibcudf(mode="read"),
             dtype,
-            c_timestamp_format
+            format
         )
     )
 
@@ -572,16 +503,11 @@ def istimestamp(Column input_col, str format):
     """
     if input_col.size == 0:
         return cudf.core.column.column_empty(0, dtype=cudf.dtype("bool"))
-    cdef column_view input_column_view = input_col.view()
-    cdef string c_timestamp_format = <string>str(format).encode('UTF-8')
-    cdef unique_ptr[column] c_result
-    with nogil:
-        c_result = move(
-            cpp_is_timestamp(
-                input_column_view,
-                c_timestamp_format))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_column = plc.strings.convert.convert_datetime.is_timestamp(
+        input_col.to_pylibcudf(mode="read"),
+        format
+    )
+    return Column.from_pylibcudf(plc_column)
 
 
 def timedelta2int(Column input_col, dtype, format):
diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index c401e5a2f17..54476b7fedc 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -914,7 +914,7 @@ def do_evaluate(
             col = self.children[0].evaluate(df, context=context, mapping=mapping)
 
             is_timestamps = plc.strings.convert.convert_datetime.is_timestamp(
-                col.obj, format.encode()
+                col.obj, format
             )
 
             if strict:
@@ -937,7 +937,7 @@ def do_evaluate(
                 )
                 return Column(
                     plc.strings.convert.convert_datetime.to_timestamps(
-                        res.columns()[0], self.dtype, format.encode()
+                        res.columns()[0], self.dtype, format
                     )
                 )
         elif self.name == pl_expr.StringFunction.Replace:
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
index 83a9573baad..e6688cfff81 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd
@@ -8,10 +8,10 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar
 cdef extern from "cudf/strings/convert/convert_booleans.hpp" namespace \
         "cudf::strings" nogil:
     cdef unique_ptr[column] to_booleans(
-        column_view input_col,
+        column_view input,
         string_scalar true_string) except +
 
     cdef unique_ptr[column] from_booleans(
-        column_view input_col,
+        column_view booleans,
         string_scalar true_string,
         string_scalar false_string) except +
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
index fa8975c4df9..fceddd58df0 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd
@@ -10,14 +10,14 @@ from pylibcudf.libcudf.types cimport data_type
 cdef extern from "cudf/strings/convert/convert_datetime.hpp" namespace \
         "cudf::strings" nogil:
     cdef unique_ptr[column] to_timestamps(
-        column_view input_col,
+        column_view input,
         data_type timestamp_type,
         string format) except +
 
     cdef unique_ptr[column] from_timestamps(
-        column_view input_col,
+        column_view timestamps,
         string format,
-        column_view input_strings_names) except +
+        column_view names) except +
 
     cdef unique_ptr[column] is_timestamp(
         column_view input_col,
diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
index 175c9b3738e..3febc78dfd2 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-set(cython_sources convert_durations.pyx convert_datetime.pyx)
+set(cython_sources convert_booleans.pyx convert_durations.pyx convert_datetime.pyx)
 
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
index 05324cb49df..5525bca46d6 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
@@ -1,2 +1,2 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from . cimport convert_datetime, convert_durations
+from . cimport convert_booleans, convert_datetime, convert_durations
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
index d803399d53c..2340ebe9a26 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
@@ -1,2 +1,2 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from . import convert_datetime, convert_durations
+from . import convert_booleans, convert_datetime, convert_durations
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd
new file mode 100644
index 00000000000..312ac3c0ca0
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd
@@ -0,0 +1,9 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column cimport Column
+from pylibcudf.scalar cimport Scalar
+
+
+cpdef Column to_booleans(Column input, Scalar true_string)
+
+cpdef Column from_booleans(Column booleans, Scalar true_string, Scalar false_string)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
new file mode 100644
index 00000000000..0c10f821ab6
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx
@@ -0,0 +1,91 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.strings.convert cimport (
+    convert_booleans as cpp_convert_booleans,
+)
+from pylibcudf.scalar cimport Scalar
+
+from cython.operator import dereference
+
+
+cpdef Column to_booleans(Column input, Scalar true_string):
+    """
+    Returns a new bool column by parsing boolean values from the strings
+    in the provided strings column.
+
+    For details, see :cpp:func:`cudf::strings::to_booleans`.
+
+    Parameters
+    ----------
+    input :  Column
+        Strings instance for this operation
+
+    true_string : Scalar
+        String to expect for true. Non-matching strings are false
+
+    Returns
+    -------
+    Column
+        New bool column converted from strings.
+    """
+    cdef unique_ptr[column] c_result
+    cdef const string_scalar* c_true_string = <const string_scalar*>(
+        true_string.c_obj.get()
+    )
+
+    with nogil:
+        c_result = move(
+            cpp_convert_booleans.to_booleans(
+                input.view(),
+                dereference(c_true_string)
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+cpdef Column from_booleans(Column booleans, Scalar true_string, Scalar false_string):
+    """
+    Returns a new strings column converting the boolean values from the
+    provided column into strings.
+
+    For details, see :cpp:func:`cudf::strings::from_booleans`.
+
+    Parameters
+    ----------
+    booleans :  Column
+        Boolean column to convert.
+
+    true_string : Scalar
+        String to use for true in the output column.
+
+    false_string : Scalar
+        String to use for false in the output column.
+
+    Returns
+    -------
+    Column
+        New strings column.
+    """
+    cdef unique_ptr[column] c_result
+    cdef const string_scalar* c_true_string = <const string_scalar*>(
+        true_string.c_obj.get()
+    )
+    cdef const string_scalar* c_false_string = <const string_scalar*>(
+        false_string.c_obj.get()
+    )
+
+    with nogil:
+        c_result = move(
+            cpp_convert_booleans.from_booleans(
+                booleans.view(),
+                dereference(c_true_string),
+                dereference(c_false_string),
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd
index 07c84d263d6..80ec168644b 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd
@@ -8,11 +8,16 @@ from pylibcudf.types cimport DataType
 cpdef Column to_timestamps(
     Column input,
     DataType timestamp_type,
-    const string& format
+    str format
 )
 
 cpdef Column from_timestamps(
-    Column input,
-    const string& format,
+    Column timestamps,
+    str format,
     Column input_strings_names
 )
+
+cpdef Column is_timestamp(
+    Column input,
+    str format,
+)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
index fcacb096f87..0ee60812e00 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx
@@ -15,28 +15,74 @@ from pylibcudf.types import DataType
 cpdef Column to_timestamps(
     Column input,
     DataType timestamp_type,
-    const string& format
+    str format
 ):
+    """
+    Returns a new timestamp column converting a strings column into
+    timestamps using the provided format pattern.
+
+    For details, see :cpp:func:`cudf::strings::to_timestamps`.
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    timestamp_type : DataType
+        The timestamp type used for creating the output column.
+
+    format : str
+        String specifying the timestamp format in strings.
+
+    Returns
+    -------
+    Column
+        New datetime column
+    """
     cdef unique_ptr[column] c_result
+    cdef string c_format = format.encode()
     with nogil:
         c_result = cpp_convert_datetime.to_timestamps(
             input.view(),
             timestamp_type.c_obj,
-            format
+            c_format
         )
 
     return Column.from_libcudf(move(c_result))
 
 cpdef Column from_timestamps(
-    Column input,
-    const string& format,
+    Column timestamps,
+    str format,
     Column input_strings_names
 ):
+    """
+    Returns a new strings column converting a timestamp column into
+    strings using the provided format pattern.
+
+    For details, see :cpp:func:`cudf::strings::from_timestamps`.
+
+    Parameters
+    ----------
+    timestamps : Column
+        Timestamp values to convert
+
+    format : str
+        The string specifying output format.
+
+    input_strings_names : Column
+        The string names to use for weekdays ("%a", "%A") and months ("%b", "%B").
+
+    Returns
+    -------
+    Column
+        New strings column with formatted timestamps.
+    """
     cdef unique_ptr[column] c_result
+    cdef string c_format = format.encode()
     with nogil:
         c_result = cpp_convert_datetime.from_timestamps(
-            input.view(),
-            format,
+            timestamps.view(),
+            c_format,
             input_strings_names.view()
         )
 
@@ -44,13 +90,33 @@ cpdef Column from_timestamps(
 
 cpdef Column is_timestamp(
     Column input,
-    const string& format
+    str format
 ):
+    """
+    Verifies the given strings column can be parsed to timestamps
+    using the provided format pattern.
+
+    For details, see :cpp:func:`cudf::strings::is_timestamp`.
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    format : str
+        String specifying the timestamp format in strings.
+
+    Returns
+    -------
+    Column
+        New bool column.
+    """
     cdef unique_ptr[column] c_result
+    cdef string c_format = format.encode()
     with nogil:
         c_result = cpp_convert_datetime.is_timestamp(
             input.view(),
-            format
+            c_format
         )
 
     return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert.py b/python/pylibcudf/pylibcudf/tests/test_string_convert.py
index e9e95459d0e..22bb4971cb1 100644
--- a/python/pylibcudf/pylibcudf/tests/test_string_convert.py
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert.py
@@ -62,7 +62,7 @@ def test_to_datetime(
     got = plc.strings.convert.convert_datetime.to_timestamps(
         plc_timestamp_col,
         plc.interop.from_arrow(timestamp_type),
-        format.encode(),
+        format,
     )
     assert_column_eq(expect, got)
 
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py
new file mode 100644
index 00000000000..117c59ff1b8
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py
@@ -0,0 +1,26 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pylibcudf as plc
+from utils import assert_column_eq
+
+
+def test_to_booleans():
+    pa_array = pa.array(["true", None, "True"])
+    result = plc.strings.convert.convert_booleans.to_booleans(
+        plc.interop.from_arrow(pa_array),
+        plc.interop.from_arrow(pa.scalar("True")),
+    )
+    expected = pa.array([False, None, True])
+    assert_column_eq(result, expected)
+
+
+def test_from_booleans():
+    pa_array = pa.array([True, None, False])
+    result = plc.strings.convert.convert_booleans.from_booleans(
+        plc.interop.from_arrow(pa_array),
+        plc.interop.from_arrow(pa.scalar("A")),
+        plc.interop.from_arrow(pa.scalar("B")),
+    )
+    expected = pa.array(["A", None, "B"])
+    assert_column_eq(result, expected)
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py
new file mode 100644
index 00000000000..f3e84286a36
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py
@@ -0,0 +1,46 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import datetime
+
+import pyarrow as pa
+import pyarrow.compute as pc
+import pylibcudf as plc
+import pytest
+from utils import assert_column_eq
+
+
+@pytest.fixture
+def fmt():
+    return "%Y-%m-%dT%H:%M:%S"
+
+
+def test_to_timestamp(fmt):
+    arr = pa.array(["2020-01-01T01:01:01", None])
+    result = plc.strings.convert.convert_datetime.to_timestamps(
+        plc.interop.from_arrow(arr),
+        plc.DataType(plc.TypeId.TIMESTAMP_SECONDS),
+        fmt,
+    )
+    expected = pc.strptime(arr, fmt, "s")
+    assert_column_eq(result, expected)
+
+
+def test_from_timestamp(fmt):
+    arr = pa.array([datetime.datetime(2020, 1, 1, 1, 1, 1), None])
+    result = plc.strings.convert.convert_datetime.from_timestamps(
+        plc.interop.from_arrow(arr),
+        fmt,
+        plc.interop.from_arrow(pa.array([], type=pa.string())),
+    )
+    # pc.strftime will add the extra %f
+    expected = pa.array(["2020-01-01T01:01:01", None])
+    assert_column_eq(result, expected)
+
+
+def test_is_timestamp(fmt):
+    arr = pa.array(["2020-01-01T01:01:01", None, "2020-01-01"])
+    result = plc.strings.convert.convert_datetime.is_timestamp(
+        plc.interop.from_arrow(arr),
+        fmt,
+    )
+    expected = pa.array([True, None, False])
+    assert_column_eq(result, expected)
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py
index 85abd3a2bae..a1c820cd586 100644
--- a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py
+++ b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py
@@ -7,6 +7,7 @@
 
 
 def test_wrap():
+    width = 12
     pa_array = pa.array(
         [
             "the quick brown fox jumped over the lazy brown dog",
@@ -14,10 +15,10 @@ def test_wrap():
             None,
         ]
     )
-    result = plc.strings.wrap.wrap(plc.interop.from_arrow(pa_array), 12)
+    result = plc.strings.wrap.wrap(plc.interop.from_arrow(pa_array), width)
     expected = pa.array(
         [
-            textwrap.fill(val, 12) if isinstance(val, str) else val
+            textwrap.fill(val, width) if isinstance(val, str) else val
             for val in pa_array.to_pylist()
         ]
     )

From a8da1ff2b393abbafa27dddcf4c19481ec853c28 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Fri, 4 Oct 2024 12:11:31 -0700
Subject: [PATCH 16/25] Deprecate support for directly accessing logger
 (#16964)

This PR deprecates support for accessing cudf's underlying spdlog logger directly.

Contributes to https://github.com/rapidsai/build-planning/issues/104

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - David Wendt (https://github.com/davidwendt)
  - Yunsong Wang (https://github.com/PointKernel)

URL: https://github.com/rapidsai/cudf/pull/16964
---
 cpp/include/cudf/detail/utilities/logger.hpp | 14 ++++----
 cpp/include/cudf/utilities/logger.hpp        |  8 ++++-
 cpp/src/utilities/logger.cpp                 |  4 ++-
 cpp/tests/utilities_tests/logger_tests.cpp   | 37 ++++++++++----------
 4 files changed, 36 insertions(+), 27 deletions(-)

diff --git a/cpp/include/cudf/detail/utilities/logger.hpp b/cpp/include/cudf/detail/utilities/logger.hpp
index 8c1c3c28df8..e7643eb44bd 100644
--- a/cpp/include/cudf/detail/utilities/logger.hpp
+++ b/cpp/include/cudf/detail/utilities/logger.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -19,9 +19,9 @@
 #include <cudf/utilities/logger.hpp>
 
 // Log messages that require computation should only be used at level TRACE and DEBUG
-#define CUDF_LOG_TRACE(...)    SPDLOG_LOGGER_TRACE(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_DEBUG(...)    SPDLOG_LOGGER_DEBUG(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_INFO(...)     SPDLOG_LOGGER_INFO(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_WARN(...)     SPDLOG_LOGGER_WARN(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_ERROR(...)    SPDLOG_LOGGER_ERROR(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_CRITICAL(...) SPDLOG_LOGGER_CRITICAL(&cudf::logger(), __VA_ARGS__)
+#define CUDF_LOG_TRACE(...)    SPDLOG_LOGGER_TRACE(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_DEBUG(...)    SPDLOG_LOGGER_DEBUG(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_INFO(...)     SPDLOG_LOGGER_INFO(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_WARN(...)     SPDLOG_LOGGER_WARN(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_ERROR(...)    SPDLOG_LOGGER_ERROR(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_CRITICAL(...) SPDLOG_LOGGER_CRITICAL(&cudf::detail::logger(), __VA_ARGS__)
diff --git a/cpp/include/cudf/utilities/logger.hpp b/cpp/include/cudf/utilities/logger.hpp
index 45d5d1b12e1..982554a23f5 100644
--- a/cpp/include/cudf/utilities/logger.hpp
+++ b/cpp/include/cudf/utilities/logger.hpp
@@ -22,6 +22,10 @@
 
 namespace CUDF_EXPORT cudf {
 
+namespace detail {
+spdlog::logger& logger();
+}
+
 /**
  * @brief Returns the global logger.
  *
@@ -43,6 +47,8 @@ namespace CUDF_EXPORT cudf {
  *
  * @return spdlog::logger& The logger.
  */
-spdlog::logger& logger();
+[[deprecated(
+  "Support for direct access to spdlog loggers in cudf is planned for removal")]] spdlog::logger&
+logger();
 
 }  // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/utilities/logger.cpp b/cpp/src/utilities/logger.cpp
index d54f5677c4c..e52fffbd8c6 100644
--- a/cpp/src/utilities/logger.cpp
+++ b/cpp/src/utilities/logger.cpp
@@ -74,8 +74,10 @@ struct logger_wrapper {
 
 }  // namespace
 
-spdlog::logger& cudf::logger()
+spdlog::logger& cudf::detail::logger()
 {
   static logger_wrapper wrapped{};
   return wrapped.logger_;
 }
+
+spdlog::logger& cudf::logger() { return cudf::detail::logger(); }
diff --git a/cpp/tests/utilities_tests/logger_tests.cpp b/cpp/tests/utilities_tests/logger_tests.cpp
index d052e20eedb..cfab570833b 100644
--- a/cpp/tests/utilities_tests/logger_tests.cpp
+++ b/cpp/tests/utilities_tests/logger_tests.cpp
@@ -28,16 +28,17 @@ class LoggerTest : public cudf::test::BaseFixture {
   std::vector<spdlog::sink_ptr> prev_sinks;
 
  public:
-  LoggerTest() : prev_level{cudf::logger().level()}, prev_sinks{cudf::logger().sinks()}
+  LoggerTest()
+    : prev_level{cudf::detail::logger().level()}, prev_sinks{cudf::detail::logger().sinks()}
   {
-    cudf::logger().sinks() = {std::make_shared<spdlog::sinks::ostream_sink_mt>(oss)};
-    cudf::logger().set_formatter(
+    cudf::detail::logger().sinks() = {std::make_shared<spdlog::sinks::ostream_sink_mt>(oss)};
+    cudf::detail::logger().set_formatter(
       std::unique_ptr<spdlog::formatter>(new spdlog::pattern_formatter("%v")));
   }
   ~LoggerTest() override
   {
-    cudf::logger().set_level(prev_level);
-    cudf::logger().sinks() = prev_sinks;
+    cudf::detail::logger().set_level(prev_level);
+    cudf::detail::logger().sinks() = prev_sinks;
   }
 
   void clear_sink() { oss.str(""); }
@@ -46,32 +47,32 @@ class LoggerTest : public cudf::test::BaseFixture {
 
 TEST_F(LoggerTest, Basic)
 {
-  cudf::logger().critical("crit msg");
+  cudf::detail::logger().critical("crit msg");
   ASSERT_EQ(this->sink_content(), "crit msg\n");
 }
 
 TEST_F(LoggerTest, DefaultLevel)
 {
-  cudf::logger().trace("trace");
-  cudf::logger().debug("debug");
-  cudf::logger().info("info");
-  cudf::logger().warn("warn");
-  cudf::logger().error("error");
-  cudf::logger().critical("critical");
+  cudf::detail::logger().trace("trace");
+  cudf::detail::logger().debug("debug");
+  cudf::detail::logger().info("info");
+  cudf::detail::logger().warn("warn");
+  cudf::detail::logger().error("error");
+  cudf::detail::logger().critical("critical");
   ASSERT_EQ(this->sink_content(), "warn\nerror\ncritical\n");
 }
 
 TEST_F(LoggerTest, CustomLevel)
 {
-  cudf::logger().set_level(spdlog::level::warn);
-  cudf::logger().info("info");
-  cudf::logger().warn("warn");
+  cudf::detail::logger().set_level(spdlog::level::warn);
+  cudf::detail::logger().info("info");
+  cudf::detail::logger().warn("warn");
   ASSERT_EQ(this->sink_content(), "warn\n");
 
   this->clear_sink();
 
-  cudf::logger().set_level(spdlog::level::debug);
-  cudf::logger().trace("trace");
-  cudf::logger().debug("debug");
+  cudf::detail::logger().set_level(spdlog::level::debug);
+  cudf::detail::logger().trace("trace");
+  cudf::detail::logger().debug("debug");
   ASSERT_EQ(this->sink_content(), "debug\n");
 }

From 119aa9d9c5cffc2de460f52f11fb4a78f8b51dce Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 4 Oct 2024 12:08:26 -1000
Subject: [PATCH 17/25] Add string.convert.convert_fixed_point APIs to pylibcudf
 (#16984)

Contributes to https://github.com/rapidsai/cudf/issues/15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16984
---
 .../strings/convert/convert_fixed_point.pyx   |  69 +++--------
 .../strings/convert/convert_fixed_point.pxd   |   8 +-
 .../pylibcudf/strings/convert/CMakeLists.txt  |   4 +-
 .../pylibcudf/strings/convert/__init__.pxd    |   7 +-
 .../pylibcudf/strings/convert/__init__.py     |   7 +-
 .../strings/convert/convert_fixed_point.pxd   |  11 ++
 .../strings/convert/convert_fixed_point.pyx   | 107 ++++++++++++++++++
 .../tests/test_string_convert_fixed_point.py  |  34 ++++++
 8 files changed, 188 insertions(+), 59 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py

diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
index a8df8c9a92c..96dcd021c3b 100644
--- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
+++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx
@@ -1,22 +1,11 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
 
-import cudf
-
-from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
-
 from cudf.core.buffer import acquire_spill_lock
 
-from pylibcudf.libcudf.column.column cimport column
-from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.strings.convert.convert_fixed_point cimport (
-    from_fixed_point as cpp_from_fixed_point,
-    is_fixed_point as cpp_is_fixed_point,
-    to_fixed_point as cpp_to_fixed_point,
-)
-from pylibcudf.libcudf.types cimport data_type, type_id
-
 from cudf._lib.column cimport Column
+from cudf._lib.types cimport dtype_to_pylibcudf_type
+
+import pylibcudf as plc
 
 
 @acquire_spill_lock()
@@ -32,14 +21,10 @@ def from_decimal(Column input_col):
     -------
     A column of strings representing the input decimal values.
     """
-    cdef column_view input_column_view = input_col.view()
-    cdef unique_ptr[column] c_result
-    with nogil:
-        c_result = move(
-            cpp_from_fixed_point(
-                input_column_view))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_column = plc.strings.convert.convert_fixed_point.from_fixed_point(
+        input_col.to_pylibcudf(mode="read"),
+    )
+    return Column.from_pylibcudf(plc_column)
 
 
 @acquire_spill_lock()
@@ -57,25 +42,11 @@ def to_decimal(Column input_col, object out_type):
     -------
     A column of decimals parsed from the string values.
     """
-    cdef column_view input_column_view = input_col.view()
-    cdef unique_ptr[column] c_result
-    cdef int scale = out_type.scale
-    cdef data_type c_out_type
-    if isinstance(out_type, cudf.Decimal32Dtype):
-        c_out_type = data_type(type_id.DECIMAL32, -scale)
-    elif isinstance(out_type, cudf.Decimal64Dtype):
-        c_out_type = data_type(type_id.DECIMAL64, -scale)
-    elif isinstance(out_type, cudf.Decimal128Dtype):
-        c_out_type = data_type(type_id.DECIMAL128, -scale)
-    else:
-        raise TypeError("should be a decimal dtype")
-    with nogil:
-        c_result = move(
-            cpp_to_fixed_point(
-                input_column_view,
-                c_out_type))
-
-    result = Column.from_unique_ptr(move(c_result))
+    plc_column = plc.strings.convert.convert_fixed_point.to_fixed_point(
+        input_col.to_pylibcudf(mode="read"),
+        dtype_to_pylibcudf_type(out_type),
+    )
+    result = Column.from_pylibcudf(plc_column)
     result.dtype.precision = out_type.precision
     return result
 
@@ -98,14 +69,8 @@ def is_fixed_point(Column input_col, object dtype):
     -------
     A Column of booleans indicating valid decimal conversion.
     """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = input_col.view()
-    cdef int scale = dtype.scale
-    cdef data_type c_dtype = data_type(type_id.DECIMAL64, -scale)
-    with nogil:
-        c_result = move(cpp_is_fixed_point(
-            source_view,
-            c_dtype
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_column = plc.strings.convert.convert_fixed_point.is_fixed_point(
+        input_col.to_pylibcudf(mode="read"),
+        dtype_to_pylibcudf_type(dtype),
+    )
+    return Column.from_pylibcudf(plc_column)
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
index 6f820f3c9a4..72ab329f2dd 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd
@@ -9,13 +9,13 @@ from pylibcudf.libcudf.types cimport data_type
 cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \
         "cudf::strings" nogil:
     cdef unique_ptr[column] to_fixed_point(
-        column_view input_col,
+        column_view input,
         data_type output_type) except +
 
     cdef unique_ptr[column] from_fixed_point(
-        column_view input_col) except +
+        column_view input) except +
 
     cdef unique_ptr[column] is_fixed_point(
-        column_view source_strings,
-        data_type output_type
+        column_view input,
+        data_type decimal_type
     ) except +
diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
index 3febc78dfd2..fe8da975566 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
@@ -12,7 +12,9 @@
 # the License.
 # =============================================================================
 
-set(cython_sources convert_booleans.pyx convert_durations.pyx convert_datetime.pyx)
+set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx
+                   convert_fixed_point.pyx
+)
 
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
index 5525bca46d6..36abf463371 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
@@ -1,2 +1,7 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from . cimport convert_booleans, convert_datetime, convert_durations
+from . cimport (
+    convert_booleans,
+    convert_datetime,
+    convert_durations,
+    convert_fixed_point,
+)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
index 2340ebe9a26..c0be4093836 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
@@ -1,2 +1,7 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from . import convert_booleans, convert_datetime, convert_durations
+from . import (
+    convert_booleans,
+    convert_datetime,
+    convert_durations,
+    convert_fixed_point,
+)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
new file mode 100644
index 00000000000..049b9b3fffe
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column cimport Column
+from pylibcudf.types cimport DataType
+
+
+cpdef Column to_fixed_point(Column input, DataType output_type)
+
+cpdef Column from_fixed_point(Column input)
+
+cpdef Column is_fixed_point(Column input, DataType decimal_type=*)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
new file mode 100644
index 00000000000..40dadf6f967
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
@@ -0,0 +1,107 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.strings.convert cimport (
+    convert_fixed_point as cpp_fixed_point,
+)
+from pylibcudf.types cimport DataType, type_id
+
+
+cpdef Column to_fixed_point(Column input, DataType output_type):
+    """
+    Returns a new fixed-point column parsing decimal values from the
+    provided strings column.
+
+    For details, see :cpp:func:`cudf::strings::to_fixed_point`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    output_type : DataType
+        Type of fixed-point column to return including the scale value.
+
+    Returns
+    -------
+    Column
+        New column of output_type.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_fixed_point.to_fixed_point(
+                input.view(),
+                output_type.c_obj,
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+cpdef Column from_fixed_point(Column input):
+    """
+    Returns a new strings column converting the fixed-point values
+    into a strings column.
+
+    For details, see :cpp:func:`cudf::strings::from_fixed_point`
+
+    Parameters
+    ----------
+    input : Column
+        Fixed-point column to convert.
+
+    Returns
+    -------
+    Column
+        New strings column.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_fixed_point.from_fixed_point(
+                input.view(),
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+cpdef Column is_fixed_point(Column input, DataType decimal_type=None):
+    """
+    Returns a boolean column identifying strings in which all
+    characters are valid for conversion to fixed-point.
+
+    For details, see :cpp:func:`cudf::strings::is_fixed_point`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    decimal_type : DataType
+        Fixed-point type (with scale) used only for checking overflow.
+        Defaults to Decimal64
+
+    Returns
+    -------
+    Column
+        New column of boolean results for each string.
+    """
+    cdef unique_ptr[column] c_result
+
+    if decimal_type is None:
+        decimal_type = DataType(type_id.DECIMAL64)
+
+    with nogil:
+        c_result = move(
+            cpp_fixed_point.is_fixed_point(
+                input.view(),
+                decimal_type.c_obj,
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py
new file mode 100644
index 00000000000..b1c4d729604
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import decimal
+
+import pyarrow as pa
+import pylibcudf as plc
+from utils import assert_column_eq
+
+
+def test_to_fixed_point():
+    typ = pa.decimal128(38, 2)
+    arr = pa.array(["123", "1.23", None])
+    result = plc.strings.convert.convert_fixed_point.to_fixed_point(
+        plc.interop.from_arrow(arr), plc.interop.from_arrow(typ)
+    )
+    expected = arr.cast(typ)
+    assert_column_eq(result, expected)
+
+
+def test_from_fixed_point():
+    arr = pa.array([decimal.Decimal("1.1"), None])
+    result = plc.strings.convert.convert_fixed_point.from_fixed_point(
+        plc.interop.from_arrow(arr),
+    )
+    expected = pa.array(["1.1", None])
+    assert_column_eq(result, expected)
+
+
+def test_is_fixed_point():
+    arr = pa.array(["123", "1.23", "1.2.3", "", None])
+    result = plc.strings.convert.convert_fixed_point.is_fixed_point(
+        plc.interop.from_arrow(arr),
+    )
+    expected = pa.array([True, True, False, False, None])
+    assert_column_eq(result, expected)

From 77f3a5d3229ed1b3186fe9f4d5b5b04d124c6a4d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 4 Oct 2024 12:33:44 -1000
Subject: [PATCH 18/25] Add docstrings and test for strings.convert_durations
 APIs for pylibcudf (#16982)

Contributes to https://github.com/rapidsai/cudf/issues/15162

Since the implementation already existed:

* Added docstrings
* Like https://github.com/rapidsai/cudf/pull/16971, made the `format` parameter accept `str` instead
* Aligned parameter names closer to pylibcudf
* Added missing `move`s
* Moved `convert_durations` tests to `test_string_convert_durations.py` and added a new test for `from_durations`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16982
---
 python/cudf/cudf/_lib/string_casting.pyx      |  7 +-
 .../strings/convert/convert_durations.pxd     |  2 +-
 .../strings/convert/convert_durations.pxd     |  6 +-
 .../strings/convert/convert_durations.pyx     | 73 ++++++++++++++++---
 .../pylibcudf/tests/test_string_convert.py    | 43 -----------
 .../tests/test_string_convert_durations.py    | 61 ++++++++++++++++
 6 files changed, 130 insertions(+), 62 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py

diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index 55ff38f472d..fe19379bf93 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -525,12 +525,11 @@ def timedelta2int(Column input_col, dtype, format):
 
     """
     dtype = dtype_to_pylibcudf_type(dtype)
-    cdef string c_timestamp_format = format.encode('UTF-8')
     return Column.from_pylibcudf(
         plc.strings.convert.convert_durations.to_durations(
             input_col.to_pylibcudf(mode="read"),
             dtype,
-            c_timestamp_format
+            format
         )
     )
 
@@ -549,12 +548,10 @@ def int2timedelta(Column input_col, str format):
     A Column with Timedelta represented in string format
 
     """
-
-    cdef string c_duration_format = format.encode('UTF-8')
     return Column.from_pylibcudf(
         plc.strings.convert.convert_durations.from_durations(
             input_col.to_pylibcudf(mode="read"),
-            c_duration_format
+            format
         )
     )
 
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
index ebe10574353..43ffad1d89f 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
@@ -10,7 +10,7 @@ from pylibcudf.libcudf.types cimport data_type
 cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \
         "cudf::strings" nogil:
     cdef unique_ptr[column] to_durations(
-        const column_view & strings_col,
+        const column_view & input,
         data_type duration_type,
         const string & format) except +
 
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
index ac11b8959ed..eecdade4ef9 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
@@ -8,10 +8,10 @@ from pylibcudf.types cimport DataType
 cpdef Column to_durations(
     Column input,
     DataType duration_type,
-    const string& format
+    str format
 )
 
 cpdef Column from_durations(
-    Column input,
-    const string& format
+    Column durations,
+    str format=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
index f3e0b7c9c8e..76c5809c3d5 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
@@ -15,27 +15,80 @@ from pylibcudf.types import DataType
 cpdef Column to_durations(
     Column input,
     DataType duration_type,
-    const string& format
+    str format
 ):
+    """
+    Returns a new duration column converting a strings column into
+    durations using the provided format pattern.
+
+    For details, see :cpp:func:`cudf::strings::to_durations`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    duration_type : DataType
+        The duration type used for creating the output column.
+
+    format : str
+        String specifying the duration format in strings.
+
+    Returns
+    -------
+    Column
+        New duration column.
+    """
     cdef unique_ptr[column] c_result
+    cdef string c_format = format.encode()
+
     with nogil:
-        c_result = cpp_convert_durations.to_durations(
-            input.view(),
-            duration_type.c_obj,
-            format
+        c_result = move(
+            cpp_convert_durations.to_durations(
+                input.view(),
+                duration_type.c_obj,
+                c_format
+            )
         )
 
     return Column.from_libcudf(move(c_result))
 
 cpdef Column from_durations(
-    Column input,
-    const string& format
+    Column durations,
+    str format=None
 ):
+    """
+    Returns a new strings column converting a duration column into
+    strings using the provided format pattern.
+
+    For details, see :cpp:func:`cudf::strings::from_durations`
+
+    Parameters
+    ----------
+    durations : Column
+        Duration values to convert.
+
+    format : str
+        The string specifying output format.
+        Default format is "%D days %H:%M:%S".
+
+    Returns
+    -------
+    Column
+        New strings column with formatted durations.
+    """
     cdef unique_ptr[column] c_result
+
+    if format is None:
+        format = "%D days %H:%M:%S"
+    cdef string c_format = format.encode()
+
     with nogil:
-        c_result = cpp_convert_durations.from_durations(
-            input.view(),
-            format
+        c_result = move(
+            cpp_convert_durations.from_durations(
+                durations.view(),
+                c_format
+            )
         )
 
     return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert.py b/python/pylibcudf/pylibcudf/tests/test_string_convert.py
index 22bb4971cb1..69f7a0fdd33 100644
--- a/python/pylibcudf/pylibcudf/tests/test_string_convert.py
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert.py
@@ -1,7 +1,5 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from datetime import datetime
-
 import pyarrow as pa
 import pylibcudf as plc
 import pytest
@@ -21,39 +19,16 @@ def timestamp_type(request):
     return request.param
 
 
-@pytest.fixture(
-    scope="module",
-    params=[
-        pa.duration("ns"),
-        pa.duration("us"),
-        pa.duration("ms"),
-        pa.duration("s"),
-    ],
-)
-def duration_type(request):
-    return request.param
-
-
 @pytest.fixture(scope="module")
 def pa_timestamp_col():
     return pa.array(["2011-01-01", "2011-01-02", "2011-01-03"])
 
 
-@pytest.fixture(scope="module")
-def pa_duration_col():
-    return pa.array(["05:20:25"])
-
-
 @pytest.fixture(scope="module")
 def plc_timestamp_col(pa_timestamp_col):
     return plc.interop.from_arrow(pa_timestamp_col)
 
 
-@pytest.fixture(scope="module")
-def plc_duration_col(pa_duration_col):
-    return plc.interop.from_arrow(pa_duration_col)
-
-
 @pytest.mark.parametrize("format", ["%Y-%m-%d"])
 def test_to_datetime(
     pa_timestamp_col, plc_timestamp_col, timestamp_type, format
@@ -65,21 +40,3 @@ def test_to_datetime(
         format,
     )
     assert_column_eq(expect, got)
-
-
-@pytest.mark.parametrize("format", ["%H:%M:%S"])
-def test_to_duration(pa_duration_col, plc_duration_col, duration_type, format):
-    def to_timedelta(duration_str):
-        date = datetime.strptime(duration_str, format)
-        return date - datetime(1900, 1, 1)  # "%H:%M:%S" zero date
-
-    expect = pa.array([to_timedelta(d.as_py()) for d in pa_duration_col]).cast(
-        duration_type
-    )
-
-    got = plc.strings.convert.convert_durations.to_durations(
-        plc_duration_col,
-        plc.interop.from_arrow(duration_type),
-        format.encode(),
-    )
-    assert_column_eq(expect, got)
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py
new file mode 100644
index 00000000000..6d704309bfd
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from datetime import datetime, timedelta
+
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+from utils import assert_column_eq
+
+
+@pytest.fixture(
+    params=[
+        pa.duration("ns"),
+        pa.duration("us"),
+        pa.duration("ms"),
+        pa.duration("s"),
+    ],
+)
+def duration_type(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def pa_duration_col():
+    return pa.array(["05:20:25"])
+
+
+@pytest.fixture(scope="module")
+def plc_duration_col(pa_duration_col):
+    return plc.interop.from_arrow(pa_duration_col)
+
+
+def test_to_duration(pa_duration_col, plc_duration_col, duration_type):
+    format = "%H:%M:%S"
+
+    def to_timedelta(duration_str):
+        date = datetime.strptime(duration_str, format)
+        return date - datetime(1900, 1, 1)  # "%H:%M:%S" zero date
+
+    expect = pa.array([to_timedelta(d.as_py()) for d in pa_duration_col]).cast(
+        duration_type
+    )
+
+    got = plc.strings.convert.convert_durations.to_durations(
+        plc_duration_col,
+        plc.interop.from_arrow(duration_type),
+        format,
+    )
+    assert_column_eq(expect, got)
+
+
+@pytest.mark.parametrize("format", [None, "%D days %H:%M:%S"])
+def test_from_durations(format):
+    pa_array = pa.array(
+        [timedelta(days=1, hours=1, minutes=1, seconds=1), None]
+    )
+    result = plc.strings.convert.convert_durations.from_durations(
+        plc.interop.from_arrow(pa_array), format
+    )
+    expected = pa.array(["1 days 01:01:01", None])
+    assert_column_eq(result, expected)

From c958d8e88d8c0cb149b1442ab91705853167a609 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Fri, 4 Oct 2024 18:45:30 -0500
Subject: [PATCH 19/25] Upgrade pandas pinnings to support `2.2.3` (#16882)

Pandas released version `2.2.3`, which contains only minimal fixes but adds support for Python 3.13 and compatibility with NumPy 2.1. This PR updates the pandas pinnings in `cudf` to support `2.2.3`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - James Lamb (https://github.com/jameslamb)
  - Vyas Ramasubramani (https://github.com/vyasr)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16882
---
 ci/test_python_cudf.sh                           | 2 +-
 conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +-
 conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +-
 conda/recipes/cudf/meta.yaml                     | 2 +-
 conda/recipes/pylibcudf/meta.yaml                | 2 +-
 dependencies.yaml                                | 6 +++++-
 python/cudf/cudf/core/_compat.py                 | 2 +-
 python/cudf/pyproject.toml                       | 2 +-
 python/dask_cudf/pyproject.toml                  | 2 +-
 9 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh
index 2386414b32e..9528549a562 100755
--- a/ci/test_python_cudf.sh
+++ b/ci/test_python_cudf.sh
@@ -9,7 +9,7 @@ source ./ci/test_python_common.sh test_python_cudf
 
 rapids-logger "Check GPU usage"
 nvidia-smi
-
+rapids-print-env
 EXITCODE=0
 trap "EXITCODE=1" ERR
 set +e
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index 8b45d26c367..bd5e6c3d569 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -63,7 +63,7 @@ dependencies:
 - openpyxl
 - packaging
 - pandas
-- pandas>=2.0,<2.2.3dev0
+- pandas>=2.0,<2.2.4dev0
 - pandoc
 - polars>=1.8,<1.9
 - pre-commit
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index 354c1360e5a..565a3ebfa3c 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -61,7 +61,7 @@ dependencies:
 - openpyxl
 - packaging
 - pandas
-- pandas>=2.0,<2.2.3dev0
+- pandas>=2.0,<2.2.4dev0
 - pandoc
 - polars>=1.8,<1.9
 - pre-commit
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index 25e69b89789..2c254415318 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -78,7 +78,7 @@ requirements:
   run:
     - python
     - typing_extensions >=4.0.0
-    - pandas >=2.0,<2.2.3dev0
+    - pandas >=2.0,<2.2.4dev0
     - cupy >=12.0.0
     - numba-cuda >=0.0.13
     - numpy >=1.23,<3.0a0
diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml
index 7c1efa0176c..3d965f30986 100644
--- a/conda/recipes/pylibcudf/meta.yaml
+++ b/conda/recipes/pylibcudf/meta.yaml
@@ -77,7 +77,7 @@ requirements:
   run:
     - python
     - typing_extensions >=4.0.0
-    - pandas >=2.0,<2.2.3dev0
+    - pandas >=2.0,<2.2.4dev0
     - numpy >=1.23,<3.0a0
     - pyarrow>=14.0.0,<18.0.0a0
     - {{ pin_compatible('rmm', max_pin='x.x') }}
diff --git a/dependencies.yaml b/dependencies.yaml
index b192158c4ea..3561b22965d 100644
--- a/dependencies.yaml
+++ b/dependencies.yaml
@@ -602,7 +602,7 @@ dependencies:
         packages:
           - fsspec>=0.6.0
           - &numpy numpy>=1.23,<3.0a0
-          - pandas>=2.0,<2.2.3dev0
+          - pandas>=2.0,<2.2.4dev0
   run_pylibcudf:
     common:
       - output_types: [conda, requirements, pyproject]
@@ -748,6 +748,10 @@ dependencies:
             packages:
               - *numba-cuda-dep
               - pandas==2.0.*
+          - matrix: {dependencies: "latest"}
+            packages:
+              - numba-cuda==0.0.15
+              - pandas==2.2.3
           - matrix:
             packages:
       - output_types: conda
diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py
index e2bdecbe67a..871ffc6269d 100644
--- a/python/cudf/cudf/core/_compat.py
+++ b/python/cudf/cudf/core/_compat.py
@@ -3,7 +3,7 @@
 import pandas as pd
 from packaging import version
 
-PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.2.2")
+PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.2.3")
 PANDAS_VERSION = version.parse(pd.__version__)
 
 
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index 605f9be5a49..1b730ffd13c 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -28,7 +28,7 @@ dependencies = [
     "numpy>=1.23,<3.0a0",
     "nvtx>=0.2.1",
     "packaging",
-    "pandas>=2.0,<2.2.3dev0",
+    "pandas>=2.0,<2.2.4dev0",
     "ptxcompiler",
     "pyarrow>=14.0.0,<18.0.0a0",
     "pylibcudf==24.12.*,>=0.0.0a0",
diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml
index 76e47b50c3b..ce825c7647b 100644
--- a/python/dask_cudf/pyproject.toml
+++ b/python/dask_cudf/pyproject.toml
@@ -23,7 +23,7 @@ dependencies = [
     "cupy-cuda11x>=12.0.0",
     "fsspec>=0.6.0",
     "numpy>=1.23,<3.0a0",
-    "pandas>=2.0,<2.2.3dev0",
+    "pandas>=2.0,<2.2.4dev0",
     "rapids-dask-dependency==24.12.*,>=0.0.0a0",
 ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`.
 classifiers = [

From 33b8dfa42ff9a600adfa6d10c7740169a0340338 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 4 Oct 2024 15:30:19 -1000
Subject: [PATCH 20/25] Add string.convert.convert_ipv4 APIs to pylibcudf
 (#16994)

Contributes to https://github.com/rapidsai/cudf/issues/15162

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16994
---
 python/cudf/cudf/_lib/string_casting.pyx      | 42 +++------
 .../libcudf/strings/convert/convert_ipv4.pxd  |  6 +-
 .../pylibcudf/strings/convert/CMakeLists.txt  |  2 +-
 .../pylibcudf/strings/convert/__init__.pxd    |  1 +
 .../pylibcudf/strings/convert/__init__.py     |  1 +
 .../strings/convert/convert_ipv4.pxd          | 10 ++
 .../strings/convert/convert_ipv4.pyx          | 92 +++++++++++++++++++
 .../tests/test_string_convert_ipv4.py         | 31 +++++++
 8 files changed, 151 insertions(+), 34 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd
 create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py

diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index fe19379bf93..76c862a8657 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -22,11 +22,6 @@ from pylibcudf.libcudf.strings.convert.convert_integers cimport (
     is_hex as cpp_is_hex,
     to_integers as cpp_to_integers,
 )
-from pylibcudf.libcudf.strings.convert.convert_ipv4 cimport (
-    integers_to_ipv4 as cpp_integers_to_ipv4,
-    ipv4_to_integers as cpp_ipv4_to_integers,
-    is_ipv4 as cpp_is_ipv4,
-)
 from pylibcudf.libcudf.types cimport data_type, type_id
 
 from cudf._lib.types cimport underlying_type_t_type_id
@@ -569,14 +564,10 @@ def int2ip(Column input_col):
     A Column with integer represented in string ipv4 format
 
     """
-
-    cdef column_view input_column_view = input_col.view()
-    cdef unique_ptr[column] c_result
-    with nogil:
-        c_result = move(
-            cpp_integers_to_ipv4(input_column_view))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_column = plc.strings.convert.convert_ipv4.integers_to_ipv4(
+        input_col.to_pylibcudf(mode="read")
+    )
+    return Column.from_pylibcudf(plc_column)
 
 
 def ip2int(Column input_col):
@@ -592,14 +583,10 @@ def ip2int(Column input_col):
     A Column with ipv4 represented as integer
 
     """
-
-    cdef column_view input_column_view = input_col.view()
-    cdef unique_ptr[column] c_result
-    with nogil:
-        c_result = move(
-            cpp_ipv4_to_integers(input_column_view))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_column = plc.strings.convert.convert_ipv4.ipv4_to_integers(
+        input_col.to_pylibcudf(mode="read")
+    )
+    return Column.from_pylibcudf(plc_column)
 
 
 def is_ipv4(Column source_strings):
@@ -608,15 +595,10 @@ def is_ipv4(Column source_strings):
     that have strings in IPv4 format. This format is nnn.nnn.nnn.nnn
     where nnn is integer digits in [0,255].
     """
-    cdef unique_ptr[column] c_result
-    cdef column_view source_view = source_strings.view()
-
-    with nogil:
-        c_result = move(cpp_is_ipv4(
-            source_view
-        ))
-
-    return Column.from_unique_ptr(move(c_result))
+    plc_column = plc.strings.convert.convert_ipv4.is_ipv4(
+        source_strings.to_pylibcudf(mode="read")
+    )
+    return Column.from_pylibcudf(plc_column)
 
 
 def htoi(Column input_col, **kwargs):
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
index fe571cfced6..801db438e92 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd
@@ -8,11 +8,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \
         "cudf::strings" nogil:
     cdef unique_ptr[column] ipv4_to_integers(
-        column_view input_col) except +
+        column_view input) except +
 
     cdef unique_ptr[column] integers_to_ipv4(
-        column_view input_col) except +
+        column_view integers) except +
 
     cdef unique_ptr[column] is_ipv4(
-        column_view source_strings
+        column_view input
     ) except +
diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
index fe8da975566..eb0d6ee6999 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt
@@ -13,7 +13,7 @@
 # =============================================================================
 
 set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx
-                   convert_fixed_point.pyx
+                   convert_fixed_point.pyx convert_ipv4.pyx
 )
 
 set(linked_libraries cudf::cudf)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
index 36abf463371..431beed8e5d 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
@@ -4,4 +4,5 @@ from . cimport (
     convert_datetime,
     convert_durations,
     convert_fixed_point,
+    convert_ipv4,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
index c0be4093836..a601b562c2e 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
@@ -4,4 +4,5 @@
     convert_datetime,
     convert_durations,
     convert_fixed_point,
+    convert_ipv4,
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd
new file mode 100644
index 00000000000..c61f5c0bdca
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd
@@ -0,0 +1,10 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column cimport Column
+
+
+cpdef Column ipv4_to_integers(Column input)
+
+cpdef Column integers_to_ipv4(Column integers)
+
+cpdef Column is_ipv4(Column input)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
new file mode 100644
index 00000000000..f2a980d4269
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
@@ -0,0 +1,92 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4
+
+
+cpdef Column ipv4_to_integers(Column input):
+    """
+    Converts IPv4 addresses into integers.
+
+    For details, see :cpp:func:`cudf::strings::ipv4_to_integers`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation
+
+    Returns
+    -------
+    Column
+        New uint32 column converted from strings.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_convert_ipv4.ipv4_to_integers(
+                input.view()
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column integers_to_ipv4(Column integers):
+    """
+    Converts integers into IPv4 addresses as strings.
+
+    For details, see :cpp:func:`cudf::strings::integers_to_ipv4`
+
+    Parameters
+    ----------
+    integers : Column
+        Integer (uint32) column to convert.
+
+    Returns
+    -------
+    Column
+        New strings column.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_convert_ipv4.integers_to_ipv4(
+                integers.view()
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column is_ipv4(Column input):
+    """
+    Returns a boolean column identifying strings in which all
+    characters are valid for conversion to integers from IPv4 format.
+
+    For details, see :cpp:func:`cudf::strings::is_ipv4`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    Returns
+    -------
+    Column
+        New column of boolean results for each string.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_convert_ipv4.is_ipv4(
+                input.view()
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py
new file mode 100644
index 00000000000..4dc3e512624
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import pyarrow as pa
+import pylibcudf as plc
+from utils import assert_column_eq
+
+
+def test_ipv4_to_integers():
+    arr = pa.array(["123.45.67.890", None])
+    result = plc.strings.convert.convert_ipv4.ipv4_to_integers(
+        plc.interop.from_arrow(arr)
+    )
+    expected = pa.array([2066564730, None], type=pa.uint32())
+    assert_column_eq(result, expected)
+
+
+def test_integers_to_ipv4():
+    arr = pa.array([1, 0, None], type=pa.uint32())
+    result = plc.strings.convert.convert_ipv4.integers_to_ipv4(
+        plc.interop.from_arrow(arr)
+    )
+    expected = pa.array(["0.0.0.1", "0.0.0.0", None])
+    assert_column_eq(result, expected)
+
+
+def test_is_ipv4():
+    arr = pa.array(["0.0.0.1", "1.2.34", "A", None])
+    result = plc.strings.convert.convert_ipv4.is_ipv4(
+        plc.interop.from_arrow(arr)
+    )
+    expected = pa.array([True, False, False, None])
+    assert_column_eq(result, expected)

From fcff2b6ef7d6db62fc064ad10ffc6c873fc85b58 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Sat, 5 Oct 2024 02:52:53 -0500
Subject: [PATCH 21/25] Fix write_json to handle empty string column (#16995)

Add an empty string column check to write_json.
Bypass make_strings_children for an empty column, because it throws a CUDA error when the grid size is zero.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: https://github.com/rapidsai/cudf/pull/16995
---
 cpp/src/io/json/write_json.cu     |  3 +++
 cpp/tests/io/json/json_writer.cpp | 37 +++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu
index dc7199d7ab1..e1241f8f90c 100644
--- a/cpp/src/io/json/write_json.cu
+++ b/cpp/src/io/json/write_json.cu
@@ -170,6 +170,9 @@ struct escape_strings_fn {
                                               rmm::cuda_stream_view stream,
                                               rmm::device_async_resource_ref mr)
   {
+    if (column_v.is_empty()) {  // empty begets empty
+      return make_empty_column(type_id::STRING);
+    }
     auto [offsets_column, chars] =
       cudf::strings::detail::make_strings_children(*this, column_v.size(), stream, mr);
 
diff --git a/cpp/tests/io/json/json_writer.cpp b/cpp/tests/io/json/json_writer.cpp
index 2c4e29a01b9..39d31c406a5 100644
--- a/cpp/tests/io/json/json_writer.cpp
+++ b/cpp/tests/io/json/json_writer.cpp
@@ -70,6 +70,43 @@ TEST_F(JsonWriterTest, EmptyInput)
   EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));
 }
 
+TEST_F(JsonWriterTest, EmptyLeaf)
+{
+  cudf::test::strings_column_wrapper col1{""};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 0};
+  auto col2 = make_lists_column(1,
+                                offsets.release(),
+                                cudf::test::strings_column_wrapper{}.release(),
+                                0,
+                                rmm::device_buffer{},
+                                cudf::test::get_default_stream());
+  auto col3 = cudf::test::lists_column_wrapper<int>::make_one_empty_row_column();
+  cudf::table_view tbl_view{{col1, *col2, col3}};
+  cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"col3"}}};
+
+  std::vector<char> out_buffer;
+  auto destination = cudf::io::sink_info(&out_buffer);
+  auto out_options = cudf::io::json_writer_options_builder(destination, tbl_view)
+                       .include_nulls(true)
+                       .metadata(mt)
+                       .lines(false)
+                       .na_rep("null")
+                       .build();
+
+  // Empty columns in table
+  cudf::io::write_json(out_options, cudf::test::get_default_stream());
+  std::string const expected = R"([{"col1":"","col2":[],"col3":[]}])";
+  EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size()));
+
+  // Empty columns in table - JSON Lines
+  out_buffer.clear();
+  out_options.enable_lines(true);
+  cudf::io::write_json(out_options, cudf::test::get_default_stream());
+  std::string const expected_lines = R"({"col1":"","col2":[],"col3":[]})"
+                                     "\n";
+  EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));
+}
+
 TEST_F(JsonWriterTest, ErrorCases)
 {
   cudf::test::strings_column_wrapper col1{"a", "b", "c"};

From bfd568b4f5c4dd9799b60a2975c1fd183e9b99aa Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Mon, 7 Oct 2024 12:04:24 -0400
Subject: [PATCH 22/25] Remove unused import (#17005)

This PR removes an unused import in cudf that was causing errors in doc builds.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/17005
---
 python/cudf/cudf/_lib/string_casting.pyx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index 76c862a8657..d9595f4ab0a 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -6,7 +6,6 @@ from cudf._lib.scalar import as_device_scalar
 from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
 
 from libcpp.memory cimport unique_ptr
-from libcpp.string cimport string
 from libcpp.utility cimport move
 
 from pylibcudf.libcudf.column.column cimport column

From f926a61c7d31b7b33c3a3482507e9efb44b2cc36 Mon Sep 17 00:00:00 2001
From: Ben Jarmak <104460670+jarmak-nv@users.noreply.github.com>
Date: Mon, 7 Oct 2024 12:37:55 -0400
Subject: [PATCH 23/25] Add release tracking to project automation scripts
 (#17001)

This PR adds two new jobs to the project automations: one to extract the version number from the branch name, and one to set the project `Release` field to the version found.

Authors:
  - Ben Jarmak (https://github.com/jarmak-nv)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/17001
---
 .../workflows/pr_issue_status_automation.yml  | 30 +++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml
index af8d1289ea1..6f0e88fb245 100644
--- a/.github/workflows/pr_issue_status_automation.yml
+++ b/.github/workflows/pr_issue_status_automation.yml
@@ -62,3 +62,33 @@ jobs:
         UPDATE_ITEM: true
         UPDATE_LINKED_ISSUES: true
       secrets: inherit
+
+    process-branch-name:
+      if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
+      needs: get-project-id
+      runs-on: ubuntu-latest
+      outputs:
+        branch-name: ${{ steps.process-branch-name.outputs.branch-name }}
+      steps:
+        - name: Extract branch name
+          id: process-branch-name
+          run: |
+            branch=${{ github.event.pull_request.base.ref }}
+            release=${branch#branch-}
+            echo "branch-name=$release" >> "$GITHUB_OUTPUT"
+
+    update-release:
+      # This job sets the PR and its linked issues to the release they are targeting
+      uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.12
+      if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
+      needs: [get-project-id, process-branch-name]
+      with:
+        PROJECT_ID: "PVT_kwDOAp2shc4AiNzl"
+        SINGLE_SELECT_FIELD_ID: "PVTSSF_lADOAp2shc4AiNzlzgg52UQ"
+        SINGLE_SELECT_FIELD_NAME: "Release"
+        SINGLE_SELECT_OPTION_VALUE: "${{ needs.process-branch-name.outputs.branch-name }}"
+        ITEM_PROJECT_ID: "${{ needs.get-project-id.outputs.ITEM_PROJECT_ID }}"
+        ITEM_NODE_ID: "${{ github.event.pull_request.node_id }}"
+        UPDATE_ITEM: true
+        UPDATE_LINKED_ISSUES: true
+      secrets: inherit

From 7e1e4757e753253a99df110fd3814d0136289ef2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 7 Oct 2024 12:41:47 -0700
Subject: [PATCH 24/25] Address all remaining clang-tidy errors (#16956)

With this set of changes I get a clean run of clang-tidy (with one caveat that I'll explain in the follow-up PR to add clang-tidy to pre-commit/CI).

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - MithunR (https://github.com/mythrocks)
  - David Wendt (https://github.com/davidwendt)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

URL: https://github.com/rapidsai/cudf/pull/16956
---
 cpp/.clang-tidy                               | 43 ++++++++++++---
 cpp/cmake/thirdparty/get_nanoarrow.cmake      |  8 +--
 .../nanoarrow_clang_tidy_compliance.diff      | 38 ++++++++++++++
 .../patches/nanoarrow_override.json           | 18 +++++++
 cpp/include/cudf/table/table.hpp              |  2 +-
 cpp/include/cudf/table/table_view.hpp         |  2 +-
 cpp/src/io/avro/avro.cpp                      |  3 +-
 cpp/src/io/orc/orc.hpp                        |  2 +-
 .../io/parquet/compact_protocol_reader.cpp    |  6 ++-
 cpp/src/io/utilities/data_sink.cpp            |  6 ++-
 cpp/src/io/utilities/hostdevice_span.hpp      |  8 +--
 cpp/src/utilities/host_memory.cpp             | 13 ++++-
 cpp/tests/binaryop/binop-compiled-test.cpp    |  6 ++-
 cpp/tests/binaryop/util/operation.h           |  4 +-
 cpp/tests/column/column_test.cpp              |  4 +-
 cpp/tests/copying/slice_tests.cpp             | 12 +++--
 cpp/tests/copying/slice_tests.cuh             | 21 ++++----
 cpp/tests/copying/split_tests.cpp             | 52 ++++++++++---------
 .../hashing/murmurhash3_x64_128_test.cpp      |  4 +-
 cpp/tests/hashing/sha256_test.cpp             |  2 -
 cpp/tests/interop/from_arrow_device_test.cpp  | 12 ++---
 cpp/tests/interop/from_arrow_host_test.cpp    |  6 +--
 cpp/tests/interop/from_arrow_test.cpp         |  8 +--
 cpp/tests/interop/to_arrow_device_test.cpp    | 12 ++---
 cpp/tests/interop/to_arrow_host_test.cpp      |  6 +--
 cpp/tests/interop/to_arrow_test.cpp           | 14 ++---
 cpp/tests/io/comp/decomp_test.cpp             | 36 ++++++++-----
 cpp/tests/io/csv_test.cpp                     | 12 ++---
 cpp/tests/io/json/json_test.cpp               |  6 +--
 cpp/tests/io/orc_test.cpp                     | 37 ++++++-------
 cpp/tests/io/parquet_misc_test.cpp            |  2 +-
 cpp/tests/io/parquet_reader_test.cpp          |  7 +--
 cpp/tests/io/parquet_v2_test.cpp              | 36 ++++++-------
 cpp/tests/io/parquet_writer_test.cpp          | 17 +++---
 cpp/tests/join/distinct_join_tests.cpp        | 10 ++--
 cpp/tests/merge/merge_string_test.cpp         |  4 +-
 cpp/tests/merge/merge_test.cpp                |  6 +--
 .../reductions/segmented_reduction_tests.cpp  |  9 ++--
 cpp/tests/replace/replace_tests.cpp           |  4 +-
 cpp/tests/rolling/collect_ops_test.cpp        |  8 +--
 cpp/tests/rolling/offset_row_window_test.cpp  | 12 +++--
 cpp/tests/rolling/rolling_test.cpp            |  2 +-
 cpp/tests/scalar/scalar_test.cpp              |  6 +--
 cpp/tests/search/search_list_test.cpp         |  3 +-
 cpp/tests/sort/sort_test.cpp                  |  2 +-
 cpp/tests/stream_compaction/unique_tests.cpp  |  1 -
 cpp/tests/streams/stream_compaction_test.cpp  |  2 -
 cpp/tests/strings/integers_tests.cpp          |  3 +-
 cpp/tests/structs/structs_column_tests.cpp    |  5 +-
 cpp/tests/transform/bools_to_mask_test.cpp    |  2 +-
 .../integration/unary_transform_test.cpp      | 28 +++++-----
 51 files changed, 344 insertions(+), 228 deletions(-)
 create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
 create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_override.json

diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy
index b791d846d1d..2d4f8c0d80e 100644
--- a/cpp/.clang-tidy
+++ b/cpp/.clang-tidy
@@ -1,18 +1,47 @@
 ---
+# Notes on disabled checks
+# ------------------------
+# modernize-use-equals-default:
+#     auto-fix is broken (doesn't insert =default correctly)
+# modernize-concat-nested-namespaces:
+#     auto-fix is broken (can delete code)
+# modernize-use-trailing-return-type:
+#     Purely stylistic, no benefit to rewriting everything
+# modernize-return-braced-init-list:
+#     Stylistically we prefer to see the return type at the return site.
+#     See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672
+#     for more information.
+# modernize-use-bool-literals:
+#     Our tests use int flags for validity masks extensively and we prefer that
+# clang-analyzer-cplusplus.NewDeleteLeaks:
+#     This check has numerous bugs, see
+#     https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks
+#     We encounter at least
+#     https://github.com/llvm/llvm-project/issues/60896
+#     https://github.com/llvm/llvm-project/issues/69602
+# clang-analyzer-optin.core.EnumCastOutOfRange:
+#     We use enums as flags in multiple cases and this check makes ORing flags invalid
+# clang-analyzer-optin.cplusplus.UninitializedObject:
+#     There is an error in nanoarrow that none of the clang-tidy filters (i.e.
+#     header-filter and exclude-header-filter) are able to properly avoid. This
+#     merits further investigation.
+#
+# We need to verify that broken checks are still broken
 Checks:
       'modernize-*,
        -modernize-use-equals-default,
        -modernize-concat-nested-namespaces,
        -modernize-use-trailing-return-type,
-       -modernize-use-bool-literals'
-
-      # -modernize-use-equals-default        # auto-fix is broken (doesn't insert =default correctly)
-      # -modernize-concat-nested-namespaces  # auto-fix is broken (can delete code)
-      # -modernize-use-trailing-return-type  # just a preference
+       -modernize-return-braced-init-list,
+       -modernize-use-bool-literals,
+       clang-analyzer-*,
+       -clang-analyzer-cplusplus.NewDeleteLeaks,
+       -clang-analyzer-optin.core.EnumCastOutOfRange,
+       -clang-analyzer-optin.cplusplus.UninitializedObject'
 
 WarningsAsErrors: ''
-HeaderFilterRegex: ''
-AnalyzeTemporaryDtors: false
+HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
+ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
 FormatStyle:     none
 CheckOptions:
  - key:             modernize-loop-convert.MaxCopySize
diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake
index 8df1b431095..d7d7fcca044 100644
--- a/cpp/cmake/thirdparty/get_nanoarrow.cmake
+++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake
@@ -14,15 +14,17 @@
 
 # This function finds nanoarrow and sets any additional necessary environment variables.
 function(find_and_configure_nanoarrow)
+  include(${rapids-cmake-dir}/cpm/package_override.cmake)
+
+  set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
+  rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json")
+
   # Currently we need to always build nanoarrow so we don't pickup a previous installed version
   set(CPM_DOWNLOAD_nanoarrow ON)
   rapids_cpm_find(
     nanoarrow 0.6.0.dev
     GLOBAL_TARGETS nanoarrow
     CPM_ARGS
-    GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
-    GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
-    GIT_SHALLOW FALSE
     OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf"
   )
   set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
new file mode 100644
index 00000000000..e9a36fcb567
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
@@ -0,0 +1,38 @@
+diff --git a/src/nanoarrow/common/inline_buffer.h b/src/nanoarrow/common/inline_buffer.h
+index caa6be4..70ec8a2 100644
+--- a/src/nanoarrow/common/inline_buffer.h
++++ b/src/nanoarrow/common/inline_buffer.h
+@@ -347,7 +347,7 @@ static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) {
+ }
+ 
+ static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
+-  *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) |
++  *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | // NOLINT
+                    ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) |
+                    ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) |
+                    ((values[7] + 0x7f) & 0x80));
+@@ -471,13 +471,13 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l
+     // set bits within a single byte
+     const uint8_t only_byte_mask =
+         i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
+-    bits[bytes_begin] &= only_byte_mask;
++    bits[bytes_begin] &= only_byte_mask;  // NOLINT
+     bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
+     return;
+   }
+ 
+   // set/clear trailing bits of first byte
+-  bits[bytes_begin] &= first_byte_mask;
++  bits[bytes_begin] &= first_byte_mask;  // NOLINT
+   bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);
+ 
+   if (bytes_end - bytes_begin > 2) {
+@@ -637,7 +637,7 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
+   n_remaining -= n_full_bytes * 8;
+   if (n_remaining > 0) {
+     // Zero out the last byte
+-    *out_cursor = 0x00;
++    *out_cursor = 0x00;  // NOLINT
+     for (int i = 0; i < n_remaining; i++) {
+       ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
+     }
diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_override.json b/cpp/cmake/thirdparty/patches/nanoarrow_override.json
new file mode 100644
index 00000000000..d529787e7c8
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/nanoarrow_override.json
@@ -0,0 +1,18 @@
+
+{
+  "packages" : {
+    "nanoarrow" : {
+      "version" : "0.6.0.dev",
+      "git_url" : "https://github.com/apache/arrow-nanoarrow.git",
+      "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb",
+      "git_shallow" : false,
+      "patches" : [
+        {
+          "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
+          "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537",
+          "fixed_in" : ""
+        }
+      ]
+    }
+  }
+}
diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp
index 762131a174f..15fdad21d9f 100644
--- a/cpp/include/cudf/table/table.hpp
+++ b/cpp/include/cudf/table/table.hpp
@@ -148,7 +148,7 @@ class table {
     std::vector<column_view> columns(std::distance(begin, end));
     std::transform(
       begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); });
-    return table_view(columns);
+    return table_view{columns};
   }
 
   /**
diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp
index 4a990f67ce4..d41176590ea 100644
--- a/cpp/include/cudf/table/table_view.hpp
+++ b/cpp/include/cudf/table/table_view.hpp
@@ -241,7 +241,7 @@ class table_view : public detail::table_view_base<column_view> {
   {
     std::vector<column_view> columns(std::distance(begin, end));
     std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); });
-    return table_view(columns);
+    return table_view{columns};
   }
 
   /**
diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp
index 03cf6d4a0e0..d5caa4720ac 100644
--- a/cpp/src/io/avro/avro.cpp
+++ b/cpp/src/io/avro/avro.cpp
@@ -16,6 +16,7 @@
 
 #include "avro.hpp"
 
+#include <array>
 #include <cstring>
 #include <unordered_map>
 
@@ -302,7 +303,7 @@ bool schema_parser::parse(std::vector<schema_entry>& schema, std::string const&
   // Empty schema
   if (json_str == "[]") return true;
 
-  char depthbuf[MAX_SCHEMA_DEPTH];
+  std::array<char, MAX_SCHEMA_DEPTH> depthbuf;
   int depth = 0, parent_idx = -1, entry_idx = -1;
   json_state_e state = state_attrname;
   std::string str;
diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp
index 790532c9d54..5ab36fdae8e 100644
--- a/cpp/src/io/orc/orc.hpp
+++ b/cpp/src/io/orc/orc.hpp
@@ -258,7 +258,7 @@ class ProtobufReader {
 
  private:
   template <int index>
-  friend class FunctionSwitchImpl;
+  friend struct FunctionSwitchImpl;
 
   void skip_bytes(size_t bytecnt)
   {
diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp
index b978799b8bc..312a5243687 100644
--- a/cpp/src/io/parquet/compact_protocol_reader.cpp
+++ b/cpp/src/io/parquet/compact_protocol_reader.cpp
@@ -228,7 +228,8 @@ class parquet_field_string : public parquet_field {
  * @return True if field types mismatch or if the process of reading a
  * string fails
  */
-struct parquet_field_string_list : public parquet_field_list<std::string, FieldType::BINARY> {
+class parquet_field_string_list : public parquet_field_list<std::string, FieldType::BINARY> {
+ public:
   parquet_field_string_list(int f, std::vector<std::string>& v) : parquet_field_list(f, v)
   {
     auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
@@ -396,8 +397,9 @@ class parquet_field_binary : public parquet_field {
  * @return True if field types mismatch or if the process of reading a
  * binary fails
  */
-struct parquet_field_binary_list
+class parquet_field_binary_list
   : public parquet_field_list<std::vector<uint8_t>, FieldType::BINARY> {
+ public:
   parquet_field_binary_list(int f, std::vector<std::vector<uint8_t>>& v) : parquet_field_list(f, v)
   {
     auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp
index 1dbb9369115..0b76f3d3e8f 100644
--- a/cpp/src/io/utilities/data_sink.cpp
+++ b/cpp/src/io/utilities/data_sink.cpp
@@ -50,7 +50,8 @@ class file_sink : public data_sink {
     }
   }
 
-  ~file_sink() override { flush(); }
+  // Marked as NOLINT because we are calling a virtual method in the destructor
+  ~file_sink() override { flush(); }  // NOLINT
 
   void host_write(void const* data, size_t size) override
   {
@@ -114,7 +115,8 @@ class host_buffer_sink : public data_sink {
  public:
   explicit host_buffer_sink(std::vector<char>* buffer) : buffer_(buffer) {}
 
-  ~host_buffer_sink() override { flush(); }
+  // Marked as NOLINT because we are calling a virtual method in the destructor
+  ~host_buffer_sink() override { flush(); }  // NOLINT
 
   void host_write(void const* data, size_t size) override
   {
diff --git a/cpp/src/io/utilities/hostdevice_span.hpp b/cpp/src/io/utilities/hostdevice_span.hpp
index d9eac423901..1d8b34addbd 100644
--- a/cpp/src/io/utilities/hostdevice_span.hpp
+++ b/cpp/src/io/utilities/hostdevice_span.hpp
@@ -43,8 +43,8 @@ class hostdevice_span {
   template <typename C,
             // Only supported containers of types convertible to T
             std::enable_if_t<std::is_convertible_v<
-              std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],
-              T (*)[]>>* = nullptr>
+              std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],  // NOLINT
+              T (*)[]>>* = nullptr>                                                  // NOLINT
   constexpr hostdevice_span(C& in) : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size())
   {
   }
@@ -54,8 +54,8 @@ class hostdevice_span {
   template <typename C,
             // Only supported containers of types convertible to T
             std::enable_if_t<std::is_convertible_v<
-              std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],
-              T (*)[]>>* = nullptr>
+              std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],  // NOLINT
+              T (*)[]>>* = nullptr>                                                  // NOLINT
   constexpr hostdevice_span(C const& in)
     : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size())
   {
diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp
index 125b98c4a67..9d8e3cf2fa6 100644
--- a/cpp/src/utilities/host_memory.cpp
+++ b/cpp/src/utilities/host_memory.cpp
@@ -115,12 +115,19 @@ class fixed_pinned_pool_memory_resource {
     return !operator==(other);
   }
 
-  friend void get_property(fixed_pinned_pool_memory_resource const&,
+  // clang-tidy will complain about this function because it is completely
+  // unused at runtime and only exists for tag introspection by CCCL, so we
+  // ignore linting. This masks a real issue if we ever want to compile with
+  // clang, though, which is that the function will actually be compiled out by
+  // clang. If cudf were ever to try to support clang as a compiler we would
+  // need to force the compiler to emit this symbol. The same goes for the
+  // other get_property definitions in this file.
+  friend void get_property(fixed_pinned_pool_memory_resource const&,  // NOLINT
                            cuda::mr::device_accessible) noexcept
   {
   }
 
-  friend void get_property(fixed_pinned_pool_memory_resource const&,
+  friend void get_property(fixed_pinned_pool_memory_resource const&,  // NOLINT
                            cuda::mr::host_accessible) noexcept
   {
   }
@@ -235,7 +242,9 @@ class new_delete_memory_resource {
 
   bool operator!=(new_delete_memory_resource const& other) const { return !operator==(other); }
 
+  // NOLINTBEGIN
   friend void get_property(new_delete_memory_resource const&, cuda::mr::host_accessible) noexcept {}
+  // NOLINTEND
 };
 
 static_assert(cuda::mr::resource_with<new_delete_memory_resource, cuda::mr::host_accessible>,
diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp
index 06e0d193d80..aa5b49567e6 100644
--- a/cpp/tests/binaryop/binop-compiled-test.cpp
+++ b/cpp/tests/binaryop/binop-compiled-test.cpp
@@ -557,7 +557,11 @@ auto NullOp_Result(cudf::column_view lhs, cudf::column_view rhs)
   std::transform(thrust::make_counting_iterator(0),
                  thrust::make_counting_iterator(lhs.size()),
                  result.begin(),
-                 [&lhs_data, &lhs_mask, &rhs_data, &rhs_mask, &result_mask](auto i) -> TypeOut {
+                 [&lhs_data    = lhs_data,
+                  &lhs_mask    = lhs_mask,
+                  &rhs_data    = rhs_data,
+                  &rhs_mask    = rhs_mask,
+                  &result_mask = result_mask](auto i) -> TypeOut {
                    auto lhs_valid    = lhs_mask.data() and cudf::bit_is_set(lhs_mask.data(), i);
                    auto rhs_valid    = rhs_mask.data() and cudf::bit_is_set(rhs_mask.data(), i);
                    bool output_valid = lhs_valid or rhs_valid;
diff --git a/cpp/tests/binaryop/util/operation.h b/cpp/tests/binaryop/util/operation.h
index d36b48d666a..ef1ccfccab5 100644
--- a/cpp/tests/binaryop/util/operation.h
+++ b/cpp/tests/binaryop/util/operation.h
@@ -100,7 +100,7 @@ struct Mul {
             std::enable_if_t<(cudf::is_duration_t<LhsT>::value && std::is_integral_v<RhsT>) ||
                                (cudf::is_duration_t<RhsT>::value && std::is_integral_v<LhsT>),
                              void>* = nullptr>
-  OutT DurationProduct(LhsT x, RhsT y) const
+  [[nodiscard]] OutT DurationProduct(LhsT x, RhsT y) const
   {
     return x * y;
   }
@@ -128,7 +128,7 @@ struct Div {
     typename LhsT,
     typename RhsT,
     std::enable_if_t<(std::is_integral_v<RhsT> || cudf::is_duration<RhsT>()), void>* = nullptr>
-  OutT DurationDivide(LhsT x, RhsT y) const
+  [[nodiscard]] OutT DurationDivide(LhsT x, RhsT y) const
   {
     return x / y;
   }
diff --git a/cpp/tests/column/column_test.cpp b/cpp/tests/column/column_test.cpp
index 14b4197de71..631f5150829 100644
--- a/cpp/tests/column/column_test.cpp
+++ b/cpp/tests/column/column_test.cpp
@@ -340,7 +340,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorNoMask)
 
   cudf::column moved_to{std::move(original)};
 
-  EXPECT_EQ(0, original.size());
+  EXPECT_EQ(0, original.size());  // NOLINT
   EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type());
 
   verify_column_views(moved_to);
@@ -359,7 +359,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask)
   cudf::column moved_to{std::move(original)};
   verify_column_views(moved_to);
 
-  EXPECT_EQ(0, original.size());
+  EXPECT_EQ(0, original.size());  // NOLINT
   EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type());
 
   // Verify move
diff --git a/cpp/tests/copying/slice_tests.cpp b/cpp/tests/copying/slice_tests.cpp
index bebd3d25610..aef0d4ad78a 100644
--- a/cpp/tests/copying/slice_tests.cpp
+++ b/cpp/tests/copying/slice_tests.cpp
@@ -29,6 +29,7 @@
 #include <cudf/utilities/type_dispatcher.hpp>
 #include <cudf/wrappers/timestamps.hpp>
 
+#include <array>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -370,11 +371,12 @@ TEST_F(SliceStringTableTest, StringWithNulls)
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
-  cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids},
-                                              {strings[1].begin(), strings[1].end(), valids}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
+  std::array<cudf::test::strings_column_wrapper, 2> sw{
+    {{strings[0].begin(), strings[0].end(), valids},
+     {strings[1].begin(), strings[1].end(), valids}}};
 
   std::vector<std::unique_ptr<cudf::column>> scols;
   scols.push_back(sw[0].release());
diff --git a/cpp/tests/copying/slice_tests.cuh b/cpp/tests/copying/slice_tests.cuh
index a180740f143..1e037294527 100644
--- a/cpp/tests/copying/slice_tests.cuh
+++ b/cpp/tests/copying/slice_tests.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -148,7 +148,7 @@ std::vector<cudf::table> create_expected_tables(cudf::size_type num_cols,
       }
     }
 
-    result.push_back(cudf::table(std::move(cols)));
+    result.emplace_back(std::move(cols));
   }
 
   return result;
@@ -163,13 +163,12 @@ inline std::vector<cudf::test::strings_column_wrapper> create_expected_string_co
 
   for (unsigned long index = 0; index < indices.size(); index += 2) {
     if (not nullable) {
-      result.push_back(cudf::test::strings_column_wrapper(strings.begin() + indices[index],
-                                                          strings.begin() + indices[index + 1]));
+      result.emplace_back(strings.begin() + indices[index], strings.begin() + indices[index + 1]);
     } else {
       auto valids = cudf::detail::make_counting_transform_iterator(
         indices[index], [](auto i) { return i % 2 == 0; });
-      result.push_back(cudf::test::strings_column_wrapper(
-        strings.begin() + indices[index], strings.begin() + indices[index + 1], valids));
+      result.emplace_back(
+        strings.begin() + indices[index], strings.begin() + indices[index + 1], valids);
     }
   }
 
@@ -184,16 +183,16 @@ inline std::vector<cudf::test::strings_column_wrapper> create_expected_string_co
   std::vector<cudf::test::strings_column_wrapper> result = {};
 
   for (unsigned long index = 0; index < indices.size(); index += 2) {
-    result.push_back(cudf::test::strings_column_wrapper(strings.begin() + indices[index],
-                                                        strings.begin() + indices[index + 1],
-                                                        validity.begin() + indices[index]));
+    result.emplace_back(strings.begin() + indices[index],
+                        strings.begin() + indices[index + 1],
+                        validity.begin() + indices[index]);
   }
 
   return result;
 }
 
 inline std::vector<cudf::table> create_expected_string_tables(
-  std::vector<std::string> const strings[2],
+  std::vector<std::vector<std::string>> const strings,
   std::vector<cudf::size_type> const& indices,
   bool nullable)
 {
@@ -216,7 +215,7 @@ inline std::vector<cudf::table> create_expected_string_tables(
       }
     }
 
-    result.push_back(cudf::table(std::move(cols)));
+    result.emplace_back(std::move(cols));
   }
 
   return result;
diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp
index ee3e7da5e0f..b56b0f2d3f8 100644
--- a/cpp/tests/copying/split_tests.cpp
+++ b/cpp/tests/copying/split_tests.cpp
@@ -35,6 +35,7 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 
+#include <array>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -135,7 +136,7 @@ std::vector<cudf::table> create_expected_tables_for_splits(
 }
 
 std::vector<cudf::table> create_expected_string_tables_for_splits(
-  std::vector<std::string> const strings[2],
+  std::vector<std::vector<std::string>> const strings,
   std::vector<cudf::size_type> const& splits,
   bool nullable)
 {
@@ -144,8 +145,8 @@ std::vector<cudf::table> create_expected_string_tables_for_splits(
 }
 
 std::vector<cudf::table> create_expected_string_tables_for_splits(
-  std::vector<std::string> const strings[2],
-  std::vector<bool> const validity[2],
+  std::vector<std::vector<std::string>> const strings,
+  std::vector<std::vector<bool>> const validity,
   std::vector<cudf::size_type> const& splits)
 {
   std::vector<cudf::size_type> indices = splits_to_indices(splits, strings[0].size());
@@ -627,11 +628,12 @@ void split_string_with_invalids(SplitFunc Split,
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
-  cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids},
-                                              {strings[1].begin(), strings[1].end(), valids}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
+  std::array<cudf::test::strings_column_wrapper, 2> sw{
+    {{strings[0].begin(), strings[0].end(), valids},
+     {strings[1].begin(), strings[1].end(), valids}}};
 
   std::vector<std::unique_ptr<cudf::column>> scols;
   scols.push_back(sw[0].release());
@@ -658,11 +660,12 @@ void split_empty_output_strings_column_value(SplitFunc Split,
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
-  cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids},
-                                              {strings[1].begin(), strings[1].end(), valids}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
+  std::array<cudf::test::strings_column_wrapper, 2> sw{
+    {{strings[0].begin(), strings[0].end(), valids},
+     {strings[1].begin(), strings[1].end(), valids}}};
 
   std::vector<std::unique_ptr<cudf::column>> scols;
   scols.push_back(sw[0].release());
@@ -684,9 +687,9 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare)
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
 
   std::vector<cudf::size_type> splits{2, 5, 9};
 
@@ -699,16 +702,17 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare)
     EXPECT_NO_THROW(Split(empty_table, splits));
   }
 
-  cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), no_valids},
-                                              {strings[1].begin(), strings[1].end(), valids}};
+  std::array<cudf::test::strings_column_wrapper, 2> sw{
+    {{strings[0].begin(), strings[0].end(), no_valids},
+     {strings[1].begin(), strings[1].end(), valids}}};
   std::vector<std::unique_ptr<cudf::column>> scols;
   scols.push_back(sw[0].release());
   scols.push_back(sw[1].release());
   cudf::table src_table(std::move(scols));
   auto result = Split(src_table, splits);
 
-  std::vector<bool> validity_masks[2] = {std::vector<bool>(strings[0].size()),
-                                         std::vector<bool>(strings[0].size())};
+  std::vector<std::vector<bool>> validity_masks{std::vector<bool>(strings[0].size()),
+                                                std::vector<bool>(strings[0].size())};
   std::generate(
     validity_masks[1].begin(), validity_masks[1].end(), [i = 0]() mutable { return i++ % 2 == 0; });
 
@@ -1913,9 +1917,9 @@ TEST_F(ContiguousSplitTableCornerCases, MixedColumnTypes)
   cudf::size_type start = 0;
   auto valids = cudf::detail::make_counting_transform_iterator(start, [](auto i) { return true; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
 
   std::vector<std::unique_ptr<cudf::column>> cols;
 
@@ -2377,7 +2381,7 @@ TEST_F(ContiguousSplitTableCornerCases, OutBufferToSmall)
 {
   // internally, contiguous split chunks GPU work in 1MB contiguous copies
   // so the output buffer must be 1MB or larger.
-  EXPECT_THROW(cudf::chunked_pack::create({}, 1 * 1024), cudf::logic_error);
+  EXPECT_THROW(auto _ = cudf::chunked_pack::create({}, 1 * 1024), cudf::logic_error);
 }
 
 TEST_F(ContiguousSplitTableCornerCases, ChunkSpanTooSmall)
diff --git a/cpp/tests/hashing/murmurhash3_x64_128_test.cpp b/cpp/tests/hashing/murmurhash3_x64_128_test.cpp
index 4fb8f78b558..0e68050f935 100644
--- a/cpp/tests/hashing/murmurhash3_x64_128_test.cpp
+++ b/cpp/tests/hashing/murmurhash3_x64_128_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,6 @@
 
 #include <cudf/hashing.hpp>
 
-constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS};
-
 using NumericTypesNoBools =
   cudf::test::Concat<cudf::test::IntegralTypesNotBool, cudf::test::FloatingPointTypes>;
 
diff --git a/cpp/tests/hashing/sha256_test.cpp b/cpp/tests/hashing/sha256_test.cpp
index cc95c7a2f0f..8bc47c92c6b 100644
--- a/cpp/tests/hashing/sha256_test.cpp
+++ b/cpp/tests/hashing/sha256_test.cpp
@@ -23,8 +23,6 @@
 #include <cudf/hashing.hpp>
 #include <cudf/utilities/error.hpp>
 
-constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS};
-
 class SHA256HashTest : public cudf::test::BaseFixture {};
 
 TEST_F(SHA256HashTest, EmptyTable)
diff --git a/cpp/tests/interop/from_arrow_device_test.cpp b/cpp/tests/interop/from_arrow_device_test.cpp
index a4dc7531765..2151ec6e22f 100644
--- a/cpp/tests/interop/from_arrow_device_test.cpp
+++ b/cpp/tests/interop/from_arrow_device_test.cpp
@@ -270,9 +270,9 @@ TEST_F(FromArrowDeviceTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
@@ -414,9 +414,9 @@ TEST_F(FromArrowDeviceTest, DictionaryIndicesType)
 {
   std::vector<std::unique_ptr<cudf::column>> columns;
   auto col = cudf::test::fixed_width_column_wrapper<int64_t>({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1});
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
+  columns.emplace_back(cudf::dictionary::encode(col));
+  columns.emplace_back(cudf::dictionary::encode(col));
+  columns.emplace_back(cudf::dictionary::encode(col));
 
   cudf::table expected_table(std::move(columns));
   cudf::table_view expected_table_view = expected_table.view();
diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp
index cbfa4911c3c..ef9936b214c 100644
--- a/cpp/tests/interop/from_arrow_host_test.cpp
+++ b/cpp/tests/interop/from_arrow_host_test.cpp
@@ -309,9 +309,9 @@ TEST_F(FromArrowHostDeviceTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
index 81c406c0faf..6e742b9e4cf 100644
--- a/cpp/tests/interop/from_arrow_test.cpp
+++ b/cpp/tests/interop/from_arrow_test.cpp
@@ -52,7 +52,7 @@ std::unique_ptr<cudf::table> get_cudf_table()
                          .release());
   auto col4 = cudf::test::fixed_width_column_wrapper<int32_t>({1, 2, 5, 2, 7},
                                                               {true, false, true, true, true});
-  columns.emplace_back(std::move(cudf::dictionary::encode(col4)));
+  columns.emplace_back(cudf::dictionary::encode(col4));
   columns.emplace_back(cudf::test::fixed_width_column_wrapper<bool>(
                          {true, false, true, false, true}, {true, false, true, true, false})
                          .release());
@@ -339,9 +339,9 @@ TEST_F(FromArrowTest, DictionaryIndicesType)
   std::vector<std::unique_ptr<cudf::column>> columns;
   auto col = cudf::test::fixed_width_column_wrapper<int64_t>({1, 2, 5, 2, 7},
                                                              {true, false, true, true, true});
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
+  columns.emplace_back(cudf::dictionary::encode(col));
+  columns.emplace_back(cudf::dictionary::encode(col));
+  columns.emplace_back(cudf::dictionary::encode(col));
 
   cudf::table expected_table(std::move(columns));
 
diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp
index 51216a8512c..7ba586461dc 100644
--- a/cpp/tests/interop/to_arrow_device_test.cpp
+++ b/cpp/tests/interop/to_arrow_device_test.cpp
@@ -55,7 +55,7 @@ get_nanoarrow_cudf_table(cudf::size_type length)
   auto col4 = cudf::test::fixed_width_column_wrapper<int64_t>(
     test_data.int64_data.begin(), test_data.int64_data.end(), test_data.validity.begin());
   auto dict_col = cudf::dictionary::encode(col4);
-  columns.emplace_back(std::move(cudf::dictionary::encode(col4)));
+  columns.emplace_back(cudf::dictionary::encode(col4));
   columns.emplace_back(cudf::test::fixed_width_column_wrapper<bool>(test_data.bool_data.begin(),
                                                                     test_data.bool_data.end(),
                                                                     test_data.bool_validity.begin())
@@ -82,8 +82,8 @@ get_nanoarrow_cudf_table(cudf::size_type length)
       test_data.string_data.begin(), test_data.string_data.end(), test_data.validity.begin())
       .release();
   vector_of_columns cols;
-  cols.push_back(move(int_column));
-  cols.push_back(move(str_column));
+  cols.push_back(std::move(int_column));
+  cols.push_back(std::move(str_column));
   auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper<bool>(
     test_data.bool_data_validity.begin(), test_data.bool_data_validity.end()));
   columns.emplace_back(
@@ -575,9 +575,9 @@ TEST_F(ToArrowDeviceTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp
index fc0ed6c9352..fcb4433b42e 100644
--- a/cpp/tests/interop/to_arrow_host_test.cpp
+++ b/cpp/tests/interop/to_arrow_host_test.cpp
@@ -436,9 +436,9 @@ TEST_F(ToArrowHostDeviceTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp
index 90ae12cdd90..a6aa4b22eca 100644
--- a/cpp/tests/interop/to_arrow_test.cpp
+++ b/cpp/tests/interop/to_arrow_test.cpp
@@ -90,7 +90,7 @@ std::pair<std::unique_ptr<cudf::table>, std::shared_ptr<arrow::Table>> get_table
   auto col4 = cudf::test::fixed_width_column_wrapper<int64_t>(
     int64_data.begin(), int64_data.end(), validity.begin());
   auto dict_col = cudf::dictionary::encode(col4);
-  columns.emplace_back(std::move(cudf::dictionary::encode(col4)));
+  columns.emplace_back(cudf::dictionary::encode(col4));
   columns.emplace_back(cudf::test::fixed_width_column_wrapper<bool>(
                          bool_data.begin(), bool_data.end(), bool_validity.begin())
                          .release());
@@ -112,8 +112,8 @@ std::pair<std::unique_ptr<cudf::table>, std::shared_ptr<arrow::Table>> get_table
     cudf::test::strings_column_wrapper(string_data.begin(), string_data.end(), validity.begin())
       .release();
   vector_of_columns cols;
-  cols.push_back(move(int_column));
-  cols.push_back(move(str_column));
+  cols.push_back(std::move(int_column));
+  cols.push_back(std::move(str_column));
   auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper<bool>(
     bool_data_validity.begin(), bool_data_validity.end()));
   columns.emplace_back(
@@ -294,9 +294,9 @@ TEST_F(ToArrowTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
@@ -438,7 +438,7 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge)
     auto const schema               = std::make_shared<arrow::Schema>(schema_vector);
     auto const expected_arrow_table = arrow::Table::Make(schema, {arr});
 
-    std::vector<cudf::column_metadata> const metadata = {{"a"}};
+    std::vector<cudf::column_metadata> const metadata = {{"a"}};  // NOLINT
     ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table));
   }
 }
diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp
index 840cf263ed9..54262dc3b44 100644
--- a/cpp/tests/io/comp/decomp_test.cpp
+++ b/cpp/tests/io/comp/decomp_test.cpp
@@ -39,19 +39,19 @@ using cudf::device_span;
  */
 template <typename Decompressor>
 struct DecompressTest : public cudf::test::BaseFixture {
-  std::vector<uint8_t> vector_from_string(char const* str) const
+  [[nodiscard]] std::vector<uint8_t> vector_from_string(std::string const& str) const
   {
-    return std::vector<uint8_t>(reinterpret_cast<uint8_t const*>(str),
-                                reinterpret_cast<uint8_t const*>(str) + strlen(str));
+    auto const* first = reinterpret_cast<uint8_t const*>(str.data());  // view chars as raw bytes
+    return {first, first + str.size()};  // stored length; no by-value copy, no strlen() rescan
   }
 
-  void Decompress(std::vector<uint8_t>* decompressed,
+  void Decompress(std::vector<uint8_t>& decompressed,
                   uint8_t const* compressed,
                   size_t compressed_size)
   {
     auto stream = cudf::get_default_stream();
     rmm::device_buffer src{compressed, compressed_size, stream};
-    rmm::device_uvector<uint8_t> dst{decompressed->size(), stream};
+    rmm::device_uvector<uint8_t> dst{decompressed.size(), stream};
 
     cudf::detail::hostdevice_vector<device_span<uint8_t const>> inf_in(1, stream);
     inf_in[0] = {static_cast<uint8_t const*>(src.data()), src.size()};
@@ -67,7 +67,7 @@ struct DecompressTest : public cudf::test::BaseFixture {
 
     static_cast<Decompressor*>(this)->dispatch(inf_in, inf_out, inf_stat);
     CUDF_CUDA_TRY(cudaMemcpyAsync(
-      decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value()));
+      decompressed.data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value()));
     inf_stat.device_to_host_sync(stream);
     ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS);
   }
@@ -125,49 +125,57 @@ struct NvcompConfigTest : public cudf::test::BaseFixture {};
 
 TEST_F(GzipDecompressTest, HelloWorld)
 {
-  constexpr char uncompressed[]  = "hello world";
+  std::string const uncompressed{"hello world"};
+  // NOLINTBEGIN
   constexpr uint8_t compressed[] = {
     0x1f, 0x8b, 0x8,  0x0,  0x9,  0x63, 0x99, 0x5c, 0x2,  0xff, 0xcb, 0x48, 0xcd, 0xc9, 0xc9, 0x57,
     0x28, 0xcf, 0x2f, 0xca, 0x49, 0x1,  0x0,  0x85, 0x11, 0x4a, 0xd,  0xb,  0x0,  0x0,  0x0};
+  // NOLINTEND
 
   std::vector<uint8_t> input = vector_from_string(uncompressed);
   std::vector<uint8_t> output(input.size());
-  Decompress(&output, compressed, sizeof(compressed));
+  Decompress(output, compressed, sizeof(compressed));
   EXPECT_EQ(output, input);
 }
 
 TEST_F(SnappyDecompressTest, HelloWorld)
 {
-  constexpr char uncompressed[]  = "hello world";
+  std::string const uncompressed{"hello world"};
+  // NOLINTBEGIN
   constexpr uint8_t compressed[] = {
     0xb, 0x28, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64};
+  // NOLINTEND
 
   std::vector<uint8_t> input = vector_from_string(uncompressed);
   std::vector<uint8_t> output(input.size());
-  Decompress(&output, compressed, sizeof(compressed));
+  Decompress(output, compressed, sizeof(compressed));
   EXPECT_EQ(output, input);
 }
 
 TEST_F(SnappyDecompressTest, ShortLiteralAfterLongCopyAtStartup)
 {
-  constexpr char uncompressed[]  = "Aaaaaaaaaaaah!";
+  std::string const uncompressed{"Aaaaaaaaaaaah!"};
+  // NOLINTBEGIN
   constexpr uint8_t compressed[] = {14, 0x0, 'A', 0x0, 'a', (10 - 4) * 4 + 1, 1, 0x4, 'h', '!'};
+  // NOLINTEND
 
   std::vector<uint8_t> input = vector_from_string(uncompressed);
   std::vector<uint8_t> output(input.size());
-  Decompress(&output, compressed, sizeof(compressed));
+  Decompress(output, compressed, sizeof(compressed));
   EXPECT_EQ(output, input);
 }
 
 TEST_F(BrotliDecompressTest, HelloWorld)
 {
-  constexpr char uncompressed[]  = "hello world";
+  std::string const uncompressed{"hello world"};
+  // NOLINTBEGIN
   constexpr uint8_t compressed[] = {
     0xb, 0x5, 0x80, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x3};
+  // NOLINTEND
 
   std::vector<uint8_t> input = vector_from_string(uncompressed);
   std::vector<uint8_t> output(input.size());
-  Decompress(&output, compressed, sizeof(compressed));
+  Decompress(output, compressed, sizeof(compressed));
   EXPECT_EQ(output, input);
 }
 
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index 0028dd946e3..b265dcf9273 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -63,9 +63,9 @@ auto dtype()
 
 template <typename T, typename SourceElementT = T>
 using column_wrapper =
-  typename std::conditional<std::is_same_v<T, cudf::string_view>,
-                            cudf::test::strings_column_wrapper,
-                            cudf::test::fixed_width_column_wrapper<T, SourceElementT>>::type;
+  std::conditional_t<std::is_same_v<T, cudf::string_view>,
+                     cudf::test::strings_column_wrapper,
+                     cudf::test::fixed_width_column_wrapper<T, SourceElementT>>;
 using column     = cudf::column;
 using table      = cudf::table;
 using table_view = cudf::table_view;
@@ -954,7 +954,7 @@ TEST_F(CsvReaderTest, Strings)
   ASSERT_EQ(type_id::STRING, view.column(1).type().id());
 
   expect_column_data_equal(
-    std::vector<std::string>{"abc def ghi", "\"jkl mno pqr\"", "stu \"\"vwx\"\" yz"},
+    std::vector<std::string>{"abc def ghi", "\"jkl mno pqr\"", R"(stu ""vwx"" yz)"},
     view.column(1));
 }
 
@@ -1014,7 +1014,7 @@ TEST_F(CsvReaderTest, StringsQuotesIgnored)
   ASSERT_EQ(type_id::STRING, view.column(1).type().id());
 
   expect_column_data_equal(
-    std::vector<std::string>{"\"abcdef ghi\"", "\"jkl \"\"mno\"\" pqr\"", "stu \"vwx\" yz"},
+    std::vector<std::string>{"\"abcdef ghi\"", R"("jkl ""mno"" pqr")", "stu \"vwx\" yz"},
     view.column(1));
 }
 
@@ -1830,7 +1830,7 @@ TEST_F(CsvReaderTest, StringsWithWriter)
 
   auto int_column = column_wrapper<int32_t>{10, 20, 30};
   auto string_column =
-    column_wrapper<cudf::string_view>{"abc def ghi", "\"jkl mno pqr\"", "stu \"\"vwx\"\" yz"};
+    column_wrapper<cudf::string_view>{"abc def ghi", "\"jkl mno pqr\"", R"(stu ""vwx"" yz)"};
   cudf::table_view input_table(std::vector<cudf::column_view>{int_column, string_column});
 
   // TODO add quoting style flag?
diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp
index 49ad0c408dc..cb6716f4a18 100644
--- a/cpp/tests/io/json/json_test.cpp
+++ b/cpp/tests/io/json/json_test.cpp
@@ -68,9 +68,9 @@ auto dtype()
 
 template <typename T, typename SourceElementT = T>
 using column_wrapper =
-  typename std::conditional<std::is_same_v<T, cudf::string_view>,
-                            cudf::test::strings_column_wrapper,
-                            cudf::test::fixed_width_column_wrapper<T, SourceElementT>>::type;
+  std::conditional_t<std::is_same_v<T, cudf::string_view>,
+                     cudf::test::strings_column_wrapper,
+                     cudf::test::fixed_width_column_wrapper<T, SourceElementT>>;
 
 cudf::test::TempDirTestEnvironment* const temp_env =
   static_cast<cudf::test::TempDirTestEnvironment*>(
diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
index 89e704f3ed3..cce0adbf317 100644
--- a/cpp/tests/io/orc_test.cpp
+++ b/cpp/tests/io/orc_test.cpp
@@ -43,9 +43,9 @@
 
 template <typename T, typename SourceElementT = T>
 using column_wrapper =
-  typename std::conditional<std::is_same_v<T, cudf::string_view>,
-                            cudf::test::strings_column_wrapper,
-                            cudf::test::fixed_width_column_wrapper<T, SourceElementT>>::type;
+  std::conditional_t<std::is_same_v<T, cudf::string_view>,
+                     cudf::test::strings_column_wrapper,
+                     cudf::test::fixed_width_column_wrapper<T, SourceElementT>>;
 
 using str_col     = column_wrapper<cudf::string_view>;
 using bool_col    = column_wrapper<bool>;
@@ -1358,21 +1358,22 @@ TEST_P(OrcWriterTestStripes, StripeSize)
   cols.push_back(col.release());
   auto const expected = std::make_unique<table>(std::move(cols));
 
-  auto validate = [&](std::vector<char> const& orc_buffer) {
-    auto const expected_stripe_num =
-      std::max<cudf::size_type>(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes);
-    auto const stats = cudf::io::read_parsed_orc_statistics(
-      cudf::io::source_info(orc_buffer.data(), orc_buffer.size()));
-    EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num);
-
-    cudf::io::orc_reader_options in_opts =
-      cudf::io::orc_reader_options::builder(
-        cudf::io::source_info(orc_buffer.data(), orc_buffer.size()))
-        .use_index(false);
-    auto result = cudf::io::read_orc(in_opts);
-
-    CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view());
-  };
+  auto validate =
+    [&, &size_bytes = size_bytes, &size_rows = size_rows](std::vector<char> const& orc_buffer) {
+      auto const expected_stripe_num =
+        std::max<cudf::size_type>(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes);
+      auto const stats = cudf::io::read_parsed_orc_statistics(
+        cudf::io::source_info(orc_buffer.data(), orc_buffer.size()));
+      EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num);
+
+      cudf::io::orc_reader_options in_opts =
+        cudf::io::orc_reader_options::builder(
+          cudf::io::source_info(orc_buffer.data(), orc_buffer.size()))
+          .use_index(false);
+      auto result = cudf::io::read_orc(in_opts);
+
+      CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view());
+    };
 
   {
     std::vector<char> out_buffer_chunked;
diff --git a/cpp/tests/io/parquet_misc_test.cpp b/cpp/tests/io/parquet_misc_test.cpp
index 8b03e94191e..f1286a00d22 100644
--- a/cpp/tests/io/parquet_misc_test.cpp
+++ b/cpp/tests/io/parquet_misc_test.cpp
@@ -98,7 +98,7 @@ TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaListSliced)
   // list<T>
   constexpr int vals_per_row = 4;
   auto c1_offset_iter        = cudf::detail::make_counting_transform_iterator(
-    0, [vals_per_row](cudf::size_type idx) { return idx * vals_per_row; });
+    0, [](cudf::size_type idx) { return idx * vals_per_row; });
   cudf::test::fixed_width_column_wrapper<cudf::size_type> c1_offsets(c1_offset_iter,
                                                                      c1_offset_iter + num_rows + 1);
   cudf::test::fixed_width_column_wrapper<T> c1_vals(
diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp
index dc8e68b3a15..4a5309f3ba7 100644
--- a/cpp/tests/io/parquet_reader_test.cpp
+++ b/cpp/tests/io/parquet_reader_test.cpp
@@ -1189,15 +1189,12 @@ TEST_F(ParquetReaderTest, NestingOptimizationTest)
   cudf::test::fixed_width_column_wrapper<int> values(value_iter, value_iter + num_values, validity);
 
   // ~256k values with num_nesting_levels = 16
-  int total_values_produced = num_values;
-  auto prev_col             = values.release();
+  auto prev_col = values.release();
   for (int idx = 0; idx < num_nesting_levels; idx++) {
-    auto const depth    = num_nesting_levels - idx;
     auto const num_rows = (1 << (num_nesting_levels - idx));
 
     auto offsets_iter = cudf::detail::make_counting_transform_iterator(
-      0, [depth, rows_per_level](cudf::size_type i) { return i * rows_per_level; });
-    total_values_produced += (num_rows + 1);
+      0, [](cudf::size_type i) { return i * rows_per_level; });
 
     cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets(offsets_iter,
                                                                     offsets_iter + num_rows + 1);
diff --git a/cpp/tests/io/parquet_v2_test.cpp b/cpp/tests/io/parquet_v2_test.cpp
index 7c305235ea6..a0b48f54854 100644
--- a/cpp/tests/io/parquet_v2_test.cpp
+++ b/cpp/tests/io/parquet_v2_test.cpp
@@ -1302,24 +1302,24 @@ TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls)
   table_view expected({col0, col1, col2, col3, col4, col5, col6, col7});
 
   std::array<int64_t, 9> expected_null_counts{4, 4, 4, 6, 4, 6, 4, 5, 11};
-  std::vector<int64_t> const expected_def_hists[] = {{1, 1, 2, 3},
-                                                     {1, 3, 10},
-                                                     {1, 1, 2, 10},
-                                                     {1, 1, 2, 2, 8},
-                                                     {1, 1, 1, 1, 10},
-                                                     {1, 1, 1, 1, 2, 8},
-                                                     {1, 3, 9},
-                                                     {1, 3, 1, 8},
-                                                     {1, 0, 4, 1, 1, 4, 9}};
-  std::vector<int64_t> const expected_rep_hists[] = {{4, 3},
-                                                     {4, 4, 6},
-                                                     {4, 4, 6},
-                                                     {4, 4, 6},
-                                                     {4, 4, 6},
-                                                     {4, 4, 6},
-                                                     {4, 4, 5},
-                                                     {4, 4, 5},
-                                                     {4, 6, 2, 8}};
+  std::vector<std::vector<int64_t>> const expected_def_hists = {{1, 1, 2, 3},
+                                                                {1, 3, 10},
+                                                                {1, 1, 2, 10},
+                                                                {1, 1, 2, 2, 8},
+                                                                {1, 1, 1, 1, 10},
+                                                                {1, 1, 1, 1, 2, 8},
+                                                                {1, 3, 9},
+                                                                {1, 3, 1, 8},
+                                                                {1, 0, 4, 1, 1, 4, 9}};
+  std::vector<std::vector<int64_t>> const expected_rep_hists = {{4, 3},
+                                                                {4, 4, 6},
+                                                                {4, 4, 6},
+                                                                {4, 4, 6},
+                                                                {4, 4, 6},
+                                                                {4, 4, 6},
+                                                                {4, 4, 5},
+                                                                {4, 4, 5},
+                                                                {4, 6, 2, 8}};
 
   auto const filepath = temp_env->get_temp_filepath("ColumnIndexListWithNulls.parquet");
   auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp
index 8794f2ee304..6c5e9cdf07a 100644
--- a/cpp/tests/io/parquet_writer_test.cpp
+++ b/cpp/tests/io/parquet_writer_test.cpp
@@ -290,7 +290,8 @@ class custom_test_data_sink : public cudf::io::data_sink {
     CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file");
   }
 
-  ~custom_test_data_sink() override { flush(); }
+  // Marked as NOLINT because we are calling a virtual method in the destructor
+  ~custom_test_data_sink() override { flush(); }  // NOLINT
 
   void host_write(void const* data, size_t size) override
   {
@@ -981,13 +982,15 @@ TEST_F(ParquetWriterTest, CheckColumnIndexTruncation)
 
 TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation)
 {
-  std::vector<uint8_t> truncated_min[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe},
-                                          {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
-                                          {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}};
+  std::array<std::vector<uint8_t>, 3> truncated_min{
+    {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe},
+     {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+     {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}};
 
-  std::vector<uint8_t> truncated_max[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff},
-                                          {0xff},
-                                          {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}};
+  std::array<std::vector<uint8_t>, 3> truncated_max{
+    {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff},
+     {0xff},
+     {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}};
 
   cudf::test::lists_column_wrapper<uint8_t> col0{
     {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}};
diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp
index 93754091b3f..178edc52dd3 100644
--- a/cpp/tests/join/distinct_join_tests.cpp
+++ b/cpp/tests/join/distinct_join_tests.cpp
@@ -314,7 +314,7 @@ TEST_F(DistinctJoinTest, EmptyBuildTableLeftJoin)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::NO>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   this->compare_to_reference(
     build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY);
@@ -362,7 +362,7 @@ TEST_F(DistinctJoinTest, EmptyProbeTableLeftJoin)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::NO>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   this->compare_to_reference(
     build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY);
@@ -398,7 +398,7 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::NO>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   this->compare_to_reference(
     build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY);
@@ -423,7 +423,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::NO>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   column_wrapper<int32_t> col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}};
   strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {true, true, false, true, true});
@@ -468,7 +468,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::YES>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   auto col0_gold_names_col = strcol_wrapper{
     "Samuel Vimes", "Detritus", "Carrot Ironfoundersson", "Samuel Vimes", "Angua von Überwald"};
diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp
index 97979e79010..bea044496b3 100644
--- a/cpp/tests/merge/merge_string_test.cpp
+++ b/cpp/tests/merge/merge_string_test.cpp
@@ -97,7 +97,7 @@ TYPED_TEST(MergeStringTest, Merge1StringKeyColumns)
                                             "hi",
                                             "hj"});
 
-  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) {
+  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)
       return 0;
     else
@@ -296,7 +296,7 @@ TYPED_TEST(MergeStringTest, Merge1StringKeyNullColumns)
                                             true,
                                             false,
                                             false});
-  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) {
+  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)
       return 0;
     else
diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp
index 2e09f25b51f..6208d395f0a 100644
--- a/cpp/tests/merge/merge_test.cpp
+++ b/cpp/tests/merge/merge_test.cpp
@@ -349,7 +349,7 @@ TYPED_TEST(MergeTest_, Merge1KeyColumns)
   cudf::test::fixed_width_column_wrapper<TypeParam, typename decltype(seq_out1)::value_type>
     expectedDataWrap1(seq_out1, seq_out1 + outputRows);
 
-  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) {
+  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)
       return 0;
     else
@@ -452,7 +452,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns)
   cudf::size_type inputRows = 40;
 
   // data: 0  2  4  6 | valid: 1 1 1 0
-  auto sequence1       = cudf::detail::make_counting_transform_iterator(0, [inputRows](auto row) {
+  auto sequence1       = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
       return 0;  // <- no shortcut to this can avoid compiler errors
     } else {
@@ -465,7 +465,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns)
     leftColWrap1(sequence1, sequence1 + inputRows, valid_sequence1);
 
   // data: 1  3  5  7 | valid: 1 1 1 0
-  auto sequence2 = cudf::detail::make_counting_transform_iterator(0, [inputRows](auto row) {
+  auto sequence2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
       return 1;
     } else
diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp
index 19996f827cf..bc0321bd40a 100644
--- a/cpp/tests/reductions/segmented_reduction_tests.cpp
+++ b/cpp/tests/reductions/segmented_reduction_tests.cpp
@@ -1092,11 +1092,10 @@ TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets)
   auto aggregates =
     std::vector<std::unique_ptr<cudf::segmented_reduce_aggregation,
                                 std::default_delete<cudf::segmented_reduce_aggregation>>>();
-  aggregates.push_back(std::move(cudf::make_max_aggregation<cudf::segmented_reduce_aggregation>()));
-  aggregates.push_back(std::move(cudf::make_min_aggregation<cudf::segmented_reduce_aggregation>()));
-  aggregates.push_back(std::move(cudf::make_sum_aggregation<cudf::segmented_reduce_aggregation>()));
-  aggregates.push_back(
-    std::move(cudf::make_product_aggregation<cudf::segmented_reduce_aggregation>()));
+  aggregates.push_back(cudf::make_max_aggregation<cudf::segmented_reduce_aggregation>());
+  aggregates.push_back(cudf::make_min_aggregation<cudf::segmented_reduce_aggregation>());
+  aggregates.push_back(cudf::make_sum_aggregation<cudf::segmented_reduce_aggregation>());
+  aggregates.push_back(cudf::make_product_aggregation<cudf::segmented_reduce_aggregation>());
 
   auto output_type = cudf::data_type{cudf::type_to_id<int32_t>()};
   for (auto&& agg : aggregates) {
diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp
index 1858cd7782e..b12bf08520f 100644
--- a/cpp/tests/replace/replace_tests.cpp
+++ b/cpp/tests/replace/replace_tests.cpp
@@ -356,7 +356,7 @@ void test_replace(cudf::host_span<T const> input_column,
 
   for (size_t i = 0; i < values_to_replace_column.size(); i++) {
     size_t k  = 0;
-    auto pred = [=, &k, &reference_result, &expected_valid, &isReplaced](T element) {
+    auto pred = [=, &k, &expected_valid, &isReplaced](T element) {
       bool toBeReplaced = false;
       if (!isReplaced[k]) {
         if (!input_has_nulls || expected_valid[k]) {
@@ -503,7 +503,7 @@ TYPED_TEST(ReplaceTest, LargeScaleReplaceTest)
   const size_t REPLACE_SIZE = 10000;
 
   thrust::host_vector<TypeParam> input_column(DATA_SIZE);
-  std::generate(std::begin(input_column), std::end(input_column), [REPLACE_SIZE]() {
+  std::generate(std::begin(input_column), std::end(input_column), []() {
     return std::rand() % (REPLACE_SIZE);
   });
 
diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp
index f702dc78371..165e0347785 100644
--- a/cpp/tests/rolling/collect_ops_test.cpp
+++ b/cpp/tests/rolling/collect_ops_test.cpp
@@ -214,7 +214,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowHonoursMinPeriods)
                          *cudf::make_collect_list_aggregation<cudf::rolling_aggregation>());
   auto expected_result_2 = cudf::test::lists_column_wrapper<T, int32_t>{
     {{}, {0, 1, 2, 3}, {1, 2, 3, 4}, {2, 3, 4, 5}, {}, {}},
-    cudf::detail::make_counting_transform_iterator(0, [num_elements](auto i) {
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
       return i != 0 && i < 4;
     })}.release();
 
@@ -338,7 +338,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods)
       cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 4, 8, 12, 12, 12}.release();
     auto expected_num_rows = expected_offsets->size() - 1;
     auto null_mask_iter    = cudf::detail::make_counting_transform_iterator(
-      cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; });
+      cudf::size_type{0}, [](auto i) { return i > 0 && i < 4; });
 
     auto [null_mask, null_count] =
       cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows);
@@ -373,7 +373,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods)
       cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 3, 5, 8, 8, 8}.release();
     auto expected_num_rows = expected_offsets->size() - 1;
     auto null_mask_iter    = cudf::detail::make_counting_transform_iterator(
-      cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; });
+      cudf::size_type{0}, [](auto i) { return i > 0 && i < 4; });
 
     auto [null_mask, null_count] =
       cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows);
@@ -1499,7 +1499,7 @@ TYPED_TEST(TypedCollectSetTest, RollingWindowHonoursMinPeriods)
                         *cudf::make_collect_set_aggregation<cudf::rolling_aggregation>());
   auto expected_result_2 = cudf::test::lists_column_wrapper<T, int32_t>{
     {{}, {0, 1, 2}, {1, 2, 4}, {2, 4, 5}, {}, {}},
-    cudf::detail::make_counting_transform_iterator(0, [num_elements](auto i) {
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
       return i != 0 && i < 4;
     })}.release();
 
diff --git a/cpp/tests/rolling/offset_row_window_test.cpp b/cpp/tests/rolling/offset_row_window_test.cpp
index ec726878b34..0eaab0c9f7a 100644
--- a/cpp/tests/rolling/offset_row_window_test.cpp
+++ b/cpp/tests/rolling/offset_row_window_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,6 +41,11 @@ using cudf::test::iterators::nulls_at;
 
 auto constexpr null = int32_t{0};  // NULL representation for int32_t;
 
+// clang-tidy doesn't think std::transform can handle a
+// thrust::constant_iterator, so this is a workaround that uses nulls_at
+// instead of no_nulls
+auto no_nulls_list() { return nulls_at({}); }
+
 struct OffsetRowWindowTest : public cudf::test::BaseFixture {
   static ints_column const _keys;    // {0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
   static ints_column const _values;  // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
@@ -210,7 +215,8 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_0_to_2)
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(
     *run_rolling(*AGG_COLLECT_LIST),
-    lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, no_nulls});
+    lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}},
+                 no_nulls_list()});
 }
 
 TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2)
@@ -250,7 +256,7 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(
     *run_rolling(*AGG_COLLECT_LIST),
     lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9}, {}},
-                 no_nulls});
+                 no_nulls_list()});
 }
 
 // To test that preceding bounds are clamped correctly at group boundaries.
diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp
index c2c22986975..6e0dc16dca9 100644
--- a/cpp/tests/rolling/rolling_test.cpp
+++ b/cpp/tests/rolling/rolling_test.cpp
@@ -541,7 +541,7 @@ class RollingTest : public cudf::test::BaseFixture {
 
     agg_op op;
     for (cudf::size_type i = 0; i < num_rows; i++) {
-      OutputType val = agg_op::template identity<OutputType>();
+      auto val = agg_op::template identity<OutputType>();
 
       // load sizes
       min_periods = std::max(min_periods, 1);  // at least one observation is required
diff --git a/cpp/tests/scalar/scalar_test.cpp b/cpp/tests/scalar/scalar_test.cpp
index 2d37de920d5..2b79911a95a 100644
--- a/cpp/tests/scalar/scalar_test.cpp
+++ b/cpp/tests/scalar/scalar_test.cpp
@@ -190,7 +190,7 @@ TEST_F(ListScalarTest, MoveConstructorNonNested)
 
   EXPECT_EQ(mask_ptr, s2.validity_data());
   EXPECT_EQ(data_ptr, s2.view().data<int32_t>());
-  EXPECT_EQ(s.view().data<int32_t>(), nullptr);
+  EXPECT_EQ(s.view().data<int32_t>(), nullptr);  // NOLINT
 }
 
 TEST_F(ListScalarTest, MoveConstructorNested)
@@ -205,8 +205,8 @@ TEST_F(ListScalarTest, MoveConstructorNested)
   EXPECT_EQ(mask_ptr, s2.validity_data());
   EXPECT_EQ(offset_ptr, s2.view().child(0).data<cudf::size_type>());
   EXPECT_EQ(data_ptr, s2.view().child(1).data<int32_t>());
-  EXPECT_EQ(s.view().data<int32_t>(), nullptr);
-  EXPECT_EQ(s.view().num_children(), 0);
+  EXPECT_EQ(s.view().data<int32_t>(), nullptr);  // NOLINT
+  EXPECT_EQ(s.view().num_children(), 0);         // NOLINT
 }
 
 struct StructScalarTest : public cudf::test::BaseFixture {};
diff --git a/cpp/tests/search/search_list_test.cpp b/cpp/tests/search/search_list_test.cpp
index 48711c21715..7584003e800 100644
--- a/cpp/tests/search/search_list_test.cpp
+++ b/cpp/tests/search/search_list_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,7 +35,6 @@ using strings_col = cudf::test::strings_column_wrapper;
 
 constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR};
 constexpr int32_t null{0};  // Mark for null child elements at the current level
-constexpr int32_t XXX{0};   // Mark for null elements at all levels
 
 using TestTypes = cudf::test::Concat<cudf::test::IntegralTypesNotBool,
                                      cudf::test::FloatingPointTypes,
diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp
index e84275f41ef..6a35e977b46 100644
--- a/cpp/tests/sort/sort_test.cpp
+++ b/cpp/tests/sort/sort_test.cpp
@@ -1087,7 +1087,7 @@ TEST_F(SortCornerTest, WithEmptyStructColumn)
   child_columns2.push_back(std::move(child_col_1));
   int_col col4{{5, 4, 3, 2, 1, 0}};
   std::vector<std::unique_ptr<cudf::column>> grand_child;
-  grand_child.push_back(std::move(col4.release()));
+  grand_child.push_back(col4.release());
   auto child_col_2 = cudf::make_structs_column(6, std::move(grand_child), 0, rmm::device_buffer{});
   child_columns2.push_back(std::move(child_col_2));
   auto struct_col3 =
diff --git a/cpp/tests/stream_compaction/unique_tests.cpp b/cpp/tests/stream_compaction/unique_tests.cpp
index 4d7d23dc881..d5b6915b520 100644
--- a/cpp/tests/stream_compaction/unique_tests.cpp
+++ b/cpp/tests/stream_compaction/unique_tests.cpp
@@ -43,7 +43,6 @@ auto constexpr KEEP_ANY     = cudf::duplicate_keep_option::KEEP_ANY;
 auto constexpr KEEP_FIRST   = cudf::duplicate_keep_option::KEEP_FIRST;
 auto constexpr KEEP_LAST    = cudf::duplicate_keep_option::KEEP_LAST;
 auto constexpr KEEP_NONE    = cudf::duplicate_keep_option::KEEP_NONE;
-auto constexpr NULL_EQUAL   = cudf::null_equality::EQUAL;
 auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL;
 
 using int32s_col  = cudf::test::fixed_width_column_wrapper<int32_t>;
diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp
index 443f4548b2c..07b2d77cc04 100644
--- a/cpp/tests/streams/stream_compaction_test.cpp
+++ b/cpp/tests/streams/stream_compaction_test.cpp
@@ -29,8 +29,6 @@
 
 #include <cmath>
 
-auto constexpr null{0};  // null at current level
-auto constexpr XXX{0};   // null pushed down from parent level
 auto constexpr NaN          = std::numeric_limits<double>::quiet_NaN();
 auto constexpr KEEP_ANY     = cudf::duplicate_keep_option::KEEP_ANY;
 auto constexpr KEEP_FIRST   = cudf::duplicate_keep_option::KEEP_FIRST;
diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp
index ce5f68de3c9..26bcfe8028d 100644
--- a/cpp/tests/strings/integers_tests.cpp
+++ b/cpp/tests/strings/integers_tests.cpp
@@ -30,6 +30,7 @@
 #include <thrust/host_vector.h>
 #include <thrust/iterator/transform_iterator.h>
 
+#include <array>
 #include <string>
 #include <vector>
 
@@ -425,7 +426,7 @@ TYPED_TEST(StringsIntegerConvertTest, IntegerToHex)
     if (v == 0) { return std::string("00"); }
     // special handling for single-byte types
     if constexpr (std::is_same_v<TypeParam, int8_t> || std::is_same_v<TypeParam, uint8_t>) {
-      char const hex_digits[16] = {
+      std::array const hex_digits = {
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
       std::string str;
       str += hex_digits[(v & 0xF0) >> 4];
diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp
index f0010fc1ed9..219bd6d8b01 100644
--- a/cpp/tests/structs/structs_column_tests.cpp
+++ b/cpp/tests/structs/structs_column_tests.cpp
@@ -635,9 +635,8 @@ TEST_F(StructColumnWrapperTest, TestStructsColumnWithEmptyChild)
   auto mask_vec = std::vector<bool>{true, false, false};
   auto [null_mask, null_count] =
     cudf::test::detail::make_null_mask(mask_vec.begin(), mask_vec.end());
-  auto structs_col =
-    cudf::make_structs_column(num_rows, std::move(cols), null_count, std::move(null_mask));
-  EXPECT_NO_THROW(structs_col->view());
+  EXPECT_NO_THROW(auto structs_col = cudf::make_structs_column(
+                    num_rows, std::move(cols), null_count, std::move(null_mask)));
 }
 
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/transform/bools_to_mask_test.cpp b/cpp/tests/transform/bools_to_mask_test.cpp
index 215ca158f37..2684123c08a 100644
--- a/cpp/tests/transform/bools_to_mask_test.cpp
+++ b/cpp/tests/transform/bools_to_mask_test.cpp
@@ -32,7 +32,7 @@ struct MaskToNullTest : public cudf::test::BaseFixture {
   {
     cudf::test::fixed_width_column_wrapper<bool> input_column(
       input.begin(), input.end(), val.begin());
-    std::transform(val.begin(), val.end(), input.begin(), input.begin(), std::logical_and<bool>());
+    std::transform(val.begin(), val.end(), input.begin(), input.begin(), std::logical_and<>());
 
     auto sample = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; });
 
diff --git a/cpp/tests/transform/integration/unary_transform_test.cpp b/cpp/tests/transform/integration/unary_transform_test.cpp
index 1785848ec77..0bdf5b321ac 100644
--- a/cpp/tests/transform/integration/unary_transform_test.cpp
+++ b/cpp/tests/transform/integration/unary_transform_test.cpp
@@ -47,7 +47,7 @@ void test_udf(char const* udf, Op op, Data data_init, cudf::size_type size, bool
 TEST_F(UnaryOperationIntegrationTest, Transform_FP32_FP32)
 {
   // c = a*a*a*a
-  char const* cuda =
+  std::string const cuda =
     R"***(
 __device__ inline void    fdsf   (
        float* C,
@@ -58,7 +58,7 @@ __device__ inline void    fdsf   (
 }
 )***";
 
-  char const* ptx =
+  std::string const ptx =
     R"***(
 //
 // Generated by NVIDIA NVVM Compiler
@@ -101,17 +101,17 @@ __device__ inline void    fdsf   (
   auto op        = [](dtype a) { return a * a * a * a; };
   auto data_init = [](cudf::size_type row) { return row % 3; };
 
-  test_udf<dtype>(cuda, op, data_init, 500, false);
-  test_udf<dtype>(ptx, op, data_init, 500, true);
+  test_udf<dtype>(cuda.c_str(), op, data_init, 500, false);
+  test_udf<dtype>(ptx.c_str(), op, data_init, 500, true);
 }
 
 TEST_F(UnaryOperationIntegrationTest, Transform_INT32_INT32)
 {
   // c = a * a - a
-  char const cuda[] =
+  std::string const cuda =
     "__device__ inline void f(int* output,int input){*output = input*input - input;}";
 
-  char const* ptx =
+  std::string const ptx =
     R"***(
 .func _Z1fPii(
         .param .b64 _Z1fPii_param_0,
@@ -136,8 +136,8 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT32_INT32)
   auto op        = [](dtype a) { return a * a - a; };
   auto data_init = [](cudf::size_type row) { return row % 78; };
 
-  test_udf<dtype>(cuda, op, data_init, 500, false);
-  test_udf<dtype>(ptx, op, data_init, 500, true);
+  test_udf<dtype>(cuda.c_str(), op, data_init, 500, false);
+  test_udf<dtype>(ptx.c_str(), op, data_init, 500, true);
 }
 
 TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8)
@@ -145,7 +145,7 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8)
   // Capitalize all the lower case letters
   // Assuming ASCII, the PTX code is compiled from the following CUDA code
 
-  char const cuda[] =
+  std::string const cuda =
     R"***(
 __device__ inline void f(
   signed char* output,
@@ -159,7 +159,7 @@ __device__ inline void f(
 }
 )***";
 
-  char const ptx[] =
+  std::string const ptx =
     R"***(
 .func _Z1fPcc(
         .param .b64 _Z1fPcc_param_0,
@@ -191,15 +191,15 @@ __device__ inline void f(
   auto op        = [](dtype a) { return std::toupper(a); };
   auto data_init = [](cudf::size_type row) { return 'a' + (row % 26); };
 
-  test_udf<dtype>(cuda, op, data_init, 500, false);
-  test_udf<dtype>(ptx, op, data_init, 500, true);
+  test_udf<dtype>(cuda.c_str(), op, data_init, 500, false);
+  test_udf<dtype>(ptx.c_str(), op, data_init, 500, true);
 }
 
 TEST_F(UnaryOperationIntegrationTest, Transform_Datetime)
 {
   // Add one day to timestamp in microseconds
 
-  char const cuda[] =
+  std::string const cuda =
     R"***(
 __device__ inline void f(cudf::timestamp_us* output, cudf::timestamp_us input)
 {
@@ -217,7 +217,7 @@ __device__ inline void f(cudf::timestamp_us* output, cudf::timestamp_us input)
   auto random_eng = cudf::test::UniformRandomGenerator<cudf::timestamp_us::rep>(0, 100000000);
   auto data_init  = [&random_eng](cudf::size_type row) { return random_eng.generate(); };
 
-  test_udf<dtype>(cuda, op, data_init, 500, false);
+  test_udf<dtype>(cuda.c_str(), op, data_init, 500, false);
 }
 
 }  // namespace transformation

From 2d02bdce9e3efae232dea4a5b8b2eecf5c0f8a93 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Mon, 7 Oct 2024 16:30:58 -0500
Subject: [PATCH 25/25] Implement `extract_datetime_component` in
 `libcudf`/`pylibcudf` (#16776)

Closes https://github.com/rapidsai/cudf/issues/16735

Authors:
  - https://github.com/brandon-b-miller
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Lawrence Mitchell (https://github.com/wence-)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16776
---
 cpp/include/cudf/datetime.hpp                 |  34 +++++
 cpp/include/cudf/detail/datetime.hpp          |  10 ++
 cpp/src/datetime/datetime_ops.cu              |  88 ++++++------
 cpp/tests/datetime/datetime_ops_test.cpp      | 130 ++++++++++++++++++
 python/cudf/cudf/_lib/datetime.pyx            |  39 +++++-
 python/cudf_polars/cudf_polars/dsl/expr.py    |  40 ++++--
 python/pylibcudf/pylibcudf/datetime.pxd       |   7 +
 python/pylibcudf/pylibcudf/datetime.pyx       |  71 ++++------
 .../pylibcudf/libcudf/CMakeLists.txt          |   5 +-
 .../pylibcudf/pylibcudf/libcudf/datetime.pxd  |  17 +++
 .../pylibcudf/pylibcudf/libcudf/datetime.pyx  |   0
 .../pylibcudf/tests/test_datetime.py          |  55 ++++----
 12 files changed, 358 insertions(+), 138 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/libcudf/datetime.pyx

diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp
index 7359a0d5fde..1eaea5b6374 100644
--- a/cpp/include/cudf/datetime.hpp
+++ b/cpp/include/cudf/datetime.hpp
@@ -38,6 +38,22 @@ namespace datetime {
  * @file
  */
 
+/**
+ * @brief Types of datetime components that may be extracted.
+ */
+enum class datetime_component : uint8_t {
+  YEAR,
+  MONTH,
+  DAY,
+  WEEKDAY,
+  HOUR,
+  MINUTE,
+  SECOND,
+  MILLISECOND,
+  MICROSECOND,
+  NANOSECOND
+};
+
 /**
  * @brief  Extracts year from any datetime type and returns an int16_t
  * cudf::column.
@@ -207,6 +223,24 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
+/**
+ * @brief Extracts the specified datetime component from any datetime type and
+ * returns an int16_t cudf::column.
+ *
+ * @param column cudf::column_view of the input datetime values
+ * @param component The datetime component to extract
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate device memory of the returned column
+ *
+ * @returns cudf::column of the extracted int16_t datetime component
+ * @throw cudf::logic_error if input column datatype is not TIMESTAMP
+ */
+std::unique_ptr<cudf::column> extract_datetime_component(
+  cudf::column_view const& column,
+  datetime_component component,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
 /** @} */  // end of group
 /**
  * @addtogroup datetime_compute
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp
index 9db7e48498f..df3050d6494 100644
--- a/cpp/include/cudf/detail/datetime.hpp
+++ b/cpp/include/cudf/detail/datetime.hpp
@@ -115,6 +115,16 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(cudf::column_view cons
                                                           rmm::cuda_stream_view stream,
                                                           rmm::device_async_resource_ref mr);
 
+/**
+ * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component,
+ * rmm::cuda_stream_view, rmm::device_async_resource_ref)
+ *
+ */
+std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
+                                                         datetime_component component,
+                                                         rmm::cuda_stream_view stream,
+                                                         rmm::device_async_resource_ref mr);
+
 /**
  * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view,
  * rmm::device_async_resource_ref)
diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu
index ddb0dbcd96d..a497cedb3bc 100644
--- a/cpp/src/datetime/datetime_ops.cu
+++ b/cpp/src/datetime/datetime_ops.cu
@@ -44,19 +44,6 @@
 namespace cudf {
 namespace datetime {
 namespace detail {
-enum class datetime_component {
-  INVALID = 0,
-  YEAR,
-  MONTH,
-  DAY,
-  WEEKDAY,
-  HOUR,
-  MINUTE,
-  SECOND,
-  MILLISECOND,
-  MICROSECOND,
-  NANOSECOND
-};
 
 enum class rounding_function {
   CEIL,   ///< Rounds up to the next integer multiple of the provided frequency
@@ -453,90 +440,70 @@ std::unique_ptr<column> extract_year(column_view const& column,
                                      rmm::cuda_stream_view stream,
                                      rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::YEAR>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr);
 }
 
 std::unique_ptr<column> extract_month(column_view const& column,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::MONTH>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr);
 }
 
 std::unique_ptr<column> extract_day(column_view const& column,
                                     rmm::cuda_stream_view stream,
                                     rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::DAY>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr);
 }
 
 std::unique_ptr<column> extract_weekday(column_view const& column,
                                         rmm::cuda_stream_view stream,
                                         rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::WEEKDAY>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr);
 }
 
 std::unique_ptr<column> extract_hour(column_view const& column,
                                      rmm::cuda_stream_view stream,
                                      rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::HOUR>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr);
 }
 
 std::unique_ptr<column> extract_minute(column_view const& column,
                                        rmm::cuda_stream_view stream,
                                        rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::MINUTE>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr);
 }
 
 std::unique_ptr<column> extract_second(column_view const& column,
                                        rmm::cuda_stream_view stream,
                                        rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::SECOND>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr);
 }
 
 std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
                                                      rmm::cuda_stream_view stream,
                                                      rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::MILLISECOND>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr);
 }
 
 std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
                                                      rmm::cuda_stream_view stream,
                                                      rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::MICROSECOND>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr);
 }
 
 std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::NANOSECOND>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr);
 }
 
 std::unique_ptr<column> last_day_of_month(column_view const& column,
@@ -576,6 +543,32 @@ std::unique_ptr<column> extract_quarter(column_view const& column,
   return apply_datetime_op<extract_quarter_op, type_id::INT16>(column, stream, mr);
 }
 
+std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
+                                                         datetime_component component,
+                                                         rmm::cuda_stream_view stream,
+                                                         rmm::device_async_resource_ref mr)
+{
+#define extract(field)                                                                 \
+  case field:                                                                          \
+    return apply_datetime_op<extract_component_operator<field>, cudf::type_id::INT16>( \
+      column, stream, mr)
+
+  switch (component) {
+    extract(datetime_component::YEAR);
+    extract(datetime_component::MONTH);
+    extract(datetime_component::DAY);
+    extract(datetime_component::WEEKDAY);
+    extract(datetime_component::HOUR);
+    extract(datetime_component::MINUTE);
+    extract(datetime_component::SECOND);
+    extract(datetime_component::MILLISECOND);
+    extract(datetime_component::MICROSECOND);
+    extract(datetime_component::NANOSECOND);
+    default: CUDF_FAIL("Unsupported datetime component.");
+  }
+#undef extract
+}
+
 }  // namespace detail
 
 std::unique_ptr<column> ceil_datetimes(column_view const& column,
@@ -661,6 +654,15 @@ std::unique_ptr<column> extract_second(column_view const& column,
   return detail::extract_second(column, stream, mr);
 }
 
+std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
+                                                         datetime_component component,
+                                                         rmm::cuda_stream_view stream,
+                                                         rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::extract_datetime_component(column, component, stream, mr);
+}
+
 std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
                                                      rmm::cuda_stream_view stream,
                                                      rmm::device_async_resource_ref mr)
diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp
index 13577c4d0ea..603edb27c7c 100644
--- a/cpp/tests/datetime/datetime_ops_test.cpp
+++ b/cpp/tests/datetime/datetime_ops_test.cpp
@@ -196,6 +196,136 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
                                  fixed_width_column_wrapper<int16_t>{0, 0, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns),
                                  fixed_width_column_wrapper<int16_t>{766, 424, 623});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::YEAR),
+    fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::YEAR),
+    fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::YEAR),
+    fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::YEAR),
+    fixed_width_column_wrapper<int16_t>{1969, 1970, 1970});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MONTH),
+    fixed_width_column_wrapper<int16_t>{10, 7, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MONTH),
+    fixed_width_column_wrapper<int16_t>{10, 7, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MONTH),
+    fixed_width_column_wrapper<int16_t>{10, 7, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MONTH),
+    fixed_width_column_wrapper<int16_t>{12, 1, 1});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::DAY),
+    fixed_width_column_wrapper<int16_t>{26, 4, 25});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::DAY),
+    fixed_width_column_wrapper<int16_t>{26, 4, 25});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::DAY),
+    fixed_width_column_wrapper<int16_t>{26, 4, 25});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::DAY),
+    fixed_width_column_wrapper<int16_t>{31, 1, 1});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::WEEKDAY),
+    fixed_width_column_wrapper<int16_t>{2, 3, 3});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::WEEKDAY),
+    fixed_width_column_wrapper<int16_t>{2, 3, 3});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY),
+    fixed_width_column_wrapper<int16_t>{2, 3, 3});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::WEEKDAY),
+    fixed_width_column_wrapper<int16_t>{3, 4, 4});  // Wed 1969-12-31, Thu 1970-01-01 (x2)
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::HOUR),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::HOUR),
+    fixed_width_column_wrapper<int16_t>{14, 12, 7});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::HOUR),
+    fixed_width_column_wrapper<int16_t>{14, 12, 7});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::HOUR),
+    fixed_width_column_wrapper<int16_t>{23, 0, 0});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MINUTE),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MINUTE),
+    fixed_width_column_wrapper<int16_t>{1, 0, 32});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MINUTE),
+    fixed_width_column_wrapper<int16_t>{1, 0, 32});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MINUTE),
+    fixed_width_column_wrapper<int16_t>{59, 0, 0});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::SECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::SECOND),
+    fixed_width_column_wrapper<int16_t>{12, 0, 12});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::SECOND),
+    fixed_width_column_wrapper<int16_t>{12, 0, 12});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::SECOND),
+    fixed_width_column_wrapper<int16_t>{59, 0, 0});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MILLISECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MILLISECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MILLISECOND),
+    fixed_width_column_wrapper<int16_t>{762, 0, 929});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MILLISECOND),
+    fixed_width_column_wrapper<int16_t>{976, 23, 987});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MICROSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MICROSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MICROSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MICROSECOND),
+    fixed_width_column_wrapper<int16_t>{675, 432, 234});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::NANOSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::NANOSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::NANOSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::NANOSECOND),
+    fixed_width_column_wrapper<int16_t>{766, 424, 623});
 }
 
 template <typename T>
diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
index bc5e085ec39..d844466120f 100644
--- a/python/cudf/cudf/_lib/datetime.pyx
+++ b/python/cudf/cudf/_lib/datetime.pyx
@@ -13,12 +13,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.filling cimport calendrical_month_sequence
 from pylibcudf.libcudf.scalar.scalar cimport scalar
 from pylibcudf.libcudf.types cimport size_type
+from pylibcudf.datetime import DatetimeComponent
 
 from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
 
-import pylibcudf as plc
-
 
 @acquire_spill_lock()
 def add_months(Column col, Column months):
@@ -40,9 +39,39 @@ def add_months(Column col, Column months):
 
 @acquire_spill_lock()
 def extract_datetime_component(Column col, object field):
-    result = Column.from_pylibcudf(
-        plc.datetime.extract_datetime_component(col.to_pylibcudf(mode="read"), field)
-    )
+
+    cdef unique_ptr[column] c_result
+    cdef column_view col_view = col.view()
+    cdef libcudf_datetime.datetime_component component
+
+    component_names = {
+        "year": DatetimeComponent.YEAR,
+        "month": DatetimeComponent.MONTH,
+        "day": DatetimeComponent.DAY,
+        "weekday": DatetimeComponent.WEEKDAY,
+        "hour": DatetimeComponent.HOUR,
+        "minute": DatetimeComponent.MINUTE,
+        "second": DatetimeComponent.SECOND,
+        "millisecond": DatetimeComponent.MILLISECOND,
+        "microsecond": DatetimeComponent.MICROSECOND,
+        "nanosecond": DatetimeComponent.NANOSECOND,
+    }
+    if field == "day_of_year":
+        with nogil:
+            c_result = move(libcudf_datetime.day_of_year(col_view))
+    elif field in component_names:
+        component = component_names[field]
+        with nogil:
+            c_result = move(
+                libcudf_datetime.extract_datetime_component(
+                    col_view,
+                    component
+                )
+            )
+    else:
+        raise ValueError(f"Invalid field: '{field}'")
+
+    result = Column.from_unique_ptr(move(c_result))
 
     if field == "weekday":
         # Pandas counts Monday-Sunday as 0-6
diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index 54476b7fedc..a418560b31c 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -961,16 +961,16 @@ def do_evaluate(
 class TemporalFunction(Expr):
     __slots__ = ("name", "options", "children")
-    _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = {
-        pl_expr.TemporalFunction.Year: "year",
-        pl_expr.TemporalFunction.Month: "month",
-        pl_expr.TemporalFunction.Day: "day",
-        pl_expr.TemporalFunction.WeekDay: "weekday",
-        pl_expr.TemporalFunction.Hour: "hour",
-        pl_expr.TemporalFunction.Minute: "minute",
-        pl_expr.TemporalFunction.Second: "second",
-        pl_expr.TemporalFunction.Millisecond: "millisecond",
-        pl_expr.TemporalFunction.Microsecond: "microsecond",
-        pl_expr.TemporalFunction.Nanosecond: "nanosecond",
+    _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, plc.datetime.DatetimeComponent]] = {
+        pl_expr.TemporalFunction.Year: plc.datetime.DatetimeComponent.YEAR,
+        pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH,
+        pl_expr.TemporalFunction.Day: plc.datetime.DatetimeComponent.DAY,
+        pl_expr.TemporalFunction.WeekDay: plc.datetime.DatetimeComponent.WEEKDAY,
+        pl_expr.TemporalFunction.Hour: plc.datetime.DatetimeComponent.HOUR,
+        pl_expr.TemporalFunction.Minute: plc.datetime.DatetimeComponent.MINUTE,
+        pl_expr.TemporalFunction.Second: plc.datetime.DatetimeComponent.SECOND,
+        pl_expr.TemporalFunction.Millisecond: plc.datetime.DatetimeComponent.MILLISECOND,
+        pl_expr.TemporalFunction.Microsecond: plc.datetime.DatetimeComponent.MICROSECOND,
+        pl_expr.TemporalFunction.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND,
     }
     _non_child = ("dtype", "name", "options")
     children: tuple[Expr, ...]
@@ -1003,8 +1003,12 @@ def do_evaluate(
         ]
         (column,) = columns
         if self.name == pl_expr.TemporalFunction.Microsecond:
-            millis = plc.datetime.extract_datetime_component(column.obj, "millisecond")
-            micros = plc.datetime.extract_datetime_component(column.obj, "microsecond")
+            millis = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.MILLISECOND
+            )
+            micros = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.MICROSECOND
+            )
             millis_as_micros = plc.binaryop.binary_operation(
                 millis,
                 plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())),
@@ -1019,9 +1023,15 @@ def do_evaluate(
             )
             return Column(total_micros)
         elif self.name == pl_expr.TemporalFunction.Nanosecond:
-            millis = plc.datetime.extract_datetime_component(column.obj, "millisecond")
-            micros = plc.datetime.extract_datetime_component(column.obj, "microsecond")
-            nanos = plc.datetime.extract_datetime_component(column.obj, "nanosecond")
+            millis = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.MILLISECOND
+            )
+            micros = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.MICROSECOND
+            )
+            nanos = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.NANOSECOND
+            )
             millis_as_nanos = plc.binaryop.binary_operation(
                 millis,
                 plc.interop.from_arrow(pa.scalar(1_000_000, type=pa.int32())),
diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd
index 2fce48cf1b4..72ce680ba7a 100644
--- a/python/pylibcudf/pylibcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/datetime.pxd
@@ -1,8 +1,15 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
+from pylibcudf.libcudf.datetime cimport datetime_component
+
 from .column cimport Column
 
 
 cpdef Column extract_year(
     Column col
 )
+
+cpdef Column extract_datetime_component(
+    Column col,
+    datetime_component component
+)
diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx
index e8e0caaf42d..784d29128bf 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyx
+++ b/python/pylibcudf/pylibcudf/datetime.pyx
@@ -3,19 +3,14 @@ from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.datetime cimport (
-    day_of_year as cpp_day_of_year,
-    extract_day as cpp_extract_day,
-    extract_hour as cpp_extract_hour,
-    extract_microsecond_fraction as cpp_extract_microsecond_fraction,
-    extract_millisecond_fraction as cpp_extract_millisecond_fraction,
-    extract_minute as cpp_extract_minute,
-    extract_month as cpp_extract_month,
-    extract_nanosecond_fraction as cpp_extract_nanosecond_fraction,
-    extract_second as cpp_extract_second,
-    extract_weekday as cpp_extract_weekday,
+    datetime_component,
+    extract_datetime_component as cpp_extract_datetime_component,
     extract_year as cpp_extract_year,
 )
 
+from pylibcudf.libcudf.datetime import \
+    datetime_component as DatetimeComponent  # no-cython-lint
+
 from .column cimport Column
 
 
@@ -41,41 +36,29 @@ cpdef Column extract_year(
         result = move(cpp_extract_year(values.view()))
     return Column.from_libcudf(move(result))
 
+cpdef Column extract_datetime_component(
+    Column values,
+    datetime_component component
+):
+    """
+    Extract a datetime component from a datetime column.
 
-def extract_datetime_component(Column col, str field):
+    For details, see :cpp:func:`cudf::extract_datetime_component`.
 
-    cdef unique_ptr[column] c_result
+    Parameters
+    ----------
+    values : Column
+        The column to extract the component from.
+    component : DatetimeComponent
+        The datetime component to extract.
 
-    with nogil:
-        if field == "year":
-            c_result = move(cpp_extract_year(col.view()))
-        elif field == "month":
-            c_result = move(cpp_extract_month(col.view()))
-        elif field == "day":
-            c_result = move(cpp_extract_day(col.view()))
-        elif field == "weekday":
-            c_result = move(cpp_extract_weekday(col.view()))
-        elif field == "hour":
-            c_result = move(cpp_extract_hour(col.view()))
-        elif field == "minute":
-            c_result = move(cpp_extract_minute(col.view()))
-        elif field == "second":
-            c_result = move(cpp_extract_second(col.view()))
-        elif field == "millisecond":
-            c_result = move(
-                cpp_extract_millisecond_fraction(col.view())
-            )
-        elif field == "microsecond":
-            c_result = move(
-                cpp_extract_microsecond_fraction(col.view())
-            )
-        elif field == "nanosecond":
-            c_result = move(
-                cpp_extract_nanosecond_fraction(col.view())
-            )
-        elif field == "day_of_year":
-            c_result = move(cpp_day_of_year(col.view()))
-        else:
-            raise ValueError(f"Invalid datetime field: '{field}'")
+    Returns
+    -------
+    Column
+        Column with the extracted component.
+    """
+    cdef unique_ptr[column] result
 
-    return Column.from_libcudf(move(c_result))
+    with nogil:
+        result = move(cpp_extract_datetime_component(values.view(), component))
+    return Column.from_libcudf(move(result))
diff --git a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
index 2167616690f..15beaee47d4 100644
--- a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
@@ -12,8 +12,9 @@
 # the License.
 # =============================================================================
 
-set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx labeling.pyx reduce.pyx
-                   replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx
+set(cython_sources
+    aggregation.pyx binaryop.pyx copying.pyx datetime.pyx expressions.pyx labeling.pyx reduce.pyx
+    replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx
 )
 
 set(linked_libraries cudf::cudf)
diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
index a4465343197..73cdfb96af5 100644
--- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
@@ -1,5 +1,6 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
+from libc.stdint cimport uint8_t
 from libcpp.memory cimport unique_ptr
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
@@ -7,6 +8,18 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar
 
 
 cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
+    cpdef enum class datetime_component(uint8_t):
+        YEAR
+        MONTH
+        DAY
+        WEEKDAY
+        HOUR
+        MINUTE
+        SECOND
+        MILLISECOND
+        MICROSECOND
+        NANOSECOND
+
     cdef unique_ptr[column] extract_year(const column_view& column) except +
     cdef unique_ptr[column] extract_month(const column_view& column) except +
     cdef unique_ptr[column] extract_day(const column_view& column) except +
@@ -23,6 +36,10 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
     cdef unique_ptr[column] extract_nanosecond_fraction(
         const column_view& column
     ) except +
+    cdef unique_ptr[column] extract_datetime_component(
+        const column_view& column,
+        datetime_component component
+    ) except +
 
     ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency":
         DAY "cudf::datetime::rounding_frequency::DAY"
diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pyx b/python/pylibcudf/pylibcudf/libcudf/datetime.pyx
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py
index 89c96829e71..75930d59058 100644
--- a/python/pylibcudf/pylibcudf/tests/test_datetime.py
+++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
 import datetime
-import functools
 
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -10,19 +9,6 @@
 from utils import assert_column_eq
 
 
-@pytest.fixture
-def date_column(has_nulls):
-    values = [
-        datetime.date(1999, 1, 1),
-        datetime.date(2024, 10, 12),
-        datetime.date(1, 1, 1),
-        datetime.date(9999, 1, 1),
-    ]
-    if has_nulls:
-        values[2] = None
-    return plc.interop.from_arrow(pa.array(values, type=pa.date32()))
-
-
 @pytest.fixture(scope="module", params=["s", "ms", "us", "ns"])
 def datetime_column(has_nulls, request):
     values = [
@@ -40,24 +26,35 @@ def datetime_column(has_nulls, request):
     )
 
 
-@pytest.mark.parametrize(
-    "component, pc_fun",
-    [
-        ("year", pc.year),
-        ("month", pc.month),
-        ("day", pc.day),
-        ("weekday", functools.partial(pc.day_of_week, count_from_zero=False)),
-        ("hour", pc.hour),
-        ("minute", pc.minute),
-        ("second", pc.second),
-        ("millisecond", pc.millisecond),
-        ("microsecond", pc.microsecond),
-        ("nanosecond", pc.nanosecond),
+@pytest.fixture(
+    params=[
+        ("year", plc.datetime.DatetimeComponent.YEAR),
+        ("month", plc.datetime.DatetimeComponent.MONTH),
+        ("day", plc.datetime.DatetimeComponent.DAY),
+        ("day_of_week", plc.datetime.DatetimeComponent.WEEKDAY),
+        ("hour", plc.datetime.DatetimeComponent.HOUR),
+        ("minute", plc.datetime.DatetimeComponent.MINUTE),
+        ("second", plc.datetime.DatetimeComponent.SECOND),
+        ("millisecond", plc.datetime.DatetimeComponent.MILLISECOND),
+        ("microsecond", plc.datetime.DatetimeComponent.MICROSECOND),
+        ("nanosecond", plc.datetime.DatetimeComponent.NANOSECOND),
     ],
+    ids=lambda x: x[0],
 )
-def test_extraction(datetime_column, component, pc_fun):
+def component(request):
+    return request.param
+
+
+def test_extract_datetime_component(datetime_column, component):
+    attr, component = component
+    kwargs = {}
+    if attr == "day_of_week":
+        kwargs = {"count_from_zero": False}
     got = plc.datetime.extract_datetime_component(datetime_column, component)
     # libcudf produces an int16, arrow produces an int64
-    expect = pc_fun(plc.interop.to_arrow(datetime_column)).cast(pa.int16())
+
+    expect = getattr(pc, attr)(
+        plc.interop.to_arrow(datetime_column), **kwargs
+    ).cast(pa.int16())
 
     assert_column_eq(expect, got)