From 2fa2e6a554096181b0a625cdc50368893dbaaa1f Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Fri, 4 Oct 2024 16:08:37 +0100 Subject: [PATCH 01/33] Switched AST benchmarks from GoogleBench to NVBench (#16952) This merge request switches the benchmarking framework for the AST benchmark from GoogleBench to NVBench. ~It also refactors the L2 cache flushing functionality of `cuda_event_timer` into a separate function `flush_L2_device_cache`, since NVBench already performs the timing, synchronization, and timer setup necessary.~ Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - Bradley Dice (https://github.com/bdice) - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/16952 --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/ast/transform.cpp | 51 +++++++++++--------------------- 2 files changed, 18 insertions(+), 35 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 110b4557840..1e13bf176c1 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -330,7 +330,7 @@ ConfigureNVBench(CSV_WRITER_NVBENCH io/csv/csv_writer.cpp) # ################################################################################################## # * ast benchmark --------------------------------------------------------------------------------- -ConfigureBench(AST_BENCH ast/transform.cpp) +ConfigureNVBench(AST_NVBENCH ast/transform.cpp) # ################################################################################################## # * binaryop benchmark ---------------------------------------------------------------------------- diff --git a/cpp/benchmarks/ast/transform.cpp b/cpp/benchmarks/ast/transform.cpp index 65a44532cf1..f44f26e4d2c 100644 --- a/cpp/benchmarks/ast/transform.cpp +++ b/cpp/benchmarks/ast/transform.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,14 +15,16 @@ */ #include -#include -#include #include #include +#include + #include +#include + #include #include #include @@ -35,13 +37,10 @@ enum class TreeType { }; template -class AST : public cudf::benchmark {}; - -template -static void BM_ast_transform(benchmark::State& state) +static void BM_ast_transform(nvbench::state& state) { - auto const table_size{static_cast(state.range(0))}; - auto const tree_levels{static_cast(state.range(1))}; + auto const table_size = static_cast(state.get_int64("table_size")); + auto const tree_levels = static_cast(state.get_int64("tree_levels")); // Create table data auto const n_cols = reuse_columns ? 
1 : tree_levels + 1; @@ -86,38 +85,22 @@ static void BM_ast_transform(benchmark::State& state) auto const& expression_tree_root = expressions.back(); - // Execute benchmark - for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 - cudf::compute_column(table, expression_tree_root); - } - // Use the number of bytes read from global memory - state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * - (tree_levels + 1) * sizeof(key_type)); -} + // Use the number of bytes read from global memory + state.add_global_memory_reads(table_size * (tree_levels + 1)); -static void CustomRanges(benchmark::internal::Benchmark* b) -{ - auto row_counts = std::vector{100'000, 1'000'000, 10'000'000, 100'000'000}; - auto operation_counts = std::vector{1, 5, 10}; - for (auto const& row_count : row_counts) { - for (auto const& operation_count : operation_counts) { - b->Args({row_count, operation_count}); - } - } + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch&) { cudf::compute_column(table, expression_tree_root); }); } #define AST_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns, nullable) \ - BENCHMARK_TEMPLATE_DEFINE_F(AST, name, key_type, tree_type, reuse_columns, nullable) \ - (::benchmark::State & st) \ + static void name(::nvbench::state& st) \ { \ - BM_ast_transform(st); \ + ::BM_ast_transform(st); \ } \ - BENCHMARK_REGISTER_F(AST, name) \ - ->Apply(CustomRanges) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); + NVBENCH_BENCH(name) \ + .set_name(#name) \ + .add_int64_axis("tree_levels", {1, 5, 10}) \ + .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) AST_TRANSFORM_BENCHMARK_DEFINE( ast_int32_imbalanced_unique, int32_t, TreeType::IMBALANCED_LEFT, false, false); From a78432184f20f7acf493eaa8d1928cfee29d1771 Mon Sep 17 00:00:00 2001 From: Basit Ayantunde Date: Fri, 4 Oct 2024 16:19:37 +0100 Subject: [PATCH 02/33] Switched BINARY_OP Benchmarks from GoogleBench to NVBench (#16963) This merge request switches the benchmarking framework for the BINARY_OP benchmarks from GoogleBench to NVBench. Authors: - Basit Ayantunde (https://github.com/lamarrr) Approvers: - Nghia Truong (https://github.com/ttnghia) - Tianyu Liu (https://github.com/kingcrimsontianyu) URL: https://github.com/rapidsai/cudf/pull/16963 --- cpp/benchmarks/CMakeLists.txt | 2 +- cpp/benchmarks/binaryop/binaryop.cpp | 65 ++++++------------- cpp/benchmarks/binaryop/compiled_binaryop.cpp | 47 ++++++-------- 3 files changed, 40 insertions(+), 74 deletions(-) diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index 1e13bf176c1..b8a53cd8bd9 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -334,7 +334,7 @@ ConfigureNVBench(AST_NVBENCH ast/transform.cpp) # ################################################################################################## # * binaryop benchmark ---------------------------------------------------------------------------- -ConfigureBench(BINARYOP_BENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp) +ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binaryop.cpp) # ################################################################################################## # * nvtext benchmark ------------------------------------------------------------------- diff --git a/cpp/benchmarks/binaryop/binaryop.cpp b/cpp/benchmarks/binaryop/binaryop.cpp index fa98d9e601a..7d267a88764 100644 --- a/cpp/benchmarks/binaryop/binaryop.cpp +++ b/cpp/benchmarks/binaryop/binaryop.cpp @@ -1,5 
+1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,15 +15,14 @@ */ #include -#include -#include #include #include #include +#include + #include -#include // This set of benchmarks is designed to be a comparison for the AST benchmarks @@ -33,13 +32,10 @@ enum class TreeType { }; template -class BINARYOP : public cudf::benchmark {}; - -template -static void BM_binaryop_transform(benchmark::State& state) +static void BM_binaryop_transform(nvbench::state& state) { - auto const table_size{static_cast(state.range(0))}; - auto const tree_levels{static_cast(state.range(1))}; + auto const table_size{static_cast(state.get_int64("table_size"))}; + auto const tree_levels{static_cast(state.get_int64("tree_levels"))}; // Create table data auto const n_cols = reuse_columns ? 1 : tree_levels + 1; @@ -47,9 +43,10 @@ static void BM_binaryop_transform(benchmark::State& state) cycle_dtypes({cudf::type_to_id()}, n_cols), row_count{table_size}); cudf::table_view table{*source_table}; - // Execute benchmark - for (auto _ : state) { - cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 + // Use the number of bytes read from global memory + state.add_global_memory_reads(table_size * (tree_levels + 1)); + + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch&) { // Execute tree that chains additions like (((a + b) + c) + d) auto const op = cudf::binary_operator::ADD; auto const result_data_type = cudf::data_type(cudf::type_to_id()); @@ -64,16 +61,18 @@ static void BM_binaryop_transform(benchmark::State& state) result = cudf::binary_operation(result->view(), col, op, result_data_type); }); } - } - - // Use the number of bytes read from global memory - state.SetBytesProcessed(static_cast(state.iterations()) * state.range(0) * - (tree_levels + 1) * sizeof(key_type)); + }); } #define BINARYOP_TRANSFORM_BENCHMARK_DEFINE(name, key_type, tree_type, reuse_columns) \ - BENCHMARK_TEMPLATE_DEFINE_F(BINARYOP, name, key_type, tree_type, reuse_columns) \ - (::benchmark::State & st) { BM_binaryop_transform(st); } + \ + static void name(::nvbench::state& st) \ + { \ + BM_binaryop_transform(st); \ + } \ + NVBENCH_BENCH(name) \ + .add_int64_axis("tree_levels", {1, 2, 5, 10}) \ + .add_int64_axis("table_size", {100'000, 1'000'000, 10'000'000, 100'000'000}) BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_int32_imbalanced_unique, int32_t, @@ -87,29 +86,3 @@ BINARYOP_TRANSFORM_BENCHMARK_DEFINE(binaryop_double_imbalanced_unique, double, TreeType::IMBALANCED_LEFT, false); - -static void CustomRanges(benchmark::internal::Benchmark* b) -{ - auto row_counts = std::vector{100'000, 1'000'000, 10'000'000, 100'000'000}; - auto operation_counts = std::vector{1, 2, 5, 10}; - for (auto const& row_count : row_counts) { - for (auto const& operation_count : operation_counts) { - b->Args({row_count, operation_count}); - } - } -} - -BENCHMARK_REGISTER_F(BINARYOP, binaryop_int32_imbalanced_unique) - ->Apply(CustomRanges) - ->Unit(benchmark::kMillisecond) - ->UseManualTime(); - -BENCHMARK_REGISTER_F(BINARYOP, binaryop_int32_imbalanced_reuse) - ->Apply(CustomRanges) - ->Unit(benchmark::kMillisecond) - ->UseManualTime(); - -BENCHMARK_REGISTER_F(BINARYOP, binaryop_double_imbalanced_unique) - ->Apply(CustomRanges) - ->Unit(benchmark::kMillisecond) - ->UseManualTime(); diff --git a/cpp/benchmarks/binaryop/compiled_binaryop.cpp 
b/cpp/benchmarks/binaryop/compiled_binaryop.cpp index 7086a61c7c5..bc0ff69bce9 100644 --- a/cpp/benchmarks/binaryop/compiled_binaryop.cpp +++ b/cpp/benchmarks/binaryop/compiled_binaryop.cpp @@ -15,20 +15,18 @@ */ #include -#include -#include #include -class COMPILED_BINARYOP : public cudf::benchmark {}; +#include template -void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop) +void BM_compiled_binaryop(nvbench::state& state, cudf::binary_operator binop) { - auto const column_size{static_cast(state.range(0))}; + auto const table_size = static_cast(state.get_int64("table_size")); auto const source_table = create_random_table( - {cudf::type_to_id(), cudf::type_to_id()}, row_count{column_size}); + {cudf::type_to_id(), cudf::type_to_id()}, row_count{table_size}); auto lhs = cudf::column_view(source_table->get_column(0)); auto rhs = cudf::column_view(source_table->get_column(1)); @@ -38,31 +36,26 @@ void BM_compiled_binaryop(benchmark::State& state, cudf::binary_operator binop) // Call once for hot cache. cudf::binary_operation(lhs, rhs, binop, output_dtype); - for (auto _ : state) { - cuda_event_timer timer(state, true); - cudf::binary_operation(lhs, rhs, binop, output_dtype); - } - // use number of bytes read and written to global memory - state.SetBytesProcessed(static_cast(state.iterations()) * column_size * - (sizeof(TypeLhs) + sizeof(TypeRhs) + sizeof(TypeOut))); + state.add_global_memory_reads(table_size); + state.add_global_memory_reads(table_size); + state.add_global_memory_reads(table_size); + + state.exec(nvbench::exec_tag::sync, + [&](nvbench::launch&) { cudf::binary_operation(lhs, rhs, binop, output_dtype); }); } +#define BM_STRINGIFY(a) #a + // TODO tparam boolean for null. -#define BM_BINARYOP_BENCHMARK_DEFINE(name, lhs, rhs, bop, tout) \ - BENCHMARK_DEFINE_F(COMPILED_BINARYOP, name) \ - (::benchmark::State & st) \ - { \ - BM_compiled_binaryop(st, cudf::binary_operator::bop); \ - } \ - BENCHMARK_REGISTER_F(COMPILED_BINARYOP, name) \ - ->Unit(benchmark::kMicrosecond) \ - ->UseManualTime() \ - ->Arg(10000) /* 10k */ \ - ->Arg(100000) /* 100k */ \ - ->Arg(1000000) /* 1M */ \ - ->Arg(10000000) /* 10M */ \ - ->Arg(100000000); /* 100M */ +#define BM_BINARYOP_BENCHMARK_DEFINE(name, lhs, rhs, bop, tout) \ + static void name(::nvbench::state& st) \ + { \ + ::BM_compiled_binaryop(st, ::cudf::binary_operator::bop); \ + } \ + NVBENCH_BENCH(name) \ + .set_name("compiled_binary_op_" BM_STRINGIFY(name)) \ + .add_int64_axis("table_size", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}) #define build_name(a, b, c, d) a##_##b##_##c##_##d From 39342b8762c734aa2a94b94815bef75869a4e59c Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Fri, 4 Oct 2024 09:39:20 -0700 Subject: [PATCH 03/33] Properly handle the mapped and registered regions in `memory_mapped_source` (#16865) Depends on https://github.com/rapidsai/cudf/pull/16826 Set of fixes that improve robustness of the non-GDS file input: 1. Avoid registering beyond the byte range - addresses problems when reading adjacent byte ranges from multiple threads (GH only). 2. Allow reading data outside of the memory mapped region. This prevents issues with very long rows in CSV or JSON input. 3. Copy host data when the range being read is only partially registered. This avoids errors when trying to copy the host data range to the device (GH only). 
Modifies the datasource class hierarchy to avoid reuse of direct file `host_read`s Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Basit Ayantunde (https://github.com/lamarrr) - Mads R. B. Kristensen (https://github.com/madsbk) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/16865 --- cpp/include/cudf/io/datasource.hpp | 22 +++- cpp/src/io/functions.cpp | 14 ++- cpp/src/io/utilities/datasource.cpp | 157 +++++++++++++++++----------- cpp/tests/io/csv_test.cpp | 35 +++++++ 4 files changed, 158 insertions(+), 70 deletions(-) diff --git a/cpp/include/cudf/io/datasource.hpp b/cpp/include/cudf/io/datasource.hpp index b12fbe39a57..dc14802adc1 100644 --- a/cpp/include/cudf/io/datasource.hpp +++ b/cpp/include/cudf/io/datasource.hpp @@ -86,14 +86,28 @@ class datasource { /** * @brief Creates a source from a file path. * + * @note Parameters `offset`, `max_size_estimate` and `min_size_estimate` are hints to the + * `datasource` implementation about the expected range of the data that will be read. The + * implementation may use these hints to optimize the read operation. These parameters are usually + * based on the byte range option. In this case, `min_size_estimate` should be no greater than the + * byte range to avoid potential issues when reading adjacent ranges. `max_size_estimate` can + * include padding after the byte range, to include additional data that may be needed for + * processing. + * + @throws cudf::logic_error if the minimum size estimate is greater than the maximum size estimate + * * @param[in] filepath Path to the file to use - * @param[in] offset Bytes from the start of the file (the default is zero) - * @param[in] size Bytes from the offset; use zero for entire file (the default is zero) + * @param[in] offset Starting byte offset from which data will be read (the default is zero) + * @param[in] max_size_estimate Upper estimate of the data range that will be read (the default is + * zero, which means the whole file after `offset`) + * @param[in] min_size_estimate Lower estimate of the data range that will be read (the default is + * zero, which means the whole file after `offset`) * @return Constructed datasource object */ static std::unique_ptr create(std::string const& filepath, - size_t offset = 0, - size_t size = 0); + size_t offset = 0, + size_t max_size_estimate = 0, + size_t min_size_estimate = 0); /** * @brief Creates a source from a host memory buffer. 
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp index de8eea9e99b..5a060902eb2 100644 --- a/cpp/src/io/functions.cpp +++ b/cpp/src/io/functions.cpp @@ -122,14 +122,16 @@ chunked_parquet_writer_options_builder chunked_parquet_writer_options::builder( namespace { std::vector> make_datasources(source_info const& info, - size_t range_offset = 0, - size_t range_size = 0) + size_t offset = 0, + size_t max_size_estimate = 0, + size_t min_size_estimate = 0) { switch (info.type()) { case io_type::FILEPATH: { auto sources = std::vector>(); for (auto const& filepath : info.filepaths()) { - sources.emplace_back(cudf::io::datasource::create(filepath, range_offset, range_size)); + sources.emplace_back( + cudf::io::datasource::create(filepath, offset, max_size_estimate, min_size_estimate)); } return sources; } @@ -211,7 +213,8 @@ table_with_metadata read_json(json_reader_options options, auto datasources = make_datasources(options.get_source(), options.get_byte_range_offset(), - options.get_byte_range_size_with_padding()); + options.get_byte_range_size_with_padding(), + options.get_byte_range_size()); return json::detail::read_json(datasources, options, stream, mr); } @@ -238,7 +241,8 @@ table_with_metadata read_csv(csv_reader_options options, auto datasources = make_datasources(options.get_source(), options.get_byte_range_offset(), - options.get_byte_range_size_with_padding()); + options.get_byte_range_size_with_padding(), + options.get_byte_range_size()); CUDF_EXPECTS(datasources.size() == 1, "Only a single source is currently supported."); diff --git a/cpp/src/io/utilities/datasource.cpp b/cpp/src/io/utilities/datasource.cpp index e4313eba454..0be976b6144 100644 --- a/cpp/src/io/utilities/datasource.cpp +++ b/cpp/src/io/utilities/datasource.cpp @@ -32,6 +32,7 @@ #include #include +#include namespace cudf { namespace io { @@ -54,6 +55,30 @@ class file_source : public datasource { } } + std::unique_ptr host_read(size_t offset, size_t size) override + { + lseek(_file.desc(), offset, SEEK_SET); + + // Clamp length to available data + ssize_t const read_size = std::min(size, _file.size() - offset); + + std::vector v(read_size); + CUDF_EXPECTS(read(_file.desc(), v.data(), read_size) == read_size, "read failed"); + return buffer::create(std::move(v)); + } + + size_t host_read(size_t offset, size_t size, uint8_t* dst) override + { + lseek(_file.desc(), offset, SEEK_SET); + + // Clamp length to available data + auto const read_size = std::min(size, _file.size() - offset); + + CUDF_EXPECTS(read(_file.desc(), dst, read_size) == static_cast(read_size), + "read failed"); + return read_size; + } + ~file_source() override = default; [[nodiscard]] bool supports_device_read() const override @@ -138,40 +163,63 @@ class file_source : public datasource { */ class memory_mapped_source : public file_source { public: - explicit memory_mapped_source(char const* filepath, size_t offset, size_t size) + explicit memory_mapped_source(char const* filepath, + size_t offset, + size_t max_size_estimate, + size_t min_size_estimate) : file_source(filepath) { if (_file.size() != 0) { - map(_file.desc(), offset, size); - register_mmap_buffer(); + // Memory mapping is not exclusive, so we can include the whole region we expect to read + map(_file.desc(), offset, max_size_estimate); + // Buffer registration is exclusive (can't overlap with other registered buffers) so we + // register the lower estimate; this avoids issues when reading adjacent ranges from the same + // file from multiple threads + 
register_mmap_buffer(offset, min_size_estimate); } } ~memory_mapped_source() override { if (_map_addr != nullptr) { - munmap(_map_addr, _map_size); + unmap(); unregister_mmap_buffer(); } } std::unique_ptr host_read(size_t offset, size_t size) override { - CUDF_EXPECTS(offset >= _map_offset, "Requested offset is outside mapping"); + // Clamp length to available data + auto const read_size = std::min(size, +_file.size() - offset); + + // If the requested range is outside of the mapped region, read from the file + if (offset < _map_offset or offset + read_size > (_map_offset + _map_size)) { + return file_source::host_read(offset, read_size); + } - // Clamp length to available data in the mapped region - auto const read_size = std::min(size, _map_size - (offset - _map_offset)); + // If the requested range is only partially within the registered region, copy to a new + // host buffer to make the data safe to copy to the device + if (_reg_addr != nullptr and + (offset < _reg_offset or offset + read_size > (_reg_offset + _reg_size))) { + auto const src = static_cast(_map_addr) + (offset - _map_offset); + + return std::make_unique>>( + std::vector(src, src + read_size)); + } return std::make_unique( - static_cast(_map_addr) + (offset - _map_offset), read_size); + static_cast(_map_addr) + offset - _map_offset, read_size); } size_t host_read(size_t offset, size_t size, uint8_t* dst) override { - CUDF_EXPECTS(offset >= _map_offset, "Requested offset is outside mapping"); + // Clamp length to available data + auto const read_size = std::min(size, +_file.size() - offset); - // Clamp length to available data in the mapped region - auto const read_size = std::min(size, _map_size - (offset - _map_offset)); + // If the requested range is outside of the mapped region, read from the file + if (offset < _map_offset or offset + read_size > (_map_offset + _map_size)) { + return file_source::host_read(offset, read_size, dst); + } auto const src = static_cast(_map_addr) + (offset - _map_offset); std::memcpy(dst, src, read_size); @@ -184,16 +232,18 @@ class memory_mapped_source : public file_source { * * Fixes nvbugs/4215160 */ - void register_mmap_buffer() + void register_mmap_buffer(size_t offset, size_t size) { - if (_map_addr == nullptr or _map_size == 0 or not pageableMemoryAccessUsesHostPageTables()) { - return; - } + if (_map_addr == nullptr or not pageableMemoryAccessUsesHostPageTables()) { return; } - auto const result = cudaHostRegister(_map_addr, _map_size, cudaHostRegisterDefault); - if (result == cudaSuccess) { - _is_map_registered = true; - } else { + // Registered region must be within the mapped region + _reg_offset = std::max(offset, _map_offset); + _reg_size = std::min(size != 0 ? 
size : _map_size, (_map_offset + _map_size) - _reg_offset); + + _reg_addr = static_cast(_map_addr) - _map_offset + _reg_offset; + auto const result = cudaHostRegister(_reg_addr, _reg_size, cudaHostRegisterReadOnly); + if (result != cudaSuccess) { + _reg_addr = nullptr; CUDF_LOG_WARN("cudaHostRegister failed with {} ({})", static_cast(result), cudaGetErrorString(result)); @@ -205,10 +255,12 @@ class memory_mapped_source : public file_source { */ void unregister_mmap_buffer() { - if (not _is_map_registered) { return; } + if (_reg_addr == nullptr) { return; } - auto const result = cudaHostUnregister(_map_addr); - if (result != cudaSuccess) { + auto const result = cudaHostUnregister(_reg_addr); + if (result == cudaSuccess) { + _reg_addr = nullptr; + } else { CUDF_LOG_WARN("cudaHostUnregister failed with {} ({})", static_cast(result), cudaGetErrorString(result)); @@ -226,52 +278,30 @@ class memory_mapped_source : public file_source { // Size for `mmap()` needs to include the page padding _map_size = size + (offset - _map_offset); + if (_map_size == 0) { return; } // Check if accessing a region within already mapped area _map_addr = mmap(nullptr, _map_size, PROT_READ, MAP_PRIVATE, fd, _map_offset); CUDF_EXPECTS(_map_addr != MAP_FAILED, "Cannot create memory mapping"); } - private: - size_t _map_size = 0; - size_t _map_offset = 0; - void* _map_addr = nullptr; - bool _is_map_registered = false; -}; - -/** - * @brief Implementation class for reading from a file using `read` calls - * - * Potentially faster than `memory_mapped_source` when only a small portion of the file is read - * through the host. - */ -class direct_read_source : public file_source { - public: - explicit direct_read_source(char const* filepath) : file_source(filepath) {} - - std::unique_ptr host_read(size_t offset, size_t size) override + void unmap() { - lseek(_file.desc(), offset, SEEK_SET); - - // Clamp length to available data - ssize_t const read_size = std::min(size, _file.size() - offset); - - std::vector v(read_size); - CUDF_EXPECTS(read(_file.desc(), v.data(), read_size) == read_size, "read failed"); - return buffer::create(std::move(v)); + if (_map_addr != nullptr) { + auto const result = munmap(_map_addr, _map_size); + if (result != 0) { CUDF_LOG_WARN("munmap failed with {}", result); } + _map_addr = nullptr; + } } - size_t host_read(size_t offset, size_t size, uint8_t* dst) override - { - lseek(_file.desc(), offset, SEEK_SET); - - // Clamp length to available data - auto const read_size = std::min(size, _file.size() - offset); + private: + size_t _map_offset = 0; + size_t _map_size = 0; + void* _map_addr = nullptr; - CUDF_EXPECTS(read(_file.desc(), dst, read_size) == static_cast(read_size), - "read failed"); - return read_size; - } + size_t _reg_offset = 0; + size_t _reg_size = 0; + void* _reg_addr = nullptr; }; /** @@ -431,16 +461,21 @@ class user_datasource_wrapper : public datasource { std::unique_ptr datasource::create(std::string const& filepath, size_t offset, - size_t size) + size_t max_size_estimate, + size_t min_size_estimate) { + CUDF_EXPECTS(max_size_estimate == 0 or min_size_estimate <= max_size_estimate, + "Invalid min/max size estimates for datasource creation"); + #ifdef CUFILE_FOUND if (cufile_integration::is_always_enabled()) { // avoid mmap as GDS is expected to be used for most reads - return std::make_unique(filepath.c_str()); + return std::make_unique(filepath.c_str()); } #endif // Use our own memory mapping implementation for direct file reads - return std::make_unique(filepath.c_str(), offset, 
size); + return std::make_unique( + filepath.c_str(), offset, max_size_estimate, min_size_estimate); } std::unique_ptr datasource::create(host_buffer const& buffer) diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index dc14824d834..0028dd946e3 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -2516,4 +2516,39 @@ TEST_F(CsvReaderTest, UTF8BOM) CUDF_TEST_EXPECT_TABLES_EQUIVALENT(result_view, expected); } +void expect_buffers_equal(cudf::io::datasource::buffer* lhs, cudf::io::datasource::buffer* rhs) +{ + ASSERT_EQ(lhs->size(), rhs->size()); + EXPECT_EQ(0, std::memcmp(lhs->data(), rhs->data(), lhs->size())); +} + +TEST_F(CsvReaderTest, OutOfMapBoundsReads) +{ + // write a lot of data into a file + auto filepath = temp_env->get_temp_dir() + "OutOfMapBoundsReads.csv"; + auto const num_rows = 1 << 20; + auto const row = std::string{"0,1,2,3,4,5,6,7,8,9\n"}; + auto const file_size = num_rows * row.size(); + { + std::ofstream outfile(filepath, std::ofstream::out); + for (size_t i = 0; i < num_rows; ++i) { + outfile << row; + } + } + + // Only memory map the middle of the file + auto source = cudf::io::datasource::create(filepath, file_size / 2, file_size / 4); + auto full_source = cudf::io::datasource::create(filepath); + auto const all_data = source->host_read(0, file_size); + auto ref_data = full_source->host_read(0, file_size); + expect_buffers_equal(ref_data.get(), all_data.get()); + + auto const start_data = source->host_read(file_size / 2, file_size / 2); + expect_buffers_equal(full_source->host_read(file_size / 2, file_size / 2).get(), + start_data.get()); + + auto const end_data = source->host_read(0, file_size / 2 + 512); + expect_buffers_equal(full_source->host_read(0, file_size / 2 + 512).get(), end_data.get()); +} + CUDF_TEST_PROGRAM_MAIN() From d15bbfdded7181fdc23d33fa5efae181b4af2e2b Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 4 Oct 2024 07:45:54 -1000 Subject: [PATCH 04/33] Allow melt(var_name=) to be a falsy label (#16981) closes https://github.com/rapidsai/cudf/issues/16972 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16981 --- python/cudf/cudf/core/reshape.py | 2 +- python/cudf/cudf/tests/test_reshape.py | 9 +++++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 6e5abb2b82b..3d132c92d54 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -681,7 +681,7 @@ def _tile(A, reps): nval = len(value_vars) dtype = min_unsigned_type(nval) - if not var_name: + if var_name is None: var_name = "variable" if not value_vars: diff --git a/python/cudf/cudf/tests/test_reshape.py b/python/cudf/cudf/tests/test_reshape.py index 4235affd4d1..3adbe1d2a74 100644 --- a/python/cudf/cudf/tests/test_reshape.py +++ b/python/cudf/cudf/tests/test_reshape.py @@ -119,6 +119,15 @@ def test_melt_str_scalar_id_var(): assert_eq(result, expected) +def test_melt_falsy_var_name(): + df = cudf.DataFrame({"A": ["a", "b", "c"], "B": [1, 3, 5], "C": [2, 4, 6]}) + result = cudf.melt(df, id_vars=["A"], value_vars=["B"], var_name="") + expected = pd.melt( + df.to_pandas(), id_vars=["A"], value_vars=["B"], var_name="" + ) + assert_eq(result, expected) + + @pytest.mark.parametrize("num_cols", [1, 2, 10]) @pytest.mark.parametrize("num_rows", [1, 2, 1000]) @pytest.mark.parametrize( From 
04c17ded6563f4caaeeb51319672c10587401e33 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Fri, 4 Oct 2024 14:06:23 -0400 Subject: [PATCH 05/33] [FEA] Migrate nvtext/edit_distance APIs to pylibcudf (#16957) Part of #15162. This PR migrates `edit_distance.pxd` to pylibcudf. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Yunsong Wang (https://github.com/PointKernel) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/16957 --- cpp/include/nvtext/edit_distance.hpp | 2 +- .../user_guide/api_docs/pylibcudf/index.rst | 1 + .../pylibcudf/nvtext/edit_distance.rst | 6 ++ .../api_docs/pylibcudf/nvtext/index.rst | 7 +++ .../cudf/cudf/_lib/nvtext/edit_distance.pyx | 34 +++------- python/pylibcudf/pylibcudf/CMakeLists.txt | 1 + python/pylibcudf/pylibcudf/__init__.pxd | 2 + python/pylibcudf/pylibcudf/__init__.py | 2 + .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 22 +++++++ .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 7 +++ python/pylibcudf/pylibcudf/nvtext/__init__.py | 7 +++ .../pylibcudf/nvtext/edit_distance.pxd | 8 +++ .../pylibcudf/nvtext/edit_distance.pyx | 63 +++++++++++++++++++ .../tests/test_nvtext_edit_distance.py | 34 ++++++++++ 14 files changed, 171 insertions(+), 25 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/edit_distance.rst create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst create mode 100644 python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt create mode 100644 python/pylibcudf/pylibcudf/nvtext/__init__.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/__init__.py create mode 100644 python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py diff --git a/cpp/include/nvtext/edit_distance.hpp b/cpp/include/nvtext/edit_distance.hpp index 723ba310a1e..dca590baebf 100644 --- a/cpp/include/nvtext/edit_distance.hpp +++ b/cpp/include/nvtext/edit_distance.hpp @@ -57,7 +57,7 @@ namespace CUDF_EXPORT nvtext { * @param targets Strings to compute edit distance against `input` * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory - * @return New strings columns of with replaced strings + * @return New lists column of edit distance values */ std::unique_ptr edit_distance( cudf::strings_column_view const& input, diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index e21536e2e97..052479d6720 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -49,3 +49,4 @@ This page provides API documentation for pylibcudf. io/index.rst strings/index.rst + nvtext/index.rst diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/edit_distance.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/edit_distance.rst new file mode 100644 index 00000000000..abb45e426a8 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/edit_distance.rst @@ -0,0 +1,6 @@ +============= +edit_distance +============= + +.. 
automodule:: pylibcudf.nvtext.edit_distance + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst new file mode 100644 index 00000000000..b5cd5ee42c3 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -0,0 +1,7 @@ +nvtext +====== + +.. toctree:: + :maxdepth: 1 + + edit_distance diff --git a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx index e3c2273345a..3dd99c42d76 100644 --- a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx +++ b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx @@ -2,37 +2,23 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.edit_distance cimport ( - edit_distance as cpp_edit_distance, - edit_distance_matrix as cpp_edit_distance_matrix, -) +from pylibcudf cimport nvtext from cudf._lib.column cimport Column @acquire_spill_lock() def edit_distance(Column strings, Column targets): - cdef column_view c_strings = strings.view() - cdef column_view c_targets = targets.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_edit_distance(c_strings, c_targets)) - - return Column.from_unique_ptr(move(c_result)) + result = nvtext.edit_distance.edit_distance( + strings.to_pylibcudf(mode="read"), + targets.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(result) @acquire_spill_lock() def edit_distance_matrix(Column strings): - cdef column_view c_strings = strings.view() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move(cpp_edit_distance_matrix(c_strings)) - - return Column.from_unique_ptr(move(c_result)) + result = nvtext.edit_distance.edit_distance_matrix( + strings.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(result) diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt index a7cb66d7b16..1d72eacac12 100644 --- a/python/pylibcudf/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/CMakeLists.txt @@ -66,3 +66,4 @@ target_link_libraries(pylibcudf_interop PUBLIC nanoarrow) add_subdirectory(libcudf) add_subdirectory(strings) add_subdirectory(io) +add_subdirectory(nvtext) diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd index a384edd456d..b98b37fe0fd 100644 --- a/python/pylibcudf/pylibcudf/__init__.pxd +++ b/python/pylibcudf/pylibcudf/__init__.pxd @@ -17,6 +17,7 @@ from . 
cimport ( lists, merge, null_mask, + nvtext, partitioning, quantiles, reduce, @@ -78,4 +79,5 @@ __all__ = [ "transpose", "types", "unary", + "nvtext", ] diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py index 2a5365e8fad..304f27be340 100644 --- a/python/pylibcudf/pylibcudf/__init__.py +++ b/python/pylibcudf/pylibcudf/__init__.py @@ -28,6 +28,7 @@ lists, merge, null_mask, + nvtext, partitioning, quantiles, reduce, @@ -92,4 +93,5 @@ "transpose", "types", "unary", + "nvtext", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt new file mode 100644 index 00000000000..ebe1fda1f12 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -0,0 +1,22 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. +# ============================================================================= + +set(cython_sources edit_distance.pyx) + +set(linked_libraries cudf::cudf) +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX pylibcudf_nvtext_ ASSOCIATED_TARGETS cudf +) diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd new file mode 100644 index 00000000000..82f7c425b1d --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from . cimport edit_distance + +__all__ = [ + "edit_distance", +] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py new file mode 100644 index 00000000000..986652a241f --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from . import edit_distance + +__all__ = [ + "edit_distance", +] diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd new file mode 100644 index 00000000000..446b95afabb --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pxd @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column + + +cpdef Column edit_distance(Column input, Column targets) + +cpdef Column edit_distance_matrix(Column input) diff --git a/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx new file mode 100644 index 00000000000..fc98ccbc50c --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/edit_distance.pyx @@ -0,0 +1,63 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.nvtext.edit_distance cimport ( + edit_distance as cpp_edit_distance, + edit_distance_matrix as cpp_edit_distance_matrix, +) + + +cpdef Column edit_distance(Column input, Column targets): + """ + Returns the edit distance between individual strings in two strings columns + + For details, see :cpp:func:`edit_distance` + + Parameters + ---------- + input : Column + Input strings + targets : Column + Strings to compute edit distance against + + Returns + ------- + Column + New column of edit distance values + """ + cdef column_view c_strings = input.view() + cdef column_view c_targets = targets.view() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_edit_distance(c_strings, c_targets)) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column edit_distance_matrix(Column input): + """ + Returns the edit distance between all strings in the input strings column + + For details, see :cpp:func:`edit_distance_matrix` + + Parameters + ---------- + input : Column + Input strings + + Returns + ------- + Column + New column of edit distance values + """ + cdef column_view c_strings = input.view() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move(cpp_edit_distance_matrix(c_strings)) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py new file mode 100644 index 00000000000..7d93c471cc4 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_edit_distance.py @@ -0,0 +1,34 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +@pytest.fixture(scope="module") +def edit_distance_data(): + arr1 = ["hallo", "goodbye", "world"] + arr2 = ["hello", "", "world"] + return pa.array(arr1), pa.array(arr2) + + +def test_edit_distance(edit_distance_data): + input_col, targets = edit_distance_data + result = plc.nvtext.edit_distance.edit_distance( + plc.interop.from_arrow(input_col), + plc.interop.from_arrow(targets), + ) + expected = pa.array([1, 7, 0], type=pa.int32()) + assert_column_eq(result, expected) + + +def test_edit_distance_matrix(edit_distance_data): + input_col, _ = edit_distance_data + result = plc.nvtext.edit_distance.edit_distance_matrix( + plc.interop.from_arrow(input_col) + ) + expected = pa.array( + [[0, 7, 4], [7, 0, 6], [4, 6, 0]], type=pa.list_(pa.int32()) + ) + assert_column_eq(expected, result) From efaa0b50c6ffd15c6506847987cb531e5f6ba955 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 4 Oct 2024 08:20:34 -1000 Subject: [PATCH 06/33] Add string.convert.convert_datetime/convert_booleans APIs to pylibcudf (#16971) Contributes to https://github.com/rapidsai/cudf/issues/15162 Also address a review in https://github.com/rapidsai/cudf/pull/16935#discussion_r1783726477 This also modifies some `format` arguments in `convert_datetime.pyx` to accept `str` instead of `bytes` (`const string&`) to align more with Python. 
Let me know if you prefer to change this back Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16971 --- python/cudf/cudf/_lib/string_casting.pyx | 110 +++--------------- python/cudf_polars/cudf_polars/dsl/expr.py | 4 +- .../strings/convert/convert_booleans.pxd | 4 +- .../strings/convert/convert_datetime.pxd | 6 +- .../pylibcudf/strings/convert/CMakeLists.txt | 2 +- .../pylibcudf/strings/convert/__init__.pxd | 2 +- .../pylibcudf/strings/convert/__init__.py | 2 +- .../strings/convert/convert_booleans.pxd | 9 ++ .../strings/convert/convert_booleans.pyx | 91 +++++++++++++++ .../strings/convert/convert_datetime.pxd | 11 +- .../strings/convert/convert_datetime.pyx | 82 +++++++++++-- .../pylibcudf/tests/test_string_convert.py | 2 +- .../tests/test_string_convert_booleans.py | 26 +++++ .../tests/test_string_convert_datetime.py | 46 ++++++++ .../pylibcudf/tests/test_string_wrap.py | 5 +- 15 files changed, 286 insertions(+), 116 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx index 60a6795a402..55ff38f472d 100644 --- a/python/cudf/cudf/_lib/string_casting.pyx +++ b/python/cudf/cudf/_lib/string_casting.pyx @@ -3,9 +3,6 @@ from cudf._lib.column cimport Column from cudf._lib.scalar import as_device_scalar - -from cudf._lib.scalar cimport DeviceScalar - from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES from libcpp.memory cimport unique_ptr @@ -14,14 +11,6 @@ from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from pylibcudf.libcudf.strings.convert.convert_booleans cimport ( - from_booleans as cpp_from_booleans, - to_booleans as cpp_to_booleans, -) -from pylibcudf.libcudf.strings.convert.convert_datetime cimport ( - is_timestamp as cpp_is_timestamp, -) from pylibcudf.libcudf.strings.convert.convert_floats cimport ( from_floats as cpp_from_floats, to_floats as cpp_to_floats, @@ -427,77 +416,21 @@ def stoul(Column input_col): return string_to_integer(input_col, cudf.dtype("uint64")) -def _to_booleans(Column input_col, object string_true="True"): - """ - Converting/Casting input column of type string to boolean column - - Parameters - ---------- - input_col : input column of type string - string_true : string that represents True - - Returns - ------- - A Column with string values cast to boolean - """ - - cdef DeviceScalar str_true = as_device_scalar(string_true) - cdef column_view input_column_view = input_col.view() - cdef const string_scalar* string_scalar_true = ( - str_true.get_raw_ptr()) - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_to_booleans( - input_column_view, - string_scalar_true[0])) - - return Column.from_unique_ptr(move(c_result)) - - def to_booleans(Column input_col): - - return _to_booleans(input_col) - - -def _from_booleans( - Column input_col, - object string_true="True", - object string_false="False"): - """ - Converting/Casting input column of type boolean to string column - - Parameters - 
---------- - input_col : input column of type boolean - string_true : string that represents True - string_false : string that represents False - - Returns - ------- - A Column with boolean values cast to string - """ - - cdef DeviceScalar str_true = as_device_scalar(string_true) - cdef DeviceScalar str_false = as_device_scalar(string_false) - cdef column_view input_column_view = input_col.view() - cdef const string_scalar* string_scalar_true = ( - str_true.get_raw_ptr()) - cdef const string_scalar* string_scalar_false = ( - str_false.get_raw_ptr()) - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_from_booleans( - input_column_view, - string_scalar_true[0], - string_scalar_false[0])) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_booleans.to_booleans( + input_col.to_pylibcudf(mode="read"), + as_device_scalar("True").c_value, + ) + return Column.from_pylibcudf(plc_column) def from_booleans(Column input_col): - return _from_booleans(input_col) + plc_column = plc.strings.convert.convert_booleans.from_booleans( + input_col.to_pylibcudf(mode="read"), + as_device_scalar("True").c_value, + as_device_scalar("False").c_value, + ) + return Column.from_pylibcudf(plc_column) def int2timestamp( @@ -520,11 +453,10 @@ def int2timestamp( A Column with date-time represented in string format """ - cdef string c_timestamp_format = format.encode("UTF-8") return Column.from_pylibcudf( plc.strings.convert.convert_datetime.from_timestamps( input_col.to_pylibcudf(mode="read"), - c_timestamp_format, + format, names.to_pylibcudf(mode="read") ) ) @@ -545,12 +477,11 @@ def timestamp2int(Column input_col, dtype, format): """ dtype = dtype_to_pylibcudf_type(dtype) - cdef string c_timestamp_format = format.encode('UTF-8') return Column.from_pylibcudf( plc.strings.convert.convert_datetime.to_timestamps( input_col.to_pylibcudf(mode="read"), dtype, - c_timestamp_format + format ) ) @@ -572,16 +503,11 @@ def istimestamp(Column input_col, str format): """ if input_col.size == 0: return cudf.core.column.column_empty(0, dtype=cudf.dtype("bool")) - cdef column_view input_column_view = input_col.view() - cdef string c_timestamp_format = str(format).encode('UTF-8') - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_is_timestamp( - input_column_view, - c_timestamp_format)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_datetime.is_timestamp( + input_col.to_pylibcudf(mode="read"), + format + ) + return Column.from_pylibcudf(plc_column) def timedelta2int(Column input_col, dtype, format): diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index c401e5a2f17..54476b7fedc 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -914,7 +914,7 @@ def do_evaluate( col = self.children[0].evaluate(df, context=context, mapping=mapping) is_timestamps = plc.strings.convert.convert_datetime.is_timestamp( - col.obj, format.encode() + col.obj, format ) if strict: @@ -937,7 +937,7 @@ def do_evaluate( ) return Column( plc.strings.convert.convert_datetime.to_timestamps( - res.columns()[0], self.dtype, format.encode() + res.columns()[0], self.dtype, format ) ) elif self.name == pl_expr.StringFunction.Replace: diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd index 83a9573baad..e6688cfff81 100644 --- 
a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_booleans.pxd @@ -8,10 +8,10 @@ from pylibcudf.libcudf.scalar.scalar cimport string_scalar cdef extern from "cudf/strings/convert/convert_booleans.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_booleans( - column_view input_col, + column_view input, string_scalar true_string) except + cdef unique_ptr[column] from_booleans( - column_view input_col, + column_view booleans, string_scalar true_string, string_scalar false_string) except + diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd index fa8975c4df9..fceddd58df0 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_datetime.pxd @@ -10,14 +10,14 @@ from pylibcudf.libcudf.types cimport data_type cdef extern from "cudf/strings/convert/convert_datetime.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_timestamps( - column_view input_col, + column_view input, data_type timestamp_type, string format) except + cdef unique_ptr[column] from_timestamps( - column_view input_col, + column_view timestamps, string format, - column_view input_strings_names) except + + column_view names) except + cdef unique_ptr[column] is_timestamp( column_view input_col, diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt index 175c9b3738e..3febc78dfd2 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. # ============================================================================= -set(cython_sources convert_durations.pyx convert_datetime.pyx) +set(cython_sources convert_booleans.pyx convert_durations.pyx convert_datetime.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd index 05324cb49df..5525bca46d6 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd @@ -1,2 +1,2 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . cimport convert_datetime, convert_durations +from . cimport convert_booleans, convert_datetime, convert_durations diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py index d803399d53c..2340ebe9a26 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py @@ -1,2 +1,2 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import convert_datetime, convert_durations +from . import convert_booleans, convert_datetime, convert_durations diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd new file mode 100644 index 00000000000..312ac3c0ca0 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pxd @@ -0,0 +1,9 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from pylibcudf.column cimport Column +from pylibcudf.scalar cimport Scalar + + +cpdef Column to_booleans(Column input, Scalar true_string) + +cpdef Column from_booleans(Column booleans, Scalar true_string, Scalar false_string) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx new file mode 100644 index 00000000000..0c10f821ab6 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_booleans.pyx @@ -0,0 +1,91 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.strings.convert cimport ( + convert_booleans as cpp_convert_booleans, +) +from pylibcudf.scalar cimport Scalar + +from cython.operator import dereference + + +cpdef Column to_booleans(Column input, Scalar true_string): + """ + Returns a new bool column by parsing boolean values from the strings + in the provided strings column. + + For details, see :cpp:func:`cudf::strings::to_booleans`. + + Parameters + ---------- + input : Column + Strings instance for this operation + + true_string : Scalar + String to expect for true. Non-matching strings are false + + Returns + ------- + Column + New bool column converted from strings. + """ + cdef unique_ptr[column] c_result + cdef const string_scalar* c_true_string = ( + true_string.c_obj.get() + ) + + with nogil: + c_result = move( + cpp_convert_booleans.to_booleans( + input.view(), + dereference(c_true_string) + ) + ) + + return Column.from_libcudf(move(c_result)) + +cpdef Column from_booleans(Column booleans, Scalar true_string, Scalar false_string): + """ + Returns a new strings column converting the boolean values from the + provided column into strings. + + For details, see :cpp:func:`cudf::strings::from_booleans`. + + Parameters + ---------- + booleans : Column + Boolean column to convert. + + true_string : Scalar + String to use for true in the output column. + + false_string : Scalar + String to use for false in the output column. + + Returns + ------- + Column + New strings column. 
+ """ + cdef unique_ptr[column] c_result + cdef const string_scalar* c_true_string = ( + true_string.c_obj.get() + ) + cdef const string_scalar* c_false_string = ( + false_string.c_obj.get() + ) + + with nogil: + c_result = move( + cpp_convert_booleans.from_booleans( + booleans.view(), + dereference(c_true_string), + dereference(c_false_string), + ) + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd index 07c84d263d6..80ec168644b 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pxd @@ -8,11 +8,16 @@ from pylibcudf.types cimport DataType cpdef Column to_timestamps( Column input, DataType timestamp_type, - const string& format + str format ) cpdef Column from_timestamps( - Column input, - const string& format, + Column timestamps, + str format, Column input_strings_names ) + +cpdef Column is_timestamp( + Column input, + str format, +) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx index fcacb096f87..0ee60812e00 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_datetime.pyx @@ -15,28 +15,74 @@ from pylibcudf.types import DataType cpdef Column to_timestamps( Column input, DataType timestamp_type, - const string& format + str format ): + """ + Returns a new timestamp column converting a strings column into + timestamps using the provided format pattern. + + For details, see cpp:`cudf::strings::to_timestamps`. + + Parameters + ---------- + input : Column + Strings instance for this operation. + + timestamp_type : DataType + The timestamp type used for creating the output column. + + format : str + String specifying the timestamp format in strings. + + Returns + ------- + Column + New datetime column + """ cdef unique_ptr[column] c_result + cdef string c_format = format.encode() with nogil: c_result = cpp_convert_datetime.to_timestamps( input.view(), timestamp_type.c_obj, - format + c_format ) return Column.from_libcudf(move(c_result)) cpdef Column from_timestamps( - Column input, - const string& format, + Column timestamps, + str format, Column input_strings_names ): + """ + Returns a new strings column converting a timestamp column into + strings using the provided format pattern. + + For details, see cpp:`cudf::strings::from_timestamps`. + + Parameters + ---------- + timestamps : Column + Timestamp values to convert + + format : str + The string specifying output format. + + input_strings_names : Column + The string names to use for weekdays ("%a", "%A") and months ("%b", "%B"). + + Returns + ------- + Column + New strings column with formatted timestamps. + """ cdef unique_ptr[column] c_result + cdef string c_format = format.encode() with nogil: c_result = cpp_convert_datetime.from_timestamps( - input.view(), - format, + timestamps.view(), + c_format, input_strings_names.view() ) @@ -44,13 +90,33 @@ cpdef Column from_timestamps( cpdef Column is_timestamp( Column input, - const string& format + str format ): + """ + Verifies the given strings column can be parsed to timestamps + using the provided format pattern. + + For details, see cpp:`cudf::strings::is_timestamp`. + + Parameters + ---------- + input : Column + Strings instance for this operation. 
+ + format : str + String specifying the timestamp format in strings. + + Returns + ------- + Column + New bool column. + """ cdef unique_ptr[column] c_result + cdef string c_format = format.encode() with nogil: c_result = cpp_convert_datetime.is_timestamp( input.view(), - format + c_format ) return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert.py b/python/pylibcudf/pylibcudf/tests/test_string_convert.py index e9e95459d0e..22bb4971cb1 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_convert.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert.py @@ -62,7 +62,7 @@ def test_to_datetime( got = plc.strings.convert.convert_datetime.to_timestamps( plc_timestamp_col, plc.interop.from_arrow(timestamp_type), - format.encode(), + format, ) assert_column_eq(expect, got) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py new file mode 100644 index 00000000000..117c59ff1b8 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_booleans.py @@ -0,0 +1,26 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pylibcudf as plc +from utils import assert_column_eq + + +def test_to_booleans(): + pa_array = pa.array(["true", None, "True"]) + result = plc.strings.convert.convert_booleans.to_booleans( + plc.interop.from_arrow(pa_array), + plc.interop.from_arrow(pa.scalar("True")), + ) + expected = pa.array([False, None, True]) + assert_column_eq(result, expected) + + +def test_from_booleans(): + pa_array = pa.array([True, None, False]) + result = plc.strings.convert.convert_booleans.from_booleans( + plc.interop.from_arrow(pa_array), + plc.interop.from_arrow(pa.scalar("A")), + plc.interop.from_arrow(pa.scalar("B")), + ) + expected = pa.array(["A", None, "B"]) + assert_column_eq(result, expected) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py new file mode 100644 index 00000000000..f3e84286a36 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_datetime.py @@ -0,0 +1,46 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+import datetime + +import pyarrow as pa +import pyarrow.compute as pc +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +@pytest.fixture +def fmt(): + return "%Y-%m-%dT%H:%M:%S" + + +def test_to_timestamp(fmt): + arr = pa.array(["2020-01-01T01:01:01", None]) + result = plc.strings.convert.convert_datetime.to_timestamps( + plc.interop.from_arrow(arr), + plc.DataType(plc.TypeId.TIMESTAMP_SECONDS), + fmt, + ) + expected = pc.strptime(arr, fmt, "s") + assert_column_eq(result, expected) + + +def test_from_timestamp(fmt): + arr = pa.array([datetime.datetime(2020, 1, 1, 1, 1, 1), None]) + result = plc.strings.convert.convert_datetime.from_timestamps( + plc.interop.from_arrow(arr), + fmt, + plc.interop.from_arrow(pa.array([], type=pa.string())), + ) + # pc.strftime will add the extra %f + expected = pa.array(["2020-01-01T01:01:01", None]) + assert_column_eq(result, expected) + + +def test_is_timestamp(fmt): + arr = pa.array(["2020-01-01T01:01:01", None, "2020-01-01"]) + result = plc.strings.convert.convert_datetime.is_timestamp( + plc.interop.from_arrow(arr), + fmt, + ) + expected = pa.array([True, None, False]) + assert_column_eq(result, expected) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py index 85abd3a2bae..a1c820cd586 100644 --- a/python/pylibcudf/pylibcudf/tests/test_string_wrap.py +++ b/python/pylibcudf/pylibcudf/tests/test_string_wrap.py @@ -7,6 +7,7 @@ def test_wrap(): + width = 12 pa_array = pa.array( [ "the quick brown fox jumped over the lazy brown dog", @@ -14,10 +15,10 @@ def test_wrap(): None, ] ) - result = plc.strings.wrap.wrap(plc.interop.from_arrow(pa_array), 12) + result = plc.strings.wrap.wrap(plc.interop.from_arrow(pa_array), width) expected = pa.array( [ - textwrap.fill(val, 12) if isinstance(val, str) else val + textwrap.fill(val, width) if isinstance(val, str) else val for val in pa_array.to_pylist() ] ) From a8da1ff2b393abbafa27dddcf4c19481ec853c28 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Fri, 4 Oct 2024 12:11:31 -0700 Subject: [PATCH 07/33] Deprecate support for directly accessing logger (#16964) This PR removes support for accessing cudf's underlying spdlog logger directly. Contributes to https://github.com/rapidsai/build-planning/issues/104 Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) - Yunsong Wang (https://github.com/PointKernel) URL: https://github.com/rapidsai/cudf/pull/16964 --- cpp/include/cudf/detail/utilities/logger.hpp | 14 ++++---- cpp/include/cudf/utilities/logger.hpp | 8 ++++- cpp/src/utilities/logger.cpp | 4 ++- cpp/tests/utilities_tests/logger_tests.cpp | 37 ++++++++++---------- 4 files changed, 36 insertions(+), 27 deletions(-) diff --git a/cpp/include/cudf/detail/utilities/logger.hpp b/cpp/include/cudf/detail/utilities/logger.hpp index 8c1c3c28df8..e7643eb44bd 100644 --- a/cpp/include/cudf/detail/utilities/logger.hpp +++ b/cpp/include/cudf/detail/utilities/logger.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -19,9 +19,9 @@ #include // Log messages that require computation should only be used at level TRACE and DEBUG -#define CUDF_LOG_TRACE(...) 
SPDLOG_LOGGER_TRACE(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_DEBUG(...) SPDLOG_LOGGER_DEBUG(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_INFO(...) SPDLOG_LOGGER_INFO(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_WARN(...) SPDLOG_LOGGER_WARN(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_ERROR(...) SPDLOG_LOGGER_ERROR(&cudf::logger(), __VA_ARGS__)
-#define CUDF_LOG_CRITICAL(...) SPDLOG_LOGGER_CRITICAL(&cudf::logger(), __VA_ARGS__)
+#define CUDF_LOG_TRACE(...) SPDLOG_LOGGER_TRACE(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_DEBUG(...) SPDLOG_LOGGER_DEBUG(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_INFO(...) SPDLOG_LOGGER_INFO(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_WARN(...) SPDLOG_LOGGER_WARN(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_ERROR(...) SPDLOG_LOGGER_ERROR(&cudf::detail::logger(), __VA_ARGS__)
+#define CUDF_LOG_CRITICAL(...) SPDLOG_LOGGER_CRITICAL(&cudf::detail::logger(), __VA_ARGS__)
diff --git a/cpp/include/cudf/utilities/logger.hpp b/cpp/include/cudf/utilities/logger.hpp
index 45d5d1b12e1..982554a23f5 100644
--- a/cpp/include/cudf/utilities/logger.hpp
+++ b/cpp/include/cudf/utilities/logger.hpp
@@ -22,6 +22,10 @@
 
 namespace CUDF_EXPORT cudf {
 
+namespace detail {
+spdlog::logger& logger();
+}
+
 /**
  * @brief Returns the global logger.
  *
@@ -43,6 +47,8 @@ namespace CUDF_EXPORT cudf {
  *
  * @return spdlog::logger& The logger.
  */
-spdlog::logger& logger();
+[[deprecated(
+  "Support for direct access to spdlog loggers in cudf is planned for removal")]] spdlog::logger&
+logger();
 
 }  // namespace CUDF_EXPORT cudf
diff --git a/cpp/src/utilities/logger.cpp b/cpp/src/utilities/logger.cpp
index d54f5677c4c..e52fffbd8c6 100644
--- a/cpp/src/utilities/logger.cpp
+++ b/cpp/src/utilities/logger.cpp
@@ -74,8 +74,10 @@ struct logger_wrapper {
 
 }  // namespace
 
-spdlog::logger& cudf::logger()
+spdlog::logger& cudf::detail::logger()
 {
   static logger_wrapper wrapped{};
   return wrapped.logger_;
 }
+
+spdlog::logger& cudf::logger() { return cudf::detail::logger(); }
diff --git a/cpp/tests/utilities_tests/logger_tests.cpp b/cpp/tests/utilities_tests/logger_tests.cpp
index d052e20eedb..cfab570833b 100644
--- a/cpp/tests/utilities_tests/logger_tests.cpp
+++ b/cpp/tests/utilities_tests/logger_tests.cpp
@@ -28,16 +28,17 @@ class LoggerTest : public cudf::test::BaseFixture {
   std::vector<spdlog::sink_ptr> prev_sinks;
 
  public:
-  LoggerTest() : prev_level{cudf::logger().level()}, prev_sinks{cudf::logger().sinks()}
+  LoggerTest()
+    : prev_level{cudf::detail::logger().level()}, prev_sinks{cudf::detail::logger().sinks()}
   {
-    cudf::logger().sinks() = {std::make_shared<spdlog::sinks::ostream_sink_mt>(oss)};
-    cudf::logger().set_formatter(
+    cudf::detail::logger().sinks() = {std::make_shared<spdlog::sinks::ostream_sink_mt>(oss)};
+    cudf::detail::logger().set_formatter(
       std::unique_ptr<spdlog::formatter>(new spdlog::pattern_formatter("%v")));
   }
 
   ~LoggerTest() override
   {
-    cudf::logger().set_level(prev_level);
-    cudf::logger().sinks() = prev_sinks;
+    cudf::detail::logger().set_level(prev_level);
+    cudf::detail::logger().sinks() = prev_sinks;
   }
 
   void clear_sink() { oss.str(""); }
@@ -46,32 +47,32 @@ class LoggerTest : public cudf::test::BaseFixture {
 
 TEST_F(LoggerTest, Basic)
 {
-  cudf::logger().critical("crit msg");
+  cudf::detail::logger().critical("crit msg");
   ASSERT_EQ(this->sink_content(), "crit msg\n");
 }
 
 TEST_F(LoggerTest, DefaultLevel)
 {
-  cudf::logger().trace("trace");
-  cudf::logger().debug("debug");
-  cudf::logger().info("info");
-  cudf::logger().warn("warn");
-  cudf::logger().error("error");
-  cudf::logger().critical("critical");
+  
cudf::detail::logger().trace("trace"); + cudf::detail::logger().debug("debug"); + cudf::detail::logger().info("info"); + cudf::detail::logger().warn("warn"); + cudf::detail::logger().error("error"); + cudf::detail::logger().critical("critical"); ASSERT_EQ(this->sink_content(), "warn\nerror\ncritical\n"); } TEST_F(LoggerTest, CustomLevel) { - cudf::logger().set_level(spdlog::level::warn); - cudf::logger().info("info"); - cudf::logger().warn("warn"); + cudf::detail::logger().set_level(spdlog::level::warn); + cudf::detail::logger().info("info"); + cudf::detail::logger().warn("warn"); ASSERT_EQ(this->sink_content(), "warn\n"); this->clear_sink(); - cudf::logger().set_level(spdlog::level::debug); - cudf::logger().trace("trace"); - cudf::logger().debug("debug"); + cudf::detail::logger().set_level(spdlog::level::debug); + cudf::detail::logger().trace("trace"); + cudf::detail::logger().debug("debug"); ASSERT_EQ(this->sink_content(), "debug\n"); } From 119aa9d9c5cffc2de460f52f11fb4a78f8b51dce Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 4 Oct 2024 12:08:26 -1000 Subject: [PATCH 08/33] Add string.convert.convert_fixed_type APIs to pylibcudf (#16984) Contributes to https://github.com/rapidsai/cudf/issues/15162 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16984 --- .../strings/convert/convert_fixed_point.pyx | 69 +++-------- .../strings/convert/convert_fixed_point.pxd | 8 +- .../pylibcudf/strings/convert/CMakeLists.txt | 4 +- .../pylibcudf/strings/convert/__init__.pxd | 7 +- .../pylibcudf/strings/convert/__init__.py | 7 +- .../strings/convert/convert_fixed_point.pxd | 11 ++ .../strings/convert/convert_fixed_point.pyx | 107 ++++++++++++++++++ .../tests/test_string_convert_fixed_point.py | 34 ++++++ 8 files changed, 188 insertions(+), 59 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py diff --git a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx index a8df8c9a92c..96dcd021c3b 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_fixed_point.pyx @@ -1,22 +1,11 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. -import cudf - -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - from cudf.core.buffer import acquire_spill_lock -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.strings.convert.convert_fixed_point cimport ( - from_fixed_point as cpp_from_fixed_point, - is_fixed_point as cpp_is_fixed_point, - to_fixed_point as cpp_to_fixed_point, -) -from pylibcudf.libcudf.types cimport data_type, type_id - from cudf._lib.column cimport Column +from cudf._lib.types cimport dtype_to_pylibcudf_type + +import pylibcudf as plc @acquire_spill_lock() @@ -32,14 +21,10 @@ def from_decimal(Column input_col): ------- A column of strings representing the input decimal values. 
""" - cdef column_view input_column_view = input_col.view() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_from_fixed_point( - input_column_view)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_fixed_point.from_fixed_point( + input_col.to_pylibcudf(mode="read"), + ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() @@ -57,25 +42,11 @@ def to_decimal(Column input_col, object out_type): ------- A column of decimals parsed from the string values. """ - cdef column_view input_column_view = input_col.view() - cdef unique_ptr[column] c_result - cdef int scale = out_type.scale - cdef data_type c_out_type - if isinstance(out_type, cudf.Decimal32Dtype): - c_out_type = data_type(type_id.DECIMAL32, -scale) - elif isinstance(out_type, cudf.Decimal64Dtype): - c_out_type = data_type(type_id.DECIMAL64, -scale) - elif isinstance(out_type, cudf.Decimal128Dtype): - c_out_type = data_type(type_id.DECIMAL128, -scale) - else: - raise TypeError("should be a decimal dtype") - with nogil: - c_result = move( - cpp_to_fixed_point( - input_column_view, - c_out_type)) - - result = Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_fixed_point.to_fixed_point( + input_col.to_pylibcudf(mode="read"), + dtype_to_pylibcudf_type(out_type), + ) + result = Column.from_pylibcudf(plc_column) result.dtype.precision = out_type.precision return result @@ -98,14 +69,8 @@ def is_fixed_point(Column input_col, object dtype): ------- A Column of booleans indicating valid decimal conversion. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = input_col.view() - cdef int scale = dtype.scale - cdef data_type c_dtype = data_type(type_id.DECIMAL64, -scale) - with nogil: - c_result = move(cpp_is_fixed_point( - source_view, - c_dtype - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_fixed_point.is_fixed_point( + input_col.to_pylibcudf(mode="read"), + dtype_to_pylibcudf_type(dtype), + ) + return Column.from_pylibcudf(plc_column) diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd index 6f820f3c9a4..72ab329f2dd 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_fixed_point.pxd @@ -9,13 +9,13 @@ from pylibcudf.libcudf.types cimport data_type cdef extern from "cudf/strings/convert/convert_fixed_point.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_fixed_point( - column_view input_col, + column_view input, data_type output_type) except + cdef unique_ptr[column] from_fixed_point( - column_view input_col) except + + column_view input) except + cdef unique_ptr[column] is_fixed_point( - column_view source_strings, - data_type output_type + column_view input, + data_type decimal_type ) except + diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt index 3febc78dfd2..fe8da975566 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt @@ -12,7 +12,9 @@ # the License. 
# =============================================================================
 
-set(cython_sources convert_booleans.pyx convert_durations.pyx convert_datetime.pyx)
+set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx
+    convert_fixed_point.pyx
+)
 
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
index 5525bca46d6..36abf463371 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd
@@ -1,2 +1,7 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from . cimport convert_booleans, convert_datetime, convert_durations
+from . cimport (
+    convert_booleans,
+    convert_datetime,
+    convert_durations,
+    convert_fixed_point,
+)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
index 2340ebe9a26..c0be4093836 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py
+++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py
@@ -1,2 +1,7 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-from . import convert_booleans, convert_datetime, convert_durations
+from . import (
+    convert_booleans,
+    convert_datetime,
+    convert_durations,
+    convert_fixed_point,
+)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
new file mode 100644
index 00000000000..049b9b3fffe
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pxd
@@ -0,0 +1,11 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column cimport Column
+from pylibcudf.types cimport DataType
+
+
+cpdef Column to_fixed_point(Column input, DataType output_type)
+
+cpdef Column from_fixed_point(Column input)
+
+cpdef Column is_fixed_point(Column input, DataType decimal_type=*)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
new file mode 100644
index 00000000000..40dadf6f967
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_fixed_point.pyx
@@ -0,0 +1,107 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.strings.convert cimport (
+    convert_fixed_point as cpp_fixed_point,
+)
+from pylibcudf.types cimport DataType, type_id
+
+
+cpdef Column to_fixed_point(Column input, DataType output_type):
+    """
+    Returns a new fixed-point column parsing decimal values from the
+    provided strings column.
+
+    For details, see :cpp:func:`cudf::strings::to_fixed_point`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    output_type : DataType
+        Type of fixed-point column to return including the scale value.
+
+    Returns
+    -------
+    Column
+        New column of output_type.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_fixed_point.to_fixed_point(
+                input.view(),
+                output_type.c_obj,
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+cpdef Column from_fixed_point(Column input):
+    """
+    Returns a new strings column converting the fixed-point values
+    into a strings column.
+
+    For details, see :cpp:func:`cudf::strings::from_fixed_point`
+
+    Parameters
+    ----------
+    input : Column
+        Fixed-point column to convert.
+
+    Returns
+    -------
+    Column
+        New strings column.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_fixed_point.from_fixed_point(
+                input.view(),
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+cpdef Column is_fixed_point(Column input, DataType decimal_type=None):
+    """
+    Returns a boolean column identifying strings in which all
+    characters are valid for conversion to fixed-point.
+
+    For details, see :cpp:func:`cudf::strings::is_fixed_point`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    decimal_type : DataType
+        Fixed-point type (with scale) used only for checking overflow.
+        Defaults to Decimal64
+
+    Returns
+    -------
+    Column
+        New column of boolean results for each string.
+    """
+    cdef unique_ptr[column] c_result
+
+    if decimal_type is None:
+        decimal_type = DataType(type_id.DECIMAL64)
+
+    with nogil:
+        c_result = move(
+            cpp_fixed_point.is_fixed_point(
+                input.view(),
+                decimal_type.c_obj,
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py
new file mode 100644
index 00000000000..b1c4d729604
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_fixed_point.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import decimal
+
+import pyarrow as pa
+import pylibcudf as plc
+from utils import assert_column_eq
+
+
+def test_to_fixed_point():
+    typ = pa.decimal128(38, 2)
+    arr = pa.array(["123", "1.23", None])
+    result = plc.strings.convert.convert_fixed_point.to_fixed_point(
+        plc.interop.from_arrow(arr), plc.interop.from_arrow(typ)
+    )
+    expected = arr.cast(typ)
+    assert_column_eq(result, expected)
+
+
+def test_from_fixed_point():
+    arr = pa.array([decimal.Decimal("1.1"), None])
+    result = plc.strings.convert.convert_fixed_point.from_fixed_point(
+        plc.interop.from_arrow(arr),
+    )
+    expected = pa.array(["1.1", None])
+    assert_column_eq(result, expected)
+
+
+def test_is_fixed_point():
+    arr = pa.array(["123", "1.23", "1.2.3", "", None])
+    result = plc.strings.convert.convert_fixed_point.is_fixed_point(
+        plc.interop.from_arrow(arr),
+    )
+    expected = pa.array([True, True, False, False, None])
+    assert_column_eq(result, expected)

From 77f3a5d3229ed1b3186fe9f4d5b5b04d124c6a4d Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 4 Oct 2024 12:33:44 -1000
Subject: [PATCH 09/33] Add docstrings and test for strings.convert_durations APIs for pylibcudf (#16982)

Contributes to https://github.com/rapidsai/cudf/issues/15162

Since the implementation already existed:

* Added docstrings
* Like https://github.com/rapidsai/cudf/pull/16971, made the `format` parameter accept `str` instead
* Aligned parameter names closer to pylibcudf
* Added missing `move`s
* Moved `convert_duration` tests to `test_string_convert_duration.py` and added a new test for `from_durations`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16982
---
 python/cudf/cudf/_lib/string_casting.pyx      |  7 +-
 .../strings/convert/convert_durations.pxd     |  2 +-
 .../strings/convert/convert_durations.pxd     |  6 +-
 .../strings/convert/convert_durations.pyx     | 73 ++++++++++++++++---
 .../pylibcudf/tests/test_string_convert.py    | 43 -----------
 .../tests/test_string_convert_durations.py    | 61 ++++++++++++++++
 6 files changed, 130 insertions(+), 62 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py

diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index 55ff38f472d..fe19379bf93 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -525,12 +525,11 @@ def timedelta2int(Column input_col, dtype, format):
 
     """
     dtype = dtype_to_pylibcudf_type(dtype)
-    cdef string c_timestamp_format = format.encode('UTF-8')
     return Column.from_pylibcudf(
         plc.strings.convert.convert_durations.to_durations(
             input_col.to_pylibcudf(mode="read"),
             dtype,
-            c_timestamp_format
+            format
        )
    )
 
@@ -549,12 +548,10 @@ def int2timedelta(Column input_col, str format):
 
     A Column with Timedelta represented in string format
 
     """
-
-    cdef string c_duration_format = format.encode('UTF-8')
     return Column.from_pylibcudf(
         plc.strings.convert.convert_durations.from_durations(
             input_col.to_pylibcudf(mode="read"),
-            c_duration_format
+            format
        )
    )
diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
index ebe10574353..43ffad1d89f 100644
--- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_durations.pxd
@@ -10,7 +10,7 @@ from pylibcudf.libcudf.types cimport data_type
 cdef extern from "cudf/strings/convert/convert_durations.hpp" namespace \
         "cudf::strings" nogil:
     cdef unique_ptr[column] to_durations(
-        const column_view & strings_col,
+        const column_view & input,
         data_type duration_type,
         const string & format) except +
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
index ac11b8959ed..eecdade4ef9 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pxd
@@ -8,10 +8,10 @@ from pylibcudf.types cimport DataType
 cpdef Column to_durations(
     Column input,
     DataType duration_type,
-    const string& format
+    str format
 )
 
 cpdef Column from_durations(
-    Column input,
-    const string& format
+    Column durations,
+    str format=*
 )
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
index f3e0b7c9c8e..76c5809c3d5 100644
--- a/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_durations.pyx
@@ -15,27 +15,80 @@ from pylibcudf.types import DataType
 cpdef Column to_durations(
     Column input,
     DataType duration_type,
-    const string& format
+    str format
 ):
+    """
+    Returns a new duration column converting a strings column into
+    durations using the provided format pattern.
+
+    For details, see :cpp:func:`cudf::strings::to_durations`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    duration_type : DataType
+        The duration type used for creating the output column.
+
+    format : str
+        String specifying the duration format in strings.
+
+    Returns
+    -------
+    Column
+        New duration column.
+    """
    cdef unique_ptr[column] c_result
+    cdef string c_format = format.encode()
+
    with nogil:
-        c_result = cpp_convert_durations.to_durations(
-            input.view(),
-            duration_type.c_obj,
-            format
+        c_result = move(
+            cpp_convert_durations.to_durations(
+                input.view(),
+                duration_type.c_obj,
+                c_format
+            )
        )
 
    return Column.from_libcudf(move(c_result))
 
 cpdef Column from_durations(
-    Column input,
-    const string& format
+    Column durations,
+    str format=None
 ):
+    """
+    Returns a new strings column converting a duration column into
+    strings using the provided format pattern.
+
+    For details, see :cpp:func:`cudf::strings::from_durations`
+
+    Parameters
+    ----------
+    durations : Column
+        Duration values to convert.
+
+    format : str
+        The string specifying output format.
+        Default format is "%D days %H:%M:%S".
+
+    Returns
+    -------
+    Column
+        New strings column with formatted durations.
+    """
    cdef unique_ptr[column] c_result
+
+    if format is None:
+        format = "%D days %H:%M:%S"
+    cdef string c_format = format.encode()
+
    with nogil:
-        c_result = cpp_convert_durations.from_durations(
-            input.view(),
-            format
+        c_result = move(
+            cpp_convert_durations.from_durations(
+                durations.view(),
+                c_format
+            )
        )
 
    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert.py b/python/pylibcudf/pylibcudf/tests/test_string_convert.py
index e9e95459d0e..69f7a0fdd33 100644
--- a/python/pylibcudf/pylibcudf/tests/test_string_convert.py
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert.py
@@ -1,7 +1,5 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from datetime import datetime
-
 import pyarrow as pa
 import pylibcudf as plc
 import pytest
@@ -21,39 +19,16 @@ def timestamp_type(request):
     return request.param
 
 
-@pytest.fixture(
-    scope="module",
-    params=[
-        pa.duration("ns"),
-        pa.duration("us"),
-        pa.duration("ms"),
-        pa.duration("s"),
-    ],
-)
-def duration_type(request):
-    return request.param
-
-
 @pytest.fixture(scope="module")
 def pa_timestamp_col():
     return pa.array(["2011-01-01", "2011-01-02", "2011-01-03"])
 
 
-@pytest.fixture(scope="module")
-def pa_duration_col():
-    return pa.array(["05:20:25"])
-
-
 @pytest.fixture(scope="module")
 def plc_timestamp_col(pa_timestamp_col):
     return plc.interop.from_arrow(pa_timestamp_col)
 
 
-@pytest.fixture(scope="module")
-def plc_duration_col(pa_duration_col):
-    return plc.interop.from_arrow(pa_duration_col)
-
-
 @pytest.mark.parametrize("format", ["%Y-%m-%d"])
 def test_to_datetime(
     pa_timestamp_col, plc_timestamp_col, timestamp_type, format
@@ -65,21 +40,3 @@ def test_to_datetime(
         format,
     )
     assert_column_eq(expect, got)
-
-
-@pytest.mark.parametrize("format", ["%H:%M:%S"])
-def test_to_duration(pa_duration_col, plc_duration_col, duration_type, format):
-    def to_timedelta(duration_str):
-        date = datetime.strptime(duration_str, format)
-        return date - datetime(1900, 1, 1)  # "%H:%M:%S" zero date
-
-    expect = pa.array([to_timedelta(d.as_py()) for d in pa_duration_col]).cast(
-        duration_type
-    )
-
-    got = plc.strings.convert.convert_durations.to_durations(
-        plc_duration_col,
-        plc.interop.from_arrow(duration_type),
-        format.encode(),
-    )
-    assert_column_eq(expect, got)
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py
new file mode 100644
index 00000000000..6d704309bfd
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_durations.py
@@ -0,0 +1,61 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from datetime import datetime, timedelta
+
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+from utils import assert_column_eq
+
+
+@pytest.fixture(
+    params=[
+        pa.duration("ns"),
+        pa.duration("us"),
+        pa.duration("ms"),
+        pa.duration("s"),
+    ],
+)
+def duration_type(request):
+    return request.param
+
+
+@pytest.fixture(scope="module")
+def pa_duration_col():
+    return pa.array(["05:20:25"])
+
+
+@pytest.fixture(scope="module")
+def plc_duration_col(pa_duration_col):
+    return plc.interop.from_arrow(pa_duration_col)
+
+
+def test_to_duration(pa_duration_col, plc_duration_col, duration_type):
+    format = "%H:%M:%S"
+
+    def to_timedelta(duration_str):
+        date = datetime.strptime(duration_str, format)
+        return date - datetime(1900, 1, 1)  # "%H:%M:%S" zero date
+
+    expect = pa.array([to_timedelta(d.as_py()) for d in pa_duration_col]).cast(
+        duration_type
+    )
+
+    got = plc.strings.convert.convert_durations.to_durations(
+        plc_duration_col,
+        plc.interop.from_arrow(duration_type),
+        format,
+    )
+    assert_column_eq(expect, got)
+
+
+@pytest.mark.parametrize("format", [None, "%D days %H:%M:%S"])
+def test_from_durations(format):
+    pa_array = pa.array(
+        [timedelta(days=1, hours=1, minutes=1, seconds=1), None]
+    )
+    result = plc.strings.convert.convert_durations.from_durations(
+        plc.interop.from_arrow(pa_array), format
+    )
+    expected = pa.array(["1 days 01:01:01", None])
+    assert_column_eq(result, expected)

From c958d8e88d8c0cb149b1442ab91705853167a609 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR
Date: Fri, 4 Oct 2024 18:45:30 -0500
Subject: [PATCH 10/33] Upgrade pandas pinnings to support `2.2.3` (#16882)

Pandas released version `2.2.3` with minimal fixes, notably adding support for Python 3.13 and NumPy 2.1 compatibility. This PR updates the pinnings in `cudf` to support `2.2.3`.
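
For reference, `python/cudf/cudf/core/_compat.py` (updated below) carries the supported-version ceiling that cudf code compares against. A minimal sketch of how such a gate is typically consumed; the warning text is illustrative, not cudf's actual message:

```python
import pandas as pd
from packaging import version

# Mirrors the constants kept in cudf/core/_compat.py.
PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.2.3")
PANDAS_VERSION = version.parse(pd.__version__)

# Hypothetical gate: warn when the installed pandas is newer than the
# latest version validated against cudf.
if PANDAS_VERSION > PANDAS_CURRENT_SUPPORTED_VERSION:
    print(f"pandas {PANDAS_VERSION} has not been validated against this cudf release")
```
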
Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - James Lamb (https://github.com/jameslamb) - Vyas Ramasubramani (https://github.com/vyasr) - Lawrence Mitchell (https://github.com/wence-) URL: https://github.com/rapidsai/cudf/pull/16882 --- ci/test_python_cudf.sh | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-125_arch-x86_64.yaml | 2 +- conda/recipes/cudf/meta.yaml | 2 +- conda/recipes/pylibcudf/meta.yaml | 2 +- dependencies.yaml | 6 +++++- python/cudf/cudf/core/_compat.py | 2 +- python/cudf/pyproject.toml | 2 +- python/dask_cudf/pyproject.toml | 2 +- 9 files changed, 13 insertions(+), 9 deletions(-) diff --git a/ci/test_python_cudf.sh b/ci/test_python_cudf.sh index 2386414b32e..9528549a562 100755 --- a/ci/test_python_cudf.sh +++ b/ci/test_python_cudf.sh @@ -9,7 +9,7 @@ source ./ci/test_python_common.sh test_python_cudf rapids-logger "Check GPU usage" nvidia-smi - +rapids-print-env EXITCODE=0 trap "EXITCODE=1" ERR set +e diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 8b45d26c367..bd5e6c3d569 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -63,7 +63,7 @@ dependencies: - openpyxl - packaging - pandas -- pandas>=2.0,<2.2.3dev0 +- pandas>=2.0,<2.2.4dev0 - pandoc - polars>=1.8,<1.9 - pre-commit diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml index 354c1360e5a..565a3ebfa3c 100644 --- a/conda/environments/all_cuda-125_arch-x86_64.yaml +++ b/conda/environments/all_cuda-125_arch-x86_64.yaml @@ -61,7 +61,7 @@ dependencies: - openpyxl - packaging - pandas -- pandas>=2.0,<2.2.3dev0 +- pandas>=2.0,<2.2.4dev0 - pandoc - polars>=1.8,<1.9 - pre-commit diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml index 25e69b89789..2c254415318 100644 --- a/conda/recipes/cudf/meta.yaml +++ b/conda/recipes/cudf/meta.yaml @@ -78,7 +78,7 @@ requirements: run: - python - typing_extensions >=4.0.0 - - pandas >=2.0,<2.2.3dev0 + - pandas >=2.0,<2.2.4dev0 - cupy >=12.0.0 - numba-cuda >=0.0.13 - numpy >=1.23,<3.0a0 diff --git a/conda/recipes/pylibcudf/meta.yaml b/conda/recipes/pylibcudf/meta.yaml index 7c1efa0176c..3d965f30986 100644 --- a/conda/recipes/pylibcudf/meta.yaml +++ b/conda/recipes/pylibcudf/meta.yaml @@ -77,7 +77,7 @@ requirements: run: - python - typing_extensions >=4.0.0 - - pandas >=2.0,<2.2.3dev0 + - pandas >=2.0,<2.2.4dev0 - numpy >=1.23,<3.0a0 - pyarrow>=14.0.0,<18.0.0a0 - {{ pin_compatible('rmm', max_pin='x.x') }} diff --git a/dependencies.yaml b/dependencies.yaml index b192158c4ea..3561b22965d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -602,7 +602,7 @@ dependencies: packages: - fsspec>=0.6.0 - &numpy numpy>=1.23,<3.0a0 - - pandas>=2.0,<2.2.3dev0 + - pandas>=2.0,<2.2.4dev0 run_pylibcudf: common: - output_types: [conda, requirements, pyproject] @@ -748,6 +748,10 @@ dependencies: packages: - *numba-cuda-dep - pandas==2.0.* + - matrix: {dependencies: "latest"} + packages: + - numba-cuda==0.0.15 + - pandas==2.2.3 - matrix: packages: - output_types: conda diff --git a/python/cudf/cudf/core/_compat.py b/python/cudf/cudf/core/_compat.py index e2bdecbe67a..871ffc6269d 100644 --- a/python/cudf/cudf/core/_compat.py +++ b/python/cudf/cudf/core/_compat.py @@ -3,7 +3,7 @@ import pandas as pd from packaging import version -PANDAS_CURRENT_SUPPORTED_VERSION = 
version.parse("2.2.2") +PANDAS_CURRENT_SUPPORTED_VERSION = version.parse("2.2.3") PANDAS_VERSION = version.parse(pd.__version__) diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 605f9be5a49..1b730ffd13c 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -28,7 +28,7 @@ dependencies = [ "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", "packaging", - "pandas>=2.0,<2.2.3dev0", + "pandas>=2.0,<2.2.4dev0", "ptxcompiler", "pyarrow>=14.0.0,<18.0.0a0", "pylibcudf==24.12.*,>=0.0.0a0", diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index 76e47b50c3b..ce825c7647b 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "numpy>=1.23,<3.0a0", - "pandas>=2.0,<2.2.3dev0", + "pandas>=2.0,<2.2.4dev0", "rapids-dask-dependency==24.12.*,>=0.0.0a0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. classifiers = [ From 33b8dfa42ff9a600adfa6d10c7740169a0340338 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Fri, 4 Oct 2024 15:30:19 -1000 Subject: [PATCH 11/33] Add string.convert.convert_ipv4 APIs to pylibcudf (#16994) Contributes to https://github.com/rapidsai/cudf/issues/15162 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16994 --- python/cudf/cudf/_lib/string_casting.pyx | 42 +++------ .../libcudf/strings/convert/convert_ipv4.pxd | 6 +- .../pylibcudf/strings/convert/CMakeLists.txt | 2 +- .../pylibcudf/strings/convert/__init__.pxd | 1 + .../pylibcudf/strings/convert/__init__.py | 1 + .../strings/convert/convert_ipv4.pxd | 10 ++ .../strings/convert/convert_ipv4.pyx | 92 +++++++++++++++++++ .../tests/test_string_convert_ipv4.py | 31 +++++++ 8 files changed, 151 insertions(+), 34 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx index fe19379bf93..76c862a8657 100644 --- a/python/cudf/cudf/_lib/string_casting.pyx +++ b/python/cudf/cudf/_lib/string_casting.pyx @@ -22,11 +22,6 @@ from pylibcudf.libcudf.strings.convert.convert_integers cimport ( is_hex as cpp_is_hex, to_integers as cpp_to_integers, ) -from pylibcudf.libcudf.strings.convert.convert_ipv4 cimport ( - integers_to_ipv4 as cpp_integers_to_ipv4, - ipv4_to_integers as cpp_ipv4_to_integers, - is_ipv4 as cpp_is_ipv4, -) from pylibcudf.libcudf.types cimport data_type, type_id from cudf._lib.types cimport underlying_type_t_type_id @@ -569,14 +564,10 @@ def int2ip(Column input_col): A Column with integer represented in string ipv4 format """ - - cdef column_view input_column_view = input_col.view() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_integers_to_ipv4(input_column_view)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_ipv4.integers_to_ipv4( + input_col.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) def ip2int(Column input_col): @@ -592,14 +583,10 @@ def ip2int(Column input_col): A Column with ipv4 represented as integer """ - - 
cdef column_view input_column_view = input_col.view() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_ipv4_to_integers(input_column_view)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_ipv4.ipv4_to_integers( + input_col.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) def is_ipv4(Column source_strings): @@ -608,15 +595,10 @@ def is_ipv4(Column source_strings): that have strings in IPv4 format. This format is nnn.nnn.nnn.nnn where nnn is integer digits in [0,255]. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_is_ipv4( - source_view - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_ipv4.is_ipv4( + source_strings.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) def htoi(Column input_col, **kwargs): diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd index fe571cfced6..801db438e92 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_ipv4.pxd @@ -8,11 +8,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view cdef extern from "cudf/strings/convert/convert_ipv4.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] ipv4_to_integers( - column_view input_col) except + + column_view input) except + cdef unique_ptr[column] integers_to_ipv4( - column_view input_col) except + + column_view integers) except + cdef unique_ptr[column] is_ipv4( - column_view source_strings + column_view input ) except + diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt index fe8da975566..eb0d6ee6999 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx - convert_fixed_point.pyx + convert_fixed_point.pyx convert_ipv4.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd index 36abf463371..431beed8e5d 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd @@ -4,4 +4,5 @@ from . cimport ( convert_datetime, convert_durations, convert_fixed_point, + convert_ipv4, ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py index c0be4093836..a601b562c2e 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py @@ -4,4 +4,5 @@ convert_datetime, convert_durations, convert_fixed_point, + convert_ipv4, ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd new file mode 100644 index 00000000000..c61f5c0bdca --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pxd @@ -0,0 +1,10 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+
+from pylibcudf.column cimport Column
+
+
+cpdef Column ipv4_to_integers(Column input)
+
+cpdef Column integers_to_ipv4(Column integers)
+
+cpdef Column is_ipv4(Column input)
diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
new file mode 100644
index 00000000000..f2a980d4269
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/strings/convert/convert_ipv4.pyx
@@ -0,0 +1,92 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.strings.convert cimport convert_ipv4 as cpp_convert_ipv4
+
+
+cpdef Column ipv4_to_integers(Column input):
+    """
+    Converts IPv4 addresses into integers.
+
+    For details, see :cpp:func:`cudf::strings::ipv4_to_integers`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    Returns
+    -------
+    Column
+        New uint32 column converted from strings.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_convert_ipv4.ipv4_to_integers(
+                input.view()
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column integers_to_ipv4(Column integers):
+    """
+    Converts integers into IPv4 addresses as strings.
+
+    For details, see :cpp:func:`cudf::strings::integers_to_ipv4`
+
+    Parameters
+    ----------
+    integers : Column
+        Integer (uint32) column to convert.
+
+    Returns
+    -------
+    Column
+        New strings column.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_convert_ipv4.integers_to_ipv4(
+                integers.view()
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column is_ipv4(Column input):
+    """
+    Returns a boolean column identifying strings in which all
+    characters are valid for conversion to integers from IPv4 format.
+
+    For details, see :cpp:func:`cudf::strings::is_ipv4`
+
+    Parameters
+    ----------
+    input : Column
+        Strings instance for this operation.
+
+    Returns
+    -------
+    Column
+        New column of boolean results for each string.
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_convert_ipv4.is_ipv4(
+                input.view()
+            )
+        )
+
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py
new file mode 100644
index 00000000000..4dc3e512624
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_ipv4.py
@@ -0,0 +1,31 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import pyarrow as pa
+import pylibcudf as plc
+from utils import assert_column_eq
+
+
+def test_ipv4_to_integers():
+    arr = pa.array(["123.45.67.890", None])
+    result = plc.strings.convert.convert_ipv4.ipv4_to_integers(
+        plc.interop.from_arrow(arr)
+    )
+    expected = pa.array([2066564730, None], type=pa.uint32())
+    assert_column_eq(result, expected)
+
+
+def test_integers_to_ipv4():
+    arr = pa.array([1, 0, None], type=pa.uint32())
+    result = plc.strings.convert.convert_ipv4.integers_to_ipv4(
+        plc.interop.from_arrow(arr)
+    )
+    expected = pa.array(["0.0.0.1", "0.0.0.0", None])
+    assert_column_eq(result, expected)
+
+
+def test_is_ipv4():
+    arr = pa.array(["0.0.0.1", "1.2.34", "A", None])
+    result = plc.strings.convert.convert_ipv4.is_ipv4(
+        plc.interop.from_arrow(arr)
+    )
+    expected = pa.array([True, False, False, None])
+    assert_column_eq(result, expected)

From fcff2b6ef7d6db62fc064ad10ffc6c873fc85b58 Mon Sep 17 00:00:00 2001
From: Karthikeyan <6488848+karthikeyann@users.noreply.github.com>
Date: Sat, 5 Oct 2024 02:52:53 -0500
Subject: [PATCH 12/33] Fix write_json to handle empty string column (#16995)

Adds an empty-string-column condition to write_json, bypassing make_strings_children for empty columns: launching it with a grid size of zero throws a CUDA error.

Authors:
  - Karthikeyan (https://github.com/karthikeyann)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Muhammad Haseeb (https://github.com/mhaseeb123)

URL: https://github.com/rapidsai/cudf/pull/16995
---
 cpp/src/io/json/write_json.cu     |  3 +++
 cpp/tests/io/json/json_writer.cpp | 37 +++++++++++++++++++++++++++++++
 2 files changed, 40 insertions(+)

diff --git a/cpp/src/io/json/write_json.cu b/cpp/src/io/json/write_json.cu
index dc7199d7ab1..e1241f8f90c 100644
--- a/cpp/src/io/json/write_json.cu
+++ b/cpp/src/io/json/write_json.cu
@@ -170,6 +170,9 @@ struct escape_strings_fn {
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
   {
+    if (column_v.is_empty()) {  // empty begets empty
+      return make_empty_column(type_id::STRING);
+    }
     auto [offsets_column, chars] =
       cudf::strings::detail::make_strings_children(*this, column_v.size(), stream, mr);
 
diff --git a/cpp/tests/io/json/json_writer.cpp b/cpp/tests/io/json/json_writer.cpp
index 2c4e29a01b9..39d31c406a5 100644
--- a/cpp/tests/io/json/json_writer.cpp
+++ b/cpp/tests/io/json/json_writer.cpp
@@ -70,6 +70,43 @@ TEST_F(JsonWriterTest, EmptyInput)
   EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));
 }
 
+TEST_F(JsonWriterTest, EmptyLeaf)
+{
+  cudf::test::strings_column_wrapper col1{""};
+  cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets{0, 0};
+  auto col2 = make_lists_column(1,
+                                offsets.release(),
+                                cudf::test::strings_column_wrapper{}.release(),
+                                0,
+                                rmm::device_buffer{},
+                                cudf::test::get_default_stream());
+  auto col3 = cudf::test::lists_column_wrapper<cudf::string_view>::make_one_empty_row_column();
+  cudf::table_view tbl_view{{col1, *col2, col3}};
+  cudf::io::table_metadata mt{{{"col1"}, {"col2"}, {"col3"}}};
+
+  std::vector<char> out_buffer;
+  auto destination = cudf::io::sink_info(&out_buffer);
+  auto out_options = cudf::io::json_writer_options_builder(destination, tbl_view)
+                       .include_nulls(true)
+                       .metadata(mt)
+                       .lines(false)
+                       .na_rep("null")
+                       .build();
+
+  // Empty columns in table
+  cudf::io::write_json(out_options, cudf::test::get_default_stream());
+  std::string const expected = R"([{"col1":"","col2":[],"col3":[]}])";
+  EXPECT_EQ(expected, std::string(out_buffer.data(), out_buffer.size()));
+
+  // Empty columns in table - JSON Lines
+  out_buffer.clear();
+  out_options.enable_lines(true);
+  cudf::io::write_json(out_options, cudf::test::get_default_stream());
+  std::string const expected_lines = R"({"col1":"","col2":[],"col3":[]})"
+                                     "\n";
+  EXPECT_EQ(expected_lines, std::string(out_buffer.data(), out_buffer.size()));
+}
+
 TEST_F(JsonWriterTest, ErrorCases)
 {
   cudf::test::strings_column_wrapper col1{"a", "b", "c"};

From bfd568b4f5c4dd9799b60a2975c1fd183e9b99aa Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Mon, 7 Oct 2024 12:04:24 -0400
Subject: [PATCH 13/33] Remove unused import (#17005)

This PR removes an unused import in cudf which was causing errors in doc builds.

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/17005
---
 python/cudf/cudf/_lib/string_casting.pyx | 1 -
 1 file changed, 1 deletion(-)

diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx
index 76c862a8657..d9595f4ab0a 100644
--- a/python/cudf/cudf/_lib/string_casting.pyx
+++ b/python/cudf/cudf/_lib/string_casting.pyx
@@ -6,7 +6,6 @@ from cudf._lib.scalar import as_device_scalar
 from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES
 
 from libcpp.memory cimport unique_ptr
-from libcpp.string cimport string
 from libcpp.utility cimport move
 
 from pylibcudf.libcudf.column.column cimport column

From f926a61c7d31b7b33c3a3482507e9efb44b2cc36 Mon Sep 17 00:00:00 2001
From: Ben Jarmak <104460670+jarmak-nv@users.noreply.github.com>
Date: Mon, 7 Oct 2024 12:37:55 -0400
Subject: [PATCH 14/33] Add release tracking to project automation scripts (#17001)

This PR adds two new jobs to the project automations: one to extract the version number from the branch name, and one to set the project `Release` field to the version found.
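
The extraction step relies on shell prefix stripping (`release=${branch#branch-}`). A hypothetical Python equivalent, only to illustrate the semantics:

```python
def release_from_branch(branch: str) -> str:
    """Strip a leading 'branch-' prefix, e.g. 'branch-24.12' -> '24.12'."""
    # Like ${branch#branch-}, a non-matching prefix leaves the value unchanged.
    return branch.removeprefix("branch-")

assert release_from_branch("branch-24.12") == "24.12"
assert release_from_branch("main") == "main"
```
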
Authors: - Ben Jarmak (https://github.com/jarmak-nv) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/17001 --- .../workflows/pr_issue_status_automation.yml | 30 +++++++++++++++++++ 1 file changed, 30 insertions(+) diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml index af8d1289ea1..6f0e88fb245 100644 --- a/.github/workflows/pr_issue_status_automation.yml +++ b/.github/workflows/pr_issue_status_automation.yml @@ -62,3 +62,33 @@ jobs: UPDATE_ITEM: true UPDATE_LINKED_ISSUES: true secrets: inherit + + process-branch-name: + if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} + needs: get-project-id + runs-on: ubuntu-latest + outputs: + branch-name: ${{ steps.process-branch-name.outputs.branch-name }} + steps: + - name: Extract branch name + id: process-branch-name + run: | + branch=${{ github.event.pull_request.base.ref }} + release=${branch#branch-} + echo "branch-name=$release" >> "$GITHUB_OUTPUT" + + update-release: + # This job sets the PR and its linked issues to the release they are targeting + uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.12 + if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }} + needs: [get-project-id, process-branch-name] + with: + PROJECT_ID: "PVT_kwDOAp2shc4AiNzl" + SINGLE_SELECT_FIELD_ID: "PVTSSF_lADOAp2shc4AiNzlzgg52UQ" + SINGLE_SELECT_FIELD_NAME: "Release" + SINGLE_SELECT_OPTION_VALUE: "${{ needs.process-branch-name.outputs.branch-name }}" + ITEM_PROJECT_ID: "${{ needs.get-project-id.outputs.ITEM_PROJECT_ID }}" + ITEM_NODE_ID: "${{ github.event.pull_request.node_id }}" + UPDATE_ITEM: true + UPDATE_LINKED_ISSUES: true + secrets: inherit From 7e1e4757e753253a99df110fd3814d0136289ef2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 7 Oct 2024 12:41:47 -0700 Subject: [PATCH 15/33] Address all remaining clang-tidy errors (#16956) With this set of changes I get a clean run of clang-tidy (with one caveat that I'll explain in the follow-up PR to add clang-tidy to pre-commit/CI). 
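
As a rough sketch of what that integration might look like (not part of this PR; the build-directory path is an assumption), LLVM's `run-clang-tidy` wrapper can drive the new configuration over a compile database:

```python
import subprocess

# Run clang-tidy across the compile database; check=True raises if clang-tidy
# exits nonzero (e.g. compile errors, or checks promoted via WarningsAsErrors).
subprocess.run(["run-clang-tidy", "-p", "cpp/build", "-quiet"], check=True)
```
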
Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) - David Wendt (https://github.com/davidwendt) - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/cudf/pull/16956 --- cpp/.clang-tidy | 43 ++++++++++++--- cpp/cmake/thirdparty/get_nanoarrow.cmake | 8 +-- .../nanoarrow_clang_tidy_compliance.diff | 38 ++++++++++++++ .../patches/nanoarrow_override.json | 18 +++++++ cpp/include/cudf/table/table.hpp | 2 +- cpp/include/cudf/table/table_view.hpp | 2 +- cpp/src/io/avro/avro.cpp | 3 +- cpp/src/io/orc/orc.hpp | 2 +- .../io/parquet/compact_protocol_reader.cpp | 6 ++- cpp/src/io/utilities/data_sink.cpp | 6 ++- cpp/src/io/utilities/hostdevice_span.hpp | 8 +-- cpp/src/utilities/host_memory.cpp | 13 ++++- cpp/tests/binaryop/binop-compiled-test.cpp | 6 ++- cpp/tests/binaryop/util/operation.h | 4 +- cpp/tests/column/column_test.cpp | 4 +- cpp/tests/copying/slice_tests.cpp | 12 +++-- cpp/tests/copying/slice_tests.cuh | 21 ++++---- cpp/tests/copying/split_tests.cpp | 52 ++++++++++--------- .../hashing/murmurhash3_x64_128_test.cpp | 4 +- cpp/tests/hashing/sha256_test.cpp | 2 - cpp/tests/interop/from_arrow_device_test.cpp | 12 ++--- cpp/tests/interop/from_arrow_host_test.cpp | 6 +-- cpp/tests/interop/from_arrow_test.cpp | 8 +-- cpp/tests/interop/to_arrow_device_test.cpp | 12 ++--- cpp/tests/interop/to_arrow_host_test.cpp | 6 +-- cpp/tests/interop/to_arrow_test.cpp | 14 ++--- cpp/tests/io/comp/decomp_test.cpp | 36 ++++++++----- cpp/tests/io/csv_test.cpp | 12 ++--- cpp/tests/io/json/json_test.cpp | 6 +-- cpp/tests/io/orc_test.cpp | 37 ++++++------- cpp/tests/io/parquet_misc_test.cpp | 2 +- cpp/tests/io/parquet_reader_test.cpp | 7 +-- cpp/tests/io/parquet_v2_test.cpp | 36 ++++++------- cpp/tests/io/parquet_writer_test.cpp | 17 +++--- cpp/tests/join/distinct_join_tests.cpp | 10 ++-- cpp/tests/merge/merge_string_test.cpp | 4 +- cpp/tests/merge/merge_test.cpp | 6 +-- .../reductions/segmented_reduction_tests.cpp | 9 ++-- cpp/tests/replace/replace_tests.cpp | 4 +- cpp/tests/rolling/collect_ops_test.cpp | 8 +-- cpp/tests/rolling/offset_row_window_test.cpp | 12 +++-- cpp/tests/rolling/rolling_test.cpp | 2 +- cpp/tests/scalar/scalar_test.cpp | 6 +-- cpp/tests/search/search_list_test.cpp | 3 +- cpp/tests/sort/sort_test.cpp | 2 +- cpp/tests/stream_compaction/unique_tests.cpp | 1 - cpp/tests/streams/stream_compaction_test.cpp | 2 - cpp/tests/strings/integers_tests.cpp | 3 +- cpp/tests/structs/structs_column_tests.cpp | 5 +- cpp/tests/transform/bools_to_mask_test.cpp | 2 +- .../integration/unary_transform_test.cpp | 28 +++++----- 51 files changed, 344 insertions(+), 228 deletions(-) create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_override.json diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy index b791d846d1d..2d4f8c0d80e 100644 --- a/cpp/.clang-tidy +++ b/cpp/.clang-tidy @@ -1,18 +1,47 @@ --- +# Notes on disabled checks +# ------------------------ +# modernize-use-equals-default: +# auto-fix is broken (doesn't insert =default correctly) +# modernize-concat-nested-namespaces: +# auto-fix is broken (can delete code) +# modernize-use-trailing-return-type: +# Purely stylistic, no benefit to rewriting everything +# modernize-return-braced-init-list: +# Stylistically we prefer to see the return type at the return site. 
+# See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672
+# for more information.
+# modernize-use-bool-literals:
+# Our tests use int flags for validity masks extensively and we prefer that
+# clang-analyzer-cplusplus.NewDeleteLeaks:
+# This check has numerous bugs, see
+# https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks
+# We encounter at least
+# https://github.com/llvm/llvm-project/issues/60896
+# https://github.com/llvm/llvm-project/issues/69602
+# clang-analyzer-optin.core.EnumCastOutOfRange:
+# We use enums as flags in multiple cases and this check makes ORing flags invalid
+# clang-analyzer-optin.cplusplus.UninitializedObject:
+# There is an error in nanoarrow that none of the clang-tidy filters (i.e.,
+# header-filter and exclude-header-filter) are able to properly avoid. This
+# merits further investigation.
+#
+# We need to verify that broken checks are still broken
 Checks:
   'modernize-*,
    -modernize-use-equals-default,
    -modernize-concat-nested-namespaces,
    -modernize-use-trailing-return-type,
-   -modernize-use-bool-literals'
-
-  # -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly)
-  # -modernize-concat-nested-namespaces # auto-fix is broken (can delete code)
-  # -modernize-use-trailing-return-type # just a preference
+   -modernize-return-braced-init-list,
+   -modernize-use-bool-literals,
+   clang-analyzer-*,
+   -clang-analyzer-cplusplus.NewDeleteLeaks,
+   -clang-analyzer-optin.core.EnumCastOutOfRange,
+   -clang-analyzer-optin.cplusplus.UninitializedObject'
 
 WarningsAsErrors: ''
-HeaderFilterRegex: ''
-AnalyzeTemporaryDtors: false
+HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
+ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
 FormatStyle: none
 CheckOptions:
   - key: modernize-loop-convert.MaxCopySize
diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake
index 8df1b431095..d7d7fcca044 100644
--- a/cpp/cmake/thirdparty/get_nanoarrow.cmake
+++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake
@@ -14,15 +14,17 @@
 # This function finds nanoarrow and sets any additional necessary environment variables.
function(find_and_configure_nanoarrow) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") + rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json") + # Currently we need to always build nanoarrow so we don't pickup a previous installed version set(CPM_DOWNLOAD_nanoarrow ON) rapids_cpm_find( nanoarrow 0.6.0.dev GLOBAL_TARGETS nanoarrow CPM_ARGS - GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git - GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb - GIT_SHALLOW FALSE OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ) set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff new file mode 100644 index 00000000000..e9a36fcb567 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff @@ -0,0 +1,38 @@ +diff --git a/src/nanoarrow/common/inline_buffer.h b/src/nanoarrow/common/inline_buffer.h +index caa6be4..70ec8a2 100644 +--- a/src/nanoarrow/common/inline_buffer.h ++++ b/src/nanoarrow/common/inline_buffer.h +@@ -347,7 +347,7 @@ static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) { + } + + static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { +- *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | ++ *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | // NOLINT + ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | + ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | + ((values[7] + 0x7f) & 0x80)); +@@ -471,13 +471,13 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l + // set bits within a single byte + const uint8_t only_byte_mask = + i_end % 8 == 0 ? 
first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask); +- bits[bytes_begin] &= only_byte_mask; ++ bits[bytes_begin] &= only_byte_mask; // NOLINT + bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); + return; + } + + // set/clear trailing bits of first byte +- bits[bytes_begin] &= first_byte_mask; ++ bits[bytes_begin] &= first_byte_mask; // NOLINT + bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); + + if (bytes_end - bytes_begin > 2) { +@@ -637,7 +637,7 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte +- *out_cursor = 0x00; ++ *out_cursor = 0x00; // NOLINT + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); + } diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_override.json b/cpp/cmake/thirdparty/patches/nanoarrow_override.json new file mode 100644 index 00000000000..d529787e7c8 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nanoarrow_override.json @@ -0,0 +1,18 @@ + +{ + "packages" : { + "nanoarrow" : { + "version" : "0.6.0.dev", + "git_url" : "https://github.com/apache/arrow-nanoarrow.git", + "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb", + "git_shallow" : false, + "patches" : [ + { + "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", + "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537", + "fixed_in" : "" + } + ] + } + } +} diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index 762131a174f..15fdad21d9f 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -148,7 +148,7 @@ class table { std::vector columns(std::distance(begin, end)); std::transform( begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); }); - return table_view(columns); + return table_view{columns}; } /** diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 4a990f67ce4..d41176590ea 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -241,7 +241,7 @@ class table_view : public detail::table_view_base { { std::vector columns(std::distance(begin, end)); std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); }); - return table_view(columns); + return table_view{columns}; } /** diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 03cf6d4a0e0..d5caa4720ac 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -16,6 +16,7 @@ #include "avro.hpp" +#include #include #include @@ -302,7 +303,7 @@ bool schema_parser::parse(std::vector& schema, std::string const& // Empty schema if (json_str == "[]") return true; - char depthbuf[MAX_SCHEMA_DEPTH]; + std::array depthbuf; int depth = 0, parent_idx = -1, entry_idx = -1; json_state_e state = state_attrname; std::string str; diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index 790532c9d54..5ab36fdae8e 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -258,7 +258,7 @@ class ProtobufReader { private: template - friend class FunctionSwitchImpl; + friend struct FunctionSwitchImpl; void skip_bytes(size_t bytecnt) { diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index b978799b8bc..312a5243687 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ 
b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -228,7 +228,8 @@ class parquet_field_string : public parquet_field { * @return True if field types mismatch or if the process of reading a * string fails */ -struct parquet_field_string_list : public parquet_field_list { +class parquet_field_string_list : public parquet_field_list { + public: parquet_field_string_list(int f, std::vector& v) : parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { @@ -396,8 +397,9 @@ class parquet_field_binary : public parquet_field { * @return True if field types mismatch or if the process of reading a * binary fails */ -struct parquet_field_binary_list +class parquet_field_binary_list : public parquet_field_list, FieldType::BINARY> { + public: parquet_field_binary_list(int f, std::vector>& v) : parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 1dbb9369115..0b76f3d3e8f 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -50,7 +50,8 @@ class file_sink : public data_sink { } } - ~file_sink() override { flush(); } + // Marked as NOLINT because we are calling a virtual method in the destructor + ~file_sink() override { flush(); } // NOLINT void host_write(void const* data, size_t size) override { @@ -114,7 +115,8 @@ class host_buffer_sink : public data_sink { public: explicit host_buffer_sink(std::vector* buffer) : buffer_(buffer) {} - ~host_buffer_sink() override { flush(); } + // Marked as NOLINT because we are calling a virtual method in the destructor + ~host_buffer_sink() override { flush(); } // NOLINT void host_write(void const* data, size_t size) override { diff --git a/cpp/src/io/utilities/hostdevice_span.hpp b/cpp/src/io/utilities/hostdevice_span.hpp index d9eac423901..1d8b34addbd 100644 --- a/cpp/src/io/utilities/hostdevice_span.hpp +++ b/cpp/src/io/utilities/hostdevice_span.hpp @@ -43,8 +43,8 @@ class hostdevice_span { template ().host_ptr())> (*)[], - T (*)[]>>* = nullptr> + std::remove_pointer_t().host_ptr())> (*)[], // NOLINT + T (*)[]>>* = nullptr> // NOLINT constexpr hostdevice_span(C& in) : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size()) { } @@ -54,8 +54,8 @@ class hostdevice_span { template ().host_ptr())> (*)[], - T (*)[]>>* = nullptr> + std::remove_pointer_t().host_ptr())> (*)[], // NOLINT + T (*)[]>>* = nullptr> // NOLINT constexpr hostdevice_span(C const& in) : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size()) { diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp index 125b98c4a67..9d8e3cf2fa6 100644 --- a/cpp/src/utilities/host_memory.cpp +++ b/cpp/src/utilities/host_memory.cpp @@ -115,12 +115,19 @@ class fixed_pinned_pool_memory_resource { return !operator==(other); } - friend void get_property(fixed_pinned_pool_memory_resource const&, + // clang-tidy will complain about this function because it is completely + // unused at runtime and only exist for tag introspection by CCCL, so we + // ignore linting. This masks a real issue if we ever want to compile with + // clang, though, which is that the function will actually be compiled out by + // clang. If cudf were ever to try to support clang as a compile we would + // need to force the compiler to emit this symbol. The same goes for the + // other get_property definitions in this file. 
+ friend void get_property(fixed_pinned_pool_memory_resource const&, // NOLINT cuda::mr::device_accessible) noexcept { } - friend void get_property(fixed_pinned_pool_memory_resource const&, + friend void get_property(fixed_pinned_pool_memory_resource const&, // NOLINT cuda::mr::host_accessible) noexcept { } @@ -235,7 +242,9 @@ class new_delete_memory_resource { bool operator!=(new_delete_memory_resource const& other) const { return !operator==(other); } + // NOLINTBEGIN friend void get_property(new_delete_memory_resource const&, cuda::mr::host_accessible) noexcept {} + // NOLINTEND }; static_assert(cuda::mr::resource_with, diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index 06e0d193d80..aa5b49567e6 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -557,7 +557,11 @@ auto NullOp_Result(cudf::column_view lhs, cudf::column_view rhs) std::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(lhs.size()), result.begin(), - [&lhs_data, &lhs_mask, &rhs_data, &rhs_mask, &result_mask](auto i) -> TypeOut { + [&lhs_data = lhs_data, + &lhs_mask = lhs_mask, + &rhs_data = rhs_data, + &rhs_mask = rhs_mask, + &result_mask = result_mask](auto i) -> TypeOut { auto lhs_valid = lhs_mask.data() and cudf::bit_is_set(lhs_mask.data(), i); auto rhs_valid = rhs_mask.data() and cudf::bit_is_set(rhs_mask.data(), i); bool output_valid = lhs_valid or rhs_valid; diff --git a/cpp/tests/binaryop/util/operation.h b/cpp/tests/binaryop/util/operation.h index d36b48d666a..ef1ccfccab5 100644 --- a/cpp/tests/binaryop/util/operation.h +++ b/cpp/tests/binaryop/util/operation.h @@ -100,7 +100,7 @@ struct Mul { std::enable_if_t<(cudf::is_duration_t::value && std::is_integral_v) || (cudf::is_duration_t::value && std::is_integral_v), void>* = nullptr> - OutT DurationProduct(LhsT x, RhsT y) const + [[nodiscard]] OutT DurationProduct(LhsT x, RhsT y) const { return x * y; } @@ -128,7 +128,7 @@ struct Div { typename LhsT, typename RhsT, std::enable_if_t<(std::is_integral_v || cudf::is_duration()), void>* = nullptr> - OutT DurationDivide(LhsT x, RhsT y) const + [[nodiscard]] OutT DurationDivide(LhsT x, RhsT y) const { return x / y; } diff --git a/cpp/tests/column/column_test.cpp b/cpp/tests/column/column_test.cpp index 14b4197de71..631f5150829 100644 --- a/cpp/tests/column/column_test.cpp +++ b/cpp/tests/column/column_test.cpp @@ -340,7 +340,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorNoMask) cudf::column moved_to{std::move(original)}; - EXPECT_EQ(0, original.size()); + EXPECT_EQ(0, original.size()); // NOLINT EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type()); verify_column_views(moved_to); @@ -359,7 +359,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask) cudf::column moved_to{std::move(original)}; verify_column_views(moved_to); - EXPECT_EQ(0, original.size()); + EXPECT_EQ(0, original.size()); // NOLINT EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type()); // Verify move diff --git a/cpp/tests/copying/slice_tests.cpp b/cpp/tests/copying/slice_tests.cpp index bebd3d25610..aef0d4ad78a 100644 --- a/cpp/tests/copying/slice_tests.cpp +++ b/cpp/tests/copying/slice_tests.cpp @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -370,11 +371,12 @@ TEST_F(SliceStringTableTest, StringWithNulls) auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", 
"of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; - cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids}, - {strings[1].begin(), strings[1].end(), valids}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; + std::array sw{ + {{strings[0].begin(), strings[0].end(), valids}, + {strings[1].begin(), strings[1].end(), valids}}}; std::vector> scols; scols.push_back(sw[0].release()); diff --git a/cpp/tests/copying/slice_tests.cuh b/cpp/tests/copying/slice_tests.cuh index a180740f143..1e037294527 100644 --- a/cpp/tests/copying/slice_tests.cuh +++ b/cpp/tests/copying/slice_tests.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -148,7 +148,7 @@ std::vector create_expected_tables(cudf::size_type num_cols, } } - result.push_back(cudf::table(std::move(cols))); + result.emplace_back(std::move(cols)); } return result; @@ -163,13 +163,12 @@ inline std::vector create_expected_string_co for (unsigned long index = 0; index < indices.size(); index += 2) { if (not nullable) { - result.push_back(cudf::test::strings_column_wrapper(strings.begin() + indices[index], - strings.begin() + indices[index + 1])); + result.emplace_back(strings.begin() + indices[index], strings.begin() + indices[index + 1]); } else { auto valids = cudf::detail::make_counting_transform_iterator( indices[index], [](auto i) { return i % 2 == 0; }); - result.push_back(cudf::test::strings_column_wrapper( - strings.begin() + indices[index], strings.begin() + indices[index + 1], valids)); + result.emplace_back( + strings.begin() + indices[index], strings.begin() + indices[index + 1], valids); } } @@ -184,16 +183,16 @@ inline std::vector create_expected_string_co std::vector result = {}; for (unsigned long index = 0; index < indices.size(); index += 2) { - result.push_back(cudf::test::strings_column_wrapper(strings.begin() + indices[index], - strings.begin() + indices[index + 1], - validity.begin() + indices[index])); + result.emplace_back(strings.begin() + indices[index], + strings.begin() + indices[index + 1], + validity.begin() + indices[index]); } return result; } inline std::vector create_expected_string_tables( - std::vector const strings[2], + std::vector> const strings, std::vector const& indices, bool nullable) { @@ -216,7 +215,7 @@ inline std::vector create_expected_string_tables( } } - result.push_back(cudf::table(std::move(cols))); + result.emplace_back(std::move(cols)); } return result; diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index ee3e7da5e0f..b56b0f2d3f8 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -135,7 +136,7 @@ std::vector create_expected_tables_for_splits( } std::vector create_expected_string_tables_for_splits( - std::vector const strings[2], + std::vector> const strings, std::vector const& splits, bool nullable) { @@ -144,8 +145,8 @@ std::vector create_expected_string_tables_for_splits( } std::vector create_expected_string_tables_for_splits( - std::vector const strings[2], - std::vector const validity[2], + std::vector> 
const strings, + std::vector> const validity, std::vector const& splits) { std::vector indices = splits_to_indices(splits, strings[0].size()); @@ -627,11 +628,12 @@ void split_string_with_invalids(SplitFunc Split, auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; - cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids}, - {strings[1].begin(), strings[1].end(), valids}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; + std::array sw{ + {{strings[0].begin(), strings[0].end(), valids}, + {strings[1].begin(), strings[1].end(), valids}}}; std::vector> scols; scols.push_back(sw[0].release()); @@ -658,11 +660,12 @@ void split_empty_output_strings_column_value(SplitFunc Split, auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; - cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids}, - {strings[1].begin(), strings[1].end(), valids}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; + std::array sw{ + {{strings[0].begin(), strings[0].end(), valids}, + {strings[1].begin(), strings[1].end(), valids}}}; std::vector> scols; scols.push_back(sw[0].release()); @@ -684,9 +687,9 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare) auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; std::vector splits{2, 5, 9}; @@ -699,16 +702,17 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare) EXPECT_NO_THROW(Split(empty_table, splits)); } - cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), no_valids}, - {strings[1].begin(), strings[1].end(), valids}}; + std::array sw{ + {{strings[0].begin(), strings[0].end(), no_valids}, + {strings[1].begin(), strings[1].end(), valids}}}; std::vector> scols; scols.push_back(sw[0].release()); scols.push_back(sw[1].release()); cudf::table src_table(std::move(scols)); auto result = Split(src_table, splits); - std::vector validity_masks[2] = {std::vector(strings[0].size()), - std::vector(strings[0].size())}; + std::vector> validity_masks{std::vector(strings[0].size()), + std::vector(strings[0].size())}; std::generate( validity_masks[1].begin(), validity_masks[1].end(), [i = 0]() mutable { return i++ % 2 == 0; }); @@ -1913,9 +1917,9 @@ TEST_F(ContiguousSplitTableCornerCases, MixedColumnTypes) cudf::size_type start = 0; auto valids = cudf::detail::make_counting_transform_iterator(start, 
[](auto i) { return true; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; std::vector> cols; @@ -2377,7 +2381,7 @@ TEST_F(ContiguousSplitTableCornerCases, OutBufferToSmall) { // internally, contiguous split chunks GPU work in 1MB contiguous copies // so the output buffer must be 1MB or larger. - EXPECT_THROW(cudf::chunked_pack::create({}, 1 * 1024), cudf::logic_error); + EXPECT_THROW(auto _ = cudf::chunked_pack::create({}, 1 * 1024), cudf::logic_error); } TEST_F(ContiguousSplitTableCornerCases, ChunkSpanTooSmall) diff --git a/cpp/tests/hashing/murmurhash3_x64_128_test.cpp b/cpp/tests/hashing/murmurhash3_x64_128_test.cpp index 4fb8f78b558..0e68050f935 100644 --- a/cpp/tests/hashing/murmurhash3_x64_128_test.cpp +++ b/cpp/tests/hashing/murmurhash3_x64_128_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,8 +22,6 @@ #include -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; - using NumericTypesNoBools = cudf::test::Concat; diff --git a/cpp/tests/hashing/sha256_test.cpp b/cpp/tests/hashing/sha256_test.cpp index cc95c7a2f0f..8bc47c92c6b 100644 --- a/cpp/tests/hashing/sha256_test.cpp +++ b/cpp/tests/hashing/sha256_test.cpp @@ -23,8 +23,6 @@ #include #include -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; - class SHA256HashTest : public cudf::test::BaseFixture {}; TEST_F(SHA256HashTest, EmptyTable) diff --git a/cpp/tests/interop/from_arrow_device_test.cpp b/cpp/tests/interop/from_arrow_device_test.cpp index a4dc7531765..2151ec6e22f 100644 --- a/cpp/tests/interop/from_arrow_device_test.cpp +++ b/cpp/tests/interop/from_arrow_device_test.cpp @@ -270,9 +270,9 @@ TEST_F(FromArrowDeviceTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); @@ -414,9 +414,9 @@ TEST_F(FromArrowDeviceTest, DictionaryIndicesType) { std::vector> columns; auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1}); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); + columns.emplace_back(cudf::dictionary::encode(col)); + columns.emplace_back(cudf::dictionary::encode(col)); + columns.emplace_back(cudf::dictionary::encode(col)); cudf::table expected_table(std::move(columns)); cudf::table_view expected_table_view = expected_table.view(); diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp index 
cbfa4911c3c..ef9936b214c 100644 --- a/cpp/tests/interop/from_arrow_host_test.cpp +++ b/cpp/tests/interop/from_arrow_host_test.cpp @@ -309,9 +309,9 @@ TEST_F(FromArrowHostDeviceTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 81c406c0faf..6e742b9e4cf 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -52,7 +52,7 @@ std::unique_ptr get_cudf_table() .release()); auto col4 = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {true, false, true, true, true}); - columns.emplace_back(std::move(cudf::dictionary::encode(col4))); + columns.emplace_back(cudf::dictionary::encode(col4)); columns.emplace_back(cudf::test::fixed_width_column_wrapper( {true, false, true, false, true}, {true, false, true, true, false}) .release()); @@ -339,9 +339,9 @@ TEST_F(FromArrowTest, DictionaryIndicesType) std::vector> columns; auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {true, false, true, true, true}); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); + columns.emplace_back(cudf::dictionary::encode(col)); + columns.emplace_back(cudf::dictionary::encode(col)); + columns.emplace_back(cudf::dictionary::encode(col)); cudf::table expected_table(std::move(columns)); diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp index 51216a8512c..7ba586461dc 100644 --- a/cpp/tests/interop/to_arrow_device_test.cpp +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -55,7 +55,7 @@ get_nanoarrow_cudf_table(cudf::size_type length) auto col4 = cudf::test::fixed_width_column_wrapper( test_data.int64_data.begin(), test_data.int64_data.end(), test_data.validity.begin()); auto dict_col = cudf::dictionary::encode(col4); - columns.emplace_back(std::move(cudf::dictionary::encode(col4))); + columns.emplace_back(cudf::dictionary::encode(col4)); columns.emplace_back(cudf::test::fixed_width_column_wrapper(test_data.bool_data.begin(), test_data.bool_data.end(), test_data.bool_validity.begin()) @@ -82,8 +82,8 @@ get_nanoarrow_cudf_table(cudf::size_type length) test_data.string_data.begin(), test_data.string_data.end(), test_data.validity.begin()) .release(); vector_of_columns cols; - cols.push_back(move(int_column)); - cols.push_back(move(str_column)); + cols.push_back(std::move(int_column)); + cols.push_back(std::move(str_column)); auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( test_data.bool_data_validity.begin(), test_data.bool_data_validity.end())); columns.emplace_back( @@ -575,9 +575,9 @@ TEST_F(ToArrowDeviceTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - 
cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp index fc0ed6c9352..fcb4433b42e 100644 --- a/cpp/tests/interop/to_arrow_host_test.cpp +++ b/cpp/tests/interop/to_arrow_host_test.cpp @@ -436,9 +436,9 @@ TEST_F(ToArrowHostDeviceTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 90ae12cdd90..a6aa4b22eca 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -90,7 +90,7 @@ std::pair, std::shared_ptr> get_table auto col4 = cudf::test::fixed_width_column_wrapper( int64_data.begin(), int64_data.end(), validity.begin()); auto dict_col = cudf::dictionary::encode(col4); - columns.emplace_back(std::move(cudf::dictionary::encode(col4))); + columns.emplace_back(cudf::dictionary::encode(col4)); columns.emplace_back(cudf::test::fixed_width_column_wrapper( bool_data.begin(), bool_data.end(), bool_validity.begin()) .release()); @@ -112,8 +112,8 @@ std::pair, std::shared_ptr> get_table cudf::test::strings_column_wrapper(string_data.begin(), string_data.end(), validity.begin()) .release(); vector_of_columns cols; - cols.push_back(move(int_column)); - cols.push_back(move(str_column)); + cols.push_back(std::move(int_column)); + cols.push_back(std::move(str_column)); auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( bool_data_validity.begin(), bool_data_validity.end())); columns.emplace_back( @@ -294,9 +294,9 @@ TEST_F(ToArrowTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); @@ -438,7 +438,7 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - std::vector const metadata = {{"a"}}; + std::vector const metadata = {{"a"}}; // NOLINT ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index 840cf263ed9..54262dc3b44 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -39,19 +39,19 @@ using cudf::device_span; */ template struct DecompressTest : public 
cudf::test::BaseFixture { - std::vector vector_from_string(char const* str) const + [[nodiscard]] std::vector vector_from_string(std::string const str) const { - return std::vector(reinterpret_cast(str), - reinterpret_cast(str) + strlen(str)); + return {reinterpret_cast(str.c_str()), + reinterpret_cast(str.c_str()) + strlen(str.c_str())}; } - void Decompress(std::vector* decompressed, + void Decompress(std::vector& decompressed, uint8_t const* compressed, size_t compressed_size) { auto stream = cudf::get_default_stream(); rmm::device_buffer src{compressed, compressed_size, stream}; - rmm::device_uvector dst{decompressed->size(), stream}; + rmm::device_uvector dst{decompressed.size(), stream}; cudf::detail::hostdevice_vector> inf_in(1, stream); inf_in[0] = {static_cast(src.data()), src.size()}; @@ -67,7 +67,7 @@ struct DecompressTest : public cudf::test::BaseFixture { static_cast(this)->dispatch(inf_in, inf_out, inf_stat); CUDF_CUDA_TRY(cudaMemcpyAsync( - decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value())); + decompressed.data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value())); inf_stat.device_to_host_sync(stream); ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS); } @@ -125,49 +125,57 @@ struct NvcompConfigTest : public cudf::test::BaseFixture {}; TEST_F(GzipDecompressTest, HelloWorld) { - constexpr char uncompressed[] = "hello world"; + std::string const uncompressed{"hello world"}; + // NOLINTBEGIN constexpr uint8_t compressed[] = { 0x1f, 0x8b, 0x8, 0x0, 0x9, 0x63, 0x99, 0x5c, 0x2, 0xff, 0xcb, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0x1, 0x0, 0x85, 0x11, 0x4a, 0xd, 0xb, 0x0, 0x0, 0x0}; + // NOLINTEND std::vector input = vector_from_string(uncompressed); std::vector output(input.size()); - Decompress(&output, compressed, sizeof(compressed)); + Decompress(output, compressed, sizeof(compressed)); EXPECT_EQ(output, input); } TEST_F(SnappyDecompressTest, HelloWorld) { - constexpr char uncompressed[] = "hello world"; + std::string const uncompressed{"hello world"}; + // NOLINTBEGIN constexpr uint8_t compressed[] = { 0xb, 0x28, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64}; + // NOLINTEND std::vector input = vector_from_string(uncompressed); std::vector output(input.size()); - Decompress(&output, compressed, sizeof(compressed)); + Decompress(output, compressed, sizeof(compressed)); EXPECT_EQ(output, input); } TEST_F(SnappyDecompressTest, ShortLiteralAfterLongCopyAtStartup) { - constexpr char uncompressed[] = "Aaaaaaaaaaaah!"; + std::string const uncompressed{"Aaaaaaaaaaaah!"}; + // NOLINTBEGIN constexpr uint8_t compressed[] = {14, 0x0, 'A', 0x0, 'a', (10 - 4) * 4 + 1, 1, 0x4, 'h', '!'}; + // NOLINTEND std::vector input = vector_from_string(uncompressed); std::vector output(input.size()); - Decompress(&output, compressed, sizeof(compressed)); + Decompress(output, compressed, sizeof(compressed)); EXPECT_EQ(output, input); } TEST_F(BrotliDecompressTest, HelloWorld) { - constexpr char uncompressed[] = "hello world"; + std::string const uncompressed{"hello world"}; + // NOLINTBEGIN constexpr uint8_t compressed[] = { 0xb, 0x5, 0x80, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x3}; + // NOLINTEND std::vector input = vector_from_string(uncompressed); std::vector output(input.size()); - Decompress(&output, compressed, sizeof(compressed)); + Decompress(output, compressed, sizeof(compressed)); EXPECT_EQ(output, input); } diff --git a/cpp/tests/io/csv_test.cpp 
b/cpp/tests/io/csv_test.cpp index 0028dd946e3..b265dcf9273 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -63,9 +63,9 @@ auto dtype() template using column_wrapper = - typename std::conditional, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>::type; + std::conditional_t, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>; using column = cudf::column; using table = cudf::table; using table_view = cudf::table_view; @@ -954,7 +954,7 @@ TEST_F(CsvReaderTest, Strings) ASSERT_EQ(type_id::STRING, view.column(1).type().id()); expect_column_data_equal( - std::vector{"abc def ghi", "\"jkl mno pqr\"", "stu \"\"vwx\"\" yz"}, + std::vector{"abc def ghi", "\"jkl mno pqr\"", R"(stu ""vwx"" yz)"}, view.column(1)); } @@ -1014,7 +1014,7 @@ TEST_F(CsvReaderTest, StringsQuotesIgnored) ASSERT_EQ(type_id::STRING, view.column(1).type().id()); expect_column_data_equal( - std::vector{"\"abcdef ghi\"", "\"jkl \"\"mno\"\" pqr\"", "stu \"vwx\" yz"}, + std::vector{"\"abcdef ghi\"", R"("jkl ""mno"" pqr")", "stu \"vwx\" yz"}, view.column(1)); } @@ -1830,7 +1830,7 @@ TEST_F(CsvReaderTest, StringsWithWriter) auto int_column = column_wrapper{10, 20, 30}; auto string_column = - column_wrapper{"abc def ghi", "\"jkl mno pqr\"", "stu \"\"vwx\"\" yz"}; + column_wrapper{"abc def ghi", "\"jkl mno pqr\"", R"(stu ""vwx"" yz)"}; cudf::table_view input_table(std::vector{int_column, string_column}); // TODO add quoting style flag? diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index 49ad0c408dc..cb6716f4a18 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -68,9 +68,9 @@ auto dtype() template using column_wrapper = - typename std::conditional, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>::type; + std::conditional_t, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>; cudf::test::TempDirTestEnvironment* const temp_env = static_cast( diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 89e704f3ed3..cce0adbf317 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -43,9 +43,9 @@ template using column_wrapper = - typename std::conditional, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>::type; + std::conditional_t, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>; using str_col = column_wrapper; using bool_col = column_wrapper; @@ -1358,21 +1358,22 @@ TEST_P(OrcWriterTestStripes, StripeSize) cols.push_back(col.release()); auto const expected = std::make_unique(std::move(cols)); - auto validate = [&](std::vector const& orc_buffer) { - auto const expected_stripe_num = - std::max(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes); - auto const stats = cudf::io::read_parsed_orc_statistics( - cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); - EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder( - cudf::io::source_info(orc_buffer.data(), orc_buffer.size())) - .use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); - }; + auto validate = + [&, &size_bytes = size_bytes, &size_rows = size_rows](std::vector const& orc_buffer) { + auto const expected_stripe_num = + std::max(num_rows / size_rows, (num_rows * sizeof(int64_t)) / 
size_bytes); + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); + EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())) + .use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); + }; { std::vector out_buffer_chunked; diff --git a/cpp/tests/io/parquet_misc_test.cpp b/cpp/tests/io/parquet_misc_test.cpp index 8b03e94191e..f1286a00d22 100644 --- a/cpp/tests/io/parquet_misc_test.cpp +++ b/cpp/tests/io/parquet_misc_test.cpp @@ -98,7 +98,7 @@ TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaListSliced) // list constexpr int vals_per_row = 4; auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( - 0, [vals_per_row](cudf::size_type idx) { return idx * vals_per_row; }); + 0, [](cudf::size_type idx) { return idx * vals_per_row; }); cudf::test::fixed_width_column_wrapper c1_offsets(c1_offset_iter, c1_offset_iter + num_rows + 1); cudf::test::fixed_width_column_wrapper c1_vals( diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp index dc8e68b3a15..4a5309f3ba7 100644 --- a/cpp/tests/io/parquet_reader_test.cpp +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -1189,15 +1189,12 @@ TEST_F(ParquetReaderTest, NestingOptimizationTest) cudf::test::fixed_width_column_wrapper values(value_iter, value_iter + num_values, validity); // ~256k values with num_nesting_levels = 16 - int total_values_produced = num_values; - auto prev_col = values.release(); + auto prev_col = values.release(); for (int idx = 0; idx < num_nesting_levels; idx++) { - auto const depth = num_nesting_levels - idx; auto const num_rows = (1 << (num_nesting_levels - idx)); auto offsets_iter = cudf::detail::make_counting_transform_iterator( - 0, [depth, rows_per_level](cudf::size_type i) { return i * rows_per_level; }); - total_values_produced += (num_rows + 1); + 0, [](cudf::size_type i) { return i * rows_per_level; }); cudf::test::fixed_width_column_wrapper offsets(offsets_iter, offsets_iter + num_rows + 1); diff --git a/cpp/tests/io/parquet_v2_test.cpp b/cpp/tests/io/parquet_v2_test.cpp index 7c305235ea6..a0b48f54854 100644 --- a/cpp/tests/io/parquet_v2_test.cpp +++ b/cpp/tests/io/parquet_v2_test.cpp @@ -1302,24 +1302,24 @@ TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls) table_view expected({col0, col1, col2, col3, col4, col5, col6, col7}); std::array expected_null_counts{4, 4, 4, 6, 4, 6, 4, 5, 11}; - std::vector const expected_def_hists[] = {{1, 1, 2, 3}, - {1, 3, 10}, - {1, 1, 2, 10}, - {1, 1, 2, 2, 8}, - {1, 1, 1, 1, 10}, - {1, 1, 1, 1, 2, 8}, - {1, 3, 9}, - {1, 3, 1, 8}, - {1, 0, 4, 1, 1, 4, 9}}; - std::vector const expected_rep_hists[] = {{4, 3}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 5}, - {4, 4, 5}, - {4, 6, 2, 8}}; + std::vector> const expected_def_hists = {{1, 1, 2, 3}, + {1, 3, 10}, + {1, 1, 2, 10}, + {1, 1, 2, 2, 8}, + {1, 1, 1, 1, 10}, + {1, 1, 1, 1, 2, 8}, + {1, 3, 9}, + {1, 3, 1, 8}, + {1, 0, 4, 1, 1, 4, 9}}; + std::vector> const expected_rep_hists = {{4, 3}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 5}, + {4, 4, 5}, + {4, 6, 2, 8}}; auto const filepath = temp_env->get_temp_filepath("ColumnIndexListWithNulls.parquet"); auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, 
expected) diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp index 8794f2ee304..6c5e9cdf07a 100644 --- a/cpp/tests/io/parquet_writer_test.cpp +++ b/cpp/tests/io/parquet_writer_test.cpp @@ -290,7 +290,8 @@ class custom_test_data_sink : public cudf::io::data_sink { CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file"); } - ~custom_test_data_sink() override { flush(); } + // Marked as NOLINT because we are calling a virtual method in the destructor + ~custom_test_data_sink() override { flush(); } // NOLINT void host_write(void const* data, size_t size) override { @@ -981,13 +982,15 @@ TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation) { - std::vector truncated_min[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, - {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + std::array, 3> truncated_min{ + {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, + {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}; - std::vector truncated_max[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff}, - {0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + std::array, 3> truncated_max{ + {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff}, + {0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}; cudf::test::lists_column_wrapper col0{ {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}}; diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 93754091b3f..178edc52dd3 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -314,7 +314,7 @@ TEST_F(DistinctJoinTest, EmptyBuildTableLeftJoin) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -362,7 +362,7 @@ TEST_F(DistinctJoinTest, EmptyProbeTableLeftJoin) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -398,7 +398,7 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; this->compare_to_reference( build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -423,7 +423,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; 
column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}}; strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {true, true, false, true, true}); @@ -468,7 +468,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; auto col0_gold_names_col = strcol_wrapper{ "Samuel Vimes", "Detritus", "Carrot Ironfoundersson", "Samuel Vimes", "Angua von Überwald"}; diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index 97979e79010..bea044496b3 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -97,7 +97,7 @@ TYPED_TEST(MergeStringTest, Merge1StringKeyColumns) "hi", "hj"}); - auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) { + auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) return 0; else @@ -296,7 +296,7 @@ TYPED_TEST(MergeStringTest, Merge1StringKeyNullColumns) true, false, false}); - auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) { + auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) return 0; else diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 2e09f25b51f..6208d395f0a 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -349,7 +349,7 @@ TYPED_TEST(MergeTest_, Merge1KeyColumns) cudf::test::fixed_width_column_wrapper expectedDataWrap1(seq_out1, seq_out1 + outputRows); - auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) { + auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) return 0; else @@ -452,7 +452,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns) cudf::size_type inputRows = 40; // data: 0 2 4 6 | valid: 1 1 1 0 - auto sequence1 = cudf::detail::make_counting_transform_iterator(0, [inputRows](auto row) { + auto sequence1 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) { return 0; // <- no shortcut to this can avoid compiler errors } else { @@ -465,7 +465,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns) leftColWrap1(sequence1, sequence1 + inputRows, valid_sequence1); // data: 1 3 5 7 | valid: 1 1 1 0 - auto sequence2 = cudf::detail::make_counting_transform_iterator(0, [inputRows](auto row) { + auto sequence2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) { return 1; } else diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 19996f827cf..bc0321bd40a 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -1092,11 +1092,10 @@ TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets) auto aggregates = std::vector>>(); - aggregates.push_back(std::move(cudf::make_max_aggregation())); - aggregates.push_back(std::move(cudf::make_min_aggregation())); - aggregates.push_back(std::move(cudf::make_sum_aggregation())); - 
aggregates.push_back( - std::move(cudf::make_product_aggregation())); + aggregates.push_back(cudf::make_max_aggregation()); + aggregates.push_back(cudf::make_min_aggregation()); + aggregates.push_back(cudf::make_sum_aggregation()); + aggregates.push_back(cudf::make_product_aggregation()); auto output_type = cudf::data_type{cudf::type_to_id()}; for (auto&& agg : aggregates) { diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index 1858cd7782e..b12bf08520f 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -356,7 +356,7 @@ void test_replace(cudf::host_span input_column, for (size_t i = 0; i < values_to_replace_column.size(); i++) { size_t k = 0; - auto pred = [=, &k, &reference_result, &expected_valid, &isReplaced](T element) { + auto pred = [=, &k, &expected_valid, &isReplaced](T element) { bool toBeReplaced = false; if (!isReplaced[k]) { if (!input_has_nulls || expected_valid[k]) { @@ -503,7 +503,7 @@ TYPED_TEST(ReplaceTest, LargeScaleReplaceTest) const size_t REPLACE_SIZE = 10000; thrust::host_vector input_column(DATA_SIZE); - std::generate(std::begin(input_column), std::end(input_column), [REPLACE_SIZE]() { + std::generate(std::begin(input_column), std::end(input_column), []() { return std::rand() % (REPLACE_SIZE); }); diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index f702dc78371..165e0347785 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -214,7 +214,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowHonoursMinPeriods) *cudf::make_collect_list_aggregation()); auto expected_result_2 = cudf::test::lists_column_wrapper{ {{}, {0, 1, 2, 3}, {1, 2, 3, 4}, {2, 3, 4, 5}, {}, {}}, - cudf::detail::make_counting_transform_iterator(0, [num_elements](auto i) { + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0 && i < 4; })}.release(); @@ -338,7 +338,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods) cudf::test::fixed_width_column_wrapper{0, 0, 4, 8, 12, 12, 12}.release(); auto expected_num_rows = expected_offsets->size() - 1; auto null_mask_iter = cudf::detail::make_counting_transform_iterator( - cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; }); + cudf::size_type{0}, [](auto i) { return i > 0 && i < 4; }); auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); @@ -373,7 +373,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods) cudf::test::fixed_width_column_wrapper{0, 0, 3, 5, 8, 8, 8}.release(); auto expected_num_rows = expected_offsets->size() - 1; auto null_mask_iter = cudf::detail::make_counting_transform_iterator( - cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; }); + cudf::size_type{0}, [](auto i) { return i > 0 && i < 4; }); auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); @@ -1499,7 +1499,7 @@ TYPED_TEST(TypedCollectSetTest, RollingWindowHonoursMinPeriods) *cudf::make_collect_set_aggregation()); auto expected_result_2 = cudf::test::lists_column_wrapper{ {{}, {0, 1, 2}, {1, 2, 4}, {2, 4, 5}, {}, {}}, - cudf::detail::make_counting_transform_iterator(0, [num_elements](auto i) { + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0 && i < 4; })}.release(); diff --git a/cpp/tests/rolling/offset_row_window_test.cpp 
b/cpp/tests/rolling/offset_row_window_test.cpp index ec726878b34..0eaab0c9f7a 100644 --- a/cpp/tests/rolling/offset_row_window_test.cpp +++ b/cpp/tests/rolling/offset_row_window_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,6 +41,11 @@ using cudf::test::iterators::nulls_at; auto constexpr null = int32_t{0}; // NULL representation for int32_t; +// clang-tidy doesn't think std::transform can handle a +// thrust::constant_iterator, so this is a workaround that uses nulls_at +// instead of no_nulls +auto no_nulls_list() { return nulls_at({}); } + struct OffsetRowWindowTest : public cudf::test::BaseFixture { static ints_column const _keys; // {0, 0, 0, 0, 0, 0, 1, 1, 1, 1}; static ints_column const _values; // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; @@ -210,7 +215,8 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_0_to_2) CUDF_TEST_EXPECT_COLUMNS_EQUAL( *run_rolling(*AGG_COLLECT_LIST), - lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, no_nulls}); + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, + no_nulls_list()}); } TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2) @@ -250,7 +256,7 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2) CUDF_TEST_EXPECT_COLUMNS_EQUAL( *run_rolling(*AGG_COLLECT_LIST), lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9}, {}}, - no_nulls}); + no_nulls_list()}); } // To test that preceding bounds are clamped correctly at group boundaries. diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index c2c22986975..6e0dc16dca9 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -541,7 +541,7 @@ class RollingTest : public cudf::test::BaseFixture { agg_op op; for (cudf::size_type i = 0; i < num_rows; i++) { - OutputType val = agg_op::template identity(); + auto val = agg_op::template identity(); // load sizes min_periods = std::max(min_periods, 1); // at least one observation is required diff --git a/cpp/tests/scalar/scalar_test.cpp b/cpp/tests/scalar/scalar_test.cpp index 2d37de920d5..2b79911a95a 100644 --- a/cpp/tests/scalar/scalar_test.cpp +++ b/cpp/tests/scalar/scalar_test.cpp @@ -190,7 +190,7 @@ TEST_F(ListScalarTest, MoveConstructorNonNested) EXPECT_EQ(mask_ptr, s2.validity_data()); EXPECT_EQ(data_ptr, s2.view().data()); - EXPECT_EQ(s.view().data(), nullptr); + EXPECT_EQ(s.view().data(), nullptr); // NOLINT } TEST_F(ListScalarTest, MoveConstructorNested) @@ -205,8 +205,8 @@ TEST_F(ListScalarTest, MoveConstructorNested) EXPECT_EQ(mask_ptr, s2.validity_data()); EXPECT_EQ(offset_ptr, s2.view().child(0).data()); EXPECT_EQ(data_ptr, s2.view().child(1).data()); - EXPECT_EQ(s.view().data(), nullptr); - EXPECT_EQ(s.view().num_children(), 0); + EXPECT_EQ(s.view().data(), nullptr); // NOLINT + EXPECT_EQ(s.view().num_children(), 0); // NOLINT } struct StructScalarTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/search/search_list_test.cpp b/cpp/tests/search/search_list_test.cpp index 48711c21715..7584003e800 100644 --- a/cpp/tests/search/search_list_test.cpp +++ b/cpp/tests/search/search_list_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
* * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,6 @@ using strings_col = cudf::test::strings_column_wrapper; constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR}; constexpr int32_t null{0}; // Mark for null child elements at the current level -constexpr int32_t XXX{0}; // Mark for null elements at all levels using TestTypes = cudf::test::Concat> grand_child; - grand_child.push_back(std::move(col4.release())); + grand_child.push_back(col4.release()); auto child_col_2 = cudf::make_structs_column(6, std::move(grand_child), 0, rmm::device_buffer{}); child_columns2.push_back(std::move(child_col_2)); auto struct_col3 = diff --git a/cpp/tests/stream_compaction/unique_tests.cpp b/cpp/tests/stream_compaction/unique_tests.cpp index 4d7d23dc881..d5b6915b520 100644 --- a/cpp/tests/stream_compaction/unique_tests.cpp +++ b/cpp/tests/stream_compaction/unique_tests.cpp @@ -43,7 +43,6 @@ auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST; auto constexpr KEEP_LAST = cudf::duplicate_keep_option::KEEP_LAST; auto constexpr KEEP_NONE = cudf::duplicate_keep_option::KEEP_NONE; -auto constexpr NULL_EQUAL = cudf::null_equality::EQUAL; auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL; using int32s_col = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp index 443f4548b2c..07b2d77cc04 100644 --- a/cpp/tests/streams/stream_compaction_test.cpp +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -29,8 +29,6 @@ #include -auto constexpr null{0}; // null at current level -auto constexpr XXX{0}; // null pushed down from parent level auto constexpr NaN = std::numeric_limits::quiet_NaN(); auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST; diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index ce5f68de3c9..26bcfe8028d 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -425,7 +426,7 @@ TYPED_TEST(StringsIntegerConvertTest, IntegerToHex) if (v == 0) { return std::string("00"); } // special handling for single-byte types if constexpr (std::is_same_v || std::is_same_v) { - char const hex_digits[16] = { + std::array const hex_digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; std::string str; str += hex_digits[(v & 0xF0) >> 4]; diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp index f0010fc1ed9..219bd6d8b01 100644 --- a/cpp/tests/structs/structs_column_tests.cpp +++ b/cpp/tests/structs/structs_column_tests.cpp @@ -635,9 +635,8 @@ TEST_F(StructColumnWrapperTest, TestStructsColumnWithEmptyChild) auto mask_vec = std::vector{true, false, false}; auto [null_mask, null_count] = cudf::test::detail::make_null_mask(mask_vec.begin(), mask_vec.end()); - auto structs_col = - cudf::make_structs_column(num_rows, std::move(cols), null_count, std::move(null_mask)); - EXPECT_NO_THROW(structs_col->view()); + EXPECT_NO_THROW(auto structs_col = cudf::make_structs_column( + num_rows, std::move(cols), null_count, std::move(null_mask))); } CUDF_TEST_PROGRAM_MAIN() diff --git 
a/cpp/tests/transform/bools_to_mask_test.cpp b/cpp/tests/transform/bools_to_mask_test.cpp index 215ca158f37..2684123c08a 100644 --- a/cpp/tests/transform/bools_to_mask_test.cpp +++ b/cpp/tests/transform/bools_to_mask_test.cpp @@ -32,7 +32,7 @@ struct MaskToNullTest : public cudf::test::BaseFixture { { cudf::test::fixed_width_column_wrapper input_column( input.begin(), input.end(), val.begin()); - std::transform(val.begin(), val.end(), input.begin(), input.begin(), std::logical_and()); + std::transform(val.begin(), val.end(), input.begin(), input.begin(), std::logical_and<>()); auto sample = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); diff --git a/cpp/tests/transform/integration/unary_transform_test.cpp b/cpp/tests/transform/integration/unary_transform_test.cpp index 1785848ec77..0bdf5b321ac 100644 --- a/cpp/tests/transform/integration/unary_transform_test.cpp +++ b/cpp/tests/transform/integration/unary_transform_test.cpp @@ -47,7 +47,7 @@ void test_udf(char const* udf, Op op, Data data_init, cudf::size_type size, bool TEST_F(UnaryOperationIntegrationTest, Transform_FP32_FP32) { // c = a*a*a*a - char const* cuda = + std::string const cuda = R"***( __device__ inline void fdsf ( float* C, @@ -58,7 +58,7 @@ __device__ inline void fdsf ( } )***"; - char const* ptx = + std::string const ptx = R"***( // // Generated by NVIDIA NVVM Compiler @@ -101,17 +101,17 @@ __device__ inline void fdsf ( auto op = [](dtype a) { return a * a * a * a; }; auto data_init = [](cudf::size_type row) { return row % 3; }; - test_udf(cuda, op, data_init, 500, false); - test_udf(ptx, op, data_init, 500, true); + test_udf(cuda.c_str(), op, data_init, 500, false); + test_udf(ptx.c_str(), op, data_init, 500, true); } TEST_F(UnaryOperationIntegrationTest, Transform_INT32_INT32) { // c = a * a - a - char const cuda[] = + std::string const cuda = "__device__ inline void f(int* output,int input){*output = input*input - input;}"; - char const* ptx = + std::string const ptx = R"***( .func _Z1fPii( .param .b64 _Z1fPii_param_0, @@ -136,8 +136,8 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT32_INT32) auto op = [](dtype a) { return a * a - a; }; auto data_init = [](cudf::size_type row) { return row % 78; }; - test_udf(cuda, op, data_init, 500, false); - test_udf(ptx, op, data_init, 500, true); + test_udf(cuda.c_str(), op, data_init, 500, false); + test_udf(ptx.c_str(), op, data_init, 500, true); } TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8) @@ -145,7 +145,7 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8) // Capitalize all the lower case letters // Assuming ASCII, the PTX code is compiled from the following CUDA code - char const cuda[] = + std::string const cuda = R"***( __device__ inline void f( signed char* output, @@ -159,7 +159,7 @@ __device__ inline void f( } )***"; - char const ptx[] = + std::string const ptx = R"***( .func _Z1fPcc( .param .b64 _Z1fPcc_param_0, @@ -191,15 +191,15 @@ __device__ inline void f( auto op = [](dtype a) { return std::toupper(a); }; auto data_init = [](cudf::size_type row) { return 'a' + (row % 26); }; - test_udf(cuda, op, data_init, 500, false); - test_udf(ptx, op, data_init, 500, true); + test_udf(cuda.c_str(), op, data_init, 500, false); + test_udf(ptx.c_str(), op, data_init, 500, true); } TEST_F(UnaryOperationIntegrationTest, Transform_Datetime) { // Add one day to timestamp in microseconds - char const cuda[] = + std::string const cuda = R"***( __device__ inline void f(cudf::timestamp_us* output, cudf::timestamp_us input) { @@ 
-217,7 +217,7 @@ __device__ inline void f(cudf::timestamp_us* output, cudf::timestamp_us input) auto random_eng = cudf::test::UniformRandomGenerator(0, 100000000); auto data_init = [&random_eng](cudf::size_type row) { return random_eng.generate(); }; - test_udf(cuda, op, data_init, 500, false); + test_udf(cuda.c_str(), op, data_init, 500, false); } } // namespace transformation From 2d02bdce9e3efae232dea4a5b8b2eecf5c0f8a93 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:30:58 -0500 Subject: [PATCH 16/33] Implement `extract_datetime_component` in `libcudf`/`pylibcudf` (#16776) Closes https://github.com/rapidsai/cudf/issues/16735 Authors: - https://github.com/brandon-b-miller - Lawrence Mitchell (https://github.com/wence-) Approvers: - Matthew Murray (https://github.com/Matt711) - Lawrence Mitchell (https://github.com/wence-) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/16776 --- cpp/include/cudf/datetime.hpp | 34 +++++ cpp/include/cudf/detail/datetime.hpp | 10 ++ cpp/src/datetime/datetime_ops.cu | 88 ++++++------ cpp/tests/datetime/datetime_ops_test.cpp | 130 ++++++++++++++++++ python/cudf/cudf/_lib/datetime.pyx | 39 +++++- python/cudf_polars/cudf_polars/dsl/expr.py | 40 ++++-- python/pylibcudf/pylibcudf/datetime.pxd | 7 + python/pylibcudf/pylibcudf/datetime.pyx | 71 ++++------ .../pylibcudf/libcudf/CMakeLists.txt | 5 +- .../pylibcudf/pylibcudf/libcudf/datetime.pxd | 17 +++ .../pylibcudf/pylibcudf/libcudf/datetime.pyx | 0 .../pylibcudf/tests/test_datetime.py | 55 ++++---- 12 files changed, 358 insertions(+), 138 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/libcudf/datetime.pyx diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 7359a0d5fde..1eaea5b6374 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -38,6 +38,22 @@ namespace datetime { * @file */ +/** + * @brief Types of datetime components that may be extracted. + */ +enum class datetime_component : uint8_t { + YEAR, + MONTH, + DAY, + WEEKDAY, + HOUR, + MINUTE, + SECOND, + MILLISECOND, + MICROSECOND, + NANOSECOND +}; + /** * @brief Extracts year from any datetime type and returns an int16_t * cudf::column. @@ -207,6 +223,24 @@ std::unique_ptr extract_nanosecond_fraction( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Extracts the specified datetime component from any datetime type and + * returns an int16_t cudf::column. 
+ * + * @param column cudf::column_view of the input datetime values + * @param component The datetime component to extract + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t datetime component + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_datetime_component( + cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** @} */ // end of group /** * @addtogroup datetime_compute diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 9db7e48498f..df3050d6494 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -115,6 +115,16 @@ std::unique_ptr extract_nanosecond_fraction(cudf::column_view cons rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); +/** + * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component, + * rmm::cuda_stream_view, rmm::device_async_resource_ref) + * + */ +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + /** * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view, * rmm::device_async_resource_ref) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index ddb0dbcd96d..a497cedb3bc 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -44,19 +44,6 @@ namespace cudf { namespace datetime { namespace detail { -enum class datetime_component { - INVALID = 0, - YEAR, - MONTH, - DAY, - WEEKDAY, - HOUR, - MINUTE, - SECOND, - MILLISECOND, - MICROSECOND, - NANOSECOND -}; enum class rounding_function { CEIL, ///< Rounds up to the next integer multiple of the provided frequency @@ -453,90 +440,70 @@ std::unique_ptr extract_year(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr); } std::unique_ptr extract_month(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr); } std::unique_ptr extract_day(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr); } std::unique_ptr extract_weekday(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr); } std::unique_ptr extract_hour(column_view const& 
column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr); } std::unique_ptr extract_minute(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr); } std::unique_ptr extract_second(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr); } std::unique_ptr extract_microsecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr); } std::unique_ptr extract_nanosecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr); } std::unique_ptr last_day_of_month(column_view const& column, @@ -576,6 +543,32 @@ std::unique_ptr extract_quarter(column_view const& column, return apply_datetime_op(column, stream, mr); } +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ +#define extract(field) \ + case field: \ + return apply_datetime_op, cudf::type_id::INT16>( \ + column, stream, mr) + + switch (component) { + extract(datetime_component::YEAR); + extract(datetime_component::MONTH); + extract(datetime_component::DAY); + extract(datetime_component::WEEKDAY); + extract(datetime_component::HOUR); + extract(datetime_component::MINUTE); + extract(datetime_component::SECOND); + extract(datetime_component::MILLISECOND); + extract(datetime_component::MICROSECOND); + extract(datetime_component::NANOSECOND); + default: CUDF_FAIL("Unsupported datetime component."); + } +#undef extract +} + } // namespace detail std::unique_ptr ceil_datetimes(column_view const& column, @@ -661,6 +654,15 @@ std::unique_ptr extract_second(column_view const& column, return detail::extract_second(column, stream, mr); } +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_datetime_component(column, component, stream, mr); +} + std::unique_ptr 
extract_millisecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 13577c4d0ea..603edb27c7c 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -196,6 +196,136 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{0, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns), fixed_width_column_wrapper{766, 424, 623}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1969, 1970, 1970}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{12, 1, 1}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{31, 1, 1}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::HOUR), + 
fixed_width_column_wrapper{14, 12, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{14, 12, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{23, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{1, 0, 32}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{1, 0, 32}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{59, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{12, 0, 12}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{12, 0, 12}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{59, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{762, 0, 929}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{976, 23, 987}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{675, 432, 234}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 
0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{766, 424, 623}); } template diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index bc5e085ec39..d844466120f 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -13,12 +13,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.filling cimport calendrical_month_sequence from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport size_type +from pylibcudf.datetime import DatetimeComponent from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar -import pylibcudf as plc - @acquire_spill_lock() def add_months(Column col, Column months): @@ -40,9 +39,39 @@ def add_months(Column col, Column months): @acquire_spill_lock() def extract_datetime_component(Column col, object field): - result = Column.from_pylibcudf( - plc.datetime.extract_datetime_component(col.to_pylibcudf(mode="read"), field) - ) + + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + cdef libcudf_datetime.datetime_component component + + component_names = { + "year": DatetimeComponent.YEAR, + "month": DatetimeComponent.MONTH, + "day": DatetimeComponent.DAY, + "weekday": DatetimeComponent.WEEKDAY, + "hour": DatetimeComponent.HOUR, + "minute": DatetimeComponent.MINUTE, + "second": DatetimeComponent.SECOND, + "millisecond": DatetimeComponent.MILLISECOND, + "microsecond": DatetimeComponent.MICROSECOND, + "nanosecond": DatetimeComponent.NANOSECOND, + } + if field == "day_of_year": + with nogil: + c_result = move(libcudf_datetime.day_of_year(col_view)) + elif field in component_names: + component = component_names[field] + with nogil: + c_result = move( + libcudf_datetime.extract_datetime_component( + col_view, + component + ) + ) + else: + raise ValueError(f"Invalid field: '{field}'") + + result = Column.from_unique_ptr(move(c_result)) if field == "weekday": # Pandas counts Monday-Sunday as 0-6 diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 54476b7fedc..a418560b31c 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -961,16 +961,16 @@ def do_evaluate( class TemporalFunction(Expr): __slots__ = ("name", "options", "children") _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = { - pl_expr.TemporalFunction.Year: "year", - pl_expr.TemporalFunction.Month: "month", - pl_expr.TemporalFunction.Day: "day", - pl_expr.TemporalFunction.WeekDay: "weekday", - pl_expr.TemporalFunction.Hour: "hour", - pl_expr.TemporalFunction.Minute: "minute", - pl_expr.TemporalFunction.Second: "second", - pl_expr.TemporalFunction.Millisecond: "millisecond", - pl_expr.TemporalFunction.Microsecond: "microsecond", - pl_expr.TemporalFunction.Nanosecond: "nanosecond", + pl_expr.TemporalFunction.Year: plc.datetime.DatetimeComponent.YEAR, + pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH, + pl_expr.TemporalFunction.Day: plc.datetime.DatetimeComponent.DAY, + pl_expr.TemporalFunction.WeekDay: plc.datetime.DatetimeComponent.WEEKDAY, + pl_expr.TemporalFunction.Hour: plc.datetime.DatetimeComponent.HOUR, + pl_expr.TemporalFunction.Minute: plc.datetime.DatetimeComponent.MINUTE, + pl_expr.TemporalFunction.Second: plc.datetime.DatetimeComponent.SECOND, + pl_expr.TemporalFunction.Millisecond: 
plc.datetime.DatetimeComponent.MILLISECOND, + pl_expr.TemporalFunction.Microsecond: plc.datetime.DatetimeComponent.MICROSECOND, + pl_expr.TemporalFunction.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND, } _non_child = ("dtype", "name", "options") children: tuple[Expr, ...] @@ -1003,8 +1003,12 @@ def do_evaluate( ] (column,) = columns if self.name == pl_expr.TemporalFunction.Microsecond: - millis = plc.datetime.extract_datetime_component(column.obj, "millisecond") - micros = plc.datetime.extract_datetime_component(column.obj, "microsecond") + millis = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MILLISECOND + ) + micros = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MICROSECOND + ) millis_as_micros = plc.binaryop.binary_operation( millis, plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())), @@ -1019,9 +1023,15 @@ def do_evaluate( ) return Column(total_micros) elif self.name == pl_expr.TemporalFunction.Nanosecond: - millis = plc.datetime.extract_datetime_component(column.obj, "millisecond") - micros = plc.datetime.extract_datetime_component(column.obj, "microsecond") - nanos = plc.datetime.extract_datetime_component(column.obj, "nanosecond") + millis = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MILLISECOND + ) + micros = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MICROSECOND + ) + nanos = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.NANOSECOND + ) millis_as_nanos = plc.binaryop.binary_operation( millis, plc.interop.from_arrow(pa.scalar(1_000_000, type=pa.int32())), diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index 2fce48cf1b4..72ce680ba7a 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -1,8 +1,15 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
+from pylibcudf.libcudf.datetime cimport datetime_component + from .column cimport Column cpdef Column extract_year( Column col ) + +cpdef Column extract_datetime_component( + Column col, + datetime_component component +) diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index e8e0caaf42d..784d29128bf 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -3,19 +3,14 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.datetime cimport ( - day_of_year as cpp_day_of_year, - extract_day as cpp_extract_day, - extract_hour as cpp_extract_hour, - extract_microsecond_fraction as cpp_extract_microsecond_fraction, - extract_millisecond_fraction as cpp_extract_millisecond_fraction, - extract_minute as cpp_extract_minute, - extract_month as cpp_extract_month, - extract_nanosecond_fraction as cpp_extract_nanosecond_fraction, - extract_second as cpp_extract_second, - extract_weekday as cpp_extract_weekday, + datetime_component, + extract_datetime_component as cpp_extract_datetime_component, extract_year as cpp_extract_year, ) +from pylibcudf.libcudf.datetime import \ + datetime_component as DatetimeComponent # no-cython-lint + from .column cimport Column @@ -41,41 +36,29 @@ cpdef Column extract_year( result = move(cpp_extract_year(values.view())) return Column.from_libcudf(move(result)) +cpdef Column extract_datetime_component( + Column values, + datetime_component component +): + """ + Extract a datetime component from a datetime column. -def extract_datetime_component(Column col, str field): + For details, see :cpp:func:`cudf::extract_datetime_component`. - cdef unique_ptr[column] c_result + Parameters + ---------- + values : Column + The column to extract the component from. + component : DatetimeComponent + The datetime component to extract. - with nogil: - if field == "year": - c_result = move(cpp_extract_year(col.view())) - elif field == "month": - c_result = move(cpp_extract_month(col.view())) - elif field == "day": - c_result = move(cpp_extract_day(col.view())) - elif field == "weekday": - c_result = move(cpp_extract_weekday(col.view())) - elif field == "hour": - c_result = move(cpp_extract_hour(col.view())) - elif field == "minute": - c_result = move(cpp_extract_minute(col.view())) - elif field == "second": - c_result = move(cpp_extract_second(col.view())) - elif field == "millisecond": - c_result = move( - cpp_extract_millisecond_fraction(col.view()) - ) - elif field == "microsecond": - c_result = move( - cpp_extract_microsecond_fraction(col.view()) - ) - elif field == "nanosecond": - c_result = move( - cpp_extract_nanosecond_fraction(col.view()) - ) - elif field == "day_of_year": - c_result = move(cpp_day_of_year(col.view())) - else: - raise ValueError(f"Invalid datetime field: '{field}'") + Returns + ------- + Column + Column with the extracted component. + """ + cdef unique_ptr[column] result - return Column.from_libcudf(move(c_result)) + with nogil: + result = move(cpp_extract_datetime_component(values.view(), component)) + return Column.from_libcudf(move(result)) diff --git a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt index 2167616690f..15beaee47d4 100644 --- a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt @@ -12,8 +12,9 @@ # the License. 
# ============================================================================= -set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx labeling.pyx reduce.pyx - replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx +set(cython_sources + aggregation.pyx binaryop.pyx copying.pyx datetime.pyx expressions.pyx labeling.pyx reduce.pyx + replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index a4465343197..73cdfb96af5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. +from libc.stdint cimport uint8_t from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -7,6 +8,18 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: + cpdef enum class datetime_component(uint8_t): + YEAR + MONTH + DAY + WEEKDAY + HOUR + MINUTE + SECOND + MILLISECOND + MICROSECOND + NANOSECOND + cdef unique_ptr[column] extract_year(const column_view& column) except + cdef unique_ptr[column] extract_month(const column_view& column) except + cdef unique_ptr[column] extract_day(const column_view& column) except + @@ -23,6 +36,10 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_nanosecond_fraction( const column_view& column ) except + + cdef unique_ptr[column] extract_datetime_component( + const column_view& column, + datetime_component component + ) except + ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency": DAY "cudf::datetime::rounding_frequency::DAY" diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pyx b/python/pylibcudf/pylibcudf/libcudf/datetime.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py index 89c96829e71..75930d59058 100644 --- a/python/pylibcudf/pylibcudf/tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -1,7 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
 import datetime
-import functools
 
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -10,19 +9,6 @@ from utils import assert_column_eq
 
 
-@pytest.fixture
-def date_column(has_nulls):
-    values = [
-        datetime.date(1999, 1, 1),
-        datetime.date(2024, 10, 12),
-        datetime.date(1, 1, 1),
-        datetime.date(9999, 1, 1),
-    ]
-    if has_nulls:
-        values[2] = None
-    return plc.interop.from_arrow(pa.array(values, type=pa.date32()))
-
-
 @pytest.fixture(scope="module", params=["s", "ms", "us", "ns"])
 def datetime_column(has_nulls, request):
     values = [
@@ -40,24 +26,35 @@ def datetime_column(has_nulls, request):
     )
 
 
-@pytest.mark.parametrize(
-    "component, pc_fun",
-    [
-        ("year", pc.year),
-        ("month", pc.month),
-        ("day", pc.day),
-        ("weekday", functools.partial(pc.day_of_week, count_from_zero=False)),
-        ("hour", pc.hour),
-        ("minute", pc.minute),
-        ("second", pc.second),
-        ("millisecond", pc.millisecond),
-        ("microsecond", pc.microsecond),
-        ("nanosecond", pc.nanosecond),
+@pytest.fixture(
+    params=[
+        ("year", plc.datetime.DatetimeComponent.YEAR),
+        ("month", plc.datetime.DatetimeComponent.MONTH),
+        ("day", plc.datetime.DatetimeComponent.DAY),
+        ("day_of_week", plc.datetime.DatetimeComponent.WEEKDAY),
+        ("hour", plc.datetime.DatetimeComponent.HOUR),
+        ("minute", plc.datetime.DatetimeComponent.MINUTE),
+        ("second", plc.datetime.DatetimeComponent.SECOND),
+        ("millisecond", plc.datetime.DatetimeComponent.MILLISECOND),
+        ("microsecond", plc.datetime.DatetimeComponent.MICROSECOND),
+        ("nanosecond", plc.datetime.DatetimeComponent.NANOSECOND),
     ],
+    ids=lambda x: x[0],
 )
-def test_extraction(datetime_column, component, pc_fun):
+def component(request):
+    return request.param
+
+
+def test_extract_datetime_component(datetime_column, component):
+    attr, component = component
+    kwargs = {}
+    if attr == "day_of_week":
+        kwargs = {"count_from_zero": False}
     got = plc.datetime.extract_datetime_component(datetime_column, component)
     # libcudf produces an int16, arrow produces an int64
-    expect = pc_fun(plc.interop.to_arrow(datetime_column)).cast(pa.int16())
+
+    expect = getattr(pc, attr)(
+        plc.interop.to_arrow(datetime_column), **kwargs
+    ).cast(pa.int16())
 
     assert_column_eq(expect, got)

From 09ed2105b841fe29be75af8b0d5a41fc09e7b6ac Mon Sep 17 00:00:00 2001
From: Matthew Murray <41342305+Matt711@users.noreply.github.com>
Date: Mon, 7 Oct 2024 21:25:29 -0400
Subject: [PATCH 17/33] Migrate nvtext generate_ngrams APIs to pylibcudf (#17006)

Part of #15162

Authors:
  - Matthew Murray (https://github.com/Matt711)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/17006
---
 .../pylibcudf/nvtext/generate_ngrams.rst      |   6 +
 .../api_docs/pylibcudf/nvtext/index.rst       |   1 +
 .../cudf/cudf/_lib/nvtext/generate_ngrams.pyx |  77 +++---------
 .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt |   2 +-
 .../pylibcudf/pylibcudf/nvtext/__init__.pxd   |   3 +-
 python/pylibcudf/pylibcudf/nvtext/__init__.py |   3 +-
 .../pylibcudf/nvtext/generate_ngrams.pxd      |  12 ++
 .../pylibcudf/nvtext/generate_ngrams.pyx      | 111 ++++++++++++++++++
 .../tests/test_nvtext_generate_ngrams.py      |  54 +++++++++
 9 files changed, 207 insertions(+), 62 deletions(-)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/generate_ngrams.rst
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd
 create mode 100644 python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
 create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py

diff --git
a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/generate_ngrams.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/generate_ngrams.rst new file mode 100644 index 00000000000..d68199271bd --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/generate_ngrams.rst @@ -0,0 +1,6 @@ +=============== +generate_ngrams +=============== + +.. automodule:: pylibcudf.nvtext.generate_ngrams + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index b5cd5ee42c3..2e03b589c8b 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -5,3 +5,4 @@ nvtext :maxdepth: 1 edit_distance + generate_ngrams diff --git a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx index 6591b527eec..7fdf9258b7f 100644 --- a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx +++ b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx @@ -2,75 +2,34 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.generate_ngrams cimport ( - generate_character_ngrams as cpp_generate_character_ngrams, - generate_ngrams as cpp_generate_ngrams, - hash_character_ngrams as cpp_hash_character_ngrams, -) -from pylibcudf.libcudf.scalar.scalar cimport string_scalar -from pylibcudf.libcudf.types cimport size_type - from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar + +from pylibcudf import nvtext @acquire_spill_lock() def generate_ngrams(Column strings, int ngrams, object py_separator): - - cdef DeviceScalar separator = py_separator.device_value - - cdef column_view c_strings = strings.view() - cdef size_type c_ngrams = ngrams - cdef const string_scalar* c_separator = separator\ - .get_raw_ptr() - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_generate_ngrams( - c_strings, - c_ngrams, - c_separator[0] - ) - ) - - return Column.from_unique_ptr(move(c_result)) + result = nvtext.generate_ngrams.generate_ngrams( + strings.to_pylibcudf(mode="read"), + ngrams, + py_separator.device_value.c_value + ) + return Column.from_pylibcudf(result) @acquire_spill_lock() def generate_character_ngrams(Column strings, int ngrams): - cdef column_view c_strings = strings.view() - cdef size_type c_ngrams = ngrams - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_generate_character_ngrams( - c_strings, - c_ngrams - ) - ) - - return Column.from_unique_ptr(move(c_result)) + result = nvtext.generate_ngrams.generate_character_ngrams( + strings.to_pylibcudf(mode="read"), + ngrams + ) + return Column.from_pylibcudf(result) @acquire_spill_lock() def hash_character_ngrams(Column strings, int ngrams): - cdef column_view c_strings = strings.view() - cdef size_type c_ngrams = ngrams - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_hash_character_ngrams( - c_strings, - c_ngrams - ) - ) - - return Column.from_unique_ptr(move(c_result)) + result = nvtext.generate_ngrams.hash_character_ngrams( + strings.to_pylibcudf(mode="read"), + ngrams + ) + return Column.from_pylibcudf(result) diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index 
ebe1fda1f12..eb5617a1da6 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. # ============================================================================= -set(cython_sources edit_distance.pyx) +set(cython_sources edit_distance.pyx generate_ngrams.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index 82f7c425b1d..7f5fa2b9925 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -1,7 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . cimport edit_distance +from . cimport edit_distance, generate_ngrams __all__ = [ "edit_distance", + "generate_ngrams", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index 986652a241f..a66ce984745 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -1,7 +1,8 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import edit_distance +from . import edit_distance, generate_ngrams __all__ = [ "edit_distance", + "generate_ngrams", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd new file mode 100644 index 00000000000..f15eb1f25e9 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd @@ -0,0 +1,12 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar + + +cpdef Column generate_ngrams(Column input, size_type ngrams, Scalar separator) + +cpdef Column generate_character_ngrams(Column input, size_type ngrams=*) + +cpdef Column hash_character_ngrams(Column input, size_type ngrams=*) diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx new file mode 100644 index 00000000000..8c7a8edc01d --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx @@ -0,0 +1,111 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.nvtext.generate_ngrams cimport ( + generate_character_ngrams as cpp_generate_character_ngrams, + generate_ngrams as cpp_generate_ngrams, + hash_character_ngrams as cpp_hash_character_ngrams, +) +from pylibcudf.libcudf.scalar.scalar cimport string_scalar +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.scalar cimport Scalar + + +cpdef Column generate_ngrams(Column input, size_type ngrams, Scalar separator): + """ + Returns a single column of strings by generating ngrams from a strings column. 
+ + For details, see :cpp:func:`generate_ngrams` + + Parameters + ---------- + input : Column + Input strings + ngram : size_type + The ngram number to generate + separator : Scalar + The string to use for separating ngram tokens + + Returns + ------- + Column + New strings columns of tokens + """ + cdef column_view c_strings = input.view() + cdef const string_scalar* c_separator = separator.c_obj.get() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_generate_ngrams( + c_strings, + ngrams, + c_separator[0] + ) + ) + return Column.from_libcudf(move(c_result)) + + +cpdef Column generate_character_ngrams(Column input, size_type ngrams = 2): + """ + Returns a lists column of ngrams of characters within each string. + + For details, see :cpp:func:`generate_character_ngrams` + + Parameters + ---------- + input : Column + Input strings + ngram : size_type + The ngram number to generate + + Returns + ------- + Column + Lists column of strings + """ + cdef column_view c_strings = input.view() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_generate_character_ngrams( + c_strings, + ngrams, + ) + ) + return Column.from_libcudf(move(c_result)) + +cpdef Column hash_character_ngrams(Column input, size_type ngrams = 2): + """ + Returns a lists column of hash values of the characters in each string + + For details, see :cpp:func:`hash_character_ngrams` + + Parameters + ---------- + input : Column + Input strings + ngram : size_type + The ngram number to generate + + Returns + ------- + Column + Lists column of hash values + """ + cdef column_view c_strings = input.view() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_hash_character_ngrams( + c_strings, + ngrams, + ) + ) + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py new file mode 100644 index 00000000000..5cf9874d595 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py @@ -0,0 +1,54 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+from utils import assert_column_eq
+
+
+@pytest.fixture(scope="module")
+def input_col():
+    arr = ["ab", "cde", "fgh"]
+    return pa.array(arr)
+
+
+@pytest.mark.parametrize("ngram", [2, 3])
+@pytest.mark.parametrize("sep", ["_", "**", ","])
+def test_generate_ngrams(input_col, ngram, sep):
+    result = plc.nvtext.generate_ngrams.generate_ngrams(
+        plc.interop.from_arrow(input_col),
+        ngram,
+        plc.interop.from_arrow(pa.scalar(sep)),
+    )
+    expected = pa.array([f"ab{sep}cde", f"cde{sep}fgh"])
+    if ngram == 3:
+        expected = pa.array([f"ab{sep}cde{sep}fgh"])
+    assert_column_eq(result, expected)
+
+
+@pytest.mark.parametrize("ngram", [2, 3])
+def test_generate_character_ngrams(input_col, ngram):
+    result = plc.nvtext.generate_ngrams.generate_character_ngrams(
+        plc.interop.from_arrow(input_col),
+        ngram,
+    )
+    expected = pa.array([["ab"], ["cd", "de"], ["fg", "gh"]])
+    if ngram == 3:
+        expected = pa.array([[], ["cde"], ["fgh"]])
+    assert_column_eq(result, expected)
+
+
+@pytest.mark.parametrize("ngram", [2, 3])
+def test_hash_character_ngrams(input_col, ngram):
+    result = plc.nvtext.generate_ngrams.hash_character_ngrams(
+        plc.interop.from_arrow(input_col),
+        ngram,
+    )
+    pa_result = plc.interop.to_arrow(result)
+    assert all(
+        len(got) == max(0, len(s.as_py()) - ngram + 1)
+        for got, s in zip(pa_result, input_col)
+    )
+    assert pa_result.type == pa.list_(
+        pa.field("element", pa.uint32(), nullable=False)
+    )

From 219ec0e7fbff37d7387d25e93510b55a8782e2bf Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell
Date: Tue, 8 Oct 2024 13:40:40 +0100
Subject: [PATCH 18/33] Expunge NamedColumn (#16962)

Everything in the expression evaluation now operates on columns without
names. DataFrame construction takes either a mapping from string-valued
names to columns, or a sequence of pairs of names and columns. Removing
the NamedColumn class also eliminates duplicated code where we
previously had to fight the inheritance hierarchy.
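As a rough sketch of the resulting construction API (illustration only,
not part of this commit; `plc_col_a` and `plc_col_b` stand in for
pre-existing `pylibcudf.Column` objects):

```python
from cudf_polars.containers import Column, DataFrame

# Columns are unnamed by default; expression evaluation no longer
# needs a NamedColumn subclass.
unnamed = Column(plc_col_a)

# A name is attached via the keyword-only constructor argument or via
# rename(), which returns a shallow copy with the new name.
named = unnamed.rename("a")

# DataFrame construction requires every column to carry a name and
# raises ValueError otherwise.
df = DataFrame([named, Column(plc_col_b, name="b")])
```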
- Closes #16272 Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Matthew Murray (https://github.com/Matt711) URL: https://github.com/rapidsai/cudf/pull/16962 --- .../cudf_polars/containers/__init__.py | 4 +- .../cudf_polars/containers/column.py | 110 ++++++---------- .../cudf_polars/containers/dataframe.py | 111 ++++++++--------- python/cudf_polars/cudf_polars/dsl/expr.py | 19 ++- python/cudf_polars/cudf_polars/dsl/ir.py | 117 +++++++++--------- python/cudf_polars/docs/overview.md | 18 +-- .../tests/containers/test_column.py | 9 +- .../tests/containers/test_dataframe.py | 39 +++--- .../tests/expressions/test_sort.py | 2 +- .../cudf_polars/tests/utils/test_broadcast.py | 20 +-- 10 files changed, 209 insertions(+), 240 deletions(-) diff --git a/python/cudf_polars/cudf_polars/containers/__init__.py b/python/cudf_polars/cudf_polars/containers/__init__.py index 06bb08953f1..3b1eff4a0d0 100644 --- a/python/cudf_polars/cudf_polars/containers/__init__.py +++ b/python/cudf_polars/cudf_polars/containers/__init__.py @@ -5,7 +5,7 @@ from __future__ import annotations -__all__: list[str] = ["DataFrame", "Column", "NamedColumn"] +__all__: list[str] = ["DataFrame", "Column"] -from cudf_polars.containers.column import Column, NamedColumn +from cudf_polars.containers.column import Column from cudf_polars.containers.dataframe import DataFrame diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py index 3fe3e5557cb..00186098e54 100644 --- a/python/cudf_polars/cudf_polars/containers/column.py +++ b/python/cudf_polars/cudf_polars/containers/column.py @@ -15,7 +15,7 @@ import polars as pl -__all__: list[str] = ["Column", "NamedColumn"] +__all__: list[str] = ["Column"] class Column: @@ -26,6 +26,9 @@ class Column: order: plc.types.Order null_order: plc.types.NullOrder is_scalar: bool + # Optional name, only ever set by evaluation of NamedExpr nodes + # The internal evaluation should not care about the name. + name: str | None def __init__( self, @@ -34,14 +37,12 @@ def __init__( is_sorted: plc.types.Sorted = plc.types.Sorted.NO, order: plc.types.Order = plc.types.Order.ASCENDING, null_order: plc.types.NullOrder = plc.types.NullOrder.BEFORE, + name: str | None = None, ): self.obj = column self.is_scalar = self.obj.size() == 1 - if self.obj.size() <= 1: - is_sorted = plc.types.Sorted.YES - self.is_sorted = is_sorted - self.order = order - self.null_order = null_order + self.name = name + self.set_sorted(is_sorted=is_sorted, order=order, null_order=null_order) @functools.cached_property def obj_scalar(self) -> plc.Scalar: @@ -63,9 +64,26 @@ def obj_scalar(self) -> plc.Scalar: ) return plc.copying.get_element(self.obj, 0) + def rename(self, name: str | None, /) -> Self: + """ + Return a shallow copy with a new name. + + Parameters + ---------- + name + New name + + Returns + ------- + Shallow copy of self with new name set. + """ + new = self.copy() + new.name = name + return new + def sorted_like(self, like: Column, /) -> Self: """ - Copy sortedness properties from a column onto self. + Return a shallow copy with sortedness from like. Parameters ---------- @@ -74,20 +92,23 @@ def sorted_like(self, like: Column, /) -> Self: Returns ------- - Self with metadata set. + Shallow copy of self with metadata set. 
See Also -------- set_sorted, copy_metadata """ - return self.set_sorted( - is_sorted=like.is_sorted, order=like.order, null_order=like.null_order + return type(self)( + self.obj, + name=self.name, + is_sorted=like.is_sorted, + order=like.order, + null_order=like.null_order, ) - # TODO: Return Column once #16272 is fixed. - def astype(self, dtype: plc.DataType) -> plc.Column: + def astype(self, dtype: plc.DataType) -> Column: """ - Return the backing column as the requested dtype. + Cast the column to as the requested dtype. Parameters ---------- @@ -109,8 +130,10 @@ def astype(self, dtype: plc.DataType) -> plc.Column: the current one. """ if self.obj.type() != dtype: - return plc.unary.cast(self.obj, dtype) - return self.obj + return Column(plc.unary.cast(self.obj, dtype), name=self.name).sorted_like( + self + ) + return self def copy_metadata(self, from_: pl.Series, /) -> Self: """ @@ -129,6 +152,7 @@ def copy_metadata(self, from_: pl.Series, /) -> Self: -------- set_sorted, sorted_like """ + self.name = from_.name if len(from_) <= 1: return self ascending = from_.flags["SORTED_ASC"] @@ -192,6 +216,7 @@ def copy(self) -> Self: is_sorted=self.is_sorted, order=self.order, null_order=self.null_order, + name=self.name, ) def mask_nans(self) -> Self: @@ -217,58 +242,3 @@ def nan_count(self) -> int: ) ).as_py() return 0 - - -class NamedColumn(Column): - """A column with a name.""" - - name: str - - def __init__( - self, - column: plc.Column, - name: str, - *, - is_sorted: plc.types.Sorted = plc.types.Sorted.NO, - order: plc.types.Order = plc.types.Order.ASCENDING, - null_order: plc.types.NullOrder = plc.types.NullOrder.BEFORE, - ) -> None: - super().__init__( - column, is_sorted=is_sorted, order=order, null_order=null_order - ) - self.name = name - - def copy(self, *, new_name: str | None = None) -> Self: - """ - A shallow copy of the column. - - Parameters - ---------- - new_name - Optional new name for the copied column. - - Returns - ------- - New column sharing data with self. - """ - return type(self)( - self.obj, - self.name if new_name is None else new_name, - is_sorted=self.is_sorted, - order=self.order, - null_order=self.null_order, - ) - - def mask_nans(self) -> Self: - """Return a shallow copy of self with nans masked out.""" - # Annoying, the inheritance is not right (can't call the - # super-type mask_nans), but will sort that by refactoring - # later. 
- if plc.traits.is_floating_point(self.obj.type()): - old_count = self.obj.null_count() - mask, new_count = plc.transform.nans_to_nulls(self.obj) - result = type(self)(self.obj.with_mask(mask, new_count), self.name) - if old_count == new_count: - return result.sorted_like(self) - return result - return self.copy() diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index f3e3862d0cc..2c195f6637c 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -5,43 +5,50 @@ from __future__ import annotations -import itertools from functools import cached_property -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, cast import pyarrow as pa import pylibcudf as plc import polars as pl -from cudf_polars.containers.column import NamedColumn +from cudf_polars.containers import Column from cudf_polars.utils import dtypes if TYPE_CHECKING: - from collections.abc import Mapping, Sequence, Set + from collections.abc import Iterable, Mapping, Sequence, Set from typing_extensions import Self - from cudf_polars.containers import Column - __all__: list[str] = ["DataFrame"] +# Pacify the type checker. DataFrame init asserts that all the columns +# have a string name, so let's narrow the type. +class NamedColumn(Column): + name: str + + class DataFrame: """A representation of a dataframe.""" - columns: list[NamedColumn] + column_map: dict[str, Column] table: plc.Table + columns: list[NamedColumn] - def __init__(self, columns: Sequence[NamedColumn]) -> None: - self.columns = list(columns) - self._column_map = {c.name: c for c in self.columns} - self.table = plc.Table([c.obj for c in columns]) + def __init__(self, columns: Iterable[Column]) -> None: + columns = list(columns) + if any(c.name is None for c in columns): + raise ValueError("All columns must have a name") + self.columns = [cast(NamedColumn, c) for c in columns] + self.column_map = {c.name: c for c in self.columns} + self.table = plc.Table([c.obj for c in self.columns]) def copy(self) -> Self: """Return a shallow copy of self.""" - return type(self)([c.copy() for c in self.columns]) + return type(self)(c.copy() for c in self.columns) def to_polars(self) -> pl.DataFrame: """Convert to a polars DataFrame.""" @@ -51,42 +58,38 @@ def to_polars(self) -> pl.DataFrame: # https://github.com/pola-rs/polars/issues/11632 # To guarantee we produce correct names, we therefore # serialise with names we control and rename with that map. 
- name_map = {f"column_{i}": c.name for i, c in enumerate(self.columns)} + name_map = {f"column_{i}": name for i, name in enumerate(self.column_map)} table: pa.Table = plc.interop.to_arrow( self.table, [plc.interop.ColumnMetadata(name=name) for name in name_map], ) df: pl.DataFrame = pl.from_arrow(table) return df.rename(name_map).with_columns( - *( - pl.col(c.name).set_sorted( - descending=c.order == plc.types.Order.DESCENDING - ) - if c.is_sorted - else pl.col(c.name) - for c in self.columns - ) + pl.col(c.name).set_sorted(descending=c.order == plc.types.Order.DESCENDING) + if c.is_sorted + else pl.col(c.name) + for c in self.columns ) @cached_property def column_names_set(self) -> frozenset[str]: """Return the column names as a set.""" - return frozenset(c.name for c in self.columns) + return frozenset(self.column_map) @cached_property def column_names(self) -> list[str]: """Return a list of the column names.""" - return [c.name for c in self.columns] + return list(self.column_map) @cached_property def num_columns(self) -> int: """Number of columns.""" - return len(self.columns) + return len(self.column_map) @cached_property def num_rows(self) -> int: """Number of rows.""" - return 0 if len(self.columns) == 0 else self.table.num_rows() + return self.table.num_rows() if self.column_map else 0 @classmethod def from_polars(cls, df: pl.DataFrame) -> Self: @@ -111,12 +114,8 @@ def from_polars(cls, df: pl.DataFrame) -> Self: # No-op if the schema is unchanged. d_table = plc.interop.from_arrow(table.cast(schema)) return cls( - [ - NamedColumn(column, h_col.name).copy_metadata(h_col) - for column, h_col in zip( - d_table.columns(), df.iter_columns(), strict=True - ) - ] + Column(column).copy_metadata(h_col) + for column, h_col in zip(d_table.columns(), df.iter_columns(), strict=True) ) @classmethod @@ -144,17 +143,14 @@ def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self: if table.num_columns() != len(names): raise ValueError("Mismatching name and table length.") return cls( - [ - NamedColumn(c, name) - for c, name in zip(table.columns(), names, strict=True) - ] + Column(c, name=name) for c, name in zip(table.columns(), names, strict=True) ) def sorted_like( self, like: DataFrame, /, *, subset: Set[str] | None = None ) -> Self: """ - Copy sortedness from a dataframe onto self. + Return a shallow copy with sortedness copied from like. Parameters ---------- @@ -165,7 +161,7 @@ def sorted_like( Returns ------- - Self with metadata set. + Shallow copy of self with metadata set. Raises ------ @@ -175,13 +171,12 @@ def sorted_like( if like.column_names != self.column_names: raise ValueError("Can only copy from identically named frame") subset = self.column_names_set if subset is None else subset - self.columns = [ + return type(self)( c.sorted_like(other) if c.name in subset else c for c, other in zip(self.columns, like.columns, strict=True) - ] - return self + ) - def with_columns(self, columns: Sequence[NamedColumn]) -> Self: + def with_columns(self, columns: Iterable[Column], *, replace_only=False) -> Self: """ Return a new dataframe with extra columns. @@ -189,6 +184,8 @@ def with_columns(self, columns: Sequence[NamedColumn]) -> Self: ---------- columns Columns to add + replace_only + If true, then only replacements are allowed (matching by name). Returns ------- @@ -196,36 +193,30 @@ def with_columns(self, columns: Sequence[NamedColumn]) -> Self: Notes ----- - If column names overlap, newer names replace older ones. 
+ If column names overlap, newer names replace older ones, and + appear in the same order as the original frame. """ - columns = list( - {c.name: c for c in itertools.chain(self.columns, columns)}.values() - ) - return type(self)(columns) + new = {c.name: c for c in columns} + if replace_only and not self.column_names_set.issuperset(new.keys()): + raise ValueError("Cannot replace with non-existing names") + return type(self)((self.column_map | new).values()) def discard_columns(self, names: Set[str]) -> Self: """Drop columns by name.""" - return type(self)([c for c in self.columns if c.name not in names]) + return type(self)(column for column in self.columns if column.name not in names) def select(self, names: Sequence[str]) -> Self: """Select columns by name returning DataFrame.""" - want = set(names) - if not want.issubset(self.column_names_set): - raise ValueError("Can't select missing names") - return type(self)([self._column_map[name] for name in names]) - - def replace_columns(self, *columns: NamedColumn) -> Self: - """Return a new dataframe with columns replaced by name.""" - new = {c.name: c for c in columns} - if not set(new).issubset(self.column_names_set): - raise ValueError("Cannot replace with non-existing names") - return type(self)([new.get(c.name, c) for c in self.columns]) + try: + return type(self)(self.column_map[name] for name in names) + except KeyError as e: + raise ValueError("Can't select missing names") from e def rename_columns(self, mapping: Mapping[str, str]) -> Self: """Rename some columns.""" - return type(self)([c.copy(new_name=mapping.get(c.name)) for c in self.columns]) + return type(self)(c.rename(mapping.get(c.name, c.name)) for c in self.columns) - def select_columns(self, names: Set[str]) -> list[NamedColumn]: + def select_columns(self, names: Set[str]) -> list[Column]: """Select columns by name.""" return [c for c in self.columns if c.name in names] diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index a418560b31c..f7775ceb238 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -27,7 +27,7 @@ from polars.exceptions import InvalidOperationError from polars.polars import _expr_nodes as pl_expr -from cudf_polars.containers import Column, NamedColumn +from cudf_polars.containers import Column from cudf_polars.utils import dtypes, sorting if TYPE_CHECKING: @@ -313,7 +313,7 @@ def evaluate( *, context: ExecutionContext = ExecutionContext.FRAME, mapping: Mapping[Expr, Column] | None = None, - ) -> NamedColumn: + ) -> Column: """ Evaluate this expression given a dataframe for context. @@ -328,20 +328,15 @@ def evaluate( Returns ------- - NamedColumn attaching a name to an evaluated Column + Evaluated Column with name attached. See Also -------- :meth:`Expr.evaluate` for details, this function just adds the name to a column produced from an expression. 
""" - obj = self.value.evaluate(df, context=context, mapping=mapping) - return NamedColumn( - obj.obj, - self.name, - is_sorted=obj.is_sorted, - order=obj.order, - null_order=obj.null_order, + return self.value.evaluate(df, context=context, mapping=mapping).rename( + self.name ) def collect_agg(self, *, depth: int) -> AggInfo: @@ -428,7 +423,9 @@ def do_evaluate( mapping: Mapping[Expr, Column] | None = None, ) -> Column: """Evaluate this expression given a dataframe for context.""" - return df._column_map[self.name] + # Deliberately remove the name here so that we guarantee + # evaluation of the IR produces names. + return df.column_map[self.name].rename(None) def collect_agg(self, *, depth: int) -> AggInfo: """Collect information about aggregations in groupbys.""" diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 1c61075be22..e319c363a23 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -26,7 +26,7 @@ import polars as pl import cudf_polars.dsl.expr as expr -from cudf_polars.containers import DataFrame, NamedColumn +from cudf_polars.containers import Column, DataFrame from cudf_polars.utils import dtypes, sorting if TYPE_CHECKING: @@ -57,9 +57,7 @@ ] -def broadcast( - *columns: NamedColumn, target_length: int | None = None -) -> list[NamedColumn]: +def broadcast(*columns: Column, target_length: int | None = None) -> list[Column]: """ Broadcast a sequence of columns to a common length. @@ -112,12 +110,12 @@ def broadcast( return [ column if column.obj.size() != 1 - else NamedColumn( + else Column( plc.Column.from_scalar(column.obj_scalar, nrows), - column.name, is_sorted=plc.types.Sorted.YES, order=plc.types.Order.ASCENDING, null_order=plc.types.NullOrder.BEFORE, + name=column.name, ) for column in columns ] @@ -385,15 +383,17 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: init = plc.interop.from_arrow( pa.scalar(offset, type=plc.interop.to_arrow(dtype)) ) - index = NamedColumn( + index = Column( plc.filling.sequence(df.num_rows, init, step), - name, is_sorted=plc.types.Sorted.YES, order=plc.types.Order.ASCENDING, null_order=plc.types.NullOrder.AFTER, + name=name, ) df = DataFrame([index, *df.columns]) - assert all(c.obj.type() == self.schema[c.name] for c in df.columns) + assert all( + c.obj.type() == self.schema[name] for name, c in df.column_map.items() + ) if self.predicate is None: return df else: @@ -588,15 +588,14 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: requests.append(plc.groupby.GroupByRequest(col, [req])) replacements.append(rep) group_keys, raw_tables = grouper.aggregate(requests) - # TODO: names - raw_columns: list[NamedColumn] = [] + raw_columns: list[Column] = [] for i, table in enumerate(raw_tables): (column,) = table.columns() - raw_columns.append(NamedColumn(column, f"tmp{i}")) + raw_columns.append(Column(column, name=f"tmp{i}")) mapping = dict(zip(replacements, raw_columns, strict=True)) result_keys = [ - NamedColumn(gk, k.name) - for gk, k in zip(group_keys.columns(), keys, strict=True) + Column(grouped_key, name=key.name) + for key, grouped_key in zip(keys, group_keys.columns(), strict=True) ] result_subs = DataFrame(raw_columns) results = [ @@ -639,8 +638,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: plc.copying.OutOfBoundsPolicy.DONT_CHECK, ) broadcasted = [ - NamedColumn(reordered, b.name) - for reordered, b in zip( + Column(reordered, name=old.name) + for 
reordered, old in zip( ordered_table.columns(), broadcasted, strict=True ) ] @@ -787,20 +786,20 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: # result, not the gather maps columns = plc.join.cross_join(left.table, right.table).columns() left_cols = [ - NamedColumn(new, old.name).sorted_like(old) + Column(new, name=old.name).sorted_like(old) for new, old in zip( columns[: left.num_columns], left.columns, strict=True ) ] right_cols = [ - NamedColumn( + Column( new, - old.name - if old.name not in left.column_names_set - else f"{old.name}{suffix}", + name=name + if name not in left.column_names_set + else f"{name}{suffix}", ) - for new, old in zip( - columns[left.num_columns :], right.columns, strict=True + for new, name in zip( + columns[left.num_columns :], right.column_names, strict=True ) ] return DataFrame([*left_cols, *right_cols]) @@ -838,18 +837,19 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: plc.copying.gather(right.table, rg, right_policy), right.column_names ) if coalesce and how != "inner": - left = left.replace_columns( - *( - NamedColumn( + left = left.with_columns( + ( + Column( plc.replace.replace_nulls(left_col.obj, right_col.obj), - left_col.name, + name=left_col.name, ) for left_col, right_col in zip( left.select_columns(left_on.column_names_set), right.select_columns(right_on.column_names_set), strict=True, ) - ) + ), + replace_only=True, ) right = right.discard_columns(right_on.column_names_set) if how == "right": @@ -931,9 +931,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: df = self.df.evaluate(cache=cache) if self.subset is None: indices = list(range(df.num_columns)) + keys_sorted = all(c.is_sorted for c in df.column_map.values()) else: indices = [i for i, k in enumerate(df.column_names) if k in self.subset] - keys_sorted = all(df.columns[i].is_sorted for i in indices) + keys_sorted = all(df.column_map[name].is_sorted for name in self.subset) if keys_sorted: table = plc.stream_compaction.unique( df.table, @@ -954,10 +955,11 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: plc.types.NullEquality.EQUAL, plc.types.NanEquality.ALL_EQUAL, ) + # TODO: Is this sortedness setting correct result = DataFrame( [ - NamedColumn(c, old.name).sorted_like(old) - for c, old in zip(table.columns(), df.columns, strict=True) + Column(new, name=old.name).sorted_like(old) + for new, old in zip(table.columns(), df.columns, strict=True) ] ) if keys_sorted or self.stable: @@ -1008,30 +1010,30 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: sort_keys = broadcast( *(k.evaluate(df) for k in self.by), target_length=df.num_rows ) - names = {c.name: i for i, c in enumerate(df.columns)} # TODO: More robust identification here. 
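         # (A sort key counts as "in the result" only when its name is a
         # frame column and it is the same underlying pylibcudf column.)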
- keys_in_result = [ - i - for k in sort_keys - if (i := names.get(k.name)) is not None and k.obj is df.columns[i].obj - ] + keys_in_result = { + k.name: i + for i, k in enumerate(sort_keys) + if k.name in df.column_map and k.obj is df.column_map[k.name].obj + } table = self.do_sort( df.table, plc.Table([k.obj for k in sort_keys]), self.order, self.null_order, ) - columns = [ - NamedColumn(c, old.name) - for c, old in zip(table.columns(), df.columns, strict=True) - ] - # If a sort key is in the result table, set the sortedness property - for k, i in enumerate(keys_in_result): - columns[i] = columns[i].set_sorted( - is_sorted=plc.types.Sorted.YES, - order=self.order[k], - null_order=self.null_order[k], - ) + columns: list[Column] = [] + for name, c in zip(df.column_map, table.columns(), strict=True): + column = Column(c, name=name) + # If a sort key is in the result table, set the sortedness property + if name in keys_in_result: + i = keys_in_result[name] + column = column.set_sorted( + is_sorted=plc.types.Sorted.YES, + order=self.order[i], + null_order=self.null_order[i], + ) + columns.append(column) return DataFrame(columns).slice(self.zlice) @@ -1080,7 +1082,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: df = self.df.evaluate(cache=cache) # This can reorder things. columns = broadcast( - *df.select(list(self.schema.keys())).columns, target_length=df.num_rows + *(df.column_map[name] for name in self.schema), target_length=df.num_rows ) return DataFrame(columns) @@ -1125,7 +1127,7 @@ def __post_init__(self) -> None: old, new, _ = self.options # TODO: perhaps polars should validate renaming in the IR? if len(new) != len(set(new)) or ( - set(new) & (set(self.df.schema.keys() - set(old))) + set(new) & (set(self.df.schema.keys()) - set(old)) ): raise NotImplementedError("Duplicate new names in rename.") elif self.name == "unpivot": @@ -1170,7 +1172,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: npiv = len(pivotees) df = self.df.evaluate(cache=cache) index_columns = [ - NamedColumn(col, name) + Column(col, name=name) for col, name in zip( plc.reshape.tile(df.select(indices).table, npiv).columns(), indices, @@ -1191,13 +1193,16 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: df.num_rows, ).columns() value_column = plc.concatenate.concatenate( - [c.astype(self.schema[value_name]) for c in df.select(pivotees).columns] + [ + df.column_map[pivotee].astype(self.schema[value_name]).obj + for pivotee in pivotees + ] ) return DataFrame( [ *index_columns, - NamedColumn(variable_column, variable_name), - NamedColumn(value_column, value_name), + Column(variable_column, name=variable_name), + Column(value_column, name=value_name), ] ) else: @@ -1278,6 +1283,4 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: ) for df in dfs ] - return DataFrame( - list(itertools.chain.from_iterable(df.columns for df in dfs)), - ) + return DataFrame(itertools.chain.from_iterable(df.columns for df in dfs)) diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md index bff44af1468..7837a275f20 100644 --- a/python/cudf_polars/docs/overview.md +++ b/python/cudf_polars/docs/overview.md @@ -201,21 +201,21 @@ the logical plan in any case, so is reasonably natural. # Containers Containers should be constructed as relatively lightweight objects -around their pylibcudf counterparts. We have four (in +around their pylibcudf counterparts. 
We have three (in `cudf_polars/containers/`): 1. `Scalar` (a wrapper around a pylibcudf `Scalar`) 2. `Column` (a wrapper around a pylibcudf `Column`) -3. `NamedColumn` (a `Column` with an additional name) -4. `DataFrame` (a wrapper around a pylibcudf `Table`) +3. `DataFrame` (a wrapper around a pylibcudf `Table`) The interfaces offered by these are somewhat in flux, but broadly -speaking, a `DataFrame` is just a list of `NamedColumn`s which each -hold a `Column` plus a string `name`. `NamedColumn`s are only ever -constructed via `NamedExpr`s, which are the top-level expression node -that lives inside an `IR` node. This means that the expression -evaluator never has to concern itself with column names: columns are -only ever decorated with names when constructing a `DataFrame`. +speaking, a `DataFrame` is just a mapping from string `name`s to +`Column`s, and thus also holds a pylibcudf `Table`. Names are only +attached to `Column`s and hence inserted into `DataFrames` via +`NamedExpr`s, which are the top-level expression nodes that live +inside an `IR` node. This means that the expression evaluator never +has to concern itself with column names: columns are only ever +decorated with names when constructing a `DataFrame`. The columns keep track of metadata (for example, whether or not they are sorted). We could imagine tracking more metadata, like minimum and diff --git a/python/cudf_polars/tests/containers/test_column.py b/python/cudf_polars/tests/containers/test_column.py index 19919877f84..1f26ab1af9f 100644 --- a/python/cudf_polars/tests/containers/test_column.py +++ b/python/cudf_polars/tests/containers/test_column.py @@ -3,13 +3,11 @@ from __future__ import annotations -from functools import partial - import pyarrow import pylibcudf as plc import pytest -from cudf_polars.containers import Column, NamedColumn +from cudf_polars.containers import Column def test_non_scalar_access_raises(): @@ -55,11 +53,10 @@ def test_shallow_copy(): @pytest.mark.parametrize("typeid", [plc.TypeId.INT8, plc.TypeId.FLOAT32]) -@pytest.mark.parametrize("constructor", [Column, partial(NamedColumn, name="name")]) -def test_mask_nans(typeid, constructor): +def test_mask_nans(typeid): dtype = plc.DataType(typeid) values = pyarrow.array([0, 0, 0], type=plc.interop.to_arrow(dtype)) - column = constructor(plc.interop.from_arrow(values)) + column = Column(plc.interop.from_arrow(values)) masked = column.mask_nans() assert column.obj.null_count() == masked.obj.null_count() diff --git a/python/cudf_polars/tests/containers/test_dataframe.py b/python/cudf_polars/tests/containers/test_dataframe.py index 39fb44d55a5..5c68fb8f0aa 100644 --- a/python/cudf_polars/tests/containers/test_dataframe.py +++ b/python/cudf_polars/tests/containers/test_dataframe.py @@ -8,18 +8,18 @@ import polars as pl -from cudf_polars.containers import DataFrame, NamedColumn +from cudf_polars.containers import Column, DataFrame from cudf_polars.testing.asserts import assert_gpu_result_equal def test_select_missing_raises(): df = DataFrame( [ - NamedColumn( + Column( plc.column_factories.make_numeric_column( plc.DataType(plc.TypeId.INT8), 2, plc.MaskState.ALL_VALID ), - "a", + name="a", ) ] ) @@ -30,17 +30,17 @@ def test_select_missing_raises(): def test_replace_missing_raises(): df = DataFrame( [ - NamedColumn( + Column( plc.column_factories.make_numeric_column( plc.DataType(plc.TypeId.INT8), 2, plc.MaskState.ALL_VALID ), - "a", + name="a", ) ] ) - replacement = df.columns[0].copy(new_name="b") + replacement = df.column_map["a"].copy().rename("b") 
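+    # "b" is not an existing column name, so replace-only mode should raise.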
with pytest.raises(ValueError): - df.replace_columns(replacement) + df.with_columns([replacement], replace_only=True) def test_from_table_wrong_names(): @@ -55,14 +55,23 @@ def test_from_table_wrong_names(): DataFrame.from_table(table, ["a", "b"]) +def test_unnamed_column_raise(): + payload = plc.column_factories.make_numeric_column( + plc.DataType(plc.TypeId.INT8), 0, plc.MaskState.ALL_VALID + ) + + with pytest.raises(ValueError): + DataFrame([Column(payload, name="a"), Column(payload)]) + + def test_sorted_like_raises_mismatching_names(): df = DataFrame( [ - NamedColumn( + Column( plc.column_factories.make_numeric_column( plc.DataType(plc.TypeId.INT8), 2, plc.MaskState.ALL_VALID ), - "a", + name="a", ) ] ) @@ -72,11 +81,11 @@ def test_sorted_like_raises_mismatching_names(): def test_shallow_copy(): - column = NamedColumn( + column = Column( plc.column_factories.make_numeric_column( plc.DataType(plc.TypeId.INT8), 2, plc.MaskState.ALL_VALID ), - "a", + name="a", ) column.set_sorted( is_sorted=plc.types.Sorted.YES, @@ -85,13 +94,13 @@ def test_shallow_copy(): ) df = DataFrame([column]) copy = df.copy() - copy.columns[0].set_sorted( + copy.column_map["a"].set_sorted( is_sorted=plc.types.Sorted.NO, order=plc.types.Order.ASCENDING, null_order=plc.types.NullOrder.AFTER, ) - assert df.columns[0].is_sorted == plc.types.Sorted.YES - assert copy.columns[0].is_sorted == plc.types.Sorted.NO + assert df.column_map["a"].is_sorted == plc.types.Sorted.YES + assert copy.column_map["a"].is_sorted == plc.types.Sorted.NO def test_sorted_flags_preserved_empty(): @@ -100,7 +109,7 @@ def test_sorted_flags_preserved_empty(): gf = DataFrame.from_polars(df) - (a,) = gf.columns + a = gf.column_map["a"] assert a.is_sorted == plc.types.Sorted.YES diff --git a/python/cudf_polars/tests/expressions/test_sort.py b/python/cudf_polars/tests/expressions/test_sort.py index 76c7648813a..2a37683478b 100644 --- a/python/cudf_polars/tests/expressions/test_sort.py +++ b/python/cudf_polars/tests/expressions/test_sort.py @@ -69,7 +69,7 @@ def test_setsorted(descending, nulls_last, with_nulls): df = translate_ir(q._ldf.visit()).evaluate(cache={}) - (a,) = df.columns + a = df.column_map["a"] assert a.is_sorted == plc.types.Sorted.YES null_order = ( diff --git a/python/cudf_polars/tests/utils/test_broadcast.py b/python/cudf_polars/tests/utils/test_broadcast.py index 35aaef44e1f..e7770bfadac 100644 --- a/python/cudf_polars/tests/utils/test_broadcast.py +++ b/python/cudf_polars/tests/utils/test_broadcast.py @@ -6,34 +6,35 @@ import pylibcudf as plc import pytest -from cudf_polars.containers import NamedColumn +from cudf_polars.containers import Column from cudf_polars.dsl.ir import broadcast @pytest.mark.parametrize("target", [4, None]) def test_broadcast_all_scalar(target): columns = [ - NamedColumn( + Column( plc.column_factories.make_numeric_column( plc.DataType(plc.TypeId.INT8), 1, plc.MaskState.ALL_VALID ), - f"col{i}", + name=f"col{i}", ) for i in range(3) ] result = broadcast(*columns, target_length=target) expected = 1 if target is None else target + assert [c.name for c in result] == [f"col{i}" for i in range(3)] assert all(column.obj.size() == expected for column in result) def test_invalid_target_length(): columns = [ - NamedColumn( + Column( plc.column_factories.make_numeric_column( plc.DataType(plc.TypeId.INT8), 4, plc.MaskState.ALL_VALID ), - f"col{i}", + name=f"col{i}", ) for i in range(3) ] @@ -43,11 +44,11 @@ def test_invalid_target_length(): def test_broadcast_mismatching_column_lengths(): columns = [ - NamedColumn( + 
Column( plc.column_factories.make_numeric_column( plc.DataType(plc.TypeId.INT8), i + 1, plc.MaskState.ALL_VALID ), - f"col{i}", + name=f"col{i}", ) for i in range(3) ] @@ -58,16 +59,17 @@ def test_broadcast_mismatching_column_lengths(): @pytest.mark.parametrize("nrows", [0, 5]) def test_broadcast_with_scalars(nrows): columns = [ - NamedColumn( + Column( plc.column_factories.make_numeric_column( plc.DataType(plc.TypeId.INT8), nrows if i == 0 else 1, plc.MaskState.ALL_VALID, ), - f"col{i}", + name=f"col{i}", ) for i in range(3) ] result = broadcast(*columns) + assert [c.name for c in result] == [f"col{i}" for i in range(3)] assert all(column.obj.size() == nrows for column in result) From bcf9425a8fc8bfe4a08840749a16cf83e1bc89e8 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 8 Oct 2024 14:54:04 +0100 Subject: [PATCH 19/33] Compute whole column variance using numerically stable approach (#16448) We use the pairwise approach of Chan, Golub, and LeVeque (1983). - Closes #16444 Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - Bradley Dice (https://github.com/bdice) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/16448 --- .../reduction/detail/reduction_operators.cuh | 48 ++++--- cpp/src/reductions/compound.cuh | 26 ++-- cpp/tests/reductions/reduction_tests.cpp | 131 ++++++++---------- .../java/ai/rapids/cudf/ReductionTest.java | 4 +- .../cudf/cudf/core/column/numerical_base.py | 10 +- python/cudf/cudf/core/series.py | 2 +- python/cudf/cudf/tests/test_datetime.py | 18 +-- 7 files changed, 116 insertions(+), 123 deletions(-) diff --git a/cpp/include/cudf/reduction/detail/reduction_operators.cuh b/cpp/include/cudf/reduction/detail/reduction_operators.cuh index 4cf8564ab3a..5694362af8f 100644 --- a/cpp/include/cudf/reduction/detail/reduction_operators.cuh +++ b/cpp/include/cudf/reduction/detail/reduction_operators.cuh @@ -31,17 +31,41 @@ namespace detail { // intermediate data structure to compute `var`, `std` template struct var_std { - ResultType value; /// the value - ResultType value_squared; /// the value of squared - - CUDF_HOST_DEVICE inline var_std(ResultType _value = 0, ResultType _value_squared = 0) - : value(_value), value_squared(_value_squared){}; + // Uses the pairwise approach of Chan, Golub, and LeVeque, + // _Algorithms for computing the sample variance: analysis and + // recommendations_ (1983) + // https://doi.org/10.1080/00031305.1983.10483115 + // Also http://www.cs.yale.edu/publications/techreports/tr222.pdf + // This is a modification of Youngs and Cramer's online approach. + ResultType running_sum; + ResultType running_square_deviations; + size_type count; + + CUDF_HOST_DEVICE inline var_std(ResultType t = 0, ResultType s = 0, size_type n = 0) + : running_sum(t), running_square_deviations(s), count(n){}; using this_t = var_std; CUDF_HOST_DEVICE inline this_t operator+(this_t const& rhs) const { - return this_t((this->value + rhs.value), (this->value_squared + rhs.value_squared)); + // Updates as per equations 1.5a and 1.5b in the paper + // T_{1,m+n} = T_{1,m} + T_{m+1,n+1} + // S_{1,m+n} = S_{1,m} + S_{m+1,n+1} + m/(n(m+n)) * (n/m T_{1,m} - T_{m+1,n+1})**2 + // Here the first m samples are in this, the remaining n samples are in rhs. + auto const m = this->count; + auto const n = rhs.count; + // Avoid division by zero. 
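+    // (An empty partition carries no samples; merging with one must simply
+    // return the other side's running statistics unchanged.)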
+ if (m == 0) { return rhs; } + if (n == 0) { return *this; } + auto const tm = this->running_sum; + auto const tn = rhs.running_sum; + auto const sm = this->running_square_deviations; + auto const sn = rhs.running_square_deviations; + auto const tmn = tm + tn; + auto const diff = ((static_cast(n) / m) * tm) - tn; + // Computing m/n(m+n) as m/n/(m+n) to avoid integer overflow + auto const smn = sm + sn + ((static_cast(m) / n) / (m + n)) * diff * diff; + return {tmn, smn, m + n}; }; }; @@ -50,10 +74,7 @@ template struct transformer_var_std { using OutputType = var_std; - CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value) - { - return OutputType(value, value * value); - }; + CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value) { return {value, 0, 1}; }; }; // ------------------------------------------------------------------------ @@ -257,12 +278,7 @@ struct variance : public compound_op { cudf::size_type const& count, cudf::size_type const& ddof) { - ResultType mean = input.value / count; - ResultType asum = input.value_squared; - cudf::size_type div = count - ddof; - ResultType var = asum / div - ((mean * mean) * count) / div; - - return var; + return input.running_square_deviations / (count - ddof); }; }; }; diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh index 6bc8b48832f..cd9fade164a 100644 --- a/cpp/src/reductions/compound.cuh +++ b/cpp/src/reductions/compound.cuh @@ -18,13 +18,18 @@ #include #include +#include #include +#include #include #include #include #include +#include +#include + namespace cudf { namespace reduction { namespace compound { @@ -53,9 +58,17 @@ std::unique_ptr compound_reduction(column_view const& col, { auto const valid_count = col.size() - col.null_count(); + // All null input produces all null output + if (valid_count == 0 || + // Only care about ddof for standard deviation and variance right now + valid_count <= ddof && (std::is_same_v || + std::is_same_v)) { + auto result = cudf::make_fixed_width_scalar(output_dtype, stream, mr); + result->set_valid_async(false, stream); + return result; + } // reduction by iterator auto dcol = cudf::column_device_view::create(col, stream); - std::unique_ptr result; Op compound_op{}; if (!cudf::is_dictionary(col.type())) { @@ -63,25 +76,21 @@ std::unique_ptr compound_reduction(column_view const& col, auto it = thrust::make_transform_iterator( dcol->pair_begin(), compound_op.template get_null_replacing_element_transformer()); - result = cudf::reduction::detail::reduce( + return cudf::reduction::detail::reduce( it, col.size(), compound_op, valid_count, ddof, stream, mr); } else { auto it = thrust::make_transform_iterator( dcol->begin(), compound_op.template get_element_transformer()); - result = cudf::reduction::detail::reduce( + return cudf::reduction::detail::reduce( it, col.size(), compound_op, valid_count, ddof, stream, mr); } } else { auto it = thrust::make_transform_iterator( cudf::dictionary::detail::make_dictionary_pair_iterator(*dcol, col.has_nulls()), compound_op.template get_null_replacing_element_transformer()); - result = cudf::reduction::detail::reduce( + return cudf::reduction::detail::reduce( it, col.size(), compound_op, valid_count, ddof, stream, mr); } - - // set scalar is valid - result->set_valid_async(col.null_count() < col.size(), stream); - return result; }; // @brief result type dispatcher for compound reduction (a.k.a. 
mean, var, std) @@ -137,6 +146,7 @@ struct element_type_dispatcher { rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { + CUDF_EXPECTS(ddof >= 0, "ddof must be non-negative", std::domain_error); return cudf::type_dispatcher( output_dtype, result_type_dispatcher(), col, output_dtype, ddof, stream, mr); } diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp index 1e9e13ded93..bdb98372836 100644 --- a/cpp/tests/reductions/reduction_tests.cpp +++ b/cpp/tests/reductions/reduction_tests.cpp @@ -33,8 +33,12 @@ #include #include +#include #include +#include +#include +#include #include using aggregation = cudf::aggregation; @@ -765,6 +769,25 @@ TYPED_TEST(MultiStepReductionTest, Mean) expected_value_nulls); } +template +double calc_var(std::vector const& v, int ddof, std::vector const& mask = {}) +{ + auto const values = [&]() { + if (mask.empty()) { return v; } + std::vector masked{}; + thrust::copy_if( + v.begin(), v.end(), mask.begin(), std::back_inserter(masked), [](auto m) { return m; }); + return masked; + }(); + auto const valid_count = values.size(); + double const mean = std::accumulate(values.cbegin(), values.cend(), double{0}) / valid_count; + double const sq_sum_of_differences = + std::accumulate(values.cbegin(), values.cend(), double{0}, [mean](double acc, auto const v) { + return acc + std::pow(v - mean, 2); + }); + return sq_sum_of_differences / (valid_count - ddof); +} + // This test is disabled for only a Debug build because a compiler error // documented in cpp/src/reductions/std.cu and cpp/src/reductions/var.cu #ifdef NDEBUG @@ -777,25 +800,12 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std) std::vector int_values({-3, 2, 1, 0, 5, -3, -2, 28}); std::vector host_bools({true, true, false, true, true, true, false, true}); - auto calc_var = [](std::vector& v, cudf::size_type valid_count, int ddof) { - double mean = std::accumulate(v.begin(), v.end(), double{0}); - mean /= valid_count; - - double sum_of_sq = std::accumulate( - v.begin(), v.end(), double{0}, [](double acc, TypeParam i) { return acc + i * i; }); - - cudf::size_type div = valid_count - ddof; - - double var = sum_of_sq / div - ((mean * mean) * valid_count) / div; - return var; - }; - // test without nulls std::vector v = convert_values(int_values); cudf::test::fixed_width_column_wrapper col(v.begin(), v.end()); auto const ddof = 1; - double var = calc_var(v, v.size(), ddof); + double var = calc_var(v, ddof); double std = std::sqrt(var); auto var_agg = cudf::make_variance_aggregation(ddof); auto std_agg = cudf::make_std_aggregation(ddof); @@ -811,23 +821,19 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std) // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(v, host_bools); - cudf::size_type valid_count = - cudf::column_view(col_nulls).size() - cudf::column_view(col_nulls).null_count(); - auto replaced_array = replace_nulls(v, host_bools, T{0}); - - double var_nulls = calc_var(replaced_array, valid_count, ddof); - double std_nulls = std::sqrt(var_nulls); + double var_nulls = calc_var(v, ddof, host_bools); + double std_nulls = std::sqrt(var_nulls); - EXPECT_EQ(this - ->template reduction_test( - col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64)) - .first, - var_nulls); - EXPECT_EQ(this - ->template reduction_test( - col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64)) - .first, - std_nulls); + EXPECT_DOUBLE_EQ(this + ->template reduction_test( + col_nulls, *var_agg, 
cudf::data_type(cudf::type_id::FLOAT64)) + .first, + var_nulls); + EXPECT_DOUBLE_EQ(this + ->template reduction_test( + col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + std_nulls); } // ---------------------------------------------------------------------------- @@ -1139,23 +1145,10 @@ TEST_P(ReductionParamTest, DISABLED_std_var) std::vector int_values({-3, 2, 1, 0, 5, -3, -2, 28}); std::vector host_bools({true, true, false, true, true, true, false, true}); - auto calc_var = [ddof](std::vector& v, cudf::size_type valid_count) { - double mean = std::accumulate(v.begin(), v.end(), double{0}); - mean /= valid_count; - - double sum_of_sq = std::accumulate( - v.begin(), v.end(), double{0}, [](double acc, double i) { return acc + i * i; }); - - cudf::size_type div = valid_count - ddof; - - double var = sum_of_sq / div - ((mean * mean) * valid_count) / div; - return var; - }; - // test without nulls cudf::test::fixed_width_column_wrapper col(int_values.begin(), int_values.end()); - double var = calc_var(int_values, int_values.size()); + double var = calc_var(int_values, ddof); double std = std::sqrt(var); auto var_agg = cudf::make_variance_aggregation(ddof); auto std_agg = cudf::make_std_aggregation(ddof); @@ -1172,23 +1165,19 @@ TEST_P(ReductionParamTest, DISABLED_std_var) // test with nulls cudf::test::fixed_width_column_wrapper col_nulls = construct_null_column(int_values, host_bools); - cudf::size_type valid_count = - cudf::column_view(col_nulls).size() - cudf::column_view(col_nulls).null_count(); - auto replaced_array = replace_nulls(int_values, host_bools, int{0}); - - double var_nulls = calc_var(replaced_array, valid_count); + double var_nulls = calc_var(int_values, ddof, host_bools); double std_nulls = std::sqrt(var_nulls); - EXPECT_EQ(this - ->template reduction_test( - col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64)) - .first, - var_nulls); - EXPECT_EQ(this - ->template reduction_test( - col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64)) - .first, - std_nulls); + EXPECT_DOUBLE_EQ(this + ->template reduction_test( + col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + var_nulls); + EXPECT_DOUBLE_EQ(this + ->template reduction_test( + col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64)) + .first, + std_nulls); } //------------------------------------------------------------------- @@ -2471,21 +2460,11 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd) std::vector v = convert_values(int_values); cudf::data_type output_type{cudf::type_to_id()}; - auto calc_var = [](std::vector const& v, cudf::size_type valid_count, cudf::size_type ddof) { - double mean = std::accumulate(v.cbegin(), v.cend(), double{0}); - mean /= valid_count; - double sum_of_sq = std::accumulate( - v.cbegin(), v.cend(), double{0}, [](double acc, TypeParam i) { return acc + i * i; }); - auto const div = valid_count - ddof; - double var = sum_of_sq / div - ((mean * mean) * valid_count) / div; - return var; - }; - // test without nulls cudf::test::dictionary_column_wrapper col(v.begin(), v.end()); cudf::size_type const ddof = 1; - double var = calc_var(v, v.size(), ddof); + double var = calc_var(v, ddof); double std = std::sqrt(var); auto var_agg = cudf::make_variance_aggregation(ddof); auto std_agg = cudf::make_std_aggregation(ddof); @@ -2497,15 +2476,13 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd) std::vector validity({true, true, false, true, true, true, false, true}); cudf::test::dictionary_column_wrapper col_nulls(v.begin(), 
v.end(), validity.begin()); - cudf::size_type const valid_count = std::count(validity.begin(), validity.end(), true); - - double var_nulls = calc_var(replace_nulls(v, validity, T{0}), valid_count, ddof); + double var_nulls = calc_var(v, ddof, validity); double std_nulls = std::sqrt(var_nulls); - EXPECT_EQ(this->template reduction_test(col_nulls, *var_agg, output_type).first, - var_nulls); - EXPECT_EQ(this->template reduction_test(col_nulls, *std_agg, output_type).first, - std_nulls); + EXPECT_DOUBLE_EQ(this->template reduction_test(col_nulls, *var_agg, output_type).first, + var_nulls); + EXPECT_DOUBLE_EQ(this->template reduction_test(col_nulls, *std_agg, output_type).first, + std_nulls); } TYPED_TEST(DictionaryReductionTest, NthElement) diff --git a/java/src/test/java/ai/rapids/cudf/ReductionTest.java b/java/src/test/java/ai/rapids/cudf/ReductionTest.java index 8cc7df1ce7f..6bd6603d71b 100644 --- a/java/src/test/java/ai/rapids/cudf/ReductionTest.java +++ b/java/src/test/java/ai/rapids/cudf/ReductionTest.java @@ -612,13 +612,13 @@ void testWithSetOutputType() { assertEquals(expected, result); } - try (Scalar expected = Scalar.fromFloat(1.666667f); + try (Scalar expected = Scalar.fromFloat(1.6666666f); ColumnVector cv = ColumnVector.fromBytes(new byte[]{1, 2, 3, 4}); Scalar result = cv.variance(DType.FLOAT32)) { assertEquals(expected, result); } - try (Scalar expected = Scalar.fromFloat(1.2909945f); + try (Scalar expected = Scalar.fromFloat(1.2909944f); ColumnVector cv = ColumnVector.fromBytes(new byte[]{1, 2, 3, 4}); Scalar result = cv.standardDeviation(DType.FLOAT32)) { assertEquals(expected, result); diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index 3b8dd05c13a..f6ab91f2f01 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -180,9 +180,12 @@ def var( min_count: int = 0, ddof=1, ): - return self._reduce( + result = self._reduce( "var", skipna=skipna, min_count=min_count, ddof=ddof ) + if result is NA: + return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) + return result def std( self, @@ -190,9 +193,12 @@ def std( min_count: int = 0, ddof=1, ): - return self._reduce( + result = self._reduce( "std", skipna=skipna, min_count=min_count, ddof=ddof ) + if result is NA: + return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) + return result def median(self, skipna: bool | None = None) -> NumericalBaseColumn: skipna = True if skipna is None else skipna diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index acd97c2047c..41ee94b72c8 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -2943,7 +2943,7 @@ def corr(self, other, method="pearson", min_periods=None): >>> ser1 = cudf.Series([0.9, 0.13, 0.62]) >>> ser2 = cudf.Series([0.12, 0.26, 0.51]) >>> ser1.corr(ser2, method="pearson") - -0.20454263717316112 + -0.20454263717316126 >>> ser1.corr(ser2, method="spearman") -0.5 """ diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py index 4a2345fc009..976b12a9ab5 100644 --- a/python/cudf/cudf/tests/test_datetime.py +++ b/python/cudf/cudf/tests/test_datetime.py @@ -2525,23 +2525,7 @@ def test_dti_asi8(): @pytest.mark.parametrize( "method, kwargs", - [ - ["mean", {}], - pytest.param( - "std", - {}, - marks=pytest.mark.xfail( - reason="https://github.com/rapidsai/cudf/issues/16444" - ), - ), - pytest.param( - "std", - {"ddof": 0}, - marks=pytest.mark.xfail( - 
reason="https://github.com/rapidsai/cudf/issues/16444" - ), - ), - ], + [["mean", {}], ["std", {}], ["std", {"ddof": 0}]], ) def test_dti_reduction(method, kwargs): pd_dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"], name="foo") From cc23474129433d7700058c311be5154340a6bce3 Mon Sep 17 00:00:00 2001 From: Lawrence Mitchell Date: Tue, 8 Oct 2024 17:12:10 +0100 Subject: [PATCH 20/33] Turn on `xfail_strict = true` for all python packages (#16977) The cudf tests already treat tests that are expected to fail but pass as errors, but at the time we introduced that change, we didn't do the same for the other packages. Do that now, it turns out there are only a few xpassing tests. While here, it turns out that having multiple different pytest configuration files does not work. `pytest.ini` takes precedence over other options, and it's "first file wins". Consequently, the merge of #16851 turned off `xfail_strict = true` (and other options) for many of the subpackages. To fix this, migrate all pytest configuration into the appropriate section of the `pyproject.toml` files, so that all tool configuration lives in the same place. We also add a section in the developer guide to document this choice. - Closes #12391 - Closes #16974 Authors: - Lawrence Mitchell (https://github.com/wence-) Approvers: - James Lamb (https://github.com/jameslamb) - Matthew Roeschke (https://github.com/mroeschke) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/16977 --- docs/cudf/source/developer_guide/testing.md | 17 ++++++++ python/cudf/cudf/tests/pytest.ini | 19 --------- python/cudf/cudf_pandas_tests/pytest.ini | 9 ++++ .../cudf_pandas_tests/test_cudf_pandas.py | 41 +++++++++++-------- python/cudf/pyproject.toml | 21 ++++++++++ python/cudf_kafka/cudf_kafka/tests/pytest.ini | 4 -- python/cudf_kafka/pyproject.toml | 3 ++ python/cudf_polars/pyproject.toml | 5 +++ python/cudf_polars/tests/pytest.ini | 4 -- python/custreamz/custreamz/tests/pytest.ini | 4 -- .../custreamz/tests/test_dataframes.py | 18 +++----- python/custreamz/pyproject.toml | 6 +++ python/dask_cudf/dask_cudf/tests/pytest.ini | 4 -- python/dask_cudf/pyproject.toml | 3 ++ python/pylibcudf/pylibcudf/tests/pytest.ini | 9 ---- python/pylibcudf/pyproject.toml | 10 +++++ 16 files changed, 103 insertions(+), 74 deletions(-) delete mode 100644 python/cudf/cudf/tests/pytest.ini create mode 100644 python/cudf/cudf_pandas_tests/pytest.ini delete mode 100644 python/cudf_kafka/cudf_kafka/tests/pytest.ini delete mode 100644 python/cudf_polars/tests/pytest.ini delete mode 100644 python/custreamz/custreamz/tests/pytest.ini delete mode 100644 python/dask_cudf/dask_cudf/tests/pytest.ini delete mode 100644 python/pylibcudf/pylibcudf/tests/pytest.ini diff --git a/docs/cudf/source/developer_guide/testing.md b/docs/cudf/source/developer_guide/testing.md index f12f809d5db..22cc1b5b8de 100644 --- a/docs/cudf/source/developer_guide/testing.md +++ b/docs/cudf/source/developer_guide/testing.md @@ -7,6 +7,23 @@ specifically the [`pytest-cov`](https://github.com/pytest-dev/pytest-cov) plugin Code coverage reports are uploaded to [Codecov](https://app.codecov.io/gh/rapidsai/cudf). Each PR also indicates whether it increases or decreases test coverage. +### Configuring pytest + +Pytest will accept configuration in [multiple different +files](https://docs.pytest.org/en/stable/reference/customize.html), +with a specified discovery and precedence order. 
Note in particular
+that there is no automatic "include" mechanism: as soon as a matching
+configuration file is found, discovery stops.
+
+For preference, so that all tool configuration lives in the same
+place, we use `pyproject.toml`-based configuration. Test configuration
+for a given package should live in that package's `pyproject.toml`
+file.
+
+Where tests do not naturally belong to a project, for example the
+`cudf.pandas` integration tests and the cuDF benchmarks, use a
+`pytest.ini` file as close to the tests as possible.
+
 ## Test organization
 
 How tests are organized depends on which of the following two groups they fall into:
diff --git a/python/cudf/cudf/tests/pytest.ini b/python/cudf/cudf/tests/pytest.ini
deleted file mode 100644
index 496a322ff80..00000000000
--- a/python/cudf/cudf/tests/pytest.ini
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
-
-[pytest]
-markers =
-    spilling: mark benchmark a good candidate to run with `CUDF_SPILL=ON`
-xfail_strict = true
-filterwarnings =
-    error
-    ignore:::.*xdist.*
-    ignore:::.*pytest.*
-    # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow()
-    ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore
-    # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+
-    ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning
-    # PerformanceWarning from cupy warming up the JIT cache
-    ignore:Jitify is performing a one-time only warm-up to populate the persistent cache:cupy._util.PerformanceWarning
-    # Ignore numba PEP 456 warning specific to arm machines
-    ignore:FNV hashing is not implemented in Numba.*:UserWarning
-addopts = --tb=native
diff --git a/python/cudf/cudf_pandas_tests/pytest.ini b/python/cudf/cudf_pandas_tests/pytest.ini
new file mode 100644
index 00000000000..46e2448ea24
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/pytest.ini
@@ -0,0 +1,9 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+ +# Note, this config file overrides the default "cudf" test config in +# ../pyproject.toml We do so deliberately because we have different +# treatment of markers and warnings +[pytest] +addopts = --tb=native --strict-config --strict-markers +empty_parameter_set_mark = fail_at_collect +xfail_strict = true diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py index 2bbed40e34e..a74b7148c00 100644 --- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py +++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py @@ -3,6 +3,7 @@ # SPDX-License-Identifier: Apache-2.0 import collections +import contextlib import copy import datetime import operator @@ -21,10 +22,15 @@ import pyarrow as pa import pytest from nbconvert.preprocessors import ExecutePreprocessor -from numba import NumbaDeprecationWarning, vectorize +from numba import ( + NumbaDeprecationWarning, + __version__ as numba_version, + vectorize, +) +from packaging import version from pytz import utc -from cudf.core._compat import PANDAS_GE_220 +from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220, PANDAS_VERSION from cudf.pandas import LOADED, Profiler from cudf.pandas.fast_slow_proxy import ( ProxyFallbackError, @@ -52,8 +58,6 @@ get_calendar, ) -from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION - # Accelerated pandas has the real pandas and cudf modules as attributes pd = xpd._fsproxy_slow cudf = xpd._fsproxy_fast @@ -622,10 +626,6 @@ def test_array_function_series_fallback(series): tm.assert_equal(expect, got) -@pytest.mark.xfail( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas", -) def test_timedeltaproperties(series): psr, sr = series psr, sr = psr.astype("timedelta64[ns]"), sr.astype("timedelta64[ns]") @@ -685,10 +685,6 @@ def test_maintain_container_subclasses(multiindex): assert isinstance(got, xpd.core.indexes.frozen.FrozenList) -@pytest.mark.xfail( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, - reason="Fails in older versions of pandas due to unsupported boxcar window type", -) def test_rolling_win_type(): pdf = pd.DataFrame(range(5)) df = xpd.DataFrame(range(5)) @@ -697,8 +693,14 @@ def test_rolling_win_type(): tm.assert_equal(result, expected) -@pytest.mark.skip( - reason="Requires Numba 0.59 to fix segfaults on ARM. See https://github.com/numba/llvmlite/pull/1009" +@pytest.mark.skipif( + version.parse(numba_version) < version.parse("0.59"), + reason="Requires Numba 0.59 to fix segfaults on ARM. 
See https://github.com/numba/llvmlite/pull/1009", +) +@pytest.mark.xfail( + version.parse(numba_version) >= version.parse("0.59") + and PANDAS_VERSION < version.parse("2.1"), + reason="numba.generated_jit removed in 0.59, requires pandas >= 2.1", ) def test_rolling_apply_numba_engine(): def weighted_mean(x): @@ -709,7 +711,12 @@ def weighted_mean(x): pdf = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) df = xpd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]]) - with pytest.warns(NumbaDeprecationWarning): + ctx = ( + contextlib.nullcontext() + if PANDAS_GE_210 + else pytest.warns(NumbaDeprecationWarning) + ) + with ctx: expect = pdf.rolling(2, method="table", min_periods=0).apply( weighted_mean, raw=True, engine="numba" ) @@ -1305,7 +1312,7 @@ def max_times_two(self): @pytest.mark.xfail( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION < version.parse("2.1"), reason="DatetimeArray.__floordiv__ missing in pandas-2.0.0", ) def test_floordiv_array_vs_df(): @@ -1580,7 +1587,7 @@ def test_numpy_cupy_flatiter(series): @pytest.mark.xfail( - PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION, + PANDAS_VERSION < version.parse("2.1"), reason="pyarrow_numpy storage type was not supported in pandas-2.0.0", ) def test_arrow_string_arrays(): diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 1b730ffd13c..c0776fd0de6 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -124,6 +124,27 @@ skip = [ "__init__.py", ] +[tool.pytest.ini_options] +addopts = "--tb=native --strict-config --strict-markers" +empty_parameter_set_mark = "fail_at_collect" +filterwarnings = [ + "error", + "ignore:::.*xdist.*", + "ignore:::.*pytest.*", + # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow() + "ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore", + # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+ + "ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning", + # PerformanceWarning from cupy warming up the JIT cache + "ignore:Jitify is performing a one-time only warm-up to populate the persistent cache:cupy._util.PerformanceWarning", + # Ignore numba PEP 456 warning specific to arm machines + "ignore:FNV hashing is not implemented in Numba.*:UserWarning" +] +markers = [ + "spilling: mark benchmark a good candidate to run with `CUDF_SPILL=ON`" +] +xfail_strict = true + [tool.rapids-build-backend] build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" diff --git a/python/cudf_kafka/cudf_kafka/tests/pytest.ini b/python/cudf_kafka/cudf_kafka/tests/pytest.ini deleted file mode 100644 index 7b0a9f29fb1..00000000000 --- a/python/cudf_kafka/cudf_kafka/tests/pytest.ini +++ /dev/null @@ -1,4 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
-
-[pytest]
-addopts = --tb=native
diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml
index a1a3ec37842..87e19a2bccf 100644
--- a/python/cudf_kafka/pyproject.toml
+++ b/python/cudf_kafka/pyproject.toml
@@ -79,9 +79,12 @@ skip = [
 ]
 
 [tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
   "error"
 ]
+xfail_strict = true
 
 [tool.scikit-build]
 build-dir = "build/{wheel_tag}"
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index f55031e0826..5345fad41a2 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -50,6 +50,11 @@ license-files = ["LICENSE"]
 version = {file = "cudf_polars/VERSION"}
 
 [tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
+filterwarnings = [
+  "error"
+]
 xfail_strict = true
 
 [tool.coverage.report]
diff --git a/python/cudf_polars/tests/pytest.ini b/python/cudf_polars/tests/pytest.ini
deleted file mode 100644
index 7b0a9f29fb1..00000000000
--- a/python/cudf_polars/tests/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-[pytest]
-addopts = --tb=native
diff --git a/python/custreamz/custreamz/tests/pytest.ini b/python/custreamz/custreamz/tests/pytest.ini
deleted file mode 100644
index 7b0a9f29fb1..00000000000
--- a/python/custreamz/custreamz/tests/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-[pytest]
-addopts = --tb=native
diff --git a/python/custreamz/custreamz/tests/test_dataframes.py b/python/custreamz/custreamz/tests/test_dataframes.py
index bae4b051cae..8c0130d2818 100644
--- a/python/custreamz/custreamz/tests/test_dataframes.py
+++ b/python/custreamz/custreamz/tests/test_dataframes.py
@@ -377,24 +377,16 @@ def test_setitem_overwrites(stream):
     [
         ({}, "sum"),
         ({}, "mean"),
-        pytest.param({}, "min"),
+        ({}, "min"),
         pytest.param(
             {},
             "median",
             marks=pytest.mark.xfail(reason="Unavailable for rolling objects"),
         ),
-        pytest.param({}, "max"),
-        pytest.param(
-            {},
-            "var",
-            marks=pytest.mark.xfail(reason="Unavailable for rolling objects"),
-        ),
-        pytest.param({}, "count"),
-        pytest.param(
-            {"ddof": 0},
-            "std",
-            marks=pytest.mark.xfail(reason="Unavailable for rolling objects"),
-        ),
+        ({}, "max"),
+        ({}, "var"),
+        ({}, "count"),
+        ({"ddof": 0}, "std"),
         pytest.param(
             {"quantile": 0.5},
             "quantile",
diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml
index 85ab0024bb5..af45f49d9b4 100644
--- a/python/custreamz/pyproject.toml
+++ b/python/custreamz/pyproject.toml
@@ -111,6 +111,8 @@ skip = [
 ]
 
 [tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
     "error",
     "ignore:unclosed Date: Tue, 8 Oct 2024 11:25:42 -0500
Subject: [PATCH 21/33] Performance optimization of JSON validation (#16996)

As part of JSON validation, field, value and string tokens are validated. Right now the code has a single transform_inclusive_scan. Since this transform functor is a heavy operation, it slows down the entire scan drastically. This PR splits the transform and the scan during validation. The runtime of validation went from 200ms to 20ms. Also, a few hardcoded string comparisons are moved to a trie.
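
Schematically, the change replaces one fused pass, whose scan operator
invoked the expensive validation functor, with a cheap elementwise
transform followed by a scan over booleans. A minimal Python sketch of
the idea (illustrative only; the actual implementation uses thrust with
device lambdas, and validate() here is a made-up stand-in):

    from itertools import accumulate

    def validate(token):  # stands in for the heavy validation functor
        return token == "bad"

    # Before: validate() runs inside the scan operator itself.
    def fused(tokens):
        return list(
            accumulate(tokens, lambda seen, t: seen or validate(t), initial=False)
        )[1:]

    # After: one elementwise pass marks invalid tokens (trivially
    # parallel), then the scan only combines cheap booleans.
    def split(tokens):
        invalid = [validate(t) for t in tokens]
        return list(accumulate(invalid, lambda a, b: a or b))

    assert fused(["ok", "bad", "ok"]) == split(["ok", "bad", "ok"])

In serial Python the two variants do the same work; on the GPU the
elementwise pass parallelizes freely while the scan's combine stays
trivial, which is what makes the split profitable.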
Authors: - Karthikeyan (https://github.com/karthikeyann) Approvers: - Nghia Truong (https://github.com/ttnghia) - Vukasin Milovanovic (https://github.com/vuule) - Robert (Bobby) Evans (https://github.com/revans2) URL: https://github.com/rapidsai/cudf/pull/16996 --- cpp/src/io/json/process_tokens.cu | 88 ++++++++++++++++++------------- 1 file changed, 50 insertions(+), 38 deletions(-) diff --git a/cpp/src/io/json/process_tokens.cu b/cpp/src/io/json/process_tokens.cu index 83c7b663980..d41d137a2c9 100644 --- a/cpp/src/io/json/process_tokens.cu +++ b/cpp/src/io/json/process_tokens.cu @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -87,13 +88,25 @@ void validate_token_stream(device_span d_input, { CUDF_FUNC_RANGE(); if (!options.is_strict_validation()) { return; } + + rmm::device_uvector d_invalid = cudf::detail::make_zeroed_device_uvector_async( + tokens.size(), stream, cudf::get_current_device_resource_ref()); + using token_t = cudf::io::json::token_t; - cudf::detail::optional_trie trie_na = - cudf::detail::create_serialized_trie(options.get_na_values(), stream); - auto trie_na_view = cudf::detail::make_trie_view(trie_na); + auto literals = options.get_na_values(); + literals.emplace_back("null"); // added these too to single trie + literals.emplace_back("true"); + literals.emplace_back("false"); + + cudf::detail::optional_trie trie_literals = + cudf::detail::create_serialized_trie(literals, stream); + cudf::detail::optional_trie trie_nonnumeric = cudf::detail::create_serialized_trie( + {"NaN", "Infinity", "+INF", "+Infinity", "-INF", "-Infinity"}, stream); + auto validate_values = cuda::proclaim_return_type( [data = d_input.data(), - trie_na = trie_na_view, + trie_literals = cudf::detail::make_trie_view(trie_literals), + trie_nonnumeric = cudf::detail::make_trie_view(trie_nonnumeric), allow_numeric_leading_zeros = options.is_allowed_numeric_leading_zeros(), allow_nonnumeric = options.is_allowed_nonnumeric_numbers()] __device__(SymbolOffsetT start, @@ -101,24 +114,15 @@ void validate_token_stream(device_span d_input, // This validates an unquoted value. 
A value must match https://www.json.org/json-en.html // but the leading and training whitespace should already have been removed, and is not // a string - auto c = data[start]; - auto is_null_literal = serialized_trie_contains(trie_na, {data + start, end - start}); - if (is_null_literal) { - return true; - } else if ('n' == c) { - return substr_eq(data, start, end, 4, "null"); - } else if ('t' == c) { - return substr_eq(data, start, end, 4, "true"); - } else if ('f' == c) { - return substr_eq(data, start, end, 5, "false"); - } else if (allow_nonnumeric && c == 'N') { - return substr_eq(data, start, end, 3, "NaN"); - } else if (allow_nonnumeric && c == 'I') { - return substr_eq(data, start, end, 8, "Infinity"); - } else if (allow_nonnumeric && c == '+') { - return substr_eq(data, start, end, 4, "+INF") || - substr_eq(data, start, end, 9, "+Infinity"); - } else if ('-' == c || c <= '9' && 'c' >= '0') { + auto const is_literal = serialized_trie_contains(trie_literals, {data + start, end - start}); + if (is_literal) { return true; } + if (allow_nonnumeric) { + auto const is_nonnumeric = + serialized_trie_contains(trie_nonnumeric, {data + start, end - start}); + if (is_nonnumeric) { return true; } + } + auto c = data[start]; + if ('-' == c || c <= '9' && 'c' >= '0') { // number auto num_state = number_state::START; for (auto at = start; at < end; at++) { @@ -140,9 +144,6 @@ void validate_token_stream(device_span d_input, num_state = number_state::LEADING_ZERO; } else if (c >= '1' && c <= '9') { num_state = number_state::WHOLE; - } else if (allow_nonnumeric && 'I' == c) { - return substr_eq(data, start, end, 4, "-INF") || - substr_eq(data, start, end, 9, "-Infinity"); } else { return false; } @@ -273,33 +274,44 @@ void validate_token_stream(device_span d_input, auto num_tokens = tokens.size(); auto count_it = thrust::make_counting_iterator(0); - auto predicate = [tokens = tokens.begin(), - token_indices = token_indices.begin(), - validate_values, - validate_strings] __device__(auto i) -> bool { + auto predicate = cuda::proclaim_return_type([tokens = tokens.begin(), + token_indices = token_indices.begin(), + validate_values, + validate_strings] __device__(auto i) -> bool { if (tokens[i] == token_t::ValueEnd) { return !validate_values(token_indices[i - 1], token_indices[i]); } else if (tokens[i] == token_t::FieldNameEnd || tokens[i] == token_t::StringEnd) { return !validate_strings(token_indices[i - 1], token_indices[i]); } return false; - }; + }); + + auto conditional_invalidout_it = + cudf::detail::make_tabulate_output_iterator(cuda::proclaim_return_type( + [d_invalid = d_invalid.begin()] __device__(size_type i, bool x) -> void { + if (x) { d_invalid[i] = true; } + })); + thrust::transform(rmm::exec_policy_nosync(stream), + count_it, + count_it + num_tokens, + conditional_invalidout_it, + predicate); using scan_type = write_if::scan_type; auto conditional_write = write_if{tokens.begin(), num_tokens}; auto conditional_output_it = cudf::detail::make_tabulate_output_iterator(conditional_write); - auto transform_op = cuda::proclaim_return_type( - [predicate, tokens = tokens.begin()] __device__(auto i) -> scan_type { - if (predicate(i)) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd}; - return {static_cast(tokens[i]), tokens[i] == token_t::LineEnd}; - }); - auto binary_op = cuda::proclaim_return_type( + auto binary_op = cuda::proclaim_return_type( [] __device__(scan_type prev, scan_type curr) -> scan_type { auto op_result = (prev.first == token_t::ErrorBegin ? 
prev.first : curr.first); - return scan_type((curr.second ? curr.first : op_result), prev.second | curr.second); + return {(curr.second ? curr.first : op_result), prev.second | curr.second}; + }); + auto transform_op = cuda::proclaim_return_type( + [d_invalid = d_invalid.begin(), tokens = tokens.begin()] __device__(auto i) -> scan_type { + if (d_invalid[i]) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd}; + return {static_cast(tokens[i]), tokens[i] == token_t::LineEnd}; }); - thrust::transform_inclusive_scan(rmm::exec_policy(stream), + thrust::transform_inclusive_scan(rmm::exec_policy_nosync(stream), count_it, count_it + num_tokens, conditional_output_it, From 618a93fc99ccb916177cb03429c69c8bbd5639b3 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Tue, 8 Oct 2024 17:24:45 -0400 Subject: [PATCH 22/33] Migrate nvtext jaccard API to pylibcudf (#17007) Part of #15162 Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) URL: https://github.com/rapidsai/cudf/pull/17007 --- .../api_docs/pylibcudf/nvtext/index.rst | 1 + .../api_docs/pylibcudf/nvtext/jaccard.rst | 6 +++ python/cudf/cudf/_lib/nvtext/jaccard.pyx | 33 ++++--------- .../pylibcudf/pylibcudf/nvtext/CMakeLists.txt | 2 +- .../pylibcudf/pylibcudf/nvtext/__init__.pxd | 3 +- python/pylibcudf/pylibcudf/nvtext/__init__.py | 3 +- python/pylibcudf/pylibcudf/nvtext/jaccard.pxd | 7 +++ python/pylibcudf/pylibcudf/nvtext/jaccard.pyx | 47 +++++++++++++++++++ .../pylibcudf/tests/test_nvtext_jaccard.py | 37 +++++++++++++++ 9 files changed, 111 insertions(+), 28 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/jaccard.rst create mode 100644 python/pylibcudf/pylibcudf/nvtext/jaccard.pxd create mode 100644 python/pylibcudf/pylibcudf/nvtext/jaccard.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst index 2e03b589c8b..6300f77d686 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst @@ -6,3 +6,4 @@ nvtext edit_distance generate_ngrams + jaccard diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/jaccard.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/jaccard.rst new file mode 100644 index 00000000000..ea59657c25e --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/jaccard.rst @@ -0,0 +1,6 @@ +======= +jaccard +======= + +..
automodule:: pylibcudf.nvtext.jaccard + :members: diff --git a/python/cudf/cudf/_lib/nvtext/jaccard.pyx b/python/cudf/cudf/_lib/nvtext/jaccard.pyx index 0ebf7c281e3..c964d0206b7 100644 --- a/python/cudf/cudf/_lib/nvtext/jaccard.pyx +++ b/python/cudf/cudf/_lib/nvtext/jaccard.pyx @@ -2,33 +2,16 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.nvtext.jaccard cimport ( - jaccard_index as cpp_jaccard_index, -) -from pylibcudf.libcudf.types cimport size_type - from cudf._lib.column cimport Column +from pylibcudf import nvtext + @acquire_spill_lock() def jaccard_index(Column input1, Column input2, int width): - cdef column_view c_input1 = input1.view() - cdef column_view c_input2 = input2.view() - cdef size_type c_width = width - cdef unique_ptr[column] c_result - - with nogil: - c_result = move( - cpp_jaccard_index( - c_input1, - c_input2, - c_width - ) - ) - - return Column.from_unique_ptr(move(c_result)) + result = nvtext.jaccard.jaccard_index( + input1.to_pylibcudf(mode="read"), + input2.to_pylibcudf(mode="read"), + width, + ) + return Column.from_pylibcudf(result) diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt index eb5617a1da6..9913e1fbadb 100644 --- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt @@ -12,7 +12,7 @@ # the License. # ============================================================================= -set(cython_sources edit_distance.pyx generate_ngrams.pyx) +set(cython_sources edit_distance.pyx generate_ngrams.pyx jaccard.pyx) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd index 7f5fa2b9925..5f1762b1e3d 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . cimport edit_distance, generate_ngrams +from . cimport edit_distance, generate_ngrams, jaccard __all__ = [ "edit_distance", "generate_ngrams", + "jaccard", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py index a66ce984745..1c0ddb1e5a4 100644 --- a/python/pylibcudf/pylibcudf/nvtext/__init__.py +++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py @@ -1,8 +1,9 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import edit_distance, generate_ngrams +from . import edit_distance, generate_ngrams, jaccard __all__ = [ "edit_distance", "generate_ngrams", + "jaccard", ] diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd b/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd new file mode 100644 index 00000000000..a4d4a15335b --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pxd @@ -0,0 +1,7 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column +from pylibcudf.libcudf.types cimport size_type + + +cpdef Column jaccard_index(Column input1, Column input2, size_type width) diff --git a/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx new file mode 100644 index 00000000000..9334d7ce751 --- /dev/null +++ b/python/pylibcudf/pylibcudf/nvtext/jaccard.pyx @@ -0,0 +1,47 @@ +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.column.column_view cimport column_view +from pylibcudf.libcudf.nvtext.jaccard cimport ( + jaccard_index as cpp_jaccard_index, +) +from pylibcudf.libcudf.types cimport size_type + + +cpdef Column jaccard_index(Column input1, Column input2, size_type width): + """ + Returns the Jaccard similarity between individual rows in two strings columns. + + For details, see :cpp:func:`jaccard_index` + + Parameters + ---------- + input1 : Column + Input strings column + input2 : Column + Input strings column + width : size_type + The ngram number to generate + + Returns + ------- + Column + Index calculation values + """ + cdef column_view c_input1 = input1.view() + cdef column_view c_input2 = input2.view() + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_jaccard_index( + c_input1, + c_input2, + width + ) + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py new file mode 100644 index 00000000000..d5a168426b1 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_jaccard.py @@ -0,0 +1,37 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +import pyarrow as pa +import pylibcudf as plc +import pytest +from utils import assert_column_eq + + +@pytest.fixture(scope="module") +def input_data(): + input1 = ["the fuzzy dog", "little piggy", "funny bunny", "chatty parrot"] + input2 = ["the fuzzy cat", "bitty piggy", "funny bunny", "silent partner"] + return pa.array(input1), pa.array(input2) + + +@pytest.mark.parametrize("width", [2, 3]) +def test_jaccard_index(input_data, width): + def get_tokens(s, width): + return [s[i : i + width] for i in range(len(s) - width + 1)] + + def jaccard_index(s1, s2, width): + x = set(get_tokens(s1, width)) + y = set(get_tokens(s2, width)) + return len(x & y) / len(x | y) + + input1, input2 = input_data + result = plc.nvtext.jaccard.jaccard_index( + plc.interop.from_arrow(input1), plc.interop.from_arrow(input2), width + ) + expected = pa.array( + [ + jaccard_index(s1.as_py(), s2.as_py(), width) + for s1, s2 in zip(input1, input2) + ], + type=pa.float32(), + ) + assert_column_eq(result, expected) From 349ba5d37789938a34c1ad75eb5eb57f1db85b2c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Tue, 8 Oct 2024 17:06:10 -0500 Subject: [PATCH 23/33] make conda installs in CI stricter (#17013) Contributes to https://github.com/rapidsai/build-planning/issues/106 Proposes specifying the RAPIDS version in `conda install` calls in CI that install CI artifacts, to reduce the risk of CI jobs picking up artifacts from other releases. 
Authors: - James Lamb (https://github.com/jameslamb) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17013 --- ci/build_docs.sh | 8 +++++--- ci/test_cpp_common.sh | 7 ++++++- ci/test_java.sh | 4 +++- ci/test_notebooks.sh | 5 ++++- ci/test_python_common.sh | 5 ++++- ci/test_python_other.sh | 6 +++++- 6 files changed, 27 insertions(+), 8 deletions(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index c67d127e635..dae6ac46757 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -3,8 +3,7 @@ set -euo pipefail -export RAPIDS_VERSION="$(rapids-version)" -export RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" export RAPIDS_VERSION_NUMBER="$RAPIDS_VERSION_MAJOR_MINOR" rapids-logger "Create test conda environment" @@ -29,7 +28,10 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - libcudf pylibcudf cudf dask-cudf + "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "pylibcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}" export RAPIDS_DOCS_DIR="$(mktemp -d)" diff --git a/ci/test_cpp_common.sh b/ci/test_cpp_common.sh index f5a8de543f6..e8f6e9388f4 100755 --- a/ci/test_cpp_common.sh +++ b/ci/test_cpp_common.sh @@ -5,6 +5,8 @@ set -euo pipefail . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-logger "Generate C++ testing dependencies" ENV_YAML_DIR="$(mktemp -d)" @@ -31,7 +33,10 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - libcudf libcudf_kafka libcudf-tests libcudf-example + "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "libcudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "libcudf-tests=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "libcudf-example=${RAPIDS_VERSION_MAJOR_MINOR}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_java.sh b/ci/test_java.sh index 629ad11014a..9b7b2e48dd6 100755 --- a/ci/test_java.sh +++ b/ci/test_java.sh @@ -5,6 +5,8 @@ set -euo pipefail . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-logger "Generate Java testing dependencies" ENV_YAML_DIR="$(mktemp -d)" @@ -30,7 +32,7 @@ CPP_CHANNEL=$(rapids-download-conda-from-s3 cpp) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ - libcudf + "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" rapids-logger "Check GPU usage" nvidia-smi diff --git a/ci/test_notebooks.sh b/ci/test_notebooks.sh index da9478ce25d..3e0712a0691 100755 --- a/ci/test_notebooks.sh +++ b/ci/test_notebooks.sh @@ -5,6 +5,8 @@ set -euo pipefail . /opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-logger "Generate notebook testing dependencies" ENV_YAML_DIR="$(mktemp -d)" @@ -30,7 +32,8 @@ PYTHON_CHANNEL=$(rapids-download-conda-from-s3 python) rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - cudf libcudf + "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" NBTEST="$(realpath "$(dirname "$0")/utils/nbtest.sh")" pushd notebooks diff --git a/ci/test_python_common.sh b/ci/test_python_common.sh index dc70661a17a..81e82908eb4 100755 --- a/ci/test_python_common.sh +++ b/ci/test_python_common.sh @@ -7,6 +7,8 @@ set -euo pipefail . 
/opt/conda/etc/profile.d/conda.sh +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-logger "Generate Python testing dependencies" ENV_YAML_DIR="$(mktemp -d)" @@ -38,4 +40,5 @@ rapids-print-env rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - cudf libcudf + "cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "libcudf=${RAPIDS_VERSION_MAJOR_MINOR}" diff --git a/ci/test_python_other.sh b/ci/test_python_other.sh index 67c97ad29a5..eee1d54083f 100755 --- a/ci/test_python_other.sh +++ b/ci/test_python_other.sh @@ -7,10 +7,14 @@ cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../ # Common setup steps shared by Python test jobs source ./ci/test_python_common.sh test_python_other +RAPIDS_VERSION_MAJOR_MINOR="$(rapids-version-major-minor)" + rapids-mamba-retry install \ --channel "${CPP_CHANNEL}" \ --channel "${PYTHON_CHANNEL}" \ - dask-cudf cudf_kafka custreamz + "dask-cudf=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "cudf_kafka=${RAPIDS_VERSION_MAJOR_MINOR}" \ + "custreamz=${RAPIDS_VERSION_MAJOR_MINOR}" rapids-logger "Check GPU usage" nvidia-smi From 5b931aca22a06734332963577a91e6af90bb6a68 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Tue, 8 Oct 2024 14:35:13 -1000 Subject: [PATCH 24/33] Add string.convert.convert_urls APIs to pylibcudf (#17003) Contributes to https://github.com/rapidsai/cudf/issues/15162 Also I believe the cpp docstrings were incorrect, but could use a second look. Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - https://github.com/brandon-b-miller - Nghia Truong (https://github.com/ttnghia) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/17003 --- .../cudf/strings/convert/convert_urls.hpp | 4 +- .../_lib/strings/convert/convert_urls.pyx | 36 +++-------- .../libcudf/strings/convert/convert_urls.pxd | 4 +- .../pylibcudf/strings/convert/CMakeLists.txt | 2 +- .../pylibcudf/strings/convert/__init__.pxd | 1 + .../pylibcudf/strings/convert/__init__.py | 1 + .../strings/convert/convert_urls.pxd | 8 +++ .../strings/convert/convert_urls.pyx | 63 +++++++++++++++++++ .../tests/test_string_convert_urls.py | 36 +++++++++++ 9 files changed, 121 insertions(+), 34 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py diff --git a/cpp/include/cudf/strings/convert/convert_urls.hpp b/cpp/include/cudf/strings/convert/convert_urls.hpp index d6e87f9d543..febc63d8779 100644 --- a/cpp/include/cudf/strings/convert/convert_urls.hpp +++ b/cpp/include/cudf/strings/convert/convert_urls.hpp @@ -28,7 +28,7 @@ namespace strings { */ /** - * @brief Decodes each string using URL encoding. + * @brief Encodes each string using URL encoding. * * Converts mostly non-ascii characters and control characters into UTF-8 hex code-points * prefixed with '%'. For example, the space character must be converted to characters '%20' where @@ -49,7 +49,7 @@ std::unique_ptr url_encode( rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); /** - * @brief Encodes each string using URL encoding. + * @brief Decodes each string using URL encoding. * * Converts all character sequences starting with '%' into character code-points * interpreting the 2 following characters as hex values to create the code-point. 
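As a side note on the decode rule the corrected docstring above describes (every "%XY" sequence becomes the byte with hex value XY), here is a minimal host-side C++ sketch of percent-decoding. It is an illustration only, under the assumption that a simple serial loop conveys the rule; the helper name url_decode_one is hypothetical and this is not libcudf's parallel implementation.

#include <cctype>
#include <cstddef>
#include <string>

// Decode one string: each valid "%XY" hex escape becomes a single byte;
// anything else is copied through unchanged.
std::string url_decode_one(std::string const& in)
{
  auto hex = [](char c) {
    return std::isdigit(static_cast<unsigned char>(c))
             ? c - '0'
             : std::tolower(static_cast<unsigned char>(c)) - 'a' + 10;
  };
  std::string out;
  for (std::size_t i = 0; i < in.size(); ++i) {
    if (in[i] == '%' && i + 2 < in.size() &&
        std::isxdigit(static_cast<unsigned char>(in[i + 1])) &&
        std::isxdigit(static_cast<unsigned char>(in[i + 2]))) {
      out.push_back(static_cast<char>(hex(in[i + 1]) * 16 + hex(in[i + 2])));
      i += 2;  // skip the two hex digits just consumed
    } else {
      out.push_back(in[i]);
    }
  }
  return out;  // e.g. url_decode_one("%2Fhome%2fnfs") == "/home/nfs"
}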
diff --git a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx index e52116d6247..d5c2f771970 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_urls.pyx @@ -1,17 +1,9 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move +import pylibcudf as plc from cudf.core.buffer import acquire_spill_lock -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.strings.convert.convert_urls cimport ( - url_decode as cpp_url_decode, - url_encode as cpp_url_encode, -) - from cudf._lib.column cimport Column @@ -28,17 +20,10 @@ def url_decode(Column source_strings): ------- URL decoded string column """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_url_decode( - source_view - )) - - return Column.from_unique_ptr( - move(c_result) + plc_column = plc.strings.convert.convert_urls.url_decode( + source_strings.to_pylibcudf(mode="read") ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() @@ -57,14 +42,7 @@ def url_encode(Column source_strings): ------- URL encoded string column """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_url_encode( - source_view - )) - - return Column.from_unique_ptr( - move(c_result) + plc_column = plc.strings.convert.convert_urls.url_encode( + source_strings.to_pylibcudf(mode="read") ) + return Column.from_pylibcudf(plc_column) diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd index 5c07b698454..cb319ad143b 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_urls.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view cdef extern from "cudf/strings/convert/convert_urls.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] url_encode( - column_view input_col) except + + column_view input) except + cdef unique_ptr[column] url_decode( - column_view input_col) except + + column_view input) except + diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt index eb0d6ee6999..41aeb72039b 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx - convert_fixed_point.pyx convert_ipv4.pyx + convert_fixed_point.pyx convert_ipv4.pyx convert_urls.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd index 431beed8e5d..b4b0b521e39 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd @@ -5,4 +5,5 @@ from . 
cimport ( convert_durations, convert_fixed_point, convert_ipv4, + convert_urls, ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py index a601b562c2e..409620fce45 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py @@ -5,4 +5,5 @@ convert_durations, convert_fixed_point, convert_ipv4, + convert_urls, ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd new file mode 100644 index 00000000000..da05ce93426 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pxd @@ -0,0 +1,8 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column + + +cpdef Column url_encode(Column Input) + +cpdef Column url_decode(Column Input) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx new file mode 100644 index 00000000000..a5e080e53b7 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_urls.pyx @@ -0,0 +1,63 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.strings.convert cimport convert_urls as cpp_convert_urls + + +cpdef Column url_encode(Column input): + """ + Encodes each string using URL encoding. + + For details, see :cpp:func:`cudf::strings::url_encode` + + Parameters + ---------- + input : Column + Strings instance for this operation. + + Returns + ------- + Column + New strings column. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_convert_urls.url_encode( + input.view() + ) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column url_decode(Column input): + """ + Decodes each string using URL encoding. + + For details, see :cpp:func:`cudf::strings::url_decode` + + Parameters + ---------- + input : Column + Strings instance for this operation. + + Returns + ------- + Column + New strings column. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_convert_urls.url_decode( + input.view() + ) + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py new file mode 100644 index 00000000000..fee8c3fb8f6 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_urls.py @@ -0,0 +1,36 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+import urllib + +import pyarrow as pa +import pylibcudf as plc +from utils import assert_column_eq + + +def test_url_encode(): + data = ["/home/nfs", None] + arr = pa.array(data) + result = plc.strings.convert.convert_urls.url_encode( + plc.interop.from_arrow(arr) + ) + expected = pa.array( + [ + urllib.parse.quote(url, safe="") if isinstance(url, str) else url + for url in data + ] + ) + assert_column_eq(result, expected) + + +def test_url_decode(): + data = ["%2Fhome%2fnfs", None] + arr = pa.array(data) + result = plc.strings.convert.convert_urls.url_decode( + plc.interop.from_arrow(arr) + ) + expected = pa.array( + [ + urllib.parse.unquote(url) if isinstance(url, str) else url + for url in data + ] + ) + assert_column_eq(result, expected) From ded4dd2acbf2c5933765853eab56f4d37599c909 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Tue, 8 Oct 2024 18:02:14 -0700 Subject: [PATCH 25/33] Add pinning for pyarrow in wheels (#17018) We have recently observed a number of seg faults in our Python tests. From some investigation, the error comes from the import of pyarrow loading the bundled libarrow.so, and in particular when that library runs a jemalloc function `background_thread_entry`. We have observed similar (but not identical) errors in the past that have to do with as-yet unsolved problems in the way that arrow handles multi-threaded environments. The error is currently only observed on arm runners and with pyarrow 17.0.0. In my tests the error is highly sensitive to everything from import order to unrelated code segments, suggesting a race condition, some form of memory corruption, or perhaps symbol resolution errors at runtime. As a result, I have had limited success in drilling down further into specific causes, especially since attempts to rebuild libarrow.so also squash the error and I therefore cannot use debug symbols. From some offline discussion we decided that avoiding the problematic version is a sufficient fix for now. Due to the sensitivity, I am simply skipping 17.0.0 in this PR. I suspect that future builds of pyarrow will also usually not exhibit this bug (although it may recur occasionally on specific versions of pyarrow). Therefore, rather than lowering the upper bound I would prefer to allow us to float and see if and when this problem reappears. Since our DFG+RBB combination for wheel builds does not yet support any matrix entry other than `cuda`, I'm using environment markers to specify the constraint rather than a matrix entry in dependencies.yaml. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/17018 --- dependencies.yaml | 11 ++++++++++- python/cudf/pyproject.toml | 3 ++- python/cudf_polars/tests/expressions/test_agg.py | 2 +- python/pylibcudf/pyproject.toml | 3 ++- 4 files changed, 15 insertions(+), 4 deletions(-) diff --git a/dependencies.yaml b/dependencies.yaml index 3561b22965d..ca17917c905 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -421,9 +421,18 @@ dependencies: - cython>=3.0.3 pyarrow_run: common: - - output_types: [conda, requirements, pyproject] + - output_types: [conda] packages: - pyarrow>=14.0.0,<18.0.0a0 + - output_types: [requirements, pyproject] + packages: + # pyarrow 17.0.0 wheels have a subtle issue around threading that + # can cause segmentation faults around imports on arm. 
It appears to + # be highly dependent on the exact build configuration, so we'll just + # avoid 17.0.0 for now unless we observe similar issues in future + # releases as well. + - pyarrow>=14.0.0,<18.0.0a0; platform_machine=='x86_64' + - pyarrow>=14.0.0,<18.0.0a0,!=17.0.0; platform_machine=='aarch64' cuda_version: specific: - output_types: conda diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index c0776fd0de6..feab04ffadc 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -30,7 +30,8 @@ dependencies = [ "packaging", "pandas>=2.0,<2.2.4dev0", "ptxcompiler", - "pyarrow>=14.0.0,<18.0.0a0", + "pyarrow>=14.0.0,<18.0.0a0,!=17.0.0; platform_machine=='aarch64'", + "pyarrow>=14.0.0,<18.0.0a0; platform_machine=='x86_64'", "pylibcudf==24.12.*,>=0.0.0a0", "rich", "rmm==24.12.*,>=0.0.0a0", diff --git a/python/cudf_polars/tests/expressions/test_agg.py b/python/cudf_polars/tests/expressions/test_agg.py index 56055f4c6c2..3001a61101a 100644 --- a/python/cudf_polars/tests/expressions/test_agg.py +++ b/python/cudf_polars/tests/expressions/test_agg.py @@ -93,7 +93,7 @@ def test_bool_agg(agg, request): expr = getattr(pl.col("a"), agg)() q = df.select(expr) - assert_gpu_result_equal(q) + assert_gpu_result_equal(q, check_exact=False) @pytest.mark.parametrize("cum_agg", expr.UnaryFunction._supported_cum_aggs) diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml index be65142850f..c9a685de3e9 100644 --- a/python/pylibcudf/pyproject.toml +++ b/python/pylibcudf/pyproject.toml @@ -22,7 +22,8 @@ dependencies = [ "libcudf==24.12.*,>=0.0.0a0", "nvtx>=0.2.1", "packaging", - "pyarrow>=14.0.0,<18.0.0a0", + "pyarrow>=14.0.0,<18.0.0a0,!=17.0.0; platform_machine=='aarch64'", + "pyarrow>=14.0.0,<18.0.0a0; platform_machine=='x86_64'", "rmm==24.12.*,>=0.0.0a0", "typing_extensions>=4.0.0", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. From a6853f4b3832b5338a4d0cd9d0b93c7bcd1ce884 Mon Sep 17 00:00:00 2001 From: Srinivas Yadav <43375352+srinivasyadav18@users.noreply.github.com> Date: Tue, 8 Oct 2024 23:03:03 -0500 Subject: [PATCH 26/33] Refactor `histogram` reduction using `cuco::static_set::insert_and_find` (#16485) Refactors `histogram` reduce and groupby aggregations using `cuco::static_set::insert_and_find`. Speed improvement results [here](https://github.com/rapidsai/cudf/pull/16485#issuecomment-2394855796) and [here](https://github.com/rapidsai/cudf/pull/16485#issuecomment-2394865692). 
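For intuition, the reduce-by-key pattern this PR switches to can be sketched on the host, with a serial std::unordered_map standing in for the concurrent cuco::static_set (an assumption made purely for illustration; the real code runs per-row on the GPU and increments with device atomics):

#include <cstddef>
#include <cstdint>
#include <unordered_map>
#include <vector>

// Host-side analogue of the insert_and_find counting pattern: the first
// insertion of a key claims a slot, and every duplicate finds that same
// slot and bumps its count. On the device, static_set::insert_and_find
// plays the role of try_emplace and the increment is an atomic fetch_add.
std::vector<std::int64_t> histogram_counts(std::vector<int> const& keys)
{
  std::unordered_map<int, std::size_t> slot;         // key -> index of first occurrence
  std::vector<std::int64_t> counts(keys.size(), 0);  // zero-filled, like reduction_results
  for (std::size_t i = 0; i < keys.size(); ++i) {
    auto const [it, inserted] = slot.try_emplace(keys[i], i);  // "insert_and_find"
    ++counts[it->second];
  }
  return counts;  // non-zero entries mark the distinct rows, as in the copy_if step
}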
Authors: - Srinivas Yadav (https://github.com/srinivasyadav18) - Muhammad Haseeb (https://github.com/mhaseeb123) Approvers: - Yunsong Wang (https://github.com/PointKernel) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/16485 --- cpp/benchmarks/CMakeLists.txt | 10 +- cpp/benchmarks/groupby/group_histogram.cpp | 90 ++++++++++ cpp/benchmarks/reduction/histogram.cpp | 68 +++++++ .../cudf/detail/hash_reduce_by_row.cuh | 169 ------------------ cpp/src/reductions/histogram.cu | 164 +++++++---------- 5 files changed, 231 insertions(+), 270 deletions(-) create mode 100644 cpp/benchmarks/groupby/group_histogram.cpp create mode 100644 cpp/benchmarks/reduction/histogram.cpp delete mode 100644 cpp/include/cudf/detail/hash_reduce_by_row.cuh diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt index b8a53cd8bd9..b0f75b25975 100644 --- a/cpp/benchmarks/CMakeLists.txt +++ b/cpp/benchmarks/CMakeLists.txt @@ -245,6 +245,7 @@ ConfigureNVBench( REDUCTION_NVBENCH reduction/anyall.cpp reduction/dictionary.cpp + reduction/histogram.cpp reduction/minmax.cpp reduction/rank.cpp reduction/reduce.cpp @@ -270,8 +271,13 @@ ConfigureBench( ) ConfigureNVBench( - GROUPBY_NVBENCH groupby/group_max.cpp groupby/group_max_multithreaded.cpp - groupby/group_nunique.cpp groupby/group_rank.cpp groupby/group_struct_keys.cpp + GROUPBY_NVBENCH + groupby/group_histogram.cpp + groupby/group_max.cpp + groupby/group_max_multithreaded.cpp + groupby/group_nunique.cpp + groupby/group_rank.cpp + groupby/group_struct_keys.cpp ) # ################################################################################################## diff --git a/cpp/benchmarks/groupby/group_histogram.cpp b/cpp/benchmarks/groupby/group_histogram.cpp new file mode 100644 index 00000000000..cd7f9f298af --- /dev/null +++ b/cpp/benchmarks/groupby/group_histogram.cpp @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include +#include + +#include + +#include + +template +void groupby_histogram_helper(nvbench::state& state, + cudf::size_type num_rows, + cudf::size_type cardinality, + double null_probability) +{ + auto const keys = [&] { + data_profile const profile = + data_profile_builder() + .cardinality(cardinality) + .no_validity() + .distribution(cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows); + return create_random_column(cudf::type_to_id(), row_count{num_rows}, profile); + }(); + + auto const values = [&] { + auto builder = data_profile_builder().cardinality(0).distribution( + cudf::type_to_id(), distribution_id::UNIFORM, 0, num_rows); + if (null_probability > 0) { + builder.null_probability(null_probability); + } else { + builder.no_validity(); + } + return create_random_column( + cudf::type_to_id(), row_count{num_rows}, data_profile{builder}); + }(); + + // Vector of 1 request + std::vector requests(1); + requests.back().values = values->view(); + requests.back().aggregations.push_back( + cudf::make_histogram_aggregation()); + + auto const mem_stats_logger = cudf::memory_stats_logger(); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto gb_obj = cudf::groupby::groupby(cudf::table_view({keys->view()})); + auto const result = gb_obj.aggregate(requests); + }); + + auto const elapsed_time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value"); + state.add_element_count(static_cast(num_rows) / elapsed_time, "rows/s"); + state.add_buffer_size( + mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage"); +} + +template +void bench_groupby_histogram(nvbench::state& state, nvbench::type_list) +{ + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const null_probability = state.get_float64("null_probability"); + + if (cardinality > num_rows) { + state.skip("cardinality > num_rows"); + return; + } + + groupby_histogram_helper(state, num_rows, cardinality, null_probability); +} + +NVBENCH_BENCH_TYPES(bench_groupby_histogram, + NVBENCH_TYPE_AXES(nvbench::type_list)) + .set_name("groupby_histogram") + .add_float64_axis("null_probability", {0, 0.1, 0.9}) + .add_int64_axis("cardinality", {100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000}) + .add_int64_axis("num_rows", {100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000}); diff --git a/cpp/benchmarks/reduction/histogram.cpp b/cpp/benchmarks/reduction/histogram.cpp new file mode 100644 index 00000000000..d0925de5c87 --- /dev/null +++ b/cpp/benchmarks/reduction/histogram.cpp @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include "cudf/aggregation.hpp" +#include "cudf/detail/aggregation/aggregation.hpp" + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +template +static void nvbench_reduction_histogram(nvbench::state& state, nvbench::type_list) +{ + auto const dtype = cudf::type_to_id(); + + auto const cardinality = static_cast(state.get_int64("cardinality")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const null_probability = state.get_float64("null_probability"); + + if (cardinality > num_rows) { + state.skip("cardinality > num_rows"); + return; + } + + data_profile const profile = data_profile_builder() + .null_probability(null_probability) + .cardinality(cardinality) + .distribution(dtype, distribution_id::UNIFORM, 0, num_rows); + + auto const input = create_random_column(dtype, row_count{num_rows}, profile); + auto agg = cudf::make_histogram_aggregation(); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + rmm::cuda_stream_view stream_view{launch.get_stream()}; + auto result = cudf::reduce(*input, *agg, input->type(), stream_view); + }); + + state.add_element_count(input->size()); +} + +using data_type = nvbench::type_list; + +NVBENCH_BENCH_TYPES(nvbench_reduction_histogram, NVBENCH_TYPE_AXES(data_type)) + .set_name("histogram") + .add_float64_axis("null_probability", {0.1}) + .add_int64_axis("cardinality", + {0, 100, 1'000, 10'000, 100'000, 1'000'000, 10'000'000, 50'000'000}) + .add_int64_axis("num_rows", {10'000, 100'000, 1'000'000, 10'000'000, 100'000'000}); diff --git a/cpp/include/cudf/detail/hash_reduce_by_row.cuh b/cpp/include/cudf/detail/hash_reduce_by_row.cuh deleted file mode 100644 index 7de79b31bc7..00000000000 --- a/cpp/include/cudf/detail/hash_reduce_by_row.cuh +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2022-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include -#include -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -namespace cudf::detail { - -using hash_map_type = cuco::legacy:: - static_map>; - -/** - * @brief The base struct for customized reduction functor to perform reduce-by-key with keys are - * rows that compared equal. - * - * TODO: We need to switch to use `static_reduction_map` when it is ready - * (https://github.com/NVIDIA/cuCollections/pull/98). - */ -template -struct reduce_by_row_fn_base { - protected: - MapView const d_map; - KeyHasher const d_hasher; - KeyEqual const d_equal; - OutputType* const d_output; - - reduce_by_row_fn_base(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - OutputType* const d_output) - : d_map{d_map}, d_hasher{d_hasher}, d_equal{d_equal}, d_output{d_output} - { - } - - /** - * @brief Return a pointer to the output array at the given index. 
- * - * @param idx The access index - * @return A pointer to the given index in the output array - */ - __device__ OutputType* get_output_ptr(size_type const idx) const - { - auto const iter = d_map.find(idx, d_hasher, d_equal); - - if (iter != d_map.end()) { - // Only one (undetermined) index value of the duplicate rows could be inserted into the map. - // As such, looking up for all indices of duplicate rows always returns the same value. - auto const inserted_idx = iter->second.load(cuda::std::memory_order_relaxed); - - // All duplicate rows will have concurrent access to this same output slot. - return &d_output[inserted_idx]; - } else { - // All input `idx` values have been inserted into the map before. - // Thus, searching for an `idx` key resulting in the `end()` iterator only happens if - // `d_equal(idx, idx) == false`. - // Such situations are due to comparing nulls or NaNs which are considered as always unequal. - // In those cases, all rows containing nulls or NaNs are distinct. Just return their direct - // output slot. - return &d_output[idx]; - } - } -}; - -/** - * @brief Perform a reduction on groups of rows that are compared equal. - * - * This is essentially a reduce-by-key operation with keys are non-contiguous rows and are compared - * equal. A hash table is used to find groups of equal rows. - * - * At the beginning of the operation, the entire output array is filled with a value given by - * the `init` parameter. Then, the reduction result for each row group is written into the output - * array at the index of an unspecified row in the group. - * - * @tparam ReduceFuncBuilder The builder class that must have a `build()` method returning a - * reduction functor derived from `reduce_by_row_fn_base` - * @tparam OutputType Type of the reduction results - * @param map The auxiliary map to perform reduction - * @param preprocessed_input The preprocessed of the input rows for computing row hashing and row - * comparisons - * @param num_rows The number of all input rows - * @param has_nulls Indicate whether the input rows has any nulls at any nested levels - * @param has_nested_columns Indicates whether the input table has any nested columns - * @param nulls_equal Flag to specify whether null elements should be considered as equal - * @param nans_equal Flag to specify whether NaN values in floating point column should be - * considered equal. 
- * @param init The initial value for reduction of each row group - * @param stream CUDA stream used for device memory operations and kernel launches - * @param mr Device memory resource used to allocate the returned vector - * @return A device_uvector containing the reduction results - */ -template -rmm::device_uvector hash_reduce_by_row( - hash_map_type const& map, - std::shared_ptr const preprocessed_input, - size_type num_rows, - cudf::nullate::DYNAMIC has_nulls, - bool has_nested_columns, - null_equality nulls_equal, - nan_equality nans_equal, - ReduceFuncBuilder func_builder, - OutputType init, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) -{ - auto const map_dview = map.get_device_view(); - auto const row_hasher = cudf::experimental::row::hash::row_hasher(preprocessed_input); - auto const key_hasher = row_hasher.device_hasher(has_nulls); - auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - - auto reduction_results = rmm::device_uvector(num_rows, stream, mr); - thrust::uninitialized_fill( - rmm::exec_policy(stream), reduction_results.begin(), reduction_results.end(), init); - - auto const reduce_by_row = [&](auto const value_comp) { - if (has_nested_columns) { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - func_builder.build(map_dview, key_hasher, key_equal, reduction_results.begin())); - } else { - auto const key_equal = row_comp.equal_to(has_nulls, nulls_equal, value_comp); - thrust::for_each( - rmm::exec_policy(stream), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_rows), - func_builder.build(map_dview, key_hasher, key_equal, reduction_results.begin())); - } - }; - - if (nans_equal == nan_equality::ALL_EQUAL) { - using nan_equal_comparator = - cudf::experimental::row::equality::nan_equal_physical_equality_comparator; - reduce_by_row(nan_equal_comparator{}); - } else { - using nan_unequal_comparator = cudf::experimental::row::equality::physical_equality_comparator; - reduce_by_row(nan_unequal_comparator{}); - } - - return reduction_results; -} - -} // namespace cudf::detail diff --git a/cpp/src/reductions/histogram.cu b/cpp/src/reductions/histogram.cu index 362b5f74c46..b40b2b6dd2e 100644 --- a/cpp/src/reductions/histogram.cu +++ b/cpp/src/reductions/histogram.cu @@ -15,18 +15,24 @@ */ #include +#include #include -#include #include #include #include +#include #include +#include + +#include +#include #include #include #include #include #include +#include #include @@ -34,61 +40,12 @@ namespace cudf::reduction::detail { namespace { +// A CUDA Cooperative Group of 1 thread for the hash set for histogram +auto constexpr DEFAULT_HISTOGRAM_CG_SIZE = 1; + // Always use 64-bit signed integer for storing count. using histogram_count_type = int64_t; -/** - * @brief The functor to accumulate the frequency of each distinct rows in the input table. - */ -template -struct reduce_fn : cudf::detail::reduce_by_row_fn_base { - CountType const* d_partial_output; - - reduce_fn(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - CountType* const d_output, - CountType const* const d_partial_output) - : cudf::detail::reduce_by_row_fn_base{d_map, - d_hasher, - d_equal, - d_output}, - d_partial_output{d_partial_output} - { - } - - // Count the number of rows in each group of rows that are compared equal. 
- __device__ void operator()(size_type const idx) const - { - auto const increment = d_partial_output ? d_partial_output[idx] : CountType{1}; - auto const count = - cuda::atomic_ref(*this->get_output_ptr(idx)); - count.fetch_add(increment, cuda::std::memory_order_relaxed); - } -}; - -/** - * @brief The builder to construct an instance of `reduce_fn` functor. - */ -template -struct reduce_func_builder { - CountType const* const d_partial_output; - - reduce_func_builder(CountType const* const d_partial_output) : d_partial_output{d_partial_output} - { - } - - template - auto build(MapView const& d_map, - KeyHasher const& d_hasher, - KeyEqual const& d_equal, - CountType* const d_output) - { - return reduce_fn{ - d_map, d_hasher, d_equal, d_output, d_partial_output}; - } -}; - /** * @brief Specialized functor to check for not-zero of the second component of the input. */ @@ -163,14 +120,6 @@ compute_row_frequencies(table_view const& input, "Nested types are not yet supported in histogram aggregation.", std::invalid_argument); - auto map = cudf::detail::hash_map_type{ - compute_hash_table_size(input.num_rows()), - cuco::empty_key{-1}, - cuco::empty_value{std::numeric_limits::min()}, - - cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, - stream.value()}; - auto const preprocessed_input = cudf::experimental::row::hash::preprocessed_table::create(input, stream); auto const has_nulls = nullate::DYNAMIC{cudf::has_nested_nulls(input)}; @@ -179,51 +128,68 @@ compute_row_frequencies(table_view const& input, auto const key_hasher = row_hasher.device_hasher(has_nulls); auto const row_comp = cudf::experimental::row::equality::self_comparator(preprocessed_input); - auto const pair_iter = cudf::detail::make_counting_transform_iterator( - size_type{0}, - cuda::proclaim_return_type>( - [] __device__(size_type const i) { return cuco::make_pair(i, i); })); - // Always compare NaNs as equal. using nan_equal_comparator = cudf::experimental::row::equality::nan_equal_physical_equality_comparator; auto const value_comp = nan_equal_comparator{}; + // Hard set the tparam `has_nested_columns` = false for now as we don't yet support nested columns + auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); + + using row_hash = + cudf::experimental::row::hash::device_row_hasher; + + size_t const num_rows = input.num_rows(); + + // Construct a vector to store reduced counts and init to zero + rmm::device_uvector reduction_results(num_rows, stream, mr); + thrust::uninitialized_fill(rmm::exec_policy_nosync(stream), + reduction_results.begin(), + reduction_results.end(), + histogram_count_type{0}); + + // Construct a hash set + auto row_set = cuco::static_set{ + cuco::extent{num_rows}, + cudf::detail::CUCO_DESIRED_LOAD_FACTOR, + cuco::empty_key{-1}, + key_equal, + cuco::linear_probing{key_hasher}, + {}, // thread scope + {}, // storage + cudf::detail::cuco_allocator{rmm::mr::polymorphic_allocator{}, stream}, + stream.value()}; - if (has_nested_columns) { - auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); - } else { - auto const key_equal = row_comp.equal_to(has_nulls, null_equality::EQUAL, value_comp); - map.insert(pair_iter, pair_iter + input.num_rows(), key_hasher, key_equal, stream.value()); - } - - // Gather the indices of distinct rows. 
- auto distinct_indices = std::make_unique>( - static_cast(map.get_size()), stream, mr); - - // Store the number of occurrences of each distinct row. - auto distinct_counts = make_numeric_column(data_type{type_to_id()}, - static_cast(map.get_size()), - mask_state::UNALLOCATED, - stream, - mr); + // Device-accessible reference to the hash set with `insert_and_find` operator + auto row_set_ref = row_set.ref(cuco::op::insert_and_find); // Compute frequencies (aka distinct counts) for the input rows. // Note that we consider null and NaNs as always equal. - auto const reduction_results = cudf::detail::hash_reduce_by_row( - map, - preprocessed_input, - input.num_rows(), - has_nulls, - has_nested_columns, - null_equality::EQUAL, - nan_equality::ALL_EQUAL, - reduce_func_builder{ - partial_counts ? partial_counts.value().begin() : nullptr}, - histogram_count_type{0}, - stream, - cudf::get_current_device_resource_ref()); - + thrust::for_each( + rmm::exec_policy_nosync(stream), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(num_rows), + [set_ref = row_set_ref, + increments = + partial_counts.has_value() ? partial_counts.value().begin() : nullptr, + counts = reduction_results.begin()] __device__(auto const idx) mutable { + auto const [inserted_idx_ptr, _] = set_ref.insert_and_find(idx); + cuda::atomic_ref count_ref{ + counts[*inserted_idx_ptr]}; + auto const increment = increments ? increments[idx] : histogram_count_type{1}; + count_ref.fetch_add(increment, cuda::std::memory_order_relaxed); + }); + + // Set-size is the number of distinct (inserted) rows + auto const set_size = row_set.size(stream); + + // Vector of distinct indices + auto distinct_indices = std::make_unique>(set_size, stream, mr); + // Column of distinct counts + auto distinct_counts = make_numeric_column( + data_type{type_to_id()}, set_size, mask_state::UNALLOCATED, stream, mr); + + // Copy row indices and counts to the output if counts are non-zero auto const input_it = thrust::make_zip_iterator( thrust::make_tuple(thrust::make_counting_iterator(0), reduction_results.begin())); auto const output_it = thrust::make_zip_iterator(thrust::make_tuple( @@ -232,7 +198,7 @@ compute_row_frequencies(table_view const& input, // Reduction results above are either group sizes of equal rows, or `0`. // The final output is non-zero group sizes only. thrust::copy_if( - rmm::exec_policy(stream), input_it, input_it + input.num_rows(), output_it, is_not_zero{}); + rmm::exec_policy_nosync(stream), input_it, input_it + num_rows, output_it, is_not_zero{}); return {std::move(distinct_indices), std::move(distinct_counts)}; } From bfac5e5d9b2c10718d2f0f925b4f2c9f62d8fea1 Mon Sep 17 00:00:00 2001 From: Peixin Date: Wed, 9 Oct 2024 13:56:10 +0800 Subject: [PATCH 27/33] Disable kvikio remote I/O to avoid openssl dependencies in JNI build (#17026) This is the same issue as https://github.com/NVIDIA/spark-rapids-jni/issues/2475, caused by https://github.com/rapidsai/kvikio/pull/464. Ports the fix from https://github.com/NVIDIA/spark-rapids-jni/pull/2476; verified locally. Authors: - Peixin (https://github.com/pxLi) Approvers: - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17026 --- java/ci/build-in-docker.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/java/ci/build-in-docker.sh b/java/ci/build-in-docker.sh index 5a429bdc739..4b5379cf0f1 100755 --- a/java/ci/build-in-docker.sh +++ b/java/ci/build-in-docker.sh @@ -64,7 +64,8 @@ cmake ..
-G"${CMAKE_GENERATOR}" \ -DBUILD_TESTS=$BUILD_CPP_TESTS \ -DCUDF_USE_PER_THREAD_DEFAULT_STREAM=$ENABLE_PTDS \ -DRMM_LOGGING_LEVEL=$RMM_LOGGING_LEVEL \ - -DBUILD_SHARED_LIBS=OFF + -DBUILD_SHARED_LIBS=OFF \ + -DKvikIO_REMOTE_SUPPORT=OFF if [[ -z "${PARALLEL_LEVEL}" ]]; then cmake --build . From 319a53327ac7c921a78979a1f23c5caf7171129d Mon Sep 17 00:00:00 2001 From: Ray Douglass Date: Wed, 9 Oct 2024 09:38:30 -0400 Subject: [PATCH 28/33] Update Changelog [skip ci] --- CHANGELOG.md | 296 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 296 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index f2a7c337675..7a75b2a95a4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,299 @@ +# cudf 24.10.00 (9 Oct 2024) + +## 🚨 Breaking Changes + +- Whitespace normalization of nested column coerced as string column in JSONL inputs ([#16759](https://github.com/rapidsai/cudf/pull/16759)) [@shrshi](https://github.com/shrshi) +- Add libcudf wrappers around current_device_resource functions. ([#16679](https://github.com/rapidsai/cudf/pull/16679)) [@harrism](https://github.com/harrism) +- Fix empty cluster handling in tdigest merge ([#16675](https://github.com/rapidsai/cudf/pull/16675)) [@jihoonson](https://github.com/jihoonson) +- Remove java ColumnView.copyWithBooleanColumnAsValidity ([#16660](https://github.com/rapidsai/cudf/pull/16660)) [@revans2](https://github.com/revans2) +- Support reading multiple PQ sources with mismatching nullability for columns ([#16639](https://github.com/rapidsai/cudf/pull/16639)) [@mhaseeb123](https://github.com/mhaseeb123) +- Remove arrow_io_source ([#16607](https://github.com/rapidsai/cudf/pull/16607)) [@vyasr](https://github.com/vyasr) +- Remove legacy Arrow interop APIs ([#16590](https://github.com/rapidsai/cudf/pull/16590)) [@vyasr](https://github.com/vyasr) +- Remove NativeFile support from cudf Python ([#16589](https://github.com/rapidsai/cudf/pull/16589)) [@vyasr](https://github.com/vyasr) +- Revert "Make proxy NumPy arrays pass isinstance check in `cudf.pandas`" ([#16586](https://github.com/rapidsai/cudf/pull/16586)) [@Matt711](https://github.com/Matt711) +- Align public utility function signatures with pandas 2.x ([#16565](https://github.com/rapidsai/cudf/pull/16565)) [@mroeschke](https://github.com/mroeschke) +- Disallow cudf.Index accepting column in favor of ._from_column ([#16549](https://github.com/rapidsai/cudf/pull/16549)) [@mroeschke](https://github.com/mroeschke) +- Refactor dictionary encoding in PQ writer to migrate to the new `cuco::static_map` ([#16541](https://github.com/rapidsai/cudf/pull/16541)) [@mhaseeb123](https://github.com/mhaseeb123) +- Change IPv4 convert APIs to support UINT32 instead of INT64 ([#16489](https://github.com/rapidsai/cudf/pull/16489)) [@davidwendt](https://github.com/davidwendt) +- enable list to be forced as string in JSON reader. 
([#16472](https://github.com/rapidsai/cudf/pull/16472)) [@karthikeyann](https://github.com/karthikeyann) +- Disallow cudf.Series to accept column in favor of `._from_column` ([#16454](https://github.com/rapidsai/cudf/pull/16454)) [@mroeschke](https://github.com/mroeschke) +- Align groupby APIs with pandas 2.x ([#16403](https://github.com/rapidsai/cudf/pull/16403)) [@mroeschke](https://github.com/mroeschke) +- Align misc DataFrame and MultiIndex methods with pandas 2.x ([#16402](https://github.com/rapidsai/cudf/pull/16402)) [@mroeschke](https://github.com/mroeschke) +- Align Index APIs with pandas 2.x ([#16361](https://github.com/rapidsai/cudf/pull/16361)) [@mroeschke](https://github.com/mroeschke) +- Add `stream` param to stream compaction APIs ([#16295](https://github.com/rapidsai/cudf/pull/16295)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) + +## 🐛 Bug Fixes + +- Add license to the pylibcudf wheel ([#16976](https://github.com/rapidsai/cudf/pull/16976)) [@raydouglass](https://github.com/raydouglass) +- Parse newline as whitespace character while tokenizing JSONL inputs with non-newline delimiter ([#16950](https://github.com/rapidsai/cudf/pull/16950)) [@shrshi](https://github.com/shrshi) +- Add dask-cudf workaround for missing `rename_axis` support in cudf ([#16899](https://github.com/rapidsai/cudf/pull/16899)) [@rjzamora](https://github.com/rjzamora) +- Update oldest deps for `pyarrow` & `numpy` ([#16883](https://github.com/rapidsai/cudf/pull/16883)) [@galipremsagar](https://github.com/galipremsagar) +- Update labeler for pylibcudf ([#16868](https://github.com/rapidsai/cudf/pull/16868)) [@vyasr](https://github.com/vyasr) +- Revert "Refactor mixed_semi_join using cuco::static_set" ([#16855](https://github.com/rapidsai/cudf/pull/16855)) [@mhaseeb123](https://github.com/mhaseeb123) +- Fix metadata after implicit array conversion from Dask cuDF ([#16842](https://github.com/rapidsai/cudf/pull/16842)) [@rjzamora](https://github.com/rjzamora) +- Add cudf.pandas dependencies.yaml to update-version.sh ([#16840](https://github.com/rapidsai/cudf/pull/16840)) [@raydouglass](https://github.com/raydouglass) +- Use cupy 12.2.0 as oldest dependency pinning on CUDA 12 ARM ([#16808](https://github.com/rapidsai/cudf/pull/16808)) [@bdice](https://github.com/bdice) +- Revert "Fix empty cluster handling in tdigest merge (#16675)" ([#16800](https://github.com/rapidsai/cudf/pull/16800)) [@jihoonson](https://github.com/jihoonson) +- Intentionally leak thread_local CUDA resources to avoid crash (part 1) ([#16787](https://github.com/rapidsai/cudf/pull/16787)) [@kingcrimsontianyu](https://github.com/kingcrimsontianyu) +- Fix `cov`/`corr` bug in dask-cudf ([#16786](https://github.com/rapidsai/cudf/pull/16786)) [@rjzamora](https://github.com/rjzamora) +- Fix slice_strings wide strings logic with multi-byte characters ([#16777](https://github.com/rapidsai/cudf/pull/16777)) [@davidwendt](https://github.com/davidwendt) +- Fix nvbench output for sha512 ([#16773](https://github.com/rapidsai/cudf/pull/16773)) [@davidwendt](https://github.com/davidwendt) +- Allow read_csv(header=None) to return int column labels in `mode.pandas_compatible` ([#16769](https://github.com/rapidsai/cudf/pull/16769)) [@mroeschke](https://github.com/mroeschke) +- Whitespace normalization of nested column coerced as string column in JSONL inputs ([#16759](https://github.com/rapidsai/cudf/pull/16759)) [@shrshi](https://github.com/shrshi) +- Fix DataFrame.drop(columns=cudf.Series/Index, axis=1)
([#16712](https://github.com/rapidsai/cudf/pull/16712)) [@mroeschke](https://github.com/mroeschke) +- Use merge base when calculating changed files ([#16709](https://github.com/rapidsai/cudf/pull/16709)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Ensure we pass the has_nulls tparam to mixed_join kernels ([#16708](https://github.com/rapidsai/cudf/pull/16708)) [@abellina](https://github.com/abellina) +- Add boost-devel to Java CI Docker image ([#16707](https://github.com/rapidsai/cudf/pull/16707)) [@jlowe](https://github.com/jlowe) +- [BUG] Add gpu node type to cudf-pandas 3rd-party integration nightly CI job ([#16704](https://github.com/rapidsai/cudf/pull/16704)) [@Matt711](https://github.com/Matt711) +- Fix typo in column_factories.hpp comment from 'depth 1' to 'depth 2' ([#16700](https://github.com/rapidsai/cudf/pull/16700)) [@a-hirota](https://github.com/a-hirota) +- Fix Series.to_frame(name=None) setting a None name ([#16698](https://github.com/rapidsai/cudf/pull/16698)) [@mroeschke](https://github.com/mroeschke) +- Disable gtests/ERROR_TEST during compute-sanitizer memcheck test ([#16691](https://github.com/rapidsai/cudf/pull/16691)) [@davidwendt](https://github.com/davidwendt) +- Enable batched multi-source reading of JSONL files with large records ([#16687](https://github.com/rapidsai/cudf/pull/16687)) [@shrshi](https://github.com/shrshi) +- Handle `ordered` parameter in `CategoricalIndex.__repr__` ([#16683](https://github.com/rapidsai/cudf/pull/16683)) [@galipremsagar](https://github.com/galipremsagar) +- Fix loc/iloc.__setitem__[:, loc] with non cupy types ([#16677](https://github.com/rapidsai/cudf/pull/16677)) [@mroeschke](https://github.com/mroeschke) +- Fix empty cluster handling in tdigest merge ([#16675](https://github.com/rapidsai/cudf/pull/16675)) [@jihoonson](https://github.com/jihoonson) +- Fix `cudf::rank` not getting enough params ([#16666](https://github.com/rapidsai/cudf/pull/16666)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Fix slowdown in `CategoricalIndex.__repr__` ([#16665](https://github.com/rapidsai/cudf/pull/16665)) [@galipremsagar](https://github.com/galipremsagar) +- Remove java ColumnView.copyWithBooleanColumnAsValidity ([#16660](https://github.com/rapidsai/cudf/pull/16660)) [@revans2](https://github.com/revans2) +- Fix slowdown in DataFrame repr in jupyter notebook ([#16656](https://github.com/rapidsai/cudf/pull/16656)) [@galipremsagar](https://github.com/galipremsagar) +- Preserve Series name in duplicated method. ([#16655](https://github.com/rapidsai/cudf/pull/16655)) [@bdice](https://github.com/bdice) +- Fix interval_range right child non-zero offset ([#16651](https://github.com/rapidsai/cudf/pull/16651)) [@mroeschke](https://github.com/mroeschke) +- fix libcudf wheel publishing, make package-type explicit in wheel publishing ([#16650](https://github.com/rapidsai/cudf/pull/16650)) [@jameslamb](https://github.com/jameslamb) +- Revert "Hide all gtest symbols in cudftestutil (#16546)" ([#16644](https://github.com/rapidsai/cudf/pull/16644)) [@robertmaynard](https://github.com/robertmaynard) +- Fix integer overflow in indexalator pointer logic ([#16643](https://github.com/rapidsai/cudf/pull/16643)) [@davidwendt](https://github.com/davidwendt) +- Allow for binops between two differently sized DecimalDtypes ([#16638](https://github.com/rapidsai/cudf/pull/16638)) [@mroeschke](https://github.com/mroeschke) +- Move pragma once in rolling/jit/operation.hpp.
([#16636](https://github.com/rapidsai/cudf/pull/16636)) [@bdice](https://github.com/bdice) +- Fix overflow bug in low-memory JSON reader ([#16632](https://github.com/rapidsai/cudf/pull/16632)) [@shrshi](https://github.com/shrshi) +- Add the missing `num_aggregations` axis for `groupby_max_cardinality` ([#16630](https://github.com/rapidsai/cudf/pull/16630)) [@PointKernel](https://github.com/PointKernel) +- Fix strings::detail::copy_range when target contains nulls ([#16626](https://github.com/rapidsai/cudf/pull/16626)) [@davidwendt](https://github.com/davidwendt) +- Fix function parameters with common dependency modified during their evaluation ([#16620](https://github.com/rapidsai/cudf/pull/16620)) [@ttnghia](https://github.com/ttnghia) +- bug-fix: Don't enable the CUDA language if testing was requested when finding cudf ([#16615](https://github.com/rapidsai/cudf/pull/16615)) [@cryos](https://github.com/cryos) +- bug-fix: cudf/io/json.hpp use after move ([#16609](https://github.com/rapidsai/cudf/pull/16609)) [@NicolasDenoyelle](https://github.com/NicolasDenoyelle) +- Remove CUDA whole compilation ODR violations ([#16603](https://github.com/rapidsai/cudf/pull/16603)) [@robertmaynard](https://github.com/robertmaynard) +- MAINT: Adapt to numpy hiding flagsobject away ([#16593](https://github.com/rapidsai/cudf/pull/16593)) [@seberg](https://github.com/seberg) +- Revert "Make proxy NumPy arrays pass isinstance check in `cudf.pandas`" ([#16586](https://github.com/rapidsai/cudf/pull/16586)) [@Matt711](https://github.com/Matt711) +- Switch python version to `3.10` in `cudf.pandas` pandas test scripts ([#16559](https://github.com/rapidsai/cudf/pull/16559)) [@galipremsagar](https://github.com/galipremsagar) +- Hide all gtest symbols in cudftestutil ([#16546](https://github.com/rapidsai/cudf/pull/16546)) [@robertmaynard](https://github.com/robertmaynard) +- Update the java code to properly deal with lists being returned as strings ([#16536](https://github.com/rapidsai/cudf/pull/16536)) [@revans2](https://github.com/revans2) +- Register `read_parquet` and `read_csv` with dask-expr ([#16535](https://github.com/rapidsai/cudf/pull/16535)) [@rjzamora](https://github.com/rjzamora) +- Change cudf::empty_like to not include offsets for empty strings columns ([#16529](https://github.com/rapidsai/cudf/pull/16529)) [@davidwendt](https://github.com/davidwendt) +- Fix DataFrame reductions with median returning scalar instead of Series ([#16527](https://github.com/rapidsai/cudf/pull/16527)) [@mroeschke](https://github.com/mroeschke) +- Allow DataFrame.sort_values(by=) to select an index level ([#16519](https://github.com/rapidsai/cudf/pull/16519)) [@mroeschke](https://github.com/mroeschke) +- Fix `date_range(start, end, freq)` when end-start is divisible by freq ([#16516](https://github.com/rapidsai/cudf/pull/16516)) [@mroeschke](https://github.com/mroeschke) +- Preserve array name in MultiIndex.from_arrays ([#16515](https://github.com/rapidsai/cudf/pull/16515)) [@mroeschke](https://github.com/mroeschke) +- Disallow indexing by selecting duplicate labels ([#16514](https://github.com/rapidsai/cudf/pull/16514)) [@mroeschke](https://github.com/mroeschke) +- Fix `.replace(Index, Index)` raising a TypeError ([#16513](https://github.com/rapidsai/cudf/pull/16513)) [@mroeschke](https://github.com/mroeschke) +- Check index bounds in compact protocol reader. 
([#16493](https://github.com/rapidsai/cudf/pull/16493)) [@bdice](https://github.com/bdice) +- Fix build failures with GCC 13 ([#16488](https://github.com/rapidsai/cudf/pull/16488)) [@PointKernel](https://github.com/PointKernel) +- Fix all-empty input column for strings split APIs ([#16466](https://github.com/rapidsai/cudf/pull/16466)) [@davidwendt](https://github.com/davidwendt) +- Fix segmented-sort overlapped input/output indices ([#16463](https://github.com/rapidsai/cudf/pull/16463)) [@davidwendt](https://github.com/davidwendt) +- Fix merge conflict for auto merge 16447 ([#16449](https://github.com/rapidsai/cudf/pull/16449)) [@davidwendt](https://github.com/davidwendt) + +## 📖 Documentation + +- Fix links in Dask cuDF documentation ([#16929](https://github.com/rapidsai/cudf/pull/16929)) [@rjzamora](https://github.com/rjzamora) +- Improve aggregation documentation ([#16822](https://github.com/rapidsai/cudf/pull/16822)) [@PointKernel](https://github.com/PointKernel) +- Add best practices page to Dask cuDF docs ([#16821](https://github.com/rapidsai/cudf/pull/16821)) [@rjzamora](https://github.com/rjzamora) +- [DOC] Update Pylibcudf doc strings ([#16810](https://github.com/rapidsai/cudf/pull/16810)) [@Matt711](https://github.com/Matt711) +- Recommending `miniforge` for conda install ([#16782](https://github.com/rapidsai/cudf/pull/16782)) [@mmccarty](https://github.com/mmccarty) +- Add labeling pylibcudf doc pages ([#16779](https://github.com/rapidsai/cudf/pull/16779)) [@mroeschke](https://github.com/mroeschke) +- Migrate dask-cudf README improvements to dask-cudf sphinx docs ([#16765](https://github.com/rapidsai/cudf/pull/16765)) [@rjzamora](https://github.com/rjzamora) +- [DOC] Remove out of date section from cudf.pandas docs ([#16697](https://github.com/rapidsai/cudf/pull/16697)) [@Matt711](https://github.com/Matt711) +- Add performance tips to cudf.pandas FAQ. ([#16693](https://github.com/rapidsai/cudf/pull/16693)) [@bdice](https://github.com/bdice) +- Update documentation for Dask cuDF ([#16671](https://github.com/rapidsai/cudf/pull/16671)) [@rjzamora](https://github.com/rjzamora) +- Add missing pylibcudf strings docs ([#16471](https://github.com/rapidsai/cudf/pull/16471)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- DOC: Refresh pylibcudf guide ([#15856](https://github.com/rapidsai/cudf/pull/15856)) [@lithomas1](https://github.com/lithomas1) + +## 🚀 New Features + +- Build `cudf-polars` with `build.sh` ([#16898](https://github.com/rapidsai/cudf/pull/16898)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Add polars to "all" dependency list. ([#16875](https://github.com/rapidsai/cudf/pull/16875)) [@bdice](https://github.com/bdice) +- nvCOMP GZIP integration ([#16770](https://github.com/rapidsai/cudf/pull/16770)) [@vuule](https://github.com/vuule) +- [FEA] Add support for `cudf.NamedAgg` ([#16744](https://github.com/rapidsai/cudf/pull/16744)) [@Matt711](https://github.com/Matt711) +- Add experimental `filesystem="arrow"` support in `dask_cudf.read_parquet` ([#16684](https://github.com/rapidsai/cudf/pull/16684)) [@rjzamora](https://github.com/rjzamora) +- Relax Arrow pin ([#16681](https://github.com/rapidsai/cudf/pull/16681)) [@vyasr](https://github.com/vyasr) +- Add libcudf wrappers around current_device_resource functions. 
([#16679](https://github.com/rapidsai/cudf/pull/16679)) [@harrism](https://github.com/harrism) +- Move NDS-H examples into benchmarks ([#16663](https://github.com/rapidsai/cudf/pull/16663)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- [FEA] Add third-party library integration testing of cudf.pandas to cudf ([#16645](https://github.com/rapidsai/cudf/pull/16645)) [@Matt711](https://github.com/Matt711) +- Make isinstance check pass for proxy ndarrays ([#16601](https://github.com/rapidsai/cudf/pull/16601)) [@Matt711](https://github.com/Matt711) +- [FEA] Add an environment variable to fail on fallback in `cudf.pandas` ([#16562](https://github.com/rapidsai/cudf/pull/16562)) [@Matt711](https://github.com/Matt711) +- [FEA] Add support for `cudf.unique` ([#16554](https://github.com/rapidsai/cudf/pull/16554)) [@Matt711](https://github.com/Matt711) +- [FEA] Support named aggregations in `df.groupby().agg()` ([#16528](https://github.com/rapidsai/cudf/pull/16528)) [@Matt711](https://github.com/Matt711) +- Change IPv4 convert APIs to support UINT32 instead of INT64 ([#16489](https://github.com/rapidsai/cudf/pull/16489)) [@davidwendt](https://github.com/davidwendt) +- enable list to be forced as string in JSON reader. ([#16472](https://github.com/rapidsai/cudf/pull/16472)) [@karthikeyann](https://github.com/karthikeyann) +- Remove cuDF dependency from pylibcudf column from_device tests ([#16441](https://github.com/rapidsai/cudf/pull/16441)) [@brandon-b-miller](https://github.com/brandon-b-miller) +- Enable cudf.pandas REPL and -c command support ([#16428](https://github.com/rapidsai/cudf/pull/16428)) [@bdice](https://github.com/bdice) +- Setup pylibcudf package ([#16299](https://github.com/rapidsai/cudf/pull/16299)) [@lithomas1](https://github.com/lithomas1) +- Add a libcudf/thrust-based TPC-H derived datagen ([#16294](https://github.com/rapidsai/cudf/pull/16294)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Make proxy NumPy arrays pass isinstance check in `cudf.pandas` ([#16286](https://github.com/rapidsai/cudf/pull/16286)) [@Matt711](https://github.com/Matt711) +- Add skiprows and nrows to parquet reader ([#16214](https://github.com/rapidsai/cudf/pull/16214)) [@lithomas1](https://github.com/lithomas1) +- Upgrade to nvcomp 4.0.1 ([#16076](https://github.com/rapidsai/cudf/pull/16076)) [@vuule](https://github.com/vuule) +- Migrate ORC reader to pylibcudf ([#16042](https://github.com/rapidsai/cudf/pull/16042)) [@lithomas1](https://github.com/lithomas1) +- JSON reader validation of values ([#15968](https://github.com/rapidsai/cudf/pull/15968)) [@karthikeyann](https://github.com/karthikeyann) +- Implement exposed null mask APIs in pylibcudf ([#15908](https://github.com/rapidsai/cudf/pull/15908)) [@charlesbluca](https://github.com/charlesbluca) +- Word-based nvtext::minhash function ([#15368](https://github.com/rapidsai/cudf/pull/15368)) [@davidwendt](https://github.com/davidwendt) + +## 🛠️ Improvements + +- Make tests deterministic ([#16910](https://github.com/rapidsai/cudf/pull/16910)) [@galipremsagar](https://github.com/galipremsagar) +- Update update-version.sh to use packaging lib ([#16891](https://github.com/rapidsai/cudf/pull/16891)) [@AyodeAwe](https://github.com/AyodeAwe) +- Pin polars for 24.10 and update polars test suite xfail list ([#16886](https://github.com/rapidsai/cudf/pull/16886)) [@wence-](https://github.com/wence-) +- Add in support for setting delim when parsing JSON through java (#16867) ([#16880](https://github.com/rapidsai/cudf/pull/16880))
[@revans2](https://github.com/revans2) +- Remove unnecessary flag from build.sh ([#16879](https://github.com/rapidsai/cudf/pull/16879)) [@vyasr](https://github.com/vyasr) +- Ignore numba warning specific to ARM runners ([#16872](https://github.com/rapidsai/cudf/pull/16872)) [@galipremsagar](https://github.com/galipremsagar) +- Display deltas for `cudf.pandas` test summary ([#16864](https://github.com/rapidsai/cudf/pull/16864)) [@galipremsagar](https://github.com/galipremsagar) +- Switch to using native `traceback` ([#16851](https://github.com/rapidsai/cudf/pull/16851)) [@galipremsagar](https://github.com/galipremsagar) +- JSON tree algorithm code reorg ([#16836](https://github.com/rapidsai/cudf/pull/16836)) [@karthikeyann](https://github.com/karthikeyann) +- Add string.repeats API to pylibcudf ([#16834](https://github.com/rapidsai/cudf/pull/16834)) [@mroeschke](https://github.com/mroeschke) +- Use CI workflow branch 'branch-24.10' again ([#16832](https://github.com/rapidsai/cudf/pull/16832)) [@jameslamb](https://github.com/jameslamb) +- Rename the NDS-H benchmark binaries ([#16831](https://github.com/rapidsai/cudf/pull/16831)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Add string.findall APIs to pylibcudf ([#16825](https://github.com/rapidsai/cudf/pull/16825)) [@mroeschke](https://github.com/mroeschke) +- Add string.extract APIs to pylibcudf ([#16823](https://github.com/rapidsai/cudf/pull/16823)) [@mroeschke](https://github.com/mroeschke) +- use get-pr-info from nv-gha-runners ([#16819](https://github.com/rapidsai/cudf/pull/16819)) [@AyodeAwe](https://github.com/AyodeAwe) +- Add string.contains APIs to pylibcudf ([#16814](https://github.com/rapidsai/cudf/pull/16814)) [@mroeschke](https://github.com/mroeschke) +- Forward-merge branch-24.08 to branch-24.10 ([#16813](https://github.com/rapidsai/cudf/pull/16813)) [@bdice](https://github.com/bdice) +- Add io_type axis with default `PINNED_BUFFER` to nvbench PQ multithreaded reader ([#16809](https://github.com/rapidsai/cudf/pull/16809)) [@mhaseeb123](https://github.com/mhaseeb123) +- Update fmt (to 11.0.2) and spdlog (to 1.14.1). ([#16806](https://github.com/rapidsai/cudf/pull/16806)) [@jameslamb](https://github.com/jameslamb) +- Add ability to set parquet row group max #rows and #bytes in java ([#16805](https://github.com/rapidsai/cudf/pull/16805)) [@pmattione-nvidia](https://github.com/pmattione-nvidia) +- Add in option for Java JSON APIs to do column pruning in CUDF ([#16796](https://github.com/rapidsai/cudf/pull/16796)) [@revans2](https://github.com/revans2) +- Support drop_first in get_dummies ([#16795](https://github.com/rapidsai/cudf/pull/16795)) [@mroeschke](https://github.com/mroeschke) +- Exposed stream-ordering to join API ([#16793](https://github.com/rapidsai/cudf/pull/16793)) [@lamarrr](https://github.com/lamarrr) +- Add string.attributes APIs to pylibcudf ([#16785](https://github.com/rapidsai/cudf/pull/16785)) [@mroeschke](https://github.com/mroeschke) +- Java: Make ColumnVector.fromViewWithContiguousAllocation public ([#16784](https://github.com/rapidsai/cudf/pull/16784)) [@jlowe](https://github.com/jlowe) +- Add partitioning APIs to pylibcudf ([#16781](https://github.com/rapidsai/cudf/pull/16781)) [@mroeschke](https://github.com/mroeschke) +- Optimization of tdigest merge aggregation. 
([#16780](https://github.com/rapidsai/cudf/pull/16780)) [@nvdbaranec](https://github.com/nvdbaranec) +- use libkvikio wheels in wheel builds ([#16778](https://github.com/rapidsai/cudf/pull/16778)) [@jameslamb](https://github.com/jameslamb) +- Exposed stream-ordering to datetime API ([#16774](https://github.com/rapidsai/cudf/pull/16774)) [@lamarrr](https://github.com/lamarrr) +- Add io/timezone APIs to pylibcudf ([#16771](https://github.com/rapidsai/cudf/pull/16771)) [@mroeschke](https://github.com/mroeschke) +- Remove `MultiIndex._poplevel` inplace implementation. ([#16767](https://github.com/rapidsai/cudf/pull/16767)) [@mroeschke](https://github.com/mroeschke) +- allow pandas patch version to float in cudf-pandas unit tests ([#16763](https://github.com/rapidsai/cudf/pull/16763)) [@jameslamb](https://github.com/jameslamb) +- Simplify the nvCOMP adapter ([#16762](https://github.com/rapidsai/cudf/pull/16762)) [@vuule](https://github.com/vuule) +- Add labeling APIs to pylibcudf ([#16761](https://github.com/rapidsai/cudf/pull/16761)) [@mroeschke](https://github.com/mroeschke) +- Add transform APIs to pylibcudf ([#16760](https://github.com/rapidsai/cudf/pull/16760)) [@mroeschke](https://github.com/mroeschke) +- Add a benchmark to study Parquet reader's performance for wide tables ([#16751](https://github.com/rapidsai/cudf/pull/16751)) [@mhaseeb123](https://github.com/mhaseeb123) +- Change the Parquet writer's `default_row_group_size_bytes` from 128MB to inf ([#16750](https://github.com/rapidsai/cudf/pull/16750)) [@mhaseeb123](https://github.com/mhaseeb123) +- Add transpose API to pylibcudf ([#16749](https://github.com/rapidsai/cudf/pull/16749)) [@mroeschke](https://github.com/mroeschke) +- Add support for Python 3.12, update Kafka dependencies to 2.5.x ([#16745](https://github.com/rapidsai/cudf/pull/16745)) [@jameslamb](https://github.com/jameslamb) +- Generate GPU vs CPU usage metrics per pytest file in pandas testsuite for `cudf.pandas` ([#16739](https://github.com/rapidsai/cudf/pull/16739)) [@galipremsagar](https://github.com/galipremsagar) +- Refactor cudf pandas integration tests CI ([#16728](https://github.com/rapidsai/cudf/pull/16728)) [@Matt711](https://github.com/Matt711) +- Remove ERROR_TEST gtest from libcudf ([#16722](https://github.com/rapidsai/cudf/pull/16722)) [@davidwendt](https://github.com/davidwendt) +- Use Series._from_column more consistently to avoid validation ([#16716](https://github.com/rapidsai/cudf/pull/16716)) [@mroeschke](https://github.com/mroeschke) +- remove some unnecessary libcudf nightly builds ([#16714](https://github.com/rapidsai/cudf/pull/16714)) [@jameslamb](https://github.com/jameslamb) +- Remove xfail from torch-cudf.pandas integration test ([#16705](https://github.com/rapidsai/cudf/pull/16705)) [@Matt711](https://github.com/Matt711) +- Add return type annotations to MultiIndex ([#16696](https://github.com/rapidsai/cudf/pull/16696)) [@mroeschke](https://github.com/mroeschke) +- Add type annotations to Index classes, utilize _from_column more ([#16695](https://github.com/rapidsai/cudf/pull/16695)) [@mroeschke](https://github.com/mroeschke) +- Have interval_range use IntervalIndex.from_breaks, remove column_empty_same_mask ([#16694](https://github.com/rapidsai/cudf/pull/16694)) [@mroeschke](https://github.com/mroeschke) +- Increase timeouts for couple of tests ([#16692](https://github.com/rapidsai/cudf/pull/16692)) [@galipremsagar](https://github.com/galipremsagar) +- Replace raw device_memory_resource pointer in pylibcudf Cython 
([#16674](https://github.com/rapidsai/cudf/pull/16674)) [@harrism](https://github.com/harrism) +- switch from typing.Callable to collections.abc.Callable ([#16670](https://github.com/rapidsai/cudf/pull/16670)) [@jameslamb](https://github.com/jameslamb) +- Update rapidsai/pre-commit-hooks ([#16669](https://github.com/rapidsai/cudf/pull/16669)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Multi-file and Parquet-aware prefetching from remote storage ([#16657](https://github.com/rapidsai/cudf/pull/16657)) [@rjzamora](https://github.com/rjzamora) +- Access Frame attributes instead of ColumnAccessor attributes when available ([#16652](https://github.com/rapidsai/cudf/pull/16652)) [@mroeschke](https://github.com/mroeschke) +- Use non-mangled type names in nvbench output ([#16649](https://github.com/rapidsai/cudf/pull/16649)) [@davidwendt](https://github.com/davidwendt) +- Add pylibcudf build dir in build.sh for `clean` ([#16648](https://github.com/rapidsai/cudf/pull/16648)) [@galipremsagar](https://github.com/galipremsagar) +- Prune workflows based on changed files ([#16642](https://github.com/rapidsai/cudf/pull/16642)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Remove arrow dependency ([#16640](https://github.com/rapidsai/cudf/pull/16640)) [@vyasr](https://github.com/vyasr) +- Support reading multiple PQ sources with mismatching nullability for columns ([#16639](https://github.com/rapidsai/cudf/pull/16639)) [@mhaseeb123](https://github.com/mhaseeb123) +- Drop Python 3.9 support ([#16637](https://github.com/rapidsai/cudf/pull/16637)) [@jameslamb](https://github.com/jameslamb) +- Support DecimalDtype meta in dask_cudf ([#16634](https://github.com/rapidsai/cudf/pull/16634)) [@mroeschke](https://github.com/mroeschke) +- Add `num_multiprocessors` utility ([#16628](https://github.com/rapidsai/cudf/pull/16628)) [@PointKernel](https://github.com/PointKernel) +- Annotate `ColumnAccessor._data` labels as `Hashable` ([#16623](https://github.com/rapidsai/cudf/pull/16623)) [@mroeschke](https://github.com/mroeschke) +- Remove build_categorical_column in favor of CategoricalColumn constructor ([#16617](https://github.com/rapidsai/cudf/pull/16617)) [@mroeschke](https://github.com/mroeschke) +- Move apply_boolean_mask benchmark to nvbench ([#16616](https://github.com/rapidsai/cudf/pull/16616)) [@davidwendt](https://github.com/davidwendt) +- Revise `get_reader_filepath_or_buffer` to handle a list of data sources ([#16613](https://github.com/rapidsai/cudf/pull/16613)) [@rjzamora](https://github.com/rjzamora) +- do not install cudf in cudf_polars wheel tests ([#16612](https://github.com/rapidsai/cudf/pull/16612)) [@jameslamb](https://github.com/jameslamb) +- remove streamz git dependency, standardize build dependency names, consolidate some dependency lists ([#16611](https://github.com/rapidsai/cudf/pull/16611)) [@jameslamb](https://github.com/jameslamb) +- Fix C++ and Cython io types ([#16610](https://github.com/rapidsai/cudf/pull/16610)) [@vyasr](https://github.com/vyasr) +- Remove arrow_io_source ([#16607](https://github.com/rapidsai/cudf/pull/16607)) [@vyasr](https://github.com/vyasr) +- Remove thrust::optional from expression evaluator ([#16604](https://github.com/rapidsai/cudf/pull/16604)) [@bdice](https://github.com/bdice) +- Add stricter typing and validation to ColumnAccessor ([#16602](https://github.com/rapidsai/cudf/pull/16602)) [@mroeschke](https://github.com/mroeschke) +- make more use of YAML anchors in dependencies.yaml ([#16597](https://github.com/rapidsai/cudf/pull/16597)) 
[@jameslamb](https://github.com/jameslamb) +- Enable testing `cudf.pandas` unit tests for all minor versions of pandas ([#16595](https://github.com/rapidsai/cudf/pull/16595)) [@galipremsagar](https://github.com/galipremsagar) +- Extend the Parquet writer's dictionary encoding benchmark. ([#16591](https://github.com/rapidsai/cudf/pull/16591)) [@mhaseeb123](https://github.com/mhaseeb123) +- Remove legacy Arrow interop APIs ([#16590](https://github.com/rapidsai/cudf/pull/16590)) [@vyasr](https://github.com/vyasr) +- Remove NativeFile support from cudf Python ([#16589](https://github.com/rapidsai/cudf/pull/16589)) [@vyasr](https://github.com/vyasr) +- Add build job for pylibcudf ([#16587](https://github.com/rapidsai/cudf/pull/16587)) [@vyasr](https://github.com/vyasr) +- Add `public` qualifier for some member functions in Java class `Schema` ([#16583](https://github.com/rapidsai/cudf/pull/16583)) [@ttnghia](https://github.com/ttnghia) +- Enable gtests previously disabled for compute-sanitizer bug ([#16581](https://github.com/rapidsai/cudf/pull/16581)) [@davidwendt](https://github.com/davidwendt) +- [FEA] Add filesystem argument to `cudf.read_parquet` ([#16577](https://github.com/rapidsai/cudf/pull/16577)) [@rjzamora](https://github.com/rjzamora) +- Ensure size is always passed to NumericalColumn ([#16576](https://github.com/rapidsai/cudf/pull/16576)) [@mroeschke](https://github.com/mroeschke) +- standardize and consolidate wheel installations in testing scripts ([#16575](https://github.com/rapidsai/cudf/pull/16575)) [@jameslamb](https://github.com/jameslamb) +- Performance improvement for strings::slice for wide strings ([#16574](https://github.com/rapidsai/cudf/pull/16574)) [@davidwendt](https://github.com/davidwendt) +- Add `ToCudfBackend` expression to dask-cudf ([#16573](https://github.com/rapidsai/cudf/pull/16573)) [@rjzamora](https://github.com/rjzamora) +- CI: Test against old versions of key dependencies ([#16570](https://github.com/rapidsai/cudf/pull/16570)) [@seberg](https://github.com/seberg) +- Replace `NativeFile` dependency in dask-cudf Parquet reader ([#16569](https://github.com/rapidsai/cudf/pull/16569)) [@rjzamora](https://github.com/rjzamora) +- Align public utility function signatures with pandas 2.x ([#16565](https://github.com/rapidsai/cudf/pull/16565)) [@mroeschke](https://github.com/mroeschke) +- Move libcudf reduction google-benchmarks to nvbench ([#16564](https://github.com/rapidsai/cudf/pull/16564)) [@davidwendt](https://github.com/davidwendt) +- Rework strings::slice benchmark to use nvbench ([#16563](https://github.com/rapidsai/cudf/pull/16563)) [@davidwendt](https://github.com/davidwendt) +- Reenable arrow tests ([#16556](https://github.com/rapidsai/cudf/pull/16556)) [@vyasr](https://github.com/vyasr) +- Clean up reshaping ops ([#16553](https://github.com/rapidsai/cudf/pull/16553)) [@mroeschke](https://github.com/mroeschke) +- Disallow cudf.Index accepting column in favor of ._from_column ([#16549](https://github.com/rapidsai/cudf/pull/16549)) [@mroeschke](https://github.com/mroeschke) +- Rewrite remaining Python Arrow interop conversions using the C Data Interface ([#16548](https://github.com/rapidsai/cudf/pull/16548)) [@vyasr](https://github.com/vyasr) +- [REVIEW] JSON host tree algorithms ([#16545](https://github.com/rapidsai/cudf/pull/16545)) [@shrshi](https://github.com/shrshi) +- Refactor dictionary encoding in PQ writer to migrate to the new `cuco::static_map` ([#16541](https://github.com/rapidsai/cudf/pull/16541)) 
[@mhaseeb123](https://github.com/mhaseeb123) +- Remove hardcoded versions from workflows. ([#16540](https://github.com/rapidsai/cudf/pull/16540)) [@bdice](https://github.com/bdice) +- Ensure comparisons with pyints and integer series always succeed ([#16532](https://github.com/rapidsai/cudf/pull/16532)) [@seberg](https://github.com/seberg) +- Remove unneeded output size parameter from internal count_matches utility ([#16531](https://github.com/rapidsai/cudf/pull/16531)) [@davidwendt](https://github.com/davidwendt) +- Remove invalid column_view usage in string-scalar-to-column function ([#16530](https://github.com/rapidsai/cudf/pull/16530)) [@davidwendt](https://github.com/davidwendt) +- Raise NotImplementedError for Series.rename that's not a scalar ([#16525](https://github.com/rapidsai/cudf/pull/16525)) [@mroeschke](https://github.com/mroeschke) +- Remove deprecated public APIs from libcudf ([#16524](https://github.com/rapidsai/cudf/pull/16524)) [@davidwendt](https://github.com/davidwendt) +- Return Interval object in pandas compat mode for IntervalIndex reductions ([#16523](https://github.com/rapidsai/cudf/pull/16523)) [@mroeschke](https://github.com/mroeschke) +- Update json normalization to take device_buffer ([#16520](https://github.com/rapidsai/cudf/pull/16520)) [@karthikeyann](https://github.com/karthikeyann) +- Rework cudf::io::text::byte_range_info class member functions ([#16518](https://github.com/rapidsai/cudf/pull/16518)) [@davidwendt](https://github.com/davidwendt) +- Remove unneeded pair-iterator benchmark ([#16511](https://github.com/rapidsai/cudf/pull/16511)) [@davidwendt](https://github.com/davidwendt) +- Update pre-commit hooks ([#16510](https://github.com/rapidsai/cudf/pull/16510)) [@KyleFromNVIDIA](https://github.com/KyleFromNVIDIA) +- Improve update-version.sh ([#16506](https://github.com/rapidsai/cudf/pull/16506)) [@bdice](https://github.com/bdice) +- Use tool.scikit-build.cmake.version, set scikit-build-core minimum-version ([#16503](https://github.com/rapidsai/cudf/pull/16503)) [@jameslamb](https://github.com/jameslamb) +- Pass batch size to JSON reader using environment variable ([#16502](https://github.com/rapidsai/cudf/pull/16502)) [@shrshi](https://github.com/shrshi) +- Remove a deprecated multibyte_split API ([#16501](https://github.com/rapidsai/cudf/pull/16501)) [@davidwendt](https://github.com/davidwendt) +- Add interop example for `arrow::StringViewArray` to `cudf::column` ([#16498](https://github.com/rapidsai/cudf/pull/16498)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Add keep option to distinct nvbench ([#16497](https://github.com/rapidsai/cudf/pull/16497)) [@bdice](https://github.com/bdice) +- Use more idiomatic cudf APIs in dask_cudf meta generation ([#16487](https://github.com/rapidsai/cudf/pull/16487)) [@mroeschke](https://github.com/mroeschke) +- Fix typo in dispatch_row_equal.
([#16473](https://github.com/rapidsai/cudf/pull/16473)) [@bdice](https://github.com/bdice) +- Use explicit construction of column subclass instead of `build_column` when type is known ([#16470](https://github.com/rapidsai/cudf/pull/16470)) [@mroeschke](https://github.com/mroeschke) +- Move exception handler into pylibcudf from cudf ([#16468](https://github.com/rapidsai/cudf/pull/16468)) [@lithomas1](https://github.com/lithomas1) +- Make StructColumn.__init__ strict ([#16467](https://github.com/rapidsai/cudf/pull/16467)) [@mroeschke](https://github.com/mroeschke) +- Make ListColumn.__init__ strict ([#16465](https://github.com/rapidsai/cudf/pull/16465)) [@mroeschke](https://github.com/mroeschke) +- Make Timedelta/DatetimeColumn.__init__ strict ([#16464](https://github.com/rapidsai/cudf/pull/16464)) [@mroeschke](https://github.com/mroeschke) +- Make NumericalColumn.__init__ strict ([#16457](https://github.com/rapidsai/cudf/pull/16457)) [@mroeschke](https://github.com/mroeschke) +- Make CategoricalColumn.__init__ strict ([#16456](https://github.com/rapidsai/cudf/pull/16456)) [@mroeschke](https://github.com/mroeschke) +- Disallow cudf.Series to accept column in favor of `._from_column` ([#16454](https://github.com/rapidsai/cudf/pull/16454)) [@mroeschke](https://github.com/mroeschke) +- Expose `stream` param in transform APIs ([#16452](https://github.com/rapidsai/cudf/pull/16452)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Add upper bound pin for polars ([#16442](https://github.com/rapidsai/cudf/pull/16442)) [@wence-](https://github.com/wence-) +- Make (Indexed)Frame.__init__ require data (and index) ([#16430](https://github.com/rapidsai/cudf/pull/16430)) [@mroeschke](https://github.com/mroeschke) +- Add Java APIs to copy column data to host asynchronously ([#16429](https://github.com/rapidsai/cudf/pull/16429)) [@jlowe](https://github.com/jlowe) +- Update docs of the TPC-H derived examples ([#16423](https://github.com/rapidsai/cudf/pull/16423)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Use RMM adaptor constructors instead of factories. 
([#16414](https://github.com/rapidsai/cudf/pull/16414)) [@bdice](https://github.com/bdice) +- Align ewm APIs with pandas 2.x ([#16413](https://github.com/rapidsai/cudf/pull/16413)) [@mroeschke](https://github.com/mroeschke) +- Remove checking for specific tests in memcheck script ([#16412](https://github.com/rapidsai/cudf/pull/16412)) [@davidwendt](https://github.com/davidwendt) +- Add stream parameter to reshape APIs ([#16410](https://github.com/rapidsai/cudf/pull/16410)) [@davidwendt](https://github.com/davidwendt) +- Align groupby APIs with pandas 2.x ([#16403](https://github.com/rapidsai/cudf/pull/16403)) [@mroeschke](https://github.com/mroeschke) +- Align misc DataFrame and MultiIndex methods with pandas 2.x ([#16402](https://github.com/rapidsai/cudf/pull/16402)) [@mroeschke](https://github.com/mroeschke) +- update some branch references in GitHub Actions configs ([#16397](https://github.com/rapidsai/cudf/pull/16397)) [@jameslamb](https://github.com/jameslamb) +- Support reading matching projected and filter cols from Parquet files with otherwise mismatched schemas ([#16394](https://github.com/rapidsai/cudf/pull/16394)) [@mhaseeb123](https://github.com/mhaseeb123) +- Merge branch-24.08 into branch-24.10 ([#16393](https://github.com/rapidsai/cudf/pull/16393)) [@jameslamb](https://github.com/jameslamb) +- Add query 10 to the TPC-H suite ([#16392](https://github.com/rapidsai/cudf/pull/16392)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Use `make_host_vector` instead of `make_std_vector` to facilitate pinned memory optimizations ([#16386](https://github.com/rapidsai/cudf/pull/16386)) [@vuule](https://github.com/vuule) +- Fix some issues with deprecated / removed cccl facilities ([#16377](https://github.com/rapidsai/cudf/pull/16377)) [@miscco](https://github.com/miscco) +- Align IntervalIndex APIs with pandas 2.x ([#16371](https://github.com/rapidsai/cudf/pull/16371)) [@mroeschke](https://github.com/mroeschke) +- Align CategoricalIndex APIs with pandas 2.x ([#16369](https://github.com/rapidsai/cudf/pull/16369)) [@mroeschke](https://github.com/mroeschke) +- Align TimedeltaIndex APIs with pandas 2.x ([#16368](https://github.com/rapidsai/cudf/pull/16368)) [@mroeschke](https://github.com/mroeschke) +- Align DatetimeIndex APIs with pandas 2.x ([#16367](https://github.com/rapidsai/cudf/pull/16367)) [@mroeschke](https://github.com/mroeschke) +- fix [tool.setuptools] reference in custreamz config ([#16365](https://github.com/rapidsai/cudf/pull/16365)) [@jameslamb](https://github.com/jameslamb) +- Align Index APIs with pandas 2.x ([#16361](https://github.com/rapidsai/cudf/pull/16361)) [@mroeschke](https://github.com/mroeschke) +- Rebuild for & Support NumPy 2 ([#16300](https://github.com/rapidsai/cudf/pull/16300)) [@jakirkham](https://github.com/jakirkham) +- Add `stream` param to stream compaction APIs ([#16295](https://github.com/rapidsai/cudf/pull/16295)) [@JayjeetAtGithub](https://github.com/JayjeetAtGithub) +- Added batch memset to memset data and validity buffers in parquet reader ([#16281](https://github.com/rapidsai/cudf/pull/16281)) [@sdrp713](https://github.com/sdrp713) +- Deduplicate decimal32/decimal64 to decimal128 conversion function ([#16236](https://github.com/rapidsai/cudf/pull/16236)) [@mhaseeb123](https://github.com/mhaseeb123) +- Refactor mixed_semi_join using cuco::static_set ([#16230](https://github.com/rapidsai/cudf/pull/16230)) [@srinivasyadav18](https://github.com/srinivasyadav18) +- Improve performance of hash_character_ngrams using warp-per-string kernel 
([#16212](https://github.com/rapidsai/cudf/pull/16212)) [@davidwendt](https://github.com/davidwendt) +- Add environment variable to log cudf.pandas fallback calls ([#16161](https://github.com/rapidsai/cudf/pull/16161)) [@mroeschke](https://github.com/mroeschke) +- Add libcudf example with large strings ([#15983](https://github.com/rapidsai/cudf/pull/15983)) [@davidwendt](https://github.com/davidwendt) +- JSON tree algorithms refactor I: CSR data structure for column tree ([#15979](https://github.com/rapidsai/cudf/pull/15979)) [@shrshi](https://github.com/shrshi) +- Support multiple new-line characters in regex APIs ([#15961](https://github.com/rapidsai/cudf/pull/15961)) [@davidwendt](https://github.com/davidwendt) +- adding wheel build for libcudf ([#15483](https://github.com/rapidsai/cudf/pull/15483)) [@msarahan](https://github.com/msarahan) +- Replace usages of `thrust::optional` with `std::optional` ([#15091](https://github.com/rapidsai/cudf/pull/15091)) [@miscco](https://github.com/miscco) + # cudf 24.08.00 (7 Aug 2024) ## 🚨 Breaking Changes From dfdae599622841bf3f4d523c01eee3ae1fe933f0 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 9 Oct 2024 14:02:28 -0400 Subject: [PATCH 29/33] Use std::optional for host types (#17015) cuda::std::optional shouldn't be used for host types such as `std::vector` as it requires the constructors of the `T` types to be host+device. Authors: - Robert Maynard (https://github.com/robertmaynard) Approvers: - Bradley Dice (https://github.com/bdice) - MithunR (https://github.com/mythrocks) - Nghia Truong (https://github.com/ttnghia) URL: https://github.com/rapidsai/cudf/pull/17015 --- .../io/parquet/compact_protocol_reader.cpp | 8 +-- cpp/src/io/parquet/parquet.hpp | 64 +++++++++---------- cpp/src/io/parquet/parquet_gpu.hpp | 14 ++-- cpp/src/io/parquet/predicate_pushdown.cpp | 6 +- cpp/src/io/parquet/reader_impl.cpp | 2 +- cpp/src/io/parquet/reader_impl_chunking.cu | 8 +-- cpp/src/io/parquet/reader_impl_helpers.cpp | 6 +- cpp/src/io/parquet/writer_impl.cu | 8 +-- cpp/tests/io/parquet_common.cpp | 2 +- cpp/tests/io/parquet_common.hpp | 2 +- 10 files changed, 59 insertions(+), 61 deletions(-) diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 312a5243687..d276e946a51 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -309,10 +309,10 @@ class parquet_field_struct : public parquet_field { template class parquet_field_union_struct : public parquet_field { E& enum_val; - cuda::std::optional& val; // union structs are always wrapped in std::optional + std::optional& val; // union structs are always wrapped in std::optional public: - parquet_field_union_struct(int f, E& ev, cuda::std::optional& v) + parquet_field_union_struct(int f, E& ev, std::optional& v) : parquet_field(f), enum_val(ev), val(v) { } @@ -439,10 +439,10 @@ class parquet_field_struct_blob : public parquet_field { */ template class parquet_field_optional : public parquet_field { - cuda::std::optional& val; + std::optional& val; public: - parquet_field_optional(int f, cuda::std::optional& v) : parquet_field(f), val(v) {} + parquet_field_optional(int f, std::optional& v) : parquet_field(f), val(v) {} inline void operator()(CompactProtocolReader* cpr, int field_type) { diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index 7c985643887..2851ef67a65 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -20,8 +20,6 
@@ #include -#include - #include #include #include @@ -94,10 +92,10 @@ struct LogicalType { BSON }; Type type; - cuda::std::optional decimal_type; - cuda::std::optional time_type; - cuda::std::optional timestamp_type; - cuda::std::optional int_type; + std::optional decimal_type; + std::optional time_type; + std::optional timestamp_type; + std::optional int_type; LogicalType(Type tp = UNDEFINED) : type(tp) {} LogicalType(DecimalType&& dt) : type(DECIMAL), decimal_type(dt) {} @@ -178,21 +176,21 @@ struct SchemaElement { // 5: nested fields int32_t num_children = 0; // 6: DEPRECATED: record the original type before conversion to parquet type - cuda::std::optional converted_type; + std::optional converted_type; // 7: DEPRECATED: record the scale for DECIMAL converted type int32_t decimal_scale = 0; // 8: DEPRECATED: record the precision for DECIMAL converted type int32_t decimal_precision = 0; // 9: save field_id from original schema - cuda::std::optional field_id; + std::optional field_id; // 10: replaces converted type - cuda::std::optional logical_type; + std::optional logical_type; // extra cudf specific fields bool output_as_byte_array = false; // cudf type determined from arrow:schema - cuda::std::optional arrow_type; + std::optional arrow_type; // The following fields are filled in later during schema initialization int max_definition_level = 0; @@ -258,21 +256,21 @@ struct SchemaElement { */ struct Statistics { // deprecated max value in signed comparison order - cuda::std::optional> max; + std::optional> max; // deprecated min value in signed comparison order - cuda::std::optional> min; + std::optional> min; // count of null values in the column - cuda::std::optional null_count; + std::optional null_count; // count of distinct values occurring - cuda::std::optional distinct_count; + std::optional distinct_count; // max value for column determined by ColumnOrder - cuda::std::optional> max_value; + std::optional> max_value; // min value for column determined by ColumnOrder - cuda::std::optional> min_value; + std::optional> min_value; // If true, max_value is the actual maximum value for a column - cuda::std::optional is_max_value_exact; + std::optional is_max_value_exact; // If true, min_value is the actual minimum value for a column - cuda::std::optional is_min_value_exact; + std::optional is_min_value_exact; }; /** @@ -281,7 +279,7 @@ struct Statistics { struct SizeStatistics { // Number of variable-width bytes stored for the page/chunk. Should not be set for anything // but the BYTE_ARRAY physical type. - cuda::std::optional unencoded_byte_array_data_bytes; + std::optional unencoded_byte_array_data_bytes; /** * When present, there is expected to be one element corresponding to each * repetition (i.e. size=max repetition_level+1) where each element @@ -290,14 +288,14 @@ struct SizeStatistics { * * This value should not be written if max_repetition_level is 0. */ - cuda::std::optional> repetition_level_histogram; + std::optional> repetition_level_histogram; /** * Same as repetition_level_histogram except for definition levels. * * This value should not be written if max_definition_level is 0 or 1. */ - cuda::std::optional> definition_level_histogram; + std::optional> definition_level_histogram; }; /** @@ -318,7 +316,7 @@ struct OffsetIndex { std::vector page_locations; // per-page size info. see description of the same field in SizeStatistics. only present for // columns with a BYTE_ARRAY physical type. 
- cuda::std::optional> unencoded_byte_array_data_bytes; + std::optional> unencoded_byte_array_data_bytes; }; /** @@ -329,11 +327,11 @@ struct ColumnIndex { std::vector> min_values; // lower bound for values in each page std::vector> max_values; // upper bound for values in each page BoundaryOrder boundary_order = - BoundaryOrder::UNORDERED; // Indicates if min and max values are ordered - cuda::std::optional> null_counts; // Optional count of null values per page + BoundaryOrder::UNORDERED; // Indicates if min and max values are ordered + std::optional> null_counts; // Optional count of null values per page // Repetition/definition level histograms for the column chunk - cuda::std::optional> repetition_level_histogram; - cuda::std::optional> definition_level_histogram; + std::optional> repetition_level_histogram; + std::optional> definition_level_histogram; }; /** @@ -383,11 +381,11 @@ struct ColumnChunkMetaData { Statistics statistics; // Set of all encodings used for pages in this column chunk. This information can be used to // determine if all data pages are dictionary encoded for example. - cuda::std::optional> encoding_stats; + std::optional> encoding_stats; // Optional statistics to help estimate total memory when converted to in-memory representations. // The histograms contained in these statistics can also be useful in some cases for more // fine-grained nullability/list length filter pushdown. - cuda::std::optional size_statistics; + std::optional size_statistics; }; /** @@ -429,13 +427,13 @@ struct RowGroup { int64_t num_rows = 0; // If set, specifies a sort ordering of the rows in this RowGroup. // The sorting columns can be a subset of all the columns. - cuda::std::optional> sorting_columns; + std::optional> sorting_columns; // Byte offset from beginning of file to first page (data or dictionary) in this row group - cuda::std::optional file_offset; + std::optional file_offset; // Total byte size of all compressed (and potentially encrypted) column data in this row group - cuda::std::optional total_compressed_size; + std::optional total_compressed_size; // Row group ordinal in the file - cuda::std::optional ordinal; + std::optional ordinal; }; /** @@ -460,7 +458,7 @@ struct FileMetaData { std::vector row_groups; std::vector key_value_metadata; std::string created_by = ""; - cuda::std::optional> column_orders; + std::optional> column_orders; }; /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index a8ba3a969ce..4f6d41a97da 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -395,7 +395,7 @@ struct ColumnChunkDesc { uint8_t def_level_bits_, uint8_t rep_level_bits_, Compression codec_, - cuda::std::optional logical_type_, + std::optional logical_type_, int32_t ts_clock_rate_, int32_t src_col_index_, int32_t src_col_schema_, @@ -441,12 +441,12 @@ struct ColumnChunkDesc { int32_t num_data_pages{}; // number of data pages int32_t num_dict_pages{}; // number of dictionary pages PageInfo const* dict_page{}; - string_index_pair* str_dict_index{}; // index for string dictionary - bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column - void** column_data_base{}; // base pointers of column data - void** column_string_base{}; // base pointers of column string data - Compression codec{}; // compressed codec enum - cuda::std::optional logical_type{}; // logical type + string_index_pair* str_dict_index{}; // index for string dictionary + bitmask_type** valid_map_base{}; // base 
pointers of valid bit map for this column + void** column_data_base{}; // base pointers of column data + void** column_string_base{}; // base pointers of column string data + Compression codec{}; // compressed codec enum + std::optional logical_type{}; // logical type int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) int32_t src_col_index{}; // my input column index diff --git a/cpp/src/io/parquet/predicate_pushdown.cpp b/cpp/src/io/parquet/predicate_pushdown.cpp index b90ca36c8c7..f0a0bc0b51b 100644 --- a/cpp/src/io/parquet/predicate_pushdown.cpp +++ b/cpp/src/io/parquet/predicate_pushdown.cpp @@ -152,7 +152,7 @@ struct stats_caster { } void set_index(size_type index, - cuda::std::optional> const& binary_value, + std::optional> const& binary_value, Type const type) { if (binary_value.has_value()) { @@ -234,8 +234,8 @@ struct stats_caster { max.set_index(stats_idx, max_value, colchunk.meta_data.type); } else { // Marking it null, if column present in row group - min.set_index(stats_idx, cuda::std::nullopt, {}); - max.set_index(stats_idx, cuda::std::nullopt, {}); + min.set_index(stats_idx, std::nullopt, {}); + max.set_index(stats_idx, std::nullopt, {}); } stats_idx++; } diff --git a/cpp/src/io/parquet/reader_impl.cpp b/cpp/src/io/parquet/reader_impl.cpp index 1b69ccb7742..f0865c715bc 100644 --- a/cpp/src/io/parquet/reader_impl.cpp +++ b/cpp/src/io/parquet/reader_impl.cpp @@ -38,7 +38,7 @@ namespace { // be treated as a string. Currently the only logical type that has special handling is DECIMAL. // Other valid types in the future would be UUID (still treated as string) and FLOAT16 (which // for now would also be treated as a string). -inline bool is_treat_fixed_length_as_string(cuda::std::optional const& logical_type) +inline bool is_treat_fixed_length_as_string(std::optional const& logical_type) { if (!logical_type.has_value()) { return true; } return logical_type->type != LogicalType::DECIMAL; diff --git a/cpp/src/io/parquet/reader_impl_chunking.cu b/cpp/src/io/parquet/reader_impl_chunking.cu index c588fedb85c..27312a4da89 100644 --- a/cpp/src/io/parquet/reader_impl_chunking.cu +++ b/cpp/src/io/parquet/reader_impl_chunking.cu @@ -371,11 +371,11 @@ int64_t find_next_split(int64_t cur_pos, * * @return A tuple of Parquet clock rate and Parquet decimal type. */ -[[nodiscard]] std::tuple> conversion_info( +[[nodiscard]] std::tuple> conversion_info( type_id column_type_id, type_id timestamp_type_id, Type physical, - cuda::std::optional logical_type) + std::optional logical_type) { int32_t const clock_rate = is_chrono(data_type{column_type_id}) ? 
to_clockrate(timestamp_type_id) : 0; @@ -386,11 +386,11 @@ int64_t find_next_split(int64_t cur_pos, // if decimal but not outputting as float or decimal, then convert to no logical type if (column_type_id != type_id::FLOAT64 and not cudf::is_fixed_point(data_type{column_type_id})) { - return std::make_tuple(clock_rate, cuda::std::nullopt); + return {clock_rate, std::nullopt}; } } - return std::make_tuple(clock_rate, std::move(logical_type)); + return {clock_rate, std::move(logical_type)}; } /** diff --git a/cpp/src/io/parquet/reader_impl_helpers.cpp b/cpp/src/io/parquet/reader_impl_helpers.cpp index 6d566b5815e..a6562d33de2 100644 --- a/cpp/src/io/parquet/reader_impl_helpers.cpp +++ b/cpp/src/io/parquet/reader_impl_helpers.cpp @@ -38,7 +38,7 @@ namespace flatbuf = cudf::io::parquet::flatbuf; namespace { -cuda::std::optional converted_to_logical_type(SchemaElement const& schema) +std::optional converted_to_logical_type(SchemaElement const& schema) { if (schema.converted_type.has_value()) { switch (schema.converted_type.value()) { @@ -66,7 +66,7 @@ cuda::std::optional converted_to_logical_type(SchemaElement const& default: return LogicalType{LogicalType::UNDEFINED}; } } - return cuda::std::nullopt; + return std::nullopt; } } // namespace @@ -246,7 +246,7 @@ void metadata::sanitize_schema() struct_elem.repetition_type = REQUIRED; struct_elem.num_children = schema_elem.num_children; struct_elem.type = UNDEFINED_TYPE; - struct_elem.converted_type = cuda::std::nullopt; + struct_elem.converted_type = std::nullopt; // swap children struct_elem.children_idx = std::move(schema_elem.children_idx); diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu index ec05f35d405..190f13eb688 100644 --- a/cpp/src/io/parquet/writer_impl.cu +++ b/cpp/src/io/parquet/writer_impl.cu @@ -186,7 +186,7 @@ struct aggregate_writer_metadata { std::vector> column_indexes; }; std::vector files; - cuda::std::optional> column_orders = cuda::std::nullopt; + std::optional> column_orders = std::nullopt; }; namespace { @@ -472,7 +472,7 @@ struct leaf_schema_fn { std::enable_if_t, void> operator()() { col_schema.type = (timestamp_is_int96) ? Type::INT96 : Type::INT64; - col_schema.converted_type = cuda::std::nullopt; + col_schema.converted_type = std::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_timestamp64; if (timestamp_is_int96) { col_schema.ts_scale = -1000; // negative value indicates division by absolute value @@ -750,7 +750,7 @@ std::vector construct_parquet_schema_tree( col_schema.type = Type::BYTE_ARRAY; } - col_schema.converted_type = cuda::std::nullopt; + col_schema.converted_type = std::nullopt; col_schema.stats_dtype = statistics_dtype::dtype_byte_array; col_schema.repetition_type = col_nullable ? OPTIONAL : REQUIRED; col_schema.name = (schema[parent_idx].name == "list") ? 
"element" : col_meta.get_name(); @@ -2795,7 +2795,7 @@ std::unique_ptr> writer::merge_row_group_metadata( // See https://github.com/rapidsai/cudf/pull/14264#issuecomment-1778311615 for (auto& se : md.schema) { if (se.logical_type.has_value() && se.logical_type.value().type == LogicalType::UNKNOWN) { - se.logical_type = cuda::std::nullopt; + se.logical_type = std::nullopt; } } diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp index 6141a40bc95..a1b8677eac8 100644 --- a/cpp/tests/io/parquet_common.cpp +++ b/cpp/tests/io/parquet_common.cpp @@ -744,7 +744,7 @@ int32_t compare(T& v1, T& v2) int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - cuda::std::optional const& ctype) + std::optional const& ctype) { auto ctype_val = ctype.value_or(cudf::io::parquet::detail::UNKNOWN); switch (ptype) { diff --git a/cpp/tests/io/parquet_common.hpp b/cpp/tests/io/parquet_common.hpp index bd1579eaa1b..c90b81ed27a 100644 --- a/cpp/tests/io/parquet_common.hpp +++ b/cpp/tests/io/parquet_common.hpp @@ -172,7 +172,7 @@ std::pair create_parquet_typed_with_stats(std::string int32_t compare_binary(std::vector const& v1, std::vector const& v2, cudf::io::parquet::detail::Type ptype, - cuda::std::optional const& ctype); + std::optional const& ctype); void expect_compression_stats_empty(std::shared_ptr stats); From bd51a25ea6fdab6ab11e95e2c8192ed7eee43e75 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 9 Oct 2024 16:05:05 -0400 Subject: [PATCH 30/33] [DOC] Document limitation using `cudf.pandas` proxy arrays (#16955) When instantiating a `cudf.pandas` proxy array, a DtoH transfer occurs so that the data buffer is set correctly. We do this because functions which utilize NumPy's C API can utilize the data buffer directly instead of going through `__array__`. This PR documents this limitation. Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) URL: https://github.com/rapidsai/cudf/pull/16955 --- docs/cudf/source/cudf_pandas/faq.md | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) diff --git a/docs/cudf/source/cudf_pandas/faq.md b/docs/cudf/source/cudf_pandas/faq.md index 34b657488c1..5024747227e 100644 --- a/docs/cudf/source/cudf_pandas/faq.md +++ b/docs/cudf/source/cudf_pandas/faq.md @@ -181,6 +181,32 @@ There are a few known limitations that you should be aware of: ``` - `cudf.pandas` (and cuDF in general) is only compatible with pandas 2. Version 24.02 of cudf was the last to support pandas 1.5.x. +- In order for `cudf.pandas` to produce a proxy array that ducktypes as a NumPy + array, we create a proxy type that actually subclasses `numpy.ndarray`. We can + verify this with an isinstance check. + + ```python + %load_ext cudf.pandas + import pandas as pd + import numpy as np + + arr = pd.Series([1, 1, 2]).unique() # returns a proxy array + isinstance(arr, np.ndarray) # returns True, where arr is a proxy array + ``` + Because the proxy type ducktypes as a NumPy array, NumPy functions may attempt to + access internal members, such as the [data buffer](https://numpy.org/doc/stable/dev/internals.html#internal-organization-of-numpy-arrays), via the NumPy C API. + However, our proxy mechanism is designed to proxy function calls at the Python + level, which is incompatible with these types of accesses. 
To handle these + situations, we perform an eager device-to-host (DtoH) copy, which sets the data + buffer correctly but incurs the cost of extra time when creating the proxy array. + In the previous example, creating `arr` performed this kind of implicit DtoH transfer. + + With this approach, we also get compatibility with third party libraries like `torch`. + + ```python + import torch + x = torch.from_numpy(arr) + ``` ## Can I force running on the CPU? From c7b51195c675af47d0f3dd69c04d0fcc6920eca5 Mon Sep 17 00:00:00 2001 From: Vukasin Milovanovic Date: Wed, 9 Oct 2024 15:17:32 -0700 Subject: [PATCH 31/33] Fix `host_span` constructor to correctly copy `is_device_accessible` (#17020) One of the `host_span` constructors was not updated when we added `is_device_accessible`, so the value was not assigned. This PR fixes this simple error and adds tests that check that this property is correctly set when creating `host_span`s. Authors: - Vukasin Milovanovic (https://github.com/vuule) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Muhammad Haseeb (https://github.com/mhaseeb123) URL: https://github.com/rapidsai/cudf/pull/17020 --- cpp/include/cudf/utilities/span.hpp | 2 +- .../utilities_tests/pinned_memory_tests.cpp | 20 +++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp index 914731ea417..f3e1a61d075 100644 --- a/cpp/include/cudf/utilities/span.hpp +++ b/cpp/include/cudf/utilities/span.hpp @@ -288,7 +288,7 @@ struct host_span : public cudf::detail::span_base, // NOLINT void>* = nullptr> constexpr host_span(host_span const& other) noexcept - : base(other.data(), other.size()) + : base(other.data(), other.size()), _is_device_accessible{other.is_device_accessible()} { } diff --git a/cpp/tests/utilities_tests/pinned_memory_tests.cpp b/cpp/tests/utilities_tests/pinned_memory_tests.cpp index ae7c6fa8b8c..1e1e21fe18a 100644 --- a/cpp/tests/utilities_tests/pinned_memory_tests.cpp +++ b/cpp/tests/utilities_tests/pinned_memory_tests.cpp @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -125,3 +126,22 @@ TEST_F(PinnedMemoryTest, MakeHostVector) EXPECT_FALSE(vec.get_allocator().is_device_accessible()); } } + +TEST_F(PinnedMemoryTest, HostSpan) +{ + auto test_ctors = [](auto&& vec) { + auto const is_vec_device_accessible = vec.get_allocator().is_device_accessible(); + // Test conversion from a vector + auto const span = cudf::host_span{vec}; + EXPECT_EQ(span.is_device_accessible(), is_vec_device_accessible); + // Test conversion from host_span with different type + auto const span_converted = cudf::host_span{span}; + EXPECT_EQ(span_converted.is_device_accessible(), is_vec_device_accessible); + }; + + cudf::set_allocate_host_as_pinned_threshold(7); + for (int i = 1; i < 10; i++) { + // some iterations will use pinned memory, some will not + test_ctors(cudf::detail::make_host_vector(i, cudf::get_default_stream())); + } +} From 3791c8a9d1aeb7474bb9ef324a089a569183406c Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 9 Oct 2024 13:45:02 -1000 Subject: [PATCH 32/33] Add string.convert_floats APIs to pylibcudf (#16990) Contributes to https://github.com/rapidsai/cudf/issues/15162 Authors: - Matthew Roeschke (https://github.com/mroeschke) Approvers: - https://github.com/brandon-b-miller URL: https://github.com/rapidsai/cudf/pull/16990 --- python/cudf/cudf/_lib/string_casting.pyx | 34 ++----
.../_lib/strings/convert/convert_floats.pyx | 24 ++--- .../strings/convert/convert_floats.pxd | 6 +- .../pylibcudf/strings/convert/CMakeLists.txt | 2 +- .../pylibcudf/strings/convert/__init__.pxd | 1 + .../pylibcudf/strings/convert/__init__.py | 1 + .../strings/convert/convert_floats.pxd | 11 ++ .../strings/convert/convert_floats.pyx | 101 ++++++++++++++++++ .../tests/test_string_convert_floats.py | 33 ++++++ 9 files changed, 165 insertions(+), 48 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd create mode 100644 python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py diff --git a/python/cudf/cudf/_lib/string_casting.pyx b/python/cudf/cudf/_lib/string_casting.pyx index d9595f4ab0a..93b67bd4c9d 100644 --- a/python/cudf/cudf/_lib/string_casting.pyx +++ b/python/cudf/cudf/_lib/string_casting.pyx @@ -10,10 +10,6 @@ from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.strings.convert.convert_floats cimport ( - from_floats as cpp_from_floats, - to_floats as cpp_to_floats, -) from pylibcudf.libcudf.strings.convert.convert_integers cimport ( from_integers as cpp_from_integers, hex_to_integers as cpp_hex_to_integers, @@ -33,32 +29,18 @@ from cudf._lib.types cimport dtype_to_pylibcudf_type def floating_to_string(Column input_col): - cdef column_view input_column_view = input_col.view() - cdef unique_ptr[column] c_result - with nogil: - c_result = move( - cpp_from_floats( - input_column_view)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_floats.from_floats( + input_col.to_pylibcudf(mode="read"), + ) + return Column.from_pylibcudf(plc_column) def string_to_floating(Column input_col, object out_type): - cdef column_view input_column_view = input_col.view() - cdef unique_ptr[column] c_result - cdef type_id tid = ( - ( - SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[out_type] - ) + plc_column = plc.strings.convert.convert_floats.to_floats( + input_col.to_pylibcudf(mode="read"), + dtype_to_pylibcudf_type(out_type) ) - cdef data_type c_out_type = data_type(tid) - with nogil: - c_result = move( - cpp_to_floats( - input_column_view, - c_out_type)) - - return Column.from_unique_ptr(move(c_result)) + return Column.from_pylibcudf(plc_column) def dtos(Column input_col): diff --git a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx index 7965b588703..5da6e3f10cc 100644 --- a/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx +++ b/python/cudf/cudf/_lib/strings/convert/convert_floats.pyx @@ -1,18 +1,11 @@ # Copyright (c) 2021-2024, NVIDIA CORPORATION. -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - from cudf.core.buffer import acquire_spill_lock -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.strings.convert.convert_floats cimport ( - is_float as cpp_is_float, -) - from cudf._lib.column cimport Column +import pylibcudf as plc + @acquire_spill_lock() def is_float(Column source_strings): @@ -20,12 +13,7 @@ def is_float(Column source_strings): Returns a Column of boolean values with True for `source_strings` that have floats. 
""" - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_is_float( - source_view - )) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.convert.convert_floats.is_float( + source_strings.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) diff --git a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd index f4fc4674506..a45c7f9979e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings/convert/convert_floats.pxd @@ -9,12 +9,12 @@ from pylibcudf.libcudf.types cimport data_type cdef extern from "cudf/strings/convert/convert_floats.hpp" namespace \ "cudf::strings" nogil: cdef unique_ptr[column] to_floats( - column_view input_col, + column_view strings, data_type output_type) except + cdef unique_ptr[column] from_floats( - column_view input_col) except + + column_view floats) except + cdef unique_ptr[column] is_float( - column_view source_strings + column_view input ) except + diff --git a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt index 41aeb72039b..7b228c06a18 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/convert/CMakeLists.txt @@ -13,7 +13,7 @@ # ============================================================================= set(cython_sources convert_booleans.pyx convert_datetime.pyx convert_durations.pyx - convert_fixed_point.pyx convert_ipv4.pyx convert_urls.pyx + convert_fixed_point.pyx convert_floats.pyx convert_ipv4.pyx convert_urls.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd index b4b0b521e39..be6145384ad 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.pxd @@ -4,6 +4,7 @@ from . cimport ( convert_datetime, convert_durations, convert_fixed_point, + convert_floats, convert_ipv4, convert_urls, ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/__init__.py b/python/pylibcudf/pylibcudf/strings/convert/__init__.py index 409620fce45..7c94387282b 100644 --- a/python/pylibcudf/pylibcudf/strings/convert/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/convert/__init__.py @@ -4,6 +4,7 @@ convert_datetime, convert_durations, convert_fixed_point, + convert_floats, convert_ipv4, convert_urls, ) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd new file mode 100644 index 00000000000..1284ff552aa --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pxd @@ -0,0 +1,11 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from pylibcudf.column cimport Column +from pylibcudf.types cimport DataType + + +cpdef Column to_floats(Column strings, DataType output_type) + +cpdef Column from_floats(Column floats) + +cpdef Column is_float(Column input) diff --git a/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx new file mode 100644 index 00000000000..8081aadb085 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/convert/convert_floats.pyx @@ -0,0 +1,101 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.utility cimport move +from pylibcudf.column cimport Column +from pylibcudf.libcudf.column.column cimport column +from pylibcudf.libcudf.strings.convert cimport ( + convert_floats as cpp_convert_floats, +) +from pylibcudf.types cimport DataType + + +cpdef Column to_floats(Column strings, DataType output_type): + """ + Returns a new numeric column by parsing float values from each string + in the provided strings column. + + For details, see cpp:func:`cudf::strings::to_floats` + + Parameters + ---------- + strings : Column + Strings instance for this operation. + + output_type : DataType + Type of float numeric column to return. + + Returns + ------- + Column + New column with floats converted from strings. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_convert_floats.to_floats( + strings.view(), + output_type.c_obj, + ) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column from_floats(Column floats): + """ + Returns a new strings column converting the float values from the + provided column into strings. + + For details, see cpp:func:`cudf::strings::from_floats` + + Parameters + ---------- + floats : Column + Numeric column to convert. + + Returns + ------- + Column + New strings column with floats as strings. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_convert_floats.from_floats( + floats.view(), + ) + ) + + return Column.from_libcudf(move(c_result)) + + +cpdef Column is_float(Column input): + """ + Returns a boolean column identifying strings in which all + characters are valid for conversion to floats. + + For details, see cpp:func:`cudf::strings::is_float` + + Parameters + ---------- + input : Column + Strings instance for this operation. + + Returns + ------- + Column + New column of boolean results for each string. + """ + cdef unique_ptr[column] c_result + + with nogil: + c_result = move( + cpp_convert_floats.is_float( + input.view(), + ) + ) + + return Column.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py b/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py new file mode 100644 index 00000000000..e9918fab559 --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/test_string_convert_floats.py @@ -0,0 +1,33 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +import pyarrow as pa +import pylibcudf as plc +from utils import assert_column_eq + + +def test_to_floats(): + typ = pa.float32() + arr = pa.array(["-1.23", "1", None]) + result = plc.strings.convert.convert_floats.to_floats( + plc.interop.from_arrow(arr), plc.interop.from_arrow(typ) + ) + expected = arr.cast(typ) + assert_column_eq(result, expected) + + +def test_from_floats(): + arr = pa.array([-1.23, 1, None]) + result = plc.strings.convert.convert_floats.from_floats( + plc.interop.from_arrow(arr), + ) + expected = pa.array(["-1.23", "1.0", None]) + assert_column_eq(result, expected) + + +def test_is_float(): + arr = pa.array(["-1.23", "1", "1.2.3", "A", None]) + result = plc.strings.convert.convert_floats.is_float( + plc.interop.from_arrow(arr), + ) + expected = pa.array([True, True, False, False, None]) + assert_column_eq(result, expected) From 31423d056c45bd6352f0c611ed5e63423b09b954 Mon Sep 17 00:00:00 2001 From: Matthew Murray <41342305+Matt711@users.noreply.github.com> Date: Wed, 9 Oct 2024 21:01:30 -0400 Subject: [PATCH 33/33] Update all rmm imports to use pylibrmm/librmm (#16913) This PR updates all the RMM imports to use pylibrmm/librmm now that `rmm._lib` is deprecated. It should be merged after [rmm/1676](https://github.com/rapidsai/rmm/pull/1676). Authors: - Matthew Murray (https://github.com/Matt711) Approvers: - Lawrence Mitchell (https://github.com/wence-) - Charles Blackmon-Luca (https://github.com/charlesbluca) URL: https://github.com/rapidsai/cudf/pull/16913 --- docs/cudf/source/conf.py | 5 ++++- python/cudf/cudf/_lib/column.pxd | 2 +- python/cudf/cudf/_lib/column.pyx | 2 +- python/cudf/cudf/_lib/copying.pyx | 2 +- python/cudf/cudf/_lib/scalar.pxd | 2 +- python/cudf/cudf/_lib/strings_udf.pyx | 3 ++- python/cudf/cudf/core/buffer/buffer.py | 2 +- python/cudf/cudf/core/buffer/spillable_buffer.py | 4 ++-- python/cudf/cudf/core/udf/strings_typing.py | 2 +- python/pylibcudf/pylibcudf/column.pyx | 2 +- python/pylibcudf/pylibcudf/join.pyx | 2 +- python/pylibcudf/pylibcudf/libcudf/column/column.pxd | 2 +- .../pylibcudf/libcudf/column/column_factories.pxd | 2 +- python/pylibcudf/pylibcudf/libcudf/concatenate.pxd | 2 +- python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd | 2 +- python/pylibcudf/pylibcudf/libcudf/copying.pxd | 2 +- python/pylibcudf/pylibcudf/libcudf/join.pxd | 2 +- python/pylibcudf/pylibcudf/libcudf/null_mask.pxd | 2 +- python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd | 2 +- python/pylibcudf/pylibcudf/libcudf/transform.pxd | 2 +- python/pylibcudf/pylibcudf/null_mask.pxd | 2 +- python/pylibcudf/pylibcudf/null_mask.pyx | 7 ++++--- python/pylibcudf/pylibcudf/scalar.pxd | 2 +- python/pylibcudf/pylibcudf/scalar.pyx | 2 +- python/pylibcudf/pylibcudf/transform.pyx | 3 ++- 25 files changed, 34 insertions(+), 28 deletions(-) diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py index 95813907bf4..ecf619ddc44 100644 --- a/docs/cudf/source/conf.py +++ b/docs/cudf/source/conf.py @@ -342,7 +342,10 @@ def clean_all_xml_files(path): "cudf.Series": ("cudf.core.series.Series", "cudf.Series"), "cudf.Index": ("cudf.core.index.Index", "cudf.Index"), "cupy.core.core.ndarray": ("cupy.ndarray", "cupy.ndarray"), - "DeviceBuffer": ("rmm._lib.device_buffer.DeviceBuffer", "rmm.DeviceBuffer"), + # TODO: Replace the first entry in a follow-up with rmm.pylibrmm.device_buffer.DeviceBuffer + # when the RMM objects inventory is generated from branch-24.12.
The RMM objects inventory + # can be accessed here : https://docs.rapids.ai/api/rmm/nightly/objects.inv + "DeviceBuffer": ("rmm.DeviceBuffer", "rmm.DeviceBuffer"), } diff --git a/python/cudf/cudf/_lib/column.pxd b/python/cudf/cudf/_lib/column.pxd index 8ceea4920e2..8b1d16f0d85 100644 --- a/python/cudf/cudf/_lib/column.pxd +++ b/python/cudf/cudf/_lib/column.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.column.column_view cimport ( mutable_column_view, ) from pylibcudf.libcudf.types cimport size_type -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer cdef class Column: diff --git a/python/cudf/cudf/_lib/column.pyx b/python/cudf/cudf/_lib/column.pyx index 99e4c21df8a..065655505b8 100644 --- a/python/cudf/cudf/_lib/column.pyx +++ b/python/cudf/cudf/_lib/column.pyx @@ -28,7 +28,7 @@ from libcpp.memory cimport make_unique, unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector -from rmm._lib.device_buffer cimport DeviceBuffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer from cudf._lib.types cimport ( dtype_from_column_view, diff --git a/python/cudf/cudf/_lib/copying.pyx b/python/cudf/cudf/_lib/copying.pyx index 49714091f46..30353c4be6c 100644 --- a/python/cudf/cudf/_lib/copying.pyx +++ b/python/cudf/cudf/_lib/copying.pyx @@ -8,7 +8,7 @@ from libcpp.memory cimport make_shared, shared_ptr, unique_ptr from libcpp.utility cimport move from libcpp.vector cimport vector -from rmm._lib.device_buffer cimport DeviceBuffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer import pylibcudf diff --git a/python/cudf/cudf/_lib/scalar.pxd b/python/cudf/cudf/_lib/scalar.pxd index 27095ca02d4..0f9820ed1db 100644 --- a/python/cudf/cudf/_lib/scalar.pxd +++ b/python/cudf/cudf/_lib/scalar.pxd @@ -4,7 +4,7 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm._lib.memory_resource cimport DeviceMemoryResource +from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource cdef class DeviceScalar: diff --git a/python/cudf/cudf/_lib/strings_udf.pyx b/python/cudf/cudf/_lib/strings_udf.pyx index 78fc9f08bd8..dd2fafbe07f 100644 --- a/python/cudf/cudf/_lib/strings_udf.pyx +++ b/python/cudf/cudf/_lib/strings_udf.pyx @@ -23,7 +23,8 @@ from pylibcudf.libcudf.strings_udf cimport ( to_string_view_array as cpp_to_string_view_array, udf_string, ) -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer +from rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer from cudf._lib.column cimport Column diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index 32ae8c5ee53..caff019f575 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -284,7 +284,7 @@ def memoryview( """Read-only access to the buffer through host memory.""" size = self._size if size is None else size host_buf = host_memory_allocation(size) - rmm._lib.device_buffer.copy_ptr_to_host( + rmm.pylibrmm.device_buffer.copy_ptr_to_host( self.get_ptr(mode="read") + offset, host_buf ) return memoryview(host_buf).toreadonly() diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index 4c9e524ee05..b40c56c9a6b 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -207,7 +207,7 @@ def spill(self, target: str = "cpu") -> None: domain="cudf_python-spill", ): 
host_mem = host_memory_allocation(self.size) - rmm._lib.device_buffer.copy_ptr_to_host( + rmm.pylibrmm.device_buffer.copy_ptr_to_host( self._ptr, host_mem ) self._ptr_desc["memoryview"] = host_mem @@ -352,7 +352,7 @@ def memoryview( else: assert self._ptr_desc["type"] == "gpu" ret = host_memory_allocation(size) - rmm._lib.device_buffer.copy_ptr_to_host( + rmm.pylibrmm.device_buffer.copy_ptr_to_host( self._ptr + offset, ret ) return ret diff --git a/python/cudf/cudf/core/udf/strings_typing.py b/python/cudf/cudf/core/udf/strings_typing.py index 43604ab21a7..a0cbe7ada19 100644 --- a/python/cudf/cudf/core/udf/strings_typing.py +++ b/python/cudf/cudf/core/udf/strings_typing.py @@ -99,7 +99,7 @@ def prepare_args(self, ty, val, **kwargs): ty.dtype, (StringView, UDFString) ): return types.uint64, val.ptr if isinstance( - val, rmm._lib.device_buffer.DeviceBuffer + val, rmm.pylibrmm.device_buffer.DeviceBuffer ) else val.get_ptr(mode="read") else: return ty, val diff --git a/python/pylibcudf/pylibcudf/column.pyx b/python/pylibcudf/pylibcudf/column.pyx index a37a12fc7e1..03808f0b664 100644 --- a/python/pylibcudf/pylibcudf/column.pyx +++ b/python/pylibcudf/pylibcudf/column.pyx @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column_factories cimport make_column_from_scalar from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport size_type -from rmm._lib.device_buffer cimport DeviceBuffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer from .gpumemoryview cimport gpumemoryview from .scalar cimport Scalar diff --git a/python/pylibcudf/pylibcudf/join.pyx b/python/pylibcudf/pylibcudf/join.pyx index 25664286f19..b019ed8f099 100644 --- a/python/pylibcudf/pylibcudf/join.pyx +++ b/python/pylibcudf/pylibcudf/join.pyx @@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.types cimport null_equality -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer from .column cimport Column from .table cimport Table diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd index 7a369701bbd..76f35cbba71 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.column.column_view cimport ( ) from pylibcudf.libcudf.types cimport data_type, size_type -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/column/column.hpp" namespace "cudf" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd index f1a326bcd40..b2388858127 100644 --- a/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/column/column_factories.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.types cimport ( type_id, ) -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/column/column_factories.hpp" namespace "cudf" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd index 92f5a185a54..a09b6c01392 100644 --- a/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/concatenate.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column 
cimport column, column_view from pylibcudf.libcudf.table.table cimport table, table_view from pylibcudf.libcudf.utilities.host_span cimport host_span -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/concatenate.hpp" namespace "cudf" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd index cadac6a0022..6de9c4382d3 100644 --- a/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/contiguous_split.pxd @@ -6,7 +6,7 @@ from libcpp.vector cimport vector from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/contiguous_split.hpp" namespace "cudf" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/copying.pxd b/python/pylibcudf/pylibcudf/libcudf/copying.pxd index 4d4a4ba9b89..e6e719d6436 100644 --- a/python/pylibcudf/pylibcudf/libcudf/copying.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/copying.pxd @@ -16,7 +16,7 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport size_type -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer ctypedef const scalar constscalar diff --git a/python/pylibcudf/pylibcudf/libcudf/join.pxd b/python/pylibcudf/pylibcudf/libcudf/join.pxd index 6f6c145b23c..21033a0284e 100644 --- a/python/pylibcudf/pylibcudf/libcudf/join.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/join.pxd @@ -9,7 +9,7 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport null_equality, size_type -from rmm._lib.device_uvector cimport device_uvector +from rmm.librmm.device_uvector cimport device_uvector ctypedef unique_ptr[device_uvector[size_type]] gather_map_type ctypedef pair[gather_map_type, gather_map_type] gather_map_pair_type diff --git a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd index 5f582091b06..27af4a3bdb1 100644 --- a/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/null_mask.pxd @@ -6,7 +6,7 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport bitmask_type, mask_state, size_type -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/null_mask.hpp" namespace "cudf" nogil: diff --git a/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd b/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd index 0c8fe1060ac..2eca043e451 100644 --- a/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/strings_udf.pxd @@ -8,7 +8,7 @@ from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.types cimport size_type -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer +from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/strings/udf/udf_string.hpp" namespace \ diff --git a/python/pylibcudf/pylibcudf/libcudf/transform.pxd b/python/pylibcudf/pylibcudf/libcudf/transform.pxd index 
38298a7c1f1..d21510bd731 100644 --- a/python/pylibcudf/pylibcudf/libcudf/transform.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/transform.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport bitmask_type, data_type, size_type -from rmm._lib.device_buffer cimport device_buffer +from rmm.librmm.device_buffer cimport device_buffer cdef extern from "cudf/transform.hpp" namespace "cudf" nogil: diff --git a/python/pylibcudf/pylibcudf/null_mask.pxd b/python/pylibcudf/pylibcudf/null_mask.pxd index ab5c0080312..9bdfaee2842 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pxd +++ b/python/pylibcudf/pylibcudf/null_mask.pxd @@ -2,7 +2,7 @@ from pylibcudf.libcudf.types cimport mask_state, size_type -from rmm._lib.device_buffer cimport DeviceBuffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer from .column cimport Column diff --git a/python/pylibcudf/pylibcudf/null_mask.pyx b/python/pylibcudf/pylibcudf/null_mask.pyx index 5bdde06f21f..aae39987dac 100644 --- a/python/pylibcudf/pylibcudf/null_mask.pyx +++ b/python/pylibcudf/pylibcudf/null_mask.pyx @@ -6,7 +6,8 @@ from libcpp.utility cimport move from pylibcudf.libcudf cimport null_mask as cpp_null_mask from pylibcudf.libcudf.types cimport mask_state, size_type -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer +from rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer from pylibcudf.libcudf.types import mask_state as MaskState # no-cython-lint @@ -31,8 +32,8 @@ cpdef DeviceBuffer copy_bitmask(Column col): Returns ------- rmm.DeviceBuffer - A ``DeviceBuffer`` containing ``col``'s bitmask, or an empty ``DeviceBuffer`` - if ``col`` is not nullable + A ``DeviceBuffer`` containing ``col``'s bitmask, or an empty + ``DeviceBuffer`` if ``col`` is not nullable """ cdef device_buffer db diff --git a/python/pylibcudf/pylibcudf/scalar.pxd b/python/pylibcudf/pylibcudf/scalar.pxd index 8664dfa4b7e..a273647c98d 100644 --- a/python/pylibcudf/pylibcudf/scalar.pxd +++ b/python/pylibcudf/pylibcudf/scalar.pxd @@ -4,7 +4,7 @@ from libcpp cimport bool from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.scalar.scalar cimport scalar -from rmm._lib.memory_resource cimport DeviceMemoryResource +from rmm.pylibrmm.memory_resource cimport DeviceMemoryResource from .column cimport Column from .types cimport DataType diff --git a/python/pylibcudf/pylibcudf/scalar.pyx b/python/pylibcudf/pylibcudf/scalar.pyx index 3e20938af0c..d4888a62ad1 100644 --- a/python/pylibcudf/pylibcudf/scalar.pyx +++ b/python/pylibcudf/pylibcudf/scalar.pyx @@ -6,7 +6,7 @@ from libcpp.utility cimport move from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.scalar.scalar_factories cimport make_empty_scalar_like -from rmm._lib.memory_resource cimport get_current_device_resource +from rmm.pylibrmm.memory_resource cimport get_current_device_resource from .column cimport Column from .types cimport DataType diff --git a/python/pylibcudf/pylibcudf/transform.pyx b/python/pylibcudf/pylibcudf/transform.pyx index de425a27c15..74134caeb78 100644 --- a/python/pylibcudf/pylibcudf/transform.pyx +++ b/python/pylibcudf/pylibcudf/transform.pyx @@ -9,7 +9,8 @@ from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from pylibcudf.libcudf.types cimport bitmask_type, size_type -from rmm._lib.device_buffer cimport DeviceBuffer, device_buffer +from 
rmm.librmm.device_buffer cimport device_buffer +from rmm.pylibrmm.device_buffer cimport DeviceBuffer from .column cimport Column from .gpumemoryview cimport gpumemoryview
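For quick reference, here is a minimal sketch of the `convert_floats` APIs added to pylibcudf in #16990 (PATCH 32/33 above). It mirrors the calls exercised by the new test file; the only call not taken from those tests is `plc.interop.to_arrow`, assumed here purely to inspect the results.

```python
import pyarrow as pa
import pylibcudf as plc

# Parse each string into a float32 value; null inputs stay null.
strings = plc.interop.from_arrow(pa.array(["-1.23", "1", None]))
floats = plc.strings.convert.convert_floats.to_floats(
    strings, plc.interop.from_arrow(pa.float32())
)

# Format the numeric column back into strings ("1" round-trips as "1.0").
round_tripped = plc.strings.convert.convert_floats.from_floats(floats)

# Flag which strings are valid float literals before converting.
is_valid = plc.strings.convert.convert_floats.is_float(strings)

print(plc.interop.to_arrow(round_tripped))  # ["-1.23", "1.0", None]
print(plc.interop.to_arrow(is_valid))       # [True, True, None]
```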
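The import updates in #16913 (PATCH 33/33 above) all follow one pattern: C++ declarations previously cimported from `rmm._lib` now come from `rmm.librmm`, while Python-facing wrappers come from `rmm.pylibrmm`. A minimal Python-level sketch of the new layout, using only names that appear in the diffs above (the exact signatures are assumptions inferred from how cudf invokes them):

```python
# Deprecated location (pre-#16913):
#   from rmm._lib.device_buffer import DeviceBuffer, copy_ptr_to_host
# New location for the Python wrapper and its helpers:
from rmm.pylibrmm.device_buffer import DeviceBuffer, copy_ptr_to_host

buf = DeviceBuffer(size=4)       # 4-byte device allocation
host = bytearray(buf.size)       # writable host-side buffer
copy_ptr_to_host(buf.ptr, host)  # DtoH copy, as used in buffer.py above
print(len(host))                 # 4
```

In Cython modules the same split applies: `from rmm.librmm.device_buffer cimport device_buffer` for the C++ type and `from rmm.pylibrmm.device_buffer cimport DeviceBuffer` for the wrapper, as in the `transform.pyx` hunk above.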