diff --git a/ci/cudf_pandas_scripts/third-party-integration/test.sh b/ci/cudf_pandas_scripts/third-party-integration/test.sh
index f8ddbaba0f3..30e3ffc9a43 100755
--- a/ci/cudf_pandas_scripts/third-party-integration/test.sh
+++ b/ci/cudf_pandas_scripts/third-party-integration/test.sh
@@ -26,6 +26,8 @@ main() {
   LIBS=${LIBS#[}
   LIBS=${LIBS%]}
 
+  ANY_FAILURES=0
+
   for lib in ${LIBS//,/ }; do
     lib=$(echo "$lib" | tr -d '""')
     echo "Running tests for library $lib"
@@ -56,10 +58,6 @@ main() {
     rapids-logger "Check GPU usage"
     nvidia-smi
 
-    EXITCODE=0
-    trap "EXITCODE=1" ERR
-    set +e
-
     rapids-logger "pytest ${lib}"
 
     NUM_PROCESSES=8
@@ -72,12 +70,20 @@ main() {
       fi
     done
 
+    EXITCODE=0
+    trap "EXITCODE=1" ERR
+    set +e
+
     TEST_DIR=${TEST_DIR} NUM_PROCESSES=${NUM_PROCESSES} ci/cudf_pandas_scripts/third-party-integration/run-library-tests.sh ${lib}
+    set -e
 
     rapids-logger "Test script exiting with value: ${EXITCODE}"
+    if [[ ${EXITCODE} != 0 ]]; then
+      ANY_FAILURES=1
+    fi
   done
 
-  exit ${EXITCODE}
+  exit ${ANY_FAILURES}
 }
 
 main "$@"
diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 2f17b57b0a4..78f529a44d3 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -1105,7 +1105,7 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL)
     ${_tgt} PRIVATE "$:${CUDF_CXX_FLAGS}>>"
   )
   target_include_directories(${_tgt} PRIVATE "$")
-  target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm)
+  target_link_libraries(${_tgt} PUBLIC CUDA::cudart rmm::rmm rmm::rmm_logger rmm::rmm_logger_impl)
   if(CUDF_BUILD_STACKTRACE_DEBUG)
     target_link_libraries(${_tgt} PRIVATE cudf_backtrace)
   endif()
diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 8e5ea900efa..749e1b628ee 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -140,8 +140,9 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
 endfunction()
 
 # ##################################################################################################
-# * column benchmarks -----------------------------------------------------------------------------
-ConfigureBench(COLUMN_CONCAT_BENCH column/concatenate.cpp)
+# * copying benchmarks
+# -----------------------------------------------------------------------------
+ConfigureNVBench(COPYING_NVBENCH copying/concatenate.cpp)
 
 # ##################################################################################################
 # * gather benchmark ------------------------------------------------------------------------------
@@ -351,11 +352,18 @@ ConfigureNVBench(BINARYOP_NVBENCH binaryop/binaryop.cpp binaryop/compiled_binary
 
 # ##################################################################################################
 # * nvtext benchmark -------------------------------------------------------------------
-ConfigureBench(TEXT_BENCH text/subword.cpp)
-
 ConfigureNVBench(
-  TEXT_NVBENCH text/edit_distance.cpp text/hash_ngrams.cpp text/jaccard.cpp text/minhash.cpp
-  text/ngrams.cpp text/normalize.cpp text/replace.cpp text/tokenize.cpp text/vocab.cpp
+  TEXT_NVBENCH
+  text/edit_distance.cpp
+  text/hash_ngrams.cpp
+  text/jaccard.cpp
+  text/minhash.cpp
+  text/ngrams.cpp
+  text/normalize.cpp
+  text/replace.cpp
+  text/subword.cpp
+  text/tokenize.cpp
+  text/vocab.cpp
 )
 
 # ##################################################################################################
diff --git a/cpp/benchmarks/column/concatenate.cpp b/cpp/benchmarks/column/concatenate.cpp
deleted file mode 100644
index 51106c72137..00000000000
--- a/cpp/benchmarks/column/concatenate.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c)
2020-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#include -#include -#include -#include - -#include - -#include -#include -#include - -#include -#include -#include - -#include -#include - -class Concatenate : public cudf::benchmark {}; - -template -static void BM_concatenate(benchmark::State& state) -{ - cudf::size_type const num_rows = state.range(0); - cudf::size_type const num_cols = state.range(1); - - auto input = create_sequence_table(cycle_dtypes({cudf::type_to_id()}, num_cols), - row_count{num_rows}, - Nullable ? std::optional{2.0 / 3.0} : std::nullopt); - auto input_columns = input->view(); - std::vector column_views(input_columns.begin(), input_columns.end()); - - CUDF_CHECK_CUDA(0); - - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - auto result = cudf::concatenate(column_views); - } - - state.SetBytesProcessed(state.iterations() * num_cols * num_rows * sizeof(T)); -} - -#define CONCAT_BENCHMARK_DEFINE(type, nullable) \ - BENCHMARK_DEFINE_F(Concatenate, BM_concatenate##_##nullable_##nullable) \ - (::benchmark::State & st) { BM_concatenate(st); } \ - BENCHMARK_REGISTER_F(Concatenate, BM_concatenate##_##nullable_##nullable) \ - ->RangeMultiplier(8) \ - ->Ranges({{1 << 6, 1 << 18}, {2, 1024}}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -CONCAT_BENCHMARK_DEFINE(int64_t, false) -CONCAT_BENCHMARK_DEFINE(int64_t, true) - -template -static void BM_concatenate_tables(benchmark::State& state) -{ - cudf::size_type const num_rows = state.range(0); - cudf::size_type const num_cols = state.range(1); - cudf::size_type const num_tables = state.range(2); - - std::vector> tables(num_tables); - std::generate_n(tables.begin(), num_tables, [&]() { - return create_sequence_table(cycle_dtypes({cudf::type_to_id()}, num_cols), - row_count{num_rows}, - Nullable ? 
std::optional{2.0 / 3.0} : std::nullopt); - }); - - // Generate table views - std::vector table_views(num_tables); - std::transform(tables.begin(), tables.end(), table_views.begin(), [](auto& table) mutable { - return table->view(); - }); - - CUDF_CHECK_CUDA(0); - - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - auto result = cudf::concatenate(table_views); - } - - state.SetBytesProcessed(state.iterations() * num_cols * num_rows * num_tables * sizeof(T)); -} - -#define CONCAT_TABLES_BENCHMARK_DEFINE(type, nullable) \ - BENCHMARK_DEFINE_F(Concatenate, BM_concatenate_tables##_##nullable_##nullable) \ - (::benchmark::State & st) { BM_concatenate_tables(st); } \ - BENCHMARK_REGISTER_F(Concatenate, BM_concatenate_tables##_##nullable_##nullable) \ - ->RangeMultiplier(8) \ - ->Ranges({{1 << 8, 1 << 12}, {2, 32}, {2, 128}}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -CONCAT_TABLES_BENCHMARK_DEFINE(int64_t, false) -CONCAT_TABLES_BENCHMARK_DEFINE(int64_t, true) - -class ConcatenateStrings : public cudf::benchmark {}; - -template -static void BM_concatenate_strings(benchmark::State& state) -{ - using column_wrapper = cudf::test::strings_column_wrapper; - - auto const num_rows = state.range(0); - auto const num_chars = state.range(1); - auto const num_cols = state.range(2); - - std::string str(num_chars, 'a'); - - // Create owning columns - std::vector columns; - columns.reserve(num_cols); - std::generate_n(std::back_inserter(columns), num_cols, [num_rows, c_str = str.c_str()]() { - auto iter = thrust::make_constant_iterator(c_str); - if (Nullable) { - auto count_it = thrust::make_counting_iterator(0); - auto valid_iter = - thrust::make_transform_iterator(count_it, [](auto i) { return i % 3 == 0; }); - return column_wrapper(iter, iter + num_rows, valid_iter); - } else { - return column_wrapper(iter, iter + num_rows); - } - }); - - // Generate column views - std::vector column_views; - column_views.reserve(columns.size()); - std::transform( - columns.begin(), columns.end(), std::back_inserter(column_views), [](auto const& col) { - return static_cast(col); - }); - - CUDF_CHECK_CUDA(0); - - for (auto _ : state) { - cuda_event_timer raii(state, true, cudf::get_default_stream()); - auto result = cudf::concatenate(column_views); - } - - state.SetBytesProcessed(state.iterations() * num_cols * num_rows * - (sizeof(int32_t) + num_chars)); // offset + chars -} - -#define CONCAT_STRINGS_BENCHMARK_DEFINE(nullable) \ - BENCHMARK_DEFINE_F(Concatenate, BM_concatenate_strings##_##nullable_##nullable) \ - (::benchmark::State & st) { BM_concatenate_strings(st); } \ - BENCHMARK_REGISTER_F(Concatenate, BM_concatenate_strings##_##nullable_##nullable) \ - ->RangeMultiplier(8) \ - ->Ranges({{1 << 8, 1 << 14}, {8, 128}, {2, 256}}) \ - ->Unit(benchmark::kMillisecond) \ - ->UseManualTime(); - -CONCAT_STRINGS_BENCHMARK_DEFINE(false) -CONCAT_STRINGS_BENCHMARK_DEFINE(true) diff --git a/cpp/benchmarks/copying/concatenate.cpp b/cpp/benchmarks/copying/concatenate.cpp new file mode 100644 index 00000000000..586b479d0ad --- /dev/null +++ b/cpp/benchmarks/copying/concatenate.cpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#include
+
+#include
+#include
+#include
+#include
+
+#include
+
+#include
+
+static void bench_concatenate(nvbench::state& state)
+{
+  auto const num_rows = static_cast(state.get_int64("num_rows"));
+  auto const num_cols = static_cast(state.get_int64("num_cols"));
+  auto const nulls    = static_cast(state.get_float64("nulls"));
+
+  auto input = create_sequence_table(
+    cycle_dtypes({cudf::type_to_id()}, num_cols), row_count{num_rows}, nulls);
+  auto input_columns = input->view();
+  auto column_views  = std::vector(input_columns.begin(), input_columns.end());
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  state.add_global_memory_reads(num_rows * num_cols);
+  state.add_global_memory_writes(num_rows * num_cols);
+
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch&) { auto result = cudf::concatenate(column_views); });
+}
+
+NVBENCH_BENCH(bench_concatenate)
+  .set_name("concatenate")
+  .add_int64_axis("num_rows", {64, 512, 4096, 32768, 262144})
+  .add_int64_axis("num_cols", {2, 8, 64, 512, 1024})
+  .add_float64_axis("nulls", {0.0, 0.3});
+
+static void bench_concatenate_strings(nvbench::state& state)
+{
+  auto const num_rows  = static_cast(state.get_int64("num_rows"));
+  auto const num_cols  = static_cast(state.get_int64("num_cols"));
+  auto const row_width = static_cast(state.get_int64("row_width"));
+  auto const nulls     = static_cast(state.get_float64("nulls"));
+
+  data_profile const profile =
+    data_profile_builder()
+      .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width)
+      .null_probability(nulls);
+  auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile);
+  auto const input  = column->view();
+
+  auto column_views = std::vector(num_cols, input);
+
+  auto stream = cudf::get_default_stream();
+  state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
+  auto const sv = cudf::strings_column_view(input);
+  state.add_global_memory_reads(sv.chars_size(stream) * num_cols);
+  state.add_global_memory_writes(sv.chars_size(stream) * num_cols);
+
+  state.exec(nvbench::exec_tag::sync,
+             [&](nvbench::launch&) { auto result = cudf::concatenate(column_views); });
+}
+
+NVBENCH_BENCH(bench_concatenate_strings)
+  .set_name("concatenate_strings")
+  .add_int64_axis("num_rows", {256, 512, 4096, 16384})
+  .add_int64_axis("num_cols", {2, 8, 64, 256})
+  .add_int64_axis("row_width", {32, 128})
+  .add_float64_axis("nulls", {0.0, 0.3});
diff --git a/cpp/benchmarks/string/case.cpp b/cpp/benchmarks/string/case.cpp
index cd4d3ca964b..9750475a079 100644
--- a/cpp/benchmarks/string/case.cpp
+++ b/cpp/benchmarks/string/case.cpp
@@ -24,18 +24,14 @@ void bench_case(nvbench::state& state)
 {
-  auto const n_rows    = static_cast(state.get_int64("num_rows"));
-  auto const max_width = static_cast(state.get_int64("row_width"));
+  auto const num_rows  = static_cast(state.get_int64("num_rows"));
+  auto const min_width = static_cast(state.get_int64("min_width"));
+  auto const max_width = static_cast(state.get_int64("max_width"));
   auto const encoding  =
state.get_string("encoding"); - if (static_cast(n_rows) * static_cast(max_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, max_width); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); auto col_view = column->view(); @@ -74,6 +70,7 @@ void bench_case(nvbench::state& state) NVBENCH_BENCH(bench_case) .set_name("case") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("encoding", {"ascii", "utf8"}); diff --git a/cpp/benchmarks/string/char_types.cpp b/cpp/benchmarks/string/char_types.cpp index eec9a5f54d7..abc5254392e 100644 --- a/cpp/benchmarks/string/char_types.cpp +++ b/cpp/benchmarks/string/char_types.cpp @@ -25,16 +25,12 @@ static void bench_char_types(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const api_type = state.get_string("api"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -61,6 +57,7 @@ static void bench_char_types(nvbench::state& state) NVBENCH_BENCH(bench_char_types) .set_name("char_types") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("api", {"all", "filter"}); diff --git a/cpp/benchmarks/string/contains.cpp b/cpp/benchmarks/string/contains.cpp index a73017dda18..e3940cbc0c7 100644 --- a/cpp/benchmarks/string/contains.cpp +++ b/cpp/benchmarks/string/contains.cpp @@ -29,17 +29,12 @@ std::string patterns[] = {"^\\d+ [a-z]+", "[A-Z ]+\\d+ +\\d+[A-Z]+\\d+$", "5W43" static void bench_contains(nvbench::state& state) { - auto const n_rows = static_cast(state.get_int64("num_rows")); + auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); auto const pattern_index = static_cast(state.get_int64("pattern")); auto const hit_rate = static_cast(state.get_int64("hit_rate")); - if (static_cast(n_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - - auto col = create_string_column(n_rows, 
row_width, hit_rate); + auto col = create_string_column(num_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); auto pattern = patterns[pattern_index]; @@ -56,7 +51,7 @@ static void bench_contains(nvbench::state& state) NVBENCH_BENCH(bench_contains) .set_name("contains") - .add_int64_axis("row_width", {32, 64, 128, 256, 512}) - .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_int64_axis("hit_rate", {50, 100}) // percentage .add_int64_axis("pattern", {0, 1, 2}); diff --git a/cpp/benchmarks/string/copy_if_else.cpp b/cpp/benchmarks/string/copy_if_else.cpp index e06cca497c2..5a5743dfddf 100644 --- a/cpp/benchmarks/string/copy_if_else.cpp +++ b/cpp/benchmarks/string/copy_if_else.cpp @@ -25,15 +25,11 @@ static void bench_copy(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const str_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const source_table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, str_profile); auto const target_table = @@ -58,5 +54,6 @@ static void bench_copy(nvbench::state& state) NVBENCH_BENCH(bench_copy) .set_name("copy_if_else") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/copy_range.cpp b/cpp/benchmarks/string/copy_range.cpp index af217a49195..7e7353a0e78 100644 --- a/cpp/benchmarks/string/copy_range.cpp +++ b/cpp/benchmarks/string/copy_range.cpp @@ -25,16 +25,12 @@ static void bench_copy_range(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const table_profile = data_profile_builder() - .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width) .no_validity(); auto const source_tables = create_random_table( {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, table_profile); @@ -56,5 +52,6 @@ static void bench_copy_range(nvbench::state& state) NVBENCH_BENCH(bench_copy_range) .set_name("copy_range") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + 
.add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/count.cpp b/cpp/benchmarks/string/count.cpp index f964bc5d224..cf90e316f71 100644 --- a/cpp/benchmarks/string/count.cpp +++ b/cpp/benchmarks/string/count.cpp @@ -30,16 +30,12 @@ static std::string patterns[] = {"\\d+", "a"}; static void bench_count(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const pattern_index = static_cast(state.get_int64("pattern")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -61,6 +57,7 @@ static void bench_count(nvbench::state& state) NVBENCH_BENCH(bench_count) .set_name("count") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_int64_axis("pattern", {0, 1}); diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp index af4fedb5799..d6866598ff4 100644 --- a/cpp/benchmarks/string/extract.cpp +++ b/cpp/benchmarks/string/extract.cpp @@ -32,11 +32,6 @@ static void bench_extract(nvbench::state& state) auto const num_rows = static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - auto groups = static_cast(state.get_int64("groups")); std::default_random_engine generator; @@ -79,6 +74,6 @@ static void bench_extract(nvbench::state& state) NVBENCH_BENCH(bench_extract) .set_name("extract") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_int64_axis("groups", {1, 2, 4}); diff --git a/cpp/benchmarks/string/join_strings.cpp b/cpp/benchmarks/string/join_strings.cpp index 6dcf731ad3c..27652193b7b 100644 --- a/cpp/benchmarks/string/join_strings.cpp +++ b/cpp/benchmarks/string/join_strings.cpp @@ -25,15 +25,11 @@ static void bench_join(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, 
distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -54,5 +50,6 @@ static void bench_join(nvbench::state& state) NVBENCH_BENCH(bench_join) .set_name("strings_join") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/lengths.cpp b/cpp/benchmarks/string/lengths.cpp index a19060ead3b..8156e19412b 100644 --- a/cpp/benchmarks/string/lengths.cpp +++ b/cpp/benchmarks/string/lengths.cpp @@ -25,15 +25,11 @@ static void bench_lengths(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -51,5 +47,6 @@ static void bench_lengths(nvbench::state& state) NVBENCH_BENCH(bench_lengths) .set_name("lengths") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048, 4096}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/like.cpp b/cpp/benchmarks/string/like.cpp index 105ae65cbe8..f6410aaef30 100644 --- a/cpp/benchmarks/string/like.cpp +++ b/cpp/benchmarks/string/like.cpp @@ -30,11 +30,6 @@ static void bench_like(nvbench::state& state) auto const row_width = static_cast(state.get_int64("row_width")); auto const hit_rate = static_cast(state.get_int64("hit_rate")); - if (static_cast(n_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - auto col = create_string_column(n_rows, row_width, hit_rate); auto input = cudf::strings_column_view(col->view()); @@ -54,6 +49,6 @@ static void bench_like(nvbench::state& state) NVBENCH_BENCH(bench_like) .set_name("strings_like") - .add_int64_axis("row_width", {32, 64, 128, 256, 512}) - .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_int64_axis("hit_rate", {10, 25, 70, 100}); diff --git a/cpp/benchmarks/string/replace_re.cpp b/cpp/benchmarks/string/replace_re.cpp index 4dcf1314f83..69426a2d484 100644 --- a/cpp/benchmarks/string/replace_re.cpp +++ b/cpp/benchmarks/string/replace_re.cpp @@ -26,18 +26,14 @@ static void bench_replace(nvbench::state& state) { - auto const n_rows = 
static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const num_rows = static_cast(state.get_int64("num_rows")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const rtype = state.get_string("type"); - if (static_cast(n_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); - auto const column = create_random_column(cudf::type_id::STRING, row_count{n_rows}, profile); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); + auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); auto program = cudf::strings::regex_program::create("(\\d+)"); @@ -62,6 +58,7 @@ static void bench_replace(nvbench::state& state) NVBENCH_BENCH(bench_replace) .set_name("replace_re") - .add_int64_axis("row_width", {32, 64, 128, 256, 512}) - .add_int64_axis("num_rows", {32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"replace", "backref"}); diff --git a/cpp/benchmarks/string/reverse.cpp b/cpp/benchmarks/string/reverse.cpp index a2676609a40..e2e914cb350 100644 --- a/cpp/benchmarks/string/reverse.cpp +++ b/cpp/benchmarks/string/reverse.cpp @@ -25,15 +25,11 @@ static void bench_reverse(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const table_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, table_profile); cudf::strings_column_view input(table->view().column(0)); @@ -51,5 +47,6 @@ static void bench_reverse(nvbench::state& state) NVBENCH_BENCH(bench_reverse) .set_name("reverse") - .add_int64_axis("row_width", {8, 16, 32, 64, 128}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/slice.cpp b/cpp/benchmarks/string/slice.cpp index 1898f0340b6..c828a8ed0b0 100644 --- a/cpp/benchmarks/string/slice.cpp +++ b/cpp/benchmarks/string/slice.cpp @@ -36,11 +36,6 @@ static void bench_slice(nvbench::state& state) auto const row_width = static_cast(state.get_int64("row_width")); auto const stype = state.get_string("type"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( 
cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); @@ -76,6 +71,6 @@ static void bench_slice(nvbench::state& state) NVBENCH_BENCH(bench_slice) .set_name("slice") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {262144, 2097152, 16777216}) + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"position", "multi"}); diff --git a/cpp/benchmarks/string/split.cpp b/cpp/benchmarks/string/split.cpp index 9ef58daf0fc..9c7c27c4f07 100644 --- a/cpp/benchmarks/string/split.cpp +++ b/cpp/benchmarks/string/split.cpp @@ -28,16 +28,12 @@ static void bench_split(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const stype = state.get_string("type"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); cudf::string_scalar target("+"); @@ -66,6 +62,7 @@ static void bench_split(nvbench::state& state) NVBENCH_BENCH(bench_split) .set_name("split") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"split", "split_ws", "record", "record_ws"}); diff --git a/cpp/benchmarks/string/split_re.cpp b/cpp/benchmarks/string/split_re.cpp index 1fdb6e67109..34a7aa96e84 100644 --- a/cpp/benchmarks/string/split_re.cpp +++ b/cpp/benchmarks/string/split_re.cpp @@ -28,17 +28,13 @@ static void bench_split(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto prog = cudf::strings::regex_program::create("\\d+"); data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); @@ -56,5 +52,6 @@ static void bench_split(nvbench::state& state) NVBENCH_BENCH(bench_split) .set_name("split_re") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024, 2048}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("min_width", {0}) + 
.add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/string/string_bench_args.hpp b/cpp/benchmarks/string/string_bench_args.hpp deleted file mode 100644 index a34026281e8..00000000000 --- a/cpp/benchmarks/string/string_bench_args.hpp +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2021-2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -#include - -#include - -/** - * @brief Generate row count and row length argument ranges for a string benchmark. - * - * Generates a series of row count and row length arguments for string benchmarks. - * Combinations of row count and row length that would exceed the maximum string character - * column data length are not generated. - * - * @param b Benchmark to update with row count and row length arguments. - * @param min_rows Minimum row count argument to generate. - * @param max_rows Maximum row count argument to generate. - * @param rows_mult Row count multiplier to generate intermediate row count arguments. - * @param min_rowlen Minimum row length argument to generate. - * @param max_rowlen Maximum row length argument to generate. - * @param rowlen_mult Row length multiplier to generate intermediate row length arguments. 
- */ -inline void generate_string_bench_args(benchmark::internal::Benchmark* b, - int min_rows, - int max_rows, - int rows_mult, - int min_rowlen, - int max_rowlen, - int rowlen_mult) -{ - for (int row_count = min_rows; row_count <= max_rows; row_count *= rows_mult) { - for (int rowlen = min_rowlen; rowlen <= max_rowlen; rowlen *= rowlen_mult) { - // avoid generating combinations that exceed the cudf column limit - size_t total_chars = static_cast(row_count) * rowlen; - if (total_chars < static_cast(std::numeric_limits::max())) { - b->Args({row_count, rowlen}); - } - } - } -} diff --git a/cpp/benchmarks/text/edit_distance.cpp b/cpp/benchmarks/text/edit_distance.cpp index 6ffa90edb8f..0ad1ae30f8c 100644 --- a/cpp/benchmarks/text/edit_distance.cpp +++ b/cpp/benchmarks/text/edit_distance.cpp @@ -27,15 +27,11 @@ static void bench_edit_distance(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); data_profile const strings_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const strings_table = create_random_table( {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, strings_profile); cudf::strings_column_view input1(strings_table->view().column(0)); @@ -55,5 +51,6 @@ static void bench_edit_distance(nvbench::state& state) NVBENCH_BENCH(bench_edit_distance) .set_name("edit_distance") - .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144}) - .add_int64_axis("row_width", {8, 16, 32, 64, 128, 256}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144}); diff --git a/cpp/benchmarks/text/hash_ngrams.cpp b/cpp/benchmarks/text/hash_ngrams.cpp index 4e5daf83a3c..7577cf00c0f 100644 --- a/cpp/benchmarks/text/hash_ngrams.cpp +++ b/cpp/benchmarks/text/hash_ngrams.cpp @@ -27,16 +27,12 @@ static void bench_hash_ngrams(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const ngrams = static_cast(state.get_int64("ngrams")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const strings_profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const strings_table = create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile); cudf::strings_column_view input(strings_table->view().column(0)); @@ -55,6 +51,7 @@ static void bench_hash_ngrams(nvbench::state& state) NVBENCH_BENCH(bench_hash_ngrams) .set_name("hash_ngrams") - .add_int64_axis("num_rows", {1024, 4096, 8192, 16364, 32768, 262144}) - .add_int64_axis("row_width", {128, 512, 2048}) + 
.add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {128, 512, 2048}) + .add_int64_axis("num_rows", {16384, 32768, 262144}) .add_int64_axis("ngrams", {5, 10}); diff --git a/cpp/benchmarks/text/jaccard.cpp b/cpp/benchmarks/text/jaccard.cpp index d5b74da6773..5506501138b 100644 --- a/cpp/benchmarks/text/jaccard.cpp +++ b/cpp/benchmarks/text/jaccard.cpp @@ -28,17 +28,13 @@ static void bench_jaccard(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const substring_width = static_cast(state.get_int64("substring_width")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const strings_profile = data_profile_builder() - .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width) .no_validity(); auto const input_table = create_random_table( {cudf::type_id::STRING, cudf::type_id::STRING}, row_count{num_rows}, strings_profile); @@ -59,6 +55,7 @@ static void bench_jaccard(nvbench::state& state) NVBENCH_BENCH(bench_jaccard) .set_name("jaccard") + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {128, 512, 1024, 2048}) .add_int64_axis("num_rows", {32768, 131072, 262144}) - .add_int64_axis("row_width", {128, 512, 1024, 2048}) .add_int64_axis("substring_width", {5, 10}); diff --git a/cpp/benchmarks/text/normalize.cpp b/cpp/benchmarks/text/normalize.cpp index 71bccd80d39..594dc0de28a 100644 --- a/cpp/benchmarks/text/normalize.cpp +++ b/cpp/benchmarks/text/normalize.cpp @@ -28,16 +28,12 @@ static void bench_normalize(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const normalize_type = state.get_string("type"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); @@ -60,6 +56,7 @@ static void bench_normalize(nvbench::state& state) NVBENCH_BENCH(bench_normalize) .set_name("normalize") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"spaces", "characters", "to_lower"}); diff --git a/cpp/benchmarks/text/replace.cpp b/cpp/benchmarks/text/replace.cpp index 767ebab3eee..24ca4e5dfd7 100644 --- a/cpp/benchmarks/text/replace.cpp +++ b/cpp/benchmarks/text/replace.cpp @@ -31,11 +31,6 @@ static void bench_replace(nvbench::state& state) auto const num_rows = 
static_cast(state.get_int64("num_rows")); auto const row_width = static_cast(state.get_int64("row_width")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - std::vector words{" ", "one ", "two ", "three ", "four ", "five ", "six ", "sevén ", "eight ", "nine ", "ten ", "eleven ", "twelve ", "thirteen ", "fourteen ", @@ -71,5 +66,5 @@ static void bench_replace(nvbench::state& state) NVBENCH_BENCH(bench_replace) .set_name("replace") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}); + .add_int64_axis("row_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/text/subword.cpp b/cpp/benchmarks/text/subword.cpp index dd8df695d3e..0b4e3bdefa5 100644 --- a/cpp/benchmarks/text/subword.cpp +++ b/cpp/benchmarks/text/subword.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,6 @@ * limitations under the License. */ -#include -#include - #include #include @@ -24,6 +21,8 @@ #include +#include + #include #include #include @@ -54,40 +53,33 @@ static std::string create_hash_vocab_file() return hash_file; } -static void BM_subword_tokenizer(benchmark::State& state) +static void bench_subword_tokenizer(nvbench::state& state) { - auto const nrows = static_cast(state.range(0)); - std::vector h_strings(nrows, "This is a test "); + auto const num_rows = static_cast(state.get_int64("num_rows")); + + std::vector h_strings(num_rows, "This is a test "); cudf::test::strings_column_wrapper strings(h_strings.begin(), h_strings.end()); static std::string hash_file = create_hash_vocab_file(); std::vector offsets{14}; - uint32_t max_sequence_length = 64; - uint32_t stride = 48; - uint32_t do_truncate = 0; - uint32_t do_lower = 1; - // - auto vocab = nvtext::load_vocabulary_file(hash_file); - for (auto _ : state) { - cuda_event_timer raii(state, true); - auto result = nvtext::subword_tokenize(cudf::strings_column_view{strings}, - *vocab, - max_sequence_length, - stride, - do_lower, - do_truncate); - } -} + uint32_t max_sequence = 64; + uint32_t stride = 48; + uint32_t do_truncate = 0; + uint32_t do_lower = 1; -class Subword : public cudf::benchmark {}; + auto input = cudf::strings_column_view{strings}; -#define SUBWORD_BM_BENCHMARK_DEFINE(name) \ - BENCHMARK_DEFINE_F(Subword, name)(::benchmark::State & state) { BM_subword_tokenizer(state); } \ - BENCHMARK_REGISTER_F(Subword, name) \ - ->RangeMultiplier(2) \ - ->Range(1 << 10, 1 << 17) \ - ->UseManualTime() \ - ->Unit(benchmark::kMillisecond); + state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); + auto chars_size = input.chars_size(cudf::get_default_stream()); + state.add_global_memory_reads(chars_size); + state.add_global_memory_writes(num_rows * max_sequence); -SUBWORD_BM_BENCHMARK_DEFINE(BM_subword_tokenizer); + auto vocab = nvtext::load_vocabulary_file(hash_file); + state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { + auto result = + nvtext::subword_tokenize(input, *vocab, max_sequence, stride, do_lower, do_truncate); + }); +} -// BENCHMARK_MAIN(); +NVBENCH_BENCH(bench_subword_tokenizer) + .set_name("subword_tokenize") + 
.add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/text/tokenize.cpp b/cpp/benchmarks/text/tokenize.cpp index e83310e0343..b9590c5539f 100644 --- a/cpp/benchmarks/text/tokenize.cpp +++ b/cpp/benchmarks/text/tokenize.cpp @@ -31,17 +31,13 @@ static void bench_tokenize(nvbench::state& state) { auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); auto const tokenize_type = state.get_string("type"); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - data_profile const profile = data_profile_builder() - .distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width) + .distribution(cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width) .no_validity(); auto const column = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); cudf::strings_column_view input(column->view()); @@ -82,6 +78,7 @@ static void bench_tokenize(nvbench::state& state) NVBENCH_BENCH(bench_tokenize) .set_name("tokenize") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {4096, 32768, 262144, 2097152, 16777216}) + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}) .add_string_axis("type", {"whitespace", "multi", "count", "count_multi", "ngrams", "characters"}); diff --git a/cpp/benchmarks/text/vocab.cpp b/cpp/benchmarks/text/vocab.cpp index 523d277df18..0502f375d99 100644 --- a/cpp/benchmarks/text/vocab.cpp +++ b/cpp/benchmarks/text/vocab.cpp @@ -33,16 +33,12 @@ static void bench_vocab_tokenize(nvbench::state& state) { auto const stream = cudf::get_default_stream(); auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); + auto const min_width = static_cast(state.get_int64("min_width")); + auto const max_width = static_cast(state.get_int64("max_width")); - if (static_cast(num_rows) * static_cast(row_width) >= - static_cast(std::numeric_limits::max())) { - state.skip("Skip benchmarks greater than size_type limit"); - } - - auto const column = [num_rows, row_width] { + auto const column = [num_rows, min_width, max_width] { data_profile const profile = data_profile_builder().no_validity().distribution( - cudf::type_id::STRING, distribution_id::NORMAL, 0, row_width); + cudf::type_id::STRING, distribution_id::NORMAL, min_width, max_width); auto const col = create_random_column(cudf::type_id::STRING, row_count{num_rows}, profile); return cudf::strings::filter_characters_of_type( cudf::strings_column_view(col->view()), @@ -85,5 +81,6 @@ static void bench_vocab_tokenize(nvbench::state& state) NVBENCH_BENCH(bench_vocab_tokenize) .set_name("vocab_tokenize") - .add_int64_axis("row_width", {32, 64, 128, 256, 512, 1024}) - .add_int64_axis("num_rows", {262144, 524288, 1048576, 2097152, 4194304, 16777216}); + .add_int64_axis("min_width", {0}) + .add_int64_axis("max_width", {32, 64, 128, 256}) + .add_int64_axis("num_rows", {32768, 262144, 2097152}); diff --git a/cpp/benchmarks/text/word_minhash.cpp b/cpp/benchmarks/text/word_minhash.cpp deleted file mode 100644 index adc3dddc59c..00000000000 --- a/cpp/benchmarks/text/word_minhash.cpp 
+++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2024, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#include - -#include -#include -#include -#include - -#include - -#include - -#include - -static void bench_word_minhash(nvbench::state& state) -{ - auto const num_rows = static_cast(state.get_int64("num_rows")); - auto const row_width = static_cast(state.get_int64("row_width")); - auto const seed_count = static_cast(state.get_int64("seed_count")); - auto const base64 = state.get_int64("hash_type") == 64; - - data_profile const strings_profile = - data_profile_builder().distribution(cudf::type_id::STRING, distribution_id::NORMAL, 0, 5); - auto strings_table = - create_random_table({cudf::type_id::STRING}, row_count{num_rows}, strings_profile); - - auto const num_offsets = (num_rows / row_width) + 1; - auto offsets = cudf::sequence(num_offsets, - cudf::numeric_scalar(0), - cudf::numeric_scalar(row_width)); - - auto source = cudf::make_lists_column(num_offsets - 1, - std::move(offsets), - std::move(strings_table->release().front()), - 0, - rmm::device_buffer{}); - - data_profile const seeds_profile = data_profile_builder().no_validity().distribution( - cudf::type_to_id(), distribution_id::NORMAL, 0, 256); - auto const seed_type = base64 ? cudf::type_id::UINT64 : cudf::type_id::UINT32; - auto const seeds_table = create_random_table({seed_type}, row_count{seed_count}, seeds_profile); - auto seeds = seeds_table->get_column(0); - - state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); - - cudf::strings_column_view input(cudf::lists_column_view(source->view()).child()); - auto chars_size = input.chars_size(cudf::get_default_stream()); - state.add_global_memory_reads(chars_size); - state.add_global_memory_writes(num_rows); // output are hashes - - state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { - auto result = base64 ? 
nvtext::word_minhash64(source->view(), seeds.view()) - : nvtext::word_minhash(source->view(), seeds.view()); - }); -} - -NVBENCH_BENCH(bench_word_minhash) - .set_name("word_minhash") - .add_int64_axis("num_rows", {131072, 262144, 524288, 1048576, 2097152}) - .add_int64_axis("row_width", {10, 100, 1000}) - .add_int64_axis("seed_count", {2, 25}) - .add_int64_axis("hash_type", {32, 64}); diff --git a/cpp/include/cudf/detail/copy_if_else.cuh b/cpp/include/cudf/detail/copy_if_else.cuh index 5dc75b1a3fb..a7efb4e6e93 100644 --- a/cpp/include/cudf/detail/copy_if_else.cuh +++ b/cpp/include/cudf/detail/copy_if_else.cuh @@ -44,10 +44,11 @@ __launch_bounds__(block_size) CUDF_KERNEL mutable_column_device_view out, size_type* __restrict__ const valid_count) { - auto tidx = cudf::detail::grid_1d::global_thread_id(); - auto const stride = cudf::detail::grid_1d::grid_stride(); - int const warp_id = tidx / cudf::detail::warp_size; - size_type const warps_per_grid = gridDim.x * block_size / cudf::detail::warp_size; + auto tidx = cudf::detail::grid_1d::global_thread_id(); + + auto const stride = cudf::detail::grid_1d::grid_stride(); + auto const warp_id = tidx / cudf::detail::warp_size; + auto const warps_per_grid = stride / cudf::detail::warp_size; // begin/end indices for the column data size_type const begin = 0; @@ -60,7 +61,7 @@ __launch_bounds__(block_size) CUDF_KERNEL // lane id within the current warp constexpr size_type leader_lane{0}; - int const lane_id = threadIdx.x % cudf::detail::warp_size; + auto const lane_id = threadIdx.x % cudf::detail::warp_size; size_type warp_valid_count{0}; diff --git a/cpp/src/copying/concatenate.cu b/cpp/src/copying/concatenate.cu index d8419760120..6fc49afd7ac 100644 --- a/cpp/src/copying/concatenate.cu +++ b/cpp/src/copying/concatenate.cu @@ -308,7 +308,11 @@ std::unique_ptr for_each_concatenate(host_span views, auto count = 0; for (auto& v : views) { - thrust::copy(rmm::exec_policy(stream), v.begin(), v.end(), m_view.begin() + count); + cudaMemcpyAsync(m_view.begin() + count, + v.begin(), + v.size() * sizeof(T), + cudaMemcpyDeviceToDevice, + stream.value()); count += v.size(); } diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 2f6942fe139..cc5f256ea80 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -464,17 +464,6 @@ std::unique_ptr make_all_nulls_column(schema_element const& schema, */ column_name_info make_column_name_info(schema_element const& schema, std::string const& col_name); -/** - * @brief Get the path data type of a column by path if present in input schema - * - * @param path path of the column - * @param options json reader options which holds schema - * @return data type of the column if present - */ -std::optional get_path_data_type( - host_span const> path, - cudf::io::json_reader_options const& options); - /** * @brief Helper class to get path of a column by column id from reduced column tree * diff --git a/cpp/src/io/json/parser_features.cpp b/cpp/src/io/json/parser_features.cpp index 2da320b2af3..4b4827ca8d9 100644 --- a/cpp/src/io/json/parser_features.cpp +++ b/cpp/src/io/json/parser_features.cpp @@ -68,78 +68,6 @@ void json_reader_options::set_dtypes(schema_element types) } // namespace cudf::io namespace cudf::io::json::detail { -namespace { - -// example schema and its path. 
-// "a": int {"a", int} -// "a": [ int ] {"a", list}, {"element", int} -// "a": { "b": int} {"a", struct}, {"b", int} -// "a": [ {"b": int }] {"a", list}, {"element", struct}, {"b", int} -// "a": [ null] {"a", list}, {"element", str} -// back() is root. -// front() is leaf. -/** - * @brief Get the path data type of a column by path if present in input schema - * - * @param path path of the json column - * @param root root of input schema element - * @return data type of the column if present, otherwise std::nullopt - */ -std::optional get_path_data_type( - host_span const> path, schema_element const& root) -{ - if (path.empty() || path.size() == 1) { - return root.type; - } else { - if (path.back().second == NC_STRUCT && root.type.id() == type_id::STRUCT) { - auto const child_name = path.first(path.size() - 1).back().first; - auto const child_schema_it = root.child_types.find(child_name); - return (child_schema_it != std::end(root.child_types)) - ? get_path_data_type(path.first(path.size() - 1), child_schema_it->second) - : std::optional{}; - } else if (path.back().second == NC_LIST && root.type.id() == type_id::LIST) { - auto const child_schema_it = root.child_types.find(list_child_name); - return (child_schema_it != std::end(root.child_types)) - ? get_path_data_type(path.first(path.size() - 1), child_schema_it->second) - : std::optional{}; - } - return std::optional{}; - } -} - -std::optional child_schema_element(std::string const& col_name, - cudf::io::json_reader_options const& options) -{ - return std::visit( - cudf::detail::visitor_overload{ - [col_name](std::vector const& user_dtypes) -> std::optional { - auto column_index = atol(col_name.data()); - return (static_cast(column_index) < user_dtypes.size()) - ? std::optional{{user_dtypes[column_index]}} - : std::optional{}; - }, - [col_name]( - std::map const& user_dtypes) -> std::optional { - return (user_dtypes.find(col_name) != std::end(user_dtypes)) - ? std::optional{{user_dtypes.find(col_name)->second}} - : std::optional{}; - }, - [col_name]( - std::map const& user_dtypes) -> std::optional { - return (user_dtypes.find(col_name) != std::end(user_dtypes)) - ? user_dtypes.find(col_name)->second - : std::optional{}; - }, - [col_name](schema_element const& user_dtypes) -> std::optional { - return (user_dtypes.child_types.find(col_name) != std::end(user_dtypes.child_types)) - ? user_dtypes.child_types.find(col_name)->second - : std::optional{}; - }}, - options.get_dtypes()); -} - -} // namespace - /// Created an empty column of the specified schema struct empty_column_functor { rmm::cuda_stream_view stream; @@ -311,48 +239,4 @@ column_name_info make_column_name_info(schema_element const& schema, std::string } return info; } - -std::optional get_path_data_type( - host_span const> path, - cudf::io::json_reader_options const& options) -{ - if (path.empty()) return {}; - std::optional col_schema = child_schema_element(path.back().first, options); - // check if it has value, then do recursive call and return. - if (col_schema.has_value()) { - return get_path_data_type(path, col_schema.value()); - } else { - return {}; - } -} - -// idea: write a memoizer using template and lambda?, then call recursively. -std::vector path_from_tree::get_path(NodeIndexT this_col_id) -{ - std::vector path; - // stops at root. - while (this_col_id != parent_node_sentinel) { - auto type = column_categories[this_col_id]; - std::string name = ""; - // code same as name_and_parent_index lambda. 
- auto parent_col_id = column_parent_ids[this_col_id]; - if (parent_col_id == parent_node_sentinel || column_categories[parent_col_id] == NC_LIST) { - if (is_array_of_arrays && parent_col_id == row_array_parent_col_id) { - name = column_names[this_col_id]; - } else { - name = list_child_name; - } - } else if (column_categories[parent_col_id] == NC_FN) { - auto field_name_col_id = parent_col_id; - parent_col_id = column_parent_ids[parent_col_id]; - name = column_names[field_name_col_id]; - } - // "name": type/schema - path.emplace_back(name, type); - this_col_id = parent_col_id; - if (this_col_id == row_array_parent_col_id) return path; - } - return {}; -} - } // namespace cudf::io::json::detail diff --git a/cpp/src/io/orc/writer_impl.cu b/cpp/src/io/orc/writer_impl.cu index 0906017ee61..8e532b01788 100644 --- a/cpp/src/io/orc/writer_impl.cu +++ b/cpp/src/io/orc/writer_impl.cu @@ -28,6 +28,7 @@ #include #include +#include #include #include #include @@ -1386,29 +1387,34 @@ encoded_footer_statistics finish_statistic_blobs(Footer const& footer, // we know the size of each array. The number of stripes per column in a chunk array can // be calculated by dividing the number of chunks by the number of columns. // That many chunks need to be copied at a time to the proper destination. - size_t num_entries_seen = 0; + size_t num_entries_seen = 0; + auto const num_buffers_to_copy = per_chunk_stats.stripe_stat_chunks.size() * num_columns * 2; + auto h_srcs = cudf::detail::make_empty_host_vector(num_buffers_to_copy, stream); + auto h_dsts = cudf::detail::make_empty_host_vector(num_buffers_to_copy, stream); + auto h_lens = cudf::detail::make_empty_host_vector(num_buffers_to_copy, stream); + for (size_t i = 0; i < per_chunk_stats.stripe_stat_chunks.size(); ++i) { auto const stripes_per_col = per_chunk_stats.stripe_stat_chunks[i].size() / num_columns; - auto const chunk_bytes = stripes_per_col * sizeof(statistics_chunk); - auto const merge_bytes = stripes_per_col * sizeof(statistics_merge_group); for (size_t col = 0; col < num_columns; ++col) { - CUDF_CUDA_TRY( - cudaMemcpyAsync(stat_chunks.data() + (num_stripes * col) + num_entries_seen, - per_chunk_stats.stripe_stat_chunks[i].data() + col * stripes_per_col, - chunk_bytes, - cudaMemcpyDefault, - stream.value())); - CUDF_CUDA_TRY( - cudaMemcpyAsync(stats_merge.device_ptr() + (num_stripes * col) + num_entries_seen, - per_chunk_stats.stripe_stat_merge[i].device_ptr() + col * stripes_per_col, - merge_bytes, - cudaMemcpyDefault, - stream.value())); + h_srcs.push_back(per_chunk_stats.stripe_stat_chunks[i].data() + col * stripes_per_col); + h_dsts.push_back(stat_chunks.data() + (num_stripes * col) + num_entries_seen); + h_lens.push_back(stripes_per_col * sizeof(statistics_chunk)); + + h_srcs.push_back(per_chunk_stats.stripe_stat_merge[i].device_ptr() + col * stripes_per_col); + h_dsts.push_back(stats_merge.device_ptr() + (num_stripes * col) + num_entries_seen); + h_lens.push_back(stripes_per_col * sizeof(statistics_merge_group)); } num_entries_seen += stripes_per_col; } + auto const& mr = cudf::get_current_device_resource_ref(); + auto const d_srcs = cudf::detail::make_device_uvector_async(h_srcs, stream, mr); + auto const d_dsts = cudf::detail::make_device_uvector_async(h_dsts, stream, mr); + auto const d_lens = cudf::detail::make_device_uvector_async(h_lens, stream, mr); + cudf::detail::batched_memcpy_async( + d_srcs.begin(), d_dsts.begin(), d_lens.begin(), d_srcs.size(), stream); + auto file_stats_merge = cudf::detail::make_host_vector(num_file_blobs, 
stream); for (auto i = 0u; i < num_file_blobs; ++i) { diff --git a/cpp/src/partitioning/partitioning.cu b/cpp/src/partitioning/partitioning.cu index ebab3beb08f..d6b85db3f0f 100644 --- a/cpp/src/partitioning/partitioning.cu +++ b/cpp/src/partitioning/partitioning.cu @@ -138,7 +138,7 @@ CUDF_KERNEL void compute_row_partition_numbers(row_hasher_t the_hasher, auto const stride = cudf::detail::grid_1d::grid_stride(); // Initialize local histogram - size_type partition_number = threadIdx.x; + thread_index_type partition_number = threadIdx.x; while (partition_number < num_partitions) { shared_partition_sizes[partition_number] = 0; partition_number += blockDim.x; @@ -207,7 +207,7 @@ CUDF_KERNEL void compute_row_output_locations(size_type* __restrict__ row_partit extern __shared__ size_type shared_partition_offsets[]; // Initialize array of this blocks offsets from global array - size_type partition_number = threadIdx.x; + thread_index_type partition_number = threadIdx.x; while (partition_number < num_partitions) { shared_partition_offsets[partition_number] = block_partition_offsets[partition_number * gridDim.x + blockIdx.x]; @@ -303,7 +303,8 @@ CUDF_KERNEL void copy_block_partitions(InputIter input_iter, // Fetch the offset in the output buffer of each partition in this thread // block - for (size_type ipartition = threadIdx.x; ipartition < num_partitions; ipartition += blockDim.x) { + for (thread_index_type ipartition = threadIdx.x; ipartition < num_partitions; + ipartition += blockDim.x) { partition_offset_global[ipartition] = scanned_block_partition_sizes[ipartition * gridDim.x + blockIdx.x]; } diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index d27420658d6..2128bacff80 100644 --- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu +++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu @@ -385,7 +385,7 @@ CUDF_KERNEL void generate_cluster_limits_kernel(int delta, size_type const* group_cluster_offsets, bool has_nulls) { - int const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto const tid = cudf::detail::grid_1d::global_thread_id(); auto const group_index = tid; if (group_index >= num_groups) { return; } diff --git a/cpp/src/transform/jit/kernel.cu b/cpp/src/transform/jit/kernel.cu index 4fd0369c26b..9d96c11c3f2 100644 --- a/cpp/src/transform/jit/kernel.cu +++ b/cpp/src/transform/jit/kernel.cu @@ -38,8 +38,9 @@ CUDF_KERNEL void kernel(cudf::size_type size, TypeOut* out_data, TypeIn* in_data { // cannot use global_thread_id utility due to a JIT build issue by including // the `cudf/detail/utilities/cuda.cuh` header - thread_index_type const start = threadIdx.x + blockIdx.x * blockDim.x; - thread_index_type const stride = blockDim.x * gridDim.x; + auto const block_size = static_cast(blockDim.x); + thread_index_type const start = threadIdx.x + blockIdx.x * block_size; + thread_index_type const stride = block_size * gridDim.x; for (auto i = start; i < static_cast(size); i += stride) { GENERIC_UNARY_OP(&out_data[i], in_data[i]); diff --git a/cpp/src/transform/row_bit_count.cu b/cpp/src/transform/row_bit_count.cu index 66bbe532e46..39c11295fbd 100644 --- a/cpp/src/transform/row_bit_count.cu +++ b/cpp/src/transform/row_bit_count.cu @@ -413,7 +413,7 @@ CUDF_KERNEL void compute_segment_sizes(device_span col size_type max_branch_depth) { extern __shared__ row_span thread_branch_stacks[]; - int const tid = threadIdx.x + blockIdx.x * blockDim.x; + auto const tid = static_cast(cudf::detail::grid_1d::global_thread_id()); auto const 
num_segments = static_cast(output.size()); if (tid >= num_segments) { return; } diff --git a/cpp/tests/bitmask/set_nullmask_tests.cu b/cpp/tests/bitmask/set_nullmask_tests.cu index e95c9fb41c6..9f8d22ea94d 100644 --- a/cpp/tests/bitmask/set_nullmask_tests.cu +++ b/cpp/tests/bitmask/set_nullmask_tests.cu @@ -31,6 +31,7 @@ #include #include +namespace { struct valid_bit_functor { cudf::bitmask_type const* _null_mask; __device__ bool operator()(cudf::size_type element_index) const noexcept @@ -38,13 +39,7 @@ struct valid_bit_functor { return cudf::bit_is_set(_null_mask, element_index); } }; - -std::ostream& operator<<(std::ostream& stream, thrust::host_vector const& bits) -{ - for (auto _bit : bits) - stream << int(_bit); - return stream; -} +} // namespace struct SetBitmaskTest : public cudf::test::BaseFixture { void expect_bitmask_equal(cudf::bitmask_type const* bitmask, // Device Ptr diff --git a/cpp/tests/bitmask/valid_if_tests.cu b/cpp/tests/bitmask/valid_if_tests.cu index 96f122f21a8..8ffcc552ecb 100644 --- a/cpp/tests/bitmask/valid_if_tests.cu +++ b/cpp/tests/bitmask/valid_if_tests.cu @@ -28,6 +28,7 @@ struct ValidIfTest : public cudf::test::BaseFixture {}; +namespace { struct odds_valid { __host__ __device__ bool operator()(cudf::size_type i) { return i % 2; } }; @@ -37,6 +38,7 @@ struct all_valid { struct all_null { __host__ __device__ bool operator()(cudf::size_type i) { return false; } }; +} // namespace TEST_F(ValidIfTest, EmptyRange) { diff --git a/cpp/tests/column/bit_cast_test.cpp b/cpp/tests/column/bit_cast_test.cpp index 5570a7d498c..1f29ea9e5fc 100644 --- a/cpp/tests/column/bit_cast_test.cpp +++ b/cpp/tests/column/bit_cast_test.cpp @@ -25,6 +25,7 @@ #include +namespace { template struct rep_type_impl { using type = void; @@ -47,12 +48,14 @@ struct rep_type_impl()>> { template using rep_type_t = typename rep_type_impl::type; +} // namespace template struct ColumnViewAllTypesTests : public cudf::test::BaseFixture {}; TYPED_TEST_SUITE(ColumnViewAllTypesTests, cudf::test::FixedWidthTypes); +namespace { template void do_bit_cast(cudf::column_view const& column_view, Iterator begin, Iterator end) { @@ -102,6 +105,7 @@ void do_bit_cast(cudf::column_view const& column_view, Iterator begin, Iterator } } } +} // namespace TYPED_TEST(ColumnViewAllTypesTests, BitCast) { diff --git a/cpp/tests/column/compound_test.cu b/cpp/tests/column/compound_test.cu index d7e93fb22a3..fff3282fdd5 100644 --- a/cpp/tests/column/compound_test.cu +++ b/cpp/tests/column/compound_test.cu @@ -34,6 +34,7 @@ struct CompoundColumnTest : public cudf::test::BaseFixture {}; +namespace { template struct checker_for_level1 { ColumnDeviceView d_column; @@ -62,6 +63,7 @@ struct checker_for_level2 { return bcheck; } }; +} // namespace TEST_F(CompoundColumnTest, ChildrenLevel1) { diff --git a/cpp/tests/device_atomics/device_atomics_test.cu b/cpp/tests/device_atomics/device_atomics_test.cu index b81f8196d89..2fb24f6b31e 100644 --- a/cpp/tests/device_atomics/device_atomics_test.cu +++ b/cpp/tests/device_atomics/device_atomics_test.cu @@ -31,6 +31,7 @@ #include +namespace { template CUDF_KERNEL void gpu_atomic_test(T* result, T* data, size_t size) { @@ -109,6 +110,7 @@ std::enable_if_t(), T> accumulate(cudf::host_span xs.begin(), xs.end(), ys.begin(), [](T const& ts) { return ts.time_since_epoch().count(); }); return T{typename T::duration{std::accumulate(ys.begin(), ys.end(), 0)}}; } +} // namespace template struct AtomicsTest : public cudf::test::BaseFixture { diff --git a/cpp/tests/fixed_point/fixed_point_tests.cpp 
b/cpp/tests/fixed_point/fixed_point_tests.cpp index b96c6909e55..f8f8d525043 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cpp +++ b/cpp/tests/fixed_point/fixed_point_tests.cpp @@ -577,10 +577,12 @@ TEST_F(FixedPointTest, Decimal32FloatVector) float_vector_test(0.15, 20, -2, std::multiplies<>()); } +namespace { struct cast_to_int32_fn { using decimal32 = fixed_point; int32_t __host__ __device__ operator()(decimal32 fp) { return static_cast(fp); } }; +} // namespace TYPED_TEST(FixedPointTestAllReps, FixedPointColumnWrapper) { diff --git a/cpp/tests/fixed_point/fixed_point_tests.cu b/cpp/tests/fixed_point/fixed_point_tests.cu index f34760341d8..ddc48c97012 100644 --- a/cpp/tests/fixed_point/fixed_point_tests.cu +++ b/cpp/tests/fixed_point/fixed_point_tests.cu @@ -72,10 +72,12 @@ TYPED_TEST(FixedPointTestAllReps, DecimalXXThrust) EXPECT_EQ(vec2, vec3); } +namespace { struct cast_to_int32_fn { using decimal32 = fixed_point; int32_t __host__ __device__ operator()(decimal32 fp) { return static_cast(fp); } }; +} // namespace TEST_F(FixedPointTest, DecimalXXThrustOnDevice) { diff --git a/cpp/tests/groupby/tdigest_tests.cu b/cpp/tests/groupby/tdigest_tests.cu index 4ae5d06b214..883a5093bd1 100644 --- a/cpp/tests/groupby/tdigest_tests.cu +++ b/cpp/tests/groupby/tdigest_tests.cu @@ -30,6 +30,7 @@ #include #include +namespace { /** * @brief Functor to generate a tdigest by key. * @@ -116,6 +117,7 @@ struct tdigest_groupby_simple_merge_op { return std::move(result.second[0].results[0]); } }; +} // namespace template struct TDigestAllTypes : public cudf::test::BaseFixture {}; @@ -508,6 +510,7 @@ TEST_F(TDigestMergeTest, EmptyGroups) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result.second[0].results[0]); } +namespace { std::unique_ptr do_agg( cudf::column_view key, cudf::column_view val, @@ -537,6 +540,7 @@ std::unique_ptr do_agg( return std::make_unique(std::move(result_columns)); } +} // namespace TEST_F(TDigestMergeTest, AllValuesAreNull) { diff --git a/cpp/tests/interop/dlpack_test.cpp b/cpp/tests/interop/dlpack_test.cpp index ef4b9dd9b8a..b7106e823dd 100644 --- a/cpp/tests/interop/dlpack_test.cpp +++ b/cpp/tests/interop/dlpack_test.cpp @@ -26,6 +26,7 @@ #include +namespace { struct dlpack_deleter { void operator()(DLManagedTensor* tensor) { tensor->deleter(tensor); } }; @@ -60,6 +61,7 @@ void validate_dtype(DLDataType const& dtype) EXPECT_EQ(1, dtype.lanes); EXPECT_EQ(sizeof(T) * 8, dtype.bits); } +} // namespace class DLPackUntypedTests : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/io/json/json_tree.cpp b/cpp/tests/io/json/json_tree.cpp index 887d4fa783f..5201a46ba7d 100644 --- a/cpp/tests/io/json/json_tree.cpp +++ b/cpp/tests/io/json/json_tree.cpp @@ -34,6 +34,8 @@ namespace cuio_json = cudf::io::json; +namespace { + // Host copy of tree_meta_t struct tree_meta_t2 { std::vector node_categories; @@ -43,8 +45,6 @@ struct tree_meta_t2 { std::vector node_range_end; }; -namespace { - tree_meta_t2 to_cpu_tree(cuio_json::tree_meta_t const& d_value, rmm::cuda_stream_view stream) { return {cudf::detail::make_std_vector_async(d_value.node_categories, stream), diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index f988ae24b38..a67830a7864 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -36,6 +36,8 @@ namespace cuio_json = cudf::io::json; +namespace { + struct h_tree_meta_t { std::vector node_categories; std::vector parent_node_ids; @@ -222,6 +224,7 @@ void run_test(std::string const& input, bool enable_lines 
= true) // assert equality between csr and meta formats ASSERT_TRUE(iseq); } +} // namespace struct JsonColumnTreeTests : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/io/parquet_chunked_reader_test.cu b/cpp/tests/io/parquet_chunked_reader_test.cu index 153a8a0c5aa..369376b6c95 100644 --- a/cpp/tests/io/parquet_chunked_reader_test.cu +++ b/cpp/tests/io/parquet_chunked_reader_test.cu @@ -1074,6 +1074,7 @@ TEST_F(ParquetChunkedReaderTest, TestChunkedReadNullCount) } while (reader.has_next()); } +namespace { constexpr size_t input_limit_expected_file_count = 4; std::vector input_limit_get_test_names(std::string const& base_filename) @@ -1133,6 +1134,7 @@ void input_limit_test_read(std::vector const& test_filenames, CUDF_TEST_EXPECT_TABLES_EQUIVALENT(*result.first, t); } } +} // namespace struct ParquetChunkedReaderInputLimitConstrainedTest : public cudf::test::BaseFixture {}; @@ -1189,6 +1191,7 @@ TEST_F(ParquetChunkedReaderInputLimitConstrainedTest, MixedColumns) struct ParquetChunkedReaderInputLimitTest : public cudf::test::BaseFixture {}; +namespace { struct offset_gen { int const group_size; __device__ int operator()(int i) { return i * group_size; } @@ -1198,6 +1201,8 @@ template struct value_gen { __device__ T operator()(int i) { return i % 1024; } }; +} // namespace + TEST_F(ParquetChunkedReaderInputLimitTest, List) { auto base_path = temp_env->get_temp_filepath("list"); @@ -1263,6 +1268,7 @@ TEST_F(ParquetChunkedReaderInputLimitTest, List) input_limit_test_read(test_filenames, tbl, 32 * 1024 * 1024, 64 * 1024 * 1024, expected_c); } +namespace { void tiny_list_rowgroup_test(bool just_list_col) { auto iter = thrust::make_counting_iterator(0); @@ -1320,6 +1326,7 @@ void tiny_list_rowgroup_test(bool just_list_col) CUDF_TEST_EXPECT_TABLES_EQUAL(*expected, *(result.first)); } +} // namespace TEST_F(ParquetChunkedReaderInputLimitTest, TinyListRowGroupsSingle) { @@ -1333,6 +1340,7 @@ TEST_F(ParquetChunkedReaderInputLimitTest, TinyListRowGroupsMixed) tiny_list_rowgroup_test(false); } +namespace { struct char_values { __device__ int8_t operator()(int i) { @@ -1341,6 +1349,8 @@ struct char_values { return index == 0 ? 'a' : (index == 1 ? 'b' : 'c'); } }; +} // namespace + TEST_F(ParquetChunkedReaderInputLimitTest, Mixed) { auto base_path = temp_env->get_temp_filepath("mixed_types"); diff --git a/cpp/tests/iterator/optional_iterator_test_numeric.cu b/cpp/tests/iterator/optional_iterator_test_numeric.cu index 257c0979017..8377060b6ec 100644 --- a/cpp/tests/iterator/optional_iterator_test_numeric.cu +++ b/cpp/tests/iterator/optional_iterator_test_numeric.cu @@ -26,16 +26,6 @@ using TestingTypes = cudf::test::NumericTypes; -namespace cudf { -// To print meanvar for debug. 
-// Needs to be in the cudf namespace for ADL -template -std::ostream& operator<<(std::ostream& os, cudf::meanvar const& rhs) -{ - return os << "[" << rhs.value << ", " << rhs.value_squared << ", " << rhs.count << "] "; -}; -} // namespace cudf - template struct NumericOptionalIteratorTest : public IteratorTest {}; @@ -46,6 +36,7 @@ TYPED_TEST(NumericOptionalIteratorTest, nonull_optional_iterator) } TYPED_TEST(NumericOptionalIteratorTest, null_optional_iterator) { null_optional_iterator(*this); } +namespace { // Transformers and Operators for optional_iterator test template struct transformer_optional_meanvar { @@ -65,6 +56,7 @@ template struct optional_to_meanvar { CUDF_HOST_DEVICE inline T operator()(cuda::std::optional const& v) { return v.value_or(T{0}); } }; +} // namespace // TODO: enable this test also at __CUDACC_DEBUG__ // This test causes fatal compilation error only at device debug mode. diff --git a/cpp/tests/iterator/pair_iterator_test_numeric.cu b/cpp/tests/iterator/pair_iterator_test_numeric.cu index 3447aa0dde6..5f707232953 100644 --- a/cpp/tests/iterator/pair_iterator_test_numeric.cu +++ b/cpp/tests/iterator/pair_iterator_test_numeric.cu @@ -24,16 +24,6 @@ using TestingTypes = cudf::test::NumericTypes; -namespace cudf { -// To print meanvar for debug. -// Needs to be in the cudf namespace for ADL -template -std::ostream& operator<<(std::ostream& os, cudf::meanvar const& rhs) -{ - return os << "[" << rhs.value << ", " << rhs.value_squared << ", " << rhs.count << "] "; -}; -} // namespace cudf - template struct NumericPairIteratorTest : public IteratorTest {}; @@ -53,6 +43,7 @@ struct transformer_pair_meanvar { }; }; +namespace { struct sum_if_not_null { template CUDF_HOST_DEVICE inline thrust::pair operator()(thrust::pair const& lhs, @@ -66,6 +57,7 @@ struct sum_if_not_null { return {rhs}; } }; +} // namespace // TODO: enable this test also at __CUDACC_DEBUG__ // This test causes fatal compilation error only at device debug mode. diff --git a/cpp/tests/quantiles/percentile_approx_test.cpp b/cpp/tests/quantiles/percentile_approx_test.cpp index 37414eb3fba..c146fd2ea4e 100644 --- a/cpp/tests/quantiles/percentile_approx_test.cpp +++ b/cpp/tests/quantiles/percentile_approx_test.cpp @@ -33,6 +33,7 @@ #include +namespace { std::unique_ptr arrow_percentile_approx(cudf::column_view const& _values, int delta, std::vector const& percentages) @@ -315,6 +316,7 @@ cudf::data_type get_appropriate_type() if constexpr (cudf::is_fixed_point()) { return cudf::data_type{cudf::type_to_id(), -7}; } return cudf::data_type{cudf::type_to_id()}; } +} // namespace using PercentileApproxTypes = cudf::test::Concat; diff --git a/cpp/tests/reductions/tdigest_tests.cu b/cpp/tests/reductions/tdigest_tests.cu index c8fec51e1c9..184725e17e0 100644 --- a/cpp/tests/reductions/tdigest_tests.cu +++ b/cpp/tests/reductions/tdigest_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -25,6 +25,7 @@ template struct ReductionTDigestAllTypes : public cudf::test::BaseFixture {}; TYPED_TEST_SUITE(ReductionTDigestAllTypes, cudf::test::NumericTypes); +namespace { struct reduce_op { std::unique_ptr operator()(cudf::column_view const& values, int delta) const { @@ -60,6 +61,7 @@ struct reduce_merge_op { return cudf::make_structs_column(tbl.num_rows(), std::move(cols), 0, rmm::device_buffer()); } }; +} // namespace TYPED_TEST(ReductionTDigestAllTypes, Simple) { diff --git a/cpp/tests/streams/interop_test.cpp b/cpp/tests/streams/interop_test.cpp index 7133baf6df1..79ea6b7d6d4 100644 --- a/cpp/tests/streams/interop_test.cpp +++ b/cpp/tests/streams/interop_test.cpp @@ -23,9 +23,11 @@ #include +namespace { struct dlpack_deleter { void operator()(DLManagedTensor* tensor) { tensor->deleter(tensor); } }; +} // namespace struct DLPackTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/transform/row_bit_count_test.cu b/cpp/tests/transform/row_bit_count_test.cu index 01a042130d6..7e203086fca 100644 --- a/cpp/tests/transform/row_bit_count_test.cu +++ b/cpp/tests/transform/row_bit_count_test.cu @@ -590,6 +590,7 @@ TEST_F(RowBitCount, EmptyChildColumnInListOfLists) cudf::test::fixed_width_column_wrapper{32, 32, 32, 32}); } +namespace { struct sum_functor { cudf::size_type const* s0; cudf::size_type const* s1; @@ -597,6 +598,7 @@ struct sum_functor { cudf::size_type operator() __device__(int i) { return s0[i] + s1[i] + s2[i]; } }; +} // namespace TEST_F(RowBitCount, Table) { diff --git a/cpp/tests/wrappers/timestamps_test.cu b/cpp/tests/wrappers/timestamps_test.cu index 4086c5a91bb..8e5129dfbd2 100644 --- a/cpp/tests/wrappers/timestamps_test.cu +++ b/cpp/tests/wrappers/timestamps_test.cu @@ -37,6 +37,7 @@ #include #include +namespace { template struct ChronoColumnTest : public cudf::test::BaseFixture { cudf::size_type size() { return cudf::size_type(100); } @@ -72,6 +73,7 @@ struct compare_chrono_elements_to_primitive_representation { return primitive == dur.count(); } }; +} // namespace TYPED_TEST_SUITE(ChronoColumnTest, cudf::test::ChronoTypes); @@ -103,6 +105,7 @@ TYPED_TEST(ChronoColumnTest, ChronoDurationsMatchPrimitiveRepresentation) *cudf::column_device_view::create(chrono_col)})); } +namespace { template struct compare_chrono_elements { cudf::binary_operator comp; @@ -129,6 +132,7 @@ struct compare_chrono_elements { } } }; +} // namespace TYPED_TEST(ChronoColumnTest, ChronosCanBeComparedInDeviceCode) { diff --git a/java/src/main/java/ai/rapids/cudf/Scalar.java b/java/src/main/java/ai/rapids/cudf/Scalar.java index 286b5c208c9..f3155bc5860 100644 --- a/java/src/main/java/ai/rapids/cudf/Scalar.java +++ b/java/src/main/java/ai/rapids/cudf/Scalar.java @@ -521,13 +521,28 @@ private static ColumnVector buildNullColumnVector(HostColumnVector.DataType host private static native long makeStructScalar(long[] viewHandles, boolean isValid); private static native long repeatString(long scalarHandle, int repeatTimes); - Scalar(DType type, long scalarHandle) { + /** + * Constructor to create a scalar from a native handle and a type. + * + * @param type The type of the scalar + * @param scalarHandle The native handle (pointer address) to the scalar data + */ + public Scalar(DType type, long scalarHandle) { this.type = type; this.offHeap = new OffHeapState(scalarHandle); MemoryCleaner.register(this, offHeap); incRefCount(); } + /** + * Get the native handle (native pointer address) for the scalar. 
+ * + * @return The native handle + */ + public long getScalarHandle() { + return offHeap.scalarHandle; + } + /** * Increment the reference count for this scalar. You need to call close on this * to decrement the reference count again. @@ -542,10 +557,6 @@ public synchronized Scalar incRefCount() { return this; } - long getScalarHandle() { - return offHeap.scalarHandle; - } - /** * Free the memory associated with a scalar. */ diff --git a/python/cudf/cudf/_lib/CMakeLists.txt b/python/cudf/cudf/_lib/CMakeLists.txt index efe96ff6c3e..427ffcc8c12 100644 --- a/python/cudf/cudf/_lib/CMakeLists.txt +++ b/python/cudf/cudf/_lib/CMakeLists.txt @@ -12,9 +12,8 @@ # the License. # ============================================================================= -set(cython_sources - column.pyx copying.pyx csv.pyx groupby.pyx interop.pyx parquet.pyx reduce.pyx scalar.pyx - sort.pyx stream_compaction.pyx string_casting.pyx strings_udf.pyx types.pyx utils.pyx +set(cython_sources column.pyx copying.pyx groupby.pyx interop.pyx scalar.pyx stream_compaction.pyx + string_casting.pyx strings_udf.pyx types.pyx utils.pyx ) set(linked_libraries cudf::cudf) @@ -30,6 +29,3 @@ target_include_directories(interop PUBLIC "$= -nrows and gm_max < nrows @@ -358,14 +354,13 @@ class PackedColumns(Serializable): header["index-names"] = self.index_names header["metadata"] = self._metadata.tobytes() for name, dtype in self.column_dtypes.items(): - dtype_header, dtype_frames = dtype.serialize() + dtype_header, dtype_frames = dtype.device_serialize() self.column_dtypes[name] = ( dtype_header, (len(frames), len(frames) + len(dtype_frames)), ) frames.extend(dtype_frames) header["column-dtypes"] = self.column_dtypes - header["type-serialized"] = pickle.dumps(type(self)) return header, frames @classmethod @@ -373,9 +368,9 @@ class PackedColumns(Serializable): column_dtypes = {} for name, dtype in header["column-dtypes"].items(): dtype_header, (start, stop) = dtype - column_dtypes[name] = pickle.loads( - dtype_header["type-serialized"] - ).deserialize(dtype_header, frames[start:stop]) + column_dtypes[name] = Serializable.device_deserialize( + dtype_header, frames[start:stop] + ) return cls( plc.contiguous_split.pack( plc.contiguous_split.unpack_from_memoryviews( diff --git a/python/cudf/cudf/_lib/csv.pyx b/python/cudf/cudf/_lib/csv.pyx deleted file mode 100644 index 641fc18c203..00000000000 --- a/python/cudf/cudf/_lib/csv.pyx +++ /dev/null @@ -1,414 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
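The PackedColumns serialization change above drops pickled dtype classes in favor of the Serializable device protocol. A minimal sketch of that round trip, assuming only the device_serialize/device_deserialize methods already used in the diff (cudf.core.abc.Serializable); the helper names are illustrative, not part of cudf:

from cudf.core.abc import Serializable

def pack_dtype(frames, dtype):
    # device_serialize returns (header, frames) with device buffers left on
    # the GPU; the header records which concrete type to rebuild, so no
    # pickled class object is required.
    dtype_header, dtype_frames = dtype.device_serialize()
    start = len(frames)
    frames.extend(dtype_frames)
    return dtype_header, (start, len(frames))

def unpack_dtype(entry, frames):
    # Mirror of pack_dtype: slice this dtype's frames back out and let the
    # header decide which type's deserialize to dispatch to.
    dtype_header, (start, stop) = entry
    return Serializable.device_deserialize(dtype_header, frames[start:stop])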
- -from libcpp cimport bool - -cimport pylibcudf.libcudf.types as libcudf_types - -from cudf._lib.types cimport dtype_to_pylibcudf_type - -import errno -import os -from collections import abc -from io import BytesIO, StringIO - -import numpy as np -import pandas as pd - -import cudf -from cudf.core.buffer import acquire_spill_lock - -from libcpp cimport bool - -from cudf._lib.utils cimport data_from_pylibcudf_io - -import pylibcudf as plc - -from cudf.api.types import is_hashable - -from pylibcudf.types cimport DataType - -CSV_HEX_TYPE_MAP = { - "hex": np.dtype("int64"), - "hex64": np.dtype("int64"), - "hex32": np.dtype("int32") -} - - -def validate_args( - object delimiter, - object sep, - bool delim_whitespace, - object decimal, - object thousands, - object nrows, - int skipfooter, - object byte_range, - int skiprows -): - if delim_whitespace: - if delimiter is not None: - raise ValueError("cannot set both delimiter and delim_whitespace") - if sep != ',': - raise ValueError("cannot set both sep and delim_whitespace") - - # Alias sep -> delimiter. - actual_delimiter = delimiter if delimiter else sep - - if decimal == actual_delimiter: - raise ValueError("decimal cannot be the same as delimiter") - - if thousands == actual_delimiter: - raise ValueError("thousands cannot be the same as delimiter") - - if nrows is not None and skipfooter != 0: - raise ValueError("cannot use both nrows and skipfooter parameters") - - if byte_range is not None: - if skipfooter != 0 or skiprows != 0 or nrows is not None: - raise ValueError("""cannot manually limit rows to be read when - using the byte range parameter""") - - -def read_csv( - object datasource, - object lineterminator="\n", - object quotechar='"', - int quoting=0, - bool doublequote=True, - object header="infer", - bool mangle_dupe_cols=True, - object usecols=None, - object sep=",", - object delimiter=None, - bool delim_whitespace=False, - bool skipinitialspace=False, - object names=None, - object dtype=None, - int skipfooter=0, - int skiprows=0, - bool dayfirst=False, - object compression="infer", - object thousands=None, - object decimal=".", - object true_values=None, - object false_values=None, - object nrows=None, - object byte_range=None, - bool skip_blank_lines=True, - object parse_dates=None, - object comment=None, - object na_values=None, - bool keep_default_na=True, - bool na_filter=True, - object prefix=None, - object index_col=None, -): - """ - Cython function to call into libcudf API, see `read_csv`. - - See Also - -------- - cudf.read_csv - """ - - if not isinstance(datasource, (BytesIO, StringIO, bytes)): - if not os.path.isfile(datasource): - raise FileNotFoundError( - errno.ENOENT, os.strerror(errno.ENOENT), datasource - ) - - if isinstance(datasource, StringIO): - datasource = datasource.read().encode() - elif isinstance(datasource, str) and not os.path.isfile(datasource): - datasource = datasource.encode() - - validate_args(delimiter, sep, delim_whitespace, decimal, thousands, - nrows, skipfooter, byte_range, skiprows) - - # Alias sep -> delimiter. 
- if delimiter is None: - delimiter = sep - - delimiter = str(delimiter) - - if byte_range is None: - byte_range = (0, 0) - - if compression is None: - c_compression = plc.io.types.CompressionType.NONE - else: - compression_map = { - "infer": plc.io.types.CompressionType.AUTO, - "gzip": plc.io.types.CompressionType.GZIP, - "bz2": plc.io.types.CompressionType.BZIP2, - "zip": plc.io.types.CompressionType.ZIP, - } - c_compression = compression_map[compression] - - # We need this later when setting index cols - orig_header = header - - if names is not None: - # explicitly mentioned name, so don't check header - if header is None or header == 'infer': - header = -1 - else: - header = header - names = list(names) - else: - if header is None: - header = -1 - elif header == 'infer': - header = 0 - - hex_cols = [] - - new_dtypes = [] - if dtype is not None: - if isinstance(dtype, abc.Mapping): - new_dtypes = dict() - for k, v in dtype.items(): - col_type = v - if is_hashable(v) and v in CSV_HEX_TYPE_MAP: - col_type = CSV_HEX_TYPE_MAP[v] - hex_cols.append(str(k)) - - new_dtypes[k] = _get_plc_data_type_from_dtype( - cudf.dtype(col_type) - ) - elif ( - cudf.api.types.is_scalar(dtype) or - isinstance(dtype, ( - np.dtype, pd.api.extensions.ExtensionDtype, type - )) - ): - if is_hashable(dtype) and dtype in CSV_HEX_TYPE_MAP: - dtype = CSV_HEX_TYPE_MAP[dtype] - hex_cols.append(0) - - new_dtypes.append( - _get_plc_data_type_from_dtype(dtype) - ) - elif isinstance(dtype, abc.Collection): - for index, col_dtype in enumerate(dtype): - if is_hashable(col_dtype) and col_dtype in CSV_HEX_TYPE_MAP: - col_dtype = CSV_HEX_TYPE_MAP[col_dtype] - hex_cols.append(index) - - new_dtypes.append( - _get_plc_data_type_from_dtype(col_dtype) - ) - else: - raise ValueError( - "dtype should be a scalar/str/list-like/dict-like" - ) - options = ( - plc.io.csv.CsvReaderOptions.builder(plc.io.SourceInfo([datasource])) - .compression(c_compression) - .mangle_dupe_cols(mangle_dupe_cols) - .byte_range_offset(byte_range[0]) - .byte_range_size(byte_range[1]) - .nrows(nrows if nrows is not None else -1) - .skiprows(skiprows) - .skipfooter(skipfooter) - .quoting(quoting) - .lineterminator(str(lineterminator)) - .quotechar(quotechar) - .decimal(decimal) - .delim_whitespace(delim_whitespace) - .skipinitialspace(skipinitialspace) - .skip_blank_lines(skip_blank_lines) - .doublequote(doublequote) - .keep_default_na(keep_default_na) - .na_filter(na_filter) - .dayfirst(dayfirst) - .build() - ) - - options.set_header(header) - - if names is not None: - options.set_names([str(name) for name in names]) - - if prefix is not None: - options.set_prefix(prefix) - - if usecols is not None: - if all(isinstance(col, int) for col in usecols): - options.set_use_cols_indexes(list(usecols)) - else: - options.set_use_cols_names([str(name) for name in usecols]) - - if delimiter is not None: - options.set_delimiter(delimiter) - - if thousands is not None: - options.set_thousands(thousands) - - if comment is not None: - options.set_comment(comment) - - if parse_dates is not None: - options.set_parse_dates(list(parse_dates)) - - if hex_cols is not None: - options.set_parse_hex(list(hex_cols)) - - options.set_dtypes(new_dtypes) - - if true_values is not None: - options.set_true_values([str(val) for val in true_values]) - - if false_values is not None: - options.set_false_values([str(val) for val in false_values]) - - if na_values is not None: - options.set_na_values([str(val) for val in na_values]) - - df = cudf.DataFrame._from_data( - 
*data_from_pylibcudf_io(plc.io.csv.read_csv(options)) - ) - - if dtype is not None: - if isinstance(dtype, abc.Mapping): - for k, v in dtype.items(): - if isinstance(cudf.dtype(v), cudf.CategoricalDtype): - df._data[str(k)] = df._data[str(k)].astype(v) - elif ( - cudf.api.types.is_scalar(dtype) or - isinstance(dtype, ( - np.dtype, pd.api.extensions.ExtensionDtype, type - )) - ): - if isinstance(cudf.dtype(dtype), cudf.CategoricalDtype): - df = df.astype(dtype) - elif isinstance(dtype, abc.Collection): - for index, col_dtype in enumerate(dtype): - if isinstance(cudf.dtype(col_dtype), cudf.CategoricalDtype): - col_name = df._column_names[index] - df._data[col_name] = df._data[col_name].astype(col_dtype) - - if names is not None and len(names) and isinstance(names[0], int): - df.columns = [int(x) for x in df._data] - elif names is None and header == -1 and cudf.get_option("mode.pandas_compatible"): - df.columns = [int(x) for x in df._column_names] - - # Set index if the index_col parameter is passed - if index_col is not None and index_col is not False: - if isinstance(index_col, int): - index_col_name = df._data.get_labels_by_index(index_col)[0] - df = df.set_index(index_col_name) - if isinstance(index_col_name, str) and \ - names is None and orig_header == "infer": - if index_col_name.startswith("Unnamed:"): - # TODO: Try to upstream it to libcudf - # csv reader in future - df._index.name = None - elif names is None: - df._index.name = index_col - else: - df = df.set_index(index_col) - - return df - - -@acquire_spill_lock() -def write_csv( - table, - object path_or_buf=None, - object sep=",", - object na_rep="", - bool header=True, - object lineterminator="\n", - int rows_per_chunk=8, - bool index=True, -): - """ - Cython function to call into libcudf API, see `write_csv`. - - See Also - -------- - cudf.to_csv - """ - index_and_not_empty = index is True and table.index is not None - columns = [ - col.to_pylibcudf(mode="read") for col in table.index._columns - ] if index_and_not_empty else [] - columns.extend(col.to_pylibcudf(mode="read") for col in table._columns) - col_names = [] - if header: - all_names = list(table.index.names) if index_and_not_empty else [] - all_names.extend( - na_rep if name is None or pd.isnull(name) - else name for name in table._column_names - ) - col_names = [ - '""' if (name in (None, '') and len(all_names) == 1) - else (str(name) if name not in (None, '') else '') - for name in all_names - ] - try: - plc.io.csv.write_csv( - ( - plc.io.csv.CsvWriterOptions.builder( - plc.io.SinkInfo([path_or_buf]), plc.Table(columns) - ) - .names(col_names) - .na_rep(na_rep) - .include_header(header) - .rows_per_chunk(rows_per_chunk) - .line_terminator(str(lineterminator)) - .inter_column_delimiter(str(sep)) - .true_value("True") - .false_value("False") - .build() - ) - ) - except OverflowError: - raise OverflowError( - f"Writing CSV file with chunksize={rows_per_chunk} failed. " - "Consider providing a smaller chunksize argument." 
- ) - - -cdef DataType _get_plc_data_type_from_dtype(object dtype) except *: - # TODO: Remove this work-around Dictionary types - # in libcudf are fully mapped to categorical columns: - # https://github.com/rapidsai/cudf/issues/3960 - if isinstance(dtype, cudf.CategoricalDtype): - dtype = dtype.categories.dtype - elif dtype == "category": - dtype = "str" - - if isinstance(dtype, str): - if str(dtype) == "date32": - return DataType( - libcudf_types.type_id.TIMESTAMP_DAYS - ) - elif str(dtype) in ("date", "date64"): - return DataType( - libcudf_types.type_id.TIMESTAMP_MILLISECONDS - ) - elif str(dtype) == "timestamp": - return DataType( - libcudf_types.type_id.TIMESTAMP_MILLISECONDS - ) - elif str(dtype) == "timestamp[us]": - return DataType( - libcudf_types.type_id.TIMESTAMP_MICROSECONDS - ) - elif str(dtype) == "timestamp[s]": - return DataType( - libcudf_types.type_id.TIMESTAMP_SECONDS - ) - elif str(dtype) == "timestamp[ms]": - return DataType( - libcudf_types.type_id.TIMESTAMP_MILLISECONDS - ) - elif str(dtype) == "timestamp[ns]": - return DataType( - libcudf_types.type_id.TIMESTAMP_NANOSECONDS - ) - - dtype = cudf.dtype(dtype) - return dtype_to_pylibcudf_type(dtype) diff --git a/python/cudf/cudf/_lib/io/CMakeLists.txt b/python/cudf/cudf/_lib/io/CMakeLists.txt deleted file mode 100644 index e7408cf2852..00000000000 --- a/python/cudf/cudf/_lib/io/CMakeLists.txt +++ /dev/null @@ -1,21 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. -# ============================================================================= - -set(cython_sources utils.pyx) -set(linked_libraries cudf::cudf) -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX io_ ASSOCIATED_TARGETS cudf -) diff --git a/python/cudf/cudf/_lib/io/__init__.pxd b/python/cudf/cudf/_lib/io/__init__.pxd deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/io/__init__.py b/python/cudf/cudf/_lib/io/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/io/utils.pxd b/python/cudf/cudf/_lib/io/utils.pxd deleted file mode 100644 index 9b8bab012e2..00000000000 --- a/python/cudf/cudf/_lib/io/utils.pxd +++ /dev/null @@ -1,31 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
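The deleted read_csv wrapper above resolves the "hex"/"hex32"/"hex64" dtype aliases through CSV_HEX_TYPE_MAP before building the pylibcudf reader options. A small usage sketch of that behavior through the public cudf API (illustrative data; assumes cudf.read_csv keeps accepting the hex aliases):

import cudf
from io import StringIO

# Columns requested as "hex" are parsed from hexadecimal literals and stored
# as int64 ("hex32" would store int32 instead).
csv_data = StringIO("addr,value\n0xff,1\n0x10,2\n")
df = cudf.read_csv(csv_data, dtype={"addr": "hex", "value": "int64"})
# df["addr"] now holds 255 and 16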
- -from libcpp.memory cimport unique_ptr -from libcpp.vector cimport vector - -from pylibcudf.libcudf.io.data_sink cimport data_sink -from pylibcudf.libcudf.io.types cimport ( - column_name_info, - sink_info, - source_info, -) - -from cudf._lib.column cimport Column - - -cdef add_df_col_struct_names( - df, - child_names_dict -) -cdef update_col_struct_field_names( - Column col, - child_names -) -cdef update_struct_field_names( - table, - vector[column_name_info]& schema_info -) -cdef Column update_column_struct_field_names( - Column col, - column_name_info& info -) diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx deleted file mode 100644 index df4675be599..00000000000 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ /dev/null @@ -1,74 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - - -from libcpp.string cimport string - -from libcpp.vector cimport vector - -from pylibcudf.libcudf.io.types cimport column_name_info - -from cudf._lib.column cimport Column - -from cudf.core.dtypes import StructDtype - -cdef add_df_col_struct_names(df, child_names_dict): - for name, child_names in child_names_dict.items(): - col = df._data[name] - - df._data[name] = update_col_struct_field_names(col, child_names) - - -cdef update_col_struct_field_names(Column col, child_names): - if col.children: - children = list(col.children) - for i, (child, names) in enumerate(zip(children, child_names.values())): - children[i] = update_col_struct_field_names( - child, - names - ) - col.set_base_children(tuple(children)) - - if isinstance(col.dtype, StructDtype): - col = col._rename_fields( - child_names.keys() - ) - - return col - - -cdef update_struct_field_names( - table, - vector[column_name_info]& schema_info -): - # Deprecated, remove in favor of add_col_struct_names - # when a reader is ported to pylibcudf - for i, (name, col) in enumerate(table._column_labels_and_values): - table._data[name] = update_column_struct_field_names( - col, schema_info[i] - ) - - -cdef Column update_column_struct_field_names( - Column col, - column_name_info& info -): - cdef vector[string] field_names - - if col.children: - children = list(col.children) - for i, child in enumerate(children): - children[i] = update_column_struct_field_names( - child, - info.children[i] - ) - col.set_base_children(tuple(children)) - - if isinstance(col.dtype, StructDtype): - field_names.reserve(len(col.base_children)) - for i in range(info.children.size()): - field_names.push_back(info.children[i].name) - col = col._rename_fields( - field_names - ) - - return col diff --git a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt b/python/cudf/cudf/_lib/nvtext/CMakeLists.txt deleted file mode 100644 index 22ec5d472f2..00000000000 --- a/python/cudf/cudf/_lib/nvtext/CMakeLists.txt +++ /dev/null @@ -1,24 +0,0 @@ -# ============================================================================= -# Copyright (c) 2022-2024, NVIDIA CORPORATION. -# -# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except -# in compliance with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software distributed under the License -# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express -# or implied. See the License for the specific language governing permissions and limitations under -# the License. 
-# ============================================================================= - -set(cython_sources - byte_pair_encode.pyx edit_distance.pyx generate_ngrams.pyx jaccard.pyx minhash.pyx - ngrams_tokenize.pyx normalize.pyx replace.pyx stemmer.pyx subword_tokenize.pyx tokenize.pyx -) -set(linked_libraries cudf::cudf) -rapids_cython_create_modules( - CXX - SOURCE_FILES "${cython_sources}" - LINKED_LIBRARIES "${linked_libraries}" MODULE_PREFIX nvtext_ ASSOCIATED_TARGETS cudf -) diff --git a/python/cudf/cudf/_lib/nvtext/__init__.pxd b/python/cudf/cudf/_lib/nvtext/__init__.pxd deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/nvtext/__init__.py b/python/cudf/cudf/_lib/nvtext/__init__.py deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx b/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx deleted file mode 100644 index 2b2762eead2..00000000000 --- a/python/cudf/cudf/_lib/nvtext/byte_pair_encode.pyx +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. - - -from cudf.core.buffer import acquire_spill_lock - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext -from pylibcudf.nvtext.byte_pair_encode import BPEMergePairs # no-cython-lint - - -@acquire_spill_lock() -def byte_pair_encoding( - Column strings, - object merge_pairs, - object separator -): - return Column.from_pylibcudf( - nvtext.byte_pair_encode.byte_pair_encoding( - strings.to_pylibcudf(mode="read"), - merge_pairs, - separator.device_value.c_value - ) - ) diff --git a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx b/python/cudf/cudf/_lib/nvtext/edit_distance.pyx deleted file mode 100644 index 3dd99c42d76..00000000000 --- a/python/cudf/cudf/_lib/nvtext/edit_distance.pyx +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from cudf.core.buffer import acquire_spill_lock - -from pylibcudf cimport nvtext - -from cudf._lib.column cimport Column - - -@acquire_spill_lock() -def edit_distance(Column strings, Column targets): - result = nvtext.edit_distance.edit_distance( - strings.to_pylibcudf(mode="read"), - targets.to_pylibcudf(mode="read") - ) - return Column.from_pylibcudf(result) - - -@acquire_spill_lock() -def edit_distance_matrix(Column strings): - result = nvtext.edit_distance.edit_distance_matrix( - strings.to_pylibcudf(mode="read") - ) - return Column.from_pylibcudf(result) diff --git a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx deleted file mode 100644 index 7fdf9258b7f..00000000000 --- a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
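The deleted nvtext modules above (and the ones that follow) are thin wrappers that all share one shape: convert cudf Columns to pylibcudf, call the corresponding pylibcudf.nvtext routine, and wrap the result back into a cudf Column. A generic sketch of that pattern, using only helpers that appear in the diff (the name call_nvtext is illustrative):

from cudf.core.buffer import acquire_spill_lock
from cudf._lib.column import Column

@acquire_spill_lock()
def call_nvtext(func, column, *args):
    # func is any pylibcudf.nvtext routine whose first argument is a column,
    # e.g. pylibcudf.nvtext.edit_distance.edit_distance_matrix.
    result = func(column.to_pylibcudf(mode="read"), *args)
    return Column.from_pylibcudf(result)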
- -from cudf.core.buffer import acquire_spill_lock - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def generate_ngrams(Column strings, int ngrams, object py_separator): - result = nvtext.generate_ngrams.generate_ngrams( - strings.to_pylibcudf(mode="read"), - ngrams, - py_separator.device_value.c_value - ) - return Column.from_pylibcudf(result) - - -@acquire_spill_lock() -def generate_character_ngrams(Column strings, int ngrams): - result = nvtext.generate_ngrams.generate_character_ngrams( - strings.to_pylibcudf(mode="read"), - ngrams - ) - return Column.from_pylibcudf(result) - - -@acquire_spill_lock() -def hash_character_ngrams(Column strings, int ngrams): - result = nvtext.generate_ngrams.hash_character_ngrams( - strings.to_pylibcudf(mode="read"), - ngrams - ) - return Column.from_pylibcudf(result) diff --git a/python/cudf/cudf/_lib/nvtext/jaccard.pyx b/python/cudf/cudf/_lib/nvtext/jaccard.pyx deleted file mode 100644 index c964d0206b7..00000000000 --- a/python/cudf/cudf/_lib/nvtext/jaccard.pyx +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. - -from cudf.core.buffer import acquire_spill_lock - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def jaccard_index(Column input1, Column input2, int width): - result = nvtext.jaccard.jaccard_index( - input1.to_pylibcudf(mode="read"), - input2.to_pylibcudf(mode="read"), - width, - ) - return Column.from_pylibcudf(result) diff --git a/python/cudf/cudf/_lib/nvtext/minhash.pyx b/python/cudf/cudf/_lib/nvtext/minhash.pyx deleted file mode 100644 index 9f2b3f92502..00000000000 --- a/python/cudf/cudf/_lib/nvtext/minhash.pyx +++ /dev/null @@ -1,35 +0,0 @@ -# Copyright (c) 2023-2024, NVIDIA CORPORATION. - -from libc.stdint cimport uint32_t, uint64_t - -from cudf.core.buffer import acquire_spill_lock - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def minhash(Column input, uint32_t seed, Column a, Column b, int width): - return Column.from_pylibcudf( - nvtext.minhash.minhash( - input.to_pylibcudf(mode="read"), - seed, - a.to_pylibcudf(mode="read"), - b.to_pylibcudf(mode="read"), - width, - ) - ) - - -@acquire_spill_lock() -def minhash64(Column input, uint64_t seed, Column a, Column b, int width): - return Column.from_pylibcudf( - nvtext.minhash.minhash64( - input.to_pylibcudf(mode="read"), - seed, - a.to_pylibcudf(mode="read"), - b.to_pylibcudf(mode="read"), - width, - ) - ) diff --git a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx deleted file mode 100644 index c125d92a24e..00000000000 --- a/python/cudf/cudf/_lib/nvtext/ngrams_tokenize.pyx +++ /dev/null @@ -1,24 +0,0 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
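The minhash wrappers above take per-hash parameters a and b plus a substring width. A CPU-side sketch of what one MinHash signature means under those parameters (conceptual only; the nvtext kernels hash on the GPU rather than with Python's hash):

def minhash_signature(text, a, b, width, prime=(1 << 61) - 1):
    # Hash every width-character substring once, then for each (a[i], b[i])
    # pair keep the minimum permuted hash; those minima form the signature.
    substrings = [text[i:i + width] for i in range(max(1, len(text) - width + 1))]
    base = [hash(s) & 0xFFFFFFFF for s in substrings]
    return [min(((ai * h + bi) % prime) & 0xFFFFFFFF for h in base)
            for ai, bi in zip(a, b)]

sig = minhash_signature("the quick brown fox", a=[3, 5, 7], b=[11, 13, 17], width=4)
# rows with similar substring sets tend to share entries of their signatures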
- -from cudf.core.buffer import acquire_spill_lock - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def ngrams_tokenize( - Column input, - int ngrams, - object py_delimiter, - object py_separator -): - return Column.from_pylibcudf( - nvtext.ngrams_tokenize.ngrams_tokenize( - input.to_pylibcudf(mode="read"), - ngrams, - py_delimiter.device_value.c_value, - py_separator.device_value.c_value - ) - ) diff --git a/python/cudf/cudf/_lib/nvtext/normalize.pyx b/python/cudf/cudf/_lib/nvtext/normalize.pyx deleted file mode 100644 index cc45123dd0a..00000000000 --- a/python/cudf/cudf/_lib/nvtext/normalize.pyx +++ /dev/null @@ -1,28 +0,0 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. - -from cudf.core.buffer import acquire_spill_lock - -from libcpp cimport bool - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def normalize_spaces(Column input): - return Column.from_pylibcudf( - nvtext.normalize.normalize_spaces( - input.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def normalize_characters(Column input, bool do_lower=True): - return Column.from_pylibcudf( - nvtext.normalize.normalize_characters( - input.to_pylibcudf(mode="read"), - do_lower, - ) - ) diff --git a/python/cudf/cudf/_lib/nvtext/replace.pyx b/python/cudf/cudf/_lib/nvtext/replace.pyx deleted file mode 100644 index bec56ade83c..00000000000 --- a/python/cudf/cudf/_lib/nvtext/replace.pyx +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from cudf.core.buffer import acquire_spill_lock - -from pylibcudf.libcudf.types cimport size_type - -from cudf._lib.column cimport Column -from pylibcudf import nvtext - - -@acquire_spill_lock() -def replace_tokens(Column strings, - Column targets, - Column replacements, - object py_delimiter): - """ - The `targets` tokens are searched for within each `strings` - in the Column and replaced with the corresponding `replacements` - if found. Tokens are identified by the `py_delimiter` character - provided. - """ - - return Column.from_pylibcudf( - nvtext.replace.replace_tokens( - strings.to_pylibcudf(mode="read"), - targets.to_pylibcudf(mode="read"), - replacements.to_pylibcudf(mode="read"), - py_delimiter.device_value.c_value, - ) - ) - - -@acquire_spill_lock() -def filter_tokens(Column strings, - size_type min_token_length, - object py_replacement, - object py_delimiter): - """ - Tokens smaller than `min_token_length` are removed from `strings` - in the Column and optionally replaced with the corresponding - `py_replacement` string. Tokens are identified by the `py_delimiter` - character provided. - """ - - return Column.from_pylibcudf( - nvtext.replace.filter_tokens( - strings.to_pylibcudf(mode="read"), - min_token_length, - py_replacement.device_value.c_value, - py_delimiter.device_value.c_value, - ) - ) diff --git a/python/cudf/cudf/_lib/nvtext/stemmer.pyx b/python/cudf/cudf/_lib/nvtext/stemmer.pyx deleted file mode 100644 index 63a389b64d5..00000000000 --- a/python/cudf/cudf/_lib/nvtext/stemmer.pyx +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
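The replace.pyx docstrings above describe token-level replacement and filtering. A plain-Python sketch of those semantics with a single-character delimiter (illustrative only; the nvtext versions operate on whole strings columns):

def replace_tokens_cpu(text, targets, replacements, delimiter=" "):
    # Any token equal to targets[i] becomes replacements[i]; others pass through.
    mapping = dict(zip(targets, replacements))
    return delimiter.join(mapping.get(tok, tok) for tok in text.split(delimiter))

def filter_tokens_cpu(text, min_token_length, replacement="", delimiter=" "):
    # Tokens shorter than min_token_length are swapped for the replacement,
    # which may be the empty string.
    return delimiter.join(tok if len(tok) >= min_token_length else replacement
                          for tok in text.split(delimiter))

assert replace_tokens_cpu("quick brown fox", ["brown"], ["red"]) == "quick red fox"
assert filter_tokens_cpu("a quick brown fox", 4) == " quick brown "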
- -from enum import IntEnum - -from cudf.core.buffer import acquire_spill_lock - -from pylibcudf.libcudf.nvtext.stemmer cimport ( - letter_type, - underlying_type_t_letter_type, -) -from pylibcudf.libcudf.types cimport size_type - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -class LetterType(IntEnum): - CONSONANT = letter_type.CONSONANT - VOWEL = letter_type.VOWEL - - -@acquire_spill_lock() -def porter_stemmer_measure(Column strings): - return Column.from_pylibcudf( - nvtext.stemmer.porter_stemmer_measure( - strings.to_pylibcudf(mode="read"), - ) - ) - - -@acquire_spill_lock() -def is_letter(Column strings, - object ltype, - size_type index): - return Column.from_pylibcudf( - nvtext.stemmer.is_letter( - strings.to_pylibcudf(mode="read"), - ltype==LetterType.VOWEL, - index, - ) - ) - - -@acquire_spill_lock() -def is_letter_multi(Column strings, - object ltype, - Column indices): - return Column.from_pylibcudf( - nvtext.stemmer.is_letter( - strings.to_pylibcudf(mode="read"), - ltype==LetterType.VOWEL, - indices.to_pylibcudf(mode="read"), - ) - ) diff --git a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx b/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx deleted file mode 100644 index 5e0bfb74705..00000000000 --- a/python/cudf/cudf/_lib/nvtext/subword_tokenize.pyx +++ /dev/null @@ -1,38 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from libc.stdint cimport uint32_t - -from cudf.core.buffer import acquire_spill_lock - -from libcpp cimport bool - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def subword_tokenize_inmem_hash( - Column strings, - object hashed_vocabulary, - uint32_t max_sequence_length=64, - uint32_t stride=48, - bool do_lower=True, - bool do_truncate=False, -): - """ - Subword tokenizes text series by using the pre-loaded hashed vocabulary - """ - result = nvtext.subword_tokenize.subword_tokenize( - strings.to_pylibcudf(mode="read"), - hashed_vocabulary, - max_sequence_length, - stride, - do_lower, - do_truncate, - ) - # return the 3 tensor components - tokens = Column.from_pylibcudf(result[0]) - masks = Column.from_pylibcudf(result[1]) - metadata = Column.from_pylibcudf(result[2]) - return tokens, masks, metadata diff --git a/python/cudf/cudf/_lib/nvtext/tokenize.pyx b/python/cudf/cudf/_lib/nvtext/tokenize.pyx deleted file mode 100644 index f473c48e2f7..00000000000 --- a/python/cudf/cudf/_lib/nvtext/tokenize.pyx +++ /dev/null @@ -1,86 +0,0 @@ -# Copyright (c) 2018-2024, NVIDIA CORPORATION. 
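porter_stemmer_measure above exposes the Porter stemmer's measure m. A reference sketch of that measure in plain Python (conceptual; the GPU version computes it per row of a strings column):

def porter_measure(word):
    # m counts vowel-to-consonant transitions in the word's V/C pattern;
    # 'y' is treated as a vowel only when it follows a consonant.
    def is_vowel(i):
        c = word[i].lower()
        if c in "aeiou":
            return True
        return c == "y" and i > 0 and not is_vowel(i - 1)
    pattern = "".join("V" if is_vowel(i) else "C" for i in range(len(word)))
    return pattern.count("VC")

assert porter_measure("tree") == 0
assert porter_measure("trouble") == 1
assert porter_measure("private") == 2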
- -from cudf.core.buffer import acquire_spill_lock - -from pylibcudf.libcudf.types cimport size_type - -from pylibcudf.nvtext.tokenize import TokenizeVocabulary # no-cython-lint - -from cudf._lib.column cimport Column - -from pylibcudf import nvtext - - -@acquire_spill_lock() -def _tokenize_scalar(Column strings, object py_delimiter): - return Column.from_pylibcudf( - nvtext.tokenize.tokenize_scalar( - strings.to_pylibcudf(mode="read"), - py_delimiter.device_value.c_value - ) - ) - - -@acquire_spill_lock() -def _tokenize_column(Column strings, Column delimiters): - return Column.from_pylibcudf( - nvtext.tokenize.tokenize_column( - strings.to_pylibcudf(mode="read"), - delimiters.to_pylibcudf(mode="read"), - ) - ) - - -@acquire_spill_lock() -def _count_tokens_scalar(Column strings, object py_delimiter): - return Column.from_pylibcudf( - nvtext.tokenize.count_tokens_scalar( - strings.to_pylibcudf(mode="read"), - py_delimiter.device_value.c_value - ) - ) - - -@acquire_spill_lock() -def _count_tokens_column(Column strings, Column delimiters): - return Column.from_pylibcudf( - nvtext.tokenize.count_tokens_column( - strings.to_pylibcudf(mode="read"), - delimiters.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def character_tokenize(Column strings): - return Column.from_pylibcudf( - nvtext.tokenize.character_tokenize( - strings.to_pylibcudf(mode="read") - ) - ) - - -@acquire_spill_lock() -def detokenize(Column strings, Column indices, object py_separator): - return Column.from_pylibcudf( - nvtext.tokenize.detokenize( - strings.to_pylibcudf(mode="read"), - indices.to_pylibcudf(mode="read"), - py_separator.device_value.c_value - ) - ) - - -@acquire_spill_lock() -def tokenize_with_vocabulary(Column strings, - object vocabulary, - object py_delimiter, - size_type default_id): - return Column.from_pylibcudf( - nvtext.tokenize.tokenize_with_vocabulary( - strings.to_pylibcudf(mode="read"), - vocabulary, - py_delimiter.device_value.c_value, - default_id - ) - ) diff --git a/python/cudf/cudf/_lib/parquet.pyx b/python/cudf/cudf/_lib/parquet.pyx deleted file mode 100644 index 00c434ae374..00000000000 --- a/python/cudf/cudf/_lib/parquet.pyx +++ /dev/null @@ -1,817 +0,0 @@ -# Copyright (c) 2019-2024, NVIDIA CORPORATION. 
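The tokenize wrappers above flatten every row into its tokens, while detokenize rebuilds rows from tokens plus the row index each token came from. A CPU sketch of that round trip (whitespace delimiter, illustrative only):

def tokenize_cpu(rows, delimiter=" "):
    tokens, indices = [], []
    for row_id, row in enumerate(rows):
        for tok in row.split(delimiter):
            if tok:
                tokens.append(tok)
                indices.append(row_id)  # row each token belongs to
    return tokens, indices

def detokenize_cpu(tokens, indices, separator=" "):
    n_rows = max(indices) + 1 if indices else 0
    rows = [[] for _ in range(n_rows)]
    for tok, row_id in zip(tokens, indices):
        rows[row_id].append(tok)
    return [separator.join(r) for r in rows]

tokens, indices = tokenize_cpu(["the fox", "jumped high"])
assert detokenize_cpu(tokens, indices) == ["the fox", "jumped high"]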
- -import io - -import pyarrow as pa -import itertools -import cudf -from cudf.core.buffer import acquire_spill_lock - -try: - import ujson as json -except ImportError: - import json - -import numpy as np - -from cudf.api.types import is_list_like - -from cudf._lib.utils cimport _data_from_columns, data_from_pylibcudf_io - -from cudf._lib.utils import _index_level_name, generate_pandas_metadata - -from libc.stdint cimport int64_t -from libcpp cimport bool - -from pylibcudf.expressions cimport Expression -from pylibcudf.io.parquet cimport ChunkedParquetReader -from pylibcudf.libcudf.io.types cimport ( - statistics_freq, - compression_type, - dictionary_policy, -) -from pylibcudf.libcudf.types cimport size_type - -from cudf._lib.column cimport Column -from cudf._lib.io.utils cimport ( - add_df_col_struct_names, -) - -import pylibcudf as plc - -from pylibcudf cimport Table - -from cudf.utils.ioutils import _ROW_GROUP_SIZE_BYTES_DEFAULT -from pylibcudf.io.types cimport TableInputMetadata, SinkInfo, ColumnInMetadata -from pylibcudf.io.parquet cimport ParquetChunkedWriter - - -def _parse_metadata(meta): - file_is_range_index = False - file_index_cols = None - file_column_dtype = None - - if 'index_columns' in meta and len(meta['index_columns']) > 0: - file_index_cols = meta['index_columns'] - - if isinstance(file_index_cols[0], dict) and \ - file_index_cols[0]['kind'] == 'range': - file_is_range_index = True - if 'column_indexes' in meta and len(meta['column_indexes']) == 1: - file_column_dtype = meta['column_indexes'][0]["numpy_type"] - return file_is_range_index, file_index_cols, file_column_dtype - - -cdef object _process_metadata(object df, - list names, - dict child_names, - list per_file_user_data, - object row_groups, - object filepaths_or_buffers, - bool allow_range_index, - bool use_pandas_metadata, - size_type nrows=-1, - int64_t skip_rows=0, - ): - - add_df_col_struct_names(df, child_names) - index_col = None - is_range_index = True - column_index_type = None - index_col_names = None - meta = None - for single_file in per_file_user_data: - if b'pandas' not in single_file: - continue - json_str = single_file[b'pandas'].decode('utf-8') - meta = json.loads(json_str) - file_is_range_index, index_col, column_index_type = _parse_metadata(meta) - is_range_index &= file_is_range_index - - if not file_is_range_index and index_col is not None \ - and index_col_names is None: - index_col_names = {} - for idx_col in index_col: - for c in meta['columns']: - if c['field_name'] == idx_col: - index_col_names[idx_col] = c['name'] - - if meta is not None: - # Book keep each column metadata as the order - # of `meta["columns"]` and `column_names` are not - # guaranteed to be deterministic and same always. 
- meta_data_per_column = { - col_meta['name']: col_meta for col_meta in meta["columns"] - } - - # update the decimal precision of each column - for col in names: - if isinstance(df._data[col].dtype, cudf.core.dtypes.DecimalDtype): - df._data[col].dtype.precision = ( - meta_data_per_column[col]["metadata"]["precision"] - ) - - # Set the index column - if index_col is not None and len(index_col) > 0: - if is_range_index: - if not allow_range_index: - return df - - if len(per_file_user_data) > 1: - range_index_meta = { - "kind": "range", - "name": None, - "start": 0, - "stop": len(df), - "step": 1 - } - else: - range_index_meta = index_col[0] - - if row_groups is not None: - per_file_metadata = [ - pa.parquet.read_metadata( - # Pyarrow cannot read directly from bytes - io.BytesIO(s) if isinstance(s, bytes) else s - ) for s in filepaths_or_buffers - ] - - filtered_idx = [] - for i, file_meta in enumerate(per_file_metadata): - row_groups_i = [] - start = 0 - for row_group in range(file_meta.num_row_groups): - stop = start + file_meta.row_group(row_group).num_rows - row_groups_i.append((start, stop)) - start = stop - - for rg in row_groups[i]: - filtered_idx.append( - cudf.RangeIndex( - start=row_groups_i[rg][0], - stop=row_groups_i[rg][1], - step=range_index_meta['step'] - ) - ) - - if len(filtered_idx) > 0: - idx = cudf.concat(filtered_idx) - else: - idx = cudf.Index._from_column(cudf.core.column.column_empty(0)) - else: - start = range_index_meta["start"] + skip_rows - stop = range_index_meta["stop"] - if nrows > -1: - stop = start + nrows - idx = cudf.RangeIndex( - start=start, - stop=stop, - step=range_index_meta['step'], - name=range_index_meta['name'] - ) - - df._index = idx - elif set(index_col).issubset(names): - index_data = df[index_col] - actual_index_names = iter(index_col_names.values()) - if index_data._num_columns == 1: - idx = cudf.Index._from_column( - index_data._columns[0], - name=next(actual_index_names) - ) - else: - idx = cudf.MultiIndex.from_frame( - index_data, - names=list(actual_index_names) - ) - df.drop(columns=index_col, inplace=True) - df._index = idx - else: - if use_pandas_metadata: - df.index.names = index_col - - if df._num_columns == 0 and column_index_type is not None: - df._data.label_dtype = cudf.dtype(column_index_type) - - return df - - -def read_parquet_chunked( - filepaths_or_buffers, - columns=None, - row_groups=None, - use_pandas_metadata=True, - size_t chunk_read_limit=0, - size_t pass_read_limit=1024000000, - size_type nrows=-1, - int64_t skip_rows=0, - allow_mismatched_pq_schemas=False -): - # Note: If this function ever takes accepts filters - # allow_range_index needs to be False when a filter is passed - # (see read_parquet) - allow_range_index = columns is not None and len(columns) != 0 - - options = ( - plc.io.parquet.ParquetReaderOptions.builder( - plc.io.SourceInfo(filepaths_or_buffers) - ) - .use_pandas_metadata(use_pandas_metadata) - .allow_mismatched_pq_schemas(allow_mismatched_pq_schemas) - .build() - ) - if row_groups is not None: - options.set_row_groups(row_groups) - if nrows > -1: - options.set_num_rows(nrows) - if skip_rows != 0: - options.set_skip_rows(skip_rows) - if columns is not None: - options.set_columns(columns) - - reader = ChunkedParquetReader( - options, - chunk_read_limit=chunk_read_limit, - pass_read_limit=pass_read_limit, - ) - - tbl_w_meta = reader.read_chunk() - column_names = tbl_w_meta.column_names(include_children=False) - child_names = tbl_w_meta.child_names - per_file_user_data = tbl_w_meta.per_file_user_data - 
concatenated_columns = tbl_w_meta.tbl.columns() - - # save memory - del tbl_w_meta - - cdef Table tbl - while reader.has_next(): - tbl = reader.read_chunk().tbl - - for i in range(tbl.num_columns()): - concatenated_columns[i] = plc.concatenate.concatenate( - [concatenated_columns[i], tbl._columns[i]] - ) - # Drop residual columns to save memory - tbl._columns[i] = None - - df = cudf.DataFrame._from_data( - *_data_from_columns( - columns=[Column.from_pylibcudf(plc) for plc in concatenated_columns], - column_names=column_names, - index_names=None - ) - ) - df = _process_metadata(df, column_names, child_names, - per_file_user_data, row_groups, - filepaths_or_buffers, - allow_range_index, use_pandas_metadata, - nrows=nrows, skip_rows=skip_rows) - return df - - -cpdef read_parquet(filepaths_or_buffers, columns=None, row_groups=None, - use_pandas_metadata=True, - Expression filters=None, - size_type nrows=-1, - int64_t skip_rows=0, - allow_mismatched_pq_schemas=False): - """ - Cython function to call into libcudf API, see `read_parquet`. - - filters, if not None, should be an Expression that evaluates to a - boolean predicate as a function of columns being read. - - See Also - -------- - cudf.io.parquet.read_parquet - cudf.io.parquet.to_parquet - """ - - allow_range_index = True - if columns is not None and len(columns) == 0 or filters: - allow_range_index = False - - options = ( - plc.io.parquet.ParquetReaderOptions.builder( - plc.io.SourceInfo(filepaths_or_buffers) - ) - .use_pandas_metadata(use_pandas_metadata) - .allow_mismatched_pq_schemas(allow_mismatched_pq_schemas) - .build() - ) - if row_groups is not None: - options.set_row_groups(row_groups) - if nrows > -1: - options.set_num_rows(nrows) - if skip_rows != 0: - options.set_skip_rows(skip_rows) - if columns is not None: - options.set_columns(columns) - if filters is not None: - options.set_filter(filters) - - tbl_w_meta = plc.io.parquet.read_parquet(options) - - df = cudf.DataFrame._from_data( - *data_from_pylibcudf_io(tbl_w_meta) - ) - - df = _process_metadata(df, tbl_w_meta.column_names(include_children=False), - tbl_w_meta.child_names, tbl_w_meta.per_file_user_data, - row_groups, filepaths_or_buffers, - allow_range_index, use_pandas_metadata, - nrows=nrows, skip_rows=skip_rows) - return df - -cpdef read_parquet_metadata(list filepaths_or_buffers): - """ - Cython function to call into libcudf API, see `read_parquet_metadata`. 
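The loop above drains the chunked reader and stitches chunks together column by column, so only one extra chunk is resident at a time. A condensed, self-contained sketch of the same pattern (the file name and read limits are placeholders):

import pylibcudf as plc

options = (
    plc.io.parquet.ParquetReaderOptions.builder(
        plc.io.SourceInfo(["data.parquet"])  # hypothetical input file
    )
    .build()
)
reader = plc.io.parquet.ChunkedParquetReader(
    options, chunk_read_limit=0, pass_read_limit=1024000000
)

columns = reader.read_chunk().tbl.columns()
while reader.has_next():
    chunk = reader.read_chunk().tbl.columns()
    # Grow each output column by concatenating the matching chunk column.
    columns = [
        plc.concatenate.concatenate([acc, new])
        for acc, new in zip(columns, chunk)
    ]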
- - See Also - -------- - cudf.io.parquet.read_parquet - cudf.io.parquet.to_parquet - """ - parquet_metadata = plc.io.parquet_metadata.read_parquet_metadata( - plc.io.SourceInfo(filepaths_or_buffers) - ) - - # read all column names including index column, if any - col_names = [info.name() for info in parquet_metadata.schema().root().children()] - - index_col_names = set() - json_str = parquet_metadata.metadata()['pandas'] - if json_str != "": - meta = json.loads(json_str) - file_is_range_index, index_col, _ = _parse_metadata(meta) - if ( - not file_is_range_index - and index_col is not None - ): - columns = meta['columns'] - for idx_col in index_col: - for c in columns: - if c['field_name'] == idx_col: - index_col_names.add(idx_col) - - # remove the index column from the list of column names - # only if index_col_names is not None - if len(index_col_names) >= 0: - col_names = [name for name in col_names if name not in index_col_names] - - return ( - parquet_metadata.num_rows(), - parquet_metadata.num_rowgroups(), - col_names, - len(col_names), - parquet_metadata.rowgroup_metadata() - ) - - -@acquire_spill_lock() -def write_parquet( - table, - object filepaths_or_buffers, - object index=None, - object compression="snappy", - object statistics="ROWGROUP", - object metadata_file_path=None, - object int96_timestamps=False, - object row_group_size_bytes=None, - object row_group_size_rows=None, - object max_page_size_bytes=None, - object max_page_size_rows=None, - object max_dictionary_size=None, - object partitions_info=None, - object force_nullable_schema=False, - header_version="1.0", - use_dictionary=True, - object skip_compression=None, - object column_encoding=None, - object column_type_length=None, - object output_as_binary=None, - write_arrow_schema=False, -): - """ - Cython function to call into libcudf API, see `write_parquet`. - - See Also - -------- - cudf.io.parquet.write_parquet - """ - if index is True or ( - index is None and not isinstance(table._index, cudf.RangeIndex) - ): - columns = [*table.index._columns, *table._columns] - plc_table = plc.Table([col.to_pylibcudf(mode="read") for col in columns]) - tbl_meta = TableInputMetadata(plc_table) - for level, idx_name in enumerate(table._index.names): - tbl_meta.column_metadata[level].set_name( - _index_level_name(idx_name, level, table._column_names) - ) - num_index_cols_meta = len(table._index.names) - else: - plc_table = plc.Table( - [col.to_pylibcudf(mode="read") for col in table._columns] - ) - tbl_meta = TableInputMetadata(plc_table) - num_index_cols_meta = 0 - - for i, name in enumerate(table._column_names, num_index_cols_meta): - if not isinstance(name, str): - if cudf.get_option("mode.pandas_compatible"): - tbl_meta.column_metadata[i].set_name(str(name)) - else: - raise ValueError( - "Writing a Parquet file requires string column names" - ) - else: - tbl_meta.column_metadata[i].set_name(name) - - _set_col_metadata( - table[name]._column, - tbl_meta.column_metadata[i], - force_nullable_schema, - None, - skip_compression, - column_encoding, - column_type_length, - output_as_binary - ) - if partitions_info is not None: - user_data = [ - {"pandas": generate_pandas_metadata( - table.iloc[start_row:start_row + num_row].copy(deep=False), - index - )} - for start_row, num_row in partitions_info - ] - else: - user_data = [{"pandas": generate_pandas_metadata(table, index)}] - - if header_version not in ("1.0", "2.0"): - raise ValueError( - f"Invalid parquet header version: {header_version}. 
" - "Valid values are '1.0' and '2.0'" - ) - - dict_policy = ( - plc.io.types.DictionaryPolicy.ADAPTIVE - if use_dictionary - else plc.io.types.DictionaryPolicy.NEVER - ) - - comp_type = _get_comp_type(compression) - stat_freq = _get_stat_freq(statistics) - options = ( - plc.io.parquet.ParquetWriterOptions.builder( - plc.io.SinkInfo(filepaths_or_buffers), plc_table - ) - .metadata(tbl_meta) - .key_value_metadata(user_data) - .compression(comp_type) - .stats_level(stat_freq) - .int96_timestamps(int96_timestamps) - .write_v2_headers(header_version == "2.0") - .dictionary_policy(dict_policy) - .utc_timestamps(False) - .write_arrow_schema(write_arrow_schema) - .build() - ) - if partitions_info is not None: - options.set_partitions( - [plc.io.types.PartitionInfo(part[0], part[1]) for part in partitions_info] - ) - if metadata_file_path is not None: - if is_list_like(metadata_file_path): - options.set_column_chunks_file_paths(metadata_file_path) - else: - options.set_column_chunks_file_paths([metadata_file_path]) - if row_group_size_bytes is not None: - options.set_row_group_size_bytes(row_group_size_bytes) - if row_group_size_rows is not None: - options.set_row_group_size_rows(row_group_size_rows) - if max_page_size_bytes is not None: - options.set_max_page_size_bytes(max_page_size_bytes) - if max_page_size_rows is not None: - options.set_max_page_size_rows(max_page_size_rows) - if max_dictionary_size is not None: - options.set_max_dictionary_size(max_dictionary_size) - blob = plc.io.parquet.write_parquet(options) - if metadata_file_path is not None: - return np.asarray(blob.obj) - else: - return None - - -cdef class ParquetWriter: - """ - ParquetWriter lets you incrementally write out a Parquet file from a series - of cudf tables - - Parameters - ---------- - filepath_or_buffer : str, io.IOBase, os.PathLike, or list - File path or buffer to write to. The argument may also correspond - to a list of file paths or buffers. - index : bool or None, default None - If ``True``, include a dataframe's index(es) in the file output. - If ``False``, they will not be written to the file. If ``None``, - index(es) other than RangeIndex will be saved as columns. - compression : {'snappy', None}, default 'snappy' - Name of the compression to use. Use ``None`` for no compression. - statistics : {'ROWGROUP', 'PAGE', 'COLUMN', 'NONE'}, default 'ROWGROUP' - Level at which column statistics should be included in file. - row_group_size_bytes: int, default ``uint64 max`` - Maximum size of each stripe of the output. - By default, a virtually infinite size equal to ``uint64 max`` will be used. - row_group_size_rows: int, default 1000000 - Maximum number of rows of each stripe of the output. - By default, 1000000 (10^6 rows) will be used. - max_page_size_bytes: int, default 524288 - Maximum uncompressed size of each page of the output. - By default, 524288 (512KB) will be used. - max_page_size_rows: int, default 20000 - Maximum number of rows of each page of the output. - By default, 20000 will be used. - max_dictionary_size: int, default 1048576 - Maximum size of the dictionary page for each output column chunk. Dictionary - encoding for column chunks that exceeds this limit will be disabled. - By default, 1048576 (1MB) will be used. - use_dictionary : bool, default True - If ``True``, enable dictionary encoding for Parquet page data - subject to ``max_dictionary_size`` constraints. - If ``False``, disable dictionary encoding for Parquet page data. 
- store_schema : bool, default False - If ``True``, enable computing and writing arrow schema to Parquet - file footer's key-value metadata section for faithful round-tripping. - See Also - -------- - cudf.io.parquet.write_parquet - """ - cdef bool initialized - cdef ParquetChunkedWriter writer - cdef SinkInfo sink - cdef TableInputMetadata tbl_meta - cdef str statistics - cdef object compression - cdef object index - cdef size_t row_group_size_bytes - cdef size_type row_group_size_rows - cdef size_t max_page_size_bytes - cdef size_type max_page_size_rows - cdef size_t max_dictionary_size - cdef bool use_dictionary - cdef bool write_arrow_schema - - def __cinit__(self, object filepath_or_buffer, object index=None, - object compression="snappy", str statistics="ROWGROUP", - size_t row_group_size_bytes=_ROW_GROUP_SIZE_BYTES_DEFAULT, - size_type row_group_size_rows=1000000, - size_t max_page_size_bytes=524288, - size_type max_page_size_rows=20000, - size_t max_dictionary_size=1048576, - bool use_dictionary=True, - bool store_schema=False): - filepaths_or_buffers = ( - list(filepath_or_buffer) - if is_list_like(filepath_or_buffer) - else [filepath_or_buffer] - ) - self.sink = plc.io.SinkInfo(filepaths_or_buffers) - self.statistics = statistics - self.compression = compression - self.index = index - self.initialized = False - self.row_group_size_bytes = row_group_size_bytes - self.row_group_size_rows = row_group_size_rows - self.max_page_size_bytes = max_page_size_bytes - self.max_page_size_rows = max_page_size_rows - self.max_dictionary_size = max_dictionary_size - self.use_dictionary = use_dictionary - self.write_arrow_schema = store_schema - - def write_table(self, table, object partitions_info=None): - """ Writes a single table to the file """ - if not self.initialized: - self._initialize_chunked_state( - table, - num_partitions=len(partitions_info) if partitions_info else 1 - ) - if self.index is not False and ( - table._index.name is not None or - isinstance(table._index, cudf.core.multiindex.MultiIndex)): - columns = [*table.index._columns, *table._columns] - plc_table = plc.Table([col.to_pylibcudf(mode="read") for col in columns]) - else: - plc_table = plc.Table( - [col.to_pylibcudf(mode="read") for col in table._columns] - ) - self.writer.write(plc_table, partitions_info) - - def close(self, object metadata_file_path=None): - if not self.initialized: - return None - column_chunks_file_paths=[] - if metadata_file_path is not None: - if is_list_like(metadata_file_path): - column_chunks_file_paths = list(metadata_file_path) - else: - column_chunks_file_paths = [metadata_file_path] - blob = self.writer.close(column_chunks_file_paths) - if metadata_file_path is not None: - return np.asarray(blob.obj) - return None - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - - def _initialize_chunked_state(self, table, num_partitions=1): - """ Prepares all the values required to build the - chunked_parquet_writer_options and creates a writer""" - - # Set the table_metadata - num_index_cols_meta = 0 - plc_table = plc.Table( - [ - col.to_pylibcudf(mode="read") - for col in table._columns - ] - ) - self.tbl_meta = TableInputMetadata(plc_table) - if self.index is not False: - if isinstance(table._index, cudf.core.multiindex.MultiIndex): - plc_table = plc.Table( - [ - col.to_pylibcudf(mode="read") - for col in itertools.chain(table.index._columns, table._columns) - ] - ) - self.tbl_meta = TableInputMetadata(plc_table) - for level, idx_name in 
enumerate(table._index.names): - self.tbl_meta.column_metadata[level].set_name(idx_name) - num_index_cols_meta = len(table._index.names) - else: - if table._index.name is not None: - plc_table = plc.Table( - [ - col.to_pylibcudf(mode="read") - for col in itertools.chain( - table.index._columns, table._columns - ) - ] - ) - self.tbl_meta = TableInputMetadata(plc_table) - self.tbl_meta.column_metadata[0].set_name(table._index.name) - num_index_cols_meta = 1 - - for i, name in enumerate(table._column_names, num_index_cols_meta): - self.tbl_meta.column_metadata[i].set_name(name) - _set_col_metadata( - table[name]._column, - self.tbl_meta.column_metadata[i], - ) - - index = ( - False if isinstance(table._index, cudf.RangeIndex) else self.index - ) - user_data = [{"pandas" : generate_pandas_metadata(table, index)}]*num_partitions - cdef compression_type comp_type = _get_comp_type(self.compression) - cdef statistics_freq stat_freq = _get_stat_freq(self.statistics) - cdef dictionary_policy dict_policy = ( - plc.io.types.DictionaryPolicy.ADAPTIVE - if self.use_dictionary - else plc.io.types.DictionaryPolicy.NEVER - ) - options = ( - plc.io.parquet.ChunkedParquetWriterOptions.builder(self.sink) - .metadata(self.tbl_meta) - .key_value_metadata(user_data) - .compression(comp_type) - .stats_level(stat_freq) - .row_group_size_bytes(self.row_group_size_bytes) - .row_group_size_rows(self.row_group_size_rows) - .max_page_size_bytes(self.max_page_size_bytes) - .max_page_size_rows(self.max_page_size_rows) - .max_dictionary_size(self.max_dictionary_size) - .write_arrow_schema(self.write_arrow_schema) - .build() - ) - options.set_dictionary_policy(dict_policy) - self.writer = plc.io.parquet.ParquetChunkedWriter.from_options(options) - self.initialized = True - - -cpdef merge_filemetadata(object filemetadata_list): - """ - Cython function to call into libcudf API, see `merge_row_group_metadata`. - - See Also - -------- - cudf.io.parquet.merge_row_group_metadata - """ - return np.asarray( - plc.io.parquet.merge_row_group_metadata(filemetadata_list).obj - ) - - -cdef statistics_freq _get_stat_freq(str statistics): - result = getattr( - plc.io.types.StatisticsFreq, - f"STATISTICS_{statistics.upper()}", - None - ) - if result is None: - raise ValueError("Unsupported `statistics_freq` type") - return result - - -cdef compression_type _get_comp_type(object compression): - if compression is None: - return plc.io.types.CompressionType.NONE - result = getattr( - plc.io.types.CompressionType, - str(compression).upper(), - None - ) - if result is None: - raise ValueError("Unsupported `compression` type") - return result - - -cdef _set_col_metadata( - Column col, - ColumnInMetadata col_meta, - bool force_nullable_schema=False, - str path=None, - object skip_compression=None, - object column_encoding=None, - object column_type_length=None, - object output_as_binary=None, -): - need_path = (skip_compression is not None or column_encoding is not None or - column_type_length is not None or output_as_binary is not None) - name = col_meta.get_name() if need_path else None - full_path = path + "." + name if path is not None else name - - if force_nullable_schema: - # Only set nullability if `force_nullable_schema` - # is true. 
- col_meta.set_nullability(True) - - if skip_compression is not None and full_path in skip_compression: - col_meta.set_skip_compression(True) - - if column_encoding is not None and full_path in column_encoding: - encoding = column_encoding[full_path] - if encoding is None: - c_encoding = plc.io.types.ColumnEncoding.USE_DEFAULT - else: - enc = str(encoding).upper() - c_encoding = getattr(plc.io.types.ColumnEncoding, enc, None) - if c_encoding is None: - raise ValueError("Unsupported `column_encoding` type") - col_meta.set_encoding(c_encoding) - - if column_type_length is not None and full_path in column_type_length: - col_meta.set_output_as_binary(True) - col_meta.set_type_length(column_type_length[full_path]) - - if output_as_binary is not None and full_path in output_as_binary: - col_meta.set_output_as_binary(True) - - if isinstance(col.dtype, cudf.StructDtype): - for i, (child_col, name) in enumerate( - zip(col.children, list(col.dtype.fields)) - ): - col_meta.child(i).set_name(name) - _set_col_metadata( - child_col, - col_meta.child(i), - force_nullable_schema, - full_path, - skip_compression, - column_encoding, - column_type_length, - output_as_binary - ) - elif isinstance(col.dtype, cudf.ListDtype): - if full_path is not None: - full_path = full_path + ".list" - col_meta.child(1).set_name("element") - _set_col_metadata( - col.children[1], - col_meta.child(1), - force_nullable_schema, - full_path, - skip_compression, - column_encoding, - column_type_length, - output_as_binary - ) - elif isinstance(col.dtype, cudf.core.dtypes.DecimalDtype): - col_meta.set_decimal_precision(col.dtype.precision) diff --git a/python/cudf/cudf/_lib/reduce.pyx b/python/cudf/cudf/_lib/reduce.pyx deleted file mode 100644 index 2850cab93a1..00000000000 --- a/python/cudf/cudf/_lib/reduce.pyx +++ /dev/null @@ -1,135 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. -import warnings - -import cudf -from cudf.core.buffer import acquire_spill_lock - -from cudf._lib.column cimport Column -from cudf._lib.scalar cimport DeviceScalar -from cudf._lib.types cimport dtype_to_pylibcudf_type, is_decimal_type_id - -import pylibcudf - -from cudf.core._internals.aggregation import make_aggregation - - -@acquire_spill_lock() -def reduce(reduction_op, Column incol, dtype=None, **kwargs): - """ - Top level Cython reduce function wrapping libcudf reductions. - - Parameters - ---------- - reduction_op : string - A string specifying the operation, e.g. sum, prod - incol : Column - A cuDF Column object - dtype: numpy.dtype, optional - A numpy data type to use for the output, defaults - to the same type as the input column - """ - if dtype is not None: - warnings.warn( - "dtype is deprecated and will be remove in a future release. " - "Cast the result (e.g. 
.astype) after the operation instead.", - FutureWarning - ) - col_dtype = dtype - else: - col_dtype = incol._reduction_result_dtype(reduction_op) - - # check empty case - if len(incol) <= incol.null_count: - if reduction_op == 'sum' or reduction_op == 'sum_of_squares': - return incol.dtype.type(0) - if reduction_op == 'product': - return incol.dtype.type(1) - if reduction_op == "any": - return False - - return cudf.utils.dtypes._get_nan_for_dtype(col_dtype) - - result = pylibcudf.reduce.reduce( - incol.to_pylibcudf(mode="read"), - make_aggregation(reduction_op, kwargs).c_obj, - dtype_to_pylibcudf_type(col_dtype), - ) - - if is_decimal_type_id(result.type().id()): - scale = -result.type().scale() - precision = _reduce_precision(col_dtype, reduction_op, len(incol)) - return DeviceScalar.from_pylibcudf( - result, - dtype=col_dtype.__class__(precision, scale), - ).value - scalar = DeviceScalar.from_pylibcudf(result).value - if isinstance(col_dtype, cudf.StructDtype): - # TODO: Utilize column_metadata in libcudf to maintain field labels - return dict(zip(col_dtype.fields.keys(), scalar.values())) - return scalar - - -@acquire_spill_lock() -def scan(scan_op, Column incol, inclusive, **kwargs): - """ - Top level Cython scan function wrapping libcudf scans. - - Parameters - ---------- - incol : Column - A cuDF Column object - scan_op : string - A string specifying the operation, e.g. cumprod - inclusive: bool - Flag for including nulls in relevant scan - """ - return Column.from_pylibcudf( - pylibcudf.reduce.scan( - incol.to_pylibcudf(mode="read"), - make_aggregation(scan_op, kwargs).c_obj, - pylibcudf.reduce.ScanType.INCLUSIVE if inclusive - else pylibcudf.reduce.ScanType.EXCLUSIVE, - ) - ) - - -@acquire_spill_lock() -def minmax(Column incol): - """ - Top level Cython minmax function wrapping libcudf minmax. - - Parameters - ---------- - incol : Column - A cuDF Column object - - Returns - ------- - A pair of ``(min, max)`` values of ``incol`` - """ - min, max = pylibcudf.reduce.minmax(incol.to_pylibcudf(mode="read")) - return ( - cudf.Scalar.from_device_scalar(DeviceScalar.from_pylibcudf(min)), - cudf.Scalar.from_device_scalar(DeviceScalar.from_pylibcudf(max)), - ) - - -def _reduce_precision(dtype, op, nrows): - """ - Returns the result precision when performing the reduce - operation `op` for the given dtype and column size. - - See: https://docs.microsoft.com/en-us/sql/t-sql/data-types/precision-scale-and-length-transact-sql - """ # noqa: E501 - p = dtype.precision - if op in ("min", "max"): - new_p = p - elif op == "sum": - new_p = p + nrows - 1 - elif op == "product": - new_p = p * nrows + nrows - 1 - elif op == "sum_of_squares": - new_p = 2 * p + nrows - else: - raise NotImplementedError() - return max(min(new_p, dtype.MAX_PRECISION), 0) diff --git a/python/cudf/cudf/_lib/sort.pyx b/python/cudf/cudf/_lib/sort.pyx deleted file mode 100644 index eefe37d9880..00000000000 --- a/python/cudf/cudf/_lib/sort.pyx +++ /dev/null @@ -1,365 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. - -from itertools import repeat - -from cudf.core.buffer import acquire_spill_lock - -from libcpp cimport bool - -from pylibcudf.libcudf.aggregation cimport rank_method -from cudf._lib.column cimport Column -from cudf._lib.utils cimport columns_from_pylibcudf_table - -import pylibcudf - - -@acquire_spill_lock() -def is_sorted( - list source_columns, object ascending=None, object null_position=None -): - """ - Checks whether the rows of a `table` are sorted in lexicographical order. 
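A quick sanity check of the `_reduce_precision` rules above (numbers invented): the output precision grows with the row count and is then clamped to the dtype's maximum, e.g. 18 for Decimal64.

# Assumed example: a "sum" reduction over 1000 rows with precision 10.
p, nrows, MAX_PRECISION = 10, 1000, 18

new_p = {
    "min": p,
    "max": p,
    "sum": p + nrows - 1,               # 1009
    "product": p * nrows + nrows - 1,   # 10999
    "sum_of_squares": 2 * p + nrows,    # 1020
}["sum"]
precision = max(min(new_p, MAX_PRECISION), 0)   # clamped to 18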
- - Parameters - ---------- - source_columns : list of columns - columns to be checked for sort order - ascending : None or list-like of booleans - None or list-like of boolean values indicating expected sort order of - each column. If list-like, size of list-like must be len(columns). If - None, all columns expected sort order is set to ascending. False (0) - - descending, True (1) - ascending. - null_position : None or list-like of booleans - None or list-like of boolean values indicating desired order of nulls - compared to other elements. If list-like, size of list-like must be - len(columns). If None, null order is set to before. False (0) - after, - True (1) - before. - - Returns - ------- - returns : boolean - Returns True, if sorted as expected by ``ascending`` and - ``null_position``, False otherwise. - """ - - if ascending is None: - column_order = [pylibcudf.types.Order.ASCENDING] * len(source_columns) - else: - if len(ascending) != len(source_columns): - raise ValueError( - f"Expected a list-like of length {len(source_columns)}, " - f"got length {len(ascending)} for `ascending`" - ) - column_order = [pylibcudf.types.Order.DESCENDING] * len(source_columns) - for idx, val in enumerate(ascending): - if val: - column_order[idx] = pylibcudf.types.Order.ASCENDING - - if null_position is None: - null_precedence = [pylibcudf.types.NullOrder.AFTER] * len(source_columns) - else: - if len(null_position) != len(source_columns): - raise ValueError( - f"Expected a list-like of length {len(source_columns)}, " - f"got length {len(null_position)} for `null_position`" - ) - null_precedence = [pylibcudf.types.NullOrder.AFTER] * len(source_columns) - for idx, val in enumerate(null_position): - if val: - null_precedence[idx] = pylibcudf.types.NullOrder.BEFORE - - return pylibcudf.sorting.is_sorted( - pylibcudf.Table( - [c.to_pylibcudf(mode="read") for c in source_columns] - ), - column_order, - null_precedence - ) - - -def ordering(column_order, null_precedence): - """ - Construct order and null order vectors - - Parameters - ---------- - column_order - Iterable of bool (True for ascending order, False for descending) - null_precedence - Iterable string for null positions ("first" for start, "last" for end) - - Both iterables must be the same length (not checked) - - Returns - ------- - pair of vectors (order, and null_order) - """ - c_column_order = [] - c_null_precedence = [] - for asc, null in zip(column_order, null_precedence): - c_column_order.append( - pylibcudf.types.Order.ASCENDING if asc else pylibcudf.types.Order.DESCENDING - ) - if asc ^ (null == "first"): - c_null_precedence.append(pylibcudf.types.NullOrder.AFTER) - elif asc ^ (null == "last"): - c_null_precedence.append(pylibcudf.types.NullOrder.BEFORE) - else: - raise ValueError(f"Invalid null precedence {null}") - return c_column_order, c_null_precedence - - -@acquire_spill_lock() -def order_by( - list columns_from_table, - object ascending, - str na_position, - *, - bool stable -): - """ - Get index to sort the table in ascending/descending order. - - Parameters - ---------- - columns_from_table : list[Column] - Columns from the table which will be sorted - ascending : sequence[bool] - Sequence of boolean values which correspond to each column - in the table to be sorted signifying the order of each column - True - Ascending and False - Descending - na_position : str - Whether null values should show up at the "first" or "last" - position of **all** sorted column. - stable : bool - Should the sort be stable? 
(no default) - - Returns - ------- - Column of indices that sorts the table - """ - order = ordering(ascending, repeat(na_position)) - func = getattr(pylibcudf.sorting, f"{'stable_' if stable else ''}sorted_order") - - return Column.from_pylibcudf( - func( - pylibcudf.Table( - [c.to_pylibcudf(mode="read") for c in columns_from_table], - ), - order[0], - order[1], - ) - ) - - -@acquire_spill_lock() -def sort( - list values, - list column_order=None, - list null_precedence=None, -): - """ - Sort the table in ascending/descending order. - - Parameters - ---------- - values : list[Column] - Columns of the table which will be sorted - column_order : list[bool], optional - Sequence of boolean values which correspond to each column in - keys providing the sort order (default all True). - With True <=> ascending; False <=> descending. - null_precedence : list[str], optional - Sequence of "first" or "last" values (default "first") - indicating the position of null values when sorting the keys. - """ - ncol = len(values) - order = ordering( - column_order or repeat(True, ncol), - null_precedence or repeat("first", ncol), - ) - return columns_from_pylibcudf_table( - pylibcudf.sorting.sort( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in values]), - order[0], - order[1], - ) - ) - - -@acquire_spill_lock() -def sort_by_key( - list values, - list keys, - object ascending, - object na_position, - *, - bool stable, -): - """ - Sort a table by given keys - - Parameters - ---------- - values : list[Column] - Columns of the table which will be sorted - keys : list[Column] - Columns making up the sort key - ascending : list[bool] - Sequence of boolean values which correspond to each column - in the table to be sorted signifying the order of each column - True - Ascending and False - Descending - na_position : list[str] - Sequence of "first" or "last" values (default "first") - indicating the position of null values when sorting the keys. - stable : bool - Should the sort be stable? (no default) - - Returns - ------- - list[Column] - list of value columns sorted by keys - """ - order = ordering(ascending, na_position) - func = getattr(pylibcudf.sorting, f"{'stable_' if stable else ''}sort_by_key") - return columns_from_pylibcudf_table( - func( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in values]), - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in keys]), - order[0], - order[1], - ) - ) - - -@acquire_spill_lock() -def segmented_sort_by_key( - list values, - list keys, - Column segment_offsets, - list column_order=None, - list null_precedence=None, - *, - bool stable, -): - """ - Sort segments of a table by given keys - - Parameters - ---------- - values : list[Column] - Columns of the table which will be sorted - keys : list[Column] - Columns making up the sort key - offsets : Column - Segment offsets - column_order : list[bool], optional - Sequence of boolean values which correspond to each column in - keys providing the sort order (default all True). - With True <=> ascending; False <=> descending. - null_precedence : list[str], optional - Sequence of "first" or "last" values (default "first") - indicating the position of null values when sorting the keys. - stable : bool - Should the sort be stable? 
(no default) - - Returns - ------- - list[Column] - list of value columns sorted by keys - """ - ncol = len(values) - order = ordering( - column_order or repeat(True, ncol), - null_precedence or repeat("first", ncol), - ) - func = getattr( - pylibcudf.sorting, - f"{'stable_' if stable else ''}segmented_sort_by_key" - ) - return columns_from_pylibcudf_table( - func( - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in values]), - pylibcudf.Table([c.to_pylibcudf(mode="read") for c in keys]), - segment_offsets.to_pylibcudf(mode="read"), - order[0], - order[1], - ) - ) - - -@acquire_spill_lock() -def digitize(list source_columns, list bins, bool right=False): - """ - Return the indices of the bins to which each value in source_table belongs. - - Parameters - ---------- - source_columns : Input columns to be binned. - bins : List containing columns of bins - right : Indicating whether the intervals include the - right or the left bin edge. - """ - return Column.from_pylibcudf( - getattr(pylibcudf.search, "lower_bound" if right else "upper_bound")( - pylibcudf.Table( - [c.to_pylibcudf(mode="read") for c in bins] - ), - pylibcudf.Table( - [c.to_pylibcudf(mode="read") for c in source_columns] - ), - [pylibcudf.types.Order.ASCENDING]*len(bins), - [pylibcudf.types.NullOrder.BEFORE]*len(bins) - ) - ) - - -@acquire_spill_lock() -def rank_columns(list source_columns, rank_method method, str na_option, - bool ascending, bool pct - ): - """ - Compute numerical data ranks (1 through n) of each column in the dataframe - """ - column_order = ( - pylibcudf.types.Order.ASCENDING - if ascending - else pylibcudf.types.Order.DESCENDING - ) - # ascending - # #top = na_is_smallest - # #bottom = na_is_largest - # #keep = na_is_largest - # descending - # #top = na_is_largest - # #bottom = na_is_smallest - # #keep = na_is_smallest - if ascending: - if na_option == 'top': - null_precedence = pylibcudf.types.NullOrder.BEFORE - else: - null_precedence = pylibcudf.types.NullOrder.AFTER - else: - if na_option == 'top': - null_precedence = pylibcudf.types.NullOrder.AFTER - else: - null_precedence = pylibcudf.types.NullOrder.BEFORE - c_null_handling = ( - pylibcudf.types.NullPolicy.EXCLUDE - if na_option == 'keep' - else pylibcudf.types.NullPolicy.INCLUDE - ) - - return [ - Column.from_pylibcudf( - pylibcudf.sorting.rank( - col.to_pylibcudf(mode="read"), - method, - column_order, - c_null_handling, - null_precedence, - pct, - ) - ) - for col in source_columns - ] diff --git a/python/cudf/cudf/_lib/strings/__init__.pxd b/python/cudf/cudf/_lib/strings/__init__.pxd deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/python/cudf/cudf/_lib/strings/__init__.py b/python/cudf/cudf/_lib/strings/__init__.py deleted file mode 100644 index b9095a22a42..00000000000 --- a/python/cudf/cudf/_lib/strings/__init__.py +++ /dev/null @@ -1,30 +0,0 @@ -# Copyright (c) 2020-2024, NVIDIA CORPORATION. 
-from cudf._lib.nvtext.edit_distance import edit_distance, edit_distance_matrix -from cudf._lib.nvtext.generate_ngrams import ( - generate_character_ngrams, - generate_ngrams, - hash_character_ngrams, -) -from cudf._lib.nvtext.jaccard import jaccard_index -from cudf._lib.nvtext.minhash import ( - minhash, - minhash64, -) -from cudf._lib.nvtext.ngrams_tokenize import ngrams_tokenize -from cudf._lib.nvtext.normalize import normalize_characters, normalize_spaces -from cudf._lib.nvtext.replace import filter_tokens, replace_tokens -from cudf._lib.nvtext.stemmer import ( - LetterType, - is_letter, - is_letter_multi, - porter_stemmer_measure, -) -from cudf._lib.nvtext.tokenize import ( - _count_tokens_column, - _count_tokens_scalar, - _tokenize_column, - _tokenize_scalar, - character_tokenize, - detokenize, - tokenize_with_vocabulary, -) diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py index 2df154ee112..1b6152b81ca 100644 --- a/python/cudf/cudf/core/_base_index.py +++ b/python/cudf/cudf/core/_base_index.py @@ -2,7 +2,6 @@ from __future__ import annotations -import pickle import warnings from functools import cached_property from typing import TYPE_CHECKING, Any, Literal @@ -330,13 +329,6 @@ def get_level_values(self, level): else: raise KeyError(f"Requested level with name {level} " "not found") - @classmethod - def deserialize(cls, header, frames): - # Dispatch deserialization to the appropriate index type in case - # deserialization is ever attempted with the base class directly. - idx_type = pickle.loads(header["type-serialized"]) - return idx_type.deserialize(header, frames) - @property def names(self): """ diff --git a/python/cudf/cudf/core/_internals/sorting.py b/python/cudf/cudf/core/_internals/sorting.py new file mode 100644 index 00000000000..69f9e7664b1 --- /dev/null +++ b/python/cudf/cudf/core/_internals/sorting.py @@ -0,0 +1,205 @@ +# Copyright (c) 2020-2024, NVIDIA CORPORATION. +from __future__ import annotations + +import itertools +from typing import TYPE_CHECKING, Literal + +import pylibcudf as plc + +from cudf._lib.column import Column +from cudf.core.buffer import acquire_spill_lock + +if TYPE_CHECKING: + from collections.abc import Iterable + + from cudf.core.column import ColumnBase + + +@acquire_spill_lock() +def is_sorted( + source_columns: list[ColumnBase], + ascending: list[bool] | None = None, + null_position: list[bool] | None = None, +) -> bool: + """ + Checks whether the rows of a `table` are sorted in lexicographical order. + + Parameters + ---------- + source_columns : list of columns + columns to be checked for sort order + ascending : None or list-like of booleans + None or list-like of boolean values indicating expected sort order of + each column. If list-like, size of list-like must be len(columns). If + None, all columns expected sort order is set to ascending. False (0) - + descending, True (1) - ascending. + null_position : None or list-like of booleans + None or list-like of boolean values indicating desired order of nulls + compared to other elements. If list-like, size of list-like must be + len(columns). If None, null order is set to before. False (0) - after, + True (1) - before. + + Returns + ------- + returns : boolean + Returns True, if sorted as expected by ``ascending`` and + ``null_position``, False otherwise. 
+ """ + if ascending is None: + column_order = [plc.types.Order.ASCENDING] * len(source_columns) + else: + if len(ascending) != len(source_columns): + raise ValueError( + f"Expected a list-like of length {len(source_columns)}, " + f"got length {len(ascending)} for `ascending`" + ) + column_order = [ + plc.types.Order.ASCENDING if asc else plc.types.Order.DESCENDING + for asc in ascending + ] + + if null_position is None: + null_precedence = [plc.types.NullOrder.AFTER] * len(source_columns) + else: + if len(null_position) != len(source_columns): + raise ValueError( + f"Expected a list-like of length {len(source_columns)}, " + f"got length {len(null_position)} for `null_position`" + ) + null_precedence = [ + plc.types.NullOrder.BEFORE if null else plc.types.NullOrder.AFTER + for null in null_position + ] + + return plc.sorting.is_sorted( + plc.Table([col.to_pylibcudf(mode="read") for col in source_columns]), + column_order, + null_precedence, + ) + + +def ordering( + column_order: list[bool], + null_precedence: Iterable[Literal["first", "last"]], +) -> tuple[list[plc.types.Order], list[plc.types.NullOrder]]: + """ + Construct order and null order vectors + + Parameters + ---------- + column_order + Iterable of bool (True for ascending order, False for descending) + null_precedence + Iterable string for null positions ("first" for start, "last" for end) + + Both iterables must be the same length (not checked) + + Returns + ------- + pair of vectors (order, and null_order) + """ + c_column_order = [] + c_null_precedence = [] + for asc, null in zip(column_order, null_precedence): + c_column_order.append( + plc.types.Order.ASCENDING if asc else plc.types.Order.DESCENDING + ) + if asc ^ (null == "first"): + c_null_precedence.append(plc.types.NullOrder.AFTER) + elif asc ^ (null == "last"): + c_null_precedence.append(plc.types.NullOrder.BEFORE) + else: + raise ValueError(f"Invalid null precedence {null}") + return c_column_order, c_null_precedence + + +@acquire_spill_lock() +def order_by( + columns_from_table: list[ColumnBase], + ascending: list[bool], + na_position: Literal["first", "last"], + *, + stable: bool, +): + """ + Get index to sort the table in ascending/descending order. + + Parameters + ---------- + columns_from_table : list[Column] + Columns from the table which will be sorted + ascending : sequence[bool] + Sequence of boolean values which correspond to each column + in the table to be sorted signifying the order of each column + True - Ascending and False - Descending + na_position : str + Whether null values should show up at the "first" or "last" + position of **all** sorted column. + stable : bool + Should the sort be stable? 
(no default) + + Returns + ------- + Column of indices that sorts the table + """ + order = ordering(ascending, itertools.repeat(na_position)) + func = ( + plc.sorting.stable_sorted_order if stable else plc.sorting.sorted_order + ) + return Column.from_pylibcudf( + func( + plc.Table( + [col.to_pylibcudf(mode="read") for col in columns_from_table], + ), + order[0], + order[1], + ) + ) + + +@acquire_spill_lock() +def sort_by_key( + values: list[ColumnBase], + keys: list[ColumnBase], + ascending: list[bool], + na_position: list[Literal["first", "last"]], + *, + stable: bool, +) -> list[ColumnBase]: + """ + Sort a table by given keys + + Parameters + ---------- + values : list[Column] + Columns of the table which will be sorted + keys : list[Column] + Columns making up the sort key + ascending : list[bool] + Sequence of boolean values which correspond to each column + in the table to be sorted signifying the order of each column + True - Ascending and False - Descending + na_position : list[str] + Sequence of "first" or "last" values (default "first") + indicating the position of null values when sorting the keys. + stable : bool + Should the sort be stable? (no default) + + Returns + ------- + list[Column] + list of value columns sorted by keys + """ + order = ordering(ascending, na_position) + func = ( + plc.sorting.stable_sort_by_key if stable else plc.sorting.sort_by_key + ) + return [ + Column.from_pylibcudf(col) + for col in func( + plc.Table([col.to_pylibcudf(mode="read") for col in values]), + plc.Table([col.to_pylibcudf(mode="read") for col in keys]), + order[0], + order[1], + ).columns() + ] diff --git a/python/cudf/cudf/core/abc.py b/python/cudf/cudf/core/abc.py index ce6bb83bc77..c8ea03b04fe 100644 --- a/python/cudf/cudf/core/abc.py +++ b/python/cudf/cudf/core/abc.py @@ -1,8 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. """Common abstract base classes for cudf.""" -import pickle - import numpy import cudf @@ -22,6 +20,14 @@ class Serializable: latter converts back from that representation into an equivalent object. """ + # A mapping from class names to the classes themselves. This is used to + # reconstruct the correct class when deserializing an object. + _name_type_map: dict = {} + + def __init_subclass__(cls, /, **kwargs): + super().__init_subclass__(**kwargs) + cls._name_type_map[cls.__name__] = cls + def serialize(self): """Generate an equivalent serializable representation of an object. 
@@ -98,7 +104,7 @@ def device_serialize(self): ) for f in frames ) - header["type-serialized"] = pickle.dumps(type(self)) + header["type-serialized-name"] = type(self).__name__ header["is-cuda"] = [ hasattr(f, "__cuda_array_interface__") for f in frames ] @@ -128,10 +134,10 @@ def device_deserialize(cls, header, frames): :meta private: """ - typ = pickle.loads(header["type-serialized"]) + typ = cls._name_type_map[header["type-serialized-name"]] frames = [ cudf.core.buffer.as_buffer(f) if c else memoryview(f) - for c, f in zip(header["is-cuda"], frames) + for c, f in zip(header["is-cuda"], frames, strict=True) ] return typ.deserialize(header, frames) diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py index ffa306bf93f..625938ca168 100644 --- a/python/cudf/cudf/core/buffer/buffer.py +++ b/python/cudf/cudf/core/buffer/buffer.py @@ -3,7 +3,6 @@ from __future__ import annotations import math -import pickle import weakref from types import SimpleNamespace from typing import TYPE_CHECKING, Any, Literal @@ -432,8 +431,7 @@ def serialize(self) -> tuple[dict, list]: second element is a list containing single frame. """ header: dict[str, Any] = {} - header["type-serialized"] = pickle.dumps(type(self)) - header["owner-type-serialized"] = pickle.dumps(type(self._owner)) + header["owner-type-serialized-name"] = type(self._owner).__name__ header["frame_count"] = 1 frames = [self] return header, frames @@ -460,7 +458,9 @@ def deserialize(cls, header: dict, frames: list) -> Self: if isinstance(frame, cls): return frame # The frame is already deserialized - owner_type: BufferOwner = pickle.loads(header["owner-type-serialized"]) + owner_type: BufferOwner = Serializable._name_type_map[ + header["owner-type-serialized-name"] + ] if hasattr(frame, "__cuda_array_interface__"): owner = owner_type.from_device_memory(frame, exposed=False) else: diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py index 7305ff651c6..cbb65229933 100644 --- a/python/cudf/cudf/core/buffer/spillable_buffer.py +++ b/python/cudf/cudf/core/buffer/spillable_buffer.py @@ -3,7 +3,6 @@ from __future__ import annotations import collections.abc -import pickle import time import weakref from threading import RLock @@ -415,8 +414,7 @@ def serialize(self) -> tuple[dict, list]: header: dict[str, Any] = {} frames: list[Buffer | memoryview] with self._owner.lock: - header["type-serialized"] = pickle.dumps(self.__class__) - header["owner-type-serialized"] = pickle.dumps(type(self._owner)) + header["owner-type-serialized-name"] = type(self._owner).__name__ header["frame_count"] = 1 if self.is_spilled: frames = [self.memoryview()] diff --git a/python/cudf/cudf/core/byte_pair_encoding.py b/python/cudf/cudf/core/byte_pair_encoding.py index 8d38a5f2272..b49f5154697 100644 --- a/python/cudf/cudf/core/byte_pair_encoding.py +++ b/python/cudf/cudf/core/byte_pair_encoding.py @@ -5,9 +5,6 @@ import pylibcudf as plc import cudf -from cudf._lib.nvtext.byte_pair_encode import ( - byte_pair_encoding as cpp_byte_pair_encoding, -) class BytePairEncoder: @@ -25,12 +22,12 @@ class BytePairEncoder: BytePairEncoder """ - def __init__(self, merges_pair: "cudf.Series"): + def __init__(self, merges_pair: cudf.Series) -> None: self.merge_pairs = plc.nvtext.byte_pair_encode.BPEMergePairs( merges_pair._column.to_pylibcudf(mode="read") ) - def __call__(self, text, separator: str = " ") -> cudf.Series: + def __call__(self, text: cudf.Series, separator: str = " ") -> cudf.Series: 
""" Parameters @@ -57,6 +54,6 @@ def __call__(self, text, separator: str = " ") -> cudf.Series: dtype: object """ sep = cudf.Scalar(separator, dtype="str") - result = cpp_byte_pair_encoding(text._column, self.merge_pairs, sep) - - return cudf.Series._from_column(result) + return cudf.Series._from_column( + text._column.byte_pair_encoding(self.merge_pairs, sep) + ) diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index 71ec11e75af..a0cf38c6f51 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1193,7 +1193,7 @@ def _concat( f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) elif newsize == 0: - codes_col = column.column_empty(0, head.codes.dtype, masked=True) + codes_col = column.column_empty(0, head.codes.dtype) else: codes_col = column.concat_columns(codes) # type: ignore[arg-type] diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py index b317858077f..cc07af0f669 100644 --- a/python/cudf/cudf/core/column/column.py +++ b/python/cudf/cudf/core/column/column.py @@ -2,7 +2,7 @@ from __future__ import annotations -import pickle +import warnings from collections import abc from collections.abc import MutableSequence, Sequence from functools import cached_property @@ -32,7 +32,7 @@ drop_duplicates, drop_nulls, ) -from cudf._lib.types import size_type_dtype +from cudf._lib.types import dtype_to_pylibcudf_type, size_type_dtype from cudf.api.types import ( _is_non_decimal_numeric_dtype, _is_pandas_nullable_extension_dtype, @@ -42,7 +42,7 @@ is_string_dtype, ) from cudf.core._compat import PANDAS_GE_210 -from cudf.core._internals import unary +from cudf.core._internals import aggregation, sorting, unary from cudf.core._internals.timezones import get_compatible_timezone from cudf.core.abc import Serializable from cudf.core.buffer import ( @@ -260,21 +260,17 @@ def all(self, skipna: bool = True) -> bool: # The skipna argument is only used for numerical columns. # If all entries are null the result is True, including when the column # is empty. - if self.null_count == self.size: return True - - return libcudf.reduce.reduce("all", self) + return self.reduce("all") def any(self, skipna: bool = True) -> bool: # Early exit for fast cases. 
- if not skipna and self.has_nulls(): return True elif skipna and self.null_count == self.size: return False - - return libcudf.reduce.reduce("any", self) + return self.reduce("any") def dropna(self) -> Self: if self.has_nulls(): @@ -555,7 +551,7 @@ def slice(self, start: int, stop: int, stride: int | None = None) -> Self: if stop < 0 and not (stride < 0 and stop == -1): stop = stop + len(self) if (stride > 0 and start >= stop) or (stride < 0 and start <= stop): - return cast(Self, column_empty(0, self.dtype, masked=True)) + return cast(Self, column_empty(0, self.dtype)) # compute mask slice if stride == 1: return libcudf.copying.column_slice(self, [start, stop])[ @@ -1000,13 +996,13 @@ def is_unique(self) -> bool: @cached_property def is_monotonic_increasing(self) -> bool: - return not self.has_nulls(include_nan=True) and libcudf.sort.is_sorted( + return not self.has_nulls(include_nan=True) and sorting.is_sorted( [self], [True], None ) @cached_property def is_monotonic_decreasing(self) -> bool: - return not self.has_nulls(include_nan=True) and libcudf.sort.is_sorted( + return not self.has_nulls(include_nan=True) and sorting.is_sorted( [self], [False], None ) @@ -1030,15 +1026,20 @@ def contains(self, other: ColumnBase) -> ColumnBase: def sort_values( self: Self, ascending: bool = True, - na_position: str = "last", + na_position: Literal["first", "last"] = "last", ) -> Self: if (not ascending and self.is_monotonic_decreasing) or ( ascending and self.is_monotonic_increasing ): return self.copy() - return libcudf.sort.sort( - [self], column_order=[ascending], null_precedence=[na_position] - )[0] + order = sorting.ordering([ascending], [na_position]) + with acquire_spill_lock(): + plc_table = plc.sorting.sort( + plc.Table([self.to_pylibcudf(mode="read")]), + order[0], + order[1], + ) + return type(self).from_pylibcudf(plc_table.columns()[0]) # type: ignore[return-value] def distinct_count(self, dropna: bool = True) -> int: try: @@ -1058,7 +1059,7 @@ def astype(self, dtype: Dtype, copy: bool = False) -> ColumnBase: if self.dtype == dtype: result = self else: - result = column_empty(0, dtype=dtype, masked=self.nullable) + result = column_empty(0, dtype=dtype) elif dtype == "category": # TODO: Figure out why `cudf.dtype("category")` # astype's different than just the string @@ -1208,7 +1209,7 @@ def argsort( as_column(range(len(self) - 1, -1, -1)), ) else: - return libcudf.sort.order_by( + return sorting.order_by( [self], [ascending], na_position, stable=True ) @@ -1294,28 +1295,27 @@ def serialize(self) -> tuple[dict, list]: header: dict[Any, Any] = {} frames = [] - header["type-serialized"] = pickle.dumps(type(self)) try: - dtype, dtype_frames = self.dtype.serialize() + dtype, dtype_frames = self.dtype.device_serialize() header["dtype"] = dtype frames.extend(dtype_frames) header["dtype-is-cudf-serialized"] = True except AttributeError: - header["dtype"] = pickle.dumps(self.dtype) + header["dtype"] = self.dtype.str header["dtype-is-cudf-serialized"] = False if self.data is not None: - data_header, data_frames = self.data.serialize() + data_header, data_frames = self.data.device_serialize() header["data"] = data_header frames.extend(data_frames) if self.mask is not None: - mask_header, mask_frames = self.mask.serialize() + mask_header, mask_frames = self.mask.device_serialize() header["mask"] = mask_header frames.extend(mask_frames) if self.children: child_headers, child_frames = zip( - *(c.serialize() for c in self.children) + *(c.device_serialize() for c in self.children) ) 
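Column serialization now goes through `device_serialize`, which relies on the name-based type registry that `Serializable` gains in this change rather than pickling classes. A tiny standalone sketch of that registry pattern (class names here are invented):

class Registry:
    # Subclasses register themselves by name at definition time.
    _name_type_map: dict = {}

    def __init_subclass__(cls, /, **kwargs):
        super().__init_subclass__(**kwargs)
        cls._name_type_map[cls.__name__] = cls


class Payload(Registry):
    pass


header = {"type-serialized-name": Payload.__name__}
# Deserialization looks the concrete type up by name -- no pickle involved.
assert Registry._name_type_map[header["type-serialized-name"]] is Payload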
header["subheaders"] = list(child_headers) frames.extend(chain(*child_frames)) @@ -1327,8 +1327,7 @@ def serialize(self) -> tuple[dict, list]: def deserialize(cls, header: dict, frames: list) -> ColumnBase: def unpack(header, frames) -> tuple[Any, list]: count = header["frame_count"] - klass = pickle.loads(header["type-serialized"]) - obj = klass.deserialize(header, frames[:count]) + obj = cls.device_deserialize(header, frames[:count]) return obj, frames[count:] assert header["frame_count"] == len(frames), ( @@ -1338,7 +1337,7 @@ def unpack(header, frames) -> tuple[Any, list]: if header["dtype-is-cudf-serialized"]: dtype, frames = unpack(header["dtype"], frames) else: - dtype = pickle.loads(header["dtype"]) + dtype = np.dtype(header["dtype"]) if "data" in header: data, frames = unpack(header["data"], frames) else: @@ -1396,33 +1395,35 @@ def _reduce( ) if isinstance(preprocessed, ColumnBase): dtype = kwargs.pop("dtype", None) - return libcudf.reduce.reduce( - op, preprocessed, dtype=dtype, **kwargs - ) + return preprocessed.reduce(op, dtype, **kwargs) return preprocessed + def _can_return_nan(self, skipna: bool | None = None) -> bool: + return not skipna and self.has_nulls(include_nan=False) + def _process_for_reduction( self, skipna: bool | None = None, min_count: int = 0 ) -> ColumnBase | ScalarLike: - if skipna is None: - skipna = True + skipna = True if skipna is None else skipna - if self.has_nulls(): + if self._can_return_nan(skipna=skipna): + return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) + + col = self.nans_to_nulls() if skipna else self + if col.has_nulls(): if skipna: - result_col = self.dropna() + col = col.dropna() else: return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) - result_col = self - # TODO: If and when pandas decides to validate that `min_count` >= 0 we # should insert comparable behavior. # https://github.com/pandas-dev/pandas/issues/50022 if min_count > 0: - valid_count = len(result_col) - result_col.null_count + valid_count = len(col) - col.null_count if valid_count < min_count: return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) - return result_col + return col def _reduction_result_dtype(self, reduction_op: str) -> Dtype: """ @@ -1515,7 +1516,7 @@ def _return_sentinel_column(): del right_rows # reorder `codes` so that its values correspond to the # values of `self`: - (codes,) = libcudf.sort.sort_by_key( + (codes,) = sorting.sort_by_key( codes, [left_gather_map], [True], ["last"], stable=True ) return codes.fillna(na_sentinel.value) @@ -1532,6 +1533,91 @@ def one_hot_encode( for col in plc_table.columns() ) + @acquire_spill_lock() + def scan(self, scan_op: str, inclusive: bool, **kwargs) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.reduce.scan( + self.to_pylibcudf(mode="read"), + aggregation.make_aggregation(scan_op, kwargs).c_obj, + plc.reduce.ScanType.INCLUSIVE + if inclusive + else plc.reduce.ScanType.EXCLUSIVE, + ) + ) + + def reduce(self, reduction_op: str, dtype=None, **kwargs) -> ScalarLike: + if dtype is not None: + warnings.warn( + "dtype is deprecated and will be remove in a future release. " + "Cast the result (e.g. 
.astype) after the operation instead.", + FutureWarning, + ) + col_dtype = dtype + else: + col_dtype = self._reduction_result_dtype(reduction_op) + + # check empty case + if len(self) <= self.null_count: + if reduction_op == "sum" or reduction_op == "sum_of_squares": + return self.dtype.type(0) + if reduction_op == "product": + return self.dtype.type(1) + if reduction_op == "any": + return False + + return cudf.utils.dtypes._get_nan_for_dtype(col_dtype) + + with acquire_spill_lock(): + plc_scalar = plc.reduce.reduce( + self.to_pylibcudf(mode="read"), + aggregation.make_aggregation(reduction_op, kwargs).c_obj, + dtype_to_pylibcudf_type(col_dtype), + ) + result_col = type(self).from_pylibcudf( + plc.Column.from_scalar(plc_scalar, 1) + ) + if plc_scalar.type().id() in { + plc.TypeId.DECIMAL128, + plc.TypeId.DECIMAL64, + plc.TypeId.DECIMAL32, + }: + scale = -plc_scalar.type().scale() + # https://docs.microsoft.com/en-us/sql/t-sql/data-types/precision-scale-and-length-transact-sql + p = col_dtype.precision + nrows = len(self) + if reduction_op in {"min", "max"}: + new_p = p + elif reduction_op == "sum": + new_p = p + nrows - 1 + elif reduction_op == "product": + new_p = p * nrows + nrows - 1 + elif reduction_op == "sum_of_squares": + new_p = 2 * p + nrows + else: + raise NotImplementedError( + f"{reduction_op} not implemented for decimal types." + ) + precision = max(min(new_p, col_dtype.MAX_PRECISION), 0) + new_dtype = type(col_dtype)(precision, scale) + result_col = result_col.astype(new_dtype) + elif isinstance(col_dtype, cudf.IntervalDtype): + result_col = type(self).from_struct_column( # type: ignore[attr-defined] + result_col, closed=col_dtype.closed + ) + return result_col.element_indexing(0) + + @acquire_spill_lock() + def minmax(self) -> tuple[ScalarLike, ScalarLike]: + min_val, max_val = plc.reduce.minmax(self.to_pylibcudf(mode="read")) + return ( + type(self) + .from_pylibcudf(plc.Column.from_scalar(min_val, 1)) + .element_indexing(0), + type(self) + .from_pylibcudf(plc.Column.from_scalar(max_val, 1)) + .element_indexing(0), + ) + def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool: """Check if an object dtype Series or array contains NaN.""" @@ -1544,7 +1630,6 @@ def _has_any_nan(arbitrary: pd.Series | np.ndarray) -> bool: def column_empty( row_count: int, dtype: Dtype = "object", - masked: bool = False, for_numba: bool = False, ) -> ColumnBase: """ @@ -1561,9 +1646,6 @@ def column_empty( dtype : Dtype Type of the column. - masked : bool - Unused. - for_numba : bool, default False If True, don't allocate a mask as it's not supported by numba. 
""" @@ -2307,7 +2389,9 @@ def serialize_columns(columns: list[ColumnBase]) -> tuple[list[dict], list]: frames = [] if len(columns) > 0: - header_columns = [c.serialize() for c in columns] + header_columns: list[tuple[dict, list]] = [ + c.device_serialize() for c in columns + ] headers, column_frames = zip(*header_columns) for f in column_frames: frames.extend(f) @@ -2324,7 +2408,7 @@ def deserialize_columns(headers: list[dict], frames: list) -> list[ColumnBase]: for meta in headers: col_frame_count = meta["frame_count"] - col_typ = pickle.loads(meta["type-serialized"]) + col_typ = Serializable._name_type_map[meta["type-serialized-name"]] colobj = col_typ.deserialize(meta, frames[:col_frame_count]) columns.append(colobj) # Advance frames @@ -2337,7 +2421,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: """Concatenate a sequence of columns.""" if len(objs) == 0: dtype = cudf.dtype(None) - return column_empty(0, dtype=dtype, masked=True) + return column_empty(0, dtype=dtype) # If all columns are `NumericalColumn` with different dtypes, # we cast them to a common dtype. @@ -2384,7 +2468,7 @@ def concat_columns(objs: "MutableSequence[ColumnBase]") -> ColumnBase: f"size > {libcudf.MAX_COLUMN_SIZE_STR}" ) elif newsize == 0: - return column_empty(0, head.dtype, masked=True) + return column_empty(0, head.dtype) # Filter out inputs that have 0 length, then concatenate. objs_with_len = [o for o in objs if len(o)] diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py index b526a6efa51..81b82040b8d 100644 --- a/python/cudf/cudf/core/column/datetime.py +++ b/python/cudf/cudf/core/column/datetime.py @@ -598,14 +598,12 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn: if len(self) == 0: return cast( cudf.core.column.StringColumn, - column.column_empty(0, dtype="object", masked=False), + column.column_empty(0, dtype="object"), ) if format in _DATETIME_SPECIAL_FORMATS: names = as_column(_DATETIME_NAMES) else: - names = cudf.core.column.column_empty( - 0, dtype="object", masked=False - ) + names = column.column_empty(0, dtype="object") return string._datetime_to_str_typecast_functions[self.dtype]( self, format, names ) diff --git a/python/cudf/cudf/core/column/interval.py b/python/cudf/cudf/core/column/interval.py index 34975fc94f4..dd8f58a118e 100644 --- a/python/cudf/cudf/core/column/interval.py +++ b/python/cudf/cudf/core/column/interval.py @@ -14,7 +14,6 @@ if TYPE_CHECKING: from typing_extensions import Self - from cudf._typing import ScalarLike from cudf.core.buffer import Buffer from cudf.core.column import ColumnBase @@ -211,16 +210,3 @@ def element_indexing(self, index: int): if cudf.get_option("mode.pandas_compatible"): return pd.Interval(**result, closed=self.dtype.closed) return result - - def _reduce( - self, - op: str, - skipna: bool | None = None, - min_count: int = 0, - *args, - **kwargs, - ) -> ScalarLike: - result = super()._reduce(op, skipna, min_count, *args, **kwargs) - if cudf.get_option("mode.pandas_compatible"): - return pd.Interval(**result, closed=self.dtype.closed) - return result diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py index 790cd6ea9bb..f099cef3331 100644 --- a/python/cudf/cudf/core/column/numerical.py +++ b/python/cudf/cudf/core/column/numerical.py @@ -420,22 +420,12 @@ def all(self, skipna: bool = True) -> bool: # If all entries are null the result is True, including when the column # is empty. 
result_col = self.nans_to_nulls() if skipna else self - - if result_col.null_count == result_col.size: - return True - - return libcudf.reduce.reduce("all", result_col) + return super(type(self), result_col).all(skipna=skipna) def any(self, skipna: bool = True) -> bool: # Early exit for fast cases. result_col = self.nans_to_nulls() if skipna else self - - if not skipna and result_col.has_nulls(): - return True - elif skipna and result_col.null_count == result_col.size: - return False - - return libcudf.reduce.reduce("any", result_col) + return super(type(self), result_col).any(skipna=skipna) @functools.cached_property def nan_count(self) -> int: @@ -483,19 +473,6 @@ def _process_values_for_isin( def _can_return_nan(self, skipna: bool | None = None) -> bool: return not skipna and self.has_nulls(include_nan=True) - def _process_for_reduction( - self, skipna: bool | None = None, min_count: int = 0 - ) -> NumericalColumn | ScalarLike: - skipna = True if skipna is None else skipna - - if self._can_return_nan(skipna=skipna): - return cudf.utils.dtypes._get_nan_for_dtype(self.dtype) - - col = self.nans_to_nulls() if skipna else self - return super(NumericalColumn, col)._process_for_reduction( - skipna=skipna, min_count=min_count - ) - def find_and_replace( self, to_replace: ColumnLike, @@ -741,6 +718,40 @@ def _reduction_result_dtype(self, reduction_op: str) -> Dtype: return super()._reduction_result_dtype(reduction_op) + @acquire_spill_lock() + def digitize(self, bins: np.ndarray, right: bool = False) -> Self: + """Return the indices of the bins to which each value in column belongs. + + Parameters + ---------- + bins : np.ndarray + 1-D column-like object of bins with same type as `column`, should be + monotonically increasing. + right : bool + Indicates whether interval contains the right or left bin edge. + + Returns + ------- + A column containing the indices + """ + if self.dtype != bins.dtype: + raise ValueError( + "digitize() expects bins and input column have the same dtype." + ) + + bin_col = as_column(bins, dtype=bins.dtype) + if bin_col.nullable: + raise ValueError("`bins` cannot contain null entries.") + + return type(self).from_pylibcudf( # type: ignore[return-value] + getattr(plc.search, "lower_bound" if right else "upper_bound")( + plc.Table([bin_col.to_pylibcudf(mode="read")]), + plc.Table([self.to_pylibcudf(mode="read")]), + [plc.types.Order.ASCENDING], + [plc.types.NullOrder.BEFORE], + ) + ) + def _normalize_find_and_replace_input( input_column_dtype: DtypeObj, col_to_normalize: ColumnBase | list @@ -795,34 +806,3 @@ def _normalize_find_and_replace_input( if not normalized_column.can_cast_safely(input_column_dtype): return normalized_column return normalized_column.astype(input_column_dtype) - - -def digitize( - column: ColumnBase, bins: np.ndarray, right: bool = False -) -> ColumnBase: - """Return the indices of the bins to which each value in column belongs. - - Parameters - ---------- - column : Column - Input column. - bins : Column-like - 1-D column-like object of bins with same type as `column`, should be - monotonically increasing. - right : bool - Indicates whether interval contains the right or left bin edge. - - Returns - ------- - A column containing the indices - """ - if not column.dtype == bins.dtype: - raise ValueError( - "Digitize() expects bins and input column have the same dtype." 
- ) - - bin_col = as_column(bins, dtype=bins.dtype) - if bin_col.nullable: - raise ValueError("`bins` cannot contain null entries.") - - return as_column(libcudf.sort.digitize([column], [bin_col], right)) diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py index 3f9abdabc2f..aaf2239a71e 100644 --- a/python/cudf/cudf/core/column/numerical_base.py +++ b/python/cudf/cudf/core/column/numerical_base.py @@ -10,7 +10,7 @@ import pylibcudf as plc import cudf -from cudf import _lib as libcudf +from cudf.core._internals import sorting from cudf.core.buffer import Buffer, acquire_spill_lock from cudf.core.column.column import ColumnBase from cudf.core.missing import NA @@ -139,12 +139,12 @@ def quantile( result = cast( NumericalBaseColumn, cudf.core.column.column_empty( - row_count=len(q), dtype=self.dtype, masked=True + row_count=len(q), dtype=self.dtype ), ) else: # get sorted indices and exclude nulls - indices = libcudf.sort.order_by( + indices = sorting.order_by( [self], [True], "first", stable=True ).slice(self.null_count, len(self)) with acquire_spill_lock(): @@ -263,6 +263,6 @@ def round( ) def _scan(self, op: str) -> ColumnBase: - return libcudf.reduce.scan( - op.replace("cum", ""), self, True - )._with_type_metadata(self.dtype) + return self.scan(op.replace("cum", ""), True)._with_type_metadata( + self.dtype + ) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 06196717ce3..d76caa5c3b8 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -20,7 +20,7 @@ import cudf.core.column.column as column import cudf.core.column.datetime as datetime from cudf import _lib as libcudf -from cudf._lib import string_casting as str_cast, strings as libstrings +from cudf._lib import string_casting as str_cast from cudf._lib.column import Column from cudf._lib.types import size_type_dtype from cudf.api.types import is_integer, is_scalar, is_string_dtype @@ -45,6 +45,7 @@ SeriesOrIndex, ) from cudf.core.buffer import Buffer + from cudf.core.column.lists import ListColumn from cudf.core.column.numerical import NumericalColumn @@ -624,7 +625,7 @@ def join( def _split_by_character(self): col = self._column.fillna("") # sanitize nulls - result_col = libstrings.character_tokenize(col) + result_col = col.character_tokenize() offset_col = col.children[0] @@ -4693,9 +4694,7 @@ def normalize_spaces(self) -> SeriesOrIndex: 1 test string dtype: object """ - return self._return_or_inplace( - libstrings.normalize_spaces(self._column) - ) + return self._return_or_inplace(self._column.normalize_spaces()) def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: r""" @@ -4743,7 +4742,7 @@ def normalize_characters(self, do_lower: bool = True) -> SeriesOrIndex: dtype: object """ return self._return_or_inplace( - libstrings.normalize_characters(self._column, do_lower) + self._column.normalize_characters(do_lower) ) def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: @@ -4775,16 +4774,16 @@ def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: 2 goodbye dtype: object """ - delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True) + delim = _massage_string_arg(delimiter, "delimiter", allow_col=True) - if isinstance(delimiter, Column): + if isinstance(delim, Column): result = self._return_or_inplace( - libstrings._tokenize_column(self._column, delimiter), + self._column.tokenize_column(delim), retain_index=False, ) - elif isinstance(delimiter, 
cudf.Scalar): + elif isinstance(delim, cudf.Scalar): result = self._return_or_inplace( - libstrings._tokenize_scalar(self._column, delimiter), + self._column.tokenize_scalar(delim), retain_index=False, ) else: @@ -4799,7 +4798,7 @@ def tokenize(self, delimiter: str = " ") -> SeriesOrIndex: return result def detokenize( - self, indices: "cudf.Series", separator: str = " " + self, indices: cudf.Series, separator: str = " " ) -> SeriesOrIndex: """ Combines tokens into strings by concatenating them in the order @@ -4829,9 +4828,9 @@ def detokenize( 2 three dtype: object """ - separator = _massage_string_arg(separator, "separator") + sep = _massage_string_arg(separator, "separator") return self._return_or_inplace( - libstrings.detokenize(self._column, indices._column, separator), + self._column.detokenize(indices._column, sep), # type: ignore[arg-type] retain_index=False, ) @@ -4882,17 +4881,15 @@ def character_tokenize(self) -> SeriesOrIndex: 2 . dtype: object """ - result_col = libstrings.character_tokenize(self._column) + result_col = self._column.character_tokenize() if isinstance(self._parent, cudf.Series): lengths = self.len().fillna(0) index = self._parent.index.repeat(lengths) - return cudf.Series._from_column( + return type(self._parent)._from_column( result_col, name=self._parent.name, index=index ) - elif isinstance(self._parent, cudf.BaseIndex): - return cudf.Index._from_column(result_col, name=self._parent.name) else: - return result_col + return self._return_or_inplace(result_col) def token_count(self, delimiter: str = " ") -> SeriesOrIndex: """ @@ -4919,15 +4916,15 @@ def token_count(self, delimiter: str = " ") -> SeriesOrIndex: 2 0 dtype: int32 """ - delimiter = _massage_string_arg(delimiter, "delimiter", allow_col=True) - if isinstance(delimiter, Column): + delim = _massage_string_arg(delimiter, "delimiter", allow_col=True) + if isinstance(delim, Column): return self._return_or_inplace( - libstrings._count_tokens_column(self._column, delimiter) + self._column.count_tokens_column(delim) ) - elif isinstance(delimiter, cudf.Scalar): + elif isinstance(delim, cudf.Scalar): return self._return_or_inplace( - libstrings._count_tokens_scalar(self._column, delimiter) + self._column.count_tokens_scalar(delim) # type: ignore[arg-type] ) else: raise TypeError( @@ -4966,9 +4963,9 @@ def ngrams(self, n: int = 2, separator: str = "_") -> SeriesOrIndex: 2 xyz_hhh dtype: object """ - separator = _massage_string_arg(separator, "separator") + sep = _massage_string_arg(separator, "separator") return self._return_or_inplace( - libstrings.generate_ngrams(self._column, n, separator), + self._column.generate_ngrams(n, sep), # type: ignore[arg-type] retain_index=False, ) @@ -5015,7 +5012,7 @@ def character_ngrams( dtype: list """ result = self._return_or_inplace( - libstrings.generate_character_ngrams(self._column, n), + self._column.generate_character_ngrams(n), retain_index=True, ) if isinstance(result, cudf.Series) and not as_list: @@ -5060,7 +5057,7 @@ def hash_character_ngrams( """ result = self._return_or_inplace( - libstrings.hash_character_ngrams(self._column, n), + self._column.hash_character_ngrams(n), retain_index=True, ) if isinstance(result, cudf.Series) and not as_list: @@ -5098,10 +5095,10 @@ def ngrams_tokenize( 2 best_book dtype: object """ - delimiter = _massage_string_arg(delimiter, "delimiter") - separator = _massage_string_arg(separator, "separator") + delim = _massage_string_arg(delimiter, "delimiter") + sep = _massage_string_arg(separator, "separator") return 
self._return_or_inplace( - libstrings.ngrams_tokenize(self._column, n, delimiter, separator), + self._column.ngrams_tokenize(n, delim, sep), # type: ignore[arg-type] retain_index=False, ) @@ -5180,10 +5177,9 @@ def replace_tokens( ) return self._return_or_inplace( - libstrings.replace_tokens( - self._column, - targets_column, - replacements_column, + self._column.replace_tokens( + targets_column, # type: ignore[arg-type] + replacements_column, # type: ignore[arg-type] cudf.Scalar(delimiter, dtype="str"), ), ) @@ -5251,8 +5247,7 @@ def filter_tokens( ) return self._return_or_inplace( - libstrings.filter_tokens( - self._column, + self._column.filter_tokens( min_token_length, cudf.Scalar(replacement, dtype="str"), cudf.Scalar(delimiter, dtype="str"), @@ -5278,9 +5273,7 @@ def porter_stemmer_measure(self) -> SeriesOrIndex: 1 2 dtype: int32 """ - return self._return_or_inplace( - libstrings.porter_stemmer_measure(self._column) - ) + return self._return_or_inplace(self._column.porter_stemmer_measure()) def is_consonant(self, position) -> SeriesOrIndex: """ @@ -5313,17 +5306,10 @@ def is_consonant(self, position) -> SeriesOrIndex: 1 False dtype: bool """ - ltype = libstrings.LetterType.CONSONANT - if can_convert_to_column(position): - return self._return_or_inplace( - libstrings.is_letter_multi( - self._column, ltype, column.as_column(position) - ), - ) - + position = column.as_column(position) return self._return_or_inplace( - libstrings.is_letter(self._column, ltype, position) + self._column.is_letter(False, position) # type: ignore[arg-type] ) def is_vowel(self, position) -> SeriesOrIndex: @@ -5357,17 +5343,10 @@ def is_vowel(self, position) -> SeriesOrIndex: 1 True dtype: bool """ - ltype = libstrings.LetterType.VOWEL - if can_convert_to_column(position): - return self._return_or_inplace( - libstrings.is_letter_multi( - self._column, ltype, column.as_column(position) - ), - ) - + position = column.as_column(position) return self._return_or_inplace( - libstrings.is_letter(self._column, ltype, position) + self._column.is_letter(True, position) # type: ignore[arg-type] ) def edit_distance(self, targets) -> SeriesOrIndex: @@ -5416,7 +5395,7 @@ def edit_distance(self, targets) -> SeriesOrIndex: ) return self._return_or_inplace( - libstrings.edit_distance(self._column, targets_column) + self._column.edit_distance(targets_column) # type: ignore[arg-type] ) def edit_distance_matrix(self) -> SeriesOrIndex: @@ -5456,9 +5435,7 @@ def edit_distance_matrix(self) -> SeriesOrIndex: "Cannot compute edit distance between null strings. " "Consider removing them using `dropna` or fill with `fillna`." 
) - return self._return_or_inplace( - libstrings.edit_distance_matrix(self._column) - ) + return self._return_or_inplace(self._column.edit_distance_matrix()) def minhash( self, seed: np.uint32, a: ColumnLike, b: ColumnLike, width: int @@ -5508,7 +5485,7 @@ def minhash( f"Expecting a Series with dtype uint32, got {type(b)}" ) return self._return_or_inplace( - libstrings.minhash(self._column, seed, a_column, b_column, width) + self._column.minhash(seed, a_column, b_column, width) # type: ignore[arg-type] ) def minhash64( @@ -5559,7 +5536,7 @@ def minhash64( f"Expecting a Series with dtype uint64, got {type(b)}" ) return self._return_or_inplace( - libstrings.minhash64(self._column, seed, a_column, b_column, width) + self._column.minhash64(seed, a_column, b_column, width) # type: ignore[arg-type] ) def jaccard_index(self, input: cudf.Series, width: int) -> SeriesOrIndex: @@ -5585,13 +5562,14 @@ def jaccard_index(self, input: cudf.Series, width: int) -> SeriesOrIndex: 1 0.307692 dtype: float32 """ - return self._return_or_inplace( - libstrings.jaccard_index(self._column, input._column, width), + self._column.jaccard_index(input._column, width) ) -def _massage_string_arg(value, name, allow_col=False): +def _massage_string_arg( + value, name, allow_col: bool = False +) -> StringColumn | cudf.Scalar: if isinstance(value, cudf.Scalar): return value @@ -5602,9 +5580,9 @@ def _massage_string_arg(value, name, allow_col=False): if allow_col: if isinstance(value, list): - return column.as_column(value, dtype="str") + return column.as_column(value, dtype="str") # type: ignore[return-value] - if isinstance(value, Column) and is_string_dtype(value.dtype): + if isinstance(value, StringColumn): return value allowed_types.append("Column") @@ -5877,7 +5855,7 @@ def strptime( f"dtype must be datetime or timedelta type, not {dtype}" ) elif self.null_count == len(self): - return column.column_empty(len(self), dtype=dtype, masked=True) # type: ignore[return-value] + return column.column_empty(len(self), dtype=dtype) # type: ignore[return-value] elif (self == "None").any(): raise ValueError( "Cannot convert `None` value to datetime or timedelta." 
@@ -6148,6 +6126,278 @@ def view(self, dtype) -> "cudf.core.column.ColumnBase": return to_view.view(dtype) + @acquire_spill_lock() + def minhash( + self, + seed: np.uint32, + a: NumericalColumn, + b: NumericalColumn, + width: int, + ) -> ListColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.minhash.minhash( + self.to_pylibcudf(mode="read"), + seed, + a.to_pylibcudf(mode="read"), + b.to_pylibcudf(mode="read"), + width, + ) + ) + + @acquire_spill_lock() + def minhash64( + self, + seed: np.uint64, + a: NumericalColumn, + b: NumericalColumn, + width: int, + ) -> ListColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.minhash.minhash64( + self.to_pylibcudf(mode="read"), + seed, + a.to_pylibcudf(mode="read"), + b.to_pylibcudf(mode="read"), + width, + ) + ) + + @acquire_spill_lock() + def jaccard_index(self, other: Self, width: int) -> NumericalColumn: + result = plc.nvtext.jaccard.jaccard_index( + self.to_pylibcudf(mode="read"), + other.to_pylibcudf(mode="read"), + width, + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def generate_ngrams(self, ngrams: int, separator: cudf.Scalar) -> Self: + result = plc.nvtext.generate_ngrams.generate_ngrams( + self.to_pylibcudf(mode="read"), + ngrams, + separator.device_value.c_value, + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def generate_character_ngrams(self, ngrams: int) -> ListColumn: + result = plc.nvtext.generate_ngrams.generate_character_ngrams( + self.to_pylibcudf(mode="read"), ngrams + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def hash_character_ngrams(self, ngrams: int) -> ListColumn: + result = plc.nvtext.generate_ngrams.hash_character_ngrams( + self.to_pylibcudf(mode="read"), ngrams + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def edit_distance(self, targets: Self) -> NumericalColumn: + result = plc.nvtext.edit_distance.edit_distance( + self.to_pylibcudf(mode="read"), targets.to_pylibcudf(mode="read") + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def edit_distance_matrix(self) -> ListColumn: + result = plc.nvtext.edit_distance.edit_distance_matrix( + self.to_pylibcudf(mode="read") + ) + return type(self).from_pylibcudf(result) # type: ignore[return-value] + + @acquire_spill_lock() + def byte_pair_encoding( + self, + merge_pairs: plc.nvtext.byte_pair_encode.BPEMergePairs, + separator: cudf.Scalar, + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.byte_pair_encode.byte_pair_encoding( + self.to_pylibcudf(mode="read"), + merge_pairs, + separator.device_value.c_value, + ) + ) + + @acquire_spill_lock() + def ngrams_tokenize( + self, + ngrams: int, + delimiter: cudf.Scalar, + separator: cudf.Scalar, + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.ngrams_tokenize.ngrams_tokenize( + self.to_pylibcudf(mode="read"), + ngrams, + delimiter.device_value.c_value, + separator.device_value.c_value, + ) + ) + + @acquire_spill_lock() + def normalize_spaces(self) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.normalize.normalize_spaces( + self.to_pylibcudf(mode="read") + ) + ) + + @acquire_spill_lock() + def normalize_characters(self, do_lower: bool = True) -> Self: + return 
Column.from_pylibcudf( # type: ignore[return-value] + plc.nvtext.normalize.normalize_characters( + self.to_pylibcudf(mode="read"), + do_lower, + ) + ) + + @acquire_spill_lock() + def replace_tokens( + self, targets: Self, replacements: Self, delimiter: cudf.Scalar + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.replace.replace_tokens( + self.to_pylibcudf(mode="read"), + targets.to_pylibcudf(mode="read"), + replacements.to_pylibcudf(mode="read"), + delimiter.device_value.c_value, + ) + ) + + @acquire_spill_lock() + def filter_tokens( + self, + min_token_length: int, + replacement: cudf.Scalar, + delimiter: cudf.Scalar, + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.replace.filter_tokens( + self.to_pylibcudf(mode="read"), + min_token_length, + replacement.device_value.c_value, + delimiter.device_value.c_value, + ) + ) + + @acquire_spill_lock() + def porter_stemmer_measure(self) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.stemmer.porter_stemmer_measure( + self.to_pylibcudf(mode="read") + ) + ) + + @acquire_spill_lock() + def is_letter(self, is_vowel: bool, index: int | NumericalColumn) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.stemmer.is_letter( + self.to_pylibcudf(mode="read"), + is_vowel, + index + if isinstance(index, int) + else index.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def subword_tokenize( + self, + hashed_vocabulary: plc.nvtext.subword_tokenize.HashedVocabulary, + max_sequence_length: int = 64, + stride: int = 48, + do_lower: bool = True, + do_truncate: bool = False, + ) -> tuple[ColumnBase, ColumnBase, ColumnBase]: + """ + Subword tokenizes text series by using the pre-loaded hashed vocabulary + """ + result = plc.nvtext.subword_tokenize.subword_tokenize( + self.to_pylibcudf(mode="read"), + hashed_vocabulary, + max_sequence_length, + stride, + do_lower, + do_truncate, + ) + # return the 3 tensor components + tokens = type(self).from_pylibcudf(result[0]) + masks = type(self).from_pylibcudf(result[1]) + metadata = type(self).from_pylibcudf(result[2]) + return tokens, masks, metadata + + @acquire_spill_lock() + def tokenize_scalar(self, delimiter: cudf.Scalar) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.tokenize_scalar( + self.to_pylibcudf(mode="read"), delimiter.device_value.c_value + ) + ) + + @acquire_spill_lock() + def tokenize_column(self, delimiters: Self) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.tokenize_column( + self.to_pylibcudf(mode="read"), + delimiters.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def count_tokens_scalar(self, delimiter: cudf.Scalar) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.count_tokens_scalar( + self.to_pylibcudf(mode="read"), delimiter.device_value.c_value + ) + ) + + @acquire_spill_lock() + def count_tokens_column(self, delimiters: Self) -> NumericalColumn: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.count_tokens_column( + self.to_pylibcudf(mode="read"), + delimiters.to_pylibcudf(mode="read"), + ) + ) + + @acquire_spill_lock() + def character_tokenize(self) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.character_tokenize( + self.to_pylibcudf(mode="read") + ) + ) + + 
@acquire_spill_lock() + def tokenize_with_vocabulary( + self, + vocabulary: plc.nvtext.tokenize.TokenizeVocabulary, + delimiter: cudf.Scalar, + default_id: int, + ) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.tokenize_with_vocabulary( + self.to_pylibcudf(mode="read"), + vocabulary, + delimiter.device_value.c_value, + default_id, + ) + ) + + @acquire_spill_lock() + def detokenize(self, indices: ColumnBase, separator: cudf.Scalar) -> Self: + return type(self).from_pylibcudf( # type: ignore[return-value] + plc.nvtext.tokenize.detokenize( + self.to_pylibcudf(mode="read"), + indices.to_pylibcudf(mode="read"), + separator.device_value.c_value, + ) + ) + def _modify_characters( self, method: Callable[[plc.Column], plc.Column] ) -> Self: diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py index db6ad72ab56..ba765b50729 100644 --- a/python/cudf/cudf/core/column/struct.py +++ b/python/cudf/cudf/core/column/struct.py @@ -107,12 +107,9 @@ def memory_usage(self) -> int: return n - def element_indexing(self, index: int): + def element_indexing(self, index: int) -> dict: result = super().element_indexing(index) - return { - field: value - for field, value in zip(self.dtype.fields, result.values()) - } + return dict(zip(self.dtype.fields, result.values())) def __setitem__(self, key, value): if isinstance(value, dict): diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py index f3a7916aa35..8b1515acae2 100644 --- a/python/cudf/cudf/core/column/timedelta.py +++ b/python/cudf/cudf/core/column/timedelta.py @@ -294,7 +294,7 @@ def strftime(self, format: str) -> cudf.core.column.StringColumn: if len(self) == 0: return cast( cudf.core.column.StringColumn, - column.column_empty(0, dtype="object", masked=False), + column.column_empty(0, dtype="object"), ) else: return string._timedelta_to_str_typecast_functions[self.dtype]( diff --git a/python/cudf/cudf/core/copy_types.py b/python/cudf/cudf/core/copy_types.py index 16d8964f083..4b6ad59c8e1 100644 --- a/python/cudf/cudf/core/copy_types.py +++ b/python/cudf/cudf/core/copy_types.py @@ -5,7 +5,6 @@ from typing_extensions import Self import cudf -import cudf._lib as libcudf from cudf._lib.types import size_type_dtype if TYPE_CHECKING: @@ -70,8 +69,8 @@ def __init__(self, column: Any, nrows: int, *, nullify: bool): if self.column.dtype.kind not in {"i", "u"}: raise TypeError("Gather map must have integer dtype") if not nullify: - lo, hi = libcudf.reduce.minmax(self.column) - if lo.value < -nrows or hi.value >= nrows: + lo, hi = self.column.minmax() + if lo < -nrows or hi >= nrows: raise IndexError( f"Gather map is out of bounds for [0, {nrows})" ) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 325601e5311..fce361e18ea 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -7,7 +7,6 @@ import itertools import numbers import os -import pickle import re import sys import textwrap @@ -50,7 +49,6 @@ ) from cudf.core import column, df_protocol, indexing_utils, reshape from cudf.core._compat import PANDAS_LT_300 -from cudf.core.abc import Serializable from cudf.core.buffer import acquire_spill_lock, as_buffer from cudf.core.column import ( CategoricalColumn, @@ -588,7 +586,7 @@ class _DataFrameiAtIndexer(_DataFrameIlocIndexer): pass -class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin): +class DataFrame(IndexedFrame, GetAttrGetItemMixin): """ A GPU 
Dataframe object. @@ -776,9 +774,7 @@ def __init__( label_dtype = getattr(columns, "dtype", None) self._data = ColumnAccessor( { - k: column.column_empty( - len(self), dtype="object", masked=True - ) + k: column_empty(len(self), dtype="object") for k in columns }, level_names=tuple(columns.names) @@ -981,8 +977,8 @@ def _init_from_series_list(self, data, columns, index): if columns is not None: for col_name in columns: if col_name not in self._data: - self._data[col_name] = column.column_empty( - row_count=len(self), dtype=None, masked=True + self._data[col_name] = column_empty( + row_count=len(self), dtype=None ) self._data._level_names = ( tuple(columns.names) @@ -1033,11 +1029,7 @@ def _init_from_list_like(self, data, index=None, columns=None): data = list(itertools.zip_longest(*data)) if columns is not None and len(data) == 0: - data = [ - cudf.core.column.column_empty(row_count=0, dtype=None) - for _ in columns - ] - + data = [column_empty(row_count=0, dtype=None) for _ in columns] for col_name, col in enumerate(data): self._data[col_name] = column.as_column(col) self._data.rangeindex = True @@ -1076,9 +1068,8 @@ def _init_from_dict_like( # the provided index, so we need to return a masked # array of nulls if an index is given. empty_column = functools.partial( - cudf.core.column.column_empty, - row_count=(0 if index is None else len(index)), - masked=index is not None, + column_empty, + row_count=0 if index is None else len(index), ) data = { @@ -1190,7 +1181,7 @@ def _constructor_expanddim(self): def serialize(self): header, frames = super().serialize() - header["index"], index_frames = self.index.serialize() + header["index"], index_frames = self.index.device_serialize() header["index_frame_count"] = len(index_frames) # For backwards compatibility with older versions of cuDF, index # columns are placed before data columns. 
@@ -1205,8 +1196,7 @@ def deserialize(cls, header, frames): header, frames[header["index_frame_count"] :] ) - idx_typ = pickle.loads(header["index"]["type-serialized"]) - index = idx_typ.deserialize(header["index"], frames[:index_nframes]) + index = cls.device_deserialize(header["index"], frames[:index_nframes]) obj.index = index return obj @@ -1424,7 +1414,7 @@ def __setitem__(self, arg, value): new_columns = ( value if key == arg - else column.column_empty( + else column_empty( row_count=length, dtype=col.dtype ) for key, col in self._column_labels_and_values @@ -2508,16 +2498,7 @@ def scatter_by_map( ) if map_index.size > 0: - plc_lo, plc_hi = plc.reduce.minmax( - map_index.to_pylibcudf(mode="read") - ) - # TODO: Use pylibcudf Scalar once APIs are more developed - lo = libcudf.column.Column.from_pylibcudf( - plc.Column.from_scalar(plc_lo, 1) - ).element_indexing(0) - hi = libcudf.column.Column.from_pylibcudf( - plc.Column.from_scalar(plc_hi, 1) - ).element_indexing(0) + lo, hi = map_index.minmax() if lo < 0 or hi >= map_size: raise ValueError("Partition map has invalid values") @@ -3385,7 +3366,7 @@ def _insert(self, loc, name, value, nan_as_null=None, ignore_index=True): if num_cols != 0: ca = self._data._from_columns_like_self( ( - column.column_empty(row_count=length, dtype=dtype) + column_empty(row_count=length, dtype=dtype) for _, dtype in self._dtypes ), verify=False, @@ -3491,7 +3472,7 @@ def diff(self, periods=1, axis=0): if abs(periods) > len(self): df = cudf.DataFrame._from_data( { - name: column_empty(len(self), dtype=dtype, masked=True) + name: column_empty(len(self), dtype=dtype) for name, dtype in zip(self._column_names, self.dtypes) } ) @@ -3871,9 +3852,7 @@ def agg(self, aggs, axis=None): result = DataFrame(index=idxs, columns=cols) for key in aggs.keys(): col = self[key] - col_empty = column_empty( - len(idxs), dtype=col.dtype, masked=True - ) + col_empty = column_empty(len(idxs), dtype=col.dtype) ans = cudf.Series._from_column( col_empty, index=cudf.Index(idxs) ) @@ -6189,9 +6168,7 @@ def quantile( quant_index=False, )._column if len(res) == 0: - res = column.column_empty( - row_count=len(qs), dtype=ser.dtype - ) + res = column_empty(row_count=len(qs), dtype=ser.dtype) result[k] = res result = DataFrame._from_data(result) @@ -7345,9 +7322,7 @@ def unnamed_group_generator(): ) all_nulls = functools.cache( - functools.partial( - column_empty, self.shape[0], common_type, masked=True - ) + functools.partial(column_empty, self.shape[0], common_type) ) # homogenize the dtypes of the columns @@ -8594,7 +8569,7 @@ def _cast_cols_to_common_dtypes(col_idxs, list_of_columns, dtypes, categories): # If column not in this df, fill with an all-null column if idx >= len(cols) or cols[idx] is None: n = len(next(x for x in cols if x is not None)) - cols[idx] = column_empty(row_count=n, dtype=dtype, masked=True) + cols[idx] = column_empty(row_count=n, dtype=dtype) else: # If column is categorical, rebase the codes with the # combined categories, and cast the new codes to the diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py index 801020664da..971f0be77f8 100644 --- a/python/cudf/cudf/core/dtypes.py +++ b/python/cudf/cudf/core/dtypes.py @@ -3,7 +3,6 @@ import decimal import operator -import pickle import textwrap import warnings from functools import cached_property @@ -91,13 +90,13 @@ def dtype(arbitrary): raise TypeError(f"Cannot interpret {arbitrary} as a valid cuDF dtype") -def _decode_type( +def _check_type( cls: type, header: dict, frames: list, 
is_valid_class: Callable[[type, type], bool] = operator.is_, -) -> tuple[dict, list, type]: - """Decode metadata-encoded type and check validity +) -> None: + """Perform metadata-encoded type and check validity Parameters ---------- @@ -112,12 +111,6 @@ class performing deserialization serialization by `cls` (default is to check type equality), called as `is_valid_class(decoded_class, cls)`. - Returns - ------- - tuple - Tuple of validated headers, frames, and the decoded class - constructor. - Raises ------ AssertionError @@ -128,11 +121,11 @@ class performing deserialization f"Deserialization expected {header['frame_count']} frames, " f"but received {len(frames)}." ) - klass = pickle.loads(header["type-serialized"]) + klass = Serializable._name_type_map[header["type-serialized-name"]] assert is_valid_class( - klass, cls + klass, + cls, ), f"Header-encoded {klass=} does not match decoding {cls=}." - return header, frames, klass class _BaseDtype(ExtensionDtype, Serializable): @@ -196,9 +189,7 @@ def categories(self) -> cudf.Index: Index(['b', 'a'], dtype='object') """ if self._categories is None: - col = cudf.core.column.column_empty( - 0, dtype="object", masked=False - ) + col = cudf.core.column.column_empty(0, dtype="object") else: col = self._categories return cudf.Index._from_column(col) @@ -305,13 +296,14 @@ def construct_from_string(self): def serialize(self): header = {} - header["type-serialized"] = pickle.dumps(type(self)) header["ordered"] = self.ordered frames = [] if self.categories is not None: - categories_header, categories_frames = self.categories.serialize() + categories_header, categories_frames = ( + self.categories.device_serialize() + ) header["categories"] = categories_header frames.extend(categories_frames) header["frame_count"] = len(frames) @@ -319,15 +311,14 @@ def serialize(self): @classmethod def deserialize(cls, header, frames): - header, frames, klass = _decode_type(cls, header, frames) + _check_type(cls, header, frames) ordered = header["ordered"] categories_header = header["categories"] categories_frames = frames - categories_type = pickle.loads(categories_header["type-serialized"]) - categories = categories_type.deserialize( + categories = Serializable.device_deserialize( categories_header, categories_frames ) - return klass(categories=categories, ordered=ordered) + return cls(categories=categories, ordered=ordered) def __repr__(self): return self.to_pandas().__repr__() @@ -495,12 +486,13 @@ def __hash__(self): def serialize(self) -> tuple[dict, list]: header: dict[str, Dtype] = {} - header["type-serialized"] = pickle.dumps(type(self)) frames = [] if isinstance(self.element_type, _BaseDtype): - header["element-type"], frames = self.element_type.serialize() + header["element-type"], frames = ( + self.element_type.device_serialize() + ) else: header["element-type"] = getattr( self.element_type, "name", self.element_type @@ -510,14 +502,14 @@ def serialize(self) -> tuple[dict, list]: @classmethod def deserialize(cls, header: dict, frames: list): - header, frames, klass = _decode_type(cls, header, frames) + _check_type(cls, header, frames) if isinstance(header["element-type"], dict): - element_type = pickle.loads( - header["element-type"]["type-serialized"] - ).deserialize(header["element-type"], frames) + element_type = Serializable.device_deserialize( + header["element-type"], frames + ) else: element_type = header["element-type"] - return klass(element_type=element_type) + return cls(element_type=element_type) @cached_property def itemsize(self): @@ -641,7 
+633,6 @@ def __hash__(self): def serialize(self) -> tuple[dict, list]: header: dict[str, Any] = {} - header["type-serialized"] = pickle.dumps(type(self)) frames: list[Buffer] = [] @@ -649,33 +640,31 @@ def serialize(self) -> tuple[dict, list]: for k, dtype in self.fields.items(): if isinstance(dtype, _BaseDtype): - dtype_header, dtype_frames = dtype.serialize() + dtype_header, dtype_frames = dtype.device_serialize() fields[k] = ( dtype_header, (len(frames), len(frames) + len(dtype_frames)), ) frames.extend(dtype_frames) else: - fields[k] = pickle.dumps(dtype) + fields[k] = dtype.str header["fields"] = fields header["frame_count"] = len(frames) return header, frames @classmethod def deserialize(cls, header: dict, frames: list): - header, frames, klass = _decode_type(cls, header, frames) + _check_type(cls, header, frames) fields = {} for k, dtype in header["fields"].items(): if isinstance(dtype, tuple): dtype_header, (start, stop) = dtype - fields[k] = pickle.loads( - dtype_header["type-serialized"] - ).deserialize( + fields[k] = Serializable.device_deserialize( dtype_header, frames[start:stop], ) else: - fields[k] = pickle.loads(dtype) + fields[k] = np.dtype(dtype) return cls(fields) @cached_property @@ -838,7 +827,6 @@ def _from_decimal(cls, decimal): def serialize(self) -> tuple[dict, list]: return ( { - "type-serialized": pickle.dumps(type(self)), "precision": self.precision, "scale": self.scale, "frame_count": 0, @@ -848,11 +836,8 @@ def serialize(self) -> tuple[dict, list]: @classmethod def deserialize(cls, header: dict, frames: list): - header, frames, klass = _decode_type( - cls, header, frames, is_valid_class=issubclass - ) - klass = pickle.loads(header["type-serialized"]) - return klass(header["precision"], header["scale"]) + _check_type(cls, header, frames, is_valid_class=issubclass) + return cls(header["precision"], header["scale"]) def __eq__(self, other: Dtype) -> bool: if other is self: @@ -960,18 +945,17 @@ def __hash__(self): def serialize(self) -> tuple[dict, list]: header = { - "type-serialized": pickle.dumps(type(self)), - "fields": pickle.dumps((self.subtype, self.closed)), + "fields": (self.subtype.str, self.closed), "frame_count": 0, } return header, [] @classmethod def deserialize(cls, header: dict, frames: list): - header, frames, klass = _decode_type(cls, header, frames) - klass = pickle.loads(header["type-serialized"]) - subtype, closed = pickle.loads(header["fields"]) - return klass(subtype, closed=closed) + _check_type(cls, header, frames) + subtype, closed = header["fields"] + subtype = np.dtype(subtype) + return cls(subtype, closed=closed) def _is_categorical_dtype(obj): diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py index 84a3caf905f..4f40ba0bd92 100644 --- a/python/cudf/cudf/core/frame.py +++ b/python/cudf/cudf/core/frame.py @@ -3,7 +3,6 @@ from __future__ import annotations import operator -import pickle import warnings from collections import abc from typing import TYPE_CHECKING, Any, Literal @@ -23,7 +22,9 @@ from cudf import _lib as libcudf from cudf.api.types import is_dtype_equal, is_scalar from cudf.core._compat import PANDAS_LT_300 +from cudf.core._internals import sorting from cudf.core._internals.search import search_sorted +from cudf.core.abc import Serializable from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ( ColumnBase, @@ -47,7 +48,7 @@ # TODO: It looks like Frame is missing a declaration of `copy`, need to add -class Frame(BinaryOperand, Scannable): +class Frame(BinaryOperand, 
Scannable, Serializable): """A collection of Column objects with an optional index. Parameters @@ -97,37 +98,80 @@ def ndim(self) -> int: @_performance_tracking def serialize(self): # TODO: See if self._data can be serialized outright + frames = [] header = { - "type-serialized": pickle.dumps(type(self)), - "column_names": pickle.dumps(self._column_names), - "column_rangeindex": pickle.dumps(self._data.rangeindex), - "column_multiindex": pickle.dumps(self._data.multiindex), - "column_label_dtype": pickle.dumps(self._data.label_dtype), - "column_level_names": pickle.dumps(self._data._level_names), + "column_label_dtype": None, + "dtype-is-cudf-serialized": False, } - header["columns"], frames = serialize_columns(self._columns) + if (label_dtype := self._data.label_dtype) is not None: + try: + header["column_label_dtype"], frames = ( + label_dtype.device_serialize() + ) + header["dtype-is-cudf-serialized"] = True + except AttributeError: + header["column_label_dtype"] = label_dtype.str + + header["columns"], column_frames = serialize_columns(self._columns) + column_names, column_names_numpy_type = ( + zip( + *[ + (cname.item(), type(cname).__name__) + if isinstance(cname, np.generic) + else (cname, "") + for cname in self._column_names + ] + ) + if self._column_names + else ((), ()) + ) + header |= { + "column_names": column_names, + "column_names_numpy_type": column_names_numpy_type, + "column_rangeindex": self._data.rangeindex, + "column_multiindex": self._data.multiindex, + "column_level_names": self._data._level_names, + } + frames.extend(column_frames) + return header, frames @classmethod @_performance_tracking def deserialize(cls, header, frames): - cls_deserialize = pickle.loads(header["type-serialized"]) - column_names = pickle.loads(header["column_names"]) - columns = deserialize_columns(header["columns"], frames) kwargs = {} + dtype_header = header["column_label_dtype"] + if header["dtype-is-cudf-serialized"]: + count = dtype_header["frame_count"] + kwargs["label_dtype"] = cls.device_deserialize( + header, frames[:count] + ) + frames = frames[count:] + else: + kwargs["label_dtype"] = ( + np.dtype(dtype_header) if dtype_header is not None else None + ) + + columns = deserialize_columns(header["columns"], frames) for metadata in [ "rangeindex", "multiindex", - "label_dtype", "level_names", ]: key = f"column_{metadata}" if key in header: - kwargs[metadata] = pickle.loads(header[key]) + kwargs[metadata] = header[key] + + column_names = [ + getattr(np, cntype)(cname) if cntype != "" else cname + for cname, cntype in zip( + header["column_names"], header["column_names_numpy_type"] + ) + ] col_accessor = ColumnAccessor( data=dict(zip(column_names, columns)), **kwargs ) - return cls_deserialize._from_data(col_accessor) + return cls._from_data(col_accessor) @classmethod @_performance_tracking @@ -1433,7 +1477,7 @@ def _get_sorted_inds( else: ascending_lst = list(ascending) - return libcudf.sort.order_by( + return sorting.order_by( list(to_sort), ascending_lst, na_position, diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index 0f12f266a95..b772d35846d 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -3,7 +3,6 @@ import copy import itertools -import pickle import textwrap import warnings from collections import abc @@ -19,11 +18,11 @@ import cudf from cudf import _lib as libcudf from cudf._lib import groupby as libgroupby -from cudf._lib.sort import segmented_sort_by_key from 
cudf._lib.types import size_type_dtype from cudf.api.extensions import no_default from cudf.api.types import is_list_like, is_numeric_dtype from cudf.core._compat import PANDAS_LT_300 +from cudf.core._internals import sorting from cudf.core.abc import Serializable from cudf.core.buffer import acquire_spill_lock from cudf.core.column.column import ColumnBase, StructDtype, as_column @@ -494,9 +493,7 @@ def size(self): """ Return the size of each group. """ - col = cudf.core.column.column_empty( - len(self.obj), "int8", masked=False - ) + col = cudf.core.column.column_empty(len(self.obj), "int8") result = ( cudf.Series._from_column(col, name=getattr(self.obj, "name", None)) .groupby(self.grouping, sort=self._sort, dropna=self._dropna) @@ -524,7 +521,8 @@ def cumcount(self, ascending: bool = True): return ( cudf.Series._from_column( cudf.core.column.column_empty( - len(self.obj), "int8", masked=False + len(self.obj), + "int8", ), index=self.obj.index, ) @@ -794,7 +792,7 @@ def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): # want, and right order is a matching gather map for # the result table. Get the correct order by sorting # the right gather map. - (right_order,) = libcudf.sort.sort_by_key( + (right_order,) = sorting.sort_by_key( [right_order], [left_order], [True], @@ -1250,15 +1248,20 @@ def sample( for off, size in zip(group_offsets, size_per_group): rs.shuffle(indices[off : off + size]) else: - rng = cp.random.default_rng(seed=random_state) - (indices,) = segmented_sort_by_key( - [as_column(indices)], - [as_column(rng.random(size=nrows))], - as_column(group_offsets), - [], - [], - stable=True, + keys = cp.random.default_rng(seed=random_state).random( + size=nrows ) + with acquire_spill_lock(): + plc_table = plc.sorting.stable_segmented_sort_by_key( + plc.Table( + [as_column(indices).to_pylibcudf(mode="read")] + ), + plc.Table([as_column(keys).to_pylibcudf(mode="read")]), + as_column(group_offsets).to_pylibcudf(mode="read"), + [plc.types.Order.ASCENDING], + [plc.types.NullOrder.AFTER], + ) + indices = ColumnBase.from_pylibcudf(plc_table.columns()[0]) indices = cp.asarray(indices.data_array_view(mode="read")) # Which indices are we going to want? 
want = np.arange(samples_per_group.sum(), dtype=size_type_dtype) @@ -1281,7 +1284,7 @@ def serialize(self): obj_header, obj_frames = self.obj.serialize() header["obj"] = obj_header - header["obj_type"] = pickle.dumps(type(self.obj)) + header["obj_type_name"] = type(self.obj).__name__ header["num_obj_frames"] = len(obj_frames) frames.extend(obj_frames) @@ -1296,7 +1299,7 @@ def serialize(self): def deserialize(cls, header, frames): kwargs = header["kwargs"] - obj_type = pickle.loads(header["obj_type"]) + obj_type = Serializable._name_type_map[header["obj_type_name"]] obj = obj_type.deserialize( header["obj"], frames[: header["num_obj_frames"]] ) @@ -3329,8 +3332,8 @@ def _handle_misc(self, by): def serialize(self): header = {} frames = [] - header["names"] = pickle.dumps(self.names) - header["_named_columns"] = pickle.dumps(self._named_columns) + header["names"] = self.names + header["_named_columns"] = self._named_columns column_header, column_frames = cudf.core.column.serialize_columns( self._key_columns ) @@ -3340,8 +3343,8 @@ def serialize(self): @classmethod def deserialize(cls, header, frames): - names = pickle.loads(header["names"]) - _named_columns = pickle.loads(header["_named_columns"]) + names = header["names"] + _named_columns = header["_named_columns"] key_columns = cudf.core.column.deserialize_columns( header["columns"], frames ) diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py index cc3d8448151..8d3ef1036d1 100644 --- a/python/cudf/cudf/core/index.py +++ b/python/cudf/cudf/core/index.py @@ -3,7 +3,6 @@ from __future__ import annotations import operator -import pickle import warnings from collections.abc import Hashable, MutableMapping from functools import cache, cached_property @@ -337,7 +336,7 @@ def _values(self) -> ColumnBase: if len(self) > 0: return column.as_column(self._range, dtype=self.dtype) else: - return column.column_empty(0, masked=False, dtype=self.dtype) + return column.column_empty(0, dtype=self.dtype) def _clean_nulls_from_index(self) -> Self: return self @@ -497,9 +496,8 @@ def serialize(self): header["index_column"]["step"] = self.step frames = [] - header["name"] = pickle.dumps(self.name) - header["dtype"] = pickle.dumps(self.dtype) - header["type-serialized"] = pickle.dumps(type(self)) + header["name"] = self.name + header["dtype"] = self.dtype.str header["frame_count"] = 0 return header, frames @@ -507,11 +505,14 @@ def serialize(self): @_performance_tracking def deserialize(cls, header, frames): h = header["index_column"] - name = pickle.loads(header["name"]) + name = header["name"] start = h["start"] stop = h["stop"] step = h.get("step", 1) - return RangeIndex(start=start, stop=stop, step=step, name=name) + dtype = np.dtype(header["dtype"]) + return RangeIndex( + start=start, stop=stop, step=step, dtype=dtype, name=name + ) @property # type: ignore @_performance_tracking diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py index 0e6a5e03ea6..1a667e24bef 100644 --- a/python/cudf/cudf/core/indexed_frame.py +++ b/python/cudf/cudf/core/indexed_frame.py @@ -3851,7 +3851,6 @@ def _reindex( if name in df._data else cudf.core.column.column.column_empty( dtype=dtypes.get(name, np.float64), - masked=True, row_count=len(index), ) ) @@ -6368,9 +6367,49 @@ def rank( elif source._num_columns != num_cols: dropped_cols = True - result_columns = libcudf.sort.rank_columns( - [*source._columns], method_enum, na_option, ascending, pct + column_order = ( + plc.types.Order.ASCENDING + if ascending + else 
plc.types.Order.DESCENDING ) + # ascending + # #top = na_is_smallest + # #bottom = na_is_largest + # #keep = na_is_largest + # descending + # #top = na_is_largest + # #bottom = na_is_smallest + # #keep = na_is_smallest + if ascending: + if na_option == "top": + null_precedence = plc.types.NullOrder.BEFORE + else: + null_precedence = plc.types.NullOrder.AFTER + else: + if na_option == "top": + null_precedence = plc.types.NullOrder.AFTER + else: + null_precedence = plc.types.NullOrder.BEFORE + c_null_handling = ( + plc.types.NullPolicy.EXCLUDE + if na_option == "keep" + else plc.types.NullPolicy.INCLUDE + ) + + with acquire_spill_lock(): + result_columns = [ + libcudf.column.Column.from_pylibcudf( + plc.sorting.rank( + col.to_pylibcudf(mode="read"), + method_enum, + column_order, + c_null_handling, + null_precedence, + pct, + ) + ) + for col in source._columns + ] if dropped_cols: result = type(source)._from_data( diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py index 5c224176730..e7ea91c1f21 100644 --- a/python/cudf/cudf/core/join/join.py +++ b/python/cudf/cudf/core/join/join.py @@ -9,6 +9,7 @@ import cudf from cudf import _lib as libcudf from cudf._lib.types import size_type_dtype +from cudf.core._internals import sorting from cudf.core.buffer import acquire_spill_lock from cudf.core.copy_types import GatherMap from cudf.core.join._join_helpers import ( @@ -256,7 +257,7 @@ def _gather_maps(self, left_cols, right_cols): for map_, n, null in zip(maps, lengths, nullify) ) ) - return libcudf.sort.sort_by_key( + return sorting.sort_by_key( list(maps), # If how is right, right map is primary sort key. key_order[:: -1 if self.how == "right" else 1], @@ -426,7 +427,7 @@ def _sort_result(self, result: cudf.DataFrame) -> cudf.DataFrame: else: to_sort = [*result._columns] index_names = None - result_columns = libcudf.sort.sort_by_key( + result_columns = sorting.sort_by_key( to_sort, by, [True] * len(by), diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 173d4e1c584..a99e06e4a8e 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -5,7 +5,6 @@ import itertools import numbers import operator -import pickle import warnings from functools import cached_property from typing import TYPE_CHECKING, Any @@ -23,6 +22,7 @@ from cudf.api.types import is_integer, is_list_like, is_object_dtype, is_scalar from cudf.core import column from cudf.core._base_index import _return_get_indexer_result +from cudf.core._internals import sorting from cudf.core.algorithms import factorize from cudf.core.buffer import acquire_spill_lock from cudf.core.column_accessor import ColumnAccessor @@ -192,12 +192,12 @@ def __init__( source_data = {} for i, (code, level) in enumerate(zip(new_codes, new_levels)): if len(code): - lo, hi = libcudf.reduce.minmax(code) - if lo.value < -1 or hi.value > len(level) - 1: + lo, hi = code.minmax() + if lo < -1 or hi > len(level) - 1: raise ValueError( f"Codes must be -1 <= codes <= {len(level) - 1}" ) - if lo.value == -1: + if lo == -1: # Now we can gather and insert null automatically code[code == -1] = np.iinfo(size_type_dtype).min result_col = libcudf.copying.gather( @@ -921,15 +921,15 @@ def take(self, indices) -> Self: def serialize(self): header, frames = super().serialize() # Overwrite the names in _data with the true names. 
- header["column_names"] = pickle.dumps(self.names) + header["column_names"] = self.names return header, frames @classmethod @_performance_tracking def deserialize(cls, header, frames): # Spoof the column names to construct the frame, then set manually. - column_names = pickle.loads(header["column_names"]) - header["column_names"] = pickle.dumps(range(0, len(column_names))) + column_names = header["column_names"] + header["column_names"] = range(0, len(column_names)) obj = super().deserialize(header, frames) return obj._set_names(column_names) @@ -1678,7 +1678,7 @@ def _is_sorted(self, ascending=None, null_position=None) -> bool: f"Expected a list-like or None for `null_position`, got " f"{type(null_position)}" ) - return libcudf.sort.is_sorted( + return sorting.is_sorted( [*self._columns], ascending=ascending, null_position=null_position ) diff --git a/python/cudf/cudf/core/resample.py b/python/cudf/cudf/core/resample.py index d95d252559f..391ee31f125 100644 --- a/python/cudf/cudf/core/resample.py +++ b/python/cudf/cudf/core/resample.py @@ -15,7 +15,6 @@ # limitations under the License. from __future__ import annotations -import pickle import warnings from typing import TYPE_CHECKING @@ -26,6 +25,7 @@ import cudf from cudf._lib.column import Column +from cudf.core.abc import Serializable from cudf.core.buffer import acquire_spill_lock from cudf.core.groupby.groupby import ( DataFrameGroupBy, @@ -97,21 +97,21 @@ def serialize(self): header, frames = super().serialize() grouping_head, grouping_frames = self.grouping.serialize() header["grouping"] = grouping_head - header["resampler_type"] = pickle.dumps(type(self)) + header["resampler_type"] = type(self).__name__ header["grouping_frames_count"] = len(grouping_frames) frames.extend(grouping_frames) return header, frames @classmethod def deserialize(cls, header, frames): - obj_type = pickle.loads(header["obj_type"]) + obj_type = Serializable._name_type_map[header["obj_type_name"]] obj = obj_type.deserialize( header["obj"], frames[: header["num_obj_frames"]] ) grouping = _ResampleGrouping.deserialize( header["grouping"], frames[header["num_obj_frames"] :] ) - resampler_cls = pickle.loads(header["resampler_type"]) + resampler_cls = Serializable._name_type_map[header["resampler_type"]] out = resampler_cls.__new__(resampler_cls) out.grouping = grouping super().__init__(out, obj, by=grouping) @@ -163,8 +163,8 @@ def serialize(self): @classmethod def deserialize(cls, header, frames): - names = pickle.loads(header["names"]) - _named_columns = pickle.loads(header["_named_columns"]) + names = header["names"] + _named_columns = header["_named_columns"] key_columns = cudf.core.column.deserialize_columns( header["columns"], frames[: -header["__bin_labels_count"]] ) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index be74b0f867a..961e5e11bc0 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -4,7 +4,6 @@ import functools import inspect -import pickle import textwrap import warnings from collections import abc @@ -27,7 +26,6 @@ ) from cudf.core import indexing_utils from cudf.core._compat import PANDAS_LT_300 -from cudf.core.abc import Serializable from cudf.core.buffer import acquire_spill_lock from cudf.core.column import ( ColumnBase, @@ -414,7 +412,7 @@ def _loc_to_iloc(self, arg): return indices -class Series(SingleColumnFrame, IndexedFrame, Serializable): +class Series(SingleColumnFrame, IndexedFrame): """ One-dimensional GPU array (including time series). 
@@ -899,7 +897,7 @@ def hasnans(self): def serialize(self): header, frames = super().serialize() - header["index"], index_frames = self.index.serialize() + header["index"], index_frames = self.index.device_serialize() header["index_frame_count"] = len(index_frames) # For backwards compatibility with older versions of cuDF, index # columns are placed before data columns. @@ -915,8 +913,7 @@ def deserialize(cls, header, frames): header, frames[header["index_frame_count"] :] ) - idx_typ = pickle.loads(header["index"]["type-serialized"]) - index = idx_typ.deserialize(header["index"], frames[:index_nframes]) + index = cls.device_deserialize(header["index"], frames[:index_nframes]) obj.index = index return obj @@ -3413,7 +3410,7 @@ def describe( ) @_performance_tracking - def digitize(self, bins, right=False): + def digitize(self, bins: np.ndarray, right: bool = False) -> Self: """Return the indices of the bins to which each value belongs. Notes @@ -3444,9 +3441,8 @@ def digitize(self, bins, right=False): 3 2 dtype: int32 """ - return Series._from_column( - cudf.core.column.numerical.digitize(self._column, bins, right), - name=self.name, + return type(self)._from_column( + self._column.digitize(bins, right), name=self.name ) @_performance_tracking diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py index dda1f199078..479838ef2a8 100644 --- a/python/cudf/cudf/core/subword_tokenizer.py +++ b/python/cudf/cudf/core/subword_tokenizer.py @@ -8,10 +8,6 @@ import pylibcudf as plc -from cudf._lib.nvtext.subword_tokenize import ( - subword_tokenize_inmem_hash as cpp_subword_tokenize, -) - def _cast_to_appropriate_type(ar, cast_type): if cast_type == "cp": @@ -210,8 +206,7 @@ def __call__( stride = max_length - stride # behavior varies from subword_tokenize but maps with huggingface - input_ids, attention_mask, metadata = cpp_subword_tokenize( - text._column, + input_ids, attention_mask, metadata = text._column.subword_tokenize( self.vocab_file, max_sequence_length=max_length, stride=stride, diff --git a/python/cudf/cudf/core/tokenize_vocabulary.py b/python/cudf/cudf/core/tokenize_vocabulary.py index 1e31376cce8..fb8b9b3131c 100644 --- a/python/cudf/cudf/core/tokenize_vocabulary.py +++ b/python/cudf/cudf/core/tokenize_vocabulary.py @@ -5,9 +5,6 @@ import pylibcudf as plc import cudf -from cudf._lib.nvtext.tokenize import ( - tokenize_with_vocabulary as cpp_tokenize_with_vocabulary, -) class TokenizeVocabulary: @@ -20,7 +17,7 @@ class TokenizeVocabulary: Strings column of vocabulary terms """ - def __init__(self, vocabulary: "cudf.Series"): + def __init__(self, vocabulary: cudf.Series) -> None: self.vocabulary = plc.nvtext.tokenize.TokenizeVocabulary( vocabulary._column.to_pylibcudf(mode="read") ) @@ -46,8 +43,8 @@ def tokenize( if delimiter is None: delimiter = "" delim = cudf.Scalar(delimiter, dtype="str") - result = cpp_tokenize_with_vocabulary( - text._column, self.vocabulary, delim, default_id + result = text._column.tokenize_with_vocabulary( + self.vocabulary, delim, default_id ) return cudf.Series._from_column(result) diff --git a/python/cudf/cudf/core/window/ewm.py b/python/cudf/cudf/core/window/ewm.py index 094df955273..c4a063a50e8 100644 --- a/python/cudf/cudf/core/window/ewm.py +++ b/python/cudf/cudf/core/window/ewm.py @@ -6,7 +6,6 @@ import numpy as np -from cudf._lib.reduce import scan from cudf.api.types import is_numeric_dtype from cudf.core.window.rolling import _RollingBase @@ -194,13 +193,8 @@ def _apply_agg_column( # as such we need to 
convert the nans to nulls before # passing them in. to_libcudf_column = source_column.astype("float64").nans_to_nulls() - - return scan( - agg_name, - to_libcudf_column, - True, - com=self.com, - adjust=self.adjust, + return to_libcudf_column.scan( + agg_name, True, com=self.com, adjust=self.adjust ) diff --git a/python/cudf/cudf/io/csv.py b/python/cudf/cudf/io/csv.py index 3dc8915bfd1..da9a66f3874 100644 --- a/python/cudf/cudf/io/csv.py +++ b/python/cudf/cudf/io/csv.py @@ -1,57 +1,73 @@ # Copyright (c) 2018-2024, NVIDIA CORPORATION. +from __future__ import annotations +import errno +import itertools +import os import warnings from collections import abc from io import BytesIO, StringIO +from typing import cast import numpy as np +import pandas as pd + +import pylibcudf as plc import cudf -from cudf import _lib as libcudf -from cudf.api.types import is_scalar +from cudf._lib.types import dtype_to_pylibcudf_type +from cudf._lib.utils import data_from_pylibcudf_io +from cudf.api.types import is_hashable, is_scalar +from cudf.core.buffer import acquire_spill_lock from cudf.utils import ioutils from cudf.utils.dtypes import _maybe_convert_to_default_type from cudf.utils.performance_tracking import _performance_tracking +_CSV_HEX_TYPE_MAP = { + "hex": np.dtype("int64"), + "hex64": np.dtype("int64"), + "hex32": np.dtype("int32"), +} + @_performance_tracking @ioutils.doc_read_csv() def read_csv( filepath_or_buffer, - sep=",", - delimiter=None, + sep: str = ",", + delimiter: str | None = None, header="infer", names=None, index_col=None, usecols=None, prefix=None, - mangle_dupe_cols=True, + mangle_dupe_cols: bool = True, dtype=None, true_values=None, false_values=None, - skipinitialspace=False, - skiprows=0, - skipfooter=0, - nrows=None, + skipinitialspace: bool = False, + skiprows: int = 0, + skipfooter: int = 0, + nrows: int | None = None, na_values=None, - keep_default_na=True, - na_filter=True, - skip_blank_lines=True, + keep_default_na: bool = True, + na_filter: bool = True, + skip_blank_lines: bool = True, parse_dates=None, - dayfirst=False, + dayfirst: bool = False, compression="infer", - thousands=None, - decimal=".", - lineterminator="\n", - quotechar='"', - quoting=0, - doublequote=True, - comment=None, - delim_whitespace=False, - byte_range=None, + thousands: str | None = None, + decimal: str = ".", + lineterminator: str = "\n", + quotechar: str = '"', + quoting: int = 0, + doublequote: bool = True, + comment: str | None = None, + delim_whitespace: bool = False, + byte_range: list[int] | tuple[int, int] | None = None, storage_options=None, - bytes_per_thread=None, -): + bytes_per_thread: int | None = None, +) -> cudf.DataFrame: """{docstring}""" if delim_whitespace is not False: @@ -77,60 +93,225 @@ def read_csv( if na_values is not None and is_scalar(na_values): na_values = [na_values] - df = libcudf.csv.read_csv( - filepath_or_buffer, - lineterminator=lineterminator, - quotechar=quotechar, - quoting=quoting, - doublequote=doublequote, - header=header, - mangle_dupe_cols=mangle_dupe_cols, - usecols=usecols, - sep=sep, - delimiter=delimiter, - delim_whitespace=delim_whitespace, - skipinitialspace=skipinitialspace, - names=names, - dtype=dtype, - skipfooter=skipfooter, - skiprows=skiprows, - dayfirst=dayfirst, - compression=compression, - thousands=thousands, - decimal=decimal, - true_values=true_values, - false_values=false_values, - nrows=nrows, - byte_range=byte_range, - skip_blank_lines=skip_blank_lines, - parse_dates=parse_dates, - comment=comment, - na_values=na_values, - 
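# Usage sketch of the EWM path above: NaNs are converted to nulls before the
# scan-based aggregation runs (sample data is illustrative).
import cudf

s = cudf.Series([1.0, float("nan"), 3.0])
s.ewm(com=0.5, adjust=True).mean()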
keep_default_na=keep_default_na, - na_filter=na_filter, - prefix=prefix, - index_col=index_col, + if not isinstance(filepath_or_buffer, (BytesIO, StringIO, bytes)): + if not os.path.isfile(filepath_or_buffer): + raise FileNotFoundError( + errno.ENOENT, os.strerror(errno.ENOENT), filepath_or_buffer + ) + + if isinstance(filepath_or_buffer, StringIO): + filepath_or_buffer = filepath_or_buffer.read().encode() + elif isinstance(filepath_or_buffer, str) and not os.path.isfile( + filepath_or_buffer + ): + filepath_or_buffer = filepath_or_buffer.encode() + + _validate_args( + delimiter, + sep, + delim_whitespace, + decimal, + thousands, + nrows, + skipfooter, + byte_range, + skiprows, + ) + + # Alias sep -> delimiter. + if delimiter is None: + delimiter = sep + + delimiter = str(delimiter) + + if byte_range is None: + byte_range = (0, 0) + + if compression is None: + c_compression = plc.io.types.CompressionType.NONE + else: + compression_map = { + "infer": plc.io.types.CompressionType.AUTO, + "gzip": plc.io.types.CompressionType.GZIP, + "bz2": plc.io.types.CompressionType.BZIP2, + "zip": plc.io.types.CompressionType.ZIP, + } + c_compression = compression_map[compression] + + # We need this later when setting index cols + orig_header = header + + if names is not None: + # explicitly mentioned name, so don't check header + if header is None or header == "infer": + header = -1 + else: + header = header + names = list(names) + else: + if header is None: + header = -1 + elif header == "infer": + header = 0 + + hex_cols: list[abc.Hashable] = [] + new_dtypes: list[plc.DataType] | dict[abc.Hashable, plc.DataType] = [] + if dtype is not None: + if isinstance(dtype, abc.Mapping): + new_dtypes = {} + for k, col_type in dtype.items(): + if is_hashable(col_type) and col_type in _CSV_HEX_TYPE_MAP: + col_type = _CSV_HEX_TYPE_MAP[col_type] + hex_cols.append(str(k)) + + new_dtypes[k] = _get_plc_data_type_from_dtype( + cudf.dtype(col_type) + ) + elif cudf.api.types.is_scalar(dtype) or isinstance( + dtype, (np.dtype, pd.api.extensions.ExtensionDtype, type) + ): + if is_hashable(dtype) and dtype in _CSV_HEX_TYPE_MAP: + dtype = _CSV_HEX_TYPE_MAP[dtype] + hex_cols.append(0) + + cast(list, new_dtypes).append(_get_plc_data_type_from_dtype(dtype)) + elif isinstance(dtype, abc.Collection): + for index, col_dtype in enumerate(dtype): + if is_hashable(col_dtype) and col_dtype in _CSV_HEX_TYPE_MAP: + col_dtype = _CSV_HEX_TYPE_MAP[col_dtype] + hex_cols.append(index) + + new_dtypes.append(_get_plc_data_type_from_dtype(col_dtype)) + else: + raise ValueError( + "dtype should be a scalar/str/list-like/dict-like" + ) + options = ( + plc.io.csv.CsvReaderOptions.builder( + plc.io.SourceInfo([filepath_or_buffer]) + ) + .compression(c_compression) + .mangle_dupe_cols(mangle_dupe_cols) + .byte_range_offset(byte_range[0]) + .byte_range_size(byte_range[1]) + .nrows(nrows if nrows is not None else -1) + .skiprows(skiprows) + .skipfooter(skipfooter) + .quoting(quoting) + .lineterminator(str(lineterminator)) + .quotechar(quotechar) + .decimal(decimal) + .delim_whitespace(delim_whitespace) + .skipinitialspace(skipinitialspace) + .skip_blank_lines(skip_blank_lines) + .doublequote(doublequote) + .keep_default_na(keep_default_na) + .na_filter(na_filter) + .dayfirst(dayfirst) + .build() + ) + + options.set_header(header) + + if names is not None: + options.set_names([str(name) for name in names]) + + if prefix is not None: + options.set_prefix(prefix) + + if usecols is not None: + if all(isinstance(col, int) for col in usecols): + 
options.set_use_cols_indexes(list(usecols)) + else: + options.set_use_cols_names([str(name) for name in usecols]) + + if delimiter is not None: + options.set_delimiter(delimiter) + + if thousands is not None: + options.set_thousands(thousands) + + if comment is not None: + options.set_comment(comment) + + if parse_dates is not None: + options.set_parse_dates(list(parse_dates)) + + if hex_cols is not None: + options.set_parse_hex(list(hex_cols)) + + options.set_dtypes(new_dtypes) + + if true_values is not None: + options.set_true_values([str(val) for val in true_values]) + + if false_values is not None: + options.set_false_values([str(val) for val in false_values]) + + if na_values is not None: + options.set_na_values([str(val) for val in na_values]) + + df = cudf.DataFrame._from_data( + *data_from_pylibcudf_io(plc.io.csv.read_csv(options)) ) + if isinstance(dtype, abc.Mapping): + for k, v in dtype.items(): + if isinstance(cudf.dtype(v), cudf.CategoricalDtype): + df._data[str(k)] = df._data[str(k)].astype(v) + elif dtype == "category" or isinstance(dtype, cudf.CategoricalDtype): + df = df.astype(dtype) + elif isinstance(dtype, abc.Collection) and not is_scalar(dtype): + for index, col_dtype in enumerate(dtype): + if isinstance(cudf.dtype(col_dtype), cudf.CategoricalDtype): + col_name = df._column_names[index] + df._data[col_name] = df._data[col_name].astype(col_dtype) + + if names is not None and len(names) and isinstance(names[0], int): + df.columns = [int(x) for x in df._data] + elif ( + names is None + and header == -1 + and cudf.get_option("mode.pandas_compatible") + ): + df.columns = [int(x) for x in df._column_names] + + # Set index if the index_col parameter is passed + if index_col is not None and index_col is not False: + if isinstance(index_col, int): + index_col_name = df._data.get_labels_by_index(index_col)[0] + df = df.set_index(index_col_name) + if ( + isinstance(index_col_name, str) + and names is None + and orig_header == "infer" + ): + if index_col_name.startswith("Unnamed:"): + # TODO: Try to upstream it to libcudf + # csv reader in future + df.index.name = None + elif names is None: + df.index.name = index_col + else: + df = df.set_index(index_col) + if dtype is None or isinstance(dtype, abc.Mapping): # There exists some dtypes in the result columns that is inferred. # Find them and map them to the default dtypes. specified_dtypes = {} if dtype is None else dtype - unspecified_dtypes = { - name: dtype - for name, dtype in df._dtypes - if name not in specified_dtypes - } default_dtypes = {} - - for name, dt in unspecified_dtypes.items(): - if dt == np.dtype("i1"): + for name, dt in df._dtypes: + if name in specified_dtypes: + continue + elif dt == np.dtype("i1"): # csv reader reads all null column as int8. # The dtype should remain int8. 
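# Usage sketch of the hex-dtype handling above; the input values and the
# parsed results are illustrative assumptions.
import cudf
from io import StringIO

buf = StringIO("a,b\n0xff,1\n0x10,2\n")
df = cudf.read_csv(buf, dtype={"a": "hex"})
# column "a" is read as base-16 integers (int64); column "b" is inferred.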
default_dtypes[name] = dt else: default_dtypes[name] = _maybe_convert_to_default_type(dt) - df = df.astype(default_dtypes) + + if default_dtypes: + df = df.astype(default_dtypes) return df @@ -138,17 +319,17 @@ def read_csv( @_performance_tracking @ioutils.doc_to_csv() def to_csv( - df, + df: cudf.DataFrame, path_or_buf=None, - sep=",", - na_rep="", + sep: str = ",", + na_rep: str = "", columns=None, - header=True, - index=True, + header: bool = True, + index: bool = True, encoding=None, compression=None, - lineterminator="\n", - chunksize=None, + lineterminator: str = "\n", + chunksize: int | None = None, storage_options=None, ): """{docstring}""" @@ -187,15 +368,10 @@ def to_csv( ) for _, dtype in df._dtypes: - if isinstance(dtype, cudf.ListDtype): - raise NotImplementedError( - "Writing to csv format is not yet supported with " - "list columns." - ) - elif isinstance(dtype, cudf.StructDtype): + if isinstance(dtype, (cudf.ListDtype, cudf.StructDtype)): raise NotImplementedError( "Writing to csv format is not yet supported with " - "Struct columns." + f"{dtype} columns." ) # TODO: Need to typecast categorical columns to the underlying @@ -208,7 +384,7 @@ def to_csv( df = df.copy(deep=False) for col_name, col in df._column_labels_and_values: if isinstance(col.dtype, cudf.CategoricalDtype): - df._data[col_name] = col.astype(col.categories.dtype) + df._data[col_name] = col.astype(col.dtype.categories.dtype) if isinstance(df.index, cudf.CategoricalIndex): df.index = df.index.astype(df.index.categories.dtype) @@ -218,7 +394,7 @@ def to_csv( if ioutils.is_fsspec_open_file(path_or_buf): with path_or_buf as file_obj: file_obj = ioutils.get_IOBase_writer(file_obj) - libcudf.csv.write_csv( + _plc_write_csv( df, path_or_buf=file_obj, sep=sep, @@ -229,7 +405,7 @@ def to_csv( index=index, ) else: - libcudf.csv.write_csv( + _plc_write_csv( df, path_or_buf=path_or_buf, sep=sep, @@ -243,3 +419,127 @@ def to_csv( if return_as_string: path_or_buf.seek(0) return path_or_buf.read() + + +@acquire_spill_lock() +def _plc_write_csv( + table: cudf.DataFrame, + path_or_buf=None, + sep: str = ",", + na_rep: str = "", + header: bool = True, + lineterminator: str = "\n", + rows_per_chunk: int = 8, + index: bool = True, +) -> None: + iter_columns = ( + itertools.chain(table.index._columns, table._columns) + if index + else table._columns + ) + columns = [col.to_pylibcudf(mode="read") for col in iter_columns] + col_names = [] + if header: + table_names = ( + na_rep if name is None or pd.isnull(name) else name + for name in table._column_names + ) + iter_names = ( + itertools.chain(table.index.names, table_names) + if index + else table_names + ) + all_names = list(iter_names) + col_names = [ + '""' + if (name in (None, "") and len(all_names) == 1) + else (str(name) if name not in (None, "") else "") + for name in all_names + ] + try: + plc.io.csv.write_csv( + ( + plc.io.csv.CsvWriterOptions.builder( + plc.io.SinkInfo([path_or_buf]), plc.Table(columns) + ) + .names(col_names) + .na_rep(na_rep) + .include_header(header) + .rows_per_chunk(rows_per_chunk) + .line_terminator(str(lineterminator)) + .inter_column_delimiter(str(sep)) + .true_value("True") + .false_value("False") + .build() + ) + ) + except OverflowError as err: + raise OverflowError( + f"Writing CSV file with chunksize={rows_per_chunk} failed. " + "Consider providing a smaller chunksize argument." 
+ ) from err + + +def _validate_args( + delimiter: str | None, + sep: str, + delim_whitespace: bool, + decimal: str, + thousands: str | None, + nrows: int | None, + skipfooter: int, + byte_range: list[int] | tuple[int, int] | None, + skiprows: int, +) -> None: + if delim_whitespace: + if delimiter is not None: + raise ValueError("cannot set both delimiter and delim_whitespace") + if sep != ",": + raise ValueError("cannot set both sep and delim_whitespace") + + # Alias sep -> delimiter. + actual_delimiter = delimiter if delimiter else sep + + if decimal == actual_delimiter: + raise ValueError("decimal cannot be the same as delimiter") + + if thousands == actual_delimiter: + raise ValueError("thousands cannot be the same as delimiter") + + if nrows is not None and skipfooter != 0: + raise ValueError("cannot use both nrows and skipfooter parameters") + + if byte_range is not None: + if skipfooter != 0 or skiprows != 0 or nrows is not None: + raise ValueError( + "cannot manually limit rows to be read when using the byte range parameter" + ) + + +def _get_plc_data_type_from_dtype(dtype) -> plc.DataType: + # TODO: Remove this work-around Dictionary types + # in libcudf are fully mapped to categorical columns: + # https://github.com/rapidsai/cudf/issues/3960 + if isinstance(dtype, cudf.CategoricalDtype): + dtype = dtype.categories.dtype + elif dtype == "category": + dtype = "str" + + if isinstance(dtype, str): + if dtype == "date32": + return plc.DataType(plc.types.TypeId.TIMESTAMP_DAYS) + elif dtype in ("date", "date64"): + return plc.DataType(plc.types.TypeId.TIMESTAMP_MILLISECONDS) + elif dtype == "timestamp": + return plc.DataType(plc.types.TypeId.TIMESTAMP_MILLISECONDS) + elif dtype == "timestamp[us]": + return plc.DataType(plc.types.TypeId.TIMESTAMP_MICROSECONDS) + elif dtype == "timestamp[s]": + return plc.DataType(plc.types.TypeId.TIMESTAMP_SECONDS) + elif dtype == "timestamp[ms]": + return plc.DataType(plc.types.TypeId.TIMESTAMP_MILLISECONDS) + elif dtype == "timestamp[ns]": + return plc.DataType(plc.types.TypeId.TIMESTAMP_NANOSECONDS) + + dtype = cudf.dtype(dtype) + return dtype_to_pylibcudf_type(dtype) diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py index 2382e9f12ed..153ee0fa01a 100644 --- a/python/cudf/cudf/io/parquet.py +++ b/python/cudf/cudf/io/parquet.py @@ -1,6 +1,7 @@ # Copyright (c) 2019-2024, NVIDIA CORPORATION. 
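# Sketch of the argument checks in _validate_args as surfaced through
# cudf.read_csv (sample input is hypothetical).
import cudf
from io import StringIO

buf = StringIO("a,b\n1,2\n")
try:
    cudf.read_csv(buf, nrows=1, skipfooter=1)
except ValueError as err:
    print(err)  # cannot use both nrows and skipfooter parameters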
from __future__ import annotations +import io import itertools import math import operator @@ -10,23 +11,42 @@ from collections import defaultdict from contextlib import ExitStack from functools import partial, reduce -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any, Literal from uuid import uuid4 import numpy as np import pandas as pd +import pyarrow as pa from pyarrow import dataset as ds +import pylibcudf as plc + import cudf -from cudf._lib import parquet as libparquet +from cudf._lib.column import Column +from cudf._lib.utils import ( + _data_from_columns, + _index_level_name, + data_from_pylibcudf_io, + generate_pandas_metadata, +) from cudf.api.types import is_list_like +from cudf.core.buffer import acquire_spill_lock from cudf.core.column import as_column, column_empty from cudf.core.column.categorical import CategoricalColumn, as_unsigned_codes from cudf.utils import ioutils from cudf.utils.performance_tracking import _performance_tracking +try: + import ujson as json # type: ignore[import-untyped] +except ImportError: + import json + if TYPE_CHECKING: - from collections.abc import Callable + from collections.abc import Callable, Hashable + + from typing_extensions import Self + + from cudf.core.column import ColumnBase BYTE_SIZES = { @@ -55,31 +75,200 @@ } +@acquire_spill_lock() +def _plc_write_parquet( + table, + filepaths_or_buffers, + index: bool | None = None, + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + metadata_file_path: str | None = None, + int96_timestamps: bool = False, + row_group_size_bytes: int | None = None, + row_group_size_rows: int | None = None, + max_page_size_bytes: int | None = None, + max_page_size_rows: int | None = None, + max_dictionary_size: int | None = None, + partitions_info=None, + force_nullable_schema: bool = False, + header_version: Literal["1.0", "2.0"] = "1.0", + use_dictionary: bool = True, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, + write_arrow_schema: bool = False, +) -> np.ndarray | None: + """ + Cython function to call into libcudf API, see `write_parquet`. 
+ + See Also + -------- + cudf.io.parquet.write_parquet + """ + if index is True or ( + index is None and not isinstance(table.index, cudf.RangeIndex) + ): + columns = itertools.chain(table.index._columns, table._columns) + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in columns] + ) + tbl_meta = plc.io.types.TableInputMetadata(plc_table) + for level, idx_name in enumerate(table.index.names): + tbl_meta.column_metadata[level].set_name( + _index_level_name(idx_name, level, table._column_names) + ) + num_index_cols_meta = len(table.index.names) + else: + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in table._columns] + ) + tbl_meta = plc.io.types.TableInputMetadata(plc_table) + num_index_cols_meta = 0 + + for i, name in enumerate(table._column_names, num_index_cols_meta): + if not isinstance(name, str): + if cudf.get_option("mode.pandas_compatible"): + tbl_meta.column_metadata[i].set_name(str(name)) + else: + raise ValueError( + "Writing a Parquet file requires string column names" + ) + else: + tbl_meta.column_metadata[i].set_name(name) + + _set_col_metadata( + table[name]._column, + tbl_meta.column_metadata[i], + force_nullable_schema, + None, + skip_compression, + column_encoding, + column_type_length, + output_as_binary, + ) + if partitions_info is not None: + user_data = [ + { + "pandas": generate_pandas_metadata( + table.iloc[start_row : start_row + num_row].copy( + deep=False + ), + index, + ) + } + for start_row, num_row in partitions_info + ] + else: + user_data = [{"pandas": generate_pandas_metadata(table, index)}] + + if header_version not in ("1.0", "2.0"): + raise ValueError( + f"Invalid parquet header version: {header_version}. " + "Valid values are '1.0' and '2.0'" + ) + + dict_policy = ( + plc.io.types.DictionaryPolicy.ADAPTIVE + if use_dictionary + else plc.io.types.DictionaryPolicy.NEVER + ) + + comp_type = _get_comp_type(compression) + stat_freq = _get_stat_freq(statistics) + options = ( + plc.io.parquet.ParquetWriterOptions.builder( + plc.io.SinkInfo(filepaths_or_buffers), plc_table + ) + .metadata(tbl_meta) + .key_value_metadata(user_data) + .compression(comp_type) + .stats_level(stat_freq) + .int96_timestamps(int96_timestamps) + .write_v2_headers(header_version == "2.0") + .dictionary_policy(dict_policy) + .utc_timestamps(False) + .write_arrow_schema(write_arrow_schema) + .build() + ) + if partitions_info is not None: + options.set_partitions( + [ + plc.io.types.PartitionInfo(part[0], part[1]) + for part in partitions_info + ] + ) + if metadata_file_path is not None: + if is_list_like(metadata_file_path): + options.set_column_chunks_file_paths(metadata_file_path) + else: + options.set_column_chunks_file_paths([metadata_file_path]) + if row_group_size_bytes is not None: + options.set_row_group_size_bytes(row_group_size_bytes) + if row_group_size_rows is not None: + options.set_row_group_size_rows(row_group_size_rows) + if max_page_size_bytes is not None: + options.set_max_page_size_bytes(max_page_size_bytes) + if max_page_size_rows is not None: + options.set_max_page_size_rows(max_page_size_rows) + if max_dictionary_size is not None: + options.set_max_dictionary_size(max_dictionary_size) + blob = plc.io.parquet.write_parquet(options) + if metadata_file_path is not None: + return np.asarray(blob.obj) + else: + return None + + @_performance_tracking def _write_parquet( df, paths, - compression="snappy", - index=None, - statistics="ROWGROUP", - metadata_file_path=None, - int96_timestamps=False, - row_group_size_bytes=None, - 
row_group_size_rows=None, - max_page_size_bytes=None, - max_page_size_rows=None, - max_dictionary_size=None, + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + index: bool | None = None, + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + metadata_file_path: str | None = None, + int96_timestamps: bool = False, + row_group_size_bytes: int | None = None, + row_group_size_rows: int | None = None, + max_page_size_bytes: int | None = None, + max_page_size_rows: int | None = None, + max_dictionary_size: int | None = None, partitions_info=None, storage_options=None, - force_nullable_schema=False, - header_version="1.0", - use_dictionary=True, - skip_compression=None, - column_encoding=None, - column_type_length=None, - output_as_binary=None, - write_arrow_schema=True, -): + force_nullable_schema: bool = False, + header_version: Literal["1.0", "2.0"] = "1.0", + use_dictionary: bool = True, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, + write_arrow_schema: bool = True, +) -> np.ndarray | None: if is_list_like(paths) and len(paths) > 1: if partitions_info is None: ValueError("partition info is required for multiple paths") @@ -124,11 +313,11 @@ def _write_parquet( file_objs = [ ioutils.get_IOBase_writer(file_obj) for file_obj in fsspec_objs ] - write_parquet_res = libparquet.write_parquet( + write_parquet_res = _plc_write_parquet( df, filepaths_or_buffers=file_objs, **common_args ) else: - write_parquet_res = libparquet.write_parquet( + write_parquet_res = _plc_write_parquet( df, filepaths_or_buffers=paths_or_bufs, **common_args ) @@ -141,26 +330,38 @@ def _write_parquet( def write_to_dataset( df, root_path, - compression="snappy", + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", filename=None, partition_cols=None, fs=None, - preserve_index=False, - return_metadata=False, - statistics="ROWGROUP", - int96_timestamps=False, - row_group_size_bytes=None, - row_group_size_rows=None, - max_page_size_bytes=None, - max_page_size_rows=None, + preserve_index: bool = False, + return_metadata: bool = False, + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + int96_timestamps: bool = False, + row_group_size_bytes: int | None = None, + row_group_size_rows: int | None = None, + max_page_size_bytes: int | None = None, + max_page_size_rows: int | None = None, storage_options=None, - force_nullable_schema=False, - header_version="1.0", - use_dictionary=True, - skip_compression=None, - column_encoding=None, - column_type_length=None, - output_as_binary=None, + force_nullable_schema: bool = False, + header_version: Literal["1.0", "2.0"] = "1.0", + use_dictionary: bool = True, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, store_schema=False, ): """Wraps `to_parquet` to write partitioned Parquet datasets. 
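# Illustrative call exercising a few of the typed writer options above
# (the file path and data are hypothetical).
import cudf

df = cudf.DataFrame({"a": [1, 2, 3], "b": ["x", "y", "z"]})
df.to_parquet(
    "out.parquet",
    compression="ZSTD",
    statistics="PAGE",
    row_group_size_rows=1000,
)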
@@ -330,9 +531,29 @@ def write_to_dataset( return metadata +def _parse_metadata(meta) -> tuple[bool, Any, Any]: + file_is_range_index = False + file_index_cols = None + file_column_dtype = None + + if "index_columns" in meta and len(meta["index_columns"]) > 0: + file_index_cols = meta["index_columns"] + + if ( + isinstance(file_index_cols[0], dict) + and file_index_cols[0]["kind"] == "range" + ): + file_is_range_index = True + if "column_indexes" in meta and len(meta["column_indexes"]) == 1: + file_column_dtype = meta["column_indexes"][0]["numpy_type"] + return file_is_range_index, file_index_cols, file_column_dtype + + @ioutils.doc_read_parquet_metadata() @_performance_tracking -def read_parquet_metadata(filepath_or_buffer): +def read_parquet_metadata( + filepath_or_buffer, +) -> tuple[int, int, list[Hashable], int, list[dict[str, int]]]: """{docstring}""" # List of filepaths or buffers @@ -341,7 +562,39 @@ def read_parquet_metadata(filepath_or_buffer): bytes_per_thread=None, ) - return libparquet.read_parquet_metadata(filepaths_or_buffers) + parquet_metadata = plc.io.parquet_metadata.read_parquet_metadata( + plc.io.SourceInfo(filepaths_or_buffers) + ) + + # read all column names including index column, if any + col_names = [ + info.name() for info in parquet_metadata.schema().root().children() + ] + + index_col_names = set() + json_str = parquet_metadata.metadata()["pandas"] + if json_str != "": + meta = json.loads(json_str) + file_is_range_index, index_col, _ = _parse_metadata(meta) + if not file_is_range_index and index_col is not None: + columns = meta["columns"] + for idx_col in index_col: + for c in columns: + if c["field_name"] == idx_col: + index_col_names.add(idx_col) + + # remove the index column from the list of column names + # only if index_col_names is not None + if len(index_col_names) >= 0: + col_names = [name for name in col_names if name not in index_col_names] + + return ( + parquet_metadata.num_rows(), + parquet_metadata.num_rowgroups(), + col_names, + len(col_names), + parquet_metadata.rowgroup_metadata(), + ) @_performance_tracking @@ -886,7 +1139,6 @@ def _parquet_to_frame( dfs[-1][name] = column_empty( row_count=_len, dtype=_dtype, - masked=True, ) else: dfs[-1][name] = as_column( @@ -913,16 +1165,18 @@ def _read_parquet( columns=None, row_groups=None, use_pandas_metadata=None, - nrows=None, - skip_rows=None, - allow_mismatched_pq_schemas=False, + nrows: int | None = None, + skip_rows: int | None = None, + allow_mismatched_pq_schemas: bool = False, *args, **kwargs, -): +) -> cudf.DataFrame: # Simple helper function to dispatch between # cudf and pyarrow to read parquet data if engine == "cudf": - if kwargs: + if set(kwargs.keys()).difference( + set(("_chunk_read_limit", "_pass_read_limit")) + ): raise ValueError( "cudf engine doesn't support the " f"following keyword arguments: {list(kwargs.keys())}" @@ -932,30 +1186,123 @@ def _read_parquet( "cudf engine doesn't support the " f"following positional arguments: {list(args)}" ) + if nrows is None: + nrows = -1 + if skip_rows is None: + skip_rows = 0 if cudf.get_option("io.parquet.low_memory"): - return libparquet.read_parquet_chunked( + # Note: If this function ever takes accepts filters + # allow_range_index needs to be False when a filter is passed + # (see read_parquet) + allow_range_index = columns is not None and len(columns) != 0 + + options = ( + plc.io.parquet.ParquetReaderOptions.builder( + plc.io.SourceInfo(filepaths_or_buffers) + ) + .use_pandas_metadata(use_pandas_metadata) + 
.allow_mismatched_pq_schemas(allow_mismatched_pq_schemas) + .build() + ) + if row_groups is not None: + options.set_row_groups(row_groups) + if nrows > -1: + options.set_num_rows(nrows) + if skip_rows != 0: + options.set_skip_rows(skip_rows) + if columns is not None: + options.set_columns(columns) + + reader = plc.io.parquet.ChunkedParquetReader( + options, + chunk_read_limit=kwargs.get("_chunk_read_limit", 0), + pass_read_limit=kwargs.get("_pass_read_limit", 1024000000), + ) + + tbl_w_meta = reader.read_chunk() + column_names = tbl_w_meta.column_names(include_children=False) + child_names = tbl_w_meta.child_names + per_file_user_data = tbl_w_meta.per_file_user_data + concatenated_columns = tbl_w_meta.tbl.columns() + + # save memory + del tbl_w_meta + + while reader.has_next(): + tbl = reader.read_chunk().tbl + + for i in range(tbl.num_columns()): + concatenated_columns[i] = plc.concatenate.concatenate( + [concatenated_columns[i], tbl._columns[i]] + ) + # Drop residual columns to save memory + tbl._columns[i] = None + + df = cudf.DataFrame._from_data( + *_data_from_columns( + columns=[ + Column.from_pylibcudf(plc) + for plc in concatenated_columns + ], + column_names=column_names, + index_names=None, + ) + ) + df = _process_metadata( + df, + column_names, + child_names, + per_file_user_data, + row_groups, filepaths_or_buffers, - columns=columns, - row_groups=row_groups, - use_pandas_metadata=use_pandas_metadata, - nrows=nrows if nrows is not None else -1, - skip_rows=skip_rows if skip_rows is not None else 0, - allow_mismatched_pq_schemas=allow_mismatched_pq_schemas, + allow_range_index, + use_pandas_metadata, + nrows=nrows, + skip_rows=skip_rows, ) + return df else: - if nrows is None: - nrows = -1 - if skip_rows is None: - skip_rows = 0 - return libparquet.read_parquet( + allow_range_index = True + filters = kwargs.get("filters", None) + if columns is not None and len(columns) == 0 or filters: + allow_range_index = False + + options = ( + plc.io.parquet.ParquetReaderOptions.builder( + plc.io.SourceInfo(filepaths_or_buffers) + ) + .use_pandas_metadata(use_pandas_metadata) + .allow_mismatched_pq_schemas(allow_mismatched_pq_schemas) + .build() + ) + if row_groups is not None: + options.set_row_groups(row_groups) + if nrows > -1: + options.set_num_rows(nrows) + if skip_rows != 0: + options.set_skip_rows(skip_rows) + if columns is not None: + options.set_columns(columns) + if filters is not None: + options.set_filter(filters) + + tbl_w_meta = plc.io.parquet.read_parquet(options) + + df = cudf.DataFrame._from_data(*data_from_pylibcudf_io(tbl_w_meta)) + + df = _process_metadata( + df, + tbl_w_meta.column_names(include_children=False), + tbl_w_meta.child_names, + tbl_w_meta.per_file_user_data, + row_groups, filepaths_or_buffers, - columns=columns, - row_groups=row_groups, - use_pandas_metadata=use_pandas_metadata, + allow_range_index, + use_pandas_metadata, nrows=nrows, skip_rows=skip_rows, - allow_mismatched_pq_schemas=allow_mismatched_pq_schemas, ) + return df else: if ( isinstance(filepaths_or_buffers, list) @@ -980,28 +1327,40 @@ def to_parquet( df, path, engine="cudf", - compression="snappy", - index=None, + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + index: bool | None = None, partition_cols=None, partition_file_name=None, partition_offsets=None, - statistics="ROWGROUP", - metadata_file_path=None, - int96_timestamps=False, - row_group_size_bytes=None, - row_group_size_rows=None, - max_page_size_bytes=None, - max_page_size_rows=None, - max_dictionary_size=None, 
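# Usage sketch of the low-memory (chunked) read path above; the leading
# underscore keyword arguments are internal knobs, mirrored from the test
# updates later in this diff, and the limits shown are arbitrary.
import cudf

with cudf.option_context("io.parquet.low_memory", True):
    df = cudf.read_parquet(
        "out.parquet",
        _chunk_read_limit=240,
        _pass_read_limit=1024,
    )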
+ statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + metadata_file_path: str | None = None, + int96_timestamps: bool = False, + row_group_size_bytes: int | None = None, + row_group_size_rows: int | None = None, + max_page_size_bytes: int | None = None, + max_page_size_rows: int | None = None, + max_dictionary_size: int | None = None, storage_options=None, - return_metadata=False, - force_nullable_schema=False, - header_version="1.0", - use_dictionary=True, - skip_compression=None, - column_encoding=None, - column_type_length=None, - output_as_binary=None, + return_metadata: bool = False, + force_nullable_schema: bool = False, + header_version: Literal["1.0", "2.0"] = "1.0", + use_dictionary: bool = True, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, store_schema=False, *args, **kwargs, @@ -1114,10 +1473,11 @@ def to_parquet( @ioutils.doc_merge_parquet_filemetadata() -def merge_parquet_filemetadata(filemetadata_list): +def merge_parquet_filemetadata(filemetadata_list: list) -> np.ndarray: """{docstring}""" - - return libparquet.merge_filemetadata(filemetadata_list) + return np.asarray( + plc.io.parquet.merge_row_group_metadata(filemetadata_list).obj + ) def _generate_filename(): @@ -1205,10 +1565,207 @@ def _get_groups_and_offsets( return part_names, grouped_df, part_offsets -ParquetWriter = libparquet.ParquetWriter +class ParquetWriter: + """ + ParquetWriter lets you incrementally write out a Parquet file from a series + of cudf tables + + Parameters + ---------- + filepath_or_buffer : str, io.IOBase, os.PathLike, or list + File path or buffer to write to. The argument may also correspond + to a list of file paths or buffers. + index : bool or None, default None + If ``True``, include a dataframe's index(es) in the file output. + If ``False``, they will not be written to the file. If ``None``, + index(es) other than RangeIndex will be saved as columns. + compression : {'snappy', None}, default 'snappy' + Name of the compression to use. Use ``None`` for no compression. + statistics : {'ROWGROUP', 'PAGE', 'COLUMN', 'NONE'}, default 'ROWGROUP' + Level at which column statistics should be included in file. + row_group_size_bytes: int, default ``uint64 max`` + Maximum size of each stripe of the output. + By default, a virtually infinite size equal to ``uint64 max`` will be used. + row_group_size_rows: int, default 1000000 + Maximum number of rows of each stripe of the output. + By default, 1000000 (10^6 rows) will be used. + max_page_size_bytes: int, default 524288 + Maximum uncompressed size of each page of the output. + By default, 524288 (512KB) will be used. + max_page_size_rows: int, default 20000 + Maximum number of rows of each page of the output. + By default, 20000 will be used. + max_dictionary_size: int, default 1048576 + Maximum size of the dictionary page for each output column chunk. Dictionary + encoding for column chunks that exceeds this limit will be disabled. + By default, 1048576 (1MB) will be used. + use_dictionary : bool, default True + If ``True``, enable dictionary encoding for Parquet page data + subject to ``max_dictionary_size`` constraints. + If ``False``, disable dictionary encoding for Parquet page data. 
+ store_schema : bool, default False + If ``True``, enable computing and writing arrow schema to Parquet + file footer's key-value metadata section for faithful round-tripping. + + See Also + -------- + cudf.io.parquet.write_parquet + """ + + def __init__( + self, + filepath_or_buffer, + index: bool | None = None, + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", + row_group_size_bytes: int = int(np.iinfo(np.uint64).max), + row_group_size_rows: int = 1000000, + max_page_size_bytes: int = 524288, + max_page_size_rows: int = 20000, + max_dictionary_size: int = 1048576, + use_dictionary: bool = True, + store_schema: bool = False, + ): + filepaths_or_buffers = ( + list(filepath_or_buffer) + if is_list_like(filepath_or_buffer) + else [filepath_or_buffer] + ) + self.sink = plc.io.SinkInfo(filepaths_or_buffers) + self.statistics = statistics + self.compression = compression + self.index = index + self.initialized = False + self.row_group_size_bytes = row_group_size_bytes + self.row_group_size_rows = row_group_size_rows + self.max_page_size_bytes = max_page_size_bytes + self.max_page_size_rows = max_page_size_rows + self.max_dictionary_size = max_dictionary_size + self.use_dictionary = use_dictionary + self.write_arrow_schema = store_schema + + def write_table(self, table, partitions_info=None) -> None: + """Writes a single table to the file""" + if not self.initialized: + self._initialize_chunked_state( + table, + num_partitions=len(partitions_info) if partitions_info else 1, + ) + if self.index is not False and ( + table.index.name is not None + or isinstance(table.index, cudf.MultiIndex) + ): + columns = itertools.chain(table.index._columns, table._columns) + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in columns] + ) + else: + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in table._columns] + ) + self.writer.write(plc_table, partitions_info) + + def close(self, metadata_file_path=None) -> np.ndarray | None: + if not self.initialized: + return None + column_chunks_file_paths = [] + if metadata_file_path is not None: + if is_list_like(metadata_file_path): + column_chunks_file_paths = list(metadata_file_path) + else: + column_chunks_file_paths = [metadata_file_path] + blob = self.writer.close(column_chunks_file_paths) + if metadata_file_path is not None: + return np.asarray(blob.obj) + return None + + def __enter__(self) -> Self: + return self + + def __exit__(self, *args) -> None: + self.close() + + def _initialize_chunked_state( + self, table, num_partitions: int = 1 + ) -> None: + """Prepares all the values required to build the + chunked_parquet_writer_options and creates a writer + """ + # Set the table_metadata + num_index_cols_meta = 0 + plc_table = plc.Table( + [col.to_pylibcudf(mode="read") for col in table._columns] + ) + self.tbl_meta = plc.io.types.TableInputMetadata(plc_table) + if self.index is not False: + if isinstance(table.index, cudf.MultiIndex): + plc_table = plc.Table( + [ + col.to_pylibcudf(mode="read") + for col in itertools.chain( + table.index._columns, table._columns + ) + ] + ) + self.tbl_meta = plc.io.types.TableInputMetadata(plc_table) + for level, idx_name in enumerate(table.index.names): + self.tbl_meta.column_metadata[level].set_name(idx_name) + num_index_cols_meta = len(table.index.names) + else: + if table.index.name is not None: + plc_table = plc.Table( + [ + col.to_pylibcudf(mode="read") + for col in itertools.chain( + 
table.index._columns, table._columns + ) + ] + ) + self.tbl_meta = plc.io.types.TableInputMetadata(plc_table) + self.tbl_meta.column_metadata[0].set_name(table.index.name) + num_index_cols_meta = 1 + + for i, name in enumerate(table._column_names, num_index_cols_meta): + self.tbl_meta.column_metadata[i].set_name(name) + _set_col_metadata( + table[name]._column, + self.tbl_meta.column_metadata[i], + ) -def _parse_bytes(s): + index = ( + False if isinstance(table.index, cudf.RangeIndex) else self.index + ) + user_data = [ + {"pandas": generate_pandas_metadata(table, index)} + ] * num_partitions + comp_type = _get_comp_type(self.compression) + stat_freq = _get_stat_freq(self.statistics) + dict_policy = ( + plc.io.types.DictionaryPolicy.ADAPTIVE + if self.use_dictionary + else plc.io.types.DictionaryPolicy.NEVER + ) + options = ( + plc.io.parquet.ChunkedParquetWriterOptions.builder(self.sink) + .metadata(self.tbl_meta) + .key_value_metadata(user_data) + .compression(comp_type) + .stats_level(stat_freq) + .row_group_size_bytes(self.row_group_size_bytes) + .row_group_size_rows(self.row_group_size_rows) + .max_page_size_bytes(self.max_page_size_bytes) + .max_page_size_rows(self.max_page_size_rows) + .max_dictionary_size(self.max_dictionary_size) + .write_arrow_schema(self.write_arrow_schema) + .build() + ) + options.set_dictionary_policy(dict_policy) + self.writer = plc.io.parquet.ParquetChunkedWriter.from_options(options) + self.initialized = True + + +def _parse_bytes(s: str) -> int: """Parse byte string to numbers Utility function vendored from Dask. @@ -1345,8 +1902,8 @@ def __init__( path, partition_cols, index=None, - compression="snappy", - statistics="ROWGROUP", + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None] = "snappy", + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"] = "ROWGROUP", max_file_size=None, file_name_prefix=None, storage_options=None, @@ -1370,9 +1927,7 @@ def __init__( self.partition_cols = partition_cols # Collection of `ParquetWriter`s, and the corresponding # partition_col values they're responsible for - self._chunked_writers: list[ - tuple[libparquet.ParquetWriter, list[str], str] - ] = [] + self._chunked_writers: list[tuple[ParquetWriter, list[str], str]] = [] # Map of partition_col values to their ParquetWriter's index # in self._chunked_writers for reverse lookup self.path_cw_map: dict[str, int] = {} @@ -1563,3 +2118,257 @@ def _hive_dirname(name, val): if pd.isna(val): val = "__HIVE_DEFAULT_PARTITION__" return f"{name}={val}" + + +def _set_col_metadata( + col: ColumnBase, + col_meta: plc.io.types.ColumnInMetadata, + force_nullable_schema: bool = False, + path: str | None = None, + skip_compression: set[Hashable] | None = None, + column_encoding: dict[ + Hashable, + Literal[ + "PLAIN", + "DICTIONARY", + "DELTA_BINARY_PACKED", + "DELTA_LENGTH_BYTE_ARRAY", + "DELTA_BYTE_ARRAY", + "BYTE_STREAM_SPLIT", + "USE_DEFAULT", + ], + ] + | None = None, + column_type_length: dict | None = None, + output_as_binary: set[Hashable] | None = None, +) -> None: + need_path = ( + skip_compression is not None + or column_encoding is not None + or column_type_length is not None + or output_as_binary is not None + ) + name = col_meta.get_name() if need_path else None + full_path = ( + path + "." + name if (path is not None and name is not None) else name + ) + + if force_nullable_schema: + # Only set nullability if `force_nullable_schema` + # is true. 
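# Usage sketch of the chunked ParquetWriter defined above (file path and
# frames are hypothetical); each write_table call appends more rows to the
# same output file, and close() runs automatically on context exit.
import cudf
from cudf.io.parquet import ParquetWriter

with ParquetWriter("chunks.parquet", statistics="ROWGROUP") as writer:
    writer.write_table(cudf.DataFrame({"a": [1, 2]}))
    writer.write_table(cudf.DataFrame({"a": [3, 4]}))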
+ col_meta.set_nullability(True) + + if skip_compression is not None and full_path in skip_compression: + col_meta.set_skip_compression(True) + + if column_encoding is not None and full_path in column_encoding: + encoding = column_encoding[full_path] + if encoding is None: + c_encoding = plc.io.types.ColumnEncoding.USE_DEFAULT + else: + enc = str(encoding).upper() + c_encoding = getattr(plc.io.types.ColumnEncoding, enc, None) + if c_encoding is None: + raise ValueError("Unsupported `column_encoding` type") + col_meta.set_encoding(c_encoding) + + if column_type_length is not None and full_path in column_type_length: + col_meta.set_output_as_binary(True) + col_meta.set_type_length(column_type_length[full_path]) + + if output_as_binary is not None and full_path in output_as_binary: + col_meta.set_output_as_binary(True) + + if isinstance(col.dtype, cudf.StructDtype): + for i, (child_col, name) in enumerate( + zip(col.children, list(col.dtype.fields)) + ): + col_meta.child(i).set_name(name) + _set_col_metadata( + child_col, + col_meta.child(i), + force_nullable_schema, + full_path, + skip_compression, + column_encoding, + column_type_length, + output_as_binary, + ) + elif isinstance(col.dtype, cudf.ListDtype): + if full_path is not None: + full_path = full_path + ".list" + col_meta.child(1).set_name("element") + _set_col_metadata( + col.children[1], + col_meta.child(1), + force_nullable_schema, + full_path, + skip_compression, + column_encoding, + column_type_length, + output_as_binary, + ) + elif isinstance(col.dtype, cudf.core.dtypes.DecimalDtype): + col_meta.set_decimal_precision(col.dtype.precision) + + +def _get_comp_type( + compression: Literal["snappy", "ZSTD", "ZLIB", "LZ4", None], +) -> plc.io.types.CompressionType: + if compression is None: + return plc.io.types.CompressionType.NONE + result = getattr(plc.io.types.CompressionType, compression.upper(), None) + if result is None: + raise ValueError("Unsupported `compression` type") + return result + + +def _get_stat_freq( + statistics: Literal["ROWGROUP", "PAGE", "COLUMN", "NONE"], +) -> plc.io.types.StatisticsFreq: + result = getattr( + plc.io.types.StatisticsFreq, f"STATISTICS_{statistics.upper()}", None + ) + if result is None: + raise ValueError("Unsupported `statistics_freq` type") + return result + + +def _process_metadata( + df: cudf.DataFrame, + names: list[Hashable], + child_names: dict, + per_file_user_data: list, + row_groups, + filepaths_or_buffers, + allow_range_index: bool, + use_pandas_metadata: bool, + nrows: int = -1, + skip_rows: int = 0, +) -> cudf.DataFrame: + ioutils._add_df_col_struct_names(df, child_names) + index_col = None + is_range_index = True + column_index_type = None + index_col_names = None + meta = None + for single_file in per_file_user_data: + if b"pandas" not in single_file: + continue + json_str = single_file[b"pandas"].decode("utf-8") + meta = json.loads(json_str) + file_is_range_index, index_col, column_index_type = _parse_metadata( + meta + ) + is_range_index &= file_is_range_index + + if ( + not file_is_range_index + and index_col is not None + and index_col_names is None + ): + index_col_names = {} + for idx_col in index_col: + for c in meta["columns"]: + if c["field_name"] == idx_col: + index_col_names[idx_col] = c["name"] + + if meta is not None: + # Book keep each column metadata as the order + # of `meta["columns"]` and `column_names` are not + # guaranteed to be deterministic and same always. 
+ meta_data_per_column = { + col_meta["name"]: col_meta for col_meta in meta["columns"] + } + + # update the decimal precision of each column + for col in names: + if isinstance(df._data[col].dtype, cudf.core.dtypes.DecimalDtype): + df._data[col].dtype.precision = meta_data_per_column[col][ + "metadata" + ]["precision"] + + # Set the index column + if index_col is not None and len(index_col) > 0: + if is_range_index: + if not allow_range_index: + return df + + if len(per_file_user_data) > 1: + range_index_meta = { + "kind": "range", + "name": None, + "start": 0, + "stop": len(df), + "step": 1, + } + else: + range_index_meta = index_col[0] + + if row_groups is not None: + per_file_metadata = [ + pa.parquet.read_metadata( + # Pyarrow cannot read directly from bytes + io.BytesIO(s) if isinstance(s, bytes) else s + ) + for s in filepaths_or_buffers + ] + + filtered_idx = [] + for i, file_meta in enumerate(per_file_metadata): + row_groups_i = [] + start = 0 + for row_group in range(file_meta.num_row_groups): + stop = start + file_meta.row_group(row_group).num_rows + row_groups_i.append((start, stop)) + start = stop + + for rg in row_groups[i]: + filtered_idx.append( + cudf.RangeIndex( + start=row_groups_i[rg][0], + stop=row_groups_i[rg][1], + step=range_index_meta["step"], + ) + ) + + if len(filtered_idx) > 0: + idx = cudf.concat(filtered_idx) + else: + idx = cudf.Index._from_column( + cudf.core.column.column_empty(0) + ) + else: + start = range_index_meta["start"] + skip_rows # type: ignore[operator] + stop = range_index_meta["stop"] + if nrows > -1: + stop = start + nrows + idx = cudf.RangeIndex( + start=start, + stop=stop, + step=range_index_meta["step"], + name=range_index_meta["name"], + ) + + df.index = idx + elif set(index_col).issubset(names): + index_data = df[index_col] + actual_index_names = iter(index_col_names.values()) + if index_data._num_columns == 1: + idx = cudf.Index._from_column( + index_data._columns[0], name=next(actual_index_names) + ) + else: + idx = cudf.MultiIndex.from_frame( + index_data, names=list(actual_index_names) + ) + df.drop(columns=index_col, inplace=True) + df.index = idx + else: + if use_pandas_metadata: + df.index.names = index_col + + if df._num_columns == 0 and column_index_type is not None: + df._data.label_dtype = cudf.dtype(column_index_type) + + return df diff --git a/python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_23.12.pkl b/python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_23.12.pkl index 1ec077d10f7..64e06f0631d 100644 Binary files a/python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_23.12.pkl and b/python/cudf/cudf/tests/data/pkl/stringColumnWithRangeIndex_cudf_23.12.pkl differ diff --git a/python/cudf/cudf/tests/test_parquet.py b/python/cudf/cudf/tests/test_parquet.py index 13efa71ebae..77d1f77d30b 100644 --- a/python/cudf/cudf/tests/test_parquet.py +++ b/python/cudf/cudf/tests/test_parquet.py @@ -22,7 +22,6 @@ from pyarrow import parquet as pq import cudf -from cudf._lib.parquet import read_parquet_chunked from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION from cudf.io.parquet import ( ParquetDatasetWriter, @@ -3775,13 +3774,14 @@ def test_parquet_chunked_reader( ) buffer = BytesIO() df.to_parquet(buffer, row_group_size=10000) - actual = read_parquet_chunked( - [buffer], - chunk_read_limit=chunk_read_limit, - pass_read_limit=pass_read_limit, - use_pandas_metadata=use_pandas_metadata, - row_groups=row_groups, - ) + with cudf.option_context("io.parquet.low_memory", True): + 
actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + use_pandas_metadata=use_pandas_metadata, + row_groups=row_groups, + ) expected = cudf.read_parquet( buffer, use_pandas_metadata=use_pandas_metadata, row_groups=row_groups ) @@ -3825,12 +3825,13 @@ def test_parquet_chunked_reader_structs( # Number of rows to read nrows = num_rows if num_rows is not None else len(df) - actual = read_parquet_chunked( - [buffer], - chunk_read_limit=chunk_read_limit, - pass_read_limit=pass_read_limit, - nrows=nrows, - ) + with cudf.option_context("io.parquet.low_memory", True): + actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + nrows=nrows, + ) expected = cudf.read_parquet( buffer, nrows=nrows, @@ -3877,12 +3878,13 @@ def test_parquet_chunked_reader_string_decoders( nrows = num_rows if num_rows is not None else len(df) # Check with num_rows specified - actual = read_parquet_chunked( - [buffer], - chunk_read_limit=chunk_read_limit, - pass_read_limit=pass_read_limit, - nrows=nrows, - ) + with cudf.option_context("io.parquet.low_memory", True): + actual = cudf.read_parquet( + [buffer], + _chunk_read_limit=chunk_read_limit, + _pass_read_limit=pass_read_limit, + nrows=nrows, + ) expected = cudf.read_parquet( buffer, nrows=nrows, @@ -3982,13 +3984,14 @@ def test_parquet_reader_with_mismatched_tables(store_schema): ).reset_index(drop=True) # Read with chunked reader (filter columns not supported) - got_chunked = read_parquet_chunked( - [buf1, buf2], - columns=["list", "d_list", "str"], - chunk_read_limit=240, - pass_read_limit=240, - allow_mismatched_pq_schemas=True, - ) + with cudf.option_context("io.parquet.low_memory", True): + got_chunked = cudf.read_parquet( + [buf1, buf2], + columns=["list", "d_list", "str"], + _chunk_read_limit=240, + _pass_read_limit=240, + allow_mismatched_pq_schemas=True, + ) # Construct the expected table without filter columns expected_chunked = cudf.concat( @@ -4054,13 +4057,14 @@ def test_parquet_reader_with_mismatched_structs(): ) # Read with chunked reader - got_chunked = read_parquet_chunked( - [buf1, buf2], - columns=["struct.b.b_b.b_b_a"], - chunk_read_limit=240, - pass_read_limit=240, - allow_mismatched_pq_schemas=True, - ) + with cudf.option_context("io.parquet.low_memory", True): + got_chunked = cudf.read_parquet( + [buf1, buf2], + columns=["struct.b.b_b.b_b_a"], + _chunk_read_limit=240, + _pass_read_limit=240, + allow_mismatched_pq_schemas=True, + ) got_chunked = ( cudf.Series(got_chunked["struct"]) .struct.field("b") diff --git a/python/cudf/cudf/tests/test_serialize.py b/python/cudf/cudf/tests/test_serialize.py index 68f2aaf9cab..b50ed04427f 100644 --- a/python/cudf/cudf/tests/test_serialize.py +++ b/python/cudf/cudf/tests/test_serialize.py @@ -7,6 +7,7 @@ import numpy as np import pandas as pd import pytest +from packaging import version import cudf from cudf.testing import _utils as utils, assert_eq @@ -149,13 +150,19 @@ def test_serialize(df, to_host): def test_serialize_dtype_error_checking(): dtype = cudf.IntervalDtype("float", "right") - header, frames = dtype.serialize() - with pytest.raises(AssertionError): - # Invalid number of frames - type(dtype).deserialize(header, [None] * (header["frame_count"] + 1)) + # Must call device_serialize (not serialize) to ensure that the type metadata is + # encoded in the header. 
+ header, frames = dtype.device_serialize() with pytest.raises(AssertionError): # mismatching class cudf.StructDtype.deserialize(header, frames) + # The is-cuda flag list length must match the number of frames + header["is-cuda"] = [False] + with pytest.raises(AssertionError): + # Invalid number of frames + type(dtype).deserialize( + header, [np.zeros(1)] * (header["frame_count"] + 1) + ) def test_serialize_dataframe(): @@ -382,6 +389,10 @@ def test_serialize_string_check_buffer_sizes(): assert expect == got +@pytest.mark.skipif( + version.parse(np.__version__) < version.parse("2.0.0"), + reason="The serialization of numpy 2.0 types is incompatible with numpy 1.x", +) def test_deserialize_cudf_23_12(datadir): fname = datadir / "pkl" / "stringColumnWithRangeIndex_cudf_23.12.pkl" diff --git a/python/cudf/cudf/tests/test_struct.py b/python/cudf/cudf/tests/test_struct.py index 899d78c999b..b85943626a6 100644 --- a/python/cudf/cudf/tests/test_struct.py +++ b/python/cudf/cudf/tests/test_struct.py @@ -79,7 +79,7 @@ def test_series_construction_with_nulls(): ) def test_serialize_struct_dtype(fields): dtype = cudf.StructDtype(fields) - recreated = dtype.__class__.deserialize(*dtype.serialize()) + recreated = dtype.__class__.device_deserialize(*dtype.device_serialize()) assert recreated == dtype diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index d9a3da6666d..a04fcb8df7a 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -43,7 +43,6 @@ } _BYTES_PER_THREAD_DEFAULT = 256 * 1024 * 1024 -_ROW_GROUP_SIZE_BYTES_DEFAULT = np.iinfo(np.uint64).max _docstring_remote_sources = """ - cuDF supports local and remote data stores. See configuration details for diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml index e726b7fdca1..3891110e9d3 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/dependencies.yaml @@ -76,13 +76,6 @@ files: - py_version - test_base - test_xgboost - test_catboost: - output: none - includes: - - cuda_version - - py_version - - test_base - - test_catboost test_cuml: output: none includes: @@ -251,14 +244,6 @@ dependencies: - pip - pip: - xgboost>=2.0.1 - test_catboost: - common: - - output_types: conda - packages: - - numpy - - scipy - - scikit-learn - - catboost test_cuml: common: - output_types: conda diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py deleted file mode 100644 index 04cc69231fe..00000000000 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_catboost.py +++ /dev/null @@ -1,129 +0,0 @@ -# Copyright (c) 2024, NVIDIA CORPORATION. 
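# Round-trip sketch matching the updated serialization tests above:
# device_serialize emits the type metadata that device_deserialize needs to
# reconstruct the object.
import cudf

dtype = cudf.StructDtype({"a": "int64", "b": "str"})
header, frames = dtype.device_serialize()
assert cudf.StructDtype.device_deserialize(header, frames) == dtype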
- -import numpy as np -import pandas as pd -import pytest -from catboost import CatBoostClassifier, CatBoostRegressor, Pool -from sklearn.datasets import make_classification, make_regression - -rng = np.random.default_rng(seed=42) - - -def assert_catboost_equal(expect, got, rtol=1e-7, atol=0.0): - if isinstance(expect, (tuple, list)): - assert len(expect) == len(got) - for e, g in zip(expect, got): - assert_catboost_equal(e, g, rtol, atol) - elif isinstance(expect, np.ndarray): - np.testing.assert_allclose(expect, got, rtol=rtol, atol=atol) - elif isinstance(expect, pd.DataFrame): - pd.testing.assert_frame_equal(expect, got) - elif isinstance(expect, pd.Series): - pd.testing.assert_series_equal(expect, got) - else: - assert expect == got - - -pytestmark = pytest.mark.assert_eq(fn=assert_catboost_equal) - - -@pytest.fixture -def regression_data(): - X, y = make_regression(n_samples=100, n_features=10, random_state=42) - return pd.DataFrame(X), pd.Series(y) - - -@pytest.fixture -def classification_data(): - X, y = make_classification( - n_samples=100, n_features=10, n_classes=2, random_state=42 - ) - return pd.DataFrame(X), pd.Series(y) - - -def test_catboost_regressor_with_dataframe(regression_data): - X, y = regression_data - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -def test_catboost_regressor_with_numpy(regression_data): - X, y = regression_data - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(X.values, y.values) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_classifier_with_dataframe(classification_data): - X, y = classification_data - model = CatBoostClassifier(iterations=10, verbose=0) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -def test_catboost_classifier_with_numpy(classification_data): - X, y = classification_data - model = CatBoostClassifier(iterations=10, verbose=0) - model.fit(X.values, y.values) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_with_pool_and_dataframe(regression_data): - X, y = regression_data - train_pool = Pool(X, y) - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(train_pool) - predictions = model.predict(X) - return predictions - - -def test_catboost_with_pool_and_numpy(regression_data): - X, y = regression_data - train_pool = Pool(X.values, y.values) - model = CatBoostRegressor(iterations=10, verbose=0) - model.fit(train_pool) - predictions = model.predict(X.values) - return predictions - - -def test_catboost_with_categorical_features(): - data = { - "numerical_feature": rng.standard_normal(100), - "categorical_feature": rng.choice(["A", "B", "C"], size=100), - "target": rng.integers(0, 2, size=100), - } - df = pd.DataFrame(data) - X = df[["numerical_feature", "categorical_feature"]] - y = df["target"] - cat_features = ["categorical_feature"] - model = CatBoostClassifier( - iterations=10, verbose=0, cat_features=cat_features - ) - model.fit(X, y) - predictions = model.predict(X) - return predictions - - -@pytest.mark.parametrize( - "X, y", - [ - ( - pd.DataFrame(rng.standard_normal((100, 5))), - pd.Series(rng.standard_normal(100)), - ), - (rng.standard_normal((100, 5)), rng.standard_normal(100)), - ], -) -def test_catboost_train_test_split(X, y): - from sklearn.model_selection import train_test_split - - X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42) - model = CatBoostRegressor(iterations=10, verbose=0) - 
model.fit(X_train, y_train) - predictions = model.predict(X_test) - return len(X_train), len(X_test), len(y_train), len(y_test), predictions diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py index bef02c86355..8be48953974 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_holoviews.py @@ -71,6 +71,9 @@ def test_holoviews_heatmap(df): ) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_holoviews_histogram(df): return get_plot_info(hv.Histogram(df.values)) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py index 1909392b9f7..c91808021e8 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_matplotlib.py @@ -33,6 +33,9 @@ def assert_plots_equal(expect, got): pytestmark = pytest.mark.assert_eq(fn=assert_plots_equal) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_line(): df = pd.DataFrame({"x": [1, 2, 3, 4, 5], "y": [2, 4, 6, 8, 10]}) (data,) = plt.plot(df["x"], df["y"], marker="o", linestyle="-") @@ -40,6 +43,9 @@ def test_line(): return plt.gca() +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_bar(): data = pd.Series([1, 2, 3, 4, 5], index=["a", "b", "c", "d", "e"]) ax = data.plot(kind="bar") diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py index 472f1889354..4d35d9e8946 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_numpy.py @@ -37,6 +37,9 @@ def test_numpy_dot(df): return np.dot(df, df.T) +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_numpy_fft(sr): fft = np.fft.fft(sr) return fft diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py index ad287471aa0..7cea635afc4 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_pytorch.py @@ -116,6 +116,9 @@ def test_torch_train(data): return model(test_x1, test_x2) +@pytest.mark.skip( + reason="AssertionError: The values for attribute 'device' do not match: cpu != cuda:0." 
+) def test_torch_tensor_ctor(): s = pd.Series(range(5)) return torch.tensor(s.values) diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py index 021c5bac9b7..f6a8a96ae3c 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_seaborn.py @@ -54,6 +54,9 @@ def test_scatter(df): return ax +@pytest.mark.skip( + reason="AttributeError: 'ndarray' object has no attribute '_fsproxy_wrapped'" +) def test_lineplot_with_sns_data(): df = sns.load_dataset("flights") ax = sns.lineplot(data=df, x="month", y="passengers") diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py index 0777d982ac2..f275659288e 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_stumpy_distributed.py @@ -41,7 +41,7 @@ def test_multidimensional_distributed_timeseries(dask_client): rng = np.random.default_rng(seed=42) # Each row represents data from a different dimension while each column represents # data from the same dimension - your_time_series = rng.random(3, 1000) + your_time_series = rng.random((3, 1000)) # Approximately, how many data points might be found in a pattern window_size = 50 diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py index ba1f518cbfd..b4fad3024e7 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_tensorflow.py @@ -271,6 +271,7 @@ def call(self, values): return tf.concat(values, axis=-1) +@pytest.mark.xfail(reason="ValueError: Invalid dtype: object") def test_full_example_train_with_df(df, target): # https://www.tensorflow.org/tutorials/load_data/pandas_dataframe#full_example # Inputs are directly passed as dictionary of series diff --git a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py index 70f1e6a4250..0fd632507a6 100644 --- a/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py +++ b/python/cudf/cudf_pandas_tests/third_party_integration_tests/tests/test_xgboost.py @@ -113,6 +113,9 @@ def test_with_external_memory( return predt +@pytest.mark.skip( + reason="TypeError: Implicit conversion to a NumPy array is not allowed. Please use `.get()` to construct a NumPy array explicitly." 
+) @pytest.mark.parametrize("device", ["cpu", "cuda"]) def test_predict(device: str) -> np.ndarray: reg = xgb.XGBRegressor(n_estimators=2, device=device) diff --git a/python/dask_cudf/dask_cudf/tests/test_distributed.py b/python/dask_cudf/dask_cudf/tests/test_distributed.py index d03180852eb..c28b7e49207 100644 --- a/python/dask_cudf/dask_cudf/tests/test_distributed.py +++ b/python/dask_cudf/dask_cudf/tests/test_distributed.py @@ -4,7 +4,7 @@ import pytest import dask -from dask import dataframe as dd +from dask import array as da, dataframe as dd from dask.distributed import Client from distributed.utils_test import cleanup, loop, loop_in_thread # noqa: F401 @@ -121,3 +121,17 @@ def test_unique(): ddf.x.unique().compute(), check_index=False, ) + + +def test_serialization_of_numpy_types(): + # Dask uses numpy integers as column names, which can break cudf serialization + with dask_cuda.LocalCUDACluster(n_workers=1) as cluster: + with Client(cluster): + with dask.config.set( + {"dataframe.backend": "cudf", "array.backend": "cupy"} + ): + rng = da.random.default_rng() + X_arr = rng.random((100, 10), chunks=(50, 10)) + X = dd.from_dask_array(X_arr) + X = X[X.columns[0]] + X.compute()
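
For reference, a minimal usage sketch of the low-memory read pattern the parquet tests above switch to. This is illustrative only, not part of the diff, and assumes a cuDF build that exposes the "io.parquet.low_memory" option and the private _chunk_read_limit/_pass_read_limit keyword arguments shown in those tests:

import io

import pandas as pd

import cudf
from cudf.testing import assert_eq

# Build a small Parquet buffer to read back.
buffer = io.BytesIO()
pd.DataFrame({"a": range(1000), "b": [str(i) for i in range(1000)]}).to_parquet(buffer)

# With the low-memory option set, read_parquet goes through the chunked reader and
# decodes the file in passes bounded by the given byte limits instead of all at once.
with cudf.option_context("io.parquet.low_memory", True):
    got = cudf.read_parquet(
        [buffer],
        _chunk_read_limit=240,
        _pass_read_limit=240,
    )

# The result should match a regular (non-chunked) read of the same buffer.
expected = cudf.read_parquet(buffer)
assert_eq(expected, got)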
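
Likewise, a small round-trip sketch of the device_serialize/device_deserialize pattern the dtype serialization tests move to; illustrative only, assuming a cuDF version where dtypes expose these methods as the updated tests do:

import cudf

dtype = cudf.StructDtype({"a": "int64", "b": "float64"})

# Per the comment in the test above, device_serialize() (rather than serialize())
# ensures the type metadata and per-frame flags such as "is-cuda" are encoded in
# the header, so deserialization can validate the frames it receives.
header, frames = dtype.device_serialize()

recreated = type(dtype).device_deserialize(header, frames)
assert recreated == dtype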