diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
index a65cae34653..bc237cc73b0 100644
--- a/.github/workflows/pr.yaml
+++ b/.github/workflows/pr.yaml
@@ -43,80 +43,52 @@ jobs:
     with:
       needs: ${{ toJSON(needs) }}
   changed-files:
-    runs-on: ubuntu-latest
-    name: "Check changed files"
-    outputs:
-      test_cpp: ${{ steps.changed-files.outputs.cpp_any_changed == 'true' }}
-      test_java: ${{ steps.changed-files.outputs.java_any_changed == 'true' }}
-      test_notebooks: ${{ steps.changed-files.outputs.notebooks_any_changed == 'true' }}
-      test_python: ${{ steps.changed-files.outputs.python_any_changed == 'true' }}
-      test_cudf_pandas: ${{ steps.changed-files.outputs.cudf_pandas_any_changed == 'true' }}
-    steps:
-      - name: Get PR info
-        id: get-pr-info
-        uses: nv-gha-runners/get-pr-info@main
-      - name: Checkout code repo
-        uses: actions/checkout@v4
-        with:
-          fetch-depth: 0
-          persist-credentials: false
-      - name: Calculate merge base
-        id: calculate-merge-base
-        env:
-          PR_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
-          BASE_SHA: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).base.sha }}
-        run: |
-          (echo -n "merge-base="; git merge-base "$BASE_SHA" "$PR_SHA") > "$GITHUB_OUTPUT"
-      - name: Get changed files
-        id: changed-files
-        uses: tj-actions/changed-files@v45
-        with:
-          base_sha: ${{ steps.calculate-merge-base.outputs.merge-base }}
-          sha: ${{ fromJSON(steps.get-pr-info.outputs.pr-info).head.sha }}
-          files_yaml: |
-            cpp:
-              - '**'
-              - '!CONTRIBUTING.md'
-              - '!README.md'
-              - '!docs/**'
-              - '!img/**'
-              - '!java/**'
-              - '!notebooks/**'
-              - '!python/**'
-              - '!ci/cudf_pandas_scripts/**'
-            java:
-              - '**'
-              - '!CONTRIBUTING.md'
-              - '!README.md'
-              - '!docs/**'
-              - '!img/**'
-              - '!notebooks/**'
-              - '!python/**'
-              - '!ci/cudf_pandas_scripts/**'
-            notebooks:
-              - '**'
-              - '!CONTRIBUTING.md'
-              - '!README.md'
-              - '!java/**'
-              - '!ci/cudf_pandas_scripts/**'
-            python:
-              - '**'
-              - '!CONTRIBUTING.md'
-              - '!README.md'
-              - '!docs/**'
-              - '!img/**'
-              - '!java/**'
-              - '!notebooks/**'
-              - '!ci/cudf_pandas_scripts/**'
-            cudf_pandas:
-              - '**'
-              - 'ci/cudf_pandas_scripts/**'
-              - '!CONTRIBUTING.md'
-              - '!README.md'
-              - '!docs/**'
-              - '!img/**'
-              - '!java/**'
-              - '!notebooks/**'
+    secrets: inherit
+    uses: rapidsai/shared-workflows/.github/workflows/changed-files.yaml@branch-24.12
+    with:
+      files_yaml: |
+        test_cpp:
+          - '**'
+          - '!CONTRIBUTING.md'
+          - '!README.md'
+          - '!ci/cudf_pandas_scripts/**'
+          - '!docs/**'
+          - '!img/**'
+          - '!java/**'
+          - '!notebooks/**'
+          - '!python/**'
+        test_cudf_pandas:
+          - '**'
+          - '!CONTRIBUTING.md'
+          - '!README.md'
+          - '!docs/**'
+          - '!img/**'
+          - '!java/**'
+          - '!notebooks/**'
+        test_java:
+          - '**'
+          - '!CONTRIBUTING.md'
+          - '!README.md'
+          - '!ci/cudf_pandas_scripts/**'
+          - '!docs/**'
+          - '!img/**'
+          - '!notebooks/**'
+          - '!python/**'
+        test_notebooks:
+          - '**'
+          - '!CONTRIBUTING.md'
+          - '!README.md'
+          - '!ci/cudf_pandas_scripts/**'
+          - '!java/**'
+        test_python:
+          - '**'
+          - '!CONTRIBUTING.md'
+          - '!README.md'
+          - '!ci/cudf_pandas_scripts/**'
+          - '!docs/**'
+          - '!img/**'
+          - '!java/**'
+          - '!notebooks/**'
   checks:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/checks.yaml@branch-24.12
@@ -139,7 +111,7 @@ jobs:
     needs: [conda-cpp-build, changed-files]
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/conda-cpp-tests.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_cpp == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_cpp
     with:
       build_type: pull-request
   conda-python-build:
@@ -152,7 +124,7 @@ jobs:
    needs: [conda-python-build, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_python == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
    with:
      build_type: pull-request
      script: "ci/test_python_cudf.sh"
@@ -161,7 +133,7 @@ jobs:
    needs: [conda-python-build, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/conda-python-tests.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_python == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
    with:
      build_type: pull-request
      script: "ci/test_python_other.sh"
@@ -169,7 +141,7 @@ jobs:
    needs: [conda-cpp-build, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_java == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_java
    with:
      build_type: pull-request
      node_type: "gpu-v100-latest-1"
@@ -190,7 +162,7 @@ jobs:
    needs: [conda-python-build, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/custom-job.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_notebooks == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_notebooks
    with:
      build_type: pull-request
      node_type: "gpu-v100-latest-1"
@@ -234,7 +206,7 @@ jobs:
    needs: [wheel-build-cudf, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_python == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
    with:
      build_type: pull-request
      script: ci/test_wheel_cudf.sh
@@ -251,7 +223,7 @@ jobs:
    needs: [wheel-build-cudf-polars, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_python == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
    with:
      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -283,7 +255,7 @@ jobs:
    needs: [wheel-build-dask-cudf, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_python == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python
    with:
      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -303,7 +275,7 @@ jobs:
    needs: [wheel-build-cudf, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_python == 'true' || needs.changed-files.outputs.test_cudf_pandas == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas
    with:
      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
@@ -314,7 +286,7 @@ jobs:
    needs: [wheel-build-cudf, changed-files]
    secrets: inherit
    uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.12
-    if: needs.changed-files.outputs.test_python == 'true' || needs.changed-files.outputs.test_cudf_pandas == 'true'
+    if: fromJSON(needs.changed-files.outputs.changed_file_groups).test_python || fromJSON(needs.changed-files.outputs.changed_file_groups).test_cudf_pandas
    with:
      # This selects "ARCH=amd64 + the latest supported Python + CUDA".
      matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))]))
diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
index 7c1fa705faa..bf76f4ed29a 100755
--- a/ci/build_wheel.sh
+++ b/ci/build_wheel.sh
@@ -12,4 +12,4 @@ rapids-generate-version > ./VERSION
 
 cd "${package_dir}"
 
-python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
+python -m pip wheel . -w dist -v --no-deps --disable-pip-version-check
diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml
index f91bf1e7046..8b45d26c367 100644
--- a/conda/environments/all_cuda-118_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -54,7 +54,7 @@ dependencies:
 - nbsphinx
 - ninja
 - notebook
-- numba>=0.57
+- numba-cuda>=0.0.13
 - numpy>=1.23,<3.0a0
 - numpydoc
 - nvcc_linux-64=11.8
@@ -65,6 +65,7 @@ dependencies:
 - pandas
 - pandas>=2.0,<2.2.3dev0
 - pandoc
+- polars>=1.8,<1.9
 - pre-commit
 - ptxcompiler
 - pyarrow>=14.0.0,<18.0.0a0
diff --git a/conda/environments/all_cuda-125_arch-x86_64.yaml b/conda/environments/all_cuda-125_arch-x86_64.yaml
index f4ec6bd5407..354c1360e5a 100644
--- a/conda/environments/all_cuda-125_arch-x86_64.yaml
+++ b/conda/environments/all_cuda-125_arch-x86_64.yaml
@@ -53,7 +53,7 @@ dependencies:
 - nbsphinx
 - ninja
 - notebook
-- numba>=0.57
+- numba-cuda>=0.0.13
 - numpy>=1.23,<3.0a0
 - numpydoc
 - nvcomp==4.0.1
@@ -63,6 +63,7 @@ dependencies:
 - pandas
 - pandas>=2.0,<2.2.3dev0
 - pandoc
+- polars>=1.8,<1.9
 - pre-commit
 - pyarrow>=14.0.0,<18.0.0a0
 - pydata-sphinx-theme!=0.14.2
diff --git a/conda/recipes/cudf/meta.yaml b/conda/recipes/cudf/meta.yaml
index e22b4a4eddc..25e69b89789 100644
--- a/conda/recipes/cudf/meta.yaml
+++ b/conda/recipes/cudf/meta.yaml
@@ -80,7 +80,7 @@ requirements:
     - typing_extensions >=4.0.0
     - pandas >=2.0,<2.2.3dev0
     - cupy >=12.0.0
-    - numba >=0.57
+    - numba-cuda >=0.0.13
     - numpy >=1.23,<3.0a0
     - pyarrow>=14.0.0,<18.0.0a0
     - libcudf ={{ version }}
diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy
index d766d98b45e..b791d846d1d 100644
--- a/cpp/.clang-tidy
+++ b/cpp/.clang-tidy
@@ -3,7 +3,8 @@ Checks:
   'modernize-*,
    -modernize-use-equals-default,
    -modernize-concat-nested-namespaces,
-   -modernize-use-trailing-return-type'
+   -modernize-use-trailing-return-type,
+   -modernize-use-bool-literals'
 
 # -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly)
 # -modernize-concat-nested-namespaces # auto-fix is broken (can delete code)
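[Editor's note: the `.clang-tidy` change above opts out of the `modernize-use-bool-literals` check. A minimal illustration of what that check flags and auto-fixes — a toy snippet, not taken from the cudf codebase:]

```cpp
// Illustrative only. modernize-use-bool-literals flags integer literals that
// are implicitly converted to bool and suggests true/false instead.
int main()
{
  bool flagged     = 1;     // the check would rewrite this to `true`
  bool not_flagged = true;  // already uses a bool literal
  return (flagged && not_flagged) ? 0 : 1;
}
```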
diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh
index c3238cb94fd..35a39ef9758 100644
--- a/cpp/include/cudf/column/column_device_view.cuh
+++ b/cpp/include/cudf/column/column_device_view.cuh
@@ -1425,13 +1425,13 @@ struct pair_rep_accessor {
 
  private:
   template <typename R, std::enable_if_t<std::is_same_v<R, rep_type>, void>* = nullptr>
-  __device__ inline auto get_rep(cudf::size_type i) const
+  __device__ [[nodiscard]] inline auto get_rep(cudf::size_type i) const
   {
     return col.element<R>(i);
   }
 
   template <typename R, std::enable_if_t<not std::is_same_v<R, rep_type>, void>* = nullptr>
-  __device__ inline auto get_rep(cudf::size_type i) const
+  __device__ [[nodiscard]] inline auto get_rep(cudf::size_type i) const
   {
     return col.element<R>(i).value();
   }
diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp
index 3ef7bafe727..48f89b8be25 100644
--- a/cpp/include/cudf/column/column_view.hpp
+++ b/cpp/include/cudf/column/column_view.hpp
@@ -235,7 +235,7 @@ class column_view_base {
    *
    * @return Typed pointer to underlying data
    */
-  virtual void const* get_data() const noexcept { return _data; }
+  [[nodiscard]] virtual void const* get_data() const noexcept { return _data; }
 
   data_type _type{type_id::EMPTY};  ///< Element type
   size_type _size{};                ///< Number of elements
@@ -695,7 +695,7 @@ class mutable_column_view : public detail::column_view_base {
    *
    * @return Typed pointer to underlying data
    */
-  void const* get_data() const noexcept override;
+  [[nodiscard]] void const* get_data() const noexcept override;
 
  private:
   friend mutable_column_view bit_cast(mutable_column_view const& input, data_type type);
diff --git a/cpp/include/cudf/detail/aggregation/aggregation.hpp b/cpp/include/cudf/detail/aggregation/aggregation.hpp
index 4255faea702..6661a461b8b 100644
--- a/cpp/include/cudf/detail/aggregation/aggregation.hpp
+++ b/cpp/include/cudf/detail/aggregation/aggregation.hpp
@@ -683,7 +683,7 @@ class ewma_aggregation final : public scan_aggregation {
   {
   }
 
-  std::unique_ptr<aggregation> clone() const override
+  [[nodiscard]] std::unique_ptr<aggregation> clone() const override
   {
     return std::make_unique<ewma_aggregation>(*this);
   }
@@ -694,7 +694,7 @@ class ewma_aggregation final : public scan_aggregation {
     return collector.visit(col_type, *this);
   }
 
-  bool is_equal(aggregation const& _other) const override
+  [[nodiscard]] bool is_equal(aggregation const& _other) const override
   {
     if (!this->aggregation::is_equal(_other)) { return false; }
     auto const& other = dynamic_cast<ewma_aggregation const&>(_other);
diff --git a/cpp/include/cudf/detail/groupby/sort_helper.hpp b/cpp/include/cudf/detail/groupby/sort_helper.hpp
index ce8783d8b79..d7a42d0eca5 100644
--- a/cpp/include/cudf/detail/groupby/sort_helper.hpp
+++ b/cpp/include/cudf/detail/groupby/sort_helper.hpp
@@ -211,7 +211,6 @@ struct sort_groupby_helper {
    */
   column_view keys_bitmask_column(rmm::cuda_stream_view stream);
 
- private:
   column_ptr _key_sorted_order;      ///< Indices to produce _keys in sorted order
   column_ptr _unsorted_keys_labels;  ///< Group labels for unsorted _keys
   column_ptr _keys_bitmask_column;   ///< Column representing rows with one or more nulls values
diff --git a/cpp/include/cudf/detail/utilities/host_vector.hpp b/cpp/include/cudf/detail/utilities/host_vector.hpp
index ecb8f910463..3f6ad7b7b1d 100644
--- a/cpp/include/cudf/detail/utilities/host_vector.hpp
+++ b/cpp/include/cudf/detail/utilities/host_vector.hpp
@@ -183,7 +183,7 @@ class rmm_host_allocator {
    */
   inline bool operator!=(rmm_host_allocator const& x) const { return !operator==(x); }
 
-  bool is_device_accessible() const { return _is_device_accessible; }
+  [[nodiscard]] bool is_device_accessible() const { return _is_device_accessible; }
 
  private:
  rmm::host_async_resource_ref mr;
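[Editor's note: several headers in this diff add `[[nodiscard]]` to const accessors such as `get_data()`, `clone()`, and `is_device_accessible()`. A minimal sketch of the attribute's effect, using a toy `buffer` class that is not part of cudf:]

```cpp
#include <cstddef>

// Toy class, not part of cudf: mirrors the pattern of marking pure accessors
// [[nodiscard]] so that silently discarding their result becomes a warning.
class buffer {
 public:
  [[nodiscard]] std::size_t size() const noexcept { return _size; }

 private:
  std::size_t _size{0};
};

int main()
{
  buffer b;
  // b.size();  // would trigger a -Wunused-result warning: result discarded
  return static_cast<int>(b.size());  // OK: result is used
}
```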
diff --git a/cpp/include/cudf/dictionary/dictionary_column_view.hpp b/cpp/include/cudf/dictionary/dictionary_column_view.hpp
index dc822fee38b..5596f78a90b 100644
--- a/cpp/include/cudf/dictionary/dictionary_column_view.hpp
+++ b/cpp/include/cudf/dictionary/dictionary_column_view.hpp
@@ -47,7 +47,7 @@ class dictionary_column_view : private column_view {
   dictionary_column_view(column_view const& dictionary_column);
   dictionary_column_view(dictionary_column_view&&)      = default;  ///< Move constructor
   dictionary_column_view(dictionary_column_view const&) = default;  ///< Copy constructor
-  ~dictionary_column_view() = default;
+  ~dictionary_column_view() override = default;
 
   /**
    * @brief Move assignment operator
diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp
index 11c778408fe..c9df02f167a 100644
--- a/cpp/include/cudf/groupby.hpp
+++ b/cpp/include/cudf/groupby.hpp
@@ -36,7 +36,7 @@ namespace CUDF_EXPORT cudf {
 namespace groupby {
 namespace detail {
 namespace sort {
-class sort_groupby_helper;
+struct sort_groupby_helper;
 }  // namespace sort
 }  // namespace detail
diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp
index ff25a5bacae..b662b660557 100644
--- a/cpp/include/cudf/io/json.hpp
+++ b/cpp/include/cudf/io/json.hpp
@@ -105,6 +105,8 @@ class json_reader_options {
   char _delimiter = '\n';
   // Prune columns on read, selected based on the _dtypes option
   bool _prune_columns = false;
+  // Experimental features: new column tree construction
+  bool _experimental = false;
 
   // Bytes to skip from the start
   size_t _byte_range_offset = 0;
@@ -114,9 +116,6 @@ class json_reader_options {
   // Whether to parse dates as DD/MM versus MM/DD
   bool _dayfirst = false;
 
-  // Whether to use the legacy reader
-  bool _legacy = false;
-
   // Whether to keep the quote characters of string values
   bool _keep_quotes = false;
 
@@ -277,6 +276,15 @@ class json_reader_options {
    */
   [[nodiscard]] bool is_enabled_prune_columns() const { return _prune_columns; }
 
+  /**
+   * @brief Whether to enable experimental features.
+   *
+   * When set to true, experimental features, such as the new column tree construction
+   * and UTF-8 matching of field names, will be enabled.
+   * @return true if experimental features are enabled
+   */
+  [[nodiscard]] bool is_enabled_experimental() const { return _experimental; }
+
   /**
    * @brief Whether to parse dates as DD/MM versus MM/DD.
    *
@@ -453,6 +461,16 @@ class json_reader_options {
    */
   void enable_prune_columns(bool val) { _prune_columns = val; }
 
+  /**
+   * @brief Set whether to enable experimental features.
+   *
+   * When set to true, experimental features, such as the new column tree construction
+   * and UTF-8 matching of field names, will be enabled.
+   *
+   * @param val Boolean value to enable/disable experimental features
+   */
+  void enable_experimental(bool val) { _experimental = val; }
+
   /**
    * @brief Set whether to parse dates as DD/MM versus MM/DD.
    *
@@ -695,6 +713,21 @@ class json_reader_options_builder {
     return *this;
   }
 
+  /**
+   * @brief Set whether to enable experimental features.
+   *
+   * When set to true, experimental features, such as the new column tree construction
+   * and UTF-8 matching of field names, will be enabled.
+   *
+   * @param val Boolean value to enable/disable experimental features
+   * @return this for chaining
+   */
+  json_reader_options_builder& experimental(bool val)
+  {
+    options._experimental = val;
+    return *this;
+  }
+
   /**
    * @brief Set whether to parse dates as DD/MM versus MM/DD.
    *
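[Editor's note: a hedged usage sketch for the `experimental` option added above. `example.jsonl` is a placeholder path, and which features sit behind the flag may change between releases:]

```cpp
#include <cudf/io/json.hpp>

// Sketch: opt in to the experimental JSON reader features (e.g. the new
// column tree construction) via the builder method added in this diff.
cudf::io::table_with_metadata read_with_experimental_features()
{
  auto options =
    cudf::io::json_reader_options::builder(cudf::io::source_info{"example.jsonl"})
      .lines(true)           // read JSON Lines input
      .experimental(true)    // enable experimental features (new in this diff)
      .build();
  return cudf::io::read_json(options);
}
```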
diff --git a/cpp/include/cudf/io/parquet.hpp b/cpp/include/cudf/io/parquet.hpp
index ee03a382bec..bfe76d5690c 100644
--- a/cpp/include/cudf/io/parquet.hpp
+++ b/cpp/include/cudf/io/parquet.hpp
@@ -1200,7 +1200,7 @@ class parquet_writer_options : public parquet_writer_options_base {
    * @param sink The sink used for writer output
    * @param table Table to be written to output
    */
-  explicit parquet_writer_options(sink_info const& sink, table_view const& table);
+  explicit parquet_writer_options(sink_info const& sink, table_view table);
 
  public:
   /**
diff --git a/cpp/include/cudf/lists/lists_column_view.hpp b/cpp/include/cudf/lists/lists_column_view.hpp
index b117a871b64..d7057cfea7e 100644
--- a/cpp/include/cudf/lists/lists_column_view.hpp
+++ b/cpp/include/cudf/lists/lists_column_view.hpp
@@ -48,7 +48,7 @@ class lists_column_view : private column_view {
   lists_column_view(column_view const& lists_column);
   lists_column_view(lists_column_view&&)      = default;  ///< Move constructor
   lists_column_view(lists_column_view const&) = default;  ///< Copy constructor
-  ~lists_column_view() = default;
+  ~lists_column_view() override = default;
 
   /**
    * @brief Copy assignment operator
    *
diff --git a/cpp/include/cudf/scalar/scalar.hpp b/cpp/include/cudf/scalar/scalar.hpp
index e8a498afc09..66be2a12fbe 100644
--- a/cpp/include/cudf/scalar/scalar.hpp
+++ b/cpp/include/cudf/scalar/scalar.hpp
@@ -47,6 +47,7 @@ namespace CUDF_EXPORT cudf {
  */
 class scalar {
  public:
+  scalar()          = delete;
   virtual ~scalar() = default;
   scalar& operator=(scalar const& other) = delete;
   scalar& operator=(scalar&& other)      = delete;
@@ -96,8 +97,6 @@ class scalar {
   data_type _type{type_id::EMPTY};     ///< Logical type of value in the scalar
   rmm::device_scalar<bool> _is_valid;  ///< Device bool signifying validity
 
-  scalar() = delete;
-
   /**
    * @brief Move constructor for scalar.
    * @param other The other scalar to move from.
@@ -145,6 +144,7 @@ class fixed_width_scalar : public scalar {
  public:
   using value_type = T;  ///< Type of the value held by the scalar.
 
+  fixed_width_scalar() = delete;
   ~fixed_width_scalar() override = default;
 
   /**
@@ -203,8 +203,6 @@ class fixed_width_scalar : public scalar {
  protected:
   rmm::device_scalar<T> _data;  ///< device memory containing the value
 
-  fixed_width_scalar() = delete;
-
   /**
    * @brief Construct a new fixed width scalar object.
    *
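[Editor's note: `scalar.hpp` above (and `regex_program.hpp` below) move deleted default constructors from a protected/private section to `public`. A small sketch of the idiom on a stand-in class, not `cudf::scalar`: with the deleted member public, the compiler reports a direct "use of deleted function" diagnostic rather than an access error, which is the form clang-tidy's modernize checks prefer:]

```cpp
// Stand-in type, illustration only: a publicly deleted default constructor.
class scalar_like {
 public:
  scalar_like() = delete;  // deleted and public: clearer diagnostics
  explicit scalar_like(int v) : _value{v} {}

  [[nodiscard]] int value() const noexcept { return _value; }

 private:
  int _value;
};

int main()
{
  scalar_like s{42};
  // scalar_like t;  // error: use of deleted function 'scalar_like::scalar_like()'
  return s.value() == 42 ? 0 : 1;
}
```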
diff --git a/cpp/include/cudf/strings/detail/char_tables.hpp b/cpp/include/cudf/strings/detail/char_tables.hpp
index 5d6aff28826..6460d4f43ff 100644
--- a/cpp/include/cudf/strings/detail/char_tables.hpp
+++ b/cpp/include/cudf/strings/detail/char_tables.hpp
@@ -74,9 +74,9 @@ character_cases_table_type const* get_character_cases_table();
  */
 struct special_case_mapping {
   uint16_t num_upper_chars;
-  uint16_t upper[3];
+  uint16_t upper[3];  // NOLINT
   uint16_t num_lower_chars;
-  uint16_t lower[3];
+  uint16_t lower[3];  // NOLINT
 };
 
 /**
diff --git a/cpp/include/cudf/strings/regex/regex_program.hpp b/cpp/include/cudf/strings/regex/regex_program.hpp
index 9da859d9c87..1bf1c26f471 100644
--- a/cpp/include/cudf/strings/regex/regex_program.hpp
+++ b/cpp/include/cudf/strings/regex/regex_program.hpp
@@ -54,6 +54,8 @@ struct regex_program {
                regex_flags flags      = regex_flags::DEFAULT,
                capture_groups capture = capture_groups::EXTRACT);
 
+  regex_program() = delete;
+
   /**
    * @brief Move constructor
    *
@@ -115,8 +117,6 @@ struct regex_program {
   ~regex_program();
 
  private:
-  regex_program() = delete;
-
   std::string _pattern;
   regex_flags _flags;
   capture_groups _capture;
diff --git a/cpp/include/cudf/strings/string_view.cuh b/cpp/include/cudf/strings/string_view.cuh
index 14695c3bb27..34ed3c5618e 100644
--- a/cpp/include/cudf/strings/string_view.cuh
+++ b/cpp/include/cudf/strings/string_view.cuh
@@ -99,7 +99,7 @@ __device__ inline std::pair<size_type, size_type> bytes_to_character_position(st
  * values. Also, this char pointer serves as valid device pointer of identity
  * value for minimum operator on string values.
  */
-static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"};
+static __constant__ char max_string_sentinel[5]{"\xF7\xBF\xBF\xBF"};  // NOLINT
 }  // namespace detail
 }  // namespace strings
 
@@ -283,14 +283,11 @@ __device__ inline size_type string_view::const_iterator::position() const { retu
 
 __device__ inline size_type string_view::const_iterator::byte_offset() const { return byte_pos; }
 
-__device__ inline string_view::const_iterator string_view::begin() const
-{
-  return const_iterator(*this, 0, 0);
-}
+__device__ inline string_view::const_iterator string_view::begin() const { return {*this, 0, 0}; }
 
 __device__ inline string_view::const_iterator string_view::end() const
 {
-  return const_iterator(*this, length(), size_bytes());
+  return {*this, length(), size_bytes()};
 }
 // @endcond
 
@@ -411,7 +408,7 @@ __device__ inline size_type string_view::find(char const* str,
 
 __device__ inline size_type string_view::find(char_utf8 chr, size_type pos, size_type count) const
 {
-  char str[sizeof(char_utf8)];
+  char str[sizeof(char_utf8)];  // NOLINT
   size_type chwidth = strings::detail::from_char_utf8(chr, str);
   return find(str, chwidth, pos, count);
 }
@@ -433,7 +430,7 @@ __device__ inline size_type string_view::rfind(char const* str,
 
 __device__ inline size_type string_view::rfind(char_utf8 chr, size_type pos, size_type count) const
 {
-  char str[sizeof(char_utf8)];
+  char str[sizeof(char_utf8)];  // NOLINT
   size_type chwidth = strings::detail::from_char_utf8(chr, str);
   return rfind(str, chwidth, pos, count);
 }
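[Editor's note: the `string_view.cuh` changes above replace `return const_iterator(*this, 0, 0);` with `return {*this, 0, 0};`, i.e. returning a braced init list where the return type is already spelled in the signature. A host-side sketch of the same idiom on a stand-in iterator type:]

```cpp
#include <cstdint>

// Stand-in for string_view::const_iterator, illustration only.
struct const_iterator {
  void const* owner;
  int32_t char_pos;
  int32_t byte_pos;
};

struct view {
  // The return type is named in the signature, so a braced init list
  // avoids repeating it (clang-tidy: modernize-return-braced-init-list).
  const_iterator begin() const { return {this, 0, 0}; }
};

int main()
{
  view v;
  return v.begin().char_pos;  // 0
}
```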
diff --git a/cpp/include/cudf/strings/strings_column_view.hpp b/cpp/include/cudf/strings/strings_column_view.hpp
index 4a2512eb7c5..6ec8d1238d6 100644
--- a/cpp/include/cudf/strings/strings_column_view.hpp
+++ b/cpp/include/cudf/strings/strings_column_view.hpp
@@ -45,7 +45,7 @@ class strings_column_view : private column_view {
   strings_column_view(column_view strings_column);
   strings_column_view(strings_column_view&&)      = default;  ///< Move constructor
   strings_column_view(strings_column_view const&) = default;  ///< Copy constructor
-  ~strings_column_view() = default;
+  ~strings_column_view() override = default;
 
   /**
    * @brief Copy assignment operator
    *
diff --git a/cpp/include/cudf/structs/structs_column_view.hpp b/cpp/include/cudf/structs/structs_column_view.hpp
index 19798f51656..91d7ddce955 100644
--- a/cpp/include/cudf/structs/structs_column_view.hpp
+++ b/cpp/include/cudf/structs/structs_column_view.hpp
@@ -42,7 +42,7 @@ class structs_column_view : public column_view {
   // Foundation members:
   structs_column_view(structs_column_view const&) = default;  ///< Copy constructor
   structs_column_view(structs_column_view&&)      = default;  ///< Move constructor
-  ~structs_column_view() = default;
+  ~structs_column_view() override = default;
 
   /**
    * @brief Copy assignment operator
    *
diff --git a/cpp/include/cudf/tdigest/tdigest_column_view.hpp b/cpp/include/cudf/tdigest/tdigest_column_view.hpp
index 2f19efa5630..da4954b859c 100644
--- a/cpp/include/cudf/tdigest/tdigest_column_view.hpp
+++ b/cpp/include/cudf/tdigest/tdigest_column_view.hpp
@@ -59,7 +59,7 @@ class tdigest_column_view : private column_view {
   tdigest_column_view(column_view const&);  ///< Construct tdigest_column_view from a column_view
   tdigest_column_view(tdigest_column_view&&)      = default;  ///< Move constructor
   tdigest_column_view(tdigest_column_view const&) = default;  ///< Copy constructor
-  ~tdigest_column_view() = default;
+  ~tdigest_column_view() override = default;
 
   /**
    * @brief Copy assignment operator
    *
diff --git a/cpp/include/cudf/utilities/span.hpp b/cpp/include/cudf/utilities/span.hpp
index 0daebc0dd8d..914731ea417 100644
--- a/cpp/include/cudf/utilities/span.hpp
+++ b/cpp/include/cudf/utilities/span.hpp
@@ -236,26 +236,26 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
 
   /// Constructor from container
   /// @param in The container to construct the span from
-  template <
-    typename C,
-    // Only supported containers of types convertible to T
-    std::enable_if_t<is_host_span_supported_container<C>::value &&
-                     std::is_convertible_v<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
-                                             std::declval<C&>().data()))> (*)[],
-                                           T (*)[]>>* = nullptr>
+  template <typename C,
+            // Only supported containers of types convertible to T
+            std::enable_if_t<is_host_span_supported_container<C>::value &&
+                             std::is_convertible_v<
+                               std::remove_pointer_t<decltype(thrust::raw_pointer_cast(  // NOLINT
+                                 std::declval<C&>().data()))> (*)[],
+                               T (*)[]>>* = nullptr>  // NOLINT
   constexpr host_span(C& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
   {
   }
 
   /// Constructor from const container
   /// @param in The container to construct the span from
-  template <
-    typename C,
-    // Only supported containers of types convertible to T
-    std::enable_if_t<is_host_span_supported_container<C>::value &&
-                     std::is_convertible_v<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
-                                             std::declval<C&>().data()))> (*)[],
-                                           T (*)[]>>* = nullptr>
+  template <typename C,
+            // Only supported containers of types convertible to T
+            std::enable_if_t<is_host_span_supported_container<C>::value &&
+                             std::is_convertible_v<
+                               std::remove_pointer_t<decltype(thrust::raw_pointer_cast(  // NOLINT
+                                 std::declval<C&>().data()))> (*)[],
+                               T (*)[]>>* = nullptr>  // NOLINT
   constexpr host_span(C const& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
   {
   }
@@ -264,7 +264,7 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
   /// Constructor from a host_vector
   /// @param in The host_vector to construct the span from
   template <typename OtherT,
             // Only supported containers of types convertible to T
-            std::enable_if_t<std::is_convertible_v<OtherT (*)[], T (*)[]>>* = nullptr>
+            std::enable_if_t<std::is_convertible_v<OtherT (*)[], T (*)[]>>* = nullptr>  // NOLINT
   constexpr host_span(cudf::detail::host_vector<OtherT>& in)
     : base(in.data(), in.size()), _is_device_accessible{in.get_allocator().is_device_accessible()}
   {
   }
@@ -274,7 +274,7 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
   /// Constructor from a const host_vector
   /// @param in The host_vector to construct the span from
   template <typename OtherT,
             // Only supported containers of types convertible to T
-            std::enable_if_t<std::is_convertible_v<OtherT (*)[], T (*)[]>>* = nullptr>
+            std::enable_if_t<std::is_convertible_v<OtherT (*)[], T (*)[]>>* = nullptr>  // NOLINT
   constexpr host_span(cudf::detail::host_vector<OtherT> const& in)
     : base(in.data(), in.size()), _is_device_accessible{in.get_allocator().is_device_accessible()}
   {
   }
@@ -285,7 +285,7 @@ struct host_span : public cudf::detail::span_base<T, Extent, host_span<T, Extent
   template <typename OtherT,
             std::size_t OtherExtent,
             std::enable_if_t<(Extent == OtherExtent || Extent == dynamic_extent) &&
-                               std::is_convertible_v<OtherT (*)[], T (*)[]>,
+                               std::is_convertible_v<OtherT (*)[], T (*)[]>,  // NOLINT
                              void>* = nullptr>
   constexpr host_span(host_span<OtherT, OtherExtent> const& other) noexcept
     : base(other.data(), other.size())
@@ -333,26 +333,26 @@ struct device_span : public cudf::detail::span_base<T, Extent, device_span<T, Ex
 
   /// Constructor from container
   /// @param in The container to construct the span from
-  template <
-    typename C,
-    // Only supported containers of types convertible to T
-    std::enable_if_t<is_device_span_supported_container<C>::value &&
-                     std::is_convertible_v<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
-                                             std::declval<C&>().data()))> (*)[],
-                                           T (*)[]>>* = nullptr>
+  template <typename C,
+            // Only supported containers of types convertible to T
+            std::enable_if_t<is_device_span_supported_container<C>::value &&
+                             std::is_convertible_v<
+                               std::remove_pointer_t<decltype(thrust::raw_pointer_cast(  // NOLINT
+                                 std::declval<C&>().data()))> (*)[],
+                               T (*)[]>>* = nullptr>  // NOLINT
   constexpr device_span(C& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
   {
   }
 
   /// Constructor from const container
   /// @param in The container to construct the span from
-  template <
-    typename C,
-    // Only supported containers of types convertible to T
-    std::enable_if_t<is_device_span_supported_container<C>::value &&
-                     std::is_convertible_v<std::remove_pointer_t<decltype(thrust::raw_pointer_cast(
-                                             std::declval<C&>().data()))> (*)[],
-                                           T (*)[]>>* = nullptr>
+  template <typename C,
+            // Only supported containers of types convertible to T
+            std::enable_if_t<is_device_span_supported_container<C>::value &&
+                             std::is_convertible_v<
+                               std::remove_pointer_t<decltype(thrust::raw_pointer_cast(  // NOLINT
+                                 std::declval<C&>().data()))> (*)[],
+                               T (*)[]>>* = nullptr>  // NOLINT
   constexpr device_span(C const& in) : base(thrust::raw_pointer_cast(in.data()), in.size())
   {
   }
@@ -362,7 +362,7 @@ struct device_span : public cudf::detail::span_base<T, Extent, device_span<T, Ex
   template <typename OtherT,
             std::size_t OtherExtent,
             std::enable_if_t<(Extent == OtherExtent || Extent == dynamic_extent) &&
-                               std::is_convertible_v<OtherT (*)[], T (*)[]>,
+                               std::is_convertible_v<OtherT (*)[], T (*)[]>,  // NOLINT
                              void>* = nullptr>
   constexpr device_span(device_span<OtherT, OtherExtent> const& other) noexcept
     : base(other.data(), other.size())
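[Editor's note: the reflowed `host_span`/`device_span` constructors above keep the same SFINAE guard: an element type `OtherT` is accepted only when `OtherT (*)[]` converts to `T (*)[]`, which permits exactly qualification conversions (e.g. `T` → `T const`) and rejects unrelated element types. A compile-time sketch of that test, standard C++17:]

```cpp
#include <type_traits>

// The array-of-unknown-bound pointer test used by the span constructors:
// it admits only qualification conversions on the element type.
static_assert(std::is_convertible_v<int (*)[], int const (*)[]>);   // int -> int const: OK
static_assert(!std::is_convertible_v<int const (*)[], int (*)[]>);  // cannot drop const
static_assert(!std::is_convertible_v<double (*)[], int (*)[]>);     // unrelated types rejected

int main() { return 0; }
```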
diff --git a/cpp/include/cudf_test/type_list_utilities.hpp b/cpp/include/cudf_test/type_list_utilities.hpp
index 1793a8ecce0..3c96c59f0b7 100644
--- a/cpp/include/cudf_test/type_list_utilities.hpp
+++ b/cpp/include/cudf_test/type_list_utilities.hpp
@@ -414,9 +414,8 @@ struct RemoveIfImpl<PRED, Types<>> {
 
 template <class PRED, class HEAD, class... TAIL>
 struct RemoveIfImpl<PRED, Types<HEAD, TAIL...>> {
-  using type =
-    Concat<typename std::conditional<PRED::template Call<HEAD>::value, Types<>, Types<HEAD>>::type,
-           typename RemoveIfImpl<PRED, Types<TAIL...>>::type>;
+  using type = Concat<std::conditional_t<PRED::template Call<HEAD>::value, Types<>, Types<HEAD>>,
+                      typename RemoveIfImpl<PRED, Types<TAIL...>>::type>;
 };
 
 // @endcond
diff --git a/cpp/src/datetime/timezone.cpp b/cpp/src/datetime/timezone.cpp
index cf239297255..a6b6cbbf0b5 100644
--- a/cpp/src/datetime/timezone.cpp
+++ b/cpp/src/datetime/timezone.cpp
@@ -38,7 +38,7 @@ std::string const tzif_system_directory = "/usr/share/zoneinfo/";
 struct timezone_file_header {
   uint32_t magic;          ///< "TZif"
   uint8_t version;         ///< 0:version1, '2':version2, '3':version3
-  uint8_t reserved15[15];  ///< unused, reserved for future use
+  uint8_t reserved15[15];  ///< unused, reserved for future use  // NOLINT
   uint32_t isutccnt;       ///< number of UTC/local indicators contained in the body
   uint32_t isstdcnt;       ///< number of standard/wall indicators contained in the body
   uint32_t leapcnt;        ///< number of leap second records contained in the body
diff --git a/cpp/src/dictionary/dictionary_column_view.cpp b/cpp/src/dictionary/dictionary_column_view.cpp
index 4906e5b4f9c..3e4a201bba4 100644
--- a/cpp/src/dictionary/dictionary_column_view.cpp
+++ b/cpp/src/dictionary/dictionary_column_view.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -36,8 +36,7 @@ column_view dictionary_column_view::indices() const noexcept { return child(0);
 
 column_view dictionary_column_view::get_indices_annotated() const noexcept
 {
-  return column_view(
-    indices().type(), size(), indices().head(), null_mask(), null_count(), offset());
+  return {indices().type(), size(), indices().head(), null_mask(), null_count(), offset()};
 }
 
 column_view dictionary_column_view::keys() const noexcept { return child(1); }
diff --git a/cpp/src/interop/dlpack.cpp b/cpp/src/interop/dlpack.cpp
index ba5b11b90d8..a1be6aade4e 100644
--- a/cpp/src/interop/dlpack.cpp
+++ b/cpp/src/interop/dlpack.cpp
@@ -118,8 +118,8 @@ DLDataType data_type_to_DLDataType(data_type type)
 
 // Context object to own memory allocated for DLManagedTensor
 struct dltensor_context {
-  int64_t shape[2];
-  int64_t strides[2];
+  int64_t shape[2];    // NOLINT
+  int64_t strides[2];  // NOLINT
   rmm::device_buffer buffer;
 
   static void deleter(DLManagedTensor* arg)
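[Editor's note: the `// NOLINT` markers added in `timezone.cpp` and `dlpack.cpp` above (and in `uncomp.cpp` below) suppress clang-tidy, presumably `modernize-avoid-c-arrays`, on fixed-layout structs where a raw array is deliberate, while `avro.cpp`/`avro.hpp` below take the other route and switch to `std::array`. A sketch of the `std::array` form, with a hypothetical struct name:]

```cpp
#include <array>
#include <cstdint>

// Hypothetical header struct, illustration only: std::array keeps the same
// in-memory layout as a raw C array on common implementations while
// satisfying modernize-avoid-c-arrays and supporting member functions.
struct sync_block {
  std::array<std::uint64_t, 2> sync_marker = {0, 0};
};

static_assert(sizeof(sync_block) == 2 * sizeof(std::uint64_t));

int main()
{
  sync_block b;
  b.sync_marker[0] = 0xDEADBEEF;  // indexing works exactly as with a C array
  return b.sync_marker[1] == 0 ? 0 : 1;
}
```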
diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp
index 2041f03cd81..03cf6d4a0e0 100644
--- a/cpp/src/io/avro/avro.cpp
+++ b/cpp/src/io/avro/avro.cpp
@@ -199,7 +199,7 @@ bool container::parse(file_metadata* md, size_t max_num_rows, size_t first_row)
       // Read the next sync markers and ensure they match the first ones we
       // encountered. If they don't, we have to assume the data is corrupted,
       // and thus, we terminate processing immediately.
-      uint64_t const sync_marker[] = {get_raw<uint64_t>(), get_raw<uint64_t>()};
+      std::array const sync_marker = {get_raw<uint64_t>(), get_raw<uint64_t>()};
       bool valid_sync_markers =
         ((sync_marker[0] == md->sync_marker[0]) && (sync_marker[1] == md->sync_marker[1]));
       if (!valid_sync_markers) { return false; }
diff --git a/cpp/src/io/avro/avro.hpp b/cpp/src/io/avro/avro.hpp
index f2813a1ba51..2e992546ccc 100644
--- a/cpp/src/io/avro/avro.hpp
+++ b/cpp/src/io/avro/avro.hpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -19,6 +19,7 @@
 #include "avro_common.hpp"
 
 #include <algorithm>
+#include <array>
 #include <cstddef>
 #include <cstdint>
 #include <cstdio>
@@ -100,15 +101,15 @@ struct column_desc {
  */
 struct file_metadata {
   std::map<std::string, std::string> user_data;
-  std::string codec         = "";
-  uint64_t sync_marker[2]   = {0, 0};
-  size_t metadata_size      = 0;
-  size_t total_data_size    = 0;
-  size_t selected_data_size = 0;
-  size_type num_rows        = 0;
-  size_type skip_rows       = 0;
-  size_type total_num_rows  = 0;
-  uint32_t max_block_size   = 0;
+  std::string codec                   = "";
+  std::array<uint64_t, 2> sync_marker = {0, 0};
+  size_t metadata_size                = 0;
+  size_t total_data_size              = 0;
+  size_t selected_data_size           = 0;
+  size_type num_rows                  = 0;
+  size_type skip_rows                 = 0;
+  size_type total_num_rows            = 0;
+  uint32_t max_block_size             = 0;
   std::vector<schema_entry> schema;
   std::vector<block_desc_s> block_list;
   std::vector<column_desc> columns;
diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp
index 602ff1734b6..1af45b41d8e 100644
--- a/cpp/src/io/comp/uncomp.cpp
+++ b/cpp/src/io/comp/uncomp.cpp
@@ -42,7 +42,7 @@ struct gz_file_header_s {
   uint8_t id2;        // 0x8b
   uint8_t comp_mthd;  // compression method (0-7=reserved, 8=deflate)
   uint8_t flags;      // flags (GZIPHeaderFlag)
-  uint8_t mtime[4];   // If non-zero: modification time (Unix format)
+  uint8_t mtime[4];   // If non-zero: modification time (Unix format)  // NOLINT
   uint8_t xflags;     // Extra compressor-specific flags
   uint8_t os;         // OS id
 };
@@ -103,7 +103,7 @@ struct zip_lfh_s {
 };
 
 struct bz2_file_header_s {
-  uint8_t sig[3];  // "BZh"
+  uint8_t sig[3];  // "BZh"  // NOLINT
   uint8_t blksz;   // block size 1..9 in 100kB units (post-RLE)
 };
diff --git a/cpp/src/io/csv/reader_impl.cu b/cpp/src/io/csv/reader_impl.cu
index ebca334a715..8c32fc85f78 100644
--- a/cpp/src/io/csv/reader_impl.cu
+++ b/cpp/src/io/csv/reader_impl.cu
@@ -46,11 +46,8 @@
 #include <thrust/iterator/counting_iterator.h>
 
 #include <algorithm>
-#include <iostream>
 #include <memory>
-#include <numeric>
 #include <string>
-#include <tuple>
 #include <unordered_map>
 #include <unordered_set>
 #include <utility>
@@ -88,7 +85,7 @@ class selected_rows_offsets {
     : all{std::move(data)}, selected{selected_span}
   {
   }
-  selected_rows_offsets(rmm::cuda_stream_view stream) : all{0, stream}, selected{all} {}
+  explicit selected_rows_offsets(rmm::cuda_stream_view stream) : all{0, stream}, selected{all} {}
 
   operator device_span<uint64_t const>() const { return selected; }
   void shrink(size_t size)
@@ -196,15 +193,11 @@ void erase_except_last(C& container, rmm::cuda_stream_view stream)
   container.resize(1, stream);
 }
 
-size_t find_first_row_start(char row_terminator, host_span<char const> data)
+constexpr std::array<uint8_t, 3> UTF8_BOM = {0xEF, 0xBB, 0xBF};
+
+[[nodiscard]] bool has_utf8_bom(host_span<char const> data)
 {
-  // For now, look for the first terminator (assume the first terminator isn't within a quote)
-  // TODO: Attempt to infer this from the data
-  size_t pos = 0;
-  while (pos < data.size() && data[pos] != row_terminator) {
-    ++pos;
-  }
-  return std::min(pos + 1, data.size());
+  return data.size() >= UTF8_BOM.size() &&
+         memcmp(data.data(), UTF8_BOM.data(), UTF8_BOM.size()) == 0;
 }
 
 /**
@@ -213,20 +206,28 @@ size_t find_first_row_start(char row_terminator, host_span<char const> data)
  * This function scans the input data to record the row offsets (relative to the start of the
  * input data). A row is actually the data/offset between two termination symbols.
 *
- * @param data Uncompressed input data in host memory
- * @param range_begin Only include rows starting after this position
- * @param range_end Only include rows starting before this position
- * @param skip_rows Number of rows to skip from the start
- * @param num_rows Number of rows to read; -1: all remaining data
- * @param load_whole_file Hint that the entire data will be needed on gpu
- * @param stream CUDA stream used for device memory operations and kernel launches
- * @return Input data and row offsets in the device memory
+ * @param[in] source The source data (may be compressed)
+ * @param[in] reader_opts Settings for controlling reading behavior
+ * @param[in] parse_opts Settings for controlling parsing behavior
+ * @param[out] header The header row, if any
+ * @param[in] data Host buffer containing uncompressed data, if input is compressed
+ * @param[in] byte_range_offset Offset of the byte range
+ * @param[in] range_begin Start of the first row, relative to the byte range start
+ * @param[in] range_end End of the data to read, relative to the byte range start; equal to the
+ * data size if all data after byte_range_offset needs to be read
+ * @param[in] skip_rows Number of rows to skip from the start
+ * @param[in] num_rows Number of rows to read; -1 means all
+ * @param[in] load_whole_file Indicates if the whole file should be read
+ * @param[in] stream CUDA stream used for device memory operations and kernel launches
+ * @return Input data and row offsets in the device memory
 */
 std::pair<rmm::device_uvector<char>, selected_rows_offsets> load_data_and_gather_row_offsets(
+  cudf::io::datasource* source,
   csv_reader_options const& reader_opts,
   parse_options const& parse_opts,
   std::vector<char>& header,
-  host_span<char const> data,
+  std::optional<host_span<char const>> data,
+  size_t byte_range_offset,
   size_t range_begin,
   size_t range_end,
   size_t skip_rows,
@@ -235,50 +236,81 @@ std::pair<rmm::device_uvector<char>, selected_rows_offsets> load_data_and_gather
   rmm::cuda_stream_view stream)
 {
   constexpr size_t max_chunk_bytes = 64 * 1024 * 1024;  // 64MB
-  size_t buffer_size = std::min(max_chunk_bytes, data.size());
-  size_t max_blocks =
-    std::max<size_t>((buffer_size / cudf::io::csv::gpu::rowofs_block_bytes) + 1, 2);
-  cudf::detail::hostdevice_vector<uint64_t> row_ctx(max_blocks, stream);
-  size_t buffer_pos  = std::min(range_begin - std::min(range_begin, sizeof(char)), data.size());
-  size_t pos         = std::min(range_begin, data.size());
-  size_t header_rows = (reader_opts.get_header() >= 0) ? reader_opts.get_header() + 1 : 0;
-  uint64_t ctx       = 0;
+
+  auto const data_size   = data.has_value() ? data->size() : source->size();
+  auto const buffer_size = std::min(max_chunk_bytes, data_size);
+  auto const max_input_size = [&] {
+    if (range_end == data_size) {
+      return data_size - byte_range_offset;
+    } else {
+      return std::min<size_t>(reader_opts.get_byte_range_size_with_padding(),
+                              data_size - byte_range_offset);
+    }
+  }();
+  auto const header_rows = (reader_opts.get_header() >= 0) ? reader_opts.get_header() + 1 : 0;
 
   // For compatibility with the previous parser, a row is considered in-range if the
   // previous row terminator is within the given range
-  range_end += (range_end < data.size());
+  range_end += (range_end < data_size);
 
-  // Reserve memory by allocating and then resetting the size
-  rmm::device_uvector<char> d_data{
-    (load_whole_file) ? data.size() : std::min(buffer_size * 2, data.size()), stream};
-  d_data.resize(0, stream);
+  auto pos = range_begin;
+  // When using byte range, need the line terminator of last line before the range
+  auto input_pos = byte_range_offset == 0 ? pos : pos - 1;
+  uint64_t ctx   = 0;
+
+  rmm::device_uvector<char> d_data{0, stream};
+  d_data.reserve((load_whole_file) ? data_size : std::min(buffer_size * 2, max_input_size),
+                 stream);
   rmm::device_uvector<uint64_t> all_row_offsets{0, stream};
+
+  auto const max_blocks =
+    std::max<size_t>((buffer_size / cudf::io::csv::gpu::rowofs_block_bytes) + 1, 2);
+  cudf::detail::hostdevice_vector<uint64_t> row_ctx(max_blocks, stream);
   do {
-    size_t target_pos = std::min(pos + max_chunk_bytes, data.size());
-    size_t chunk_size = target_pos - pos;
+    auto const target_pos = std::min(pos + max_chunk_bytes, max_input_size);
+    auto const chunk_size = target_pos - pos;
 
     auto const previous_data_size = d_data.size();
-    d_data.resize(target_pos - buffer_pos, stream);
-    CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.begin() + previous_data_size,
-                                  data.begin() + buffer_pos + previous_data_size,
-                                  target_pos - buffer_pos - previous_data_size,
-                                  cudaMemcpyDefault,
-                                  stream.value()));
+    d_data.resize(target_pos - input_pos, stream);
+
+    auto const read_offset = byte_range_offset + input_pos + previous_data_size;
+    auto const read_size   = target_pos - input_pos - previous_data_size;
+    if (data.has_value()) {
+      CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.data() + previous_data_size,
+                                    data->data() + read_offset,
+                                    target_pos - input_pos - previous_data_size,
+                                    cudaMemcpyDefault,
+                                    stream.value()));
+    } else {
+      if (source->is_device_read_preferred(read_size)) {
+        source->device_read(read_offset,
+                            read_size,
+                            reinterpret_cast<uint8_t*>(d_data.data() + previous_data_size),
+                            stream);
+      } else {
+        auto const buffer = source->host_read(read_offset, read_size);
+        CUDF_CUDA_TRY(cudaMemcpyAsync(d_data.data() + previous_data_size,
+                                      buffer->data(),
+                                      buffer->size(),
+                                      cudaMemcpyDefault,
+                                      stream.value()));
+        stream.synchronize();  // To prevent buffer going out of scope before we copy the data.
+      }
+    }
 
     // Pass 1: Count the potential number of rows in each character block for each
     // possible parser state at the beginning of the block.
-    uint32_t num_blocks = cudf::io::csv::gpu::gather_row_offsets(parse_opts.view(),
-                                                                 row_ctx.device_ptr(),
-                                                                 device_span<uint64_t>(),
-                                                                 d_data,
-                                                                 chunk_size,
-                                                                 pos,
-                                                                 buffer_pos,
-                                                                 data.size(),
-                                                                 range_begin,
-                                                                 range_end,
-                                                                 skip_rows,
-                                                                 stream);
+    auto const num_blocks = cudf::io::csv::gpu::gather_row_offsets(parse_opts.view(),
+                                                                   row_ctx.device_ptr(),
+                                                                   device_span<uint64_t>(),
+                                                                   d_data,
+                                                                   chunk_size,
+                                                                   pos,
+                                                                   input_pos,
+                                                                   max_input_size,
+                                                                   range_begin,
+                                                                   range_end,
+                                                                   skip_rows,
+                                                                   stream);
     CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(),
                                   row_ctx.device_ptr(),
                                   num_blocks * sizeof(uint64_t),
@@ -312,14 +344,14 @@ std::pair<rmm::device_uvector<char>, selected_rows_offsets> load_data_and_gather
                                            d_data,
                                            chunk_size,
                                            pos,
-                                           buffer_pos,
-                                           data.size(),
+                                           input_pos,
+                                           max_input_size,
                                            range_begin,
                                            range_end,
                                            skip_rows,
                                            stream);
 
     // With byte range, we want to keep only one row out of the specified range
-    if (range_end < data.size()) {
+    if (range_end < data_size) {
       CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(),
                                     row_ctx.device_ptr(),
                                     num_blocks * sizeof(uint64_t),
@@ -356,18 +388,18 @@ std::pair<rmm::device_uvector<char>, selected_rows_offsets> load_data_and_gather
       size_t discard_bytes = std::max(d_data.size(), sizeof(char)) - sizeof(char);
       if (discard_bytes != 0) {
         erase_except_last(d_data, stream);
-        buffer_pos += discard_bytes;
+        input_pos += discard_bytes;
       }
     }
     pos = target_pos;
-  } while (pos < data.size());
+  } while (pos < max_input_size);
 
   auto const non_blank_row_offsets =
     io::csv::gpu::remove_blank_rows(parse_opts.view(), d_data, all_row_offsets, stream);
   auto row_offsets = selected_rows_offsets{std::move(all_row_offsets), non_blank_row_offsets};
 
   // Remove header rows and extract header
-  size_t const header_row_index = std::max<size_t>(header_rows, 1) - 1;
+  auto const header_row_index = std::max<size_t>(header_rows, 1) - 1;
   if (header_row_index + 1 < row_offsets.size()) {
     CUDF_CUDA_TRY(cudaMemcpyAsync(row_ctx.host_ptr(),
                                   row_offsets.data() + header_row_index,
@@ -376,11 +408,20 @@ std::pair<rmm::device_uvector<char>, selected_rows_offsets> load_data_and_gather
                                   stream.value()));
     stream.synchronize();
 
-    auto const header_start = buffer_pos + row_ctx[0];
-    auto const header_end   = buffer_pos + row_ctx[1];
-    CUDF_EXPECTS(header_start <= header_end && header_end <= data.size(),
+    auto const header_start = input_pos + row_ctx[0];
+    auto const header_end   = input_pos + row_ctx[1];
+    CUDF_EXPECTS(header_start <= header_end && header_end <= max_input_size,
                  "Invalid csv header location");
-    header.assign(data.begin() + header_start, data.begin() + header_end);
+    header.resize(header_end - header_start);
+    if (data.has_value()) {
+      std::copy(data->begin() + byte_range_offset + header_start,
+                data->begin() + byte_range_offset + header_end,
+                header.begin());
+    } else {
+      source->host_read(header_start + byte_range_offset,
+                        header_end - header_start,
+                        reinterpret_cast<uint8_t*>(header.data()));
+    }
     if (header_rows > 0) { row_offsets.erase_first_n(header_rows); }
   }
 
   // Apply num_rows limit
@@ -397,73 +438,89 @@ std::pair<rmm::device_uvector<char>, selected_rows_offsets> select_data_and_row_
   parse_options const& parse_opts,
   rmm::cuda_stream_view stream)
 {
-  auto range_offset      = reader_opts.get_byte_range_offset();
-  auto range_size        = reader_opts.get_byte_range_size();
-  auto range_size_padded = reader_opts.get_byte_range_size_with_padding();
-  auto skip_rows         = reader_opts.get_skiprows();
-  auto skip_end_rows     = reader_opts.get_skipfooter();
-  auto num_rows          = reader_opts.get_nrows();
+  auto range_offset  = reader_opts.get_byte_range_offset();
+  auto range_size    = reader_opts.get_byte_range_size();
+  auto skip_rows     = reader_opts.get_skiprows();
+  auto skip_end_rows = reader_opts.get_skipfooter();
+  auto num_rows      = reader_opts.get_nrows();
 
   if (range_offset > 0 || range_size > 0) {
     CUDF_EXPECTS(reader_opts.get_compression() == compression_type::NONE,
                  "Reading compressed data using `byte range` is unsupported");
   }
+  // TODO: Allow parsing the header outside the mapped range
+  CUDF_EXPECTS((range_offset == 0 || reader_opts.get_header() < 0),
+               "byte_range offset with header not supported");
 
-  // Transfer source data to GPU
-  if (!source->is_empty()) {
-    auto buffer =
-      source->host_read(range_offset, range_size_padded != 0 ? range_size_padded : source->size());
-    auto h_data =
-      host_span<char const>(reinterpret_cast<char const*>(buffer->data()), buffer->size());
-
-    std::vector<uint8_t> h_uncomp_data_owner;
-    if (reader_opts.get_compression() != compression_type::NONE) {
-      h_uncomp_data_owner =
-        decompress(reader_opts.get_compression(), {buffer->data(), buffer->size()});
-      h_data = {reinterpret_cast<char const*>(h_uncomp_data_owner.data()),
-                h_uncomp_data_owner.size()};
-      buffer.reset();
-    }
+  if (source->is_empty()) {
+    return {rmm::device_uvector<char>{0, stream}, selected_rows_offsets{stream}};
+  }
 
-    // check for and skip UTF-8 BOM
-    uint8_t const UTF8_BOM[] = {0xEF, 0xBB, 0xBF};
-    if (h_data.size() >= sizeof(UTF8_BOM) &&
-        memcmp(h_data.data(), UTF8_BOM, sizeof(UTF8_BOM)) == 0) {
-      h_data = h_data.subspan(sizeof(UTF8_BOM), h_data.size() - sizeof(UTF8_BOM));
-    }
+  std::optional<host_span<char const>> h_data;
+  std::vector<uint8_t> h_uncomp_data_owner;
+  if (reader_opts.get_compression() != compression_type::NONE) {
+    auto const h_comp_data = source->host_read(0, source->size());
+    h_uncomp_data_owner =
+      decompress(reader_opts.get_compression(), {h_comp_data->data(), h_comp_data->size()});
+    h_data = host_span<char const>{reinterpret_cast<char const*>(h_uncomp_data_owner.data()),
                                    h_uncomp_data_owner.size()};
+  }
 
-    // None of the parameters for row selection is used, we are parsing the entire file
-    bool const load_whole_file = range_offset == 0 && range_size == 0 && skip_rows <= 0 &&
-                                 skip_end_rows <= 0 && num_rows == -1;
-
-    // With byte range, find the start of the first data row
-    size_t const data_start_offset =
-      (range_offset != 0) ? find_first_row_start(parse_opts.terminator, h_data) : 0;
-
-    // TODO: Allow parsing the header outside the mapped range
-    CUDF_EXPECTS((range_offset == 0 || reader_opts.get_header() < 0),
-                 "byte_range offset with header not supported");
-
-    // Gather row offsets
-    auto data_row_offsets =
-      load_data_and_gather_row_offsets(reader_opts,
-                                       parse_opts,
-                                       header,
-                                       h_data,
-                                       data_start_offset,
-                                       (range_size) ? range_size : h_data.size(),
-                                       (skip_rows > 0) ? skip_rows : 0,
-                                       num_rows,
-                                       load_whole_file,
-                                       stream);
-    auto& row_offsets = data_row_offsets.second;
-    // Exclude the rows that are to be skipped from the end
-    if (skip_end_rows > 0 && static_cast<size_t>(skip_end_rows) < row_offsets.size()) {
-      row_offsets.shrink(row_offsets.size() - skip_end_rows);
-    }
-    return data_row_offsets;
-  }
-  return {rmm::device_uvector<char>{0, stream}, selected_rows_offsets{stream}};
+
+  size_t data_start_offset = range_offset;
+  if (h_data.has_value()) {
+    if (has_utf8_bom(*h_data)) { data_start_offset += sizeof(UTF8_BOM); }
+  } else {
+    if (range_offset == 0) {
+      auto bom_buffer = source->host_read(0, std::min<size_t>(source->size(), sizeof(UTF8_BOM)));
+      auto bom_chars  = host_span<char const>{reinterpret_cast<char const*>(bom_buffer->data()),
+                                              bom_buffer->size()};
+      if (has_utf8_bom(bom_chars)) { data_start_offset += sizeof(UTF8_BOM); }
+    } else {
+      auto find_data_start_chunk_size = 1024ul;
+      while (data_start_offset < source->size()) {
+        auto const read_size =
+          std::min(find_data_start_chunk_size, source->size() - data_start_offset);
+        auto buffer       = source->host_read(data_start_offset, read_size);
+        auto buffer_chars =
+          host_span<char const>{reinterpret_cast<char const*>(buffer->data()), buffer->size()};
+
+        if (auto first_row_start =
+              std::find(buffer_chars.begin(), buffer_chars.end(), parse_opts.terminator);
+            first_row_start != buffer_chars.end()) {
+          data_start_offset += std::distance(buffer_chars.begin(), first_row_start) + 1;
+          break;
+        }
+        data_start_offset += read_size;
+        find_data_start_chunk_size *= 2;
+      }
+    }
+  }
+
+  // None of the parameters for row selection is used, we are parsing the entire file
+  bool const load_whole_file =
+    range_offset == 0 && range_size == 0 && skip_rows <= 0 && skip_end_rows <= 0 && num_rows == -1;
+
+  // Transfer source data to GPU and gather row offsets
+  auto const uncomp_size = h_data.has_value() ? h_data->size() : source->size();
+  auto data_row_offsets  = load_data_and_gather_row_offsets(source,
+                                                            reader_opts,
+                                                            parse_opts,
+                                                            header,
+                                                            h_data,
+                                                            range_offset,
+                                                            data_start_offset - range_offset,
+                                                            (range_size) ? range_size : uncomp_size,
+                                                            (skip_rows > 0) ? skip_rows : 0,
+                                                            num_rows,
+                                                            load_whole_file,
+                                                            stream);
+  auto& row_offsets = data_row_offsets.second;
+  // Exclude the rows that are to be skipped from the end
+  if (skip_end_rows > 0 && static_cast<size_t>(skip_end_rows) < row_offsets.size()) {
+    row_offsets.shrink(row_offsets.size() - skip_end_rows);
+  }
+
+  return data_row_offsets;
 }
 
 void select_data_types(host_span<data_type const> user_dtypes,
diff --git a/cpp/src/io/functions.cpp b/cpp/src/io/functions.cpp
index 0ca54da5aaf..de8eea9e99b 100644
--- a/cpp/src/io/functions.cpp
+++ b/cpp/src/io/functions.cpp
@@ -38,6 +38,7 @@
 #include <cudf/utilities/memory_resource.hpp>
 
 #include <algorithm>
+#include <utility>
 
 namespace cudf::io {
 
@@ -852,8 +853,8 @@ void parquet_writer_options_base::set_sorting_columns(std::vector<sorting_column
   _sorting_columns = std::move(sorting_columns);
 }
 
-parquet_writer_options::parquet_writer_options(sink_info const& sink, table_view const& table)
-  : parquet_writer_options_base(sink), _table(table)
+parquet_writer_options::parquet_writer_options(sink_info const& sink, table_view table)
+  : parquet_writer_options_base(sink), _table(std::move(table))
 {
 }
diff --git a/cpp/src/io/json/host_tree_algorithms.cu b/cpp/src/io/json/host_tree_algorithms.cu
index 70d61132b42..5855f1b5a5f 100644
--- a/cpp/src/io/json/host_tree_algorithms.cu
+++ b/cpp/src/io/json/host_tree_algorithms.cu
@@ -21,6 +21,7 @@
 #include <cudf/detail/null_mask.hpp>
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
+#include <cudf/detail/utilities/visitor_overload.hpp>
 #include <cudf/strings/strings_column_view.hpp>
 #include <cudf/types.hpp>
 #include <cudf/utilities/error.hpp>
@@ -43,6 +44,7 @@
 #include <thrust/uninitialized_fill.h>
 
 #include <algorithm>
+#include <deque>
 
 namespace cudf::io::json::detail {
 
@@ -58,16 +60,15 @@ namespace cudf::io::json::detail {
  */
 rmm::device_uvector<NodeIndexT> get_values_column_indices(TreeDepthT const row_array_children_level,
                                                           tree_meta_t const& d_tree,
-                                                          device_span<NodeIndexT> col_ids,
+                                                          device_span<NodeIndexT const> col_ids,
                                                           size_type const num_columns,
                                                           rmm::cuda_stream_view stream)
 {
-  CUDF_FUNC_RANGE();
   auto [level2_nodes, level2_indices] = get_array_children_indices(
     row_array_children_level, d_tree.node_levels, d_tree.parent_node_ids, stream);
   auto col_id_location = thrust::make_permutation_iterator(col_ids.begin(), level2_nodes.begin());
   rmm::device_uvector<NodeIndexT> values_column_indices(num_columns, stream);
-  thrust::scatter(rmm::exec_policy(stream),
+  thrust::scatter(rmm::exec_policy_nosync(stream),
                   level2_indices.begin(),
                   level2_indices.end(),
                   col_id_location,
@@ -90,12 +91,11 @@ std::vector<std::string> copy_strings_to_host_sync(
   device_span<SymbolOffsetT const> node_range_end,
   rmm::cuda_stream_view stream)
 {
-  CUDF_FUNC_RANGE();
   auto const num_strings = node_range_begin.size();
   rmm::device_uvector<size_type> string_offsets(num_strings, stream);
   rmm::device_uvector<size_type> string_lengths(num_strings, stream);
   auto d_offset_pairs = thrust::make_zip_iterator(node_range_begin.begin(), node_range_end.begin());
-  thrust::transform(rmm::exec_policy(stream),
+  thrust::transform(rmm::exec_policy_nosync(stream),
                     d_offset_pairs,
                     d_offset_pairs + num_strings,
                     thrust::make_zip_iterator(string_offsets.begin(), string_lengths.begin()),
@@ -161,18 +161,18 @@ std::vector<std::string> copy_strings_to_host_sync(
 rmm::device_uvector<uint8_t> is_all_nulls_each_column(device_span<SymbolT const> input,
                                                       tree_meta_t const& d_column_tree,
                                                       tree_meta_t const& tree,
-                                                      device_span<NodeIndexT> col_ids,
+                                                      device_span<NodeIndexT const> col_ids,
                                                       cudf::io::json_reader_options const& options,
                                                       rmm::cuda_stream_view stream)
 {
   auto const num_nodes = col_ids.size();
   auto const num_cols  = d_column_tree.node_categories.size();
   rmm::device_uvector<uint8_t> is_all_nulls(num_cols, stream);
-  thrust::fill(rmm::exec_policy(stream), is_all_nulls.begin(), is_all_nulls.end(), true);
+  thrust::fill(rmm::exec_policy_nosync(stream), is_all_nulls.begin(), is_all_nulls.end(), true);
 
   auto parse_opt = parsing_options(options, stream);
   thrust::for_each_n(
-    rmm::exec_policy(stream),
+    rmm::exec_policy_nosync(stream),
    thrust::counting_iterator<size_type>(0),
    num_nodes,
    [options = parse_opt.view(),
@@ -193,7 +193,7 @@ rmm::device_uvector<uint8_t> is_all_nulls_each_column(device_span<SymbolT const>
   return is_all_nulls;
 }
 
-NodeIndexT get_row_array_parent_col_id(device_span<NodeIndexT> col_ids,
+NodeIndexT get_row_array_parent_col_id(device_span<NodeIndexT const> col_ids,
                                        bool is_enabled_lines,
                                        rmm::cuda_stream_view stream)
 {
@@ -221,33 +221,34 @@ struct json_column_data {
   bitmask_type* validity;
 };
 
-std::pair<cudf::detail::host_vector<uint8_t>,
-          std::unordered_map<NodeIndexT, std::reference_wrapper<device_json_column>>>
-build_tree(device_json_column& root,
-           std::vector<uint8_t> const& is_str_column_all_nulls,
-           tree_meta_t& d_column_tree,
-           device_span<NodeIndexT const> d_unique_col_ids,
-           device_span<size_type const> d_max_row_offsets,
-           std::vector<std::string> const& column_names,
-           NodeIndexT row_array_parent_col_id,
-           bool is_array_of_arrays,
-           cudf::io::json_reader_options const& options,
-           rmm::cuda_stream_view stream,
-           rmm::device_async_resource_ref mr);
-void scatter_offsets(
-  tree_meta_t& tree,
-  device_span<NodeIndexT> col_ids,
-  device_span<size_type> row_offsets,
-  device_span<size_type> node_ids,
-  device_span<size_type> sorted_col_ids,  // Reuse this for parent_col_ids
+using hashmap_of_device_columns =
+  std::unordered_map<NodeIndexT, std::reference_wrapper<device_json_column>>;
+
+std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree(
+  device_json_column& root,
+  host_span<uint8_t const> is_str_column_all_nulls,
   tree_meta_t& d_column_tree,
-  host_span<const uint8_t> ignore_vals,
-  std::unordered_map<NodeIndexT, std::reference_wrapper<device_json_column>>& columns,
-  rmm::cuda_stream_view stream);
+  device_span<NodeIndexT const> d_unique_col_ids,
+  device_span<size_type const> d_max_row_offsets,
+  std::vector<std::string> const& column_names,
+  NodeIndexT row_array_parent_col_id,
+  bool is_array_of_arrays,
+  cudf::io::json_reader_options const& options,
+  rmm::cuda_stream_view stream,
+  rmm::device_async_resource_ref mr);
+void scatter_offsets(tree_meta_t const& tree,
+                     device_span<NodeIndexT const> col_ids,
+                     device_span<size_type const> row_offsets,
+                     device_span<size_type> node_ids,
+                     device_span<size_type> sorted_col_ids,  // Reuse this for parent_col_ids
+                     tree_meta_t const& d_column_tree,
+                     host_span<const bool> ignore_vals,
+                     hashmap_of_device_columns const& columns,
+                     rmm::cuda_stream_view stream);
 
 /**
  * @brief Constructs `d_json_column` from node tree representation
- * Newly constructed columns are insert into `root`'s children.
+ * Newly constructed columns are inserted into `root`'s children.
  * `root` must be a list type.
* * @param input Input JSON string device data @@ -265,28 +266,28 @@ void scatter_offsets( * of child_offsets and validity members of `d_json_column` */ void make_device_json_column(device_span<SymbolT const> input, - tree_meta_t& tree, - device_span<NodeIndexT> col_ids, - device_span<size_type> row_offsets, + tree_meta_t const& tree, + device_span<NodeIndexT const> col_ids, + device_span<size_type const> row_offsets, device_json_column& root, bool is_array_of_arrays, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_FUNC_RANGE(); - bool const is_enabled_lines = options.is_enabled_lines(); bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); - auto const num_nodes = col_ids.size(); - rmm::device_uvector<NodeIndexT> sorted_col_ids(col_ids.size(), stream); // make a copy - thrust::copy(rmm::exec_policy(stream), col_ids.begin(), col_ids.end(), sorted_col_ids.begin()); + // make a copy + auto sorted_col_ids = cudf::detail::make_device_uvector_async( + col_ids, stream, cudf::get_current_device_resource_ref()); // sort by {col_id} on {node_ids} stable rmm::device_uvector<NodeIndexT> node_ids(col_ids.size(), stream); - thrust::sequence(rmm::exec_policy(stream), node_ids.begin(), node_ids.end()); - thrust::stable_sort_by_key( - rmm::exec_policy(stream), sorted_col_ids.begin(), sorted_col_ids.end(), node_ids.begin()); + thrust::sequence(rmm::exec_policy_nosync(stream), node_ids.begin(), node_ids.end()); + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + node_ids.begin()); NodeIndexT const row_array_parent_col_id = get_row_array_parent_col_id(col_ids, is_enabled_lines, stream); @@ -316,7 +317,7 @@ void make_device_json_column(device_span<SymbolT const> input, cudf::detail::make_host_vector_sync(values_column_indices, stream); std::transform(unique_col_ids.begin(), unique_col_ids.end(), - column_names.begin(), + column_names.cbegin(), column_names.begin(), [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id]( auto col_id, auto name) mutable { @@ -333,17 +334,17 @@ void make_device_json_column(device_span<SymbolT const> input, } return std::vector<uint8_t>(); }(); - auto [ignore_vals, columns] = build_tree(root, - is_str_column_all_nulls, - d_column_tree, - d_unique_col_ids, - d_max_row_offsets, - column_names, - row_array_parent_col_id, - is_array_of_arrays, - options, - stream, - mr); + auto const [ignore_vals, columns] = build_tree(root, + is_str_column_all_nulls, + d_column_tree, + d_unique_col_ids, + d_max_row_offsets, + column_names, + row_array_parent_col_id, + is_array_of_arrays, + options, + stream, + mr); scatter_offsets(tree, col_ids, @@ -356,19 +357,18 @@ void make_device_json_column(device_span<SymbolT const> input, stream); } -std::pair<cudf::detail::host_vector<uint8_t>, - std::unordered_map<NodeIndexT, std::reference_wrapper<device_json_column>>> -build_tree(device_json_column& root, - std::vector<uint8_t> const& is_str_column_all_nulls, - tree_meta_t& d_column_tree, - device_span<NodeIndexT const> d_unique_col_ids, - device_span<size_type const> d_max_row_offsets, - std::vector<std::string> const& column_names, - NodeIndexT row_array_parent_col_id, - bool is_array_of_arrays, - cudf::io::json_reader_options const& options, - rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) +std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree( + device_json_column& root, + 
host_span<uint8_t const> is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span<NodeIndexT const> d_unique_col_ids, + device_span<size_type const> d_max_row_offsets, + std::vector<std::string> const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) { bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); @@ -380,6 +380,7 @@ build_tree(device_json_column& root, cudf::detail::make_host_vector_async(d_column_tree.node_range_begin, stream); auto const max_row_offsets = cudf::detail::make_host_vector_async(d_max_row_offsets, stream); auto num_columns = d_unique_col_ids.size(); + stream.synchronize(); auto to_json_col_type = [](auto category) { switch (category) { @@ -439,11 +440,12 @@ build_tree(device_json_column& root, }); // use hash map because we may skip field name's col_ids - std::unordered_map<NodeIndexT, std::reference_wrapper<device_json_column>> columns; + hashmap_of_device_columns columns; // map{parent_col_id, child_col_name}> = child_col_id, used for null value column tracking std::map<std::pair<NodeIndexT, std::string>, NodeIndexT> mapped_columns; // find column_ids which are values, but should be ignored in validity - auto ignore_vals = cudf::detail::make_host_vector<uint8_t>(num_columns, stream); + auto ignore_vals = cudf::detail::make_host_vector<bool>(num_columns, stream); + std::fill(ignore_vals.begin(), ignore_vals.end(), false); std::vector<uint8_t> is_mixed_type_column(num_columns, 0); std::vector<uint8_t> is_pruned(num_columns, 0); // for columns that are not mixed type but have been forced as string @@ -452,7 +454,7 @@ build_tree(device_json_column& root, std::function<void(NodeIndexT, device_json_column&)> remove_child_columns = [&](NodeIndexT this_col_id, device_json_column& col) { - for (auto col_name : col.column_order) { + for (auto const& col_name : col.column_order) { auto child_id = mapped_columns[{this_col_id, col_name}]; is_mixed_type_column[child_id] = 1; remove_child_columns(child_id, col.child_columns.at(col_name)); @@ -523,7 +525,7 @@ build_tree(device_json_column& root, if (parent_col_id != parent_node_sentinel && (is_mixed_type_column[parent_col_id] || is_pruned[this_col_id]) || forced_as_string_column[parent_col_id]) { - ignore_vals[this_col_id] = 1; + ignore_vals[this_col_id] = true; if (is_mixed_type_column[parent_col_id]) { is_mixed_type_column[this_col_id] = 1; } if (forced_as_string_column[parent_col_id]) { forced_as_string_column[this_col_id] = true; } continue; @@ -569,12 +571,12 @@ build_tree(device_json_column& root, } if (column_categories[this_col_id] == NC_VAL || column_categories[this_col_id] == NC_STR) { - ignore_vals[this_col_id] = 1; + ignore_vals[this_col_id] = true; continue; } if (column_categories[old_col_id] == NC_VAL || column_categories[old_col_id] == NC_STR) { // remap - ignore_vals[old_col_id] = 1; + ignore_vals[old_col_id] = true; mapped_columns.erase({parent_col_id, name}); columns.erase(old_col_id); parent_col.child_columns.erase(name); @@ -624,7 +626,7 @@ build_tree(device_json_column& root, auto parent_col_id = column_parent_ids[this_col_id]; if (parent_col_id != parent_node_sentinel and is_mixed_type_column[parent_col_id] == 1) { is_mixed_type_column[this_col_id] = 1; - ignore_vals[this_col_id] = 1; + ignore_vals[this_col_id] = true; 
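+ // a value child of a mixed-type column is absorbed into the parent's string representation, so its device column handle is dropped here along with its validity contribution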
columns.erase(this_col_id); } // Convert only mixed type columns as string (so to copy), but not its children @@ -644,7 +646,7 @@ build_tree(device_json_column& root, auto parent_col_id = column_parent_ids[this_col_id]; if (parent_col_id != parent_node_sentinel and forced_as_string_column[parent_col_id]) { forced_as_string_column[this_col_id] = true; - ignore_vals[this_col_id] = 1; + ignore_vals[this_col_id] = true; } // Convert only mixed type columns as string (so to copy), but not its children if (parent_col_id != parent_node_sentinel and not forced_as_string_column[parent_col_id] and @@ -664,16 +666,15 @@ build_tree(device_json_column& root, return {ignore_vals, columns}; } -void scatter_offsets( - tree_meta_t& tree, - device_span<NodeIndexT> col_ids, - device_span<size_type> row_offsets, - device_span<size_type> node_ids, - device_span<size_type> sorted_col_ids, // Reuse this for parent_col_ids - tree_meta_t& d_column_tree, - host_span<const uint8_t> ignore_vals, - std::unordered_map<NodeIndexT, std::reference_wrapper<device_json_column>>& columns, - rmm::cuda_stream_view stream) +void scatter_offsets(tree_meta_t const& tree, + device_span<NodeIndexT const> col_ids, + device_span<size_type const> row_offsets, + device_span<size_type> node_ids, + device_span<size_type> sorted_col_ids, // Reuse this for parent_col_ids + tree_meta_t const& d_column_tree, + host_span<const bool> ignore_vals, + hashmap_of_device_columns const& columns, + rmm::cuda_stream_view stream) { auto const num_nodes = col_ids.size(); auto const num_columns = d_column_tree.node_categories.size(); @@ -695,7 +696,7 @@ void scatter_offsets( // 3. scatter string offsets to respective columns, set validity bits thrust::for_each_n( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::counting_iterator<size_type>(0), num_nodes, [column_categories = d_column_tree.node_categories.begin(), @@ -739,7 +740,7 @@ void scatter_offsets( : col_ids[parent_node_ids[node_id]]; })); auto const list_children_end = thrust::copy_if( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_zip_iterator(thrust::make_counting_iterator<size_type>(0), parent_col_id), thrust::make_zip_iterator(thrust::make_counting_iterator<size_type>(0), parent_col_id) + num_nodes, @@ -757,12 +758,12 @@ void scatter_offsets( auto const num_list_children = list_children_end - thrust::make_zip_iterator(node_ids.begin(), parent_col_ids.begin()); - thrust::stable_sort_by_key(rmm::exec_policy(stream), + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), parent_col_ids.begin(), parent_col_ids.begin() + num_list_children, node_ids.begin()); thrust::for_each_n( - rmm::exec_policy(stream), + rmm::exec_policy_nosync(stream), thrust::make_counting_iterator<size_type>(0), num_list_children, [node_ids = node_ids.begin(), @@ -805,4 +806,599 @@ void scatter_offsets( stream.synchronize(); } +namespace experimental { + +std::map<std::string, schema_element> unified_schema(cudf::io::json_reader_options const& options) +{ + return std::visit( + cudf::detail::visitor_overload{ + [](std::vector<data_type> const& user_dtypes) { + std::map<std::string, schema_element> dnew; + std::transform(thrust::counting_iterator<size_t>(0), + thrust::counting_iterator<size_t>(user_dtypes.size()), + std::inserter(dnew, dnew.end()), + [&user_dtypes](auto i) { + return std::pair(std::to_string(i), schema_element{user_dtypes[i]}); + }); + return dnew; + }, + [](std::map<std::string, data_type> const& user_dtypes) { + std::map<std::string, schema_element> 
dnew; + std::transform(user_dtypes.begin(), + user_dtypes.end(), + std::inserter(dnew, dnew.end()), + [](auto key_dtype) { + return std::pair(key_dtype.first, schema_element{key_dtype.second}); + }); + return dnew; + }, + [](std::map<std::string, schema_element> const& user_dtypes) { return user_dtypes; }}, + options.get_dtypes()); +} + +std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree( + device_json_column& root, + host_span<uint8_t const> is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span<NodeIndexT const> d_unique_col_ids, + device_span<size_type const> d_max_row_offsets, + std::vector<std::string> const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + +/** + * @brief Constructs `d_json_column` from node tree representation + * Newly constructed columns are inserted into `root`'s children. + * `root` must be a list type. + * + * @param input Input JSON string device data + * @param tree Node tree representation of the JSON string + * @param col_ids Column ids of the nodes in the tree + * @param row_offsets Row offsets of the nodes in the tree + * @param root Root node of the `d_json_column` tree + * @param is_array_of_arrays Whether the tree is an array of arrays + * @param options Parsing options specifying the parsing behaviour + * options affecting behaviour are + * is_enabled_lines: Whether the input is a line-delimited JSON + * is_enabled_mixed_types_as_string: Whether to enable reading mixed types as string + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate the device memory + * of child_offsets and validity members of `d_json_column` + */ +void make_device_json_column(device_span<SymbolT const> input, + tree_meta_t const& tree, + device_span<NodeIndexT const> col_ids, + device_span<size_type const> row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + bool const is_enabled_lines = options.is_enabled_lines(); + bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); + // make a copy + auto sorted_col_ids = cudf::detail::make_device_uvector_async( + col_ids, stream, cudf::get_current_device_resource_ref()); + + // sort by {col_id} on {node_ids} stable + rmm::device_uvector<NodeIndexT> node_ids(col_ids.size(), stream); + thrust::sequence(rmm::exec_policy_nosync(stream), node_ids.begin(), node_ids.end()); + thrust::stable_sort_by_key(rmm::exec_policy_nosync(stream), + sorted_col_ids.begin(), + sorted_col_ids.end(), + node_ids.begin()); + + NodeIndexT const row_array_parent_col_id = + get_row_array_parent_col_id(col_ids, is_enabled_lines, stream); + + // 1. gather column information. 
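+ // reduce_to_column_tree collapses the per-node metadata into per-column metadata: one tree entry per unique column id, plus each column's maximum row offset (its row count minus one)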
+ auto [d_column_tree, d_unique_col_ids, d_max_row_offsets] = + reduce_to_column_tree(tree, + col_ids, + sorted_col_ids, + node_ids, + row_offsets, + is_array_of_arrays, + row_array_parent_col_id, + stream); + + auto num_columns = d_unique_col_ids.size(); + std::vector<std::string> column_names = copy_strings_to_host_sync( + input, d_column_tree.node_range_begin, d_column_tree.node_range_end, stream); + // array of arrays column names + if (is_array_of_arrays) { + auto const unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); + auto const column_parent_ids = + cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); + TreeDepthT const row_array_children_level = is_enabled_lines ? 1 : 2; + auto values_column_indices = + get_values_column_indices(row_array_children_level, tree, col_ids, num_columns, stream); + auto h_values_column_indices = + cudf::detail::make_host_vector_sync(values_column_indices, stream); + std::transform(unique_col_ids.begin(), + unique_col_ids.end(), + column_names.cbegin(), + column_names.begin(), + [&h_values_column_indices, &column_parent_ids, row_array_parent_col_id]( + auto col_id, auto name) mutable { + return column_parent_ids[col_id] == row_array_parent_col_id + ? std::to_string(h_values_column_indices[col_id]) + : name; + }); + } + + auto const is_str_column_all_nulls = [&, &column_tree = d_column_tree]() { + if (is_enabled_mixed_types_as_string) { + return cudf::detail::make_std_vector_sync( + is_all_nulls_each_column(input, column_tree, tree, col_ids, options, stream), stream); + } + return std::vector<uint8_t>(); + }(); + auto const [ignore_vals, columns] = build_tree(root, + is_str_column_all_nulls, + d_column_tree, + d_unique_col_ids, + d_max_row_offsets, + column_names, + row_array_parent_col_id, + is_array_of_arrays, + options, + stream, + mr); + if (ignore_vals.empty()) return; + scatter_offsets(tree, + col_ids, + row_offsets, + node_ids, + sorted_col_ids, + d_column_tree, + ignore_vals, + columns, + stream); +} + +std::pair<cudf::detail::host_vector<bool>, hashmap_of_device_columns> build_tree( + device_json_column& root, + host_span<uint8_t const> is_str_column_all_nulls, + tree_meta_t& d_column_tree, + device_span<NodeIndexT const> d_unique_col_ids, + device_span<size_type const> d_max_row_offsets, + std::vector<std::string> const& column_names, + NodeIndexT row_array_parent_col_id, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + bool const is_enabled_lines = options.is_enabled_lines(); + bool const is_enabled_mixed_types_as_string = options.is_enabled_mixed_types_as_string(); + auto unique_col_ids = cudf::detail::make_host_vector_async(d_unique_col_ids, stream); + auto column_categories = + cudf::detail::make_host_vector_async(d_column_tree.node_categories, stream); + auto const column_parent_ids = + cudf::detail::make_host_vector_async(d_column_tree.parent_node_ids, stream); + auto column_range_beg = + cudf::detail::make_host_vector_async(d_column_tree.node_range_begin, stream); + auto const max_row_offsets = cudf::detail::make_host_vector_async(d_max_row_offsets, stream); + auto num_columns = d_unique_col_ids.size(); + stream.synchronize(); + + auto to_json_col_type = [](auto category) { + switch (category) { + case NC_STRUCT: return json_col_t::StructColumn; + case NC_LIST: return json_col_t::ListColumn; + case NC_STR: [[fallthrough]]; + case NC_VAL: return json_col_t::StringColumn; + default: 
return json_col_t::Unknown; + } + }; + + auto initialize_json_columns = [&](auto i, auto& col_ref, auto column_category) { + auto& col = col_ref.get(); + if (col.type != json_col_t::Unknown) { return; } + if (column_category == NC_ERR || column_category == NC_FN) { + return; + } else if (column_category == NC_VAL || column_category == NC_STR) { + col.string_offsets.resize(max_row_offsets[i] + 1, stream); + col.string_lengths.resize(max_row_offsets[i] + 1, stream); + thrust::fill( + rmm::exec_policy_nosync(stream), + thrust::make_zip_iterator(col.string_offsets.begin(), col.string_lengths.begin()), + thrust::make_zip_iterator(col.string_offsets.end(), col.string_lengths.end()), + thrust::make_tuple(0, 0)); + } else if (column_category == NC_LIST) { + col.child_offsets.resize(max_row_offsets[i] + 2, stream); + thrust::uninitialized_fill( + rmm::exec_policy_nosync(stream), col.child_offsets.begin(), col.child_offsets.end(), 0); + } + col.num_rows = max_row_offsets[i] + 1; + col.validity = + cudf::detail::create_null_mask(col.num_rows, cudf::mask_state::ALL_NULL, stream, mr); + col.type = to_json_col_type(column_category); + }; + + // 2. generate nested columns tree and its device_memory + // reorder unique_col_ids w.r.t. column_range_begin for order of column to be in field order. + auto h_range_col_id_it = + thrust::make_zip_iterator(column_range_beg.begin(), unique_col_ids.begin()); + std::sort(h_range_col_id_it, h_range_col_id_it + num_columns, [](auto const& a, auto const& b) { + return thrust::get<0>(a) < thrust::get<0>(b); + }); + // adjacency list construction + std::map<NodeIndexT, std::vector<NodeIndexT>> adj; + for (auto const this_col_id : unique_col_ids) { + auto parent_col_id = column_parent_ids[this_col_id]; + adj[parent_col_id].push_back(this_col_id); + } + + // Pruning + auto is_pruned = cudf::detail::make_host_vector<bool>(num_columns, stream); + std::fill_n(is_pruned.begin(), num_columns, options.is_enabled_prune_columns()); + + // prune all children of a column, but not self. + auto ignore_all_children = [&](auto parent_col_id) { + std::deque<NodeIndexT> offspring; + if (adj.count(parent_col_id)) { + for (auto const& child : adj[parent_col_id]) { + offspring.push_back(child); + } + } + while (!offspring.empty()) { + auto this_id = offspring.front(); + offspring.pop_front(); + is_pruned[this_id] = true; + if (adj.count(this_id)) { + for (auto const& child : adj[this_id]) { + offspring.push_back(child); + } + } + } + }; + + // Pruning: iterate through schema and mark only those columns and enforce type. + // NoPruning: iterate through schema and enforce type. + + if (adj[parent_node_sentinel].empty()) + return {cudf::detail::make_host_vector<bool>(0, stream), {}}; // for empty file + CUDF_EXPECTS(adj[parent_node_sentinel].size() == 1, "Should be 1"); + auto expected_types = cudf::detail::make_host_vector<NodeT>(num_columns, stream); + std::fill_n(expected_types.begin(), num_columns, NUM_NODE_CLASSES); + + auto lookup_names = [&column_names](auto child_ids, auto name) { + for (auto const& child_id : child_ids) { + if (column_names[child_id] == name) return child_id; + } + return -1; + }; + // recursive lambda on schema to mark columns as pruned. 
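+ // mark_is_pruned walks the user schema alongside the column adjacency list: a column survives only if its node category is compatible with the schema type at that position, and an incompatible subtree is dropped wholesale via ignore_all_children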
+ std::function<void(NodeIndexT root, schema_element const& schema)> mark_is_pruned; + mark_is_pruned = [&is_pruned, + &mark_is_pruned, + &adj, + &lookup_names, + &column_categories, + &expected_types, + &ignore_all_children](NodeIndexT root, schema_element const& schema) -> void { + if (root == -1) return; + bool pass = + (schema.type == data_type{type_id::STRUCT} and column_categories[root] == NC_STRUCT) or + (schema.type == data_type{type_id::LIST} and column_categories[root] == NC_LIST) or + (schema.type != data_type{type_id::STRUCT} and schema.type != data_type{type_id::LIST} and + column_categories[root] != NC_FN); + if (!pass) { + // ignore all children of this column and prune this column. + is_pruned[root] = true; + ignore_all_children(root); + return; + } + is_pruned[root] = false; + auto expected_type = [](auto type, auto cat) { + if (type == data_type{type_id::STRUCT} and cat == NC_STRUCT) return NC_STRUCT; + if (type == data_type{type_id::LIST} and cat == NC_LIST) return NC_LIST; + if (type != data_type{type_id::STRUCT} and type != data_type{type_id::LIST}) return NC_STR; + return NC_ERR; + }(schema.type, column_categories[root]); + expected_types[root] = expected_type; // forced type. + // ignore children of nested columns, but not self. + if (expected_type == NC_STR and + (column_categories[root] == NC_STRUCT or column_categories[root] == NC_LIST)) + ignore_all_children(root); + if (not(schema.type == data_type{type_id::STRUCT} or schema.type == data_type{type_id::LIST})) + return; // no children to mark for non-nested. + auto child_ids = adj.count(root) ? adj[root] : std::vector<NodeIndexT>{}; + if (schema.type == data_type{type_id::STRUCT}) { + for (auto const& key_pair : schema.child_types) { + auto col_id = lookup_names(child_ids, key_pair.first); + if (col_id == -1) continue; + is_pruned[col_id] = false; + for (auto const& child_id : adj[col_id]) // children of field (>1 if mixed) + mark_is_pruned(child_id, key_pair.second); + } + } else if (schema.type == data_type{type_id::LIST}) { + // partial solution for list children to have any name. + auto this_list_child_name = + schema.child_types.size() == 1 ? schema.child_types.begin()->first : list_child_name; + if (schema.child_types.count(this_list_child_name) == 0) return; + auto list_child = schema.child_types.at(this_list_child_name); + for (auto const& child_id : child_ids) + mark_is_pruned(child_id, list_child); + } + }; + if (is_array_of_arrays) { + if (adj[adj[parent_node_sentinel][0]].empty()) + return {cudf::detail::make_host_vector<bool>(0, stream), {}}; + auto root_list_col_id = + is_enabled_lines ? adj[parent_node_sentinel][0] : adj[adj[parent_node_sentinel][0]][0]; + // mark root and row array col_id as not pruned. + if (!is_enabled_lines) { + auto top_level_list_id = adj[parent_node_sentinel][0]; + is_pruned[top_level_list_id] = false; + } + is_pruned[root_list_col_id] = false; + std::visit(cudf::detail::visitor_overload{ + [&root_list_col_id, &adj, &mark_is_pruned, &column_names]( + std::vector<data_type> const& user_dtypes) -> void { + for (size_t i = 0; i < adj[root_list_col_id].size() && i < user_dtypes.size(); + i++) { + NodeIndexT const first_child_id = adj[root_list_col_id][i]; + auto name = column_names[first_child_id]; + auto value_id = std::stol(name); + if (value_id >= 0 and value_id < static_cast<long>(user_dtypes.size())) + mark_is_pruned(first_child_id, schema_element{user_dtypes[value_id]}); + // Note: mixed type - forced type, will work here. 
+ } + }, + [&root_list_col_id, &adj, &mark_is_pruned, &column_names]( + std::map<std::string, data_type> const& user_dtypes) -> void { + for (size_t i = 0; i < adj[root_list_col_id].size(); i++) { + auto const first_child_id = adj[root_list_col_id][i]; + auto name = column_names[first_child_id]; + if (user_dtypes.count(name)) + mark_is_pruned(first_child_id, schema_element{user_dtypes.at(name)}); + } + }, + [&root_list_col_id, &adj, &mark_is_pruned, &column_names]( + std::map<std::string, schema_element> const& user_dtypes) -> void { + for (size_t i = 0; i < adj[root_list_col_id].size(); i++) { + auto const first_child_id = adj[root_list_col_id][i]; + auto name = column_names[first_child_id]; + if (user_dtypes.count(name)) + mark_is_pruned(first_child_id, user_dtypes.at(name)); + } + }}, + options.get_dtypes()); + } else { + auto root_struct_col_id = + is_enabled_lines + ? adj[parent_node_sentinel][0] + : (adj[adj[parent_node_sentinel][0]].empty() ? -1 : adj[adj[parent_node_sentinel][0]][0]); + // mark root and row struct col_id as not pruned. + if (!is_enabled_lines) { + auto top_level_list_id = adj[parent_node_sentinel][0]; + is_pruned[top_level_list_id] = false; + } + is_pruned[root_struct_col_id] = false; + schema_element u_schema{data_type{type_id::STRUCT}}; + u_schema.child_types = unified_schema(options); + std::visit( + cudf::detail::visitor_overload{ + [&is_pruned, &root_struct_col_id, &adj, &mark_is_pruned]( + std::vector<data_type> const& user_dtypes) -> void { + for (size_t i = 0; i < adj[root_struct_col_id].size() && i < user_dtypes.size(); i++) { + NodeIndexT const first_field_id = adj[root_struct_col_id][i]; + is_pruned[first_field_id] = false; + for (auto const& child_id : adj[first_field_id]) // children of field (>1 if mixed) + mark_is_pruned(child_id, schema_element{user_dtypes[i]}); + } + }, + [&root_struct_col_id, &adj, &mark_is_pruned, &u_schema]( + std::map<std::string, data_type> const& user_dtypes) -> void { + mark_is_pruned(root_struct_col_id, u_schema); + }, + [&root_struct_col_id, &adj, &mark_is_pruned, &u_schema]( + std::map<std::string, schema_element> const& user_dtypes) -> void { + mark_is_pruned(root_struct_col_id, u_schema); + }}, + options.get_dtypes()); + } + // Useful for array of arrays + auto named_level = + is_enabled_lines + ? adj[parent_node_sentinel][0] + : (adj[adj[parent_node_sentinel][0]].empty() ? -1 : adj[adj[parent_node_sentinel][0]][0]); + + auto handle_mixed_types = [&column_categories, + &is_str_column_all_nulls, + &is_pruned, + &expected_types, + &is_enabled_mixed_types_as_string, + &ignore_all_children](std::vector<NodeIndexT>& child_ids) { + // do these on unpruned columns only. + // when mixed types is disabled, ignore string sibling of nested column. + // when mixed types is disabled, and both list and struct columns are siblings, error out. + // when mixed types is enabled, force string type on all columns + + // Remove pruned children (forced type will not clash here because other types are already + // pruned) + child_ids.erase( + std::remove_if(child_ids.begin(), + child_ids.end(), + [&is_pruned](NodeIndexT child_id) { return is_pruned[child_id]; }), + child_ids.end()); + // find string id, struct id, list id. 
+ NodeIndexT str_col_id{-1}, struct_col_id{-1}, list_col_id{-1}; + for (auto const& child_id : child_ids) { + if (column_categories[child_id] == NC_VAL || column_categories[child_id] == NC_STR) + str_col_id = child_id; + else if (column_categories[child_id] == NC_STRUCT) + struct_col_id = child_id; + else if (column_categories[child_id] == NC_LIST) + list_col_id = child_id; + } + // conditions for handling mixed types. + if (is_enabled_mixed_types_as_string) { + if (struct_col_id != -1 and list_col_id != -1) { + expected_types[struct_col_id] = NC_STR; + expected_types[list_col_id] = NC_STR; + // ignore children of nested columns. + ignore_all_children(struct_col_id); + ignore_all_children(list_col_id); + } + if ((struct_col_id != -1 or list_col_id != -1) and str_col_id != -1) { + if (is_str_column_all_nulls[str_col_id]) + is_pruned[str_col_id] = true; + else { + // ignore children of nested columns. + if (struct_col_id != -1) { + expected_types[struct_col_id] = NC_STR; + ignore_all_children(struct_col_id); + } + if (list_col_id != -1) { + expected_types[list_col_id] = NC_STR; + ignore_all_children(list_col_id); + } + } + } + } else { + // if both are present, error out. + CUDF_EXPECTS(struct_col_id == -1 or list_col_id == -1, + "A mix of lists and structs within the same column is not supported"); + // either one only: so ignore str column. + if ((struct_col_id != -1 or list_col_id != -1) and str_col_id != -1) { + is_pruned[str_col_id] = true; + } + } + }; + + using dev_ref = std::reference_wrapper<device_json_column>; + std::unordered_map<NodeIndexT, dev_ref> columns; + columns.try_emplace(parent_node_sentinel, std::ref(root)); + // convert adjacency list to tree. + dev_ref parent_ref = std::ref(root); + // recursively creates child columns + std::function<void(NodeIndexT, dev_ref)> construct_tree; + construct_tree = [&](NodeIndexT root, dev_ref ref) -> void { + if (is_pruned[root]) return; + auto expected_category = + expected_types[root] == NUM_NODE_CLASSES ? column_categories[root] : expected_types[root]; + initialize_json_columns(root, ref, expected_category); + auto child_ids = adj.count(root) ? adj[root] : std::vector<NodeIndexT>{}; + if (expected_category == NC_STRUCT) { + // find field column ids and their children, and create columns. + for (auto const& field_id : child_ids) { + auto name = column_names[field_id]; + if (is_pruned[field_id]) continue; + auto inserted = + ref.get().child_columns.try_emplace(name, device_json_column(stream, mr)).second; + ref.get().column_order.emplace_back(name); + CUDF_EXPECTS(inserted, + "struct child column insertion failed, duplicate column name in the parent"); + auto this_ref = std::ref(ref.get().child_columns.at(name)); + // Mixed type handling + auto& value_col_ids = adj[field_id]; + handle_mixed_types(value_col_ids); + if (value_col_ids.empty()) { + // If no column is present, remove the uninitialized column. + ref.get().child_columns.erase(name); + ref.get().column_order.pop_back(); + continue; + } + for (auto const& child_id : value_col_ids) // children of field (>1 if mixed) + { + if (is_pruned[child_id]) continue; + columns.try_emplace(child_id, this_ref); + construct_tree(child_id, this_ref); + } + } + } else if (expected_category == NC_LIST) { + // array of arrays interpreted as array of structs. 
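+ // e.g. the rows [[1, "a"], [2, "b"]] yield child columns named "0" and "1"; these names were generated with std::to_string earlier, so std::stoi below recovers each position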
+ if (is_array_of_arrays and root == named_level) { + // create column names + std::map<NodeIndexT, std::vector<NodeIndexT>> array_values; + for (auto const& child_id : child_ids) { + if (is_pruned[child_id]) continue; + auto name = column_names[child_id]; + array_values[std::stoi(name)].push_back(child_id); + } + // + for (auto const& value_id_pair : array_values) { + auto [value_id, value_col_ids] = value_id_pair; + auto name = std::to_string(value_id); + auto inserted = + ref.get().child_columns.try_emplace(name, device_json_column(stream, mr)).second; + ref.get().column_order.emplace_back(name); + CUDF_EXPECTS(inserted, + "list child column insertion failed, duplicate column name in the parent"); + auto this_ref = std::ref(ref.get().child_columns.at(name)); + handle_mixed_types(value_col_ids); + if (value_col_ids.empty()) { + // If no column is present, remove the uninitialized column. + ref.get().child_columns.erase(name); + ref.get().column_order.pop_back(); + continue; + } + for (auto const& child_id : value_col_ids) // children of field (>1 if mixed) + { + if (is_pruned[child_id]) continue; + columns.try_emplace(child_id, this_ref); + construct_tree(child_id, this_ref); + } + } + } else { + if (child_ids.empty()) return; + auto inserted = + ref.get() + .child_columns.try_emplace(list_child_name, device_json_column(stream, mr)) + .second; + CUDF_EXPECTS(inserted, + "list child column insertion failed, duplicate column name in the parent"); + ref.get().column_order.emplace_back(list_child_name); + auto this_ref = std::ref(ref.get().child_columns.at(list_child_name)); + // Mixed type handling + handle_mixed_types(child_ids); + if (child_ids.empty()) { + // If no column is present, remove the uninitialized column. + ref.get().child_columns.erase(list_child_name); + } + for (auto const& child_id : child_ids) { + if (is_pruned[child_id]) continue; + columns.try_emplace(child_id, this_ref); + construct_tree(child_id, this_ref); + } + } + } + }; + auto inserted = parent_ref.get() + .child_columns.try_emplace(list_child_name, device_json_column(stream, mr)) + .second; + CUDF_EXPECTS(inserted, "child column insertion failed, duplicate column name in the parent"); + parent_ref = std::ref(parent_ref.get().child_columns.at(list_child_name)); + columns.try_emplace(adj[parent_node_sentinel][0], parent_ref); + construct_tree(adj[parent_node_sentinel][0], parent_ref); + + // Forced string type due to input schema and mixed type as string. + for (size_t i = 0; i < expected_types.size(); i++) { + if (expected_types[i] == NC_STR) { + if (columns.count(i)) { columns.at(i).get().forced_as_string_column = true; } + } + } + std::transform(expected_types.cbegin(), + expected_types.cend(), + column_categories.cbegin(), + expected_types.begin(), + [](auto exp, auto cat) { return exp == NUM_NODE_CLASSES ? 
cat : exp; }); + cudaMemcpyAsync(d_column_tree.node_categories.begin(), + expected_types.data(), + expected_types.size() * sizeof(column_categories[0]), + cudaMemcpyDefault, + stream.value()); + + return {is_pruned, columns}; +} +} // namespace experimental + } // namespace cudf::io::json::detail diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index dfd9285f682..912e93d52ae 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -104,7 +104,7 @@ void print_tree(host_span<SymbolT const> input, * max row offsets of columns */ std::tuple<tree_meta_t, rmm::device_uvector<NodeIndexT>, rmm::device_uvector<size_type>> -reduce_to_column_tree(tree_meta_t& tree, +reduce_to_column_tree(tree_meta_t const& tree, device_span<NodeIndexT const> original_col_ids, device_span<NodeIndexT const> sorted_col_ids, device_span<NodeIndexT const> ordered_node_ids, @@ -317,7 +317,7 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co // Note: json_col modified here, moves this memory }; - auto get_child_schema = [schema](auto child_name) -> std::optional<schema_element> { + auto get_child_schema = [&schema](auto child_name) -> std::optional<schema_element> { if (schema.has_value()) { auto const result = schema.value().child_types.find(child_name); if (result != std::end(schema.value().child_types)) { return result->second; } @@ -325,6 +325,13 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co return {}; }; + auto get_list_child_schema = [&schema]() -> std::optional<schema_element> { + if (schema.has_value()) { + if (schema.value().child_types.size() > 0) return schema.value().child_types.begin()->second; + } + return {}; + }; + switch (json_col.type) { case json_col_t::StringColumn: { // move string_offsets to GPU and transform to string column @@ -439,9 +446,8 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co rmm::device_buffer{}, 0); // Create children column - auto child_schema_element = json_col.child_columns.empty() - ? std::optional<schema_element>{} - : get_child_schema(json_col.child_columns.begin()->first); + auto child_schema_element = + json_col.child_columns.empty() ? std::optional<schema_element>{} : get_list_child_schema(); auto [child_column, names] = json_col.child_columns.empty() or (prune_columns and !child_schema_element.has_value()) ? std::pair<std::unique_ptr<column>, @@ -479,6 +485,16 @@ std::pair<std::unique_ptr<column>, std::vector<column_name_info>> device_json_co } } +template <typename... Args> +auto make_device_json_column_dispatch(bool experimental, Args&&... 
args) +{ + if (experimental) { + return experimental::make_device_json_column(std::forward<Args>(args)...); + } else { + return make_device_json_column(std::forward<Args>(args)...); + } +} + table_with_metadata device_parse_nested_json(device_span<SymbolT const> d_input, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, @@ -524,6 +540,7 @@ table_with_metadata device_parse_nested_json(device_span<SymbolT const> d_input, gpu_tree, is_array_of_arrays, options.is_enabled_lines(), + options.is_enabled_experimental(), stream, cudf::get_current_device_resource_ref()); @@ -536,15 +553,16 @@ table_with_metadata device_parse_nested_json(device_span<SymbolT const> d_input, 0); // Get internal JSON column - make_device_json_column(d_input, - gpu_tree, - gpu_col_id, - gpu_row_offsets, - root_column, - is_array_of_arrays, - options, - stream, - mr); + make_device_json_column_dispatch(options.is_enabled_experimental(), + d_input, + gpu_tree, + gpu_col_id, + gpu_row_offsets, + root_column, + is_array_of_arrays, + options, + stream, + mr); // data_root refers to the root column of the data represented by the given JSON string auto& data_root = diff --git a/cpp/src/io/json/json_tree.cu b/cpp/src/io/json/json_tree.cu index 4d0dc010c57..d949635c1cc 100644 --- a/cpp/src/io/json/json_tree.cu +++ b/cpp/src/io/json/json_tree.cu @@ -14,17 +14,18 @@ * limitations under the License. */ -#include "io/utilities/hostdevice_vector.hpp" +#include "io/utilities/parsing_utils.cuh" +#include "io/utilities/string_parsing.hpp" #include "nested_json.hpp" #include <cudf/detail/cuco_helpers.hpp> #include <cudf/detail/nvtx/ranges.hpp> -#include <cudf/detail/scatter.cuh> #include <cudf/detail/utilities/algorithm.cuh> #include <cudf/detail/utilities/vector_factories.hpp> #include <cudf/hashing/detail/default_hash.cuh> #include <cudf/hashing/detail/hashing.hpp> #include <cudf/hashing/detail/helper_functions.cuh> +#include <cudf/strings/strings_column_view.hpp> #include <cudf/utilities/error.hpp> #include <cudf/utilities/memory_resource.hpp> #include <cudf/utilities/span.hpp> @@ -34,12 +35,14 @@ #include <rmm/exec_policy.hpp> #include <cub/device/device_radix_sort.cuh> +#include <cuco/static_map.cuh> #include <cuco/static_set.cuh> #include <cuda/functional> #include <thrust/binary_search.h> #include <thrust/copy.h> #include <thrust/count.h> #include <thrust/fill.h> +#include <thrust/functional.h> #include <thrust/gather.h> #include <thrust/iterator/counting_iterator.h> #include <thrust/iterator/discard_iterator.h> @@ -492,6 +495,85 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens, std::move(node_range_end)}; } +// Return field node ids after unicode decoding of field names and matching them to the same field names +std::pair<size_t, rmm::device_uvector<size_type>> remapped_field_nodes_after_unicode_decode( + device_span<SymbolT const> d_input, + tree_meta_t const& d_tree, + device_span<size_type const> keys, + rmm::cuda_stream_view stream) +{ + size_t num_keys = keys.size(); + if (num_keys == 0) { return {num_keys, rmm::device_uvector<size_type>(num_keys, stream)}; } + rmm::device_uvector<size_type> offsets(num_keys, stream); + rmm::device_uvector<size_type> lengths(num_keys, stream); + auto offset_length_it = thrust::make_zip_iterator(offsets.begin(), lengths.begin()); + thrust::transform(rmm::exec_policy_nosync(stream), + keys.begin(), + keys.end(), + offset_length_it, + [node_range_begin = d_tree.node_range_begin.data(), + node_range_end = d_tree.node_range_end.data()] 
__device__(auto key) { + return thrust::make_tuple(node_range_begin[key], + node_range_end[key] - node_range_begin[key]); + }); + cudf::io::parse_options_view opt{',', '\n', '\0', '.'}; + opt.keepquotes = true; + + auto utf8_decoded_fields = parse_data(d_input.data(), + offset_length_it, + num_keys, + data_type{type_id::STRING}, + rmm::device_buffer{}, + 0, + opt, + stream, + cudf::get_current_device_resource_ref()); + // hash the decoded field names and insert indices 0..num_keys into a set; + // insert_and_find records, for each key, the first inserted key with an equal decoded name. + // the caller stores these pairs in a static_map as key[index] -> key[found_keys[index]]. + + auto str_view = strings_column_view{utf8_decoded_fields->view()}; + auto const char_ptr = str_view.chars_begin(stream); + auto const offset_ptr = str_view.offsets().begin<size_type>(); + + // String hasher + auto const d_hasher = cuda::proclaim_return_type< + typename cudf::hashing::detail::default_hash<cudf::string_view>::result_type>( + [char_ptr, offset_ptr] __device__(auto node_id) { + auto const field_name = cudf::string_view(char_ptr + offset_ptr[node_id], + offset_ptr[node_id + 1] - offset_ptr[node_id]); + return cudf::hashing::detail::default_hash<cudf::string_view>{}(field_name); + }); + auto const d_equal = [char_ptr, offset_ptr] __device__(auto node_id1, auto node_id2) { + auto const field_name1 = cudf::string_view(char_ptr + offset_ptr[node_id1], + offset_ptr[node_id1 + 1] - offset_ptr[node_id1]); + auto const field_name2 = cudf::string_view(char_ptr + offset_ptr[node_id2], + offset_ptr[node_id2 + 1] - offset_ptr[node_id2]); + return field_name1 == field_name2; + }; + + using hasher_type = decltype(d_hasher); + constexpr size_type empty_node_index_sentinel = -1; + auto key_set = cuco::static_set{ + cuco::extent{compute_hash_table_size(num_keys)}, + cuco::empty_key{empty_node_index_sentinel}, + d_equal, + cuco::linear_probing<1, hasher_type>{d_hasher}, + {}, + {}, + cudf::detail::cuco_allocator<char>{rmm::mr::polymorphic_allocator<char>{}, stream}, + stream.value()}; + auto const counting_iter = thrust::make_counting_iterator<size_type>(0); + rmm::device_uvector<size_type> found_keys(num_keys, stream); + key_set.insert_and_find_async(counting_iter, + counting_iter + num_keys, + found_keys.begin(), + thrust::make_discard_iterator(), + stream.value()); + // set.size will synchronize the stream before return. + return {key_set.size(stream), std::move(found_keys)}; +} + /** * @brief Generates unique node_type id for each node. * Field nodes with the same name are assigned the same node_type id. * @@ -500,11 +582,14 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens, * All inputs and outputs are in node_id order. * @param d_input JSON string in device memory * @param d_tree Tree representation of the JSON + * @param is_enabled_experimental Whether to enable experimental features such as + * utf8 field name support * @param stream CUDA stream used for device memory operations and kernel launches. * @return Vector of node_type ids */ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<SymbolT const> d_input, tree_meta_t const& d_tree, + bool is_enabled_experimental, rmm::cuda_stream_view stream) { CUDF_FUNC_RANGE(); @@ -536,7 +621,7 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol }; // key-value pairs: uses node_id itself as node_type. 
(unique node_id for a field name due to // hashing) - auto const iter = thrust::make_counting_iterator<size_type>(0); + auto const counting_iter = thrust::make_counting_iterator<size_type>(0); auto const is_field_name_node = [node_categories = d_tree.node_categories.data()] __device__(auto node_id) { @@ -554,15 +639,61 @@ rmm::device_uvector<size_type> hash_node_type_with_field_name(device_span<Symbol {}, cudf::detail::cuco_allocator<char>{rmm::mr::polymorphic_allocator<char>{}, stream}, stream.value()}; - key_set.insert_if_async(iter, - iter + num_nodes, + key_set.insert_if_async(counting_iter, + counting_iter + num_nodes, thrust::counting_iterator<size_type>(0), // stencil is_field_name_node, stream.value()); + // experimental feature: utf8 field name support + // parse_data on field names, + // rehash it using another map, + // reassign the reverse map values to new matched node indices. + auto get_utf8_matched_field_nodes = [&]() { + auto make_map = [&stream](auto num_keys) { + using hasher_type3 = cudf::hashing::detail::default_hash<size_type>; + return cuco::static_map{ + cuco::extent{compute_hash_table_size(num_keys, 100)}, // 100% occupancy + cuco::empty_key{empty_node_index_sentinel}, + cuco::empty_value{empty_node_index_sentinel}, + {}, + cuco::linear_probing<1, hasher_type3>{hasher_type3{}}, + {}, + {}, + cudf::detail::cuco_allocator<char>{rmm::mr::polymorphic_allocator<char>{}, stream}, + stream.value()}; + }; + if (!is_enabled_experimental) { return std::pair{false, make_map(0)}; } + // get all unique field node ids for utf8 decoding + auto num_keys = key_set.size(stream); + rmm::device_uvector<size_type> keys(num_keys, stream); + key_set.retrieve_all(keys.data(), stream.value()); + + auto [num_unique_fields, found_keys] = + remapped_field_nodes_after_unicode_decode(d_input, d_tree, keys, stream); + + auto is_need_remap = num_unique_fields != num_keys; + if (!is_need_remap) { return std::pair{false, make_map(0)}; } + + // store to static_map with keys as field keys[index], and values as keys[found_keys[index]] + auto reverse_map = make_map(num_keys); + auto matching_keys_iter = thrust::make_permutation_iterator(keys.begin(), found_keys.begin()); + auto pair_iter = + thrust::make_zip_iterator(thrust::make_tuple(keys.begin(), matching_keys_iter)); + reverse_map.insert_async(pair_iter, pair_iter + num_keys, stream); + return std::pair{is_need_remap, std::move(reverse_map)}; + }; + auto [is_need_remap, reverse_map] = get_utf8_matched_field_nodes(); + auto const get_hash_value = - [key_set = key_set.ref(cuco::op::find)] __device__(auto node_id) -> size_type { + [key_set = key_set.ref(cuco::op::find), + is_need_remap = is_need_remap, + rm = reverse_map.ref(cuco::op::find)] __device__(auto node_id) -> size_type { auto const it = key_set.find(node_id); + if (it != key_set.end() and is_need_remap) { + auto const it2 = rm.find(*it); + return (it2 == rm.end()) ? size_type{0} : it2->second; + } return (it == key_set.end()) ? 
size_type{0} : *it; }; @@ -771,6 +902,8 @@ std::pair<rmm::device_uvector<size_type>, rmm::device_uvector<size_type>> hash_n * @param d_tree Tree representation of the JSON * @param is_array_of_arrays Whether the tree is an array of arrays * @param is_enabled_lines Whether the input is a line-delimited JSON + * @param is_enabled_experimental Whether to enable experimental features such as + * utf8 field name support * @param stream CUDA stream used for device memory operations and kernel launches * @param mr Device memory resource used to allocate the returned column's device memory * @return column_id, parent_column_id */ @@ -780,6 +913,7 @@ std::pair<rmm::device_uvector<NodeIndexT>, rmm::device_uvector<NodeIndexT>> gene tree_meta_t const& d_tree, bool is_array_of_arrays, bool is_enabled_lines, + bool is_enabled_experimental, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { @@ -793,7 +927,7 @@ std::pair<rmm::device_uvector<NodeIndexT>, rmm::device_uvector<NodeIndexT>> gene auto [col_id, unique_keys] = [&]() { // Convert node_category + field_name to node_type. rmm::device_uvector<size_type> node_type = - hash_node_type_with_field_name(d_input, d_tree, stream); + hash_node_type_with_field_name(d_input, d_tree, is_enabled_experimental, stream); // hash entire path from node to root. return hash_node_path(d_tree.node_levels, @@ -948,12 +1082,13 @@ records_orient_tree_traversal(device_span<SymbolT const> d_input, tree_meta_t const& d_tree, bool is_array_of_arrays, bool is_enabled_lines, + bool is_enabled_experimental, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); - auto [new_col_id, new_parent_col_id] = - generate_column_id(d_input, d_tree, is_array_of_arrays, is_enabled_lines, stream, mr); + auto [new_col_id, new_parent_col_id] = generate_column_id( + d_input, d_tree, is_array_of_arrays, is_enabled_lines, is_enabled_experimental, stream, mr); auto row_offsets = compute_row_offsets( std::move(new_parent_col_id), d_tree, is_array_of_arrays, is_enabled_lines, stream, mr); diff --git a/cpp/src/io/json/nested_json.hpp b/cpp/src/io/json/nested_json.hpp index 93ef2b46be1..3d9a51833e0 100644 --- a/cpp/src/io/json/nested_json.hpp +++ b/cpp/src/io/json/nested_json.hpp @@ -316,6 +316,8 @@ tree_meta_t get_tree_representation(device_span<PdaTokenT const> tokens, * index, level, begin index, and end index in the input JSON string * @param is_array_of_arrays Whether the tree is an array of arrays * @param is_enabled_lines Whether the input is a line-delimited JSON + * @param is_enabled_experimental Whether to enable experimental features such as utf-8 field name + * support * @param stream The CUDA stream to which kernels are dispatched * @param mr Optional, resource with which to allocate * @return A tuple of the output column indices and the row offsets within each column for each node */ @@ -326,6 +328,7 @@ records_orient_tree_traversal(device_span<SymbolT const> d_input, tree_meta_t const& d_tree, bool is_array_of_arrays, bool is_enabled_lines, + bool is_enabled_experimental, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); @@ -352,7 +355,7 @@ get_array_children_indices(TreeDepthT row_array_children_level, /** * @brief Reduces node tree representation to column tree representation. 
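 * For example, two JSON lines {"a": 1} and {"a": 2} produce two value nodes that share one column id; the reduction emits a single column entry whose max row offset is 1.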
* - * @param node_tree Node tree representation of JSON string + * @param tree Node tree representation of JSON string * @param original_col_ids Column ids of nodes * @param sorted_col_ids Sorted column ids of nodes * @param ordered_node_ids Node ids of nodes sorted by column ids @@ -365,7 +368,7 @@ get_array_children_indices(TreeDepthT row_array_children_level, */ CUDF_EXPORT std::tuple<tree_meta_t, rmm::device_uvector<NodeIndexT>, rmm::device_uvector<size_type>> -reduce_to_column_tree(tree_meta_t& node_tree, +reduce_to_column_tree(tree_meta_t const& tree, device_span<NodeIndexT const> original_col_ids, device_span<NodeIndexT const> sorted_col_ids, device_span<NodeIndexT const> ordered_node_ids, @@ -393,14 +396,30 @@ reduce_to_column_tree(tree_meta_t& node_tree, * of child_offsets and validity members of `d_json_column` */ void make_device_json_column(device_span<SymbolT const> input, - tree_meta_t& tree, - device_span<NodeIndexT> col_ids, - device_span<size_type> row_offsets, + tree_meta_t const& tree, + device_span<NodeIndexT const> col_ids, + device_span<size_type const> row_offsets, device_json_column& root, bool is_array_of_arrays, cudf::io::json_reader_options const& options, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); + +namespace experimental { +/** + * @copydoc cudf::io::json::detail::make_device_json_column + */ +void make_device_json_column(device_span<SymbolT const> input, + tree_meta_t const& tree, + device_span<NodeIndexT const> col_ids, + device_span<size_type const> row_offsets, + device_json_column& root, + bool is_array_of_arrays, + cudf::io::json_reader_options const& options, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); +} // namespace experimental + /** * @brief Retrieves the parse_options to be used for type inference and type casting * diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index 1c15e147b13..bf81162a0ac 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -618,12 +618,12 @@ struct PdaSymbolToSymbolGroupId { constexpr auto pda_sgid_lookup_size = static_cast<int32_t>(sizeof(tos_sg_to_pda_sgid) / sizeof(tos_sg_to_pda_sgid[0])); // We map the delimiter character to LINE_BREAK symbol group id, and the newline character - // to OTHER. Note that delimiter cannot be any of opening(closing) brace, bracket, quote, + // to WHITE_SPACE. Note that delimiter cannot be any of opening(closing) brace, bracket, quote, // escape, comma, colon or whitespace characters. auto const symbol_position = symbol == delimiter ? static_cast<int32_t>('\n') - : (symbol == '\n' ? static_cast<int32_t>(delimiter) : static_cast<int32_t>(symbol)); + : (symbol == '\n' ? static_cast<int32_t>(' ') : static_cast<int32_t>(symbol)); PdaSymbolGroupIdT symbol_gid = tos_sg_to_pda_sgid[min(symbol_position, pda_sgid_lookup_size - 1)]; return stack_idx * static_cast<PdaSymbolGroupIdT>(symbol_group_id::NUM_PDA_INPUT_SGS) + diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 2a73c3df41c..a8ba3a969ce 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -294,7 +294,8 @@ struct PageInfo { int32_t uncompressed_page_size; // uncompressed data size in bytes // for V2 pages, the def and rep level data is not compressed, and lacks the 4-byte length // indicator. Instead, the lengths for these are stored in the header. 
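// (The NOLINT annotations introduced below presumably silence clang-tidy's C-style-array warnings; the plain arrays are kept because these structs are shared with device code.)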
- int32_t lvl_bytes[level_type::NUM_LEVEL_TYPES]; // length of the rep/def levels (V2 header) + int32_t // NOLINT + lvl_bytes[level_type::NUM_LEVEL_TYPES]; // length of the rep/def levels (V2 header) // Number of values in this data page or dictionary. // Important : the # of input values does not necessarily // correspond to the number of rows in the output. It just reflects the number @@ -345,7 +346,7 @@ struct PageInfo { PageNestingDecodeInfo* nesting_decode; // level decode buffers - uint8_t* lvl_decode_buf[level_type::NUM_LEVEL_TYPES]; + uint8_t* lvl_decode_buf[level_type::NUM_LEVEL_TYPES]; // NOLINT // temporary space for decoding DELTA_BYTE_ARRAY encoded strings int64_t temp_string_size; @@ -431,14 +432,14 @@ struct ColumnChunkDesc { size_t num_values{}; // total number of values in this column size_t start_row{}; // file-wide, absolute starting row of this chunk uint32_t num_rows{}; // number of rows in this chunk - int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level - int16_t max_nesting_depth{}; // max nesting depth of the output - int32_t type_length{}; // type length from schema (for FLBA only) - Type physical_type{}; // parquet physical data type - uint8_t - level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels - int32_t num_data_pages{}; // number of data pages - int32_t num_dict_pages{}; // number of dictionary pages + int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level // NOLINT + int16_t max_nesting_depth{}; // max nesting depth of the output + int32_t type_length{}; // type length from schema (for FLBA only) + Type physical_type{}; // parquet physical data type + uint8_t level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max // NOLINT + // definition/repetition levels + int32_t num_data_pages{}; // number of data pages + int32_t num_dict_pages{}; // number of dictionary pages PageInfo const* dict_page{}; string_index_pair* str_dict_index{}; // index for string dictionary bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column diff --git a/cpp/src/io/parquet/reader_impl.hpp b/cpp/src/io/parquet/reader_impl.hpp index 2d46da14bec..62ffc4d3077 100644 --- a/cpp/src/io/parquet/reader_impl.hpp +++ b/cpp/src/io/parquet/reader_impl.hpp @@ -188,10 +188,10 @@ class reader::impl { * * Does not decompress the chunk data. * - * @return pair of boolean indicating if compressed chunks were found and a vector of futures for + * @return pair of boolean indicating if compressed chunks were found and a future for * read completion */ - std::pair<bool, std::vector<std::future<void>>> read_column_chunks(); + std::pair<bool, std::future<void>> read_column_chunks(); /** * @brief Read compressed data and page information for the current pass. 
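The implementation hunk that follows mirrors this header change: only one read task is ever launched, so read_column_chunks can return that task's future directly and the caller waits exactly once. If several tasks were ever reintroduced, they could still be folded behind the single-future interface; a minimal sketch with plain standard-library futures (combine_futures is an illustrative helper, not cudf API):

#include <future>
#include <utility>
#include <vector>

// Fold many pending tasks into one future: the combined (deferred) task
// waits on every component the first time the caller calls wait() or get().
std::future<void> combine_futures(std::vector<std::future<void>> tasks)
{
  return std::async(std::launch::deferred, [tasks = std::move(tasks)]() mutable {
    for (auto& task : tasks) {
      task.wait();
    }
  });
}

// usage (names as in the pre-change code): auto read_done = combine_futures(std::move(read_chunk_tasks)); read_done.wait();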
diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index 3060e8739f9..8cab68ea721 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -964,7 +964,7 @@ void reader::impl::allocate_level_decode_space() } } -std::pair<bool, std::vector<std::future<void>>> reader::impl::read_column_chunks() +std::pair<bool, std::future<void>> reader::impl::read_column_chunks() { auto const& row_groups_info = _pass_itm_data->row_groups; @@ -989,7 +989,6 @@ std::pair<bool, std::vector<std::future<void>>> reader::impl::read_column_chunks // TODO: make this respect the pass-wide skip_rows/num_rows instead of the file-wide // skip_rows/num_rows // auto remaining_rows = num_rows; - std::vector<std::future<void>> read_chunk_tasks; size_type chunk_count = 0; for (auto const& rg : row_groups_info) { auto const& row_group = _metadata->get_row_group(rg.index, rg.source_index); @@ -1018,16 +1017,15 @@ std::pair<bool, std::vector<std::future<void>>> reader::impl::read_column_chunks } // Read compressed chunk data to device memory - read_chunk_tasks.push_back(read_column_chunks_async(_sources, - raw_page_data, - chunks, - 0, - chunks.size(), - column_chunk_offsets, - chunk_source_map, - _stream)); - - return {total_decompressed_size > 0, std::move(read_chunk_tasks)}; + return {total_decompressed_size > 0, + read_column_chunks_async(_sources, + raw_page_data, + chunks, + 0, + chunks.size(), + column_chunk_offsets, + chunk_source_map, + _stream)}; } void reader::impl::read_compressed_data() @@ -1042,9 +1040,7 @@ void reader::impl::read_compressed_data() auto const [has_compressed_data, read_chunks_tasks] = read_column_chunks(); pass.has_compressed_data = has_compressed_data; - for (auto& task : read_chunks_tasks) { - task.wait(); - } + read_chunks_tasks.wait(); // Process dataset chunk pages into output columns auto const total_pages = _has_page_index ? count_page_headers_with_pgidx(chunks, _stream) diff --git a/cpp/src/io/utilities/output_builder.cuh b/cpp/src/io/utilities/output_builder.cuh index f7e6de03354..8183a66f4f0 100644 --- a/cpp/src/io/utilities/output_builder.cuh +++ b/cpp/src/io/utilities/output_builder.cuh @@ -307,8 +307,8 @@ class output_builder { * @param mr The memory resource used to allocate the output vector. * @return The output vector. 
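 * (The PR marks gather [[nodiscard]] below: the returned vector is the sole product of the call, so silently discarding it would waste the device allocation and copy.)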
*/ - rmm::device_uvector<T> gather(rmm::cuda_stream_view stream, - rmm::device_async_resource_ref mr) const + [[nodiscard]] rmm::device_uvector<T> gather(rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) const { rmm::device_uvector<T> output{size(), stream, mr}; auto output_it = output.begin(); diff --git a/cpp/src/jit/parser.cpp b/cpp/src/jit/parser.cpp index 398c36821cc..519ac2d1a2e 100644 --- a/cpp/src/jit/parser.cpp +++ b/cpp/src/jit/parser.cpp @@ -19,8 +19,6 @@ #include <cudf/utilities/error.hpp> #include <algorithm> -#include <cctype> -#include <map> #include <set> #include <string> #include <utility> @@ -28,7 +26,7 @@ namespace cudf { namespace jit { -constexpr char percent_escape[] = "_"; +constexpr char percent_escape[] = "_"; // NOLINT inline bool is_white(char const c) { return c == ' ' || c == '\n' || c == '\r' || c == '\t'; } diff --git a/cpp/src/join/join_common_utils.hpp b/cpp/src/join/join_common_utils.hpp index 86402a0e7de..573101cefd9 100644 --- a/cpp/src/join/join_common_utils.hpp +++ b/cpp/src/join/join_common_utils.hpp @@ -22,7 +22,6 @@ #include <cudf/table/row_operators.cuh> #include <cudf/table/table_view.hpp> -#include <cuco/static_map.cuh> #include <cuco/static_multimap.cuh> #include <cuda/atomic> @@ -51,11 +50,6 @@ using mixed_multimap_type = cudf::detail::cuco_allocator<char>, cuco::legacy::double_hashing<1, hash_type, hash_type>>; -using semi_map_type = cuco::legacy::static_map<hash_value_type, - size_type, - cuda::thread_scope_device, - cudf::detail::cuco_allocator<char>>; - using row_hash_legacy = cudf::row_hasher<cudf::hashing::detail::default_hash, cudf::nullate::DYNAMIC>; diff --git a/cpp/src/join/mixed_join_common_utils.cuh b/cpp/src/join/mixed_join_common_utils.cuh index 19701816867..4a52cfe098a 100644 --- a/cpp/src/join/mixed_join_common_utils.cuh +++ b/cpp/src/join/mixed_join_common_utils.cuh @@ -25,6 +25,7 @@ #include <rmm/device_uvector.hpp> #include <cub/cub.cuh> +#include <cuco/static_set.cuh> namespace cudf { namespace detail { @@ -160,6 +161,39 @@ struct pair_expression_equality : public expression_equality<has_nulls> { } }; +/** + * @brief Equality comparator that composes two row_equality comparators. + */ +struct double_row_equality_comparator { + row_equality const equality_comparator; + row_equality const conditional_comparator; + + __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept + { + using experimental::row::lhs_index_type; + using experimental::row::rhs_index_type; + + return equality_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}) && + conditional_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}); + } +}; + +// A CUDA Cooperative Group of 1 thread for the mixed semi join hash set. +auto constexpr DEFAULT_MIXED_SEMI_JOIN_CG_SIZE = 1; + +// The hash set type used by mixed_semi_join with the build_table. +using hash_set_type = + cuco::static_set<size_type, + cuco::extent<size_t>, + cuda::thread_scope_device, + double_row_equality_comparator, + cuco::linear_probing<DEFAULT_MIXED_SEMI_JOIN_CG_SIZE, row_hash>, + cudf::detail::cuco_allocator<char>, + cuco::storage<1>>; + +// The hash_set_ref_type used by mixed_semi_join kernels for probing. 
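// Aside (not part of this diff): a toy, self-contained sketch of the
// cuco::static_set pattern used above, with int keys and a placeholder
// capacity. A ref created with cuco::contains is a device-copyable view
// restricted to lookups, which is all the probing kernel needs.
#include <cuco/static_set.cuh>

#include <cstddef>

inline void static_set_sketch()
{
  cuco::static_set<int> set{cuco::extent<std::size_t>{1024},  // placeholder capacity
                            cuco::empty_key{-1}};             // sentinel marking empty slots
  auto const ref = set.ref(cuco::contains);  // probe-only device ref, passed to kernels by value
  (void)ref;
}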
+using hash_set_ref_type = hash_set_type::ref_type<cuco::contains_tag>; + } // namespace detail } // namespace cudf diff --git a/cpp/src/join/mixed_join_kernels_semi.cu b/cpp/src/join/mixed_join_kernels_semi.cu index 7459ac3e99c..bd8c80652a0 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cu +++ b/cpp/src/join/mixed_join_kernels_semi.cu @@ -38,38 +38,48 @@ CUDF_KERNEL void __launch_bounds__(block_size) table_device_view right_table, table_device_view probe, table_device_view build, - row_hash const hash_probe, row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, + hash_set_ref_type set_ref, cudf::device_span<bool> left_table_keep_mask, cudf::ast::detail::expression_device_view device_expression_data) { + auto constexpr cg_size = hash_set_ref_type::cg_size; + + auto const tile = cg::tiled_partition<cg_size>(cg::this_thread_block()); + // Normally the casting of a shared memory array is used to create multiple // arrays of different types from the shared memory buffer, but here it is // used to circumvent conflicts between arrays of different types between // different template instantiations due to the extern specifier. extern __shared__ char raw_intermediate_storage[]; - cudf::ast::detail::IntermediateDataType<has_nulls>* intermediate_storage = + auto intermediate_storage = reinterpret_cast<cudf::ast::detail::IntermediateDataType<has_nulls>*>(raw_intermediate_storage); auto thread_intermediate_storage = - &intermediate_storage[threadIdx.x * device_expression_data.num_intermediates]; + intermediate_storage + (tile.meta_group_rank() * device_expression_data.num_intermediates); - cudf::size_type const left_num_rows = left_table.num_rows(); - cudf::size_type const right_num_rows = right_table.num_rows(); - auto const outer_num_rows = left_num_rows; + // Equality evaluator to use + auto const evaluator = cudf::ast::detail::expression_evaluator<has_nulls>( + left_table, right_table, device_expression_data); - cudf::size_type outer_row_index = threadIdx.x + blockIdx.x * block_size; + // Make sure to swap_tables here as hash_set will use probe table as the left one + auto constexpr swap_tables = true; + auto const equality = single_expression_equality<has_nulls>{ + evaluator, thread_intermediate_storage, swap_tables, equality_probe}; - auto evaluator = cudf::ast::detail::expression_evaluator<has_nulls>( - left_table, right_table, device_expression_data); + // Create set ref with the new equality comparator + auto const set_ref_equality = set_ref.with_key_eq(equality); - if (outer_row_index < outer_num_rows) { - // Figure out the number of elements for this key. 
- auto equality = single_expression_equality<has_nulls>{ - evaluator, thread_intermediate_storage, false, equality_probe}; + // Total number of rows to query the set + auto const outer_num_rows = left_table.num_rows(); + // Grid stride for the tile + auto const cg_grid_stride = cudf::detail::grid_1d::grid_stride<block_size>() / cg_size; - left_table_keep_mask[outer_row_index] = - hash_table_view.contains(outer_row_index, hash_probe, equality); + // Find all the rows in the left table that are in the hash table + for (auto outer_row_index = cudf::detail::grid_1d::global_thread_id<block_size>() / cg_size; + outer_row_index < outer_num_rows; + outer_row_index += cg_grid_stride) { + auto const result = set_ref_equality.contains(tile, outer_row_index); + if (tile.thread_rank() == 0) { left_table_keep_mask[outer_row_index] = result; } } } @@ -78,9 +88,8 @@ void launch_mixed_join_semi(bool has_nulls, table_device_view right_table, table_device_view probe, table_device_view build, - row_hash const hash_probe, row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, + hash_set_ref_type set_ref, cudf::device_span<bool> left_table_keep_mask, cudf::ast::detail::expression_device_view device_expression_data, detail::grid_1d const config, @@ -94,9 +103,8 @@ void launch_mixed_join_semi(bool has_nulls, right_table, probe, build, - hash_probe, equality_probe, - hash_table_view, + set_ref, left_table_keep_mask, device_expression_data); } else { @@ -106,9 +114,8 @@ void launch_mixed_join_semi(bool has_nulls, right_table, probe, build, - hash_probe, equality_probe, - hash_table_view, + set_ref, left_table_keep_mask, device_expression_data); } diff --git a/cpp/src/join/mixed_join_kernels_semi.cuh b/cpp/src/join/mixed_join_kernels_semi.cuh index 43714ffb36a..b08298e64e4 100644 --- a/cpp/src/join/mixed_join_kernels_semi.cuh +++ b/cpp/src/join/mixed_join_kernels_semi.cuh @@ -45,9 +45,8 @@ namespace detail { * @param[in] right_table The right table * @param[in] probe The table with which to probe the hash table for matches. * @param[in] build The table with which the hash table was built. - * @param[in] hash_probe The hasher used for the probe table. * @param[in] equality_probe The equality comparator used when probing the hash table. - * @param[in] hash_table_view The hash table built from `build`. + * @param[in] set_ref The hash table device view built from `build`. * @param[out] left_table_keep_mask The result of the join operation with "true" element indicating * the corresponding index from left table is present in output * @param[in] device_expression_data Container of device data required to evaluate the desired @@ -58,9 +57,8 @@ void launch_mixed_join_semi(bool has_nulls, table_device_view right_table, table_device_view probe, table_device_view build, - row_hash const hash_probe, row_equality const equality_probe, - cudf::detail::semi_map_type::device_view hash_table_view, + hash_set_ref_type set_ref, cudf::device_span<bool> left_table_keep_mask, cudf::ast::detail::expression_device_view device_expression_data, detail::grid_1d const config, diff --git a/cpp/src/join/mixed_join_semi.cu b/cpp/src/join/mixed_join_semi.cu index aa4fa281159..83a55eca50f 100644 --- a/cpp/src/join/mixed_join_semi.cu +++ b/cpp/src/join/mixed_join_semi.cu @@ -45,45 +45,6 @@ namespace cudf { namespace detail { -namespace { -/** - * @brief Device functor to create a pair of hash value and index for a given row. 
- */ -struct make_pair_function_semi { - __device__ __forceinline__ cudf::detail::pair_type operator()(size_type i) const noexcept - { - // The value is irrelevant since we only ever use the hash map to check for - // membership of a particular row index. - return cuco::make_pair(static_cast<hash_value_type>(i), 0); - } -}; - -/** - * @brief Equality comparator that composes two row_equality comparators. - */ -class double_row_equality { - public: - double_row_equality(row_equality equality_comparator, row_equality conditional_comparator) - : _equality_comparator{equality_comparator}, _conditional_comparator{conditional_comparator} - { - } - - __device__ bool operator()(size_type lhs_row_index, size_type rhs_row_index) const noexcept - { - using experimental::row::lhs_index_type; - using experimental::row::rhs_index_type; - - return _equality_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}) && - _conditional_comparator(lhs_index_type{lhs_row_index}, rhs_index_type{rhs_row_index}); - } - - private: - row_equality _equality_comparator; - row_equality _conditional_comparator; -}; - -} // namespace - std::unique_ptr<rmm::device_uvector<size_type>> mixed_join_semi( table_view const& left_equality, table_view const& right_equality, @@ -95,7 +56,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_join_semi( rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - CUDF_EXPECTS((join_type != join_kind::INNER_JOIN) && (join_type != join_kind::LEFT_JOIN) && + CUDF_EXPECTS((join_type != join_kind::INNER_JOIN) and (join_type != join_kind::LEFT_JOIN) and (join_type != join_kind::FULL_JOIN), "Inner, left, and full joins should use mixed_join."); @@ -136,7 +97,7 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_join_semi( // output column and follow the null-supporting expression evaluation code // path. auto const has_nulls = cudf::nullate::DYNAMIC{ - cudf::has_nulls(left_equality) || cudf::has_nulls(right_equality) || + cudf::has_nulls(left_equality) or cudf::has_nulls(right_equality) or binary_predicate.may_evaluate_null(left_conditional, right_conditional, stream)}; auto const parser = ast::detail::expression_parser{ @@ -155,27 +116,20 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_join_semi( auto right_conditional_view = table_device_view::create(right_conditional, stream); auto const preprocessed_build = - experimental::row::equality::preprocessed_table::create(build, stream); + cudf::experimental::row::equality::preprocessed_table::create(build, stream); auto const preprocessed_probe = - experimental::row::equality::preprocessed_table::create(probe, stream); + cudf::experimental::row::equality::preprocessed_table::create(probe, stream); auto const row_comparator = - cudf::experimental::row::equality::two_table_comparator{preprocessed_probe, preprocessed_build}; + cudf::experimental::row::equality::two_table_comparator{preprocessed_build, preprocessed_probe}; auto const equality_probe = row_comparator.equal_to<false>(has_nulls, compare_nulls); - semi_map_type hash_table{ - compute_hash_table_size(build.num_rows()), - cuco::empty_key{std::numeric_limits<hash_value_type>::max()}, - cuco::empty_value{cudf::detail::JoinNoneValue}, - cudf::detail::cuco_allocator<char>{rmm::mr::polymorphic_allocator<char>{}, stream}, - stream.value()}; - // Create hash table containing all keys found in right table // TODO: To add support for nested columns we will need to flatten in many // places. 
However, this probably isn't worth adding any time soon since we // won't be able to support AST conditions for those types anyway. auto const build_nulls = cudf::nullate::DYNAMIC{cudf::has_nulls(build)}; auto const row_hash_build = cudf::experimental::row::hash::row_hasher{preprocessed_build}; - auto const hash_build = row_hash_build.device_hasher(build_nulls); + // Since we may see multiple rows that are identical in the equality tables // but differ in the conditional tables, the equality comparator used for // insertion must account for both sets of tables. An alternative solution @@ -190,20 +144,28 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_join_semi( auto const equality_build_equality = row_comparator_build.equal_to<false>(build_nulls, compare_nulls); auto const preprocessed_build_condtional = - experimental::row::equality::preprocessed_table::create(right_conditional, stream); + cudf::experimental::row::equality::preprocessed_table::create(right_conditional, stream); auto const row_comparator_conditional_build = cudf::experimental::row::equality::two_table_comparator{preprocessed_build_condtional, preprocessed_build_condtional}; auto const equality_build_conditional = row_comparator_conditional_build.equal_to<false>(build_nulls, compare_nulls); - double_row_equality equality_build{equality_build_equality, equality_build_conditional}; - make_pair_function_semi pair_func_build{}; - auto iter = cudf::detail::make_counting_transform_iterator(0, pair_func_build); + hash_set_type row_set{ + {compute_hash_table_size(build.num_rows())}, + cuco::empty_key{JoinNoneValue}, + {equality_build_equality, equality_build_conditional}, + {row_hash_build.device_hasher(build_nulls)}, + {}, + {}, + cudf::detail::cuco_allocator<char>{rmm::mr::polymorphic_allocator<char>{}, stream}, + {stream.value()}}; + + auto iter = thrust::make_counting_iterator(0); // skip rows that are null here. 
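// Aside (not part of this diff): a counting iterator materializes no storage;
// dereferencing position i yields i, so insert_async(iter, iter + right_num_rows, ...)
// below inserts the row indices [0, right_num_rows) as the set's keys. A tiny
// host-side illustration:
#include <thrust/iterator/counting_iterator.h>

inline void counting_iterator_sketch()
{
  auto const it = thrust::make_counting_iterator(0);
  int const first = it[0];  // 0
  int const tenth = it[9];  // 9
  (void)first;
  (void)tenth;
}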
if ((compare_nulls == null_equality::EQUAL) or (not nullable(build))) { - hash_table.insert(iter, iter + right_num_rows, hash_build, equality_build, stream.value()); + row_set.insert_async(iter, iter + right_num_rows, stream.value()); } else { thrust::counting_iterator<cudf::size_type> stencil(0); auto const [row_bitmask, _] = @@ -211,18 +173,19 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_join_semi( row_is_valid pred{static_cast<bitmask_type const*>(row_bitmask.data())}; // insert valid rows - hash_table.insert_if( - iter, iter + right_num_rows, stencil, pred, hash_build, equality_build, stream.value()); + row_set.insert_if_async(iter, iter + right_num_rows, stencil, pred, stream.value()); } - auto hash_table_view = hash_table.get_device_view(); - - detail::grid_1d const config(outer_num_rows, DEFAULT_JOIN_BLOCK_SIZE); - auto const shmem_size_per_block = parser.shmem_per_thread * config.num_threads_per_block; + detail::grid_1d const config(outer_num_rows * hash_set_type::cg_size, DEFAULT_JOIN_BLOCK_SIZE); + auto const shmem_size_per_block = + parser.shmem_per_thread * + cuco::detail::int_div_ceil(config.num_threads_per_block, hash_set_type::cg_size); auto const row_hash = cudf::experimental::row::hash::row_hasher{preprocessed_probe}; auto const hash_probe = row_hash.device_hasher(has_nulls); + hash_set_ref_type const row_set_ref = row_set.ref(cuco::contains).with_hash_function(hash_probe); + // Vector used to indicate indices from left/probe table which are present in output auto left_table_keep_mask = rmm::device_uvector<bool>(probe.num_rows(), stream); @@ -231,9 +194,8 @@ std::unique_ptr<rmm::device_uvector<size_type>> mixed_join_semi( *right_conditional_view, *probe_view, *build_view, - hash_probe, equality_probe, - hash_table_view, + row_set_ref, cudf::device_span<bool>(left_table_keep_mask), parser.device_expression_data, config, diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index 7c4c89bd3fb..51c6e765edd 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -35,7 +35,7 @@ namespace strings { namespace detail { namespace { // Bitmask of all operators -#define OPERATOR_MASK 0200 +enum { OPERATOR_MASK = 0200 }; enum OperatorType : int32_t { START = 0200, // Start, used for marker on stack LBRA_NC = 0203, // non-capturing group @@ -50,7 +50,7 @@ enum OperatorType : int32_t { COUNTED_LAZY = 0215, NOP = 0302, // No operation, internal use only }; -#define ITEM_MASK 0300 +enum { ITEM_MASK = 0300 }; static reclass cclass_w(CCLASS_W); // \w static reclass cclass_s(CCLASS_S); // \s diff --git a/cpp/src/strings/search/findall.cu b/cpp/src/strings/search/findall.cu index 067a513af96..d8c1b50a94b 100644 --- a/cpp/src/strings/search/findall.cu +++ b/cpp/src/strings/search/findall.cu @@ -23,6 +23,7 @@ #include <cudf/detail/null_mask.hpp> #include <cudf/detail/nvtx/ranges.hpp> #include <cudf/detail/offsets_iterator_factory.cuh> +#include <cudf/lists/detail/lists_column_factories.hpp> #include <cudf/strings/detail/strings_column_factories.cuh> #include <cudf/strings/findall.hpp> #include <cudf/strings/string_view.cuh> @@ -97,8 +98,11 @@ std::unique_ptr<column> findall(strings_column_view const& input, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - auto const strings_count = input.size(); - auto const d_strings = column_device_view::create(input.parent(), stream); + if (input.is_empty()) { + return cudf::lists::detail::make_empty_lists_column(input.parent().type(), stream, mr); + } + + auto 
const d_strings = column_device_view::create(input.parent(), stream); // create device object from regex_program auto d_prog = regex_device_builder::create_prog_device(prog, stream); @@ -113,7 +117,7 @@ std::unique_ptr<column> findall(strings_column_view const& input, auto strings_output = findall_util(*d_strings, *d_prog, total_matches, d_offsets, stream, mr); // Build the lists column from the offsets and the strings - return make_lists_column(strings_count, + return make_lists_column(input.size(), std::move(offsets), std::move(strings_output), input.null_count(), diff --git a/cpp/src/utilities/stream_pool.cpp b/cpp/src/utilities/stream_pool.cpp index 9824c472b20..8c29182bfb5 100644 --- a/cpp/src/utilities/stream_pool.cpp +++ b/cpp/src/utilities/stream_pool.cpp @@ -82,7 +82,7 @@ class rmm_cuda_stream_pool : public cuda_stream_pool { return streams; } - std::size_t get_stream_pool_size() const override { return STREAM_POOL_SIZE; } + [[nodiscard]] std::size_t get_stream_pool_size() const override { return STREAM_POOL_SIZE; } }; /** diff --git a/cpp/tests/ast/transform_tests.cpp b/cpp/tests/ast/transform_tests.cpp index 6b350c137d0..a4bde50a21e 100644 --- a/cpp/tests/ast/transform_tests.cpp +++ b/cpp/tests/ast/transform_tests.cpp @@ -378,7 +378,7 @@ TEST_F(TransformTest, DeeplyNestedArithmeticLogicalExpression) auto expressions = std::list<cudf::ast::operation>(); auto op = arithmetic_operator; - expressions.push_back(cudf::ast::operation(op, col_ref, col_ref)); + expressions.emplace_back(op, col_ref, col_ref); for (int64_t i = 0; i < depth_level - 1; i++) { if (i == depth_level - 2) { @@ -387,9 +387,9 @@ TEST_F(TransformTest, DeeplyNestedArithmeticLogicalExpression) op = arithmetic_operator; } if (nested_left_tree) { - expressions.push_back(cudf::ast::operation(op, expressions.back(), col_ref)); + expressions.emplace_back(op, expressions.back(), col_ref); } else { - expressions.push_back(cudf::ast::operation(op, col_ref, expressions.back())); + expressions.emplace_back(op, col_ref, expressions.back()); } } return expressions; diff --git a/cpp/tests/binaryop/util/operation.h b/cpp/tests/binaryop/util/operation.h index c900c4c558c..d36b48d666a 100644 --- a/cpp/tests/binaryop/util/operation.h +++ b/cpp/tests/binaryop/util/operation.h @@ -48,7 +48,7 @@ struct Add { void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { - using TypeCommon = typename std::common_type<OutT, TypeLhs, TypeRhs>::type; + using TypeCommon = std::common_type_t<OutT, TypeLhs, TypeRhs>; return static_cast<OutT>(static_cast<TypeCommon>(lhs) + static_cast<TypeCommon>(rhs)); } }; @@ -72,7 +72,7 @@ struct Sub { void>* = nullptr> OutT operator()(TypeLhs lhs, TypeRhs rhs) const { - using TypeCommon = typename std::common_type<OutT, TypeLhs, TypeRhs>::type; + using TypeCommon = std::common_type_t<OutT, TypeLhs, TypeRhs>; return static_cast<OutT>(static_cast<TypeCommon>(lhs) - static_cast<TypeCommon>(rhs)); } }; @@ -83,7 +83,7 @@ struct Mul { std::enable_if_t<!cudf::is_duration_t<OutT>::value, void>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) const { - using TypeCommon = typename std::common_type<TypeOut, TypeLhs, TypeRhs>::type; + using TypeCommon = std::common_type_t<TypeOut, TypeLhs, TypeRhs>; return static_cast<TypeOut>(static_cast<TypeCommon>(lhs) * static_cast<TypeCommon>(rhs)); } @@ -112,7 +112,7 @@ struct Div { std::enable_if_t<!cudf::is_duration_t<LhsT>::value, void>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { - using TypeCommon = typename std::common_type<TypeOut, TypeLhs, 
TypeRhs>::type; + using TypeCommon = std::common_type_t<TypeOut, TypeLhs, TypeRhs>; return static_cast<TypeOut>(static_cast<TypeCommon>(lhs) / static_cast<TypeCommon>(rhs)); } @@ -191,33 +191,31 @@ struct FloorDiv { template <typename TypeOut, typename TypeLhs, typename TypeRhs> struct Mod { - template <typename OutT = TypeOut, - typename LhsT = TypeLhs, - typename RhsT = TypeRhs, - std::enable_if_t< - (std::is_integral_v<typename std::common_type<OutT, LhsT, RhsT>::type>)>* = nullptr> + template <typename OutT = TypeOut, + typename LhsT = TypeLhs, + typename RhsT = TypeRhs, + std::enable_if_t<(std::is_integral_v<std::common_type_t<OutT, LhsT, RhsT>>)>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { - using TypeCommon = typename std::common_type<TypeOut, TypeLhs, TypeRhs>::type; + using TypeCommon = std::common_type_t<TypeOut, TypeLhs, TypeRhs>; return static_cast<TypeOut>(static_cast<TypeCommon>(lhs) % static_cast<TypeCommon>(rhs)); } - template <typename OutT = TypeOut, - typename LhsT = TypeLhs, - typename RhsT = TypeRhs, - std::enable_if_t<( - std::is_same_v<typename std::common_type<OutT, LhsT, RhsT>::type, float>)>* = nullptr> + template < + typename OutT = TypeOut, + typename LhsT = TypeLhs, + typename RhsT = TypeRhs, + std::enable_if_t<(std::is_same_v<std::common_type_t<OutT, LhsT, RhsT>, float>)>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { return static_cast<TypeOut>(fmod(static_cast<float>(lhs), static_cast<float>(rhs))); } template < - typename OutT = TypeOut, - typename LhsT = TypeLhs, - typename RhsT = TypeRhs, - std::enable_if_t<(std::is_same_v<typename std::common_type<OutT, LhsT, RhsT>::type, double>)>* = - nullptr> + typename OutT = TypeOut, + typename LhsT = TypeLhs, + typename RhsT = TypeRhs, + std::enable_if_t<(std::is_same_v<std::common_type_t<OutT, LhsT, RhsT>, double>)>* = nullptr> TypeOut operator()(TypeLhs lhs, TypeRhs rhs) { return static_cast<TypeOut>(fmod(static_cast<double>(lhs), static_cast<double>(rhs))); @@ -326,7 +324,7 @@ struct LogBase { template <typename TypeOut, typename TypeLhs, typename TypeRhs> struct PMod { - using CommonArgsT = typename std::common_type<TypeLhs, TypeRhs>::type; + using CommonArgsT = std::common_type_t<TypeLhs, TypeRhs>; TypeOut operator()(TypeLhs x, TypeRhs y) const { @@ -351,8 +349,8 @@ struct PyMod { TypeOut operator()(TypeLhs x, TypeRhs y) const { if constexpr (std::is_floating_point_v<TypeLhs> or std::is_floating_point_v<TypeRhs>) { - double x1 = static_cast<double>(x); - double y1 = static_cast<double>(y); + auto x1 = static_cast<double>(x); + auto y1 = static_cast<double>(y); return fmod(fmod(x1, y1) + y1, y1); } else { return ((x % y) + y) % y; diff --git a/cpp/tests/copying/copy_tests.cpp b/cpp/tests/copying/copy_tests.cpp index 7c8729b6a77..4124f749012 100644 --- a/cpp/tests/copying/copy_tests.cpp +++ b/cpp/tests/copying/copy_tests.cpp @@ -73,44 +73,45 @@ TYPED_TEST(CopyTest, CopyIfElseTestLong) using T = TypeParam; // make sure we span at least 2 warps - int num_els = 64; - - bool mask[] = {true, false, true, false, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, false, false, false, false, true, true, true, - true, true, true, true, true, true, false, false, false, false, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + num_els); - - bool lhs_v[] = {true, true, 
true, true, false, false, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true}; - wrapper<T, int32_t> lhs_w({5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}, - lhs_v); - - bool rhs_v[] = {true, true, true, true, true, true, false, false, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true}; - wrapper<T, int32_t> rhs_w({6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, - 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6}, - rhs_v); - - bool exp_v[] = {true, true, true, true, false, false, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true, true, - true, true, true, true, true, true, true, true, true, true, true, true}; - wrapper<T, int32_t> expected_w({5, 6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, - 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, - 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}, - exp_v); + constexpr int num_els = 64; + + std::array<bool, num_els> mask{ + true, false, true, false, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, false, false, false, false, true, true, true, + true, true, true, true, true, true, false, false, false, false, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); + + wrapper<T, int32_t> lhs_w( + {5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}, + {true, true, true, true, false, false, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true}); + + wrapper<T, int32_t> rhs_w( + {6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6, 6}, + {true, true, true, true, true, true, false, false, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, 
true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true}); + + wrapper<T, int32_t> expected_w( + {5, 6, 5, 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, + 6, 5, 5, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 5, 5, 5, 5, 5, 5, 5, 5, + 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5}, + {true, true, true, true, false, false, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true, true, + true, true, true, true, true, true, true, true, true, true, true, true}); auto out = cudf::copy_if_else(lhs_w, rhs_w, mask_w); CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); @@ -318,19 +319,17 @@ TYPED_TEST(CopyTestNumeric, CopyIfElseTestScalarColumn) { using T = TypeParam; - int num_els = 4; - - bool mask[] = {true, false, false, true}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + num_els); + std::array mask{true, false, false, true}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); cudf::numeric_scalar<T> lhs_w(5); auto const rhs = cudf::test::make_type_param_vector<T>({6, 6, 6, 6}); - bool rhs_v[] = {true, false, true, true}; - wrapper<T> rhs_w(rhs.begin(), rhs.end(), rhs_v); + std::array rhs_v{true, false, true, true}; + wrapper<T> rhs_w(rhs.begin(), rhs.end(), rhs_v.begin()); auto const expected = cudf::test::make_type_param_vector<T>({5, 6, 6, 5}); - wrapper<T> expected_w(expected.begin(), expected.end(), rhs_v); + wrapper<T> expected_w(expected.begin(), expected.end(), rhs_v.begin()); auto out = cudf::copy_if_else(lhs_w, rhs_w, mask_w); CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); @@ -340,20 +339,18 @@ TYPED_TEST(CopyTestNumeric, CopyIfElseTestColumnScalar) { using T = TypeParam; - int num_els = 4; - - bool mask[] = {true, false, false, true}; - bool mask_v[] = {true, true, true, false}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + num_els, mask_v); + std::array mask{true, false, false, true}; + std::array mask_v{true, true, true, false}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end(), mask_v.begin()); auto const lhs = cudf::test::make_type_param_vector<T>({5, 5, 5, 5}); - bool lhs_v[] = {false, true, true, true}; - wrapper<T> lhs_w(lhs.begin(), lhs.end(), lhs_v); + std::array lhs_v{false, true, true, true}; + wrapper<T> lhs_w(lhs.begin(), lhs.end(), lhs_v.begin()); cudf::numeric_scalar<T> rhs_w(6); auto const expected = cudf::test::make_type_param_vector<T>({5, 6, 6, 6}); - wrapper<T> expected_w(expected.begin(), expected.end(), lhs_v); + wrapper<T> expected_w(expected.begin(), expected.end(), lhs_v.begin()); auto out = cudf::copy_if_else(lhs_w, rhs_w, mask_w); CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); @@ -363,16 +360,14 @@ TYPED_TEST(CopyTestNumeric, CopyIfElseTestScalarScalar) { using T = TypeParam; - int num_els = 4; - - bool mask[] = {true, false, false, true}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + num_els); + std::array mask{true, false, false, true}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); cudf::numeric_scalar<T> lhs_w(5); cudf::numeric_scalar<T> rhs_w(6, false); auto const expected = cudf::test::make_type_param_vector<T>({5, 6, 6, 
5}); - wrapper<T> expected_w(expected.begin(), expected.end(), mask); + wrapper<T> expected_w(expected.begin(), expected.end(), mask.begin()); auto out = cudf::copy_if_else(lhs_w, rhs_w, mask_w); CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); @@ -405,17 +400,15 @@ TYPED_TEST(CopyTestChrono, CopyIfElseTestScalarColumn) { using T = TypeParam; - int num_els = 4; - - bool mask[] = {true, false, false, true}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + num_els); + std::array mask{true, false, false, true}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); auto lhs_w = create_chrono_scalar<T>{}(cudf::test::make_type_param_scalar<T>(5), true); - bool rhs_v[] = {true, false, true, true}; - wrapper<T, int32_t> rhs_w({6, 6, 6, 6}, rhs_v); + std::array rhs_v{true, false, true, true}; + wrapper<T, int32_t> rhs_w({6, 6, 6, 6}, rhs_v.begin()); - wrapper<T, int32_t> expected_w({5, 6, 6, 5}, rhs_v); + wrapper<T, int32_t> expected_w({5, 6, 6, 5}, rhs_v.begin()); auto out = cudf::copy_if_else(lhs_w, rhs_w, mask_w); CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); @@ -425,17 +418,15 @@ TYPED_TEST(CopyTestChrono, CopyIfElseTestColumnScalar) { using T = TypeParam; - int num_els = 4; - - bool mask[] = {true, false, false, true}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + num_els); + std::array mask{true, false, false, true}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); - bool lhs_v[] = {false, true, true, true}; - wrapper<T, int32_t> lhs_w({5, 5, 5, 5}, lhs_v); + std::array lhs_v{false, true, true, true}; + wrapper<T, int32_t> lhs_w({5, 5, 5, 5}, lhs_v.begin()); auto rhs_w = create_chrono_scalar<T>{}(cudf::test::make_type_param_scalar<T>(6), true); - wrapper<T, int32_t> expected_w({5, 6, 6, 5}, lhs_v); + wrapper<T, int32_t> expected_w({5, 6, 6, 5}, lhs_v.begin()); auto out = cudf::copy_if_else(lhs_w, rhs_w, mask_w); CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); @@ -445,15 +436,13 @@ TYPED_TEST(CopyTestChrono, CopyIfElseTestScalarScalar) { using T = TypeParam; - int num_els = 4; - - bool mask[] = {true, false, false, true}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + num_els); + std::array mask{true, false, false, true}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); auto lhs_w = create_chrono_scalar<T>{}(cudf::test::make_type_param_scalar<T>(5), true); auto rhs_w = create_chrono_scalar<T>{}(cudf::test::make_type_param_scalar<T>(6), false); - wrapper<T, int32_t> expected_w({5, 6, 6, 5}, mask); + wrapper<T, int32_t> expected_w({5, 6, 6, 5}, mask.begin()); auto out = cudf::copy_if_else(lhs_w, rhs_w, mask_w); CUDF_TEST_EXPECT_COLUMNS_EQUAL(out->view(), expected_w); @@ -483,9 +472,9 @@ TEST_F(StringsCopyIfElseTest, CopyIfElse) std::vector<char const*> h_strings2{"zz", "", "yyy", "w", "ééé", "ooo"}; cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), valids); - bool mask[] = {true, true, false, true, false, true}; - bool mask_v[] = {true, true, true, true, true, false}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + 6, mask_v); + std::array mask{true, true, false, true, false, true}; + std::array mask_v{true, true, true, true, true, false}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end(), mask_v.begin()); auto results = cudf::copy_if_else(strings1, strings2, mask_w); @@ -510,9 +499,9 @@ TEST_F(StringsCopyIfElseTest, CopyIfElseScalarColumn) 
std::vector<char const*> h_strings2{"zz", "", "yyy", "w", "ééé", "ooo"}; cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), valids); - bool mask[] = {true, false, true, false, true, false}; - bool mask_v[] = {true, true, true, true, true, false}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + 6, mask_v); + std::array mask{true, false, true, false, true, false}; + std::array mask_v{true, true, true, true, true, false}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end(), mask_v.begin()); auto results = cudf::copy_if_else(strings1, strings2, mask_w); @@ -538,8 +527,8 @@ TEST_F(StringsCopyIfElseTest, CopyIfElseColumnScalar) std::vector<char const*> h_strings2{"zz", "", "yyy", "w", "ééé", "ooo"}; cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), valids); - bool mask[] = {false, true, true, true, false, true}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + 6); + std::array mask{false, true, true, true, false, true}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); auto results = cudf::copy_if_else(strings2, strings1, mask_w); @@ -565,9 +554,8 @@ TEST_F(StringsCopyIfElseTest, CopyIfElseScalarScalar) std::vector<char const*> h_string2{"aaa"}; cudf::string_scalar string2{h_string2[0], false}; - constexpr cudf::size_type mask_size = 6; - bool mask[] = {true, false, true, false, true, false}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + mask_size); + std::array mask{true, false, true, false, true, false}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); auto results = cudf::copy_if_else(string1, string2, mask_w); @@ -652,9 +640,9 @@ TEST_F(DictionaryCopyIfElseTest, ColumnColumn) cudf::test::dictionary_column_wrapper<std::string> input2( h_strings2.begin(), h_strings2.end(), valids); - bool mask[] = {true, true, false, true, false, true}; - bool mask_v[] = {true, true, true, true, true, false}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + 6, mask_v); + std::array mask{true, true, false, true, false, true}; + std::array mask_v{true, true, true, true, true, false}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end(), mask_v.begin()); auto results = cudf::copy_if_else(input1, input2, mask_w); auto decoded = cudf::dictionary::decode(cudf::dictionary_column_view(results->view())); @@ -679,8 +667,8 @@ TEST_F(DictionaryCopyIfElseTest, ColumnScalar) cudf::test::dictionary_column_wrapper<std::string> input2( h_strings.begin(), h_strings.end(), valids); - bool mask[] = {false, true, true, true, false, true}; - cudf::test::fixed_width_column_wrapper<bool> mask_w(mask, mask + 6); + std::array mask{false, true, true, true, false, true}; + cudf::test::fixed_width_column_wrapper<bool> mask_w(mask.begin(), mask.end()); auto results = cudf::copy_if_else(input2, input1, mask_w); auto decoded = cudf::dictionary::decode(cudf::dictionary_column_view(results->view())); diff --git a/cpp/tests/filling/sequence_tests.cpp b/cpp/tests/filling/sequence_tests.cpp index 5651a26f192..0783b4e5bbb 100644 --- a/cpp/tests/filling/sequence_tests.cpp +++ b/cpp/tests/filling/sequence_tests.cpp @@ -41,8 +41,7 @@ TYPED_TEST(SequenceTypedTestFixture, Incrementing) cudf::size_type num_els = 10; - T expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - cudf::test::fixed_width_column_wrapper<T> expected_w(expected, expected + num_els); + cudf::test::fixed_width_column_wrapper<T> 
expected_w({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); auto result = cudf::sequence(num_els, init, step); @@ -58,8 +57,8 @@ TYPED_TEST(SequenceTypedTestFixture, Decrementing) cudf::size_type num_els = 10; - T expected[] = {0, -5, -10, -15, -20, -25, -30, -35, -40, -45}; - cudf::test::fixed_width_column_wrapper<T> expected_w(expected, expected + num_els); + cudf::test::fixed_width_column_wrapper<T> expected_w( + {0, -5, -10, -15, -20, -25, -30, -35, -40, -45}); auto result = cudf::sequence(num_els, init, step); @@ -75,8 +74,7 @@ TYPED_TEST(SequenceTypedTestFixture, EmptyOutput) cudf::size_type num_els = 0; - T expected[] = {}; - cudf::test::fixed_width_column_wrapper<T> expected_w(expected, expected + num_els); + cudf::test::fixed_width_column_wrapper<T> expected_w({}); auto result = cudf::sequence(num_els, init, step); @@ -121,8 +119,7 @@ TYPED_TEST(SequenceTypedTestFixture, DefaultStep) cudf::size_type num_els = 10; - T expected[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; - cudf::test::fixed_width_column_wrapper<T> expected_w(expected, expected + num_els); + cudf::test::fixed_width_column_wrapper<T> expected_w({0, 1, 2, 3, 4, 5, 6, 7, 8, 9}); auto result = cudf::sequence(num_els, init); diff --git a/cpp/tests/groupby/collect_list_tests.cpp b/cpp/tests/groupby/collect_list_tests.cpp index 749f4013013..a79b6a32916 100644 --- a/cpp/tests/groupby/collect_list_tests.cpp +++ b/cpp/tests/groupby/collect_list_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -127,8 +127,9 @@ TYPED_TEST(groupby_collect_list_test, CollectListsWithNullExclusion) using LCW = cudf::test::lists_column_wrapper<V, int32_t>; cudf::test::fixed_width_column_wrapper<K, int32_t> keys{1, 1, 2, 2, 3, 3, 4, 4}; - bool const validity_mask[] = {true, false, false, true, true, true, false, false}; - LCW values{{{1, 2}, {3, 4}, {5, 6, 7}, LCW{}, {9, 10}, {11}, {20, 30, 40}, LCW{}}, validity_mask}; + std::array const validity_mask{true, false, false, true, true, true, false, false}; + LCW values{{{1, 2}, {3, 4}, {5, 6, 7}, LCW{}, {9, 10}, {11}, {20, 30, 40}, LCW{}}, + validity_mask.data()}; cudf::test::fixed_width_column_wrapper<K, int32_t> expect_keys{1, 2, 3, 4}; diff --git a/cpp/tests/groupby/mean_tests.cpp b/cpp/tests/groupby/mean_tests.cpp index 0cb5ee30a8b..e9c72293649 100644 --- a/cpp/tests/groupby/mean_tests.cpp +++ b/cpp/tests/groupby/mean_tests.cpp @@ -49,7 +49,7 @@ TYPED_TEST(groupby_mean_test, basic) { using V = TypeParam; using R = cudf::detail::target_type_t<V, cudf::aggregation::MEAN>; - using RT = typename std::conditional<cudf::is_duration<R>(), int, double>::type; + using RT = std::conditional_t<cudf::is_duration<R>(), int, double>; cudf::test::fixed_width_column_wrapper<K> keys{1, 2, 3, 1, 2, 2, 1, 3, 3, 2}; cudf::test::fixed_width_column_wrapper<V> vals{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; @@ -114,7 +114,7 @@ TYPED_TEST(groupby_mean_test, null_keys_and_values) { using V = TypeParam; using R = cudf::detail::target_type_t<V, cudf::aggregation::MEAN>; - using RT = typename std::conditional<cudf::is_duration<R>(), int, double>::type; + using RT = std::conditional_t<cudf::is_duration<R>(), int, double>; cudf::test::fixed_width_column_wrapper<K> keys( {1, 2, 3, 1, 2, 2, 1, 3, 3, 2, 4}, diff --git a/cpp/tests/interop/dlpack_test.cpp b/cpp/tests/interop/dlpack_test.cpp index 330f07ac8e2..ef4b9dd9b8a 100644 --- 
a/cpp/tests/interop/dlpack_test.cpp +++ b/cpp/tests/interop/dlpack_test.cpp @@ -225,8 +225,8 @@ TEST_F(DLPackUntypedTests, UnsupportedBroadcast1DTensorFromDlpack) constexpr int ndim = 1; // Broadcasted (stride-0) 1D tensor auto const data = cudf::test::make_type_param_vector<T>({1}); - int64_t shape[ndim] = {5}; - int64_t strides[ndim] = {0}; + int64_t shape[ndim] = {5}; // NOLINT + int64_t strides[ndim] = {0}; // NOLINT DLManagedTensor tensor{}; tensor.dl_tensor.device.device_type = kDLCPU; @@ -248,8 +248,8 @@ TEST_F(DLPackUntypedTests, UnsupportedStrided1DTensorFromDlpack) constexpr int ndim = 1; // Strided 1D tensor auto const data = cudf::test::make_type_param_vector<T>({1, 2, 3, 4}); - int64_t shape[ndim] = {2}; - int64_t strides[ndim] = {2}; + int64_t shape[ndim] = {2}; // NOLINT + int64_t strides[ndim] = {2}; // NOLINT DLManagedTensor tensor{}; tensor.dl_tensor.device.device_type = kDLCPU; @@ -271,7 +271,7 @@ TEST_F(DLPackUntypedTests, UnsupportedImplicitRowMajor2DTensorFromDlpack) constexpr int ndim = 2; // Row major 2D tensor auto const data = cudf::test::make_type_param_vector<T>({1, 2, 3, 4}); - int64_t shape[ndim] = {2, 2}; + int64_t shape[ndim] = {2, 2}; // NOLINT DLManagedTensor tensor{}; tensor.dl_tensor.device.device_type = kDLCPU; @@ -293,8 +293,8 @@ TEST_F(DLPackUntypedTests, UnsupportedExplicitRowMajor2DTensorFromDlpack) constexpr int ndim = 2; // Row major 2D tensor with explicit strides auto const data = cudf::test::make_type_param_vector<T>({1, 2, 3, 4}); - int64_t shape[ndim] = {2, 2}; - int64_t strides[ndim] = {2, 1}; + int64_t shape[ndim] = {2, 2}; // NOLINT + int64_t strides[ndim] = {2, 1}; // NOLINT DLManagedTensor tensor{}; tensor.dl_tensor.device.device_type = kDLCPU; @@ -316,8 +316,8 @@ TEST_F(DLPackUntypedTests, UnsupportedStridedColMajor2DTensorFromDlpack) constexpr int ndim = 2; // Column major, but strided in fastest dimension auto const data = cudf::test::make_type_param_vector<T>({1, 2, 3, 4, 5, 6, 7, 8}); - int64_t shape[ndim] = {2, 2}; - int64_t strides[ndim] = {2, 4}; + int64_t shape[ndim] = {2, 2}; // NOLINT + int64_t strides[ndim] = {2, 4}; // NOLINT DLManagedTensor tensor{}; tensor.dl_tensor.device.device_type = kDLCPU; @@ -465,8 +465,8 @@ TYPED_TEST(DLPackNumericTests, FromDlpackCpu) using T = TypeParam; auto const data = cudf::test::make_type_param_vector<T>({0, 1, 2, 3, 4, 0, 5, 6, 7, 8, 0}); uint64_t const offset{sizeof(T)}; - int64_t shape[2] = {4, 2}; - int64_t strides[2] = {1, 5}; + int64_t shape[2] = {4, 2}; // NOLINT + int64_t strides[2] = {1, 5}; // NOLINT DLManagedTensor tensor{}; tensor.dl_tensor.device.device_type = kDLCPU; diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index 48bc982d0e3..49ad0c408dc 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -858,8 +858,7 @@ TEST_P(JsonReaderRecordTest, JsonLinesObjects) TEST_P(JsonReaderRecordTest, JsonLinesObjectsStrings) { - auto const test_opt = GetParam(); - auto test_json_objects = [test_opt](std::string const& data) { + auto test_json_objects = [](std::string const& data) { cudf::io::json_reader_options in_options = cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) .lines(true); @@ -2575,6 +2574,30 @@ TEST_F(JsonReaderTest, ViableDelimiter) EXPECT_THROW(json_parser_options.set_delimiter('\t'), std::invalid_argument); } +TEST_F(JsonReaderTest, ViableDelimiterNewlineWS) +{ + // Test input + std::string input = R"({"a": + 100})"; + + cudf::io::json_reader_options json_parser_options = + 
cudf::io::json_reader_options::builder(cudf::io::source_info{input.c_str(), input.size()}) + .lines(true) + .delimiter('\0'); + + auto result = cudf::io::read_json(json_parser_options); + EXPECT_EQ(result.tbl->num_columns(), 1); + EXPECT_EQ(result.tbl->num_rows(), 1); + + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::INT64); + + EXPECT_EQ(result.metadata.schema_info[0].name, "a"); + + auto col1_iterator = thrust::constant_iterator<int64_t>(100); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->get_column(0), + int64_wrapper(col1_iterator, col1_iterator + 1)); +} + // Test case for dtype prune: // all paths, only one. // one present, another not present, nothing present @@ -2856,6 +2879,59 @@ TEST_F(JsonReaderTest, JSONMixedTypeChildren) } } +TEST_F(JsonReaderTest, MixedTypesWithSchema) +{ + std::string data = "{\"data\": {\"A\": 0, \"B\": 1}}\n{\"data\": [1,0]}\n"; + + std::map<std::string, cudf::io::schema_element> data_types; + std::map<std::string, cudf::io::schema_element> child_types; + child_types.insert( + std::pair{"element", cudf::io::schema_element{cudf::data_type{cudf::type_id::STRING, 0}, {}}}); + data_types.insert(std::pair{ + "data", cudf::io::schema_element{cudf::data_type{cudf::type_id::LIST, 0}, child_types}}); + + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .dtypes(data_types) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .normalize_single_quotes(true) + .normalize_whitespace(true) + .mixed_types_as_string(true) + .experimental(true) + .keep_quotes(true) + .lines(true); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + EXPECT_EQ(result.tbl->num_columns(), 1); + EXPECT_EQ(result.tbl->num_rows(), 2); + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::LIST); + EXPECT_EQ(result.tbl->get_column(0).child(1).type().id(), cudf::type_id::STRING); +} + +TEST_F(JsonReaderTest, UnicodeFieldname) +{ + // unicode at nested and leaf levels + std::string data = R"({"data": {"a": 0, "b c": 1}} + {"data": {"\u0061": 2, "\u0062\tc": 3}} + {"d\u0061ta": {"a": 4}})"; + + cudf::io::json_reader_options in_options = + cudf::io::json_reader_options::builder(cudf::io::source_info{data.data(), data.size()}) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .experimental(true) + .lines(true); + cudf::io::table_with_metadata result = cudf::io::read_json(in_options); + EXPECT_EQ(result.tbl->num_columns(), 1); + EXPECT_EQ(result.tbl->num_rows(), 3); + EXPECT_EQ(result.tbl->get_column(0).type().id(), cudf::type_id::STRUCT); + EXPECT_EQ(result.tbl->get_column(0).num_children(), 2); + EXPECT_EQ(result.tbl->get_column(0).child(0).type().id(), cudf::type_id::INT64); + EXPECT_EQ(result.tbl->get_column(0).child(1).type().id(), cudf::type_id::INT64); + EXPECT_EQ(result.metadata.schema_info.at(0).name, "data"); + EXPECT_EQ(result.metadata.schema_info.at(0).children.at(0).name, "a"); + EXPECT_EQ(result.metadata.schema_info.at(0).children.at(1).name, "b\tc"); + EXPECT_EQ(result.metadata.schema_info.at(0).children.size(), 2); +} + TEST_F(JsonReaderTest, JsonDtypeSchema) { std::string data = R"( diff --git a/cpp/tests/io/json/json_tree.cpp b/cpp/tests/io/json/json_tree.cpp index 875cc467b6a..15682c6ae6b 100644 --- a/cpp/tests/io/json/json_tree.cpp +++ b/cpp/tests/io/json/json_tree.cpp @@ -889,6 +889,7 @@ TEST_P(JsonTreeTraversalTest, CPUvsGPUTraversal) gpu_tree, is_array_of_arrays, json_lines, + false, stream, 
cudf::get_current_device_resource_ref()); #if LIBCUDF_JSON_DEBUG_DUMP diff --git a/cpp/tests/io/json/json_tree_csr.cu b/cpp/tests/io/json/json_tree_csr.cu index a336b327732..f988ae24b38 100644 --- a/cpp/tests/io/json/json_tree_csr.cu +++ b/cpp/tests/io/json/json_tree_csr.cu @@ -168,6 +168,7 @@ void run_test(std::string const& input, bool enable_lines = true) gpu_tree, is_array_of_arrays, options.is_enabled_lines(), + false, stream, rmm::mr::get_current_device_resource()); auto& gpu_col_id = std::get<0>(tup); diff --git a/cpp/tests/io/json/nested_json_test.cpp b/cpp/tests/io/json/nested_json_test.cpp index 327169ae563..f32aba0e632 100644 --- a/cpp/tests/io/json/nested_json_test.cpp +++ b/cpp/tests/io/json/nested_json_test.cpp @@ -29,6 +29,7 @@ #include <cudf/io/datasource.hpp> #include <cudf/io/json.hpp> #include <cudf/io/parquet.hpp> +#include <cudf/io/types.hpp> #include <cudf/lists/lists_column_view.hpp> #include <cudf/scalar/scalar.hpp> #include <cudf/utilities/default_stream.hpp> @@ -1196,4 +1197,181 @@ TEST_P(JsonDelimiterParamTest, RecoveringTokenStreamNewlineAndDelimiter) } } +TEST_P(JsonDelimiterParamTest, RecoveringTokenStreamNewlineAsWSAndDelimiter) +{ + // Test input. Inline comments used to indicate character indexes + // 012345678 <= line 0 + char const delimiter = GetParam(); + + /* Input: (Note that \n is considered whitespace according to the JSON spec when it is not used as + * a delimiter for JSONL) + * {"a":2} + * {"a":<delimiter>{"a":{"a":[321<delimiter>{"a":[1]} + * + * <delimiter>{"b":123} + * {"b":123}<delimiter> + * {"b"\n:\n\n\n123\n} + */ + std::string input = R"({"a":2})" + "\n"; + // starting position 8 (zero indexed) + input += R"({"a":)" + std::string(1, delimiter); + // starting position 14 (zero indexed) + input += R"({"a":{"a":[321)" + std::string(1, delimiter); + // starting position 29 (zero indexed) + input += R"({"a":[1]})" + std::string("\n\n") + std::string(1, delimiter); + // starting position 41 (zero indexed) + input += R"({"b":123})" + "\n"; + // starting position 51 (zero indexed) + input += R"({"b":123})" + std::string(1, delimiter); + // starting position 61 (zero indexed) + input += R"({"b")" + std::string("\n:\n\n\n123\n}"); + + // Golden token stream sample + using token_t = cuio_json::token_t; + std::vector<std::pair<std::size_t, cuio_json::PdaTokenT>> golden_token_stream; + if (delimiter != '\n') { + golden_token_stream = {// Line 0 (valid) + {0, token_t::StructBegin}, + {1, token_t::StructMemberBegin}, + {1, token_t::FieldNameBegin}, + {3, token_t::FieldNameEnd}, + {5, token_t::ValueBegin}, + {6, token_t::ValueEnd}, + {6, token_t::StructMemberEnd}, + {6, token_t::StructEnd}, + // Line 1 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + // Line 2 (valid) + {29, token_t::StructBegin}, + {30, token_t::StructMemberBegin}, + {30, token_t::FieldNameBegin}, + {32, token_t::FieldNameEnd}, + {34, token_t::ListBegin}, + {35, token_t::ValueBegin}, + {36, token_t::ValueEnd}, + {36, token_t::ListEnd}, + {37, token_t::StructMemberEnd}, + {37, token_t::StructEnd}, + // Line 3 (valid) + {41, token_t::StructBegin}, + {42, token_t::StructMemberBegin}, + {42, token_t::FieldNameBegin}, + {44, token_t::FieldNameEnd}, + {46, token_t::ValueBegin}, + {49, token_t::ValueEnd}, + {49, token_t::StructMemberEnd}, + {49, token_t::StructEnd}, + // Line 4 (valid) + {61, token_t::StructBegin}, + {62, token_t::StructMemberBegin}, + {62, token_t::FieldNameBegin}, + {64, token_t::FieldNameEnd}, + {70, token_t::ValueBegin}, + {73, token_t::ValueEnd}, + {74, 
token_t::StructMemberEnd}, + {74, token_t::StructEnd}}; + } else { + /* Input: + * {"a":2} + * {"a": + * {"a":{"a":[321 + * {"a":[1]} + * + * + * {"b":123} + * {"b":123} + * {"b"\n:\n\n\n123\n} + */ + golden_token_stream = {// Line 0 (valid) + {0, token_t::StructBegin}, + {1, token_t::StructMemberBegin}, + {1, token_t::FieldNameBegin}, + {3, token_t::FieldNameEnd}, + {5, token_t::ValueBegin}, + {6, token_t::ValueEnd}, + {6, token_t::StructMemberEnd}, + {6, token_t::StructEnd}, + // Line 1 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + // Line 2 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + // Line 3 (valid) + {29, token_t::StructBegin}, + {30, token_t::StructMemberBegin}, + {30, token_t::FieldNameBegin}, + {32, token_t::FieldNameEnd}, + {34, token_t::ListBegin}, + {35, token_t::ValueBegin}, + {36, token_t::ValueEnd}, + {36, token_t::ListEnd}, + {37, token_t::StructMemberEnd}, + {37, token_t::StructEnd}, + // Line 4 (valid) + {41, token_t::StructBegin}, + {42, token_t::StructMemberBegin}, + {42, token_t::FieldNameBegin}, + {44, token_t::FieldNameEnd}, + {46, token_t::ValueBegin}, + {49, token_t::ValueEnd}, + {49, token_t::StructMemberEnd}, + {49, token_t::StructEnd}, + // Line 5 (valid) + {51, token_t::StructBegin}, + {52, token_t::StructMemberBegin}, + {52, token_t::FieldNameBegin}, + {54, token_t::FieldNameEnd}, + {56, token_t::ValueBegin}, + {59, token_t::ValueEnd}, + {59, token_t::StructMemberEnd}, + {59, token_t::StructEnd}, + // Line 6 (invalid) + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + {0, token_t::StructBegin}, + {0, token_t::StructEnd}, + {0, token_t::StructBegin}, + {0, token_t::StructEnd}}; + } + + auto const stream = cudf::get_default_stream(); + + // Prepare input & output buffers + cudf::string_scalar const d_scalar(input, true, stream); + auto const d_input = cudf::device_span<cuio_json::SymbolT const>{ + d_scalar.data(), static_cast<size_t>(d_scalar.size())}; + + // Default parsing options + cudf::io::json_reader_options const in_opts = + cudf::io::json_reader_options::builder(cudf::io::source_info{}) + .recovery_mode(cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL) + .delimiter(delimiter) + .lines(true); + + // Parse the JSON and get the token stream + auto [d_tokens_gpu, d_token_indices_gpu] = cuio_json::detail::get_token_stream( + d_input, in_opts, stream, cudf::get_current_device_resource_ref()); + // Copy back the number of tokens that were written + auto const tokens_gpu = cudf::detail::make_std_vector_async(d_tokens_gpu, stream); + auto const token_indices_gpu = cudf::detail::make_std_vector_async(d_token_indices_gpu, stream); + + stream.synchronize(); + // Verify the number of tokens matches + ASSERT_EQ(golden_token_stream.size(), tokens_gpu.size()); + ASSERT_EQ(golden_token_stream.size(), token_indices_gpu.size()); + + for (std::size_t i = 0; i < tokens_gpu.size(); i++) { + // Ensure the index the tokens are pointing to do match + EXPECT_EQ(golden_token_stream[i].first, token_indices_gpu[i]) << "Mismatch at #" << i; + // Ensure the token category is correct + EXPECT_EQ(golden_token_stream[i].second, tokens_gpu[i]) << "Mismatch at #" << i; + } +} + CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 39ba62952b4..89e704f3ed3 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -38,6 +38,7 @@ #include <src/io/comp/nvcomp_adapter.hpp> +#include <array> #include <type_traits> template 
<typename T, typename SourceElementT = T>
@@ -767,14 +768,14 @@ TEST_F(OrcChunkedWriterTest, Metadata)
TEST_F(OrcChunkedWriterTest, Strings)
{
- bool mask1[] = {true, true, false, true, true, true, true};
+ std::array mask1{true, true, false, true, true, true, true};
std::vector<char const*> h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"};
- str_col strings1(h_strings1.begin(), h_strings1.end(), mask1);
+ str_col strings1(h_strings1.begin(), h_strings1.end(), mask1.data());
table_view tbl1({strings1});
- bool mask2[] = {false, true, true, true, true, true, true};
+ std::array mask2{false, true, true, true, true, true, true};
std::vector<char const*> h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"};
- str_col strings2(h_strings2.begin(), h_strings2.end(), mask2);
+ str_col strings2(h_strings2.begin(), h_strings2.end(), mask2.data());
table_view tbl2({strings2});
auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
@@ -877,26 +878,26 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize)
using T = TypeParam;
- int num_els = 31;
+ constexpr int num_els{31};
- bool mask[] = {false, true, true, true, true, true, true, true, true, true, true,
- true, true, true, true, true, true, true, true, true, true, true,
- true, true, true, true, true, true, true, true, true};
+ std::array<bool, num_els> mask{false, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true};
- T c1a[num_els];
- std::fill(c1a, c1a + num_els, static_cast<T>(5));
- T c1b[num_els];
- std::fill(c1b, c1b + num_els, static_cast<T>(6));
- column_wrapper<T> c1a_w(c1a, c1a + num_els, mask);
- column_wrapper<T> c1b_w(c1b, c1b + num_els, mask);
+ std::array<T, num_els> c1a;
+ std::fill(c1a.begin(), c1a.end(), static_cast<T>(5));
+ std::array<T, num_els> c1b;
+ std::fill(c1b.begin(), c1b.end(), static_cast<T>(6));
+ column_wrapper<T> c1a_w(c1a.begin(), c1a.end(), mask.begin());
+ column_wrapper<T> c1b_w(c1b.begin(), c1b.end(), mask.begin());
table_view tbl1({c1a_w, c1b_w});
- T c2a[num_els];
- std::fill(c2a, c2a + num_els, static_cast<T>(8));
- T c2b[num_els];
- std::fill(c2b, c2b + num_els, static_cast<T>(9));
- column_wrapper<T> c2a_w(c2a, c2a + num_els, mask);
- column_wrapper<T> c2b_w(c2b, c2b + num_els, mask);
+ std::array<T, num_els> c2a;
+ std::fill(c2a.begin(), c2a.end(), static_cast<T>(8));
+ std::array<T, num_els> c2b;
+ std::fill(c2b.begin(), c2b.end(), static_cast<T>(9));
+ column_wrapper<T> c2a_w(c2a.begin(), c2a.end(), mask.begin());
+ column_wrapper<T> c2b_w(c2b.begin(), c2b.end(), mask.begin());
table_view tbl2({c2a_w, c2b_w});
auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
@@ -920,26 +921,26 @@ TYPED_TEST(OrcChunkedWriterNumericTypeTest, UnalignedSize2)
using T = TypeParam;
- int num_els = 33;
+ constexpr int num_els = 33;
- bool mask[] = {false, true, true, true, true, true, true, true, true, true, true,
- true, true, true, true, true, true, true, true, true, true, true,
- true, true, true, true, true, true, true, true, true, true, true};
+ std::array<bool, num_els> mask{false, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true};
- T c1a[num_els];
- std::fill(c1a, c1a + num_els, static_cast<T>(5));
- T c1b[num_els];
- std::fill(c1b, c1b + num_els, static_cast<T>(6));
- column_wrapper<T> c1a_w(c1a, c1a + num_els, mask);
- column_wrapper<T> c1b_w(c1b, c1b + num_els, mask);
+ std::array<T, num_els> c1a;
+ std::fill(c1a.begin(), c1a.end(), static_cast<T>(5));
+ std::array<T, num_els> c1b;
+ std::fill(c1b.begin(), c1b.end(), static_cast<T>(6));
+ column_wrapper<T> c1a_w(c1a.begin(), c1a.end(), mask.begin());
+ column_wrapper<T> c1b_w(c1b.begin(), c1b.end(), mask.begin());
table_view tbl1({c1a_w, c1b_w});
- T c2a[num_els];
- std::fill(c2a, c2a + num_els, static_cast<T>(8));
- T c2b[num_els];
- std::fill(c2b, c2b + num_els, static_cast<T>(9));
- column_wrapper<T> c2a_w(c2a, c2a + num_els, mask);
- column_wrapper<T> c2b_w(c2b, c2b + num_els, mask);
+ std::array<T, num_els> c2a;
+ std::fill(c2a.begin(), c2a.end(), static_cast<T>(8));
+ std::array<T, num_els> c2b;
+ std::fill(c2b.begin(), c2b.end(), static_cast<T>(9));
+ column_wrapper<T> c2a_w(c2a.begin(), c2a.end(), mask.begin());
+ column_wrapper<T> c2b_w(c2b.begin(), c2b.end(), mask.begin());
table_view tbl2({c2a_w, c2b_w});
auto expected = cudf::concatenate(std::vector<table_view>({tbl1, tbl2}));
@@ -1140,7 +1141,7 @@ TEST_F(OrcReaderTest, zstdCompressionRegression)
}
// Test with zstd compressed orc file with high compression ratio.
- constexpr uint8_t input_buffer[] = {
+ constexpr std::array<uint8_t, 170> input_buffer{
0x4f, 0x52, 0x43, 0x5a, 0x00, 0x00, 0x28, 0xb5, 0x2f, 0xfd, 0xa4, 0x34, 0xc7, 0x03, 0x00,
0x74, 0x00, 0x00, 0x18, 0x41, 0xff, 0xaa, 0x02, 0x00, 0xbb, 0xff, 0x45, 0xc8, 0x01, 0x25,
0x30, 0x04, 0x65, 0x00, 0x00, 0x10, 0xaa, 0x1f, 0x02, 0x00, 0x01, 0x29, 0x0b, 0xc7, 0x39, 0xb8, 0x02, 0xcb,
@@ -1154,7 +1155,7 @@ TEST_F(OrcReaderTest, zstdCompressionRegression)
0x30, 0x09, 0x82, 0xf4, 0x03, 0x03, 0x4f, 0x52, 0x43, 0x17};
auto source =
- cudf::io::source_info(reinterpret_cast<char const*>(input_buffer), sizeof(input_buffer));
+ cudf::io::source_info(reinterpret_cast<char const*>(input_buffer.data()), input_buffer.size());
cudf::io::orc_reader_options in_opts =
cudf::io::orc_reader_options::builder(source).use_index(false);
diff --git a/cpp/tests/io/parquet_chunked_writer_test.cpp b/cpp/tests/io/parquet_chunked_writer_test.cpp
index 282c6f3adad..810fee89c48 100644
--- a/cpp/tests/io/parquet_chunked_writer_test.cpp
+++ b/cpp/tests/io/parquet_chunked_writer_test.cpp
@@ -124,15 +124,15 @@ TEST_F(ParquetChunkedWriterTest, Strings)
{
std::vector<std::unique_ptr<cudf::column>> cols;
- bool mask1[] = {true, true, false, true, true, true, true};
+ std::array mask1{true, true, false, true, true, true, true};
std::vector<char const*> h_strings1{"four", "score", "and", "seven", "years", "ago", "abcdefgh"};
- cudf::test::strings_column_wrapper strings1(h_strings1.begin(), h_strings1.end(), mask1);
+ cudf::test::strings_column_wrapper strings1(h_strings1.begin(), h_strings1.end(), mask1.data());
cols.push_back(strings1.release());
cudf::table tbl1(std::move(cols));
- bool mask2[] = {false, true, true, true, true, true, true};
+ std::array mask2{false, true, true, true, true, true, true};
std::vector<char const*> h_strings2{"ooooo", "ppppppp", "fff", "j", "cccc", "bbb", "zzzzzzzzzzz"};
- cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), mask2);
+ cudf::test::strings_column_wrapper strings2(h_strings2.begin(), h_strings2.end(), mask2.data());
cols.push_back(strings2.release());
cudf::table tbl2(std::move(cols));
@@ -771,29 +771,29 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize)
using T = TypeParam;
- int num_els = 31;
+ constexpr int num_els = 31;
std::vector<std::unique_ptr<cudf::column>> cols;
- bool mask[] = {false, true, true, true, true, true, true, true, true, true, true,
- true, true, true, true, true, true, true, true, true, true, true,
+ std::array<bool, num_els> mask{false, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true,
- true, true, true, true, true, true, true, true, true};
- T c1a[num_els];
- std::fill(c1a, c1a + num_els, static_cast<T>(5));
- T c1b[num_els];
- std::fill(c1b, c1b + num_els, static_cast<T>(6));
- column_wrapper<T> c1a_w(c1a, c1a + num_els, mask);
- column_wrapper<T> c1b_w(c1b, c1b + num_els, mask);
+ true, true, true, true, true, true, true, true, true};
+ std::array<T, num_els> c1a;
+ std::fill(c1a.begin(), c1a.end(), static_cast<T>(5));
+ std::array<T, num_els> c1b;
+ std::fill(c1b.begin(), c1b.end(), static_cast<T>(6));
+ column_wrapper<T> c1a_w(c1a.begin(), c1a.end(), mask.begin());
+ column_wrapper<T> c1b_w(c1b.begin(), c1b.end(), mask.begin());
cols.push_back(c1a_w.release());
cols.push_back(c1b_w.release());
cudf::table tbl1(std::move(cols));
- T c2a[num_els];
- std::fill(c2a, c2a + num_els, static_cast<T>(8));
- T c2b[num_els];
- std::fill(c2b, c2b + num_els, static_cast<T>(9));
- column_wrapper<T> c2a_w(c2a, c2a + num_els, mask);
- column_wrapper<T> c2b_w(c2b, c2b + num_els, mask);
+ std::array<T, num_els> c2a;
+ std::fill(c2a.begin(), c2a.end(), static_cast<T>(8));
+ std::array<T, num_els> c2b;
+ std::fill(c2b.begin(), c2b.end(), static_cast<T>(9));
+ column_wrapper<T> c2a_w(c2a.begin(), c2a.end(), mask.begin());
+ column_wrapper<T> c2b_w(c2b.begin(), c2b.end(), mask.begin());
cols.push_back(c2a_w.release());
cols.push_back(c2b_w.release());
cudf::table tbl2(std::move(cols));
@@ -819,29 +819,29 @@ TYPED_TEST(ParquetChunkedWriterNumericTypeTest, UnalignedSize2)
using T = TypeParam;
- int num_els = 33;
+ constexpr int num_els = 33;
std::vector<std::unique_ptr<cudf::column>> cols;
- bool mask[] = {false, true, true, true, true, true, true, true, true, true, true,
- true, true, true, true, true, true, true, true, true, true, true,
- true, true, true, true, true, true, true, true, true, true, true};
+ std::array<bool, num_els> mask{false, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true,
+ true, true, true, true, true, true, true, true, true, true, true};
- T c1a[num_els];
- std::fill(c1a, c1a + num_els, static_cast<T>(5));
- T c1b[num_els];
- std::fill(c1b, c1b + num_els, static_cast<T>(6));
- column_wrapper<T> c1a_w(c1a, c1a + num_els, mask);
- column_wrapper<T> c1b_w(c1b, c1b + num_els, mask);
+ std::array<T, num_els> c1a;
+ std::fill(c1a.begin(), c1a.end(), static_cast<T>(5));
+ std::array<T, num_els> c1b;
+ std::fill(c1b.begin(), c1b.end(), static_cast<T>(6));
+ column_wrapper<T> c1a_w(c1a.begin(), c1a.end(), mask.begin());
+ column_wrapper<T> c1b_w(c1b.begin(), c1b.end(), mask.begin());
cols.push_back(c1a_w.release());
cols.push_back(c1b_w.release());
cudf::table tbl1(std::move(cols));
- T c2a[num_els];
- std::fill(c2a, c2a + num_els, static_cast<T>(8));
- T c2b[num_els];
- std::fill(c2b, c2b + num_els, static_cast<T>(9));
- column_wrapper<T> c2a_w(c2a, c2a + num_els, mask);
- column_wrapper<T> c2b_w(c2b, c2b + num_els, mask);
+ std::array<T, num_els> c2a;
+ std::fill(c2a.begin(), c2a.end(), static_cast<T>(8));
+ std::array<T, num_els> c2b;
+ std::fill(c2b.begin(), c2b.end(), static_cast<T>(9));
+ column_wrapper<T>
c2a_w(c2a.begin(), c2a.end(), mask.begin()); + column_wrapper<T> c2b_w(c2b.begin(), c2b.end(), mask.begin()); cols.push_back(c2a_w.release()); cols.push_back(c2b_w.release()); cudf::table tbl2(std::move(cols)); diff --git a/cpp/tests/io/parquet_common.cpp b/cpp/tests/io/parquet_common.cpp index 3dd5ad145ea..6141a40bc95 100644 --- a/cpp/tests/io/parquet_common.cpp +++ b/cpp/tests/io/parquet_common.cpp @@ -483,10 +483,10 @@ template <typename T> std::enable_if_t<std::is_same_v<T, cudf::string_view>, cudf::test::strings_column_wrapper> ascending() { - char buf[10]; + std::array<char, 10> buf; auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", i); - return std::string(buf); + sprintf(buf.data(), "%09d", i); + return std::string(buf.data()); }); return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); } @@ -495,10 +495,10 @@ template <typename T> std::enable_if_t<std::is_same_v<T, cudf::string_view>, cudf::test::strings_column_wrapper> descending() { - char buf[10]; + std::array<char, 10> buf; auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", num_ordered_rows - i); - return std::string(buf); + sprintf(buf.data(), "%09d", static_cast<short>(num_ordered_rows - i)); + return std::string(buf.data()); }); return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); } @@ -507,10 +507,10 @@ template <typename T> std::enable_if_t<std::is_same_v<T, cudf::string_view>, cudf::test::strings_column_wrapper> unordered() { - char buf[10]; + std::array<char, 10> buf; auto elements = cudf::detail::make_counting_transform_iterator(0, [&buf](auto i) { - sprintf(buf, "%09d", (i % 2 == 0) ? i : (num_ordered_rows - i)); - return std::string(buf); + sprintf(buf.data(), "%09d", (i % 2 == 0) ? 
i : (num_ordered_rows - i)); + return std::string(buf.data()); }); return cudf::test::strings_column_wrapper(elements, elements + num_ordered_rows); } diff --git a/cpp/tests/io/parquet_common.hpp b/cpp/tests/io/parquet_common.hpp index bc6145d77da..bd1579eaa1b 100644 --- a/cpp/tests/io/parquet_common.hpp +++ b/cpp/tests/io/parquet_common.hpp @@ -35,9 +35,9 @@ template <typename T, typename SourceElementT = T> using column_wrapper = - typename std::conditional<std::is_same_v<T, cudf::string_view>, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper<T, SourceElementT>>::type; + std::conditional_t<std::is_same_v<T, cudf::string_view>, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper<T, SourceElementT>>; using column = cudf::column; using table = cudf::table; using table_view = cudf::table_view; diff --git a/cpp/tests/io/parquet_misc_test.cpp b/cpp/tests/io/parquet_misc_test.cpp index 01027d04658..8b03e94191e 100644 --- a/cpp/tests/io/parquet_misc_test.cpp +++ b/cpp/tests/io/parquet_misc_test.cpp @@ -23,6 +23,8 @@ #include <cudf/stream_compaction.hpp> #include <cudf/transform.hpp> +#include <array> + //////////////////////////////// // delta encoding writer tests @@ -225,10 +227,9 @@ TYPED_TEST(ParquetWriterComparableTypeTest, ThreeColumnSorted) // now check that the boundary order for chunk 1 is ascending, // chunk 2 is descending, and chunk 3 is unordered - cudf::io::parquet::detail::BoundaryOrder expected_orders[] = { - cudf::io::parquet::detail::BoundaryOrder::ASCENDING, - cudf::io::parquet::detail::BoundaryOrder::DESCENDING, - cudf::io::parquet::detail::BoundaryOrder::UNORDERED}; + std::array expected_orders{cudf::io::parquet::detail::BoundaryOrder::ASCENDING, + cudf::io::parquet::detail::BoundaryOrder::DESCENDING, + cudf::io::parquet::detail::BoundaryOrder::UNORDERED}; for (std::size_t i = 0; i < columns.size(); i++) { auto const ci = read_column_index(source, columns[i]); diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp index 6c61535359f..dc8e68b3a15 100644 --- a/cpp/tests/io/parquet_reader_test.cpp +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -29,6 +29,8 @@ #include <cudf/table/table_view.hpp> #include <cudf/transform.hpp> +#include <array> + TEST_F(ParquetReaderTest, UserBounds) { // trying to read more rows than there are should result in @@ -569,7 +571,8 @@ TEST_F(ParquetReaderTest, DecimalRead) This test is a temporary test until python gains the ability to write decimal, so we're embedding a parquet file directly into the code here to prevent issues with finding the file */ - unsigned char const decimals_parquet[] = { + constexpr unsigned int decimals_parquet_len = 2366; + std::array<unsigned char, decimals_parquet_len> const decimals_parquet{ 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xb0, 0x03, 0x15, 0xb8, 0x03, 0x2c, 0x15, 0x6a, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1c, 0x36, 0x02, 0x28, 0x04, 0x7f, 0x96, 0x98, 0x00, 0x18, 0x04, 0x81, 0x69, 0x67, 0xff, 0x00, 0x00, 0x00, 0xd8, 0x01, 0xf0, 0xd7, 0x04, 0x00, @@ -728,10 +731,10 @@ TEST_F(ParquetReaderTest, DecimalRead) 0x30, 0x36, 0x30, 0x36, 0x39, 0x65, 0x35, 0x30, 0x63, 0x39, 0x62, 0x37, 0x39, 0x37, 0x30, 0x62, 0x65, 0x62, 0x64, 0x31, 0x29, 0x19, 0x3c, 0x1c, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x00, 0xd3, 0x02, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; - unsigned int decimals_parquet_len = 2366; - cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( - 
cudf::io::source_info{reinterpret_cast<char const*>(decimals_parquet), decimals_parquet_len}); + cudf::io::parquet_reader_options read_opts = + cudf::io::parquet_reader_options::builder(cudf::io::source_info{ + reinterpret_cast<char const*>(decimals_parquet.data()), decimals_parquet_len}); auto result = cudf::io::read_parquet(read_opts); auto validity = @@ -739,7 +742,7 @@ TEST_F(ParquetReaderTest, DecimalRead) EXPECT_EQ(result.tbl->view().num_columns(), 3); - int32_t col0_data[] = { + std::array<int32_t, 53> col0_data{ -2354584, -190275, 8393572, 6446515, -5687920, -1843550, -6897687, -6780385, 3428529, 5842056, -4312278, -4450603, -7516141, 2974667, -4288640, 1065090, -9410428, 7891355, 1076244, -1975984, 6999466, 2666959, 9262967, 7931374, -1370640, 451074, 8799111, @@ -753,29 +756,28 @@ TEST_F(ParquetReaderTest, DecimalRead) std::begin(col0_data), std::end(col0_data), validity, numeric::scale_type{-4}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); - int64_t col1_data[] = {29274040266581, -17210335917753, -58420730139037, - 68073792696254, 2236456014294, 13704555677045, - -70797090469548, -52248605513407, -68976081919961, - -34277313883112, 97774730521689, 21184241014572, - -670882460254, -40862944054399, -24079852370612, - -88670167797498, -84007574359403, -71843004533519, - -55538016554201, 3491435293032, -29085437167297, - 36901882672273, -98622066122568, -13974902998457, - 86712597643378, -16835133643735, -94759096142232, - 30708340810940, 79086853262082, 78923696440892, - -76316597208589, 37247268714759, 80303592631774, - 57790350050889, 19387319851064, -33186875066145, - 69701203023404, -7157433049060, -7073790423437, - 92769171617714, -75127120182184, -951893180618, - 64927618310150, -53875897154023, -16168039035569, - -24273449166429, -30359781249192, 35639397345991, - 45844829680593, 71401416837149, 0, - -99999999999999, 99999999999999}; - - EXPECT_EQ(static_cast<std::size_t>(result.tbl->view().column(1).size()), - sizeof(col1_data) / sizeof(col1_data[0])); + std::array<int64_t, 53> col1_data{29274040266581, -17210335917753, -58420730139037, + 68073792696254, 2236456014294, 13704555677045, + -70797090469548, -52248605513407, -68976081919961, + -34277313883112, 97774730521689, 21184241014572, + -670882460254, -40862944054399, -24079852370612, + -88670167797498, -84007574359403, -71843004533519, + -55538016554201, 3491435293032, -29085437167297, + 36901882672273, -98622066122568, -13974902998457, + 86712597643378, -16835133643735, -94759096142232, + 30708340810940, 79086853262082, 78923696440892, + -76316597208589, 37247268714759, 80303592631774, + 57790350050889, 19387319851064, -33186875066145, + 69701203023404, -7157433049060, -7073790423437, + 92769171617714, -75127120182184, -951893180618, + 64927618310150, -53875897154023, -16168039035569, + -24273449166429, -30359781249192, 35639397345991, + 45844829680593, 71401416837149, 0, + -99999999999999, 99999999999999}; + + EXPECT_EQ(static_cast<std::size_t>(result.tbl->view().column(1).size()), col1_data.size()); cudf::test::fixed_point_column_wrapper<int64_t> col1( - std::begin(col1_data), std::end(col1_data), validity, numeric::scale_type{-5}); + col1_data.begin(), col1_data.end(), validity, numeric::scale_type{-5}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); cudf::io::parquet_reader_options read_strict_opts = read_opts; @@ -786,7 +788,7 @@ TEST_F(ParquetReaderTest, DecimalRead) // dec7p3: Decimal(precision=7, scale=3) backed by FIXED_LENGTH_BYTE_ARRAY(length = 4) // dec12p11: 
Decimal(precision=12, scale=11) backed by FIXED_LENGTH_BYTE_ARRAY(length = 6) // dec20p1: Decimal(precision=20, scale=1) backed by FIXED_LENGTH_BYTE_ARRAY(length = 9) - unsigned char const fixed_len_bytes_decimal_parquet[] = { + std::array<unsigned char, 1226> const fixed_len_bytes_decimal_parquet{ 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0xA8, 0x01, 0x15, 0xAE, 0x01, 0x2C, 0x15, 0x28, 0x15, 0x00, 0x15, 0x06, 0x15, 0x08, 0x1C, 0x36, 0x02, 0x28, 0x04, 0x00, 0x97, 0x45, 0x72, 0x18, 0x04, 0x00, 0x01, 0x81, 0x3B, 0x00, 0x00, 0x00, 0x54, 0xF0, 0x53, 0x04, 0x00, 0x00, @@ -875,75 +877,72 @@ TEST_F(ParquetReaderTest, DecimalRead) cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{ - reinterpret_cast<char const*>(fixed_len_bytes_decimal_parquet), parquet_len}); + reinterpret_cast<char const*>(fixed_len_bytes_decimal_parquet.data()), parquet_len}); auto result = cudf::io::read_parquet(read_opts); EXPECT_EQ(result.tbl->view().num_columns(), 3); - auto validity_c0 = cudf::test::iterators::nulls_at({19}); - int32_t col0_data[] = {6361295, 698632, 7821423, 7073444, 9631892, 3021012, 5195059, - 9913714, 901749, 7776938, 3186566, 4955569, 5131067, 98619, - 2282579, 7521455, 4430706, 1937859, 4532040, 0}; + auto validity_c0 = cudf::test::iterators::nulls_at({19}); + std::array col0_data{6361295, 698632, 7821423, 7073444, 9631892, 3021012, 5195059, + 9913714, 901749, 7776938, 3186566, 4955569, 5131067, 98619, + 2282579, 7521455, 4430706, 1937859, 4532040, 0}; - EXPECT_EQ(static_cast<std::size_t>(result.tbl->view().column(0).size()), - sizeof(col0_data) / sizeof(col0_data[0])); + EXPECT_EQ(static_cast<std::size_t>(result.tbl->view().column(0).size()), col0_data.size()); cudf::test::fixed_point_column_wrapper<int32_t> col0( - std::begin(col0_data), std::end(col0_data), validity_c0, numeric::scale_type{-3}); + col0_data.begin(), col0_data.end(), validity_c0, numeric::scale_type{-3}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(0), col0); - auto validity_c1 = cudf::test::iterators::nulls_at({18}); - int64_t col1_data[] = {361378026250, - 30646804862, - 429930238629, - 418758703536, - 895494171113, - 435283865083, - 809096053722, - -999999999999, - 426465099333, - 526684574144, - 826310892810, - 584686967589, - 113822282951, - 409236212092, - 420631167535, - 918438386086, - -999999999999, - 489053889147, - 0, - 363993164092}; - - EXPECT_EQ(static_cast<std::size_t>(result.tbl->view().column(1).size()), - sizeof(col1_data) / sizeof(col1_data[0])); + auto validity_c1 = cudf::test::iterators::nulls_at({18}); + std::array<int64_t, 20> col1_data{361378026250, + 30646804862, + 429930238629, + 418758703536, + 895494171113, + 435283865083, + 809096053722, + -999999999999, + 426465099333, + 526684574144, + 826310892810, + 584686967589, + 113822282951, + 409236212092, + 420631167535, + 918438386086, + -999999999999, + 489053889147, + 0, + 363993164092}; + + EXPECT_EQ(static_cast<std::size_t>(result.tbl->view().column(1).size()), col1_data.size()); cudf::test::fixed_point_column_wrapper<int64_t> col1( - std::begin(col1_data), std::end(col1_data), validity_c1, numeric::scale_type{-11}); + col1_data.begin(), col1_data.end(), validity_c1, numeric::scale_type{-11}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(1), col1); - auto validity_c2 = cudf::test::iterators::nulls_at({6, 14}); - __int128_t col2_data[] = {9078697037144433659, - 9050770539577117612, - 2358363961733893636, - 1566059559232276662, - 6658306200002735268, - 4967909073046397334, - 
0, - 7235588493887532473, - 5023160741463849572, - 2765173712965988273, - 3880866513515749646, - 5019704400576359500, - 5544435986818825655, - 7265381725809874549, - 0, - 1576192427381240677, - 2828305195087094598, - 260308667809395171, - 2460080200895288476, - 2718441925197820439}; - - EXPECT_EQ(static_cast<std::size_t>(result.tbl->view().column(2).size()), - sizeof(col2_data) / sizeof(col2_data[0])); + auto validity_c2 = cudf::test::iterators::nulls_at({6, 14}); + std::array<__int128_t, 20> col2_data{9078697037144433659, + 9050770539577117612, + 2358363961733893636, + 1566059559232276662, + 6658306200002735268, + 4967909073046397334, + 0, + 7235588493887532473, + 5023160741463849572, + 2765173712965988273, + 3880866513515749646, + 5019704400576359500, + 5544435986818825655, + 7265381725809874549, + 0, + 1576192427381240677, + 2828305195087094598, + 260308667809395171, + 2460080200895288476, + 2718441925197820439}; + + EXPECT_EQ(static_cast<std::size_t>(result.tbl->view().column(2).size()), col2_data.size()); cudf::test::fixed_point_column_wrapper<__int128_t> col2( - std::begin(col2_data), std::end(col2_data), validity_c2, numeric::scale_type{-1}); + col2_data.begin(), col2_data.end(), validity_c2, numeric::scale_type{-1}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(result.tbl->view().column(2), col2); } } @@ -1221,7 +1220,7 @@ TEST_F(ParquetReaderTest, NestingOptimizationTest) TEST_F(ParquetReaderTest, SingleLevelLists) { - unsigned char list_bytes[] = { + std::array<unsigned char, 214> list_bytes{ 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0x28, 0x15, 0x28, 0x15, 0xa7, 0xce, 0x91, 0x8c, 0x06, 0x1c, 0x15, 0x04, 0x15, 0x00, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x02, 0x02, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, @@ -1239,7 +1238,7 @@ TEST_F(ParquetReaderTest, SingleLevelLists) // read single level list reproducing parquet file cudf::io::parquet_reader_options read_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast<char const*>(list_bytes), sizeof(list_bytes)}); + cudf::io::source_info{reinterpret_cast<char const*>(list_bytes.data()), list_bytes.size()}); auto table = cudf::io::read_parquet(read_opts); auto const c0 = table.tbl->get_column(0); @@ -1252,7 +1251,7 @@ TEST_F(ParquetReaderTest, SingleLevelLists) TEST_F(ParquetReaderTest, ChunkedSingleLevelLists) { - unsigned char list_bytes[] = { + std::array<unsigned char, 214> list_bytes{ 0x50, 0x41, 0x52, 0x31, 0x15, 0x00, 0x15, 0x28, 0x15, 0x28, 0x15, 0xa7, 0xce, 0x91, 0x8c, 0x06, 0x1c, 0x15, 0x04, 0x15, 0x00, 0x15, 0x06, 0x15, 0x06, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x02, 0x02, 0x00, 0x00, 0x00, 0x03, 0x03, 0x00, 0x00, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00, 0x15, @@ -1271,7 +1270,7 @@ TEST_F(ParquetReaderTest, ChunkedSingleLevelLists) auto reader = cudf::io::chunked_parquet_reader( 1L << 31, cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast<char const*>(list_bytes), sizeof(list_bytes)})); + cudf::io::source_info{reinterpret_cast<char const*>(list_bytes.data()), list_bytes.size()})); int iterations = 0; while (reader.has_next() && iterations < 10) { auto chunk = reader.read_chunk(); @@ -1932,7 +1931,7 @@ TEST_F(ParquetReaderTest, FilterFloatNAN) TEST_F(ParquetReaderTest, RepeatedNoAnnotations) { - constexpr unsigned char repeated_bytes[] = { + constexpr std::array<unsigned char, 662> repeated_bytes{ 0x50, 0x41, 0x52, 0x31, 0x15, 0x04, 0x15, 0x30, 0x15, 0x30, 0x4c, 0x15, 0x0c, 0x15, 0x00, 0x12, 0x00, 
0x00, 0x01, 0x00, 0x00, 0x00, 0x02, 0x00, 0x00, 0x00, 0x03, 0x00, 0x00, 0x00, 0x04, 0x00, 0x00, 0x00, 0x05, 0x00, 0x00, 0x00, 0x06, 0x00, 0x00, 0x00, 0x15, 0x00, 0x15, 0x0a, 0x15, 0x0a, @@ -1976,9 +1975,9 @@ TEST_F(ParquetReaderTest, RepeatedNoAnnotations) 0x61, 0x38, 0x33, 0x39, 0x31, 0x36, 0x63, 0x36, 0x39, 0x62, 0x35, 0x65, 0x29, 0x00, 0x32, 0x01, 0x00, 0x00, 0x50, 0x41, 0x52, 0x31}; - auto read_opts = cudf::io::parquet_reader_options::builder( - cudf::io::source_info{reinterpret_cast<char const*>(repeated_bytes), sizeof(repeated_bytes)}); - auto result = cudf::io::read_parquet(read_opts); + auto read_opts = cudf::io::parquet_reader_options::builder(cudf::io::source_info{ + reinterpret_cast<char const*>(repeated_bytes.data()), repeated_bytes.size()}); + auto result = cudf::io::read_parquet(read_opts); EXPECT_EQ(result.tbl->view().column(0).size(), 6); EXPECT_EQ(result.tbl->view().num_columns(), 2); diff --git a/cpp/tests/io/parquet_v2_test.cpp b/cpp/tests/io/parquet_v2_test.cpp index 9e66fc9409f..7c305235ea6 100644 --- a/cpp/tests/io/parquet_v2_test.cpp +++ b/cpp/tests/io/parquet_v2_test.cpp @@ -23,6 +23,8 @@ #include <cudf/io/parquet.hpp> +#include <array> + using cudf::test::iterators::no_nulls; // Base test fixture for V2 header tests @@ -693,9 +695,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndex) // fixed length strings auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); + std::array<char, 30> buf; + sprintf(buf.data(), "%012d", i); + return std::string(buf.data()); }); auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); @@ -715,9 +717,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndex) // mixed length strings auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); + std::array<char, 30> buf; + sprintf(buf.data(), "%d", i); + return std::string(buf.data()); }); auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); @@ -787,9 +789,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) // fixed length strings auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); + std::array<char, 30> buf; + sprintf(buf.data(), "%012d", i); + return std::string(buf.data()); }); auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); @@ -819,9 +821,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNulls) // mixed length strings auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); + std::array<char, 30> buf; + sprintf(buf.data(), "%d", i); + return std::string(buf.data()); }); auto col7 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows, valids); @@ -897,9 +899,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) // fixed length strings auto str1_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%012d", i); - return std::string(buf); + std::array<char, 30> buf; + sprintf(buf.data(), "%012d", i); + return std::string(buf.data()); }); auto col0 = cudf::test::strings_column_wrapper(str1_elements, str1_elements + num_rows); @@ -914,9 +916,9 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexNullColumn) // mixed length strings 
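// (Illustrative sketch, not part of this PR.) The CheckColumnOffsetIndex hunks above and
// below keep sprintf but move the scratch buffer into a std::array. The same fixed-width
// key generation with snprintf, so the buffer size is passed explicitly; fixed_width_key
// is a hypothetical helper name:
#include <array>
#include <cstdio>
#include <string>
std::string fixed_width_key(int i)
{
  std::array<char, 30> buf{};                         // zero-initialized scratch buffer
  std::snprintf(buf.data(), buf.size(), "%012d", i);  // 12-digit, zero-padded key
  return std::string(buf.data());                     // copies up to the terminating NUL
}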
auto str2_elements = cudf::detail::make_counting_transform_iterator(0, [](auto i) { - char buf[30]; - sprintf(buf, "%d", i); - return std::string(buf); + std::array<char, 30> buf; + sprintf(buf.data(), "%d", i); + return std::string(buf.data()); }); auto col3 = cudf::test::strings_column_wrapper(str2_elements, str2_elements + num_rows); @@ -1034,7 +1036,7 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexStruct) // hard coded schema indices. // TODO find a way to do this without magic - size_t const colidxs[] = {1, 3, 4, 5, 8}; + constexpr std::array<size_t, 5> colidxs{1, 3, 4, 5, 8}; for (size_t r = 0; r < fmd.row_groups.size(); r++) { auto const& rg = fmd.row_groups[r]; for (size_t c = 0; c < rg.columns.size(); c++) { @@ -1129,7 +1131,7 @@ TEST_P(ParquetV2Test, CheckColumnOffsetIndexStructNulls) // col1 will have num_ordered_rows / 2 nulls total // col2 will have num_ordered_rows / 3 nulls total // col3 will have num_ordered_rows / 4 nulls total - int const null_mods[] = {0, 2, 3, 4}; + constexpr std::array<int, 4> null_mods{0, 2, 3, 4}; for (auto const& rg : fmd.row_groups) { for (size_t c = 0; c < rg.columns.size(); c++) { @@ -1299,7 +1301,7 @@ TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls) table_view expected({col0, col1, col2, col3, col4, col5, col6, col7}); - int64_t const expected_null_counts[] = {4, 4, 4, 6, 4, 6, 4, 5, 11}; + std::array<int64_t, 9> expected_null_counts{4, 4, 4, 6, 4, 6, 4, 5, 11}; std::vector<int64_t> const expected_def_hists[] = {{1, 1, 2, 3}, {1, 3, 10}, {1, 1, 2, 10}, diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp index c8100038942..8794f2ee304 100644 --- a/cpp/tests/io/parquet_writer_test.cpp +++ b/cpp/tests/io/parquet_writer_test.cpp @@ -31,6 +31,7 @@ #include <src/io/parquet/parquet.hpp> #include <src/io/parquet/parquet_common.hpp> +#include <array> #include <fstream> using cudf::test::iterators::no_nulls; @@ -879,53 +880,52 @@ TEST_F(ParquetWriterTest, Decimal128Stats) TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) { - char const* coldata[] = { - // in-range 7 bit. should truncate to "yyyyyyyz" - "yyyyyyyyy", - // max 7 bit. should truncate to "x7fx7fx7fx7fx7fx7fx7fx80", since it's - // considered binary, not UTF-8. If UTF-8 it should not truncate. - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", - // max binary. this should not truncate - "\xff\xff\xff\xff\xff\xff\xff\xff\xff", - // in-range 2-byte UTF8 (U+00E9). should truncate to "éééê" - "ééééé", - // max 2-byte UTF8 (U+07FF). should not truncate - "߿߿߿߿߿", - // in-range 3-byte UTF8 (U+0800). should truncate to "ࠀࠁ" - "ࠀࠀࠀ", - // max 3-byte UTF8 (U+FFFF). should not truncate - "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", - // in-range 4-byte UTF8 (U+10000). should truncate to "𐀀𐀁" - "𐀀𐀀𐀀", - // max unicode (U+10FFFF). should truncate to \xf4\x8f\xbf\xbf\xf4\x90\x80\x80, - // which is no longer valid unicode, but is still ok UTF-8??? - "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", - // max 4-byte UTF8 (U+1FFFFF). should not truncate - "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; + std::array coldata{// in-range 7 bit. should truncate to "yyyyyyyz" + "yyyyyyyyy", + // max 7 bit. should truncate to "x7fx7fx7fx7fx7fx7fx7fx80", since it's + // considered binary, not UTF-8. If UTF-8 it should not truncate. + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", + // max binary. this should not truncate + "\xff\xff\xff\xff\xff\xff\xff\xff\xff", + // in-range 2-byte UTF8 (U+00E9). should truncate to "éééê" + "ééééé", + // max 2-byte UTF8 (U+07FF). 
should not truncate + "߿߿߿߿߿", + // in-range 3-byte UTF8 (U+0800). should truncate to "ࠀࠁ" + "ࠀࠀࠀ", + // max 3-byte UTF8 (U+FFFF). should not truncate + "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", + // in-range 4-byte UTF8 (U+10000). should truncate to "𐀀𐀁" + "𐀀𐀀𐀀", + // max unicode (U+10FFFF). should truncate to \xf4\x8f\xbf\xbf\xf4\x90\x80\x80, + // which is no longer valid unicode, but is still ok UTF-8??? + "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", + // max 4-byte UTF8 (U+1FFFFF). should not truncate + "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; // NOTE: UTF8 min is initialized with 0xf7bfbfbf. Binary values larger // than that will not become minimum value (when written as UTF-8). - char const* truncated_min[] = {"yyyyyyyy", - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", - "\xf7\xbf\xbf\xbf", - "éééé", - "߿߿߿߿", - "ࠀࠀ", - "\xef\xbf\xbf\xef\xbf\xbf", - "𐀀𐀀", - "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", - "\xf7\xbf\xbf\xbf"}; - - char const* truncated_max[] = {"yyyyyyyz", - "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x80", - "\xff\xff\xff\xff\xff\xff\xff\xff\xff", - "éééê", - "߿߿߿߿߿", - "ࠀࠁ", - "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", - "𐀀𐀁", - "\xf4\x8f\xbf\xbf\xf4\x90\x80\x80", - "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; + std::array truncated_min{"yyyyyyyy", + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x7f", + "\xf7\xbf\xbf\xbf", + "éééé", + "߿߿߿߿", + "ࠀࠀ", + "\xef\xbf\xbf\xef\xbf\xbf", + "𐀀𐀀", + "\xf4\x8f\xbf\xbf\xf4\x8f\xbf\xbf", + "\xf7\xbf\xbf\xbf"}; + + std::array truncated_max{"yyyyyyyz", + "\x7f\x7f\x7f\x7f\x7f\x7f\x7f\x80", + "\xff\xff\xff\xff\xff\xff\xff\xff\xff", + "éééê", + "߿߿߿߿߿", + "ࠀࠁ", + "\xef\xbf\xbf\xef\xbf\xbf\xef\xbf\xbf", + "𐀀𐀁", + "\xf4\x8f\xbf\xbf\xf4\x90\x80\x80", + "\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf\xf7\xbf\xbf\xbf"}; auto cols = [&]() { using string_wrapper = column_wrapper<cudf::string_view>; diff --git a/cpp/tests/io/text/multibyte_split_test.cpp b/cpp/tests/io/text/multibyte_split_test.cpp index 408d54bd5ff..74d08061df9 100644 --- a/cpp/tests/io/text/multibyte_split_test.cpp +++ b/cpp/tests/io/text/multibyte_split_test.cpp @@ -145,7 +145,7 @@ TEST_F(MultibyteSplitTest, LargeInput) for (auto i = 0; i < (2 * 32 * 128 * 1024); i++) { host_input += "...:|"; - host_expected.emplace_back(std::string("...:|")); + host_expected.emplace_back("...:|"); } auto expected = strings_column_wrapper{host_expected.begin(), host_expected.end()}; diff --git a/cpp/tests/join/mixed_join_tests.cu b/cpp/tests/join/mixed_join_tests.cu index 6c147c8a128..9041969bec7 100644 --- a/cpp/tests/join/mixed_join_tests.cu +++ b/cpp/tests/join/mixed_join_tests.cu @@ -778,6 +778,138 @@ TYPED_TEST(MixedLeftSemiJoinTest, BasicEquality) {1}); } +TYPED_TEST(MixedLeftSemiJoinTest, MixedLeftSemiJoinGatherMap) +{ + auto const col_ref_left_1 = cudf::ast::column_reference(0, cudf::ast::table_reference::LEFT); + auto const col_ref_right_1 = cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT); + auto left_one_greater_right_one = + cudf::ast::operation(cudf::ast::ast_operator::GREATER, col_ref_left_1, col_ref_right_1); + + this->test({{2, 3, 9, 0, 1, 7, 4, 6, 5, 8}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}}, + {{6, 5, 9, 8, 10, 32}, {0, 1, 2, 3, 4, 5}, {7, 8, 9, 0, 1, 2}}, + {0}, + {1}, + left_one_greater_right_one, + {2, 7, 8}); +} + +TYPED_TEST(MixedLeftSemiJoinTest, MixedLeftSemiJoinGatherMapLarge) +{ + using T1 = double; + + // Number of rows in each column + auto constexpr N = 10000; + + // Generate column data for left and right tables + auto const [left_col0, right_col0] = 
gen_random_nullable_repeated_columns<T1>(N, 200);
+ auto const [left_col1, right_col1] = gen_random_nullable_repeated_columns<T1>(N, 100);
+
+ // Setup data and nulls for the left table
+ std::vector<std::pair<std::vector<T1>, std::vector<bool>>> lefts = {
+ {left_col0.first, left_col0.second}, {left_col1.first, left_col1.second}};
+ std::vector<cudf::test::fixed_width_column_wrapper<T1>> left_wrappers;
+ std::vector<cudf::column_view> left_columns;
+ for (auto [data, valids] : lefts) {
+ left_wrappers.emplace_back(
+ cudf::test::fixed_width_column_wrapper<T1>(data.begin(), data.end(), valids.begin()));
+ left_columns.emplace_back(left_wrappers.back());
+ };
+
+ // Setup data and nulls for the right table
+ std::vector<std::pair<std::vector<T1>, std::vector<bool>>> rights = {
+ {right_col0.first, right_col0.second}, {right_col1.first, right_col1.second}};
+ std::vector<cudf::test::fixed_width_column_wrapper<T1>> right_wrappers;
+ std::vector<cudf::column_view> right_columns;
+ for (auto [data, valids] : rights) {
+ right_wrappers.emplace_back(
+ cudf::test::fixed_width_column_wrapper<T1>(data.begin(), data.end(), valids.begin()));
+ right_columns.emplace_back(right_wrappers.back());
+ };
+
+ // Left and right table views.
+ auto const left_table = cudf::table_view{left_columns};
+ auto const right_table = cudf::table_view{right_columns};
+
+ // Using the zeroth column for equality.
+ auto const left_equality = left_table.select({0});
+ auto const right_equality = right_table.select({0});
+
+ // Column references for equality column.
+ auto const col_ref_left_0 = cudf::ast::column_reference(0, cudf::ast::table_reference::LEFT);
+ auto const col_ref_right_0 = cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT);
+ auto left_zero_eq_right_zero =
+ cudf::ast::operation(cudf::ast::ast_operator::EQUAL, col_ref_left_0, col_ref_right_0);
+
+ // Mixed semi join with zeroth column equality
+ {
+ // Expected left_semi_join result
+ auto const expected_mixed_semi_join =
+ cudf::conditional_left_semi_join(left_table, right_table, left_zero_eq_right_zero);
+
+ // Actual mixed_left_semi_join result
+ auto const mixed_semi_join = cudf::mixed_left_semi_join(left_equality,
+ right_equality,
+ left_table,
+ right_table,
+ left_zero_eq_right_zero,
+ cudf::null_equality::UNEQUAL);
+
+ // Copy data back to host for comparisons
+ auto expected_indices = cudf::detail::make_std_vector_async<int32_t>(
+ cudf::device_span<int32_t>(*expected_mixed_semi_join), cudf::get_default_stream());
+ auto result_indices = cudf::detail::make_std_vector_sync<int32_t>(
+ cudf::device_span<int32_t>(*mixed_semi_join), cudf::get_default_stream());
+
+ // Sort the indices for 1-1 comparison
+ std::sort(expected_indices.begin(), expected_indices.end());
+ std::sort(result_indices.begin(), result_indices.end());
+
+ // Expected and actual vectors must match.
+ EXPECT_EQ(expected_mixed_semi_join->size(), mixed_semi_join->size());
+ EXPECT_TRUE(
+ std::equal(expected_indices.begin(), expected_indices.end(), result_indices.begin()));
+ }
+
+ // Mixed semi join with zeroth column equality and first column GREATER conditional
+ {
+ // Column references for conditional column.
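// (Illustrative sketch, not part of this PR.) The gather-map checks above and below follow
// a fixed idiom: copy both device gather maps to host, sort, then compare, since join
// output order is unspecified. The host-side half of that idiom; expect_same_indices is a
// hypothetical helper name:
#include <gtest/gtest.h>
#include <algorithm>
#include <cstdint>
#include <vector>
void expect_same_indices(std::vector<int32_t> expected, std::vector<int32_t> actual)
{
  // Sort both sides so the comparison is order-independent (compare as multisets).
  std::sort(expected.begin(), expected.end());
  std::sort(actual.begin(), actual.end());
  EXPECT_EQ(expected, actual);  // vector operator== checks both size and elements
}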
+ auto const col_ref_left_1 = cudf::ast::column_reference(1, cudf::ast::table_reference::LEFT); + auto const col_ref_right_1 = cudf::ast::column_reference(1, cudf::ast::table_reference::RIGHT); + auto left_one_gt_right_one = + cudf::ast::operation(cudf::ast::ast_operator::GREATER, col_ref_left_1, col_ref_right_1); + + // Expected left_semi_join result + auto const expected_mixed_semi_join = cudf::conditional_left_semi_join( + left_table, + right_table, + cudf::ast::operation( + cudf::ast::ast_operator::LOGICAL_AND, left_zero_eq_right_zero, left_one_gt_right_one)); + + // Actual left_semi_join result + auto const mixed_semi_join = cudf::mixed_left_semi_join(left_equality, + right_equality, + left_table, + right_table, + left_one_gt_right_one, + cudf::null_equality::UNEQUAL); + + // Copy data back to host for comparisons + auto expected_indices = cudf::detail::make_std_vector_async<int32_t>( + cudf::device_span<int32_t>(*expected_mixed_semi_join), cudf::get_default_stream()); + auto result_indices = cudf::detail::make_std_vector_sync<int32_t>( + cudf::device_span<int32_t>(*mixed_semi_join), cudf::get_default_stream()); + + // Sort the indices for 1-1 comparison + std::sort(expected_indices.begin(), expected_indices.end()); + std::sort(result_indices.begin(), result_indices.end()); + + // Expected and actual vectors must match. + EXPECT_EQ(expected_mixed_semi_join->size(), mixed_semi_join->size()); + EXPECT_TRUE( + std::equal(expected_indices.begin(), expected_indices.end(), result_indices.begin())); + } +} + TYPED_TEST(MixedLeftSemiJoinTest, BasicEqualityDuplicates) { this->test({{0, 1, 2, 1}, {3, 4, 5, 6}, {10, 20, 30, 40}}, @@ -900,3 +1032,18 @@ TYPED_TEST(MixedLeftAntiJoinTest, AsymmetricLeftLargerEquality) left_zero_eq_right_zero, {0, 1, 3}); } + +TYPED_TEST(MixedLeftAntiJoinTest, MixedLeftAntiJoinGatherMap) +{ + auto const col_ref_left_1 = cudf::ast::column_reference(0, cudf::ast::table_reference::LEFT); + auto const col_ref_right_1 = cudf::ast::column_reference(0, cudf::ast::table_reference::RIGHT); + auto left_one_greater_right_one = + cudf::ast::operation(cudf::ast::ast_operator::GREATER, col_ref_left_1, col_ref_right_1); + + this->test({{2, 3, 9, 0, 1, 7, 4, 6, 5, 8}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 0}}, + {{6, 5, 9, 8, 10, 32}, {0, 1, 2, 3, 4, 5}, {7, 8, 9, 0, 1, 2}}, + {0}, + {1}, + left_one_greater_right_one, + {0, 1, 3, 4, 5, 6, 9}); +} diff --git a/cpp/tests/json/json_tests.cpp b/cpp/tests/json/json_tests.cpp index a9186874e83..42a574ac5c0 100644 --- a/cpp/tests/json/json_tests.cpp +++ b/cpp/tests/json/json_tests.cpp @@ -652,7 +652,7 @@ TEST_F(JsonPathTests, MixedOutput) // various queries on: // clang-format off std::vector<std::string> input_strings { - "{\"a\": {\"b\" : \"c\"}}", + R"({"a": {"b" : "c"}})", "{" "\"a\": {\"b\" : \"c\"}," @@ -827,7 +827,7 @@ TEST_F(JsonPathTests, AllowSingleQuotes) // various queries on: std::vector<std::string> input_strings{ // clang-format off - "{\'a\': {\'b\' : \'c\'}}", + R"({'a': {'b' : 'c'}})", "{" "\'a\': {\'b\' : \"c\"}," @@ -902,7 +902,7 @@ TEST_F(JsonPathTests, StringsWithSpecialChars) { std::vector<std::string> input_strings{ // clang-format off - "{\"item\" : [{\"key\" : \"value[\"}]}", + R"({"item" : [{"key" : "value["}]})", // clang-format on }; @@ -927,7 +927,7 @@ TEST_F(JsonPathTests, StringsWithSpecialChars) { std::vector<std::string> input_strings{ // clang-format off - "{\"a\" : \"[}{}][][{[\\\"}}[\\\"]\"}", + R"({"a" : "[}{}][][{[\"}}[\"]"})", // clang-format on }; @@ -958,8 +958,8 @@ TEST_F(JsonPathTests, EscapeSequences) 
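// (Illustrative aside, not part of this PR.) The rewrites in this file replace escaped JSON
// literals with raw string literals; both spell the same character sequence, R"(...)" merely
// avoids the double escaping. A compile-time check of that equivalence for the MixedOutput
// literal changed above:
static_assert(sizeof(R"({"a": {"b" : "c"}})") == sizeof("{\"a\": {\"b\" : \"c\"}}"),
              "raw and escaped spellings contain the same number of characters");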
std::vector<std::string> input_strings{
// clang-format off
- "{\"a\" : \"\\\" \\\\ \\/ \\b \\f \\n \\r \\t\"}",
- "{\"a\" : \"\\u1248 \\uacdf \\uACDF \\u10EF\"}"
+ R"({"a" : "\" \\ \/ \b \f \n \r \t"})",
+ R"({"a" : "\u1248 \uacdf \uACDF \u10EF"})"
// clang-format on
};
diff --git a/cpp/tests/large_strings/json_tests.cu b/cpp/tests/large_strings/json_tests.cu
index 80bde168b75..a212d7d654a 100644
--- a/cpp/tests/large_strings/json_tests.cu
+++ b/cpp/tests/large_strings/json_tests.cu
@@ -96,5 +96,5 @@ TEST_F(JsonLargeReaderTest, MultiBatch)
}
// go back to normal batch_size
- unsetenv("LIBCUDF_LARGE_STRINGS_THRESHOLD");
+ unsetenv("LIBCUDF_JSON_BATCH_SIZE");
}
diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp
index 949ffcc26a6..1e9e13ded93 100644
--- a/cpp/tests/reductions/reduction_tests.cpp
+++ b/cpp/tests/reductions/reduction_tests.cpp
@@ -35,7 +35,6 @@
#include <thrust/iterator/counting_iterator.h>
-#include <iostream>
#include <vector>
using aggregation = cudf::aggregation;
@@ -1254,7 +1253,7 @@ struct StringReductionTest : public cudf::test::BaseFixture,
};
// ------------------------------------------------------------------------
-std::vector<std::string> string_list[] = {
+std::vector<std::vector<std::string>> string_list{{
{"one", "two", "three", "four", "five", "six", "seven", "eight", "nine"},
{"", "two", "three", "four", "five", "six", "seven", "eight", "nine"},
{"one", "", "three", "four", "five", "six", "seven", "eight", "nine"},
@@ -1264,7 +1263,7 @@
{"\xF7\xBF\xBF\xBF", "", "", "", "", "", "", "", ""},
{"one", "two", "three", "four", "\xF7\xBF\xBF\xBF", "six", "seven", "eight", "nine"},
{"one", "two", "\xF7\xBF\xBF\xBF", "four", "five", "six", "seven", "eight", "nine"},
-};
+}};
INSTANTIATE_TEST_CASE_P(string_cases, StringReductionTest, testing::ValuesIn(string_list));
TEST_P(StringReductionTest, MinMax)
{
@@ -2235,7 +2234,7 @@ TYPED_TEST(ReductionTest, NthElement)
struct DictionaryStringReductionTest : public StringReductionTest {};
-std::vector<std::string> data_list[] = {
+std::vector<std::vector<std::string>> data_list = {
{"nine", "two", "five", "three", "five", "six", "two", "eight", "nine"},
};
INSTANTIATE_TEST_CASE_P(dictionary_cases,
diff --git a/cpp/tests/reductions/scan_tests.cpp b/cpp/tests/reductions/scan_tests.cpp
index 76dbbaef491..c4463d68a68 100644
--- a/cpp/tests/reductions/scan_tests.cpp
+++ b/cpp/tests/reductions/scan_tests.cpp
@@ -415,8 +415,8 @@ TEST_F(ScanStringsTest, MoreStringsMinMax)
int row_count = 512;
auto data_begin = cudf::detail::make_counting_transform_iterator(0, [](auto idx) {
- char const s[] = {static_cast<char>('a' + (idx % 26)), 0};
- return std::string(s);
+ char const s = static_cast<char>('a' + (idx % 26));
+ return std::string(1, s);
});
auto validity = cudf::detail::make_counting_transform_iterator(
0, [](auto idx) -> bool { return (idx % 23) != 22; });
diff --git a/cpp/tests/rolling/nth_element_test.cpp b/cpp/tests/rolling/nth_element_test.cpp
index 9cc8b6dec81..2444992e68f 100644
--- a/cpp/tests/rolling/nth_element_test.cpp
+++ b/cpp/tests/rolling/nth_element_test.cpp
@@ -83,7 +83,7 @@ class rolling_exec {
return *this;
}
- std::unique_ptr<cudf::column> test_grouped_nth_element(
+ [[nodiscard]] std::unique_ptr<cudf::column> test_grouped_nth_element(
cudf::size_type n, std::optional<cudf::null_policy> null_handling = std::nullopt) const
{
return cudf::grouped_rolling_window(
null_handling.value_or(_null_handling))); } - std::unique_ptr<cudf::column> test_nth_element( + [[nodiscard]] std::unique_ptr<cudf::column> test_nth_element( cudf::size_type n, std::optional<cudf::null_policy> null_handling = std::nullopt) const { return cudf::rolling_window(_input, diff --git a/cpp/tests/streams/transform_test.cpp b/cpp/tests/streams/transform_test.cpp index 9187672221c..cf81dc6fb42 100644 --- a/cpp/tests/streams/transform_test.cpp +++ b/cpp/tests/streams/transform_test.cpp @@ -32,7 +32,7 @@ class TransformTest : public cudf::test::BaseFixture {}; template <class dtype, class Data> -void test_udf(char const udf[], Data data_init, cudf::size_type size, bool is_ptx) +void test_udf(char const* udf, Data data_init, cudf::size_type size, bool is_ptx) { auto all_valid = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); auto data_iter = cudf::detail::make_counting_transform_iterator(0, data_init); diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index 7e530b2a34d..5923f8dee5a 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -24,6 +24,7 @@ #include <thrust/iterator/transform_iterator.h> +#include <array> #include <vector> struct StringsCharsTest : public cudf::test::BaseFixture {}; @@ -50,20 +51,20 @@ TEST_P(CharsTypes, AllTypes) "de", "\t\r\n\f "}; - bool expecteds[] = {false, false, false, false, false, false, false, false, - false, false, false, false, false, true, false, false, // decimal - false, false, false, false, false, false, false, false, - false, true, false, true, false, true, false, false, // numeric - false, false, false, false, false, false, false, false, - false, false, false, true, false, true, false, false, // digit - true, true, false, true, false, false, false, false, - false, false, false, false, false, false, true, false, // alpha - false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, true, // space - false, false, false, true, false, false, false, false, - false, false, false, false, false, false, false, false, // upper - false, true, false, false, false, false, false, false, - false, false, false, false, false, false, true, false}; // lower + std::array expecteds{false, false, false, false, false, false, false, false, + false, false, false, false, false, true, false, false, // decimal + false, false, false, false, false, false, false, false, + false, true, false, true, false, true, false, false, // numeric + false, false, false, false, false, false, false, false, + false, false, false, true, false, true, false, false, // digit + true, true, false, true, false, false, false, false, + false, false, false, false, false, false, true, false, // alpha + false, false, false, false, false, false, false, false, + false, false, false, false, false, false, false, true, // space + false, false, false, true, false, false, false, false, + false, false, false, false, false, false, false, false, // upper + false, true, false, false, false, false, false, false, + false, false, false, false, false, false, true, false}; // lower auto is_parm = GetParam(); diff --git a/cpp/tests/strings/contains_tests.cpp b/cpp/tests/strings/contains_tests.cpp index acf850c7a66..bdfd38267e6 100644 --- a/cpp/tests/strings/contains_tests.cpp +++ b/cpp/tests/strings/contains_tests.cpp @@ -32,6 +32,7 @@ #include <thrust/iterator/transform_iterator.h> #include <algorithm> +#include <array> #include <vector> struct 
StringsContainsTests : public cudf::test::BaseFixture {}; @@ -167,10 +168,8 @@ TEST_F(StringsContainsTests, MatchesTest) auto strings_view = cudf::strings_column_view(strings); { auto const pattern = std::string("lazy"); - bool h_expected[] = {false, false, true, false, false, false, false}; cudf::test::fixed_width_column_wrapper<bool> expected( - h_expected, - h_expected + h_strings.size(), + {false, false, true, false, false, false, false}, thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); auto prog = cudf::strings::regex_program::create(pattern); auto results = cudf::strings::matches_re(strings_view, *prog); @@ -178,10 +177,8 @@ TEST_F(StringsContainsTests, MatchesTest) } { auto const pattern = std::string("\\d+"); - bool h_expected[] = {false, false, false, true, true, false, false}; cudf::test::fixed_width_column_wrapper<bool> expected( - h_expected, - h_expected + h_strings.size(), + {false, false, false, true, true, false, false}, thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); auto prog = cudf::strings::regex_program::create(pattern); auto results = cudf::strings::matches_re(strings_view, *prog); @@ -189,10 +186,8 @@ TEST_F(StringsContainsTests, MatchesTest) } { auto const pattern = std::string("@\\w+"); - bool h_expected[] = {false, false, false, false, false, false, false}; cudf::test::fixed_width_column_wrapper<bool> expected( - h_expected, - h_expected + h_strings.size(), + {false, false, false, false, false, false, false}, thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); auto prog = cudf::strings::regex_program::create(pattern); auto results = cudf::strings::matches_re(strings_view, *prog); @@ -200,10 +195,8 @@ TEST_F(StringsContainsTests, MatchesTest) } { auto const pattern = std::string(".*"); - bool h_expected[] = {true, true, true, true, true, false, true}; cudf::test::fixed_width_column_wrapper<bool> expected( - h_expected, - h_expected + h_strings.size(), + {true, true, true, true, true, false, true}, thrust::make_transform_iterator(h_strings.begin(), [](auto str) { return str != nullptr; })); auto prog = cudf::strings::regex_program::create(pattern); auto results = cudf::strings::matches_re(strings_view, *prog); @@ -335,9 +328,9 @@ TEST_F(StringsContainsTests, EmbeddedNullCharacter) { std::vector<std::string> data(10); std::generate(data.begin(), data.end(), [n = 0]() mutable { - char first = static_cast<char>('A' + n++); - char raw_data[] = {first, '\0', 'B'}; - return std::string{raw_data, 3}; + char first = static_cast<char>('A' + n++); + std::array raw_data = {first, '\0', 'B'}; + return std::string{raw_data.data(), 3}; }); cudf::test::strings_column_wrapper input(data.begin(), data.end()); auto strings_view = cudf::strings_column_view(input); @@ -749,11 +742,11 @@ TEST_F(StringsContainsTests, ASCII) auto input = cudf::test::strings_column_wrapper({"abc \t\f\r 12", "áé ❽❽", "aZ ❽4", "XYZ 8"}); auto view = cudf::strings_column_view(input); - std::string patterns[] = {R"(\w+[\s]+\d+)", - R"([^\W]+\s+[^\D]+)", - R"([\w]+[^\S]+[\d]+)", - R"([\w]+\s+[\d]+)", - R"(\w+\s+\d+)"}; + std::array patterns = {R"(\w+[\s]+\d+)", + R"([^\W]+\s+[^\D]+)", + R"([\w]+[^\S]+[\d]+)", + R"([\w]+\s+[\d]+)", + R"(\w+\s+\d+)"}; for (auto ptn : patterns) { auto expected_contains = cudf::test::fixed_width_column_wrapper<bool>({1, 0, 0, 0}); @@ -787,24 +780,18 @@ TEST_F(StringsContainsTests, MediumRegex) auto strings_view = cudf::strings_column_view(strings); { 
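// (Illustrative sketch, not part of this PR.) The MatchesTest hunks above drop the
// intermediate h_expected arrays: the wrapper takes the expected values as an inline
// initializer list paired with a validity iterator derived from the nullable host strings.
// A minimal self-contained version of that pairing; the test name and data are hypothetical:
#include <cudf_test/column_wrapper.hpp>
#include <thrust/iterator/transform_iterator.h>
#include <gtest/gtest.h>
#include <vector>
TEST(ValidityIteratorSketch, PairsValuesWithNullMask)
{
  std::vector<char const*> h_strings{"abc", nullptr, "def"};
  // Element i of the wrapper is valid exactly when h_strings[i] is non-null.
  auto validity = thrust::make_transform_iterator(
    h_strings.begin(), [](auto str) { return str != nullptr; });
  cudf::test::fixed_width_column_wrapper<bool> expected({true, false, true}, validity);
}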
- auto results = cudf::strings::contains_re(strings_view, *prog); - bool h_expected[] = {true, false, false}; - cudf::test::fixed_width_column_wrapper<bool> expected(h_expected, - h_expected + h_strings.size()); + auto results = cudf::strings::contains_re(strings_view, *prog); + cudf::test::fixed_width_column_wrapper<bool> expected({true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } { - auto results = cudf::strings::matches_re(strings_view, *prog); - bool h_expected[] = {true, false, false}; - cudf::test::fixed_width_column_wrapper<bool> expected(h_expected, - h_expected + h_strings.size()); + auto results = cudf::strings::matches_re(strings_view, *prog); + cudf::test::fixed_width_column_wrapper<bool> expected({true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } { - auto results = cudf::strings::count_re(strings_view, *prog); - int32_t h_expected[] = {1, 0, 0}; - cudf::test::fixed_width_column_wrapper<int32_t> expected(h_expected, - h_expected + h_strings.size()); + auto results = cudf::strings::count_re(strings_view, *prog); + cudf::test::fixed_width_column_wrapper<int32_t> expected({1, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } } @@ -828,24 +815,18 @@ TEST_F(StringsContainsTests, LargeRegex) auto strings_view = cudf::strings_column_view(strings); { - auto results = cudf::strings::contains_re(strings_view, *prog); - bool h_expected[] = {true, false, false}; - cudf::test::fixed_width_column_wrapper<bool> expected(h_expected, - h_expected + h_strings.size()); + auto results = cudf::strings::contains_re(strings_view, *prog); + cudf::test::fixed_width_column_wrapper<bool> expected({true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } { - auto results = cudf::strings::matches_re(strings_view, *prog); - bool h_expected[] = {true, false, false}; - cudf::test::fixed_width_column_wrapper<bool> expected(h_expected, - h_expected + h_strings.size()); + auto results = cudf::strings::matches_re(strings_view, *prog); + cudf::test::fixed_width_column_wrapper<bool> expected({true, false, false}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } { - auto results = cudf::strings::count_re(strings_view, *prog); - int32_t h_expected[] = {1, 0, 0}; - cudf::test::fixed_width_column_wrapper<int32_t> expected(h_expected, - h_expected + h_strings.size()); + auto results = cudf::strings::count_re(strings_view, *prog); + cudf::test::fixed_width_column_wrapper<int32_t> expected({1, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(*results, expected); } } diff --git a/cpp/tests/strings/durations_tests.cpp b/cpp/tests/strings/durations_tests.cpp index 86189b29981..f2e31339035 100644 --- a/cpp/tests/strings/durations_tests.cpp +++ b/cpp/tests/strings/durations_tests.cpp @@ -24,6 +24,7 @@ #include <thrust/iterator/transform_iterator.h> +#include <array> #include <vector> struct StringsDurationsTest : public cudf::test::BaseFixture {}; @@ -403,17 +404,17 @@ TEST_F(StringsDurationsTest, ParseSingle) "01", ""}; // error auto size = cudf::column_view(string_src).size(); - int32_t expected_v[]{0, 0, 1, -1, 23, -23, 59, -59, 99, -99, 0, 1, 0}; - auto it1 = - thrust::make_transform_iterator(expected_v, [](auto i) { return cudf::duration_s{i * 3600}; }); + std::array expected_v{0, 0, 1, -1, 23, -23, 59, -59, 99, -99, 0, 1, 0}; + auto it1 = thrust::make_transform_iterator(expected_v.data(), + [](auto i) { return cudf::duration_s{i * 3600}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_s> expected_s1(it1, 
it1 + size); auto results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_s>()), "%H"); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_s1); - auto it2 = - thrust::make_transform_iterator(expected_v, [](auto i) { return cudf::duration_s{i * 60}; }); + auto it2 = thrust::make_transform_iterator(expected_v.data(), + [](auto i) { return cudf::duration_s{i * 60}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_s> expected_s2(it2, it2 + size); results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_s>()), @@ -421,14 +422,14 @@ TEST_F(StringsDurationsTest, ParseSingle) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_s2); auto it3 = - thrust::make_transform_iterator(expected_v, [](auto i) { return cudf::duration_s{i}; }); + thrust::make_transform_iterator(expected_v.data(), [](auto i) { return cudf::duration_s{i}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_s> expected_s3(it3, it3 + size); results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_s>()), "%S"); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_s3); - auto it4 = thrust::make_transform_iterator(expected_v, + auto it4 = thrust::make_transform_iterator(expected_v.data(), [](auto i) { return cudf::duration_ms{i * 60000}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_ms> expected_ms(it4, it4 + size); results = cudf::strings::to_durations(cudf::strings_column_view(string_src), @@ -454,21 +455,21 @@ TEST_F(StringsDurationsTest, ParseMultiple) "01:01:01", ""}; // error auto size = cudf::column_view(string_src).size(); - int32_t expected_v[]{0, - 0, - -1, - -(3600 + 60 + 1), - 23 * 3600 + 1, - -(23 * 3600 + 1), - 59 * 3600, - -59 * 3600, - 99 * 3600, - -99 * 3600, - 0, - 3661, - 0}; + std::array expected_v{0, + 0, + -1, + -(3600 + 60 + 1), + 23 * 3600 + 1, + -(23 * 3600 + 1), + 59 * 3600, + -59 * 3600, + 99 * 3600, + -99 * 3600, + 0, + 3661, + 0}; auto it1 = - thrust::make_transform_iterator(expected_v, [](auto i) { return cudf::duration_s{i}; }); + thrust::make_transform_iterator(expected_v.data(), [](auto i) { return cudf::duration_s{i}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_s> expected_s1(it1, it1 + size); auto results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_s>()), @@ -476,7 +477,7 @@ TEST_F(StringsDurationsTest, ParseMultiple) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_s1); auto it2 = thrust::make_transform_iterator( - expected_v, [](auto i) { return cudf::duration_D{i / (24 * 3600)}; }); + expected_v.data(), [](auto i) { return cudf::duration_D{i / (24 * 3600)}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_D> expected_D2(it2, it2 + size); results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_D>()), @@ -508,28 +509,28 @@ TEST_F(StringsDurationsTest, ParseSubsecond) "01:01:01", ""}; // error auto size = cudf::column_view(string_src).size(); - int64_t expected_v[]{0, - -123456789L, - -1000666999L, - -((3600 + 60 + 1) * 1000000000L + 100000000L), - (23 * 3600 + 1) * 1000000000L + 80L, - -((23 * 3600 + 1) * 1000000000L + 123000000L), - (59 * 3600) * 1000000000L, - -(59 * 3600) * 1000000000L, - (99 * 3600) * 1000000000L, - -(99 * 3600) * 1000000000L, - 0, - (3661) * 1000000000L, - 0}; + std::array<int64_t, 13> 
expected_v{0, + -123456789L, + -1000666999L, + -((3600 + 60 + 1) * 1000000000L + 100000000L), + (23 * 3600 + 1) * 1000000000L + 80L, + -((23 * 3600 + 1) * 1000000000L + 123000000L), + (59 * 3600) * 1000000000L, + -(59 * 3600) * 1000000000L, + (99 * 3600) * 1000000000L, + -(99 * 3600) * 1000000000L, + 0, + (3661) * 1000000000L, + 0}; auto it1 = - thrust::make_transform_iterator(expected_v, [](auto i) { return cudf::duration_ns{i}; }); + thrust::make_transform_iterator(expected_v.data(), [](auto i) { return cudf::duration_ns{i}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_ns> expected_ns1(it1, it1 + size); auto results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_ns>()), "%H:%M:%S"); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_ns1); - auto it2 = thrust::make_transform_iterator(expected_v, + auto it2 = thrust::make_transform_iterator(expected_v.data(), [](auto i) { return cudf::duration_ms{i / 1000000}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_ms> expected_ms2(it2, it2 + size); results = cudf::strings::to_durations(cudf::strings_column_view(string_src), @@ -559,25 +560,25 @@ TEST_F(StringsDurationsTest, ParseAMPM) "01:01:01", // error ""}; // error auto size = cudf::column_view(string_src).size(); - int32_t expected_v[]{0, - 0 + 12 * 3600, - 0, - 0 - 12 * 3600, - -1, - -1 - 12 * 3600, - -(3600 + 60 + 1), - -(3600 + 60 + 1) - 12 * 3600, - 11 * 3600 + 59 * 60 + 59, - 11 * 3600 + 59 * 60 + 59 + 12 * 3600, - -(11 * 3600 + 59 * 60 + 59), - -(11 * 3600 + 59 * 60 + 59 + 12 * 3600), - 0, - 0, - 0, - 0, - 0}; + std::array expected_v{0, + 0 + 12 * 3600, + 0, + 0 - 12 * 3600, + -1, + -1 - 12 * 3600, + -(3600 + 60 + 1), + -(3600 + 60 + 1) - 12 * 3600, + 11 * 3600 + 59 * 60 + 59, + 11 * 3600 + 59 * 60 + 59 + 12 * 3600, + -(11 * 3600 + 59 * 60 + 59), + -(11 * 3600 + 59 * 60 + 59 + 12 * 3600), + 0, + 0, + 0, + 0, + 0}; auto it1 = - thrust::make_transform_iterator(expected_v, [](auto i) { return cudf::duration_s{i}; }); + thrust::make_transform_iterator(expected_v.data(), [](auto i) { return cudf::duration_s{i}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_s> expected_s1(it1, it1 + size); auto results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_s>()), @@ -585,7 +586,7 @@ TEST_F(StringsDurationsTest, ParseAMPM) CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_s1); auto it2 = thrust::make_transform_iterator( - expected_v, [](auto i) { return cudf::duration_D{i / (24 * 3600)}; }); + expected_v.data(), [](auto i) { return cudf::duration_D{i / (24 * 3600)}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_D> expected_D2(it2, it2 + size); results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_D>()), @@ -616,20 +617,20 @@ TEST_F(StringsDurationsTest, ParseCompoundSpecifier) "01:01:01", // error ""}; // error auto size = cudf::column_view(string_src).size(); - int32_t expected_v[]{0, - 0 + 12 * 3600, - 1, - 1 + 12 * 3600, - (3600 + 60 + 1), - (3600 + 60 + 1) + 12 * 3600, - 11 * 3600 + 59 * 60 + 59, - 11 * 3600 + 59 * 60 + 59 + 12 * 3600, - 0, - 0, - 0, - 0}; + std::array expected_v{0, + 0 + 12 * 3600, + 1, + 1 + 12 * 3600, + (3600 + 60 + 1), + (3600 + 60 + 1) + 12 * 3600, + 11 * 3600 + 59 * 60 + 59, + 11 * 3600 + 59 * 60 + 59 + 12 * 3600, + 0, + 0, + 0, + 0}; auto it1 = - thrust::make_transform_iterator(expected_v, [](auto i) { return 
cudf::duration_s{i}; }); + thrust::make_transform_iterator(expected_v.data(), [](auto i) { return cudf::duration_s{i}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_s> expected_s1(it1, it1 + size); auto results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_s>()), @@ -641,8 +642,8 @@ TEST_F(StringsDurationsTest, ParseCompoundSpecifier) "%OI:%OM:%OS %p"); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*results, expected_s1); - auto it2 = - thrust::make_transform_iterator(expected_v, [](auto i) { return cudf::duration_ms{i * 1000}; }); + auto it2 = thrust::make_transform_iterator(expected_v.data(), + [](auto i) { return cudf::duration_ms{i * 1000}; }); cudf::test::fixed_width_column_wrapper<cudf::duration_ms> expected_s2(it2, it2 + size); results = cudf::strings::to_durations(cudf::strings_column_view(string_src), cudf::data_type(cudf::type_to_id<cudf::duration_ms>()), diff --git a/cpp/tests/strings/extract_tests.cpp b/cpp/tests/strings/extract_tests.cpp index 1491da758d5..61246fb098d 100644 --- a/cpp/tests/strings/extract_tests.cpp +++ b/cpp/tests/strings/extract_tests.cpp @@ -275,8 +275,8 @@ TEST_F(StringsExtractTests, ExtractAllTest) auto pattern = std::string("(\\d+) (\\w+)"); - bool valids[] = {true, true, true, false, false, false, true}; - using LCW = cudf::test::lists_column_wrapper<cudf::string_view>; + std::array valids{true, true, true, false, false, false, true}; + using LCW = cudf::test::lists_column_wrapper<cudf::string_view>; LCW expected({LCW{"123", "banana", "7", "eleven"}, LCW{"41", "apple"}, LCW{"6", "péar", "0", "pair"}, @@ -284,7 +284,7 @@ TEST_F(StringsExtractTests, ExtractAllTest) LCW{}, LCW{}, LCW{"4", "paré"}}, - valids); + valids.data()); auto prog = cudf::strings::regex_program::create(pattern); auto results = cudf::strings::extract_all_record(sv, *prog); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); diff --git a/cpp/tests/strings/findall_tests.cpp b/cpp/tests/strings/findall_tests.cpp index 47606b9b3ed..73da4d081e2 100644 --- a/cpp/tests/strings/findall_tests.cpp +++ b/cpp/tests/strings/findall_tests.cpp @@ -33,10 +33,10 @@ struct StringsFindallTests : public cudf::test::BaseFixture {}; TEST_F(StringsFindallTests, FindallTest) { - bool valids[] = {true, true, true, true, true, false, true, true}; + std::array valids{true, true, true, true, true, false, true, true}; cudf::test::strings_column_wrapper input( {"3-A", "4-May 5-Day 6-Hay", "12-Dec-2021-Jan", "Feb-March", "4 ABC", "", "", "25-9000-Hal"}, - valids); + valids.data()); auto sv = cudf::strings_column_view(input); auto pattern = std::string("(\\d+)-(\\w+)"); @@ -50,7 +50,7 @@ TEST_F(StringsFindallTests, FindallTest) LCW{}, LCW{}, LCW{"25-9000"}}, - valids); + valids.data()); auto prog = cudf::strings::regex_program::create(pattern); auto results = cudf::strings::findall(sv, *prog); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); @@ -148,3 +148,31 @@ TEST_F(StringsFindallTests, LargeRegex) LCW expected({LCW{large_regex.c_str()}, LCW{}, LCW{}}); CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); } + +TEST_F(StringsFindallTests, NoMatches) +{ + cudf::test::strings_column_wrapper input({"abc\nfff\nabc", "fff\nabc\nlll", "abc", "", "abc\n"}); + auto sv = cudf::strings_column_view(input); + + auto pattern = std::string("(^zzz$)"); + using LCW = cudf::test::lists_column_wrapper<cudf::string_view>; + LCW expected({LCW{}, LCW{}, LCW{}, LCW{}, LCW{}}); + auto prog = 
cudf::strings::regex_program::create(pattern); + auto results = cudf::strings::findall(sv, *prog); + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); +} + +TEST_F(StringsFindallTests, EmptyTest) +{ + std::string pattern = R"(\w+)"; + + auto prog = cudf::strings::regex_program::create(pattern); + + cudf::test::strings_column_wrapper input; + auto sv = cudf::strings_column_view(input); + auto results = cudf::strings::findall(sv, *prog); + + using LCW = cudf::test::lists_column_wrapper<cudf::string_view>; + LCW expected; + CUDF_TEST_EXPECT_COLUMNS_EQUIVALENT(results->view(), expected); +} diff --git a/cpp/tests/transform/integration/unary_transform_test.cpp b/cpp/tests/transform/integration/unary_transform_test.cpp index 5fa02d9978a..1785848ec77 100644 --- a/cpp/tests/transform/integration/unary_transform_test.cpp +++ b/cpp/tests/transform/integration/unary_transform_test.cpp @@ -30,7 +30,7 @@ namespace transformation { struct UnaryOperationIntegrationTest : public cudf::test::BaseFixture {}; template <class dtype, class Op, class Data> -void test_udf(char const udf[], Op op, Data data_init, cudf::size_type size, bool is_ptx) +void test_udf(char const* udf, Op op, Data data_init, cudf::size_type size, bool is_ptx) { auto all_valid = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); auto data_iter = cudf::detail::make_counting_transform_iterator(0, data_init); diff --git a/dependencies.yaml b/dependencies.yaml index 911c443d294..ed36a23e5c3 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -25,6 +25,7 @@ files: - rapids_build_setuptools - run_common - run_cudf + - run_cudf_polars - run_pylibcudf - run_dask_cudf - run_custreamz @@ -604,7 +605,7 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - cachetools - - &numba numba>=0.57 + - &numba-cuda-dep numba-cuda>=0.0.13 - nvtx>=0.2.1 - packaging - rich @@ -719,7 +720,7 @@ dependencies: matrices: - matrix: {dependencies: "oldest"} packages: - - numba==0.57.* + - *numba-cuda-dep - pandas==2.0.* - matrix: packages: @@ -801,7 +802,7 @@ dependencies: - output_types: [conda, requirements, pyproject] packages: - dask-cuda==24.12.*,>=0.0.0a0 - - *numba + - *numba-cuda-dep specific: - output_types: [conda, requirements] matrices: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index d6f8cd2a1ff..e21536e2e97 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -25,6 +25,7 @@ This page provides API documentation for pylibcudf. lists merge null_mask + partitioning quantiles reduce replace @@ -38,6 +39,7 @@ This page provides API documentation for pylibcudf. table traits transform + transpose types unary diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst index c8933981736..53638f071cc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst @@ -19,3 +19,4 @@ I/O Functions csv json parquet + timezone diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst new file mode 100644 index 00000000000..20c1ffc2e93 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst @@ -0,0 +1,6 @@ +======== +Timezone +======== + +.. 
automodule:: pylibcudf.io.timezone + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst new file mode 100644 index 00000000000..6951dbecca0 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/partitioning.rst @@ -0,0 +1,6 @@ +============ +partitioning +============ + +.. automodule:: pylibcudf.partitioning + :members: diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/transpose.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/transpose.rst new file mode 100644 index 00000000000..6241295e770 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/transpose.rst @@ -0,0 +1,6 @@ +========= +transpose +========= + +.. automodule:: pylibcudf.transpose + :members: diff --git a/docs/dask_cudf/source/best_practices.rst b/docs/dask_cudf/source/best_practices.rst index 6cd098da56d..41263ebf589 100644 --- a/docs/dask_cudf/source/best_practices.rst +++ b/docs/dask_cudf/source/best_practices.rst @@ -160,7 +160,7 @@ of the underlying task graph to materialize the collection. :func:`sort_values` / :func:`set_index` : These operations both require Dask to eagerly collect quantile information about the column(s) being targeted by the -global sort operation. See `Avoid Sorting`__ for notes on sorting considerations. +global sort operation. See the next section for notes on sorting considerations. .. note:: When using :func:`set_index`, be sure to pass in ``sort=False`` whenever the @@ -252,6 +252,15 @@ result in a simple 1-to-1 mapping between files and output partitions. correspond to a reasonable partition size, use ``blocksize=None`` to avoid unnecessary metadata collection. +.. note:: + When reading from remote storage (e.g. S3 and GCS), performance will + likely improve with ``filesystem="arrow"``. When this option is set, + PyArrow will be used to perform IO on multiple CPU threads. Please be + aware that this feature is experimental, and behavior may change in + the future (without deprecation). Do not pass in ``blocksize`` or + ``aggregate_files`` when this feature is used. Instead, set the + ``"dataframe.parquet.minimum-partition-size"`` config to control + file aggregation. Use :func:`from_map` ~~~~~~~~~~~~~~~~~~~~ @@ -288,11 +297,14 @@ bottleneck is typically device-to-host memory spilling. Although every workflow is different, the following guidelines are often recommended: -* `Use a distributed cluster with Dask-CUDA workers <Use Dask-CUDA>`_ -* `Use native cuDF spilling whenever possible <Enable cuDF Spilling>`_ +* Use a distributed cluster with `Dask-CUDA <https://docs.rapids.ai/api/dask-cuda/stable/>`__ workers + +* Use native cuDF spilling whenever possible (`Dask-CUDA spilling documentation <https://docs.rapids.ai/api/dask-cuda/stable/spilling/>`__) + * Avoid shuffling whenever possible - * Use ``split_out=1`` for low-cardinality groupby aggregations - * Use ``broadcast=True`` for joins when at least one collection comprises a small number of partitions (e.g. ``<=5``) + * Use ``split_out=1`` for low-cardinality groupby aggregations + * Use ``broadcast=True`` for joins when at least one collection comprises a small number of partitions (e.g. ``<=5``) + * `Use UCX <https://docs.rapids.ai/api/dask-cuda/nightly/examples/ucx/>`__ if communication is a bottleneck. .. 
note:: diff --git a/docs/dask_cudf/source/conf.py b/docs/dask_cudf/source/conf.py index dc40254312e..5daa8245695 100644 --- a/docs/dask_cudf/source/conf.py +++ b/docs/dask_cudf/source/conf.py @@ -78,6 +78,7 @@ "cudf": ("https://docs.rapids.ai/api/cudf/stable/", None), "dask": ("https://docs.dask.org/en/stable/", None), "pandas": ("https://pandas.pydata.org/docs/", None), + "dask-cuda": ("https://docs.rapids.ai/api/dask-cuda/stable/", None), } numpydoc_show_inherited_class_members = True diff --git a/docs/dask_cudf/source/index.rst b/docs/dask_cudf/source/index.rst index 23ca7e49753..c2891ebc15e 100644 --- a/docs/dask_cudf/source/index.rst +++ b/docs/dask_cudf/source/index.rst @@ -16,10 +16,9 @@ as the ``"cudf"`` dataframe backend for Neither Dask cuDF nor Dask DataFrame provide support for multi-GPU or multi-node execution on their own. You must also deploy a `dask.distributed <https://distributed.dask.org/en/stable/>`__ cluster - to leverage multiple GPUs. We strongly recommend using `Dask-CUDA - <https://docs.rapids.ai/api/dask-cuda/stable/>`__ to simplify the - setup of the cluster, taking advantage of all features of the GPU - and networking hardware. + to leverage multiple GPUs. We strongly recommend using :doc:`dask-cuda:index` + to simplify the setup of the cluster, taking advantage of all features + of the GPU and networking hardware. If you are familiar with Dask and `pandas <pandas.pydata.org>`__ or `cuDF <https://docs.rapids.ai/api/cudf/stable/>`__, then Dask cuDF @@ -40,9 +39,10 @@ Using Dask cuDF The Dask DataFrame API (Recommended) ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -Simply use the `Dask configuration <dask:configuration>`__ system to -set the ``"dataframe.backend"`` option to ``"cudf"``. From Python, -this can be achieved like so:: +Simply use the `Dask configuration +<https://docs.dask.org/en/stable/how-to/selecting-the-collection-backend.html>`__ +system to set the ``"dataframe.backend"`` option to ``"cudf"``. +From Python, this can be achieved like so:: import dask @@ -160,7 +160,7 @@ out-of-core computing. This also means that the compute tasks can be executed in parallel over a multi-GPU cluster. In order to execute your Dask workflow on multiple GPUs, you will -typically need to use `Dask-CUDA <https://docs.rapids.ai/api/dask-cuda/stable/>`__ +typically need to use :doc:`dask-cuda:index` to deploy distributed Dask cluster, and `Distributed <https://distributed.dask.org/en/stable/client.html>`__ to define a client object. For example:: @@ -191,7 +191,7 @@ to define a client object. For example:: <https://distributed.dask.org/en/stable/manage-computation.html>`__ for more details. -Please see the `Dask-CUDA <https://docs.rapids.ai/api/dask-cuda/stable/>`__ +Please see the :doc:`dask-cuda:index` documentation for more information about deploying GPU-aware clusters (including `best practices <https://docs.rapids.ai/api/dask-cuda/stable/examples/best-practices/>`__). 
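The dask_cudf documentation changes above center on selecting the ``"cudf"`` backend and deploying a Dask-CUDA cluster. For reviewers, a minimal illustrative sketch of that workflow (not part of this PR's diff; it assumes dask, distributed, dask-cuda, and dask_cudf are installed, and the parquet path is a placeholder):

# Sketch: route Dask DataFrame collections to the cudf backend and run
# them on a single-node, multi-GPU Dask-CUDA cluster.
import dask
import dask.dataframe as dd
from dask_cuda import LocalCUDACluster
from distributed import Client

if __name__ == "__main__":
    # New collections will be backed by cudf rather than pandas.
    dask.config.set({"dataframe.backend": "cudf"})
    # LocalCUDACluster starts one worker per visible GPU.
    with Client(LocalCUDACluster()):
        ddf = dd.read_parquet("data/")  # placeholder path
        print(ddf.head())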
diff --git a/java/src/main/java/ai/rapids/cudf/JSONOptions.java b/java/src/main/java/ai/rapids/cudf/JSONOptions.java index 2bb74c3e3b1..e41cc15712f 100644 --- a/java/src/main/java/ai/rapids/cudf/JSONOptions.java +++ b/java/src/main/java/ai/rapids/cudf/JSONOptions.java @@ -39,6 +39,7 @@ public final class JSONOptions extends ColumnFilterOptions { private final boolean allowNonNumericNumbers; private final boolean allowUnquotedControlChars; private final boolean cudfPruneSchema; + private final boolean experimental; private final byte lineDelimiter; private JSONOptions(Builder builder) { @@ -55,6 +56,7 @@ private JSONOptions(Builder builder) { allowNonNumericNumbers = builder.allowNonNumericNumbers; allowUnquotedControlChars = builder.allowUnquotedControlChars; cudfPruneSchema = builder.cudfPruneSchema; + experimental = builder.experimental; lineDelimiter = builder.lineDelimiter; } @@ -111,6 +113,10 @@ public boolean unquotedControlChars() { return allowUnquotedControlChars; } + public boolean experimental() { + return experimental; + } + @Override String[] getIncludeColumnNames() { throw new UnsupportedOperationException("JSON reader didn't support column prune"); @@ -136,6 +142,7 @@ public static final class Builder extends ColumnFilterOptions.Builder<JSONOptio private boolean keepQuotes = false; private boolean cudfPruneSchema = false; + private boolean experimental = false; private byte lineDelimiter = '\n'; public Builder withCudfPruneSchema(boolean prune) { @@ -159,6 +166,14 @@ public Builder withStrictValidation(boolean isAllowed) { return this; } + /** + * Whether experimental features should be enabled. + */ + public Builder withExperimental(boolean isAllowed) { + experimental = isAllowed; + return this; + } + /** * Should leading zeros on numbers be allowed or not. Strict validation * must be enabled for this to have any effect.
diff --git a/java/src/main/java/ai/rapids/cudf/Table.java b/java/src/main/java/ai/rapids/cudf/Table.java index 6d370ca27b2..dbee53640aa 100644 --- a/java/src/main/java/ai/rapids/cudf/Table.java +++ b/java/src/main/java/ai/rapids/cudf/Table.java @@ -260,6 +260,7 @@ private static native long readJSON(int[] numChildren, String[] columnNames, boolean allowNonNumericNumbers, boolean allowUnquotedControl, boolean pruneColumns, + boolean experimental, byte lineDelimiter) throws CudfException; private static native long readJSONFromDataSource(int[] numChildren, String[] columnNames, @@ -275,6 +276,7 @@ private static native long readJSONFromDataSource(int[] numChildren, String[] co boolean allowNonNumericNumbers, boolean allowUnquotedControl, boolean pruneColumns, + boolean experimental, byte lineDelimiter, long dsHandle) throws CudfException; @@ -288,6 +290,7 @@ private static native long readAndInferJSONFromDataSource(boolean dayFirst, bool boolean allowLeadingZeros, boolean allowNonNumericNumbers, boolean allowUnquotedControl, + boolean experimental, byte lineDelimiter, long dsHandle) throws CudfException; @@ -303,6 +306,7 @@ private static native long readAndInferJSON(long address, long length, boolean allowLeadingZeros, boolean allowNonNumericNumbers, boolean allowUnquotedControl, + boolean experimental, byte lineDelimiter) throws CudfException; /** @@ -1333,6 +1337,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, File path) { opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), cudfPruneSchema, + opts.experimental(), opts.getLineDelimiter()))) { return gatherJSONColumns(schema, twm, -1); @@ -1417,6 +1422,7 @@ public static TableWithMeta readJSON(JSONOptions opts, HostMemoryBuffer buffer, opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + opts.experimental(), opts.getLineDelimiter())); } @@ -1439,6 +1445,7 @@ public static TableWithMeta readAndInferJSON(JSONOptions opts, DataSource ds) { opts.leadingZerosAllowed(), opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), + opts.experimental(), opts.getLineDelimiter(), dsHandle)); return twm; @@ -1499,6 +1506,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, HostMemoryBuffer b opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), cudfPruneSchema, + opts.experimental(), opts.getLineDelimiter()))) { return gatherJSONColumns(schema, twm, emptyRowCount); } @@ -1543,6 +1551,7 @@ public static Table readJSON(Schema schema, JSONOptions opts, DataSource ds, int opts.nonNumericNumbersAllowed(), opts.unquotedControlChars(), cudfPruneSchema, + opts.experimental(), opts.getLineDelimiter(), dsHandle))) { return gatherJSONColumns(schema, twm, emptyRowCount); diff --git a/java/src/main/native/src/TableJni.cpp b/java/src/main/native/src/TableJni.cpp index 0f77da54152..0a667978ca3 100644 --- a/java/src/main/native/src/TableJni.cpp +++ b/java/src/main/native/src/TableJni.cpp @@ -1627,6 +1627,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, + jboolean experimental, jbyte line_delimiter, jlong ds_handle) { @@ -1649,6 +1650,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSONFromDataSource(JNIEnv* env, .mixed_types_as_string(mixed_types_as_string) .delimiter(static_cast<char>(line_delimiter)) .strict_validation(strict_validation) + .experimental(experimental) .keep_quotes(keep_quotes) .prune_columns(false); if (strict_validation) { @@ -1680,6 +1682,7 
@@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env, jboolean allow_leading_zeros, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, + jboolean experimental, jbyte line_delimiter) { JNI_NULL_CHECK(env, buffer, "buffer cannot be null", 0); @@ -1705,6 +1708,7 @@ Java_ai_rapids_cudf_Table_readAndInferJSON(JNIEnv* env, .strict_validation(strict_validation) .mixed_types_as_string(mixed_types_as_string) .prune_columns(false) + .experimental(experimental) .delimiter(static_cast<char>(line_delimiter)) .keep_quotes(keep_quotes); if (strict_validation) { @@ -1821,6 +1825,7 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, jboolean prune_columns, + jboolean experimental, jbyte line_delimiter, jlong ds_handle) { @@ -1859,7 +1864,8 @@ Java_ai_rapids_cudf_Table_readJSONFromDataSource(JNIEnv* env, .delimiter(static_cast<char>(line_delimiter)) .strict_validation(strict_validation) .keep_quotes(keep_quotes) - .prune_columns(prune_columns); + .prune_columns(prune_columns) + .experimental(experimental); if (strict_validation) { opts.numeric_leading_zeros(allow_leading_zeros) .nonnumeric_numbers(allow_nonnumeric_numbers) @@ -1920,6 +1926,7 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, jboolean allow_nonnumeric_numbers, jboolean allow_unquoted_control, jboolean prune_columns, + jboolean experimental, jbyte line_delimiter) { bool read_buffer = true; @@ -1972,7 +1979,8 @@ JNIEXPORT jlong JNICALL Java_ai_rapids_cudf_Table_readJSON(JNIEnv* env, .delimiter(static_cast<char>(line_delimiter)) .strict_validation(strict_validation) .keep_quotes(keep_quotes) - .prune_columns(prune_columns); + .prune_columns(prune_columns) + .experimental(experimental); if (strict_validation) { opts.numeric_leading_zeros(allow_leading_zeros) .nonnumeric_numbers(allow_nonnumeric_numbers) diff --git a/python/cudf/cudf/_lib/aggregation.pyx b/python/cudf/cudf/_lib/aggregation.pyx index 7c91533cf93..3c96b90f0a1 100644 --- a/python/cudf/cudf/_lib/aggregation.pyx +++ b/python/cudf/cudf/_lib/aggregation.pyx @@ -78,8 +78,11 @@ class Aggregation: ) @classmethod - def nunique(cls): - return cls(pylibcudf.aggregation.nunique(pylibcudf.types.NullPolicy.EXCLUDE)) + def nunique(cls, dropna=True): + return cls(pylibcudf.aggregation.nunique( + pylibcudf.types.NullPolicy.EXCLUDE + if dropna else pylibcudf.types.NullPolicy.INCLUDE + )) @classmethod def nth(cls, size): diff --git a/python/cudf/cudf/_lib/hash.pyx b/python/cudf/cudf/_lib/hash.pyx index 48f75b12a73..9b7ab0888d2 100644 --- a/python/cudf/cudf/_lib/hash.pyx +++ b/python/cudf/cudf/_lib/hash.pyx @@ -3,11 +3,8 @@ from cudf.core.buffer import acquire_spill_lock from libcpp.memory cimport unique_ptr -from libcpp.pair cimport pair from libcpp.utility cimport move -from libcpp.vector cimport vector -cimport pylibcudf.libcudf.types as libcudf_types from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.hash cimport ( md5, @@ -19,37 +16,23 @@ from pylibcudf.libcudf.hash cimport ( sha512, xxhash_64, ) -from pylibcudf.libcudf.partitioning cimport ( - hash_partition as cpp_hash_partition, -) -from pylibcudf.libcudf.table.table cimport table from pylibcudf.libcudf.table.table_view cimport table_view from cudf._lib.column cimport Column -from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns +from cudf._lib.utils cimport table_view_from_columns + +import pylibcudf as plc @acquire_spill_lock() -def hash_partition(list source_columns, 
object columns_to_hash, +def hash_partition(list source_columns, list columns_to_hash, int num_partitions): - cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash - cdef int c_num_partitions = num_partitions - cdef table_view c_source_view = table_view_from_columns(source_columns) - - cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result - with nogil: - c_result = move( - cpp_hash_partition( - c_source_view, - c_columns_to_hash, - c_num_partitions - ) - ) - - return ( - columns_from_unique_ptr(move(c_result.first)), - list(c_result.second) + plc_table, offsets = plc.partitioning.hash_partition( + plc.Table([col.to_pylibcudf(mode="read") for col in source_columns]), + columns_to_hash, + num_partitions ) + return [Column.from_pylibcudf(col) for col in plc_table.columns()], offsets @acquire_spill_lock() diff --git a/python/cudf/cudf/_lib/io/utils.pxd b/python/cudf/cudf/_lib/io/utils.pxd index 1938f00c179..76a6e32fde0 100644 --- a/python/cudf/cudf/_lib/io/utils.pxd +++ b/python/cudf/cudf/_lib/io/utils.pxd @@ -21,6 +21,10 @@ cdef add_df_col_struct_names( df, child_names_dict ) +cdef update_col_struct_field_names( + Column col, + child_names +) cdef update_struct_field_names( table, vector[column_name_info]& schema_info) diff --git a/python/cudf/cudf/_lib/orc.pyx b/python/cudf/cudf/_lib/orc.pyx index adeba6fffb1..f88c48ce989 100644 --- a/python/cudf/cudf/_lib/orc.pyx +++ b/python/cudf/cudf/_lib/orc.pyx @@ -1,8 +1,5 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -import cudf -from cudf.core.buffer import acquire_spill_lock - from libc.stdint cimport int64_t from libcpp cimport bool, int from libcpp.map cimport map @@ -11,187 +8,43 @@ from libcpp.string cimport string from libcpp.utility cimport move from libcpp.vector cimport vector -import datetime from collections import OrderedDict -cimport pylibcudf.libcudf.lists.lists_column_view as cpp_lists_column_view - try: import ujson as json except ImportError: import json cimport pylibcudf.libcudf.io.types as cudf_io_types +cimport pylibcudf.libcudf.lists.lists_column_view as cpp_lists_column_view from pylibcudf.libcudf.io.data_sink cimport data_sink from pylibcudf.libcudf.io.orc cimport ( chunked_orc_writer_options, orc_chunked_writer, - orc_reader_options, orc_writer_options, - read_orc as libcudf_read_orc, write_orc as libcudf_write_orc, ) -from pylibcudf.libcudf.io.orc_metadata cimport ( - binary_statistics, - bucket_statistics, - column_statistics, - date_statistics, - decimal_statistics, - double_statistics, - integer_statistics, - no_statistics, - parsed_orc_statistics, - read_parsed_orc_statistics as libcudf_read_parsed_orc_statistics, - statistics_type, - string_statistics, - timestamp_statistics, -) from pylibcudf.libcudf.io.types cimport ( column_in_metadata, compression_type, sink_info, - source_info, table_input_metadata, - table_with_metadata, ) from pylibcudf.libcudf.table.table_view cimport table_view -from pylibcudf.libcudf.types cimport data_type, size_type, type_id -from pylibcudf.variant cimport get_if as std_get_if, holds_alternative from cudf._lib.column cimport Column -from cudf._lib.io.utils cimport ( - make_sink_info, - make_source_info, - update_column_struct_field_names, -) +from cudf._lib.io.utils cimport make_sink_info, update_col_struct_field_names +from cudf._lib.utils cimport data_from_pylibcudf_io, table_view_from_table -from cudf._lib.types import SUPPORTED_NUMPY_TO_LIBCUDF_TYPES - -from cudf._lib.types cimport underlying_type_t_type_id -from cudf._lib.utils cimport 
data_from_unique_ptr, table_view_from_table +import pylibcudf as plc +import cudf +from cudf._lib.types import SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES from cudf._lib.utils import _index_level_name, generate_pandas_metadata +from cudf.core.buffer import acquire_spill_lock -cdef _parse_column_type_statistics(column_statistics stats): - # Initialize stats to return and parse stats blob - column_stats = {} - - if stats.number_of_values.has_value(): - column_stats["number_of_values"] = stats.number_of_values.value() - - if stats.has_null.has_value(): - column_stats["has_null"] = stats.has_null.value() - - cdef statistics_type type_specific_stats = stats.type_specific_stats - - cdef integer_statistics* int_stats - cdef double_statistics* dbl_stats - cdef string_statistics* str_stats - cdef bucket_statistics* bucket_stats - cdef decimal_statistics* dec_stats - cdef date_statistics* date_stats - cdef binary_statistics* bin_stats - cdef timestamp_statistics* ts_stats - - if holds_alternative[no_statistics](type_specific_stats): - return column_stats - elif int_stats := std_get_if[integer_statistics](&type_specific_stats): - if int_stats.minimum.has_value(): - column_stats["minimum"] = int_stats.minimum.value() - else: - column_stats["minimum"] = None - if int_stats.maximum.has_value(): - column_stats["maximum"] = int_stats.maximum.value() - else: - column_stats["maximum"] = None - if int_stats.sum.has_value(): - column_stats["sum"] = int_stats.sum.value() - else: - column_stats["sum"] = None - elif dbl_stats := std_get_if[double_statistics](&type_specific_stats): - if dbl_stats.minimum.has_value(): - column_stats["minimum"] = dbl_stats.minimum.value() - else: - column_stats["minimum"] = None - if dbl_stats.maximum.has_value(): - column_stats["maximum"] = dbl_stats.maximum.value() - else: - column_stats["maximum"] = None - if dbl_stats.sum.has_value(): - column_stats["sum"] = dbl_stats.sum.value() - else: - column_stats["sum"] = None - elif str_stats := std_get_if[string_statistics](&type_specific_stats): - if str_stats.minimum.has_value(): - column_stats["minimum"] = str_stats.minimum.value().decode("utf-8") - else: - column_stats["minimum"] = None - if str_stats.maximum.has_value(): - column_stats["maximum"] = str_stats.maximum.value().decode("utf-8") - else: - column_stats["maximum"] = None - if str_stats.sum.has_value(): - column_stats["sum"] = str_stats.sum.value() - else: - column_stats["sum"] = None - elif bucket_stats := std_get_if[bucket_statistics](&type_specific_stats): - column_stats["true_count"] = bucket_stats.count[0] - column_stats["false_count"] = ( - column_stats["number_of_values"] - - column_stats["true_count"] - ) - elif dec_stats := std_get_if[decimal_statistics](&type_specific_stats): - if dec_stats.minimum.has_value(): - column_stats["minimum"] = dec_stats.minimum.value().decode("utf-8") - else: - column_stats["minimum"] = None - if dec_stats.maximum.has_value(): - column_stats["maximum"] = dec_stats.maximum.value().decode("utf-8") - else: - column_stats["maximum"] = None - if dec_stats.sum.has_value(): - column_stats["sum"] = dec_stats.sum.value().decode("utf-8") - else: - column_stats["sum"] = None - elif date_stats := std_get_if[date_statistics](&type_specific_stats): - if date_stats.minimum.has_value(): - column_stats["minimum"] = datetime.datetime.fromtimestamp( - datetime.timedelta(date_stats.minimum.value()).total_seconds(), - datetime.timezone.utc, - ) - else: - column_stats["minimum"] = None - if date_stats.maximum.has_value(): - column_stats["maximum"] = 
datetime.datetime.fromtimestamp( - datetime.timedelta(date_stats.maximum.value()).total_seconds(), - datetime.timezone.utc, - ) - else: - column_stats["maximum"] = None - elif bin_stats := std_get_if[binary_statistics](&type_specific_stats): - if bin_stats.sum.has_value(): - column_stats["sum"] = bin_stats.sum.value() - else: - column_stats["sum"] = None - elif ts_stats := std_get_if[timestamp_statistics](&type_specific_stats): - # Before ORC-135, the local timezone offset was included and they were - # stored as minimum and maximum. After ORC-135, the timestamp is - # adjusted to UTC before being converted to milliseconds and stored - # in minimumUtc and maximumUtc. - # TODO: Support minimum and maximum by reading writer's local timezone - if ts_stats.minimum_utc.has_value() and ts_stats.maximum_utc.has_value(): - column_stats["minimum"] = datetime.datetime.fromtimestamp( - ts_stats.minimum_utc.value() / 1000, datetime.timezone.utc - ) - column_stats["maximum"] = datetime.datetime.fromtimestamp( - ts_stats.maximum_utc.value() / 1000, datetime.timezone.utc - ) - else: - raise ValueError("Unsupported statistics type") - return column_stats - - +# TODO: Consider inlining this function since it seems to only be used in one place. cpdef read_parsed_orc_statistics(filepath_or_buffer): """ Cython function to call into libcudf API, see `read_parsed_orc_statistics`. @@ -201,25 +54,13 @@ cpdef read_parsed_orc_statistics(filepath_or_buffer): cudf.io.orc.read_orc_statistics """ - cdef parsed_orc_statistics parsed = ( - libcudf_read_parsed_orc_statistics(make_source_info([filepath_or_buffer])) + parsed = ( + plc.io.orc.read_parsed_orc_statistics( + plc.io.SourceInfo([filepath_or_buffer]) + ) ) - cdef vector[column_statistics] file_stats = parsed.file_stats - cdef vector[vector[column_statistics]] stripes_stats = parsed.stripes_stats - - parsed_file_stats = [ - _parse_column_type_statistics(file_stats[column_index]) - for column_index in range(file_stats.size()) - ] - - parsed_stripes_stats = [ - [_parse_column_type_statistics(stripes_stats[stripe_index][column_index]) - for column_index in range(stripes_stats[stripe_index].size())] - for stripe_index in range(stripes_stats.size()) - ] - - return parsed.column_names, parsed_file_stats, parsed_stripes_stats + return parsed.column_names, parsed.file_stats, parsed.stripes_stats cpdef read_orc(object filepaths_or_buffers, @@ -235,36 +76,34 @@ cpdef read_orc(object filepaths_or_buffers, See Also -------- cudf.read_orc + + Notes + ----- + Currently this function only considers the metadata of the first file in the list of + filepaths_or_buffers. 
""" - cdef orc_reader_options c_orc_reader_options = make_orc_reader_options( - filepaths_or_buffers, + + if columns is not None: + columns = [str(col) for col in columns] + + tbl_w_meta = plc.io.orc.read_orc( + plc.io.SourceInfo(filepaths_or_buffers), columns, - stripes or [], + stripes, get_skiprows_arg(skip_rows), get_num_rows_arg(num_rows), - ( - type_id.EMPTY - if timestamp_type is None else - <type_id>( - <underlying_type_t_type_id> ( - SUPPORTED_NUMPY_TO_LIBCUDF_TYPES[ - cudf.dtype(timestamp_type) - ] - ) - ) - ), use_index, + plc.types.DataType( + SUPPORTED_NUMPY_TO_PYLIBCUDF_TYPES[ + cudf.dtype(timestamp_type) + ] + ) ) - cdef table_with_metadata c_result - cdef size_type nrows + names = tbl_w_meta.column_names(include_children=False) - with nogil: - c_result = move(libcudf_read_orc(c_orc_reader_options)) - - names = [info.name.decode() for info in c_result.metadata.schema_info] actual_index_names, col_names, is_range_index, reset_index_name, \ - range_idx = _get_index_from_metadata(c_result.metadata.user_data, + range_idx = _get_index_from_metadata(tbl_w_meta.per_file_user_data, names, skip_rows, num_rows) @@ -272,11 +111,11 @@ cpdef read_orc(object filepaths_or_buffers, if columns is not None and (isinstance(columns, list) and len(columns) == 0): # When `columns=[]`, index needs to be # established, but not the columns. - nrows = c_result.tbl.get()[0].view().num_rows() + nrows = tbl_w_meta.tbl.num_rows() return {}, cudf.RangeIndex(nrows) - data, index = data_from_unique_ptr( - move(c_result.tbl), + data, index = data_from_pylibcudf_io( + tbl_w_meta, col_names if columns is None else names, actual_index_names ) @@ -286,11 +125,13 @@ cpdef read_orc(object filepaths_or_buffers, elif reset_index_name: index.names = [None] * len(index.names) + child_name_values = tbl_w_meta.child_names.values() + data = { - name: update_column_struct_field_names( - col, c_result.metadata.schema_info[i] + name: update_col_struct_field_names( + col, child_names ) - for i, (name, col) in enumerate(data.items()) + for (name, col), child_names in zip(data.items(), child_name_values) } return data, index @@ -313,32 +154,35 @@ cdef compression_type _get_comp_type(object compression): raise ValueError(f"Unsupported `compression` type {compression}") cdef tuple _get_index_from_metadata( - map[string, string] user_data, + vector[map[string, string]] user_data, object names, object skip_rows, object num_rows): - json_str = user_data[b'pandas'].decode('utf-8') + meta = None index_col = None is_range_index = False reset_index_name = False range_idx = None - if json_str != "": - meta = json.loads(json_str) - if 'index_columns' in meta and len(meta['index_columns']) > 0: - index_col = meta['index_columns'] - if isinstance(index_col[0], dict) and \ - index_col[0]['kind'] == 'range': - is_range_index = True - else: - index_col_names = OrderedDict() - for idx_col in index_col: - for c in meta['columns']: - if c['field_name'] == idx_col: - index_col_names[idx_col] = \ - c['name'] or c['field_name'] - if c['name'] is None: - reset_index_name = True + + if user_data.size() > 0: + json_str = user_data[0][b'pandas'].decode('utf-8') + if json_str != "": + meta = json.loads(json_str) + if 'index_columns' in meta and len(meta['index_columns']) > 0: + index_col = meta['index_columns'] + if isinstance(index_col[0], dict) and \ + index_col[0]['kind'] == 'range': + is_range_index = True + else: + index_col_names = OrderedDict() + for idx_col in index_col: + for c in meta['columns']: + if c['field_name'] == idx_col: + 
index_col_names[idx_col] = \ + c['name'] or c['field_name'] + if c['name'] is None: + reset_index_name = True actual_index_names = None if index_col is not None and len(index_col) > 0: @@ -473,41 +317,6 @@ cdef int64_t get_num_rows_arg(object arg) except*: return <int64_t> arg -cdef orc_reader_options make_orc_reader_options( - object filepaths_or_buffers, - object column_names, - object stripes, - int64_t skip_rows, - int64_t num_rows, - type_id timestamp_type, - bool use_index -) except*: - - cdef vector[vector[size_type]] strps = stripes - cdef orc_reader_options opts - cdef source_info src = make_source_info(filepaths_or_buffers) - opts = move( - orc_reader_options.builder(src) - .stripes(strps) - .skip_rows(skip_rows) - .timestamp_type(data_type(timestamp_type)) - .use_index(use_index) - .build() - ) - if num_rows >= 0: - opts.set_num_rows(num_rows) - - cdef vector[string] c_column_names - if column_names is not None: - c_column_names.reserve(len(column_names)) - for col in column_names: - c_column_names.push_back(str(col).encode()) - if len(column_names) > 0: - opts.set_columns(c_column_names) - - return opts - - cdef class ORCWriter: """ ORCWriter lets you you incrementally write out a ORC file from a series diff --git a/python/cudf/cudf/_lib/partitioning.pyx b/python/cudf/cudf/_lib/partitioning.pyx index d94f0e1b564..13997da8403 100644 --- a/python/cudf/cudf/_lib/partitioning.pyx +++ b/python/cudf/cudf/_lib/partitioning.pyx @@ -2,24 +2,13 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.pair cimport pair -from libcpp.utility cimport move -from libcpp.vector cimport vector - -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.partitioning cimport partition as cpp_partition -from pylibcudf.libcudf.table.table cimport table -from pylibcudf.libcudf.table.table_view cimport table_view - from cudf._lib.column cimport Column -from cudf._lib.utils cimport columns_from_unique_ptr, table_view_from_columns + +import pylibcudf as plc from cudf._lib.reduce import minmax from cudf._lib.stream_compaction import distinct_count as cpp_distinct_count -cimport pylibcudf.libcudf.types as libcudf_types - @acquire_spill_lock() def partition(list source_columns, Column partition_map, @@ -50,25 +39,15 @@ def partition(list source_columns, Column partition_map, if num_partitions is None: num_partitions = cpp_distinct_count(partition_map, ignore_nulls=True) - cdef int c_num_partitions = num_partitions - cdef table_view c_source_view = table_view_from_columns(source_columns) - - cdef column_view c_partition_map_view = partition_map.view() - cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result if partition_map.size > 0: lo, hi = minmax(partition_map) if lo < 0 or hi >= num_partitions: raise ValueError("Partition map has invalid values") - with nogil: - c_result = move( - cpp_partition( - c_source_view, - c_partition_map_view, - c_num_partitions - ) - ) - return ( - columns_from_unique_ptr(move(c_result.first)), list(c_result.second) + plc_table, offsets = plc.partitioning.partition( + plc.Table([col.to_pylibcudf(mode="read") for col in source_columns]), + partition_map.to_pylibcudf(mode="read"), + num_partitions ) + return [Column.from_pylibcudf(col) for col in plc_table.columns()], offsets diff --git a/python/cudf/cudf/_lib/strings/attributes.pyx b/python/cudf/cudf/_lib/strings/attributes.pyx index fe8c17c9e31..df81b3942b4 100644 --- a/python/cudf/cudf/_lib/strings/attributes.pyx +++ 
b/python/cudf/cudf/_lib/strings/attributes.pyx @@ -2,19 +2,10 @@ from cudf.core.buffer import acquire_spill_lock -from libcpp.memory cimport unique_ptr -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.column.column_view cimport column_view -from pylibcudf.libcudf.strings.attributes cimport ( - code_points as cpp_code_points, - count_bytes as cpp_count_bytes, - count_characters as cpp_count_characters, -) - from cudf._lib.column cimport Column +import pylibcudf as plc + @acquire_spill_lock() def count_characters(Column source_strings): @@ -22,13 +13,10 @@ def count_characters(Column source_strings): Returns an integer numeric column containing the length of each string in characters. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_count_characters(source_view)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.attributes.count_characters( + source_strings.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() @@ -37,13 +25,10 @@ def count_bytes(Column source_strings): Returns an integer numeric column containing the number of bytes of each string. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_count_bytes(source_view)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.attributes.count_bytes( + source_strings.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) @acquire_spill_lock() @@ -52,10 +37,7 @@ def code_points(Column source_strings): Creates a numeric column with code point values (integers) for each character of each string. """ - cdef unique_ptr[column] c_result - cdef column_view source_view = source_strings.view() - - with nogil: - c_result = move(cpp_code_points(source_view)) - - return Column.from_unique_ptr(move(c_result)) + plc_column = plc.strings.attributes.code_points( + source_strings.to_pylibcudf(mode="read") + ) + return Column.from_pylibcudf(plc_column) diff --git a/python/cudf/cudf/_lib/timezone.pyx b/python/cudf/cudf/_lib/timezone.pyx index bff3b2c4ce4..54624a5a2fd 100644 --- a/python/cudf/cudf/_lib/timezone.pyx +++ b/python/cudf/cudf/_lib/timezone.pyx @@ -1,29 +1,10 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. 
-from libcpp.memory cimport unique_ptr -from libcpp.optional cimport make_optional -from libcpp.string cimport string -from libcpp.utility cimport move +import pylibcudf as plc -from pylibcudf.libcudf.io.timezone cimport ( - make_timezone_transition_table as cpp_make_timezone_transition_table, -) -from pylibcudf.libcudf.table.table cimport table - -from cudf._lib.utils cimport columns_from_unique_ptr +from cudf._lib.column cimport Column def make_timezone_transition_table(tzdir, tzname): - cdef unique_ptr[table] c_result - cdef string c_tzdir = tzdir.encode() - cdef string c_tzname = tzname.encode() - - with nogil: - c_result = move( - cpp_make_timezone_transition_table( - make_optional[string](c_tzdir), - c_tzname - ) - ) - - return columns_from_unique_ptr(move(c_result)) + plc_table = plc.io.timezone.make_timezone_transition_table(tzdir, tzname) + return [Column.from_pylibcudf(col) for col in plc_table.columns()] diff --git a/python/cudf/cudf/_lib/transpose.pyx b/python/cudf/cudf/_lib/transpose.pyx index f78fbd4c844..995d278cb88 100644 --- a/python/cudf/cudf/_lib/transpose.pyx +++ b/python/cudf/cudf/_lib/transpose.pyx @@ -1,32 +1,18 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libcpp.memory cimport unique_ptr -from libcpp.pair cimport pair -from libcpp.utility cimport move - -from pylibcudf.libcudf.column.column cimport column -from pylibcudf.libcudf.table.table_view cimport table_view -from pylibcudf.libcudf.transpose cimport transpose as cpp_transpose +import pylibcudf as plc from cudf._lib.column cimport Column -from cudf._lib.utils cimport columns_from_table_view, table_view_from_columns def transpose(list source_columns): """Transpose m n-row columns into n m-row columns """ - cdef pair[unique_ptr[column], table_view] c_result - cdef table_view c_input = table_view_from_columns(source_columns) - - with nogil: - c_result = move(cpp_transpose(c_input)) - - # Notice, the data pointer of `result_owner` has been exposed - # through `c_result.second` at this point. 
- result_owner = Column.from_unique_ptr( - move(c_result.first), data_ptr_exposed=True - ) - return columns_from_table_view( - c_result.second, - owners=[result_owner] * c_result.second.num_columns() + input_table = plc.table.Table( + [col.to_pylibcudf(mode="read") for col in source_columns] ) + result_table = plc.transpose.transpose(input_table) + return [ + Column.from_pylibcudf(col, data_ptr_exposed=True) + for col in result_table.columns() + ] diff --git a/python/cudf/cudf/_lib/utils.pxd b/python/cudf/cudf/_lib/utils.pxd index ff97fe80310..7254db5c43d 100644 --- a/python/cudf/cudf/_lib/utils.pxd +++ b/python/cudf/cudf/_lib/utils.pxd @@ -11,7 +11,7 @@ from pylibcudf.libcudf.table.table cimport table, table_view cdef data_from_unique_ptr( unique_ptr[table] c_tbl, column_names, index_names=*) cdef data_from_pylibcudf_table(tbl, column_names, index_names=*) -cdef data_from_pylibcudf_io(tbl_with_meta) +cdef data_from_pylibcudf_io(tbl_with_meta, column_names = *, index_names = *) cdef data_from_table_view( table_view tv, object owner, object column_names, object index_names=*) cdef table_view table_view_from_columns(columns) except * diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index 8660cca9322..9e5b99f64eb 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -316,15 +316,17 @@ cdef data_from_pylibcudf_table(tbl, column_names, index_names=None): index_names ) -cdef data_from_pylibcudf_io(tbl_with_meta): +cdef data_from_pylibcudf_io(tbl_with_meta, column_names=None, index_names=None): """ Unpacks the TableWithMetadata from libcudf I/O into a dict of columns and an Index (cuDF format) """ + if column_names is None: + column_names = tbl_with_meta.column_names(include_children=False) return _data_from_columns( columns=[Column.from_pylibcudf(plc) for plc in tbl_with_meta.columns], - column_names=tbl_with_meta.column_names(include_children=False), - index_names=None + column_names=column_names, + index_names=index_names ) cdef columns_from_table_view( diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py index de5ed15771d..864e87b5377 100644 --- a/python/cudf/cudf/core/column/categorical.py +++ b/python/cudf/cudf/core/column/categorical.py @@ -1337,7 +1337,7 @@ def _set_categories( # Ensure new_categories is unique first if not (is_unique or new_cats.is_unique): - new_cats = cudf.Series(new_cats)._column.unique() + new_cats = new_cats.unique() if cur_cats.equals(new_cats, check_dtypes=True): # TODO: Internal usages don't always need a copy; add a copy keyword diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 16b0aa95c35..79ed5a0e187 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -6287,14 +6287,17 @@ def _prepare_for_rowwise_op(self, method, skipna, numeric_only): ) if not skipna and any(col.nullable for col in filtered._columns): - mask = DataFrame( + length = filtered._data.nrows + ca = ColumnAccessor( { - name: filtered._data[name]._get_mask_as_column() - if filtered._data[name].nullable - else as_column(True, length=len(filtered._data[name])) - for name in filtered._column_names - } + name: col._get_mask_as_column() + if col.nullable + else as_column(True, length=length) + for name, col in filtered._data.items() + }, + verify=False, ) + mask = DataFrame._from_data(ca) mask = mask.all(axis=1) else: mask = None @@ -6679,19 +6682,10 @@ def _apply_cupy_method_axis_1(self, method, *args, **kwargs): ) 
return Series._from_column(result, index=self.index) else: - result_df = DataFrame(result).set_index(self.index) + result_df = DataFrame(result, index=self.index) result_df._set_columns_like(prepared._data) return result_df - @_performance_tracking - def _columns_view(self, columns): - """ - Return a subset of the DataFrame's columns as a view. - """ - return DataFrame( - {col: self._data[col] for col in columns}, index=self.index - ) - @_performance_tracking def select_dtypes(self, include=None, exclude=None): """Return a subset of the DataFrame's columns based on the column dtypes. @@ -6763,8 +6757,6 @@ def select_dtypes(self, include=None, exclude=None): if not isinstance(exclude, (list, tuple)): exclude = (exclude,) if exclude is not None else () - df = DataFrame(index=self.index) - # cudf_dtype_from_pydata_dtype can distinguish between # np.float and np.number selection = tuple(map(frozenset, (include, exclude))) @@ -6820,12 +6812,12 @@ def select_dtypes(self, include=None, exclude=None): # remove all exclude types inclusion = inclusion - exclude_subtypes - for k, col in self._column_labels_and_values: - infered_type = cudf_dtype_from_pydata_dtype(col.dtype) - if infered_type in inclusion: - df._insert(len(df._data), k, col) - - return df + to_select = [ + label + for label, dtype in self._dtypes + if cudf_dtype_from_pydata_dtype(dtype) in inclusion + ] + return self.loc[:, to_select] @ioutils.doc_to_parquet() def to_parquet( @@ -7331,7 +7323,7 @@ def cov(self, min_periods=None, ddof: int = 1, numeric_only: bool = False): cov = cupy.cov(self.values, ddof=ddof, rowvar=False) cols = self._data.to_pandas_index() - df = DataFrame(cupy.asfortranarray(cov)).set_index(cols) + df = DataFrame(cupy.asfortranarray(cov), index=cols) df._set_columns_like(self._data) return df @@ -7374,7 +7366,7 @@ def corr( corr = cupy.corrcoef(values, rowvar=False) cols = self._data.to_pandas_index() - df = DataFrame(cupy.asfortranarray(corr)).set_index(cols) + df = DataFrame(cupy.asfortranarray(corr), index=cols) df._set_columns_like(self._data) return df diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py index cb8cd0cd28b..81b20488d8d 100644 --- a/python/cudf/cudf/core/groupby/groupby.py +++ b/python/cudf/cudf/core/groupby/groupby.py @@ -27,6 +27,7 @@ from cudf.core.abc import Serializable from cudf.core.column.column import ColumnBase, StructDtype, as_column from cudf.core.column_accessor import ColumnAccessor +from cudf.core.copy_types import GatherMap from cudf.core.join._join_helpers import _match_join_keys from cudf.core.mixins import Reducible, Scannable from cudf.core.multiindex import MultiIndex @@ -754,17 +755,33 @@ def agg(self, func=None, *args, engine=None, engine_kwargs=None, **kwargs): left_cols = list(self.grouping.keys.drop_duplicates()._columns) right_cols = list(result_index._columns) join_keys = [ - _match_join_keys(lcol, rcol, "left") + _match_join_keys(lcol, rcol, "inner") for lcol, rcol in zip(left_cols, right_cols) ] # TODO: In future, see if we can centralize # logic else where that has similar patterns. join_keys = map(list, zip(*join_keys)) - _, indices = libcudf.join.join( - *join_keys, - how="left", + # By construction, left and right keys are related by + # a permutation, so we can use an inner join. + left_order, right_order = libcudf.join.join( + *join_keys, how="inner" + ) + # left order is some permutation of the ordering we + # want, and right order is a matching gather map for + # the result table. 
Get the correct order by sorting + # the right gather map. + (right_order,) = libcudf.sort.sort_by_key( + [right_order], + [left_order], + [True], + ["first"], + stable=False, + ) + result = result._gather( + GatherMap.from_column_unchecked( + right_order, len(result), nullify=False + ) ) - result = result.take(indices) if not self._as_index: result = result.reset_index() @@ -2232,6 +2249,22 @@ def func(x): return self.agg(func) + @_performance_tracking + def nunique(self, dropna: bool = True): + """ + Return number of unique elements in the group. + + Parameters + ---------- + dropna : bool, default True + Don't include NaN in the counts. + """ + + def func(x): + return getattr(x, "nunique")(dropna=dropna) + + return self.agg(func) + @_performance_tracking def std( self, diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py index 6de3981ba66..92d094d9de5 100644 --- a/python/cudf/cudf/core/multiindex.py +++ b/python/cudf/cudf/core/multiindex.py @@ -700,7 +700,10 @@ def _compute_validity_mask(self, index, row_tuple, max_length): lookup_dict[i] = row lookup = cudf.DataFrame(lookup_dict) frame = cudf.DataFrame._from_data( - ColumnAccessor(dict(enumerate(index._columns)), verify=False) + ColumnAccessor( + dict(enumerate(index._columns)), + verify=False, + ) ) with warnings.catch_warnings(): warnings.simplefilter("ignore", FutureWarning) @@ -780,18 +783,12 @@ def _index_and_downcast(self, result, index, index_key): index_key = index_key[0] slice_access = isinstance(index_key, slice) - out_index = cudf.DataFrame() - # Select the last n-k columns where n is the number of columns and k is + # Count the last n-k columns where n is the number of columns and k is # the length of the indexing tuple size = 0 if not isinstance(index_key, (numbers.Number, slice)): size = len(index_key) - for k in range(size, len(index._data)): - out_index.insert( - out_index._num_columns, - k, - cudf.Series._from_column(index._columns[k]), - ) + num_selected = max(0, index.nlevels - size) # determine if we should downcast from a DataFrame to a Series need_downcast = ( @@ -814,16 +811,13 @@ def _index_and_downcast(self, result, index, index_key): result = cudf.Series._from_data( {}, name=tuple(col[0] for col in index._columns) ) - elif out_index._num_columns == 1: + elif num_selected == 1: # If there's only one column remaining in the output index, convert # it into an Index and name the final index values according # to that column's name. - last_column = index._columns[-1] - out_index = cudf.Index._from_column( - last_column, name=index.names[-1] - ) - index = out_index - elif out_index._num_columns > 1: + *_, last_column = index._data.columns + index = cudf.Index._from_column(last_column, name=index.names[-1]) + elif num_selected > 1: # Otherwise pop the leftmost levels, names, and codes from the # source index until it has the correct number of columns (n-k) result.reset_index(drop=True) diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py index 401fef67ee6..6e5abb2b82b 100644 --- a/python/cudf/cudf/core/reshape.py +++ b/python/cudf/cudf/core/reshape.py @@ -961,14 +961,14 @@ def _merge_sorted( ) -def _pivot(df, index, columns): +def _pivot(col_accessor: ColumnAccessor, index, columns) -> cudf.DataFrame: """ Reorganize the values of the DataFrame according to the given index and columns. 
Parameters ---------- - df : DataFrame + col_accessor : ColumnAccessor index : cudf.Index Index labels of the result columns : cudf.Index @@ -985,7 +985,7 @@ def as_tuple(x): return x if isinstance(x, tuple) else (x,) nrows = len(index_labels) - for col_label, col in df._column_labels_and_values: + for col_label, col in col_accessor.items(): names = [ as_tuple(col_label) + as_tuple(name) for name in column_labels ] @@ -1067,22 +1067,21 @@ def pivot(data, columns=None, index=no_default, values=no_default): 2 <NA> <NA> three """ - df = data values_is_list = True if values is no_default: - values = df._columns_view( - col for col in df._column_names if col not in (index, columns) - ) + cols_to_select = [ + col for col in data._column_names if col not in (index, columns) + ] + elif not isinstance(values, (list, tuple)): + cols_to_select = [values] + values_is_list = False else: - if not isinstance(values, (list, tuple)): - values = [values] - values_is_list = False - values = df._columns_view(values) + cols_to_select = values if index is no_default: - index = df.index + index = data.index else: - index = cudf.Index(df.loc[:, index]) - columns = cudf.Index(df.loc[:, columns]) + index = cudf.Index(data.loc[:, index]) + columns = cudf.Index(data.loc[:, columns]) # Create a DataFrame composed of columns from both # columns and index @@ -1096,7 +1095,7 @@ if len(columns_index) != len(columns_index.drop_duplicates()): raise ValueError("Duplicate index-column pairs found. Cannot reshape.") - result = _pivot(values, index, columns) + result = _pivot(data._data.select_by_label(cols_to_select), index, columns) # MultiIndex to Index if not values_is_list: diff --git a/python/cudf/cudf/core/window/ewm.py b/python/cudf/cudf/core/window/ewm.py index ef0f6958aeb..094df955273 100644 --- a/python/cudf/cudf/core/window/ewm.py +++ b/python/cudf/cudf/core/window/ewm.py @@ -2,7 +2,7 @@ from __future__ import annotations import warnings -from typing import Literal +from typing import TYPE_CHECKING, Literal import numpy as np @@ -10,6 +10,9 @@ from cudf.api.types import is_numeric_dtype from cudf.core.window.rolling import _RollingBase +if TYPE_CHECKING: + from cudf.core.column.column import ColumnBase + class ExponentialMovingWindow(_RollingBase): r""" @@ -179,8 +182,10 @@ def cov( ): raise NotImplementedError("cov not yet supported.") - def _apply_agg_series(self, sr, agg_name): - if not is_numeric_dtype(sr.dtype): + def _apply_agg_column( + self, source_column: ColumnBase, agg_name: str + ) -> ColumnBase: + if not is_numeric_dtype(source_column.dtype): raise TypeError("No numeric types to aggregate") # libcudf ewm has special casing for nulls only @@ -188,20 +193,14 @@ def _apply_agg_series(self, sr, agg_name): # pandas does nans in the same positions mathematically. # as such we need to convert the nans to nulls before # passing them in.
- to_libcudf_column = sr._column.astype("float64").nans_to_nulls() - - return self.obj._from_data_like_self( - self.obj._data._from_columns_like_self( - [ - scan( - agg_name, - to_libcudf_column, - True, - com=self.com, - adjust=self.adjust, - ) - ] - ) + to_libcudf_column = source_column.astype("float64").nans_to_nulls() + + return scan( + agg_name, + to_libcudf_column, + True, + com=self.com, + adjust=self.adjust, ) diff --git a/python/cudf/cudf/core/window/rolling.py b/python/cudf/cudf/core/window/rolling.py index 043a41145e5..967edc2ab15 100644 --- a/python/cudf/cudf/core/window/rolling.py +++ b/python/cudf/cudf/core/window/rolling.py @@ -2,6 +2,7 @@ from __future__ import annotations import warnings +from typing import TYPE_CHECKING import numba import pandas as pd @@ -16,25 +17,29 @@ from cudf.utils import cudautils from cudf.utils.utils import GetAttrGetItemMixin +if TYPE_CHECKING: + from cudf.core.column.column import ColumnBase + class _RollingBase: """ - Contains methods common to all kinds of rolling + Contains routines to apply a window aggregation to a column. """ - def _apply_agg_dataframe(self, df, agg_name): - result_df = cudf.DataFrame({}) - for i, col_name in enumerate(df.columns): - result_col = self._apply_agg_series(df[col_name], agg_name) - result_df.insert(i, col_name, result_col) - result_df.index = df.index - return result_df + obj: cudf.DataFrame | cudf.Series - def _apply_agg(self, agg_name): - if isinstance(self.obj, cudf.Series): - return self._apply_agg_series(self.obj, agg_name) - else: - return self._apply_agg_dataframe(self.obj, agg_name) + def _apply_agg_column( + self, source_column: ColumnBase, agg_name: str + ) -> ColumnBase: + raise NotImplementedError + + def _apply_agg(self, agg_name: str) -> cudf.DataFrame | cudf.Series: + applied = ( + self._apply_agg_column(col, agg_name) for col in self.obj._columns + ) + return self.obj._from_data_like_self( + self.obj._data._from_columns_like_self(applied) + ) class Rolling(GetAttrGetItemMixin, _RollingBase, Reducible): @@ -290,14 +295,6 @@ def _apply_agg_column(self, source_column, agg_name): agg_params=self.agg_params, ) - def _apply_agg(self, agg_name): - applied = ( - self._apply_agg_column(col, agg_name) for col in self.obj._columns - ) - return self.obj._from_data_like_self( - self.obj._data._from_columns_like_self(applied) - ) - def _reduce( self, op: str, diff --git a/python/cudf/cudf/io/orc.py b/python/cudf/cudf/io/orc.py index c54293badbe..68b60809bb9 100644 --- a/python/cudf/cudf/io/orc.py +++ b/python/cudf/cudf/io/orc.py @@ -181,11 +181,6 @@ def read_orc_statistics( parsed_stripes_statistics, ) = liborc.read_parsed_orc_statistics(path_or_buf) - # Parse column names - column_names = [ - column_name.decode("utf-8") for column_name in column_names - ] - # Parse file statistics file_statistics = { column_name: column_stats @@ -248,9 +243,9 @@ def _filter_stripes( num_rows_scanned = 0 for i, stripe_statistics in enumerate(stripes_statistics): num_rows_before_stripe = num_rows_scanned - num_rows_scanned += next(iter(stripe_statistics.values()))[ - "number_of_values" - ] + num_rows_scanned += next( + iter(stripe_statistics.values()) + ).number_of_values if stripes is not None and i not in stripes: continue if skip_rows is not None and num_rows_scanned <= skip_rows: diff --git a/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py b/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py new file mode 100644 index 00000000000..a009802bab0 --- /dev/null +++ 
b/python/cudf/cudf/tests/groupby/test_ordering_pandas_compat.py @@ -0,0 +1,29 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +import numpy as np +import pytest + +import cudf +from cudf.testing import assert_eq + + +@pytest.fixture(params=[False, True], ids=["without_nulls", "with_nulls"]) +def with_nulls(request): + return request.param + + +@pytest.mark.parametrize("nrows", [30, 300, 300_000]) +@pytest.mark.parametrize("nkeys", [1, 2, 4]) +def test_groupby_maintain_order_random(nrows, nkeys, with_nulls): + key_names = [f"key{key}" for key in range(nkeys)] + key_values = [np.random.randint(100, size=nrows) for _ in key_names] + value = np.random.randint(-100, 100, size=nrows) + df = cudf.DataFrame(dict(zip(key_names, key_values), value=value)) + if with_nulls: + for key in key_names: + df.loc[df[key] == 1, key] = None + with cudf.option_context("mode.pandas_compatible", True): + got = df.groupby(key_names, sort=False).agg({"value": "sum"}) + expect = ( + df.to_pandas().groupby(key_names, sort=False).agg({"value": "sum"}) + ) + assert_eq(expect, got, check_index_type=not with_nulls) diff --git a/python/cudf/cudf/tests/pytest.ini b/python/cudf/cudf/tests/pytest.ini index d05ba9aaacc..496a322ff80 100644 --- a/python/cudf/cudf/tests/pytest.ini +++ b/python/cudf/cudf/tests/pytest.ini @@ -9,7 +9,7 @@ filterwarnings = ignore:::.*xdist.* ignore:::.*pytest.* # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow() - ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning + ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+ ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning # PerformanceWarning from cupy warming up the JIT cache diff --git a/python/cudf/cudf/tests/test_groupby.py b/python/cudf/cudf/tests/test_groupby.py index 848bc259e7b..14ba9894fd3 100644 --- a/python/cudf/cudf/tests/test_groupby.py +++ b/python/cudf/cudf/tests/test_groupby.py @@ -1940,6 +1940,23 @@ def test_groupby_nunique(agg, by): assert_groupby_results_equal(expect, got, check_dtype=False) +@pytest.mark.parametrize("dropna", [True, False]) +def test_nunique_dropna(dropna): + gdf = cudf.DataFrame( + { + "a": [1, 1, 2], + "b": [4, None, 5], + "c": [None, None, 7], + "d": [1, 1, 3], + } + ) + pdf = gdf.to_pandas() + + result = gdf.groupby("a")["b"].nunique(dropna=dropna) + expected = pdf.groupby("a")["b"].nunique(dropna=dropna) + assert_groupby_results_equal(result, expected, check_dtype=False) + + @pytest.mark.parametrize( "n", [0, 1, 2, 10], diff --git a/python/cudf/cudf/tests/test_orc.py b/python/cudf/cudf/tests/test_orc.py index c2a30b76bea..1dd732c7191 100644 --- a/python/cudf/cudf/tests/test_orc.py +++ b/python/cudf/cudf/tests/test_orc.py @@ -184,25 +184,25 @@ def test_orc_read_statistics(datadir): pytest.skip(".orc file is not found: %s" % e) # Check numberOfValues - assert_eq(file_statistics[0]["int1"]["number_of_values"], 11_000) + assert_eq(file_statistics[0]["int1"].number_of_values, 11_000) assert_eq( - file_statistics[0]["int1"]["number_of_values"], + file_statistics[0]["int1"].number_of_values, sum( [ - stripes_statistics[0]["int1"]["number_of_values"], - stripes_statistics[1]["int1"]["number_of_values"], - stripes_statistics[2]["int1"]["number_of_values"], + stripes_statistics[0]["int1"].number_of_values, + stripes_statistics[1]["int1"].number_of_values, + stripes_statistics[2]["int1"].number_of_values, ] ), ) assert_eq( - 
stripes_statistics[1]["int1"]["number_of_values"], - stripes_statistics[1]["string1"]["number_of_values"], + stripes_statistics[1]["int1"].number_of_values, + stripes_statistics[1]["string1"].number_of_values, ) - assert_eq(stripes_statistics[2]["string1"]["number_of_values"], 1_000) + assert_eq(stripes_statistics[2]["string1"].number_of_values, 1_000) # Check other statistics - assert_eq(stripes_statistics[2]["string1"]["has_null"], False) + assert_eq(stripes_statistics[2]["string1"].has_null, False) assert_eq( file_statistics[0]["int1"]["minimum"], min( @@ -1538,8 +1538,8 @@ def test_empty_statistics(): for stats in got: # Similar expected stats for the first 6 columns in this case for col_name in ascii_lowercase[:6]: - assert stats[0][col_name].get("number_of_values") == 0 - assert stats[0][col_name].get("has_null") is True + assert stats[0][col_name].number_of_values == 0 + assert stats[0][col_name].has_null is True assert stats[0][col_name].get("minimum") is None assert stats[0][col_name].get("maximum") is None for col_name in ascii_lowercase[:3]: @@ -1547,17 +1547,17 @@ def test_empty_statistics(): # Sum for decimal column is a string assert stats[0]["d"].get("sum") == "0" - assert stats[0]["g"].get("number_of_values") == 0 - assert stats[0]["g"].get("has_null") is True + assert stats[0]["g"].number_of_values == 0 + assert stats[0]["g"].has_null is True assert stats[0]["g"].get("true_count") == 0 assert stats[0]["g"].get("false_count") == 0 - assert stats[0]["h"].get("number_of_values") == 0 - assert stats[0]["h"].get("has_null") is True + assert stats[0]["h"].number_of_values == 0 + assert stats[0]["h"].has_null is True assert stats[0]["h"].get("sum") == 0 - assert stats[0]["i"].get("number_of_values") == 1 - assert stats[0]["i"].get("has_null") is False + assert stats[0]["i"].number_of_values == 1 + assert stats[0]["i"].has_null is False assert stats[0]["i"].get("minimum") == 1 assert stats[0]["i"].get("maximum") == 1 assert stats[0]["i"].get("sum") == 1 diff --git a/python/cudf/cudf/utils/ioutils.py b/python/cudf/cudf/utils/ioutils.py index 1180da321e6..d636f36f282 100644 --- a/python/cudf/cudf/utils/ioutils.py +++ b/python/cudf/cudf/utils/ioutils.py @@ -1873,7 +1873,7 @@ def _apply_filter_bool_eq(val, col_stats): return False elif val is False: if (col_stats["false_count"] == 0) or ( - col_stats["true_count"] == col_stats["number_of_values"] + col_stats["true_count"] == col_stats.number_of_values ): return False return True @@ -1900,7 +1900,7 @@ def _apply_predicate(op, val, col_stats): return False # TODO: Replace pd.isnull with # cudf.isnull once it is implemented - if pd.isnull(val) and not col_stats["has_null"]: + if pd.isnull(val) and not col_stats.has_null: return False if not _apply_filter_bool_eq(val, col_stats): return False diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index f90cb96e189..605f9be5a49 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -24,7 +24,7 @@ dependencies = [ "cupy-cuda11x>=12.0.0", "fsspec>=0.6.0", "libcudf==24.12.*,>=0.0.0a0", - "numba>=0.57", + "numba-cuda>=0.0.13", "numpy>=1.23,<3.0a0", "nvtx>=0.2.1", "packaging", diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 8cd56c8ee3a..1c61075be22 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -603,24 +603,39 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame: req.evaluate(result_subs, mapping=mapping) for req in 
self.agg_requests ] broadcasted = broadcast(*result_keys, *results) - result_keys = broadcasted[: len(result_keys)] - results = broadcasted[len(result_keys) :] # Handle order preservation of groups - # like cudf classic does - # https://github.com/rapidsai/cudf/blob/5780c4d8fb5afac2e04988a2ff5531f94c22d3a3/python/cudf/cudf/core/groupby/groupby.py#L723-L743 if self.maintain_order and not sorted: - left = plc.stream_compaction.stable_distinct( + # The order we want + want = plc.stream_compaction.stable_distinct( plc.Table([k.obj for k in keys]), list(range(group_keys.num_columns())), plc.stream_compaction.DuplicateKeepOption.KEEP_FIRST, plc.types.NullEquality.EQUAL, plc.types.NanEquality.ALL_EQUAL, ) - right = plc.Table([key.obj for key in result_keys]) - _, indices = plc.join.left_join(left, right, plc.types.NullEquality.EQUAL) + # The order we have + have = plc.Table([key.obj for key in broadcasted[: len(keys)]]) + + # We know an inner join is OK because by construction + # want and have are permutations of each other. + left_order, right_order = plc.join.inner_join( + want, have, plc.types.NullEquality.EQUAL + ) + # Now left_order is an arbitrary permutation of the ordering we + # want, and right_order is a matching permutation of the ordering + # we have. To get to the original ordering, we need + # left_order == iota(nrows), with right_order permuted + # appropriately. This can be obtained by sorting + # right_order by left_order. + (right_order,) = plc.sorting.sort_by_key( + plc.Table([right_order]), + plc.Table([left_order]), + [plc.types.Order.ASCENDING], + [plc.types.NullOrder.AFTER], + ).columns() ordered_table = plc.copying.gather( plc.Table([col.obj for col in broadcasted]), - indices, + right_order, plc.copying.OutOfBoundsPolicy.DONT_CHECK, ) broadcasted = [ diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index 74bf8b9e4e2..1e8246496cd 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -4,6 +4,7 @@ import itertools +import numpy as np import pytest import polars as pl @@ -191,3 +192,24 @@ def test_groupby_literal_in_agg(df, key, expr): def test_groupby_unary_non_pointwise_raises(df, expr): q = df.group_by("key1").agg(expr) assert_ir_translation_raises(q, NotImplementedError) + + +@pytest.mark.parametrize("nrows", [30, 300, 300_000]) +@pytest.mark.parametrize("nkeys", [1, 2, 4]) +def test_groupby_maintain_order_random(nrows, nkeys, with_nulls): + key_names = [f"key{key}" for key in range(nkeys)] + key_values = [np.random.randint(100, size=nrows) for _ in key_names] + value = np.random.randint(-100, 100, size=nrows) + df = pl.DataFrame(dict(zip(key_names, key_values, strict=True), value=value)) + if with_nulls: + df = df.with_columns( + *( + pl.when(pl.col(name) == 1) + .then(None) + .otherwise(pl.col(name)) + .alias(name) + for name in key_names + ) + ) + q = df.lazy().group_by(key_names, maintain_order=True).agg(pl.col("value").sum()) + assert_gpu_result_equal(q) diff --git a/python/dask_cudf/dask_cudf/backends.py b/python/dask_cudf/dask_cudf/backends.py index 9347ebba5de..bead964a0ef 100644 --- a/python/dask_cudf/dask_cudf/backends.py +++ b/python/dask_cudf/dask_cudf/backends.py @@ -8,6 +8,7 @@ import numpy as np import pandas as pd import pyarrow as pa +from packaging.version import Version from pandas.api.types import is_scalar import dask.dataframe as dd @@ -52,6 +53,10 @@ get_parallel_type.register(cudf.BaseIndex, lambda _: Index) +# Required for Arrow filesystem support in 
read_parquet +PYARROW_GE_15 = Version(pa.__version__) >= Version("15.0.0") + + @meta_nonempty.register(cudf.BaseIndex) @_dask_cudf_performance_tracking def _nonempty_index(idx): @@ -695,15 +700,140 @@ def from_dict( ) @staticmethod - def read_parquet(*args, engine=None, **kwargs): + def read_parquet(path, *args, filesystem="fsspec", engine=None, **kwargs): import dask_expr as dx + import fsspec - from dask_cudf.io.parquet import CudfEngine + if ( + isinstance(filesystem, fsspec.AbstractFileSystem) + or isinstance(filesystem, str) + and filesystem.lower() == "fsspec" + ): + # Default "fsspec" filesystem + from dask_cudf.io.parquet import CudfEngine - _raise_unsupported_parquet_kwargs(**kwargs) - return _default_backend( - dx.read_parquet, *args, engine=CudfEngine, **kwargs - ) + _raise_unsupported_parquet_kwargs(**kwargs) + return _default_backend( + dx.read_parquet, + path, + *args, + filesystem=filesystem, + engine=CudfEngine, + **kwargs, + ) + + else: + # EXPERIMENTAL filesystem="arrow" support. + # This code path uses PyArrow for IO, which is only + # beneficial for remote storage (e.g. S3) + + from fsspec.utils import stringify_path + from pyarrow import fs as pa_fs + + # CudfReadParquetPyarrowFS requires import of distributed beforehand + # (See: https://github.com/dask/dask/issues/11352) + import distributed # noqa: F401 + from dask.core import flatten + from dask.dataframe.utils import pyarrow_strings_enabled + + from dask_cudf.expr._expr import CudfReadParquetPyarrowFS + + if args: + raise ValueError(f"Unexpected positional arguments: {args}") + + if not ( + isinstance(filesystem, pa_fs.FileSystem) + or isinstance(filesystem, str) + and filesystem.lower() in ("arrow", "pyarrow") + ): + raise ValueError(f"Unexpected filesystem value: {filesystem}.") + + if not PYARROW_GE_15: + raise NotImplementedError( + "Experimental Arrow filesystem support requires pyarrow>=15" + ) + + if not isinstance(path, str): + path = stringify_path(path) + + # Extract kwargs + columns = kwargs.pop("columns", None) + filters = kwargs.pop("filters", None) + categories = kwargs.pop("categories", None) + index = kwargs.pop("index", None) + storage_options = kwargs.pop("storage_options", None) + dtype_backend = kwargs.pop("dtype_backend", None) + calculate_divisions = kwargs.pop("calculate_divisions", False) + ignore_metadata_file = kwargs.pop("ignore_metadata_file", False) + metadata_task_size = kwargs.pop("metadata_task_size", None) + split_row_groups = kwargs.pop("split_row_groups", "infer") + blocksize = kwargs.pop("blocksize", "default") + aggregate_files = kwargs.pop("aggregate_files", None) + parquet_file_extension = kwargs.pop( + "parquet_file_extension", (".parq", ".parquet", ".pq") + ) + arrow_to_pandas = kwargs.pop("arrow_to_pandas", None) + open_file_options = kwargs.pop("open_file_options", None) + + # Validate and normalize kwargs + kwargs["dtype_backend"] = dtype_backend + if arrow_to_pandas is not None: + raise ValueError( + "arrow_to_pandas not supported for the 'cudf' backend." + ) + if open_file_options is not None: + raise ValueError( + "The open_file_options argument is no longer supported " + "by the 'cudf' backend." + ) + if filters is not None: + for filter in flatten(filters, container=list): + _, op, val = filter + if op == "in" and not isinstance(val, (set, list, tuple)): + raise TypeError( + "Value of 'in' filter must be a list, set or tuple." 
+ ) + if metadata_task_size is not None: + raise NotImplementedError( + "metadata_task_size is not supported when using the pyarrow filesystem." + ) + if split_row_groups != "infer": + raise NotImplementedError( + "split_row_groups is not supported when using the pyarrow filesystem." + ) + if parquet_file_extension != (".parq", ".parquet", ".pq"): + raise NotImplementedError( + "parquet_file_extension is not supported when using the pyarrow filesystem." + ) + if blocksize is not None and blocksize != "default": + warnings.warn( + "blocksize is not supported when using the pyarrow filesystem. " + "blocksize argument will be ignored." + ) + if aggregate_files is not None: + warnings.warn( + "aggregate_files is not supported when using the pyarrow filesystem. " + "Please use the 'dataframe.parquet.minimum-partition-size' config. " + "aggregate_files argument will be ignored." + ) + + return dx.new_collection( + CudfReadParquetPyarrowFS( + path, + columns=dx._util._convert_to_list(columns), + filters=filters, + categories=categories, + index=index, + calculate_divisions=calculate_divisions, + storage_options=storage_options, + filesystem=filesystem, + ignore_metadata_file=ignore_metadata_file, + arrow_to_pandas=arrow_to_pandas, + pyarrow_strings_enabled=pyarrow_strings_enabled(), + kwargs=kwargs, + _series=isinstance(columns, str), + ) + ) @staticmethod def read_csv( diff --git a/python/dask_cudf/dask_cudf/expr/_expr.py b/python/dask_cudf/dask_cudf/expr/_expr.py index b284ab3774d..af83a01da98 100644 --- a/python/dask_cudf/dask_cudf/expr/_expr.py +++ b/python/dask_cudf/dask_cudf/expr/_expr.py @@ -2,10 +2,13 @@ import functools import dask_expr._shuffle as _shuffle_module +import pandas as pd from dask_expr import new_collection from dask_expr._cumulative import CumulativeBlockwise from dask_expr._expr import Elemwise, Expr, RenameAxis, VarColumns from dask_expr._reductions import Reduction, Var +from dask_expr.io.io import FusedParquetIO +from dask_expr.io.parquet import ReadParquetPyarrowFS from dask.dataframe.core import is_dataframe_like, make_meta, meta_nonempty from dask.dataframe.dispatch import is_categorical_dtype @@ -18,6 +21,92 @@ ## +class CudfFusedParquetIO(FusedParquetIO): + @staticmethod + def _load_multiple_files( + frag_filters, + columns, + schema, + *to_pandas_args, + ): + import pyarrow as pa + + from dask.base import apply, tokenize + from dask.threaded import get + + token = tokenize(frag_filters, columns, schema) + name = f"pq-file-{token}" + dsk = { + (name, i): ( + CudfReadParquetPyarrowFS._fragment_to_table, + frag, + filter, + columns, + schema, + ) + for i, (frag, filter) in enumerate(frag_filters) + } + dsk[name] = ( + apply, + pa.concat_tables, + [list(dsk.keys())], + {"promote_options": "permissive"}, + ) + return CudfReadParquetPyarrowFS._table_to_pandas( + get(dsk, name), + *to_pandas_args, + ) + + +class CudfReadParquetPyarrowFS(ReadParquetPyarrowFS): + @functools.cached_property + def _dataset_info(self): + from dask_cudf.io.parquet import set_object_dtypes_from_pa_schema + + dataset_info = super()._dataset_info + meta_pd = dataset_info["base_meta"] + if isinstance(meta_pd, cudf.DataFrame): + return dataset_info + + # Convert to cudf + # (drop unsupported timezone information) + for k, v in meta_pd.dtypes.items(): + if isinstance(v, pd.DatetimeTZDtype) and v.tz is not None: + meta_pd[k] = meta_pd[k].dt.tz_localize(None) + meta_cudf = cudf.from_pandas(meta_pd) + + # Re-set "object" dtypes to align with pa schema + kwargs = dataset_info.get("kwargs", {}) +
set_object_dtypes_from_pa_schema( + meta_cudf, + kwargs.get("schema", None), + ) + + dataset_info["base_meta"] = meta_cudf + self.operands[type(self)._parameters.index("_dataset_info_cache")] = ( + dataset_info + ) + return dataset_info + + @staticmethod + def _table_to_pandas( + table, + index_name, + *args, + ): + df = cudf.DataFrame.from_arrow(table) + if index_name is not None: + df = df.set_index(index_name) + return df + + def _tune_up(self, parent): + if self._fusion_compression_factor >= 1: + return + if isinstance(parent, CudfFusedParquetIO): + return + return parent.substitute(self, CudfFusedParquetIO(self)) + + class RenameAxisCudf(RenameAxis): # TODO: Remove this after rename_axis is supported in cudf # (See: https://github.com/rapidsai/cudf/issues/16895) diff --git a/python/dask_cudf/dask_cudf/io/tests/test_s3.py b/python/dask_cudf/dask_cudf/io/tests/test_s3.py index a14ffbc37dc..cf8af82e112 100644 --- a/python/dask_cudf/dask_cudf/io/tests/test_s3.py +++ b/python/dask_cudf/dask_cudf/io/tests/test_s3.py @@ -12,6 +12,7 @@ from dask.dataframe import assert_eq import dask_cudf +from dask_cudf.tests.utils import QUERY_PLANNING_ON moto = pytest.importorskip("moto", minversion="3.1.6") boto3 = pytest.importorskip("boto3") @@ -127,7 +128,20 @@ def test_read_parquet_open_file_options_raises(): ) -def test_read_parquet_filesystem(s3_base, s3so, pdf): +@pytest.mark.parametrize( + "filesystem", + [ + pytest.param( + "arrow", + marks=pytest.mark.skipif( + not QUERY_PLANNING_ON or not dask_cudf.backends.PYARROW_GE_15, + reason="Not supported", + ), + ), + "fsspec", + ], +) +def test_read_parquet_filesystem(s3_base, s3so, pdf, filesystem): fname = "test_parquet_filesystem.parquet" bucket = "parquet" buffer = BytesIO() @@ -135,21 +149,24 @@ def test_read_parquet_filesystem(s3_base, s3so, pdf): buffer.seek(0) with s3_context(s3_base=s3_base, bucket=bucket, files={fname: buffer}): path = f"s3://{bucket}/{fname}" + if filesystem == "arrow": + # This feature requires arrow >= 15 + pytest.importorskip("pyarrow", minversion="15.0.0") - # Cannot pass filesystem="arrow" - with pytest.raises(ValueError): - dask_cudf.read_parquet( + import pyarrow.fs as pa_fs + + df = dask_cudf.read_parquet( + path, + filesystem=pa_fs.S3FileSystem( + endpoint_override=s3so["client_kwargs"]["endpoint_url"], + ), + ) + else: + df = dask_cudf.read_parquet( path, storage_options=s3so, - filesystem="arrow", + filesystem=filesystem, ) - - # Can pass filesystem="fsspec" - df = dask_cudf.read_parquet( - path, - storage_options=s3so, - filesystem="fsspec", - ) assert df.b.sum().compute() == 9 diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml index c64de06338f..76e47b50c3b 100644 --- a/python/dask_cudf/pyproject.toml +++ b/python/dask_cudf/pyproject.toml @@ -46,7 +46,7 @@ cudf = "dask_cudf.backends:CudfDXBackendEntrypoint" [project.optional-dependencies] test = [ "dask-cuda==24.12.*,>=0.0.0a0", - "numba>=0.57", + "numba-cuda>=0.0.13", "pytest-cov", "pytest-xdist", "pytest<8", @@ -119,7 +119,7 @@ filterwarnings = [ "error::FutureWarning", "error::DeprecationWarning", # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow() - "ignore:.*datetime.*utcnow.*scheduled for removal:DeprecationWarning", + "ignore:.*datetime.*utcnow.*scheduled for removal:DeprecationWarning:botocore", "ignore:create_block_manager_from_blocks is deprecated and will be removed in a future version. 
Use public APIs instead.:DeprecationWarning", # https://github.com/dask/partd/blob/main/partd/pandas.py#L198 "ignore:Passing a BlockManager to DataFrame is deprecated and will raise in a future version. Use public APIs instead.:DeprecationWarning", diff --git a/python/pylibcudf/pylibcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/CMakeLists.txt index f07c8897e34..a7cb66d7b16 100644 --- a/python/pylibcudf/pylibcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/CMakeLists.txt @@ -31,6 +31,7 @@ set(cython_sources lists.pyx merge.pyx null_mask.pyx + partitioning.pyx quantiles.pyx reduce.pyx replace.pyx @@ -44,6 +45,7 @@ set(cython_sources table.pyx traits.pyx transform.pyx + transpose.pyx types.pyx unary.pyx utils.pyx diff --git a/python/pylibcudf/pylibcudf/__init__.pxd b/python/pylibcudf/pylibcudf/__init__.pxd index b7cf6413c05..a384edd456d 100644 --- a/python/pylibcudf/pylibcudf/__init__.pxd +++ b/python/pylibcudf/pylibcudf/__init__.pxd @@ -17,6 +17,7 @@ from . cimport ( lists, merge, null_mask, + partitioning, quantiles, reduce, replace, @@ -29,6 +30,7 @@ from . cimport ( strings, traits, transform, + transpose, types, unary, ) @@ -60,6 +62,7 @@ __all__ = [ "lists", "merge", "null_mask", + "partitioning", "quantiles", "reduce", "replace", @@ -72,6 +75,7 @@ __all__ = [ "sorting", "traits", "transform", + "transpose", "types", "unary", ] diff --git a/python/pylibcudf/pylibcudf/__init__.py b/python/pylibcudf/pylibcudf/__init__.py index 84b1c29f791..2a5365e8fad 100644 --- a/python/pylibcudf/pylibcudf/__init__.py +++ b/python/pylibcudf/pylibcudf/__init__.py @@ -28,6 +28,7 @@ lists, merge, null_mask, + partitioning, quantiles, reduce, replace, @@ -40,6 +41,7 @@ strings, traits, transform, + transpose, types, unary, ) @@ -74,6 +76,7 @@ "lists", "merge", "null_mask", + "partitioning", "quantiles", "reduce", "replace", @@ -86,6 +89,7 @@ "sorting", "traits", "transform", + "transpose", "types", "unary", ] diff --git a/python/pylibcudf/pylibcudf/io/CMakeLists.txt b/python/pylibcudf/pylibcudf/io/CMakeLists.txt index bcc2151f5b6..965724a47b1 100644 --- a/python/pylibcudf/pylibcudf/io/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/io/CMakeLists.txt @@ -12,7 +12,9 @@ # the License. # ============================================================================= -set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx parquet.pyx types.pyx) +set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx orc.pyx parquet.pyx timezone.pyx + types.pyx +) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/pylibcudf/pylibcudf/io/__init__.pxd b/python/pylibcudf/pylibcudf/io/__init__.pxd index 62820048584..1bcc0a3f963 100644 --- a/python/pylibcudf/pylibcudf/io/__init__.pxd +++ b/python/pylibcudf/pylibcudf/io/__init__.pxd @@ -1,5 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. # CSV is removed since it is def not cpdef (to force kw-only arguments) -from . cimport avro, datasource, json, parquet, types +from . cimport avro, datasource, json, orc, parquet, timezone, types from .types cimport SourceInfo, TableWithMetadata diff --git a/python/pylibcudf/pylibcudf/io/__init__.py b/python/pylibcudf/pylibcudf/io/__init__.py index 27640f7d955..2e4f215b12c 100644 --- a/python/pylibcudf/pylibcudf/io/__init__.py +++ b/python/pylibcudf/pylibcudf/io/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import avro, csv, datasource, json, parquet, types +from . 
import avro, csv, datasource, json, orc, parquet, timezone, types from .types import SinkInfo, SourceInfo, TableWithMetadata diff --git a/python/pylibcudf/pylibcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/io/orc.pxd new file mode 100644 index 00000000000..b111d617b1b --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/orc.pxd @@ -0,0 +1,50 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from libc.stdint cimport uint64_t +from libcpp cimport bool +from libcpp.optional cimport optional +from libcpp.string cimport string +from libcpp.vector cimport vector +from pylibcudf.io.types cimport SourceInfo, TableWithMetadata +from pylibcudf.libcudf.io.orc_metadata cimport ( + column_statistics, + parsed_orc_statistics, + statistics_type, +) +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.types cimport DataType + + +cpdef TableWithMetadata read_orc( + SourceInfo source_info, + list columns = *, + list stripes = *, + size_type skip_rows = *, + size_type nrows = *, + bool use_index = *, + bool use_np_dtypes = *, + DataType timestamp_type = *, + list decimal128_columns = * +) + +cdef class OrcColumnStatistics: + cdef optional[uint64_t] number_of_values_c + cdef optional[bool] has_null_c + cdef statistics_type type_specific_stats_c + cdef dict column_stats + + cdef void _init_stats_dict(self) + + @staticmethod + cdef OrcColumnStatistics from_libcudf(column_statistics& col_stats) + + +cdef class ParsedOrcStatistics: + cdef parsed_orc_statistics c_obj + + @staticmethod + cdef ParsedOrcStatistics from_libcudf(parsed_orc_statistics& orc_stats) + + +cpdef ParsedOrcStatistics read_parsed_orc_statistics( + SourceInfo source_info +) diff --git a/python/pylibcudf/pylibcudf/io/orc.pyx b/python/pylibcudf/pylibcudf/io/orc.pyx new file mode 100644 index 00000000000..01a5e4b04a1 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/orc.pyx @@ -0,0 +1,302 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +from libcpp cimport bool +from libcpp.string cimport string +from libcpp.utility cimport move +from libcpp.vector cimport vector + +import datetime + +from pylibcudf.io.types cimport SourceInfo, TableWithMetadata +from pylibcudf.libcudf.io.orc cimport ( + orc_reader_options, + read_orc as cpp_read_orc, +) +from pylibcudf.libcudf.io.orc_metadata cimport ( + binary_statistics, + bucket_statistics, + column_statistics, + date_statistics, + decimal_statistics, + double_statistics, + integer_statistics, + no_statistics, + read_parsed_orc_statistics as cpp_read_parsed_orc_statistics, + statistics_type, + string_statistics, + timestamp_statistics, +) +from pylibcudf.libcudf.io.types cimport table_with_metadata +from pylibcudf.libcudf.types cimport size_type +from pylibcudf.types cimport DataType +from pylibcudf.variant cimport get_if, holds_alternative + + +cdef class OrcColumnStatistics: + def __init__(self): + raise TypeError( + "OrcColumnStatistics should not be instantiated by users. If it is " + "being constructed in Cython from a preexisting libcudf object, " + "use `OrcColumnStatistics.from_libcudf` instead." 
+ ) + + @property + def number_of_values(self): + if self.number_of_values_c.has_value(): + return self.number_of_values_c.value() + return None + + @property + def has_null(self): + if self.has_null_c.has_value(): + return self.has_null_c.value() + return None + + cdef void _init_stats_dict(self): + # Initialize stats to return and parse stats blob + self.column_stats = {} + + cdef statistics_type type_specific_stats = self.type_specific_stats_c + + cdef integer_statistics* int_stats + cdef double_statistics* dbl_stats + cdef string_statistics* str_stats + cdef bucket_statistics* bucket_stats + cdef decimal_statistics* dec_stats + cdef date_statistics* date_stats + cdef binary_statistics* bin_stats + cdef timestamp_statistics* ts_stats + + if holds_alternative[no_statistics](type_specific_stats): + pass + elif int_stats := get_if[integer_statistics](&type_specific_stats): + if int_stats.minimum.has_value(): + self.column_stats["minimum"] = int_stats.minimum.value() + else: + self.column_stats["minimum"] = None + if int_stats.maximum.has_value(): + self.column_stats["maximum"] = int_stats.maximum.value() + else: + self.column_stats["maximum"] = None + if int_stats.sum.has_value(): + self.column_stats["sum"] = int_stats.sum.value() + else: + self.column_stats["sum"] = None + elif dbl_stats := get_if[double_statistics](&type_specific_stats): + if dbl_stats.minimum.has_value(): + self.column_stats["minimum"] = dbl_stats.minimum.value() + else: + self.column_stats["minimum"] = None + if dbl_stats.maximum.has_value(): + self.column_stats["maximum"] = dbl_stats.maximum.value() + else: + self.column_stats["maximum"] = None + if dbl_stats.sum.has_value(): + self.column_stats["sum"] = dbl_stats.sum.value() + else: + self.column_stats["sum"] = None + elif str_stats := get_if[string_statistics](&type_specific_stats): + if str_stats.minimum.has_value(): + self.column_stats["minimum"] = str_stats.minimum.value().decode("utf-8") + else: + self.column_stats["minimum"] = None + if str_stats.maximum.has_value(): + self.column_stats["maximum"] = str_stats.maximum.value().decode("utf-8") + else: + self.column_stats["maximum"] = None + if str_stats.sum.has_value(): + self.column_stats["sum"] = str_stats.sum.value() + else: + self.column_stats["sum"] = None + elif bucket_stats := get_if[bucket_statistics](&type_specific_stats): + self.column_stats["true_count"] = bucket_stats.count[0] + self.column_stats["false_count"] = ( + self.number_of_values + - self.column_stats["true_count"] + ) + elif dec_stats := get_if[decimal_statistics](&type_specific_stats): + if dec_stats.minimum.has_value(): + self.column_stats["minimum"] = dec_stats.minimum.value().decode("utf-8") + else: + self.column_stats["minimum"] = None + if dec_stats.maximum.has_value(): + self.column_stats["maximum"] = dec_stats.maximum.value().decode("utf-8") + else: + self.column_stats["maximum"] = None + if dec_stats.sum.has_value(): + self.column_stats["sum"] = dec_stats.sum.value().decode("utf-8") + else: + self.column_stats["sum"] = None + elif date_stats := get_if[date_statistics](&type_specific_stats): + if date_stats.minimum.has_value(): + self.column_stats["minimum"] = datetime.datetime.fromtimestamp( + datetime.timedelta(date_stats.minimum.value()).total_seconds(), + datetime.timezone.utc, + ) + else: + self.column_stats["minimum"] = None + if date_stats.maximum.has_value(): + self.column_stats["maximum"] = datetime.datetime.fromtimestamp( + datetime.timedelta(date_stats.maximum.value()).total_seconds(), + datetime.timezone.utc, + ) + else: + 
self.column_stats["maximum"] = None + elif bin_stats := get_if[binary_statistics](&type_specific_stats): + if bin_stats.sum.has_value(): + self.column_stats["sum"] = bin_stats.sum.value() + else: + self.column_stats["sum"] = None + elif ts_stats := get_if[timestamp_statistics](&type_specific_stats): + # Before ORC-135, the local timezone offset was included and they were + # stored as minimum and maximum. After ORC-135, the timestamp is + # adjusted to UTC before being converted to milliseconds and stored + # in minimumUtc and maximumUtc. + # TODO: Support minimum and maximum by reading writer's local timezone + if ts_stats.minimum_utc.has_value() and ts_stats.maximum_utc.has_value(): + self.column_stats["minimum"] = datetime.datetime.fromtimestamp( + ts_stats.minimum_utc.value() / 1000, datetime.timezone.utc + ) + self.column_stats["maximum"] = datetime.datetime.fromtimestamp( + ts_stats.maximum_utc.value() / 1000, datetime.timezone.utc + ) + else: + raise ValueError("Unsupported statistics type") + + def __getitem__(self, item): + return self.column_stats[item] + + def __contains__(self, item): + return item in self.column_stats + + def get(self, item, default=None): + return self.column_stats.get(item, default) + + @staticmethod + cdef OrcColumnStatistics from_libcudf(column_statistics& col_stats): + cdef OrcColumnStatistics out = OrcColumnStatistics.__new__(OrcColumnStatistics) + out.number_of_values_c = col_stats.number_of_values + out.has_null_c = col_stats.has_null + out.type_specific_stats_c = col_stats.type_specific_stats + out._init_stats_dict() + return out + + +cdef class ParsedOrcStatistics: + + @property + def column_names(self): + return [name.decode() for name in self.c_obj.column_names] + + @property + def file_stats(self): + return [ + OrcColumnStatistics.from_libcudf(self.c_obj.file_stats[i]) + for i in range(self.c_obj.file_stats.size()) + ] + + @property + def stripes_stats(self): + return [ + [ + OrcColumnStatistics.from_libcudf(stripe_stats_c[i]) + for i in range(stripe_stats_c.size()) + ] + for stripe_stats_c in self.c_obj.stripes_stats + ] + + @staticmethod + cdef ParsedOrcStatistics from_libcudf(parsed_orc_statistics& orc_stats): + cdef ParsedOrcStatistics out = ParsedOrcStatistics.__new__(ParsedOrcStatistics) + out.c_obj = move(orc_stats) + return out + + +cpdef TableWithMetadata read_orc( + SourceInfo source_info, + list columns = None, + list stripes = None, + size_type skip_rows = 0, + size_type nrows = -1, + bool use_index = True, + bool use_np_dtypes = True, + DataType timestamp_type = None, + list decimal128_columns = None, +): + """Reads an ORC file into a :py:class:`~.types.TableWithMetadata`. + + Parameters + ---------- + source_info : SourceInfo + The SourceInfo object to read the Parquet file from. + columns : list, default None + The string names of the columns to be read. + stripes : list[list[size_type]], default None + List of stripes to be read. + skip_rows : int64_t, default 0 + The number of rows to skip from the start of the file. + nrows : size_type, default -1 + The number of rows to read. By default, read the entire file. + use_index : bool, default True + Whether to use the row index to speed up reading. + use_np_dtypes : bool, default True + Whether to use numpy compatible dtypes. + timestamp_type : DataType, default None + The timestamp type to use for the timestamp columns. + decimal128_columns : list, default None + List of column names to be read as 128-bit decimals. 
+ + Returns + ------- + TableWithMetadata + The Table and its corresponding metadata (column names) that were read in. + """ + cdef orc_reader_options opts + cdef vector[vector[size_type]] c_stripes + opts = move( + orc_reader_options.builder(source_info.c_obj) + .use_index(use_index) + .build() + ) + if nrows >= 0: + opts.set_num_rows(nrows) + if skip_rows >= 0: + opts.set_skip_rows(skip_rows) + if stripes is not None: + c_stripes = stripes + opts.set_stripes(c_stripes) + if timestamp_type is not None: + opts.set_timestamp_type(timestamp_type.c_obj) + + cdef vector[string] c_decimal128_columns + if decimal128_columns is not None and len(decimal128_columns) > 0: + c_decimal128_columns.reserve(len(decimal128_columns)) + for col in decimal128_columns: + if not isinstance(col, str): + raise TypeError("Decimal 128 column names must be strings!") + c_decimal128_columns.push_back(col.encode()) + opts.set_decimal128_columns(c_decimal128_columns) + + cdef vector[string] c_column_names + if columns is not None and len(columns) > 0: + c_column_names.reserve(len(columns)) + for col in columns: + if not isinstance(col, str): + raise TypeError("Column names must be strings!") + c_column_names.push_back(col.encode()) + opts.set_columns(c_column_names) + + cdef table_with_metadata c_result + + with nogil: + c_result = move(cpp_read_orc(opts)) + + return TableWithMetadata.from_libcudf(c_result) + + +cpdef ParsedOrcStatistics read_parsed_orc_statistics( + SourceInfo source_info +): + cdef parsed_orc_statistics parsed = ( + cpp_read_parsed_orc_statistics(source_info.c_obj) + ) + return ParsedOrcStatistics.from_libcudf(parsed) diff --git a/python/pylibcudf/pylibcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/io/timezone.pxd new file mode 100644 index 00000000000..2aa755dbbd8 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/timezone.pxd @@ -0,0 +1,6 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from ..table cimport Table + + +cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name) diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyx b/python/pylibcudf/pylibcudf/io/timezone.pyx new file mode 100644 index 00000000000..e02239d7252 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/timezone.pyx @@ -0,0 +1,43 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.optional cimport make_optional +from libcpp.string cimport string +from libcpp.utility cimport move +from pylibcudf.libcudf.io.timezone cimport ( + make_timezone_transition_table as cpp_make_timezone_transition_table, +) +from pylibcudf.libcudf.table.table cimport table + +from ..table cimport Table + + +cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name): + """ + Creates a transition table to convert ORC timestamps to UTC. + + Parameters + ---------- + tzif_dir : str + The directory where the TZif files are located + timezone_name : str + standard timezone name + + Returns + ------- + Table + The transition table for the given timezone. 
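+
+    Examples
+    --------
+    A hypothetical call; the TZif directory shown is illustrative and
+    installation-dependent:
+
+    >>> import pylibcudf as plc
+    >>> tz_table = plc.io.timezone.make_timezone_transition_table(
+    ...     "/usr/share/zoneinfo", "America/New_York"
+    ... )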
+ """ + cdef unique_ptr[table] c_result + cdef string c_tzdir = tzif_dir.encode() + cdef string c_tzname = timezone_name.encode() + + with nogil: + c_result = move( + cpp_make_timezone_transition_table( + make_optional[string](c_tzdir), + c_tzname + ) + ) + + return Table.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/io/types.pyx b/python/pylibcudf/pylibcudf/io/types.pyx index 1600a805b37..563a02761da 100644 --- a/python/pylibcudf/pylibcudf/io/types.pyx +++ b/python/pylibcudf/pylibcudf/io/types.pyx @@ -130,6 +130,7 @@ cdef class TableWithMetadata: """ return self.metadata.per_file_user_data + cdef class SourceInfo: """A class containing details on a source to read from. diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd index e4a09b8feb2..dca24c7f665 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/orc.pxd @@ -35,6 +35,7 @@ cdef extern from "cudf/io/orc.hpp" \ void enable_use_index(bool val) except + void enable_use_np_dtypes(bool val) except + void set_timestamp_type(data_type type) except + + void set_decimal128_columns(vector[string] val) except + @staticmethod orc_reader_options_builder builder( diff --git a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd index db6cb0cdfa5..9302ffe2f80 100644 --- a/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/io/orc_metadata.pxd @@ -1,11 +1,11 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -cimport pylibcudf.libcudf.io.types as cudf_io_types from libc.stdint cimport int32_t, int64_t, uint32_t, uint64_t from libcpp cimport bool from libcpp.optional cimport optional from libcpp.string cimport string from libcpp.vector cimport vector +from pylibcudf.libcudf.io cimport types as cudf_io_types from pylibcudf.variant cimport monostate, variant diff --git a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd index 1ea10e8a194..89bddbffab5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/partitioning.pxd @@ -25,3 +25,10 @@ cdef extern from "cudf/partitioning.hpp" namespace "cudf" nogil: const column_view& partition_map, int num_partitions ) except + + + cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] \ + round_robin_partition "cudf::round_robin_partition" ( + const table_view& input, + int num_partitions, + int start_partition + ) except + diff --git a/python/pylibcudf/pylibcudf/partitioning.pxd b/python/pylibcudf/pylibcudf/partitioning.pxd new file mode 100644 index 00000000000..aad60149fc4 --- /dev/null +++ b/python/pylibcudf/pylibcudf/partitioning.pxd @@ -0,0 +1,19 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from .column cimport Column +from .table cimport Table + + +cpdef tuple[Table, list] hash_partition( + Table input, + list columns_to_hash, + int num_partitions +) + +cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partitions) + +cpdef tuple[Table, list] round_robin_partition( + Table input, + int num_partitions, + int start_partition=* +) diff --git a/python/pylibcudf/pylibcudf/partitioning.pyx b/python/pylibcudf/pylibcudf/partitioning.pyx new file mode 100644 index 00000000000..8fa70daab5a --- /dev/null +++ b/python/pylibcudf/pylibcudf/partitioning.pyx @@ -0,0 +1,120 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +cimport pylibcudf.libcudf.types as libcudf_types +from libcpp.memory cimport unique_ptr +from libcpp.pair cimport pair +from libcpp.utility cimport move +from libcpp.vector cimport vector +from pylibcudf.libcudf cimport partitioning as cpp_partitioning +from pylibcudf.libcudf.table.table cimport table + +from .column cimport Column +from .table cimport Table + + +cpdef tuple[Table, list] hash_partition( + Table input, + list columns_to_hash, + int num_partitions +): + """ + Partitions rows from the input table into multiple output tables. + + For details, see :cpp:func:`hash_partition`. + + Parameters + ---------- + input : Table + The table to partition + columns_to_hash : list[int] + Indices of input columns to hash + num_partitions : int + The number of partitions to use + + Returns + ------- + tuple[Table, list[int]] + An output table and a list of row offsets to each partition + """ + cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result + cdef vector[libcudf_types.size_type] c_columns_to_hash = columns_to_hash + cdef int c_num_partitions = num_partitions + + with nogil: + c_result = move( + cpp_partitioning.hash_partition( + input.view(), c_columns_to_hash, c_num_partitions + ) + ) + + return Table.from_libcudf(move(c_result.first)), list(c_result.second) + +cpdef tuple[Table, list] partition(Table t, Column partition_map, int num_partitions): + """ + Partitions rows of `t` according to the mapping specified by `partition_map`. + + For details, see :cpp:func:`partition`. + + Parameters + ---------- + t : Table + The table to partition + partition_map : Column + Non-nullable column of integer values that map each row + in `t` to its partition. + num_partitions : int + The total number of partitions + + Returns + ------- + tuple[Table, list[int]] + An output table and a list of row offsets to each partition + """ + cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result + cdef int c_num_partitions = num_partitions + + with nogil: + c_result = move( + cpp_partitioning.partition(t.view(), partition_map.view(), c_num_partitions) + ) + + return Table.from_libcudf(move(c_result.first)), list(c_result.second) + + +cpdef tuple[Table, list] round_robin_partition( + Table input, + int num_partitions, + int start_partition=0 +): + """ + Round-robin partition. + + For details, see :cpp:func:`round_robin_partition`. + + Parameters + ---------- + input : Table + The input table to be round-robin partitioned + num_partitions : int + Number of partitions for the table + start_partition : int, default 0 + Index of the first partition + + Returns + ------- + tuple[Table, list[int]] + The partitioned table and the partition offsets + for each partition within the table.
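+
+    Examples
+    --------
+    A minimal sketch; the input values are illustrative:
+
+    >>> import pyarrow as pa
+    >>> import pylibcudf as plc
+    >>> tbl = plc.interop.from_arrow(pa.table({"a": list(range(6))}))
+    >>> out_tbl, offsets = plc.partitioning.round_robin_partition(tbl, 3, 1)
+    >>> # offsets holds one start offset per partition, e.g. [0, 2, 4]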
+ """ + cdef pair[unique_ptr[table], vector[libcudf_types.size_type]] c_result + cdef int c_num_partitions = num_partitions + cdef int c_start_partition = start_partition + + with nogil: + c_result = move( + cpp_partitioning.round_robin_partition( + input.view(), c_num_partitions, c_start_partition + ) + ) + + return Table.from_libcudf(move(c_result.first)), list(c_result.second) diff --git a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt index 77f20b0b917..142bc124ca2 100644 --- a/python/pylibcudf/pylibcudf/strings/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/strings/CMakeLists.txt @@ -13,8 +13,21 @@ # ============================================================================= set(cython_sources - capitalize.pyx case.pyx char_types.pyx contains.pyx extract.pyx find.pyx findall.pyx - regex_flags.pyx regex_program.pyx repeat.pyx replace.pyx side_type.pyx slice.pyx strip.pyx + attributes.pyx + capitalize.pyx + case.pyx + char_types.pyx + contains.pyx + extract.pyx + find.pyx + findall.pyx + regex_flags.pyx + regex_program.pyx + repeat.pyx + replace.pyx + side_type.pyx + slice.pyx + strip.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/strings/__init__.pxd b/python/pylibcudf/pylibcudf/strings/__init__.pxd index 91d884b294b..d8afccc7336 100644 --- a/python/pylibcudf/pylibcudf/strings/__init__.pxd +++ b/python/pylibcudf/pylibcudf/strings/__init__.pxd @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from . cimport ( + attributes, capitalize, case, char_types, @@ -16,3 +17,21 @@ from . cimport ( strip, ) from .side_type cimport side_type + +__all__ = [ + "attributes", + "capitalize", + "case", + "char_types", + "contains", + "convert", + "extract", + "find", + "findall", + "regex_flags", + "regex_program", + "replace", + "slice", + "strip", + "side_type", +] diff --git a/python/pylibcudf/pylibcudf/strings/__init__.py b/python/pylibcudf/pylibcudf/strings/__init__.py index b4856784390..22452812e42 100644 --- a/python/pylibcudf/pylibcudf/strings/__init__.py +++ b/python/pylibcudf/pylibcudf/strings/__init__.py @@ -1,6 +1,7 @@ # Copyright (c) 2024, NVIDIA CORPORATION. from . import ( + attributes, capitalize, case, char_types, @@ -17,3 +18,21 @@ strip, ) from .side_type import SideType + +__all__ = [ + "attributes", + "capitalize", + "case", + "char_types", + "contains", + "convert", + "extract", + "find", + "findall", + "regex_flags", + "regex_program", + "replace", + "slice", + "strip", + "SideType", +] diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pxd b/python/pylibcudf/pylibcudf/strings/attributes.pxd new file mode 100644 index 00000000000..27398766924 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/attributes.pxd @@ -0,0 +1,10 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from pylibcudf.column cimport Column + + +cpdef Column count_characters(Column source_strings) + +cpdef Column count_bytes(Column source_strings) + +cpdef Column code_points(Column source_strings) diff --git a/python/pylibcudf/pylibcudf/strings/attributes.pyx b/python/pylibcudf/pylibcudf/strings/attributes.pyx new file mode 100644 index 00000000000..36bee7bd1d9 --- /dev/null +++ b/python/pylibcudf/pylibcudf/strings/attributes.pyx @@ -0,0 +1,76 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.strings cimport attributes as cpp_attributes
+
+
+cpdef Column count_characters(Column source_strings):
+    """
+    Returns a column containing character lengths of each string
+    in the given column.
+
+    Parameters
+    ----------
+    source_strings : Column
+        Column of strings.
+
+    Returns
+    -------
+    Column
+        New column with lengths for each string
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(cpp_attributes.count_characters(source_strings.view()))
+
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column count_bytes(Column source_strings):
+    """
+    Returns a column containing byte lengths of each string
+    in the given column.
+
+    Parameters
+    ----------
+    source_strings : Column
+        Column of strings.
+
+    Returns
+    -------
+    Column
+        New column with the number of bytes for each string
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(cpp_attributes.count_bytes(source_strings.view()))
+
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column code_points(Column source_strings):
+    """
+    Creates a numeric column with code point values (integers)
+    for each character of each string.
+
+    Parameters
+    ----------
+    source_strings : Column
+        Column of strings.
+
+    Returns
+    -------
+    Column
+        New column with code point integer values for each character
+    """
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(cpp_attributes.code_points(source_strings.view()))
+
+    return Column.from_libcudf(move(c_result))
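
# [Editor's example] How the new strings.attributes functions compose with
# the interop layer. A sketch, not part of the patch; the input data is
# hypothetical.
import pyarrow as pa
import pylibcudf as plc

strings = plc.interop.from_arrow(pa.array(["cudf", None, "écu"]))
chars = plc.strings.attributes.count_characters(strings)  # 4, null, 3
nbytes = plc.strings.attributes.count_bytes(strings)      # 4, null, 4 ("é" is 2 bytes)
# code_points emits one INT32 value per character across all strings.
points = plc.strings.attributes.code_points(strings)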
diff --git a/python/pylibcudf/pylibcudf/tests/common/utils.py b/python/pylibcudf/pylibcudf/tests/common/utils.py
index babe6634318..9f389fa42c4 100644
--- a/python/pylibcudf/pylibcudf/tests/common/utils.py
+++ b/python/pylibcudf/pylibcudf/tests/common/utils.py
@@ -9,6 +9,7 @@
 import pyarrow.compute as pc
 import pylibcudf as plc
 import pytest
+from pyarrow.orc import write_table as orc_write_table
 from pyarrow.parquet import write_table as pq_write_table
 from pylibcudf.io.types import CompressionType
 
@@ -242,13 +243,21 @@ def is_nested_list(typ):
     return nesting_level(typ)[0] > 1
 
 
-def _convert_numeric_types_to_floating(pa_table):
+def _convert_types(pa_table, input_pred, result_type):
     """
-    Useful little helper for testing the
-    dtypes option in I/O readers.
+    Useful little helper for testing the dtypes option in I/O readers.
 
-    Returns a tuple containing the pylibcudf dtypes
-    and the new pyarrow schema
+    Returns a tuple containing the pylibcudf dtypes and the new pyarrow schema based on
+    the data in the table.
+
+    Parameters
+    ----------
+    pa_table : pyarrow.Table
+        The table from which to extract the dtypes
+    input_pred : callable
+        Predicate that evaluates to True for types that should be replaced
+    result_type : pa.DataType
+        The type to cast matching fields to
     """
     dtypes = []
     new_fields = []
@@ -257,11 +266,9 @@ def _convert_numeric_types_to_floating(pa_table):
         child_types = []
 
         plc_type = plc.interop.from_arrow(field.type)
-        if pa.types.is_integer(field.type) or pa.types.is_unsigned_integer(
-            field.type
-        ):
-            plc_type = plc.interop.from_arrow(pa.float64())
-            field = field.with_type(pa.float64())
+        if input_pred(field.type):
+            plc_type = plc.interop.from_arrow(result_type)
+            field = field.with_type(result_type)
 
         dtypes.append((field.name, plc_type, child_types))
 
@@ -332,6 +339,16 @@ def make_source(path_or_buf, pa_table, format, **kwargs):
         if isinstance(path_or_buf, io.IOBase)
         else path_or_buf,
     )
+    elif format == "orc":
+        # The conversion to pandas is lossy (it doesn't
+        # preserve nested types), so use pyarrow directly
+        # to write the ORC source.
+        orc_write_table(
+            pa_table,
+            pa.PythonFile(path_or_buf)
+            if isinstance(path_or_buf, io.IOBase)
+            else path_or_buf,
+        )
     if isinstance(path_or_buf, io.IOBase):
         path_or_buf.seek(0)
     return path_or_buf
diff --git a/python/pylibcudf/pylibcudf/tests/io/test_csv.py b/python/pylibcudf/pylibcudf/tests/io/test_csv.py
index ccd7eef54f3..ab26f23418d 100644
--- a/python/pylibcudf/pylibcudf/tests/io/test_csv.py
+++ b/python/pylibcudf/pylibcudf/tests/io/test_csv.py
@@ -9,7 +9,7 @@
 import pytest
 from pylibcudf.io.types import CompressionType
 from utils import (
-    _convert_numeric_types_to_floating,
+    _convert_types,
     assert_table_and_meta_eq,
     make_source,
     write_source_str,
@@ -148,7 +148,11 @@ def test_read_csv_dtypes(csv_table_data, source_or_sink, usecols):
     if usecols is not None:
         pa_table = pa_table.select(usecols)
 
-    dtypes, new_fields = _convert_numeric_types_to_floating(pa_table)
+    dtypes, new_fields = _convert_types(
+        pa_table,
+        lambda t: (pa.types.is_unsigned_integer(t) or pa.types.is_integer(t)),
+        pa.float64(),
+    )
     # Extract the dtype out of the (name, type, child_types) tuple
     # (read_csv doesn't support this format since it doesn't support nested columns)
     dtypes = {name: dtype for name, dtype, _ in dtypes}
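
# [Editor's example] The shape of an ORC read exercised by the new test
# below. A sketch, not part of the patch; the file path is hypothetical and
# the keyword arguments are the ones used by test_read_orc_basic.
import pylibcudf as plc

res = plc.io.orc.read_orc(
    plc.io.SourceInfo(["data.orc"]),  # hypothetical file
    nrows=-1,      # -1 reads all rows
    skip_rows=0,
    columns=None,  # None selects every column
)
# `res` bundles the table with its metadata; the tests compare it against a
# pyarrow table via assert_table_and_meta_eq.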
diff --git a/python/pylibcudf/pylibcudf/tests/io/test_orc.py b/python/pylibcudf/pylibcudf/tests/io/test_orc.py
new file mode 100644
index 00000000000..42b14b1feff
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/io/test_orc.py
@@ -0,0 +1,53 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+from utils import _convert_types, assert_table_and_meta_eq, make_source

+# Shared kwargs to pass to make_source
+_COMMON_ORC_SOURCE_KWARGS = {"format": "orc"}
+
+
+@pytest.mark.parametrize("columns", [None, ["col_int64", "col_bool"]])
+def test_read_orc_basic(
+    table_data, binary_source_or_sink, nrows_skiprows, columns
+):
+    _, pa_table = table_data
+    nrows, skiprows = nrows_skiprows
+
+    # The ORC reader doesn't support skip_rows for nested columns
+    if skiprows > 0:
+        colnames_to_drop = []
+        for i in range(len(pa_table.schema)):
+            field = pa_table.schema.field(i)
+
+            if pa.types.is_nested(field.type):
+                colnames_to_drop.append(field.name)
+        pa_table = pa_table.drop(colnames_to_drop)
+    # ORC doesn't support unsigned ints,
+    # so cast them to int64.
+    _, new_fields = _convert_types(
+        pa_table, pa.types.is_unsigned_integer, pa.int64()
+    )
+    pa_table = pa_table.cast(pa.schema(new_fields))
+
+    source = make_source(
+        binary_source_or_sink, pa_table, **_COMMON_ORC_SOURCE_KWARGS
+    )
+
+    res = plc.io.orc.read_orc(
+        plc.io.SourceInfo([source]),
+        nrows=nrows,
+        skip_rows=skiprows,
+        columns=columns,
+    )
+
+    if columns is not None:
+        pa_table = pa_table.select(columns)
+
+    # Adapt the expected table to nrows/skiprows
+    pa_table = pa_table.slice(
+        offset=skiprows, length=nrows if nrows != -1 else None
+    )
+
+    assert_table_and_meta_eq(pa_table, res, check_field_nullability=False)
diff --git a/python/pylibcudf/pylibcudf/tests/io/test_timezone.py b/python/pylibcudf/pylibcudf/tests/io/test_timezone.py
new file mode 100644
index 00000000000..76b0424b2af
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/io/test_timezone.py
@@ -0,0 +1,16 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+import zoneinfo
+
+import pylibcudf as plc
+import pytest
+
+
+def test_make_timezone_transition_table():
+    if len(zoneinfo.TZPATH) == 0:
+        pytest.skip("No TZPATH available.")
+    tz_path = zoneinfo.TZPATH[0]
+    result = plc.io.timezone.make_timezone_transition_table(
+        tz_path, "America/Los_Angeles"
+    )
+    assert isinstance(result, plc.Table)
+    assert result.num_rows() > 0
diff --git a/python/pylibcudf/pylibcudf/tests/test_partitioning.py b/python/pylibcudf/pylibcudf/tests/test_partitioning.py
new file mode 100644
index 00000000000..444d0089d2c
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_partitioning.py
@@ -0,0 +1,55 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+from utils import assert_table_eq
+
+
+@pytest.fixture(scope="module")
+def partitioning_data():
+    data = {"a": [1, 2, 3], "b": [1, 2, 5], "c": [1, 2, 10]}
+    pa_table = pa.table(data)
+    plc_table = plc.interop.from_arrow(pa_table)
+    return data, plc_table, pa_table
+
+
+def test_partition(partitioning_data):
+    raw_data, plc_table, pa_table = partitioning_data
+    result, result_offsets = plc.partitioning.partition(
+        plc_table,
+        plc.interop.from_arrow(pa.array([0, 0, 0])),
+        1,
+    )
+    expected = pa.table(
+        list(raw_data.values()),
+        schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
+    )
+    assert_table_eq(expected, result)
+    assert result_offsets == [0, 3]
+
+
+def test_hash_partition(partitioning_data):
+    raw_data, plc_table, pa_table = partitioning_data
+    result, result_offsets = plc.partitioning.hash_partition(
+        plc_table, [0, 1], 1
+    )
+    expected = pa.table(
+        list(raw_data.values()),
+        schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
+    )
+    assert_table_eq(expected, result)
+    assert result_offsets == [0]
+
+
+def test_round_robin_partition(partitioning_data):
+    raw_data, plc_table, pa_table = partitioning_data
+    result, result_offsets = plc.partitioning.round_robin_partition(
+        plc_table, 1, 0
+    )
+    expected = pa.table(
+        list(raw_data.values()),
+        schema=pa.schema([pa.field("", pa.int64(), nullable=False)] * 3),
+    )
+    assert_table_eq(expected, result)
+    assert result_offsets == [0]
diff --git a/python/pylibcudf/pylibcudf/tests/test_string_attributes.py b/python/pylibcudf/pylibcudf/tests/test_string_attributes.py
new file mode 100644
index 00000000000..a1820def0b1
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_string_attributes.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pyarrow.compute as pc
+import pylibcudf as plc
+import pytest
+from utils import assert_column_eq
+
+
+@pytest.fixture()
+def str_data():
+    pa_data = pa.array(["A", None])
+    return pa_data, plc.interop.from_arrow(pa_data)
+
+
+def test_count_characters(str_data):
+    result = plc.strings.attributes.count_characters(str_data[1])
+    expected = pc.utf8_length(str_data[0])
+    assert_column_eq(expected, result)
+
+
+def test_count_bytes(str_data):
+    result = plc.strings.attributes.count_bytes(str_data[1])
+    expected = pc.binary_length(str_data[0])
+    assert_column_eq(expected, result)
+
+
+def test_code_points(str_data):
+    result = plc.strings.attributes.code_points(str_data[1])
+    exp_data = [ord(str_data[0].to_pylist()[0])]
+    expected = pa.chunked_array([exp_data], type=pa.int32())
+    assert_column_eq(expected, result)
diff --git a/python/pylibcudf/pylibcudf/tests/test_transpose.py b/python/pylibcudf/pylibcudf/tests/test_transpose.py
new file mode 100644
index 00000000000..ac11123f680
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_transpose.py
@@ -0,0 +1,32 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+from packaging.version import parse
+
+
+@pytest.mark.skipif(
+    parse(pa.__version__) < parse("16.0.0"),
+    reason="https://github.com/apache/arrow/pull/40070",
+)
+@pytest.mark.parametrize(
+    "arr",
+    [
+        [],
+        [1, 2, 3],
+        [1, 2],
+        [1],
+    ],
+)
+def test_transpose(arr):
+    data = {"a": arr, "b": arr}
+    arrow_tbl = pa.table(data)
+    plc_tbl = plc.interop.from_arrow(arrow_tbl)
+    plc_result = plc.transpose.transpose(plc_tbl)
+    result = plc.interop.to_arrow(plc_result)
+    expected = pa.Table.from_pandas(
+        arrow_tbl.to_pandas().T, preserve_index=False
+    ).rename_columns([""] * len(arr))
+    expected = pa.table(expected, schema=result.schema)
+    assert result.equals(expected)
diff --git a/python/pylibcudf/pylibcudf/transpose.pxd b/python/pylibcudf/pylibcudf/transpose.pxd
new file mode 100644
index 00000000000..7b5a7676b49
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/transpose.pxd
@@ -0,0 +1,5 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from .table cimport Table
+
+
+cpdef Table transpose(Table input_table)
diff --git a/python/pylibcudf/pylibcudf/transpose.pyx b/python/pylibcudf/pylibcudf/transpose.pyx
new file mode 100644
index 00000000000..a708f6cc37f
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/transpose.pyx
@@ -0,0 +1,38 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from libcpp.memory cimport unique_ptr
+from libcpp.pair cimport pair
+from libcpp.utility cimport move
+from pylibcudf.libcudf cimport transpose as cpp_transpose
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.table.table_view cimport table_view
+
+from .column cimport Column
+from .table cimport Table
+
+
+cpdef Table transpose(Table input_table):
+    """Transpose a Table.
+
+    For details, see :cpp:func:`transpose`.
+
+    Parameters
+    ----------
+    input_table : Table
+        Table to transpose
+
+    Returns
+    -------
+    Table
+        Transposed table.
+    """
+    cdef pair[unique_ptr[column], table_view] c_result
+    cdef Table owner_table
+
+    with nogil:
+        c_result = move(cpp_transpose.transpose(input_table.view()))
+
+    owner_table = Table(
+        [Column.from_libcudf(move(c_result.first))] * c_result.second.num_columns()
+    )
+
+    return Table.from_table_view(c_result.second, owner_table)
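
# [Editor's example] Round-tripping the new transpose binding through
# pyarrow. A sketch, not part of the patch; the data is hypothetical. All
# columns here share one dtype, as the underlying transpose expects.
import pyarrow as pa
import pylibcudf as plc

tbl = plc.interop.from_arrow(pa.table({"a": [1, 2], "b": [3, 4]}))
flipped = plc.transpose.transpose(tbl)  # rows become columns
# The result is a view over one flattened device column, kept alive by the
# owner Table constructed inside transpose() above.
print(plc.interop.to_arrow(flipped))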