diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index a35802f2ab0..ceee9074b93 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,7 +25,8 @@ jobs: - docs-build - wheel-build-cudf - wheel-tests-cudf - - test-cudf-polars + - wheel-build-cudf-polars + - wheel-tests-cudf-polars - wheel-build-dask-cudf - wheel-tests-dask-cudf - devcontainer @@ -133,9 +134,18 @@ jobs: with: build_type: pull-request script: ci/test_wheel_cudf.sh - test-cudf-polars: + wheel-build-cudf-polars: needs: wheel-build-cudf secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.08 + with: + # This selects "ARCH=amd64 + the latest supported Python + CUDA". + matrix_filter: map(select(.ARCH == "amd64")) | group_by(.CUDA_VER|split(".")|map(tonumber)|.[0]) | map(max_by([(.PY_VER|split(".")|map(tonumber)), (.CUDA_VER|split(".")|map(tonumber))])) + build_type: pull-request + script: "ci/build_wheel_cudf_polars.sh" + wheel-tests-cudf-polars: + needs: wheel-build-cudf-polars + secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.08 with: # This selects "ARCH=amd64 + the latest supported Python + CUDA". @@ -143,7 +153,7 @@ jobs: build_type: pull-request # This always runs, but only fails if this PR touches code in # pylibcudf or cudf_polars - script: "ci/test_cudf_polars.sh" + script: "ci/test_wheel_cudf_polars.sh" wheel-build-dask-cudf: needs: wheel-build-cudf secrets: inherit diff --git a/ci/build_wheel_cudf_polars.sh b/ci/build_wheel_cudf_polars.sh new file mode 100755 index 00000000000..9c945e11c00 --- /dev/null +++ b/ci/build_wheel_cudf_polars.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +package_dir="python/cudf_polars" + +./ci/build_wheel.sh ${package_dir} + +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 ${package_dir}/dist diff --git a/ci/run_cudf_polars_pytests.sh b/ci/run_cudf_polars_pytests.sh new file mode 100755 index 00000000000..c10612a065a --- /dev/null +++ b/ci/run_cudf_polars_pytests.sh @@ -0,0 +1,11 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +# It is essential to cd into python/cudf_polars as `pytest-xdist` + `coverage` seem to work only at this directory level. + +# Support invoking run_cudf_polars_pytests.sh outside the script directory +cd "$(dirname "$(realpath "${BASH_SOURCE[0]}")")"/../python/cudf_polars/ + +python -m pytest --cache-clear "$@" tests diff --git a/ci/test_cudf_polars.sh b/ci/test_wheel_cudf_polars.sh similarity index 67% rename from ci/test_cudf_polars.sh rename to ci/test_wheel_cudf_polars.sh index 95fb4b431bf..900acd5d473 100755 --- a/ci/test_cudf_polars.sh +++ b/ci/test_wheel_cudf_polars.sh @@ -18,19 +18,14 @@ else fi RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" -RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="cudf_polars_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist -RESULTS_DIR=${RAPIDS_TESTS_DIR:-"$(mktemp -d)"} -RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${RESULTS_DIR}/test-results"}/ -mkdir -p "${RAPIDS_TESTS_DIR}" - -rapids-logger "Install cudf wheel" -# echo to expand wildcard before adding `[extra]` requires for pip -python -m pip install $(echo ./dist/cudf*.whl)[test] +# Download the cudf built in the previous step +RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep +python -m pip install ./local-cudf-dep/cudf*.whl 
rapids-logger "Install cudf_polars" -python -m pip install 'polars>=1.0' -python -m pip install --no-deps python/cudf_polars +python -m pip install $(echo ./dist/cudf_polars*.whl)[test] rapids-logger "Run cudf_polars tests" @@ -42,13 +37,11 @@ EXITCODE=0 trap set_exitcode ERR set +e -python -m pytest \ - --cache-clear \ +./ci/run_cudf_polars_pytests.sh \ --cov cudf_polars \ --cov-fail-under=100 \ - --cov-config=python/cudf_polars/pyproject.toml \ - --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf_polars.xml" \ - python/cudf_polars/tests + --cov-config=./pyproject.toml \ + --junitxml="${RAPIDS_TESTS_DIR}/junit-cudf-polars.xml" trap ERR set -e diff --git a/ci/test_wheel_dask_cudf.sh b/ci/test_wheel_dask_cudf.sh index 2b20b9d9ce4..c3800d3cc25 100755 --- a/ci/test_wheel_dask_cudf.sh +++ b/ci/test_wheel_dask_cudf.sh @@ -8,7 +8,7 @@ RAPIDS_PY_WHEEL_NAME="dask_cudf_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE=" # Download the cudf built in the previous step RAPIDS_PY_WHEEL_NAME="cudf_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./local-cudf-dep -python -m pip install --no-deps ./local-cudf-dep/cudf*.whl +python -m pip install ./local-cudf-dep/cudf*.whl # echo to expand wildcard before adding `[extra]` requires for pip python -m pip install $(echo ./dist/dask_cudf*.whl)[test] diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index cc9238ab80a..b8d73a01f96 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -15,7 +15,7 @@ dependencies: - cachetools - clang-tools=16.0.6 - clang==16.0.6 -- cmake>=3.26.4 +- cmake>=3.26.4,!=3.30.0 - cramjam - cubinlinker - cuda-nvtx=11.8 diff --git a/conda/environments/all_cuda-122_arch-x86_64.yaml b/conda/environments/all_cuda-122_arch-x86_64.yaml index 9fecd452248..c32d21c5d36 100644 --- a/conda/environments/all_cuda-122_arch-x86_64.yaml +++ b/conda/environments/all_cuda-122_arch-x86_64.yaml @@ -15,7 +15,7 
@@ dependencies: - cachetools - clang-tools=16.0.6 - clang==16.0.6 -- cmake>=3.26.4 +- cmake>=3.26.4,!=3.30.0 - cramjam - cuda-cudart-dev - cuda-nvcc diff --git a/conda/recipes/cudf/conda_build_config.yaml b/conda/recipes/cudf/conda_build_config.yaml index d399e440edd..af894cccda0 100644 --- a/conda/recipes/cudf/conda_build_config.yaml +++ b/conda/recipes/cudf/conda_build_config.yaml @@ -11,7 +11,7 @@ c_stdlib_version: - "2.17" cmake_version: - - ">=3.26.4" + - ">=3.26.4,!=3.30.0" cuda_compiler: - cuda-nvcc diff --git a/conda/recipes/cudf_kafka/conda_build_config.yaml b/conda/recipes/cudf_kafka/conda_build_config.yaml index d399e440edd..af894cccda0 100644 --- a/conda/recipes/cudf_kafka/conda_build_config.yaml +++ b/conda/recipes/cudf_kafka/conda_build_config.yaml @@ -11,7 +11,7 @@ c_stdlib_version: - "2.17" cmake_version: - - ">=3.26.4" + - ">=3.26.4,!=3.30.0" cuda_compiler: - cuda-nvcc diff --git a/conda/recipes/libcudf/conda_build_config.yaml b/conda/recipes/libcudf/conda_build_config.yaml index c01178bf732..4f99411e978 100644 --- a/conda/recipes/libcudf/conda_build_config.yaml +++ b/conda/recipes/libcudf/conda_build_config.yaml @@ -17,7 +17,7 @@ c_stdlib_version: - "2.17" cmake_version: - - ">=3.26.4" + - ">=3.26.4,!=3.30.0" libarrow_version: - "==16.1.0" diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2811711d58c..7999ada9282 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -925,6 +925,11 @@ if(CUDF_BUILD_STREAMS_TEST_UTIL) add_library( ${_tgt} SHARED src/utilities/stacktrace.cpp tests/utilities/identify_stream_usage.cpp ) + if(CUDF_USE_PER_THREAD_DEFAULT_STREAM) + target_compile_definitions( + ${_tgt} PUBLIC CUDA_API_PER_THREAD_DEFAULT_STREAM CUDF_USE_PER_THREAD_DEFAULT_STREAM + ) + endif() set_target_properties( ${_tgt} diff --git a/cpp/include/cudf/dictionary/detail/update_keys.hpp b/cpp/include/cudf/dictionary/detail/update_keys.hpp index e8486a80afc..9cdda773dbb 100644 --- a/cpp/include/cudf/dictionary/detail/update_keys.hpp +++ 
b/cpp/include/cudf/dictionary/detail/update_keys.hpp @@ -29,7 +29,7 @@ namespace dictionary { namespace detail { /** * @copydoc cudf::dictionary::add_keys(dictionary_column_view const&,column_view - * const&,mm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -40,7 +40,7 @@ std::unique_ptr add_keys(dictionary_column_view const& dictionary_column /** * @copydoc cudf::dictionary::remove_keys(dictionary_column_view const&,column_view - * const&,mm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -51,7 +51,7 @@ std::unique_ptr remove_keys(dictionary_column_view const& dictionary_col /** * @copydoc cudf::dictionary::remove_unused_keys(dictionary_column_view - * const&,mm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -61,7 +61,7 @@ std::unique_ptr remove_unused_keys(dictionary_column_view const& diction /** * @copydoc cudf::dictionary::set_keys(dictionary_column_view - * const&,mm::mr::device_memory_resource*) + * const&,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. */ @@ -72,7 +72,7 @@ std::unique_ptr set_keys(dictionary_column_view const& dictionary_column /** * @copydoc - * cudf::dictionary::match_dictionaries(std::vector,mm::mr::device_memory_resource*) + * cudf::dictionary::match_dictionaries(std::vector,rmm::device_async_resource_ref) * * @param stream CUDA stream used for device memory operations and kernel launches. 
*/ diff --git a/cpp/include/cudf/interop.hpp b/cpp/include/cudf/interop.hpp index 502ffb9ba4f..11f6ce2bad7 100644 --- a/cpp/include/cudf/interop.hpp +++ b/cpp/include/cudf/interop.hpp @@ -39,6 +39,7 @@ #include #include +#include #include @@ -372,8 +373,8 @@ std::unique_ptr from_arrow( std::unique_ptr from_arrow( ArrowSchema const* schema, ArrowArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::column` from a given ArrowArray and ArrowSchema input @@ -391,8 +392,8 @@ std::unique_ptr from_arrow( std::unique_ptr from_arrow_column( ArrowSchema const* schema, ArrowArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::table` from given ArrowDeviceArray input @@ -415,8 +416,8 @@ std::unique_ptr from_arrow_column( std::unique_ptr from_arrow_host( ArrowSchema const* schema, ArrowDeviceArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::table` from given ArrowArrayStream input @@ -433,8 +434,8 @@ std::unique_ptr
from_arrow_host( */ std::unique_ptr
from_arrow_stream( ArrowArrayStream* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief Create `cudf::column` from given ArrowDeviceArray input @@ -456,8 +457,8 @@ std::unique_ptr
from_arrow_stream( std::unique_ptr from_arrow_host_column( ArrowSchema const* schema, ArrowDeviceArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief typedef for a vector of owning columns, used for conversion from ArrowDeviceArray @@ -537,8 +538,8 @@ using unique_table_view_t = unique_table_view_t from_arrow_device( ArrowSchema const* schema, ArrowDeviceArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** * @brief typedef for a unique_ptr to a `cudf::column_view` with custom deleter @@ -580,8 +581,8 @@ using unique_column_view_t = unique_column_view_t from_arrow_device_column( ArrowSchema const* schema, ArrowDeviceArray const* input, - rmm::cuda_stream_view stream = cudf::get_default_stream(), - rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource()); + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource()); /** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf/strings/replace.hpp b/cpp/include/cudf/strings/replace.hpp index a19aa9be0c0..a714f762a19 100644 --- a/cpp/include/cudf/strings/replace.hpp +++ b/cpp/include/cudf/strings/replace.hpp @@ -122,7 +122,7 @@ std::unique_ptr replace_slice( * If a target string is found, it is replaced by the corresponding entry in the repls column. * All occurrences found in each string are replaced. * - * This does not use regex to match targets in the string. 
+ * This does not use regex to match targets in the string. Empty string targets are ignored. * * Null string entries will return null output string entries. * diff --git a/cpp/src/interop/from_arrow_device.cu b/cpp/src/interop/from_arrow_device.cu index 73c1a474310..e1d289e67a3 100644 --- a/cpp/src/interop/from_arrow_device.cu +++ b/cpp/src/interop/from_arrow_device.cu @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -56,7 +57,7 @@ struct dispatch_from_arrow_device { data_type, bool, rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) + rmm::device_async_resource_ref) { CUDF_FAIL("Unsupported type in from_arrow_device", cudf::data_type_error); } @@ -68,7 +69,7 @@ struct dispatch_from_arrow_device { data_type type, bool skip_mask, rmm::cuda_stream_view, - rmm::mr::device_memory_resource*) + rmm::device_async_resource_ref mr) { size_type const num_rows = input->length; size_type const offset = input->offset; @@ -90,7 +91,7 @@ dispatch_tuple_t get_column(ArrowSchemaView* schema, data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); template <> dispatch_tuple_t dispatch_from_arrow_device::operator()(ArrowSchemaView* schema, @@ -98,7 +99,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()(ArrowSchemaView* s data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { if (input->length == 0) { return std::make_tuple( @@ -141,7 +142,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema->type != NANOARROW_TYPE_LARGE_STRING, "Large strings are not yet supported in from_arrow_device", @@ -182,7 +183,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( data_type type, bool skip_mask, 
rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { ArrowSchemaView keys_schema_view; NANOARROW_THROW_NOT_OK( @@ -238,7 +239,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { std::vector children; owned_columns_t out_owned_cols; @@ -283,7 +284,7 @@ dispatch_tuple_t dispatch_from_arrow_device::operator()( data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { size_type const num_rows = input->length; size_type const offset = input->offset; @@ -324,7 +325,7 @@ dispatch_tuple_t get_column(ArrowSchemaView* schema, data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type.id() != type_id::EMPTY ? std::move(type_dispatcher( @@ -342,7 +343,7 @@ dispatch_tuple_t get_column(ArrowSchemaView* schema, unique_table_view_t from_arrow_device(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema != nullptr && input != nullptr, "input ArrowSchema and ArrowDeviceArray must not be NULL", @@ -397,7 +398,7 @@ unique_table_view_t from_arrow_device(ArrowSchema const* schema, unique_column_view_t from_arrow_device_column(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema != nullptr && input != nullptr, "input ArrowSchema and ArrowDeviceArray must not be NULL", @@ -429,7 +430,7 @@ unique_column_view_t from_arrow_device_column(ArrowSchema const* schema, unique_table_view_t from_arrow_device(ArrowSchema const* schema, ArrowDeviceArray const* input, 
rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -439,7 +440,7 @@ unique_table_view_t from_arrow_device(ArrowSchema const* schema, unique_column_view_t from_arrow_device_column(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/interop/from_arrow_host.cu b/cpp/src/interop/from_arrow_host.cu index b7e07056686..b3087dedf98 100644 --- a/cpp/src/interop/from_arrow_host.cu +++ b/cpp/src/interop/from_arrow_host.cu @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -49,7 +50,7 @@ namespace { struct dispatch_copy_from_arrow_host { rmm::cuda_stream_view stream; - rmm::mr::device_memory_resource* mr; + rmm::device_async_resource_ref mr; std::unique_ptr get_mask_buffer(ArrowArray const* array) { @@ -131,7 +132,7 @@ std::unique_ptr get_column_copy(ArrowSchemaView* schema, data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr); + rmm::device_async_resource_ref mr); template <> std::unique_ptr dispatch_copy_from_arrow_host::operator()(ArrowSchemaView* schema, @@ -388,7 +389,7 @@ std::unique_ptr get_column_copy(ArrowSchemaView* schema, data_type type, bool skip_mask, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { return type.id() != type_id::EMPTY ? std::move(type_dispatcher( @@ -405,7 +406,7 @@ std::unique_ptr get_column_copy(ArrowSchemaView* schema, std::unique_ptr
from_arrow_host(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema != nullptr && input != nullptr, "input ArrowSchema and ArrowDeviceArray must not be NULL", @@ -441,7 +442,7 @@ std::unique_ptr
from_arrow_host(ArrowSchema const* schema, std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(schema != nullptr && input != nullptr, "input ArrowSchema and ArrowDeviceArray must not be NULL", @@ -462,7 +463,7 @@ std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, std::unique_ptr
from_arrow_host(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -472,7 +473,7 @@ std::unique_ptr
from_arrow_host(ArrowSchema const* schema, std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, ArrowDeviceArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -482,7 +483,7 @@ std::unique_ptr from_arrow_host_column(ArrowSchema const* schema, std::unique_ptr
from_arrow(ArrowSchema const* schema, ArrowArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); @@ -497,7 +498,7 @@ std::unique_ptr
from_arrow(ArrowSchema const* schema, std::unique_ptr from_arrow_column(ArrowSchema const* schema, ArrowArray const* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); diff --git a/cpp/src/interop/from_arrow_stream.cu b/cpp/src/interop/from_arrow_stream.cu index 0c85b561944..578105aa90a 100644 --- a/cpp/src/interop/from_arrow_stream.cu +++ b/cpp/src/interop/from_arrow_stream.cu @@ -41,7 +41,7 @@ namespace { std::unique_ptr make_empty_column_from_schema(ArrowSchema const* schema, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { ArrowSchemaView schema_view; NANOARROW_THROW_NOT_OK(ArrowSchemaViewInit(&schema_view, schema, nullptr)); @@ -81,7 +81,7 @@ std::unique_ptr make_empty_column_from_schema(ArrowSchema const* schema, std::unique_ptr
from_arrow_stream(ArrowArrayStream* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_EXPECTS(input != nullptr, "input ArrowArrayStream must not be NULL", std::invalid_argument); @@ -135,7 +135,7 @@ std::unique_ptr
from_arrow_stream(ArrowArrayStream* input, std::unique_ptr
from_arrow_stream(ArrowArrayStream* input, rmm::cuda_stream_view stream, - rmm::mr::device_memory_resource* mr) + rmm::device_async_resource_ref mr) { CUDF_FUNC_RANGE(); return detail::from_arrow_stream(input, stream, mr); diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu index 2b3aa2f08f1..62b85891adb 100644 --- a/cpp/src/interop/to_arrow.cu +++ b/cpp/src/interop/to_arrow.cu @@ -376,7 +376,12 @@ std::shared_ptr dispatch_to_arrow::operator()( metadata.children_meta.empty() ? std::vector{{}, {}} : metadata.children_meta; auto child_arrays = fetch_child_array(input_view, children_meta, ar_mr, stream); if (child_arrays.empty()) { - return std::make_shared(arrow::list(arrow::null()), 0, nullptr, nullptr); + // Empty list will have only one value in offset of 4 bytes + auto tmp_offset_buffer = allocate_arrow_buffer(sizeof(int32_t), ar_mr); + memset(tmp_offset_buffer->mutable_data(), 0, sizeof(int32_t)); + + return std::make_shared( + arrow::list(arrow::null()), 0, std::move(tmp_offset_buffer), nullptr); } auto offset_buffer = child_arrays[0]->data()->buffers[1]; diff --git a/cpp/src/io/csv/writer_impl.cu b/cpp/src/io/csv/writer_impl.cu index 7c4d5711281..63eb0b03c5f 100644 --- a/cpp/src/io/csv/writer_impl.cu +++ b/cpp/src/io/csv/writer_impl.cu @@ -25,6 +25,7 @@ #include #include +#include #include #include #include @@ -372,15 +373,33 @@ void write_chunked(data_sink* out_sink, CUDF_EXPECTS(str_column_view.size() > 0, "Unexpected empty strings column."); cudf::string_scalar newline{options.get_line_terminator(), true, stream}; - auto p_str_col_w_nl = cudf::strings::detail::join_strings(str_column_view, - newline, - string_scalar{"", false, stream}, - stream, - rmm::mr::get_current_device_resource()); - strings_column_view strings_column{p_str_col_w_nl->view()}; - auto total_num_bytes = strings_column.chars_size(stream); - char const* ptr_all_bytes = strings_column.chars_begin(stream); + // use strings concatenate to build the final CSV output in 
device memory + auto contents_w_nl = [&] { + auto const total_size = + str_column_view.chars_size(stream) + (newline.size() * str_column_view.size()); + auto const empty_str = string_scalar("", true, stream); + // use join_strings when the output will be less than 2GB + if (total_size < static_cast(std::numeric_limits::max())) { + return cudf::strings::detail::join_strings(str_column_view, newline, empty_str, stream, mr) + ->release(); + } + auto nl_col = cudf::make_column_from_scalar(newline, str_column_view.size(), stream); + // convert the last element into an empty string by resetting the last offset value + auto& offsets = nl_col->child(strings_column_view::offsets_column_index); + auto offsets_view = offsets.mutable_view(); + cudf::fill_in_place(offsets_view, + offsets.size() - 1, // set the last element with + offsets.size(), // the value from 2nd to last element + *cudf::detail::get_element(offsets.view(), offsets.size() - 2, stream, mr), + stream); + auto const nl_tbl = cudf::table_view({str_column_view.parent(), nl_col->view()}); + return cudf::strings::detail::concatenate( + nl_tbl, empty_str, empty_str, strings::separator_on_nulls::NO, stream, mr) + ->release(); + }(); + auto const total_num_bytes = contents_w_nl.data->size(); + auto const ptr_all_bytes = static_cast(contents_w_nl.data->data()); if (out_sink->is_device_write_preferred(total_num_bytes)) { // Direct write from device memory @@ -491,7 +510,8 @@ void write_csv(data_sink* out_sink, str_table_view.column(0), options_narep, stream, rmm::mr::get_current_device_resource()); }(); - write_chunked(out_sink, str_concat_col->view(), options, stream, mr); + write_chunked( + out_sink, str_concat_col->view(), options, stream, rmm::mr::get_current_device_resource()); } } } diff --git a/cpp/src/strings/replace/multi.cu b/cpp/src/strings/replace/multi.cu index 43a3d69091a..2ca22f0e017 100644 --- a/cpp/src/strings/replace/multi.cu +++ b/cpp/src/strings/replace/multi.cu @@ -451,8 +451,8 @@ struct 
replace_multi_fn { while (spos < d_str.size_bytes()) { for (int tgt_idx = 0; tgt_idx < d_targets.size(); ++tgt_idx) { auto const d_tgt = d_targets.element(tgt_idx); - if ((d_tgt.size_bytes() <= (d_str.size_bytes() - spos)) && // check fit - (d_tgt.compare(in_ptr + spos, d_tgt.size_bytes()) == 0)) // and match + if (!d_tgt.empty() && (d_tgt.size_bytes() <= (d_str.size_bytes() - spos)) && // check fit + (d_tgt.compare(in_ptr + spos, d_tgt.size_bytes()) == 0)) // and match { auto const d_repl = (d_repls.size() == 1) ? d_repls.element(0) : d_repls.element(tgt_idx); @@ -468,9 +468,8 @@ struct replace_multi_fn { } ++spos; } - if (out_ptr) // copy remainder - { - memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos); + if (out_ptr) { + memcpy(out_ptr, in_ptr + lpos, d_str.size_bytes() - lpos); // copy remainder } else { d_sizes[idx] = bytes; } diff --git a/cpp/tests/strings/replace_tests.cpp b/cpp/tests/strings/replace_tests.cpp index 3aa7467d156..6c4afbb435a 100644 --- a/cpp/tests/strings/replace_tests.cpp +++ b/cpp/tests/strings/replace_tests.cpp @@ -532,6 +532,23 @@ TEST_F(StringsReplaceTest, ReplaceMultiLong) } } +TEST_F(StringsReplaceTest, EmptyTarget) +{ + auto const input = cudf::test::strings_column_wrapper({"hello", "world", "", "accénted"}); + auto const sv = cudf::strings_column_view(input); + + auto const targets = cudf::test::strings_column_wrapper({"e", "", "d"}); + auto const tv = cudf::strings_column_view(targets); + + auto const repls = cudf::test::strings_column_wrapper({"E", "_", "D"}); + auto const rv = cudf::strings_column_view(repls); + + // empty target should be ignored + auto results = cudf::strings::replace_multiple(sv, tv, rv); + auto expected = cudf::test::strings_column_wrapper({"hEllo", "worlD", "", "accéntED"}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL(results->view(), expected); +} + TEST_F(StringsReplaceTest, EmptyStringsColumn) { auto const zero_size_strings_column = cudf::make_empty_column(cudf::type_id::STRING)->view(); diff --git 
a/dependencies.yaml b/dependencies.yaml index e3f8a72e76c..27621ff9a3f 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -243,7 +243,7 @@ dependencies: common: - output_types: [conda, requirements, pyproject] packages: - - &cmake_ver cmake>=3.26.4 + - &cmake_ver cmake>=3.26.4,!=3.30.0 - &ninja ninja build_all: common: @@ -755,7 +755,7 @@ dependencies: - {matrix: null, packages: *cupy_packages_cu11} test_python_pandas_cudf: common: - - output_types: pyproject + - output_types: [requirements, pyproject] packages: # dependencies to run pandas tests # https://github.com/pandas-dev/pandas/blob/main/environment.yml @@ -766,7 +766,7 @@ dependencies: - pytest-reportlog test_python_cudf_pandas: common: - - output_types: pyproject + - output_types: [requirements, pyproject] packages: - ipython - openpyxl diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst index df7749b2a40..1b6cfa73a2b 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst @@ -18,7 +18,6 @@ This page provides API documentation for pylibcudf. filling gpumemoryview groupby - io/index.rst interop join lists @@ -26,15 +25,16 @@ This page provides API documentation for pylibcudf. null_mask quantiles reduce + replace reshape rolling round scalar search - stream_compaction sorting - replace + stream_compaction table + traits types unary @@ -42,4 +42,5 @@ This page provides API documentation for pylibcudf. :maxdepth: 2 :caption: Subpackages + io/index.rst strings/index.rst diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst new file mode 100644 index 00000000000..294ca8dc78c --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/traits.rst @@ -0,0 +1,6 @@ +====== +traits +====== + +.. 
automodule:: cudf._lib.pylibcudf.traits + :members: diff --git a/python/cudf/cudf/_lib/io/utils.pxd b/python/cudf/cudf/_lib/io/utils.pxd index 252d986843a..680a87c789e 100644 --- a/python/cudf/cudf/_lib/io/utils.pxd +++ b/python/cudf/cudf/_lib/io/utils.pxd @@ -16,6 +16,10 @@ cdef source_info make_source_info(list src) except* cdef sink_info make_sinks_info( list src, vector[unique_ptr[data_sink]] & data) except* cdef sink_info make_sink_info(src, unique_ptr[data_sink] & data) except* +cdef add_df_col_struct_names( + df, + child_names_dict +) cdef update_struct_field_names( table, vector[column_name_info]& schema_info) diff --git a/python/cudf/cudf/_lib/io/utils.pyx b/python/cudf/cudf/_lib/io/utils.pyx index 1d7c56888d9..58956b9e9b7 100644 --- a/python/cudf/cudf/_lib/io/utils.pyx +++ b/python/cudf/cudf/_lib/io/utils.pyx @@ -147,10 +147,37 @@ cdef cppclass iobase_data_sink(data_sink): return buf.tell() +cdef add_df_col_struct_names(df, child_names_dict): + for name, child_names in child_names_dict.items(): + col = df._data[name] + + df._data[name] = update_col_struct_field_names(col, child_names) + + +cdef update_col_struct_field_names(Column col, child_names): + if col.children: + children = list(col.children) + for i, (child, names) in enumerate(zip(children, child_names.values())): + children[i] = update_col_struct_field_names( + child, + names + ) + col.set_base_children(tuple(children)) + + if isinstance(col.dtype, StructDtype): + col = col._rename_fields( + child_names.keys() + ) + + return col + + cdef update_struct_field_names( table, vector[column_name_info]& schema_info ): + # Deprecated, remove in favor of add_col_struct_names + # when a reader is ported to pylibcudf for i, (name, col) in enumerate(table._data.items()): table._data[name] = update_column_struct_field_names( col, schema_info[i] diff --git a/python/cudf/cudf/_lib/json.pyx b/python/cudf/cudf/_lib/json.pyx index 22e34feb547..9c646e3357b 100644 --- a/python/cudf/cudf/_lib/json.pyx +++ 
b/python/cudf/cudf/_lib/json.pyx @@ -8,26 +8,16 @@ import cudf from cudf.core.buffer import acquire_spill_lock from libcpp cimport bool -from libcpp.map cimport map -from libcpp.string cimport string -from libcpp.utility cimport move -from libcpp.vector cimport vector cimport cudf._lib.pylibcudf.libcudf.io.types as cudf_io_types -from cudf._lib.io.utils cimport make_source_info, update_struct_field_names -from cudf._lib.pylibcudf.libcudf.io.json cimport ( - json_reader_options, - json_recovery_mode_t, - read_json as libcudf_read_json, - schema_element, -) -from cudf._lib.pylibcudf.libcudf.io.types cimport ( - compression_type, - table_with_metadata, -) -from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type +from cudf._lib.io.utils cimport add_df_col_struct_names +from cudf._lib.pylibcudf.io.types cimport compression_type +from cudf._lib.pylibcudf.libcudf.io.json cimport json_recovery_mode_t +from cudf._lib.pylibcudf.libcudf.io.types cimport compression_type +from cudf._lib.pylibcudf.libcudf.types cimport data_type, type_id +from cudf._lib.pylibcudf.types cimport DataType from cudf._lib.types cimport dtype_to_data_type -from cudf._lib.utils cimport data_from_unique_ptr +from cudf._lib.utils cimport data_from_pylibcudf_io import cudf._lib.pylibcudf as plc @@ -62,6 +52,7 @@ cpdef read_json(object filepaths_or_buffers, # If input data is a JSON string (or StringIO), hold a reference to # the encoded memoryview externally to ensure the encoded buffer # isn't destroyed before calling libcudf `read_json()` + for idx in range(len(filepaths_or_buffers)): if isinstance(filepaths_or_buffers[idx], io.StringIO): filepaths_or_buffers[idx] = \ @@ -71,17 +62,7 @@ cpdef read_json(object filepaths_or_buffers, filepaths_or_buffers[idx] = filepaths_or_buffers[idx].encode() # Setup arguments - cdef vector[data_type] c_dtypes_list - cdef map[string, schema_element] c_dtypes_schema_map cdef cudf_io_types.compression_type c_compression - # Determine byte read offsets if 
applicable - cdef size_type c_range_offset = ( - byte_range[0] if byte_range is not None else 0 - ) - cdef size_type c_range_size = ( - byte_range[1] if byte_range is not None else 0 - ) - cdef bool c_lines = lines if compression is not None: if compression == 'gzip': @@ -94,56 +75,50 @@ cpdef read_json(object filepaths_or_buffers, c_compression = cudf_io_types.compression_type.AUTO else: c_compression = cudf_io_types.compression_type.NONE - is_list_like_dtypes = False + + processed_dtypes = None + if dtype is False: raise ValueError("False value is unsupported for `dtype`") elif dtype is not True: + processed_dtypes = [] if isinstance(dtype, abc.Mapping): for k, v in dtype.items(): - c_dtypes_schema_map[str(k).encode()] = \ - _get_cudf_schema_element_from_dtype(v) + # Make sure keys are string + k = str(k) + lib_type, child_types = _get_cudf_schema_element_from_dtype(v) + processed_dtypes.append((k, lib_type, child_types)) elif isinstance(dtype, abc.Collection): - is_list_like_dtypes = True - c_dtypes_list.reserve(len(dtype)) for col_dtype in dtype: - c_dtypes_list.push_back( - _get_cudf_data_type_from_dtype( - col_dtype)) + processed_dtypes.append( + # Ignore child columns since we cannot specify their dtypes + # when passing a list + _get_cudf_schema_element_from_dtype(col_dtype)[0] + ) else: raise TypeError("`dtype` must be 'list like' or 'dict'") - cdef json_reader_options opts = move( - json_reader_options.builder(make_source_info(filepaths_or_buffers)) - .compression(c_compression) - .lines(c_lines) - .byte_range_offset(c_range_offset) - .byte_range_size(c_range_size) - .recovery_mode(_get_json_recovery_mode(on_bad_lines)) - .build() + table_w_meta = plc.io.json.read_json( + plc.io.SourceInfo(filepaths_or_buffers), + processed_dtypes, + c_compression, + lines, + byte_range_offset = byte_range[0] if byte_range is not None else 0, + byte_range_size = byte_range[1] if byte_range is not None else 0, + keep_quotes = keep_quotes, + mixed_types_as_string = 
mixed_types_as_string, + prune_columns = prune_columns, + recovery_mode = _get_json_recovery_mode(on_bad_lines) ) - if is_list_like_dtypes: - opts.set_dtypes(c_dtypes_list) - else: - opts.set_dtypes(c_dtypes_schema_map) - - opts.enable_keep_quotes(keep_quotes) - opts.enable_mixed_types_as_string(mixed_types_as_string) - opts.enable_prune_columns(prune_columns) - - # Read JSON - cdef cudf_io_types.table_with_metadata c_result - with nogil: - c_result = move(libcudf_read_json(opts)) - - meta_names = [info.name.decode() for info in c_result.metadata.schema_info] - df = cudf.DataFrame._from_data(*data_from_unique_ptr( - move(c_result.tbl), - column_names=meta_names - )) - - update_struct_field_names(df, c_result.metadata.schema_info) + df = cudf.DataFrame._from_data( + *data_from_pylibcudf_io( + table_w_meta + ) + ) + # Post-processing to add in struct column names + add_df_col_struct_names(df, table_w_meta.child_names) return df @@ -192,28 +167,32 @@ def write_json( ) -cdef schema_element _get_cudf_schema_element_from_dtype(object dtype) except *: - cdef schema_element s_element - cdef data_type lib_type +cdef _get_cudf_schema_element_from_dtype(object dtype) except *: dtype = cudf.dtype(dtype) if isinstance(dtype, cudf.CategoricalDtype): raise NotImplementedError( "CategoricalDtype as dtype is not yet " "supported in JSON reader" ) - lib_type = dtype_to_data_type(dtype) - s_element.type = lib_type + + lib_type = DataType.from_libcudf(dtype_to_data_type(dtype)) + child_types = [] + if isinstance(dtype, cudf.StructDtype): for name, child_type in dtype.fields.items(): - s_element.child_types[name.encode()] = \ + child_lib_type, grandchild_types = \ _get_cudf_schema_element_from_dtype(child_type) + child_types.append((name, child_lib_type, grandchild_types)) elif isinstance(dtype, cudf.ListDtype): - s_element.child_types["offsets".encode()] = \ - _get_cudf_schema_element_from_dtype(cudf.dtype("int32")) - s_element.child_types["element".encode()] = \ + child_lib_type, 
grandchild_types = \ _get_cudf_schema_element_from_dtype(dtype.element_type) - return s_element + child_types = [ + ("offsets", DataType.from_libcudf(data_type(type_id.INT32)), []), + ("element", child_lib_type, grandchild_types) + ] + + return lib_type, child_types cdef data_type _get_cudf_data_type_from_dtype(object dtype) except *: diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt index caa445c3a0e..e5ad9d01310 100644 --- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt @@ -39,6 +39,7 @@ set(cython_sources stream_compaction.pyx sorting.pyx table.pyx + traits.pyx types.pyx unary.pyx utils.pyx diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd index 16d21daa140..92ce4523a5d 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd @@ -24,6 +24,7 @@ from . cimport ( sorting, stream_compaction, strings, + traits, types, unary, ) @@ -56,12 +57,14 @@ __all__ = [ "quantiles", "reduce", "replace", + "reshape", "rolling", "round", "search", "stream_compaction", "strings", "sorting", + "traits", "types", "unary", ] diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py index fd4f35d30bf..3c3c98be1cc 100644 --- a/python/cudf/cudf/_lib/pylibcudf/__init__.py +++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py @@ -24,6 +24,7 @@ sorting, stream_compaction, strings, + traits, types, unary, ) @@ -36,6 +37,7 @@ __all__ = [ "Column", "DataType", + "MaskState", "Scalar", "Table", "TypeId", @@ -56,12 +58,14 @@ "quantiles", "reduce", "replace", + "reshape", "rolling", "round", "search", "stream_compaction", "strings", "sorting", + "traits", "types", "unary", ] diff --git a/python/cudf/cudf/_lib/pylibcudf/io/json.pxd b/python/cudf/cudf/_lib/pylibcudf/io/json.pxd index a91d574131f..f7f733a493d 100644 --- 
a/python/cudf/cudf/_lib/pylibcudf/io/json.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/io/json.pxd @@ -1,11 +1,30 @@ # Copyright (c) 2024, NVIDIA CORPORATION. - from libcpp cimport bool -from cudf._lib.pylibcudf.io.types cimport SinkInfo, TableWithMetadata +from cudf._lib.pylibcudf.io.types cimport ( + SinkInfo, + SourceInfo, + TableWithMetadata, + compression_type, +) +from cudf._lib.pylibcudf.libcudf.io.json cimport json_recovery_mode_t from cudf._lib.pylibcudf.libcudf.types cimport size_type +cpdef TableWithMetadata read_json( + SourceInfo source_info, + list dtypes = *, + compression_type compression = *, + bool lines = *, + size_type byte_range_offset = *, + size_type byte_range_size = *, + bool keep_quotes = *, + bool mixed_types_as_string = *, + bool prune_columns = *, + json_recovery_mode_t recovery_mode = *, +) + + cpdef void write_json( SinkInfo sink_info, TableWithMetadata tbl, diff --git a/python/cudf/cudf/_lib/pylibcudf/io/json.pyx b/python/cudf/cudf/_lib/pylibcudf/io/json.pyx index 7530eba3803..354cb4981de 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/json.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/io/json.pyx @@ -1,16 +1,130 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
- from libcpp cimport bool from libcpp.limits cimport numeric_limits +from libcpp.map cimport map from libcpp.string cimport string +from libcpp.utility cimport move +from libcpp.vector cimport vector -from cudf._lib.pylibcudf.io.types cimport SinkInfo, TableWithMetadata +from cudf._lib.pylibcudf.io.types cimport ( + SinkInfo, + SourceInfo, + TableWithMetadata, +) from cudf._lib.pylibcudf.libcudf.io.json cimport ( + json_reader_options, + json_recovery_mode_t, json_writer_options, + read_json as cpp_read_json, + schema_element, write_json as cpp_write_json, ) -from cudf._lib.pylibcudf.libcudf.io.types cimport table_metadata -from cudf._lib.pylibcudf.types cimport size_type +from cudf._lib.pylibcudf.libcudf.io.types cimport ( + compression_type, + table_metadata, + table_with_metadata, +) +from cudf._lib.pylibcudf.libcudf.types cimport data_type, size_type +from cudf._lib.pylibcudf.types cimport DataType + + +cdef map[string, schema_element] _generate_schema_map(list dtypes): + cdef map[string, schema_element] schema_map + cdef schema_element s_elem + cdef string c_name + + for name, dtype, child_dtypes in dtypes: + if not (isinstance(name, str) and + isinstance(dtype, DataType) and + isinstance(child_dtypes, list)): + + raise ValueError("Must pass a list of a tuple containing " + "(column_name, column_dtype, list of child_dtypes)") + + c_name = name.encode() + + s_elem.type = (dtype).c_obj + s_elem.child_types = _generate_schema_map(child_dtypes) + + schema_map[c_name] = s_elem + return schema_map + + +cpdef TableWithMetadata read_json( + SourceInfo source_info, + list dtypes = None, + compression_type compression = compression_type.AUTO, + bool lines = False, + size_type byte_range_offset = 0, + size_type byte_range_size = 0, + bool keep_quotes = False, + bool mixed_types_as_string = False, + bool prune_columns = False, + json_recovery_mode_t recovery_mode = json_recovery_mode_t.FAIL, +): + """Reads an JSON file into a :py:class:`~.types.TableWithMetadata`. 
+ + Parameters + ---------- + source_info : SourceInfo + The SourceInfo object to read the JSON file from. + dtypes : list, default None + Set data types for the columns in the JSON file. + + Each element of the list has the format + (column_name, column_dtype, list of child dtypes), where + the list of child dtypes is an empty list if the child is not + a nested type (list or struct dtype), and is of format + (column_child_name, column_child_type, list of grandchild dtypes). + compression : CompressionType, default CompressionType.AUTO + The compression format of the JSON source. + byte_range_offset : size_type, default 0 + Number of bytes to skip from source start. + byte_range_size : size_type, default 0 + Number of bytes to read. By default, will read all bytes. + keep_quotes : bool, default False + Whether the reader should keep quotes of string values. + prune_columns : bool, default False + Whether to only read columns specified in dtypes. + recovery_mode : JSONRecoveryMode, default JSONRecoveryMode.FAIL + Whether to raise an error or set corresponding values to null + when encountering an invalid JSON line. + + Returns + ------- + TableWithMetadata + The Table and its corresponding metadata (column names) that were read in.
+ """ + cdef vector[data_type] types_vec + cdef json_reader_options opts = move( + json_reader_options.builder(source_info.c_obj) + .compression(compression) + .lines(lines) + .byte_range_offset(byte_range_offset) + .byte_range_size(byte_range_size) + .recovery_mode(recovery_mode) + .build() + ) + + if dtypes is not None: + if isinstance(dtypes[0], tuple): + opts.set_dtypes(move(_generate_schema_map(dtypes))) + else: + for dtype in dtypes: + types_vec.push_back((dtype).c_obj) + opts.set_dtypes(types_vec) + + opts.enable_keep_quotes(keep_quotes) + opts.enable_mixed_types_as_string(mixed_types_as_string) + opts.enable_prune_columns(prune_columns) + + # Read JSON + cdef table_with_metadata c_result + + with nogil: + c_result = move(cpp_read_json(opts)) + + return TableWithMetadata.from_libcudf(c_result) cpdef void write_json( diff --git a/python/cudf/cudf/_lib/pylibcudf/io/types.pxd b/python/cudf/cudf/_lib/pylibcudf/io/types.pxd index 88daf54f33b..ab223c16a72 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/types.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/io/types.pxd @@ -28,6 +28,11 @@ cdef class TableWithMetadata: cdef vector[column_name_info] _make_column_info(self, list column_names) + cdef list _make_columns_list(self, dict child_dict) + + @staticmethod + cdef dict _parse_col_names(vector[column_name_info] infos) + @staticmethod cdef TableWithMetadata from_libcudf(table_with_metadata& tbl) diff --git a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx b/python/cudf/cudf/_lib/pylibcudf/io/types.pyx index f94e20970a4..df0b729b711 100644 --- a/python/cudf/cudf/_lib/pylibcudf/io/types.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/io/types.pyx @@ -22,6 +22,11 @@ import errno import io import os +from cudf._lib.pylibcudf.libcudf.io.json import \ + json_recovery_mode_t as JSONRecoveryMode # no-cython-lint +from cudf._lib.pylibcudf.libcudf.io.types import \ + compression_type as CompressionType # no-cython-lint + cdef class TableWithMetadata: """A container holding a table and its 
associated metadata @@ -69,16 +74,44 @@ cdef class TableWithMetadata: """ return self.tbl.columns() - @property - def column_names(self): + cdef list _make_columns_list(self, dict child_dict): + cdef list names = [] + for child in child_dict: + grandchildren = self._make_columns_list(child_dict[child]) + names.append((child, grandchildren)) + return names + + def column_names(self, include_children=False): """ Return a list containing the column names of the table """ cdef list names = [] + cdef str name + cdef dict child_names = self.child_names for col_info in self.metadata.schema_info: - # TODO: Handle nesting (columns with child columns) - assert col_info.children.size() == 0, "Child column names are not handled!" - names.append(col_info.name.decode()) + name = col_info.name.decode() + if include_children: + children = self._make_columns_list(child_names[name]) + names.append((name, children)) + else: + names.append(name) + return names + + @property + def child_names(self): + """ + Return a dictionary mapping the names of columns with children + to the names of their child columns + """ + return TableWithMetadata._parse_col_names(self.metadata.schema_info) + + @staticmethod + cdef dict _parse_col_names(vector[column_name_info] infos): + cdef dict child_names = dict() + cdef dict names = dict() + for col_info in infos: + child_names = TableWithMetadata._parse_col_names(col_info.children) + names[col_info.name.decode()] = child_names return names @staticmethod @@ -137,6 +170,15 @@ cdef class SourceInfo: cdef vector[host_buffer] c_host_buffers cdef const unsigned char[::1] c_buffer cdef bint empty_buffer = False + cdef list new_sources = [] + + if isinstance(sources[0], io.StringIO): + for buffer in sources: + if not isinstance(buffer, io.StringIO): + raise ValueError("All sources must be of the same type!") + new_sources.append(buffer.read().encode()) + sources = new_sources + if isinstance(sources[0], bytes): empty_buffer = True for buffer in sources: @@ -156,7 
+198,10 @@ cdef class SourceInfo: c_buffer.shape[0])) else: raise ValueError("Sources must be a list of str/paths, " - "bytes, io.BytesIO, or a Datasource") + "bytes, io.BytesIO, io.StringIO, or a Datasource") + + if empty_buffer is True: + c_host_buffers.push_back(host_buffer(NULL, 0)) self.c_obj = source_info(c_host_buffers) diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt index 6c66d01ca57..699e85ce567 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt @@ -22,4 +22,5 @@ rapids_cython_create_modules( SOURCE_FILES "${cython_sources}" LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp ) +add_subdirectory(io) add_subdirectory(strings) diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/CMakeLists.txt new file mode 100644 index 00000000000..6831063ecb9 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/CMakeLists.txt @@ -0,0 +1,26 @@ +# ============================================================================= +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except +# in compliance with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software distributed under the License +# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express +# or implied. See the License for the specific language governing permissions and limitations under +# the License. 
+# ============================================================================= + +set(cython_sources json.pyx types.pyx) + +set(linked_libraries cudf::cudf) + +rapids_cython_create_modules( + CXX + SOURCE_FILES "${cython_sources}" + LINKED_LIBRARIES "${linked_libraries}" ASSOCIATED_TARGETS cudf MODULE_PREFIX cpp_io_ +) + +set(targets_using_arrow_headers cpp_io_json cpp_io_types) +link_to_pyarrow_headers("${targets_using_arrow_headers}") diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd index 2e50cccd132..86621ae184f 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pxd @@ -1,6 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. -from libc.stdint cimport uint8_t +from libc.stdint cimport int32_t, uint8_t from libcpp cimport bool from libcpp.map cimport map from libcpp.memory cimport shared_ptr, unique_ptr @@ -19,9 +19,9 @@ cdef extern from "cudf/io/json.hpp" \ data_type type map[string, schema_element] child_types - cdef enum json_recovery_mode_t: - FAIL "cudf::io::json_recovery_mode_t::FAIL" - RECOVER_WITH_NULL "cudf::io::json_recovery_mode_t::RECOVER_WITH_NULL" + cpdef enum class json_recovery_mode_t(int32_t): + FAIL + RECOVER_WITH_NULL cdef cppclass json_reader_options: json_reader_options() except + diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pyx b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/json.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/io/types.pyx b/python/cudf/cudf/_lib/pylibcudf/libcudf/io/types.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd index fd21e7b334b..8917a6ac899 100644 --- a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd +++ 
b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/lists_column_view.pxd @@ -10,7 +10,9 @@ from cudf._lib.pylibcudf.libcudf.types cimport size_type cdef extern from "cudf/lists/lists_column_view.hpp" namespace "cudf" nogil: cdef cppclass lists_column_view(column_view): lists_column_view() except + + lists_column_view(const lists_column_view& lists_column) except + lists_column_view(const column_view& lists_column) except + + lists_column_view& operator=(const lists_column_view&) except + column_view parent() except + column_view offsets() except + column_view child() except + diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/reverse.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/reverse.pxd new file mode 100644 index 00000000000..0382a5d42c3 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/lists/reverse.pxd @@ -0,0 +1,14 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr + +from cudf._lib.pylibcudf.libcudf.column.column cimport column +from cudf._lib.pylibcudf.libcudf.lists.lists_column_view cimport ( + lists_column_view, +) + + +cdef extern from "cudf/lists/reverse.hpp" namespace "cudf::lists" nogil: + cdef unique_ptr[column] reverse( + const lists_column_view& lists_column, + ) except + diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd b/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd new file mode 100644 index 00000000000..0cc58af735b --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/utilities/traits.pxd @@ -0,0 +1,27 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libcpp cimport bool +from libcpp.vector cimport vector + +from cudf._lib.pylibcudf.libcudf.types cimport data_type + + +cdef extern from "cudf/utilities/traits.hpp" namespace "cudf" nogil: + cdef bool is_relationally_comparable(data_type) + cdef bool is_equality_comparable(data_type) + cdef bool is_numeric(data_type) + cdef bool is_index_type(data_type) + cdef bool is_unsigned(data_type) + cdef bool is_integral(data_type) + cdef bool is_integral_not_bool(data_type) + cdef bool is_floating_point(data_type) + cdef bool is_boolean(data_type) + cdef bool is_timestamp(data_type) + cdef bool is_fixed_point(data_type) + cdef bool is_duration(data_type) + cdef bool is_chrono(data_type) + cdef bool is_dictionary(data_type) + cdef bool is_fixed_width(data_type) + cdef bool is_compound(data_type) + cdef bool is_nested(data_type) + cdef bool is_bit_castable(data_type, data_type) diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pxd b/python/cudf/cudf/_lib/pylibcudf/lists.pxd index 2ccf0139e90..c9d0a84e8ac 100644 --- a/python/cudf/cudf/_lib/pylibcudf/lists.pxd +++ b/python/cudf/cudf/_lib/pylibcudf/lists.pxd @@ -23,3 +23,5 @@ cpdef Column contains(Column, ColumnOrScalar) cpdef Column contains_nulls(Column) cpdef Column index_of(Column, ColumnOrScalar, bool) + +cpdef Column reverse(Column) diff --git a/python/cudf/cudf/_lib/pylibcudf/lists.pyx b/python/cudf/cudf/_lib/pylibcudf/lists.pyx index a94d940accd..651f1346f88 100644 --- a/python/cudf/cudf/_lib/pylibcudf/lists.pyx +++ b/python/cudf/cudf/_lib/pylibcudf/lists.pyx @@ -9,6 +9,7 @@ from cudf._lib.pylibcudf.libcudf.column.column cimport column from cudf._lib.pylibcudf.libcudf.lists cimport ( contains as cpp_contains, explode as cpp_explode, + reverse as cpp_reverse, ) from cudf._lib.pylibcudf.libcudf.lists.combine cimport ( concatenate_list_elements as cpp_concatenate_list_elements, @@ -206,3 +207,28 @@ cpdef Column index_of(Column input, ColumnOrScalar search_key, bool find_first_o find_option, )) return 
Column.from_libcudf(move(c_result)) + + +cpdef Column reverse(Column input): + """Reverse the element order within each list of the input column. + + For details, see :cpp:func:`reverse`. + + Parameters + ---------- + input : Column + The input column. + + Returns + ------- + Column + A new Column with reversed lists. + """ + cdef unique_ptr[column] c_result + cdef ListColumnView list_view = input.list_view() + + with nogil: + c_result = move(cpp_reverse.reverse( + list_view.view(), + )) + return Column.from_libcudf(move(c_result)) diff --git a/python/cudf/cudf/_lib/pylibcudf/traits.pxd b/python/cudf/cudf/_lib/pylibcudf/traits.pxd new file mode 100644 index 00000000000..668fa775202 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/traits.pxd @@ -0,0 +1,25 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp cimport bool + +from .types cimport DataType + + +cpdef bool is_relationally_comparable(DataType typ) +cpdef bool is_equality_comparable(DataType typ) +cpdef bool is_numeric(DataType typ) +cpdef bool is_index_type(DataType typ) +cpdef bool is_unsigned(DataType typ) +cpdef bool is_integral(DataType typ) +cpdef bool is_integral_not_bool(DataType typ) +cpdef bool is_floating_point(DataType typ) +cpdef bool is_boolean(DataType typ) +cpdef bool is_timestamp(DataType typ) +cpdef bool is_fixed_point(DataType typ) +cpdef bool is_duration(DataType typ) +cpdef bool is_chrono(DataType typ) +cpdef bool is_dictionary(DataType typ) +cpdef bool is_fixed_width(DataType typ) +cpdef bool is_compound(DataType typ) +cpdef bool is_nested(DataType typ) +cpdef bool is_bit_castable(DataType source, DataType target) diff --git a/python/cudf/cudf/_lib/pylibcudf/traits.pyx b/python/cudf/cudf/_lib/pylibcudf/traits.pyx new file mode 100644 index 00000000000..d2370f8d641 --- /dev/null +++ b/python/cudf/cudf/_lib/pylibcudf/traits.pyx @@ -0,0 +1,151 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from libcpp cimport bool + +from cudf._lib.pylibcudf.libcudf.utilities cimport traits + +from .types cimport DataType + + +cpdef bool is_relationally_comparable(DataType typ): + """Checks if the given data type supports relational comparisons. + + For details, see :cpp:func:`is_relationally_comparable`. + """ + return traits.is_relationally_comparable(typ.c_obj) + + +cpdef bool is_equality_comparable(DataType typ): + """Checks if the given data type supports equality comparisons. + + For details, see :cpp:func:`is_equality_comparable`. + """ + return traits.is_equality_comparable(typ.c_obj) + + +cpdef bool is_numeric(DataType typ): + """Checks if the given data type is numeric. + + For details, see :cpp:func:`is_numeric`. + """ + return traits.is_numeric(typ.c_obj) + + +cpdef bool is_index_type(DataType typ): + """Checks if the given data type is an index type. + + For details, see :cpp:func:`is_index_type`. + """ + return traits.is_index_type(typ.c_obj) + + +cpdef bool is_unsigned(DataType typ): + """Checks if the given data type is an unsigned type. + + For details, see :cpp:func:`is_unsigned`. + """ + return traits.is_unsigned(typ.c_obj) + + +cpdef bool is_integral(DataType typ): + """Checks if the given data type is an integral type. + + For details, see :cpp:func:`is_integral`. + """ + return traits.is_integral(typ.c_obj) + + +cpdef bool is_integral_not_bool(DataType typ): + """Checks if the given data type is an integral type excluding booleans. + + For details, see :cpp:func:`is_integral_not_bool`. + """ + return traits.is_integral_not_bool(typ.c_obj) + + +cpdef bool is_floating_point(DataType typ): + """Checks if the given data type is a floating point type. + + For details, see :cpp:func:`is_floating_point`. + """ + return traits.is_floating_point(typ.c_obj) + + +cpdef bool is_boolean(DataType typ): + """Checks if the given data type is a boolean type. + + For details, see :cpp:func:`is_boolean`. 
+ """ + return traits.is_boolean(typ.c_obj) + + +cpdef bool is_timestamp(DataType typ): + """Checks if the given data type is a timestamp type. + + For details, see :cpp:func:`is_timestamp`. + """ + return traits.is_timestamp(typ.c_obj) + + +cpdef bool is_fixed_point(DataType typ): + """Checks if the given data type is a fixed point type. + + For details, see :cpp:func:`is_fixed_point`. + """ + return traits.is_fixed_point(typ.c_obj) + + +cpdef bool is_duration(DataType typ): + """Checks if the given data type is a duration type. + + For details, see :cpp:func:`is_duration`. + """ + return traits.is_duration(typ.c_obj) + + +cpdef bool is_chrono(DataType typ): + """Checks if the given data type is a chrono type. + + For details, see :cpp:func:`is_chrono`. + """ + return traits.is_chrono(typ.c_obj) + + +cpdef bool is_dictionary(DataType typ): + """Checks if the given data type is a dictionary type. + + For details, see :cpp:func:`is_dictionary`. + """ + return traits.is_dictionary(typ.c_obj) + + +cpdef bool is_fixed_width(DataType typ): + """Checks if the given data type is a fixed width type. + + For details, see :cpp:func:`is_fixed_width`. + """ + return traits.is_fixed_width(typ.c_obj) + + +cpdef bool is_compound(DataType typ): + """Checks if the given data type is a compound type. + + For details, see :cpp:func:`is_compound`. + """ + return traits.is_compound(typ.c_obj) + + +cpdef bool is_nested(DataType typ): + """Checks if the given data type is a nested type. + + For details, see :cpp:func:`is_nested`. + """ + return traits.is_nested(typ.c_obj) + + +cpdef bool is_bit_castable(DataType source, DataType target): + """Checks if the source type is bit-castable to the target type. + + For details, see :cpp:func:`is_bit_castable`. 
+ """ + return traits.is_bit_castable(source.c_obj, target.c_obj) diff --git a/python/cudf/cudf/_lib/utils.pyx b/python/cudf/cudf/_lib/utils.pyx index de6b9f690b6..f136cd997a7 100644 --- a/python/cudf/cudf/_lib/utils.pyx +++ b/python/cudf/cudf/_lib/utils.pyx @@ -322,7 +322,7 @@ cdef data_from_pylibcudf_io(tbl_with_meta): """ return _data_from_columns( columns=[Column.from_pylibcudf(plc) for plc in tbl_with_meta.columns], - column_names=tbl_with_meta.column_names, + column_names=tbl_with_meta.column_names(include_children=False), index_names=None ) diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py index c548db67344..1992d471947 100644 --- a/python/cudf/cudf/core/column/lists.py +++ b/python/cudf/cudf/core/column/lists.py @@ -73,10 +73,15 @@ def memory_usage(self): child0_size = ( current_base_child.size + 1 - current_offset ) * current_base_child.base_children[0].dtype.itemsize - current_offset = current_base_child.base_children[ - 0 - ].element_indexing(current_offset) n += child0_size + current_offset_col = current_base_child.base_children[0] + if not len(current_offset_col): + # See https://github.com/rapidsai/cudf/issues/16164 why + # offset column can be uninitialized + break + current_offset = current_offset_col.element_indexing( + current_offset + ) current_base_child = current_base_child.base_children[1] n += ( diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/cudf/cudf/pylibcudf_tests/common/utils.py index f8bfe340ae5..46603ff32b8 100644 --- a/python/cudf/cudf/pylibcudf_tests/common/utils.py +++ b/python/cudf/cudf/pylibcudf_tests/common/utils.py @@ -8,13 +8,14 @@ import pytest from cudf._lib import pylibcudf as plc +from cudf._lib.pylibcudf.io.types import CompressionType def metadata_from_arrow_type( pa_type: pa.Array, name: str = "", ) -> plc.interop.ColumnMetadata | None: - metadata = plc.interop.ColumnMetadata(name) # None + metadata = plc.interop.ColumnMetadata(name) if 
pa.types.is_list(pa_type): + child_meta = [plc.interop.ColumnMetadata("offsets")] + for i in range(pa_type.num_fields): @@ -39,9 +40,25 @@ def metadata_from_arrow_type( def assert_column_eq( - lhs: pa.Array | plc.Column, rhs: pa.Array | plc.Column + lhs: pa.Array | plc.Column, + rhs: pa.Array | plc.Column, + check_field_nullability=True, ) -> None: - """Verify that a pylibcudf array and PyArrow array are equal.""" + """Verify that a pylibcudf array and PyArrow array are equal. + + Parameters + ---------- + lhs: Union[pa.Array, plc.Column] + The array with the expected values + rhs: Union[pa.Array, plc.Column] + The array to check + check_field_nullability: + For list/struct dtypes, whether to check if the nullable attributes + on child fields are equal. + + Useful for checking roundtripping of lossy formats like JSON that may not + preserve this information. + """ # Nested types require children metadata to be passed to the conversion function. if isinstance(lhs, (pa.Array, pa.ChunkedArray)) and isinstance( rhs, plc.Column @@ -65,6 +82,33 @@ def assert_column_eq( if isinstance(rhs, pa.ChunkedArray): rhs = rhs.combine_chunks() + def _make_fields_nullable(typ): + new_fields = [] + for i in range(typ.num_fields): + child_field = typ.field(i) + if not child_field.nullable: + child_type = child_field.type + if isinstance(child_field.type, (pa.StructType, pa.ListType)): + child_type = _make_fields_nullable(child_type) + new_fields.append( + pa.field(child_field.name, child_type, nullable=True) + ) + else: + new_fields.append(child_field) + + if isinstance(typ, pa.StructType): + return pa.struct(new_fields) + elif isinstance(typ, pa.ListType): + return pa.list_(new_fields[0]) + return typ + + if not check_field_nullability: + rhs_type = _make_fields_nullable(rhs.type) + rhs = rhs.cast(rhs_type) + + lhs_type = _make_fields_nullable(lhs.type) + lhs = lhs.cast(lhs_type) + assert lhs.equals(rhs) @@ -78,20 +122,24 @@ def assert_table_eq(pa_table: pa.Table, plc_table: plc.Table) ->
None: def assert_table_and_meta_eq( - plc_table_w_meta: plc.io.types.TableWithMetadata, pa_table: pa.Table + pa_table: pa.Table, + plc_table_w_meta: plc.io.types.TableWithMetadata, + check_field_nullability=True, ) -> None: """Verify that the pylibcudf TableWithMetadata and PyArrow table are equal""" plc_table = plc_table_w_meta.tbl plc_shape = (plc_table.num_rows(), plc_table.num_columns()) - assert plc_shape == pa_table.shape + assert ( + plc_shape == pa_table.shape + ), f"{plc_shape} is not equal to {pa_table.shape}" for plc_col, pa_col in zip(plc_table.columns(), pa_table.columns): - assert_column_eq(plc_col, pa_col) + assert_column_eq(pa_col, plc_col, check_field_nullability) # Check column name equality - assert plc_table_w_meta.column_names == pa_table.column_names + assert plc_table_w_meta.column_names() == pa_table.column_names def cudf_raises(expected_exception: BaseException, *args, **kwargs): @@ -102,49 +150,10 @@ def cudf_raises(expected_exception: BaseException, *args, **kwargs): return pytest.raises(expected_exception, *args, **kwargs) -# TODO: Consider moving these type utilities into pylibcudf.types itself. 
-def is_signed_integer(plc_dtype: plc.DataType): - return ( - plc.TypeId.INT8.value <= plc_dtype.id().value <= plc.TypeId.INT64.value - ) - - -def is_integer(plc_dtype: plc.DataType): - return plc_dtype.id() in ( - plc.TypeId.INT8, - plc.TypeId.INT16, - plc.TypeId.INT32, - plc.TypeId.INT64, - plc.TypeId.UINT8, - plc.TypeId.UINT16, - plc.TypeId.UINT32, - plc.TypeId.UINT64, - ) - - -def is_floating(plc_dtype: plc.DataType): - return plc_dtype.id() in ( - plc.TypeId.FLOAT32, - plc.TypeId.FLOAT64, - ) - - -def is_boolean(plc_dtype: plc.DataType): - return plc_dtype.id() == plc.TypeId.BOOL8 - - def is_string(plc_dtype: plc.DataType): return plc_dtype.id() == plc.TypeId.STRING -def is_fixed_width(plc_dtype: plc.DataType): - return ( - is_integer(plc_dtype) - or is_floating(plc_dtype) - or is_boolean(plc_dtype) - ) - - def nesting_level(typ) -> tuple[int, int]: """Return list and struct nesting of a pyarrow type.""" if isinstance(typ, pa.ListType): @@ -221,4 +230,26 @@ def sink_to_str(sink): + DEFAULT_PA_STRUCT_TESTING_TYPES ) +# Map pylibcudf compression types to pandas ones +# Not all compression types map cleanly, read the comments to learn more! +# If a compression type is unsupported, it maps to False. 
+ +COMPRESSION_TYPE_TO_PANDAS = { + CompressionType.NONE: None, + # Users of this dict will have to special case + # AUTO + CompressionType.AUTO: None, + CompressionType.GZIP: "gzip", + CompressionType.BZIP2: "bz2", + CompressionType.ZIP: "zip", + CompressionType.XZ: "xz", + CompressionType.ZSTD: "zstd", + # Unsupported + CompressionType.ZLIB: False, + CompressionType.LZ4: False, + CompressionType.LZO: False, + # These only work for parquet + CompressionType.SNAPPY: "snappy", + CompressionType.BROTLI: "brotli", +} ALL_PA_TYPES = DEFAULT_PA_TYPES diff --git a/python/cudf/cudf/pylibcudf_tests/conftest.py b/python/cudf/cudf/pylibcudf_tests/conftest.py index e4760ea7ac8..39832eb4bba 100644 --- a/python/cudf/cudf/pylibcudf_tests/conftest.py +++ b/python/cudf/cudf/pylibcudf_tests/conftest.py @@ -121,6 +121,11 @@ def source_or_sink(request, tmp_path): return fp_or_buf() +@pytest.fixture(params=[opt for opt in plc.io.types.CompressionType]) +def compression_type(request): + return request.param + + @pytest.fixture( scope="session", params=[opt for opt in plc.types.Interpolation] ) diff --git a/python/cudf/cudf/pylibcudf_tests/io/test_avro.py b/python/cudf/cudf/pylibcudf_tests/io/test_avro.py index d6cd86768cd..061d6792ce3 100644 --- a/python/cudf/cudf/pylibcudf_tests/io/test_avro.py +++ b/python/cudf/cudf/pylibcudf_tests/io/test_avro.py @@ -120,4 +120,4 @@ def test_read_avro(avro_dtypes, avro_dtype_data, row_opts, columns, nullable): if columns != []: expected = expected.select(columns) - assert_table_and_meta_eq(res, expected) + assert_table_and_meta_eq(expected, res) diff --git a/python/cudf/cudf/pylibcudf_tests/io/test_json.py b/python/cudf/cudf/pylibcudf_tests/io/test_json.py index d6b8bfa6976..c13eaf40625 100644 --- a/python/cudf/cudf/pylibcudf_tests/io/test_json.py +++ b/python/cudf/cudf/pylibcudf_tests/io/test_json.py @@ -1,11 +1,49 @@ # Copyright (c) 2024, NVIDIA CORPORATION. 
import io +import pandas as pd import pyarrow as pa import pytest -from utils import sink_to_str +from utils import ( + COMPRESSION_TYPE_TO_PANDAS, + assert_table_and_meta_eq, + sink_to_str, +) import cudf._lib.pylibcudf as plc +from cudf._lib.pylibcudf.io.types import CompressionType + + +def make_json_source(path_or_buf, pa_table, **kwargs): + """ + Uses pandas to write a pyarrow Table to a JSON file. + + The caller is responsible for making sure that no arguments + unsupported by pandas are passed in. + """ + df = pa_table.to_pandas() + if "compression" in kwargs: + kwargs["compression"] = COMPRESSION_TYPE_TO_PANDAS[ + kwargs["compression"] + ] + df.to_json(path_or_buf, orient="records", **kwargs) + if isinstance(path_or_buf, io.IOBase): + path_or_buf.seek(0) + return path_or_buf + + +def write_json_bytes(source, json_str): + """ + Write a JSON string to the source + """ + if not isinstance(source, io.IOBase): + with open(source, "w") as source_f: + source_f.write(json_str) + else: + if isinstance(source, io.BytesIO): + json_str = json_str.encode("utf-8") + source.write(json_str) + source.seek(0) @pytest.mark.parametrize("rows_per_chunk", [8, 100]) @@ -114,3 +152,238 @@ def test_write_json_bool_opts(true_value, false_value): pd_result = pd_result.replace("false", false_value) assert str_result == pd_result + + +@pytest.mark.parametrize("lines", [True, False]) +def test_read_json_basic( + table_data, source_or_sink, lines, compression_type, request +): + if compression_type in { + # Not supported by libcudf + CompressionType.SNAPPY, + CompressionType.XZ, + CompressionType.ZSTD, + # Not supported by pandas + # TODO: find a way to test these + CompressionType.BROTLI, + CompressionType.LZ4, + CompressionType.LZO, + CompressionType.ZLIB, + }: + pytest.skip("unsupported compression type by pandas/libcudf") + + # can't compress non-binary data with pandas + if isinstance(source_or_sink, io.StringIO): + compression_type = CompressionType.NONE + + _, pa_table = 
table_data + + source = make_json_source( + source_or_sink, pa_table, lines=lines, compression=compression_type + ) + + request.applymarker( + pytest.mark.xfail( + condition=( + len(pa_table) > 0 + and compression_type + not in {CompressionType.NONE, CompressionType.AUTO} + ), + # note: wasn't able to narrow down the specific types that were failing + # seems to be a little non-deterministic, but always fails with + # cudaErrorInvalidValue invalid argument + reason="libcudf json reader crashes on compressed non empty table_data", + ) + ) + + if isinstance(source, io.IOBase): + source.seek(0) + + res = plc.io.json.read_json( + plc.io.SourceInfo([source]), + compression=compression_type, + lines=lines, + ) + + # Adjustments to correct for the fact orient=records is lossy + # and doesn't + # 1) preserve colnames when zero rows in table + # 2) preserve struct nullability + # 3) differentiate int64/uint64 + if len(pa_table) == 0: + pa_table = pa.table([]) + + new_fields = [] + for i in range(len(pa_table.schema)): + curr_field = pa_table.schema.field(i) + if curr_field.type == pa.uint64(): + try: + curr_field = curr_field.with_type(pa.int64()) + except OverflowError: + # There will be no confusion, values are too large + # for int64 anyways + pass + new_fields.append(curr_field) + + pa_table = pa_table.cast(pa.schema(new_fields)) + + # Convert non-nullable struct fields to nullable fields + # since nullable=False cannot roundtrip through orient='records' + # JSON format + assert_table_and_meta_eq(pa_table, res, check_field_nullability=False) + + +def test_read_json_dtypes(table_data, source_or_sink): + # Simple test for dtypes where we read in + # all numeric data as floats + _, pa_table = table_data + source = make_json_source( + source_or_sink, + pa_table, + lines=True, + ) + + dtypes = [] + new_fields = [] + for i in range(len(pa_table.schema)): + field = pa_table.schema.field(i) + child_types = [] + + def get_child_types(typ): + typ_child_types = [] + for i in 
range(typ.num_fields): + curr_field = typ.field(i) + typ_child_types.append( + ( + curr_field.name, + curr_field.type, + get_child_types(curr_field.type), + ) + ) + return typ_child_types + + plc_type = plc.interop.from_arrow(field.type) + if pa.types.is_integer(field.type) or pa.types.is_unsigned_integer( + field.type + ): + plc_type = plc.interop.from_arrow(pa.float64()) + field = field.with_type(pa.float64()) + + dtypes.append((field.name, plc_type, child_types)) + + new_fields.append(field) + + new_schema = pa.schema(new_fields) + + res = plc.io.json.read_json( + plc.io.SourceInfo([source]), dtypes=dtypes, lines=True + ) + new_table = pa_table.cast(new_schema) + + # orient=records is lossy + # and doesn't preserve column names when there's zero rows in the table + if len(new_table) == 0: + new_table = pa.table([]) + + assert_table_and_meta_eq(new_table, res, check_field_nullability=False) + + +@pytest.mark.parametrize("chunk_size", [10, 15, 20]) +def test_read_json_lines_byte_range(source_or_sink, chunk_size): + source = source_or_sink + if isinstance(source_or_sink, io.StringIO): + pytest.skip("byte_range doesn't work on StringIO") + + json_str = "[1, 2, 3]\n[4, 5, 6]\n[7, 8, 9]\n" + write_json_bytes(source, json_str) + + tbls_w_meta = [] + for chunk_start in range(0, len(json_str.encode("utf-8")), chunk_size): + tbls_w_meta.append( + plc.io.json.read_json( + plc.io.SourceInfo([source]), + lines=True, + byte_range_offset=chunk_start, + byte_range_size=chunk_start + chunk_size, + ) + ) + + if isinstance(source, io.IOBase): + source.seek(0) + exp = pd.read_json(source, orient="records", lines=True) + + # TODO: can do this operation using pylibcudf + tbls = [] + for tbl_w_meta in tbls_w_meta: + if tbl_w_meta.tbl.num_rows() > 0: + tbls.append(plc.interop.to_arrow(tbl_w_meta.tbl)) + full_tbl = pa.concat_tables(tbls) + + full_tbl_plc = plc.io.TableWithMetadata( + plc.interop.from_arrow(full_tbl), + tbls_w_meta[0].column_names(include_children=True), + ) + 
assert_table_and_meta_eq(pa.Table.from_pandas(exp), full_tbl_plc) + + +@pytest.mark.parametrize("keep_quotes", [True, False]) +def test_read_json_lines_keep_quotes(keep_quotes, source_or_sink): + source = source_or_sink + + json_bytes = '["a", "b", "c"]\n' + write_json_bytes(source, json_bytes) + + tbl_w_meta = plc.io.json.read_json( + plc.io.SourceInfo([source]), lines=True, keep_quotes=keep_quotes + ) + + template = "{0}" + if keep_quotes: + template = '"{0}"' + + exp = pa.Table.from_arrays( + [ + [template.format("a")], + [template.format("b")], + [template.format("c")], + ], + names=["0", "1", "2"], + ) + + assert_table_and_meta_eq(exp, tbl_w_meta) + + +@pytest.mark.parametrize( + "recovery_mode", [opt for opt in plc.io.types.JSONRecoveryMode] +) +def test_read_json_lines_recovery_mode(recovery_mode, source_or_sink): + source = source_or_sink + + json_bytes = '{"a":1,"b":10}\n{"a":2,"b":11}\nabc\n{"a":3,"b":12}\n' + write_json_bytes(source, json_bytes) + + if recovery_mode == plc.io.types.JSONRecoveryMode.FAIL: + with pytest.raises(RuntimeError): + plc.io.json.read_json( + plc.io.SourceInfo([source]), + lines=True, + recovery_mode=recovery_mode, + ) + else: + # Recover case (bad values replaced with nulls) + tbl_w_meta = plc.io.json.read_json( + plc.io.SourceInfo([source]), + lines=True, + recovery_mode=recovery_mode, + ) + exp = pa.Table.from_arrays( + [[1, 2, None, 3], [10, 11, None, 12]], names=["a", "b"] + ) + assert_table_and_meta_eq(exp, tbl_w_meta) + + +# TODO: Add tests for these! 
+# Tests were not added in the initial PR porting the JSON reader to pylibcudf +# to save time (and since there are no existing tests for these in Python cuDF) +# mixed_types_as_string = mixed_types_as_string, +# prune_columns = prune_columns, diff --git a/python/cudf/cudf/pylibcudf_tests/test_copying.py b/python/cudf/cudf/pylibcudf_tests/test_copying.py index 0a6df198d46..f27fe4e942e 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_copying.py +++ b/python/cudf/cudf/pylibcudf_tests/test_copying.py @@ -9,9 +9,6 @@ assert_column_eq, assert_table_eq, cudf_raises, - is_fixed_width, - is_floating, - is_integer, is_nested_list, is_nested_struct, is_string, @@ -359,9 +356,9 @@ def test_scatter_table_type_mismatch(source_table, index_column, target_table): _, plc_index_column = index_column _, plc_target_table = target_table with cudf_raises(TypeError): - if is_integer( + if plc.traits.is_integral_not_bool( dtype := plc_target_table.columns()[0].type() - ) or is_floating(dtype): + ) or plc.traits.is_floating_point(dtype): pa_array = pa.array([True] * plc_source_table.num_rows()) else: pa_array = pa.array([1] * plc_source_table.num_rows()) @@ -428,9 +425,9 @@ def test_scatter_scalars_type_mismatch(index_column, target_table): _, plc_index_column = index_column _, plc_target_table = target_table with cudf_raises(TypeError): - if is_integer( + if plc.traits.is_integral_not_bool( dtype := plc_target_table.columns()[0].type() - ) or is_floating(dtype): + ) or plc.traits.is_floating_point(dtype): plc_source_scalar = [plc.interop.from_arrow(pa.scalar(True))] else: plc_source_scalar = [plc.interop.from_arrow(pa.scalar(1))] @@ -458,7 +455,7 @@ def test_empty_like_table(source_table): @pytest.mark.parametrize("size", [None, 10]) def test_allocate_like(input_column, size): _, plc_input_column = input_column - if is_fixed_width(plc_input_column.type()): + if plc.traits.is_fixed_width(plc_input_column.type()): result = plc.copying.allocate_like( plc_input_column, 
plc.copying.MaskAllocationPolicy.RETAIN, @@ -484,7 +481,7 @@ def test_copy_range_in_place( pa_target_column, _ = target_column - if not is_fixed_width(mutable_target_column.type()): + if not plc.traits.is_fixed_width(mutable_target_column.type()): with pytest.raises(TypeError): plc.copying.copy_range_in_place( plc_input_column, @@ -516,7 +513,7 @@ def test_copy_range_in_place_out_of_bounds( ): _, plc_input_column = input_column - if is_fixed_width(mutable_target_column.type()): + if plc.traits.is_fixed_width(mutable_target_column.type()): with cudf_raises(IndexError): plc.copying.copy_range_in_place( plc_input_column, @@ -528,7 +525,9 @@ def test_copy_range_in_place_out_of_bounds( def test_copy_range_in_place_different_types(mutable_target_column): - if is_integer(dtype := mutable_target_column.type()) or is_floating(dtype): + if plc.traits.is_integral_not_bool( + dtype := mutable_target_column.type() + ) or plc.traits.is_floating_point(dtype): plc_input_column = plc.interop.from_arrow(pa.array(["a", "b", "c"])) else: plc_input_column = plc.interop.from_arrow(pa.array([1, 2, 3])) @@ -548,7 +547,7 @@ def test_copy_range_in_place_null_mismatch( ): pa_input_column, _ = input_column - if is_fixed_width(mutable_target_column.type()): + if plc.traits.is_fixed_width(mutable_target_column.type()): pa_input_column = pc.if_else( _pyarrow_index_to_mask([0], len(pa_input_column)), pa_input_column, @@ -568,7 +567,9 @@ def test_copy_range_in_place_null_mismatch( def test_copy_range(input_column, target_column): pa_input_column, plc_input_column = input_column pa_target_column, plc_target_column = target_column - if is_fixed_width(dtype := plc_target_column.type()) or is_string(dtype): + if plc.traits.is_fixed_width( + dtype := plc_target_column.type() + ) or is_string(dtype): result = plc.copying.copy_range( plc_input_column, plc_target_column, @@ -610,7 +611,9 @@ def test_copy_range_out_of_bounds(input_column, target_column): def test_copy_range_different_types(target_column): 
_, plc_target_column = target_column - if is_integer(dtype := plc_target_column.type()) or is_floating(dtype): + if plc.traits.is_integral_not_bool( + dtype := plc_target_column.type() + ) or plc.traits.is_floating_point(dtype): plc_input_column = plc.interop.from_arrow(pa.array(["a", "b", "c"])) else: plc_input_column = plc.interop.from_arrow(pa.array([1, 2, 3])) @@ -629,7 +632,9 @@ def test_shift(target_column, source_scalar): pa_source_scalar, plc_source_scalar = source_scalar pa_target_column, plc_target_column = target_column shift = 2 - if is_fixed_width(dtype := plc_target_column.type()) or is_string(dtype): + if plc.traits.is_fixed_width( + dtype := plc_target_column.type() + ) or is_string(dtype): result = plc.copying.shift(plc_target_column, shift, plc_source_scalar) expected = pa.concat_arrays( [pa.array([pa_source_scalar] * shift), pa_target_column[:-shift]] @@ -642,7 +647,9 @@ def test_shift(target_column, source_scalar): def test_shift_type_mismatch(target_column): _, plc_target_column = target_column - if is_integer(dtype := plc_target_column.type()) or is_floating(dtype): + if plc.traits.is_integral_not_bool( + dtype := plc_target_column.type() + ) or plc.traits.is_floating_point(dtype): fill_value = plc.interop.from_arrow(pa.scalar("a")) else: fill_value = plc.interop.from_arrow(pa.scalar(1)) @@ -747,7 +754,9 @@ def test_copy_if_else_column_column(target_column, mask, source_scalar): def test_copy_if_else_wrong_type(target_column, mask): _, plc_target_column = target_column _, plc_mask = mask - if is_integer(dtype := plc_target_column.type()) or is_floating(dtype): + if plc.traits.is_integral_not_bool( + dtype := plc_target_column.type() + ) or plc.traits.is_floating_point(dtype): plc_input_column = plc.interop.from_arrow( pa.array(["a"] * plc_target_column.size()) ) @@ -951,9 +960,9 @@ def test_boolean_mask_scatter_from_wrong_num_true(source_table, target_table): def test_boolean_mask_scatter_from_wrong_col_type(target_table, mask): _, 
plc_target_table = target_table _, plc_mask = mask - if is_integer( + if plc.traits.is_integral_not_bool( dtype := plc_target_table.columns()[0].type() - ) or is_floating(dtype): + ) or plc.traits.is_floating_point(dtype): input_column = plc.interop.from_arrow(pa.array(["a", "b", "c"])) else: input_column = plc.interop.from_arrow(pa.array([1, 2, 3])) diff --git a/python/cudf/cudf/pylibcudf_tests/test_lists.py b/python/cudf/cudf/pylibcudf_tests/test_lists.py index c781126e388..58a1dcf8d56 100644 --- a/python/cudf/cudf/pylibcudf_tests/test_lists.py +++ b/python/cudf/cudf/pylibcudf_tests/test_lists.py @@ -134,3 +134,15 @@ def test_index_of_list_column(test_data, column): expect = pa.array(column[1], type=pa.int32()) assert_column_eq(expect, res) + + +def test_reverse(test_data): + list_column = test_data[0][0] + arr = pa.array(list_column) + plc_column = plc.interop.from_arrow(arr) + + res = plc.lists.reverse(plc_column) + + expect = pa.array([lst[::-1] for lst in list_column]) + + assert_column_eq(expect, res) diff --git a/python/cudf/cudf/pylibcudf_tests/test_traits.py b/python/cudf/cudf/pylibcudf_tests/test_traits.py new file mode 100644 index 00000000000..6c22cb02f21 --- /dev/null +++ b/python/cudf/cudf/pylibcudf_tests/test_traits.py @@ -0,0 +1,110 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +from cudf._lib import pylibcudf as plc + + +def test_is_relationally_comparable(): + assert plc.traits.is_relationally_comparable(plc.DataType(plc.TypeId.INT8)) + assert not plc.traits.is_relationally_comparable( + plc.DataType(plc.TypeId.LIST) + ) + + +def test_is_equality_comparable(): + assert plc.traits.is_equality_comparable(plc.DataType(plc.TypeId.INT8)) + assert not plc.traits.is_equality_comparable(plc.DataType(plc.TypeId.LIST)) + + +def test_is_numeric(): + assert plc.traits.is_numeric(plc.DataType(plc.TypeId.FLOAT64)) + assert not plc.traits.is_numeric(plc.DataType(plc.TypeId.LIST)) + + +def test_is_index_type(): + assert plc.traits.is_index_type(plc.DataType(plc.TypeId.INT8)) + assert not plc.traits.is_index_type(plc.DataType(plc.TypeId.BOOL8)) + + +def test_is_unsigned(): + assert plc.traits.is_unsigned(plc.DataType(plc.TypeId.UINT8)) + assert not plc.traits.is_unsigned(plc.DataType(plc.TypeId.INT8)) + + +def test_is_integral(): + assert plc.traits.is_integral(plc.DataType(plc.TypeId.BOOL8)) + assert not plc.traits.is_integral(plc.DataType(plc.TypeId.DECIMAL32)) + + +def test_is_integral_not_bool(): + assert plc.traits.is_integral_not_bool(plc.DataType(plc.TypeId.INT8)) + assert not plc.traits.is_integral_not_bool(plc.DataType(plc.TypeId.BOOL8)) + + +def test_is_floating_point(): + assert plc.traits.is_floating_point(plc.DataType(plc.TypeId.FLOAT64)) + assert not plc.traits.is_floating_point(plc.DataType(plc.TypeId.UINT8)) + + +def test_is_boolean(): + assert plc.traits.is_boolean(plc.DataType(plc.TypeId.BOOL8)) + assert not plc.traits.is_boolean(plc.DataType(plc.TypeId.UINT8)) + + +def test_is_timestamp(): + assert plc.traits.is_timestamp( + plc.DataType(plc.TypeId.TIMESTAMP_MICROSECONDS) + ) + assert not plc.traits.is_timestamp( + plc.DataType(plc.TypeId.DURATION_MICROSECONDS) + ) + + +def test_is_fixed_point(): + assert plc.traits.is_fixed_point(plc.DataType(plc.TypeId.DECIMAL128)) + assert not 
plc.traits.is_fixed_point(plc.DataType(plc.TypeId.FLOAT32)) + + +def test_is_duration(): + assert plc.traits.is_duration( + plc.DataType(plc.TypeId.DURATION_MICROSECONDS) + ) + assert not plc.traits.is_duration( + plc.DataType(plc.TypeId.TIMESTAMP_MICROSECONDS) + ) + + +def test_is_chrono(): + assert plc.traits.is_chrono(plc.DataType(plc.TypeId.DURATION_MICROSECONDS)) + assert plc.traits.is_chrono( + plc.DataType(plc.TypeId.TIMESTAMP_MICROSECONDS) + ) + assert not plc.traits.is_chrono(plc.DataType(plc.TypeId.UINT8)) + + +def test_is_dictionary(): + assert plc.traits.is_dictionary(plc.DataType(plc.TypeId.DICTIONARY32)) + assert not plc.traits.is_dictionary(plc.DataType(plc.TypeId.UINT8)) + + +def test_is_fixed_width(): + assert plc.traits.is_fixed_width(plc.DataType(plc.TypeId.INT8)) + assert not plc.traits.is_fixed_width(plc.DataType(plc.TypeId.STRING)) + + +def test_is_compound(): + assert plc.traits.is_compound(plc.DataType(plc.TypeId.STRUCT)) + assert not plc.traits.is_compound(plc.DataType(plc.TypeId.UINT8)) + + +def test_is_nested(): + assert plc.traits.is_nested(plc.DataType(plc.TypeId.STRUCT)) + assert not plc.traits.is_nested(plc.DataType(plc.TypeId.STRING)) + + +def test_is_bit_castable(): + assert plc.traits.is_bit_castable( + plc.DataType(plc.TypeId.INT8), plc.DataType(plc.TypeId.UINT8) + ) + assert not plc.traits.is_bit_castable( + plc.DataType(plc.TypeId.UINT8), plc.DataType(plc.TypeId.UINT16) + ) diff --git a/python/cudf/cudf/tests/test_json.py b/python/cudf/cudf/tests/test_json.py index 297040b6d95..9222f6d23db 100644 --- a/python/cudf/cudf/tests/test_json.py +++ b/python/cudf/cudf/tests/test_json.py @@ -1077,8 +1077,13 @@ def test_json_dtypes_nested_data(): ) pdf = pd.read_json( - StringIO(expected_json_str), orient="records", lines=True + StringIO(expected_json_str), + orient="records", + lines=True, ) + + assert_eq(df, pdf) + pdf.columns = pdf.columns.astype("str") pa_table_pdf = pa.Table.from_pandas( pdf, schema=df.to_arrow().schema, safe=False 
diff --git a/python/cudf/cudf/tests/test_list.py b/python/cudf/cudf/tests/test_list.py index f76143cb381..ec9d7995b05 100644 --- a/python/cudf/cudf/tests/test_list.py +++ b/python/cudf/cudf/tests/test_list.py @@ -12,6 +12,7 @@ from cudf import NA from cudf._lib.copying import get_element from cudf.api.types import is_scalar +from cudf.core.column.column import column_empty from cudf.testing import assert_eq from cudf.testing._utils import DATETIME_TYPES, NUMERIC_TYPES, TIMEDELTA_TYPES @@ -926,3 +927,29 @@ def test_list_iterate_error(): def test_list_struct_list_memory_usage(): df = cudf.DataFrame({"a": [[{"b": [1]}]]}) assert df.memory_usage().sum() == 16 + + +def test_empty_nested_list_uninitialized_offsets_memory_usage(): + col = column_empty(0, cudf.ListDtype(cudf.ListDtype("int64"))) + nested_col = col.children[1] + empty_inner = type(nested_col)( + size=nested_col.size, + dtype=nested_col.dtype, + mask=nested_col.mask, + offset=nested_col.offset, + null_count=nested_col.null_count, + children=( + column_empty(0, nested_col.children[0].dtype), + nested_col.children[1], + ), + ) + col_empty_offset = type(col)( + size=col.size, + dtype=col.dtype, + mask=col.mask, + offset=col.offset, + null_count=col.null_count, + children=(column_empty(0, col.children[0].dtype), empty_inner), + ) + ser = cudf.Series._from_data({None: col_empty_offset}) + assert ser.memory_usage() == 8 diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml index 20b731624df..dcb33b1fc1a 100644 --- a/python/cudf/pyproject.toml +++ b/python/cudf/pyproject.toml @@ -121,7 +121,7 @@ skip = [ build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" requires = [ - "cmake>=3.26.4", + "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", "ninja", "numpy==1.23.*", diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml index 11e18cd4f32..badfdf06d15 100644 --- a/python/cudf_kafka/pyproject.toml +++ b/python/cudf_kafka/pyproject.toml @@ -101,7 
+101,7 @@ regex = "(?P.*)" build-backend = "scikit_build_core.build" dependencies-file = "../../dependencies.yaml" requires = [ - "cmake>=3.26.4", + "cmake>=3.26.4,!=3.30.0", "cython>=3.0.3", "ninja", "numpy==1.23.*", diff --git a/python/cudf_polars/cudf_polars/callback.py b/python/cudf_polars/cudf_polars/callback.py index 979087d5273..764cdd3b3ca 100644 --- a/python/cudf_polars/cudf_polars/callback.py +++ b/python/cudf_polars/cudf_polars/callback.py @@ -34,7 +34,12 @@ def _callback( return ir.evaluate(cache={}).to_polars() -def execute_with_cudf(nt: NodeTraverser, *, raise_on_fail: bool = False) -> None: +def execute_with_cudf( + nt: NodeTraverser, + *, + raise_on_fail: bool = False, + exception: type[Exception] | tuple[type[Exception], ...] = Exception, +) -> None: """ A post optimization callback that attempts to execute the plan with cudf. @@ -47,11 +52,15 @@ def execute_with_cudf(nt: NodeTraverser, *, raise_on_fail: bool = False) -> None Should conversion raise an exception rather than continuing without setting a callback. + exception + Optional exception, or tuple of exceptions, to catch during + translation. Defaults to ``Exception``. + The NodeTraverser is mutated if the libcudf executor can handle the plan. 
""" try: with nvtx.annotate(message="ConvertIR", domain="cudf_polars"): nt.set_udf(partial(_callback, translate_ir(nt))) - except NotImplementedError: + except exception: if raise_on_fail: raise diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py index ec8d00c3123..d86656578d7 100644 --- a/python/cudf_polars/cudf_polars/containers/dataframe.py +++ b/python/cudf_polars/cudf_polars/containers/dataframe.py @@ -5,6 +5,7 @@ from __future__ import annotations +import itertools from functools import cached_property from typing import TYPE_CHECKING, cast @@ -160,7 +161,10 @@ def with_columns(self, columns: Sequence[NamedColumn]) -> Self: ----- If column names overlap, newer names replace older ones. """ - return type(self)([*self.columns, *columns]) + columns = list( + {c.name: c for c in itertools.chain(self.columns, columns)}.values() + ) + return type(self)(columns) def discard_columns(self, names: Set[str]) -> Self: """Drop columns by name.""" diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index fe859c8d958..93cb9db7cbd 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -44,6 +44,7 @@ "Col", "BooleanFunction", "StringFunction", + "TemporalFunction", "Sort", "SortBy", "Gather", @@ -703,6 +704,7 @@ def _validate_input(self): pl_expr.StringFunction.EndsWith, pl_expr.StringFunction.StartsWith, pl_expr.StringFunction.Contains, + pl_expr.StringFunction.Slice, ): raise NotImplementedError(f"String function {self.name}") if self.name == pl_expr.StringFunction.Contains: @@ -716,6 +718,11 @@ def _validate_input(self): raise NotImplementedError( "Regex contains only supports a scalar pattern" ) + elif self.name == pl_expr.StringFunction.Slice: + if not all(isinstance(child, Literal) for child in self.children[1:]): + raise NotImplementedError( + "Slice only supports literal start and stop values" + 
) def do_evaluate( self, @@ -744,6 +751,36 @@ def do_evaluate( flags=plc.strings.regex_flags.RegexFlags.DEFAULT, ) return Column(plc.strings.contains.contains_re(column.obj, prog)) + elif self.name == pl_expr.StringFunction.Slice: + child, expr_offset, expr_length = self.children + assert isinstance(expr_offset, Literal) + assert isinstance(expr_length, Literal) + + column = child.evaluate(df, context=context, mapping=mapping) + # libcudf slices via [start,stop). + # polars slices with offset + length where start == offset + # stop = start + length. Negative values for start look backward + # from the last element of the string. If the end index would be + # below zero, an empty string is returned. + # Do this maths on the host + start = expr_offset.value.as_py() + length = expr_length.value.as_py() + + if length == 0: + stop = start + else: + # No length indicates a scan to the end + # The libcudf equivalent is a null stop + stop = start + length if length else None + if length and start < 0 and length >= -start: + stop = None + return Column( + plc.strings.slice.slice_strings( + column.obj, + plc.interop.from_arrow(pa.scalar(start, type=pa.int32())), + plc.interop.from_arrow(pa.scalar(stop, type=pa.int32())), + ) + ) columns = [ child.evaluate(df, context=context, mapping=mapping) for child in self.children @@ -779,6 +816,129 @@ def do_evaluate( ) # pragma: no cover; handled by init raising +class TemporalFunction(Expr): + __slots__ = ("name", "options", "children") + _non_child = ("dtype", "name", "options") + children: tuple[Expr, ...] 
+ + def __init__( + self, + dtype: plc.DataType, + name: pl_expr.TemporalFunction, + options: tuple[Any, ...], + *children: Expr, + ) -> None: + super().__init__(dtype) + self.options = options + self.name = name + self.children = children + if self.name != pl_expr.TemporalFunction.Year: + raise NotImplementedError(f"String function {self.name}") + + def do_evaluate( + self, + df: DataFrame, + *, + context: ExecutionContext = ExecutionContext.FRAME, + mapping: Mapping[Expr, Column] | None = None, + ) -> Column: + """Evaluate this expression given a dataframe for context.""" + columns = [ + child.evaluate(df, context=context, mapping=mapping) + for child in self.children + ] + if self.name == pl_expr.TemporalFunction.Year: + (column,) = columns + return Column(plc.datetime.extract_year(column.obj)) + raise NotImplementedError( + f"TemporalFunction {self.name}" + ) # pragma: no cover; init trips first + + +class UnaryFunction(Expr): + __slots__ = ("name", "options", "children") + _non_child = ("dtype", "name", "options") + children: tuple[Expr, ...] 
+ + def __init__( + self, dtype: plc.DataType, name: str, options: tuple[Any, ...], *children: Expr + ) -> None: + super().__init__(dtype) + self.name = name + self.options = options + self.children = children + if self.name not in ("round", "unique"): + raise NotImplementedError(f"Unary function {name=}") + + def do_evaluate( + self, + df: DataFrame, + *, + context: ExecutionContext = ExecutionContext.FRAME, + mapping: Mapping[Expr, Column] | None = None, + ) -> Column: + """Evaluate this expression given a dataframe for context.""" + if self.name == "round": + (decimal_places,) = self.options + (values,) = ( + child.evaluate(df, context=context, mapping=mapping) + for child in self.children + ) + return Column( + plc.round.round( + values.obj, decimal_places, plc.round.RoundingMethod.HALF_UP + ) + ).sorted_like(values) + elif self.name == "unique": + (maintain_order,) = self.options + (values,) = ( + child.evaluate(df, context=context, mapping=mapping) + for child in self.children + ) + # Only one column, so keep_any is the same as keep_first + # for stable distinct + keep = plc.stream_compaction.DuplicateKeepOption.KEEP_ANY + if values.is_sorted: + maintain_order = True + result = plc.stream_compaction.unique( + plc.Table([values.obj]), + [0], + keep, + plc.types.NullEquality.EQUAL, + ) + else: + distinct = ( + plc.stream_compaction.stable_distinct + if maintain_order + else plc.stream_compaction.distinct + ) + result = distinct( + plc.Table([values.obj]), + [0], + keep, + plc.types.NullEquality.EQUAL, + plc.types.NanEquality.ALL_EQUAL, + ) + (column,) = result.columns() + if maintain_order: + return Column(column).sorted_like(values) + return Column(column) + raise NotImplementedError( + f"Unimplemented unary function {self.name=}" + ) # pragma: no cover; init trips first + + def collect_agg(self, *, depth: int) -> AggInfo: + """Collect information about aggregations in groupbys.""" + if depth == 1: + # inside aggregation, need to pre-evaluate, groupby + # 
construction has checked that we don't have nested aggs, + # so stop the recursion and return ourselves for pre-eval + return AggInfo([(self, plc.aggregation.collect_list(), self)]) + else: + (child,) = self.children + return child.collect_agg(depth=depth) + + class Sort(Expr): __slots__ = ("options", "children") _non_child = ("dtype", "options") @@ -1182,7 +1342,8 @@ def __init__( self.children = (left, right) if ( op in (plc.binaryop.BinaryOperator.ADD, plc.binaryop.BinaryOperator.SUB) - and ({left.dtype.id(), right.dtype.id()}.issubset(dtypes.TIMELIKE_TYPES)) + and plc.traits.is_chrono(left.dtype) + and plc.traits.is_chrono(right.dtype) and not dtypes.have_compatible_resolution(left.dtype.id(), right.dtype.id()) ): raise NotImplementedError("Casting rules for timelike types") diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py index 9b3096becd4..6b552642e88 100644 --- a/python/cudf_polars/cudf_polars/dsl/ir.py +++ b/python/cudf_polars/cudf_polars/dsl/ir.py @@ -96,6 +96,8 @@ def broadcast( ``target_length`` is provided and not all columns are length-1 (i.e. ``n != 1``), then ``target_length`` must be equal to ``n``. """ + if len(columns) == 0: + return [] lengths: set[int] = {column.obj.size() for column in columns} if lengths == {1}: if target_length is None: @@ -431,7 +433,7 @@ def check_agg(agg: expr.Expr) -> int: NotImplementedError For unsupported expression nodes. 
""" - if isinstance(agg, (expr.BinOp, expr.Cast)): + if isinstance(agg, (expr.BinOp, expr.Cast, expr.UnaryFunction)): return max(GroupBy.check_agg(child) for child in agg.children) elif isinstance(agg, expr.Agg): return 1 + max(GroupBy.check_agg(child) for child in agg.children) diff --git a/python/cudf_polars/cudf_polars/dsl/translate.py b/python/cudf_polars/cudf_polars/dsl/translate.py index a2fdb3c3d79..5a1e682abe7 100644 --- a/python/cudf_polars/cudf_polars/dsl/translate.py +++ b/python/cudf_polars/cudf_polars/dsl/translate.py @@ -361,8 +361,23 @@ def _(node: pl_expr.Function, visitor: NodeTraverser, dtype: plc.DataType) -> ex options, *(translate_expr(visitor, n=n) for n in node.input), ) - else: - raise NotImplementedError(f"No handler for Expr function node with {name=}") + elif isinstance(name, pl_expr.TemporalFunction): + return expr.TemporalFunction( + dtype, + name, + options, + *(translate_expr(visitor, n=n) for n in node.input), + ) + elif isinstance(name, str): + return expr.UnaryFunction( + dtype, + name, + options, + *(translate_expr(visitor, n=n) for n in node.input), + ) + raise NotImplementedError( + f"No handler for Expr function node with {name=}" + ) # pragma: no cover; polars raises on the rust side for now @_translate_expr.register @@ -432,8 +447,11 @@ def _(node: pl_expr.Cast, visitor: NodeTraverser, dtype: plc.DataType) -> expr.E # Push casts into literals so we can handle Cast(Literal(Null)) if isinstance(inner, expr.Literal): return expr.Literal(dtype, inner.value.cast(plc.interop.to_arrow(dtype))) - else: - return expr.Cast(dtype, inner) + elif isinstance(inner, expr.Cast): + # Translation of Len/Count-agg put in a cast, remove double + # casts if we have one. 
+ (inner,) = inner.children + return expr.Cast(dtype, inner) @_translate_expr.register @@ -443,12 +461,15 @@ def _(node: pl_expr.Column, visitor: NodeTraverser, dtype: plc.DataType) -> expr @_translate_expr.register def _(node: pl_expr.Agg, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Expr: - return expr.Agg( + value = expr.Agg( dtype, node.name, node.options, *(translate_expr(visitor, n=n) for n in node.arguments), ) + if value.name == "count" and value.dtype.id() != plc.TypeId.INT32: + return expr.Cast(value.dtype, value) + return value @_translate_expr.register @@ -475,7 +496,10 @@ def _( @_translate_expr.register def _(node: pl_expr.Len, visitor: NodeTraverser, dtype: plc.DataType) -> expr.Expr: - return expr.Len(dtype) + value = expr.Len(dtype) + if dtype.id() != plc.TypeId.INT32: + return expr.Cast(dtype, value) + return value # pragma: no cover; never reached since polars len has uint32 dtype def translate_expr(visitor: NodeTraverser, *, n: int) -> expr.Expr: diff --git a/python/cudf_polars/cudf_polars/utils/dtypes.py b/python/cudf_polars/cudf_polars/utils/dtypes.py index 507acb5d33a..918cd024fa2 100644 --- a/python/cudf_polars/cudf_polars/utils/dtypes.py +++ b/python/cudf_polars/cudf_polars/utils/dtypes.py @@ -17,19 +17,6 @@ __all__ = ["from_polars", "downcast_arrow_lists", "have_compatible_resolution"] -TIMELIKE_TYPES: frozenset[plc.TypeId] = frozenset( - [ - plc.TypeId.TIMESTAMP_MILLISECONDS, - plc.TypeId.TIMESTAMP_MICROSECONDS, - plc.TypeId.TIMESTAMP_NANOSECONDS, - plc.TypeId.TIMESTAMP_DAYS, - plc.TypeId.DURATION_MILLISECONDS, - plc.TypeId.DURATION_MICROSECONDS, - plc.TypeId.DURATION_NANOSECONDS, - ] -) - - def have_compatible_resolution(lid: plc.TypeId, rid: plc.TypeId): """ Do two datetime typeids have matching resolution for a binop. 
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml index bf4673fcc50..0b559f7a8e9 100644 --- a/python/cudf_polars/pyproject.toml +++ b/python/cudf_polars/pyproject.toml @@ -182,5 +182,3 @@ docstring-code-format = true [tool.rapids-build-backend] build-backend = "setuptools.build_meta" dependencies-file = "../../dependencies.yaml" -# Pure python -disable-cuda = true diff --git a/python/cudf_polars/tests/expressions/test_datetime_basic.py b/python/cudf_polars/tests/expressions/test_datetime_basic.py index 6ba2a1dce1e..218101bf87c 100644 --- a/python/cudf_polars/tests/expressions/test_datetime_basic.py +++ b/python/cudf_polars/tests/expressions/test_datetime_basic.py @@ -2,6 +2,9 @@ # SPDX-License-Identifier: Apache-2.0 from __future__ import annotations +import datetime +from operator import methodcaller + import pytest import polars as pl @@ -32,3 +35,28 @@ def test_datetime_dataframe_scan(dtype): query = ldf.select(pl.col("b"), pl.col("a")) assert_gpu_result_equal(query) + + +@pytest.mark.parametrize( + "field", + [ + methodcaller("year"), + pytest.param( + methodcaller("day"), + marks=pytest.mark.xfail(reason="day extraction not implemented"), + ), + ], +) +def test_datetime_extract(field): + ldf = pl.LazyFrame( + {"dates": [datetime.date(2024, 1, 1), datetime.date(2024, 10, 11)]} + ) + q = ldf.select(field(pl.col("dates").dt)) + + with pytest.raises(AssertionError): + # polars produces int32, libcudf produces int16 for the year extraction + # libcudf can lose data here. + # https://github.com/rapidsai/cudf/issues/16196 + assert_gpu_result_equal(q) + + assert_gpu_result_equal(q, check_dtypes=False) diff --git a/python/cudf_polars/tests/expressions/test_round.py b/python/cudf_polars/tests/expressions/test_round.py new file mode 100644 index 00000000000..3af3a0ce6d1 --- /dev/null +++ b/python/cudf_polars/tests/expressions/test_round.py @@ -0,0 +1,32 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. 
+# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import math + +import pytest + +import polars as pl + +from cudf_polars.testing.asserts import assert_gpu_result_equal + + +@pytest.fixture(params=[pl.Float32, pl.Float64]) +def dtype(request): + return request.param + + +@pytest.fixture +def df(dtype, with_nulls): + a = [-math.e, 10, 22.5, 1.5, 2.5, -1.5, math.pi, 8] + if with_nulls: + a[2] = None + a[-1] = None + return pl.LazyFrame({"a": a}, schema={"a": dtype}) + + +@pytest.mark.parametrize("decimals", [0, 2, 4]) +def test_round(df, decimals): + q = df.select(pl.col("a").round(decimals=decimals)) + + assert_gpu_result_equal(q, check_exact=False) diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py index 9729e765948..8cf65dd51ac 100644 --- a/python/cudf_polars/tests/expressions/test_stringfunction.py +++ b/python/cudf_polars/tests/expressions/test_stringfunction.py @@ -37,6 +37,30 @@ def ldf(with_nulls): return pl.LazyFrame({"a": a, "b": range(len(a))}) +slice_cases = [ + (1, 3), + (0, 3), + (0, 0), + (-3, 1), + (-100, 5), + (1, 1), + (100, 100), + (-3, 4), + (-3, 3), +] + + +@pytest.fixture(params=slice_cases) +def slice_column_data(ldf, request): + start, length = request.param + if length: + return ldf.with_columns( + pl.lit(start).alias("start"), pl.lit(length).alias("length") + ) + else: + return ldf.with_columns(pl.lit(start).alias("start")) + + def test_supported_stringfunction_expression(ldf): query = ldf.select( pl.col("a").str.starts_with("Z"), @@ -104,3 +128,25 @@ def test_contains_invalid(ldf): query.collect() with pytest.raises(pl.exceptions.ComputeError): query.collect(post_opt_callback=partial(execute_with_cudf, raise_on_fail=True)) + + +@pytest.mark.parametrize("offset", [1, -1, 0, 100, -100]) +def test_slice_scalars_offset(ldf, offset): + query = ldf.select(pl.col("a").str.slice(offset)) + assert_gpu_result_equal(query) + + 
+@pytest.mark.parametrize("offset,length", slice_cases) +def test_slice_scalars_length_and_offset(ldf, offset, length): + query = ldf.select(pl.col("a").str.slice(offset, length)) + assert_gpu_result_equal(query) + + +def test_slice_column(slice_column_data): + if "length" in slice_column_data.collect_schema(): + query = slice_column_data.select( + pl.col("a").str.slice(pl.col("start"), pl.col("length")) + ) + else: + query = slice_column_data.select(pl.col("a").str.slice(pl.col("start"))) + assert_ir_translation_raises(query, NotImplementedError) diff --git a/python/cudf_polars/tests/expressions/test_unique.py b/python/cudf_polars/tests/expressions/test_unique.py new file mode 100644 index 00000000000..9b009a422c2 --- /dev/null +++ b/python/cudf_polars/tests/expressions/test_unique.py @@ -0,0 +1,24 @@ +# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES. +# SPDX-License-Identifier: Apache-2.0 +from __future__ import annotations + +import pytest + +import polars as pl + +from cudf_polars.testing.asserts import assert_gpu_result_equal + + +@pytest.mark.parametrize("maintain_order", [False, True], ids=["unstable", "stable"]) +@pytest.mark.parametrize("pre_sorted", [False, True], ids=["unsorted", "sorted"]) +def test_unique(maintain_order, pre_sorted): + ldf = pl.DataFrame( + { + "b": [1.5, 2.5, None, 1.5, 3, float("nan"), 3], + } + ).lazy() + if pre_sorted: + ldf = ldf.sort("b") + + query = ldf.select(pl.col("b").unique(maintain_order=maintain_order)) + assert_gpu_result_equal(query, check_row_order=maintain_order) diff --git a/python/cudf_polars/tests/test_groupby.py b/python/cudf_polars/tests/test_groupby.py index aefad59eb91..b84e2c16b43 100644 --- a/python/cudf_polars/tests/test_groupby.py +++ b/python/cudf_polars/tests/test_groupby.py @@ -47,6 +47,8 @@ def keys(request): [pl.col("float").max() - pl.col("int").min()], [pl.col("float").mean(), pl.col("int").std()], [(pl.col("float") - pl.lit(2)).max()], + 
[pl.col("float").sum().round(decimals=1)], + [pl.col("float").round(decimals=1).sum()], ], ids=lambda aggs: "-".join(map(str, aggs)), ) @@ -83,10 +85,7 @@ def test_groupby(df: pl.LazyFrame, maintain_order, keys, exprs): def test_groupby_len(df, keys): q = df.group_by(*keys).agg(pl.len()) - # TODO: polars returns UInt32, libcudf returns Int32 - with pytest.raises(AssertionError): - assert_gpu_result_equal(q, check_row_order=False) - assert_gpu_result_equal(q, check_dtypes=False, check_row_order=False) + assert_gpu_result_equal(q, check_row_order=False) @pytest.mark.parametrize( diff --git a/python/cudf_polars/tests/test_union.py b/python/cudf_polars/tests/test_union.py index b021d832910..865b95a7d91 100644 --- a/python/cudf_polars/tests/test_union.py +++ b/python/cudf_polars/tests/test_union.py @@ -46,3 +46,12 @@ def test_concat_vertical(): q = pl.concat([ldf, ldf2], how="vertical") assert_gpu_result_equal(q) + + +def test_concat_diagonal_empty(): + df1 = pl.LazyFrame() + df2 = pl.LazyFrame({"a": [1, 2]}) + + q = pl.concat([df1, df2], how="diagonal_relaxed") + + assert_gpu_result_equal(q, collect_kwargs={"no_optimization": True})