diff --git a/cpp/include/cudf/reduction/detail/reduction_operators.cuh b/cpp/include/cudf/reduction/detail/reduction_operators.cuh
index 4cf8564ab3a..5694362af8f 100644
--- a/cpp/include/cudf/reduction/detail/reduction_operators.cuh
+++ b/cpp/include/cudf/reduction/detail/reduction_operators.cuh
@@ -31,17 +31,41 @@ namespace detail {
 // intermediate data structure to compute `var`, `std`
 template <typename ResultType>
 struct var_std {
-  ResultType value;          /// the value
-  ResultType value_squared;  /// the value of squared
-
-  CUDF_HOST_DEVICE inline var_std(ResultType _value = 0, ResultType _value_squared = 0)
-    : value(_value), value_squared(_value_squared){};
+  // Uses the pairwise approach of Chan, Golub, and LeVeque,
+  // _Algorithms for computing the sample variance: analysis and
+  // recommendations_ (1983)
+  // https://doi.org/10.1080/00031305.1983.10483115
+  // Also http://www.cs.yale.edu/publications/techreports/tr222.pdf
+  // This is a modification of Youngs and Cramer's online approach.
+  ResultType running_sum;
+  ResultType running_square_deviations;
+  size_type count;
+
+  CUDF_HOST_DEVICE inline var_std(ResultType t = 0, ResultType s = 0, size_type n = 0)
+    : running_sum(t), running_square_deviations(s), count(n){};
 
   using this_t = var_std<ResultType>;
 
   CUDF_HOST_DEVICE inline this_t operator+(this_t const& rhs) const
   {
-    return this_t((this->value + rhs.value), (this->value_squared + rhs.value_squared));
+    // Updates as per equations 1.5a and 1.5b in the paper
+    // T_{1,m+n} = T_{1,m} + T_{m+1,m+n}
+    // S_{1,m+n} = S_{1,m} + S_{m+1,m+n} + m/(n(m+n)) * (n/m T_{1,m} - T_{m+1,m+n})**2
+    // Here the first m samples are in this, the remaining n samples are in rhs.
+    auto const m = this->count;
+    auto const n = rhs.count;
+    // Avoid division by zero.
+    if (m == 0) { return rhs; }
+    if (n == 0) { return *this; }
+    auto const tm   = this->running_sum;
+    auto const tn   = rhs.running_sum;
+    auto const sm   = this->running_square_deviations;
+    auto const sn   = rhs.running_square_deviations;
+    auto const tmn  = tm + tn;
+    auto const diff = ((static_cast<ResultType>(n) / m) * tm) - tn;
+    // Computing m/n(m+n) as m/n/(m+n) to avoid integer overflow
+    auto const smn = sm + sn + ((static_cast<ResultType>(m) / n) / (m + n)) * diff * diff;
+    return {tmn, smn, m + n};
   };
 };
 
@@ -50,10 +74,7 @@ template <typename ResultType>
 struct transformer_var_std {
   using OutputType = var_std<ResultType>;
 
-  CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value)
-  {
-    return OutputType(value, value * value);
-  };
+  CUDF_HOST_DEVICE inline OutputType operator()(ResultType const& value) { return {value, 0, 1}; };
 };
 
 // ------------------------------------------------------------------------
@@ -257,12 +278,7 @@ struct variance : public compound_op<variance> {
                                                              cudf::size_type const& count,
                                                              cudf::size_type const& ddof)
     {
-      ResultType mean     = input.value / count;
-      ResultType asum     = input.value_squared;
-      cudf::size_type div = count - ddof;
-      ResultType var      = asum / div - ((mean * mean) * count) / div;
-
-      return var;
+      return input.running_square_deviations / (count - ddof);
     };
   };
 };
diff --git a/cpp/src/io/json/process_tokens.cu b/cpp/src/io/json/process_tokens.cu
index 83c7b663980..d41d137a2c9 100644
--- a/cpp/src/io/json/process_tokens.cu
+++ b/cpp/src/io/json/process_tokens.cu
@@ -22,6 +22,7 @@
 #include <cudf/detail/nvtx/ranges.hpp>
 #include <cudf/detail/utilities/vector_factories.hpp>
 #include <cudf/io/detail/tokenize_json.hpp>
+#include <cudf/utilities/memory_resource.hpp>
 
 #include <rmm/device_uvector.hpp>
 #include <rmm/exec_policy.hpp>
@@ -87,13 +88,25 @@ void validate_token_stream(device_span<char const> d_input,
 {
   CUDF_FUNC_RANGE();
   if (!options.is_strict_validation()) { return; }
+
+  rmm::device_uvector<bool> d_invalid = cudf::detail::make_zeroed_device_uvector_async<bool>(
+    tokens.size(), stream, cudf::get_current_device_resource_ref());
+
   using token_t = cudf::io::json::token_t;
-  cudf::detail::optional_trie trie_na =
-    cudf::detail::create_serialized_trie(options.get_na_values(), stream);
-  auto trie_na_view    = cudf::detail::make_trie_view(trie_na);
+  auto literals = options.get_na_values();
+  literals.emplace_back("null");  // added these too to single trie
+  literals.emplace_back("true");
+  literals.emplace_back("false");
+
+  cudf::detail::optional_trie trie_literals =
+    cudf::detail::create_serialized_trie(literals, stream);
+  cudf::detail::optional_trie trie_nonnumeric = cudf::detail::create_serialized_trie(
+    {"NaN", "Infinity", "+INF", "+Infinity", "-INF", "-Infinity"}, stream);
+
   auto validate_values = cuda::proclaim_return_type<bool>(
     [data                        = d_input.data(),
-     trie_na                     = trie_na_view,
+     trie_literals               = cudf::detail::make_trie_view(trie_literals),
+     trie_nonnumeric             = cudf::detail::make_trie_view(trie_nonnumeric),
      allow_numeric_leading_zeros = options.is_allowed_numeric_leading_zeros(),
      allow_nonnumeric =
        options.is_allowed_nonnumeric_numbers()] __device__(SymbolOffsetT start,
@@ -101,24 +114,15 @@ void validate_token_stream(device_span<char const> d_input,
       // This validates an unquoted value. A value must match https://www.json.org/json-en.html
       // but the leading and training whitespace should already have been removed, and is not
       // a string
-      auto c               = data[start];
-      auto is_null_literal = serialized_trie_contains(trie_na, {data + start, end - start});
-      if (is_null_literal) {
-        return true;
-      } else if ('n' == c) {
-        return substr_eq(data, start, end, 4, "null");
-      } else if ('t' == c) {
-        return substr_eq(data, start, end, 4, "true");
-      } else if ('f' == c) {
-        return substr_eq(data, start, end, 5, "false");
-      } else if (allow_nonnumeric && c == 'N') {
-        return substr_eq(data, start, end, 3, "NaN");
-      } else if (allow_nonnumeric && c == 'I') {
-        return substr_eq(data, start, end, 8, "Infinity");
-      } else if (allow_nonnumeric && c == '+') {
-        return substr_eq(data, start, end, 4, "+INF") ||
-               substr_eq(data, start, end, 9, "+Infinity");
-      } else if ('-' == c || c <= '9' && 'c' >= '0') {
+      auto const is_literal = serialized_trie_contains(trie_literals, {data + start, end - start});
+      if (is_literal) { return true; }
+      if (allow_nonnumeric) {
+        auto const is_nonnumeric =
+          serialized_trie_contains(trie_nonnumeric, {data + start, end - start});
+        if (is_nonnumeric) { return true; }
+      }
+      auto c = data[start];
+      if ('-' == c || (c >= '0' && c <= '9')) {
         // number
         auto num_state = number_state::START;
         for (auto at = start; at < end; at++) {
@@ -140,9 +144,6 @@ void validate_token_stream(device_span<char const> d_input,
                 num_state = number_state::LEADING_ZERO;
               } else if (c >= '1' && c <= '9') {
                 num_state = number_state::WHOLE;
-              } else if (allow_nonnumeric && 'I' == c) {
-                return substr_eq(data, start, end, 4, "-INF") ||
-                       substr_eq(data, start, end, 9, "-Infinity");
               } else {
                 return false;
               }
@@ -273,33 +274,44 @@ void validate_token_stream(device_span<char const> d_input,
 
   auto num_tokens = tokens.size();
   auto count_it   = thrust::make_counting_iterator(0);
-  auto predicate  = [tokens        = tokens.begin(),
-                    token_indices = token_indices.begin(),
-                    validate_values,
-                    validate_strings] __device__(auto i) -> bool {
+  auto predicate  = cuda::proclaim_return_type<bool>([tokens        = tokens.begin(),
+                                                     token_indices = token_indices.begin(),
+                                                     validate_values,
+                                                     validate_strings] __device__(auto i) -> bool {
     if (tokens[i] == token_t::ValueEnd) {
       return !validate_values(token_indices[i - 1], token_indices[i]);
     } else if (tokens[i] == token_t::FieldNameEnd || tokens[i] == token_t::StringEnd) {
       return !validate_strings(token_indices[i - 1], token_indices[i]);
     }
     return false;
-  };
+  });
+
+  auto conditional_invalidout_it =
+    cudf::detail::make_tabulate_output_iterator(cuda::proclaim_return_type<void>(
+      [d_invalid = d_invalid.begin()] __device__(size_type i, bool x) -> void {
+        if (x) { d_invalid[i] = true; }
+      }));
+  thrust::transform(rmm::exec_policy_nosync(stream),
+                    count_it,
+                    count_it + num_tokens,
+                    conditional_invalidout_it,
+                    predicate);
 
   using scan_type            = write_if::scan_type;
   auto conditional_write     = write_if{tokens.begin(), num_tokens};
   auto conditional_output_it = cudf::detail::make_tabulate_output_iterator(conditional_write);
-  auto transform_op          = cuda::proclaim_return_type<scan_type>(
-    [predicate, tokens = tokens.begin()] __device__(auto i) -> scan_type {
-      if (predicate(i)) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd};
-      return {static_cast<token_t>(tokens[i]), tokens[i] == token_t::LineEnd};
-    });
-  auto binary_op = cuda::proclaim_return_type<scan_type>(
+  auto binary_op             = cuda::proclaim_return_type<scan_type>(
     [] __device__(scan_type prev, scan_type curr) -> scan_type {
       auto op_result = (prev.first == token_t::ErrorBegin ? prev.first : curr.first);
-      return scan_type((curr.second ? curr.first : op_result), prev.second | curr.second);
+      return {(curr.second ? curr.first : op_result), prev.second | curr.second};
+    });
+  auto transform_op = cuda::proclaim_return_type<scan_type>(
+    [d_invalid = d_invalid.begin(), tokens = tokens.begin()] __device__(auto i) -> scan_type {
+      if (d_invalid[i]) return {token_t::ErrorBegin, tokens[i] == token_t::LineEnd};
+      return {static_cast<token_t>(tokens[i]), tokens[i] == token_t::LineEnd};
     });
 
-  thrust::transform_inclusive_scan(rmm::exec_policy(stream),
+  thrust::transform_inclusive_scan(rmm::exec_policy_nosync(stream),
                                    count_it,
                                    count_it + num_tokens,
                                    conditional_output_it,
diff --git a/cpp/src/reductions/compound.cuh b/cpp/src/reductions/compound.cuh
index 6bc8b48832f..cd9fade164a 100644
--- a/cpp/src/reductions/compound.cuh
+++ b/cpp/src/reductions/compound.cuh
@@ -18,13 +18,18 @@
 
 #include <cudf/dictionary/detail/iterator.cuh>
 #include <cudf/reduction/detail/reduction.cuh>
+#include <cudf/reduction/detail/reduction_operators.cuh>
 #include <cudf/scalar/scalar_factories.hpp>
+#include <cudf/utilities/error.hpp>
 #include <cudf/utilities/memory_resource.hpp>
 #include <cudf/utilities/traits.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
 #include <thrust/iterator/transform_iterator.h>
 
+#include <stdexcept>
+#include <type_traits>
+
 namespace cudf {
 namespace reduction {
 namespace compound {
@@ -53,9 +58,17 @@ std::unique_ptr<scalar> compound_reduction(column_view const& col,
 {
   auto const valid_count = col.size() - col.null_count();
 
+  // All null input produces all null output
+  if (valid_count == 0 ||
+      // Only care about ddof for standard deviation and variance right now
+      valid_count <= ddof && (std::is_same_v<Op, cudf::reduction::detail::op::standard_deviation> ||
+                              std::is_same_v<Op, cudf::reduction::detail::op::variance>)) {
+    auto result = cudf::make_fixed_width_scalar(output_dtype, stream, mr);
+    result->set_valid_async(false, stream);
+    return result;
+  }
   // reduction by iterator
   auto dcol = cudf::column_device_view::create(col, stream);
-  std::unique_ptr<scalar> result;
   Op compound_op{};
 
   if (!cudf::is_dictionary(col.type())) {
@@ -63,25 +76,21 @@ std::unique_ptr<scalar> compound_reduction(column_view const& col,
       auto it = thrust::make_transform_iterator(
         dcol->pair_begin<ElementType, true>(),
         compound_op.template get_null_replacing_element_transformer<ResultType>());
-      result = cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
+      return cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
         it, col.size(), compound_op, valid_count, ddof, stream, mr);
     } else {
       auto it = thrust::make_transform_iterator(
         dcol->begin<ElementType>(), compound_op.template get_element_transformer<ResultType>());
-      result = cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
+      return cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
         it, col.size(), compound_op, valid_count, ddof, stream, mr);
     }
   } else {
     auto it = thrust::make_transform_iterator(
       cudf::dictionary::detail::make_dictionary_pair_iterator<ElementType>(*dcol, col.has_nulls()),
       compound_op.template get_null_replacing_element_transformer<ResultType>());
-    result = cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
+    return cudf::reduction::detail::reduce<Op, decltype(it), ResultType>(
       it, col.size(), compound_op, valid_count, ddof, stream, mr);
   }
-
-  // set scalar is valid
-  result->set_valid_async(col.null_count() < col.size(), stream);
-  return result;
 };
 
 // @brief result type dispatcher for compound reduction (a.k.a. mean, var, std)
@@ -137,6 +146,7 @@ struct element_type_dispatcher {
                                      rmm::cuda_stream_view stream,
                                      rmm::device_async_resource_ref mr)
   {
+    CUDF_EXPECTS(ddof >= 0, "ddof must be non-negative", std::domain_error);
     return cudf::type_dispatcher(
       output_dtype, result_type_dispatcher<ElementType, Op>(), col, output_dtype, ddof, stream, mr);
   }
diff --git a/cpp/tests/reductions/reduction_tests.cpp b/cpp/tests/reductions/reduction_tests.cpp
index 1e9e13ded93..bdb98372836 100644
--- a/cpp/tests/reductions/reduction_tests.cpp
+++ b/cpp/tests/reductions/reduction_tests.cpp
@@ -33,8 +33,12 @@
 #include <cudf/types.hpp>
 #include <cudf/wrappers/timestamps.hpp>
 
+#include <thrust/copy.h>
 #include <thrust/iterator/counting_iterator.h>
 
+#include <algorithm>
+#include <iostream>
+#include <iterator>
 #include <vector>
 
 using aggregation        = cudf::aggregation;
@@ -765,6 +769,25 @@ TYPED_TEST(MultiStepReductionTest, Mean)
             expected_value_nulls);
 }
 
+template <typename T>
+double calc_var(std::vector<T> const& v, int ddof, std::vector<bool> const& mask = {})
+{
+  auto const values = [&]() {
+    if (mask.empty()) { return v; }
+    std::vector<T> masked{};
+    thrust::copy_if(
+      v.begin(), v.end(), mask.begin(), std::back_inserter(masked), [](auto m) { return m; });
+    return masked;
+  }();
+  auto const valid_count = static_cast<double>(values.size());  // so `- ddof` cannot wrap
+  double const mean      = std::accumulate(values.cbegin(), values.cend(), double{0}) / valid_count;
+  double const sq_sum_of_differences =
+    std::accumulate(values.cbegin(), values.cend(), double{0}, [mean](double acc, auto const x) {
+      return acc + std::pow(x - mean, 2);
+    });
+  return sq_sum_of_differences / (valid_count - ddof);
+}
+
 // This test is disabled for only a Debug build because a compiler error
 // documented in cpp/src/reductions/std.cu and cpp/src/reductions/var.cu
 #ifdef NDEBUG
@@ -777,25 +800,12 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std)
   std::vector<int> int_values({-3, 2, 1, 0, 5, -3, -2, 28});
   std::vector<bool> host_bools({true, true, false, true, true, true, false, true});
 
-  auto calc_var = [](std::vector<T>& v, cudf::size_type valid_count, int ddof) {
-    double mean = std::accumulate(v.begin(), v.end(), double{0});
-    mean /= valid_count;
-
-    double sum_of_sq = std::accumulate(
-      v.begin(), v.end(), double{0}, [](double acc, TypeParam i) { return acc + i * i; });
-
-    cudf::size_type div = valid_count - ddof;
-
-    double var = sum_of_sq / div - ((mean * mean) * valid_count) / div;
-    return var;
-  };
-
   // test without nulls
   std::vector<T> v = convert_values<T>(int_values);
   cudf::test::fixed_width_column_wrapper<T> col(v.begin(), v.end());
 
   auto const ddof = 1;
-  double var      = calc_var(v, v.size(), ddof);
+  double var      = calc_var(v, ddof);
   double std      = std::sqrt(var);
   auto var_agg    = cudf::make_variance_aggregation<reduce_aggregation>(ddof);
   auto std_agg    = cudf::make_std_aggregation<reduce_aggregation>(ddof);
@@ -811,23 +821,19 @@ TYPED_TEST(MultiStepReductionTest, DISABLED_var_std)
 
   // test with nulls
   cudf::test::fixed_width_column_wrapper<T> col_nulls = construct_null_column(v, host_bools);
-  cudf::size_type valid_count =
-    cudf::column_view(col_nulls).size() - cudf::column_view(col_nulls).null_count();
-  auto replaced_array = replace_nulls(v, host_bools, T{0});
-
-  double var_nulls = calc_var(replaced_array, valid_count, ddof);
-  double std_nulls = std::sqrt(var_nulls);
+  double var_nulls                                    = calc_var(v, ddof, host_bools);
+  double std_nulls                                    = std::sqrt(var_nulls);
 
-  EXPECT_EQ(this
-              ->template reduction_test<double>(
-                col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64))
-              .first,
-            var_nulls);
-  EXPECT_EQ(this
-              ->template reduction_test<double>(
-                col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64))
-              .first,
-            std_nulls);
+  EXPECT_DOUBLE_EQ(this
+                     ->template reduction_test<double>(
+                       col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64))
+                     .first,
+                   var_nulls);
+  EXPECT_DOUBLE_EQ(this
+                     ->template reduction_test<double>(
+                       col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64))
+                     .first,
+                   std_nulls);
 }
 
 // ----------------------------------------------------------------------------
@@ -1139,23 +1145,10 @@ TEST_P(ReductionParamTest, DISABLED_std_var)
   std::vector<double> int_values({-3, 2, 1, 0, 5, -3, -2, 28});
   std::vector<bool> host_bools({true, true, false, true, true, true, false, true});
 
-  auto calc_var = [ddof](std::vector<double>& v, cudf::size_type valid_count) {
-    double mean = std::accumulate(v.begin(), v.end(), double{0});
-    mean /= valid_count;
-
-    double sum_of_sq = std::accumulate(
-      v.begin(), v.end(), double{0}, [](double acc, double i) { return acc + i * i; });
-
-    cudf::size_type div = valid_count - ddof;
-
-    double var = sum_of_sq / div - ((mean * mean) * valid_count) / div;
-    return var;
-  };
-
   // test without nulls
   cudf::test::fixed_width_column_wrapper<double> col(int_values.begin(), int_values.end());
 
-  double var   = calc_var(int_values, int_values.size());
+  double var   = calc_var(int_values, ddof);
   double std   = std::sqrt(var);
   auto var_agg = cudf::make_variance_aggregation<reduce_aggregation>(ddof);
   auto std_agg = cudf::make_std_aggregation<reduce_aggregation>(ddof);
@@ -1172,23 +1165,19 @@ TEST_P(ReductionParamTest, DISABLED_std_var)
   // test with nulls
   cudf::test::fixed_width_column_wrapper<double> col_nulls =
     construct_null_column(int_values, host_bools);
-  cudf::size_type valid_count =
-    cudf::column_view(col_nulls).size() - cudf::column_view(col_nulls).null_count();
-  auto replaced_array = replace_nulls<double>(int_values, host_bools, int{0});
-
-  double var_nulls = calc_var(replaced_array, valid_count);
+  double var_nulls = calc_var(int_values, ddof, host_bools);
   double std_nulls = std::sqrt(var_nulls);
 
-  EXPECT_EQ(this
-              ->template reduction_test<double>(
-                col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64))
-              .first,
-            var_nulls);
-  EXPECT_EQ(this
-              ->template reduction_test<double>(
-                col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64))
-              .first,
-            std_nulls);
+  EXPECT_DOUBLE_EQ(this
+                     ->template reduction_test<double>(
+                       col_nulls, *var_agg, cudf::data_type(cudf::type_id::FLOAT64))
+                     .first,
+                   var_nulls);
+  EXPECT_DOUBLE_EQ(this
+                     ->template reduction_test<double>(
+                       col_nulls, *std_agg, cudf::data_type(cudf::type_id::FLOAT64))
+                     .first,
+                   std_nulls);
 }
 
 //-------------------------------------------------------------------
@@ -2471,21 +2460,11 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd)
   std::vector<T> v = convert_values<T>(int_values);
   cudf::data_type output_type{cudf::type_to_id<double>()};
 
-  auto calc_var = [](std::vector<T> const& v, cudf::size_type valid_count, cudf::size_type ddof) {
-    double mean = std::accumulate(v.cbegin(), v.cend(), double{0});
-    mean /= valid_count;
-    double sum_of_sq = std::accumulate(
-      v.cbegin(), v.cend(), double{0}, [](double acc, TypeParam i) { return acc + i * i; });
-    auto const div = valid_count - ddof;
-    double var     = sum_of_sq / div - ((mean * mean) * valid_count) / div;
-    return var;
-  };
-
   // test without nulls
   cudf::test::dictionary_column_wrapper<T> col(v.begin(), v.end());
 
   cudf::size_type const ddof = 1;
-  double var                 = calc_var(v, v.size(), ddof);
+  double var                 = calc_var(v, ddof);
   double std                 = std::sqrt(var);
   auto var_agg               = cudf::make_variance_aggregation<reduce_aggregation>(ddof);
   auto std_agg               = cudf::make_std_aggregation<reduce_aggregation>(ddof);
@@ -2497,15 +2476,13 @@ TYPED_TEST(DictionaryReductionTest, DISABLED_VarStd)
   std::vector<bool> validity({true, true, false, true, true, true, false, true});
   cudf::test::dictionary_column_wrapper<T> col_nulls(v.begin(), v.end(), validity.begin());
 
-  cudf::size_type const valid_count = std::count(validity.begin(), validity.end(), true);
-
-  double var_nulls = calc_var(replace_nulls(v, validity, T{0}), valid_count, ddof);
+  double var_nulls = calc_var(v, ddof, validity);
   double std_nulls = std::sqrt(var_nulls);
 
-  EXPECT_EQ(this->template reduction_test<double>(col_nulls, *var_agg, output_type).first,
-            var_nulls);
-  EXPECT_EQ(this->template reduction_test<double>(col_nulls, *std_agg, output_type).first,
-            std_nulls);
+  EXPECT_DOUBLE_EQ(this->template reduction_test<double>(col_nulls, *var_agg, output_type).first,
+                   var_nulls);
+  EXPECT_DOUBLE_EQ(this->template reduction_test<double>(col_nulls, *std_agg, output_type).first,
+                   std_nulls);
 }
 
 TYPED_TEST(DictionaryReductionTest, NthElement)
diff --git a/docs/cudf/source/developer_guide/testing.md b/docs/cudf/source/developer_guide/testing.md
index f12f809d5db..22cc1b5b8de 100644
--- a/docs/cudf/source/developer_guide/testing.md
+++ b/docs/cudf/source/developer_guide/testing.md
@@ -7,6 +7,23 @@ specifically the [`pytest-cov`](https://github.com/pytest-dev/pytest-cov) plugin
 Code coverage reports are uploaded to [Codecov](https://app.codecov.io/gh/rapidsai/cudf).
 Each PR also indicates whether it increases or decreases test coverage.
 
+### Configuring pytest
+
+Pytest will accept configuration in [multiple different
+files](https://docs.pytest.org/en/stable/reference/customize.html),
+with a specified discovery and precedence order. Note in particular
+that there is no automatic "include" mechanism: as soon as a matching
+configuration file is found, discovery stops.
+
+For preference, so that all tool configuration lives in the same
+place, we use `pyproject.toml`-based configuration. Test configuration
+for a given package should live in that package's `pyproject.toml`
+file.
+
+Where tests do not naturally belong to a project, for example the
+`cudf.pandas` integration tests and the cuDF benchmarks, use a
+`pytest.ini` file as close to the tests as possible.
+
 ## Test organization
 
 How tests are organized depends on which of the following two groups they fall into:
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/generate_ngrams.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/generate_ngrams.rst
new file mode 100644
index 00000000000..d68199271bd
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/generate_ngrams.rst
@@ -0,0 +1,6 @@
+===============
+generate_ngrams
+===============
+
+.. automodule:: pylibcudf.nvtext.generate_ngrams
+   :members:
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst
index b5cd5ee42c3..2e03b589c8b 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/nvtext/index.rst
@@ -5,3 +5,4 @@ nvtext
     :maxdepth: 1
 
     edit_distance
+    generate_ngrams
diff --git a/java/src/test/java/ai/rapids/cudf/ReductionTest.java b/java/src/test/java/ai/rapids/cudf/ReductionTest.java
index 8cc7df1ce7f..6bd6603d71b 100644
--- a/java/src/test/java/ai/rapids/cudf/ReductionTest.java
+++ b/java/src/test/java/ai/rapids/cudf/ReductionTest.java
@@ -612,13 +612,13 @@ void testWithSetOutputType() {
       assertEquals(expected, result);
     }
 
-    try (Scalar expected = Scalar.fromFloat(1.666667f);
+    try (Scalar expected = Scalar.fromFloat(1.6666666f);
          ColumnVector cv = ColumnVector.fromBytes(new byte[]{1, 2, 3, 4});
          Scalar result = cv.variance(DType.FLOAT32)) {
       assertEquals(expected, result);
     }
 
-    try (Scalar expected = Scalar.fromFloat(1.2909945f);
+    try (Scalar expected = Scalar.fromFloat(1.2909944f);
          ColumnVector cv = ColumnVector.fromBytes(new byte[]{1, 2, 3, 4});
          Scalar result = cv.standardDeviation(DType.FLOAT32)) {
       assertEquals(expected, result);
diff --git a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
index 6591b527eec..7fdf9258b7f 100644
--- a/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
+++ b/python/cudf/cudf/_lib/nvtext/generate_ngrams.pyx
@@ -2,75 +2,34 @@
 
 from cudf.core.buffer import acquire_spill_lock
 
-from libcpp.memory cimport unique_ptr
-from libcpp.utility cimport move
-
-from pylibcudf.libcudf.column.column cimport column
-from pylibcudf.libcudf.column.column_view cimport column_view
-from pylibcudf.libcudf.nvtext.generate_ngrams cimport (
-    generate_character_ngrams as cpp_generate_character_ngrams,
-    generate_ngrams as cpp_generate_ngrams,
-    hash_character_ngrams as cpp_hash_character_ngrams,
-)
-from pylibcudf.libcudf.scalar.scalar cimport string_scalar
-from pylibcudf.libcudf.types cimport size_type
-
 from cudf._lib.column cimport Column
-from cudf._lib.scalar cimport DeviceScalar
+
+from pylibcudf import nvtext
 
 
 @acquire_spill_lock()
 def generate_ngrams(Column strings, int ngrams, object py_separator):
-
-    cdef DeviceScalar separator = py_separator.device_value
-
-    cdef column_view c_strings = strings.view()
-    cdef size_type c_ngrams = ngrams
-    cdef const string_scalar* c_separator = <const string_scalar*>separator\
-        .get_raw_ptr()
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(
-            cpp_generate_ngrams(
-                c_strings,
-                c_ngrams,
-                c_separator[0]
-            )
-        )
-
-    return Column.from_unique_ptr(move(c_result))
+    result = nvtext.generate_ngrams.generate_ngrams(
+        strings.to_pylibcudf(mode="read"),
+        ngrams,
+        py_separator.device_value.c_value
+    )
+    return Column.from_pylibcudf(result)
 
 
 @acquire_spill_lock()
 def generate_character_ngrams(Column strings, int ngrams):
-    cdef column_view c_strings = strings.view()
-    cdef size_type c_ngrams = ngrams
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(
-            cpp_generate_character_ngrams(
-                c_strings,
-                c_ngrams
-            )
-        )
-
-    return Column.from_unique_ptr(move(c_result))
+    result = nvtext.generate_ngrams.generate_character_ngrams(
+        strings.to_pylibcudf(mode="read"),
+        ngrams
+    )
+    return Column.from_pylibcudf(result)
 
 
 @acquire_spill_lock()
 def hash_character_ngrams(Column strings, int ngrams):
-    cdef column_view c_strings = strings.view()
-    cdef size_type c_ngrams = ngrams
-    cdef unique_ptr[column] c_result
-
-    with nogil:
-        c_result = move(
-            cpp_hash_character_ngrams(
-                c_strings,
-                c_ngrams
-            )
-        )
-
-    return Column.from_unique_ptr(move(c_result))
+    result = nvtext.generate_ngrams.hash_character_ngrams(
+        strings.to_pylibcudf(mode="read"),
+        ngrams
+    )
+    return Column.from_pylibcudf(result)
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index 3b8dd05c13a..f6ab91f2f01 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -180,9 +180,12 @@ def var(
         min_count: int = 0,
         ddof=1,
     ):
-        return self._reduce(
+        result = self._reduce(
             "var", skipna=skipna, min_count=min_count, ddof=ddof
         )
+        if result is NA:
+            return cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
+        return result
 
     def std(
         self,
@@ -190,9 +193,12 @@ def std(
         min_count: int = 0,
         ddof=1,
     ):
-        return self._reduce(
+        result = self._reduce(
             "std", skipna=skipna, min_count=min_count, ddof=ddof
         )
+        if result is NA:
+            return cudf.utils.dtypes._get_nan_for_dtype(self.dtype)
+        return result
 
     def median(self, skipna: bool | None = None) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index acd97c2047c..41ee94b72c8 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -2943,7 +2943,7 @@ def corr(self, other, method="pearson", min_periods=None):
         >>> ser1 = cudf.Series([0.9, 0.13, 0.62])
         >>> ser2 = cudf.Series([0.12, 0.26, 0.51])
         >>> ser1.corr(ser2, method="pearson")
-        -0.20454263717316112
+        -0.20454263717316126
         >>> ser1.corr(ser2, method="spearman")
         -0.5
         """
diff --git a/python/cudf/cudf/tests/pytest.ini b/python/cudf/cudf/tests/pytest.ini
deleted file mode 100644
index 496a322ff80..00000000000
--- a/python/cudf/cudf/tests/pytest.ini
+++ /dev/null
@@ -1,19 +0,0 @@
-# Copyright (c) 2022, NVIDIA CORPORATION.
-
-[pytest]
-markers =
-    spilling: mark benchmark a good candidate to run with `CUDF_SPILL=ON`
-xfail_strict = true
-filterwarnings =
-    error
-    ignore:::.*xdist.*
-    ignore:::.*pytest.*
-    # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow()
-    ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore
-    # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+
-    ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning
-    # PerformanceWarning from cupy warming up the JIT cache
-    ignore:Jitify is performing a one-time only warm-up to populate the persistent cache:cupy._util.PerformanceWarning
-    # Ignore numba PEP 456 warning specific to arm machines
-    ignore:FNV hashing is not implemented in Numba.*:UserWarning
-addopts = --tb=native
diff --git a/python/cudf/cudf/tests/test_datetime.py b/python/cudf/cudf/tests/test_datetime.py
index 4a2345fc009..976b12a9ab5 100644
--- a/python/cudf/cudf/tests/test_datetime.py
+++ b/python/cudf/cudf/tests/test_datetime.py
@@ -2525,23 +2525,7 @@ def test_dti_asi8():
 
 @pytest.mark.parametrize(
     "method, kwargs",
-    [
-        ["mean", {}],
-        pytest.param(
-            "std",
-            {},
-            marks=pytest.mark.xfail(
-                reason="https://github.com/rapidsai/cudf/issues/16444"
-            ),
-        ),
-        pytest.param(
-            "std",
-            {"ddof": 0},
-            marks=pytest.mark.xfail(
-                reason="https://github.com/rapidsai/cudf/issues/16444"
-            ),
-        ),
-    ],
+    [["mean", {}], ["std", {}], ["std", {"ddof": 0}]],
 )
 def test_dti_reduction(method, kwargs):
     pd_dti = pd.DatetimeIndex(["2020-01-01", "2020-12-31"], name="foo")
diff --git a/python/cudf/cudf_pandas_tests/pytest.ini b/python/cudf/cudf_pandas_tests/pytest.ini
new file mode 100644
index 00000000000..46e2448ea24
--- /dev/null
+++ b/python/cudf/cudf_pandas_tests/pytest.ini
@@ -0,0 +1,9 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+# Note, this config file overrides the default "cudf" test config in
+# ../pyproject.toml. We do so deliberately because we have different
+# treatment of markers and warnings
+[pytest]
+addopts = --tb=native --strict-config --strict-markers
+empty_parameter_set_mark = fail_at_collect
+xfail_strict = true
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index 2bbed40e34e..a74b7148c00 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -3,6 +3,7 @@
 # SPDX-License-Identifier: Apache-2.0
 
 import collections
+import contextlib
 import copy
 import datetime
 import operator
@@ -21,10 +22,15 @@
 import pyarrow as pa
 import pytest
 from nbconvert.preprocessors import ExecutePreprocessor
-from numba import NumbaDeprecationWarning, vectorize
+from numba import (
+    NumbaDeprecationWarning,
+    __version__ as numba_version,
+    vectorize,
+)
+from packaging import version
 from pytz import utc
 
-from cudf.core._compat import PANDAS_GE_220
+from cudf.core._compat import PANDAS_GE_210, PANDAS_GE_220, PANDAS_VERSION
 from cudf.pandas import LOADED, Profiler
 from cudf.pandas.fast_slow_proxy import (
     ProxyFallbackError,
@@ -52,8 +58,6 @@
     get_calendar,
 )
 
-from cudf.core._compat import PANDAS_CURRENT_SUPPORTED_VERSION, PANDAS_VERSION
-
 # Accelerated pandas has the real pandas and cudf modules as attributes
 pd = xpd._fsproxy_slow
 cudf = xpd._fsproxy_fast
@@ -622,10 +626,6 @@ def test_array_function_series_fallback(series):
     tm.assert_equal(expect, got)
 
 
-@pytest.mark.xfail(
-    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
-    reason="Fails in older versions of pandas",
-)
 def test_timedeltaproperties(series):
     psr, sr = series
     psr, sr = psr.astype("timedelta64[ns]"), sr.astype("timedelta64[ns]")
@@ -685,10 +685,6 @@ def test_maintain_container_subclasses(multiindex):
     assert isinstance(got, xpd.core.indexes.frozen.FrozenList)
 
 
-@pytest.mark.xfail(
-    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
-    reason="Fails in older versions of pandas due to unsupported boxcar window type",
-)
 def test_rolling_win_type():
     pdf = pd.DataFrame(range(5))
     df = xpd.DataFrame(range(5))
@@ -697,8 +693,14 @@ def test_rolling_win_type():
     tm.assert_equal(result, expected)
 
 
-@pytest.mark.skip(
-    reason="Requires Numba 0.59 to fix segfaults on ARM. See https://github.com/numba/llvmlite/pull/1009"
+@pytest.mark.skipif(
+    version.parse(numba_version) < version.parse("0.59"),
+    reason="Requires Numba 0.59 to fix segfaults on ARM. See https://github.com/numba/llvmlite/pull/1009",
+)
+@pytest.mark.xfail(
+    version.parse(numba_version) >= version.parse("0.59")
+    and PANDAS_VERSION < version.parse("2.1"),
+    reason="numba.generated_jit removed in 0.59, requires pandas >= 2.1",
 )
 def test_rolling_apply_numba_engine():
     def weighted_mean(x):
@@ -709,7 +711,12 @@ def weighted_mean(x):
     pdf = pd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])
     df = xpd.DataFrame([[1, 2, 0.6], [2, 3, 0.4], [3, 4, 0.2], [4, 5, 0.7]])
 
-    with pytest.warns(NumbaDeprecationWarning):
+    ctx = (
+        contextlib.nullcontext()
+        if PANDAS_GE_210
+        else pytest.warns(NumbaDeprecationWarning)
+    )
+    with ctx:
         expect = pdf.rolling(2, method="table", min_periods=0).apply(
             weighted_mean, raw=True, engine="numba"
         )
@@ -1305,7 +1312,7 @@ def max_times_two(self):
 
 
 @pytest.mark.xfail(
-    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    PANDAS_VERSION < version.parse("2.1"),
     reason="DatetimeArray.__floordiv__ missing in pandas-2.0.0",
 )
 def test_floordiv_array_vs_df():
@@ -1580,7 +1587,7 @@ def test_numpy_cupy_flatiter(series):
 
 
 @pytest.mark.xfail(
-    PANDAS_VERSION < PANDAS_CURRENT_SUPPORTED_VERSION,
+    PANDAS_VERSION < version.parse("2.1"),
     reason="pyarrow_numpy storage type was not supported in pandas-2.0.0",
 )
 def test_arrow_string_arrays():
diff --git a/python/cudf/pyproject.toml b/python/cudf/pyproject.toml
index 1b730ffd13c..c0776fd0de6 100644
--- a/python/cudf/pyproject.toml
+++ b/python/cudf/pyproject.toml
@@ -124,6 +124,27 @@ skip = [
     "__init__.py",
 ]
 
+[tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
+filterwarnings = [
+    "error",
+    "ignore:::.*xdist.*",
+    "ignore:::.*pytest.*",
+    # some third-party dependencies (e.g. 'boto3') still using datetime.datetime.utcnow()
+    "ignore:.*datetime.*utcnow.*scheduled for removal.*:DeprecationWarning:botocore",
+    # Deprecation warning from Pyarrow Table.to_pandas() with pandas-2.2+
+    "ignore:Passing a BlockManager to DataFrame is deprecated:DeprecationWarning",
+    # PerformanceWarning from cupy warming up the JIT cache
+    "ignore:Jitify is performing a one-time only warm-up to populate the persistent cache:cupy._util.PerformanceWarning",
+    # Ignore numba PEP 456 warning specific to arm machines
+    "ignore:FNV hashing is not implemented in Numba.*:UserWarning"
+]
+markers = [
+    "spilling: mark benchmark a good candidate to run with `CUDF_SPILL=ON`"
+]
+xfail_strict = true
+
 [tool.rapids-build-backend]
 build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"
diff --git a/python/cudf_kafka/cudf_kafka/tests/pytest.ini b/python/cudf_kafka/cudf_kafka/tests/pytest.ini
deleted file mode 100644
index 7b0a9f29fb1..00000000000
--- a/python/cudf_kafka/cudf_kafka/tests/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-[pytest]
-addopts = --tb=native
diff --git a/python/cudf_kafka/pyproject.toml b/python/cudf_kafka/pyproject.toml
index a1a3ec37842..87e19a2bccf 100644
--- a/python/cudf_kafka/pyproject.toml
+++ b/python/cudf_kafka/pyproject.toml
@@ -79,9 +79,12 @@ skip = [
 ]
 
 [tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
   "error"
 ]
+xfail_strict = true
 
 [tool.scikit-build]
 build-dir = "build/{wheel_tag}"
diff --git a/python/cudf_polars/cudf_polars/containers/__init__.py b/python/cudf_polars/cudf_polars/containers/__init__.py
index 06bb08953f1..3b1eff4a0d0 100644
--- a/python/cudf_polars/cudf_polars/containers/__init__.py
+++ b/python/cudf_polars/cudf_polars/containers/__init__.py
@@ -5,7 +5,7 @@
 
 from __future__ import annotations
 
-__all__: list[str] = ["DataFrame", "Column", "NamedColumn"]
+__all__: list[str] = ["DataFrame", "Column"]
 
-from cudf_polars.containers.column import Column, NamedColumn
+from cudf_polars.containers.column import Column
 from cudf_polars.containers.dataframe import DataFrame
diff --git a/python/cudf_polars/cudf_polars/containers/column.py b/python/cudf_polars/cudf_polars/containers/column.py
index 3fe3e5557cb..00186098e54 100644
--- a/python/cudf_polars/cudf_polars/containers/column.py
+++ b/python/cudf_polars/cudf_polars/containers/column.py
@@ -15,7 +15,7 @@
 
     import polars as pl
 
-__all__: list[str] = ["Column", "NamedColumn"]
+__all__: list[str] = ["Column"]
 
 
 class Column:
@@ -26,6 +26,9 @@ class Column:
     order: plc.types.Order
     null_order: plc.types.NullOrder
     is_scalar: bool
+    # Optional name, only ever set by evaluation of NamedExpr nodes
+    # The internal evaluation should not care about the name.
+    name: str | None
 
     def __init__(
         self,
@@ -34,14 +37,12 @@ def __init__(
         is_sorted: plc.types.Sorted = plc.types.Sorted.NO,
         order: plc.types.Order = plc.types.Order.ASCENDING,
         null_order: plc.types.NullOrder = plc.types.NullOrder.BEFORE,
+        name: str | None = None,
     ):
         self.obj = column
         self.is_scalar = self.obj.size() == 1
-        if self.obj.size() <= 1:
-            is_sorted = plc.types.Sorted.YES
-        self.is_sorted = is_sorted
-        self.order = order
-        self.null_order = null_order
+        self.name = name
+        self.set_sorted(is_sorted=is_sorted, order=order, null_order=null_order)
 
     @functools.cached_property
     def obj_scalar(self) -> plc.Scalar:
@@ -63,9 +64,26 @@ def obj_scalar(self) -> plc.Scalar:
             )
         return plc.copying.get_element(self.obj, 0)
 
+    def rename(self, name: str | None, /) -> Self:
+        """
+        Return a shallow copy with a new name.
+
+        Parameters
+        ----------
+        name
+            New name
+
+        Returns
+        -------
+        Shallow copy of self with new name set.
+        """
+        new = self.copy()
+        new.name = name
+        return new
+
     def sorted_like(self, like: Column, /) -> Self:
         """
-        Copy sortedness properties from a column onto self.
+        Return a shallow copy with sortedness from like.
 
         Parameters
         ----------
@@ -74,20 +92,23 @@ def sorted_like(self, like: Column, /) -> Self:
 
         Returns
         -------
-        Self with metadata set.
+        Shallow copy of self with metadata set.
 
         See Also
         --------
         set_sorted, copy_metadata
         """
-        return self.set_sorted(
-            is_sorted=like.is_sorted, order=like.order, null_order=like.null_order
+        return type(self)(
+            self.obj,
+            name=self.name,
+            is_sorted=like.is_sorted,
+            order=like.order,
+            null_order=like.null_order,
         )
 
-    # TODO: Return Column once #16272 is fixed.
-    def astype(self, dtype: plc.DataType) -> plc.Column:
+    def astype(self, dtype: plc.DataType) -> Column:
         """
-        Return the backing column as the requested dtype.
+        Cast the column to the requested dtype.
 
         Parameters
         ----------
@@ -109,8 +130,10 @@ def astype(self, dtype: plc.DataType) -> plc.Column:
         the current one.
         """
         if self.obj.type() != dtype:
-            return plc.unary.cast(self.obj, dtype)
-        return self.obj
+            return Column(plc.unary.cast(self.obj, dtype), name=self.name).sorted_like(
+                self
+            )
+        return self
 
     def copy_metadata(self, from_: pl.Series, /) -> Self:
         """
@@ -129,6 +152,7 @@ def copy_metadata(self, from_: pl.Series, /) -> Self:
         --------
         set_sorted, sorted_like
         """
+        self.name = from_.name
         if len(from_) <= 1:
             return self
         ascending = from_.flags["SORTED_ASC"]
@@ -192,6 +216,7 @@ def copy(self) -> Self:
             is_sorted=self.is_sorted,
             order=self.order,
             null_order=self.null_order,
+            name=self.name,
         )
 
     def mask_nans(self) -> Self:
@@ -217,58 +242,3 @@ def nan_count(self) -> int:
                 )
             ).as_py()
         return 0
-
-
-class NamedColumn(Column):
-    """A column with a name."""
-
-    name: str
-
-    def __init__(
-        self,
-        column: plc.Column,
-        name: str,
-        *,
-        is_sorted: plc.types.Sorted = plc.types.Sorted.NO,
-        order: plc.types.Order = plc.types.Order.ASCENDING,
-        null_order: plc.types.NullOrder = plc.types.NullOrder.BEFORE,
-    ) -> None:
-        super().__init__(
-            column, is_sorted=is_sorted, order=order, null_order=null_order
-        )
-        self.name = name
-
-    def copy(self, *, new_name: str | None = None) -> Self:
-        """
-        A shallow copy of the column.
-
-        Parameters
-        ----------
-        new_name
-            Optional new name for the copied column.
-
-        Returns
-        -------
-        New column sharing data with self.
-        """
-        return type(self)(
-            self.obj,
-            self.name if new_name is None else new_name,
-            is_sorted=self.is_sorted,
-            order=self.order,
-            null_order=self.null_order,
-        )
-
-    def mask_nans(self) -> Self:
-        """Return a shallow copy of self with nans masked out."""
-        # Annoying, the inheritance is not right (can't call the
-        # super-type mask_nans), but will sort that by refactoring
-        # later.
-        if plc.traits.is_floating_point(self.obj.type()):
-            old_count = self.obj.null_count()
-            mask, new_count = plc.transform.nans_to_nulls(self.obj)
-            result = type(self)(self.obj.with_mask(mask, new_count), self.name)
-            if old_count == new_count:
-                return result.sorted_like(self)
-            return result
-        return self.copy()
diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py
index f3e3862d0cc..2c195f6637c 100644
--- a/python/cudf_polars/cudf_polars/containers/dataframe.py
+++ b/python/cudf_polars/cudf_polars/containers/dataframe.py
@@ -5,43 +5,50 @@
 
 from __future__ import annotations
 
-import itertools
 from functools import cached_property
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, cast
 
 import pyarrow as pa
 import pylibcudf as plc
 
 import polars as pl
 
-from cudf_polars.containers.column import NamedColumn
+from cudf_polars.containers import Column
 from cudf_polars.utils import dtypes
 
 if TYPE_CHECKING:
-    from collections.abc import Mapping, Sequence, Set
+    from collections.abc import Iterable, Mapping, Sequence, Set
 
     from typing_extensions import Self
 
-    from cudf_polars.containers import Column
-
 
 __all__: list[str] = ["DataFrame"]
 
 
+# Pacify the type checker. DataFrame init asserts that all the columns
+# have a string name, so let's narrow the type.
+class NamedColumn(Column):
+    name: str
+
+
 class DataFrame:
     """A representation of a dataframe."""
 
-    columns: list[NamedColumn]
+    column_map: dict[str, Column]
     table: plc.Table
+    columns: list[NamedColumn]
 
-    def __init__(self, columns: Sequence[NamedColumn]) -> None:
-        self.columns = list(columns)
-        self._column_map = {c.name: c for c in self.columns}
-        self.table = plc.Table([c.obj for c in columns])
+    def __init__(self, columns: Iterable[Column]) -> None:
+        columns = list(columns)
+        if any(c.name is None for c in columns):
+            raise ValueError("All columns must have a name")
+        self.columns = [cast(NamedColumn, c) for c in columns]
+        self.column_map = {c.name: c for c in self.columns}
+        self.table = plc.Table([c.obj for c in self.columns])
 
     def copy(self) -> Self:
         """Return a shallow copy of self."""
-        return type(self)([c.copy() for c in self.columns])
+        return type(self)(c.copy() for c in self.columns)
 
     def to_polars(self) -> pl.DataFrame:
         """Convert to a polars DataFrame."""
@@ -51,42 +58,38 @@ def to_polars(self) -> pl.DataFrame:
         # https://github.com/pola-rs/polars/issues/11632
         # To guarantee we produce correct names, we therefore
         # serialise with names we control and rename with that map.
-        name_map = {f"column_{i}": c.name for i, c in enumerate(self.columns)}
+        name_map = {f"column_{i}": name for i, name in enumerate(self.column_map)}
         table: pa.Table = plc.interop.to_arrow(
             self.table,
             [plc.interop.ColumnMetadata(name=name) for name in name_map],
         )
         df: pl.DataFrame = pl.from_arrow(table)
         return df.rename(name_map).with_columns(
-            *(
-                pl.col(c.name).set_sorted(
-                    descending=c.order == plc.types.Order.DESCENDING
-                )
-                if c.is_sorted
-                else pl.col(c.name)
-                for c in self.columns
-            )
+            pl.col(c.name).set_sorted(descending=c.order == plc.types.Order.DESCENDING)
+            if c.is_sorted
+            else pl.col(c.name)
+            for c in self.columns
         )
 
     @cached_property
     def column_names_set(self) -> frozenset[str]:
         """Return the column names as a set."""
-        return frozenset(c.name for c in self.columns)
+        return frozenset(self.column_map)
 
     @cached_property
     def column_names(self) -> list[str]:
         """Return a list of the column names."""
-        return [c.name for c in self.columns]
+        return list(self.column_map)
 
     @cached_property
     def num_columns(self) -> int:
         """Number of columns."""
-        return len(self.columns)
+        return len(self.column_map)
 
     @cached_property
     def num_rows(self) -> int:
         """Number of rows."""
-        return 0 if len(self.columns) == 0 else self.table.num_rows()
+        return self.table.num_rows() if self.column_map else 0
 
     @classmethod
     def from_polars(cls, df: pl.DataFrame) -> Self:
@@ -111,12 +114,8 @@ def from_polars(cls, df: pl.DataFrame) -> Self:
         # No-op if the schema is unchanged.
         d_table = plc.interop.from_arrow(table.cast(schema))
         return cls(
-            [
-                NamedColumn(column, h_col.name).copy_metadata(h_col)
-                for column, h_col in zip(
-                    d_table.columns(), df.iter_columns(), strict=True
-                )
-            ]
+            Column(column).copy_metadata(h_col)
+            for column, h_col in zip(d_table.columns(), df.iter_columns(), strict=True)
         )
 
     @classmethod
@@ -144,17 +143,14 @@ def from_table(cls, table: plc.Table, names: Sequence[str]) -> Self:
         if table.num_columns() != len(names):
             raise ValueError("Mismatching name and table length.")
         return cls(
-            [
-                NamedColumn(c, name)
-                for c, name in zip(table.columns(), names, strict=True)
-            ]
+            Column(c, name=name) for c, name in zip(table.columns(), names, strict=True)
         )
 
     def sorted_like(
         self, like: DataFrame, /, *, subset: Set[str] | None = None
     ) -> Self:
         """
-        Copy sortedness from a dataframe onto self.
+        Return a shallow copy with sortedness copied from like.
 
         Parameters
         ----------
@@ -165,7 +161,7 @@ def sorted_like(
 
         Returns
         -------
-        Self with metadata set.
+        Shallow copy of self with metadata set.
 
         Raises
         ------
@@ -175,13 +171,12 @@ def sorted_like(
         if like.column_names != self.column_names:
             raise ValueError("Can only copy from identically named frame")
         subset = self.column_names_set if subset is None else subset
-        self.columns = [
+        return type(self)(
             c.sorted_like(other) if c.name in subset else c
             for c, other in zip(self.columns, like.columns, strict=True)
-        ]
-        return self
+        )
 
-    def with_columns(self, columns: Sequence[NamedColumn]) -> Self:
+    def with_columns(self, columns: Iterable[Column], *, replace_only=False) -> Self:
         """
         Return a new dataframe with extra columns.
 
@@ -189,6 +184,8 @@ def with_columns(self, columns: Sequence[NamedColumn]) -> Self:
         ----------
         columns
             Columns to add
+        replace_only
+            If true, then only replacements are allowed (matching by name).
 
         Returns
         -------
@@ -196,36 +193,30 @@ def with_columns(self, columns: Sequence[NamedColumn]) -> Self:
 
         Notes
         -----
-        If column names overlap, newer names replace older ones.
+        If column names overlap, newer names replace older ones, and
+        appear in the same order as the original frame.
         """
-        columns = list(
-            {c.name: c for c in itertools.chain(self.columns, columns)}.values()
-        )
-        return type(self)(columns)
+        new = {c.name: c for c in columns}
+        if replace_only and not self.column_names_set.issuperset(new.keys()):
+            raise ValueError("Cannot replace with non-existing names")
+        return type(self)((self.column_map | new).values())
 
     def discard_columns(self, names: Set[str]) -> Self:
         """Drop columns by name."""
-        return type(self)([c for c in self.columns if c.name not in names])
+        return type(self)(column for column in self.columns if column.name not in names)
 
     def select(self, names: Sequence[str]) -> Self:
         """Select columns by name returning DataFrame."""
-        want = set(names)
-        if not want.issubset(self.column_names_set):
-            raise ValueError("Can't select missing names")
-        return type(self)([self._column_map[name] for name in names])
-
-    def replace_columns(self, *columns: NamedColumn) -> Self:
-        """Return a new dataframe with columns replaced by name."""
-        new = {c.name: c for c in columns}
-        if not set(new).issubset(self.column_names_set):
-            raise ValueError("Cannot replace with non-existing names")
-        return type(self)([new.get(c.name, c) for c in self.columns])
+        try:
+            return type(self)(self.column_map[name] for name in names)
+        except KeyError as e:
+            raise ValueError("Can't select missing names") from e
 
     def rename_columns(self, mapping: Mapping[str, str]) -> Self:
         """Rename some columns."""
-        return type(self)([c.copy(new_name=mapping.get(c.name)) for c in self.columns])
+        return type(self)(c.rename(mapping.get(c.name, c.name)) for c in self.columns)
 
-    def select_columns(self, names: Set[str]) -> list[NamedColumn]:
+    def select_columns(self, names: Set[str]) -> list[Column]:
         """Select columns by name."""
         return [c for c in self.columns if c.name in names]
 
diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index a418560b31c..f7775ceb238 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -27,7 +27,7 @@
 from polars.exceptions import InvalidOperationError
 from polars.polars import _expr_nodes as pl_expr
 
-from cudf_polars.containers import Column, NamedColumn
+from cudf_polars.containers import Column
 from cudf_polars.utils import dtypes, sorting
 
 if TYPE_CHECKING:
@@ -313,7 +313,7 @@ def evaluate(
         *,
         context: ExecutionContext = ExecutionContext.FRAME,
         mapping: Mapping[Expr, Column] | None = None,
-    ) -> NamedColumn:
+    ) -> Column:
         """
         Evaluate this expression given a dataframe for context.
 
@@ -328,20 +328,15 @@ def evaluate(
 
         Returns
         -------
-        NamedColumn attaching a name to an evaluated Column
+        Evaluated Column with name attached.
 
         See Also
         --------
         :meth:`Expr.evaluate` for details, this function just adds the
         name to a column produced from an expression.
         """
-        obj = self.value.evaluate(df, context=context, mapping=mapping)
-        return NamedColumn(
-            obj.obj,
-            self.name,
-            is_sorted=obj.is_sorted,
-            order=obj.order,
-            null_order=obj.null_order,
+        return self.value.evaluate(df, context=context, mapping=mapping).rename(
+            self.name
         )
 
     def collect_agg(self, *, depth: int) -> AggInfo:
@@ -428,7 +423,9 @@ def do_evaluate(
         mapping: Mapping[Expr, Column] | None = None,
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
-        return df._column_map[self.name]
+        # Deliberately remove the name here so that we guarantee
+        # evaluation of the IR produces names.
+        return df.column_map[self.name].rename(None)
 
     def collect_agg(self, *, depth: int) -> AggInfo:
         """Collect information about aggregations in groupbys."""
diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index 1c61075be22..e319c363a23 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -26,7 +26,7 @@
 import polars as pl
 
 import cudf_polars.dsl.expr as expr
-from cudf_polars.containers import DataFrame, NamedColumn
+from cudf_polars.containers import Column, DataFrame
 from cudf_polars.utils import dtypes, sorting
 
 if TYPE_CHECKING:
@@ -57,9 +57,7 @@
 ]
 
 
-def broadcast(
-    *columns: NamedColumn, target_length: int | None = None
-) -> list[NamedColumn]:
+def broadcast(*columns: Column, target_length: int | None = None) -> list[Column]:
     """
     Broadcast a sequence of columns to a common length.
 
@@ -112,12 +110,12 @@ def broadcast(
     return [
         column
         if column.obj.size() != 1
-        else NamedColumn(
+        else Column(
             plc.Column.from_scalar(column.obj_scalar, nrows),
-            column.name,
             is_sorted=plc.types.Sorted.YES,
             order=plc.types.Order.ASCENDING,
             null_order=plc.types.NullOrder.BEFORE,
+            name=column.name,
         )
         for column in columns
     ]
@@ -385,15 +383,17 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             init = plc.interop.from_arrow(
                 pa.scalar(offset, type=plc.interop.to_arrow(dtype))
             )
-            index = NamedColumn(
+            index = Column(
                 plc.filling.sequence(df.num_rows, init, step),
-                name,
                 is_sorted=plc.types.Sorted.YES,
                 order=plc.types.Order.ASCENDING,
                 null_order=plc.types.NullOrder.AFTER,
+                name=name,
             )
             df = DataFrame([index, *df.columns])
-        assert all(c.obj.type() == self.schema[c.name] for c in df.columns)
+        assert all(
+            c.obj.type() == self.schema[name] for name, c in df.column_map.items()
+        )
         if self.predicate is None:
             return df
         else:
@@ -588,15 +588,14 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                 requests.append(plc.groupby.GroupByRequest(col, [req]))
                 replacements.append(rep)
         group_keys, raw_tables = grouper.aggregate(requests)
-        # TODO: names
-        raw_columns: list[NamedColumn] = []
+        raw_columns: list[Column] = []
         for i, table in enumerate(raw_tables):
             (column,) = table.columns()
-            raw_columns.append(NamedColumn(column, f"tmp{i}"))
+            raw_columns.append(Column(column, name=f"tmp{i}"))
         mapping = dict(zip(replacements, raw_columns, strict=True))
         result_keys = [
-            NamedColumn(gk, k.name)
-            for gk, k in zip(group_keys.columns(), keys, strict=True)
+            Column(grouped_key, name=key.name)
+            for key, grouped_key in zip(keys, group_keys.columns(), strict=True)
         ]
         result_subs = DataFrame(raw_columns)
         results = [
@@ -639,8 +638,8 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                 plc.copying.OutOfBoundsPolicy.DONT_CHECK,
             )
             broadcasted = [
-                NamedColumn(reordered, b.name)
-                for reordered, b in zip(
+                Column(reordered, name=old.name)
+                for reordered, old in zip(
                     ordered_table.columns(), broadcasted, strict=True
                 )
             ]
@@ -787,20 +786,20 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             # result, not the gather maps
             columns = plc.join.cross_join(left.table, right.table).columns()
             left_cols = [
-                NamedColumn(new, old.name).sorted_like(old)
+                Column(new, name=old.name).sorted_like(old)
                 for new, old in zip(
                     columns[: left.num_columns], left.columns, strict=True
                 )
             ]
             right_cols = [
-                NamedColumn(
+                Column(
                     new,
-                    old.name
-                    if old.name not in left.column_names_set
-                    else f"{old.name}{suffix}",
+                    name=name
+                    if name not in left.column_names_set
+                    else f"{name}{suffix}",
                 )
-                for new, old in zip(
-                    columns[left.num_columns :], right.columns, strict=True
+                for new, name in zip(
+                    columns[left.num_columns :], right.column_names, strict=True
                 )
             ]
             return DataFrame([*left_cols, *right_cols])
@@ -838,18 +837,19 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                 plc.copying.gather(right.table, rg, right_policy), right.column_names
             )
             if coalesce and how != "inner":
-                left = left.replace_columns(
-                    *(
-                        NamedColumn(
+                left = left.with_columns(
+                    (
+                        Column(
                             plc.replace.replace_nulls(left_col.obj, right_col.obj),
-                            left_col.name,
+                            name=left_col.name,
                         )
                         for left_col, right_col in zip(
                             left.select_columns(left_on.column_names_set),
                             right.select_columns(right_on.column_names_set),
                             strict=True,
                         )
-                    )
+                    ),
+                    replace_only=True,
                 )
                 right = right.discard_columns(right_on.column_names_set)
             if how == "right":
@@ -931,9 +931,10 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         df = self.df.evaluate(cache=cache)
         if self.subset is None:
             indices = list(range(df.num_columns))
+            keys_sorted = all(c.is_sorted for c in df.column_map.values())
         else:
             indices = [i for i, k in enumerate(df.column_names) if k in self.subset]
-        keys_sorted = all(df.columns[i].is_sorted for i in indices)
+            keys_sorted = all(df.column_map[name].is_sorted for name in self.subset)
         if keys_sorted:
             table = plc.stream_compaction.unique(
                 df.table,
@@ -954,10 +955,11 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                 plc.types.NullEquality.EQUAL,
                 plc.types.NanEquality.ALL_EQUAL,
             )
+        # TODO: Is this sortedness setting correct?
         result = DataFrame(
             [
-                NamedColumn(c, old.name).sorted_like(old)
-                for c, old in zip(table.columns(), df.columns, strict=True)
+                Column(new, name=old.name).sorted_like(old)
+                for new, old in zip(table.columns(), df.columns, strict=True)
             ]
         )
         if keys_sorted or self.stable:
@@ -1008,30 +1010,30 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         sort_keys = broadcast(
             *(k.evaluate(df) for k in self.by), target_length=df.num_rows
         )
-        names = {c.name: i for i, c in enumerate(df.columns)}
         # TODO: More robust identification here.
-        keys_in_result = [
-            i
-            for k in sort_keys
-            if (i := names.get(k.name)) is not None and k.obj is df.columns[i].obj
-        ]
+        keys_in_result = {
+            k.name: i
+            for i, k in enumerate(sort_keys)
+            if k.name in df.column_map and k.obj is df.column_map[k.name].obj
+        }
         table = self.do_sort(
             df.table,
             plc.Table([k.obj for k in sort_keys]),
             self.order,
             self.null_order,
         )
-        columns = [
-            NamedColumn(c, old.name)
-            for c, old in zip(table.columns(), df.columns, strict=True)
-        ]
-        # If a sort key is in the result table, set the sortedness property
-        for k, i in enumerate(keys_in_result):
-            columns[i] = columns[i].set_sorted(
-                is_sorted=plc.types.Sorted.YES,
-                order=self.order[k],
-                null_order=self.null_order[k],
-            )
+        columns: list[Column] = []
+        for name, c in zip(df.column_map, table.columns(), strict=True):
+            column = Column(c, name=name)
+            # If a sort key is in the result table, set the sortedness property
+            if name in keys_in_result:
+                i = keys_in_result[name]
+                column = column.set_sorted(
+                    is_sorted=plc.types.Sorted.YES,
+                    order=self.order[i],
+                    null_order=self.null_order[i],
+                )
+            columns.append(column)
         return DataFrame(columns).slice(self.zlice)
 
 
@@ -1080,7 +1082,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         df = self.df.evaluate(cache=cache)
         # This can reorder things.
         columns = broadcast(
-            *df.select(list(self.schema.keys())).columns, target_length=df.num_rows
+            *(df.column_map[name] for name in self.schema), target_length=df.num_rows
         )
         return DataFrame(columns)
 
@@ -1125,7 +1127,7 @@ def __post_init__(self) -> None:
             old, new, _ = self.options
             # TODO: perhaps polars should validate renaming in the IR?
             if len(new) != len(set(new)) or (
-                set(new) & (set(self.df.schema.keys() - set(old)))
+                set(new) & (set(self.df.schema.keys()) - set(old))
             ):
                 raise NotImplementedError("Duplicate new names in rename.")
         elif self.name == "unpivot":
@@ -1170,7 +1172,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             npiv = len(pivotees)
             df = self.df.evaluate(cache=cache)
             index_columns = [
-                NamedColumn(col, name)
+                Column(col, name=name)
                 for col, name in zip(
                     plc.reshape.tile(df.select(indices).table, npiv).columns(),
                     indices,
@@ -1191,13 +1193,16 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                 df.num_rows,
             ).columns()
             value_column = plc.concatenate.concatenate(
-                [c.astype(self.schema[value_name]) for c in df.select(pivotees).columns]
+                [
+                    df.column_map[pivotee].astype(self.schema[value_name]).obj
+                    for pivotee in pivotees
+                ]
             )
             return DataFrame(
                 [
                     *index_columns,
-                    NamedColumn(variable_column, variable_name),
-                    NamedColumn(value_column, value_name),
+                    Column(variable_column, name=variable_name),
+                    Column(value_column, name=value_name),
                 ]
             )
         else:
@@ -1278,6 +1283,4 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
             )
             for df in dfs
         ]
-        return DataFrame(
-            list(itertools.chain.from_iterable(df.columns for df in dfs)),
-        )
+        return DataFrame(itertools.chain.from_iterable(df.columns for df in dfs))
diff --git a/python/cudf_polars/docs/overview.md b/python/cudf_polars/docs/overview.md
index bff44af1468..7837a275f20 100644
--- a/python/cudf_polars/docs/overview.md
+++ b/python/cudf_polars/docs/overview.md
@@ -201,21 +201,21 @@ the logical plan in any case, so is reasonably natural.
 # Containers
 
 Containers should be constructed as relatively lightweight objects
-around their pylibcudf counterparts. We have four (in
+around their pylibcudf counterparts. We have three (in
 `cudf_polars/containers/`):
 
 1. `Scalar` (a wrapper around a pylibcudf `Scalar`)
 2. `Column` (a wrapper around a pylibcudf `Column`)
-3. `NamedColumn` (a `Column` with an additional name)
-4. `DataFrame` (a wrapper around a pylibcudf `Table`)
+3. `DataFrame` (a wrapper around a pylibcudf `Table`)
 
 The interfaces offered by these are somewhat in flux, but broadly
-speaking, a `DataFrame` is just a list of `NamedColumn`s which each
-hold a `Column` plus a string `name`. `NamedColumn`s are only ever
-constructed via `NamedExpr`s, which are the top-level expression node
-that lives inside an `IR` node. This means that the expression
-evaluator never has to concern itself with column names: columns are
-only ever decorated with names when constructing a `DataFrame`.
+speaking, a `DataFrame` is just a mapping from string `name`s to
+`Column`s, and thus also holds a pylibcudf `Table`. Names are only
+attached to `Column`s and hence inserted into `DataFrame`s via
+`NamedExpr`s, which are the top-level expression nodes that live
+inside an `IR` node. This means that the expression evaluator never
+has to concern itself with column names: columns are only ever
+decorated with names when constructing a `DataFrame`.
 
 The columns keep track of metadata (for example, whether or not they
 are sorted). We could imagine tracking more metadata, like minimum and
diff --git a/python/cudf_polars/pyproject.toml b/python/cudf_polars/pyproject.toml
index f55031e0826..5345fad41a2 100644
--- a/python/cudf_polars/pyproject.toml
+++ b/python/cudf_polars/pyproject.toml
@@ -50,6 +50,11 @@ license-files = ["LICENSE"]
 version = {file = "cudf_polars/VERSION"}
 
 [tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
+filterwarnings = [
+  "error"
+]
 xfail_strict = true
 
 [tool.coverage.report]
diff --git a/python/cudf_polars/tests/containers/test_column.py b/python/cudf_polars/tests/containers/test_column.py
index 19919877f84..1f26ab1af9f 100644
--- a/python/cudf_polars/tests/containers/test_column.py
+++ b/python/cudf_polars/tests/containers/test_column.py
@@ -3,13 +3,11 @@
 
 from __future__ import annotations
 
-from functools import partial
-
 import pyarrow
 import pylibcudf as plc
 import pytest
 
-from cudf_polars.containers import Column, NamedColumn
+from cudf_polars.containers import Column
 
 
 def test_non_scalar_access_raises():
@@ -55,11 +53,10 @@ def test_shallow_copy():
 
 
 @pytest.mark.parametrize("typeid", [plc.TypeId.INT8, plc.TypeId.FLOAT32])
-@pytest.mark.parametrize("constructor", [Column, partial(NamedColumn, name="name")])
-def test_mask_nans(typeid, constructor):
+def test_mask_nans(typeid):
     dtype = plc.DataType(typeid)
     values = pyarrow.array([0, 0, 0], type=plc.interop.to_arrow(dtype))
-    column = constructor(plc.interop.from_arrow(values))
+    column = Column(plc.interop.from_arrow(values))
     masked = column.mask_nans()
     assert column.obj.null_count() == masked.obj.null_count()
 
diff --git a/python/cudf_polars/tests/containers/test_dataframe.py b/python/cudf_polars/tests/containers/test_dataframe.py
index 39fb44d55a5..5c68fb8f0aa 100644
--- a/python/cudf_polars/tests/containers/test_dataframe.py
+++ b/python/cudf_polars/tests/containers/test_dataframe.py
@@ -8,18 +8,18 @@
 
 import polars as pl
 
-from cudf_polars.containers import DataFrame, NamedColumn
+from cudf_polars.containers import Column, DataFrame
 from cudf_polars.testing.asserts import assert_gpu_result_equal
 
 
 def test_select_missing_raises():
     df = DataFrame(
         [
-            NamedColumn(
+            Column(
                 plc.column_factories.make_numeric_column(
                     plc.DataType(plc.TypeId.INT8), 2, plc.MaskState.ALL_VALID
                 ),
-                "a",
+                name="a",
             )
         ]
     )
@@ -30,17 +30,17 @@ def test_select_missing_raises():
 def test_replace_missing_raises():
     df = DataFrame(
         [
-            NamedColumn(
+            Column(
                 plc.column_factories.make_numeric_column(
                     plc.DataType(plc.TypeId.INT8), 2, plc.MaskState.ALL_VALID
                 ),
-                "a",
+                name="a",
             )
         ]
     )
-    replacement = df.columns[0].copy(new_name="b")
+    replacement = df.column_map["a"].copy().rename("b")
     with pytest.raises(ValueError):
-        df.replace_columns(replacement)
+        df.with_columns([replacement], replace_only=True)
 
 
 def test_from_table_wrong_names():
@@ -55,14 +55,23 @@ def test_from_table_wrong_names():
         DataFrame.from_table(table, ["a", "b"])
 
 
+def test_unnamed_column_raise():
+    payload = plc.column_factories.make_numeric_column(
+        plc.DataType(plc.TypeId.INT8), 0, plc.MaskState.ALL_VALID
+    )
+
+    with pytest.raises(ValueError):
+        DataFrame([Column(payload, name="a"), Column(payload)])
+
+
 def test_sorted_like_raises_mismatching_names():
     df = DataFrame(
         [
-            NamedColumn(
+            Column(
                 plc.column_factories.make_numeric_column(
                     plc.DataType(plc.TypeId.INT8), 2, plc.MaskState.ALL_VALID
                 ),
-                "a",
+                name="a",
             )
         ]
     )
@@ -72,11 +81,11 @@ def test_sorted_like_raises_mismatching_names():
 
 
 def test_shallow_copy():
-    column = NamedColumn(
+    column = Column(
         plc.column_factories.make_numeric_column(
             plc.DataType(plc.TypeId.INT8), 2, plc.MaskState.ALL_VALID
         ),
-        "a",
+        name="a",
     )
     column.set_sorted(
         is_sorted=plc.types.Sorted.YES,
@@ -85,13 +94,13 @@ def test_shallow_copy():
     )
     df = DataFrame([column])
     copy = df.copy()
-    copy.columns[0].set_sorted(
+    copy.column_map["a"].set_sorted(
         is_sorted=plc.types.Sorted.NO,
         order=plc.types.Order.ASCENDING,
         null_order=plc.types.NullOrder.AFTER,
     )
-    assert df.columns[0].is_sorted == plc.types.Sorted.YES
-    assert copy.columns[0].is_sorted == plc.types.Sorted.NO
+    assert df.column_map["a"].is_sorted == plc.types.Sorted.YES
+    assert copy.column_map["a"].is_sorted == plc.types.Sorted.NO
 
 
 def test_sorted_flags_preserved_empty():
@@ -100,7 +109,7 @@ def test_sorted_flags_preserved_empty():
 
     gf = DataFrame.from_polars(df)
 
-    (a,) = gf.columns
+    a = gf.column_map["a"]
 
     assert a.is_sorted == plc.types.Sorted.YES
 
diff --git a/python/cudf_polars/tests/expressions/test_sort.py b/python/cudf_polars/tests/expressions/test_sort.py
index 76c7648813a..2a37683478b 100644
--- a/python/cudf_polars/tests/expressions/test_sort.py
+++ b/python/cudf_polars/tests/expressions/test_sort.py
@@ -69,7 +69,7 @@ def test_setsorted(descending, nulls_last, with_nulls):
 
     df = translate_ir(q._ldf.visit()).evaluate(cache={})
 
-    (a,) = df.columns
+    a = df.column_map["a"]
 
     assert a.is_sorted == plc.types.Sorted.YES
     null_order = (
diff --git a/python/cudf_polars/tests/pytest.ini b/python/cudf_polars/tests/pytest.ini
deleted file mode 100644
index 7b0a9f29fb1..00000000000
--- a/python/cudf_polars/tests/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-[pytest]
-addopts = --tb=native
diff --git a/python/cudf_polars/tests/utils/test_broadcast.py b/python/cudf_polars/tests/utils/test_broadcast.py
index 35aaef44e1f..e7770bfadac 100644
--- a/python/cudf_polars/tests/utils/test_broadcast.py
+++ b/python/cudf_polars/tests/utils/test_broadcast.py
@@ -6,34 +6,35 @@
 import pylibcudf as plc
 import pytest
 
-from cudf_polars.containers import NamedColumn
+from cudf_polars.containers import Column
 from cudf_polars.dsl.ir import broadcast
 
 
 @pytest.mark.parametrize("target", [4, None])
 def test_broadcast_all_scalar(target):
     columns = [
-        NamedColumn(
+        Column(
             plc.column_factories.make_numeric_column(
                 plc.DataType(plc.TypeId.INT8), 1, plc.MaskState.ALL_VALID
             ),
-            f"col{i}",
+            name=f"col{i}",
         )
         for i in range(3)
     ]
     result = broadcast(*columns, target_length=target)
     expected = 1 if target is None else target
 
+    assert [c.name for c in result] == [f"col{i}" for i in range(3)]
     assert all(column.obj.size() == expected for column in result)
 
 
 def test_invalid_target_length():
     columns = [
-        NamedColumn(
+        Column(
             plc.column_factories.make_numeric_column(
                 plc.DataType(plc.TypeId.INT8), 4, plc.MaskState.ALL_VALID
             ),
-            f"col{i}",
+            name=f"col{i}",
         )
         for i in range(3)
     ]
@@ -43,11 +44,11 @@ def test_invalid_target_length():
 
 def test_broadcast_mismatching_column_lengths():
     columns = [
-        NamedColumn(
+        Column(
             plc.column_factories.make_numeric_column(
                 plc.DataType(plc.TypeId.INT8), i + 1, plc.MaskState.ALL_VALID
             ),
-            f"col{i}",
+            name=f"col{i}",
         )
         for i in range(3)
     ]
@@ -58,16 +59,17 @@ def test_broadcast_mismatching_column_lengths():
 @pytest.mark.parametrize("nrows", [0, 5])
 def test_broadcast_with_scalars(nrows):
     columns = [
-        NamedColumn(
+        Column(
             plc.column_factories.make_numeric_column(
                 plc.DataType(plc.TypeId.INT8),
                 nrows if i == 0 else 1,
                 plc.MaskState.ALL_VALID,
             ),
-            f"col{i}",
+            name=f"col{i}",
         )
         for i in range(3)
     ]
 
     result = broadcast(*columns)
+    assert [c.name for c in result] == [f"col{i}" for i in range(3)]
     assert all(column.obj.size() == nrows for column in result)
diff --git a/python/custreamz/custreamz/tests/pytest.ini b/python/custreamz/custreamz/tests/pytest.ini
deleted file mode 100644
index 7b0a9f29fb1..00000000000
--- a/python/custreamz/custreamz/tests/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-[pytest]
-addopts = --tb=native
diff --git a/python/custreamz/custreamz/tests/test_dataframes.py b/python/custreamz/custreamz/tests/test_dataframes.py
index bae4b051cae..8c0130d2818 100644
--- a/python/custreamz/custreamz/tests/test_dataframes.py
+++ b/python/custreamz/custreamz/tests/test_dataframes.py
@@ -377,24 +377,16 @@ def test_setitem_overwrites(stream):
     [
         ({}, "sum"),
         ({}, "mean"),
-        pytest.param({}, "min"),
+        ({}, "min"),
         pytest.param(
             {},
             "median",
             marks=pytest.mark.xfail(reason="Unavailable for rolling objects"),
         ),
-        pytest.param({}, "max"),
-        pytest.param(
-            {},
-            "var",
-            marks=pytest.mark.xfail(reason="Unavailable for rolling objects"),
-        ),
-        pytest.param({}, "count"),
-        pytest.param(
-            {"ddof": 0},
-            "std",
-            marks=pytest.mark.xfail(reason="Unavailable for rolling objects"),
-        ),
+        ({}, "max"),
+        ({}, "var"),
+        ({}, "count"),
+        ({"ddof": 0}, "std"),
         pytest.param(
             {"quantile": 0.5},
             "quantile",
diff --git a/python/custreamz/pyproject.toml b/python/custreamz/pyproject.toml
index 85ab0024bb5..af45f49d9b4 100644
--- a/python/custreamz/pyproject.toml
+++ b/python/custreamz/pyproject.toml
@@ -111,6 +111,8 @@ skip = [
 ]
 
 [tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
     "error",
     "ignore:unclosed <socket.socket:ResourceWarning",
@@ -118,4 +120,8 @@ filterwarnings = [
     # Should be fixed in the next streamz release
     # https://github.com/python-streamz/streamz/commit/2812f1f961dfcb3f17e948d8b12a12472975558e
     "ignore:pkg_resources is deprecated as an API:DeprecationWarning:streamz",
+    "ignore:Deprecated call to `pkg_resources.declare_namespace:DeprecationWarning",
+    # Ignore numba PEP 456 warning specific to arm machines
+    "ignore:FNV hashing is not implemented in Numba.*:UserWarning"
 ]
+xfail_strict = true
diff --git a/python/dask_cudf/dask_cudf/tests/pytest.ini b/python/dask_cudf/dask_cudf/tests/pytest.ini
deleted file mode 100644
index 7b0a9f29fb1..00000000000
--- a/python/dask_cudf/dask_cudf/tests/pytest.ini
+++ /dev/null
@@ -1,4 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-[pytest]
-addopts = --tb=native
diff --git a/python/dask_cudf/pyproject.toml b/python/dask_cudf/pyproject.toml
index ce825c7647b..fbcd7ae5dfb 100644
--- a/python/dask_cudf/pyproject.toml
+++ b/python/dask_cudf/pyproject.toml
@@ -115,6 +115,8 @@ skip = [
 ]
 
 [tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
 filterwarnings = [
     "error::FutureWarning",
     "error::DeprecationWarning",
@@ -125,3 +127,4 @@ filterwarnings = [
     "ignore:Passing a BlockManager to DataFrame is deprecated and will raise in a future version. Use public APIs instead.:DeprecationWarning",
     "ignore:String support for `aggregate_files` is experimental. Behavior may change in the future.:FutureWarning:dask",
 ]
+xfail_strict = true
diff --git a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
index ebe1fda1f12..eb5617a1da6 100644
--- a/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/nvtext/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-set(cython_sources edit_distance.pyx)
+set(cython_sources edit_distance.pyx generate_ngrams.pyx)
 
 set(linked_libraries cudf::cudf)
 rapids_cython_create_modules(
diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd
index 82f7c425b1d..7f5fa2b9925 100644
--- a/python/pylibcudf/pylibcudf/nvtext/__init__.pxd
+++ b/python/pylibcudf/pylibcudf/nvtext/__init__.pxd
@@ -1,7 +1,8 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from . cimport edit_distance
+from . cimport edit_distance, generate_ngrams
 
 __all__ = [
     "edit_distance",
+    "generate_ngrams",
 ]
diff --git a/python/pylibcudf/pylibcudf/nvtext/__init__.py b/python/pylibcudf/pylibcudf/nvtext/__init__.py
index 986652a241f..a66ce984745 100644
--- a/python/pylibcudf/pylibcudf/nvtext/__init__.py
+++ b/python/pylibcudf/pylibcudf/nvtext/__init__.py
@@ -1,7 +1,8 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
-from . import edit_distance
+from . import edit_distance, generate_ngrams
 
 __all__ = [
     "edit_distance",
+    "generate_ngrams",
 ]
diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd
new file mode 100644
index 00000000000..f15eb1f25e9
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pxd
@@ -0,0 +1,12 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.types cimport size_type
+from pylibcudf.scalar cimport Scalar
+
+
+cpdef Column generate_ngrams(Column input, size_type ngrams, Scalar separator)
+
+cpdef Column generate_character_ngrams(Column input, size_type ngrams=*)
+
+cpdef Column hash_character_ngrams(Column input, size_type ngrams=*)
diff --git a/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
new file mode 100644
index 00000000000..8c7a8edc01d
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/nvtext/generate_ngrams.pyx
@@ -0,0 +1,111 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+from pylibcudf.column cimport Column
+from pylibcudf.libcudf.column.column cimport column
+from pylibcudf.libcudf.column.column_view cimport column_view
+from pylibcudf.libcudf.nvtext.generate_ngrams cimport (
+    generate_character_ngrams as cpp_generate_character_ngrams,
+    generate_ngrams as cpp_generate_ngrams,
+    hash_character_ngrams as cpp_hash_character_ngrams,
+)
+from pylibcudf.libcudf.scalar.scalar cimport string_scalar
+from pylibcudf.libcudf.types cimport size_type
+from pylibcudf.scalar cimport Scalar
+
+
+cpdef Column generate_ngrams(Column input, size_type ngrams, Scalar separator):
+    """
+    Returns a single column of strings by generating ngrams from a strings column.
+
+    For details, see :cpp:func:`generate_ngrams`
+
+    Parameters
+    ----------
+    input : Column
+        Input strings
+    ngrams : size_type
+        The ngram number to generate
+    separator : Scalar
+        The string to use for separating ngram tokens
+
+    Returns
+    -------
+    Column
+        New strings column of tokens
+    """
+    cdef column_view c_strings = input.view()
+    cdef const string_scalar* c_separator = <const string_scalar*>separator.c_obj.get()
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_generate_ngrams(
+                c_strings,
+                ngrams,
+                c_separator[0]
+            )
+        )
+    return Column.from_libcudf(move(c_result))
+
+
+cpdef Column generate_character_ngrams(Column input, size_type ngrams = 2):
+    """
+    Returns a lists column of ngrams of characters within each string.
+
+    For details, see :cpp:func:`generate_character_ngrams`
+
+    Parameters
+    ----------
+    input : Column
+        Input strings
+    ngrams : size_type
+        The ngram number to generate
+
+    Returns
+    -------
+    Column
+        Lists column of strings
+    """
+    cdef column_view c_strings = input.view()
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_generate_character_ngrams(
+                c_strings,
+                ngrams,
+            )
+        )
+    return Column.from_libcudf(move(c_result))
+
+cpdef Column hash_character_ngrams(Column input, size_type ngrams = 2):
+    """
+    Returns a lists column of hash values of the character ngrams in each string.
+
+    For details, see :cpp:func:`hash_character_ngrams`
+
+    Parameters
+    ----------
+    input : Column
+        Input strings
+    ngrams : size_type
+        The ngram number to generate
+
+    Returns
+    -------
+    Column
+        Lists column of hash values
+    """
+    cdef column_view c_strings = input.view()
+    cdef unique_ptr[column] c_result
+
+    with nogil:
+        c_result = move(
+            cpp_hash_character_ngrams(
+                c_strings,
+                ngrams,
+            )
+        )
+    return Column.from_libcudf(move(c_result))
diff --git a/python/pylibcudf/pylibcudf/tests/pytest.ini b/python/pylibcudf/pylibcudf/tests/pytest.ini
deleted file mode 100644
index f572f85ca49..00000000000
--- a/python/pylibcudf/pylibcudf/tests/pytest.ini
+++ /dev/null
@@ -1,9 +0,0 @@
-# Copyright (c) 2024, NVIDIA CORPORATION.
-
-[pytest]
-xfail_strict = true
-filterwarnings =
-    error
-    ignore:::.*xdist.*
-    ignore:::.*pytest.*
-addopts = --tb=native
diff --git a/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py b/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py
new file mode 100644
index 00000000000..5cf9874d595
--- /dev/null
+++ b/python/pylibcudf/pylibcudf/tests/test_nvtext_generate_ngrams.py
@@ -0,0 +1,54 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import pyarrow as pa
+import pylibcudf as plc
+import pytest
+from utils import assert_column_eq
+
+
+@pytest.fixture(scope="module")
+def input_col():
+    arr = ["ab", "cde", "fgh"]
+    return pa.array(arr)
+
+
+@pytest.mark.parametrize("ngram", [2, 3])
+@pytest.mark.parametrize("sep", ["_", "**", ","])
+def test_generate_ngrams(input_col, ngram, sep):
+    result = plc.nvtext.generate_ngrams.generate_ngrams(
+        plc.interop.from_arrow(input_col),
+        ngram,
+        plc.interop.from_arrow(pa.scalar(sep)),
+    )
+    expected = pa.array([f"ab{sep}cde", f"cde{sep}fgh"])
+    if ngram == 3:
+        expected = pa.array([f"ab{sep}cde{sep}fgh"])
+    assert_column_eq(result, expected)
+
+
+@pytest.mark.parametrize("ngram", [2, 3])
+def test_generate_character_ngrams(input_col, ngram):
+    result = plc.nvtext.generate_ngrams.generate_character_ngrams(
+        plc.interop.from_arrow(input_col),
+        ngram,
+    )
+    expected = pa.array([["ab"], ["cd", "de"], ["fg", "gh"]])
+    if ngram == 3:
+        expected = pa.array([[], ["cde"], ["fgh"]])
+    assert_column_eq(result, expected)
+
+
+@pytest.mark.parametrize("ngram", [2, 3])
+def test_hash_character_ngrams(input_col, ngram):
+    result = plc.nvtext.generate_ngrams.hash_character_ngrams(
+        plc.interop.from_arrow(input_col),
+        ngram,
+    )
+    pa_result = plc.interop.to_arrow(result)
+    assert all(
+        len(got) == max(0, len(s.as_py()) - ngram + 1)
+        for got, s in zip(pa_result, input_col)
+    )
+    assert pa_result.type == pa.list_(
+        pa.field("element", pa.uint32(), nullable=False)
+    )
diff --git a/python/pylibcudf/pyproject.toml b/python/pylibcudf/pyproject.toml
index a8224f54e1c..be65142850f 100644
--- a/python/pylibcudf/pyproject.toml
+++ b/python/pylibcudf/pyproject.toml
@@ -95,6 +95,16 @@ skip = [
     "__init__.py",
 ]
 
+[tool.pytest.ini_options]
+addopts = "--tb=native --strict-config --strict-markers"
+empty_parameter_set_mark = "fail_at_collect"
+filterwarnings = [
+  "error",
+  "ignore:::.*xdist.*",
+  "ignore:::.*pytest.*"
+]
+xfail_strict = true
+
 [tool.rapids-build-backend]
 build-backend = "scikit_build_core.build"
 dependencies-file = "../../dependencies.yaml"