From 7e1e4757e753253a99df110fd3814d0136289ef2 Mon Sep 17 00:00:00 2001
From: Vyas Ramasubramani <vyasr@nvidia.com>
Date: Mon, 7 Oct 2024 12:41:47 -0700
Subject: [PATCH 1/2] Address all remaining clang-tidy errors (#16956)

With this set of changes I get a clean run of clang-tidy (with one caveat that I'll explain in the follow-up PR to add clang-tidy to pre-commit/CI).

Authors:
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Nghia Truong (https://github.com/ttnghia)
  - MithunR (https://github.com/mythrocks)
  - David Wendt (https://github.com/davidwendt)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)

URL: https://github.com/rapidsai/cudf/pull/16956
---
 cpp/.clang-tidy                               | 43 ++++++++++++---
 cpp/cmake/thirdparty/get_nanoarrow.cmake      |  8 +--
 .../nanoarrow_clang_tidy_compliance.diff      | 38 ++++++++++++++
 .../patches/nanoarrow_override.json           | 18 +++++++
 cpp/include/cudf/table/table.hpp              |  2 +-
 cpp/include/cudf/table/table_view.hpp         |  2 +-
 cpp/src/io/avro/avro.cpp                      |  3 +-
 cpp/src/io/orc/orc.hpp                        |  2 +-
 .../io/parquet/compact_protocol_reader.cpp    |  6 ++-
 cpp/src/io/utilities/data_sink.cpp            |  6 ++-
 cpp/src/io/utilities/hostdevice_span.hpp      |  8 +--
 cpp/src/utilities/host_memory.cpp             | 13 ++++-
 cpp/tests/binaryop/binop-compiled-test.cpp    |  6 ++-
 cpp/tests/binaryop/util/operation.h           |  4 +-
 cpp/tests/column/column_test.cpp              |  4 +-
 cpp/tests/copying/slice_tests.cpp             | 12 +++--
 cpp/tests/copying/slice_tests.cuh             | 21 ++++----
 cpp/tests/copying/split_tests.cpp             | 52 ++++++++++---------
 .../hashing/murmurhash3_x64_128_test.cpp      |  4 +-
 cpp/tests/hashing/sha256_test.cpp             |  2 -
 cpp/tests/interop/from_arrow_device_test.cpp  | 12 ++---
 cpp/tests/interop/from_arrow_host_test.cpp    |  6 +--
 cpp/tests/interop/from_arrow_test.cpp         |  8 +--
 cpp/tests/interop/to_arrow_device_test.cpp    | 12 ++---
 cpp/tests/interop/to_arrow_host_test.cpp      |  6 +--
 cpp/tests/interop/to_arrow_test.cpp           | 14 ++---
 cpp/tests/io/comp/decomp_test.cpp             | 36 ++++++++-----
 cpp/tests/io/csv_test.cpp                     | 12 ++---
 cpp/tests/io/json/json_test.cpp               |  6 +--
 cpp/tests/io/orc_test.cpp                     | 37 ++++++-------
 cpp/tests/io/parquet_misc_test.cpp            |  2 +-
 cpp/tests/io/parquet_reader_test.cpp          |  7 +--
 cpp/tests/io/parquet_v2_test.cpp              | 36 ++++++-------
 cpp/tests/io/parquet_writer_test.cpp          | 17 +++---
 cpp/tests/join/distinct_join_tests.cpp        | 10 ++--
 cpp/tests/merge/merge_string_test.cpp         |  4 +-
 cpp/tests/merge/merge_test.cpp                |  6 +--
 .../reductions/segmented_reduction_tests.cpp  |  9 ++--
 cpp/tests/replace/replace_tests.cpp           |  4 +-
 cpp/tests/rolling/collect_ops_test.cpp        |  8 +--
 cpp/tests/rolling/offset_row_window_test.cpp  | 12 +++--
 cpp/tests/rolling/rolling_test.cpp            |  2 +-
 cpp/tests/scalar/scalar_test.cpp              |  6 +--
 cpp/tests/search/search_list_test.cpp         |  3 +-
 cpp/tests/sort/sort_test.cpp                  |  2 +-
 cpp/tests/stream_compaction/unique_tests.cpp  |  1 -
 cpp/tests/streams/stream_compaction_test.cpp  |  2 -
 cpp/tests/strings/integers_tests.cpp          |  3 +-
 cpp/tests/structs/structs_column_tests.cpp    |  5 +-
 cpp/tests/transform/bools_to_mask_test.cpp    |  2 +-
 .../integration/unary_transform_test.cpp      | 28 +++++-----
 51 files changed, 344 insertions(+), 228 deletions(-)
 create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
 create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_override.json

diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy
index b791d846d1d..2d4f8c0d80e 100644
--- a/cpp/.clang-tidy
+++ b/cpp/.clang-tidy
@@ -1,18 +1,47 @@
 ---
+# Notes on disabled checks
+# ------------------------
+# modernize-use-equals-default:
+#     auto-fix is broken (doesn't insert =default correctly)
+# modernize-concat-nested-namespaces:
+#     auto-fix is broken (can delete code)
+# modernize-use-trailing-return-type:
+#     Purely stylistic, no benefit to rewriting everything
+# modernize-return-braced-init-list:
+#     Stylistically we prefer to see the return type at the return site.
+#     See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672
+#     for more information.
+# modernize-use-bool-literals:
+#     Our tests use int flags for validity masks extensively and we prefer that
+# clang-analyzer-cplusplus.NewDeleteLeaks:
+#     This check has numerous bugs, see
+#     https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks
+#     We encounter at least
+#     https://github.com/llvm/llvm-project/issues/60896
+#     https://github.com/llvm/llvm-project/issues/69602
+# clang-analyzer-optin.core.EnumCastOutOfRange
+#     We use enums as flags in multiple cases and this check makes ORing flags invalid
+# clang-analyzer-optin.cplusplus.UninitializedObject'
+#     There is an error in nanoarrow that none of the clang-tidy filters (i.e.
+#     header-filter and exclude-header-filter are able to properly avoid. This
+#     merits further investigation
+#
+# We need to verify that broken checks are still broken
 Checks:
       'modernize-*,
        -modernize-use-equals-default,
        -modernize-concat-nested-namespaces,
        -modernize-use-trailing-return-type,
-       -modernize-use-bool-literals'
-
-      # -modernize-use-equals-default        # auto-fix is broken (doesn't insert =default correctly)
-      # -modernize-concat-nested-namespaces  # auto-fix is broken (can delete code)
-      # -modernize-use-trailing-return-type  # just a preference
+       -modernize-return-braced-init-list,
+       -modernize-use-bool-literals,
+       clang-analyzer-*,
+       -clang-analyzer-cplusplus.NewDeleteLeaks,
+       -clang-analyzer-optin.core.EnumCastOutOfRange,
+       -clang-analyzer-optin.cplusplus.UninitializedObject'
 
 WarningsAsErrors: ''
-HeaderFilterRegex: ''
-AnalyzeTemporaryDtors: false
+HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
+ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
 FormatStyle:     none
 CheckOptions:
  - key:             modernize-loop-convert.MaxCopySize
diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake
index 8df1b431095..d7d7fcca044 100644
--- a/cpp/cmake/thirdparty/get_nanoarrow.cmake
+++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake
@@ -14,15 +14,17 @@
 
 # This function finds nanoarrow and sets any additional necessary environment variables.
 function(find_and_configure_nanoarrow)
+  include(${rapids-cmake-dir}/cpm/package_override.cmake)
+
+  set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
+  rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json")
+
   # Currently we need to always build nanoarrow so we don't pickup a previous installed version
   set(CPM_DOWNLOAD_nanoarrow ON)
   rapids_cpm_find(
     nanoarrow 0.6.0.dev
     GLOBAL_TARGETS nanoarrow
     CPM_ARGS
-    GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
-    GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
-    GIT_SHALLOW FALSE
     OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf"
   )
   set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON)
diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
new file mode 100644
index 00000000000..e9a36fcb567
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
@@ -0,0 +1,38 @@
+diff --git a/src/nanoarrow/common/inline_buffer.h b/src/nanoarrow/common/inline_buffer.h
+index caa6be4..70ec8a2 100644
+--- a/src/nanoarrow/common/inline_buffer.h
++++ b/src/nanoarrow/common/inline_buffer.h
+@@ -347,7 +347,7 @@ static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) {
+ }
+ 
+ static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
+-  *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) |
++  *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | // NOLINT
+                    ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) |
+                    ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) |
+                    ((values[7] + 0x7f) & 0x80));
+@@ -471,13 +471,13 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l
+     // set bits within a single byte
+     const uint8_t only_byte_mask =
+         i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
+-    bits[bytes_begin] &= only_byte_mask;
++    bits[bytes_begin] &= only_byte_mask;  // NOLINT
+     bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
+     return;
+   }
+ 
+   // set/clear trailing bits of first byte
+-  bits[bytes_begin] &= first_byte_mask;
++  bits[bytes_begin] &= first_byte_mask;  // NOLINT
+   bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);
+ 
+   if (bytes_end - bytes_begin > 2) {
+@@ -637,7 +637,7 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
+   n_remaining -= n_full_bytes * 8;
+   if (n_remaining > 0) {
+     // Zero out the last byte
+-    *out_cursor = 0x00;
++    *out_cursor = 0x00;  // NOLINT
+     for (int i = 0; i < n_remaining; i++) {
+       ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
+     }
diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_override.json b/cpp/cmake/thirdparty/patches/nanoarrow_override.json
new file mode 100644
index 00000000000..d529787e7c8
--- /dev/null
+++ b/cpp/cmake/thirdparty/patches/nanoarrow_override.json
@@ -0,0 +1,18 @@
+
+{
+  "packages" : {
+    "nanoarrow" : {
+      "version" : "0.6.0.dev",
+      "git_url" : "https://github.com/apache/arrow-nanoarrow.git",
+      "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb",
+      "git_shallow" : false,
+      "patches" : [
+        {
+          "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
+          "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537",
+          "fixed_in" : ""
+        }
+      ]
+    }
+  }
+}
diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp
index 762131a174f..15fdad21d9f 100644
--- a/cpp/include/cudf/table/table.hpp
+++ b/cpp/include/cudf/table/table.hpp
@@ -148,7 +148,7 @@ class table {
     std::vector<column_view> columns(std::distance(begin, end));
     std::transform(
       begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); });
-    return table_view(columns);
+    return table_view{columns};
   }
 
   /**
diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp
index 4a990f67ce4..d41176590ea 100644
--- a/cpp/include/cudf/table/table_view.hpp
+++ b/cpp/include/cudf/table/table_view.hpp
@@ -241,7 +241,7 @@ class table_view : public detail::table_view_base<column_view> {
   {
     std::vector<column_view> columns(std::distance(begin, end));
     std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); });
-    return table_view(columns);
+    return table_view{columns};
   }
 
   /**
diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp
index 03cf6d4a0e0..d5caa4720ac 100644
--- a/cpp/src/io/avro/avro.cpp
+++ b/cpp/src/io/avro/avro.cpp
@@ -16,6 +16,7 @@
 
 #include "avro.hpp"
 
+#include <array>
 #include <cstring>
 #include <unordered_map>
 
@@ -302,7 +303,7 @@ bool schema_parser::parse(std::vector<schema_entry>& schema, std::string const&
   // Empty schema
   if (json_str == "[]") return true;
 
-  char depthbuf[MAX_SCHEMA_DEPTH];
+  std::array<char, MAX_SCHEMA_DEPTH> depthbuf;
   int depth = 0, parent_idx = -1, entry_idx = -1;
   json_state_e state = state_attrname;
   std::string str;
diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp
index 790532c9d54..5ab36fdae8e 100644
--- a/cpp/src/io/orc/orc.hpp
+++ b/cpp/src/io/orc/orc.hpp
@@ -258,7 +258,7 @@ class ProtobufReader {
 
  private:
   template <int index>
-  friend class FunctionSwitchImpl;
+  friend struct FunctionSwitchImpl;
 
   void skip_bytes(size_t bytecnt)
   {
diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp
index b978799b8bc..312a5243687 100644
--- a/cpp/src/io/parquet/compact_protocol_reader.cpp
+++ b/cpp/src/io/parquet/compact_protocol_reader.cpp
@@ -228,7 +228,8 @@ class parquet_field_string : public parquet_field {
  * @return True if field types mismatch or if the process of reading a
  * string fails
  */
-struct parquet_field_string_list : public parquet_field_list<std::string, FieldType::BINARY> {
+class parquet_field_string_list : public parquet_field_list<std::string, FieldType::BINARY> {
+ public:
   parquet_field_string_list(int f, std::vector<std::string>& v) : parquet_field_list(f, v)
   {
     auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
@@ -396,8 +397,9 @@ class parquet_field_binary : public parquet_field {
  * @return True if field types mismatch or if the process of reading a
  * binary fails
  */
-struct parquet_field_binary_list
+class parquet_field_binary_list
   : public parquet_field_list<std::vector<uint8_t>, FieldType::BINARY> {
+ public:
   parquet_field_binary_list(int f, std::vector<std::vector<uint8_t>>& v) : parquet_field_list(f, v)
   {
     auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp
index 1dbb9369115..0b76f3d3e8f 100644
--- a/cpp/src/io/utilities/data_sink.cpp
+++ b/cpp/src/io/utilities/data_sink.cpp
@@ -50,7 +50,8 @@ class file_sink : public data_sink {
     }
   }
 
-  ~file_sink() override { flush(); }
+  // Marked as NOLINT because we are calling a virtual method in the destructor
+  ~file_sink() override { flush(); }  // NOLINT
 
   void host_write(void const* data, size_t size) override
   {
@@ -114,7 +115,8 @@ class host_buffer_sink : public data_sink {
  public:
   explicit host_buffer_sink(std::vector<char>* buffer) : buffer_(buffer) {}
 
-  ~host_buffer_sink() override { flush(); }
+  // Marked as NOLINT because we are calling a virtual method in the destructor
+  ~host_buffer_sink() override { flush(); }  // NOLINT
 
   void host_write(void const* data, size_t size) override
   {
diff --git a/cpp/src/io/utilities/hostdevice_span.hpp b/cpp/src/io/utilities/hostdevice_span.hpp
index d9eac423901..1d8b34addbd 100644
--- a/cpp/src/io/utilities/hostdevice_span.hpp
+++ b/cpp/src/io/utilities/hostdevice_span.hpp
@@ -43,8 +43,8 @@ class hostdevice_span {
   template <typename C,
             // Only supported containers of types convertible to T
             std::enable_if_t<std::is_convertible_v<
-              std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],
-              T (*)[]>>* = nullptr>
+              std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],  // NOLINT
+              T (*)[]>>* = nullptr>                                                  // NOLINT
   constexpr hostdevice_span(C& in) : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size())
   {
   }
@@ -54,8 +54,8 @@ class hostdevice_span {
   template <typename C,
             // Only supported containers of types convertible to T
             std::enable_if_t<std::is_convertible_v<
-              std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],
-              T (*)[]>>* = nullptr>
+              std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],  // NOLINT
+              T (*)[]>>* = nullptr>                                                  // NOLINT
   constexpr hostdevice_span(C const& in)
     : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size())
   {
diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp
index 125b98c4a67..9d8e3cf2fa6 100644
--- a/cpp/src/utilities/host_memory.cpp
+++ b/cpp/src/utilities/host_memory.cpp
@@ -115,12 +115,19 @@ class fixed_pinned_pool_memory_resource {
     return !operator==(other);
   }
 
-  friend void get_property(fixed_pinned_pool_memory_resource const&,
+  // clang-tidy will complain about this function because it is completely
+  // unused at runtime and only exist for tag introspection by CCCL, so we
+  // ignore linting. This masks a real issue if we ever want to compile with
+  // clang, though, which is that the function will actually be compiled out by
+  // clang. If cudf were ever to try to support clang as a compile we would
+  // need to force the compiler to emit this symbol. The same goes for the
+  // other get_property definitions in this file.
+  friend void get_property(fixed_pinned_pool_memory_resource const&,  // NOLINT
                            cuda::mr::device_accessible) noexcept
   {
   }
 
-  friend void get_property(fixed_pinned_pool_memory_resource const&,
+  friend void get_property(fixed_pinned_pool_memory_resource const&,  // NOLINT
                            cuda::mr::host_accessible) noexcept
   {
   }
@@ -235,7 +242,9 @@ class new_delete_memory_resource {
 
   bool operator!=(new_delete_memory_resource const& other) const { return !operator==(other); }
 
+  // NOLINTBEGIN
   friend void get_property(new_delete_memory_resource const&, cuda::mr::host_accessible) noexcept {}
+  // NOLINTEND
 };
 
 static_assert(cuda::mr::resource_with<new_delete_memory_resource, cuda::mr::host_accessible>,
diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp
index 06e0d193d80..aa5b49567e6 100644
--- a/cpp/tests/binaryop/binop-compiled-test.cpp
+++ b/cpp/tests/binaryop/binop-compiled-test.cpp
@@ -557,7 +557,11 @@ auto NullOp_Result(cudf::column_view lhs, cudf::column_view rhs)
   std::transform(thrust::make_counting_iterator(0),
                  thrust::make_counting_iterator(lhs.size()),
                  result.begin(),
-                 [&lhs_data, &lhs_mask, &rhs_data, &rhs_mask, &result_mask](auto i) -> TypeOut {
+                 [&lhs_data    = lhs_data,
+                  &lhs_mask    = lhs_mask,
+                  &rhs_data    = rhs_data,
+                  &rhs_mask    = rhs_mask,
+                  &result_mask = result_mask](auto i) -> TypeOut {
                    auto lhs_valid    = lhs_mask.data() and cudf::bit_is_set(lhs_mask.data(), i);
                    auto rhs_valid    = rhs_mask.data() and cudf::bit_is_set(rhs_mask.data(), i);
                    bool output_valid = lhs_valid or rhs_valid;
diff --git a/cpp/tests/binaryop/util/operation.h b/cpp/tests/binaryop/util/operation.h
index d36b48d666a..ef1ccfccab5 100644
--- a/cpp/tests/binaryop/util/operation.h
+++ b/cpp/tests/binaryop/util/operation.h
@@ -100,7 +100,7 @@ struct Mul {
             std::enable_if_t<(cudf::is_duration_t<LhsT>::value && std::is_integral_v<RhsT>) ||
                                (cudf::is_duration_t<RhsT>::value && std::is_integral_v<LhsT>),
                              void>* = nullptr>
-  OutT DurationProduct(LhsT x, RhsT y) const
+  [[nodiscard]] OutT DurationProduct(LhsT x, RhsT y) const
   {
     return x * y;
   }
@@ -128,7 +128,7 @@ struct Div {
     typename LhsT,
     typename RhsT,
     std::enable_if_t<(std::is_integral_v<RhsT> || cudf::is_duration<RhsT>()), void>* = nullptr>
-  OutT DurationDivide(LhsT x, RhsT y) const
+  [[nodiscard]] OutT DurationDivide(LhsT x, RhsT y) const
   {
     return x / y;
   }
diff --git a/cpp/tests/column/column_test.cpp b/cpp/tests/column/column_test.cpp
index 14b4197de71..631f5150829 100644
--- a/cpp/tests/column/column_test.cpp
+++ b/cpp/tests/column/column_test.cpp
@@ -340,7 +340,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorNoMask)
 
   cudf::column moved_to{std::move(original)};
 
-  EXPECT_EQ(0, original.size());
+  EXPECT_EQ(0, original.size());  // NOLINT
   EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type());
 
   verify_column_views(moved_to);
@@ -359,7 +359,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask)
   cudf::column moved_to{std::move(original)};
   verify_column_views(moved_to);
 
-  EXPECT_EQ(0, original.size());
+  EXPECT_EQ(0, original.size());  // NOLINT
   EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type());
 
   // Verify move
diff --git a/cpp/tests/copying/slice_tests.cpp b/cpp/tests/copying/slice_tests.cpp
index bebd3d25610..aef0d4ad78a 100644
--- a/cpp/tests/copying/slice_tests.cpp
+++ b/cpp/tests/copying/slice_tests.cpp
@@ -29,6 +29,7 @@
 #include <cudf/utilities/type_dispatcher.hpp>
 #include <cudf/wrappers/timestamps.hpp>
 
+#include <array>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -370,11 +371,12 @@ TEST_F(SliceStringTableTest, StringWithNulls)
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
-  cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids},
-                                              {strings[1].begin(), strings[1].end(), valids}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
+  std::array<cudf::test::strings_column_wrapper, 2> sw{
+    {{strings[0].begin(), strings[0].end(), valids},
+     {strings[1].begin(), strings[1].end(), valids}}};
 
   std::vector<std::unique_ptr<cudf::column>> scols;
   scols.push_back(sw[0].release());
diff --git a/cpp/tests/copying/slice_tests.cuh b/cpp/tests/copying/slice_tests.cuh
index a180740f143..1e037294527 100644
--- a/cpp/tests/copying/slice_tests.cuh
+++ b/cpp/tests/copying/slice_tests.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022, NVIDIA CORPORATION.
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -148,7 +148,7 @@ std::vector<cudf::table> create_expected_tables(cudf::size_type num_cols,
       }
     }
 
-    result.push_back(cudf::table(std::move(cols)));
+    result.emplace_back(std::move(cols));
   }
 
   return result;
@@ -163,13 +163,12 @@ inline std::vector<cudf::test::strings_column_wrapper> create_expected_string_co
 
   for (unsigned long index = 0; index < indices.size(); index += 2) {
     if (not nullable) {
-      result.push_back(cudf::test::strings_column_wrapper(strings.begin() + indices[index],
-                                                          strings.begin() + indices[index + 1]));
+      result.emplace_back(strings.begin() + indices[index], strings.begin() + indices[index + 1]);
     } else {
       auto valids = cudf::detail::make_counting_transform_iterator(
         indices[index], [](auto i) { return i % 2 == 0; });
-      result.push_back(cudf::test::strings_column_wrapper(
-        strings.begin() + indices[index], strings.begin() + indices[index + 1], valids));
+      result.emplace_back(
+        strings.begin() + indices[index], strings.begin() + indices[index + 1], valids);
     }
   }
 
@@ -184,16 +183,16 @@ inline std::vector<cudf::test::strings_column_wrapper> create_expected_string_co
   std::vector<cudf::test::strings_column_wrapper> result = {};
 
   for (unsigned long index = 0; index < indices.size(); index += 2) {
-    result.push_back(cudf::test::strings_column_wrapper(strings.begin() + indices[index],
-                                                        strings.begin() + indices[index + 1],
-                                                        validity.begin() + indices[index]));
+    result.emplace_back(strings.begin() + indices[index],
+                        strings.begin() + indices[index + 1],
+                        validity.begin() + indices[index]);
   }
 
   return result;
 }
 
 inline std::vector<cudf::table> create_expected_string_tables(
-  std::vector<std::string> const strings[2],
+  std::vector<std::vector<std::string>> const strings,
   std::vector<cudf::size_type> const& indices,
   bool nullable)
 {
@@ -216,7 +215,7 @@ inline std::vector<cudf::table> create_expected_string_tables(
       }
     }
 
-    result.push_back(cudf::table(std::move(cols)));
+    result.emplace_back(std::move(cols));
   }
 
   return result;
diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp
index ee3e7da5e0f..b56b0f2d3f8 100644
--- a/cpp/tests/copying/split_tests.cpp
+++ b/cpp/tests/copying/split_tests.cpp
@@ -35,6 +35,7 @@
 #include <thrust/iterator/counting_iterator.h>
 #include <thrust/iterator/transform_iterator.h>
 
+#include <array>
 #include <stdexcept>
 #include <string>
 #include <vector>
@@ -135,7 +136,7 @@ std::vector<cudf::table> create_expected_tables_for_splits(
 }
 
 std::vector<cudf::table> create_expected_string_tables_for_splits(
-  std::vector<std::string> const strings[2],
+  std::vector<std::vector<std::string>> const strings,
   std::vector<cudf::size_type> const& splits,
   bool nullable)
 {
@@ -144,8 +145,8 @@ std::vector<cudf::table> create_expected_string_tables_for_splits(
 }
 
 std::vector<cudf::table> create_expected_string_tables_for_splits(
-  std::vector<std::string> const strings[2],
-  std::vector<bool> const validity[2],
+  std::vector<std::vector<std::string>> const strings,
+  std::vector<std::vector<bool>> const validity,
   std::vector<cudf::size_type> const& splits)
 {
   std::vector<cudf::size_type> indices = splits_to_indices(splits, strings[0].size());
@@ -627,11 +628,12 @@ void split_string_with_invalids(SplitFunc Split,
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
-  cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids},
-                                              {strings[1].begin(), strings[1].end(), valids}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
+  std::array<cudf::test::strings_column_wrapper, 2> sw{
+    {{strings[0].begin(), strings[0].end(), valids},
+     {strings[1].begin(), strings[1].end(), valids}}};
 
   std::vector<std::unique_ptr<cudf::column>> scols;
   scols.push_back(sw[0].release());
@@ -658,11 +660,12 @@ void split_empty_output_strings_column_value(SplitFunc Split,
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
-  cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids},
-                                              {strings[1].begin(), strings[1].end(), valids}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
+  std::array<cudf::test::strings_column_wrapper, 2> sw{
+    {{strings[0].begin(), strings[0].end(), valids},
+     {strings[1].begin(), strings[1].end(), valids}}};
 
   std::vector<std::unique_ptr<cudf::column>> scols;
   scols.push_back(sw[0].release());
@@ -684,9 +687,9 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare)
   auto valids =
     cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
 
   std::vector<cudf::size_type> splits{2, 5, 9};
 
@@ -699,16 +702,17 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare)
     EXPECT_NO_THROW(Split(empty_table, splits));
   }
 
-  cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), no_valids},
-                                              {strings[1].begin(), strings[1].end(), valids}};
+  std::array<cudf::test::strings_column_wrapper, 2> sw{
+    {{strings[0].begin(), strings[0].end(), no_valids},
+     {strings[1].begin(), strings[1].end(), valids}}};
   std::vector<std::unique_ptr<cudf::column>> scols;
   scols.push_back(sw[0].release());
   scols.push_back(sw[1].release());
   cudf::table src_table(std::move(scols));
   auto result = Split(src_table, splits);
 
-  std::vector<bool> validity_masks[2] = {std::vector<bool>(strings[0].size()),
-                                         std::vector<bool>(strings[0].size())};
+  std::vector<std::vector<bool>> validity_masks{std::vector<bool>(strings[0].size()),
+                                                std::vector<bool>(strings[0].size())};
   std::generate(
     validity_masks[1].begin(), validity_masks[1].end(), [i = 0]() mutable { return i++ % 2 == 0; });
 
@@ -1913,9 +1917,9 @@ TEST_F(ContiguousSplitTableCornerCases, MixedColumnTypes)
   cudf::size_type start = 0;
   auto valids = cudf::detail::make_counting_transform_iterator(start, [](auto i) { return true; });
 
-  std::vector<std::string> strings[2] = {
-    {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
-    {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
+  std::vector<std::vector<std::string>> strings{
+    {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
+     {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
 
   std::vector<std::unique_ptr<cudf::column>> cols;
 
@@ -2377,7 +2381,7 @@ TEST_F(ContiguousSplitTableCornerCases, OutBufferToSmall)
 {
   // internally, contiguous split chunks GPU work in 1MB contiguous copies
   // so the output buffer must be 1MB or larger.
-  EXPECT_THROW(cudf::chunked_pack::create({}, 1 * 1024), cudf::logic_error);
+  EXPECT_THROW(auto _ = cudf::chunked_pack::create({}, 1 * 1024), cudf::logic_error);
 }
 
 TEST_F(ContiguousSplitTableCornerCases, ChunkSpanTooSmall)
diff --git a/cpp/tests/hashing/murmurhash3_x64_128_test.cpp b/cpp/tests/hashing/murmurhash3_x64_128_test.cpp
index 4fb8f78b558..0e68050f935 100644
--- a/cpp/tests/hashing/murmurhash3_x64_128_test.cpp
+++ b/cpp/tests/hashing/murmurhash3_x64_128_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2023-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -22,8 +22,6 @@
 
 #include <cudf/hashing.hpp>
 
-constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS};
-
 using NumericTypesNoBools =
   cudf::test::Concat<cudf::test::IntegralTypesNotBool, cudf::test::FloatingPointTypes>;
 
diff --git a/cpp/tests/hashing/sha256_test.cpp b/cpp/tests/hashing/sha256_test.cpp
index cc95c7a2f0f..8bc47c92c6b 100644
--- a/cpp/tests/hashing/sha256_test.cpp
+++ b/cpp/tests/hashing/sha256_test.cpp
@@ -23,8 +23,6 @@
 #include <cudf/hashing.hpp>
 #include <cudf/utilities/error.hpp>
 
-constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS};
-
 class SHA256HashTest : public cudf::test::BaseFixture {};
 
 TEST_F(SHA256HashTest, EmptyTable)
diff --git a/cpp/tests/interop/from_arrow_device_test.cpp b/cpp/tests/interop/from_arrow_device_test.cpp
index a4dc7531765..2151ec6e22f 100644
--- a/cpp/tests/interop/from_arrow_device_test.cpp
+++ b/cpp/tests/interop/from_arrow_device_test.cpp
@@ -270,9 +270,9 @@ TEST_F(FromArrowDeviceTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
@@ -414,9 +414,9 @@ TEST_F(FromArrowDeviceTest, DictionaryIndicesType)
 {
   std::vector<std::unique_ptr<cudf::column>> columns;
   auto col = cudf::test::fixed_width_column_wrapper<int64_t>({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1});
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
+  columns.emplace_back(cudf::dictionary::encode(col));
+  columns.emplace_back(cudf::dictionary::encode(col));
+  columns.emplace_back(cudf::dictionary::encode(col));
 
   cudf::table expected_table(std::move(columns));
   cudf::table_view expected_table_view = expected_table.view();
diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp
index cbfa4911c3c..ef9936b214c 100644
--- a/cpp/tests/interop/from_arrow_host_test.cpp
+++ b/cpp/tests/interop/from_arrow_host_test.cpp
@@ -309,9 +309,9 @@ TEST_F(FromArrowHostDeviceTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
index 81c406c0faf..6e742b9e4cf 100644
--- a/cpp/tests/interop/from_arrow_test.cpp
+++ b/cpp/tests/interop/from_arrow_test.cpp
@@ -52,7 +52,7 @@ std::unique_ptr<cudf::table> get_cudf_table()
                          .release());
   auto col4 = cudf::test::fixed_width_column_wrapper<int32_t>({1, 2, 5, 2, 7},
                                                               {true, false, true, true, true});
-  columns.emplace_back(std::move(cudf::dictionary::encode(col4)));
+  columns.emplace_back(cudf::dictionary::encode(col4));
   columns.emplace_back(cudf::test::fixed_width_column_wrapper<bool>(
                          {true, false, true, false, true}, {true, false, true, true, false})
                          .release());
@@ -339,9 +339,9 @@ TEST_F(FromArrowTest, DictionaryIndicesType)
   std::vector<std::unique_ptr<cudf::column>> columns;
   auto col = cudf::test::fixed_width_column_wrapper<int64_t>({1, 2, 5, 2, 7},
                                                              {true, false, true, true, true});
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
-  columns.emplace_back(std::move(cudf::dictionary::encode(col)));
+  columns.emplace_back(cudf::dictionary::encode(col));
+  columns.emplace_back(cudf::dictionary::encode(col));
+  columns.emplace_back(cudf::dictionary::encode(col));
 
   cudf::table expected_table(std::move(columns));
 
diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp
index 51216a8512c..7ba586461dc 100644
--- a/cpp/tests/interop/to_arrow_device_test.cpp
+++ b/cpp/tests/interop/to_arrow_device_test.cpp
@@ -55,7 +55,7 @@ get_nanoarrow_cudf_table(cudf::size_type length)
   auto col4 = cudf::test::fixed_width_column_wrapper<int64_t>(
     test_data.int64_data.begin(), test_data.int64_data.end(), test_data.validity.begin());
   auto dict_col = cudf::dictionary::encode(col4);
-  columns.emplace_back(std::move(cudf::dictionary::encode(col4)));
+  columns.emplace_back(cudf::dictionary::encode(col4));
   columns.emplace_back(cudf::test::fixed_width_column_wrapper<bool>(test_data.bool_data.begin(),
                                                                     test_data.bool_data.end(),
                                                                     test_data.bool_validity.begin())
@@ -82,8 +82,8 @@ get_nanoarrow_cudf_table(cudf::size_type length)
       test_data.string_data.begin(), test_data.string_data.end(), test_data.validity.begin())
       .release();
   vector_of_columns cols;
-  cols.push_back(move(int_column));
-  cols.push_back(move(str_column));
+  cols.push_back(std::move(int_column));
+  cols.push_back(std::move(str_column));
   auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper<bool>(
     test_data.bool_data_validity.begin(), test_data.bool_data_validity.end()));
   columns.emplace_back(
@@ -575,9 +575,9 @@ TEST_F(ToArrowDeviceTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp
index fc0ed6c9352..fcb4433b42e 100644
--- a/cpp/tests/interop/to_arrow_host_test.cpp
+++ b/cpp/tests/interop/to_arrow_host_test.cpp
@@ -436,9 +436,9 @@ TEST_F(ToArrowHostDeviceTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp
index 90ae12cdd90..a6aa4b22eca 100644
--- a/cpp/tests/interop/to_arrow_test.cpp
+++ b/cpp/tests/interop/to_arrow_test.cpp
@@ -90,7 +90,7 @@ std::pair<std::unique_ptr<cudf::table>, std::shared_ptr<arrow::Table>> get_table
   auto col4 = cudf::test::fixed_width_column_wrapper<int64_t>(
     int64_data.begin(), int64_data.end(), validity.begin());
   auto dict_col = cudf::dictionary::encode(col4);
-  columns.emplace_back(std::move(cudf::dictionary::encode(col4)));
+  columns.emplace_back(cudf::dictionary::encode(col4));
   columns.emplace_back(cudf::test::fixed_width_column_wrapper<bool>(
                          bool_data.begin(), bool_data.end(), bool_validity.begin())
                          .release());
@@ -112,8 +112,8 @@ std::pair<std::unique_ptr<cudf::table>, std::shared_ptr<arrow::Table>> get_table
     cudf::test::strings_column_wrapper(string_data.begin(), string_data.end(), validity.begin())
       .release();
   vector_of_columns cols;
-  cols.push_back(move(int_column));
-  cols.push_back(move(str_column));
+  cols.push_back(std::move(int_column));
+  cols.push_back(std::move(str_column));
   auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper<bool>(
     bool_data_validity.begin(), bool_data_validity.end()));
   columns.emplace_back(
@@ -294,9 +294,9 @@ TEST_F(ToArrowTest, StructColumn)
   auto int_col2 =
     cudf::test::fixed_width_column_wrapper<int32_t, int32_t>{{12, 24, 47}, {1, 0, 1}}.release();
   auto bool_col = cudf::test::fixed_width_column_wrapper<bool>{{true, true, false}}.release();
-  auto list_col =
-    cudf::test::lists_column_wrapper<int64_t>({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})
-      .release();
+  auto list_col = cudf::test::lists_column_wrapper<int64_t>(
+                    {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}})  // NOLINT
+                    .release();
   vector_of_columns cols2;
   cols2.push_back(std::move(str_col2));
   cols2.push_back(std::move(int_col2));
@@ -438,7 +438,7 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge)
     auto const schema               = std::make_shared<arrow::Schema>(schema_vector);
     auto const expected_arrow_table = arrow::Table::Make(schema, {arr});
 
-    std::vector<cudf::column_metadata> const metadata = {{"a"}};
+    std::vector<cudf::column_metadata> const metadata = {{"a"}};  // NOLINT
     ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table));
   }
 }
diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp
index 840cf263ed9..54262dc3b44 100644
--- a/cpp/tests/io/comp/decomp_test.cpp
+++ b/cpp/tests/io/comp/decomp_test.cpp
@@ -39,19 +39,19 @@ using cudf::device_span;
  */
 template <typename Decompressor>
 struct DecompressTest : public cudf::test::BaseFixture {
-  std::vector<uint8_t> vector_from_string(char const* str) const
+  [[nodiscard]] std::vector<uint8_t> vector_from_string(std::string const str) const
   {
-    return std::vector<uint8_t>(reinterpret_cast<uint8_t const*>(str),
-                                reinterpret_cast<uint8_t const*>(str) + strlen(str));
+    return {reinterpret_cast<uint8_t const*>(str.c_str()),
+            reinterpret_cast<uint8_t const*>(str.c_str()) + strlen(str.c_str())};
   }
 
-  void Decompress(std::vector<uint8_t>* decompressed,
+  void Decompress(std::vector<uint8_t>& decompressed,
                   uint8_t const* compressed,
                   size_t compressed_size)
   {
     auto stream = cudf::get_default_stream();
     rmm::device_buffer src{compressed, compressed_size, stream};
-    rmm::device_uvector<uint8_t> dst{decompressed->size(), stream};
+    rmm::device_uvector<uint8_t> dst{decompressed.size(), stream};
 
     cudf::detail::hostdevice_vector<device_span<uint8_t const>> inf_in(1, stream);
     inf_in[0] = {static_cast<uint8_t const*>(src.data()), src.size()};
@@ -67,7 +67,7 @@ struct DecompressTest : public cudf::test::BaseFixture {
 
     static_cast<Decompressor*>(this)->dispatch(inf_in, inf_out, inf_stat);
     CUDF_CUDA_TRY(cudaMemcpyAsync(
-      decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value()));
+      decompressed.data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value()));
     inf_stat.device_to_host_sync(stream);
     ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS);
   }
@@ -125,49 +125,57 @@ struct NvcompConfigTest : public cudf::test::BaseFixture {};
 
 TEST_F(GzipDecompressTest, HelloWorld)
 {
-  constexpr char uncompressed[]  = "hello world";
+  std::string const uncompressed{"hello world"};
+  // NOLINTBEGIN
   constexpr uint8_t compressed[] = {
     0x1f, 0x8b, 0x8,  0x0,  0x9,  0x63, 0x99, 0x5c, 0x2,  0xff, 0xcb, 0x48, 0xcd, 0xc9, 0xc9, 0x57,
     0x28, 0xcf, 0x2f, 0xca, 0x49, 0x1,  0x0,  0x85, 0x11, 0x4a, 0xd,  0xb,  0x0,  0x0,  0x0};
+  // NOLINTEND
 
   std::vector<uint8_t> input = vector_from_string(uncompressed);
   std::vector<uint8_t> output(input.size());
-  Decompress(&output, compressed, sizeof(compressed));
+  Decompress(output, compressed, sizeof(compressed));
   EXPECT_EQ(output, input);
 }
 
 TEST_F(SnappyDecompressTest, HelloWorld)
 {
-  constexpr char uncompressed[]  = "hello world";
+  std::string const uncompressed{"hello world"};
+  // NOLINTBEGIN
   constexpr uint8_t compressed[] = {
     0xb, 0x28, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64};
+  // NOLINTEND
 
   std::vector<uint8_t> input = vector_from_string(uncompressed);
   std::vector<uint8_t> output(input.size());
-  Decompress(&output, compressed, sizeof(compressed));
+  Decompress(output, compressed, sizeof(compressed));
   EXPECT_EQ(output, input);
 }
 
 TEST_F(SnappyDecompressTest, ShortLiteralAfterLongCopyAtStartup)
 {
-  constexpr char uncompressed[]  = "Aaaaaaaaaaaah!";
+  std::string const uncompressed{"Aaaaaaaaaaaah!"};
+  // NOLINTBEGIN
   constexpr uint8_t compressed[] = {14, 0x0, 'A', 0x0, 'a', (10 - 4) * 4 + 1, 1, 0x4, 'h', '!'};
+  // NOLINTEND
 
   std::vector<uint8_t> input = vector_from_string(uncompressed);
   std::vector<uint8_t> output(input.size());
-  Decompress(&output, compressed, sizeof(compressed));
+  Decompress(output, compressed, sizeof(compressed));
   EXPECT_EQ(output, input);
 }
 
 TEST_F(BrotliDecompressTest, HelloWorld)
 {
-  constexpr char uncompressed[]  = "hello world";
+  std::string const uncompressed{"hello world"};
+  // NOLINTBEGIN
   constexpr uint8_t compressed[] = {
     0xb, 0x5, 0x80, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x3};
+  // NOLINTEND
 
   std::vector<uint8_t> input = vector_from_string(uncompressed);
   std::vector<uint8_t> output(input.size());
-  Decompress(&output, compressed, sizeof(compressed));
+  Decompress(output, compressed, sizeof(compressed));
   EXPECT_EQ(output, input);
 }
 
diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp
index 0028dd946e3..b265dcf9273 100644
--- a/cpp/tests/io/csv_test.cpp
+++ b/cpp/tests/io/csv_test.cpp
@@ -63,9 +63,9 @@ auto dtype()
 
 template <typename T, typename SourceElementT = T>
 using column_wrapper =
-  typename std::conditional<std::is_same_v<T, cudf::string_view>,
-                            cudf::test::strings_column_wrapper,
-                            cudf::test::fixed_width_column_wrapper<T, SourceElementT>>::type;
+  std::conditional_t<std::is_same_v<T, cudf::string_view>,
+                     cudf::test::strings_column_wrapper,
+                     cudf::test::fixed_width_column_wrapper<T, SourceElementT>>;
 using column     = cudf::column;
 using table      = cudf::table;
 using table_view = cudf::table_view;
@@ -954,7 +954,7 @@ TEST_F(CsvReaderTest, Strings)
   ASSERT_EQ(type_id::STRING, view.column(1).type().id());
 
   expect_column_data_equal(
-    std::vector<std::string>{"abc def ghi", "\"jkl mno pqr\"", "stu \"\"vwx\"\" yz"},
+    std::vector<std::string>{"abc def ghi", "\"jkl mno pqr\"", R"(stu ""vwx"" yz)"},
     view.column(1));
 }
 
@@ -1014,7 +1014,7 @@ TEST_F(CsvReaderTest, StringsQuotesIgnored)
   ASSERT_EQ(type_id::STRING, view.column(1).type().id());
 
   expect_column_data_equal(
-    std::vector<std::string>{"\"abcdef ghi\"", "\"jkl \"\"mno\"\" pqr\"", "stu \"vwx\" yz"},
+    std::vector<std::string>{"\"abcdef ghi\"", R"("jkl ""mno"" pqr")", "stu \"vwx\" yz"},
     view.column(1));
 }
 
@@ -1830,7 +1830,7 @@ TEST_F(CsvReaderTest, StringsWithWriter)
 
   auto int_column = column_wrapper<int32_t>{10, 20, 30};
   auto string_column =
-    column_wrapper<cudf::string_view>{"abc def ghi", "\"jkl mno pqr\"", "stu \"\"vwx\"\" yz"};
+    column_wrapper<cudf::string_view>{"abc def ghi", "\"jkl mno pqr\"", R"(stu ""vwx"" yz)"};
   cudf::table_view input_table(std::vector<cudf::column_view>{int_column, string_column});
 
   // TODO add quoting style flag?
diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp
index 49ad0c408dc..cb6716f4a18 100644
--- a/cpp/tests/io/json/json_test.cpp
+++ b/cpp/tests/io/json/json_test.cpp
@@ -68,9 +68,9 @@ auto dtype()
 
 template <typename T, typename SourceElementT = T>
 using column_wrapper =
-  typename std::conditional<std::is_same_v<T, cudf::string_view>,
-                            cudf::test::strings_column_wrapper,
-                            cudf::test::fixed_width_column_wrapper<T, SourceElementT>>::type;
+  std::conditional_t<std::is_same_v<T, cudf::string_view>,
+                     cudf::test::strings_column_wrapper,
+                     cudf::test::fixed_width_column_wrapper<T, SourceElementT>>;
 
 cudf::test::TempDirTestEnvironment* const temp_env =
   static_cast<cudf::test::TempDirTestEnvironment*>(
diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp
index 89e704f3ed3..cce0adbf317 100644
--- a/cpp/tests/io/orc_test.cpp
+++ b/cpp/tests/io/orc_test.cpp
@@ -43,9 +43,9 @@
 
 template <typename T, typename SourceElementT = T>
 using column_wrapper =
-  typename std::conditional<std::is_same_v<T, cudf::string_view>,
-                            cudf::test::strings_column_wrapper,
-                            cudf::test::fixed_width_column_wrapper<T, SourceElementT>>::type;
+  std::conditional_t<std::is_same_v<T, cudf::string_view>,
+                     cudf::test::strings_column_wrapper,
+                     cudf::test::fixed_width_column_wrapper<T, SourceElementT>>;
 
 using str_col     = column_wrapper<cudf::string_view>;
 using bool_col    = column_wrapper<bool>;
@@ -1358,21 +1358,22 @@ TEST_P(OrcWriterTestStripes, StripeSize)
   cols.push_back(col.release());
   auto const expected = std::make_unique<table>(std::move(cols));
 
-  auto validate = [&](std::vector<char> const& orc_buffer) {
-    auto const expected_stripe_num =
-      std::max<cudf::size_type>(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes);
-    auto const stats = cudf::io::read_parsed_orc_statistics(
-      cudf::io::source_info(orc_buffer.data(), orc_buffer.size()));
-    EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num);
-
-    cudf::io::orc_reader_options in_opts =
-      cudf::io::orc_reader_options::builder(
-        cudf::io::source_info(orc_buffer.data(), orc_buffer.size()))
-        .use_index(false);
-    auto result = cudf::io::read_orc(in_opts);
-
-    CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view());
-  };
+  auto validate =
+    [&, &size_bytes = size_bytes, &size_rows = size_rows](std::vector<char> const& orc_buffer) {
+      auto const expected_stripe_num =
+        std::max<cudf::size_type>(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes);
+      auto const stats = cudf::io::read_parsed_orc_statistics(
+        cudf::io::source_info(orc_buffer.data(), orc_buffer.size()));
+      EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num);
+
+      cudf::io::orc_reader_options in_opts =
+        cudf::io::orc_reader_options::builder(
+          cudf::io::source_info(orc_buffer.data(), orc_buffer.size()))
+          .use_index(false);
+      auto result = cudf::io::read_orc(in_opts);
+
+      CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view());
+    };
 
   {
     std::vector<char> out_buffer_chunked;
diff --git a/cpp/tests/io/parquet_misc_test.cpp b/cpp/tests/io/parquet_misc_test.cpp
index 8b03e94191e..f1286a00d22 100644
--- a/cpp/tests/io/parquet_misc_test.cpp
+++ b/cpp/tests/io/parquet_misc_test.cpp
@@ -98,7 +98,7 @@ TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaListSliced)
   // list<T>
   constexpr int vals_per_row = 4;
   auto c1_offset_iter        = cudf::detail::make_counting_transform_iterator(
-    0, [vals_per_row](cudf::size_type idx) { return idx * vals_per_row; });
+    0, [](cudf::size_type idx) { return idx * vals_per_row; });
   cudf::test::fixed_width_column_wrapper<cudf::size_type> c1_offsets(c1_offset_iter,
                                                                      c1_offset_iter + num_rows + 1);
   cudf::test::fixed_width_column_wrapper<T> c1_vals(
diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp
index dc8e68b3a15..4a5309f3ba7 100644
--- a/cpp/tests/io/parquet_reader_test.cpp
+++ b/cpp/tests/io/parquet_reader_test.cpp
@@ -1189,15 +1189,12 @@ TEST_F(ParquetReaderTest, NestingOptimizationTest)
   cudf::test::fixed_width_column_wrapper<int> values(value_iter, value_iter + num_values, validity);
 
   // ~256k values with num_nesting_levels = 16
-  int total_values_produced = num_values;
-  auto prev_col             = values.release();
+  auto prev_col = values.release();
   for (int idx = 0; idx < num_nesting_levels; idx++) {
-    auto const depth    = num_nesting_levels - idx;
     auto const num_rows = (1 << (num_nesting_levels - idx));
 
     auto offsets_iter = cudf::detail::make_counting_transform_iterator(
-      0, [depth, rows_per_level](cudf::size_type i) { return i * rows_per_level; });
-    total_values_produced += (num_rows + 1);
+      0, [](cudf::size_type i) { return i * rows_per_level; });
 
     cudf::test::fixed_width_column_wrapper<cudf::size_type> offsets(offsets_iter,
                                                                     offsets_iter + num_rows + 1);
diff --git a/cpp/tests/io/parquet_v2_test.cpp b/cpp/tests/io/parquet_v2_test.cpp
index 7c305235ea6..a0b48f54854 100644
--- a/cpp/tests/io/parquet_v2_test.cpp
+++ b/cpp/tests/io/parquet_v2_test.cpp
@@ -1302,24 +1302,24 @@ TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls)
   table_view expected({col0, col1, col2, col3, col4, col5, col6, col7});
 
   std::array<int64_t, 9> expected_null_counts{4, 4, 4, 6, 4, 6, 4, 5, 11};
-  std::vector<int64_t> const expected_def_hists[] = {{1, 1, 2, 3},
-                                                     {1, 3, 10},
-                                                     {1, 1, 2, 10},
-                                                     {1, 1, 2, 2, 8},
-                                                     {1, 1, 1, 1, 10},
-                                                     {1, 1, 1, 1, 2, 8},
-                                                     {1, 3, 9},
-                                                     {1, 3, 1, 8},
-                                                     {1, 0, 4, 1, 1, 4, 9}};
-  std::vector<int64_t> const expected_rep_hists[] = {{4, 3},
-                                                     {4, 4, 6},
-                                                     {4, 4, 6},
-                                                     {4, 4, 6},
-                                                     {4, 4, 6},
-                                                     {4, 4, 6},
-                                                     {4, 4, 5},
-                                                     {4, 4, 5},
-                                                     {4, 6, 2, 8}};
+  std::vector<std::vector<int64_t>> const expected_def_hists = {{1, 1, 2, 3},
+                                                                {1, 3, 10},
+                                                                {1, 1, 2, 10},
+                                                                {1, 1, 2, 2, 8},
+                                                                {1, 1, 1, 1, 10},
+                                                                {1, 1, 1, 1, 2, 8},
+                                                                {1, 3, 9},
+                                                                {1, 3, 1, 8},
+                                                                {1, 0, 4, 1, 1, 4, 9}};
+  std::vector<std::vector<int64_t>> const expected_rep_hists = {{4, 3},
+                                                                {4, 4, 6},
+                                                                {4, 4, 6},
+                                                                {4, 4, 6},
+                                                                {4, 4, 6},
+                                                                {4, 4, 6},
+                                                                {4, 4, 5},
+                                                                {4, 4, 5},
+                                                                {4, 6, 2, 8}};
 
   auto const filepath = temp_env->get_temp_filepath("ColumnIndexListWithNulls.parquet");
   auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected)
diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp
index 8794f2ee304..6c5e9cdf07a 100644
--- a/cpp/tests/io/parquet_writer_test.cpp
+++ b/cpp/tests/io/parquet_writer_test.cpp
@@ -290,7 +290,8 @@ class custom_test_data_sink : public cudf::io::data_sink {
     CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file");
   }
 
-  ~custom_test_data_sink() override { flush(); }
+  // Marked as NOLINT because we are calling a virtual method in the destructor
+  ~custom_test_data_sink() override { flush(); }  // NOLINT
 
   void host_write(void const* data, size_t size) override
   {
@@ -981,13 +982,15 @@ TEST_F(ParquetWriterTest, CheckColumnIndexTruncation)
 
 TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation)
 {
-  std::vector<uint8_t> truncated_min[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe},
-                                          {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
-                                          {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}};
+  std::array<std::vector<uint8_t>, 3> truncated_min{
+    {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe},
+     {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
+     {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}};
 
-  std::vector<uint8_t> truncated_max[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff},
-                                          {0xff},
-                                          {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}};
+  std::array<std::vector<uint8_t>, 3> truncated_max{
+    {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff},
+     {0xff},
+     {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}};
 
   cudf::test::lists_column_wrapper<uint8_t> col0{
     {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}};
diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp
index 93754091b3f..178edc52dd3 100644
--- a/cpp/tests/join/distinct_join_tests.cpp
+++ b/cpp/tests/join/distinct_join_tests.cpp
@@ -314,7 +314,7 @@ TEST_F(DistinctJoinTest, EmptyBuildTableLeftJoin)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::NO>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   this->compare_to_reference(
     build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY);
@@ -362,7 +362,7 @@ TEST_F(DistinctJoinTest, EmptyProbeTableLeftJoin)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::NO>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   this->compare_to_reference(
     build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY);
@@ -398,7 +398,7 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::NO>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   this->compare_to_reference(
     build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY);
@@ -423,7 +423,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::NO>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   column_wrapper<int32_t> col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}};
   strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {true, true, false, true, true});
@@ -468,7 +468,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls)
 
   auto distinct_join = cudf::distinct_hash_join<cudf::has_nested::YES>{build.view(), probe.view()};
   auto result        = distinct_join.left_join();
-  auto gather_map    = std::pair{std::move(result), std::move(get_left_indices(result->size()))};
+  auto gather_map    = std::pair{std::move(result), get_left_indices(result->size())};
 
   auto col0_gold_names_col = strcol_wrapper{
     "Samuel Vimes", "Detritus", "Carrot Ironfoundersson", "Samuel Vimes", "Angua von Überwald"};
diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp
index 97979e79010..bea044496b3 100644
--- a/cpp/tests/merge/merge_string_test.cpp
+++ b/cpp/tests/merge/merge_string_test.cpp
@@ -97,7 +97,7 @@ TYPED_TEST(MergeStringTest, Merge1StringKeyColumns)
                                             "hi",
                                             "hj"});
 
-  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) {
+  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)
       return 0;
     else
@@ -296,7 +296,7 @@ TYPED_TEST(MergeStringTest, Merge1StringKeyNullColumns)
                                             true,
                                             false,
                                             false});
-  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) {
+  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)
       return 0;
     else
diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp
index 2e09f25b51f..6208d395f0a 100644
--- a/cpp/tests/merge/merge_test.cpp
+++ b/cpp/tests/merge/merge_test.cpp
@@ -349,7 +349,7 @@ TYPED_TEST(MergeTest_, Merge1KeyColumns)
   cudf::test::fixed_width_column_wrapper<TypeParam, typename decltype(seq_out1)::value_type>
     expectedDataWrap1(seq_out1, seq_out1 + outputRows);
 
-  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) {
+  auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8)
       return 0;
     else
@@ -452,7 +452,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns)
   cudf::size_type inputRows = 40;
 
   // data: 0  2  4  6 | valid: 1 1 1 0
-  auto sequence1       = cudf::detail::make_counting_transform_iterator(0, [inputRows](auto row) {
+  auto sequence1       = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
       return 0;  // <- no shortcut to this can avoid compiler errors
     } else {
@@ -465,7 +465,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns)
     leftColWrap1(sequence1, sequence1 + inputRows, valid_sequence1);
 
   // data: 1  3  5  7 | valid: 1 1 1 0
-  auto sequence2 = cudf::detail::make_counting_transform_iterator(0, [inputRows](auto row) {
+  auto sequence2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) {
     if (cudf::type_to_id<TypeParam>() == cudf::type_id::BOOL8) {
       return 1;
     } else
diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp
index 19996f827cf..bc0321bd40a 100644
--- a/cpp/tests/reductions/segmented_reduction_tests.cpp
+++ b/cpp/tests/reductions/segmented_reduction_tests.cpp
@@ -1092,11 +1092,10 @@ TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets)
   auto aggregates =
     std::vector<std::unique_ptr<cudf::segmented_reduce_aggregation,
                                 std::default_delete<cudf::segmented_reduce_aggregation>>>();
-  aggregates.push_back(std::move(cudf::make_max_aggregation<cudf::segmented_reduce_aggregation>()));
-  aggregates.push_back(std::move(cudf::make_min_aggregation<cudf::segmented_reduce_aggregation>()));
-  aggregates.push_back(std::move(cudf::make_sum_aggregation<cudf::segmented_reduce_aggregation>()));
-  aggregates.push_back(
-    std::move(cudf::make_product_aggregation<cudf::segmented_reduce_aggregation>()));
+  aggregates.push_back(cudf::make_max_aggregation<cudf::segmented_reduce_aggregation>());
+  aggregates.push_back(cudf::make_min_aggregation<cudf::segmented_reduce_aggregation>());
+  aggregates.push_back(cudf::make_sum_aggregation<cudf::segmented_reduce_aggregation>());
+  aggregates.push_back(cudf::make_product_aggregation<cudf::segmented_reduce_aggregation>());
 
   auto output_type = cudf::data_type{cudf::type_to_id<int32_t>()};
   for (auto&& agg : aggregates) {
diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp
index 1858cd7782e..b12bf08520f 100644
--- a/cpp/tests/replace/replace_tests.cpp
+++ b/cpp/tests/replace/replace_tests.cpp
@@ -356,7 +356,7 @@ void test_replace(cudf::host_span<T const> input_column,
 
   for (size_t i = 0; i < values_to_replace_column.size(); i++) {
     size_t k  = 0;
-    auto pred = [=, &k, &reference_result, &expected_valid, &isReplaced](T element) {
+    auto pred = [=, &k, &expected_valid, &isReplaced](T element) {
       bool toBeReplaced = false;
       if (!isReplaced[k]) {
         if (!input_has_nulls || expected_valid[k]) {
@@ -503,7 +503,7 @@ TYPED_TEST(ReplaceTest, LargeScaleReplaceTest)
   const size_t REPLACE_SIZE = 10000;
 
   thrust::host_vector<TypeParam> input_column(DATA_SIZE);
-  std::generate(std::begin(input_column), std::end(input_column), [REPLACE_SIZE]() {
+  std::generate(std::begin(input_column), std::end(input_column), []() {
     return std::rand() % (REPLACE_SIZE);
   });
 
diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp
index f702dc78371..165e0347785 100644
--- a/cpp/tests/rolling/collect_ops_test.cpp
+++ b/cpp/tests/rolling/collect_ops_test.cpp
@@ -214,7 +214,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowHonoursMinPeriods)
                          *cudf::make_collect_list_aggregation<cudf::rolling_aggregation>());
   auto expected_result_2 = cudf::test::lists_column_wrapper<T, int32_t>{
     {{}, {0, 1, 2, 3}, {1, 2, 3, 4}, {2, 3, 4, 5}, {}, {}},
-    cudf::detail::make_counting_transform_iterator(0, [num_elements](auto i) {
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
       return i != 0 && i < 4;
     })}.release();
 
@@ -338,7 +338,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods)
       cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 4, 8, 12, 12, 12}.release();
     auto expected_num_rows = expected_offsets->size() - 1;
     auto null_mask_iter    = cudf::detail::make_counting_transform_iterator(
-      cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; });
+      cudf::size_type{0}, [](auto i) { return i > 0 && i < 4; });
 
     auto [null_mask, null_count] =
       cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows);
@@ -373,7 +373,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods)
       cudf::test::fixed_width_column_wrapper<cudf::size_type>{0, 0, 3, 5, 8, 8, 8}.release();
     auto expected_num_rows = expected_offsets->size() - 1;
     auto null_mask_iter    = cudf::detail::make_counting_transform_iterator(
-      cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; });
+      cudf::size_type{0}, [](auto i) { return i > 0 && i < 4; });
 
     auto [null_mask, null_count] =
       cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows);
@@ -1499,7 +1499,7 @@ TYPED_TEST(TypedCollectSetTest, RollingWindowHonoursMinPeriods)
                         *cudf::make_collect_set_aggregation<cudf::rolling_aggregation>());
   auto expected_result_2 = cudf::test::lists_column_wrapper<T, int32_t>{
     {{}, {0, 1, 2}, {1, 2, 4}, {2, 4, 5}, {}, {}},
-    cudf::detail::make_counting_transform_iterator(0, [num_elements](auto i) {
+    cudf::detail::make_counting_transform_iterator(0, [](auto i) {
       return i != 0 && i < 4;
     })}.release();
 
diff --git a/cpp/tests/rolling/offset_row_window_test.cpp b/cpp/tests/rolling/offset_row_window_test.cpp
index ec726878b34..0eaab0c9f7a 100644
--- a/cpp/tests/rolling/offset_row_window_test.cpp
+++ b/cpp/tests/rolling/offset_row_window_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -41,6 +41,11 @@ using cudf::test::iterators::nulls_at;
 
 auto constexpr null = int32_t{0};  // NULL representation for int32_t;
 
+// clang-tidy doesn't think std::transform can handle a
+// thrust::constant_iterator, so this is a workaround that uses nulls_at
+// instead of no_nulls
+auto no_nulls_list() { return nulls_at({}); }
+
 struct OffsetRowWindowTest : public cudf::test::BaseFixture {
   static ints_column const _keys;    // {0, 0, 0, 0, 0, 0, 1, 1, 1, 1};
   static ints_column const _values;  // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9};
@@ -210,7 +215,8 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_0_to_2)
 
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(
     *run_rolling(*AGG_COLLECT_LIST),
-    lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, no_nulls});
+    lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}},
+                 no_nulls_list()});
 }
 
 TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2)
@@ -250,7 +256,7 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2)
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(
     *run_rolling(*AGG_COLLECT_LIST),
     lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9}, {}},
-                 no_nulls});
+                 no_nulls_list()});
 }
 
 // To test that preceding bounds are clamped correctly at group boundaries.
diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp
index c2c22986975..6e0dc16dca9 100644
--- a/cpp/tests/rolling/rolling_test.cpp
+++ b/cpp/tests/rolling/rolling_test.cpp
@@ -541,7 +541,7 @@ class RollingTest : public cudf::test::BaseFixture {
 
     agg_op op;
     for (cudf::size_type i = 0; i < num_rows; i++) {
-      OutputType val = agg_op::template identity<OutputType>();
+      auto val = agg_op::template identity<OutputType>();
 
       // load sizes
       min_periods = std::max(min_periods, 1);  // at least one observation is required
diff --git a/cpp/tests/scalar/scalar_test.cpp b/cpp/tests/scalar/scalar_test.cpp
index 2d37de920d5..2b79911a95a 100644
--- a/cpp/tests/scalar/scalar_test.cpp
+++ b/cpp/tests/scalar/scalar_test.cpp
@@ -190,7 +190,7 @@ TEST_F(ListScalarTest, MoveConstructorNonNested)
 
   EXPECT_EQ(mask_ptr, s2.validity_data());
   EXPECT_EQ(data_ptr, s2.view().data<int32_t>());
-  EXPECT_EQ(s.view().data<int32_t>(), nullptr);
+  EXPECT_EQ(s.view().data<int32_t>(), nullptr);  // NOLINT
 }
 
 TEST_F(ListScalarTest, MoveConstructorNested)
@@ -205,8 +205,8 @@ TEST_F(ListScalarTest, MoveConstructorNested)
   EXPECT_EQ(mask_ptr, s2.validity_data());
   EXPECT_EQ(offset_ptr, s2.view().child(0).data<cudf::size_type>());
   EXPECT_EQ(data_ptr, s2.view().child(1).data<int32_t>());
-  EXPECT_EQ(s.view().data<int32_t>(), nullptr);
-  EXPECT_EQ(s.view().num_children(), 0);
+  EXPECT_EQ(s.view().data<int32_t>(), nullptr);  // NOLINT
+  EXPECT_EQ(s.view().num_children(), 0);         // NOLINT
 }
 
 struct StructScalarTest : public cudf::test::BaseFixture {};
diff --git a/cpp/tests/search/search_list_test.cpp b/cpp/tests/search/search_list_test.cpp
index 48711c21715..7584003e800 100644
--- a/cpp/tests/search/search_list_test.cpp
+++ b/cpp/tests/search/search_list_test.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -35,7 +35,6 @@ using strings_col = cudf::test::strings_column_wrapper;
 
 constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR};
 constexpr int32_t null{0};  // Mark for null child elements at the current level
-constexpr int32_t XXX{0};   // Mark for null elements at all levels
 
 using TestTypes = cudf::test::Concat<cudf::test::IntegralTypesNotBool,
                                      cudf::test::FloatingPointTypes,
diff --git a/cpp/tests/sort/sort_test.cpp b/cpp/tests/sort/sort_test.cpp
index e84275f41ef..6a35e977b46 100644
--- a/cpp/tests/sort/sort_test.cpp
+++ b/cpp/tests/sort/sort_test.cpp
@@ -1087,7 +1087,7 @@ TEST_F(SortCornerTest, WithEmptyStructColumn)
   child_columns2.push_back(std::move(child_col_1));
   int_col col4{{5, 4, 3, 2, 1, 0}};
   std::vector<std::unique_ptr<cudf::column>> grand_child;
-  grand_child.push_back(std::move(col4.release()));
+  grand_child.push_back(col4.release());
   auto child_col_2 = cudf::make_structs_column(6, std::move(grand_child), 0, rmm::device_buffer{});
   child_columns2.push_back(std::move(child_col_2));
   auto struct_col3 =
diff --git a/cpp/tests/stream_compaction/unique_tests.cpp b/cpp/tests/stream_compaction/unique_tests.cpp
index 4d7d23dc881..d5b6915b520 100644
--- a/cpp/tests/stream_compaction/unique_tests.cpp
+++ b/cpp/tests/stream_compaction/unique_tests.cpp
@@ -43,7 +43,6 @@ auto constexpr KEEP_ANY     = cudf::duplicate_keep_option::KEEP_ANY;
 auto constexpr KEEP_FIRST   = cudf::duplicate_keep_option::KEEP_FIRST;
 auto constexpr KEEP_LAST    = cudf::duplicate_keep_option::KEEP_LAST;
 auto constexpr KEEP_NONE    = cudf::duplicate_keep_option::KEEP_NONE;
-auto constexpr NULL_EQUAL   = cudf::null_equality::EQUAL;
 auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL;
 
 using int32s_col  = cudf::test::fixed_width_column_wrapper<int32_t>;
diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp
index 443f4548b2c..07b2d77cc04 100644
--- a/cpp/tests/streams/stream_compaction_test.cpp
+++ b/cpp/tests/streams/stream_compaction_test.cpp
@@ -29,8 +29,6 @@
 
 #include <cmath>
 
-auto constexpr null{0};  // null at current level
-auto constexpr XXX{0};   // null pushed down from parent level
 auto constexpr NaN          = std::numeric_limits<double>::quiet_NaN();
 auto constexpr KEEP_ANY     = cudf::duplicate_keep_option::KEEP_ANY;
 auto constexpr KEEP_FIRST   = cudf::duplicate_keep_option::KEEP_FIRST;
diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp
index ce5f68de3c9..26bcfe8028d 100644
--- a/cpp/tests/strings/integers_tests.cpp
+++ b/cpp/tests/strings/integers_tests.cpp
@@ -30,6 +30,7 @@
 #include <thrust/host_vector.h>
 #include <thrust/iterator/transform_iterator.h>
 
+#include <array>
 #include <string>
 #include <vector>
 
@@ -425,7 +426,7 @@ TYPED_TEST(StringsIntegerConvertTest, IntegerToHex)
     if (v == 0) { return std::string("00"); }
     // special handling for single-byte types
     if constexpr (std::is_same_v<TypeParam, int8_t> || std::is_same_v<TypeParam, uint8_t>) {
-      char const hex_digits[16] = {
+      std::array const hex_digits = {
         '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
       std::string str;
       str += hex_digits[(v & 0xF0) >> 4];
diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp
index f0010fc1ed9..219bd6d8b01 100644
--- a/cpp/tests/structs/structs_column_tests.cpp
+++ b/cpp/tests/structs/structs_column_tests.cpp
@@ -635,9 +635,8 @@ TEST_F(StructColumnWrapperTest, TestStructsColumnWithEmptyChild)
   auto mask_vec = std::vector<bool>{true, false, false};
   auto [null_mask, null_count] =
     cudf::test::detail::make_null_mask(mask_vec.begin(), mask_vec.end());
-  auto structs_col =
-    cudf::make_structs_column(num_rows, std::move(cols), null_count, std::move(null_mask));
-  EXPECT_NO_THROW(structs_col->view());
+  EXPECT_NO_THROW(auto structs_col = cudf::make_structs_column(
+                    num_rows, std::move(cols), null_count, std::move(null_mask)));
 }
 
 CUDF_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/transform/bools_to_mask_test.cpp b/cpp/tests/transform/bools_to_mask_test.cpp
index 215ca158f37..2684123c08a 100644
--- a/cpp/tests/transform/bools_to_mask_test.cpp
+++ b/cpp/tests/transform/bools_to_mask_test.cpp
@@ -32,7 +32,7 @@ struct MaskToNullTest : public cudf::test::BaseFixture {
   {
     cudf::test::fixed_width_column_wrapper<bool> input_column(
       input.begin(), input.end(), val.begin());
-    std::transform(val.begin(), val.end(), input.begin(), input.begin(), std::logical_and<bool>());
+    std::transform(val.begin(), val.end(), input.begin(), input.begin(), std::logical_and<>());
 
     auto sample = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; });
 
diff --git a/cpp/tests/transform/integration/unary_transform_test.cpp b/cpp/tests/transform/integration/unary_transform_test.cpp
index 1785848ec77..0bdf5b321ac 100644
--- a/cpp/tests/transform/integration/unary_transform_test.cpp
+++ b/cpp/tests/transform/integration/unary_transform_test.cpp
@@ -47,7 +47,7 @@ void test_udf(char const* udf, Op op, Data data_init, cudf::size_type size, bool
 TEST_F(UnaryOperationIntegrationTest, Transform_FP32_FP32)
 {
   // c = a*a*a*a
-  char const* cuda =
+  std::string const cuda =
     R"***(
 __device__ inline void    fdsf   (
        float* C,
@@ -58,7 +58,7 @@ __device__ inline void    fdsf   (
 }
 )***";
 
-  char const* ptx =
+  std::string const ptx =
     R"***(
 //
 // Generated by NVIDIA NVVM Compiler
@@ -101,17 +101,17 @@ __device__ inline void    fdsf   (
   auto op        = [](dtype a) { return a * a * a * a; };
   auto data_init = [](cudf::size_type row) { return row % 3; };
 
-  test_udf<dtype>(cuda, op, data_init, 500, false);
-  test_udf<dtype>(ptx, op, data_init, 500, true);
+  test_udf<dtype>(cuda.c_str(), op, data_init, 500, false);
+  test_udf<dtype>(ptx.c_str(), op, data_init, 500, true);
 }
 
 TEST_F(UnaryOperationIntegrationTest, Transform_INT32_INT32)
 {
   // c = a * a - a
-  char const cuda[] =
+  std::string const cuda =
     "__device__ inline void f(int* output,int input){*output = input*input - input;}";
 
-  char const* ptx =
+  std::string const ptx =
     R"***(
 .func _Z1fPii(
         .param .b64 _Z1fPii_param_0,
@@ -136,8 +136,8 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT32_INT32)
   auto op        = [](dtype a) { return a * a - a; };
   auto data_init = [](cudf::size_type row) { return row % 78; };
 
-  test_udf<dtype>(cuda, op, data_init, 500, false);
-  test_udf<dtype>(ptx, op, data_init, 500, true);
+  test_udf<dtype>(cuda.c_str(), op, data_init, 500, false);
+  test_udf<dtype>(ptx.c_str(), op, data_init, 500, true);
 }
 
 TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8)
@@ -145,7 +145,7 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8)
   // Capitalize all the lower case letters
   // Assuming ASCII, the PTX code is compiled from the following CUDA code
 
-  char const cuda[] =
+  std::string const cuda =
     R"***(
 __device__ inline void f(
   signed char* output,
@@ -159,7 +159,7 @@ __device__ inline void f(
 }
 )***";
 
-  char const ptx[] =
+  std::string const ptx =
     R"***(
 .func _Z1fPcc(
         .param .b64 _Z1fPcc_param_0,
@@ -191,15 +191,15 @@ __device__ inline void f(
   auto op        = [](dtype a) { return std::toupper(a); };
   auto data_init = [](cudf::size_type row) { return 'a' + (row % 26); };
 
-  test_udf<dtype>(cuda, op, data_init, 500, false);
-  test_udf<dtype>(ptx, op, data_init, 500, true);
+  test_udf<dtype>(cuda.c_str(), op, data_init, 500, false);
+  test_udf<dtype>(ptx.c_str(), op, data_init, 500, true);
 }
 
 TEST_F(UnaryOperationIntegrationTest, Transform_Datetime)
 {
   // Add one day to timestamp in microseconds
 
-  char const cuda[] =
+  std::string const cuda =
     R"***(
 __device__ inline void f(cudf::timestamp_us* output, cudf::timestamp_us input)
 {
@@ -217,7 +217,7 @@ __device__ inline void f(cudf::timestamp_us* output, cudf::timestamp_us input)
   auto random_eng = cudf::test::UniformRandomGenerator<cudf::timestamp_us::rep>(0, 100000000);
   auto data_init  = [&random_eng](cudf::size_type row) { return random_eng.generate(); };
 
-  test_udf<dtype>(cuda, op, data_init, 500, false);
+  test_udf<dtype>(cuda.c_str(), op, data_init, 500, false);
 }
 
 }  // namespace transformation

From 2d02bdce9e3efae232dea4a5b8b2eecf5c0f8a93 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Mon, 7 Oct 2024 16:30:58 -0500
Subject: [PATCH 2/2] Implement `extract_datetime_component` in
 `libcudf`/`pylibcudf` (#16776)

Closes https://github.com/rapidsai/cudf/issues/16735

Authors:
  - https://github.com/brandon-b-miller
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Matthew Murray (https://github.com/Matt711)
  - Lawrence Mitchell (https://github.com/wence-)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16776
---
 cpp/include/cudf/datetime.hpp                 |  34 +++++
 cpp/include/cudf/detail/datetime.hpp          |  10 ++
 cpp/src/datetime/datetime_ops.cu              |  88 ++++++------
 cpp/tests/datetime/datetime_ops_test.cpp      | 130 ++++++++++++++++++
 python/cudf/cudf/_lib/datetime.pyx            |  39 +++++-
 python/cudf_polars/cudf_polars/dsl/expr.py    |  40 ++++--
 python/pylibcudf/pylibcudf/datetime.pxd       |   7 +
 python/pylibcudf/pylibcudf/datetime.pyx       |  71 ++++------
 .../pylibcudf/libcudf/CMakeLists.txt          |   5 +-
 .../pylibcudf/pylibcudf/libcudf/datetime.pxd  |  17 +++
 .../pylibcudf/pylibcudf/libcudf/datetime.pyx  |   0
 .../pylibcudf/tests/test_datetime.py          |  55 ++++----
 12 files changed, 358 insertions(+), 138 deletions(-)
 create mode 100644 python/pylibcudf/pylibcudf/libcudf/datetime.pyx

diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp
index 7359a0d5fde..1eaea5b6374 100644
--- a/cpp/include/cudf/datetime.hpp
+++ b/cpp/include/cudf/datetime.hpp
@@ -38,6 +38,22 @@ namespace datetime {
  * @file
  */
 
+/**
+ * @brief Types of datetime components that may be extracted.
+ */
+enum class datetime_component : uint8_t {
+  YEAR,
+  MONTH,
+  DAY,
+  WEEKDAY,
+  HOUR,
+  MINUTE,
+  SECOND,
+  MILLISECOND,
+  MICROSECOND,
+  NANOSECOND
+};
+
 /**
  * @brief  Extracts year from any datetime type and returns an int16_t
  * cudf::column.
@@ -207,6 +223,24 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(
   rmm::cuda_stream_view stream      = cudf::get_default_stream(),
   rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
 
+/**
+ * @brief Extracts the specified datetime component from any datetime type and
+ * returns an int16_t cudf::column.
+ *
+ * @param column cudf::column_view of the input datetime values
+ * @param component The datetime component to extract
+ * @param stream CUDA stream used for device memory operations and kernel launches
+ * @param mr Device memory resource used to allocate device memory of the returned column
+ *
+ * @returns cudf::column of the extracted int16_t datetime component
+ * @throw cudf::logic_error if input column datatype is not TIMESTAMP
+ */
+std::unique_ptr<cudf::column> extract_datetime_component(
+  cudf::column_view const& column,
+  datetime_component component,
+  rmm::cuda_stream_view stream      = cudf::get_default_stream(),
+  rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());
+
 /** @} */  // end of group
 /**
  * @addtogroup datetime_compute
diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp
index 9db7e48498f..df3050d6494 100644
--- a/cpp/include/cudf/detail/datetime.hpp
+++ b/cpp/include/cudf/detail/datetime.hpp
@@ -115,6 +115,16 @@ std::unique_ptr<cudf::column> extract_nanosecond_fraction(cudf::column_view cons
                                                           rmm::cuda_stream_view stream,
                                                           rmm::device_async_resource_ref mr);
 
+/**
+ * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component,
+ * rmm::cuda_stream_view, rmm::device_async_resource_ref)
+ *
+ */
+std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
+                                                         datetime_component component,
+                                                         rmm::cuda_stream_view stream,
+                                                         rmm::device_async_resource_ref mr);
+
 /**
  * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view,
  * rmm::device_async_resource_ref)
diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu
index ddb0dbcd96d..a497cedb3bc 100644
--- a/cpp/src/datetime/datetime_ops.cu
+++ b/cpp/src/datetime/datetime_ops.cu
@@ -44,19 +44,6 @@
 namespace cudf {
 namespace datetime {
 namespace detail {
-enum class datetime_component {
-  INVALID = 0,
-  YEAR,
-  MONTH,
-  DAY,
-  WEEKDAY,
-  HOUR,
-  MINUTE,
-  SECOND,
-  MILLISECOND,
-  MICROSECOND,
-  NANOSECOND
-};
 
 enum class rounding_function {
   CEIL,   ///< Rounds up to the next integer multiple of the provided frequency
@@ -453,90 +440,70 @@ std::unique_ptr<column> extract_year(column_view const& column,
                                      rmm::cuda_stream_view stream,
                                      rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::YEAR>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr);
 }
 
 std::unique_ptr<column> extract_month(column_view const& column,
                                       rmm::cuda_stream_view stream,
                                       rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::MONTH>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr);
 }
 
 std::unique_ptr<column> extract_day(column_view const& column,
                                     rmm::cuda_stream_view stream,
                                     rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::DAY>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr);
 }
 
 std::unique_ptr<column> extract_weekday(column_view const& column,
                                         rmm::cuda_stream_view stream,
                                         rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::WEEKDAY>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr);
 }
 
 std::unique_ptr<column> extract_hour(column_view const& column,
                                      rmm::cuda_stream_view stream,
                                      rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::HOUR>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr);
 }
 
 std::unique_ptr<column> extract_minute(column_view const& column,
                                        rmm::cuda_stream_view stream,
                                        rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::MINUTE>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr);
 }
 
 std::unique_ptr<column> extract_second(column_view const& column,
                                        rmm::cuda_stream_view stream,
                                        rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::SECOND>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr);
 }
 
 std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
                                                      rmm::cuda_stream_view stream,
                                                      rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::MILLISECOND>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr);
 }
 
 std::unique_ptr<column> extract_microsecond_fraction(column_view const& column,
                                                      rmm::cuda_stream_view stream,
                                                      rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::MICROSECOND>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr);
 }
 
 std::unique_ptr<column> extract_nanosecond_fraction(column_view const& column,
                                                     rmm::cuda_stream_view stream,
                                                     rmm::device_async_resource_ref mr)
 {
-  return detail::apply_datetime_op<
-    detail::extract_component_operator<detail::datetime_component::NANOSECOND>,
-    cudf::type_id::INT16>(column, stream, mr);
+  return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr);
 }
 
 std::unique_ptr<column> last_day_of_month(column_view const& column,
@@ -576,6 +543,32 @@ std::unique_ptr<column> extract_quarter(column_view const& column,
   return apply_datetime_op<extract_quarter_op, type_id::INT16>(column, stream, mr);
 }
 
+std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
+                                                         datetime_component component,
+                                                         rmm::cuda_stream_view stream,
+                                                         rmm::device_async_resource_ref mr)
+{
+#define extract(field)                                                                 \
+  case field:                                                                          \
+    return apply_datetime_op<extract_component_operator<field>, cudf::type_id::INT16>( \
+      column, stream, mr)
+
+  switch (component) {
+    extract(datetime_component::YEAR);
+    extract(datetime_component::MONTH);
+    extract(datetime_component::DAY);
+    extract(datetime_component::WEEKDAY);
+    extract(datetime_component::HOUR);
+    extract(datetime_component::MINUTE);
+    extract(datetime_component::SECOND);
+    extract(datetime_component::MILLISECOND);
+    extract(datetime_component::MICROSECOND);
+    extract(datetime_component::NANOSECOND);
+    default: CUDF_FAIL("Unsupported datetime component.");
+  }
+#undef extract
+}
+
 }  // namespace detail
 
 std::unique_ptr<column> ceil_datetimes(column_view const& column,
@@ -661,6 +654,15 @@ std::unique_ptr<column> extract_second(column_view const& column,
   return detail::extract_second(column, stream, mr);
 }
 
+std::unique_ptr<cudf::column> extract_datetime_component(cudf::column_view const& column,
+                                                         datetime_component component,
+                                                         rmm::cuda_stream_view stream,
+                                                         rmm::device_async_resource_ref mr)
+{
+  CUDF_FUNC_RANGE();
+  return detail::extract_datetime_component(column, component, stream, mr);
+}
+
 std::unique_ptr<column> extract_millisecond_fraction(column_view const& column,
                                                      rmm::cuda_stream_view stream,
                                                      rmm::device_async_resource_ref mr)
diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp
index 13577c4d0ea..603edb27c7c 100644
--- a/cpp/tests/datetime/datetime_ops_test.cpp
+++ b/cpp/tests/datetime/datetime_ops_test.cpp
@@ -196,6 +196,136 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents)
                                  fixed_width_column_wrapper<int16_t>{0, 0, 0});
   CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns),
                                  fixed_width_column_wrapper<int16_t>{766, 424, 623});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::YEAR),
+    fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::YEAR),
+    fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::YEAR),
+    fixed_width_column_wrapper<int16_t>{1965, 2018, 2023});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::YEAR),
+    fixed_width_column_wrapper<int16_t>{1969, 1970, 1970});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MONTH),
+    fixed_width_column_wrapper<int16_t>{10, 7, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MONTH),
+    fixed_width_column_wrapper<int16_t>{10, 7, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MONTH),
+    fixed_width_column_wrapper<int16_t>{10, 7, 1});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MONTH),
+    fixed_width_column_wrapper<int16_t>{12, 1, 1});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::DAY),
+    fixed_width_column_wrapper<int16_t>{26, 4, 25});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::DAY),
+    fixed_width_column_wrapper<int16_t>{26, 4, 25});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::DAY),
+    fixed_width_column_wrapper<int16_t>{26, 4, 25});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::DAY),
+    fixed_width_column_wrapper<int16_t>{31, 1, 1});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::WEEKDAY),
+    fixed_width_column_wrapper<int16_t>{2, 3, 3});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::WEEKDAY),
+    fixed_width_column_wrapper<int16_t>{2, 3, 3});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY),
+    fixed_width_column_wrapper<int16_t>{2, 3, 3});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY),
+    fixed_width_column_wrapper<int16_t>{2, 3, 3});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::HOUR),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::HOUR),
+    fixed_width_column_wrapper<int16_t>{14, 12, 7});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::HOUR),
+    fixed_width_column_wrapper<int16_t>{14, 12, 7});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::HOUR),
+    fixed_width_column_wrapper<int16_t>{23, 0, 0});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MINUTE),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MINUTE),
+    fixed_width_column_wrapper<int16_t>{1, 0, 32});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MINUTE),
+    fixed_width_column_wrapper<int16_t>{1, 0, 32});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MINUTE),
+    fixed_width_column_wrapper<int16_t>{59, 0, 0});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::SECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::SECOND),
+    fixed_width_column_wrapper<int16_t>{12, 0, 12});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::SECOND),
+    fixed_width_column_wrapper<int16_t>{12, 0, 12});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::SECOND),
+    fixed_width_column_wrapper<int16_t>{59, 0, 0});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MILLISECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MILLISECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MILLISECOND),
+    fixed_width_column_wrapper<int16_t>{762, 0, 929});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MILLISECOND),
+    fixed_width_column_wrapper<int16_t>{976, 23, 987});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MICROSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MICROSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MICROSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MICROSECOND),
+    fixed_width_column_wrapper<int16_t>{675, 432, 234});
+
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::NANOSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::NANOSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::NANOSECOND),
+    fixed_width_column_wrapper<int16_t>{0, 0, 0});
+  CUDF_TEST_EXPECT_COLUMNS_EQUAL(
+    *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::NANOSECOND),
+    fixed_width_column_wrapper<int16_t>{766, 424, 623});
 }
 
 template <typename T>
diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx
index bc5e085ec39..d844466120f 100644
--- a/python/cudf/cudf/_lib/datetime.pyx
+++ b/python/cudf/cudf/_lib/datetime.pyx
@@ -13,12 +13,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view
 from pylibcudf.libcudf.filling cimport calendrical_month_sequence
 from pylibcudf.libcudf.scalar.scalar cimport scalar
 from pylibcudf.libcudf.types cimport size_type
+from pylibcudf.datetime import DatetimeComponent
 
 from cudf._lib.column cimport Column
 from cudf._lib.scalar cimport DeviceScalar
 
-import pylibcudf as plc
-
 
 @acquire_spill_lock()
 def add_months(Column col, Column months):
@@ -40,9 +39,39 @@ def add_months(Column col, Column months):
 
 @acquire_spill_lock()
 def extract_datetime_component(Column col, object field):
-    result = Column.from_pylibcudf(
-        plc.datetime.extract_datetime_component(col.to_pylibcudf(mode="read"), field)
-    )
+
+    cdef unique_ptr[column] c_result
+    cdef column_view col_view = col.view()
+    cdef libcudf_datetime.datetime_component component
+
+    component_names = {
+        "year": DatetimeComponent.YEAR,
+        "month": DatetimeComponent.MONTH,
+        "day": DatetimeComponent.DAY,
+        "weekday": DatetimeComponent.WEEKDAY,
+        "hour": DatetimeComponent.HOUR,
+        "minute": DatetimeComponent.MINUTE,
+        "second": DatetimeComponent.SECOND,
+        "millisecond": DatetimeComponent.MILLISECOND,
+        "microsecond": DatetimeComponent.MICROSECOND,
+        "nanosecond": DatetimeComponent.NANOSECOND,
+    }
+    if field == "day_of_year":
+        with nogil:
+            c_result = move(libcudf_datetime.day_of_year(col_view))
+    elif field in component_names:
+        component = component_names[field]
+        with nogil:
+            c_result = move(
+                libcudf_datetime.extract_datetime_component(
+                    col_view,
+                    component
+                )
+            )
+    else:
+        raise ValueError(f"Invalid field: '{field}'")
+
+    result = Column.from_unique_ptr(move(c_result))
 
     if field == "weekday":
         # Pandas counts Monday-Sunday as 0-6
diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index 54476b7fedc..a418560b31c 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -961,16 +961,16 @@ def do_evaluate(
 class TemporalFunction(Expr):
     __slots__ = ("name", "options", "children")
     _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = {
-        pl_expr.TemporalFunction.Year: "year",
-        pl_expr.TemporalFunction.Month: "month",
-        pl_expr.TemporalFunction.Day: "day",
-        pl_expr.TemporalFunction.WeekDay: "weekday",
-        pl_expr.TemporalFunction.Hour: "hour",
-        pl_expr.TemporalFunction.Minute: "minute",
-        pl_expr.TemporalFunction.Second: "second",
-        pl_expr.TemporalFunction.Millisecond: "millisecond",
-        pl_expr.TemporalFunction.Microsecond: "microsecond",
-        pl_expr.TemporalFunction.Nanosecond: "nanosecond",
+        pl_expr.TemporalFunction.Year: plc.datetime.DatetimeComponent.YEAR,
+        pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH,
+        pl_expr.TemporalFunction.Day: plc.datetime.DatetimeComponent.DAY,
+        pl_expr.TemporalFunction.WeekDay: plc.datetime.DatetimeComponent.WEEKDAY,
+        pl_expr.TemporalFunction.Hour: plc.datetime.DatetimeComponent.HOUR,
+        pl_expr.TemporalFunction.Minute: plc.datetime.DatetimeComponent.MINUTE,
+        pl_expr.TemporalFunction.Second: plc.datetime.DatetimeComponent.SECOND,
+        pl_expr.TemporalFunction.Millisecond: plc.datetime.DatetimeComponent.MILLISECOND,
+        pl_expr.TemporalFunction.Microsecond: plc.datetime.DatetimeComponent.MICROSECOND,
+        pl_expr.TemporalFunction.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND,
     }
     _non_child = ("dtype", "name", "options")
     children: tuple[Expr, ...]
@@ -1003,8 +1003,12 @@ def do_evaluate(
         ]
         (column,) = columns
         if self.name == pl_expr.TemporalFunction.Microsecond:
-            millis = plc.datetime.extract_datetime_component(column.obj, "millisecond")
-            micros = plc.datetime.extract_datetime_component(column.obj, "microsecond")
+            millis = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.MILLISECOND
+            )
+            micros = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.MICROSECOND
+            )
             millis_as_micros = plc.binaryop.binary_operation(
                 millis,
                 plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())),
@@ -1019,9 +1023,15 @@ def do_evaluate(
             )
             return Column(total_micros)
         elif self.name == pl_expr.TemporalFunction.Nanosecond:
-            millis = plc.datetime.extract_datetime_component(column.obj, "millisecond")
-            micros = plc.datetime.extract_datetime_component(column.obj, "microsecond")
-            nanos = plc.datetime.extract_datetime_component(column.obj, "nanosecond")
+            millis = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.MILLISECOND
+            )
+            micros = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.MICROSECOND
+            )
+            nanos = plc.datetime.extract_datetime_component(
+                column.obj, plc.datetime.DatetimeComponent.NANOSECOND
+            )
             millis_as_nanos = plc.binaryop.binary_operation(
                 millis,
                 plc.interop.from_arrow(pa.scalar(1_000_000, type=pa.int32())),
diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd
index 2fce48cf1b4..72ce680ba7a 100644
--- a/python/pylibcudf/pylibcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/datetime.pxd
@@ -1,8 +1,15 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
+from pylibcudf.libcudf.datetime cimport datetime_component
+
 from .column cimport Column
 
 
 cpdef Column extract_year(
     Column col
 )
+
+cpdef Column extract_datetime_component(
+    Column col,
+    datetime_component component
+)
diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx
index e8e0caaf42d..784d29128bf 100644
--- a/python/pylibcudf/pylibcudf/datetime.pyx
+++ b/python/pylibcudf/pylibcudf/datetime.pyx
@@ -3,19 +3,14 @@ from libcpp.memory cimport unique_ptr
 from libcpp.utility cimport move
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.datetime cimport (
-    day_of_year as cpp_day_of_year,
-    extract_day as cpp_extract_day,
-    extract_hour as cpp_extract_hour,
-    extract_microsecond_fraction as cpp_extract_microsecond_fraction,
-    extract_millisecond_fraction as cpp_extract_millisecond_fraction,
-    extract_minute as cpp_extract_minute,
-    extract_month as cpp_extract_month,
-    extract_nanosecond_fraction as cpp_extract_nanosecond_fraction,
-    extract_second as cpp_extract_second,
-    extract_weekday as cpp_extract_weekday,
+    datetime_component,
+    extract_datetime_component as cpp_extract_datetime_component,
     extract_year as cpp_extract_year,
 )
 
+from pylibcudf.libcudf.datetime import \
+    datetime_component as DatetimeComponent  # no-cython-lint
+
 from .column cimport Column
 
 
@@ -41,41 +36,29 @@ cpdef Column extract_year(
         result = move(cpp_extract_year(values.view()))
     return Column.from_libcudf(move(result))
 
+cpdef Column extract_datetime_component(
+    Column values,
+    datetime_component component
+):
+    """
+    Extract a datetime component from a datetime column.
 
-def extract_datetime_component(Column col, str field):
+    For details, see :cpp:func:`cudf::extract_datetime_component`.
 
-    cdef unique_ptr[column] c_result
+    Parameters
+    ----------
+    values : Column
+        The column to extract the component from.
+    component : DatetimeComponent
+        The datetime component to extract.
 
-    with nogil:
-        if field == "year":
-            c_result = move(cpp_extract_year(col.view()))
-        elif field == "month":
-            c_result = move(cpp_extract_month(col.view()))
-        elif field == "day":
-            c_result = move(cpp_extract_day(col.view()))
-        elif field == "weekday":
-            c_result = move(cpp_extract_weekday(col.view()))
-        elif field == "hour":
-            c_result = move(cpp_extract_hour(col.view()))
-        elif field == "minute":
-            c_result = move(cpp_extract_minute(col.view()))
-        elif field == "second":
-            c_result = move(cpp_extract_second(col.view()))
-        elif field == "millisecond":
-            c_result = move(
-                cpp_extract_millisecond_fraction(col.view())
-            )
-        elif field == "microsecond":
-            c_result = move(
-                cpp_extract_microsecond_fraction(col.view())
-            )
-        elif field == "nanosecond":
-            c_result = move(
-                cpp_extract_nanosecond_fraction(col.view())
-            )
-        elif field == "day_of_year":
-            c_result = move(cpp_day_of_year(col.view()))
-        else:
-            raise ValueError(f"Invalid datetime field: '{field}'")
+    Returns
+    -------
+    Column
+        Column with the extracted component.
+    """
+    cdef unique_ptr[column] result
 
-    return Column.from_libcudf(move(c_result))
+    with nogil:
+        result = move(cpp_extract_datetime_component(values.view(), component))
+    return Column.from_libcudf(move(result))
diff --git a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
index 2167616690f..15beaee47d4 100644
--- a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
+++ b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt
@@ -12,8 +12,9 @@
 # the License.
 # =============================================================================
 
-set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx labeling.pyx reduce.pyx
-                   replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx
+set(cython_sources
+    aggregation.pyx binaryop.pyx copying.pyx datetime.pyx expressions.pyx labeling.pyx reduce.pyx
+    replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx
 )
 
 set(linked_libraries cudf::cudf)
diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
index a4465343197..73cdfb96af5 100644
--- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
+++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd
@@ -1,5 +1,6 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
 
+from libc.stdint cimport uint8_t
 from libcpp.memory cimport unique_ptr
 from pylibcudf.libcudf.column.column cimport column
 from pylibcudf.libcudf.column.column_view cimport column_view
@@ -7,6 +8,18 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar
 
 
 cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
+    cpdef enum class datetime_component(uint8_t):
+        YEAR
+        MONTH
+        DAY
+        WEEKDAY
+        HOUR
+        MINUTE
+        SECOND
+        MILLISECOND
+        MICROSECOND
+        NANOSECOND
+
     cdef unique_ptr[column] extract_year(const column_view& column) except +
     cdef unique_ptr[column] extract_month(const column_view& column) except +
     cdef unique_ptr[column] extract_day(const column_view& column) except +
@@ -23,6 +36,10 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
     cdef unique_ptr[column] extract_nanosecond_fraction(
         const column_view& column
     ) except +
+    cdef unique_ptr[column] extract_datetime_component(
+        const column_view& column,
+        datetime_component component
+    ) except +
 
     ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency":
         DAY "cudf::datetime::rounding_frequency::DAY"
diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pyx b/python/pylibcudf/pylibcudf/libcudf/datetime.pyx
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py
index 89c96829e71..75930d59058 100644
--- a/python/pylibcudf/pylibcudf/tests/test_datetime.py
+++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py
@@ -1,7 +1,6 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
 
 import datetime
-import functools
 
 import pyarrow as pa
 import pyarrow.compute as pc
@@ -10,19 +9,6 @@
 from utils import assert_column_eq
 
 
-@pytest.fixture
-def date_column(has_nulls):
-    values = [
-        datetime.date(1999, 1, 1),
-        datetime.date(2024, 10, 12),
-        datetime.date(1, 1, 1),
-        datetime.date(9999, 1, 1),
-    ]
-    if has_nulls:
-        values[2] = None
-    return plc.interop.from_arrow(pa.array(values, type=pa.date32()))
-
-
 @pytest.fixture(scope="module", params=["s", "ms", "us", "ns"])
 def datetime_column(has_nulls, request):
     values = [
@@ -40,24 +26,35 @@ def datetime_column(has_nulls, request):
     )
 
 
-@pytest.mark.parametrize(
-    "component, pc_fun",
-    [
-        ("year", pc.year),
-        ("month", pc.month),
-        ("day", pc.day),
-        ("weekday", functools.partial(pc.day_of_week, count_from_zero=False)),
-        ("hour", pc.hour),
-        ("minute", pc.minute),
-        ("second", pc.second),
-        ("millisecond", pc.millisecond),
-        ("microsecond", pc.microsecond),
-        ("nanosecond", pc.nanosecond),
+@pytest.fixture(
+    params=[
+        ("year", plc.datetime.DatetimeComponent.YEAR),
+        ("month", plc.datetime.DatetimeComponent.MONTH),
+        ("day", plc.datetime.DatetimeComponent.DAY),
+        ("day_of_week", plc.datetime.DatetimeComponent.WEEKDAY),
+        ("hour", plc.datetime.DatetimeComponent.HOUR),
+        ("minute", plc.datetime.DatetimeComponent.MINUTE),
+        ("second", plc.datetime.DatetimeComponent.SECOND),
+        ("millisecond", plc.datetime.DatetimeComponent.MILLISECOND),
+        ("microsecond", plc.datetime.DatetimeComponent.MICROSECOND),
+        ("nanosecond", plc.datetime.DatetimeComponent.NANOSECOND),
     ],
+    ids=lambda x: x[0],
 )
-def test_extraction(datetime_column, component, pc_fun):
+def component(request):
+    return request.param
+
+
+def test_extract_datetime_component(datetime_column, component):
+    attr, component = component
+    kwargs = {}
+    if attr == "day_of_week":
+        kwargs = {"count_from_zero": False}
     got = plc.datetime.extract_datetime_component(datetime_column, component)
     # libcudf produces an int16, arrow produces an int64
-    expect = pc_fun(plc.interop.to_arrow(datetime_column)).cast(pa.int16())
+
+    expect = getattr(pc, attr)(
+        plc.interop.to_arrow(datetime_column), **kwargs
+    ).cast(pa.int16())
 
     assert_column_eq(expect, got)