From 7e1e4757e753253a99df110fd3814d0136289ef2 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Mon, 7 Oct 2024 12:41:47 -0700 Subject: [PATCH 1/2] Address all remaining clang-tidy errors (#16956) With this set of changes I get a clean run of clang-tidy (with one caveat that I'll explain in the follow-up PR to add clang-tidy to pre-commit/CI). Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Nghia Truong (https://github.com/ttnghia) - MithunR (https://github.com/mythrocks) - David Wendt (https://github.com/davidwendt) - Kyle Edwards (https://github.com/KyleFromNVIDIA) URL: https://github.com/rapidsai/cudf/pull/16956 --- cpp/.clang-tidy | 43 ++++++++++++--- cpp/cmake/thirdparty/get_nanoarrow.cmake | 8 +-- .../nanoarrow_clang_tidy_compliance.diff | 38 ++++++++++++++ .../patches/nanoarrow_override.json | 18 +++++++ cpp/include/cudf/table/table.hpp | 2 +- cpp/include/cudf/table/table_view.hpp | 2 +- cpp/src/io/avro/avro.cpp | 3 +- cpp/src/io/orc/orc.hpp | 2 +- .../io/parquet/compact_protocol_reader.cpp | 6 ++- cpp/src/io/utilities/data_sink.cpp | 6 ++- cpp/src/io/utilities/hostdevice_span.hpp | 8 +-- cpp/src/utilities/host_memory.cpp | 13 ++++- cpp/tests/binaryop/binop-compiled-test.cpp | 6 ++- cpp/tests/binaryop/util/operation.h | 4 +- cpp/tests/column/column_test.cpp | 4 +- cpp/tests/copying/slice_tests.cpp | 12 +++-- cpp/tests/copying/slice_tests.cuh | 21 ++++---- cpp/tests/copying/split_tests.cpp | 52 ++++++++++--------- .../hashing/murmurhash3_x64_128_test.cpp | 4 +- cpp/tests/hashing/sha256_test.cpp | 2 - cpp/tests/interop/from_arrow_device_test.cpp | 12 ++--- cpp/tests/interop/from_arrow_host_test.cpp | 6 +-- cpp/tests/interop/from_arrow_test.cpp | 8 +-- cpp/tests/interop/to_arrow_device_test.cpp | 12 ++--- cpp/tests/interop/to_arrow_host_test.cpp | 6 +-- cpp/tests/interop/to_arrow_test.cpp | 14 ++--- cpp/tests/io/comp/decomp_test.cpp | 36 ++++++++----- cpp/tests/io/csv_test.cpp | 12 ++--- cpp/tests/io/json/json_test.cpp | 6 +-- cpp/tests/io/orc_test.cpp | 37 ++++++------- cpp/tests/io/parquet_misc_test.cpp | 2 +- cpp/tests/io/parquet_reader_test.cpp | 7 +-- cpp/tests/io/parquet_v2_test.cpp | 36 ++++++------- cpp/tests/io/parquet_writer_test.cpp | 17 +++--- cpp/tests/join/distinct_join_tests.cpp | 10 ++-- cpp/tests/merge/merge_string_test.cpp | 4 +- cpp/tests/merge/merge_test.cpp | 6 +-- .../reductions/segmented_reduction_tests.cpp | 9 ++-- cpp/tests/replace/replace_tests.cpp | 4 +- cpp/tests/rolling/collect_ops_test.cpp | 8 +-- cpp/tests/rolling/offset_row_window_test.cpp | 12 +++-- cpp/tests/rolling/rolling_test.cpp | 2 +- cpp/tests/scalar/scalar_test.cpp | 6 +-- cpp/tests/search/search_list_test.cpp | 3 +- cpp/tests/sort/sort_test.cpp | 2 +- cpp/tests/stream_compaction/unique_tests.cpp | 1 - cpp/tests/streams/stream_compaction_test.cpp | 2 - cpp/tests/strings/integers_tests.cpp | 3 +- cpp/tests/structs/structs_column_tests.cpp | 5 +- cpp/tests/transform/bools_to_mask_test.cpp | 2 +- .../integration/unary_transform_test.cpp | 28 +++++----- 51 files changed, 344 insertions(+), 228 deletions(-) create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff create mode 100644 cpp/cmake/thirdparty/patches/nanoarrow_override.json diff --git a/cpp/.clang-tidy b/cpp/.clang-tidy index b791d846d1d..2d4f8c0d80e 100644 --- a/cpp/.clang-tidy +++ b/cpp/.clang-tidy @@ -1,18 +1,47 @@ --- +# Notes on disabled checks +# ------------------------ +# modernize-use-equals-default: +# auto-fix is broken (doesn't insert =default correctly) +# modernize-concat-nested-namespaces: +# auto-fix is broken (can delete code) +# modernize-use-trailing-return-type: +# Purely stylistic, no benefit to rewriting everything +# modernize-return-braced-init-list: +# Stylistically we prefer to see the return type at the return site. +# See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672 +# for more information. +# modernize-use-bool-literals: +# Our tests use int flags for validity masks extensively and we prefer that +# clang-analyzer-cplusplus.NewDeleteLeaks: +# This check has numerous bugs, see +# https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks +# We encounter at least +# https://github.com/llvm/llvm-project/issues/60896 +# https://github.com/llvm/llvm-project/issues/69602 +# clang-analyzer-optin.core.EnumCastOutOfRange +# We use enums as flags in multiple cases and this check makes ORing flags invalid +# clang-analyzer-optin.cplusplus.UninitializedObject' +# There is an error in nanoarrow that none of the clang-tidy filters (i.e. +# header-filter and exclude-header-filter are able to properly avoid. This +# merits further investigation +# +# We need to verify that broken checks are still broken Checks: 'modernize-*, -modernize-use-equals-default, -modernize-concat-nested-namespaces, -modernize-use-trailing-return-type, - -modernize-use-bool-literals' - - # -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly) - # -modernize-concat-nested-namespaces # auto-fix is broken (can delete code) - # -modernize-use-trailing-return-type # just a preference + -modernize-return-braced-init-list, + -modernize-use-bool-literals, + clang-analyzer-*, + -clang-analyzer-cplusplus.NewDeleteLeaks, + -clang-analyzer-optin.core.EnumCastOutOfRange, + -clang-analyzer-optin.cplusplus.UninitializedObject' WarningsAsErrors: '' -HeaderFilterRegex: '' -AnalyzeTemporaryDtors: false +HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*' +ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*' FormatStyle: none CheckOptions: - key: modernize-loop-convert.MaxCopySize diff --git a/cpp/cmake/thirdparty/get_nanoarrow.cmake b/cpp/cmake/thirdparty/get_nanoarrow.cmake index 8df1b431095..d7d7fcca044 100644 --- a/cpp/cmake/thirdparty/get_nanoarrow.cmake +++ b/cpp/cmake/thirdparty/get_nanoarrow.cmake @@ -14,15 +14,17 @@ # This function finds nanoarrow and sets any additional necessary environment variables. function(find_and_configure_nanoarrow) + include(${rapids-cmake-dir}/cpm/package_override.cmake) + + set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches") + rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json") + # Currently we need to always build nanoarrow so we don't pickup a previous installed version set(CPM_DOWNLOAD_nanoarrow ON) rapids_cpm_find( nanoarrow 0.6.0.dev GLOBAL_TARGETS nanoarrow CPM_ARGS - GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git - GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb - GIT_SHALLOW FALSE OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf" ) set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON) diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff new file mode 100644 index 00000000000..e9a36fcb567 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff @@ -0,0 +1,38 @@ +diff --git a/src/nanoarrow/common/inline_buffer.h b/src/nanoarrow/common/inline_buffer.h +index caa6be4..70ec8a2 100644 +--- a/src/nanoarrow/common/inline_buffer.h ++++ b/src/nanoarrow/common/inline_buffer.h +@@ -347,7 +347,7 @@ static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) { + } + + static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) { +- *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | ++ *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | // NOLINT + ((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) | + ((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) | + ((values[7] + 0x7f) & 0x80)); +@@ -471,13 +471,13 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l + // set bits within a single byte + const uint8_t only_byte_mask = + i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask); +- bits[bytes_begin] &= only_byte_mask; ++ bits[bytes_begin] &= only_byte_mask; // NOLINT + bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask); + return; + } + + // set/clear trailing bits of first byte +- bits[bytes_begin] &= first_byte_mask; ++ bits[bytes_begin] &= first_byte_mask; // NOLINT + bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask); + + if (bytes_end - bytes_begin > 2) { +@@ -637,7 +637,7 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap, + n_remaining -= n_full_bytes * 8; + if (n_remaining > 0) { + // Zero out the last byte +- *out_cursor = 0x00; ++ *out_cursor = 0x00; // NOLINT + for (int i = 0; i < n_remaining; i++) { + ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]); + } diff --git a/cpp/cmake/thirdparty/patches/nanoarrow_override.json b/cpp/cmake/thirdparty/patches/nanoarrow_override.json new file mode 100644 index 00000000000..d529787e7c8 --- /dev/null +++ b/cpp/cmake/thirdparty/patches/nanoarrow_override.json @@ -0,0 +1,18 @@ + +{ + "packages" : { + "nanoarrow" : { + "version" : "0.6.0.dev", + "git_url" : "https://github.com/apache/arrow-nanoarrow.git", + "git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb", + "git_shallow" : false, + "patches" : [ + { + "file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff", + "issue" : "https://github.com/apache/arrow-nanoarrow/issues/537", + "fixed_in" : "" + } + ] + } + } +} diff --git a/cpp/include/cudf/table/table.hpp b/cpp/include/cudf/table/table.hpp index 762131a174f..15fdad21d9f 100644 --- a/cpp/include/cudf/table/table.hpp +++ b/cpp/include/cudf/table/table.hpp @@ -148,7 +148,7 @@ class table { std::vector columns(std::distance(begin, end)); std::transform( begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); }); - return table_view(columns); + return table_view{columns}; } /** diff --git a/cpp/include/cudf/table/table_view.hpp b/cpp/include/cudf/table/table_view.hpp index 4a990f67ce4..d41176590ea 100644 --- a/cpp/include/cudf/table/table_view.hpp +++ b/cpp/include/cudf/table/table_view.hpp @@ -241,7 +241,7 @@ class table_view : public detail::table_view_base { { std::vector columns(std::distance(begin, end)); std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); }); - return table_view(columns); + return table_view{columns}; } /** diff --git a/cpp/src/io/avro/avro.cpp b/cpp/src/io/avro/avro.cpp index 03cf6d4a0e0..d5caa4720ac 100644 --- a/cpp/src/io/avro/avro.cpp +++ b/cpp/src/io/avro/avro.cpp @@ -16,6 +16,7 @@ #include "avro.hpp" +#include #include #include @@ -302,7 +303,7 @@ bool schema_parser::parse(std::vector& schema, std::string const& // Empty schema if (json_str == "[]") return true; - char depthbuf[MAX_SCHEMA_DEPTH]; + std::array depthbuf; int depth = 0, parent_idx = -1, entry_idx = -1; json_state_e state = state_attrname; std::string str; diff --git a/cpp/src/io/orc/orc.hpp b/cpp/src/io/orc/orc.hpp index 790532c9d54..5ab36fdae8e 100644 --- a/cpp/src/io/orc/orc.hpp +++ b/cpp/src/io/orc/orc.hpp @@ -258,7 +258,7 @@ class ProtobufReader { private: template - friend class FunctionSwitchImpl; + friend struct FunctionSwitchImpl; void skip_bytes(size_t bytecnt) { diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index b978799b8bc..312a5243687 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -228,7 +228,8 @@ class parquet_field_string : public parquet_field { * @return True if field types mismatch or if the process of reading a * string fails */ -struct parquet_field_string_list : public parquet_field_list { +class parquet_field_string_list : public parquet_field_list { + public: parquet_field_string_list(int f, std::vector& v) : parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { @@ -396,8 +397,9 @@ class parquet_field_binary : public parquet_field { * @return True if field types mismatch or if the process of reading a * binary fails */ -struct parquet_field_binary_list +class parquet_field_binary_list : public parquet_field_list, FieldType::BINARY> { + public: parquet_field_binary_list(int f, std::vector>& v) : parquet_field_list(f, v) { auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) { diff --git a/cpp/src/io/utilities/data_sink.cpp b/cpp/src/io/utilities/data_sink.cpp index 1dbb9369115..0b76f3d3e8f 100644 --- a/cpp/src/io/utilities/data_sink.cpp +++ b/cpp/src/io/utilities/data_sink.cpp @@ -50,7 +50,8 @@ class file_sink : public data_sink { } } - ~file_sink() override { flush(); } + // Marked as NOLINT because we are calling a virtual method in the destructor + ~file_sink() override { flush(); } // NOLINT void host_write(void const* data, size_t size) override { @@ -114,7 +115,8 @@ class host_buffer_sink : public data_sink { public: explicit host_buffer_sink(std::vector* buffer) : buffer_(buffer) {} - ~host_buffer_sink() override { flush(); } + // Marked as NOLINT because we are calling a virtual method in the destructor + ~host_buffer_sink() override { flush(); } // NOLINT void host_write(void const* data, size_t size) override { diff --git a/cpp/src/io/utilities/hostdevice_span.hpp b/cpp/src/io/utilities/hostdevice_span.hpp index d9eac423901..1d8b34addbd 100644 --- a/cpp/src/io/utilities/hostdevice_span.hpp +++ b/cpp/src/io/utilities/hostdevice_span.hpp @@ -43,8 +43,8 @@ class hostdevice_span { template ().host_ptr())> (*)[], - T (*)[]>>* = nullptr> + std::remove_pointer_t().host_ptr())> (*)[], // NOLINT + T (*)[]>>* = nullptr> // NOLINT constexpr hostdevice_span(C& in) : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size()) { } @@ -54,8 +54,8 @@ class hostdevice_span { template ().host_ptr())> (*)[], - T (*)[]>>* = nullptr> + std::remove_pointer_t().host_ptr())> (*)[], // NOLINT + T (*)[]>>* = nullptr> // NOLINT constexpr hostdevice_span(C const& in) : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size()) { diff --git a/cpp/src/utilities/host_memory.cpp b/cpp/src/utilities/host_memory.cpp index 125b98c4a67..9d8e3cf2fa6 100644 --- a/cpp/src/utilities/host_memory.cpp +++ b/cpp/src/utilities/host_memory.cpp @@ -115,12 +115,19 @@ class fixed_pinned_pool_memory_resource { return !operator==(other); } - friend void get_property(fixed_pinned_pool_memory_resource const&, + // clang-tidy will complain about this function because it is completely + // unused at runtime and only exist for tag introspection by CCCL, so we + // ignore linting. This masks a real issue if we ever want to compile with + // clang, though, which is that the function will actually be compiled out by + // clang. If cudf were ever to try to support clang as a compile we would + // need to force the compiler to emit this symbol. The same goes for the + // other get_property definitions in this file. + friend void get_property(fixed_pinned_pool_memory_resource const&, // NOLINT cuda::mr::device_accessible) noexcept { } - friend void get_property(fixed_pinned_pool_memory_resource const&, + friend void get_property(fixed_pinned_pool_memory_resource const&, // NOLINT cuda::mr::host_accessible) noexcept { } @@ -235,7 +242,9 @@ class new_delete_memory_resource { bool operator!=(new_delete_memory_resource const& other) const { return !operator==(other); } + // NOLINTBEGIN friend void get_property(new_delete_memory_resource const&, cuda::mr::host_accessible) noexcept {} + // NOLINTEND }; static_assert(cuda::mr::resource_with, diff --git a/cpp/tests/binaryop/binop-compiled-test.cpp b/cpp/tests/binaryop/binop-compiled-test.cpp index 06e0d193d80..aa5b49567e6 100644 --- a/cpp/tests/binaryop/binop-compiled-test.cpp +++ b/cpp/tests/binaryop/binop-compiled-test.cpp @@ -557,7 +557,11 @@ auto NullOp_Result(cudf::column_view lhs, cudf::column_view rhs) std::transform(thrust::make_counting_iterator(0), thrust::make_counting_iterator(lhs.size()), result.begin(), - [&lhs_data, &lhs_mask, &rhs_data, &rhs_mask, &result_mask](auto i) -> TypeOut { + [&lhs_data = lhs_data, + &lhs_mask = lhs_mask, + &rhs_data = rhs_data, + &rhs_mask = rhs_mask, + &result_mask = result_mask](auto i) -> TypeOut { auto lhs_valid = lhs_mask.data() and cudf::bit_is_set(lhs_mask.data(), i); auto rhs_valid = rhs_mask.data() and cudf::bit_is_set(rhs_mask.data(), i); bool output_valid = lhs_valid or rhs_valid; diff --git a/cpp/tests/binaryop/util/operation.h b/cpp/tests/binaryop/util/operation.h index d36b48d666a..ef1ccfccab5 100644 --- a/cpp/tests/binaryop/util/operation.h +++ b/cpp/tests/binaryop/util/operation.h @@ -100,7 +100,7 @@ struct Mul { std::enable_if_t<(cudf::is_duration_t::value && std::is_integral_v) || (cudf::is_duration_t::value && std::is_integral_v), void>* = nullptr> - OutT DurationProduct(LhsT x, RhsT y) const + [[nodiscard]] OutT DurationProduct(LhsT x, RhsT y) const { return x * y; } @@ -128,7 +128,7 @@ struct Div { typename LhsT, typename RhsT, std::enable_if_t<(std::is_integral_v || cudf::is_duration()), void>* = nullptr> - OutT DurationDivide(LhsT x, RhsT y) const + [[nodiscard]] OutT DurationDivide(LhsT x, RhsT y) const { return x / y; } diff --git a/cpp/tests/column/column_test.cpp b/cpp/tests/column/column_test.cpp index 14b4197de71..631f5150829 100644 --- a/cpp/tests/column/column_test.cpp +++ b/cpp/tests/column/column_test.cpp @@ -340,7 +340,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorNoMask) cudf::column moved_to{std::move(original)}; - EXPECT_EQ(0, original.size()); + EXPECT_EQ(0, original.size()); // NOLINT EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type()); verify_column_views(moved_to); @@ -359,7 +359,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask) cudf::column moved_to{std::move(original)}; verify_column_views(moved_to); - EXPECT_EQ(0, original.size()); + EXPECT_EQ(0, original.size()); // NOLINT EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type()); // Verify move diff --git a/cpp/tests/copying/slice_tests.cpp b/cpp/tests/copying/slice_tests.cpp index bebd3d25610..aef0d4ad78a 100644 --- a/cpp/tests/copying/slice_tests.cpp +++ b/cpp/tests/copying/slice_tests.cpp @@ -29,6 +29,7 @@ #include #include +#include #include #include #include @@ -370,11 +371,12 @@ TEST_F(SliceStringTableTest, StringWithNulls) auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; - cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids}, - {strings[1].begin(), strings[1].end(), valids}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; + std::array sw{ + {{strings[0].begin(), strings[0].end(), valids}, + {strings[1].begin(), strings[1].end(), valids}}}; std::vector> scols; scols.push_back(sw[0].release()); diff --git a/cpp/tests/copying/slice_tests.cuh b/cpp/tests/copying/slice_tests.cuh index a180740f143..1e037294527 100644 --- a/cpp/tests/copying/slice_tests.cuh +++ b/cpp/tests/copying/slice_tests.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -148,7 +148,7 @@ std::vector create_expected_tables(cudf::size_type num_cols, } } - result.push_back(cudf::table(std::move(cols))); + result.emplace_back(std::move(cols)); } return result; @@ -163,13 +163,12 @@ inline std::vector create_expected_string_co for (unsigned long index = 0; index < indices.size(); index += 2) { if (not nullable) { - result.push_back(cudf::test::strings_column_wrapper(strings.begin() + indices[index], - strings.begin() + indices[index + 1])); + result.emplace_back(strings.begin() + indices[index], strings.begin() + indices[index + 1]); } else { auto valids = cudf::detail::make_counting_transform_iterator( indices[index], [](auto i) { return i % 2 == 0; }); - result.push_back(cudf::test::strings_column_wrapper( - strings.begin() + indices[index], strings.begin() + indices[index + 1], valids)); + result.emplace_back( + strings.begin() + indices[index], strings.begin() + indices[index + 1], valids); } } @@ -184,16 +183,16 @@ inline std::vector create_expected_string_co std::vector result = {}; for (unsigned long index = 0; index < indices.size(); index += 2) { - result.push_back(cudf::test::strings_column_wrapper(strings.begin() + indices[index], - strings.begin() + indices[index + 1], - validity.begin() + indices[index])); + result.emplace_back(strings.begin() + indices[index], + strings.begin() + indices[index + 1], + validity.begin() + indices[index]); } return result; } inline std::vector create_expected_string_tables( - std::vector const strings[2], + std::vector> const strings, std::vector const& indices, bool nullable) { @@ -216,7 +215,7 @@ inline std::vector create_expected_string_tables( } } - result.push_back(cudf::table(std::move(cols))); + result.emplace_back(std::move(cols)); } return result; diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp index ee3e7da5e0f..b56b0f2d3f8 100644 --- a/cpp/tests/copying/split_tests.cpp +++ b/cpp/tests/copying/split_tests.cpp @@ -35,6 +35,7 @@ #include #include +#include #include #include #include @@ -135,7 +136,7 @@ std::vector create_expected_tables_for_splits( } std::vector create_expected_string_tables_for_splits( - std::vector const strings[2], + std::vector> const strings, std::vector const& splits, bool nullable) { @@ -144,8 +145,8 @@ std::vector create_expected_string_tables_for_splits( } std::vector create_expected_string_tables_for_splits( - std::vector const strings[2], - std::vector const validity[2], + std::vector> const strings, + std::vector> const validity, std::vector const& splits) { std::vector indices = splits_to_indices(splits, strings[0].size()); @@ -627,11 +628,12 @@ void split_string_with_invalids(SplitFunc Split, auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; - cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids}, - {strings[1].begin(), strings[1].end(), valids}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; + std::array sw{ + {{strings[0].begin(), strings[0].end(), valids}, + {strings[1].begin(), strings[1].end(), valids}}}; std::vector> scols; scols.push_back(sw[0].release()); @@ -658,11 +660,12 @@ void split_empty_output_strings_column_value(SplitFunc Split, auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; - cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids}, - {strings[1].begin(), strings[1].end(), valids}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; + std::array sw{ + {{strings[0].begin(), strings[0].end(), valids}, + {strings[1].begin(), strings[1].end(), valids}}}; std::vector> scols; scols.push_back(sw[0].release()); @@ -684,9 +687,9 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare) auto valids = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; std::vector splits{2, 5, 9}; @@ -699,16 +702,17 @@ void split_null_input_strings_column_value(SplitFunc Split, CompareFunc Compare) EXPECT_NO_THROW(Split(empty_table, splits)); } - cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), no_valids}, - {strings[1].begin(), strings[1].end(), valids}}; + std::array sw{ + {{strings[0].begin(), strings[0].end(), no_valids}, + {strings[1].begin(), strings[1].end(), valids}}}; std::vector> scols; scols.push_back(sw[0].release()); scols.push_back(sw[1].release()); cudf::table src_table(std::move(scols)); auto result = Split(src_table, splits); - std::vector validity_masks[2] = {std::vector(strings[0].size()), - std::vector(strings[0].size())}; + std::vector> validity_masks{std::vector(strings[0].size()), + std::vector(strings[0].size())}; std::generate( validity_masks[1].begin(), validity_masks[1].end(), [i = 0]() mutable { return i++ % 2 == 0; }); @@ -1913,9 +1917,9 @@ TEST_F(ContiguousSplitTableCornerCases, MixedColumnTypes) cudf::size_type start = 0; auto valids = cudf::detail::make_counting_transform_iterator(start, [](auto i) { return true; }); - std::vector strings[2] = { - {"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, - {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}; + std::vector> strings{ + {{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}, + {"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}}; std::vector> cols; @@ -2377,7 +2381,7 @@ TEST_F(ContiguousSplitTableCornerCases, OutBufferToSmall) { // internally, contiguous split chunks GPU work in 1MB contiguous copies // so the output buffer must be 1MB or larger. - EXPECT_THROW(cudf::chunked_pack::create({}, 1 * 1024), cudf::logic_error); + EXPECT_THROW(auto _ = cudf::chunked_pack::create({}, 1 * 1024), cudf::logic_error); } TEST_F(ContiguousSplitTableCornerCases, ChunkSpanTooSmall) diff --git a/cpp/tests/hashing/murmurhash3_x64_128_test.cpp b/cpp/tests/hashing/murmurhash3_x64_128_test.cpp index 4fb8f78b558..0e68050f935 100644 --- a/cpp/tests/hashing/murmurhash3_x64_128_test.cpp +++ b/cpp/tests/hashing/murmurhash3_x64_128_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,8 +22,6 @@ #include -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; - using NumericTypesNoBools = cudf::test::Concat; diff --git a/cpp/tests/hashing/sha256_test.cpp b/cpp/tests/hashing/sha256_test.cpp index cc95c7a2f0f..8bc47c92c6b 100644 --- a/cpp/tests/hashing/sha256_test.cpp +++ b/cpp/tests/hashing/sha256_test.cpp @@ -23,8 +23,6 @@ #include #include -constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::ALL_ERRORS}; - class SHA256HashTest : public cudf::test::BaseFixture {}; TEST_F(SHA256HashTest, EmptyTable) diff --git a/cpp/tests/interop/from_arrow_device_test.cpp b/cpp/tests/interop/from_arrow_device_test.cpp index a4dc7531765..2151ec6e22f 100644 --- a/cpp/tests/interop/from_arrow_device_test.cpp +++ b/cpp/tests/interop/from_arrow_device_test.cpp @@ -270,9 +270,9 @@ TEST_F(FromArrowDeviceTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); @@ -414,9 +414,9 @@ TEST_F(FromArrowDeviceTest, DictionaryIndicesType) { std::vector> columns; auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {1, 0, 1, 1, 1}); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); + columns.emplace_back(cudf::dictionary::encode(col)); + columns.emplace_back(cudf::dictionary::encode(col)); + columns.emplace_back(cudf::dictionary::encode(col)); cudf::table expected_table(std::move(columns)); cudf::table_view expected_table_view = expected_table.view(); diff --git a/cpp/tests/interop/from_arrow_host_test.cpp b/cpp/tests/interop/from_arrow_host_test.cpp index cbfa4911c3c..ef9936b214c 100644 --- a/cpp/tests/interop/from_arrow_host_test.cpp +++ b/cpp/tests/interop/from_arrow_host_test.cpp @@ -309,9 +309,9 @@ TEST_F(FromArrowHostDeviceTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp index 81c406c0faf..6e742b9e4cf 100644 --- a/cpp/tests/interop/from_arrow_test.cpp +++ b/cpp/tests/interop/from_arrow_test.cpp @@ -52,7 +52,7 @@ std::unique_ptr get_cudf_table() .release()); auto col4 = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {true, false, true, true, true}); - columns.emplace_back(std::move(cudf::dictionary::encode(col4))); + columns.emplace_back(cudf::dictionary::encode(col4)); columns.emplace_back(cudf::test::fixed_width_column_wrapper( {true, false, true, false, true}, {true, false, true, true, false}) .release()); @@ -339,9 +339,9 @@ TEST_F(FromArrowTest, DictionaryIndicesType) std::vector> columns; auto col = cudf::test::fixed_width_column_wrapper({1, 2, 5, 2, 7}, {true, false, true, true, true}); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); - columns.emplace_back(std::move(cudf::dictionary::encode(col))); + columns.emplace_back(cudf::dictionary::encode(col)); + columns.emplace_back(cudf::dictionary::encode(col)); + columns.emplace_back(cudf::dictionary::encode(col)); cudf::table expected_table(std::move(columns)); diff --git a/cpp/tests/interop/to_arrow_device_test.cpp b/cpp/tests/interop/to_arrow_device_test.cpp index 51216a8512c..7ba586461dc 100644 --- a/cpp/tests/interop/to_arrow_device_test.cpp +++ b/cpp/tests/interop/to_arrow_device_test.cpp @@ -55,7 +55,7 @@ get_nanoarrow_cudf_table(cudf::size_type length) auto col4 = cudf::test::fixed_width_column_wrapper( test_data.int64_data.begin(), test_data.int64_data.end(), test_data.validity.begin()); auto dict_col = cudf::dictionary::encode(col4); - columns.emplace_back(std::move(cudf::dictionary::encode(col4))); + columns.emplace_back(cudf::dictionary::encode(col4)); columns.emplace_back(cudf::test::fixed_width_column_wrapper(test_data.bool_data.begin(), test_data.bool_data.end(), test_data.bool_validity.begin()) @@ -82,8 +82,8 @@ get_nanoarrow_cudf_table(cudf::size_type length) test_data.string_data.begin(), test_data.string_data.end(), test_data.validity.begin()) .release(); vector_of_columns cols; - cols.push_back(move(int_column)); - cols.push_back(move(str_column)); + cols.push_back(std::move(int_column)); + cols.push_back(std::move(str_column)); auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( test_data.bool_data_validity.begin(), test_data.bool_data_validity.end())); columns.emplace_back( @@ -575,9 +575,9 @@ TEST_F(ToArrowDeviceTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); diff --git a/cpp/tests/interop/to_arrow_host_test.cpp b/cpp/tests/interop/to_arrow_host_test.cpp index fc0ed6c9352..fcb4433b42e 100644 --- a/cpp/tests/interop/to_arrow_host_test.cpp +++ b/cpp/tests/interop/to_arrow_host_test.cpp @@ -436,9 +436,9 @@ TEST_F(ToArrowHostDeviceTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); diff --git a/cpp/tests/interop/to_arrow_test.cpp b/cpp/tests/interop/to_arrow_test.cpp index 90ae12cdd90..a6aa4b22eca 100644 --- a/cpp/tests/interop/to_arrow_test.cpp +++ b/cpp/tests/interop/to_arrow_test.cpp @@ -90,7 +90,7 @@ std::pair, std::shared_ptr> get_table auto col4 = cudf::test::fixed_width_column_wrapper( int64_data.begin(), int64_data.end(), validity.begin()); auto dict_col = cudf::dictionary::encode(col4); - columns.emplace_back(std::move(cudf::dictionary::encode(col4))); + columns.emplace_back(cudf::dictionary::encode(col4)); columns.emplace_back(cudf::test::fixed_width_column_wrapper( bool_data.begin(), bool_data.end(), bool_validity.begin()) .release()); @@ -112,8 +112,8 @@ std::pair, std::shared_ptr> get_table cudf::test::strings_column_wrapper(string_data.begin(), string_data.end(), validity.begin()) .release(); vector_of_columns cols; - cols.push_back(move(int_column)); - cols.push_back(move(str_column)); + cols.push_back(std::move(int_column)); + cols.push_back(std::move(str_column)); auto [null_mask, null_count] = cudf::bools_to_mask(cudf::test::fixed_width_column_wrapper( bool_data_validity.begin(), bool_data_validity.end())); columns.emplace_back( @@ -294,9 +294,9 @@ TEST_F(ToArrowTest, StructColumn) auto int_col2 = cudf::test::fixed_width_column_wrapper{{12, 24, 47}, {1, 0, 1}}.release(); auto bool_col = cudf::test::fixed_width_column_wrapper{{true, true, false}}.release(); - auto list_col = - cudf::test::lists_column_wrapper({{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) - .release(); + auto list_col = cudf::test::lists_column_wrapper( + {{{1, 2}, {3, 4}, {5}}, {{{6}}}, {{7}, {8, 9}}}) // NOLINT + .release(); vector_of_columns cols2; cols2.push_back(std::move(str_col2)); cols2.push_back(std::move(int_col2)); @@ -438,7 +438,7 @@ TEST_F(ToArrowTest, FixedPoint64TableLarge) auto const schema = std::make_shared(schema_vector); auto const expected_arrow_table = arrow::Table::Make(schema, {arr}); - std::vector const metadata = {{"a"}}; + std::vector const metadata = {{"a"}}; // NOLINT ASSERT_TRUE(is_equal(input, metadata, expected_arrow_table)); } } diff --git a/cpp/tests/io/comp/decomp_test.cpp b/cpp/tests/io/comp/decomp_test.cpp index 840cf263ed9..54262dc3b44 100644 --- a/cpp/tests/io/comp/decomp_test.cpp +++ b/cpp/tests/io/comp/decomp_test.cpp @@ -39,19 +39,19 @@ using cudf::device_span; */ template struct DecompressTest : public cudf::test::BaseFixture { - std::vector vector_from_string(char const* str) const + [[nodiscard]] std::vector vector_from_string(std::string const str) const { - return std::vector(reinterpret_cast(str), - reinterpret_cast(str) + strlen(str)); + return {reinterpret_cast(str.c_str()), + reinterpret_cast(str.c_str()) + strlen(str.c_str())}; } - void Decompress(std::vector* decompressed, + void Decompress(std::vector& decompressed, uint8_t const* compressed, size_t compressed_size) { auto stream = cudf::get_default_stream(); rmm::device_buffer src{compressed, compressed_size, stream}; - rmm::device_uvector dst{decompressed->size(), stream}; + rmm::device_uvector dst{decompressed.size(), stream}; cudf::detail::hostdevice_vector> inf_in(1, stream); inf_in[0] = {static_cast(src.data()), src.size()}; @@ -67,7 +67,7 @@ struct DecompressTest : public cudf::test::BaseFixture { static_cast(this)->dispatch(inf_in, inf_out, inf_stat); CUDF_CUDA_TRY(cudaMemcpyAsync( - decompressed->data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value())); + decompressed.data(), dst.data(), dst.size(), cudaMemcpyDefault, stream.value())); inf_stat.device_to_host_sync(stream); ASSERT_EQ(inf_stat[0].status, cudf::io::compression_status::SUCCESS); } @@ -125,49 +125,57 @@ struct NvcompConfigTest : public cudf::test::BaseFixture {}; TEST_F(GzipDecompressTest, HelloWorld) { - constexpr char uncompressed[] = "hello world"; + std::string const uncompressed{"hello world"}; + // NOLINTBEGIN constexpr uint8_t compressed[] = { 0x1f, 0x8b, 0x8, 0x0, 0x9, 0x63, 0x99, 0x5c, 0x2, 0xff, 0xcb, 0x48, 0xcd, 0xc9, 0xc9, 0x57, 0x28, 0xcf, 0x2f, 0xca, 0x49, 0x1, 0x0, 0x85, 0x11, 0x4a, 0xd, 0xb, 0x0, 0x0, 0x0}; + // NOLINTEND std::vector input = vector_from_string(uncompressed); std::vector output(input.size()); - Decompress(&output, compressed, sizeof(compressed)); + Decompress(output, compressed, sizeof(compressed)); EXPECT_EQ(output, input); } TEST_F(SnappyDecompressTest, HelloWorld) { - constexpr char uncompressed[] = "hello world"; + std::string const uncompressed{"hello world"}; + // NOLINTBEGIN constexpr uint8_t compressed[] = { 0xb, 0x28, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64}; + // NOLINTEND std::vector input = vector_from_string(uncompressed); std::vector output(input.size()); - Decompress(&output, compressed, sizeof(compressed)); + Decompress(output, compressed, sizeof(compressed)); EXPECT_EQ(output, input); } TEST_F(SnappyDecompressTest, ShortLiteralAfterLongCopyAtStartup) { - constexpr char uncompressed[] = "Aaaaaaaaaaaah!"; + std::string const uncompressed{"Aaaaaaaaaaaah!"}; + // NOLINTBEGIN constexpr uint8_t compressed[] = {14, 0x0, 'A', 0x0, 'a', (10 - 4) * 4 + 1, 1, 0x4, 'h', '!'}; + // NOLINTEND std::vector input = vector_from_string(uncompressed); std::vector output(input.size()); - Decompress(&output, compressed, sizeof(compressed)); + Decompress(output, compressed, sizeof(compressed)); EXPECT_EQ(output, input); } TEST_F(BrotliDecompressTest, HelloWorld) { - constexpr char uncompressed[] = "hello world"; + std::string const uncompressed{"hello world"}; + // NOLINTBEGIN constexpr uint8_t compressed[] = { 0xb, 0x5, 0x80, 0x68, 0x65, 0x6c, 0x6c, 0x6f, 0x20, 0x77, 0x6f, 0x72, 0x6c, 0x64, 0x3}; + // NOLINTEND std::vector input = vector_from_string(uncompressed); std::vector output(input.size()); - Decompress(&output, compressed, sizeof(compressed)); + Decompress(output, compressed, sizeof(compressed)); EXPECT_EQ(output, input); } diff --git a/cpp/tests/io/csv_test.cpp b/cpp/tests/io/csv_test.cpp index 0028dd946e3..b265dcf9273 100644 --- a/cpp/tests/io/csv_test.cpp +++ b/cpp/tests/io/csv_test.cpp @@ -63,9 +63,9 @@ auto dtype() template using column_wrapper = - typename std::conditional, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>::type; + std::conditional_t, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>; using column = cudf::column; using table = cudf::table; using table_view = cudf::table_view; @@ -954,7 +954,7 @@ TEST_F(CsvReaderTest, Strings) ASSERT_EQ(type_id::STRING, view.column(1).type().id()); expect_column_data_equal( - std::vector{"abc def ghi", "\"jkl mno pqr\"", "stu \"\"vwx\"\" yz"}, + std::vector{"abc def ghi", "\"jkl mno pqr\"", R"(stu ""vwx"" yz)"}, view.column(1)); } @@ -1014,7 +1014,7 @@ TEST_F(CsvReaderTest, StringsQuotesIgnored) ASSERT_EQ(type_id::STRING, view.column(1).type().id()); expect_column_data_equal( - std::vector{"\"abcdef ghi\"", "\"jkl \"\"mno\"\" pqr\"", "stu \"vwx\" yz"}, + std::vector{"\"abcdef ghi\"", R"("jkl ""mno"" pqr")", "stu \"vwx\" yz"}, view.column(1)); } @@ -1830,7 +1830,7 @@ TEST_F(CsvReaderTest, StringsWithWriter) auto int_column = column_wrapper{10, 20, 30}; auto string_column = - column_wrapper{"abc def ghi", "\"jkl mno pqr\"", "stu \"\"vwx\"\" yz"}; + column_wrapper{"abc def ghi", "\"jkl mno pqr\"", R"(stu ""vwx"" yz)"}; cudf::table_view input_table(std::vector{int_column, string_column}); // TODO add quoting style flag? diff --git a/cpp/tests/io/json/json_test.cpp b/cpp/tests/io/json/json_test.cpp index 49ad0c408dc..cb6716f4a18 100644 --- a/cpp/tests/io/json/json_test.cpp +++ b/cpp/tests/io/json/json_test.cpp @@ -68,9 +68,9 @@ auto dtype() template using column_wrapper = - typename std::conditional, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>::type; + std::conditional_t, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>; cudf::test::TempDirTestEnvironment* const temp_env = static_cast( diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index 89e704f3ed3..cce0adbf317 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -43,9 +43,9 @@ template using column_wrapper = - typename std::conditional, - cudf::test::strings_column_wrapper, - cudf::test::fixed_width_column_wrapper>::type; + std::conditional_t, + cudf::test::strings_column_wrapper, + cudf::test::fixed_width_column_wrapper>; using str_col = column_wrapper; using bool_col = column_wrapper; @@ -1358,21 +1358,22 @@ TEST_P(OrcWriterTestStripes, StripeSize) cols.push_back(col.release()); auto const expected = std::make_unique(std::move(cols)); - auto validate = [&](std::vector const& orc_buffer) { - auto const expected_stripe_num = - std::max(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes); - auto const stats = cudf::io::read_parsed_orc_statistics( - cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); - EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); - - cudf::io::orc_reader_options in_opts = - cudf::io::orc_reader_options::builder( - cudf::io::source_info(orc_buffer.data(), orc_buffer.size())) - .use_index(false); - auto result = cudf::io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); - }; + auto validate = + [&, &size_bytes = size_bytes, &size_rows = size_rows](std::vector const& orc_buffer) { + auto const expected_stripe_num = + std::max(num_rows / size_rows, (num_rows * sizeof(int64_t)) / size_bytes); + auto const stats = cudf::io::read_parsed_orc_statistics( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())); + EXPECT_EQ(stats.stripes_stats.size(), expected_stripe_num); + + cudf::io::orc_reader_options in_opts = + cudf::io::orc_reader_options::builder( + cudf::io::source_info(orc_buffer.data(), orc_buffer.size())) + .use_index(false); + auto result = cudf::io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); + }; { std::vector out_buffer_chunked; diff --git a/cpp/tests/io/parquet_misc_test.cpp b/cpp/tests/io/parquet_misc_test.cpp index 8b03e94191e..f1286a00d22 100644 --- a/cpp/tests/io/parquet_misc_test.cpp +++ b/cpp/tests/io/parquet_misc_test.cpp @@ -98,7 +98,7 @@ TYPED_TEST(ParquetWriterDeltaTest, SupportedDeltaListSliced) // list constexpr int vals_per_row = 4; auto c1_offset_iter = cudf::detail::make_counting_transform_iterator( - 0, [vals_per_row](cudf::size_type idx) { return idx * vals_per_row; }); + 0, [](cudf::size_type idx) { return idx * vals_per_row; }); cudf::test::fixed_width_column_wrapper c1_offsets(c1_offset_iter, c1_offset_iter + num_rows + 1); cudf::test::fixed_width_column_wrapper c1_vals( diff --git a/cpp/tests/io/parquet_reader_test.cpp b/cpp/tests/io/parquet_reader_test.cpp index dc8e68b3a15..4a5309f3ba7 100644 --- a/cpp/tests/io/parquet_reader_test.cpp +++ b/cpp/tests/io/parquet_reader_test.cpp @@ -1189,15 +1189,12 @@ TEST_F(ParquetReaderTest, NestingOptimizationTest) cudf::test::fixed_width_column_wrapper values(value_iter, value_iter + num_values, validity); // ~256k values with num_nesting_levels = 16 - int total_values_produced = num_values; - auto prev_col = values.release(); + auto prev_col = values.release(); for (int idx = 0; idx < num_nesting_levels; idx++) { - auto const depth = num_nesting_levels - idx; auto const num_rows = (1 << (num_nesting_levels - idx)); auto offsets_iter = cudf::detail::make_counting_transform_iterator( - 0, [depth, rows_per_level](cudf::size_type i) { return i * rows_per_level; }); - total_values_produced += (num_rows + 1); + 0, [](cudf::size_type i) { return i * rows_per_level; }); cudf::test::fixed_width_column_wrapper offsets(offsets_iter, offsets_iter + num_rows + 1); diff --git a/cpp/tests/io/parquet_v2_test.cpp b/cpp/tests/io/parquet_v2_test.cpp index 7c305235ea6..a0b48f54854 100644 --- a/cpp/tests/io/parquet_v2_test.cpp +++ b/cpp/tests/io/parquet_v2_test.cpp @@ -1302,24 +1302,24 @@ TEST_P(ParquetV2Test, CheckColumnIndexListWithNulls) table_view expected({col0, col1, col2, col3, col4, col5, col6, col7}); std::array expected_null_counts{4, 4, 4, 6, 4, 6, 4, 5, 11}; - std::vector const expected_def_hists[] = {{1, 1, 2, 3}, - {1, 3, 10}, - {1, 1, 2, 10}, - {1, 1, 2, 2, 8}, - {1, 1, 1, 1, 10}, - {1, 1, 1, 1, 2, 8}, - {1, 3, 9}, - {1, 3, 1, 8}, - {1, 0, 4, 1, 1, 4, 9}}; - std::vector const expected_rep_hists[] = {{4, 3}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 6}, - {4, 4, 5}, - {4, 4, 5}, - {4, 6, 2, 8}}; + std::vector> const expected_def_hists = {{1, 1, 2, 3}, + {1, 3, 10}, + {1, 1, 2, 10}, + {1, 1, 2, 2, 8}, + {1, 1, 1, 1, 10}, + {1, 1, 1, 1, 2, 8}, + {1, 3, 9}, + {1, 3, 1, 8}, + {1, 0, 4, 1, 1, 4, 9}}; + std::vector> const expected_rep_hists = {{4, 3}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 6}, + {4, 4, 5}, + {4, 4, 5}, + {4, 6, 2, 8}}; auto const filepath = temp_env->get_temp_filepath("ColumnIndexListWithNulls.parquet"); auto out_opts = cudf::io::parquet_writer_options::builder(cudf::io::sink_info{filepath}, expected) diff --git a/cpp/tests/io/parquet_writer_test.cpp b/cpp/tests/io/parquet_writer_test.cpp index 8794f2ee304..6c5e9cdf07a 100644 --- a/cpp/tests/io/parquet_writer_test.cpp +++ b/cpp/tests/io/parquet_writer_test.cpp @@ -290,7 +290,8 @@ class custom_test_data_sink : public cudf::io::data_sink { CUDF_EXPECTS(outfile_.is_open(), "Cannot open output file"); } - ~custom_test_data_sink() override { flush(); } + // Marked as NOLINT because we are calling a virtual method in the destructor + ~custom_test_data_sink() override { flush(); } // NOLINT void host_write(void const* data, size_t size) override { @@ -981,13 +982,15 @@ TEST_F(ParquetWriterTest, CheckColumnIndexTruncation) TEST_F(ParquetWriterTest, BinaryColumnIndexTruncation) { - std::vector truncated_min[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, - {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + std::array, 3> truncated_min{ + {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}, + {0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}; - std::vector truncated_max[] = {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff}, - {0xff}, - {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}; + std::array, 3> truncated_max{ + {{0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xff}, + {0xff}, + {0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}}}; cudf::test::lists_column_wrapper col0{ {0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe, 0xfe}}; diff --git a/cpp/tests/join/distinct_join_tests.cpp b/cpp/tests/join/distinct_join_tests.cpp index 93754091b3f..178edc52dd3 100644 --- a/cpp/tests/join/distinct_join_tests.cpp +++ b/cpp/tests/join/distinct_join_tests.cpp @@ -314,7 +314,7 @@ TEST_F(DistinctJoinTest, EmptyBuildTableLeftJoin) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -362,7 +362,7 @@ TEST_F(DistinctJoinTest, EmptyProbeTableLeftJoin) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; this->compare_to_reference( build.view(), probe.view(), gather_map, probe.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -398,7 +398,7 @@ TEST_F(DistinctJoinTest, LeftJoinNoNulls) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; this->compare_to_reference( build.view(), probe.view(), gather_map, gold.view(), cudf::out_of_bounds_policy::NULLIFY); @@ -423,7 +423,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithNulls) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; column_wrapper col_gold_0{{3, 1, 2, 0, 2}, {true, true, true, true, true}}; strcol_wrapper col_gold_1({"s1", "s1", "", "s4", "s0"}, {true, true, false, true, true}); @@ -468,7 +468,7 @@ TEST_F(DistinctJoinTest, LeftJoinWithStructsAndNulls) auto distinct_join = cudf::distinct_hash_join{build.view(), probe.view()}; auto result = distinct_join.left_join(); - auto gather_map = std::pair{std::move(result), std::move(get_left_indices(result->size()))}; + auto gather_map = std::pair{std::move(result), get_left_indices(result->size())}; auto col0_gold_names_col = strcol_wrapper{ "Samuel Vimes", "Detritus", "Carrot Ironfoundersson", "Samuel Vimes", "Angua von Überwald"}; diff --git a/cpp/tests/merge/merge_string_test.cpp b/cpp/tests/merge/merge_string_test.cpp index 97979e79010..bea044496b3 100644 --- a/cpp/tests/merge/merge_string_test.cpp +++ b/cpp/tests/merge/merge_string_test.cpp @@ -97,7 +97,7 @@ TYPED_TEST(MergeStringTest, Merge1StringKeyColumns) "hi", "hj"}); - auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) { + auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) return 0; else @@ -296,7 +296,7 @@ TYPED_TEST(MergeStringTest, Merge1StringKeyNullColumns) true, false, false}); - auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) { + auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) return 0; else diff --git a/cpp/tests/merge/merge_test.cpp b/cpp/tests/merge/merge_test.cpp index 2e09f25b51f..6208d395f0a 100644 --- a/cpp/tests/merge/merge_test.cpp +++ b/cpp/tests/merge/merge_test.cpp @@ -349,7 +349,7 @@ TYPED_TEST(MergeTest_, Merge1KeyColumns) cudf::test::fixed_width_column_wrapper expectedDataWrap1(seq_out1, seq_out1 + outputRows); - auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [outputRows](auto row) { + auto seq_out2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) return 0; else @@ -452,7 +452,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns) cudf::size_type inputRows = 40; // data: 0 2 4 6 | valid: 1 1 1 0 - auto sequence1 = cudf::detail::make_counting_transform_iterator(0, [inputRows](auto row) { + auto sequence1 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) { return 0; // <- no shortcut to this can avoid compiler errors } else { @@ -465,7 +465,7 @@ TYPED_TEST(MergeTest_, Merge1KeyNullColumns) leftColWrap1(sequence1, sequence1 + inputRows, valid_sequence1); // data: 1 3 5 7 | valid: 1 1 1 0 - auto sequence2 = cudf::detail::make_counting_transform_iterator(0, [inputRows](auto row) { + auto sequence2 = cudf::detail::make_counting_transform_iterator(0, [](auto row) { if (cudf::type_to_id() == cudf::type_id::BOOL8) { return 1; } else diff --git a/cpp/tests/reductions/segmented_reduction_tests.cpp b/cpp/tests/reductions/segmented_reduction_tests.cpp index 19996f827cf..bc0321bd40a 100644 --- a/cpp/tests/reductions/segmented_reduction_tests.cpp +++ b/cpp/tests/reductions/segmented_reduction_tests.cpp @@ -1092,11 +1092,10 @@ TEST_F(SegmentedReductionTestUntyped, EmptyInputWithOffsets) auto aggregates = std::vector>>(); - aggregates.push_back(std::move(cudf::make_max_aggregation())); - aggregates.push_back(std::move(cudf::make_min_aggregation())); - aggregates.push_back(std::move(cudf::make_sum_aggregation())); - aggregates.push_back( - std::move(cudf::make_product_aggregation())); + aggregates.push_back(cudf::make_max_aggregation()); + aggregates.push_back(cudf::make_min_aggregation()); + aggregates.push_back(cudf::make_sum_aggregation()); + aggregates.push_back(cudf::make_product_aggregation()); auto output_type = cudf::data_type{cudf::type_to_id()}; for (auto&& agg : aggregates) { diff --git a/cpp/tests/replace/replace_tests.cpp b/cpp/tests/replace/replace_tests.cpp index 1858cd7782e..b12bf08520f 100644 --- a/cpp/tests/replace/replace_tests.cpp +++ b/cpp/tests/replace/replace_tests.cpp @@ -356,7 +356,7 @@ void test_replace(cudf::host_span input_column, for (size_t i = 0; i < values_to_replace_column.size(); i++) { size_t k = 0; - auto pred = [=, &k, &reference_result, &expected_valid, &isReplaced](T element) { + auto pred = [=, &k, &expected_valid, &isReplaced](T element) { bool toBeReplaced = false; if (!isReplaced[k]) { if (!input_has_nulls || expected_valid[k]) { @@ -503,7 +503,7 @@ TYPED_TEST(ReplaceTest, LargeScaleReplaceTest) const size_t REPLACE_SIZE = 10000; thrust::host_vector input_column(DATA_SIZE); - std::generate(std::begin(input_column), std::end(input_column), [REPLACE_SIZE]() { + std::generate(std::begin(input_column), std::end(input_column), []() { return std::rand() % (REPLACE_SIZE); }); diff --git a/cpp/tests/rolling/collect_ops_test.cpp b/cpp/tests/rolling/collect_ops_test.cpp index f702dc78371..165e0347785 100644 --- a/cpp/tests/rolling/collect_ops_test.cpp +++ b/cpp/tests/rolling/collect_ops_test.cpp @@ -214,7 +214,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowHonoursMinPeriods) *cudf::make_collect_list_aggregation()); auto expected_result_2 = cudf::test::lists_column_wrapper{ {{}, {0, 1, 2, 3}, {1, 2, 3, 4}, {2, 3, 4, 5}, {}, {}}, - cudf::detail::make_counting_transform_iterator(0, [num_elements](auto i) { + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0 && i < 4; })}.release(); @@ -338,7 +338,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods) cudf::test::fixed_width_column_wrapper{0, 0, 4, 8, 12, 12, 12}.release(); auto expected_num_rows = expected_offsets->size() - 1; auto null_mask_iter = cudf::detail::make_counting_transform_iterator( - cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; }); + cudf::size_type{0}, [](auto i) { return i > 0 && i < 4; }); auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); @@ -373,7 +373,7 @@ TYPED_TEST(TypedCollectListTest, RollingWindowWithNullInputsHonoursMinPeriods) cudf::test::fixed_width_column_wrapper{0, 0, 3, 5, 8, 8, 8}.release(); auto expected_num_rows = expected_offsets->size() - 1; auto null_mask_iter = cudf::detail::make_counting_transform_iterator( - cudf::size_type{0}, [expected_num_rows](auto i) { return i > 0 && i < 4; }); + cudf::size_type{0}, [](auto i) { return i > 0 && i < 4; }); auto [null_mask, null_count] = cudf::test::detail::make_null_mask(null_mask_iter, null_mask_iter + expected_num_rows); @@ -1499,7 +1499,7 @@ TYPED_TEST(TypedCollectSetTest, RollingWindowHonoursMinPeriods) *cudf::make_collect_set_aggregation()); auto expected_result_2 = cudf::test::lists_column_wrapper{ {{}, {0, 1, 2}, {1, 2, 4}, {2, 4, 5}, {}, {}}, - cudf::detail::make_counting_transform_iterator(0, [num_elements](auto i) { + cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i != 0 && i < 4; })}.release(); diff --git a/cpp/tests/rolling/offset_row_window_test.cpp b/cpp/tests/rolling/offset_row_window_test.cpp index ec726878b34..0eaab0c9f7a 100644 --- a/cpp/tests/rolling/offset_row_window_test.cpp +++ b/cpp/tests/rolling/offset_row_window_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,6 +41,11 @@ using cudf::test::iterators::nulls_at; auto constexpr null = int32_t{0}; // NULL representation for int32_t; +// clang-tidy doesn't think std::transform can handle a +// thrust::constant_iterator, so this is a workaround that uses nulls_at +// instead of no_nulls +auto no_nulls_list() { return nulls_at({}); } + struct OffsetRowWindowTest : public cudf::test::BaseFixture { static ints_column const _keys; // {0, 0, 0, 0, 0, 0, 1, 1, 1, 1}; static ints_column const _values; // {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}; @@ -210,7 +215,8 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Grouped_0_to_2) CUDF_TEST_EXPECT_COLUMNS_EQUAL( *run_rolling(*AGG_COLLECT_LIST), - lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, no_nulls}); + lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5}, {}, {7, 8}, {8, 9}, {9}, {}}, + no_nulls_list()}); } TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2) @@ -250,7 +256,7 @@ TEST_F(OffsetRowWindowTest, OffsetRowWindow_Ungrouped_0_to_2) CUDF_TEST_EXPECT_COLUMNS_EQUAL( *run_rolling(*AGG_COLLECT_LIST), lists_column{{{1, 2}, {2, 3}, {3, 4}, {4, 5}, {5, 6}, {6, 7}, {7, 8}, {8, 9}, {9}, {}}, - no_nulls}); + no_nulls_list()}); } // To test that preceding bounds are clamped correctly at group boundaries. diff --git a/cpp/tests/rolling/rolling_test.cpp b/cpp/tests/rolling/rolling_test.cpp index c2c22986975..6e0dc16dca9 100644 --- a/cpp/tests/rolling/rolling_test.cpp +++ b/cpp/tests/rolling/rolling_test.cpp @@ -541,7 +541,7 @@ class RollingTest : public cudf::test::BaseFixture { agg_op op; for (cudf::size_type i = 0; i < num_rows; i++) { - OutputType val = agg_op::template identity(); + auto val = agg_op::template identity(); // load sizes min_periods = std::max(min_periods, 1); // at least one observation is required diff --git a/cpp/tests/scalar/scalar_test.cpp b/cpp/tests/scalar/scalar_test.cpp index 2d37de920d5..2b79911a95a 100644 --- a/cpp/tests/scalar/scalar_test.cpp +++ b/cpp/tests/scalar/scalar_test.cpp @@ -190,7 +190,7 @@ TEST_F(ListScalarTest, MoveConstructorNonNested) EXPECT_EQ(mask_ptr, s2.validity_data()); EXPECT_EQ(data_ptr, s2.view().data()); - EXPECT_EQ(s.view().data(), nullptr); + EXPECT_EQ(s.view().data(), nullptr); // NOLINT } TEST_F(ListScalarTest, MoveConstructorNested) @@ -205,8 +205,8 @@ TEST_F(ListScalarTest, MoveConstructorNested) EXPECT_EQ(mask_ptr, s2.validity_data()); EXPECT_EQ(offset_ptr, s2.view().child(0).data()); EXPECT_EQ(data_ptr, s2.view().child(1).data()); - EXPECT_EQ(s.view().data(), nullptr); - EXPECT_EQ(s.view().num_children(), 0); + EXPECT_EQ(s.view().data(), nullptr); // NOLINT + EXPECT_EQ(s.view().num_children(), 0); // NOLINT } struct StructScalarTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/search/search_list_test.cpp b/cpp/tests/search/search_list_test.cpp index 48711c21715..7584003e800 100644 --- a/cpp/tests/search/search_list_test.cpp +++ b/cpp/tests/search/search_list_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -35,7 +35,6 @@ using strings_col = cudf::test::strings_column_wrapper; constexpr cudf::test::debug_output_level verbosity{cudf::test::debug_output_level::FIRST_ERROR}; constexpr int32_t null{0}; // Mark for null child elements at the current level -constexpr int32_t XXX{0}; // Mark for null elements at all levels using TestTypes = cudf::test::Concat> grand_child; - grand_child.push_back(std::move(col4.release())); + grand_child.push_back(col4.release()); auto child_col_2 = cudf::make_structs_column(6, std::move(grand_child), 0, rmm::device_buffer{}); child_columns2.push_back(std::move(child_col_2)); auto struct_col3 = diff --git a/cpp/tests/stream_compaction/unique_tests.cpp b/cpp/tests/stream_compaction/unique_tests.cpp index 4d7d23dc881..d5b6915b520 100644 --- a/cpp/tests/stream_compaction/unique_tests.cpp +++ b/cpp/tests/stream_compaction/unique_tests.cpp @@ -43,7 +43,6 @@ auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST; auto constexpr KEEP_LAST = cudf::duplicate_keep_option::KEEP_LAST; auto constexpr KEEP_NONE = cudf::duplicate_keep_option::KEEP_NONE; -auto constexpr NULL_EQUAL = cudf::null_equality::EQUAL; auto constexpr NULL_UNEQUAL = cudf::null_equality::UNEQUAL; using int32s_col = cudf::test::fixed_width_column_wrapper; diff --git a/cpp/tests/streams/stream_compaction_test.cpp b/cpp/tests/streams/stream_compaction_test.cpp index 443f4548b2c..07b2d77cc04 100644 --- a/cpp/tests/streams/stream_compaction_test.cpp +++ b/cpp/tests/streams/stream_compaction_test.cpp @@ -29,8 +29,6 @@ #include -auto constexpr null{0}; // null at current level -auto constexpr XXX{0}; // null pushed down from parent level auto constexpr NaN = std::numeric_limits::quiet_NaN(); auto constexpr KEEP_ANY = cudf::duplicate_keep_option::KEEP_ANY; auto constexpr KEEP_FIRST = cudf::duplicate_keep_option::KEEP_FIRST; diff --git a/cpp/tests/strings/integers_tests.cpp b/cpp/tests/strings/integers_tests.cpp index ce5f68de3c9..26bcfe8028d 100644 --- a/cpp/tests/strings/integers_tests.cpp +++ b/cpp/tests/strings/integers_tests.cpp @@ -30,6 +30,7 @@ #include #include +#include #include #include @@ -425,7 +426,7 @@ TYPED_TEST(StringsIntegerConvertTest, IntegerToHex) if (v == 0) { return std::string("00"); } // special handling for single-byte types if constexpr (std::is_same_v || std::is_same_v) { - char const hex_digits[16] = { + std::array const hex_digits = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'}; std::string str; str += hex_digits[(v & 0xF0) >> 4]; diff --git a/cpp/tests/structs/structs_column_tests.cpp b/cpp/tests/structs/structs_column_tests.cpp index f0010fc1ed9..219bd6d8b01 100644 --- a/cpp/tests/structs/structs_column_tests.cpp +++ b/cpp/tests/structs/structs_column_tests.cpp @@ -635,9 +635,8 @@ TEST_F(StructColumnWrapperTest, TestStructsColumnWithEmptyChild) auto mask_vec = std::vector{true, false, false}; auto [null_mask, null_count] = cudf::test::detail::make_null_mask(mask_vec.begin(), mask_vec.end()); - auto structs_col = - cudf::make_structs_column(num_rows, std::move(cols), null_count, std::move(null_mask)); - EXPECT_NO_THROW(structs_col->view()); + EXPECT_NO_THROW(auto structs_col = cudf::make_structs_column( + num_rows, std::move(cols), null_count, std::move(null_mask))); } CUDF_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/transform/bools_to_mask_test.cpp b/cpp/tests/transform/bools_to_mask_test.cpp index 215ca158f37..2684123c08a 100644 --- a/cpp/tests/transform/bools_to_mask_test.cpp +++ b/cpp/tests/transform/bools_to_mask_test.cpp @@ -32,7 +32,7 @@ struct MaskToNullTest : public cudf::test::BaseFixture { { cudf::test::fixed_width_column_wrapper input_column( input.begin(), input.end(), val.begin()); - std::transform(val.begin(), val.end(), input.begin(), input.begin(), std::logical_and()); + std::transform(val.begin(), val.end(), input.begin(), input.begin(), std::logical_and<>()); auto sample = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i; }); diff --git a/cpp/tests/transform/integration/unary_transform_test.cpp b/cpp/tests/transform/integration/unary_transform_test.cpp index 1785848ec77..0bdf5b321ac 100644 --- a/cpp/tests/transform/integration/unary_transform_test.cpp +++ b/cpp/tests/transform/integration/unary_transform_test.cpp @@ -47,7 +47,7 @@ void test_udf(char const* udf, Op op, Data data_init, cudf::size_type size, bool TEST_F(UnaryOperationIntegrationTest, Transform_FP32_FP32) { // c = a*a*a*a - char const* cuda = + std::string const cuda = R"***( __device__ inline void fdsf ( float* C, @@ -58,7 +58,7 @@ __device__ inline void fdsf ( } )***"; - char const* ptx = + std::string const ptx = R"***( // // Generated by NVIDIA NVVM Compiler @@ -101,17 +101,17 @@ __device__ inline void fdsf ( auto op = [](dtype a) { return a * a * a * a; }; auto data_init = [](cudf::size_type row) { return row % 3; }; - test_udf(cuda, op, data_init, 500, false); - test_udf(ptx, op, data_init, 500, true); + test_udf(cuda.c_str(), op, data_init, 500, false); + test_udf(ptx.c_str(), op, data_init, 500, true); } TEST_F(UnaryOperationIntegrationTest, Transform_INT32_INT32) { // c = a * a - a - char const cuda[] = + std::string const cuda = "__device__ inline void f(int* output,int input){*output = input*input - input;}"; - char const* ptx = + std::string const ptx = R"***( .func _Z1fPii( .param .b64 _Z1fPii_param_0, @@ -136,8 +136,8 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT32_INT32) auto op = [](dtype a) { return a * a - a; }; auto data_init = [](cudf::size_type row) { return row % 78; }; - test_udf(cuda, op, data_init, 500, false); - test_udf(ptx, op, data_init, 500, true); + test_udf(cuda.c_str(), op, data_init, 500, false); + test_udf(ptx.c_str(), op, data_init, 500, true); } TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8) @@ -145,7 +145,7 @@ TEST_F(UnaryOperationIntegrationTest, Transform_INT8_INT8) // Capitalize all the lower case letters // Assuming ASCII, the PTX code is compiled from the following CUDA code - char const cuda[] = + std::string const cuda = R"***( __device__ inline void f( signed char* output, @@ -159,7 +159,7 @@ __device__ inline void f( } )***"; - char const ptx[] = + std::string const ptx = R"***( .func _Z1fPcc( .param .b64 _Z1fPcc_param_0, @@ -191,15 +191,15 @@ __device__ inline void f( auto op = [](dtype a) { return std::toupper(a); }; auto data_init = [](cudf::size_type row) { return 'a' + (row % 26); }; - test_udf(cuda, op, data_init, 500, false); - test_udf(ptx, op, data_init, 500, true); + test_udf(cuda.c_str(), op, data_init, 500, false); + test_udf(ptx.c_str(), op, data_init, 500, true); } TEST_F(UnaryOperationIntegrationTest, Transform_Datetime) { // Add one day to timestamp in microseconds - char const cuda[] = + std::string const cuda = R"***( __device__ inline void f(cudf::timestamp_us* output, cudf::timestamp_us input) { @@ -217,7 +217,7 @@ __device__ inline void f(cudf::timestamp_us* output, cudf::timestamp_us input) auto random_eng = cudf::test::UniformRandomGenerator(0, 100000000); auto data_init = [&random_eng](cudf::size_type row) { return random_eng.generate(); }; - test_udf(cuda, op, data_init, 500, false); + test_udf(cuda.c_str(), op, data_init, 500, false); } } // namespace transformation From 2d02bdce9e3efae232dea4a5b8b2eecf5c0f8a93 Mon Sep 17 00:00:00 2001 From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com> Date: Mon, 7 Oct 2024 16:30:58 -0500 Subject: [PATCH 2/2] Implement `extract_datetime_component` in `libcudf`/`pylibcudf` (#16776) Closes https://github.com/rapidsai/cudf/issues/16735 Authors: - https://github.com/brandon-b-miller - Lawrence Mitchell (https://github.com/wence-) Approvers: - Matthew Murray (https://github.com/Matt711) - Lawrence Mitchell (https://github.com/wence-) - Bradley Dice (https://github.com/bdice) URL: https://github.com/rapidsai/cudf/pull/16776 --- cpp/include/cudf/datetime.hpp | 34 +++++ cpp/include/cudf/detail/datetime.hpp | 10 ++ cpp/src/datetime/datetime_ops.cu | 88 ++++++------ cpp/tests/datetime/datetime_ops_test.cpp | 130 ++++++++++++++++++ python/cudf/cudf/_lib/datetime.pyx | 39 +++++- python/cudf_polars/cudf_polars/dsl/expr.py | 40 ++++-- python/pylibcudf/pylibcudf/datetime.pxd | 7 + python/pylibcudf/pylibcudf/datetime.pyx | 71 ++++------ .../pylibcudf/libcudf/CMakeLists.txt | 5 +- .../pylibcudf/pylibcudf/libcudf/datetime.pxd | 17 +++ .../pylibcudf/pylibcudf/libcudf/datetime.pyx | 0 .../pylibcudf/tests/test_datetime.py | 55 ++++---- 12 files changed, 358 insertions(+), 138 deletions(-) create mode 100644 python/pylibcudf/pylibcudf/libcudf/datetime.pyx diff --git a/cpp/include/cudf/datetime.hpp b/cpp/include/cudf/datetime.hpp index 7359a0d5fde..1eaea5b6374 100644 --- a/cpp/include/cudf/datetime.hpp +++ b/cpp/include/cudf/datetime.hpp @@ -38,6 +38,22 @@ namespace datetime { * @file */ +/** + * @brief Types of datetime components that may be extracted. + */ +enum class datetime_component : uint8_t { + YEAR, + MONTH, + DAY, + WEEKDAY, + HOUR, + MINUTE, + SECOND, + MILLISECOND, + MICROSECOND, + NANOSECOND +}; + /** * @brief Extracts year from any datetime type and returns an int16_t * cudf::column. @@ -207,6 +223,24 @@ std::unique_ptr extract_nanosecond_fraction( rmm::cuda_stream_view stream = cudf::get_default_stream(), rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); +/** + * @brief Extracts the specified datetime component from any datetime type and + * returns an int16_t cudf::column. + * + * @param column cudf::column_view of the input datetime values + * @param component The datetime component to extract + * @param stream CUDA stream used for device memory operations and kernel launches + * @param mr Device memory resource used to allocate device memory of the returned column + * + * @returns cudf::column of the extracted int16_t datetime component + * @throw cudf::logic_error if input column datatype is not TIMESTAMP + */ +std::unique_ptr extract_datetime_component( + cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream = cudf::get_default_stream(), + rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref()); + /** @} */ // end of group /** * @addtogroup datetime_compute diff --git a/cpp/include/cudf/detail/datetime.hpp b/cpp/include/cudf/detail/datetime.hpp index 9db7e48498f..df3050d6494 100644 --- a/cpp/include/cudf/detail/datetime.hpp +++ b/cpp/include/cudf/detail/datetime.hpp @@ -115,6 +115,16 @@ std::unique_ptr extract_nanosecond_fraction(cudf::column_view cons rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr); +/** + * @copydoc cudf::extract_datetime_component(cudf::column_view const&, datetime_component, + * rmm::cuda_stream_view, rmm::device_async_resource_ref) + * + */ +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr); + /** * @copydoc cudf::last_day_of_month(cudf::column_view const&, rmm::cuda_stream_view, * rmm::device_async_resource_ref) diff --git a/cpp/src/datetime/datetime_ops.cu b/cpp/src/datetime/datetime_ops.cu index ddb0dbcd96d..a497cedb3bc 100644 --- a/cpp/src/datetime/datetime_ops.cu +++ b/cpp/src/datetime/datetime_ops.cu @@ -44,19 +44,6 @@ namespace cudf { namespace datetime { namespace detail { -enum class datetime_component { - INVALID = 0, - YEAR, - MONTH, - DAY, - WEEKDAY, - HOUR, - MINUTE, - SECOND, - MILLISECOND, - MICROSECOND, - NANOSECOND -}; enum class rounding_function { CEIL, ///< Rounds up to the next integer multiple of the provided frequency @@ -453,90 +440,70 @@ std::unique_ptr extract_year(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::YEAR, stream, mr); } std::unique_ptr extract_month(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MONTH, stream, mr); } std::unique_ptr extract_day(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::DAY, stream, mr); } std::unique_ptr extract_weekday(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::WEEKDAY, stream, mr); } std::unique_ptr extract_hour(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::HOUR, stream, mr); } std::unique_ptr extract_minute(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MINUTE, stream, mr); } std::unique_ptr extract_second(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::SECOND, stream, mr); } std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MILLISECOND, stream, mr); } std::unique_ptr extract_microsecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::MICROSECOND, stream, mr); } std::unique_ptr extract_nanosecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) { - return detail::apply_datetime_op< - detail::extract_component_operator, - cudf::type_id::INT16>(column, stream, mr); + return detail::extract_datetime_component(column, datetime_component::NANOSECOND, stream, mr); } std::unique_ptr last_day_of_month(column_view const& column, @@ -576,6 +543,32 @@ std::unique_ptr extract_quarter(column_view const& column, return apply_datetime_op(column, stream, mr); } +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ +#define extract(field) \ + case field: \ + return apply_datetime_op, cudf::type_id::INT16>( \ + column, stream, mr) + + switch (component) { + extract(datetime_component::YEAR); + extract(datetime_component::MONTH); + extract(datetime_component::DAY); + extract(datetime_component::WEEKDAY); + extract(datetime_component::HOUR); + extract(datetime_component::MINUTE); + extract(datetime_component::SECOND); + extract(datetime_component::MILLISECOND); + extract(datetime_component::MICROSECOND); + extract(datetime_component::NANOSECOND); + default: CUDF_FAIL("Unsupported datetime component."); + } +#undef extract +} + } // namespace detail std::unique_ptr ceil_datetimes(column_view const& column, @@ -661,6 +654,15 @@ std::unique_ptr extract_second(column_view const& column, return detail::extract_second(column, stream, mr); } +std::unique_ptr extract_datetime_component(cudf::column_view const& column, + datetime_component component, + rmm::cuda_stream_view stream, + rmm::device_async_resource_ref mr) +{ + CUDF_FUNC_RANGE(); + return detail::extract_datetime_component(column, component, stream, mr); +} + std::unique_ptr extract_millisecond_fraction(column_view const& column, rmm::cuda_stream_view stream, rmm::device_async_resource_ref mr) diff --git a/cpp/tests/datetime/datetime_ops_test.cpp b/cpp/tests/datetime/datetime_ops_test.cpp index 13577c4d0ea..603edb27c7c 100644 --- a/cpp/tests/datetime/datetime_ops_test.cpp +++ b/cpp/tests/datetime/datetime_ops_test.cpp @@ -196,6 +196,136 @@ TEST_F(BasicDatetimeOpsTest, TestExtractingDatetimeComponents) fixed_width_column_wrapper{0, 0, 0}); CUDF_TEST_EXPECT_COLUMNS_EQUAL(*extract_nanosecond_fraction(timestamps_ns), fixed_width_column_wrapper{766, 424, 623}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1965, 2018, 2023}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::YEAR), + fixed_width_column_wrapper{1969, 1970, 1970}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{10, 7, 1}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MONTH), + fixed_width_column_wrapper{12, 1, 1}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{26, 4, 25}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::DAY), + fixed_width_column_wrapper{31, 1, 1}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::WEEKDAY), + fixed_width_column_wrapper{2, 3, 3}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{14, 12, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{14, 12, 7}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::HOUR), + fixed_width_column_wrapper{23, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{1, 0, 32}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{1, 0, 32}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MINUTE), + fixed_width_column_wrapper{59, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{12, 0, 12}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{12, 0, 12}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::SECOND), + fixed_width_column_wrapper{59, 0, 0}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{762, 0, 929}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MILLISECOND), + fixed_width_column_wrapper{976, 23, 987}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::MICROSECOND), + fixed_width_column_wrapper{675, 432, 234}); + + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_D, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_s, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ms, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{0, 0, 0}); + CUDF_TEST_EXPECT_COLUMNS_EQUAL( + *extract_datetime_component(timestamps_ns, cudf::datetime::datetime_component::NANOSECOND), + fixed_width_column_wrapper{766, 424, 623}); } template diff --git a/python/cudf/cudf/_lib/datetime.pyx b/python/cudf/cudf/_lib/datetime.pyx index bc5e085ec39..d844466120f 100644 --- a/python/cudf/cudf/_lib/datetime.pyx +++ b/python/cudf/cudf/_lib/datetime.pyx @@ -13,12 +13,11 @@ from pylibcudf.libcudf.column.column_view cimport column_view from pylibcudf.libcudf.filling cimport calendrical_month_sequence from pylibcudf.libcudf.scalar.scalar cimport scalar from pylibcudf.libcudf.types cimport size_type +from pylibcudf.datetime import DatetimeComponent from cudf._lib.column cimport Column from cudf._lib.scalar cimport DeviceScalar -import pylibcudf as plc - @acquire_spill_lock() def add_months(Column col, Column months): @@ -40,9 +39,39 @@ def add_months(Column col, Column months): @acquire_spill_lock() def extract_datetime_component(Column col, object field): - result = Column.from_pylibcudf( - plc.datetime.extract_datetime_component(col.to_pylibcudf(mode="read"), field) - ) + + cdef unique_ptr[column] c_result + cdef column_view col_view = col.view() + cdef libcudf_datetime.datetime_component component + + component_names = { + "year": DatetimeComponent.YEAR, + "month": DatetimeComponent.MONTH, + "day": DatetimeComponent.DAY, + "weekday": DatetimeComponent.WEEKDAY, + "hour": DatetimeComponent.HOUR, + "minute": DatetimeComponent.MINUTE, + "second": DatetimeComponent.SECOND, + "millisecond": DatetimeComponent.MILLISECOND, + "microsecond": DatetimeComponent.MICROSECOND, + "nanosecond": DatetimeComponent.NANOSECOND, + } + if field == "day_of_year": + with nogil: + c_result = move(libcudf_datetime.day_of_year(col_view)) + elif field in component_names: + component = component_names[field] + with nogil: + c_result = move( + libcudf_datetime.extract_datetime_component( + col_view, + component + ) + ) + else: + raise ValueError(f"Invalid field: '{field}'") + + result = Column.from_unique_ptr(move(c_result)) if field == "weekday": # Pandas counts Monday-Sunday as 0-6 diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py index 54476b7fedc..a418560b31c 100644 --- a/python/cudf_polars/cudf_polars/dsl/expr.py +++ b/python/cudf_polars/cudf_polars/dsl/expr.py @@ -961,16 +961,16 @@ def do_evaluate( class TemporalFunction(Expr): __slots__ = ("name", "options", "children") _COMPONENT_MAP: ClassVar[dict[pl_expr.TemporalFunction, str]] = { - pl_expr.TemporalFunction.Year: "year", - pl_expr.TemporalFunction.Month: "month", - pl_expr.TemporalFunction.Day: "day", - pl_expr.TemporalFunction.WeekDay: "weekday", - pl_expr.TemporalFunction.Hour: "hour", - pl_expr.TemporalFunction.Minute: "minute", - pl_expr.TemporalFunction.Second: "second", - pl_expr.TemporalFunction.Millisecond: "millisecond", - pl_expr.TemporalFunction.Microsecond: "microsecond", - pl_expr.TemporalFunction.Nanosecond: "nanosecond", + pl_expr.TemporalFunction.Year: plc.datetime.DatetimeComponent.YEAR, + pl_expr.TemporalFunction.Month: plc.datetime.DatetimeComponent.MONTH, + pl_expr.TemporalFunction.Day: plc.datetime.DatetimeComponent.DAY, + pl_expr.TemporalFunction.WeekDay: plc.datetime.DatetimeComponent.WEEKDAY, + pl_expr.TemporalFunction.Hour: plc.datetime.DatetimeComponent.HOUR, + pl_expr.TemporalFunction.Minute: plc.datetime.DatetimeComponent.MINUTE, + pl_expr.TemporalFunction.Second: plc.datetime.DatetimeComponent.SECOND, + pl_expr.TemporalFunction.Millisecond: plc.datetime.DatetimeComponent.MILLISECOND, + pl_expr.TemporalFunction.Microsecond: plc.datetime.DatetimeComponent.MICROSECOND, + pl_expr.TemporalFunction.Nanosecond: plc.datetime.DatetimeComponent.NANOSECOND, } _non_child = ("dtype", "name", "options") children: tuple[Expr, ...] @@ -1003,8 +1003,12 @@ def do_evaluate( ] (column,) = columns if self.name == pl_expr.TemporalFunction.Microsecond: - millis = plc.datetime.extract_datetime_component(column.obj, "millisecond") - micros = plc.datetime.extract_datetime_component(column.obj, "microsecond") + millis = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MILLISECOND + ) + micros = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MICROSECOND + ) millis_as_micros = plc.binaryop.binary_operation( millis, plc.interop.from_arrow(pa.scalar(1_000, type=pa.int32())), @@ -1019,9 +1023,15 @@ def do_evaluate( ) return Column(total_micros) elif self.name == pl_expr.TemporalFunction.Nanosecond: - millis = plc.datetime.extract_datetime_component(column.obj, "millisecond") - micros = plc.datetime.extract_datetime_component(column.obj, "microsecond") - nanos = plc.datetime.extract_datetime_component(column.obj, "nanosecond") + millis = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MILLISECOND + ) + micros = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.MICROSECOND + ) + nanos = plc.datetime.extract_datetime_component( + column.obj, plc.datetime.DatetimeComponent.NANOSECOND + ) millis_as_nanos = plc.binaryop.binary_operation( millis, plc.interop.from_arrow(pa.scalar(1_000_000, type=pa.int32())), diff --git a/python/pylibcudf/pylibcudf/datetime.pxd b/python/pylibcudf/pylibcudf/datetime.pxd index 2fce48cf1b4..72ce680ba7a 100644 --- a/python/pylibcudf/pylibcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/datetime.pxd @@ -1,8 +1,15 @@ # Copyright (c) 2024, NVIDIA CORPORATION. +from pylibcudf.libcudf.datetime cimport datetime_component + from .column cimport Column cpdef Column extract_year( Column col ) + +cpdef Column extract_datetime_component( + Column col, + datetime_component component +) diff --git a/python/pylibcudf/pylibcudf/datetime.pyx b/python/pylibcudf/pylibcudf/datetime.pyx index e8e0caaf42d..784d29128bf 100644 --- a/python/pylibcudf/pylibcudf/datetime.pyx +++ b/python/pylibcudf/pylibcudf/datetime.pyx @@ -3,19 +3,14 @@ from libcpp.memory cimport unique_ptr from libcpp.utility cimport move from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.datetime cimport ( - day_of_year as cpp_day_of_year, - extract_day as cpp_extract_day, - extract_hour as cpp_extract_hour, - extract_microsecond_fraction as cpp_extract_microsecond_fraction, - extract_millisecond_fraction as cpp_extract_millisecond_fraction, - extract_minute as cpp_extract_minute, - extract_month as cpp_extract_month, - extract_nanosecond_fraction as cpp_extract_nanosecond_fraction, - extract_second as cpp_extract_second, - extract_weekday as cpp_extract_weekday, + datetime_component, + extract_datetime_component as cpp_extract_datetime_component, extract_year as cpp_extract_year, ) +from pylibcudf.libcudf.datetime import \ + datetime_component as DatetimeComponent # no-cython-lint + from .column cimport Column @@ -41,41 +36,29 @@ cpdef Column extract_year( result = move(cpp_extract_year(values.view())) return Column.from_libcudf(move(result)) +cpdef Column extract_datetime_component( + Column values, + datetime_component component +): + """ + Extract a datetime component from a datetime column. -def extract_datetime_component(Column col, str field): + For details, see :cpp:func:`cudf::extract_datetime_component`. - cdef unique_ptr[column] c_result + Parameters + ---------- + values : Column + The column to extract the component from. + component : DatetimeComponent + The datetime component to extract. - with nogil: - if field == "year": - c_result = move(cpp_extract_year(col.view())) - elif field == "month": - c_result = move(cpp_extract_month(col.view())) - elif field == "day": - c_result = move(cpp_extract_day(col.view())) - elif field == "weekday": - c_result = move(cpp_extract_weekday(col.view())) - elif field == "hour": - c_result = move(cpp_extract_hour(col.view())) - elif field == "minute": - c_result = move(cpp_extract_minute(col.view())) - elif field == "second": - c_result = move(cpp_extract_second(col.view())) - elif field == "millisecond": - c_result = move( - cpp_extract_millisecond_fraction(col.view()) - ) - elif field == "microsecond": - c_result = move( - cpp_extract_microsecond_fraction(col.view()) - ) - elif field == "nanosecond": - c_result = move( - cpp_extract_nanosecond_fraction(col.view()) - ) - elif field == "day_of_year": - c_result = move(cpp_day_of_year(col.view())) - else: - raise ValueError(f"Invalid datetime field: '{field}'") + Returns + ------- + Column + Column with the extracted component. + """ + cdef unique_ptr[column] result - return Column.from_libcudf(move(c_result)) + with nogil: + result = move(cpp_extract_datetime_component(values.view(), component)) + return Column.from_libcudf(move(result)) diff --git a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt index 2167616690f..15beaee47d4 100644 --- a/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/libcudf/CMakeLists.txt @@ -12,8 +12,9 @@ # the License. # ============================================================================= -set(cython_sources aggregation.pyx binaryop.pyx copying.pyx expressions.pyx labeling.pyx reduce.pyx - replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx +set(cython_sources + aggregation.pyx binaryop.pyx copying.pyx datetime.pyx expressions.pyx labeling.pyx reduce.pyx + replace.pyx round.pyx stream_compaction.pyx types.pyx unary.pyx ) set(linked_libraries cudf::cudf) diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd index a4465343197..73cdfb96af5 100644 --- a/python/pylibcudf/pylibcudf/libcudf/datetime.pxd +++ b/python/pylibcudf/pylibcudf/libcudf/datetime.pxd @@ -1,5 +1,6 @@ # Copyright (c) 2020-2024, NVIDIA CORPORATION. +from libc.stdint cimport uint8_t from libcpp.memory cimport unique_ptr from pylibcudf.libcudf.column.column cimport column from pylibcudf.libcudf.column.column_view cimport column_view @@ -7,6 +8,18 @@ from pylibcudf.libcudf.scalar.scalar cimport scalar cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: + cpdef enum class datetime_component(uint8_t): + YEAR + MONTH + DAY + WEEKDAY + HOUR + MINUTE + SECOND + MILLISECOND + MICROSECOND + NANOSECOND + cdef unique_ptr[column] extract_year(const column_view& column) except + cdef unique_ptr[column] extract_month(const column_view& column) except + cdef unique_ptr[column] extract_day(const column_view& column) except + @@ -23,6 +36,10 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil: cdef unique_ptr[column] extract_nanosecond_fraction( const column_view& column ) except + + cdef unique_ptr[column] extract_datetime_component( + const column_view& column, + datetime_component component + ) except + ctypedef enum rounding_frequency "cudf::datetime::rounding_frequency": DAY "cudf::datetime::rounding_frequency::DAY" diff --git a/python/pylibcudf/pylibcudf/libcudf/datetime.pyx b/python/pylibcudf/pylibcudf/libcudf/datetime.pyx new file mode 100644 index 00000000000..e69de29bb2d diff --git a/python/pylibcudf/pylibcudf/tests/test_datetime.py b/python/pylibcudf/pylibcudf/tests/test_datetime.py index 89c96829e71..75930d59058 100644 --- a/python/pylibcudf/pylibcudf/tests/test_datetime.py +++ b/python/pylibcudf/pylibcudf/tests/test_datetime.py @@ -1,7 +1,6 @@ # Copyright (c) 2024, NVIDIA CORPORATION. import datetime -import functools import pyarrow as pa import pyarrow.compute as pc @@ -10,19 +9,6 @@ from utils import assert_column_eq -@pytest.fixture -def date_column(has_nulls): - values = [ - datetime.date(1999, 1, 1), - datetime.date(2024, 10, 12), - datetime.date(1, 1, 1), - datetime.date(9999, 1, 1), - ] - if has_nulls: - values[2] = None - return plc.interop.from_arrow(pa.array(values, type=pa.date32())) - - @pytest.fixture(scope="module", params=["s", "ms", "us", "ns"]) def datetime_column(has_nulls, request): values = [ @@ -40,24 +26,35 @@ def datetime_column(has_nulls, request): ) -@pytest.mark.parametrize( - "component, pc_fun", - [ - ("year", pc.year), - ("month", pc.month), - ("day", pc.day), - ("weekday", functools.partial(pc.day_of_week, count_from_zero=False)), - ("hour", pc.hour), - ("minute", pc.minute), - ("second", pc.second), - ("millisecond", pc.millisecond), - ("microsecond", pc.microsecond), - ("nanosecond", pc.nanosecond), +@pytest.fixture( + params=[ + ("year", plc.datetime.DatetimeComponent.YEAR), + ("month", plc.datetime.DatetimeComponent.MONTH), + ("day", plc.datetime.DatetimeComponent.DAY), + ("day_of_week", plc.datetime.DatetimeComponent.WEEKDAY), + ("hour", plc.datetime.DatetimeComponent.HOUR), + ("minute", plc.datetime.DatetimeComponent.MINUTE), + ("second", plc.datetime.DatetimeComponent.SECOND), + ("millisecond", plc.datetime.DatetimeComponent.MILLISECOND), + ("microsecond", plc.datetime.DatetimeComponent.MICROSECOND), + ("nanosecond", plc.datetime.DatetimeComponent.NANOSECOND), ], + ids=lambda x: x[0], ) -def test_extraction(datetime_column, component, pc_fun): +def component(request): + return request.param + + +def test_extract_datetime_component(datetime_column, component): + attr, component = component + kwargs = {} + if attr == "day_of_week": + kwargs = {"count_from_zero": False} got = plc.datetime.extract_datetime_component(datetime_column, component) # libcudf produces an int16, arrow produces an int64 - expect = pc_fun(plc.interop.to_arrow(datetime_column)).cast(pa.int16()) + + expect = getattr(pc, attr)( + plc.interop.to_arrow(datetime_column), **kwargs + ).cast(pa.int16()) assert_column_eq(expect, got)