Skip to content

Commit

Permalink
Merge branch 'branch-24.12' into cudftestutil-header
Browse files Browse the repository at this point in the history
  • Loading branch information
lamarrr authored Oct 7, 2024
2 parents b99b315 + 7e1e475 commit 6d9d8d4
Show file tree
Hide file tree
Showing 51 changed files with 344 additions and 228 deletions.
43 changes: 36 additions & 7 deletions cpp/.clang-tidy
Original file line number Diff line number Diff line change
@@ -1,18 +1,47 @@
---
# Notes on disabled checks
# ------------------------
# modernize-use-equals-default:
# auto-fix is broken (doesn't insert =default correctly)
# modernize-concat-nested-namespaces:
# auto-fix is broken (can delete code)
# modernize-use-trailing-return-type:
# Purely stylistic, no benefit to rewriting everything
# modernize-return-braced-init-list:
# Stylistically we prefer to see the return type at the return site.
# See https://github.com/rapidsai/cudf/pull/16956#pullrequestreview-2341891672
# for more information.
# modernize-use-bool-literals:
# Our tests use int flags for validity masks extensively and we prefer that
# clang-analyzer-cplusplus.NewDeleteLeaks:
# This check has numerous bugs, see
# https://github.com/llvm/llvm-project/issues?q=is%3Aissue+is%3Aopen+newdeleteleaks
# We encounter at least
# https://github.com/llvm/llvm-project/issues/60896
# https://github.com/llvm/llvm-project/issues/69602
# clang-analyzer-optin.core.EnumCastOutOfRange
# We use enums as flags in multiple cases and this check makes ORing flags invalid
# clang-analyzer-optin.cplusplus.UninitializedObject'
# There is an error in nanoarrow that none of the clang-tidy filters (i.e.
# header-filter and exclude-header-filter are able to properly avoid. This
# merits further investigation
#
# We need to verify that broken checks are still broken
Checks:
'modernize-*,
-modernize-use-equals-default,
-modernize-concat-nested-namespaces,
-modernize-use-trailing-return-type,
-modernize-use-bool-literals'

# -modernize-use-equals-default # auto-fix is broken (doesn't insert =default correctly)
# -modernize-concat-nested-namespaces # auto-fix is broken (can delete code)
# -modernize-use-trailing-return-type # just a preference
-modernize-return-braced-init-list,
-modernize-use-bool-literals,
clang-analyzer-*,
-clang-analyzer-cplusplus.NewDeleteLeaks,
-clang-analyzer-optin.core.EnumCastOutOfRange,
-clang-analyzer-optin.cplusplus.UninitializedObject'

WarningsAsErrors: ''
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
HeaderFilterRegex: '.*cudf/cpp/(src|include|tests).*'
ExcludeHeaderFilterRegex: '.*(Message_generated.h|Schema_generated.h|brotli_dict.hpp|unbz2.hpp|cxxopts.hpp).*'
FormatStyle: none
CheckOptions:
- key: modernize-loop-convert.MaxCopySize
Expand Down
8 changes: 5 additions & 3 deletions cpp/cmake/thirdparty/get_nanoarrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -14,15 +14,17 @@

# This function finds nanoarrow and sets any additional necessary environment variables.
function(find_and_configure_nanoarrow)
include(${rapids-cmake-dir}/cpm/package_override.cmake)

set(cudf_patch_dir "${CMAKE_CURRENT_FUNCTION_LIST_DIR}/patches")
rapids_cpm_package_override("${cudf_patch_dir}/nanoarrow_override.json")

# Currently we need to always build nanoarrow so we don't pickup a previous installed version
set(CPM_DOWNLOAD_nanoarrow ON)
rapids_cpm_find(
nanoarrow 0.6.0.dev
GLOBAL_TARGETS nanoarrow
CPM_ARGS
GIT_REPOSITORY https://github.com/apache/arrow-nanoarrow.git
GIT_TAG 1e2664a70ec14907409cadcceb14d79b9670bcdb
GIT_SHALLOW FALSE
OPTIONS "BUILD_SHARED_LIBS OFF" "NANOARROW_NAMESPACE cudf"
)
set_target_properties(nanoarrow PROPERTIES POSITION_INDEPENDENT_CODE ON)
Expand Down
38 changes: 38 additions & 0 deletions cpp/cmake/thirdparty/patches/nanoarrow_clang_tidy_compliance.diff
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
diff --git a/src/nanoarrow/common/inline_buffer.h b/src/nanoarrow/common/inline_buffer.h
index caa6be4..70ec8a2 100644
--- a/src/nanoarrow/common/inline_buffer.h
+++ b/src/nanoarrow/common/inline_buffer.h
@@ -347,7 +347,7 @@ static inline void _ArrowBitsUnpackInt32(const uint8_t word, int32_t* out) {
}

static inline void _ArrowBitmapPackInt8(const int8_t* values, uint8_t* out) {
- *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) |
+ *out = (uint8_t)(values[0] | ((values[1] + 0x1) & 0x2) | ((values[2] + 0x3) & 0x4) | // NOLINT
((values[3] + 0x7) & 0x8) | ((values[4] + 0xf) & 0x10) |
((values[5] + 0x1f) & 0x20) | ((values[6] + 0x3f) & 0x40) |
((values[7] + 0x7f) & 0x80));
@@ -471,13 +471,13 @@ static inline void ArrowBitsSetTo(uint8_t* bits, int64_t start_offset, int64_t l
// set bits within a single byte
const uint8_t only_byte_mask =
i_end % 8 == 0 ? first_byte_mask : (uint8_t)(first_byte_mask | last_byte_mask);
- bits[bytes_begin] &= only_byte_mask;
+ bits[bytes_begin] &= only_byte_mask; // NOLINT
bits[bytes_begin] |= (uint8_t)(fill_byte & ~only_byte_mask);
return;
}

// set/clear trailing bits of first byte
- bits[bytes_begin] &= first_byte_mask;
+ bits[bytes_begin] &= first_byte_mask; // NOLINT
bits[bytes_begin] |= (uint8_t)(fill_byte & ~first_byte_mask);

if (bytes_end - bytes_begin > 2) {
@@ -637,7 +637,7 @@ static inline void ArrowBitmapAppendInt8Unsafe(struct ArrowBitmap* bitmap,
n_remaining -= n_full_bytes * 8;
if (n_remaining > 0) {
// Zero out the last byte
- *out_cursor = 0x00;
+ *out_cursor = 0x00; // NOLINT
for (int i = 0; i < n_remaining; i++) {
ArrowBitSetTo(bitmap->buffer.data, out_i_cursor++, values_cursor[i]);
}
18 changes: 18 additions & 0 deletions cpp/cmake/thirdparty/patches/nanoarrow_override.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@

{
"packages" : {
"nanoarrow" : {
"version" : "0.6.0.dev",
"git_url" : "https://github.com/apache/arrow-nanoarrow.git",
"git_tag" : "1e2664a70ec14907409cadcceb14d79b9670bcdb",
"git_shallow" : false,
"patches" : [
{
"file" : "${current_json_dir}/nanoarrow_clang_tidy_compliance.diff",
"issue" : "https://github.com/apache/arrow-nanoarrow/issues/537",
"fixed_in" : ""
}
]
}
}
}
2 changes: 1 addition & 1 deletion cpp/include/cudf/table/table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -148,7 +148,7 @@ class table {
std::vector<column_view> columns(std::distance(begin, end));
std::transform(
begin, end, columns.begin(), [this](auto index) { return _columns.at(index)->view(); });
return table_view(columns);
return table_view{columns};
}

/**
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/table/table_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -241,7 +241,7 @@ class table_view : public detail::table_view_base<column_view> {
{
std::vector<column_view> columns(std::distance(begin, end));
std::transform(begin, end, columns.begin(), [this](auto index) { return this->column(index); });
return table_view(columns);
return table_view{columns};
}

/**
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/avro/avro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include "avro.hpp"

#include <array>
#include <cstring>
#include <unordered_map>

Expand Down Expand Up @@ -302,7 +303,7 @@ bool schema_parser::parse(std::vector<schema_entry>& schema, std::string const&
// Empty schema
if (json_str == "[]") return true;

char depthbuf[MAX_SCHEMA_DEPTH];
std::array<char, MAX_SCHEMA_DEPTH> depthbuf;
int depth = 0, parent_idx = -1, entry_idx = -1;
json_state_e state = state_attrname;
std::string str;
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/io/orc/orc.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -258,7 +258,7 @@ class ProtobufReader {

private:
template <int index>
friend class FunctionSwitchImpl;
friend struct FunctionSwitchImpl;

void skip_bytes(size_t bytecnt)
{
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/io/parquet/compact_protocol_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,8 @@ class parquet_field_string : public parquet_field {
* @return True if field types mismatch or if the process of reading a
* string fails
*/
struct parquet_field_string_list : public parquet_field_list<std::string, FieldType::BINARY> {
class parquet_field_string_list : public parquet_field_list<std::string, FieldType::BINARY> {
public:
parquet_field_string_list(int f, std::vector<std::string>& v) : parquet_field_list(f, v)
{
auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
Expand Down Expand Up @@ -396,8 +397,9 @@ class parquet_field_binary : public parquet_field {
* @return True if field types mismatch or if the process of reading a
* binary fails
*/
struct parquet_field_binary_list
class parquet_field_binary_list
: public parquet_field_list<std::vector<uint8_t>, FieldType::BINARY> {
public:
parquet_field_binary_list(int f, std::vector<std::vector<uint8_t>>& v) : parquet_field_list(f, v)
{
auto const read_value = [&val = v](uint32_t i, CompactProtocolReader* cpr) {
Expand Down
6 changes: 4 additions & 2 deletions cpp/src/io/utilities/data_sink.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,8 @@ class file_sink : public data_sink {
}
}

~file_sink() override { flush(); }
// Marked as NOLINT because we are calling a virtual method in the destructor
~file_sink() override { flush(); } // NOLINT

void host_write(void const* data, size_t size) override
{
Expand Down Expand Up @@ -114,7 +115,8 @@ class host_buffer_sink : public data_sink {
public:
explicit host_buffer_sink(std::vector<char>* buffer) : buffer_(buffer) {}

~host_buffer_sink() override { flush(); }
// Marked as NOLINT because we are calling a virtual method in the destructor
~host_buffer_sink() override { flush(); } // NOLINT

void host_write(void const* data, size_t size) override
{
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/io/utilities/hostdevice_span.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,8 +43,8 @@ class hostdevice_span {
template <typename C,
// Only supported containers of types convertible to T
std::enable_if_t<std::is_convertible_v<
std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],
T (*)[]>>* = nullptr>
std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[], // NOLINT
T (*)[]>>* = nullptr> // NOLINT
constexpr hostdevice_span(C& in) : hostdevice_span(in.host_ptr(), in.device_ptr(), in.size())
{
}
Expand All @@ -54,8 +54,8 @@ class hostdevice_span {
template <typename C,
// Only supported containers of types convertible to T
std::enable_if_t<std::is_convertible_v<
std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[],
T (*)[]>>* = nullptr>
std::remove_pointer_t<decltype(std::declval<C&>().host_ptr())> (*)[], // NOLINT
T (*)[]>>* = nullptr> // NOLINT
constexpr hostdevice_span(C const& in)
: hostdevice_span(in.host_ptr(), in.device_ptr(), in.size())
{
Expand Down
13 changes: 11 additions & 2 deletions cpp/src/utilities/host_memory.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -115,12 +115,19 @@ class fixed_pinned_pool_memory_resource {
return !operator==(other);
}

friend void get_property(fixed_pinned_pool_memory_resource const&,
// clang-tidy will complain about this function because it is completely
// unused at runtime and only exist for tag introspection by CCCL, so we
// ignore linting. This masks a real issue if we ever want to compile with
// clang, though, which is that the function will actually be compiled out by
// clang. If cudf were ever to try to support clang as a compile we would
// need to force the compiler to emit this symbol. The same goes for the
// other get_property definitions in this file.
friend void get_property(fixed_pinned_pool_memory_resource const&, // NOLINT
cuda::mr::device_accessible) noexcept
{
}

friend void get_property(fixed_pinned_pool_memory_resource const&,
friend void get_property(fixed_pinned_pool_memory_resource const&, // NOLINT
cuda::mr::host_accessible) noexcept
{
}
Expand Down Expand Up @@ -235,7 +242,9 @@ class new_delete_memory_resource {

bool operator!=(new_delete_memory_resource const& other) const { return !operator==(other); }

// NOLINTBEGIN
friend void get_property(new_delete_memory_resource const&, cuda::mr::host_accessible) noexcept {}
// NOLINTEND
};

static_assert(cuda::mr::resource_with<new_delete_memory_resource, cuda::mr::host_accessible>,
Expand Down
6 changes: 5 additions & 1 deletion cpp/tests/binaryop/binop-compiled-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -557,7 +557,11 @@ auto NullOp_Result(cudf::column_view lhs, cudf::column_view rhs)
std::transform(thrust::make_counting_iterator(0),
thrust::make_counting_iterator(lhs.size()),
result.begin(),
[&lhs_data, &lhs_mask, &rhs_data, &rhs_mask, &result_mask](auto i) -> TypeOut {
[&lhs_data = lhs_data,
&lhs_mask = lhs_mask,
&rhs_data = rhs_data,
&rhs_mask = rhs_mask,
&result_mask = result_mask](auto i) -> TypeOut {
auto lhs_valid = lhs_mask.data() and cudf::bit_is_set(lhs_mask.data(), i);
auto rhs_valid = rhs_mask.data() and cudf::bit_is_set(rhs_mask.data(), i);
bool output_valid = lhs_valid or rhs_valid;
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/binaryop/util/operation.h
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ struct Mul {
std::enable_if_t<(cudf::is_duration_t<LhsT>::value && std::is_integral_v<RhsT>) ||
(cudf::is_duration_t<RhsT>::value && std::is_integral_v<LhsT>),
void>* = nullptr>
OutT DurationProduct(LhsT x, RhsT y) const
[[nodiscard]] OutT DurationProduct(LhsT x, RhsT y) const
{
return x * y;
}
Expand Down Expand Up @@ -128,7 +128,7 @@ struct Div {
typename LhsT,
typename RhsT,
std::enable_if_t<(std::is_integral_v<RhsT> || cudf::is_duration<RhsT>()), void>* = nullptr>
OutT DurationDivide(LhsT x, RhsT y) const
[[nodiscard]] OutT DurationDivide(LhsT x, RhsT y) const
{
return x / y;
}
Expand Down
4 changes: 2 additions & 2 deletions cpp/tests/column/column_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -340,7 +340,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorNoMask)

cudf::column moved_to{std::move(original)};

EXPECT_EQ(0, original.size());
EXPECT_EQ(0, original.size()); // NOLINT
EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type());

verify_column_views(moved_to);
Expand All @@ -359,7 +359,7 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask)
cudf::column moved_to{std::move(original)};
verify_column_views(moved_to);

EXPECT_EQ(0, original.size());
EXPECT_EQ(0, original.size()); // NOLINT
EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, original.type());

// Verify move
Expand Down
12 changes: 7 additions & 5 deletions cpp/tests/copying/slice_tests.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
#include <cudf/utilities/type_dispatcher.hpp>
#include <cudf/wrappers/timestamps.hpp>

#include <array>
#include <stdexcept>
#include <string>
#include <vector>
Expand Down Expand Up @@ -370,11 +371,12 @@ TEST_F(SliceStringTableTest, StringWithNulls)
auto valids =
cudf::detail::make_counting_transform_iterator(0, [](auto i) { return i % 2 == 0; });

std::vector<std::string> strings[2] = {
{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
{"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}};
cudf::test::strings_column_wrapper sw[2] = {{strings[0].begin(), strings[0].end(), valids},
{strings[1].begin(), strings[1].end(), valids}};
std::vector<std::vector<std::string>> strings{
{{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"},
{"", "one", "two", "three", "four", "five", "six", "seven", "eight", "nine"}}};
std::array<cudf::test::strings_column_wrapper, 2> sw{
{{strings[0].begin(), strings[0].end(), valids},
{strings[1].begin(), strings[1].end(), valids}}};

std::vector<std::unique_ptr<cudf::column>> scols;
scols.push_back(sw[0].release());
Expand Down
Loading

0 comments on commit 6d9d8d4

Please sign in to comment.