From c016b58b24e63468e9110a6ca82adfc5fd61202d Mon Sep 17 00:00:00 2001 From: Bradley Dice Date: Tue, 19 Sep 2023 07:50:20 -0500 Subject: [PATCH] Update to clang 16.0.6. (#14120) This PR updates cudf to use clang 16.0.6. The previous version 16.0.1 has some minor formatting issues affecting several RAPIDS repos. Authors: - Bradley Dice (https://github.com/bdice) Approvers: - Mark Harris (https://github.com/harrism) - David Wendt (https://github.com/davidwendt) URL: https://github.com/rapidsai/cudf/pull/14120 --- .pre-commit-config.yaml | 2 +- cpp/benchmarks/iterator/iterator.cu | 2 +- .../stream_compaction/apply_boolean_mask.cpp | 4 +- cpp/benchmarks/string/char_types.cpp | 2 +- cpp/benchmarks/string/extract.cpp | 2 +- .../cudf/column/column_device_view.cuh | 2 +- cpp/include/cudf/detail/copy_if.cuh | 2 +- cpp/include/cudf/detail/indexalator.cuh | 4 +- cpp/include/cudf/detail/join.hpp | 4 +- cpp/include/cudf/fixed_point/fixed_point.hpp | 2 +- cpp/include/cudf/groupby.hpp | 4 +- cpp/include/cudf/io/csv.hpp | 2 +- cpp/include/cudf/io/json.hpp | 2 +- cpp/include/cudf/strings/detail/utf8.hpp | 36 ++-- cpp/include/cudf/table/row_operators.cuh | 4 +- cpp/include/cudf/table/table_view.hpp | 2 +- cpp/include/cudf/wrappers/dictionary.hpp | 2 +- cpp/include/cudf_test/base_fixture.hpp | 4 +- cpp/include/nvtext/subword_tokenize.hpp | 2 +- cpp/scripts/run-clang-tidy.py | 2 +- cpp/src/copying/contiguous_split.cu | 8 +- cpp/src/groupby/sort/functors.hpp | 10 +- cpp/src/io/avro/avro_gpu.cu | 2 +- cpp/src/io/comp/cpu_unbz2.cpp | 2 +- cpp/src/io/comp/debrotli.cu | 4 +- cpp/src/io/comp/gpuinflate.cu | 18 +- cpp/src/io/comp/uncomp.cpp | 10 +- cpp/src/io/comp/unsnap.cu | 2 +- cpp/src/io/json/json_column.cu | 2 +- cpp/src/io/json/nested_json_gpu.cu | 160 +++++++++--------- cpp/src/io/orc/orc_gpu.hpp | 2 +- cpp/src/io/orc/stripe_data.cu | 4 +- .../io/parquet/compact_protocol_reader.cpp | 2 +- .../io/parquet/compact_protocol_writer.cpp | 2 +- cpp/src/io/parquet/delta_binary.cuh | 20 +-- cpp/src/io/parquet/page_delta_decode.cu | 2 +- cpp/src/io/parquet/parquet.hpp | 4 +- cpp/src/io/parquet/parquet_gpu.hpp | 22 +-- cpp/src/io/parquet/reader_impl_preprocess.cu | 2 +- cpp/src/join/join.cu | 4 +- .../quantiles/tdigest/tdigest_aggregation.cu | 2 +- .../rolling/detail/rolling_collect_list.cuh | 2 +- cpp/src/strings/char_types/char_types.cu | 4 +- cpp/src/strings/convert/convert_datetime.cu | 6 +- cpp/src/strings/convert/convert_durations.cu | 2 +- cpp/src/strings/convert/convert_floats.cu | 6 +- cpp/src/strings/convert/convert_integers.cu | 2 +- cpp/src/strings/convert/convert_ipv4.cu | 2 +- cpp/src/strings/convert/convert_urls.cu | 4 +- cpp/src/strings/json/json_path.cu | 2 +- cpp/src/strings/regex/regcomp.cpp | 14 +- cpp/src/strings/regex/regcomp.h | 8 +- cpp/src/strings/regex/regex.cuh | 18 +- cpp/src/strings/regex/regex.inl | 10 +- cpp/src/strings/replace/replace_re.cu | 2 +- cpp/src/strings/split/partition.cu | 2 +- cpp/src/strings/split/split.cuh | 2 +- cpp/src/strings/split/split_re.cu | 2 +- cpp/src/strings/utilities.cu | 6 +- cpp/src/text/normalize.cu | 4 +- cpp/src/text/replace.cu | 2 +- cpp/src/text/subword/bpe_tokenizer.cu | 2 +- cpp/src/text/subword/load_merges_file.cu | 2 +- cpp/src/text/utilities/tokenize_ops.cuh | 2 +- cpp/tests/groupby/merge_lists_tests.cpp | 2 +- cpp/tests/groupby/merge_sets_tests.cpp | 12 +- cpp/tests/io/parquet_test.cpp | 6 +- cpp/tests/lists/reverse_tests.cpp | 8 +- .../difference_distinct_tests.cpp | 2 +- .../intersect_distinct_tests.cpp | 4 +- .../set_operations/union_distinct_tests.cpp | 4 +- .../stream_compaction/distinct_tests.cpp | 10 +- .../reshape/interleave_columns_tests.cpp | 2 +- .../rolling/range_rolling_window_test.cpp | 2 +- cpp/tests/sort/segmented_sort_tests.cpp | 2 +- cpp/tests/strings/chars_types_tests.cpp | 12 +- cpp/tests/strings/durations_tests.cpp | 8 +- cpp/tests/utilities/column_utilities.cu | 2 +- 78 files changed, 276 insertions(+), 276 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 238e5b44030..7e44091774f 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -63,7 +63,7 @@ repos: # Explicitly specify the pyproject.toml at the repo root, not per-project. args: ["--config=pyproject.toml"] - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v16.0.1 + rev: v16.0.6 hooks: - id: clang-format types_or: [c, c++, cuda] diff --git a/cpp/benchmarks/iterator/iterator.cu b/cpp/benchmarks/iterator/iterator.cu index 7acf24c30a5..dcd13cf62c4 100644 --- a/cpp/benchmarks/iterator/iterator.cu +++ b/cpp/benchmarks/iterator/iterator.cu @@ -145,7 +145,7 @@ void BM_iterator(benchmark::State& state) cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0 if (cub_or_thrust) { if (raw_or_iterator) { - raw_stream_bench_cub(hasnull_F, dev_result); // driven by raw pointer + raw_stream_bench_cub(hasnull_F, dev_result); // driven by raw pointer } else { iterator_bench_cub(hasnull_F, dev_result); // driven by riterator without nulls } diff --git a/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp index a6feaf04842..f78aa9fa654 100644 --- a/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp +++ b/cpp/benchmarks/stream_compaction/apply_boolean_mask.cpp @@ -59,8 +59,8 @@ void calculate_bandwidth(benchmark::State& state, cudf::size_type num_columns) int64_t const column_bytes_in = column_bytes_out; // we only read unmasked inputs int64_t const bytes_read = - (column_bytes_in + validity_bytes_in) * num_columns + // reading columns - mask_size; // reading boolean mask + (column_bytes_in + validity_bytes_in) * num_columns + // reading columns + mask_size; // reading boolean mask int64_t const bytes_written = (column_bytes_out + validity_bytes_out) * num_columns; // writing columns diff --git a/cpp/benchmarks/string/char_types.cpp b/cpp/benchmarks/string/char_types.cpp index 8e9e595fcef..59e6245fd41 100644 --- a/cpp/benchmarks/string/char_types.cpp +++ b/cpp/benchmarks/string/char_types.cpp @@ -43,7 +43,7 @@ static void bench_char_types(nvbench::state& state) state.set_cuda_stream(nvbench::make_cuda_stream_view(cudf::get_default_stream().value())); // gather some throughput statistics as well auto chars_size = input.chars_size(); - state.add_global_memory_reads(chars_size); // all bytes are read; + state.add_global_memory_reads(chars_size); // all bytes are read; if (api_type == "all") { state.add_global_memory_writes(num_rows); // output is a bool8 per row } else { diff --git a/cpp/benchmarks/string/extract.cpp b/cpp/benchmarks/string/extract.cpp index 9e67c5a5b52..135dadabbe4 100644 --- a/cpp/benchmarks/string/extract.cpp +++ b/cpp/benchmarks/string/extract.cpp @@ -43,7 +43,7 @@ static void bench_extract(nvbench::state& state) std::uniform_int_distribution words_dist(0, 999); std::vector samples(100); // 100 unique rows of data to reuse std::generate(samples.begin(), samples.end(), [&]() { - std::string row; // build a row of random tokens + std::string row; // build a row of random tokens while (static_cast(row.size()) < row_width) { row += std::to_string(words_dist(generator)) + " "; } diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 05ef21bd750..35851a99822 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -1393,7 +1393,7 @@ struct pair_accessor { */ template struct pair_rep_accessor { - column_device_view const col; ///< column view of column in device + column_device_view const col; ///< column view of column in device using rep_type = device_storage_type_t; ///< representation type diff --git a/cpp/include/cudf/detail/copy_if.cuh b/cpp/include/cudf/detail/copy_if.cuh index 1dd91dcd865..ebe7e052b6d 100644 --- a/cpp/include/cudf/detail/copy_if.cuh +++ b/cpp/include/cudf/detail/copy_if.cuh @@ -133,7 +133,7 @@ __launch_bounds__(block_size) __global__ if (has_validity) { temp_valids[threadIdx.x] = false; // init shared memory if (threadIdx.x < cudf::detail::warp_size) temp_valids[block_size + threadIdx.x] = false; - __syncthreads(); // wait for init + __syncthreads(); // wait for init } if (mask_true) { diff --git a/cpp/include/cudf/detail/indexalator.cuh b/cpp/include/cudf/detail/indexalator.cuh index 0ab9da0dbd0..4731c4919e3 100644 --- a/cpp/include/cudf/detail/indexalator.cuh +++ b/cpp/include/cudf/detail/indexalator.cuh @@ -248,7 +248,7 @@ struct input_indexalator : base_indexalator { friend struct indexalator_factory; friend struct base_indexalator; // for CRTP - using reference = size_type const; // this keeps STL and thrust happy + using reference = size_type const; // this keeps STL and thrust happy input_indexalator() = default; input_indexalator(input_indexalator const&) = default; @@ -332,7 +332,7 @@ struct output_indexalator : base_indexalator { friend struct indexalator_factory; friend struct base_indexalator; // for CRTP - using reference = output_indexalator const&; // required for output iterators + using reference = output_indexalator const&; // required for output iterators output_indexalator() = default; output_indexalator(output_indexalator const&) = default; diff --git a/cpp/include/cudf/detail/join.hpp b/cpp/include/cudf/detail/join.hpp index 6fcf10aef57..b69632c83ca 100644 --- a/cpp/include/cudf/detail/join.hpp +++ b/cpp/include/cudf/detail/join.hpp @@ -78,8 +78,8 @@ struct hash_join { cudf::null_equality const _nulls_equal; ///< whether to consider nulls as equal cudf::table_view _build; ///< input table to build the hash map std::shared_ptr - _preprocessed_build; ///< input table preprocssed for row operators - map_type _hash_table; ///< hash table built on `_build` + _preprocessed_build; ///< input table preprocssed for row operators + map_type _hash_table; ///< hash table built on `_build` public: /** diff --git a/cpp/include/cudf/fixed_point/fixed_point.hpp b/cpp/include/cudf/fixed_point/fixed_point.hpp index 7c59c2f9194..13d8716c1df 100644 --- a/cpp/include/cudf/fixed_point/fixed_point.hpp +++ b/cpp/include/cudf/fixed_point/fixed_point.hpp @@ -829,5 +829,5 @@ using decimal32 = fixed_point; ///< 32-bit decima using decimal64 = fixed_point; ///< 64-bit decimal fixed point using decimal128 = fixed_point<__int128_t, Radix::BASE_10>; ///< 128-bit decimal fixed point -/** @} */ // end of group +/** @} */ // end of group } // namespace numeric diff --git a/cpp/include/cudf/groupby.hpp b/cpp/include/cudf/groupby.hpp index 6e575685daa..1c31e8777a8 100644 --- a/cpp/include/cudf/groupby.hpp +++ b/cpp/include/cudf/groupby.hpp @@ -386,8 +386,8 @@ class groupby { ///< indicates null order ///< of each column std::unique_ptr - _helper; ///< Helper object - ///< used by sort based implementation + _helper; ///< Helper object + ///< used by sort based implementation /** * @brief Get the sort helper object diff --git a/cpp/include/cudf/io/csv.hpp b/cpp/include/cudf/io/csv.hpp index c84ca7e6c73..b49a13a8ea9 100644 --- a/cpp/include/cudf/io/csv.hpp +++ b/cpp/include/cudf/io/csv.hpp @@ -213,7 +213,7 @@ class csv_reader_options { auto const max_row_bytes = 16 * 1024; // 16KB auto const column_bytes = 64; - auto const base_padding = 1024; // 1KB + auto const base_padding = 1024; // 1KB if (num_columns == 0) { // Use flat size if the number of columns is not known diff --git a/cpp/include/cudf/io/json.hpp b/cpp/include/cudf/io/json.hpp index 15dc2a614ad..d408d249a7f 100644 --- a/cpp/include/cudf/io/json.hpp +++ b/cpp/include/cudf/io/json.hpp @@ -207,7 +207,7 @@ class json_reader_options { auto const max_row_bytes = 16 * 1024; // 16KB auto const column_bytes = 64; - auto const base_padding = 1024; // 1KB + auto const base_padding = 1024; // 1KB if (num_columns == 0) { // Use flat size if the number of columns is not known diff --git a/cpp/include/cudf/strings/detail/utf8.hpp b/cpp/include/cudf/strings/detail/utf8.hpp index df8e2885782..e04572535de 100644 --- a/cpp/include/cudf/strings/detail/utf8.hpp +++ b/cpp/include/cudf/strings/detail/utf8.hpp @@ -155,18 +155,18 @@ constexpr inline size_type from_char_utf8(char_utf8 character, char* str) constexpr uint32_t utf8_to_codepoint(cudf::char_utf8 utf8_char) { uint32_t unchr = 0; - if (utf8_char < 0x0000'0080) // single-byte pass thru + if (utf8_char < 0x0000'0080) // single-byte pass thru unchr = utf8_char; - else if (utf8_char < 0x0000'E000) // two bytes + else if (utf8_char < 0x0000'E000) // two bytes { - unchr = (utf8_char & 0x1F00) >> 2; // shift and - unchr |= (utf8_char & 0x003F); // unmask - } else if (utf8_char < 0x00F0'0000) // three bytes + unchr = (utf8_char & 0x1F00) >> 2; // shift and + unchr |= (utf8_char & 0x003F); // unmask + } else if (utf8_char < 0x00F0'0000) // three bytes { - unchr = (utf8_char & 0x0F'0000) >> 4; // get upper 4 bits - unchr |= (utf8_char & 0x00'3F00) >> 2; // shift and - unchr |= (utf8_char & 0x00'003F); // unmask - } else if (utf8_char <= 0xF800'0000u) // four bytes + unchr = (utf8_char & 0x0F'0000) >> 4; // get upper 4 bits + unchr |= (utf8_char & 0x00'3F00) >> 2; // shift and + unchr |= (utf8_char & 0x00'003F); // unmask + } else if (utf8_char <= 0xF800'0000u) // four bytes { unchr = (utf8_char & 0x0300'0000) >> 6; // upper 3 bits unchr |= (utf8_char & 0x003F'0000) >> 4; // next 6 bits @@ -185,20 +185,20 @@ constexpr uint32_t utf8_to_codepoint(cudf::char_utf8 utf8_char) constexpr cudf::char_utf8 codepoint_to_utf8(uint32_t unchr) { cudf::char_utf8 utf8 = 0; - if (unchr < 0x0000'0080) // single byte utf8 + if (unchr < 0x0000'0080) // single byte utf8 utf8 = unchr; - else if (unchr < 0x0000'0800) // double byte utf8 + else if (unchr < 0x0000'0800) // double byte utf8 { - utf8 = (unchr << 2) & 0x1F00; // shift bits for - utf8 |= (unchr & 0x3F); // utf8 encoding + utf8 = (unchr << 2) & 0x1F00; // shift bits for + utf8 |= (unchr & 0x3F); // utf8 encoding utf8 |= 0x0000'C080; - } else if (unchr < 0x0001'0000) // triple byte utf8 + } else if (unchr < 0x0001'0000) // triple byte utf8 { - utf8 = (unchr << 4) & 0x0F'0000; // upper 4 bits - utf8 |= (unchr << 2) & 0x00'3F00; // next 6 bits - utf8 |= (unchr & 0x3F); // last 6 bits + utf8 = (unchr << 4) & 0x0F'0000; // upper 4 bits + utf8 |= (unchr << 2) & 0x00'3F00; // next 6 bits + utf8 |= (unchr & 0x3F); // last 6 bits utf8 |= 0x00E0'8080; - } else if (unchr < 0x0011'0000) // quadruple byte utf8 + } else if (unchr < 0x0011'0000) // quadruple byte utf8 { utf8 = (unchr << 6) & 0x0700'0000; // upper 3 bits utf8 |= (unchr << 4) & 0x003F'0000; // next 6 bits diff --git a/cpp/include/cudf/table/row_operators.cuh b/cpp/include/cudf/table/row_operators.cuh index 599a85c8a54..4806f96c934 100644 --- a/cpp/include/cudf/table/row_operators.cuh +++ b/cpp/include/cudf/table/row_operators.cuh @@ -105,9 +105,9 @@ inline __device__ auto null_compare(bool lhs_is_null, bool rhs_is_null, null_ord { if (lhs_is_null and rhs_is_null) { // null (dictionary_wrapper const& lhs, using dictionary32 = dictionary_wrapper; ///< 32-bit integer indexed dictionary wrapper -/** @} */ // end of group +/** @} */ // end of group } // namespace cudf diff --git a/cpp/include/cudf_test/base_fixture.hpp b/cpp/include/cudf_test/base_fixture.hpp index b622d7c6b78..06aabbe4e9c 100644 --- a/cpp/include/cudf_test/base_fixture.hpp +++ b/cpp/include/cudf_test/base_fixture.hpp @@ -331,9 +331,9 @@ inline auto parse_cudf_test_opts(int argc, char** argv) cxxopts::Options options(argv[0], " - cuDF tests command line options"); char const* env_rmm_mode = std::getenv("GTEST_CUDF_RMM_MODE"); // Overridden by CLI options char const* env_stream_mode = - std::getenv("GTEST_CUDF_STREAM_MODE"); // Overridden by CLI options + std::getenv("GTEST_CUDF_STREAM_MODE"); // Overridden by CLI options char const* env_stream_error_mode = - std::getenv("GTEST_CUDF_STREAM_ERROR_MODE"); // Overridden by CLI options + std::getenv("GTEST_CUDF_STREAM_ERROR_MODE"); // Overridden by CLI options auto default_rmm_mode = env_rmm_mode ? env_rmm_mode : "pool"; auto default_stream_mode = env_stream_mode ? env_stream_mode : "default"; auto default_stream_error_mode = env_stream_error_mode ? env_stream_error_mode : "error"; diff --git a/cpp/include/nvtext/subword_tokenize.hpp b/cpp/include/nvtext/subword_tokenize.hpp index ac75f5e9147..72a899d70b4 100644 --- a/cpp/include/nvtext/subword_tokenize.hpp +++ b/cpp/include/nvtext/subword_tokenize.hpp @@ -44,7 +44,7 @@ struct hashed_vocabulary { std::unique_ptr bin_offsets; ///< uint16 column, containing the start index of each ///< bin in the flattened hash table std::unique_ptr - cp_metadata; ///< uint32 column, The code point metadata table to use for normalization + cp_metadata; ///< uint32 column, The code point metadata table to use for normalization std::unique_ptr aux_cp_table; ///< uint64 column, The auxiliary code point table to use for normalization }; diff --git a/cpp/scripts/run-clang-tidy.py b/cpp/scripts/run-clang-tidy.py index a617a4c0df7..e5e57dbf562 100644 --- a/cpp/scripts/run-clang-tidy.py +++ b/cpp/scripts/run-clang-tidy.py @@ -22,7 +22,7 @@ import shutil -EXPECTED_VERSION = "16.0.1" +EXPECTED_VERSION = "16.0.6" VERSION_REGEX = re.compile(r" LLVM version ([0-9.]+)") GPU_ARCH_REGEX = re.compile(r"sm_(\d+)") SPACES = re.compile(r"\s+") diff --git a/cpp/src/copying/contiguous_split.cu b/cpp/src/copying/contiguous_split.cu index e1a55ec5419..5ea56a05dcb 100644 --- a/cpp/src/copying/contiguous_split.cu +++ b/cpp/src/copying/contiguous_split.cu @@ -114,8 +114,8 @@ struct dst_buf_info { int bit_shift; // # of bits to shift right by (for validity buffers) size_type valid_count; // validity count for this block of work - int src_buf_index; // source buffer index - int dst_buf_index; // destination buffer index + int src_buf_index; // source buffer index + int dst_buf_index; // destination buffer index }; /** @@ -1384,7 +1384,7 @@ struct chunk_iteration_state { std::size_t starting_batch; ///< Starting batch index for the current iteration std::vector const h_num_buffs_per_iteration; ///< The count of batches per iteration std::vector const - h_size_of_buffs_per_iteration; ///< The size in bytes per iteration + h_size_of_buffs_per_iteration; ///< The size in bytes per iteration }; std::unique_ptr chunk_iteration_state::create( @@ -1989,7 +1989,7 @@ struct contiguous_split_state { // This can be 1 if `contiguous_split` is just packing and not splitting std::size_t const num_partitions; ///< The number of partitions to produce - size_type const num_src_bufs; ///< Number of source buffers including children + size_type const num_src_bufs; ///< Number of source buffers including children std::size_t const num_bufs; ///< Number of source buffers including children * number of splits diff --git a/cpp/src/groupby/sort/functors.hpp b/cpp/src/groupby/sort/functors.hpp index c378ac99727..be36956b929 100644 --- a/cpp/src/groupby/sort/functors.hpp +++ b/cpp/src/groupby/sort/functors.hpp @@ -94,12 +94,12 @@ struct store_result_functor { }; protected: - sort::sort_groupby_helper& helper; ///< Sort helper - cudf::detail::result_cache& cache; ///< cache of results to store into - column_view const& values; ///< Column of values to group and aggregate + sort::sort_groupby_helper& helper; ///< Sort helper + cudf::detail::result_cache& cache; ///< cache of results to store into + column_view const& values; ///< Column of values to group and aggregate - rmm::cuda_stream_view stream; ///< CUDA stream on which to execute kernels - rmm::mr::device_memory_resource* mr; ///< Memory resource to allocate space for results + rmm::cuda_stream_view stream; ///< CUDA stream on which to execute kernels + rmm::mr::device_memory_resource* mr; ///< Memory resource to allocate space for results sorted keys_are_sorted; ///< Whether the keys are sorted std::unique_ptr sorted_values; ///< Memoised grouped and sorted values diff --git a/cpp/src/io/avro/avro_gpu.cu b/cpp/src/io/avro/avro_gpu.cu index 2c634d9b590..365f6d6875c 100644 --- a/cpp/src/io/avro/avro_gpu.cu +++ b/cpp/src/io/avro/avro_gpu.cu @@ -303,7 +303,7 @@ avro_decode_row(schemadesc_s const* schema, // If within an array, check if we reached the last item if (array_repeat_count != 0 && array_children <= 0 && cur < end) { if (!--array_repeat_count) { - i = array_start; // Restart at the array parent + i = array_start; // Restart at the array parent } else { i = array_start + 1; // Restart after the array parent array_children = schema[array_start].count; diff --git a/cpp/src/io/comp/cpu_unbz2.cpp b/cpp/src/io/comp/cpu_unbz2.cpp index 7159ff30d7c..a116335b254 100644 --- a/cpp/src/io/comp/cpu_unbz2.cpp +++ b/cpp/src/io/comp/cpu_unbz2.cpp @@ -216,7 +216,7 @@ int32_t bz2_decompress_block(unbz_state_s* s) s->currBlockNo++; - skipbits(s, 32); // block CRC + skipbits(s, 32); // block CRC if (getbits(s, 1)) return BZ_DATA_ERROR; // blockRandomized not supported (old bzip versions) diff --git a/cpp/src/io/comp/debrotli.cu b/cpp/src/io/comp/debrotli.cu index 542ca031b7c..8bafd054bdb 100644 --- a/cpp/src/io/comp/debrotli.cu +++ b/cpp/src/io/comp/debrotli.cu @@ -121,7 +121,7 @@ __inline__ __device__ int brotli_context(int p1, int p2, int lut) struct huff_scratch_s { uint16_t code_length_histo[16]; uint8_t code_length_code_lengths[brotli_code_length_codes]; - int8_t offset[6]; // offsets in sorted table for each length + int8_t offset[6]; // offsets in sorted table for each length uint16_t lenvlctab[32]; uint16_t sorted[brotli_code_length_codes]; // symbols sorted by code length int16_t next_symbol[32]; @@ -1298,7 +1298,7 @@ static __device__ void InverseMoveToFrontTransform(debrotli_state_s* s, uint8_t* // Reinitialize elements that could have been changed. uint32_t i = 1; uint32_t upper_bound = s->mtf_upper_bound; - uint32_t* mtf = &s->mtf[1]; // Make mtf[-1] addressable. + uint32_t* mtf = &s->mtf[1]; // Make mtf[-1] addressable. auto* mtf_u8 = reinterpret_cast(mtf); uint32_t pattern = 0x0302'0100; // Little-endian diff --git a/cpp/src/io/comp/gpuinflate.cu b/cpp/src/io/comp/gpuinflate.cu index 42c4fbe7bea..8993815e560 100644 --- a/cpp/src/io/comp/gpuinflate.cu +++ b/cpp/src/io/comp/gpuinflate.cu @@ -124,11 +124,11 @@ struct inflate_state_s { uint8_t* outbase; ///< start of output buffer uint8_t* outend; ///< end of output buffer // Input state - uint8_t const* cur; ///< input buffer - uint8_t const* end; ///< end of input buffer + uint8_t const* cur; ///< input buffer + uint8_t const* end; ///< end of input buffer - uint2 bitbuf; ///< bit buffer (64-bit) - uint32_t bitpos; ///< position in bit buffer + uint2 bitbuf; ///< bit buffer (64-bit) + uint32_t bitpos; ///< position in bit buffer int32_t err; ///< Error status int btype; ///< current block type @@ -295,7 +295,7 @@ __device__ int construct( return 0; // complete, but decode() will fail // check for an over-subscribed or incomplete set of lengths - left = 1; // one possible code of zero length + left = 1; // one possible code of zero length for (len = 1; len <= max_bits; len++) { left <<= 1; // one more bit, double codes left left -= counts[len]; // deduct count from possible codes @@ -349,8 +349,8 @@ __device__ int init_dynamic(inflate_state_s* s) index = 0; while (index < nlen + ndist) { int symbol = decode(s, s->lencnt, s->lensym); - if (symbol < 0) return symbol; // invalid symbol - if (symbol < 16) // length in 0..15 + if (symbol < 0) return symbol; // invalid symbol + if (symbol < 16) // length in 0..15 lengths[index++] = symbol; else { // repeat instruction int len = 0; // last length to repeat, assume repeating zeros @@ -358,9 +358,9 @@ __device__ int init_dynamic(inflate_state_s* s) if (index == 0) return -5; // no last length! len = lengths[index - 1]; // last length symbol = 3 + getbits(s, 2); - } else if (symbol == 17) // repeat zero 3..10 times + } else if (symbol == 17) // repeat zero 3..10 times symbol = 3 + getbits(s, 3); - else // == 18, repeat zero 11..138 times + else // == 18, repeat zero 11..138 times symbol = 11 + getbits(s, 7); if (index + symbol > nlen + ndist) return -6; // too many lengths! while (symbol--) // repeat last or zero symbol times diff --git a/cpp/src/io/comp/uncomp.cpp b/cpp/src/io/comp/uncomp.cpp index 017fd8abb47..0d2d21333bb 100644 --- a/cpp/src/io/comp/uncomp.cpp +++ b/cpp/src/io/comp/uncomp.cpp @@ -28,7 +28,7 @@ #include // memset -#include // uncompress +#include // uncompress using cudf::host_span; @@ -47,7 +47,7 @@ struct gz_file_header_s { uint8_t os; // OS id }; -struct zip_eocd_s // end of central directory +struct zip_eocd_s // end of central directory { uint32_t sig; // 0x0605'4b50 uint16_t disk_id; // number of this disk @@ -59,7 +59,7 @@ struct zip_eocd_s // end of central directory // number uint16_t comment_len; // comment length (excluded from struct) }; -struct zip64_eocdl // end of central dir locator +struct zip64_eocdl // end of central dir locator { uint32_t sig; // 0x0706'4b50 uint32_t disk_start; // number of the disk with the start of the zip64 end of central directory @@ -67,7 +67,7 @@ struct zip64_eocdl // end of central dir locator uint32_t num_disks; // total number of disks }; -struct zip_cdfh_s // central directory file header +struct zip_cdfh_s // central directory file header { uint32_t sig; // 0x0201'4b50 uint16_t ver; // version made by @@ -111,7 +111,7 @@ struct bz2_file_header_s { struct gz_archive_s { gz_file_header_s const* fhdr; - uint16_t hcrc16; // header crc16 if present + uint16_t hcrc16; // header crc16 if present uint16_t xlen; uint8_t const* fxtra; // xlen bytes (optional) uint8_t const* fname; // zero-terminated original filename if present diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index a7a1cfd3f9e..c699502317f 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -45,7 +45,7 @@ void __device__ busy_wait(size_t cycles) struct unsnap_batch_s { int32_t len; // 1..64 = Number of bytes uint32_t - offset; // copy distance if greater than zero or negative of literal offset in byte stream + offset; // copy distance if greater than zero or negative of literal offset in byte stream }; /** diff --git a/cpp/src/io/json/json_column.cu b/cpp/src/io/json/json_column.cu index bdad16bd9f1..cabf904f020 100644 --- a/cpp/src/io/json/json_column.cu +++ b/cpp/src/io/json/json_column.cu @@ -169,7 +169,7 @@ reduce_to_column_tree(tree_meta_t& tree, }); // 4. unique_copy parent_node_ids, ranges - rmm::device_uvector column_levels(0, stream); // not required + rmm::device_uvector column_levels(0, stream); // not required rmm::device_uvector parent_col_ids(num_columns, stream); rmm::device_uvector col_range_begin(num_columns, stream); // Field names rmm::device_uvector col_range_end(num_columns, stream); diff --git a/cpp/src/io/json/nested_json_gpu.cu b/cpp/src/io/json/nested_json_gpu.cu index b691eaa8caf..0b49f97597d 100644 --- a/cpp/src/io/json/nested_json_gpu.cu +++ b/cpp/src/io/json/nested_json_gpu.cu @@ -762,18 +762,18 @@ auto get_translation_table(bool include_line_delimiter) nl_tokens({}), // LINE_BREAK {ValueBegin}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_BOA)] = { - { /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ErrorBegin}, // WHITE_SPACE - nl_tokens({ErrorBegin}), // LINE_BREAK - {ErrorBegin}, // OTHER + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ErrorBegin}, // WHITE_SPACE + nl_tokens({ErrorBegin}), // LINE_BREAK + {ErrorBegin}, // OTHER /*LIST*/ {StructBegin}, // OPENING_BRACE {ListBegin}, // OPENING_BRACKET @@ -799,18 +799,18 @@ auto get_translation_table(bool include_line_delimiter) nl_tokens({}), // LINE_BREAK {ErrorBegin}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_LON)] = { - { /*ROOT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ErrorBegin}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {ValueEnd}, // WHITE_SPACE - nl_tokens({ValueEnd}), // LINE_BREAK - {}, // OTHER + { /*ROOT*/ + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ErrorBegin}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {ValueEnd}, // WHITE_SPACE + nl_tokens({ValueEnd}), // LINE_BREAK + {}, // OTHER /*LIST*/ {ErrorBegin}, // OPENING_BRACE {ErrorBegin}, // OPENING_BRACKET @@ -824,17 +824,17 @@ auto get_translation_table(bool include_line_delimiter) nl_tokens({ValueEnd}), // LINE_BREAK {}, // OTHER /*STRUCT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {ValueEnd, StructMemberEnd, StructEnd}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {ErrorBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ValueEnd, StructMemberEnd}, // COMMA - {ErrorBegin}, // COLON - {ValueEnd}, // WHITE_SPACE - nl_tokens({ValueEnd}), // LINE_BREAK - {}}}; // OTHER + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {ValueEnd, StructMemberEnd, StructEnd}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {ErrorBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ValueEnd, StructMemberEnd}, // COMMA + {ErrorBegin}, // COLON + {ValueEnd}, // WHITE_SPACE + nl_tokens({ValueEnd}), // LINE_BREAK + {}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_STR)] = {{ /*ROOT*/ {}, // OPENING_BRACE @@ -974,17 +974,17 @@ auto get_translation_table(bool include_line_delimiter) nl_tokens({ErrorBegin}), // LINE_BREAK {ErrorBegin}, // OTHER /*STRUCT*/ - {ErrorBegin}, // OPENING_BRACE - {ErrorBegin}, // OPENING_BRACKET - {StructEnd}, // CLOSING_BRACE - {ErrorBegin}, // CLOSING_BRACKET - {StructMemberBegin, FieldNameBegin}, // QUOTE - {ErrorBegin}, // ESCAPE - {ErrorBegin}, // COMMA - {ErrorBegin}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {ErrorBegin}}}; // OTHER + {ErrorBegin}, // OPENING_BRACE + {ErrorBegin}, // OPENING_BRACKET + {StructEnd}, // CLOSING_BRACE + {ErrorBegin}, // CLOSING_BRACKET + {StructMemberBegin, FieldNameBegin}, // QUOTE + {ErrorBegin}, // ESCAPE + {ErrorBegin}, // COMMA + {ErrorBegin}, // COLON + {}, // WHITE_SPACE + nl_tokens({}), // LINE_BREAK + {ErrorBegin}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_FLN)] = {{ /*ROOT*/ {ErrorBegin}, // OPENING_BRACE @@ -1011,17 +1011,17 @@ auto get_translation_table(bool include_line_delimiter) nl_tokens({ErrorBegin}), // LINE_BREAK {ErrorBegin}, // OTHER /*STRUCT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {FieldNameEnd}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}}}; // OTHER + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {FieldNameEnd}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}), // LINE_BREAK + {}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_FNE)] = {{ /*ROOT*/ {ErrorBegin}, // OPENING_BRACE @@ -1048,17 +1048,17 @@ auto get_translation_table(bool include_line_delimiter) nl_tokens({ErrorBegin}), // LINE_BREAK {ErrorBegin}, // OTHER /*STRUCT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}}}; // OTHER + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}), // LINE_BREAK + {}}}; // OTHER pda_tlt[static_cast(pda_state_t::PD_PFN)] = {{ /*ROOT*/ {ErrorBegin}, // OPENING_BRACE @@ -1097,18 +1097,18 @@ auto get_translation_table(bool include_line_delimiter) nl_tokens({}), // LINE_BREAK {ErrorBegin}}}; // OTHER - pda_tlt[static_cast(pda_state_t::PD_ERR)] = {{ /*ROOT*/ - {}, // OPENING_BRACE - {}, // OPENING_BRACKET - {}, // CLOSING_BRACE - {}, // CLOSING_BRACKET - {}, // QUOTE - {}, // ESCAPE - {}, // COMMA - {}, // COLON - {}, // WHITE_SPACE - nl_tokens({}), // LINE_BREAK - {}, // OTHER + pda_tlt[static_cast(pda_state_t::PD_ERR)] = {{ /*ROOT*/ + {}, // OPENING_BRACE + {}, // OPENING_BRACKET + {}, // CLOSING_BRACE + {}, // CLOSING_BRACKET + {}, // QUOTE + {}, // ESCAPE + {}, // COMMA + {}, // COLON + {}, // WHITE_SPACE + nl_tokens({}), // LINE_BREAK + {}, // OTHER /*LIST*/ {}, // OPENING_BRACE {}, // OPENING_BRACKET diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 681cc0fb9d2..9b8df50a22a 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -157,7 +157,7 @@ struct EncChunk { uint8_t dtype_len; // data type length int32_t scale; // scale for decimals or timestamps - uint32_t* dict_index; // dictionary index from row index + uint32_t* dict_index; // dictionary index from row index uint32_t* decimal_offsets; orc_column_device_view const* column; }; diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index b66ca827119..3edcd3d83b2 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -367,14 +367,14 @@ inline __device__ uint32_t varint_length(volatile orc_bytestream_s* bs, int pos) if (zbit) { return 5 + (zbit >> 3); // up to 9x7 bits } else if ((sizeof(T) <= 8) || (bytestream_readbyte(bs, pos + 9) <= 0x7f)) { - return 10; // up to 70 bits + return 10; // up to 70 bits } else { uint64_t next64 = bytestream_readu64(bs, pos + 10); zbit = __ffsll((~next64) & 0x8080'8080'8080'8080ull); if (zbit) { return 10 + (zbit >> 3); // Up to 18x7 bits (126) } else { - return 19; // Up to 19x7 bits (133) + return 19; // Up to 19x7 bits (133) } } } diff --git a/cpp/src/io/parquet/compact_protocol_reader.cpp b/cpp/src/io/parquet/compact_protocol_reader.cpp index 92fcd151925..ae11af92f78 100644 --- a/cpp/src/io/parquet/compact_protocol_reader.cpp +++ b/cpp/src/io/parquet/compact_protocol_reader.cpp @@ -168,7 +168,7 @@ bool CompactProtocolReader::read(LogicalType* l) ParquetFieldUnion(2, l->isset.MAP, l->MAP), ParquetFieldUnion(3, l->isset.LIST, l->LIST), ParquetFieldUnion(4, l->isset.ENUM, l->ENUM), - ParquetFieldUnion(5, l->isset.DECIMAL, l->DECIMAL), // read the struct + ParquetFieldUnion(5, l->isset.DECIMAL, l->DECIMAL), // read the struct ParquetFieldUnion(6, l->isset.DATE, l->DATE), ParquetFieldUnion(7, l->isset.TIME, l->TIME), // read the struct ParquetFieldUnion(8, l->isset.TIMESTAMP, l->TIMESTAMP), // read the struct diff --git a/cpp/src/io/parquet/compact_protocol_writer.cpp b/cpp/src/io/parquet/compact_protocol_writer.cpp index b2a89129645..b2c0c97c52d 100644 --- a/cpp/src/io/parquet/compact_protocol_writer.cpp +++ b/cpp/src/io/parquet/compact_protocol_writer.cpp @@ -315,7 +315,7 @@ inline void CompactProtocolFieldWriter::field_struct(int field, T const& val) if constexpr (not std::is_empty_v) { writer.write(val); // write the struct if it's not empty } else { - put_byte(0); // otherwise, add a stop field + put_byte(0); // otherwise, add a stop field } current_field_value = field; } diff --git a/cpp/src/io/parquet/delta_binary.cuh b/cpp/src/io/parquet/delta_binary.cuh index 4fc8b9cfb8e..2382e4aafdf 100644 --- a/cpp/src/io/parquet/delta_binary.cuh +++ b/cpp/src/io/parquet/delta_binary.cuh @@ -90,16 +90,16 @@ inline __device__ zigzag128_t get_zz128(uint8_t const*& cur, uint8_t const* end) } struct delta_binary_decoder { - uint8_t const* block_start; // start of data, but updated as data is read - uint8_t const* block_end; // end of data - uleb128_t block_size; // usually 128, must be multiple of 128 - uleb128_t mini_block_count; // usually 4, chosen such that block_size/mini_block_count is a - // multiple of 32 - uleb128_t value_count; // total values encoded in the block - zigzag128_t last_value; // last value decoded, initialized to first_value from header - - uint32_t values_per_mb; // block_size / mini_block_count, must be multiple of 32 - uint32_t current_value_idx; // current value index, initialized to 0 at start of block + uint8_t const* block_start; // start of data, but updated as data is read + uint8_t const* block_end; // end of data + uleb128_t block_size; // usually 128, must be multiple of 128 + uleb128_t mini_block_count; // usually 4, chosen such that block_size/mini_block_count is a + // multiple of 32 + uleb128_t value_count; // total values encoded in the block + zigzag128_t last_value; // last value decoded, initialized to first_value from header + + uint32_t values_per_mb; // block_size / mini_block_count, must be multiple of 32 + uint32_t current_value_idx; // current value index, initialized to 0 at start of block zigzag128_t cur_min_delta; // min delta for the block uint32_t cur_mb; // index of the current mini-block within the block diff --git a/cpp/src/io/parquet/page_delta_decode.cu b/cpp/src/io/parquet/page_delta_decode.cu index e79a479388f..35f33a761be 100644 --- a/cpp/src/io/parquet/page_delta_decode.cu +++ b/cpp/src/io/parquet/page_delta_decode.cu @@ -85,7 +85,7 @@ __global__ void __launch_bounds__(96) gpuDecodeDeltaBinary( if (t < 2 * warp_size) { // warp0..1 target_pos = min(src_pos + 2 * batch_size, s->nz_count + batch_size); - } else { // warp2 + } else { // warp2 target_pos = min(s->nz_count, src_pos + batch_size); } __syncthreads(); diff --git a/cpp/src/io/parquet/parquet.hpp b/cpp/src/io/parquet/parquet.hpp index a729f28d672..f7318bb9935 100644 --- a/cpp/src/io/parquet/parquet.hpp +++ b/cpp/src/io/parquet/parquet.hpp @@ -365,8 +365,8 @@ struct ColumnIndex { std::vector> min_values; // lower bound for values in each page std::vector> max_values; // upper bound for values in each page BoundaryOrder boundary_order = - BoundaryOrder::UNORDERED; // Indicates if min and max values are ordered - std::vector null_counts; // Optional count of null values per page + BoundaryOrder::UNORDERED; // Indicates if min and max values are ordered + std::vector null_counts; // Optional count of null values per page }; // bit space we are reserving in column_buffer::user_data diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index e82b6abc13d..a3cc37dee4f 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -299,7 +299,7 @@ struct ColumnChunkDesc { int8_t converted_type; // converted type enum LogicalType logical_type; // logical type int8_t decimal_precision; // Decimal precision - int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) + int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) int32_t src_col_index; // my input column index int32_t src_col_schema; // my schema index in the file @@ -396,16 +396,16 @@ constexpr uint32_t encoding_to_mask(Encoding encoding) struct EncColumnChunk { parquet_column_device_view const* col_desc; //!< Column description size_type col_desc_id; - PageFragment* fragments; //!< First fragment in chunk - uint8_t* uncompressed_bfr; //!< Uncompressed page data - uint8_t* compressed_bfr; //!< Compressed page data - statistics_chunk const* stats; //!< Fragment statistics - uint32_t bfr_size; //!< Uncompressed buffer size - uint32_t compressed_size; //!< Compressed buffer size - uint32_t max_page_data_size; //!< Max data size (excluding header) of any page in this chunk - uint32_t page_headers_size; //!< Sum of size of all page headers - size_type start_row; //!< First row of chunk - uint32_t num_rows; //!< Number of rows in chunk + PageFragment* fragments; //!< First fragment in chunk + uint8_t* uncompressed_bfr; //!< Uncompressed page data + uint8_t* compressed_bfr; //!< Compressed page data + statistics_chunk const* stats; //!< Fragment statistics + uint32_t bfr_size; //!< Uncompressed buffer size + uint32_t compressed_size; //!< Compressed buffer size + uint32_t max_page_data_size; //!< Max data size (excluding header) of any page in this chunk + uint32_t page_headers_size; //!< Sum of size of all page headers + size_type start_row; //!< First row of chunk + uint32_t num_rows; //!< Number of rows in chunk size_type num_values; //!< Number of values in chunk. Different from num_rows for nested types uint32_t first_fragment; //!< First fragment of chunk EncPage* pages; //!< Ptr to pages that belong to this chunk diff --git a/cpp/src/io/parquet/reader_impl_preprocess.cu b/cpp/src/io/parquet/reader_impl_preprocess.cu index bde73c3dd96..a2db0de26bb 100644 --- a/cpp/src/io/parquet/reader_impl_preprocess.cu +++ b/cpp/src/io/parquet/reader_impl_preprocess.cu @@ -1673,7 +1673,7 @@ void reader::impl::preprocess_pages(size_t skip_rows, // - we will be doing a chunked read gpu::ComputePageSizes(pages, chunks, - 0, // 0-max size_t. process all possible rows + 0, // 0-max size_t. process all possible rows std::numeric_limits::max(), true, // compute num_rows chunk_read_limit > 0, // compute string sizes diff --git a/cpp/src/join/join.cu b/cpp/src/join/join.cu index 8210f3114d6..ae025b1a213 100644 --- a/cpp/src/join/join.cu +++ b/cpp/src/join/join.cu @@ -73,7 +73,7 @@ left_join(table_view const& left_input, // Make sure any dictionary columns have matched key sets. // This will return any new dictionary columns created as well as updated table_views. auto matched = cudf::dictionary::detail::match_dictionaries( - {left_input, right_input}, // these should match + {left_input, right_input}, // these should match stream, rmm::mr::get_current_device_resource()); // temporary objects returned // now rebuild the table views with the updated ones @@ -98,7 +98,7 @@ full_join(table_view const& left_input, // Make sure any dictionary columns have matched key sets. // This will return any new dictionary columns created as well as updated table_views. auto matched = cudf::dictionary::detail::match_dictionaries( - {left_input, right_input}, // these should match + {left_input, right_input}, // these should match stream, rmm::mr::get_current_device_resource()); // temporary objects returned // now rebuild the table views with the updated ones diff --git a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu index 2ce55e10fb1..9e8b75ae3b6 100644 --- a/cpp/src/quantiles/tdigest/tdigest_aggregation.cu +++ b/cpp/src/quantiles/tdigest/tdigest_aggregation.cu @@ -459,7 +459,7 @@ __global__ void generate_cluster_limits_kernel(int delta, int adjusted_w_index = nearest_w_index; if ((last_inserted_index < 0) || // if we haven't inserted anything yet (nearest_w_index == - last_inserted_index)) { // if we land in the same bucket as the previous cap + last_inserted_index)) { // if we land in the same bucket as the previous cap // force the value into this bucket adjusted_w_index = (last_inserted_index == group_size - 1) diff --git a/cpp/src/rolling/detail/rolling_collect_list.cuh b/cpp/src/rolling/detail/rolling_collect_list.cuh index 9f74a961e12..39d15ed716f 100644 --- a/cpp/src/rolling/detail/rolling_collect_list.cuh +++ b/cpp/src/rolling/detail/rolling_collect_list.cuh @@ -116,7 +116,7 @@ std::unique_ptr create_collect_gather_map(column_view const& child_offse thrust::make_counting_iterator(per_row_mapping.size()), gather_map->mutable_view().template begin(), [d_offsets = - child_offsets.template begin(), // E.g. [0, 2, 5, 8, 11, 13] + child_offsets.template begin(), // E.g. [0, 2, 5, 8, 11, 13] d_groups = per_row_mapping.template begin(), // E.g. [0,0, 1,1,1, 2,2,2, 3,3,3, 4,4] d_prev = preceding_iter] __device__(auto i) { diff --git a/cpp/src/strings/char_types/char_types.cu b/cpp/src/strings/char_types/char_types.cu index b87fb80fcc2..0c0ad0ad29e 100644 --- a/cpp/src/strings/char_types/char_types.cu +++ b/cpp/src/strings/char_types/char_types.cu @@ -139,9 +139,9 @@ struct filter_chars_fn { { auto const code_point = detail::utf8_to_codepoint(ch); auto const flag = code_point <= 0x00'FFFF ? d_flags[code_point] : 0; - if (flag == 0) // all types pass unless specifically identified + if (flag == 0) // all types pass unless specifically identified return (types_to_remove == ALL_TYPES); - if (types_to_keep == ALL_TYPES) // filter case + if (types_to_keep == ALL_TYPES) // filter case return (types_to_remove & flag) != 0; return (types_to_keep & flag) == 0; // keep case } diff --git a/cpp/src/strings/convert/convert_datetime.cu b/cpp/src/strings/convert/convert_datetime.cu index cca06ca0739..8a953d778ed 100644 --- a/cpp/src/strings/convert/convert_datetime.cu +++ b/cpp/src/strings/convert/convert_datetime.cu @@ -317,8 +317,8 @@ struct parse_datetime { bytes_read -= left; break; } - case 'u': [[fallthrough]]; // day of week: Mon(1)-Sat(6),Sun(7) - case 'w': { // day of week; Sun(0),Mon(1)-Sat(6) + case 'u': [[fallthrough]]; // day of week: Mon(1)-Sat(6),Sun(7) + case 'w': { // day of week; Sun(0),Mon(1)-Sat(6) auto const [weekday, left] = parse_int(ptr, item.length); timeparts.weekday = // 0 is mapped to 7 for chrono library static_cast((item.value == 'w' && weekday == 0) ? 7 : weekday); @@ -1000,7 +1000,7 @@ struct datetime_formatter_fn { case 'S': // second copy_value = timeparts.second; break; - case 'f': // sub-second + case 'f': // sub-second { char subsecond_digits[] = "000000000"; // 9 max digits int const digits = [] { diff --git a/cpp/src/strings/convert/convert_durations.cu b/cpp/src/strings/convert/convert_durations.cu index 863f76b9b98..6ab70825a6b 100644 --- a/cpp/src/strings/convert/convert_durations.cu +++ b/cpp/src/strings/convert/convert_durations.cu @@ -576,7 +576,7 @@ struct parse_duration { item_length++; // : timeparts->second = parse_second(ptr + item_length, item_length); break; - case 'r': // hh:MM:SS AM/PM + case 'r': // hh:MM:SS AM/PM timeparts->hour = parse_hour(ptr, item_length); item_length++; // : timeparts->minute = parse_minute(ptr + item_length, item_length); diff --git a/cpp/src/strings/convert/convert_floats.cu b/cpp/src/strings/convert/convert_floats.cu index ab1e6870937..32167589ab4 100644 --- a/cpp/src/strings/convert/convert_floats.cu +++ b/cpp/src/strings/convert/convert_floats.cu @@ -284,7 +284,7 @@ struct ftos_converter { while (pb != buffer) // reverses the digits *ptr++ = *--pb; // e.g. 54321 -> 12345 } else - *ptr++ = '0'; // always include at least .0 + *ptr++ = '0'; // always include at least .0 // exponent if (exp10) { *ptr++ = 'e'; @@ -310,7 +310,7 @@ struct ftos_converter { { if (std::isnan(value)) return 3; // NaN bool bneg = false; - if (signbit(value)) { // handles -0.0 too + if (signbit(value)) { // handles -0.0 too value = -value; bneg = true; } @@ -337,7 +337,7 @@ struct ftos_converter { ++count; // always include .0 // exponent if (exp10) { - count += 2; // 'e±' + count += 2; // 'e±' if (exp10 < 0) exp10 = -exp10; count += (int)(exp10 < 10); // padding while (exp10 > 0) { diff --git a/cpp/src/strings/convert/convert_integers.cu b/cpp/src/strings/convert/convert_integers.cu index 260c3393f3c..5597d2831c0 100644 --- a/cpp/src/strings/convert/convert_integers.cu +++ b/cpp/src/strings/convert/convert_integers.cu @@ -76,7 +76,7 @@ struct string_to_integer_check_fn { auto const digit = static_cast(chr - '0'); auto const bound_check = (bound_val - sign * digit) / IntegerType{10} * sign; if (value > bound_check) return false; - value = value* IntegerType{10} + digit; + value = value * IntegerType{10} + digit; } return true; diff --git a/cpp/src/strings/convert/convert_ipv4.cu b/cpp/src/strings/convert/convert_ipv4.cu index 4606aba6d17..adb72cb0263 100644 --- a/cpp/src/strings/convert/convert_ipv4.cu +++ b/cpp/src/strings/convert/convert_ipv4.cu @@ -197,7 +197,7 @@ std::unique_ptr is_ipv4(strings_column_view const& strings, if (d_str.empty()) return false; constexpr int max_ip = 255; // values must be in [0,255] int ip_vals[4] = {-1, -1, -1, -1}; - int ipv_idx = 0; // index into ip_vals + int ipv_idx = 0; // index into ip_vals for (auto const ch : d_str) { if ((ch >= '0') && (ch <= '9')) { auto const ip_val = ip_vals[ipv_idx]; diff --git a/cpp/src/strings/convert/convert_urls.cu b/cpp/src/strings/convert/convert_urls.cu index 71b6c09310e..9efa148cfd2 100644 --- a/cpp/src/strings/convert/convert_urls.cu +++ b/cpp/src/strings/convert/convert_urls.cu @@ -107,9 +107,9 @@ struct url_encoder_fn { out_ptr = copy_and_increment(out_ptr, hex, 2); // add them to the output } } - } else // these are to be utf-8 url-encoded + } else // these are to be utf-8 url-encoded { - uint8_t char_bytes[4]; // holds utf-8 bytes for one character + uint8_t char_bytes[4]; // holds utf-8 bytes for one character size_type char_width = from_char_utf8(ch, reinterpret_cast(char_bytes)); nbytes += char_width * 3; // '%' plus 2 hex chars per byte (example: é is %C3%A9) // process each byte in this current character diff --git a/cpp/src/strings/json/json_path.cu b/cpp/src/strings/json/json_path.cu index 2d2691e0518..c56752f5429 100644 --- a/cpp/src/strings/json/json_path.cu +++ b/cpp/src/strings/json/json_path.cu @@ -984,7 +984,7 @@ std::unique_ptr get_json_object(cudf::strings_column_view const& c col.size(), rmm::device_buffer{0, stream, mr}, // no data cudf::detail::create_null_mask(col.size(), mask_state::ALL_NULL, stream, mr), - col.size()); // null count + col.size()); // null count } constexpr int block_size = 512; diff --git a/cpp/src/strings/regex/regcomp.cpp b/cpp/src/strings/regex/regcomp.cpp index 5fd098a872e..b7a7f19369d 100644 --- a/cpp/src/strings/regex/regcomp.cpp +++ b/cpp/src/strings/regex/regcomp.cpp @@ -184,9 +184,9 @@ class regex_parser { int32_t _id_cclass_d{-1}; // digits [0-9] int32_t _id_cclass_D{-1}; // not digits - char32_t _chr{}; // last lex'd char - int32_t _cclass_id{}; // last lex'd class - int16_t _min_count{}; // data for counted operators + char32_t _chr{}; // last lex'd char + int32_t _cclass_id{}; // last lex'd class + int16_t _min_count{}; // data for counted operators int16_t _max_count{}; std::vector _items; @@ -361,9 +361,9 @@ class regex_parser { auto [q, n_chr] = next_char(); if (n_chr == 0) { return 0; } // malformed: '[x-' - if (!q && n_chr == ']') { // handles: '[x-]' + if (!q && n_chr == ']') { // handles: '[x-]' literals.push_back(chr); - literals.push_back(chr); // add '-' as literal + literals.push_back(chr); // add '-' as literal break; } // normal case: '[a-z]' @@ -749,7 +749,7 @@ class regex_parser { // infinite repeats if (n > 0) { // append '+' after last repetition out.push_back(regex_parser::Item{item.type == COUNTED ? PLUS : PLUS_LAZY, 0}); - } else { // copy it once then append '*' + } else { // copy it once then append '*' out.insert(out.end(), begin, end); out.push_back(regex_parser::Item{item.type == COUNTED ? STAR : STAR_LAZY, 0}); } @@ -1095,7 +1095,7 @@ void reprog::build_start_ids() ids.pop(); reinst const& inst = _insts[id]; if (inst.type == OR) { - if (inst.u2.left_id != id) // prevents infinite while-loop here + if (inst.u2.left_id != id) // prevents infinite while-loop here ids.push(inst.u2.left_id); if (inst.u1.right_id != id) // prevents infinite while-loop here ids.push(inst.u1.right_id); diff --git a/cpp/src/strings/regex/regcomp.h b/cpp/src/strings/regex/regcomp.h index aa2cb363b80..ab912ace0df 100644 --- a/cpp/src/strings/regex/regcomp.h +++ b/cpp/src/strings/regex/regcomp.h @@ -77,16 +77,16 @@ constexpr int32_t NCCLASS_D{1 << 5}; // not CCLASS_D or '\n' * @brief Structure of an encoded regex instruction */ struct reinst { - int32_t type; /* operator type or instruction type */ + int32_t type; /* operator type or instruction type */ union { int32_t cls_id; /* class pointer */ char32_t c; /* character */ int32_t subid; /* sub-expression id for RBRA and LBRA */ int32_t right_id; /* right child of OR */ } u1; - union { /* regexec relies on these two being in the same union */ - int32_t left_id; /* left child of OR */ - int32_t next_id; /* next instruction for CAT & LBRA */ + union { /* regexec relies on these two being in the same union */ + int32_t left_id; /* left child of OR */ + int32_t next_id; /* next instruction for CAT & LBRA */ } u2; int32_t reserved4; }; diff --git a/cpp/src/strings/regex/regex.cuh b/cpp/src/strings/regex/regex.cuh index 19d82380350..c1abbd78b43 100644 --- a/cpp/src/strings/regex/regex.cuh +++ b/cpp/src/strings/regex/regex.cuh @@ -253,21 +253,21 @@ class reprog_device { reprog_device(reprog const&); - int32_t _startinst_id; // first instruction id - int32_t _num_capturing_groups; // instruction groups - int32_t _insts_count; // number of instructions - int32_t _starts_count; // number of start-insts ids - int32_t _classes_count; // number of classes - int32_t _max_insts; // for partitioning working memory + int32_t _startinst_id; // first instruction id + int32_t _num_capturing_groups; // instruction groups + int32_t _insts_count; // number of instructions + int32_t _starts_count; // number of start-insts ids + int32_t _classes_count; // number of classes + int32_t _max_insts; // for partitioning working memory uint8_t const* _codepoint_flags{}; // table of character types reinst const* _insts{}; // array of regex instructions int32_t const* _startinst_ids{}; // array of start instruction ids reclass_device const* _classes{}; // array of regex classes - std::size_t _prog_size{}; // total size of this instance - void* _buffer{}; // working memory buffer - int32_t _thread_count{}; // threads available in working memory + std::size_t _prog_size{}; // total size of this instance + void* _buffer{}; // working memory buffer + int32_t _thread_count{}; // threads available in working memory }; /** diff --git a/cpp/src/strings/regex/regex.inl b/cpp/src/strings/regex/regex.inl index c5205ae7789..ce12dc17aa4 100644 --- a/cpp/src/strings/regex/regex.inl +++ b/cpp/src/strings/regex/regex.inl @@ -146,17 +146,17 @@ __device__ __forceinline__ bool reclass_device::is_match(char32_t const ch, uint32_t codept = utf8_to_codepoint(ch); if (codept > 0x00'FFFF) return false; int8_t fl = codepoint_flags[codept]; - if ((builtins & CCLASS_W) && ((ch == '_') || IS_ALPHANUM(fl))) // \w + if ((builtins & CCLASS_W) && ((ch == '_') || IS_ALPHANUM(fl))) // \w return true; - if ((builtins & CCLASS_S) && IS_SPACE(fl)) // \s + if ((builtins & CCLASS_S) && IS_SPACE(fl)) // \s return true; - if ((builtins & CCLASS_D) && IS_DIGIT(fl)) // \d + if ((builtins & CCLASS_D) && IS_DIGIT(fl)) // \d return true; if ((builtins & NCCLASS_W) && ((ch != '\n') && (ch != '_') && !IS_ALPHANUM(fl))) // \W return true; - if ((builtins & NCCLASS_S) && !IS_SPACE(fl)) // \S + if ((builtins & NCCLASS_S) && !IS_SPACE(fl)) // \S return true; - if ((builtins & NCCLASS_D) && ((ch != '\n') && !IS_DIGIT(fl))) // \D + if ((builtins & NCCLASS_D) && ((ch != '\n') && !IS_DIGIT(fl))) // \D return true; // return false; diff --git a/cpp/src/strings/replace/replace_re.cu b/cpp/src/strings/replace/replace_re.cu index 460074a5296..81ddb937be5 100644 --- a/cpp/src/strings/replace/replace_re.cu +++ b/cpp/src/strings/replace/replace_re.cu @@ -68,7 +68,7 @@ struct replace_regex_fn { if (!match) { break; } // no more matches auto const [start_pos, end_pos] = match_positions_to_bytes(*match, d_str, last_pos); - nbytes += d_repl.size_bytes() - (end_pos - start_pos); // add new size + nbytes += d_repl.size_bytes() - (end_pos - start_pos); // add new size if (out_ptr) { // replace: // i:bbbbsssseeee diff --git a/cpp/src/strings/split/partition.cu b/cpp/src/strings/split/partition.cu index 099f5978992..0c7d119ea38 100644 --- a/cpp/src/strings/split/partition.cu +++ b/cpp/src/strings/split/partition.cu @@ -170,7 +170,7 @@ struct rpartition_fn : public partition_fn { --itr; pos = check_delimiter(idx, d_str, itr); } - if (pos < 0) // delimiter not found + if (pos < 0) // delimiter not found { d_indices_left[idx] = string_index_pair{"", 0}; // two empty d_indices_delim[idx] = string_index_pair{"", 0}; // strings diff --git a/cpp/src/strings/split/split.cuh b/cpp/src/strings/split/split.cuh index e76d8ac1c60..dc0b04af388 100644 --- a/cpp/src/strings/split/split.cuh +++ b/cpp/src/strings/split/split.cuh @@ -190,7 +190,7 @@ struct split_tokenizer_fn : base_split_tokenizer { device_span d_delimiters, device_span d_tokens) const { - auto const base_ptr = get_base_ptr(); // d_positions values based on this + auto const base_ptr = get_base_ptr(); // d_positions values based on this auto str_ptr = d_str.data(); auto const str_end = str_ptr + d_str.size_bytes(); // end of the string auto const token_count = static_cast(d_tokens.size()); diff --git a/cpp/src/strings/split/split_re.cu b/cpp/src/strings/split/split_re.cu index 9aeb6b69bdc..3be5937297f 100644 --- a/cpp/src/strings/split/split_re.cu +++ b/cpp/src/strings/split/split_re.cu @@ -91,7 +91,7 @@ struct token_reader_fn { } else { if (direction == split_direction::FORWARD) { break; } // we are done for (auto l = 0; l < token_idx - 1; ++l) { - d_result[l] = d_result[l + 1]; // shift left + d_result[l] = d_result[l + 1]; // shift left } d_result[token_idx - 1] = token; } diff --git a/cpp/src/strings/utilities.cu b/cpp/src/strings/utilities.cu index 57a868485df..c8c68d19ce6 100644 --- a/cpp/src/strings/utilities.cu +++ b/cpp/src/strings/utilities.cu @@ -86,9 +86,9 @@ thread_safe_per_context_cache d_special_case_mappings; } // namespace - /** - * @copydoc cudf::strings::detail::get_character_flags_table - */ +/** + * @copydoc cudf::strings::detail::get_character_flags_table + */ character_flags_table_type const* get_character_flags_table() { return d_character_codepoint_flags.find_or_initialize([&](void) { diff --git a/cpp/src/text/normalize.cu b/cpp/src/text/normalize.cu index 78dfb6bf1a6..1b07b0785f5 100644 --- a/cpp/src/text/normalize.cu +++ b/cpp/src/text/normalize.cu @@ -70,7 +70,7 @@ struct normalize_spaces_fn { cudf::string_view const single_space(" ", 1); auto const d_str = d_strings.element(idx); char* buffer = d_chars ? d_chars + d_offsets[idx] : nullptr; - char* optr = buffer; // running output pointer + char* optr = buffer; // running output pointer cudf::size_type nbytes = 0; // holds the number of bytes per output string @@ -146,7 +146,7 @@ struct codepoint_to_utf8_fn { char* out_ptr = d_chars + d_offsets[idx]; for (uint32_t jdx = 0; jdx < count; ++jdx) { uint32_t code_point = *str_cps++; - if (code_point < UTF8_1BYTE) // ASCII range + if (code_point < UTF8_1BYTE) // ASCII range *out_ptr++ = static_cast(code_point); else if (code_point < UTF8_2BYTE) { // create two-byte UTF-8 // b00001xxx:byyyyyyyy => b110xxxyy:b10yyyyyy diff --git a/cpp/src/text/replace.cu b/cpp/src/text/replace.cu index d122f048a4e..34916e121dc 100644 --- a/cpp/src/text/replace.cu +++ b/cpp/src/text/replace.cu @@ -114,7 +114,7 @@ using strings_iterator = cudf::column_device_view::const_iterator= end) { break; } // done checking for pairs // skip to the next adjacent pair diff --git a/cpp/src/text/subword/load_merges_file.cu b/cpp/src/text/subword/load_merges_file.cu index 1f1b90b3f49..db6ad2e2dd2 100644 --- a/cpp/src/text/subword/load_merges_file.cu +++ b/cpp/src/text/subword/load_merges_file.cu @@ -93,7 +93,7 @@ std::unique_ptr initialize_merge_pairs_map( auto merge_pairs_map = std::make_unique( static_cast(input.size() * 2), // capacity is 2x; cuco::empty_key{-1}, - cuco::empty_value{-1}, // empty value is not used + cuco::empty_value{-1}, // empty value is not used bpe_equal{input}, probe_scheme{bpe_hasher{input}}, hash_table_allocator_type{default_allocator{}, stream}, diff --git a/cpp/src/text/utilities/tokenize_ops.cuh b/cpp/src/text/utilities/tokenize_ops.cuh index fbd2d1efcff..a84e94a6924 100644 --- a/cpp/src/text/utilities/tokenize_ops.cuh +++ b/cpp/src/text/utilities/tokenize_ops.cuh @@ -230,7 +230,7 @@ struct multi_delimiter_strings_tokenizer { }); if (itr_find != delimiters_end) { // found delimiter auto token_size = static_cast((curr_ptr - data_ptr) - last_pos); - if (token_size > 0) // we only care about non-zero sized tokens + if (token_size > 0) // we only care about non-zero sized tokens { if (d_str_tokens) d_str_tokens[token_idx] = string_index_pair{data_ptr + last_pos, token_size}; diff --git a/cpp/tests/groupby/merge_lists_tests.cpp b/cpp/tests/groupby/merge_lists_tests.cpp index 991473c5023..f2909f870aa 100644 --- a/cpp/tests/groupby/merge_lists_tests.cpp +++ b/cpp/tests/groupby/merge_lists_tests.cpp @@ -374,7 +374,7 @@ TEST_F(GroupbyMergeListsTest, StringsColumnInput) "" /*NULL*/, "" /*NULL*/, "German Shepherd", - "" /*NULL*/ + "" /*NULL*/ }, nulls_at({3, 4, 5, 7})}, // key = "dog" lists_col{{"Whale", "" /*NULL*/, "Polar Bear"}, null_at(1)}, // key = "unknown" diff --git a/cpp/tests/groupby/merge_sets_tests.cpp b/cpp/tests/groupby/merge_sets_tests.cpp index 67ff61563bb..5fc7e68b524 100644 --- a/cpp/tests/groupby/merge_sets_tests.cpp +++ b/cpp/tests/groupby/merge_sets_tests.cpp @@ -333,7 +333,7 @@ TEST_F(GroupbyMergeSetsTest, StringsColumnInput) lists_col{{"" /*NULL*/, "" /*NULL*/, "" /*NULL*/}, all_nulls()} // key = "dog" }; auto const lists3 = lists_col{ - lists_col{"Fuji", "Red Delicious"}, // key = "apple" + lists_col{"Fuji", "Red Delicious"}, // key = "apple" lists_col{{"" /*NULL*/, "Corgi", "German Shepherd", "" /*NULL*/, "Golden Retriever"}, nulls_at({0, 3})}, // key = "dog" lists_col{{"Seeedless", "Mini"}, no_nulls()} // key = "water melon" @@ -343,14 +343,14 @@ TEST_F(GroupbyMergeSetsTest, StringsColumnInput) merge_sets(vcol_views{keys1, keys2, keys3}, vcol_views{lists1, lists2, lists3}); auto const expected_keys = strings_col{"apple", "banana", "dog", "unknown", "water melon"}; auto const expected_lists = lists_col{ - lists_col{"Fuji", "Honey Bee", "Red Delicious"}, // key = "apple" - lists_col{"Green", "Yellow"}, // key = "banana" + lists_col{"Fuji", "Honey Bee", "Red Delicious"}, // key = "apple" + lists_col{"Green", "Yellow"}, // key = "banana" lists_col{{ "Corgi", "German Shepherd", "Golden Retriever", "Poodle", "" /*NULL*/ }, - null_at(4)}, // key = "dog" - lists_col{{"Polar Bear", "Whale", "" /*NULL*/}, null_at(2)}, // key = "unknown" - lists_col{{"Mini", "Seeedless"}, no_nulls()} // key = "water melon" + null_at(4)}, // key = "dog" + lists_col{{"Polar Bear", "Whale", "" /*NULL*/}, null_at(2)}, // key = "unknown" + lists_col{{"Mini", "Seeedless"}, no_nulls()} // key = "water melon" }; CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_keys, *out_keys, verbosity); diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 64aca091686..81e0e12eeb9 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -2166,7 +2166,7 @@ TEST_F(ParquetChunkedWriterTest, ForcedNullabilityList) cudf::io::table_input_metadata metadata(table1); metadata.column_metadata[0].set_nullability(true); // List is nullable at first (root) level metadata.column_metadata[0].child(1).set_nullability( - false); // non-nullable at second (leaf) level + false); // non-nullable at second (leaf) level metadata.column_metadata[1].set_nullability(true); auto filepath = temp_env->get_temp_filepath("ChunkedListNullable.parquet"); @@ -5880,7 +5880,7 @@ TEST_F(ParquetMetadataReaderTest, TestNested) EXPECT_EQ(out_map_col.type_kind(), cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // map ASSERT_EQ(out_map_col.num_children(), 1); - EXPECT_EQ(out_map_col.child(0).name(), "key_value"); // key_value (named in parquet writer) + EXPECT_EQ(out_map_col.child(0).name(), "key_value"); // key_value (named in parquet writer) ASSERT_EQ(out_map_col.child(0).num_children(), 2); EXPECT_EQ(out_map_col.child(0).child(0).name(), "key"); // key (named in parquet writer) EXPECT_EQ(out_map_col.child(0).child(1).name(), "value"); // value (named in parquet writer) @@ -5897,7 +5897,7 @@ TEST_F(ParquetMetadataReaderTest, TestNested) ASSERT_EQ(out_list_col.child(0).num_children(), 1); auto const& out_list_struct_col = out_list_col.child(0).child(0); - EXPECT_EQ(out_list_struct_col.name(), "element"); // elements (named in parquet writer) + EXPECT_EQ(out_list_struct_col.name(), "element"); // elements (named in parquet writer) EXPECT_EQ(out_list_struct_col.type_kind(), cudf::io::parquet::TypeKind::UNDEFINED_TYPE); // struct ASSERT_EQ(out_list_struct_col.num_children(), 2); diff --git a/cpp/tests/lists/reverse_tests.cpp b/cpp/tests/lists/reverse_tests.cpp index a899d387c3e..00dc13c5812 100644 --- a/cpp/tests/lists/reverse_tests.cpp +++ b/cpp/tests/lists/reverse_tests.cpp @@ -370,8 +370,8 @@ TYPED_TEST(ListsReverseTypedTest, InputListsOfStructsWithNulls) "Kiwi", "Cherry", "Banana", - "", /*NULL*/ - "", /*NULL*/ + "", /*NULL*/ + "", /*NULL*/ "Apple", "", /*NULL*/ "Banana", // end list1 @@ -436,8 +436,8 @@ TYPED_TEST(ListsReverseTypedTest, InputListsOfStructsWithNulls) "Kiwi", "Cherry", "Banana", - "", /*NULL*/ - "", /*NULL*/ + "", /*NULL*/ + "", /*NULL*/ "Apple", "", /*NULL*/ "Banana", // end list1 diff --git a/cpp/tests/lists/set_operations/difference_distinct_tests.cpp b/cpp/tests/lists/set_operations/difference_distinct_tests.cpp index bf7ebc902ba..84c51f256b7 100644 --- a/cpp/tests/lists/set_operations/difference_distinct_tests.cpp +++ b/cpp/tests/lists/set_operations/difference_distinct_tests.cpp @@ -571,7 +571,7 @@ TEST_F(SetDifferenceTest, InputListsOfNestedStructsHaveNull) "" /*NULL*/, "" /*NULL*/, "" /*NULL*/, "Apple", "Banana", "Cherry", "Kiwi", // end list1 "" /*NULL*/, "Bear", "Cat", "Dog", "Duck", - "Panda", // end list2 + "Panda", // end list2 "ÁÁÁ", "ÉÉÉÉÉ", "ÁBC", "ÁÁÁ", "ÍÍÍÍÍ", "" /*NULL*/, "XYZ", "ÁBC" // end list3 diff --git a/cpp/tests/lists/set_operations/intersect_distinct_tests.cpp b/cpp/tests/lists/set_operations/intersect_distinct_tests.cpp index dbccf06036b..11f98af3520 100644 --- a/cpp/tests/lists/set_operations/intersect_distinct_tests.cpp +++ b/cpp/tests/lists/set_operations/intersect_distinct_tests.cpp @@ -514,7 +514,7 @@ TEST_F(SetIntersectTest, InputListsOfNestedStructsHaveNull) null, // end list1 null, // end list2 null, - null // end list3 + null // end list3 }, all_nulls()}; auto grandchild2 = strings_col{{ @@ -522,7 +522,7 @@ TEST_F(SetIntersectTest, InputListsOfNestedStructsHaveNull) "Apple", // end list1 "" /*NULL*/, // end list2 "ÁÁÁ", - "ÉÉÉÉÉ" // end list3 + "ÉÉÉÉÉ" // end list3 }, nulls_at({0, 2})}; auto child1 = structs_col{{grandchild1, grandchild2}, null_at(0)}; diff --git a/cpp/tests/lists/set_operations/union_distinct_tests.cpp b/cpp/tests/lists/set_operations/union_distinct_tests.cpp index 5cc0897351d..e33ea31541b 100644 --- a/cpp/tests/lists/set_operations/union_distinct_tests.cpp +++ b/cpp/tests/lists/set_operations/union_distinct_tests.cpp @@ -560,7 +560,7 @@ TEST_F(SetUnionTest, InputListsOfNestedStructsHaveNull) auto grandchild2 = strings_col{{ "" /*NULL*/, "Apple", "Banana", "Cherry", "Kiwi", "Banana", "Cherry", - "Kiwi", // end list1 + "Kiwi", // end list1 "" /*NULL*/, "Bear", "Cat", "Dog", "Duck", "Panda", "Bear", "Cat", "Dog", "Duck", "Panda", // end list2 @@ -597,7 +597,7 @@ TEST_F(SetUnionTest, InputListsOfNestedStructsHaveNull) { "" /*NULL*/, "" /*NULL*/, "" /*NULL*/, "" /*NULL*/, "" /*NULL*/, "" /*NULL*/, "Apple", "Apple", "Banana", "Cherry", "Kiwi", "Banana", "Cherry", - "Kiwi", // end list1 + "Kiwi", // end list1 "" /*NULL*/, "" /*NULL*/, "Bear", "Cat", "Dog", "Duck", "Panda", "Bear", "Cat", "Dog", "Duck", "Panda", // end list2 "ÁÁÁ", "ÁÁÁ", "ÉÉÉÉÉ", "ÉÉÉÉÉ", "ÁBC", "ÁÁÁ", "ÍÍÍÍÍ", diff --git a/cpp/tests/lists/stream_compaction/distinct_tests.cpp b/cpp/tests/lists/stream_compaction/distinct_tests.cpp index 57d1714c255..fbc637f9315 100644 --- a/cpp/tests/lists/stream_compaction/distinct_tests.cpp +++ b/cpp/tests/lists/stream_compaction/distinct_tests.cpp @@ -529,7 +529,7 @@ TEST_F(ListDistinctTest, InputListsOfStructsHaveNull) 2, 3, 3, - 3}, // end list3 + 3}, // end list3 nulls_at({1, 6, 12, 13})}; auto child2 = strings_col{{ // begin list1 "XXX", /*NULL*/ @@ -551,7 +551,7 @@ TEST_F(ListDistinctTest, InputListsOfStructsHaveNull) "ÁBC", "ÁÁÁ", "ÍÍÍÍÍ", - "", /*NULL*/ + "", /*NULL*/ "XYZ", "ÁBC"}, // end list3 nulls_at({6, 17})}; @@ -670,7 +670,7 @@ TEST_F(ListDistinctTest, InputListsOfNestedStructsHaveNull) "ÁBC", "ÁÁÁ", "ÍÍÍÍÍ", - "", /*NULL*/ + "", /*NULL*/ "XYZ", "ÁBC" // end list3 }, @@ -729,8 +729,8 @@ TEST_F(ListDistinctTest, InputListsOfStructsOfLists) floats_lists{3, 4, 5}, // end list2 // begin list3 floats_lists{}, - floats_lists{}, // end list3 - // begin list4 + floats_lists{}, // end list3 + // begin list4 floats_lists{6, 7}, floats_lists{6, 7}, floats_lists{6, 7}}; diff --git a/cpp/tests/reshape/interleave_columns_tests.cpp b/cpp/tests/reshape/interleave_columns_tests.cpp index eba6c961bbb..e8ea9d619c5 100644 --- a/cpp/tests/reshape/interleave_columns_tests.cpp +++ b/cpp/tests/reshape/interleave_columns_tests.cpp @@ -806,7 +806,7 @@ TYPED_TEST(ListsColumnsInterleaveTypedTest, SlicedInputListsOfListsWithNulls) ListsCol{ListsCol{{null, 11}, null_at(0)}, ListsCol{{22, null, null}, nulls_at({1, 2})}}, // don't care ListsCol{ListsCol{{null, 11}, null_at(0)}, - ListsCol{{22, null, null}, nulls_at({1, 2})}} // don't care + ListsCol{{22, null, null}, nulls_at({1, 2})}} // don't care }; auto const col1 = cudf::slice(col1_original, {3, 6})[0]; diff --git a/cpp/tests/rolling/range_rolling_window_test.cpp b/cpp/tests/rolling/range_rolling_window_test.cpp index 585383f28f8..eed9db1fe04 100644 --- a/cpp/tests/rolling/range_rolling_window_test.cpp +++ b/cpp/tests/rolling/range_rolling_window_test.cpp @@ -91,7 +91,7 @@ struct window_exec { ScalarT preceding; // Preceding window scalar. ScalarT following; // Following window scalar. cudf::size_type min_periods = 1; -}; // struct window_exec; +}; // struct window_exec; struct RangeRollingTest : public cudf::test::BaseFixture {}; diff --git a/cpp/tests/sort/segmented_sort_tests.cpp b/cpp/tests/sort/segmented_sort_tests.cpp index b3f98eb54b9..da9666cbc74 100644 --- a/cpp/tests/sort/segmented_sort_tests.cpp +++ b/cpp/tests/sort/segmented_sort_tests.cpp @@ -270,7 +270,7 @@ TEST_F(SegmentedSortInt, Sliced) column_wrapper expected2{{0, 1, 3, 2, 4, 5, 6}}; column_wrapper expected3{{0, 1, 2, 3, 4, 5, 6}}; // clang-format on - auto slice = cudf::slice(col1, {4, 11})[0]; // 7 elements + auto slice = cudf::slice(col1, {4, 11})[0]; // 7 elements cudf::table_view input{{slice}}; auto seg_slice = cudf::slice(segments2, {2, 4})[0]; // 2 elements diff --git a/cpp/tests/strings/chars_types_tests.cpp b/cpp/tests/strings/chars_types_tests.cpp index a16da41af7a..c595977c269 100644 --- a/cpp/tests/strings/chars_types_tests.cpp +++ b/cpp/tests/strings/chars_types_tests.cpp @@ -50,17 +50,17 @@ TEST_P(CharsTypes, AllTypes) "\t\r\n\f "}; bool expecteds[] = {false, false, false, false, false, false, false, false, - false, false, false, false, false, true, false, false, // decimal + false, false, false, false, false, true, false, false, // decimal false, false, false, false, false, false, false, false, - false, true, false, true, false, true, false, false, // numeric + false, true, false, true, false, true, false, false, // numeric false, false, false, false, false, false, false, false, - false, false, false, true, false, true, false, false, // digit + false, false, false, true, false, true, false, false, // digit true, true, false, true, false, false, false, false, - false, false, false, false, false, false, true, false, // alpha + false, false, false, false, false, false, true, false, // alpha false, false, false, false, false, false, false, false, - false, false, false, false, false, false, false, true, // space + false, false, false, false, false, false, false, true, // space false, false, false, true, false, false, false, false, - false, false, false, false, false, false, false, false, // upper + false, false, false, false, false, false, false, false, // upper false, true, false, false, false, false, false, false, false, false, false, false, false, false, true, false}; // lower diff --git a/cpp/tests/strings/durations_tests.cpp b/cpp/tests/strings/durations_tests.cpp index 0c7a1ad8042..1902f907f43 100644 --- a/cpp/tests/strings/durations_tests.cpp +++ b/cpp/tests/strings/durations_tests.cpp @@ -398,7 +398,7 @@ TEST_F(StringsDurationsTest, ParseSingle) "-59", "999", "-999", - "", // error + "", // error "01", ""}; // error auto size = cudf::column_view(string_src).size(); @@ -449,7 +449,7 @@ TEST_F(StringsDurationsTest, ParseMultiple) "-59:00:00", "999:00:00", "-999:00:00", - "", // error + "", // error "01:01:01", ""}; // error auto size = cudf::column_view(string_src).size(); @@ -503,7 +503,7 @@ TEST_F(StringsDurationsTest, ParseSubsecond) "-59:00:00", "999:00:00", "-999:00:00", - "", // error + "", // error "01:01:01", ""}; // error auto size = cudf::column_view(string_src).size(); @@ -660,7 +660,7 @@ TEST_F(StringsDurationsTest, ParseCompoundSpecifier) "09:00 AM", // error "", // error "01:01:01", - ""}; // error + ""}; // error cudf::test::fixed_width_column_wrapper expected_s3( {0, diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index bae402155e9..620e0bfe8de 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -440,7 +440,7 @@ class corresponding_rows_not_equivalent { // Must handle inf and nan separately if (std::isinf(x) || std::isinf(y)) { - return x != y; // comparison of (inf==inf) returns true + return x != y; // comparison of (inf==inf) returns true } else if (std::isnan(x) || std::isnan(y)) { return std::isnan(x) != std::isnan(y); // comparison of (nan==nan) returns false } else {