From 3676839700905c13c7a837e51b5ad7bfafc4b225 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Wed, 13 Sep 2023 09:17:39 -0400 Subject: [PATCH 1/6] Correct 20054-D: dynamic initialization found on arm+12.2 --- cpp/src/io/avro/avro_common.hpp | 3 +- cpp/src/io/comp/unsnap.cu | 22 ++++--- cpp/src/io/orc/orc_gpu.hpp | 38 +++++------- cpp/src/io/orc/stats_enc.cu | 14 ++--- cpp/src/io/orc/stripe_init.cu | 29 ++++----- cpp/src/io/parquet/page_decode.cuh | 67 +++++++++++---------- cpp/src/io/parquet/page_hdr.cu | 12 ++-- cpp/src/io/parquet/parquet_gpu.hpp | 57 +++++++++--------- cpp/src/io/statistics/column_statistics.cuh | 12 ++-- cpp/src/io/statistics/statistics.cuh | 30 ++++----- 10 files changed, 143 insertions(+), 141 deletions(-) diff --git a/cpp/src/io/avro/avro_common.hpp b/cpp/src/io/avro/avro_common.hpp index ff8ee206dd4..0058d236d8c 100644 --- a/cpp/src/io/avro/avro_common.hpp +++ b/cpp/src/io/avro/avro_common.hpp @@ -25,7 +25,8 @@ namespace cudf { namespace io { namespace avro { struct block_desc_s { - block_desc_s() {} + block_desc_s() = default; // required to compile on ctk-12.2 + aarch64 + explicit constexpr block_desc_s( size_t offset_, uint32_t size_, uint32_t row_offset_, uint32_t first_row_, uint32_t num_rows_) : offset(offset_), diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index a7a1cfd3f9e..f3a79df5d23 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -52,6 +52,8 @@ struct unsnap_batch_s { * @brief Queue structure used to exchange data between warps */ struct unsnap_queue_s { + unsnap_queue_s() = default; // required to compile on ctk-12.2 + aarch64 + uint32_t prefetch_wrpos; ///< Prefetcher write position uint32_t prefetch_rdpos; ///< Prefetch consumer read position int32_t prefetch_end; ///< Prefetch enable flag (nonzero stops prefetcher) @@ -64,15 +66,17 @@ struct unsnap_queue_s { * @brief snappy decompression state */ struct unsnap_state_s { - uint8_t const* base; ///< base ptr of compressed stream - uint8_t const* end; ///< end of compressed stream - uint32_t uncompressed_size; ///< uncompressed stream size - uint32_t bytes_left; ///< remaining bytes to decompress - int32_t error; ///< current error status - uint32_t tstart; ///< start time for perf logging - volatile unsnap_queue_s q; ///< queue for cross-warp communication - device_span src; ///< input for current block - device_span dst; ///< output for current block + constexpr unsnap_state_s() noexcept {} // required to compile on ctk-12.2 + aarch64 + + uint8_t const* base = nullptr; ///< base ptr of compressed stream + uint8_t const* end = nullptr; ///< end of compressed stream + uint32_t uncompressed_size = 0; ///< uncompressed stream size + uint32_t bytes_left = 0; ///< remaining bytes to decompress + int32_t error = 0; ///< current error status + uint32_t tstart = 0; ///< start time for perf logging + volatile unsnap_queue_s q{}; ///< queue for cross-warp communication + device_span src; ///< input for current block + device_span dst; ///< output for current block }; inline __device__ volatile uint8_t& byte_access(unsnap_state_s* s, uint32_t pos) diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index 681cc0fb9d2..c676e129798 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -59,31 +59,25 @@ struct CompressedStreamInfo { explicit constexpr CompressedStreamInfo(uint8_t const* compressed_data_, size_t compressed_size_) : compressed_data(compressed_data_), uncompressed_data(nullptr), - compressed_data_size(compressed_size_), - dec_in_ctl(nullptr), - dec_out_ctl(nullptr), - copy_in_ctl(nullptr), - copy_out_ctl(nullptr), - num_compressed_blocks(0), - num_uncompressed_blocks(0), - max_uncompressed_size(0), - max_uncompressed_block_size(0) + compressed_data_size(compressed_size_) { } - uint8_t const* compressed_data; // [in] base ptr to compressed stream data - uint8_t* uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet - size_t compressed_data_size; // [in] compressed data size for this stream - device_span* dec_in_ctl; // [in] input buffer to decompress - device_span* dec_out_ctl; // [in] output buffer to decompress into - device_span dec_res; // [in] results of decompression - device_span* copy_in_ctl; // [out] input buffer to copy - device_span* copy_out_ctl; // [out] output buffer to copy to - uint32_t num_compressed_blocks; // [in,out] number of entries in decctl(in), number of compressed - // blocks(out) - uint32_t num_uncompressed_blocks; // [in,out] number of entries in dec_in_ctl(in), number of + uint8_t const* compressed_data = nullptr; // [in] base ptr to compressed stream data + uint8_t* uncompressed_data = + nullptr; // [in] base ptr to uncompressed stream data or NULL if not known yet + size_t compressed_data_size = 0; // [in] compressed data size for this stream + device_span* dec_in_ctl = nullptr; // [in] input buffer to decompress + device_span* dec_out_ctl = nullptr; // [in] output buffer to decompress into + device_span dec_res{}; // [in] results of decompression + device_span* copy_in_ctl = nullptr; // [out] input buffer to copy + device_span* copy_out_ctl = nullptr; // [out] output buffer to copy to + uint32_t num_compressed_blocks = 0; // [in,out] number of entries in decctl(in), number of + // compressed blocks(out) + uint32_t num_uncompressed_blocks = 0; // [in,out] number of entries in dec_in_ctl(in), number of // uncompressed blocks(out) - uint64_t max_uncompressed_size; // [out] maximum uncompressed data size of stream - uint32_t max_uncompressed_block_size; // [out] maximum uncompressed size of any block in stream + uint64_t max_uncompressed_size = 0; // [out] maximum uncompressed data size of stream + uint32_t max_uncompressed_block_size = + 0; // [out] maximum uncompressed size of any block in stream }; enum StreamIndexType { diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index 069841980c1..888ab2a1828 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -121,14 +121,14 @@ __global__ void __launch_bounds__(block_size, 1) } struct stats_state_s { - uint8_t* base; ///< Output buffer start - uint8_t* end; ///< Output buffer end - statistics_chunk chunk; - statistics_merge_group group; - statistics_dtype stats_dtype; //!< Statistics data type for this column + uint8_t* base = nullptr; ///< Output buffer start + uint8_t* end = nullptr; ///< Output buffer end + statistics_chunk chunk{}; + statistics_merge_group group{}; + statistics_dtype stats_dtype{}; //!< Statistics data type for this column // ORC stats - uint64_t numberOfValues; - uint8_t hasNull; + uint64_t numberOfValues = 0; + uint8_t hasNull = 0; }; /* diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index d8a60350356..d387158696e 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -30,14 +30,14 @@ namespace orc { namespace gpu { struct comp_in_out { - uint8_t const* in_ptr; - size_t in_size; - uint8_t* out_ptr; - size_t out_size; + uint8_t const* in_ptr = nullptr; + size_t in_size = 0; + uint8_t* out_ptr = nullptr; + size_t out_size = 0; }; struct compressed_stream_s { - CompressedStreamInfo info; - comp_in_out ctl; + CompressedStreamInfo info{}; + comp_in_out ctl{}; }; // blockDim {128,1,1} @@ -208,14 +208,15 @@ __global__ void __launch_bounds__(128, 8) * @brief Shared mem state for gpuParseRowGroupIndex */ struct rowindex_state_s { - ColumnDesc chunk; - uint32_t rowgroup_start; - uint32_t rowgroup_end; - int is_compressed; - uint32_t row_index_entry[3][CI_PRESENT]; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2 - CompressedStreamInfo strm_info[2]; - RowGroup rowgroups[128]; - uint32_t compressed_offset[128][2]; + ColumnDesc chunk{}; + uint32_t rowgroup_start = 0; + uint32_t rowgroup_end = 0; + int is_compressed = 0; + uint32_t row_index_entry[3] + [CI_PRESENT]{}; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2 + CompressedStreamInfo strm_info[2]{}; + RowGroup rowgroups[128]{}; + uint32_t compressed_offset[128][2]{}; }; enum row_entry_state_e { diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 26e3c951b2e..68378450a8b 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -26,48 +26,49 @@ namespace cudf::io::parquet::gpu { struct page_state_s { - uint8_t const* data_start; - uint8_t const* data_end; - uint8_t const* lvl_end; - uint8_t const* dict_base; // ptr to dictionary page data - int32_t dict_size; // size of dictionary data - int32_t first_row; // First row in page to output - int32_t num_rows; // Rows in page to decode (including rows to be skipped) - int32_t first_output_value; // First value in page to output - int32_t num_input_values; // total # of input/level values in the page - int32_t dtype_len; // Output data type length - int32_t dtype_len_in; // Can be larger than dtype_len if truncating 32-bit into 8-bit - int32_t dict_bits; // # of bits to store dictionary indices - uint32_t dict_run; - int32_t dict_val; - uint32_t initial_rle_run[NUM_LEVEL_TYPES]; // [def,rep] - int32_t initial_rle_value[NUM_LEVEL_TYPES]; // [def,rep] - int32_t error; - PageInfo page; - ColumnChunkDesc col; + constexpr page_state_s() noexcept {} + uint8_t const* data_start = nullptr; + uint8_t const* data_end = nullptr; + uint8_t const* lvl_end = nullptr; + uint8_t const* dict_base = nullptr; // ptr to dictionary page data + int32_t dict_size = 0; // size of dictionary data + int32_t first_row = 0; // First row in page to output + int32_t num_rows = 0; // Rows in page to decode (including rows to be skipped) + int32_t first_output_value = 0; // First value in page to output + int32_t num_input_values = 0; // total # of input/level values in the page + int32_t dtype_len = 0; // Output data type length + int32_t dtype_len_in = 0; // Can be larger than dtype_len if truncating 32-bit into 8-bit + int32_t dict_bits = 0; // # of bits to store dictionary indices + uint32_t dict_run = 0; + int32_t dict_val = 0; + uint32_t initial_rle_run[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t initial_rle_value[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t error = 0; + PageInfo page{}; + ColumnChunkDesc col{}; // (leaf) value decoding - int32_t nz_count; // number of valid entries in nz_idx (write position in circular buffer) - int32_t dict_pos; // write position of dictionary indices - int32_t src_pos; // input read position of final output value - int32_t ts_scale; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale + int32_t nz_count = 0; // number of valid entries in nz_idx (write position in circular buffer) + int32_t dict_pos = 0; // write position of dictionary indices + int32_t src_pos = 0; // input read position of final output value + int32_t ts_scale = 0; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale // repetition/definition level decoding - int32_t input_value_count; // how many values of the input we've processed - int32_t input_row_count; // how many rows of the input we've processed - int32_t input_leaf_count; // how many leaf values of the input we've processed - uint8_t const* lvl_start[NUM_LEVEL_TYPES]; // [def,rep] - uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]; // [def,rep] - uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]; // [def,rep] - int32_t lvl_count[NUM_LEVEL_TYPES]; // how many of each of the streams we've decoded - int32_t row_index_lower_bound; // lower bound of row indices we should process + int32_t input_value_count = 0; // how many values of the input we've processed + int32_t input_row_count = 0; // how many rows of the input we've processed + int32_t input_leaf_count = 0; // how many leaf values of the input we've processed + uint8_t const* lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep] + uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep] + uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{}; // [def,rep] + int32_t lvl_count[NUM_LEVEL_TYPES]{}; // how many of each of the streams we've decoded + int32_t row_index_lower_bound = 0; // lower bound of row indices we should process // a shared-memory cache of frequently used data when decoding. The source of this data is // normally stored in global memory which can yield poor performance. So, when possible // we copy that info here prior to decoding - PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]; + PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{}; // points to either nesting_decode_cache above when possible, or to the global source otherwise - PageNestingDecodeInfo* nesting_info; + PageNestingDecodeInfo* nesting_info = nullptr; }; // buffers only used in the decode kernel. separated from page_state_s to keep diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 0d611643b46..2a6f76e43d2 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -45,13 +45,13 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0, ST_FLD_LIST}; struct byte_stream_s { - uint8_t const* cur; - uint8_t const* end; - uint8_t const* base; + uint8_t const* cur = nullptr; + uint8_t const* end = nullptr; + uint8_t const* base = nullptr; // Parsed symbols - PageType page_type; - PageInfo page; - ColumnChunkDesc ck; + PageType page_type{}; + PageInfo page{}; + ColumnChunkDesc ck{}; }; /** diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index e82b6abc13d..6930eac5feb 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -228,7 +228,7 @@ struct PageInfo { * @brief Struct describing a particular chunk of column data */ struct ColumnChunkDesc { - ColumnChunkDesc() = default; + constexpr ColumnChunkDesc() noexcept {}; explicit ColumnChunkDesc(size_t compressed_size_, uint8_t* compressed_data_, size_t num_values_, @@ -275,34 +275,35 @@ struct ColumnChunkDesc { { } - uint8_t const* compressed_data; // pointer to compressed column chunk data - size_t compressed_size; // total compressed data size for this chunk - size_t num_values; // total number of values in this column - size_t start_row; // starting row of this chunk - uint32_t num_rows; // number of rows in this chunk - int16_t max_level[level_type::NUM_LEVEL_TYPES]; // max definition/repetition level - int16_t max_nesting_depth; // max nesting depth of the output - uint16_t data_type; // basic column data type, ((type_length << 3) | - // parquet::Type) + uint8_t const* compressed_data = nullptr; // pointer to compressed column chunk data + size_t compressed_size = 0; // total compressed data size for this chunk + size_t num_values = 0; // total number of values in this column + size_t start_row = 0; // starting row of this chunk + uint32_t num_rows = 0; // number of rows in this chunk + int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level + int16_t max_nesting_depth = 0; // max nesting depth of the output + uint16_t data_type = 0; // basic column data type, ((type_length << 3) | + // parquet::Type) uint8_t - level_bits[level_type::NUM_LEVEL_TYPES]; // bits to encode max definition/repetition levels - int32_t num_data_pages; // number of data pages - int32_t num_dict_pages; // number of dictionary pages - int32_t max_num_pages; // size of page_info array - PageInfo* page_info; // output page info for up to num_dict_pages + - // num_data_pages (dictionary pages first) - string_index_pair* str_dict_index; // index for string dictionary - bitmask_type** valid_map_base; // base pointers of valid bit map for this column - void** column_data_base; // base pointers of column data - void** column_string_base; // base pointers of column string data - int8_t codec; // compressed codec enum - int8_t converted_type; // converted type enum - LogicalType logical_type; // logical type - int8_t decimal_precision; // Decimal precision - int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) - - int32_t src_col_index; // my input column index - int32_t src_col_schema; // my schema index in the file + level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels + int32_t num_data_pages = 0; // number of data pages + int32_t num_dict_pages = 0; // number of dictionary pages + int32_t max_num_pages = 0; // size of page_info array + PageInfo* page_info = nullptr; // output page info for up to num_dict_pages + + // num_data_pages (dictionary pages first) + string_index_pair* str_dict_index = nullptr; // index for string dictionary + bitmask_type** valid_map_base = nullptr; // base pointers of valid bit map for this column + void** column_data_base = nullptr; // base pointers of column data + void** column_string_base = nullptr; // base pointers of column string data + int8_t codec = 0; // compressed codec enum + int8_t converted_type = 0; // converted type enum + LogicalType logical_type{}; // logical type + int8_t decimal_precision = 0; // Decimal precision + int32_t ts_clock_rate = + 0; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) + + int32_t src_col_index = 0; // my input column index + int32_t src_col_schema = 0; // my schema index in the file }; /** diff --git a/cpp/src/io/statistics/column_statistics.cuh b/cpp/src/io/statistics/column_statistics.cuh index 28e77f62a43..f71fb95949f 100644 --- a/cpp/src/io/statistics/column_statistics.cuh +++ b/cpp/src/io/statistics/column_statistics.cuh @@ -34,18 +34,18 @@ namespace io { * @brief shared state for statistics calculation kernel */ struct stats_state_s { - stats_column_desc col; ///< Column information - statistics_group group; ///< Group description - statistics_chunk ck; ///< Output statistics chunk + stats_column_desc col{}; ///< Column information + statistics_group group{}; ///< Group description + statistics_chunk ck{}; ///< Output statistics chunk }; /** * @brief shared state for statistics merge kernel */ struct merge_state_s { - stats_column_desc col; ///< Column information - statistics_merge_group group; ///< Group description - statistics_chunk ck; ///< Resulting statistics chunk + stats_column_desc col{}; ///< Column information + statistics_merge_group group{}; ///< Group description + statistics_chunk ck{}; ///< Resulting statistics chunk }; template diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh index 805ca43553e..ac395a92b19 100644 --- a/cpp/src/io/statistics/statistics.cuh +++ b/cpp/src/io/statistics/statistics.cuh @@ -98,27 +98,27 @@ union statistics_val { }; struct statistics_chunk { - uint32_t non_nulls; //!< number of non-null values in chunk - uint32_t null_count; //!< number of null values in chunk - statistics_val min_value; //!< minimum value in chunk - statistics_val max_value; //!< maximum value in chunk - statistics_val sum; //!< sum of chunk - uint8_t has_minmax; //!< Nonzero if min_value and max_values are valid - uint8_t has_sum; //!< Nonzero if sum is valid + uint32_t non_nulls = 0; //!< number of non-null values in chunk + uint32_t null_count = 0; //!< number of null values in chunk + statistics_val min_value{}; //!< minimum value in chunk + statistics_val max_value{}; //!< maximum value in chunk + statistics_val sum{}; //!< sum of chunk + uint8_t has_minmax = 0; //!< Nonzero if min_value and max_values are valid + uint8_t has_sum = 0; //!< Nonzero if sum is valid }; struct statistics_group { - stats_column_desc const* col; //!< Column information - uint32_t start_row; //!< Start row of this group - uint32_t num_rows; //!< Number of rows in group - uint32_t non_leaf_nulls; //!< Number of null non-leaf values in the group + stats_column_desc const* col = nullptr; //!< Column information + uint32_t start_row = 0; //!< Start row of this group + uint32_t num_rows = 0; //!< Number of rows in group + uint32_t non_leaf_nulls = 0; //!< Number of null non-leaf values in the group }; struct statistics_merge_group { - data_type col_dtype; //!< Column data type - statistics_dtype stats_dtype; //!< Statistics data type for this column - uint32_t start_chunk; //!< Start chunk of this group - uint32_t num_chunks; //!< Number of chunks in group + data_type col_dtype; //!< Column data type + statistics_dtype stats_dtype = dtype_none; //!< Statistics data type for this column + uint32_t start_chunk = 0; //!< Start chunk of this group + uint32_t num_chunks = 0; //!< Number of chunks in group }; template >* = nullptr> From 9b2f16a11fc884f6b0711583394d38e32f2148f7 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 18 Sep 2023 09:56:17 -0400 Subject: [PATCH 2/6] Update cpp/src/io/comp/unsnap.cu Co-authored-by: David Wendt <45795991+davidwendt@users.noreply.github.com> --- cpp/src/io/comp/unsnap.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index f3a79df5d23..7efbc9f015a 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -68,12 +68,12 @@ struct unsnap_queue_s { struct unsnap_state_s { constexpr unsnap_state_s() noexcept {} // required to compile on ctk-12.2 + aarch64 - uint8_t const* base = nullptr; ///< base ptr of compressed stream - uint8_t const* end = nullptr; ///< end of compressed stream - uint32_t uncompressed_size = 0; ///< uncompressed stream size - uint32_t bytes_left = 0; ///< remaining bytes to decompress - int32_t error = 0; ///< current error status - uint32_t tstart = 0; ///< start time for perf logging + uint8_t const* base{}; ///< base ptr of compressed stream + uint8_t const* end{}; ///< end of compressed stream + uint32_t uncompressed_size{}; ///< uncompressed stream size + uint32_t bytes_left{}; ///< remaining bytes to decompress + int32_t error{}; ///< current error status + uint32_t tstart{}; ///< start time for perf logging volatile unsnap_queue_s q{}; ///< queue for cross-warp communication device_span src; ///< input for current block device_span dst; ///< output for current block From d0667c5f51e25f005b39d2a6f57256f8fdb96a8d Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 18 Sep 2023 10:00:01 -0400 Subject: [PATCH 3/6] Simplify changes --- cpp/src/io/orc/orc_gpu.hpp | 31 +++++++++++++++---------------- cpp/src/io/orc/stats_enc.cu | 8 ++++---- 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/cpp/src/io/orc/orc_gpu.hpp b/cpp/src/io/orc/orc_gpu.hpp index c676e129798..3fdabff37db 100644 --- a/cpp/src/io/orc/orc_gpu.hpp +++ b/cpp/src/io/orc/orc_gpu.hpp @@ -62,22 +62,21 @@ struct CompressedStreamInfo { compressed_data_size(compressed_size_) { } - uint8_t const* compressed_data = nullptr; // [in] base ptr to compressed stream data - uint8_t* uncompressed_data = - nullptr; // [in] base ptr to uncompressed stream data or NULL if not known yet - size_t compressed_data_size = 0; // [in] compressed data size for this stream - device_span* dec_in_ctl = nullptr; // [in] input buffer to decompress - device_span* dec_out_ctl = nullptr; // [in] output buffer to decompress into - device_span dec_res{}; // [in] results of decompression - device_span* copy_in_ctl = nullptr; // [out] input buffer to copy - device_span* copy_out_ctl = nullptr; // [out] output buffer to copy to - uint32_t num_compressed_blocks = 0; // [in,out] number of entries in decctl(in), number of - // compressed blocks(out) - uint32_t num_uncompressed_blocks = 0; // [in,out] number of entries in dec_in_ctl(in), number of - // uncompressed blocks(out) - uint64_t max_uncompressed_size = 0; // [out] maximum uncompressed data size of stream - uint32_t max_uncompressed_block_size = - 0; // [out] maximum uncompressed size of any block in stream + uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data + uint8_t* + uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet + size_t compressed_data_size{}; // [in] compressed data size for this stream + device_span* dec_in_ctl{}; // [in] input buffer to decompress + device_span* dec_out_ctl{}; // [in] output buffer to decompress into + device_span dec_res{}; // [in] results of decompression + device_span* copy_in_ctl{}; // [out] input buffer to copy + device_span* copy_out_ctl{}; // [out] output buffer to copy to + uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of + // compressed blocks(out) + uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of + // uncompressed blocks(out) + uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream + uint32_t max_uncompressed_block_size{}; // [out] maximum uncompressed size of any block in stream }; enum StreamIndexType { diff --git a/cpp/src/io/orc/stats_enc.cu b/cpp/src/io/orc/stats_enc.cu index 888ab2a1828..f4118f07e8b 100644 --- a/cpp/src/io/orc/stats_enc.cu +++ b/cpp/src/io/orc/stats_enc.cu @@ -121,14 +121,14 @@ __global__ void __launch_bounds__(block_size, 1) } struct stats_state_s { - uint8_t* base = nullptr; ///< Output buffer start - uint8_t* end = nullptr; ///< Output buffer end + uint8_t* base{}; ///< Output buffer start + uint8_t* end{}; ///< Output buffer end statistics_chunk chunk{}; statistics_merge_group group{}; statistics_dtype stats_dtype{}; //!< Statistics data type for this column // ORC stats - uint64_t numberOfValues = 0; - uint8_t hasNull = 0; + uint64_t numberOfValues{}; + uint8_t hasNull{}; }; /* From 951d781742ad83d5db9c7edbf5b45596a3bb5121 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 18 Sep 2023 10:00:01 -0400 Subject: [PATCH 4/6] Simplify changes --- cpp/src/io/orc/stripe_init.cu | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/src/io/orc/stripe_init.cu b/cpp/src/io/orc/stripe_init.cu index d387158696e..8eeca504121 100644 --- a/cpp/src/io/orc/stripe_init.cu +++ b/cpp/src/io/orc/stripe_init.cu @@ -30,10 +30,10 @@ namespace orc { namespace gpu { struct comp_in_out { - uint8_t const* in_ptr = nullptr; - size_t in_size = 0; - uint8_t* out_ptr = nullptr; - size_t out_size = 0; + uint8_t const* in_ptr{}; + size_t in_size{}; + uint8_t* out_ptr{}; + size_t out_size{}; }; struct compressed_stream_s { CompressedStreamInfo info{}; @@ -209,9 +209,9 @@ __global__ void __launch_bounds__(128, 8) */ struct rowindex_state_s { ColumnDesc chunk{}; - uint32_t rowgroup_start = 0; - uint32_t rowgroup_end = 0; - int is_compressed = 0; + uint32_t rowgroup_start{}; + uint32_t rowgroup_end{}; + int is_compressed{}; uint32_t row_index_entry[3] [CI_PRESENT]{}; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2 CompressedStreamInfo strm_info[2]{}; From 81f90cc4a9ef6c77dbb4ce03802db0047fc8f9d2 Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 18 Sep 2023 10:00:01 -0400 Subject: [PATCH 5/6] Simplify changes --- cpp/src/io/parquet/page_decode.cuh | 48 ++++++++++++++-------------- cpp/src/io/parquet/page_hdr.cu | 6 ++-- cpp/src/io/parquet/parquet_gpu.hpp | 43 ++++++++++++------------- cpp/src/io/statistics/statistics.cuh | 24 +++++++------- 4 files changed, 60 insertions(+), 61 deletions(-) diff --git a/cpp/src/io/parquet/page_decode.cuh b/cpp/src/io/parquet/page_decode.cuh index 68378450a8b..5e66885d746 100644 --- a/cpp/src/io/parquet/page_decode.cuh +++ b/cpp/src/io/parquet/page_decode.cuh @@ -27,48 +27,48 @@ namespace cudf::io::parquet::gpu { struct page_state_s { constexpr page_state_s() noexcept {} - uint8_t const* data_start = nullptr; - uint8_t const* data_end = nullptr; - uint8_t const* lvl_end = nullptr; - uint8_t const* dict_base = nullptr; // ptr to dictionary page data - int32_t dict_size = 0; // size of dictionary data - int32_t first_row = 0; // First row in page to output - int32_t num_rows = 0; // Rows in page to decode (including rows to be skipped) - int32_t first_output_value = 0; // First value in page to output - int32_t num_input_values = 0; // total # of input/level values in the page - int32_t dtype_len = 0; // Output data type length - int32_t dtype_len_in = 0; // Can be larger than dtype_len if truncating 32-bit into 8-bit - int32_t dict_bits = 0; // # of bits to store dictionary indices - uint32_t dict_run = 0; - int32_t dict_val = 0; + uint8_t const* data_start{}; + uint8_t const* data_end{}; + uint8_t const* lvl_end{}; + uint8_t const* dict_base{}; // ptr to dictionary page data + int32_t dict_size{}; // size of dictionary data + int32_t first_row{}; // First row in page to output + int32_t num_rows{}; // Rows in page to decode (including rows to be skipped) + int32_t first_output_value{}; // First value in page to output + int32_t num_input_values{}; // total # of input/level values in the page + int32_t dtype_len{}; // Output data type length + int32_t dtype_len_in{}; // Can be larger than dtype_len if truncating 32-bit into 8-bit + int32_t dict_bits{}; // # of bits to store dictionary indices + uint32_t dict_run{}; + int32_t dict_val{}; uint32_t initial_rle_run[NUM_LEVEL_TYPES]{}; // [def,rep] int32_t initial_rle_value[NUM_LEVEL_TYPES]{}; // [def,rep] - int32_t error = 0; + int32_t error{}; PageInfo page{}; ColumnChunkDesc col{}; // (leaf) value decoding - int32_t nz_count = 0; // number of valid entries in nz_idx (write position in circular buffer) - int32_t dict_pos = 0; // write position of dictionary indices - int32_t src_pos = 0; // input read position of final output value - int32_t ts_scale = 0; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale + int32_t nz_count{}; // number of valid entries in nz_idx (write position in circular buffer) + int32_t dict_pos{}; // write position of dictionary indices + int32_t src_pos{}; // input read position of final output value + int32_t ts_scale{}; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale // repetition/definition level decoding - int32_t input_value_count = 0; // how many values of the input we've processed - int32_t input_row_count = 0; // how many rows of the input we've processed - int32_t input_leaf_count = 0; // how many leaf values of the input we've processed + int32_t input_value_count{}; // how many values of the input we've processed + int32_t input_row_count{}; // how many rows of the input we've processed + int32_t input_leaf_count{}; // how many leaf values of the input we've processed uint8_t const* lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep] uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep] uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{}; // [def,rep] int32_t lvl_count[NUM_LEVEL_TYPES]{}; // how many of each of the streams we've decoded - int32_t row_index_lower_bound = 0; // lower bound of row indices we should process + int32_t row_index_lower_bound{}; // lower bound of row indices we should process // a shared-memory cache of frequently used data when decoding. The source of this data is // normally stored in global memory which can yield poor performance. So, when possible // we copy that info here prior to decoding PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{}; // points to either nesting_decode_cache above when possible, or to the global source otherwise - PageNestingDecodeInfo* nesting_info = nullptr; + PageNestingDecodeInfo* nesting_info{}; }; // buffers only used in the decode kernel. separated from page_state_s to keep diff --git a/cpp/src/io/parquet/page_hdr.cu b/cpp/src/io/parquet/page_hdr.cu index 2a6f76e43d2..6f8b2f50443 100644 --- a/cpp/src/io/parquet/page_hdr.cu +++ b/cpp/src/io/parquet/page_hdr.cu @@ -45,9 +45,9 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0, ST_FLD_LIST}; struct byte_stream_s { - uint8_t const* cur = nullptr; - uint8_t const* end = nullptr; - uint8_t const* base = nullptr; + uint8_t const* cur{}; + uint8_t const* end{}; + uint8_t const* base{}; // Parsed symbols PageType page_type{}; PageInfo page{}; diff --git a/cpp/src/io/parquet/parquet_gpu.hpp b/cpp/src/io/parquet/parquet_gpu.hpp index 6930eac5feb..fc7fe13fb85 100644 --- a/cpp/src/io/parquet/parquet_gpu.hpp +++ b/cpp/src/io/parquet/parquet_gpu.hpp @@ -275,35 +275,34 @@ struct ColumnChunkDesc { { } - uint8_t const* compressed_data = nullptr; // pointer to compressed column chunk data - size_t compressed_size = 0; // total compressed data size for this chunk - size_t num_values = 0; // total number of values in this column - size_t start_row = 0; // starting row of this chunk - uint32_t num_rows = 0; // number of rows in this chunk + uint8_t const* compressed_data{}; // pointer to compressed column chunk data + size_t compressed_size{}; // total compressed data size for this chunk + size_t num_values{}; // total number of values in this column + size_t start_row{}; // starting row of this chunk + uint32_t num_rows{}; // number of rows in this chunk int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level - int16_t max_nesting_depth = 0; // max nesting depth of the output - uint16_t data_type = 0; // basic column data type, ((type_length << 3) | + int16_t max_nesting_depth{}; // max nesting depth of the output + uint16_t data_type{}; // basic column data type, ((type_length << 3) | // parquet::Type) uint8_t level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels - int32_t num_data_pages = 0; // number of data pages - int32_t num_dict_pages = 0; // number of dictionary pages - int32_t max_num_pages = 0; // size of page_info array - PageInfo* page_info = nullptr; // output page info for up to num_dict_pages + + int32_t num_data_pages{}; // number of data pages + int32_t num_dict_pages{}; // number of dictionary pages + int32_t max_num_pages{}; // size of page_info array + PageInfo* page_info{}; // output page info for up to num_dict_pages + // num_data_pages (dictionary pages first) - string_index_pair* str_dict_index = nullptr; // index for string dictionary - bitmask_type** valid_map_base = nullptr; // base pointers of valid bit map for this column - void** column_data_base = nullptr; // base pointers of column data - void** column_string_base = nullptr; // base pointers of column string data - int8_t codec = 0; // compressed codec enum - int8_t converted_type = 0; // converted type enum + string_index_pair* str_dict_index{}; // index for string dictionary + bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column + void** column_data_base{}; // base pointers of column data + void** column_string_base{}; // base pointers of column string data + int8_t codec{}; // compressed codec enum + int8_t converted_type{}; // converted type enum LogicalType logical_type{}; // logical type - int8_t decimal_precision = 0; // Decimal precision - int32_t ts_clock_rate = - 0; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) + int8_t decimal_precision{}; // Decimal precision + int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) - int32_t src_col_index = 0; // my input column index - int32_t src_col_schema = 0; // my schema index in the file + int32_t src_col_index{}; // my input column index + int32_t src_col_schema{}; // my schema index in the file }; /** diff --git a/cpp/src/io/statistics/statistics.cuh b/cpp/src/io/statistics/statistics.cuh index ac395a92b19..b6e698fee11 100644 --- a/cpp/src/io/statistics/statistics.cuh +++ b/cpp/src/io/statistics/statistics.cuh @@ -98,27 +98,27 @@ union statistics_val { }; struct statistics_chunk { - uint32_t non_nulls = 0; //!< number of non-null values in chunk - uint32_t null_count = 0; //!< number of null values in chunk + uint32_t non_nulls{}; //!< number of non-null values in chunk + uint32_t null_count{}; //!< number of null values in chunk statistics_val min_value{}; //!< minimum value in chunk statistics_val max_value{}; //!< maximum value in chunk statistics_val sum{}; //!< sum of chunk - uint8_t has_minmax = 0; //!< Nonzero if min_value and max_values are valid - uint8_t has_sum = 0; //!< Nonzero if sum is valid + uint8_t has_minmax{}; //!< Nonzero if min_value and max_values are valid + uint8_t has_sum{}; //!< Nonzero if sum is valid }; struct statistics_group { - stats_column_desc const* col = nullptr; //!< Column information - uint32_t start_row = 0; //!< Start row of this group - uint32_t num_rows = 0; //!< Number of rows in group - uint32_t non_leaf_nulls = 0; //!< Number of null non-leaf values in the group + stats_column_desc const* col{}; //!< Column information + uint32_t start_row{}; //!< Start row of this group + uint32_t num_rows{}; //!< Number of rows in group + uint32_t non_leaf_nulls{}; //!< Number of null non-leaf values in the group }; struct statistics_merge_group { - data_type col_dtype; //!< Column data type - statistics_dtype stats_dtype = dtype_none; //!< Statistics data type for this column - uint32_t start_chunk = 0; //!< Start chunk of this group - uint32_t num_chunks = 0; //!< Number of chunks in group + data_type col_dtype; //!< Column data type + statistics_dtype stats_dtype{dtype_none}; //!< Statistics data type for this column + uint32_t start_chunk{}; //!< Start chunk of this group + uint32_t num_chunks{}; //!< Number of chunks in group }; template >* = nullptr> From e7882bcdd8309f9464ba1f63105c3c129b8cde7f Mon Sep 17 00:00:00 2001 From: Robert Maynard Date: Mon, 18 Sep 2023 10:31:07 -0400 Subject: [PATCH 6/6] Correct style issues found by CI --- cpp/src/io/comp/unsnap.cu | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/cpp/src/io/comp/unsnap.cu b/cpp/src/io/comp/unsnap.cu index 7efbc9f015a..0fd8e3945cb 100644 --- a/cpp/src/io/comp/unsnap.cu +++ b/cpp/src/io/comp/unsnap.cu @@ -68,12 +68,12 @@ struct unsnap_queue_s { struct unsnap_state_s { constexpr unsnap_state_s() noexcept {} // required to compile on ctk-12.2 + aarch64 - uint8_t const* base{}; ///< base ptr of compressed stream - uint8_t const* end{}; ///< end of compressed stream - uint32_t uncompressed_size{}; ///< uncompressed stream size - uint32_t bytes_left{}; ///< remaining bytes to decompress - int32_t error{}; ///< current error status - uint32_t tstart{}; ///< start time for perf logging + uint8_t const* base{}; ///< base ptr of compressed stream + uint8_t const* end{}; ///< end of compressed stream + uint32_t uncompressed_size{}; ///< uncompressed stream size + uint32_t bytes_left{}; ///< remaining bytes to decompress + int32_t error{}; ///< current error status + uint32_t tstart{}; ///< start time for perf logging volatile unsnap_queue_s q{}; ///< queue for cross-warp communication device_span src; ///< input for current block device_span dst; ///< output for current block