rapidsai · robertmaynard · Sep 13, 2023 · Sep 18, 2023 · Sep 18, 2023 · Sep 18, 2023
@@ -47,17 +47,7 @@ namespace test {
  * class MyTestFixture : public cudf::test::BaseFixture {};
  * ```
  */
-class BaseFixture : public ::testing::Test {
-  rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()};
-
- public:
-  /**
-   * @brief Returns pointer to `device_memory_resource` that should be used for
-   * all tests inheriting from this fixture
-   * @return pointer to memory resource
-   */
-  rmm::mr::device_memory_resource* mr() { return _mr; }
-};
+class BaseFixture : public ::testing::Test {};
 
 /**
  * @brief Base test fixture that takes a parameter.
@@ -68,17 +58,7 @@ class BaseFixture : public ::testing::Test {
  * ```
  */
 template <typename T>
-class BaseFixtureWithParam : public ::testing::TestWithParam<T> {
-  rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()};
-
- public:
-  /**
-   * @brief Returns pointer to `device_memory_resource` that should be used for
-   * all tests inheriting from this fixture
-   * @return pointer to memory resource
-   */
-  rmm::mr::device_memory_resource* mr() const { return _mr; }
-};
+class BaseFixtureWithParam : public ::testing::TestWithParam<T> {};
 
 template <typename T, typename Enable = void>
 struct uniform_distribution_impl {};

@@ -25,7 +25,8 @@ namespace cudf {
 namespace io {
 namespace avro {
 struct block_desc_s {
-  block_desc_s() {}
+  block_desc_s() = default;  // required to compile on ctk-12.2 + aarch64
+
   explicit constexpr block_desc_s(
     size_t offset_, uint32_t size_, uint32_t row_offset_, uint32_t first_row_, uint32_t num_rows_)
     : offset(offset_),

@@ -52,6 +52,8 @@ struct unsnap_batch_s {
  * @brief Queue structure used to exchange data between warps
  */
 struct unsnap_queue_s {
+  unsnap_queue_s() = default;      // required to compile on ctk-12.2 + aarch64
+
   uint32_t prefetch_wrpos;         ///< Prefetcher write position
   uint32_t prefetch_rdpos;         ///< Prefetch consumer read position
   int32_t prefetch_end;            ///< Prefetch enable flag (nonzero stops prefetcher)
@@ -64,15 +66,17 @@ struct unsnap_queue_s {
  * @brief snappy decompression state
  */
 struct unsnap_state_s {
-  uint8_t const* base;             ///< base ptr of compressed stream
-  uint8_t const* end;              ///< end of compressed stream
-  uint32_t uncompressed_size;      ///< uncompressed stream size
-  uint32_t bytes_left;             ///< remaining bytes to decompress
-  int32_t error;                   ///< current error status
-  uint32_t tstart;                 ///< start time for perf logging
-  volatile unsnap_queue_s q;       ///< queue for cross-warp communication
-  device_span<uint8_t const> src;  ///< input for current block
-  device_span<uint8_t> dst;        ///< output for current block
+  constexpr unsnap_state_s() noexcept {}  // required to compile on ctk-12.2 + aarch64
+
+  uint8_t const* base{};                  ///< base ptr of compressed stream
+  uint8_t const* end{};                   ///< end of compressed stream
+  uint32_t uncompressed_size{};           ///< uncompressed stream size
+  uint32_t bytes_left{};                  ///< remaining bytes to decompress
+  int32_t error{};                        ///< current error status
+  uint32_t tstart{};                      ///< start time for perf logging
+  volatile unsnap_queue_s q{};            ///< queue for cross-warp communication
+  device_span<uint8_t const> src;         ///< input for current block
+  device_span<uint8_t> dst;               ///< output for current block
 };
 
 inline __device__ volatile uint8_t& byte_access(unsnap_state_s* s, uint32_t pos)

@@ -59,31 +59,24 @@ struct CompressedStreamInfo {
   explicit constexpr CompressedStreamInfo(uint8_t const* compressed_data_, size_t compressed_size_)
     : compressed_data(compressed_data_),
       uncompressed_data(nullptr),
-      compressed_data_size(compressed_size_),
-      dec_in_ctl(nullptr),
-      dec_out_ctl(nullptr),
-      copy_in_ctl(nullptr),
-      copy_out_ctl(nullptr),
-      num_compressed_blocks(0),
-      num_uncompressed_blocks(0),
-      max_uncompressed_size(0),
-      max_uncompressed_block_size(0)
+      compressed_data_size(compressed_size_)
   {
   }
-  uint8_t const* compressed_data;  // [in] base ptr to compressed stream data
-  uint8_t* uncompressed_data;  // [in] base ptr to uncompressed stream data or NULL if not known yet
-  size_t compressed_data_size;              // [in] compressed data size for this stream
-  device_span<uint8_t const>* dec_in_ctl;   // [in] input buffer to decompress
-  device_span<uint8_t>* dec_out_ctl;        // [in] output buffer to decompress into
-  device_span<compression_result> dec_res;  // [in] results of decompression
-  device_span<uint8_t const>* copy_in_ctl;  // [out] input buffer to copy
-  device_span<uint8_t>* copy_out_ctl;       // [out] output buffer to copy to
-  uint32_t num_compressed_blocks;  // [in,out] number of entries in decctl(in), number of compressed
-                                   // blocks(out)
-  uint32_t num_uncompressed_blocks;      // [in,out] number of entries in dec_in_ctl(in), number of
-                                         // uncompressed blocks(out)
-  uint64_t max_uncompressed_size;        // [out] maximum uncompressed data size of stream
-  uint32_t max_uncompressed_block_size;  // [out] maximum uncompressed size of any block in stream
+  uint8_t const* compressed_data{};  // [in] base ptr to compressed stream data
+  uint8_t*
+    uncompressed_data{};  // [in] base ptr to uncompressed stream data or NULL if not known yet
+  size_t compressed_data_size{};              // [in] compressed data size for this stream
+  device_span<uint8_t const>* dec_in_ctl{};   // [in] input buffer to decompress
+  device_span<uint8_t>* dec_out_ctl{};        // [in] output buffer to decompress into
+  device_span<compression_result> dec_res{};  // [in] results of decompression
+  device_span<uint8_t const>* copy_in_ctl{};  // [out] input buffer to copy
+  device_span<uint8_t>* copy_out_ctl{};       // [out] output buffer to copy to
+  uint32_t num_compressed_blocks{};           // [in,out] number of entries in decctl(in), number of
+                                              // compressed blocks(out)
+  uint32_t num_uncompressed_blocks{};  // [in,out] number of entries in dec_in_ctl(in), number of
+                                       // uncompressed blocks(out)
+  uint64_t max_uncompressed_size{};    // [out] maximum uncompressed data size of stream
+  uint32_t max_uncompressed_block_size{};  // [out] maximum uncompressed size of any block in stream
 };
 
 enum StreamIndexType {

@@ -121,14 +121,14 @@ __global__ void __launch_bounds__(block_size, 1)
 }
 
 struct stats_state_s {
-  uint8_t* base;  ///< Output buffer start
-  uint8_t* end;   ///< Output buffer end
-  statistics_chunk chunk;
-  statistics_merge_group group;
-  statistics_dtype stats_dtype;  //!< Statistics data type for this column
+  uint8_t* base{};  ///< Output buffer start
+  uint8_t* end{};   ///< Output buffer end
+  statistics_chunk chunk{};
+  statistics_merge_group group{};
+  statistics_dtype stats_dtype{};  //!< Statistics data type for this column
   // ORC stats
-  uint64_t numberOfValues;
-  uint8_t hasNull;
+  uint64_t numberOfValues{};
+  uint8_t hasNull{};
 };
 
 /*

@@ -30,14 +30,14 @@ namespace orc {
 namespace gpu {
 
 struct comp_in_out {
-  uint8_t const* in_ptr;
-  size_t in_size;
-  uint8_t* out_ptr;
-  size_t out_size;
+  uint8_t const* in_ptr{};
+  size_t in_size{};
+  uint8_t* out_ptr{};
+  size_t out_size{};
 };
 struct compressed_stream_s {
-  CompressedStreamInfo info;
-  comp_in_out ctl;
+  CompressedStreamInfo info{};
+  comp_in_out ctl{};
 };
 
 // blockDim {128,1,1}
@@ -208,14 +208,15 @@ __global__ void __launch_bounds__(128, 8)
  * @brief Shared mem state for gpuParseRowGroupIndex
  */
 struct rowindex_state_s {
-  ColumnDesc chunk;
-  uint32_t rowgroup_start;
-  uint32_t rowgroup_end;
-  int is_compressed;
-  uint32_t row_index_entry[3][CI_PRESENT];  // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2
-  CompressedStreamInfo strm_info[2];
-  RowGroup rowgroups[128];
-  uint32_t compressed_offset[128][2];
+  ColumnDesc chunk{};
+  uint32_t rowgroup_start{};
+  uint32_t rowgroup_end{};
+  int is_compressed{};
+  uint32_t row_index_entry[3]
+                          [CI_PRESENT]{};  // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2
+  CompressedStreamInfo strm_info[2]{};
+  RowGroup rowgroups[128]{};
+  uint32_t compressed_offset[128][2]{};
 };
 
 enum row_entry_state_e {

@@ -26,48 +26,49 @@
 namespace cudf::io::parquet::gpu {
 
 struct page_state_s {
-  uint8_t const* data_start;
-  uint8_t const* data_end;
-  uint8_t const* lvl_end;
-  uint8_t const* dict_base;    // ptr to dictionary page data
-  int32_t dict_size;           // size of dictionary data
-  int32_t first_row;           // First row in page to output
-  int32_t num_rows;            // Rows in page to decode (including rows to be skipped)
-  int32_t first_output_value;  // First value in page to output
-  int32_t num_input_values;    // total # of input/level values in the page
-  int32_t dtype_len;           // Output data type length
-  int32_t dtype_len_in;        // Can be larger than dtype_len if truncating 32-bit into 8-bit
-  int32_t dict_bits;           // # of bits to store dictionary indices
-  uint32_t dict_run;
-  int32_t dict_val;
-  uint32_t initial_rle_run[NUM_LEVEL_TYPES];   // [def,rep]
-  int32_t initial_rle_value[NUM_LEVEL_TYPES];  // [def,rep]
-  int32_t error;
-  PageInfo page;
-  ColumnChunkDesc col;
+  constexpr page_state_s() noexcept {}
+  uint8_t const* data_start{};
+  uint8_t const* data_end{};
+  uint8_t const* lvl_end{};
+  uint8_t const* dict_base{};    // ptr to dictionary page data
+  int32_t dict_size{};           // size of dictionary data
+  int32_t first_row{};           // First row in page to output
+  int32_t num_rows{};            // Rows in page to decode (including rows to be skipped)
+  int32_t first_output_value{};  // First value in page to output
+  int32_t num_input_values{};    // total # of input/level values in the page
+  int32_t dtype_len{};           // Output data type length
+  int32_t dtype_len_in{};        // Can be larger than dtype_len if truncating 32-bit into 8-bit
+  int32_t dict_bits{};           // # of bits to store dictionary indices
+  uint32_t dict_run{};
+  int32_t dict_val{};
+  uint32_t initial_rle_run[NUM_LEVEL_TYPES]{};   // [def,rep]
+  int32_t initial_rle_value[NUM_LEVEL_TYPES]{};  // [def,rep]
+  int32_t error{};
+  PageInfo page{};
+  ColumnChunkDesc col{};
 
   // (leaf) value decoding
-  int32_t nz_count;  // number of valid entries in nz_idx (write position in circular buffer)
-  int32_t dict_pos;  // write position of dictionary indices
-  int32_t src_pos;   // input read position of final output value
-  int32_t ts_scale;  // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale
+  int32_t nz_count{};  // number of valid entries in nz_idx (write position in circular buffer)
+  int32_t dict_pos{};  // write position of dictionary indices
+  int32_t src_pos{};   // input read position of final output value
+  int32_t ts_scale{};  // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale
 
   // repetition/definition level decoding
-  int32_t input_value_count;                  // how many values of the input we've processed
-  int32_t input_row_count;                    // how many rows of the input we've processed
-  int32_t input_leaf_count;                   // how many leaf values of the input we've processed
-  uint8_t const* lvl_start[NUM_LEVEL_TYPES];  // [def,rep]
-  uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES];  // [def,rep]
-  uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES];    // [def,rep]
-  int32_t lvl_count[NUM_LEVEL_TYPES];             // how many of each of the streams we've decoded
-  int32_t row_index_lower_bound;                  // lower bound of row indices we should process
+  int32_t input_value_count{};                  // how many values of the input we've processed
+  int32_t input_row_count{};                    // how many rows of the input we've processed
+  int32_t input_leaf_count{};                   // how many leaf values of the input we've processed
+  uint8_t const* lvl_start[NUM_LEVEL_TYPES]{};  // [def,rep]
+  uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{};  // [def,rep]
+  uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{};    // [def,rep]
+  int32_t lvl_count[NUM_LEVEL_TYPES]{};             // how many of each of the streams we've decoded
+  int32_t row_index_lower_bound{};                  // lower bound of row indices we should process
 
   // a shared-memory cache of frequently used data when decoding. The source of this data is
   // normally stored in global memory which can yield poor performance. So, when possible
   // we copy that info here prior to decoding
-  PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info];
+  PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{};
   // points to either nesting_decode_cache above when possible, or to the global source otherwise
-  PageNestingDecodeInfo* nesting_info;
+  PageNestingDecodeInfo* nesting_info{};
 };
 
 // buffers only used in the decode kernel.  separated from page_state_s to keep

@@ -45,13 +45,13 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0,
                                                                   ST_FLD_LIST};
 
 struct byte_stream_s {
-  uint8_t const* cur;
-  uint8_t const* end;
-  uint8_t const* base;
+  uint8_t const* cur{};
+  uint8_t const* end{};
+  uint8_t const* base{};
   // Parsed symbols
-  PageType page_type;
-  PageInfo page;
-  ColumnChunkDesc ck;
+  PageType page_type{};
+  PageInfo page{};
+  ColumnChunkDesc ck{};
 };
 
 /**

@@ -228,7 +228,7 @@ struct PageInfo {
  * @brief Struct describing a particular chunk of column data
  */
 struct ColumnChunkDesc {
-  ColumnChunkDesc() = default;
+  constexpr ColumnChunkDesc() noexcept {};
   explicit ColumnChunkDesc(size_t compressed_size_,
                            uint8_t* compressed_data_,
                            size_t num_values_,
@@ -275,34 +275,34 @@ struct ColumnChunkDesc {
   {
   }
 
-  uint8_t const* compressed_data;                  // pointer to compressed column chunk data
-  size_t compressed_size;                          // total compressed data size for this chunk
-  size_t num_values;                               // total number of values in this column
-  size_t start_row;                                // starting row of this chunk
-  uint32_t num_rows;                               // number of rows in this chunk
-  int16_t max_level[level_type::NUM_LEVEL_TYPES];  // max definition/repetition level
-  int16_t max_nesting_depth;                       // max nesting depth of the output
-  uint16_t data_type;                              // basic column data type, ((type_length << 3) |
-                                                   // parquet::Type)
+  uint8_t const* compressed_data{};                  // pointer to compressed column chunk data
+  size_t compressed_size{};                          // total compressed data size for this chunk
+  size_t num_values{};                               // total number of values in this column
+  size_t start_row{};                                // starting row of this chunk
+  uint32_t num_rows{};                               // number of rows in this chunk
+  int16_t max_level[level_type::NUM_LEVEL_TYPES]{};  // max definition/repetition level
+  int16_t max_nesting_depth{};                       // max nesting depth of the output
+  uint16_t data_type{};                         // basic column data type, ((type_length << 3) |
+                                                // parquet::Type)
   uint8_t
-    level_bits[level_type::NUM_LEVEL_TYPES];  // bits to encode max definition/repetition levels
-  int32_t num_data_pages;                     // number of data pages
-  int32_t num_dict_pages;                     // number of dictionary pages
-  int32_t max_num_pages;                      // size of page_info array
-  PageInfo* page_info;                        // output page info for up to num_dict_pages +
-                                              // num_data_pages (dictionary pages first)
-  string_index_pair* str_dict_index;          // index for string dictionary
-  bitmask_type** valid_map_base;              // base pointers of valid bit map for this column
-  void** column_data_base;                    // base pointers of column data
-  void** column_string_base;                  // base pointers of column string data
-  int8_t codec;                               // compressed codec enum
-  int8_t converted_type;                      // converted type enum
-  LogicalType logical_type;                   // logical type
-  int8_t decimal_precision;                   // Decimal precision
-  int32_t ts_clock_rate;   // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)
-
-  int32_t src_col_index;   // my input column index
-  int32_t src_col_schema;  // my schema index in the file
+    level_bits[level_type::NUM_LEVEL_TYPES]{};  // bits to encode max definition/repetition levels
+  int32_t num_data_pages{};                     // number of data pages
+  int32_t num_dict_pages{};                     // number of dictionary pages
+  int32_t max_num_pages{};                      // size of page_info array
+  PageInfo* page_info{};                        // output page info for up to num_dict_pages +
+                                                // num_data_pages (dictionary pages first)
+  string_index_pair* str_dict_index{};          // index for string dictionary
+  bitmask_type** valid_map_base{};              // base pointers of valid bit map for this column
+  void** column_data_base{};                    // base pointers of column data
+  void** column_string_base{};                  // base pointers of column string data
+  int8_t codec{};                               // compressed codec enum
+  int8_t converted_type{};                      // converted type enum
+  LogicalType logical_type{};                   // logical type
+  int8_t decimal_precision{};                   // Decimal precision
+  int32_t ts_clock_rate{};   // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)
+
+  int32_t src_col_index{};   // my input column index
+  int32_t src_col_schema{};  // my schema index in the file
 };
 
 /**

@@ -34,18 +34,18 @@ namespace io {
  * @brief shared state for statistics calculation kernel
  */
 struct stats_state_s {
-  stats_column_desc col;   ///< Column information
-  statistics_group group;  ///< Group description
-  statistics_chunk ck;     ///< Output statistics chunk
+  stats_column_desc col{};   ///< Column information
+  statistics_group group{};  ///< Group description
+  statistics_chunk ck{};     ///< Output statistics chunk
 };
 
 /**
  * @brief shared state for statistics merge kernel
  */
 struct merge_state_s {
-  stats_column_desc col;         ///< Column information
-  statistics_merge_group group;  ///< Group description
-  statistics_chunk ck;           ///< Resulting statistics chunk
+  stats_column_desc col{};         ///< Column information
+  statistics_merge_group group{};  ///< Group description
+  statistics_chunk ck{};           ///< Resulting statistics chunk
 };
 
 template <int dimension>