Skip to content

Commit

Permalink
Simplify changes
Browse files Browse the repository at this point in the history
  • Loading branch information
robertmaynard committed Sep 18, 2023
1 parent 951d781 commit 7ed1625
Show file tree
Hide file tree
Showing 4 changed files with 56 additions and 57 deletions.
48 changes: 24 additions & 24 deletions cpp/src/io/parquet/page_decode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -27,48 +27,48 @@ namespace cudf::io::parquet::gpu {

/**
 * @brief Bookkeeping used while decoding one page: data/level pointers, dictionary info,
 * row/value counts, leaf value positions, repetition/definition level cursors and a
 * cache of nesting decode info.
 *
 * NOTE(review): this listing appears to be a rendered diff without +/- markers. Each member
 * written as `= 0` / `= nullptr` looks like the pre-change form of the identically named
 * member written with `{}` later in the same group; only one of the two exists in the real
 * file. Both forms leave the member zero / null.
 */
struct page_state_s {
constexpr page_state_s() noexcept {}
uint8_t const* data_start = nullptr;
uint8_t const* data_end = nullptr;
uint8_t const* lvl_end = nullptr;
uint8_t const* dict_base = nullptr; // ptr to dictionary page data
int32_t dict_size = 0; // size of dictionary data
int32_t first_row = 0; // First row in page to output
int32_t num_rows = 0; // Rows in page to decode (including rows to be skipped)
int32_t first_output_value = 0; // First value in page to output
int32_t num_input_values = 0; // total # of input/level values in the page
int32_t dtype_len = 0; // Output data type length
int32_t dtype_len_in = 0; // Can be larger than dtype_len if truncating 32-bit into 8-bit
int32_t dict_bits = 0; // # of bits to store dictionary indices
uint32_t dict_run = 0; // NOTE(review): undocumented; presumably the current dictionary-index run -- confirm
int32_t dict_val = 0; // NOTE(review): undocumented; presumably the value paired with dict_run -- confirm
uint8_t const* data_start{};
uint8_t const* data_end{};
uint8_t const* lvl_end{};
uint8_t const* dict_base{}; // ptr to dictionary page data
int32_t dict_size{}; // size of dictionary data
int32_t first_row{}; // First row in page to output
int32_t num_rows{}; // Rows in page to decode (including rows to be skipped)
int32_t first_output_value{}; // First value in page to output
int32_t num_input_values{}; // total # of input/level values in the page
int32_t dtype_len{}; // Output data type length
int32_t dtype_len_in{}; // Can be larger than dtype_len if truncating 32-bit into 8-bit
int32_t dict_bits{}; // # of bits to store dictionary indices
uint32_t dict_run{}; // NOTE(review): undocumented; presumably the current dictionary-index run -- confirm
int32_t dict_val{}; // NOTE(review): undocumented; presumably the value paired with dict_run -- confirm
uint32_t initial_rle_run[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t initial_rle_value[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t error = 0; // NOTE(review): undocumented; presumably nonzero signals a decode error -- confirm
int32_t error{}; // NOTE(review): undocumented; presumably nonzero signals a decode error -- confirm
PageInfo page{};
ColumnChunkDesc col{};

// (leaf) value decoding
int32_t nz_count = 0; // number of valid entries in nz_idx (write position in circular buffer)
int32_t dict_pos = 0; // write position of dictionary indices
int32_t src_pos = 0; // input read position of final output value
int32_t ts_scale = 0; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale
int32_t nz_count{}; // number of valid entries in nz_idx (write position in circular buffer)
int32_t dict_pos{}; // write position of dictionary indices
int32_t src_pos{}; // input read position of final output value
int32_t ts_scale{}; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale

// repetition/definition level decoding
int32_t input_value_count = 0; // how many values of the input we've processed
int32_t input_row_count = 0; // how many rows of the input we've processed
int32_t input_leaf_count = 0; // how many leaf values of the input we've processed
int32_t input_value_count{}; // how many values of the input we've processed
int32_t input_row_count{}; // how many rows of the input we've processed
int32_t input_leaf_count{}; // how many leaf values of the input we've processed
uint8_t const* lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep]
uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep]
uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t lvl_count[NUM_LEVEL_TYPES]{}; // how many of each of the streams we've decoded
int32_t row_index_lower_bound = 0; // lower bound of row indices we should process
int32_t row_index_lower_bound{}; // lower bound of row indices we should process

// a shared-memory cache of frequently used data when decoding. The source of this data is
// normally stored in global memory which can yield poor performance. So, when possible
// we copy that info here prior to decoding
PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{};
// points to either nesting_decode_cache above when possible, or to the global source otherwise
PageNestingDecodeInfo* nesting_info = nullptr;
PageNestingDecodeInfo* nesting_info{};
};

// buffers only used in the decode kernel. separated from page_state_s to keep
Expand Down
6 changes: 3 additions & 3 deletions cpp/src/io/parquet/page_hdr.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,9 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0,
ST_FLD_LIST};

struct byte_stream_s {
uint8_t const* cur = nullptr;
uint8_t const* end = nullptr;
uint8_t const* base = nullptr;
uint8_t const* cur{};
uint8_t const* end{};
uint8_t const* base{};
// Parsed symbols
PageType page_type{};
PageInfo page{};
Expand Down
43 changes: 21 additions & 22 deletions cpp/src/io/parquet/parquet_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -275,35 +275,34 @@ struct ColumnChunkDesc {
{
}

uint8_t const* compressed_data = nullptr; // pointer to compressed column chunk data
size_t compressed_size = 0; // total compressed data size for this chunk
size_t num_values = 0; // total number of values in this column
size_t start_row = 0; // starting row of this chunk
uint32_t num_rows = 0; // number of rows in this chunk
uint8_t const* compressed_data{}; // pointer to compressed column chunk data
size_t compressed_size{}; // total compressed data size for this chunk
size_t num_values{}; // total number of values in this column
size_t start_row{}; // starting row of this chunk
uint32_t num_rows{}; // number of rows in this chunk
int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level
int16_t max_nesting_depth = 0; // max nesting depth of the output
uint16_t data_type = 0; // basic column data type, ((type_length << 3) |
int16_t max_nesting_depth{}; // max nesting depth of the output
uint16_t data_type{}; // basic column data type, ((type_length << 3) |
// parquet::Type)
uint8_t
level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels
int32_t num_data_pages = 0; // number of data pages
int32_t num_dict_pages = 0; // number of dictionary pages
int32_t max_num_pages = 0; // size of page_info array
PageInfo* page_info = nullptr; // output page info for up to num_dict_pages +
int32_t num_data_pages{}; // number of data pages
int32_t num_dict_pages{}; // number of dictionary pages
int32_t max_num_pages{}; // size of page_info array
PageInfo* page_info{}; // output page info for up to num_dict_pages +
// num_data_pages (dictionary pages first)
string_index_pair* str_dict_index = nullptr; // index for string dictionary
bitmask_type** valid_map_base = nullptr; // base pointers of valid bit map for this column
void** column_data_base = nullptr; // base pointers of column data
void** column_string_base = nullptr; // base pointers of column string data
int8_t codec = 0; // compressed codec enum
int8_t converted_type = 0; // converted type enum
string_index_pair* str_dict_index{}; // index for string dictionary
bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column
void** column_data_base{}; // base pointers of column data
void** column_string_base{}; // base pointers of column string data
int8_t codec{}; // compressed codec enum
int8_t converted_type{}; // converted type enum
LogicalType logical_type{}; // logical type
int8_t decimal_precision = 0; // Decimal precision
int32_t ts_clock_rate =
0; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)
int8_t decimal_precision{}; // Decimal precision
int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)

int32_t src_col_index = 0; // my input column index
int32_t src_col_schema = 0; // my schema index in the file
int32_t src_col_index{}; // my input column index
int32_t src_col_schema{}; // my schema index in the file
};

/**
Expand Down
16 changes: 8 additions & 8 deletions cpp/src/io/statistics/statistics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -98,20 +98,20 @@ union statistics_val {
};

/**
 * @brief Statistics for one chunk: null / non-null counts, min, max and sum values, plus
 * flags saying whether min/max and sum are valid.
 *
 * NOTE(review): this listing appears to be a rendered diff without +/- markers; the `= 0`
 * member lines look like the pre-change form of the `{}` lines with the same name.
 */
struct statistics_chunk {
uint32_t non_nulls = 0; //!< number of non-null values in chunk
uint32_t null_count = 0; //!< number of null values in chunk
uint32_t non_nulls{}; //!< number of non-null values in chunk
uint32_t null_count{}; //!< number of null values in chunk
statistics_val min_value{}; //!< minimum value in chunk
statistics_val max_value{}; //!< maximum value in chunk
statistics_val sum{}; //!< sum of chunk
uint8_t has_minmax = 0; //!< Nonzero if min_value and max_value are valid
uint8_t has_sum = 0; //!< Nonzero if sum is valid
uint8_t has_minmax{}; //!< Nonzero if min_value and max_value are valid
uint8_t has_sum{}; //!< Nonzero if sum is valid
};

/**
 * @brief Describes one group of rows: a pointer to the column information, the group's
 * start row and row count, and the number of null non-leaf values in the group.
 *
 * NOTE(review): this listing appears to be a rendered diff without +/- markers; the
 * `= 0` / `= nullptr` member lines look like the pre-change form of the `{}` lines with the
 * same name.
 */
struct statistics_group {
stats_column_desc const* col = nullptr; //!< Column information
uint32_t start_row = 0; //!< Start row of this group
uint32_t num_rows = 0; //!< Number of rows in group
uint32_t non_leaf_nulls = 0; //!< Number of null non-leaf values in the group
stats_column_desc const* col{}; //!< Column information
uint32_t start_row{}; //!< Start row of this group
uint32_t num_rows{}; //!< Number of rows in group
uint32_t non_leaf_nulls{}; //!< Number of null non-leaf values in the group
};

struct statistics_merge_group {
Expand Down

0 comments on commit 7ed1625

Please sign in to comment.