Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Bug/remove test mr #14127

Closed
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 2 additions & 22 deletions cpp/include/cudf_test/base_fixture.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,17 +47,7 @@ namespace test {
* class MyTestFixture : public cudf::test::BaseFixture {};
* ```
*/
class BaseFixture : public ::testing::Test {
rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()};

public:
/**
* @brief Returns pointer to `device_memory_resource` that should be used for
* all tests inheriting from this fixture
* @return pointer to memory resource
*/
rmm::mr::device_memory_resource* mr() { return _mr; }
};
class BaseFixture : public ::testing::Test {};

/**
* @brief Base test fixture that takes a parameter.
Expand All @@ -68,17 +58,7 @@ class BaseFixture : public ::testing::Test {
* ```
*/
template <typename T>
class BaseFixtureWithParam : public ::testing::TestWithParam<T> {
rmm::mr::device_memory_resource* _mr{rmm::mr::get_current_device_resource()};

public:
/**
* @brief Returns pointer to `device_memory_resource` that should be used for
* all tests inheriting from this fixture
* @return pointer to memory resource
*/
rmm::mr::device_memory_resource* mr() const { return _mr; }
};
class BaseFixtureWithParam : public ::testing::TestWithParam<T> {};

template <typename T, typename Enable = void>
struct uniform_distribution_impl {};
Expand Down
3 changes: 2 additions & 1 deletion cpp/src/io/avro/avro_common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,8 @@ namespace cudf {
namespace io {
namespace avro {
struct block_desc_s {
block_desc_s() {}
block_desc_s() = default; // required to compile on ctk-12.2 + aarch64

explicit constexpr block_desc_s(
size_t offset_, uint32_t size_, uint32_t row_offset_, uint32_t first_row_, uint32_t num_rows_)
: offset(offset_),
Expand Down
22 changes: 13 additions & 9 deletions cpp/src/io/comp/unsnap.cu
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,8 @@ struct unsnap_batch_s {
* @brief Queue structure used to exchange data between warps
*/
struct unsnap_queue_s {
unsnap_queue_s() = default; // required to compile on ctk-12.2 + aarch64

uint32_t prefetch_wrpos; ///< Prefetcher write position
uint32_t prefetch_rdpos; ///< Prefetch consumer read position
int32_t prefetch_end; ///< Prefetch enable flag (nonzero stops prefetcher)
Expand All @@ -64,15 +66,17 @@ struct unsnap_queue_s {
* @brief snappy decompression state
*/
struct unsnap_state_s {
uint8_t const* base; ///< base ptr of compressed stream
uint8_t const* end; ///< end of compressed stream
uint32_t uncompressed_size; ///< uncompressed stream size
uint32_t bytes_left; ///< remaining bytes to decompress
int32_t error; ///< current error status
uint32_t tstart; ///< start time for perf logging
volatile unsnap_queue_s q; ///< queue for cross-warp communication
device_span<uint8_t const> src; ///< input for current block
device_span<uint8_t> dst; ///< output for current block
constexpr unsnap_state_s() noexcept {} // required to compile on ctk-12.2 + aarch64

uint8_t const* base{}; ///< base ptr of compressed stream
uint8_t const* end{}; ///< end of compressed stream
uint32_t uncompressed_size{}; ///< uncompressed stream size
uint32_t bytes_left{}; ///< remaining bytes to decompress
int32_t error{}; ///< current error status
uint32_t tstart{}; ///< start time for perf logging
volatile unsnap_queue_s q{}; ///< queue for cross-warp communication
device_span<uint8_t const> src; ///< input for current block
device_span<uint8_t> dst; ///< output for current block
};

inline __device__ volatile uint8_t& byte_access(unsnap_state_s* s, uint32_t pos)
Expand Down
39 changes: 16 additions & 23 deletions cpp/src/io/orc/orc_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -59,31 +59,24 @@ struct CompressedStreamInfo {
explicit constexpr CompressedStreamInfo(uint8_t const* compressed_data_, size_t compressed_size_)
: compressed_data(compressed_data_),
uncompressed_data(nullptr),
compressed_data_size(compressed_size_),
dec_in_ctl(nullptr),
dec_out_ctl(nullptr),
copy_in_ctl(nullptr),
copy_out_ctl(nullptr),
num_compressed_blocks(0),
num_uncompressed_blocks(0),
max_uncompressed_size(0),
max_uncompressed_block_size(0)
compressed_data_size(compressed_size_)
{
}
uint8_t const* compressed_data; // [in] base ptr to compressed stream data
uint8_t* uncompressed_data; // [in] base ptr to uncompressed stream data or NULL if not known yet
size_t compressed_data_size; // [in] compressed data size for this stream
device_span<uint8_t const>* dec_in_ctl; // [in] input buffer to decompress
device_span<uint8_t>* dec_out_ctl; // [in] output buffer to decompress into
device_span<compression_result> dec_res; // [in] results of decompression
device_span<uint8_t const>* copy_in_ctl; // [out] input buffer to copy
device_span<uint8_t>* copy_out_ctl; // [out] output buffer to copy to
uint32_t num_compressed_blocks; // [in,out] number of entries in decctl(in), number of compressed
// blocks(out)
uint32_t num_uncompressed_blocks; // [in,out] number of entries in dec_in_ctl(in), number of
// uncompressed blocks(out)
uint64_t max_uncompressed_size; // [out] maximum uncompressed data size of stream
uint32_t max_uncompressed_block_size; // [out] maximum uncompressed size of any block in stream
uint8_t const* compressed_data{}; // [in] base ptr to compressed stream data
uint8_t*
uncompressed_data{}; // [in] base ptr to uncompressed stream data or NULL if not known yet
size_t compressed_data_size{}; // [in] compressed data size for this stream
device_span<uint8_t const>* dec_in_ctl{}; // [in] input buffer to decompress
device_span<uint8_t>* dec_out_ctl{}; // [in] output buffer to decompress into
device_span<compression_result> dec_res{}; // [in] results of decompression
device_span<uint8_t const>* copy_in_ctl{}; // [out] input buffer to copy
device_span<uint8_t>* copy_out_ctl{}; // [out] output buffer to copy to
uint32_t num_compressed_blocks{}; // [in,out] number of entries in decctl(in), number of
// compressed blocks(out)
uint32_t num_uncompressed_blocks{}; // [in,out] number of entries in dec_in_ctl(in), number of
// uncompressed blocks(out)
uint64_t max_uncompressed_size{}; // [out] maximum uncompressed data size of stream
uint32_t max_uncompressed_block_size{}; // [out] maximum uncompressed size of any block in stream
};

enum StreamIndexType {
Expand Down
14 changes: 7 additions & 7 deletions cpp/src/io/orc/stats_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -121,14 +121,14 @@ __global__ void __launch_bounds__(block_size, 1)
}

struct stats_state_s {
uint8_t* base; ///< Output buffer start
uint8_t* end; ///< Output buffer end
statistics_chunk chunk;
statistics_merge_group group;
statistics_dtype stats_dtype; //!< Statistics data type for this column
uint8_t* base{}; ///< Output buffer start
uint8_t* end{}; ///< Output buffer end
statistics_chunk chunk{};
statistics_merge_group group{};
statistics_dtype stats_dtype{}; //!< Statistics data type for this column
// ORC stats
uint64_t numberOfValues;
uint8_t hasNull;
uint64_t numberOfValues{};
uint8_t hasNull{};
};

/*
Expand Down
29 changes: 15 additions & 14 deletions cpp/src/io/orc/stripe_init.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ namespace orc {
namespace gpu {

struct comp_in_out {
uint8_t const* in_ptr;
size_t in_size;
uint8_t* out_ptr;
size_t out_size;
uint8_t const* in_ptr{};
size_t in_size{};
uint8_t* out_ptr{};
size_t out_size{};
};
struct compressed_stream_s {
CompressedStreamInfo info;
comp_in_out ctl;
CompressedStreamInfo info{};
comp_in_out ctl{};
};

// blockDim {128,1,1}
Expand Down Expand Up @@ -208,14 +208,15 @@ __global__ void __launch_bounds__(128, 8)
* @brief Shared mem state for gpuParseRowGroupIndex
*/
struct rowindex_state_s {
ColumnDesc chunk;
uint32_t rowgroup_start;
uint32_t rowgroup_end;
int is_compressed;
uint32_t row_index_entry[3][CI_PRESENT]; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2
CompressedStreamInfo strm_info[2];
RowGroup rowgroups[128];
uint32_t compressed_offset[128][2];
ColumnDesc chunk{};
uint32_t rowgroup_start{};
uint32_t rowgroup_end{};
int is_compressed{};
uint32_t row_index_entry[3]
[CI_PRESENT]{}; // NOTE: Assumes CI_PRESENT follows CI_DATA and CI_DATA2
CompressedStreamInfo strm_info[2]{};
RowGroup rowgroups[128]{};
uint32_t compressed_offset[128][2]{};
};

enum row_entry_state_e {
Expand Down
67 changes: 34 additions & 33 deletions cpp/src/io/parquet/page_decode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -26,48 +26,49 @@
namespace cudf::io::parquet::gpu {

struct page_state_s {
uint8_t const* data_start;
uint8_t const* data_end;
uint8_t const* lvl_end;
uint8_t const* dict_base; // ptr to dictionary page data
int32_t dict_size; // size of dictionary data
int32_t first_row; // First row in page to output
int32_t num_rows; // Rows in page to decode (including rows to be skipped)
int32_t first_output_value; // First value in page to output
int32_t num_input_values; // total # of input/level values in the page
int32_t dtype_len; // Output data type length
int32_t dtype_len_in; // Can be larger than dtype_len if truncating 32-bit into 8-bit
int32_t dict_bits; // # of bits to store dictionary indices
uint32_t dict_run;
int32_t dict_val;
uint32_t initial_rle_run[NUM_LEVEL_TYPES]; // [def,rep]
int32_t initial_rle_value[NUM_LEVEL_TYPES]; // [def,rep]
int32_t error;
PageInfo page;
ColumnChunkDesc col;
constexpr page_state_s() noexcept {}
uint8_t const* data_start{};
uint8_t const* data_end{};
uint8_t const* lvl_end{};
uint8_t const* dict_base{}; // ptr to dictionary page data
int32_t dict_size{}; // size of dictionary data
int32_t first_row{}; // First row in page to output
int32_t num_rows{}; // Rows in page to decode (including rows to be skipped)
int32_t first_output_value{}; // First value in page to output
int32_t num_input_values{}; // total # of input/level values in the page
int32_t dtype_len{}; // Output data type length
int32_t dtype_len_in{}; // Can be larger than dtype_len if truncating 32-bit into 8-bit
int32_t dict_bits{}; // # of bits to store dictionary indices
uint32_t dict_run{};
int32_t dict_val{};
uint32_t initial_rle_run[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t initial_rle_value[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t error{};
PageInfo page{};
ColumnChunkDesc col{};

// (leaf) value decoding
int32_t nz_count; // number of valid entries in nz_idx (write position in circular buffer)
int32_t dict_pos; // write position of dictionary indices
int32_t src_pos; // input read position of final output value
int32_t ts_scale; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale
int32_t nz_count{}; // number of valid entries in nz_idx (write position in circular buffer)
int32_t dict_pos{}; // write position of dictionary indices
int32_t src_pos{}; // input read position of final output value
int32_t ts_scale{}; // timestamp scale: <0: divide by -ts_scale, >0: multiply by ts_scale

// repetition/definition level decoding
int32_t input_value_count; // how many values of the input we've processed
int32_t input_row_count; // how many rows of the input we've processed
int32_t input_leaf_count; // how many leaf values of the input we've processed
uint8_t const* lvl_start[NUM_LEVEL_TYPES]; // [def,rep]
uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]; // [def,rep]
uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]; // [def,rep]
int32_t lvl_count[NUM_LEVEL_TYPES]; // how many of each of the streams we've decoded
int32_t row_index_lower_bound; // lower bound of row indices we should process
int32_t input_value_count{}; // how many values of the input we've processed
int32_t input_row_count{}; // how many rows of the input we've processed
int32_t input_leaf_count{}; // how many leaf values of the input we've processed
uint8_t const* lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep]
uint8_t const* abs_lvl_start[NUM_LEVEL_TYPES]{}; // [def,rep]
uint8_t const* abs_lvl_end[NUM_LEVEL_TYPES]{}; // [def,rep]
int32_t lvl_count[NUM_LEVEL_TYPES]{}; // how many of each of the streams we've decoded
int32_t row_index_lower_bound{}; // lower bound of row indices we should process

// a shared-memory cache of frequently used data when decoding. The source of this data is
// normally stored in global memory which can yield poor performance. So, when possible
// we copy that info here prior to decoding
PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info];
PageNestingDecodeInfo nesting_decode_cache[max_cacheable_nesting_decode_info]{};
// points to either nesting_decode_cache above when possible, or to the global source otherwise
PageNestingDecodeInfo* nesting_info;
PageNestingDecodeInfo* nesting_info{};
};

// buffers only used in the decode kernel. separated from page_state_s to keep
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/io/parquet/page_hdr.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,13 +45,13 @@ static const __device__ __constant__ uint8_t g_list2struct[16] = {0,
ST_FLD_LIST};

struct byte_stream_s {
uint8_t const* cur;
uint8_t const* end;
uint8_t const* base;
uint8_t const* cur{};
uint8_t const* end{};
uint8_t const* base{};
// Parsed symbols
PageType page_type;
PageInfo page;
ColumnChunkDesc ck;
PageType page_type{};
PageInfo page{};
ColumnChunkDesc ck{};
};

/**
Expand Down
56 changes: 28 additions & 28 deletions cpp/src/io/parquet/parquet_gpu.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -228,7 +228,7 @@ struct PageInfo {
* @brief Struct describing a particular chunk of column data
*/
struct ColumnChunkDesc {
ColumnChunkDesc() = default;
constexpr ColumnChunkDesc() noexcept {};
explicit ColumnChunkDesc(size_t compressed_size_,
uint8_t* compressed_data_,
size_t num_values_,
Expand Down Expand Up @@ -275,34 +275,34 @@ struct ColumnChunkDesc {
{
}

uint8_t const* compressed_data; // pointer to compressed column chunk data
size_t compressed_size; // total compressed data size for this chunk
size_t num_values; // total number of values in this column
size_t start_row; // starting row of this chunk
uint32_t num_rows; // number of rows in this chunk
int16_t max_level[level_type::NUM_LEVEL_TYPES]; // max definition/repetition level
int16_t max_nesting_depth; // max nesting depth of the output
uint16_t data_type; // basic column data type, ((type_length << 3) |
// parquet::Type)
uint8_t const* compressed_data{}; // pointer to compressed column chunk data
size_t compressed_size{}; // total compressed data size for this chunk
size_t num_values{}; // total number of values in this column
size_t start_row{}; // starting row of this chunk
uint32_t num_rows{}; // number of rows in this chunk
int16_t max_level[level_type::NUM_LEVEL_TYPES]{}; // max definition/repetition level
int16_t max_nesting_depth{}; // max nesting depth of the output
uint16_t data_type{}; // basic column data type, ((type_length << 3) |
// parquet::Type)
uint8_t
level_bits[level_type::NUM_LEVEL_TYPES]; // bits to encode max definition/repetition levels
int32_t num_data_pages; // number of data pages
int32_t num_dict_pages; // number of dictionary pages
int32_t max_num_pages; // size of page_info array
PageInfo* page_info; // output page info for up to num_dict_pages +
// num_data_pages (dictionary pages first)
string_index_pair* str_dict_index; // index for string dictionary
bitmask_type** valid_map_base; // base pointers of valid bit map for this column
void** column_data_base; // base pointers of column data
void** column_string_base; // base pointers of column string data
int8_t codec; // compressed codec enum
int8_t converted_type; // converted type enum
LogicalType logical_type; // logical type
int8_t decimal_precision; // Decimal precision
int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)

int32_t src_col_index; // my input column index
int32_t src_col_schema; // my schema index in the file
level_bits[level_type::NUM_LEVEL_TYPES]{}; // bits to encode max definition/repetition levels
int32_t num_data_pages{}; // number of data pages
int32_t num_dict_pages{}; // number of dictionary pages
int32_t max_num_pages{}; // size of page_info array
PageInfo* page_info{}; // output page info for up to num_dict_pages +
// num_data_pages (dictionary pages first)
string_index_pair* str_dict_index{}; // index for string dictionary
bitmask_type** valid_map_base{}; // base pointers of valid bit map for this column
void** column_data_base{}; // base pointers of column data
void** column_string_base{}; // base pointers of column string data
int8_t codec{}; // compressed codec enum
int8_t converted_type{}; // converted type enum
LogicalType logical_type{}; // logical type
int8_t decimal_precision{}; // Decimal precision
int32_t ts_clock_rate{}; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns)

int32_t src_col_index{}; // my input column index
int32_t src_col_schema{}; // my schema index in the file
};

/**
Expand Down
12 changes: 6 additions & 6 deletions cpp/src/io/statistics/column_statistics.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,18 +34,18 @@ namespace io {
* @brief shared state for statistics calculation kernel
*/
struct stats_state_s {
stats_column_desc col; ///< Column information
statistics_group group; ///< Group description
statistics_chunk ck; ///< Output statistics chunk
stats_column_desc col{}; ///< Column information
statistics_group group{}; ///< Group description
statistics_chunk ck{}; ///< Output statistics chunk
};

/**
* @brief shared state for statistics merge kernel
*/
struct merge_state_s {
stats_column_desc col; ///< Column information
statistics_merge_group group; ///< Group description
statistics_chunk ck; ///< Resulting statistics chunk
stats_column_desc col{}; ///< Column information
statistics_merge_group group{}; ///< Group description
statistics_chunk ck{}; ///< Resulting statistics chunk
};

template <int dimension>
Expand Down
Loading
Loading