Add a benchmark to study Parquet reader's performance for wide tables (#16751)

Related to #16750 

This PR adds a benchmark to study the read throughput of the Parquet reader for wide tables.
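For reference, a minimal sketch of how one might run just the new benchmarks once built. This assumes the standard NVBench command-line options (--benchmark/-b to select a benchmark by name, --axis/-a to pin axis values) and that this file continues to build into cuDF's existing PARQUET_READER_NVBENCH benchmark executable; the binary name and path are assumptions, so adjust them to your build:

# Hypothetical invocation: select the wide-table benchmarks by name and pin a couple of axes.
./PARQUET_READER_NVBENCH --benchmark parquet_read_wide_tables --axis num_cols=256 --axis data_size_mb=1024
./PARQUET_READER_NVBENCH --benchmark parquet_read_wide_tables_mixed --axis cardinality=0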

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - Paul Mattione (https://github.com/pmattione-nvidia)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #16751
mhaseeb123 authored Sep 18, 2024
1 parent 57ae3e3 commit 44a9c10
Showing 1 changed file with 85 additions and 2 deletions.
cpp/benchmarks/io/parquet/parquet_reader_input.cpp
@@ -32,7 +32,8 @@ constexpr cudf::size_type num_cols = 64;
 void parquet_read_common(cudf::size_type num_rows_to_read,
                          cudf::size_type num_cols_to_read,
                          cuio_source_sink_pair& source_sink,
-                         nvbench::state& state)
+                         nvbench::state& state,
+                         size_t table_data_size = data_size)
 {
   cudf::io::parquet_reader_options read_opts =
     cudf::io::parquet_reader_options::builder(source_sink.make_source_info());
@@ -52,7 +53,7 @@ void parquet_read_common(cudf::size_type num_rows_to_read,
   });

   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
-  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_element_count(static_cast<double>(table_data_size) / time, "bytes_per_second");
   state.add_buffer_size(
     mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
@@ -231,6 +232,70 @@ void BM_parquet_read_chunks(nvbench::state& state, nvbench::type_list<nvbench::e
   state.add_buffer_size(source_sink.size(), "encoded_file_size", "encoded_file_size");
 }

+template <data_type DataType>
+void BM_parquet_read_wide_tables(nvbench::state& state,
+                                 nvbench::type_list<nvbench::enum_type<DataType>> type_list)
+{
+  auto const d_type = get_type_or_group(static_cast<int32_t>(DataType));
+
+  auto const n_col = static_cast<cudf::size_type>(state.get_int64("num_cols"));
+  auto const data_size_bytes = static_cast<size_t>(state.get_int64("data_size_mb") << 20);
+  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
+  auto const source_type = io_type::DEVICE_BUFFER;
+  cuio_source_sink_pair source_sink(source_type);
+
+  auto const num_rows_written = [&]() {
+    auto const tbl = create_random_table(
+      cycle_dtypes(d_type, n_col),
+      table_size_bytes{data_size_bytes},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
+
+    cudf::io::parquet_writer_options write_opts =
+      cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+        .compression(cudf::io::compression_type::NONE);
+    cudf::io::write_parquet(write_opts);
+    return view.num_rows();
+  }();
+
+  parquet_read_common(num_rows_written, n_col, source_sink, state, data_size_bytes);
+}
+
+void BM_parquet_read_wide_tables_mixed(nvbench::state& state)
+{
+  auto const d_type = []() {
+    auto d_type1 = get_type_or_group(static_cast<int32_t>(data_type::INTEGRAL));
+    auto d_type2 = get_type_or_group(static_cast<int32_t>(data_type::FLOAT));
+    d_type1.reserve(d_type1.size() + d_type2.size());
+    std::move(d_type2.begin(), d_type2.end(), std::back_inserter(d_type1));
+    return d_type1;
+  }();
+
+  auto const n_col = static_cast<cudf::size_type>(state.get_int64("num_cols"));
+  auto const data_size_bytes = static_cast<size_t>(state.get_int64("data_size_mb") << 20);
+  auto const cardinality = static_cast<cudf::size_type>(state.get_int64("cardinality"));
+  auto const run_length = static_cast<cudf::size_type>(state.get_int64("run_length"));
+  auto const source_type = io_type::DEVICE_BUFFER;
+  cuio_source_sink_pair source_sink(source_type);
+
+  auto const num_rows_written = [&]() {
+    auto const tbl = create_random_table(
+      cycle_dtypes(d_type, n_col),
+      table_size_bytes{data_size_bytes},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
+
+    cudf::io::parquet_writer_options write_opts =
+      cudf::io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
+        .compression(cudf::io::compression_type::NONE);
+    cudf::io::write_parquet(write_opts);
+    return view.num_rows();
+  }();
+
+  parquet_read_common(num_rows_written, n_col, source_sink, state, data_size_bytes);
+}
+
 using d_type_list = nvbench::enum_type_list<data_type::INTEGRAL,
                                             data_type::FLOAT,
                                             data_type::DECIMAL,
@@ -272,6 +337,24 @@ NVBENCH_BENCH(BM_parquet_read_io_small_mixed)
.add_int64_axis("run_length", {1, 32})
.add_int64_axis("num_string_cols", {1, 2, 3});

using d_type_list_wide_table = nvbench::enum_type_list<data_type::DECIMAL, data_type::STRING>;
NVBENCH_BENCH_TYPES(BM_parquet_read_wide_tables, NVBENCH_TYPE_AXES(d_type_list_wide_table))
.set_name("parquet_read_wide_tables")
.set_min_samples(4)
.set_type_axes_names({"data_type"})
.add_int64_axis("data_size_mb", {1024, 2048, 4096})
.add_int64_axis("num_cols", {256, 512, 1024})
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});

NVBENCH_BENCH(BM_parquet_read_wide_tables_mixed)
.set_name("parquet_read_wide_tables_mixed")
.set_min_samples(4)
.add_int64_axis("data_size_mb", {1024, 2048, 4096})
.add_int64_axis("num_cols", {256, 512, 1024})
.add_int64_axis("cardinality", {0, 1000})
.add_int64_axis("run_length", {1, 32});

 // a benchmark for structs that only contain fixed-width types
 using d_type_list_struct_only = nvbench::enum_type_list<data_type::STRUCT>;
 NVBENCH_BENCH_TYPES(BM_parquet_read_fixed_width_struct, NVBENCH_TYPE_AXES(d_type_list_struct_only))
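A note on the reported metric: with the parquet_read_common change above, bytes_per_second is the logical table size passed in (table_data_size) divided by the mean cold GPU time, so the wide-table benchmarks report throughput relative to the requested data_size_mb rather than the file-level data_size constant used by the other benchmarks. As a purely illustrative example with hypothetical numbers, a data_size_mb = 1024 run with a mean cold GPU time of 0.25 s would report 1024 * 2^20 / 0.25 ≈ 4.3e9 bytes per second.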
