From 0b154b4425638af74ef26dd18aaf91eef85d03f4 Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 4 Sep 2024 13:54:37 -0700 Subject: [PATCH] Add scale_factor axes to queries --- cpp/benchmarks/tpch/q1.cpp | 6 ++++-- cpp/benchmarks/tpch/q10.cpp | 7 +++++-- cpp/benchmarks/tpch/q5.cpp | 8 +++++--- cpp/benchmarks/tpch/q6.cpp | 6 ++++-- cpp/benchmarks/tpch/q9.cpp | 8 +++++--- cpp/benchmarks/tpch/utils.hpp | 17 +++-------------- 6 files changed, 26 insertions(+), 26 deletions(-) diff --git a/cpp/benchmarks/tpch/q1.cpp b/cpp/benchmarks/tpch/q1.cpp index 82776362d8f..9e417b4786f 100644 --- a/cpp/benchmarks/tpch/q1.cpp +++ b/cpp/benchmarks/tpch/q1.cpp @@ -103,9 +103,11 @@ void run_tpch_q1(nvbench::state& state) { + double const scale_factor = state.get_float64("scale_factor"); + // Define a map for holding parquet data sources std::unordered_map sources; - generate_parquet_data_sources({"lineitem"}, sources); + generate_parquet_data_sources(scale_factor, {"lineitem"}, sources); // Define the column projections and filter predicate for `lineitem` table std::vector const lineitem_cols = {"l_returnflag", @@ -174,4 +176,4 @@ void tpch_q1(nvbench::state& state) state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q1(state); }); } -NVBENCH_BENCH(tpch_q1).set_name("tpch_q1"); +NVBENCH_BENCH(tpch_q1).set_name("tpch_q1").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/benchmarks/tpch/q10.cpp b/cpp/benchmarks/tpch/q10.cpp index 576ce2f8f5e..4acf35f55bd 100644 --- a/cpp/benchmarks/tpch/q10.cpp +++ b/cpp/benchmarks/tpch/q10.cpp @@ -93,9 +93,12 @@ void run_tpch_q10(nvbench::state& state) { + double const scale_factor = state.get_float64("scale_factor"); + // Define a map for holding parquet data sources std::unordered_map sources; - generate_parquet_data_sources({"customer", "orders", "lineitem", "nation"}, sources); + generate_parquet_data_sources( + scale_factor, {"customer", "orders", "lineitem", "nation"}, sources); // Define the 
column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; @@ -167,4 +170,4 @@ void tpch_q10(nvbench::state& state) state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q10(state); }); } -NVBENCH_BENCH(tpch_q10).set_name("tpch_q10"); +NVBENCH_BENCH(tpch_q10).set_name("tpch_q10").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/benchmarks/tpch/q5.cpp b/cpp/benchmarks/tpch/q5.cpp index effe84c5b04..c9dd5040f9b 100644 --- a/cpp/benchmarks/tpch/q5.cpp +++ b/cpp/benchmarks/tpch/q5.cpp @@ -88,10 +88,12 @@ void run_tpch_q5(nvbench::state& state) { + double const scale_factor = state.get_float64("scale_factor"); + // Define a map for holding parquet data sources std::unordered_map sources; - generate_parquet_data_sources({"customer", "orders", "lineitem", "supplier", "nation", "region"}, - sources); + generate_parquet_data_sources( + scale_factor, {"customer", "orders", "lineitem", "supplier", "nation", "region"}, sources); // Define the column projection and filter predicate for the `orders` table std::vector const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"}; @@ -170,4 +172,4 @@ void tpch_q5(nvbench::state& state) state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q5(state); }); } -NVBENCH_BENCH(tpch_q5).set_name("tpch_q5"); +NVBENCH_BENCH(tpch_q5).set_name("tpch_q5").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/benchmarks/tpch/q6.cpp b/cpp/benchmarks/tpch/q6.cpp index 7bca58c1213..1a53432bbe5 100644 --- a/cpp/benchmarks/tpch/q6.cpp +++ b/cpp/benchmarks/tpch/q6.cpp @@ -62,9 +62,11 @@ void run_tpch_q6(nvbench::state& state) { + double const scale_factor = state.get_float64("scale_factor"); + // Define a map for holding parquet data sources std::unordered_map sources; - generate_parquet_data_sources({"lineitem"}, sources); + generate_parquet_data_sources(scale_factor, {"lineitem"}, sources); // 
Read out the `lineitem` table from parquet file std::vector const lineitem_cols = { @@ -137,4 +139,4 @@ void tpch_q6(nvbench::state& state) state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q6(state); }); } -NVBENCH_BENCH(tpch_q6).set_name("tpch_q6"); +NVBENCH_BENCH(tpch_q6).set_name("tpch_q6").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/benchmarks/tpch/q9.cpp b/cpp/benchmarks/tpch/q9.cpp index 048c2c41048..0ade0381369 100644 --- a/cpp/benchmarks/tpch/q9.cpp +++ b/cpp/benchmarks/tpch/q9.cpp @@ -111,10 +111,12 @@ void run_tpch_q9(nvbench::state& state) { + double const scale_factor = state.get_float64("scale_factor"); + // Define a map for holding parquet data sources std::unordered_map sources; - generate_parquet_data_sources({"part", "supplier", "lineitem", "partsupp", "orders", "nation"}, - sources); + generate_parquet_data_sources( + scale_factor, {"part", "supplier", "lineitem", "partsupp", "orders", "nation"}, sources); // Read out the table from parquet files auto const lineitem = read_parquet( @@ -183,4 +185,4 @@ void tpch_q9(nvbench::state& state) state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q9(state); }); } -NVBENCH_BENCH(tpch_q9).set_name("tpch_q9"); +NVBENCH_BENCH(tpch_q9).set_name("tpch_q9").add_float64_axis("scale_factor", {0.01, 0.1, 1}); diff --git a/cpp/benchmarks/tpch/utils.hpp b/cpp/benchmarks/tpch/utils.hpp index af3673ce87e..0f9e6210481 100644 --- a/cpp/benchmarks/tpch/utils.hpp +++ b/cpp/benchmarks/tpch/utils.hpp @@ -526,17 +526,6 @@ int32_t days_since_epoch(int year, int month, int day) return static_cast(diff); } -/** - * @brief Read the scale factor from the environment variable `CUDF_TPCH_SF` - */ -double get_sf() -{ - char* val = getenv("CUDF_TPCH_SF"); - double const sf = (val == nullptr) ? 
1 : std::atof(val); - std::cout << "Using scale factor: " << sf << std::endl; - return sf; -} - /** * @brief Struct representing a parquet device buffer */ @@ -588,10 +577,12 @@ void write_to_parquet_device_buffer(std::unique_ptr const& table, /** * @brief Generate TPC-H tables and write to parquet device buffers * + * @param scale_factor The scale factor of TPC-H tables to generate * @param table_names The names of the tables to generate * @param sources The parquet data sources to populate */ -void generate_parquet_data_sources(std::vector const& table_names, +void generate_parquet_data_sources(double scale_factor, + std::vector const& table_names, std::unordered_map& sources) { CUDF_FUNC_RANGE(); @@ -599,8 +590,6 @@ void generate_parquet_data_sources(std::vector const& table_names, sources[table_name] = parquet_device_buffer(); }); - auto scale_factor = get_sf(); - auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part( scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource());