Skip to content

Commit

Permalink
Add scale_factor axes to queries
Browse files Browse the repository at this point in the history
  • Loading branch information
JayjeetAtGithub committed Sep 4, 2024
1 parent c366bd0 commit 0b154b4
Show file tree
Hide file tree
Showing 6 changed files with 26 additions and 26 deletions.
6 changes: 4 additions & 2 deletions cpp/benchmarks/tpch/q1.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -103,9 +103,11 @@

void run_tpch_q1(nvbench::state& state)
{
double const scale_factor = state.get_float64("scale_factor");

// Define a map for holding parquet data sources
std::unordered_map<std::string, parquet_device_buffer> sources;
generate_parquet_data_sources({"lineitem"}, sources);
generate_parquet_data_sources(scale_factor, {"lineitem"}, sources);

// Define the column projections and filter predicate for `lineitem` table
std::vector<std::string> const lineitem_cols = {"l_returnflag",
Expand Down Expand Up @@ -174,4 +176,4 @@ void tpch_q1(nvbench::state& state)
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q1(state); });
}

NVBENCH_BENCH(tpch_q1).set_name("tpch_q1");
NVBENCH_BENCH(tpch_q1).set_name("tpch_q1").add_float64_axis("scale_factor", {0.01, 0.1, 1});
7 changes: 5 additions & 2 deletions cpp/benchmarks/tpch/q10.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -93,9 +93,12 @@

void run_tpch_q10(nvbench::state& state)
{
double const scale_factor = state.get_float64("scale_factor");

// Define a map for holding parquet data sources
std::unordered_map<std::string, parquet_device_buffer> sources;
generate_parquet_data_sources({"customer", "orders", "lineitem", "nation"}, sources);
generate_parquet_data_sources(
scale_factor, {"customer", "orders", "lineitem", "nation"}, sources);

// Define the column projection and filter predicate for the `orders` table
std::vector<std::string> const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"};
Expand Down Expand Up @@ -167,4 +170,4 @@ void tpch_q10(nvbench::state& state)
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q10(state); });
}

NVBENCH_BENCH(tpch_q10).set_name("tpch_q10");
NVBENCH_BENCH(tpch_q10).set_name("tpch_q10").add_float64_axis("scale_factor", {0.01, 0.1, 1});
8 changes: 5 additions & 3 deletions cpp/benchmarks/tpch/q5.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -88,10 +88,12 @@

void run_tpch_q5(nvbench::state& state)
{
double const scale_factor = state.get_float64("scale_factor");

// Define a map for holding parquet data sources
std::unordered_map<std::string, parquet_device_buffer> sources;
generate_parquet_data_sources({"customer", "orders", "lineitem", "supplier", "nation", "region"},
sources);
generate_parquet_data_sources(
scale_factor, {"customer", "orders", "lineitem", "supplier", "nation", "region"}, sources);

// Define the column projection and filter predicate for the `orders` table
std::vector<std::string> const orders_cols = {"o_custkey", "o_orderkey", "o_orderdate"};
Expand Down Expand Up @@ -170,4 +172,4 @@ void tpch_q5(nvbench::state& state)
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q5(state); });
}

NVBENCH_BENCH(tpch_q5).set_name("tpch_q5");
NVBENCH_BENCH(tpch_q5).set_name("tpch_q5").add_float64_axis("scale_factor", {0.01, 0.1, 1});
6 changes: 4 additions & 2 deletions cpp/benchmarks/tpch/q6.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,9 +62,11 @@

void run_tpch_q6(nvbench::state& state)
{
double const scale_factor = state.get_float64("scale_factor");

// Define a map for holding parquet data sources
std::unordered_map<std::string, parquet_device_buffer> sources;
generate_parquet_data_sources({"lineitem"}, sources);
generate_parquet_data_sources(scale_factor, {"lineitem"}, sources);

// Read out the `lineitem` table from parquet file
std::vector<std::string> const lineitem_cols = {
Expand Down Expand Up @@ -137,4 +139,4 @@ void tpch_q6(nvbench::state& state)
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q6(state); });
}

NVBENCH_BENCH(tpch_q6).set_name("tpch_q6");
NVBENCH_BENCH(tpch_q6).set_name("tpch_q6").add_float64_axis("scale_factor", {0.01, 0.1, 1});
8 changes: 5 additions & 3 deletions cpp/benchmarks/tpch/q9.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,12 @@

void run_tpch_q9(nvbench::state& state)
{
double const scale_factor = state.get_float64("scale_factor");

// Define a map for holding parquet data sources
std::unordered_map<std::string, parquet_device_buffer> sources;
generate_parquet_data_sources({"part", "supplier", "lineitem", "partsupp", "orders", "nation"},
sources);
generate_parquet_data_sources(
scale_factor, {"part", "supplier", "lineitem", "partsupp", "orders", "nation"}, sources);

// Read out the table from parquet files
auto const lineitem = read_parquet(
Expand Down Expand Up @@ -183,4 +185,4 @@ void tpch_q9(nvbench::state& state)
state.exec(nvbench::exec_tag::sync, [&](nvbench::launch& launch) { run_tpch_q9(state); });
}

NVBENCH_BENCH(tpch_q9).set_name("tpch_q9");
NVBENCH_BENCH(tpch_q9).set_name("tpch_q9").add_float64_axis("scale_factor", {0.01, 0.1, 1});
17 changes: 3 additions & 14 deletions cpp/benchmarks/tpch/utils.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -526,17 +526,6 @@ int32_t days_since_epoch(int year, int month, int day)
return static_cast<int32_t>(diff);
}

/**
 * @brief Read the scale factor from the environment variable `CUDF_TPCH_SF`
 *
 * @return The parsed scale factor, or 1 when `CUDF_TPCH_SF` is unset.
 *         Note: like `std::atof`, an unparsable value yields 0.
 */
double get_sf()
{
  // std::getenv is the portable spelling; unqualified getenv is not
  // guaranteed to be declared by <cstdlib>.
  char const* val = std::getenv("CUDF_TPCH_SF");
  double const sf = (val == nullptr) ? 1 : std::atof(val);
  // '\n' instead of std::endl: same output bytes, no forced stream flush.
  std::cout << "Using scale factor: " << sf << '\n';
  return sf;
}

/**
* @brief Struct representing a parquet device buffer
*/
Expand Down Expand Up @@ -588,19 +577,19 @@ void write_to_parquet_device_buffer(std::unique_ptr<cudf::table> const& table,
/**
* @brief Generate TPC-H tables and write to parquet device buffers
*
 * @param scale_factor The scale factor of TPC-H tables to generate
* @param table_names The names of the tables to generate
* @param sources The parquet data sources to populate
*/
void generate_parquet_data_sources(std::vector<std::string> const& table_names,
void generate_parquet_data_sources(double scale_factor,
std::vector<std::string> const& table_names,
std::unordered_map<std::string, parquet_device_buffer>& sources)
{
CUDF_FUNC_RANGE();
std::for_each(table_names.begin(), table_names.end(), [&](auto const& table_name) {
sources[table_name] = parquet_device_buffer();
});

auto scale_factor = get_sf();

auto [orders, lineitem, part] = cudf::datagen::generate_orders_lineitem_part(
scale_factor, cudf::get_default_stream(), rmm::mr::get_current_device_resource());

Expand Down

0 comments on commit 0b154b4

Please sign in to comment.