From 446e582a3fde8bfc2c33e192227e3d8c024fe2ba Mon Sep 17 00:00:00 2001 From: JayjeetAtGithub Date: Wed, 17 Jul 2024 19:57:42 -0700 Subject: [PATCH] Add ps_suppkey col --- cpp/benchmarks/common/cudf_datagen/dbgen.cu | 76 ++++++++++++++++++++- 1 file changed, 73 insertions(+), 3 deletions(-) diff --git a/cpp/benchmarks/common/cudf_datagen/dbgen.cu b/cpp/benchmarks/common/cudf_datagen/dbgen.cu index 5977578cdd2..66c81f6cbb6 100644 --- a/cpp/benchmarks/common/cudf_datagen/dbgen.cu +++ b/cpp/benchmarks/common/cudf_datagen/dbgen.cu @@ -24,11 +24,14 @@ #include #include #include +#include +#include #include #include #include #include #include +#include #include #include @@ -322,6 +325,67 @@ void generate_orders(int64_t scale_factor) write_parquet(orders, "orders.parquet", {"o_orderpriority", "o_shippriority", "o_comment"}); } +std::unique_ptr calc_ps_suppkey(cudf::column_view const& ps_partkey, + int64_t const& scale_factor, + int64_t const& num_rows) +{ + // Generating the `s` col: every row is filled with 10000 * scale_factor + auto s_empty = cudf::make_numeric_column(cudf::data_type{cudf::type_id::INT64}, + num_rows, + cudf::mask_state::UNALLOCATED, + cudf::get_default_stream()); + + auto s = + cudf::fill(s_empty->view(), 0, num_rows, cudf::numeric_scalar(10000 * scale_factor)); + + // Generating the `i` col: `seq` modulo 4 + auto seq = gen_primary_key_col(0, num_rows); + auto i = cudf::binary_operation(seq->view(), + cudf::numeric_scalar(4), + cudf::binary_operator::MOD, + cudf::data_type{cudf::type_id::INT64}); + + // Create a table view out of `ps_partkey`, `s`, and `i` + auto table = cudf::table_view({ps_partkey, s->view(), i->view()}); + + // Create the AST expression + auto scalar_1 = cudf::numeric_scalar(1); + auto scalar_4 = cudf::numeric_scalar(4); + auto literal_1 = cudf::ast::literal(scalar_1); + auto literal_4 = cudf::ast::literal(scalar_4); + + auto ps_partkey_col_ref = cudf::ast::column_reference(0); + auto s_col_ref = cudf::ast::column_reference(1); + auto i_col_ref = cudf::ast::column_reference(2); + + // 
(int)(ps_partkey - 1)/s + auto expr_a = cudf::ast::operation(cudf::ast::ast_operator::SUB, ps_partkey_col_ref, literal_1); + auto expr_b = cudf::ast::operation(cudf::ast::ast_operator::DIV, expr_a, s_col_ref); + auto expr_b_casted = cudf::ast::operation(cudf::ast::ast_operator::CAST_TO_INT64, expr_b); + + // s/4 + auto expr_c = cudf::ast::operation(cudf::ast::ast_operator::DIV, s_col_ref, literal_4); + + // (s/4 + (int)(ps_partkey - 1)/s) + auto expr_d = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_c, expr_b_casted); + + // (i * (s/4 + (int)(ps_partkey - 1)/s)) + auto expr_e = cudf::ast::operation(cudf::ast::ast_operator::MUL, i_col_ref, expr_d); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) + auto expr_f = cudf::ast::operation(cudf::ast::ast_operator::ADD, ps_partkey_col_ref, expr_e); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + auto expr_g = cudf::ast::operation(cudf::ast::ast_operator::MOD, expr_f, s_col_ref); + + // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + 1 + auto final_expr = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_g, literal_1); + + // Execute the AST expression + auto ps_suppkey = cudf::compute_column(table, final_expr); + return ps_suppkey; +} + /** * @brief Generate the `partsupp` table * @@ -347,6 +411,9 @@ void generate_partsupp(int64_t const& scale_factor, auto rep_table = cudf::repeat(cudf::table_view({p_partkey->view()}), rep_freq->view()); auto ps_partkey = rep_table->get_column(0); + // Generate the `ps_suppkey` column + auto ps_suppkey = calc_ps_suppkey(ps_partkey.view(), scale_factor, num_rows); + // Generate the `p_availqty` column auto ps_availqty = gen_rand_num_col(1, 9999, num_rows); @@ -357,8 +424,11 @@ void generate_partsupp(int64_t const& scale_factor, // NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification auto ps_comment = gen_rand_str_col(49, 198, num_rows); - auto partsupp = cudf::table_view( - {ps_partkey.view(), 
ps_availqty->view(), ps_supplycost->view(), ps_comment->view()}); + auto partsupp = cudf::table_view({ps_partkey.view(), + ps_suppkey->view(), + ps_availqty->view(), + ps_supplycost->view(), + ps_comment->view()}); write_parquet(partsupp, "partsupp.parquet", schema_partsupp); } @@ -711,7 +781,7 @@ int main(int argc, char** argv) // generate_lineitem(scale_factor); // generate_orders(scale_factor); - // generate_partsupp(scale_factor); + generate_partsupp(scale_factor); generate_part(scale_factor); generate_supplier(scale_factor); generate_customer(scale_factor);