Skip to content

Commit

Permalink
Add ps_suppkey col
Browse files Browse the repository at this point in the history
  • Loading branch information
JayjeetAtGithub committed Jul 18, 2024
1 parent e2f4341 commit 446e582
Showing 1 changed file with 73 additions and 3 deletions.
76 changes: 73 additions & 3 deletions cpp/benchmarks/common/cudf_datagen/dbgen.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,14 @@
#include <cudf/column/column_factories.hpp>
#include <cudf/concatenate.hpp>
#include <cudf/filling.hpp>
#include <cudf/lists/combine.hpp>
#include <cudf/lists/filling.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/combine.hpp>
#include <cudf/strings/convert/convert_integers.hpp>
#include <cudf/strings/padding.hpp>
#include <cudf/table/table.hpp>
#include <cudf/transform.hpp>

#include <rmm/cuda_device.hpp>
#include <rmm/exec_policy.hpp>
Expand Down Expand Up @@ -322,6 +325,67 @@ void generate_orders(int64_t scale_factor)
write_parquet(orders, "orders.parquet", {"o_orderpriority", "o_shippriority", "o_comment"});
}

/**
 * @brief Compute the `ps_suppkey` column of the `partsupp` table
 *
 * Evaluates the following expression for every row:
 *
 *   ps_suppkey = (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + 1
 *
 * where `s` is the constant `10000 * scale_factor` and `i` is the sequence returned by
 * `gen_primary_key_col(0, num_rows)` taken modulo 4.
 *
 * @param ps_partkey The `ps_partkey` column the supplier keys are derived from
 * @param scale_factor The scale factor used to derive `s`
 * @param num_rows Number of rows in the helper columns `s` and `i`; presumably equal to the
 * number of rows in `ps_partkey` (confirm against the caller)
 * @return The computed `ps_suppkey` column
 */
std::unique_ptr<cudf::column> calc_ps_suppkey(cudf::column_view const& ps_partkey,
                                              int64_t const& scale_factor,
                                              int64_t const& num_rows)
{
  // Generating the `s` col: every row holds 10000 * scale_factor.
  // The column is materialized straight from the scalar, so no intermediate uninitialized
  // column has to be allocated and then copied by an out-of-place fill.
  auto const s = cudf::make_column_from_scalar(cudf::numeric_scalar<int64_t>(10000 * scale_factor),
                                               static_cast<cudf::size_type>(num_rows));

  // Generating the `i` col: sequence % 4
  auto const seq = gen_primary_key_col(0, num_rows);
  auto const i   = cudf::binary_operation(seq->view(),
                                        cudf::numeric_scalar<int64_t>(4),
                                        cudf::binary_operator::MOD,
                                        cudf::data_type{cudf::type_id::INT64});

  // Create a table view out of `ps_partkey`, `s`, and `i`.
  // The column order here fixes the indices used by the column references below.
  auto const table = cudf::table_view({ps_partkey, s->view(), i->view()});

  // Create the AST expression.
  // NOTE: per the cudf AST documentation, expression nodes refer to their operands rather than
  // owning them, so every scalar, literal and sub-expression is kept alive as a named local
  // until `compute_column` has run.
  auto scalar_1  = cudf::numeric_scalar<int64_t>(1);
  auto scalar_4  = cudf::numeric_scalar<int64_t>(4);
  auto literal_1 = cudf::ast::literal(scalar_1);
  auto literal_4 = cudf::ast::literal(scalar_4);

  auto ps_partkey_col_ref = cudf::ast::column_reference(0);
  auto s_col_ref          = cudf::ast::column_reference(1);
  auto i_col_ref          = cudf::ast::column_reference(2);

  // (int)(ps_partkey - 1)/s
  auto expr_a = cudf::ast::operation(cudf::ast::ast_operator::SUB, ps_partkey_col_ref, literal_1);
  auto expr_b = cudf::ast::operation(cudf::ast::ast_operator::DIV, expr_a, s_col_ref);
  auto expr_b_casted = cudf::ast::operation(cudf::ast::ast_operator::CAST_TO_INT64, expr_b);

  // s/4
  auto expr_c = cudf::ast::operation(cudf::ast::ast_operator::DIV, s_col_ref, literal_4);

  // (s/4 + (int)(ps_partkey - 1)/s)
  auto expr_d = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_c, expr_b_casted);

  // (i * (s/4 + (int)(ps_partkey - 1)/s))
  auto expr_e = cudf::ast::operation(cudf::ast::ast_operator::MUL, i_col_ref, expr_d);

  // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s)))
  auto expr_f = cudf::ast::operation(cudf::ast::ast_operator::ADD, ps_partkey_col_ref, expr_e);

  // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s
  auto expr_g = cudf::ast::operation(cudf::ast::ast_operator::MOD, expr_f, s_col_ref);

  // (ps_partkey + (i * (s/4 + (int)(ps_partkey - 1)/s))) % s + 1
  auto final_expr = cudf::ast::operation(cudf::ast::ast_operator::ADD, expr_g, literal_1);

  // Execute the AST expression
  return cudf::compute_column(table, final_expr);
}

/**
* @brief Generate the `partsupp` table
*
Expand All @@ -347,6 +411,9 @@ void generate_partsupp(int64_t const& scale_factor,
auto rep_table = cudf::repeat(cudf::table_view({p_partkey->view()}), rep_freq->view());
auto ps_partkey = rep_table->get_column(0);

// Generate the `ps_suppkey` column
auto ps_suppkey = calc_ps_suppkey(ps_partkey.view(), scale_factor, num_rows);

// Generate the `ps_availqty` column
auto ps_availqty = gen_rand_num_col<int64_t>(1, 9999, num_rows);

Expand All @@ -357,8 +424,11 @@ void generate_partsupp(int64_t const& scale_factor,
// NOTE: This column is not compliant with clause 4.2.2.10 of the TPC-H specification
auto ps_comment = gen_rand_str_col(49, 198, num_rows);

auto partsupp = cudf::table_view(
{ps_partkey.view(), ps_availqty->view(), ps_supplycost->view(), ps_comment->view()});
auto partsupp = cudf::table_view({ps_partkey.view(),
ps_suppkey->view(),
ps_availqty->view(),
ps_supplycost->view(),
ps_comment->view()});
write_parquet(partsupp, "partsupp.parquet", schema_partsupp);
}

Expand Down Expand Up @@ -711,7 +781,7 @@ int main(int argc, char** argv)

// generate_lineitem(scale_factor);
// generate_orders(scale_factor);
// generate_partsupp(scale_factor);
generate_partsupp(scale_factor);
generate_part(scale_factor);
generate_supplier(scale_factor);
generate_customer(scale_factor);
Expand Down

0 comments on commit 446e582

Please sign in to comment.