Merge branch 'branch-24.10' into mixed_semi_join_refactor
mhaseeb123 authored Sep 16, 2024
2 parents 7b640d6 + 124d3e3 commit 0e88b41
Showing 60 changed files with 2,743 additions and 1,165 deletions.
1 change: 0 additions & 1 deletion .gitignore
@@ -80,7 +80,6 @@ build/
cpp/build/
cpp/examples/*/install/
cpp/examples/*/build/
cpp/examples/tpch/datagen/datafusion
cpp/include/cudf/ipc_generated/*.h
cpp/thirdparty/googletest/

2 changes: 1 addition & 1 deletion README.md
@@ -79,7 +79,7 @@ pip install --extra-index-url=https://pypi.nvidia.com cudf-cu12

### Conda

cuDF can be installed with conda (via [miniconda](https://docs.conda.io/projects/miniconda/en/latest/) or the full [Anaconda distribution](https://www.anaconda.com/download) from the `rapidsai` channel:
cuDF can be installed with conda (via [miniforge](https://github.com/conda-forge/miniforge)) from the `rapidsai` channel:

```bash
conda install -c rapidsai -c conda-forge -c nvidia \
1 change: 1 addition & 0 deletions cpp/CMakeLists.txt
@@ -384,6 +384,7 @@ add_library(
src/io/json/nested_json_gpu.cu
src/io/json/read_json.cu
src/io/json/parser_features.cpp
src/io/json/process_tokens.cu
src/io/json/write_json.cu
src/io/orc/aggregate_orc_metadata.cpp
src/io/orc/dict_enc.cu
26 changes: 17 additions & 9 deletions cpp/benchmarks/CMakeLists.txt
@@ -36,25 +36,25 @@ target_include_directories(
)

add_library(
tpch_data_generator STATIC
common/tpch_data_generator/tpch_data_generator.cpp common/tpch_data_generator/table_helpers.cpp
common/tpch_data_generator/random_column_generator.cu
ndsh_data_generator STATIC
common/ndsh_data_generator/ndsh_data_generator.cpp common/ndsh_data_generator/table_helpers.cpp
common/ndsh_data_generator/random_column_generator.cu
)
target_compile_features(tpch_data_generator PUBLIC cxx_std_17 cuda_std_17)
target_compile_features(ndsh_data_generator PUBLIC cxx_std_17 cuda_std_17)

target_compile_options(
tpch_data_generator PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
ndsh_data_generator PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
"$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
)

target_link_libraries(
tpch_data_generator
ndsh_data_generator
PUBLIC cudf cudftestutil nvtx3::nvtx3-cpp
PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
)

target_include_directories(
tpch_data_generator
ndsh_data_generator
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>" "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}>"
"$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
)
@@ -127,8 +127,8 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen nvbench::nvbench
$<TARGET_NAME_IF_EXISTS:conda_env>
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen
nvbench::nvbench $<TARGET_NAME_IF_EXISTS:conda_env>
)
install(
TARGETS ${CMAKE_BENCH_NAME}
@@ -175,6 +175,14 @@ ConfigureBench(COPY_IF_ELSE_BENCH copying/copy_if_else.cpp)
# * transpose benchmark ---------------------------------------------------------------------------
ConfigureBench(TRANSPOSE_BENCH transpose/transpose.cpp)

# ##################################################################################################
# * nds-h benchmark --------------------------------------------------------------------------------
ConfigureNVBench(NDSH_Q1 ndsh/q01.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q5 ndsh/q05.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q6 ndsh/q06.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q9 ndsh/q09.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q10 ndsh/q10.cpp ndsh/utilities.cpp)

# ##################################################################################################
# * stream_compaction benchmark -------------------------------------------------------------------
ConfigureNVBench(
cpp/benchmarks/common/tpch_data_generator/tpch_data_generator.cpp → cpp/benchmarks/common/ndsh_data_generator/ndsh_data_generator.cpp
@@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "tpch_data_generator.hpp"
#include "ndsh_data_generator.hpp"

#include "random_column_generator.hpp"
#include "table_helpers.hpp"
@@ -435,46 +435,37 @@ std::unique_ptr<cudf::table> generate_lineitem_partial(cudf::table_view const& o
columns.push_back(std::move(l_quantity));
columns.push_back(std::move(l_discount));
columns.push_back(std::move(l_tax));
columns.push_back(std::move(l_returnflag));
columns.push_back(std::move(l_linestatus));
columns.push_back(std::move(l_shipdate_ts));
columns.push_back(std::move(l_commitdate_ts));
columns.push_back(std::move(l_receiptdate_ts));
columns.push_back(std::move(l_returnflag));
columns.push_back(std::move(l_linestatus));
columns.push_back(std::move(l_shipinstruct));
columns.push_back(std::move(l_shipmode));
columns.push_back(std::move(l_comment));
return std::make_unique<cudf::table>(std::move(columns));
}

std::unique_ptr<cudf::table> generate_orders_dependent(cudf::table_view const& lineitem,
/**
* @brief Generate the part of the `orders` table dependent on the `lineitem` table
*
* @param lineitem_partial The partially generated `lineitem` table
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned column's device memory
*/
std::unique_ptr<cudf::table> generate_orders_dependent(cudf::table_view const& lineitem_partial,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
auto const l_linestatus_mask = lineitem.column(0);
auto const l_orderkey = lineitem.column(1);
auto const l_discount = lineitem.column(6);
auto const l_tax = lineitem.column(7);
auto const l_extendedprice = lineitem.column(16);
auto const l_linestatus_mask = lineitem_partial.column(0);
auto const l_orderkey = lineitem_partial.column(1);
auto const l_extendedprice = lineitem_partial.column(6);
auto const l_discount = lineitem_partial.column(7);
auto const l_tax = lineitem_partial.column(8);

std::vector<std::unique_ptr<cudf::column>> orders_dependent_columns;

// Generate the `o_totalprice` column
// We calculate the `charge` column, which is a function of `l_extendedprice`,
// `l_tax`, and `l_discount` and then group by `l_orderkey` and sum the `charge`
auto const l_charge = calculate_charge(l_extendedprice, l_tax, l_discount, stream, mr);
auto o_totalprice = [&]() {
auto const keys = cudf::table_view({l_orderkey});
cudf::groupby::groupby gb(keys);
std::vector<cudf::groupby::aggregation_request> requests;
requests.push_back(cudf::groupby::aggregation_request());
requests[0].aggregations.push_back(cudf::make_sum_aggregation<cudf::groupby_aggregation>());
requests[0].values = l_charge->view();
auto agg_result = gb.aggregate(requests);
return cudf::round(agg_result.second[0].results[0]->view(), 2);
}();
orders_dependent_columns.push_back(std::move(o_totalprice));

// Generate the `o_orderstatus` column
auto o_orderstatus = [&]() {
auto const keys = cudf::table_view({l_orderkey});
@@ -529,6 +520,22 @@ std::unique_ptr<cudf::table> generate_orders_dependent(cudf::table_view const& l
cudf::string_scalar("P"), o_orderstatus_intermediate->view(), mask_b->view());
}();
orders_dependent_columns.push_back(std::move(o_orderstatus));

// Generate the `o_totalprice` column
// We calculate the `charge` column, which is a function of `l_extendedprice`,
// `l_tax`, and `l_discount` and then group by `l_orderkey` and sum the `charge`
auto const l_charge = calculate_charge(l_extendedprice, l_tax, l_discount, stream, mr);
auto o_totalprice = [&]() {
auto const keys = cudf::table_view({l_orderkey});
cudf::groupby::groupby gb(keys);
std::vector<cudf::groupby::aggregation_request> requests;
requests.push_back(cudf::groupby::aggregation_request());
requests[0].aggregations.push_back(cudf::make_sum_aggregation<cudf::groupby_aggregation>());
requests[0].values = l_charge->view();
auto agg_result = gb.aggregate(requests);
return cudf::round(agg_result.second[0].results[0]->view(), 2);
}();
orders_dependent_columns.push_back(std::move(o_totalprice));
return std::make_unique<cudf::table>(std::move(orders_dependent_columns));
}

@@ -730,9 +737,7 @@ generate_orders_lineitem_part(double scale_factor,
// Generate the `part` table
auto part = generate_part(scale_factor, stream, mr);

// Join the `part` and partial `lineitem` tables, then calculate the `l_extendedprice` column,
// add the column to the `lineitem` table, and write the `lineitem` table to a parquet file

// Join the `part` and partial `lineitem` tables, then calculate the `l_extendedprice` column
auto l_extendedprice = [&]() {
auto const left = cudf::table_view(
{lineitem_partial->get_column(2).view(), lineitem_partial->get_column(5).view()});
@@ -752,8 +757,9 @@
return cudf::round(col->view(), 2);
}();

// Insert the `l_extendedprice` column into the partial columns of the `lineitem` table
auto lineitem_partial_columns = lineitem_partial->release();
lineitem_partial_columns.push_back(std::move(l_extendedprice));
lineitem_partial_columns.insert(lineitem_partial_columns.begin() + 6, std::move(l_extendedprice));
auto lineitem_temp = std::make_unique<cudf::table>(std::move(lineitem_partial_columns));

// Generate the dependent columns of the `orders` table
@@ -762,7 +768,7 @@

auto orders_independent_columns = orders_independent->release();
auto orders_dependent_columns = orders_dependent->release();
orders_independent_columns.insert(orders_independent_columns.end(),
orders_independent_columns.insert(orders_independent_columns.begin() + 2,
std::make_move_iterator(orders_dependent_columns.begin()),
std::make_move_iterator(orders_dependent_columns.end()));

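A note on the index arithmetic above, which is the subtle part of this file's diff: `generate_orders_dependent` now reads `l_extendedprice`, `l_discount`, and `l_tax` at columns 6, 7, and 8 of the partial `lineitem` table, which only holds because `l_extendedprice` is spliced in at position 6 (`begin() + 6`) rather than appended. A minimal sketch of the splice pattern — stand-in column type, and the leading column names are assumptions for illustration, not the generator's actual code:

```cpp
#include <memory>
#include <string>
#include <vector>

// Stand-in for cudf::column, to keep the sketch self-contained.
struct column {
  std::string name;
};

int main()
{
  // Assumed partial layout: ..., 5 = l_quantity, 6 = l_discount, 7 = l_tax.
  std::vector<std::unique_ptr<column>> cols;
  for (auto const* name : {"l_linestatus_mask", "l_orderkey", "l_partkey", "l_suppkey",
                           "l_linenumber", "l_quantity", "l_discount", "l_tax"}) {
    cols.push_back(std::make_unique<column>(column{name}));
  }

  // Splice the derived column in at index 6, shifting l_discount and l_tax to
  // indices 7 and 8 -- exactly the offsets generate_orders_dependent reads.
  cols.insert(cols.begin() + 6, std::make_unique<column>(column{"l_extendedprice"}));
  return 0;
}
```

The same reasoning explains the `begin() + 2` insert for `orders` and the reordering of the `o_totalprice` block below `o_orderstatus`: the dependent columns land after the first two independent ones (presumably `o_orderkey` and `o_custkey`) in schema order, with `o_orderstatus` before `o_totalprice`.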
14 changes: 7 additions & 7 deletions cpp/benchmarks/hashing/hash.cpp
@@ -50,7 +50,7 @@ static void bench_hash(nvbench::state& state)
state.add_global_memory_reads<nvbench::int64_t>(num_rows);
// add memory read from bitmasks
if (!no_nulls) {
state.add_global_memory_reads<nvbench::int8_t>(2 *
state.add_global_memory_reads<nvbench::int8_t>(2L *
cudf::bitmask_allocation_size_bytes(num_rows));
}
// memory written depends on used hash
@@ -63,37 +63,37 @@ static void bench_hash(nvbench::state& state)
});
} else if (hash_name == "md5") {
// md5 creates a 32-byte string
state.add_global_memory_writes<nvbench::int8_t>(32 * num_rows);
state.add_global_memory_writes<nvbench::int8_t>(32L * num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = cudf::hashing::md5(data->view()); });
} else if (hash_name == "sha1") {
// sha1 creates a 40-byte string
state.add_global_memory_writes<nvbench::int8_t>(40 * num_rows);
state.add_global_memory_writes<nvbench::int8_t>(40L * num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = cudf::hashing::sha1(data->view()); });
} else if (hash_name == "sha224") {
// sha224 creates a 56-byte string
state.add_global_memory_writes<nvbench::int8_t>(56 * num_rows);
state.add_global_memory_writes<nvbench::int8_t>(56L * num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = cudf::hashing::sha224(data->view()); });
} else if (hash_name == "sha256") {
// sha256 creates a 64-byte string
state.add_global_memory_writes<nvbench::int8_t>(64 * num_rows);
state.add_global_memory_writes<nvbench::int8_t>(64L * num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = cudf::hashing::sha256(data->view()); });
} else if (hash_name == "sha384") {
// sha384 creates a 96-byte string
state.add_global_memory_writes<nvbench::int8_t>(96 * num_rows);
state.add_global_memory_writes<nvbench::int8_t>(96L * num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = cudf::hashing::sha384(data->view()); });
} else if (hash_name == "sha512") {
// sha512 creates a 128-byte string
state.add_global_memory_writes<nvbench::int8_t>(128 * num_rows);
state.add_global_memory_writes<nvbench::int8_t>(128L * num_rows);

state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { auto result = cudf::hashing::sha512(data->view()); });
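The `L` suffixes above are overflow guards, not style: assuming `num_rows` is a 32-bit `cudf::size_type`, as is typical in these benchmarks, an expression like `128 * num_rows` multiplies in 32-bit `int` and wraps once `num_rows` exceeds roughly 16.7 million, under-counting the bytes written. Promoting one operand with `L` forces the multiply into `long`, which is 64-bit on the Linux targets cudf builds for. A standalone illustration (not benchmark code):

```cpp
#include <cstdint>
#include <iostream>

int main()
{
  std::int32_t num_rows = 100'000'000;  // a plausible benchmark row count

  // 32-bit multiply: 128 * 100'000'000 exceeds INT32_MAX and wraps.
  // (Signed overflow is undefined behavior; in practice, a garbage value.)
  std::int32_t wrapped = 128 * num_rows;

  // Promoting the constant makes this a 64-bit multiply: 12'800'000'000.
  std::int64_t exact = 128L * num_rows;

  std::cout << wrapped << " vs " << exact << '\n';
  return 0;
}
```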
11 changes: 11 additions & 0 deletions cpp/benchmarks/ndsh/README.md
@@ -0,0 +1,11 @@
# NDS-H Benchmarks for `libcudf`

## Disclaimer

NDS-H is derived from the TPC-H Benchmarks. As such, any results obtained using NDS-H are not
comparable to published TPC-H Benchmark results, since NDS-H does not comply with the TPC-H
Benchmarks.

## Current Status

For now, only Q1, Q5, Q6, Q9, and Q10 have been implemented.
49 changes: 28 additions & 21 deletions cpp/examples/tpch/q1.cpp → cpp/benchmarks/ndsh/q01.cpp
@@ -14,17 +14,19 @@
* limitations under the License.
*/

#include "../utilities/timer.hpp"
#include "utils.hpp"
#include "utilities.hpp"

#include <cudf/ast/expressions.hpp>
#include <cudf/binaryop.hpp>
#include <cudf/column/column.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <nvbench/nvbench.cuh>

/**
* @file q1.cpp
* @brief Implement query 1 of the TPC-H benchmark.
* @file q01.cpp
* @brief Implement query 1 of the NDS-H benchmark.
*
* create view lineitem as select * from '/tables/scale-1/lineitem.parquet';
*
@@ -59,7 +61,7 @@
* @param stream The CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
*/
[[nodiscard]] std::unique_ptr<cudf::column> calc_disc_price(
[[nodiscard]] std::unique_ptr<cudf::column> calculate_disc_price(
cudf::column_view const& discount,
cudf::column_view const& extendedprice,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
@@ -86,7 +88,7 @@
* @param stream The CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
*/
[[nodiscard]] std::unique_ptr<cudf::column> calc_charge(
[[nodiscard]] std::unique_ptr<cudf::column> calculate_charge(
cudf::column_view const& tax,
cudf::column_view const& disc_price,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
@@ -101,16 +103,9 @@
return charge;
}

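For orientation, since both helper bodies are collapsed in this diff: following the standard TPC-H Q1 definition, which NDS-H mirrors, `calculate_disc_price` computes `l_extendedprice * (1 - l_discount)` and `calculate_charge` multiplies that by `(1 + l_tax)`. A scalar worked example (the real helpers operate on whole columns):

```cpp
#include <iostream>

int main()
{
  // One lineitem row, scalar form of the column-wise expressions in q01.cpp.
  double l_extendedprice = 1000.0, l_discount = 0.05, l_tax = 0.08;

  double disc_price = l_extendedprice * (1.0 - l_discount);  // 950.00
  double charge     = disc_price * (1.0 + l_tax);            // 1026.00
  std::cout << disc_price << ' ' << charge << '\n';
  return 0;
}
```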
int main(int argc, char const** argv)
void run_ndsh_q1(nvbench::state& state,
std::unordered_map<std::string, parquet_device_buffer>& sources)
{
auto const args = parse_args(argc, argv);

// Use a memory pool
auto resource = create_memory_resource(args.memory_resource_type);
cudf::set_current_device_resource(resource.get());

cudf::examples::timer timer;

// Define the column projections and filter predicate for `lineitem` table
std::vector<std::string> const lineitem_cols = {"l_returnflag",
"l_linestatus",
@@ -130,12 +125,12 @@ int main(int argc, char const** argv)

// Read out the `lineitem` table from parquet file
auto lineitem =
read_parquet(args.dataset_dir + "/lineitem.parquet", lineitem_cols, std::move(lineitem_pred));
read_parquet(sources["lineitem"].make_source_info(), lineitem_cols, std::move(lineitem_pred));

// Calculate the discount price and charge columns and append to lineitem table
auto disc_price =
calc_disc_price(lineitem->column("l_discount"), lineitem->column("l_extendedprice"));
auto charge = calc_charge(lineitem->column("l_tax"), disc_price->view());
calculate_disc_price(lineitem->column("l_discount"), lineitem->column("l_extendedprice"));
auto charge = calculate_charge(lineitem->column("l_tax"), disc_price->view());
(*lineitem).append(disc_price, "disc_price").append(charge, "charge");

// Perform the group by operation
@@ -167,9 +162,21 @@
{"l_returnflag", "l_linestatus"},
{cudf::order::ASCENDING, cudf::order::ASCENDING});

timer.print_elapsed_millis();

// Write query result to a parquet file
orderedby_table->to_parquet("q1.parquet");
return 0;
}

void ndsh_q1(nvbench::state& state)
{
// Generate the required parquet files in device buffers
double const scale_factor = state.get_float64("scale_factor");
std::unordered_map<std::string, parquet_device_buffer> sources;
generate_parquet_data_sources(scale_factor, {"lineitem"}, sources);

auto stream = cudf::get_default_stream();
state.set_cuda_stream(nvbench::make_cuda_stream_view(stream.value()));
state.exec(nvbench::exec_tag::sync,
[&](nvbench::launch& launch) { run_ndsh_q1(state, sources); });
}

NVBENCH_BENCH(ndsh_q1).set_name("ndsh_q1").add_float64_axis("scale_factor", {0.01, 0.1, 1});
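`utilities.hpp` (replacing the old `utils.hpp`) is not included in this diff, so the shape of `parquet_device_buffer` is not visible here. A plausible minimal sketch, assuming `cudf::io::source_info` is constructed from a device span — hypothetical, for illustration only; the real definition lives in `cpp/benchmarks/ndsh/utilities.hpp`:

```cpp
#include <cudf/io/types.hpp>
#include <cudf/utilities/span.hpp>

#include <rmm/device_buffer.hpp>

#include <cstddef>

// Hypothetical sketch of the parquet_device_buffer consumed by q01.cpp above.
struct parquet_device_buffer {
  rmm::device_buffer buffer;  // parquet file bytes, resident in device memory

  // Expose the device-resident bytes as a cudf IO source, so read_parquet can
  // consume generated data without a round trip through the filesystem.
  [[nodiscard]] cudf::io::source_info make_source_info() const
  {
    return cudf::io::source_info{cudf::device_span<std::byte const>{
      static_cast<std::byte const*>(buffer.data()), buffer.size()}};
  }
};
```

Keeping generated inputs in device memory is what lets `state.exec` time the query itself rather than disk I/O, while `generate_parquet_data_sources` pays the data-generation cost once, outside the measured region.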