Skip to content

Commit

Permalink
Merge branch 'branch-24.10' into multi-string-contains-review
Browse files Browse the repository at this point in the history
  • Loading branch information
res-life authored Sep 11, 2024
2 parents ab5ef90 + 750adca commit da1d92b
Show file tree
Hide file tree
Showing 675 changed files with 2,884 additions and 2,692 deletions.
1 change: 0 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@ build/
cpp/build/
cpp/examples/*/install/
cpp/examples/*/build/
cpp/examples/tpch/datagen/datafusion
cpp/include/cudf/ipc_generated/*.h
cpp/thirdparty/googletest/

Expand Down
26 changes: 17 additions & 9 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,25 +36,25 @@ target_include_directories(
)

add_library(
tpch_data_generator STATIC
common/tpch_data_generator/tpch_data_generator.cpp common/tpch_data_generator/table_helpers.cpp
common/tpch_data_generator/random_column_generator.cu
ndsh_data_generator STATIC
common/ndsh_data_generator/ndsh_data_generator.cpp common/ndsh_data_generator/table_helpers.cpp
common/ndsh_data_generator/random_column_generator.cu
)
target_compile_features(tpch_data_generator PUBLIC cxx_std_17 cuda_std_17)
target_compile_features(ndsh_data_generator PUBLIC cxx_std_17 cuda_std_17)

target_compile_options(
tpch_data_generator PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
ndsh_data_generator PUBLIC "$<$<COMPILE_LANGUAGE:CXX>:${CUDF_CXX_FLAGS}>"
"$<$<COMPILE_LANGUAGE:CUDA>:${CUDF_CUDA_FLAGS}>"
)

target_link_libraries(
tpch_data_generator
ndsh_data_generator
PUBLIC cudf cudftestutil nvtx3::nvtx3-cpp
PRIVATE $<TARGET_NAME_IF_EXISTS:conda_env>
)

target_include_directories(
tpch_data_generator
ndsh_data_generator
PUBLIC "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}>" "$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}>"
"$<BUILD_INTERFACE:${CUDF_SOURCE_DIR}/src>"
)
Expand Down Expand Up @@ -127,8 +127,8 @@ function(ConfigureNVBench CMAKE_BENCH_NAME)
INSTALL_RPATH "\$ORIGIN/../../../lib"
)
target_link_libraries(
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common cudf_datagen nvbench::nvbench
$<TARGET_NAME_IF_EXISTS:conda_env>
${CMAKE_BENCH_NAME} PRIVATE cudf_benchmark_common ndsh_data_generator cudf_datagen
nvbench::nvbench $<TARGET_NAME_IF_EXISTS:conda_env>
)
install(
TARGETS ${CMAKE_BENCH_NAME}
Expand Down Expand Up @@ -175,6 +175,14 @@ ConfigureBench(COPY_IF_ELSE_BENCH copying/copy_if_else.cpp)
# * transpose benchmark ---------------------------------------------------------------------------
ConfigureBench(TRANSPOSE_BENCH transpose/transpose.cpp)

# ##################################################################################################
# * nds-h benchmark --------------------------------------------------------------------------------
ConfigureNVBench(NDSH_Q1 ndsh/q01.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q5 ndsh/q05.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q6 ndsh/q06.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q9 ndsh/q09.cpp ndsh/utilities.cpp)
ConfigureNVBench(NDSH_Q10 ndsh/q10.cpp ndsh/utilities.cpp)

# ##################################################################################################
# * stream_compaction benchmark -------------------------------------------------------------------
ConfigureNVBench(
Expand Down
14 changes: 7 additions & 7 deletions cpp/benchmarks/common/generate_input.cu
Original file line number Diff line number Diff line change
Expand Up @@ -28,10 +28,10 @@
#include <cudf/types.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/error.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <rmm/device_buffer.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/mr/device/per_device_resource.hpp>

#include <cuda/functional>
#include <thrust/binary_search.h>
Expand Down Expand Up @@ -507,7 +507,7 @@ std::unique_ptr<cudf::column> create_random_column(data_profile const& profile,
null_mask.end(),
thrust::identity<bool>{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::get_current_device_resource_ref());

return std::make_unique<cudf::column>(
dtype,
Expand Down Expand Up @@ -591,7 +591,7 @@ std::unique_ptr<cudf::column> create_random_utf8_string_column(data_profile cons
null_mask.end() - 1,
thrust::identity<bool>{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::get_current_device_resource_ref());
return cudf::make_strings_column(
num_rows,
std::make_unique<cudf::column>(std::move(offsets), rmm::device_buffer{}, 0),
Expand Down Expand Up @@ -626,7 +626,7 @@ std::unique_ptr<cudf::column> create_random_column<cudf::string_view>(data_profi
cudf::out_of_bounds_policy::DONT_CHECK,
cudf::detail::negative_index_policy::NOT_ALLOWED,
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::get_current_device_resource_ref());
return std::move(str_table->release()[0]);
}

Expand Down Expand Up @@ -688,7 +688,7 @@ std::unique_ptr<cudf::column> create_random_column<cudf::struct_view>(data_profi
valids.end(),
thrust::identity<bool>{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::get_current_device_resource_ref());
}
return std::pair<rmm::device_buffer, cudf::size_type>{};
}();
Expand Down Expand Up @@ -782,7 +782,7 @@ std::unique_ptr<cudf::column> create_random_column<cudf::list_view>(data_profile
valids.end(),
thrust::identity<bool>{},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::get_current_device_resource_ref());
list_column = cudf::make_lists_column(
current_num_rows,
std::move(offsets_column),
Expand Down Expand Up @@ -933,7 +933,7 @@ std::pair<rmm::device_buffer, cudf::size_type> create_random_null_mask(
thrust::make_counting_iterator<cudf::size_type>(size),
bool_generator{seed, 1.0 - *null_probability},
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
cudf::get_current_device_resource_ref());
}
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include "tpch_data_generator.hpp"
#include "ndsh_data_generator.hpp"

#include "random_column_generator.hpp"
#include "table_helpers.hpp"
Expand All @@ -36,6 +36,9 @@
#include <cudf/transform.hpp>
#include <cudf/unary.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/resource_ref.hpp>

#include <array>
#include <string>
#include <vector>
Expand Down Expand Up @@ -432,46 +435,37 @@ std::unique_ptr<cudf::table> generate_lineitem_partial(cudf::table_view const& o
columns.push_back(std::move(l_quantity));
columns.push_back(std::move(l_discount));
columns.push_back(std::move(l_tax));
columns.push_back(std::move(l_returnflag));
columns.push_back(std::move(l_linestatus));
columns.push_back(std::move(l_shipdate_ts));
columns.push_back(std::move(l_commitdate_ts));
columns.push_back(std::move(l_receiptdate_ts));
columns.push_back(std::move(l_returnflag));
columns.push_back(std::move(l_linestatus));
columns.push_back(std::move(l_shipinstruct));
columns.push_back(std::move(l_shipmode));
columns.push_back(std::move(l_comment));
return std::make_unique<cudf::table>(std::move(columns));
}

std::unique_ptr<cudf::table> generate_orders_dependent(cudf::table_view const& lineitem,
/**
* @brief Generate the part of the `orders` table dependent on the `lineitem` table
*
* @param lineitem_partial The partially generated `lineitem` table
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource used to allocate the returned table's device memory
*/
std::unique_ptr<cudf::table> generate_orders_dependent(cudf::table_view const& lineitem_partial,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
CUDF_FUNC_RANGE();
auto const l_linestatus_mask = lineitem.column(0);
auto const l_orderkey = lineitem.column(1);
auto const l_discount = lineitem.column(6);
auto const l_tax = lineitem.column(7);
auto const l_extendedprice = lineitem.column(16);
auto const l_linestatus_mask = lineitem_partial.column(0);
auto const l_orderkey = lineitem_partial.column(1);
auto const l_extendedprice = lineitem_partial.column(6);
auto const l_discount = lineitem_partial.column(7);
auto const l_tax = lineitem_partial.column(8);

std::vector<std::unique_ptr<cudf::column>> orders_dependent_columns;

// Generate the `o_totalprice` column
// We calculate the `charge` column, which is a function of `l_extendedprice`,
// `l_tax`, and `l_discount` and then group by `l_orderkey` and sum the `charge`
auto const l_charge = calculate_charge(l_extendedprice, l_tax, l_discount, stream, mr);
auto o_totalprice = [&]() {
auto const keys = cudf::table_view({l_orderkey});
cudf::groupby::groupby gb(keys);
std::vector<cudf::groupby::aggregation_request> requests;
requests.push_back(cudf::groupby::aggregation_request());
requests[0].aggregations.push_back(cudf::make_sum_aggregation<cudf::groupby_aggregation>());
requests[0].values = l_charge->view();
auto agg_result = gb.aggregate(requests);
return cudf::round(agg_result.second[0].results[0]->view(), 2);
}();
orders_dependent_columns.push_back(std::move(o_totalprice));

// Generate the `o_orderstatus` column
auto o_orderstatus = [&]() {
auto const keys = cudf::table_view({l_orderkey});
Expand Down Expand Up @@ -526,6 +520,22 @@ std::unique_ptr<cudf::table> generate_orders_dependent(cudf::table_view const& l
cudf::string_scalar("P"), o_orderstatus_intermediate->view(), mask_b->view());
}();
orders_dependent_columns.push_back(std::move(o_orderstatus));

// Generate the `o_totalprice` column
// We calculate the `charge` column, which is a function of `l_extendedprice`,
// `l_tax`, and `l_discount`, and then group by `l_orderkey` and sum the `charge`
auto const l_charge = calculate_charge(l_extendedprice, l_tax, l_discount, stream, mr);
auto o_totalprice = [&]() {
auto const keys = cudf::table_view({l_orderkey});
cudf::groupby::groupby gb(keys);
std::vector<cudf::groupby::aggregation_request> requests;
requests.push_back(cudf::groupby::aggregation_request());
requests[0].aggregations.push_back(cudf::make_sum_aggregation<cudf::groupby_aggregation>());
requests[0].values = l_charge->view();
auto agg_result = gb.aggregate(requests);
return cudf::round(agg_result.second[0].results[0]->view(), 2);
}();
orders_dependent_columns.push_back(std::move(o_totalprice));
return std::make_unique<cudf::table>(std::move(orders_dependent_columns));
}

Expand Down Expand Up @@ -727,9 +737,7 @@ generate_orders_lineitem_part(double scale_factor,
// Generate the `part` table
auto part = generate_part(scale_factor, stream, mr);

// Join the `part` and partial `lineitem` tables, then calculate the `l_extendedprice` column,
// add the column to the `lineitem` table, and write the `lineitem` table to a parquet file

// Join the `part` and partial `lineitem` tables, then calculate the `l_extendedprice` column
auto l_extendedprice = [&]() {
auto const left = cudf::table_view(
{lineitem_partial->get_column(2).view(), lineitem_partial->get_column(5).view()});
Expand All @@ -749,8 +757,9 @@ generate_orders_lineitem_part(double scale_factor,
return cudf::round(col->view(), 2);
}();

// Insert the `l_extendedprice` column into the partial columns of the `lineitem` table
auto lineitem_partial_columns = lineitem_partial->release();
lineitem_partial_columns.push_back(std::move(l_extendedprice));
lineitem_partial_columns.insert(lineitem_partial_columns.begin() + 6, std::move(l_extendedprice));
auto lineitem_temp = std::make_unique<cudf::table>(std::move(lineitem_partial_columns));

// Generate the dependent columns of the `orders` table
Expand All @@ -759,7 +768,7 @@ generate_orders_lineitem_part(double scale_factor,

auto orders_independent_columns = orders_independent->release();
auto orders_dependent_columns = orders_dependent->release();
orders_independent_columns.insert(orders_independent_columns.end(),
orders_independent_columns.insert(orders_independent_columns.begin() + 2,
std::make_move_iterator(orders_dependent_columns.begin()),
std::make_move_iterator(orders_dependent_columns.end()));

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#pragma once

#include <cudf/table/table.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/memory_resource.hpp>

namespace CUDF_EXPORT cudf {
namespace datagen {
Expand All @@ -32,7 +34,7 @@ std::tuple<std::unique_ptr<cudf::table>, std::unique_ptr<cudf::table>, std::uniq
generate_orders_lineitem_part(
double scale_factor,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate the `partsupp` table
Expand All @@ -44,7 +46,7 @@ generate_orders_lineitem_part(
std::unique_ptr<cudf::table> generate_partsupp(
double scale_factor,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate the `supplier` table
Expand All @@ -56,7 +58,7 @@ std::unique_ptr<cudf::table> generate_partsupp(
std::unique_ptr<cudf::table> generate_supplier(
double scale_factor,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate the `customer` table
Expand All @@ -68,7 +70,7 @@ std::unique_ptr<cudf::table> generate_supplier(
std::unique_ptr<cudf::table> generate_customer(
double scale_factor,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate the `nation` table
Expand All @@ -78,7 +80,7 @@ std::unique_ptr<cudf::table> generate_customer(
*/
std::unique_ptr<cudf::table> generate_nation(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate the `region` table
Expand All @@ -88,7 +90,7 @@ std::unique_ptr<cudf::table> generate_nation(
*/
std::unique_ptr<cudf::table> generate_region(
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

} // namespace datagen
} // namespace CUDF_EXPORT cudf
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/utilities/default_stream.hpp>
#include <cudf/utilities/memory_resource.hpp>

#include <string>

Expand All @@ -36,7 +38,7 @@ std::unique_ptr<cudf::column> generate_random_string_column(
cudf::size_type upper,
cudf::size_type num_rows,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate a column of random numbers
Expand All @@ -61,7 +63,7 @@ std::unique_ptr<cudf::column> generate_random_numeric_column(
T upper,
cudf::size_type num_rows,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate a primary key column
Expand All @@ -81,7 +83,7 @@ std::unique_ptr<cudf::column> generate_primary_key_column(
cudf::scalar const& start,
cudf::size_type num_rows,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate a column where all the rows have the same string value
Expand All @@ -101,7 +103,7 @@ std::unique_ptr<cudf::column> generate_repeat_string_column(
std::string const& value,
cudf::size_type num_rows,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate a column by randomly choosing from set of strings
Expand All @@ -121,7 +123,7 @@ std::unique_ptr<cudf::column> generate_random_string_column_from_set(
cudf::host_span<const char* const> set,
cudf::size_type num_rows,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

/**
* @brief Generate a column consisting of a repeating sequence of integers
Expand All @@ -145,6 +147,6 @@ std::unique_ptr<cudf::column> generate_repeat_sequence_column(
bool zero_indexed,
cudf::size_type num_rows,
rmm::cuda_stream_view stream = cudf::get_default_stream(),
rmm::device_async_resource_ref mr = rmm::mr::get_current_device_resource());
rmm::device_async_resource_ref mr = cudf::get_current_device_resource_ref());

} // namespace cudf::datagen
Loading

0 comments on commit da1d92b

Please sign in to comment.