-
Notifications
You must be signed in to change notification settings - Fork 912
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add bytes_per_second
to transpose benchmark
#14170
Changes from 5 commits
66f43e4
d2676b5
f594a81
dcd9ef1
60845b7
05e1afa
e9280cc
4187495
c50a5ef
8083756
e6d9f24
82cbcf9
ed8aa1e
943b9f3
09f10d8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change | ||||
---|---|---|---|---|---|---|
|
@@ -20,17 +20,19 @@ | |||||
#include <cudf/column/column_factories.hpp> | ||||||
#include <cudf/table/table.hpp> | ||||||
#include <cudf/transpose.hpp> | ||||||
#include <cudf/types.hpp> | ||||||
|
||||||
#include <thrust/iterator/counting_iterator.h> | ||||||
#include <thrust/iterator/transform_iterator.h> | ||||||
|
||||||
static void BM_transpose(benchmark::State& state) | ||||||
{ | ||||||
auto count = state.range(0); | ||||||
constexpr auto column_type = cudf::data_type{cudf::type_id::INT32}; | ||||||
auto int_column_generator = | ||||||
thrust::make_transform_iterator(thrust::counting_iterator(0), [count](int i) { | ||||||
return cudf::make_numeric_column( | ||||||
cudf::data_type{cudf::type_id::INT32}, count, cudf::mask_state::ALL_VALID); | ||||||
column_type, count, cudf::mask_state::ALL_VALID); | ||||||
}); | ||||||
|
||||||
auto input_table = cudf::table(std::vector(int_column_generator, int_column_generator + count)); | ||||||
|
@@ -40,16 +42,29 @@ static void BM_transpose(benchmark::State& state) | |||||
cuda_event_timer raii(state, true); | ||||||
auto output = cudf::transpose(input); | ||||||
} | ||||||
|
||||||
// Collect memory statistics. | ||||||
auto const bytes_read = input.num_columns() * input.num_rows() * cudf::size_of(column_type); | ||||||
auto const bytes_written = bytes_read; | ||||||
// Account for nullability in input and output. | ||||||
auto const null_bytes = | ||||||
2 * input.num_columns() * cudf::bitmask_allocation_size_bytes(input.num_rows()); | ||||||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. suggestion: This one could also overflow, I think, perhaps:
Suggested change
? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Are you sure about this one? Since the return type of That said, just having this discussion indicates I should've included an explicit cast upfront to clear up any potential confusion. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Left-to-right associativity means that this is evaluated as There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why not just always put the thing that returns There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Personally, I would keep the cast explicit to make visible what is happening, but I don't have a strong stance on it. |
||||||
|
||||||
state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * | ||||||
(bytes_read + bytes_written + null_bytes)); | ||||||
} | ||||||
|
||||||
class Transpose : public cudf::benchmark {}; | ||||||
|
||||||
#define TRANSPOSE_BM_BENCHMARK_DEFINE(name) \ | ||||||
BENCHMARK_DEFINE_F(Transpose, name)(::benchmark::State & state) { BM_transpose(state); } \ | ||||||
BENCHMARK_REGISTER_F(Transpose, name) \ | ||||||
->RangeMultiplier(4) \ | ||||||
->Range(4, 4 << 13) \ | ||||||
->UseManualTime() \ | ||||||
#define TRANSPOSE_BM_BENCHMARK_DEFINE(name) \ | ||||||
BENCHMARK_DEFINE_F(Transpose, name)(::benchmark::State & state) \ | ||||||
{ \ | ||||||
BM_transpose(state); \ | ||||||
} \ | ||||||
BENCHMARK_REGISTER_F(Transpose, name) \ | ||||||
->RangeMultiplier(4) \ | ||||||
->Range(4, 4 << 13) \ | ||||||
->UseManualTime() \ | ||||||
->Unit(benchmark::kMillisecond); | ||||||
|
||||||
TRANSPOSE_BM_BENCHMARK_DEFINE(transpose_simple); |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
💡 suggestion: This is one way to do it. But I think the way I suggested is a bit better because it all happens at compile time, whereas
cudf::size_of()
invokes the type dispatcher at run time. Not that it will affect benchmarks, but it just seems cleaner to usesizeof(cudf::id_to_type<column_type>)
.