Skip to content

Commit

Permalink
make_empty_tdigests_column
Browse files Browse the repository at this point in the history
  • Loading branch information
jihoonson committed Sep 30, 2024
1 parent 35d466a commit 9d5fe05
Show file tree
Hide file tree
Showing 6 changed files with 31 additions and 32 deletions.
11 changes: 6 additions & 5 deletions cpp/include/cudf/detail/tdigest/tdigest.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -143,19 +143,20 @@ std::unique_ptr<column> make_tdigest_column(size_type num_rows,
rmm::device_async_resource_ref mr);

/**
* @brief Create a tdigest column of empty clusters.
* @brief Create a tdigest column of empty tdigests.
*
* The column created contains the specified number of rows of empty clusters.
* The column created contains the specified number of rows of empty tdigests.
*
* @param num_rows The number of rows in the output column.
* @param stream CUDA stream used for device memory operations and kernel launches.
* @param mr Device memory resource used to allocate the returned column's device memory.
*
* @returns A tdigest column of empty tdigests.
*/
CUDF_EXPORT
std::unique_ptr<column> make_tdigest_column_of_empty_clusters(size_type num_rows,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);
std::unique_ptr<column> make_empty_tdigests_column(size_type num_rows,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr);

/**
* @brief Create a scalar of an empty tdigest cluster.
Expand Down
10 changes: 5 additions & 5 deletions cpp/include/cudf_test/tdigest_utilities.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -270,7 +270,7 @@ void tdigest_simple_all_nulls_aggregation(Func op)
static_cast<column_view>(values).type(), tdigest_gen{}, op, values, delta);

// NOTE: an empty tdigest column still has 1 row.
auto expected = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto expected = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*result, *expected);
Expand Down Expand Up @@ -562,11 +562,11 @@ template <typename MergeFunc>
void tdigest_merge_empty(MergeFunc merge_op)
{
// 3 empty tdigests all in the same group
auto a = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto a = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());
auto b = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto b = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());
auto c = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto c = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());
std::vector<column_view> cols;
cols.push_back(*a);
Expand All @@ -577,7 +577,7 @@ void tdigest_merge_empty(MergeFunc merge_op)
auto const delta = 1000;
auto result = merge_op(*values, delta);

auto expected = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto expected = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());

CUDF_TEST_EXPECT_COLUMNS_EQUAL(*expected, *result);
Expand Down
8 changes: 4 additions & 4 deletions cpp/src/quantiles/tdigest/tdigest.cu
Original file line number Diff line number Diff line change
Expand Up @@ -292,9 +292,9 @@ std::unique_ptr<column> make_tdigest_column(size_type num_rows,
return make_structs_column(num_rows, std::move(children), 0, {}, stream, mr);
}

std::unique_ptr<column> make_tdigest_column_of_empty_clusters(size_type num_rows,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
std::unique_ptr<column> make_empty_tdigests_column(size_type num_rows,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
auto offsets = cudf::make_fixed_width_column(
data_type(type_id::INT32), num_rows + 1, mask_state::UNALLOCATED, stream, mr);
Expand Down Expand Up @@ -339,7 +339,7 @@ std::unique_ptr<column> make_tdigest_column_of_empty_clusters(size_type num_rows
std::unique_ptr<scalar> make_empty_tdigest_scalar(rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
auto contents = make_tdigest_column_of_empty_clusters(1, stream, mr)->release();
auto contents = make_empty_tdigests_column(1, stream, mr)->release();
return std::make_unique<struct_scalar>(
std::move(*std::make_unique<table>(std::move(contents.children))), true, stream, mr);
}
Expand Down
10 changes: 4 additions & 6 deletions cpp/src/quantiles/tdigest/tdigest_aggregation.cu
Original file line number Diff line number Diff line change
Expand Up @@ -759,7 +759,7 @@ std::unique_ptr<column> compute_tdigests(int delta,
// }
//
if (total_clusters == 0) {
return cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(1, stream, mr);
return cudf::tdigest::detail::make_empty_tdigests_column(1, stream, mr);
}

// each input group represents an individual tdigest. within each tdigest, we want the keys
Expand Down Expand Up @@ -1339,9 +1339,7 @@ std::unique_ptr<column> group_tdigest(column_view const& col,
rmm::cuda_stream_view stream,
rmm::device_async_resource_ref mr)
{
if (col.size() == 0) {
return cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(1, stream, mr);
}
if (col.size() == 0) { return cudf::tdigest::detail::make_empty_tdigests_column(1, stream, mr); }

auto const delta = max_centroids;
return cudf::type_dispatcher(col.type(),
Expand All @@ -1367,15 +1365,15 @@ std::unique_ptr<column> group_merge_tdigest(column_view const& input,
tdigest_column_view tdv(input);

if (num_groups == 0 || input.size() == 0) {
return cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(1, stream, mr);
return cudf::tdigest::detail::make_empty_tdigests_column(1, stream, mr);
}

if (tdv.means().size() == 0) {
// `group_merge_tdigest` takes the output of `typed_group_tdigest` as its input, which wipes
// out the means and weights for empty clusters. Thus, no mean here indicates that all clusters
// are empty in the input. Skip all of the complex computation below and just return
// an empty tdigest per group.
return cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(num_groups, stream, mr);
return cudf::tdigest::detail::make_empty_tdigests_column(num_groups, stream, mr);
}

// bring group offsets back to the host
Expand Down
22 changes: 11 additions & 11 deletions cpp/tests/groupby/tdigest_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -469,15 +469,15 @@ TEST_F(TDigestMergeTest, EmptyGroups)
cudf::test::fixed_width_column_wrapper<int> keys{0, 0, 0, 0, 0, 0, 0};
int const delta = 1000;

auto a = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto a = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());
auto b = cudf::type_dispatcher(
static_cast<cudf::column_view>(values_b).type(), tdigest_gen_grouped{}, keys, values_b, delta);
auto c = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto c = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());
auto d = cudf::type_dispatcher(
static_cast<cudf::column_view>(values_d).type(), tdigest_gen_grouped{}, keys, values_d, delta);
auto e = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto e = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());

std::vector<cudf::column_view> cols;
Expand Down Expand Up @@ -561,10 +561,10 @@ TEST_F(TDigestMergeTest, AllValuesAreNull)

auto const expected_computed_keys = cudf::test::fixed_width_column_wrapper<int32_t>{{0, 1, 2}};
cudf::column_view const expected_computed_keys_view{expected_computed_keys};
auto const expected_computed_vals = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
expected_computed_keys_view.size(),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
auto const expected_computed_vals =
cudf::tdigest::detail::make_empty_tdigests_column(expected_computed_keys_view.size(),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_computed_keys_view, compute_result->get_column(0).view());
// The computed values are nullable even though the input values are not.
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_computed_vals->view(),
Expand All @@ -578,10 +578,10 @@ TEST_F(TDigestMergeTest, AllValuesAreNull)

auto const expected_merged_keys = cudf::test::fixed_width_column_wrapper<int32_t>{{0, 1, 2}};
cudf::column_view const expected_merged_keys_view{expected_merged_keys};
auto const expected_merged_vals = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
expected_merged_keys_view.size(),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
auto const expected_merged_vals =
cudf::tdigest::detail::make_empty_tdigests_column(expected_merged_keys_view.size(),
cudf::get_default_stream(),
rmm::mr::get_current_device_resource());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_merged_keys_view, merge_result->get_column(0).view());
CUDF_TEST_EXPECT_COLUMNS_EQUAL(expected_merged_vals->view(), merge_result->get_column(1).view());
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/tests/quantiles/percentile_approx_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -371,7 +371,7 @@ struct PercentileApproxTest : public cudf::test::BaseFixture {};

TEST_F(PercentileApproxTest, EmptyInput)
{
auto empty_ = cudf::tdigest::detail::make_tdigest_column_of_empty_clusters(
auto empty_ = cudf::tdigest::detail::make_empty_tdigests_column(
1, cudf::get_default_stream(), cudf::get_current_device_resource_ref());
cudf::test::fixed_width_column_wrapper<double> percentiles{0.0, 0.25, 0.3};

Expand Down

0 comments on commit 9d5fe05

Please sign in to comment.