diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
index 663f2210ef4..cd92e086329 100644
--- a/cpp/CMakeLists.txt
+++ b/cpp/CMakeLists.txt
@@ -321,7 +321,6 @@ add_library(
src/groupby/hash/groupby.cu
src/groupby/hash/hash_compound_agg_finalizer.cu
src/groupby/hash/sparse_to_dense_results.cu
- src/groupby/hash/var_hash_functor.cu
src/groupby/sort/aggregate.cpp
src/groupby/sort/group_argmax.cu
src/groupby/sort/group_argmin.cu
diff --git a/cpp/src/groupby/hash/compute_groupby.cu b/cpp/src/groupby/hash/compute_groupby.cu
index 4aa03d17999..1eb208c588d 100644
--- a/cpp/src/groupby/hash/compute_groupby.cu
+++ b/cpp/src/groupby/hash/compute_groupby.cu
@@ -82,7 +82,7 @@ std::unique_ptr
compute_groupby(table_view const& keys,
// column is indexed by the hash set
cudf::detail::result_cache sparse_results(requests.size());
- auto const set = cuco::static_set{
+ auto set = cuco::static_set{
cuco::extent{num_keys},
cudf::detail::CUCO_DESIRED_LOAD_FACTOR, // 50% occupancy
cuco::empty_key{cudf::detail::CUDF_SIZE_TYPE_SENTINEL},
diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu
index e7a7af92f15..119ac8cf6fd 100644
--- a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu
+++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cu
@@ -14,10 +14,185 @@
* limitations under the License.
*/
-#include "hash_compound_agg_finalizer.cuh"
+#include "hash_compound_agg_finalizer.hpp"
#include "helpers.cuh"
+#include "var_hash_functor.cuh"
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include
namespace cudf::groupby::detail::hash {
+template
+hash_compound_agg_finalizer::hash_compound_agg_finalizer(
+ column_view col,
+ cudf::detail::result_cache* sparse_results,
+ cudf::detail::result_cache* dense_results,
+ device_span gather_map,
+ SetType set,
+ bitmask_type const* row_bitmask,
+ rmm::cuda_stream_view stream,
+ rmm::device_async_resource_ref mr)
+ : col(col),
+ sparse_results(sparse_results),
+ dense_results(dense_results),
+ gather_map(gather_map),
+ set(set),
+ row_bitmask(row_bitmask),
+ stream(stream),
+ mr(mr)
+{  // Members are copied from the arguments; only result_type is derived here.
+ result_type =  // dictionary columns use the type of their keys, all others their own type
+ cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).keys().type() : col.type();
+}
+
+template
+auto hash_compound_agg_finalizer::to_dense_agg_result(cudf::aggregation const& agg)
+{  // Gathers the cached sparse result for (col, agg) through gather_map.
+ auto s = sparse_results->get_result(col, agg);  // looked up in the sparse cache
+ auto dense_result_table = cudf::detail::gather(table_view({std::move(s)}),
+ gather_map,
+ out_of_bounds_policy::DONT_CHECK,
+ cudf::detail::negative_index_policy::NOT_ALLOWED,
+ stream,
+ mr);
+ return std::move(dense_result_table->release()[0]);  // table was built from a single column
+}
+
+template
+auto hash_compound_agg_finalizer::gather_argminmax(aggregation const& agg)
+{  // Gathers rows of col using the dense result of agg as the gather map.
+ auto arg_result = to_dense_agg_result(agg);
+ // We make a view of the ARG(MIN/MAX) result without a null mask and gather
+ // using this map. The values in the data buffer of the ARG(MIN/MAX) result
+ // corresponding to null values were initialized to ARG(MIN/MAX)_SENTINEL,
+ // which is an out-of-bounds index value (-1) and causes the gathered
+ // value to be null.
+ column_view null_removed_map(
+ data_type(type_to_id()),
+ arg_result->size(),
+ static_cast(arg_result->view().template data()),
+ nullptr,
+ 0);
+ auto gather_argminmax =
+ cudf::detail::gather(table_view({col}),
+ null_removed_map,
+ arg_result->nullable() ? cudf::out_of_bounds_policy::NULLIFY
+ : cudf::out_of_bounds_policy::DONT_CHECK,
+ cudf::detail::negative_index_policy::NOT_ALLOWED,
+ stream,
+ mr);
+ return std::move(gather_argminmax->release()[0]);  // table was built from the single column col
+}
+
+template
+void hash_compound_agg_finalizer::visit(cudf::aggregation const& agg)
+{  // Generic case: the dense result is the sparse result gathered as-is.
+ if (dense_results->has_result(col, agg)) return;  // already present in the dense cache
+ dense_results->add_result(col, agg, to_dense_agg_result(agg));
+}
+
+template
+void hash_compound_agg_finalizer::visit(cudf::detail::min_aggregation const& agg)
+{  // MIN: string results are gathered from col via ARGMIN, other types use the sparse MIN.
+ if (dense_results->has_result(col, agg)) return;  // already present in the dense cache
+ if (result_type.id() == type_id::STRING) {
+ auto transformed_agg = make_argmin_aggregation();  // the ARGMIN result serves as gather map
+ dense_results->add_result(col, agg, gather_argminmax(*transformed_agg));  // stored under MIN
+ } else {
+ dense_results->add_result(col, agg, to_dense_agg_result(agg));
+ }
+}
+
+template
+void hash_compound_agg_finalizer::visit(cudf::detail::max_aggregation const& agg)
+{  // MAX: string results are gathered from col via ARGMAX, other types use the sparse MAX.
+ if (dense_results->has_result(col, agg)) return;  // already present in the dense cache
+
+ if (result_type.id() == type_id::STRING) {
+ auto transformed_agg = make_argmax_aggregation();  // the ARGMAX result serves as gather map
+ dense_results->add_result(col, agg, gather_argminmax(*transformed_agg));  // stored under MAX
+ } else {
+ dense_results->add_result(col, agg, to_dense_agg_result(agg));
+ }
+}
+
+template
+void hash_compound_agg_finalizer::visit(cudf::detail::mean_aggregation const& agg)
+{  // MEAN is derived as SUM divided by COUNT, both taken from the dense cache.
+ if (dense_results->has_result(col, agg)) return;  // already present in the dense cache
+
+ auto sum_agg = make_sum_aggregation();
+ auto count_agg = make_count_aggregation();
+ this->visit(*sum_agg);  // visited first so the dense lookups below can find the results
+ this->visit(*count_agg);
+ column_view sum_result = dense_results->get_result(col, *sum_agg);
+ column_view count_result = dense_results->get_result(col, *count_agg);
+
+ auto result =
+ cudf::detail::binary_operation(sum_result,
+ count_result,
+ binary_operator::DIV,
+ cudf::detail::target_type(result_type, aggregation::MEAN),
+ stream,
+ mr);
+ dense_results->add_result(col, agg, std::move(result));
+}
+
+template
+void hash_compound_agg_finalizer::visit(cudf::detail::var_aggregation const& agg)
+{  // VARIANCE: filled in sparse form by var_hash_functor, then gathered into the dense cache.
+ if (dense_results->has_result(col, agg)) return;  // already present in the dense cache
+
+ auto sum_agg = make_sum_aggregation();
+ auto count_agg = make_count_aggregation();
+ this->visit(*sum_agg);
+ this->visit(*count_agg);
+ column_view sum_result = sparse_results->get_result(col, *sum_agg);  // sparse, unlike MEAN
+ column_view count_result = sparse_results->get_result(col, *count_agg);
+
+ auto values_view = column_device_view::create(col, stream);
+ auto sum_view = column_device_view::create(sum_result, stream);
+ auto count_view = column_device_view::create(count_result, stream);
+
+ auto var_result = make_fixed_width_column(  // sized like col, i.e. one slot per input row
+ cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream);
+ auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream);
+ mutable_table_view var_table_view{{var_result->mutable_view()}};
+ cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream);
+
+ thrust::for_each_n(  // invokes the functor once per index in [0, col.size())
+ rmm::exec_policy(stream),
+ thrust::make_counting_iterator(0),
+ col.size(),
+ var_hash_functor{
+ set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof});
+ sparse_results->add_result(col, agg, std::move(var_result));  // read by to_dense_agg_result
+ dense_results->add_result(col, agg, to_dense_agg_result(agg));
+}
+
+template
+void hash_compound_agg_finalizer::visit(cudf::detail::std_aggregation const& agg)
+{  // STD is derived as the square root of the dense VARIANCE computed with the same ddof.
+ if (dense_results->has_result(col, agg)) return;  // already present in the dense cache
+ auto var_agg = make_variance_aggregation(agg._ddof);
+ this->visit(*dynamic_cast(var_agg.get()));  // presumably cast to var_aggregation - confirm
+ column_view variance = dense_results->get_result(col, *var_agg);
+
+ auto result = cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr);
+ dense_results->add_result(col, agg, std::move(result));
+}
template class hash_compound_agg_finalizer;
template class hash_compound_agg_finalizer;
diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cuh b/cpp/src/groupby/hash/hash_compound_agg_finalizer.cuh
deleted file mode 100644
index 1c40b77b5a1..00000000000
--- a/cpp/src/groupby/hash/hash_compound_agg_finalizer.cuh
+++ /dev/null
@@ -1,199 +0,0 @@
-/*
- * Copyright (c) 2019-2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-#pragma once
-
-#include "var_hash_functor.cuh"
-
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-#include
-
-namespace cudf::groupby::detail::hash {
-
-template
-class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer {
- column_view col;
- data_type result_type;
- cudf::detail::result_cache* sparse_results;
- cudf::detail::result_cache* dense_results;
- device_span gather_map;
- SetType set;
- bitmask_type const* __restrict__ row_bitmask;
- rmm::cuda_stream_view stream;
- rmm::device_async_resource_ref mr;
-
- public:
- using cudf::detail::aggregation_finalizer::visit;
-
- hash_compound_agg_finalizer(column_view col,
- cudf::detail::result_cache* sparse_results,
- cudf::detail::result_cache* dense_results,
- device_span gather_map,
- SetType set,
- bitmask_type const* row_bitmask,
- rmm::cuda_stream_view stream,
- rmm::device_async_resource_ref mr)
- : col(col),
- sparse_results(sparse_results),
- dense_results(dense_results),
- gather_map(gather_map),
- set(set),
- row_bitmask(row_bitmask),
- stream(stream),
- mr(mr)
- {
- result_type = cudf::is_dictionary(col.type()) ? cudf::dictionary_column_view(col).keys().type()
- : col.type();
- }
-
- auto to_dense_agg_result(cudf::aggregation const& agg)
- {
- auto s = sparse_results->get_result(col, agg);
- auto dense_result_table = cudf::detail::gather(table_view({std::move(s)}),
- gather_map,
- out_of_bounds_policy::DONT_CHECK,
- cudf::detail::negative_index_policy::NOT_ALLOWED,
- stream,
- mr);
- return std::move(dense_result_table->release()[0]);
- }
-
- // Enables conversion of ARGMIN/ARGMAX into MIN/MAX
- auto gather_argminmax(aggregation const& agg)
- {
- auto arg_result = to_dense_agg_result(agg);
- // We make a view of ARG(MIN/MAX) result without a null mask and gather
- // using this map. The values in data buffer of ARG(MIN/MAX) result
- // corresponding to null values was initialized to ARG(MIN/MAX)_SENTINEL
- // which is an out of bounds index value (-1) and causes the gathered
- // value to be null.
- column_view null_removed_map(
- data_type(type_to_id()),
- arg_result->size(),
- static_cast(arg_result->view().template data()),
- nullptr,
- 0);
- auto gather_argminmax =
- cudf::detail::gather(table_view({col}),
- null_removed_map,
- arg_result->nullable() ? cudf::out_of_bounds_policy::NULLIFY
- : cudf::out_of_bounds_policy::DONT_CHECK,
- cudf::detail::negative_index_policy::NOT_ALLOWED,
- stream,
- mr);
- return std::move(gather_argminmax->release()[0]);
- }
-
- // Declare overloads for each kind of aggregation to dispatch
- void visit(cudf::aggregation const& agg) override
- {
- if (dense_results->has_result(col, agg)) return;
- dense_results->add_result(col, agg, to_dense_agg_result(agg));
- }
-
- void visit(cudf::detail::min_aggregation const& agg) override
- {
- if (dense_results->has_result(col, agg)) return;
- if (result_type.id() == type_id::STRING) {
- auto transformed_agg = make_argmin_aggregation();
- dense_results->add_result(col, agg, gather_argminmax(*transformed_agg));
- } else {
- dense_results->add_result(col, agg, to_dense_agg_result(agg));
- }
- }
-
- void visit(cudf::detail::max_aggregation const& agg) override
- {
- if (dense_results->has_result(col, agg)) return;
-
- if (result_type.id() == type_id::STRING) {
- auto transformed_agg = make_argmax_aggregation();
- dense_results->add_result(col, agg, gather_argminmax(*transformed_agg));
- } else {
- dense_results->add_result(col, agg, to_dense_agg_result(agg));
- }
- }
-
- void visit(cudf::detail::mean_aggregation const& agg) override
- {
- if (dense_results->has_result(col, agg)) return;
-
- auto sum_agg = make_sum_aggregation();
- auto count_agg = make_count_aggregation();
- this->visit(*sum_agg);
- this->visit(*count_agg);
- column_view sum_result = dense_results->get_result(col, *sum_agg);
- column_view count_result = dense_results->get_result(col, *count_agg);
-
- auto result =
- cudf::detail::binary_operation(sum_result,
- count_result,
- binary_operator::DIV,
- cudf::detail::target_type(result_type, aggregation::MEAN),
- stream,
- mr);
- dense_results->add_result(col, agg, std::move(result));
- }
-
- void visit(cudf::detail::var_aggregation const& agg) override
- {
- if (dense_results->has_result(col, agg)) return;
-
- auto sum_agg = make_sum_aggregation();
- auto count_agg = make_count_aggregation();
- this->visit(*sum_agg);
- this->visit(*count_agg);
- column_view sum_result = sparse_results->get_result(col, *sum_agg);
- column_view count_result = sparse_results->get_result(col, *count_agg);
-
- auto values_view = column_device_view::create(col, stream);
- auto sum_view = column_device_view::create(sum_result, stream);
- auto count_view = column_device_view::create(count_result, stream);
-
- auto var_result = make_fixed_width_column(
- cudf::detail::target_type(result_type, agg.kind), col.size(), mask_state::ALL_NULL, stream);
- auto var_result_view = mutable_column_device_view::create(var_result->mutable_view(), stream);
- mutable_table_view var_table_view{{var_result->mutable_view()}};
- cudf::detail::initialize_with_identity(var_table_view, {agg.kind}, stream);
-
- thrust::for_each_n(
- rmm::exec_policy(stream),
- thrust::make_counting_iterator(0),
- col.size(),
- var_hash_functor{
- set, row_bitmask, *var_result_view, *values_view, *sum_view, *count_view, agg._ddof});
- sparse_results->add_result(col, agg, std::move(var_result));
- dense_results->add_result(col, agg, to_dense_agg_result(agg));
- }
-
- void visit(cudf::detail::std_aggregation const& agg) override
- {
- if (dense_results->has_result(col, agg)) return;
- auto var_agg = make_variance_aggregation(agg._ddof);
- this->visit(*dynamic_cast(var_agg.get()));
- column_view variance = dense_results->get_result(col, *var_agg);
-
- auto result = cudf::detail::unary_operation(variance, unary_operator::SQRT, stream, mr);
- dense_results->add_result(col, agg, std::move(result));
- }
-};
-
-} // namespace cudf::groupby::detail::hash
diff --git a/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp
new file mode 100644
index 00000000000..16cbe92511f
--- /dev/null
+++ b/cpp/src/groupby/hash/hash_compound_agg_finalizer.hpp
@@ -0,0 +1,69 @@
+/*
+ * Copyright (c) 2019-2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+namespace cudf::groupby::detail::hash {
+template
+class hash_compound_agg_finalizer final : public cudf::detail::aggregation_finalizer {
+ column_view col;  // column used as the key into both result caches
+ data_type result_type;  // keys type for dictionary columns, otherwise col.type()
+ cudf::detail::result_cache* sparse_results;  // source of the results that get gathered
+ cudf::detail::result_cache* dense_results;  // destination of the finalized results
+ device_span gather_map;  // map used to gather sparse results into dense ones
+ SetType set;  // handed to var_hash_functor by the VARIANCE overload
+ bitmask_type const* __restrict__ row_bitmask;  // handed to var_hash_functor as well
+ rmm::cuda_stream_view stream;
+ rmm::device_async_resource_ref mr;
+
+ public:
+ using cudf::detail::aggregation_finalizer::visit;
+
+ hash_compound_agg_finalizer(column_view col,
+ cudf::detail::result_cache* sparse_results,
+ cudf::detail::result_cache* dense_results,
+ device_span gather_map,
+ SetType set,
+ bitmask_type const* row_bitmask,
+ rmm::cuda_stream_view stream,
+ rmm::device_async_resource_ref mr);
+
+ auto to_dense_agg_result(cudf::aggregation const& agg);  // return type deduced in the .cu file
+
+ // Gathers `col` by the dense ARGMIN/ARGMAX result; used to produce MIN/MAX for strings
+ auto gather_argminmax(cudf::aggregation const& agg);
+
+ // Overloads for each kind of aggregation that needs its own finalization step
+ void visit(cudf::aggregation const& agg) override;
+
+ void visit(cudf::detail::min_aggregation const& agg) override;
+
+ void visit(cudf::detail::max_aggregation const& agg) override;
+
+ void visit(cudf::detail::mean_aggregation const& agg) override;
+
+ void visit(cudf::detail::var_aggregation const& agg) override;
+
+ void visit(cudf::detail::std_aggregation const& agg) override;
+};
+} // namespace cudf::groupby::detail::hash
diff --git a/cpp/src/groupby/hash/helpers.cuh b/cpp/src/groupby/hash/helpers.cuh
index 650b936372d..c1dd68c2b78 100644
--- a/cpp/src/groupby/hash/helpers.cuh
+++ b/cpp/src/groupby/hash/helpers.cuh
@@ -70,10 +70,20 @@ using nullable_row_comparator_t = cudf::experimental::row::equality::device_row_
cudf::nullate::DYNAMIC,
cudf::experimental::row::equality::nan_equal_physical_equality_comparator>;
-using hash_set_ref_t = cuco::
- static_set_ref>, cuco::op::find_tag, >;
-
-using nullable_hash_set_ref_t = cuco::
- static_set_ref>, cuco::op::find_tag, >;
+using hash_set_ref_t = cuco::static_set_ref<  // set ref type built on row_comparator_t
+ cudf::size_type,
+ cuda::thread_scope_device,
+ row_comparator_t,
+ probing_scheme_t,
+ cuco::aow_storage_ref>,
+ cuco::op::find_tag>;  // find_tag is the only operator tag listed
+
+using nullable_hash_set_ref_t = cuco::static_set_ref<  // same, with nullable_row_comparator_t
+ cudf::size_type,
+ cuda::thread_scope_device,
+ nullable_row_comparator_t,
+ probing_scheme_t,
+ cuco::aow_storage_ref>,
+ cuco::op::find_tag>;  // find_tag is the only operator tag listed
} // namespace cudf::groupby::detail::hash
diff --git a/cpp/src/groupby/hash/sparse_to_dense_results.cu b/cpp/src/groupby/hash/sparse_to_dense_results.cu
index 7f7290141f9..af61173fb6a 100644
--- a/cpp/src/groupby/hash/sparse_to_dense_results.cu
+++ b/cpp/src/groupby/hash/sparse_to_dense_results.cu
@@ -14,7 +14,7 @@
* limitations under the License.
*/
-#include "hash_compound_agg_finalizer.cuh"
+#include "hash_compound_agg_finalizer.hpp"
#include "helpers.cuh"
#include
diff --git a/cpp/src/groupby/hash/var_hash_functor.cu b/cpp/src/groupby/hash/var_hash_functor.cu
deleted file mode 100644
index 4881f4ed85e..00000000000
--- a/cpp/src/groupby/hash/var_hash_functor.cu
+++ /dev/null
@@ -1,26 +0,0 @@
-/*
- * Copyright (c) 2024, NVIDIA CORPORATION.
- *
- * Licensed under the Apache License, Version 2.0 (the "License");
- * you may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-#include "helpers.cuh"
-#include "var_hash_functor.cuh"
-
-namespace cudf::groupby::detail::hash {
-
-// explicit template instantiation to reduce build time
-template struct var_hash_functor;
-template struct var_hash_functor;
-
-} // namespace cudf::groupby::detail::hash
diff --git a/cpp/src/groupby/hash/var_hash_functor.cuh b/cpp/src/groupby/hash/var_hash_functor.cuh
index 98668d0cb45..abcd57263f4 100644
--- a/cpp/src/groupby/hash/var_hash_functor.cuh
+++ b/cpp/src/groupby/hash/var_hash_functor.cuh
@@ -16,6 +16,8 @@
#pragma once
+#include "helpers.cuh"
+
#include
#include
#include