Make vertex and edge shuffling function public

rapidsai · Mar 9, 2024 · 8fb86fb · 8fb86fb
1 parent 47119c3
commit 8fb86fb
Show file tree

Hide file tree

Showing 15 changed files with 110 additions and 73 deletions.
diff --git a/cpp/include/cugraph/graph_partition_utils.cuh b/cpp/include/cugraph/graph_partition_utils.cuh
@@ -0,0 +1,71 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cugraph/partition_manager.hpp>
+
+#include <raft/core/device_span.hpp>
+
+#include <cuco/hash_functions.cuh>
+
+namespace cugraph {
+
+// Maps a vertex value to an int rank: hash -> (% comm_size) vertex partition ID -> comm rank.
+template <typename vertex_t>
+struct compute_gpu_id_from_ext_vertex_t {
+  int comm_size{0};  // hash modulus; must be non-zero when operator() is called
+  int major_comm_size{0};
+  int minor_comm_size{0};
+
+  __host__ __device__ int operator()(vertex_t v) const
+  {
+    using hasher_t = cuco::detail::MurmurHash3_32<vertex_t>;
+    return partition_manager::compute_global_comm_rank_from_vertex_partition_id(
+      major_comm_size, minor_comm_size, static_cast<int>(hasher_t{}(v) % comm_size));
+  }
+};
+
+// Maps an edge's (major, minor) endpoint values to an int rank derived from two sub-ranks.
+template <typename vertex_t>
+struct compute_gpu_id_from_ext_edge_endpoints_t {
+  int comm_size{0};  // hash modulus; must be non-zero when operator() is called
+  int major_comm_size{0};
+  int minor_comm_size{0};
+
+  // Each endpoint is hashed and reduced modulo comm_size; the major's result modulo
+  // major_comm_size and the minor's result divided by major_comm_size form the two sub-ranks.
+  __host__ __device__ int operator()(vertex_t major, vertex_t minor) const
+  {
+    cuco::detail::MurmurHash3_32<vertex_t> hasher{};
+    auto const major_partition = static_cast<int>(hasher(major) % comm_size);
+    auto const minor_partition = static_cast<int>(hasher(minor) % comm_size);
+    return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks(
+      major_comm_size,
+      minor_comm_size,
+      major_partition % major_comm_size,
+      minor_partition / major_comm_size);
+  }
+
+  // Tuple form of the call above: unpacks (major, minor) and forwards to it.
+  // NOTE(review): thrust::tuple is used, but this header does not include <thrust/tuple.h>.
+  __host__ __device__ int operator()(
+    thrust::tuple<vertex_t, vertex_t> pair /* major, minor */) const
+  {
+    return (*this)(thrust::get<0>(pair), thrust::get<1>(pair));
+  }
+};
+
+}  // namespace cugraph
diff --git a/cpp/src/community/detail/common_methods.cuh b/cpp/src/community/detail/common_methods.cuh
@@ -28,6 +28,7 @@
 
 #include <cugraph/detail/utility_wrappers.hpp>
 #include <cugraph/graph_functions.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 
 #include <thrust/binary_search.h>
 #include <thrust/execution_policy.h>
@@ -278,7 +279,7 @@ rmm::device_uvector<vertex_t> update_clustering_by_delta_modularity(
     auto& minor_comm           = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
     auto const minor_comm_size = minor_comm.get_size();
 
-    cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
+    cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
       comm_size, major_comm_size, minor_comm_size};
 
     kv_store_t<vertex_t, weight_t, false> cluster_key_weight_map(

diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh
@@ -178,7 +178,7 @@ refine_clustering(
     auto& minor_comm           = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
     auto const minor_comm_size = minor_comm.get_size();
 
-    cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
+    cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
       comm_size, major_comm_size, minor_comm_size};
 
     vertex_louvain_cluster_weights =
@@ -460,7 +460,7 @@ refine_clustering(
         major_comm_size,
         minor_comm_size};
 
-      // cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
+      // cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
       //   comm_size, major_comm_size, minor_comm_size};
 
       louvain_of_leiden_keys_used_in_edge_reduction =
@@ -850,7 +850,7 @@ refine_clustering(
       major_comm_size,
       minor_comm_size};
 
-    // cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
+    // cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t> vertex_to_gpu_id_op{
     //   comm_size, major_comm_size, minor_comm_size};
 
     lovain_of_leiden_cluster_keys =

diff --git a/cpp/src/detail/collect_local_vertex_values.cu b/cpp/src/detail/collect_local_vertex_values.cu
@@ -17,6 +17,7 @@
 #include "detail/graph_partition_utils.cuh"
 
 #include <cugraph/graph_functions.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/utilities/shuffle_comm.cuh>
 
 #include <cuda/functional>
@@ -50,7 +51,7 @@ rmm::device_uvector<value_t> collect_local_vertex_values_from_ext_vertex_value_p
       d_vertices.begin(),
       d_vertices.end(),
       d_values.begin(),
-      cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+      cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
         comm_size, major_comm_size, minor_comm_size},
       handle.get_stream());
   }

diff --git a/cpp/src/detail/graph_partition_utils.cuh b/cpp/src/detail/graph_partition_utils.cuh
@@ -35,21 +35,6 @@
 namespace cugraph {
 namespace detail {
 
-template <typename vertex_t>
-struct compute_gpu_id_from_ext_vertex_t {
-  int comm_size{0};
-  int major_comm_size{0};
-  int minor_comm_size{0};
-
-  __host__ __device__ int operator()(vertex_t v) const
-  {
-    cuco::detail::MurmurHash3_32<vertex_t> hash_func{};
-    auto vertex_partition_id = static_cast<int>(hash_func(v) % comm_size);
-    return partition_manager::compute_global_comm_rank_from_vertex_partition_id(
-      major_comm_size, minor_comm_size, vertex_partition_id);
-  }
-};
-
 template <typename vertex_t>
 struct compute_gpu_id_from_int_vertex_t {
   raft::device_span<vertex_t const> vertex_partition_range_lasts{};
@@ -91,36 +76,6 @@ struct compute_vertex_partition_id_from_int_vertex_t {
   }
 };
 
-template <typename vertex_t>
-struct compute_gpu_id_from_ext_edge_endpoints_t {
-  int comm_size{0};
-  int major_comm_size{0};
-  int minor_comm_size{0};
-
-  __host__ __device__ int operator()(vertex_t major, vertex_t minor) const
-  {
-    cuco::detail::MurmurHash3_32<vertex_t> hash_func{};
-    auto major_vertex_partition_id = static_cast<int>(hash_func(major) % comm_size);
-    auto minor_vertex_partition_id = static_cast<int>(hash_func(minor) % comm_size);
-    auto major_comm_rank           = major_vertex_partition_id % major_comm_size;
-    auto minor_comm_rank           = minor_vertex_partition_id / major_comm_size;
-    return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks(
-      major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank);
-  }
-
-  __host__ __device__ int operator()(
-    thrust::tuple<vertex_t, vertex_t> pair /* major, minor */) const
-  {
-    cuco::detail::MurmurHash3_32<vertex_t> hash_func{};
-    auto major_vertex_partition_id = static_cast<int>(hash_func(thrust::get<0>(pair)) % comm_size);
-    auto minor_vertex_partition_id = static_cast<int>(hash_func(thrust::get<1>(pair)) % comm_size);
-    auto major_comm_rank           = major_vertex_partition_id % major_comm_size;
-    auto minor_comm_rank           = minor_vertex_partition_id / major_comm_size;
-    return partition_manager::compute_global_comm_rank_from_graph_subcomm_ranks(
-      major_comm_size, minor_comm_size, major_comm_rank, minor_comm_rank);
-  }
-};
-
 template <typename vertex_t>
 struct compute_gpu_id_from_int_edge_endpoints_t {
   raft::device_span<vertex_t const> vertex_partition_range_lasts{};

diff --git a/cpp/src/detail/shuffle_vertex_pairs.cu b/cpp/src/detail/shuffle_vertex_pairs.cu
@@ -17,6 +17,7 @@
 
 #include <cugraph/detail/shuffle_wrappers.hpp>
 #include <cugraph/graph_functions.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/partition_manager.hpp>
 #include <cugraph/utilities/host_scalar_comm.hpp>
 #include <cugraph/utilities/shuffle_comm.cuh>
@@ -308,7 +309,7 @@ shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning(
     std::move(weights),
     std::move(edge_ids),
     std::move(edge_types),
-    cugraph::detail::compute_gpu_id_from_ext_edge_endpoints_t<vertex_t>{
+    cugraph::compute_gpu_id_from_ext_edge_endpoints_t<vertex_t>{
       comm_size, major_comm_size, minor_comm_size});
 }
 

diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/detail/shuffle_vertices.cu
@@ -16,6 +16,7 @@
 #include "detail/graph_partition_utils.cuh"
 
 #include <cugraph/detail/shuffle_wrappers.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/utilities/shuffle_comm.cuh>
 
 #include <thrust/tuple.h>
@@ -73,11 +74,10 @@ rmm::device_uvector<vertex_t> shuffle_ext_vertices_to_local_gpu_by_vertex_partit
   auto& minor_comm           = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
   auto const minor_comm_size = minor_comm.get_size();
 
-  return shuffle_vertices_by_gpu_id_impl(
-    handle,
-    std::move(vertices),
-    cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
-      comm_size, major_comm_size, minor_comm_size});
+  return shuffle_vertices_by_gpu_id_impl(handle,
+                                         std::move(vertices),
+                                         cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+                                           comm_size, major_comm_size, minor_comm_size});
 }
 
 template <typename vertex_t, typename value_t>
@@ -97,7 +97,7 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning(
     handle,
     std::move(vertices),
     std::move(values),
-    cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+    cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
       comm_size, major_comm_size, minor_comm_size});
 }
 

diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh
@@ -23,6 +23,7 @@
 #include <cugraph/edge_partition_device_view.cuh>
 #include <cugraph/edge_partition_endpoint_property_device_view.cuh>
 #include <cugraph/edge_src_dst_property.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/graph_view.hpp>
 #include <cugraph/utilities/dataframe_buffer.hpp>
 #include <cugraph/utilities/device_functors.cuh>
@@ -205,7 +206,7 @@ struct reduce_with_init_t {
  * destinations assigned to this process in multi-GPU). Use  cugraph::edge_dst_property_t::view().
  * Use update_edge_dst_property to fill the wrapper.
  * @param map_unique_key_first Iterator pointing to the first (inclusive) key in (key, value) pairs
- * (assigned to this process in multi-GPU, `cugraph::detail::compute_gpu_id_from_ext_vertex_t` is
+ * (assigned to this process in multi-GPU, `cugraph::compute_gpu_id_from_ext_vertex_t` is
  * used to map keys to processes). (Key, value) pairs may be provided by
  * transform_reduce_by_src_key_e() or transform_reduce_by_dst_key_e().
  * @param map_unique_key_last Iterator pointing to the last (exclusive) key in (key, value) pairs
@@ -730,7 +731,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e(
         collect_values_for_unique_keys(handle,
                                        kv_store_view,
                                        std::move(unique_minor_keys),
-                                       cugraph::detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+                                       cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
                                          comm_size, major_comm_size, minor_comm_size});
 
       if constexpr (KVStoreViewType::binary_search) {

diff --git a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh
@@ -21,6 +21,7 @@
 #include <cugraph/edge_partition_device_view.cuh>
 #include <cugraph/edge_partition_endpoint_property_device_view.cuh>
 #include <cugraph/edge_src_dst_property.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/graph_view.hpp>
 #include <cugraph/partition_manager.hpp>
 #include <cugraph/utilities/dataframe_buffer.hpp>
@@ -564,7 +565,7 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle,
           tmp_keys.end(),
           get_dataframe_buffer_begin(tmp_value_buffer),
           [key_func =
-             detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+             cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
                comm_size, major_comm_size, minor_comm_size}] __device__(auto val) {
             return key_func(val);
           },

diff --git a/cpp/src/structure/create_graph_from_edgelist_impl.cuh b/cpp/src/structure/create_graph_from_edgelist_impl.cuh
@@ -22,6 +22,7 @@
 #include <cugraph/detail/utility_wrappers.hpp>
 #include <cugraph/graph.hpp>
 #include <cugraph/graph_functions.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/graph_view.hpp>
 #include <cugraph/partition_manager.hpp>
 #include <cugraph/utilities/device_comm.hpp>
@@ -148,7 +149,7 @@ void expensive_check_edgelist(raft::handle_t const& handle,
                          (*vertices).end(),
                          [comm_rank,
                           key_func =
-                            detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+                            cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
                               comm_size, major_comm_size, minor_comm_size}] __device__(auto val) {
                            return key_func(val) != comm_rank;
                          }) == 0,
@@ -163,7 +164,7 @@ void expensive_check_edgelist(raft::handle_t const& handle,
                        edge_first + edgelist_majors.size(),
                        [comm_rank,
                         gpu_id_key_func =
-                          detail::compute_gpu_id_from_ext_edge_endpoints_t<vertex_t>{
+                          cugraph::compute_gpu_id_from_ext_edge_endpoints_t<vertex_t>{
                             comm_size, major_comm_size, minor_comm_size}] __device__(auto e) {
                          return (gpu_id_key_func(e) != comm_rank);
                        }) == 0,

diff --git a/cpp/src/structure/relabel_impl.cuh b/cpp/src/structure/relabel_impl.cuh
@@ -20,6 +20,7 @@
 
 #include <cugraph/graph.hpp>
 #include <cugraph/graph_functions.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/graph_view.hpp>
 #include <cugraph/utilities/error.hpp>
 #include <cugraph/utilities/shuffle_comm.cuh>
@@ -67,7 +68,7 @@ void relabel(raft::handle_t const& handle,
     auto& minor_comm           = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
     auto const minor_comm_size = minor_comm.get_size();
 
-    auto key_func = detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+    auto key_func = cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
       comm_size, major_comm_size, minor_comm_size};
 
     // find unique old labels (to be relabeled)

diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh
@@ -20,6 +20,7 @@
 
 #include <cugraph/detail/shuffle_wrappers.hpp>
 #include <cugraph/graph_functions.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/graph_view.hpp>
 #include <cugraph/utilities/device_comm.hpp>
 #include <cugraph/utilities/device_functors.cuh>
@@ -78,7 +79,7 @@ template <typename vertex_t>
 struct find_unused_id_t {
   raft::device_span<vertex_t const> sorted_local_vertices{};
   size_t num_workers{};
-  compute_gpu_id_from_ext_vertex_t<vertex_t> gpu_id_op{};
+  cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t> gpu_id_op{};
   int comm_rank{};
   vertex_t invalid_id{};
 
@@ -199,16 +200,16 @@ std::optional<vertex_t> find_locally_unused_ext_vertex_id(
   auto num_workers =
     std::min(static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * size_t{1024},
              sorted_local_vertices.size() + size_t{1});
-  auto gpu_id_op = compute_gpu_id_from_ext_vertex_t<vertex_t>{int{1}, int{1}, int{1}};
+  auto gpu_id_op = cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{int{1}, int{1}, int{1}};
   if (multi_gpu && (handle.get_comms().get_size() > int{1})) {
     auto& comm                 = handle.get_comms();
     auto const comm_size       = comm.get_size();
     auto& major_comm           = handle.get_subcomm(cugraph::partition_manager::major_comm_name());
     auto const major_comm_size = major_comm.get_size();
     auto& minor_comm           = handle.get_subcomm(cugraph::partition_manager::minor_comm_name());
     auto const minor_comm_size = minor_comm.get_size();
-    gpu_id_op =
-      compute_gpu_id_from_ext_vertex_t<vertex_t>{comm_size, major_comm_size, minor_comm_size};
+    gpu_id_op                  = cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+      comm_size, major_comm_size, minor_comm_size};
   }
   auto unused_id = thrust::transform_reduce(
     handle.get_thrust_policy(),
@@ -665,7 +666,7 @@ void expensive_check_edgelist(
            minor_comm_rank,
            i,
            gpu_id_key_func =
-             detail::compute_gpu_id_from_ext_edge_endpoints_t<vertex_t>{
+             cugraph::compute_gpu_id_from_ext_edge_endpoints_t<vertex_t>{
                comm_size, major_comm_size, minor_comm_size},
            local_edge_partition_id_key_func =
              detail::compute_local_edge_partition_id_from_ext_edge_endpoints_t<vertex_t>{
@@ -708,7 +709,7 @@ void expensive_check_edgelist(
                          (*sorted_local_vertices).end(),
                          [comm_rank,
                           key_func =
-                            detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+                            cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
                               comm_size, major_comm_size, minor_comm_size}] __device__(auto val) {
                            return key_func(val) != comm_rank;
                          }) == 0,

diff --git a/cpp/src/structure/renumber_utils_impl.cuh b/cpp/src/structure/renumber_utils_impl.cuh
@@ -21,6 +21,7 @@
 
 #include <cugraph/graph.hpp>
 #include <cugraph/graph_functions.hpp>
+#include <cugraph/graph_partition_utils.cuh>
 #include <cugraph/utilities/error.hpp>
 #include <cugraph/utilities/host_scalar_comm.hpp>
 #include <cugraph/utilities/shuffle_comm.cuh>
@@ -405,7 +406,7 @@ void renumber_ext_vertices(raft::handle_t const& handle,
       collect_values_for_unique_keys(handle,
                                      local_renumber_map.view(),
                                      std::move(sorted_unique_ext_vertices),
-                                     detail::compute_gpu_id_from_ext_vertex_t<vertex_t>{
+                                     cugraph::compute_gpu_id_from_ext_vertex_t<vertex_t>{
                                        comm_size, major_comm_size, minor_comm_size});
 
     renumber_map_ptr = std::make_unique<kv_store_t<vertex_t, vertex_t, false>>(