From c568fd0b440f8afaf805ceaa492a59d5a2917be7 Mon Sep 17 00:00:00 2001 From: Naim Date: Tue, 5 Dec 2023 01:00:26 +0100 Subject: [PATCH] Update select_random_vertices to select as many random vertices as the local vertex partition range size on each GPU --- cpp/include/cugraph/graph_functions.hpp | 6 ++- .../structure/select_random_vertices_impl.hpp | 45 ++++++++++++++++++- .../structure/select_random_vertices_mg.cu | 6 +++ .../structure/select_random_vertices_sg.cu | 6 +++ 4 files changed, 60 insertions(+), 3 deletions(-) diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 6a75a420bf8..453f8b0bd33 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -903,6 +903,9 @@ weight_t compute_total_edge_weight( * @param select_count The number of vertices to select from the graph * @param with_replacement If true, select with replacement, if false select without replacement * @param sort_vertices If true, return the sorted vertices (in the ascending order). + * @param shuffle_int_to_local If @p given_set is specified or this flag is true, shuffle internal + * (i.e. renumbered) vertices to their local GPUs based on vertex partitioning; otherwise (no + * @p given_set and flag is false) shuffle as many vertices as the local vertex partition size to each GPU. * @return Device vector of selected vertices. 
*/ template @@ -914,7 +917,8 @@ rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, - bool do_expensive_check = false); + bool shuffle_int_to_local = true, + bool do_expensive_check = false); /** * @brief renumber sampling output diff --git a/cpp/src/structure/select_random_vertices_impl.hpp b/cpp/src/structure/select_random_vertices_impl.hpp index b6a0c364848..c142bfab101 100644 --- a/cpp/src/structure/select_random_vertices_impl.hpp +++ b/cpp/src/structure/select_random_vertices_impl.hpp @@ -52,6 +52,7 @@ rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check) { size_t num_of_elements_in_given_set{0}; @@ -232,8 +233,48 @@ rmm::device_uvector select_random_vertices( } if constexpr (multi_gpu) { - mg_sample_buffer = cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning( - handle, std::move(mg_sample_buffer), partition_range_lasts); + if (given_set) { + mg_sample_buffer = cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning( + handle, std::move(mg_sample_buffer), partition_range_lasts); + } else { + if (shuffle_int_to_local) { + mg_sample_buffer = + cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning( + handle, std::move(mg_sample_buffer), partition_range_lasts); + + } else { + // shuffle as many vertices as local vertex partition size to each GPU. 
+ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto const comm_rank = comm.get_rank(); + std::vector tx_value_counts(comm_size, 0); + auto sample_buffer_sizes = cugraph::host_scalar_allgather( + handle.get_comms(), mg_sample_buffer.size(), handle.get_stream()); + + auto expected_sample_buffer_sizes = cugraph::host_scalar_allgather( + handle.get_comms(), graph_view.local_vertex_partition_range_size(), handle.get_stream()); + + std::vector nr_smaples(comm_size, 0); + + // find out how many elements current GPU needs to send to other GPUs + for (int i = 0; i < comm_size; i++) { + size_t nr_samples_ith_gpu = sample_buffer_sizes[i]; + for (int j = 0; nr_samples_ith_gpu > 0 && j < comm_size; j++) { + if (expected_sample_buffer_sizes[j] > static_cast(nr_smaples[j])) { + size_t delta = + std::min(nr_samples_ith_gpu, expected_sample_buffer_sizes[j] - nr_smaples[j]); + if (comm_rank == i) { tx_value_counts[j] = delta; } + nr_smaples[j] += delta; + nr_samples_ith_gpu -= delta; + } + } + } + + std::tie(mg_sample_buffer, std::ignore) = cugraph::shuffle_values( + handle.get_comms(), mg_sample_buffer.begin(), tx_value_counts, handle.get_stream()); + } + } } if (given_set) { diff --git a/cpp/src/structure/select_random_vertices_mg.cu b/cpp/src/structure/select_random_vertices_mg.cu index 595da12f678..1c80f441d76 100644 --- a/cpp/src/structure/select_random_vertices_mg.cu +++ b/cpp/src/structure/select_random_vertices_mg.cu @@ -26,6 +26,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -36,6 +37,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -46,6 +48,7 @@ template rmm::device_uvector 
select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -56,6 +59,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -66,6 +70,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -76,6 +81,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); } // namespace cugraph diff --git a/cpp/src/structure/select_random_vertices_sg.cu b/cpp/src/structure/select_random_vertices_sg.cu index 1ca1878c9db..1cf6002d729 100644 --- a/cpp/src/structure/select_random_vertices_sg.cu +++ b/cpp/src/structure/select_random_vertices_sg.cu @@ -26,6 +26,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -36,6 +37,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -46,6 +48,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -56,6 +59,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool 
sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -66,6 +70,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); template rmm::device_uvector select_random_vertices( @@ -76,6 +81,7 @@ template rmm::device_uvector select_random_vertices( size_t select_count, bool with_replacement, bool sort_vertices, + bool shuffle_int_to_local, bool do_expensive_check); } // namespace cugraph