From 77b06663de0f1351fb28ccdcea2f0af03cdfc592 Mon Sep 17 00:00:00 2001 From: Naim Date: Tue, 5 Dec 2023 03:00:49 +0100 Subject: [PATCH] Update select_random_vertices to select as many random vertices as the local vertex partition range size on each GPU --- .../structure/select_random_vertices_impl.hpp | 12 ++-- .../mg_select_random_vertices_test.cpp | 68 ++++++++++++++++++- 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/cpp/src/structure/select_random_vertices_impl.hpp b/cpp/src/structure/select_random_vertices_impl.hpp index 5220b890c98..989ce74585d 100644 --- a/cpp/src/structure/select_random_vertices_impl.hpp +++ b/cpp/src/structure/select_random_vertices_impl.hpp @@ -236,9 +236,8 @@ rmm::device_uvector select_random_vertices( if (given_set) { mg_sample_buffer = cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning( handle, std::move(mg_sample_buffer), partition_range_lasts); - - } else { - if (!shuffle_int_to_local && + } else if (!shuffle_int_to_local) { + if (!with_replacement && select_count == static_cast(graph_view.number_of_vertices())) { // shuffle as many vertices as local vertex partition size to each GPU. 
auto& comm = handle.get_comms(); @@ -269,11 +268,10 @@ rmm::device_uvector select_random_vertices( std::tie(mg_sample_buffer, std::ignore) = cugraph::shuffle_values( handle.get_comms(), mg_sample_buffer.begin(), tx_value_counts, handle.get_stream()); - } else { - mg_sample_buffer = - cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning( - handle, std::move(mg_sample_buffer), partition_range_lasts); } + } else { + mg_sample_buffer = cugraph::detail::shuffle_int_vertices_to_local_gpu_by_vertex_partitioning( + handle, std::move(mg_sample_buffer), partition_range_lasts); } } diff --git a/cpp/tests/structure/mg_select_random_vertices_test.cpp b/cpp/tests/structure/mg_select_random_vertices_test.cpp index d000a6107fb..20a5643a825 100644 --- a/cpp/tests/structure/mg_select_random_vertices_test.cpp +++ b/cpp/tests/structure/mg_select_random_vertices_test.cpp @@ -183,6 +183,70 @@ class Tests_MGSelectRandomVertices }); } } + + std::vector sort_vertices_flags = {true, false}; + std::vector shuffle_int_to_local_flags = {true, false}; + std::vector select_counts = {mg_graph_view.number_of_vertices(), + mg_graph_view.number_of_vertices() / 4}; + + for (int i = 0; i < with_replacement_flags.size(); i++) { + for (int j = 0; j < sort_vertices_flags.size(); j++) { + for (int k = 0; k < shuffle_int_to_local_flags.size(); k++) { + for (int l = 0; l < select_counts.size(); l++) { + bool with_replacement = with_replacement_flags[i]; + bool sort_vertices = sort_vertices_flags[j]; + bool shuffle_int_to_local = shuffle_int_to_local_flags[k]; + auto select_count = static_cast(select_counts[l]); + + auto d_sampled_vertices = cugraph::select_random_vertices( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + rng_state, + select_count, + with_replacement, + sort_vertices, + shuffle_int_to_local); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); + + if 
(select_random_vertices_usecase.check_correctness) { + if (!with_replacement) { + std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); + + auto nr_duplicates = + std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), + h_sampled_vertices.end()); + + ASSERT_EQ(nr_duplicates, 0); + } + + if (shuffle_int_to_local) { + auto vertex_first = mg_graph_view.local_vertex_partition_range_first(); + auto vertex_last = mg_graph_view.local_vertex_partition_range_last(); + + std::for_each(h_sampled_vertices.begin(), + h_sampled_vertices.end(), + [vertex_first, vertex_last](vertex_t v) { + ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)); + }); + } else { + if (!with_replacement && + select_count == static_cast(mg_graph_view.number_of_vertices())) { + ASSERT_EQ(h_sampled_vertices.size(), + mg_graph_view.local_vertex_partition_range_size()); + } + + std::cout << "silv: " << shuffle_int_to_local << " sc: " << select_count + << " got: " << h_sampled_vertices.size() << std::endl; + } + } + } + } + } + } } private: @@ -242,8 +306,8 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGSelectRandomVertices_Rmat, ::testing::Combine( - ::testing::Values(SelectRandomVertices_Usecase{500, false}, - SelectRandomVertices_Usecase{500, false}), + ::testing::Values(SelectRandomVertices_Usecase{500, true}, + SelectRandomVertices_Usecase{500, true}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN()