From 05c78bbbc1d1f7bcb284712b7496e68f9633b07e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 13 Dec 2023 03:52:26 +0900 Subject: [PATCH] MG C-API test failure fixes (#4047) This PR fixes MG C-API test failures in egonet (due to a C-API bug), leiden (due to a C++ API bug), graph creation (due to a bug in test code). Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) URL: https://github.com/rapidsai/cugraph/pull/4047 --- cpp/CMakeLists.txt | 2 +- cpp/src/c_api/capi_helper.cu | 98 ++++++++++++++++++++++++++ cpp/src/c_api/capi_helper.hpp | 12 ++++ cpp/src/c_api/extract_ego.cpp | 46 +++++++++++- cpp/src/community/leiden_impl.cuh | 6 +- cpp/src/detail/shuffle_vertices.cu | 12 ++++ cpp/tests/CMakeLists.txt | 4 +- cpp/tests/c_api/mg_create_graph_test.c | 22 +++--- 8 files changed, 183 insertions(+), 19 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c09fe3f4004..84a5534facd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -423,7 +423,7 @@ add_library(cugraph_c src/c_api/core_result.cpp src/c_api/extract_ego.cpp src/c_api/k_core.cpp - src/c_api/hierarchical_clustering_result.cpp + src/c_api/hierarchical_clustering_result.cpp src/c_api/induced_subgraph.cpp src/c_api/capi_helper.cu src/c_api/legacy_spectral.cpp diff --git a/cpp/src/c_api/capi_helper.cu b/cpp/src/c_api/capi_helper.cu index 0ee49f87265..f08af4137db 100644 --- a/cpp/src/c_api/capi_helper.cu +++ b/cpp/src/c_api/capi_helper.cu @@ -74,6 +74,104 @@ template void sort_by_key(raft::handle_t const& handle, raft::device_span keys, raft::device_span values); +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights) +{ + rmm::device_uvector sort_indices(edge_srcs.size(), handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + sort_indices.begin(), + sort_indices.end(), + [offset_lasts = raft::device_span(offsets.begin() + 1, offsets.end()), + source_indices = raft::device_span(source_indices.data(), + source_indices.size())] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + offset_lasts.begin(), + thrust::upper_bound(thrust::seq, offset_lasts.begin(), offset_lasts.end(), i))); + return source_indices[idx]; + }); + source_indices.resize(0, handle.get_stream()); + source_indices.shrink_to_fit(handle.get_stream()); + + auto triplet_first = + thrust::make_zip_iterator(sort_indices.begin(), edge_srcs.begin(), edge_dsts.begin()); + if (edge_weights) { + thrust::sort_by_key(handle.get_thrust_policy(), + triplet_first, + triplet_first + sort_indices.size(), + (*edge_weights).begin()); + } else { + thrust::sort(handle.get_thrust_policy(), triplet_first, triplet_first + sort_indices.size()); + } + + thrust::tabulate( + handle.get_thrust_policy(), + offsets.begin() + 1, + offsets.end(), + [sort_indices = raft::device_span(sort_indices.data(), + sort_indices.size())] __device__(size_t i) { + return static_cast(thrust::distance( + sort_indices.begin(), + thrust::upper_bound(thrust::seq, sort_indices.begin(), sort_indices.end(), i))); + }); + + return std::make_tuple( + std::move(offsets), std::move(edge_srcs), std::move(edge_dsts), std::move(edge_weights)); +} + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + } // namespace detail } // namespace c_api } // namespace cugraph diff --git a/cpp/src/c_api/capi_helper.hpp b/cpp/src/c_api/capi_helper.hpp index ce08e8d90d3..56401606477 100644 --- a/cpp/src/c_api/capi_helper.hpp +++ b/cpp/src/c_api/capi_helper.hpp @@ -36,6 +36,18 @@ void sort_by_key(raft::handle_t const& handle, raft::device_span keys, raft::device_span values); +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + } // namespace detail } // namespace c_api } // namespace cugraph diff --git a/cpp/src/c_api/extract_ego.cpp b/cpp/src/c_api/extract_ego.cpp index 931d58b5185..cbe07af2e77 100644 --- a/cpp/src/c_api/extract_ego.cpp +++ b/cpp/src/c_api/extract_ego.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -26,7 +27,10 @@ #include #include #include +#include +#include +#include #include namespace { @@ -91,9 +95,22 @@ struct extract_ego_functor : public cugraph::c_api::abstract_functor { source_vertices.size(), handle_.get_stream()); + std::optional> source_indices{std::nullopt}; + if constexpr (multi_gpu) { - source_vertices = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( - handle_, std::move(source_vertices)); + auto displacements = cugraph::host_scalar_allgather( + handle_.get_comms(), source_vertices.size(), handle_.get_stream()); + std::exclusive_scan( + displacements.begin(), displacements.end(), displacements.begin(), size_t{0}); + source_indices = rmm::device_uvector(source_vertices.size(), handle_.get_stream()); + cugraph::detail::sequence_fill(handle_.get_stream(), + (*source_indices).data(), + (*source_indices).size(), + displacements[handle_.get_comms().get_rank()]); + + std::tie(source_vertices, source_indices) = + cugraph::detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle_, std::move(source_vertices), std::move(*source_indices)); } cugraph::renumber_ext_vertices( @@ -130,6 +147,31 @@ struct extract_ego_functor : public cugraph::c_api::abstract_functor { graph_view.vertex_partition_range_lasts(), do_expensive_check_); + if constexpr (multi_gpu) { + auto recvcounts = cugraph::host_scalar_allgather( + handle_.get_comms(), (*source_indices).size(), handle_.get_stream()); + std::vector displacements(recvcounts.size()); + std::exclusive_scan(recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); + rmm::device_uvector allgathered_indices(displacements.back() + recvcounts.back(), + handle_.get_stream()); + cugraph::device_allgatherv(handle_.get_comms(), + (*source_indices).begin(), + allgathered_indices.begin(), + recvcounts, + displacements, + handle_.get_stream()); + source_indices = std::move(allgathered_indices); + + std::tie(edge_offsets, src, dst, wgt) = + cugraph::c_api::detail::reorder_extracted_egonets( + handle_, + std::move(*source_indices), + std::move(edge_offsets), + std::move(src), + std::move(dst), + std::move(wgt)); + } + result_ = new cugraph::c_api::cugraph_induced_subgraph_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(src, graph_->vertex_type_), new cugraph::c_api::cugraph_type_erased_device_array_t(dst, graph_->vertex_type_), diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index b6e20272de9..1e2b8f2ad44 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -568,17 +568,17 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle, leiden_partition_at_level( handle, dendrogram, clustering, dendrogram.num_levels()); - rmm::device_uvector unique_cluster_ids(graph_view.number_of_vertices(), + rmm::device_uvector unique_cluster_ids(graph_view.local_vertex_partition_range_size(), handle.get_stream()); thrust::copy(handle.get_thrust_policy(), clustering, - clustering + graph_view.number_of_vertices(), + clustering + graph_view.local_vertex_partition_range_size(), unique_cluster_ids.begin()); remove_duplicates(handle, unique_cluster_ids); relabel_cluster_ids( - handle, unique_cluster_ids, clustering, graph_view.number_of_vertices()); + handle, unique_cluster_ids, clustering, graph_view.local_vertex_partition_range_size()); } } // namespace detail diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/detail/shuffle_vertices.cu index bc450ce3bbf..94729a770f7 100644 --- a/cpp/src/detail/shuffle_vertices.cu +++ b/cpp/src/detail/shuffle_vertices.cu @@ -200,6 +200,12 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); +template std::tuple, rmm::device_uvector> +shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + template std::tuple, rmm::device_uvector> shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, @@ -224,6 +230,12 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); +template std::tuple, rmm::device_uvector> +shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + template std::tuple, rmm::device_uvector> shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d9c88bc179e..e9c6dc446af 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -257,7 +257,7 @@ ConfigureTest(BALANCED_TEST community/balanced_edge_test.cpp) ################################################################################################### # - EGO tests ------------------------------------------------------------------------------------- -ConfigureTest(EGO_TEST community/egonet_test.cpp GPUS 1 PERCENT 75) +ConfigureTest(EGONET_TEST community/egonet_test.cpp GPUS 1 PERCENT 75) ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- @@ -531,7 +531,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG LOUVAIN tests -------------------------------------------------------------------------- - ConfigureTestMG(MG_EGO_TEST community/mg_egonet_test.cu) + ConfigureTestMG(MG_EGONET_TEST community/mg_egonet_test.cu) ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ diff --git a/cpp/tests/c_api/mg_create_graph_test.c b/cpp/tests/c_api/mg_create_graph_test.c index fec319d1881..7156647b025 100644 --- a/cpp/tests/c_api/mg_create_graph_test.c +++ b/cpp/tests/c_api/mg_create_graph_test.c @@ -175,18 +175,18 @@ int test_create_mg_graph_multiple_edge_lists(const cugraph_resource_handle_t* ha int my_rank = cugraph_resource_handle_get_rank(handle); int comm_size = cugraph_resource_handle_get_comm_size(handle); - size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size; + size_t local_num_vertices = num_vertices / comm_size; size_t local_start_vertex = my_rank * local_num_vertices; - size_t local_num_edges = (num_edges + comm_size - 1) / comm_size; + size_t local_num_edges = num_edges / comm_size; size_t local_start_edge = my_rank * local_num_edges; - local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge); - local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex); + local_num_edges = (my_rank != (comm_size - 1)) ? local_num_edges : (num_edges - local_start_edge); + local_num_vertices = (my_rank != (comm_size - 1)) ? local_num_vertices : (num_vertices - local_start_vertex); for (size_t i = 0 ; i < num_local_arrays ; ++i) { - size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; + size_t vertex_count = local_num_vertices / num_local_arrays; size_t vertex_start = i * vertex_count; - vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start); + vertex_count = (i != (num_local_arrays - 1)) ? vertex_count : (local_num_vertices - vertex_start); ret_code = cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error); @@ -363,18 +363,18 @@ int test_create_mg_graph_multiple_edge_lists_multi_edge(const cugraph_resource_h int my_rank = cugraph_resource_handle_get_rank(handle); int comm_size = cugraph_resource_handle_get_comm_size(handle); - size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size; + size_t local_num_vertices = num_vertices / comm_size; size_t local_start_vertex = my_rank * local_num_vertices; - size_t local_num_edges = (num_edges + comm_size - 1) / comm_size; + size_t local_num_edges = num_edges / comm_size; size_t local_start_edge = my_rank * local_num_edges; - local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge); - local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex); + local_num_edges = (my_rank != (comm_size - 1)) ? local_num_edges : (num_edges - local_start_edge); + local_num_vertices = (my_rank != (comm_size - 1)) ? local_num_vertices : (num_vertices - local_start_vertex); for (size_t i = 0 ; i < num_local_arrays ; ++i) { size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; size_t vertex_start = i * vertex_count; - vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start); + vertex_count = (i != (num_local_arrays - 1)) ? vertex_count : (local_num_vertices - vertex_start); ret_code = cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error);