From 0e0b36371bfc5d3055007d596e383d5e12ddb457 Mon Sep 17 00:00:00 2001 From: Anthony Mahanna <43019056+aMahanna@users.noreply.github.com> Date: Tue, 12 Mar 2024 12:40:08 -0400 Subject: [PATCH 1/5] [docs] fix GNN typo (#4196) Authors: - Anthony Mahanna (https://github.com/aMahanna) - Alex Barghi (https://github.com/alexbarghi-nv) - Ralph Liu (https://github.com/nv-rliu) - Brad Rees (https://github.com/BradReesWork) Approvers: - Don Acosta (https://github.com/acostadon) URL: https://github.com/rapidsai/cugraph/pull/4196 From e4b58ec04e4add07578ce779977835cfe28f516a Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Tue, 12 Mar 2024 19:00:31 +0100 Subject: [PATCH 2/5] Make external vertex and edge shuffling function public (#4227) Make vertex and edge shuffling function public Authors: - Naim (https://github.com/naimnv) - Ralph Liu (https://github.com/nv-rliu) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/4227 --- cpp/CMakeLists.txt | 4 +- cpp/include/cugraph/graph_functions.hpp | 63 +++++++++++ .../shuffle_vertex_pairs.cu | 102 ++++++++++++++++++ .../{detail => utilities}/shuffle_vertices.cu | 70 ++++++++++++ 4 files changed, 237 insertions(+), 2 deletions(-) rename cpp/src/{detail => utilities}/shuffle_vertex_pairs.cu (80%) rename cpp/src/{detail => utilities}/shuffle_vertices.cu (73%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index a3392627fb8..3131404712f 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -186,9 +186,9 @@ endif() # which should give us a better parallel schedule. set(CUGRAPH_SOURCES - src/detail/shuffle_vertices.cu + src/utilities/shuffle_vertices.cu src/detail/permute_range.cu - src/detail/shuffle_vertex_pairs.cu + src/utilities/shuffle_vertex_pairs.cu src/detail/collect_local_vertex_values.cu src/detail/groupby_and_count.cu src/detail/collect_comm_wrapper.cu diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 90425f86bef..6d4470e8251 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1052,4 +1052,67 @@ remove_multi_edges(raft::handle_t const& handle, std::optional>&& edgelist_edge_types, bool keep_min_value_edge = false); +/** + * @brief Shuffle external vertex ids to the proper GPU. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices List of vertex ids + * @return Vector of vertex ids mapped to this GPU. + */ +template +rmm::device_uvector shuffle_external_vertices(raft::handle_t const& handle, + rmm::device_uvector&& vertices); + +/** + * @brief Shuffle external vertex ids and values to the proper GPU. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam value_t Type of values. currently supported types are int32_t, + * int64_t, size_t, float and double. + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param vertices List of vertex ids + * @param values List of values + * @return Tuple of vectors storing vertex ids and values mapped to this GPU. + */ +template +std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +/** + * @brief Shuffle external edges to the proper GPU. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weight. Currently float and double are supported. + * + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edge_srcs List of source vertex ids + * @param edge_dsts List of destination vertex ids + * @param edge_weights Optional list of edge weights + * @param edge_ids Optional list of edge ids + * @param edge_types Optional list of edge types + * @return Tuple of vectors storing edge sources, destinations, optional weights, + * optional edge ids, optional edge types mapped to this GPU. + */ +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + } // namespace cugraph diff --git a/cpp/src/detail/shuffle_vertex_pairs.cu b/cpp/src/utilities/shuffle_vertex_pairs.cu similarity index 80% rename from cpp/src/detail/shuffle_vertex_pairs.cu rename to cpp/src/utilities/shuffle_vertex_pairs.cu index 33a7834f5ff..b473796aa9d 100644 --- a/cpp/src/detail/shuffle_vertex_pairs.cu +++ b/cpp/src/utilities/shuffle_vertex_pairs.cu @@ -519,4 +519,106 @@ shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( std::vector const& vertex_partition_range_lasts); } // namespace detail + +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights, + std::optional>&& edge_ids, + std::optional>&& edge_types) +{ + auto& comm = handle.get_comms(); + auto const comm_size = comm.get_size(); + auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); + auto const major_comm_size = major_comm.get_size(); + auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); + auto const minor_comm_size = minor_comm.get_size(); + + return detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle, + std::move(edge_srcs), + std::move(edge_dsts), + std::move(edge_weights), + std::move(edge_ids), + std::move(edge_types)); +} + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>> +shuffle_external_edges(raft::handle_t const& handle, + rmm::device_uvector&& majors, + rmm::device_uvector&& minors, + std::optional>&& weights, + std::optional>&& edge_ids, + std::optional>&& edge_types); + } // namespace cugraph diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/utilities/shuffle_vertices.cu similarity index 73% rename from cpp/src/detail/shuffle_vertices.cu rename to cpp/src/utilities/shuffle_vertices.cu index be6875f1073..b396201f509 100644 --- a/cpp/src/detail/shuffle_vertices.cu +++ b/cpp/src/utilities/shuffle_vertices.cu @@ -249,4 +249,74 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& values); } // namespace detail + +template +std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values) +{ + return detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, std::move(vertices), std::move(values)); +} + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template std::tuple, rmm::device_uvector> +shuffle_external_vertex_value_pairs(raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + +template +rmm::device_uvector shuffle_external_vertices(raft::handle_t const& handle, + rmm::device_uvector&& vertices) +{ + return detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning(handle, + std::move(vertices)); +} + +template rmm::device_uvector shuffle_external_vertices( + raft::handle_t const& handle, rmm::device_uvector&& d_vertices); + +template rmm::device_uvector shuffle_external_vertices( + raft::handle_t const& handle, rmm::device_uvector&& d_vertices); + } // namespace cugraph From a4eab99f8f0ff50aab980dfab1fffa68e8983942 Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Tue, 12 Mar 2024 19:01:20 +0100 Subject: [PATCH 3/5] Graph coloring and MIS (#4211) This PR implements - MNMG Graph Coloring - MNMG MIS for general graphs Closes #4230 Closes #4231 Authors: - Naim (https://github.com/naimnv) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4211 --- cpp/CMakeLists.txt | 8 +- cpp/include/cugraph/algorithms.hpp | 30 +- .../detail/maximal_independent_moves.cuh | 313 ++++++++++++++++++ .../maximal_independent_moves.hpp} | 6 +- .../detail/maximal_independent_moves_mg.cu | 38 +++ .../detail/maximal_independent_moves_sg.cu | 37 +++ cpp/src/community/detail/refine_impl.cuh | 6 +- .../detail => components}/mis_impl.cuh | 37 ++- .../detail => components}/mis_mg.cu | 8 +- .../detail => components}/mis_sg.cu | 7 +- cpp/src/components/vertex_coloring_impl.cuh | 150 +++++++++ cpp/src/components/vertex_coloring_mg.cu | 35 ++ cpp/src/components/vertex_coloring_sg.cu | 35 ++ cpp/tests/CMakeLists.txt | 20 +- .../{community => components}/mg_mis_test.cu | 19 +- .../components/mg_vertex_coloring_test.cu | 275 +++++++++++++++ cpp/tests/components/mis_test.cu | 240 ++++++++++++++ cpp/tests/components/vertex_coloring_test.cu | 244 ++++++++++++++ 18 files changed, 1468 insertions(+), 40 deletions(-) create mode 100644 cpp/src/community/detail/maximal_independent_moves.cuh rename cpp/src/community/{mis.hpp => detail/maximal_independent_moves.hpp} (91%) create mode 100644 cpp/src/community/detail/maximal_independent_moves_mg.cu create mode 100644 cpp/src/community/detail/maximal_independent_moves_sg.cu rename cpp/src/{community/detail => components}/mis_impl.cuh (90%) rename cpp/src/{community/detail => components}/mis_mg.cu (83%) rename cpp/src/{community/detail => components}/mis_sg.cu (82%) create mode 100644 cpp/src/components/vertex_coloring_impl.cuh create mode 100644 cpp/src/components/vertex_coloring_mg.cu create mode 100644 cpp/src/components/vertex_coloring_sg.cu rename cpp/tests/{community => components}/mg_mis_test.cu (95%) create mode 100644 cpp/tests/components/mg_vertex_coloring_test.cu create mode 100644 cpp/tests/components/mis_test.cu create mode 100644 cpp/tests/components/vertex_coloring_test.cu diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 3131404712f..b12403710ab 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -197,8 +197,8 @@ set(CUGRAPH_SOURCES src/community/detail/common_methods_sg.cu src/community/detail/refine_sg.cu src/community/detail/refine_mg.cu - src/community/detail/mis_sg.cu - src/community/detail/mis_mg.cu + src/community/detail/maximal_independent_moves_sg.cu + src/community/detail/maximal_independent_moves_mg.cu src/detail/utility_wrappers.cu src/structure/graph_view_mg.cu src/structure/remove_self_loops.cu @@ -295,6 +295,10 @@ set(CUGRAPH_SOURCES src/tree/legacy/mst.cu src/components/weakly_connected_components_sg.cu src/components/weakly_connected_components_mg.cu + src/components/mis_sg.cu + src/components/mis_mg.cu + src/components/vertex_coloring_sg.cu + src/components/vertex_coloring_mg.cu src/structure/create_graph_from_edgelist_sg.cu src/structure/create_graph_from_edgelist_mg.cu src/structure/symmetrize_edgelist_sg.cu diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 5c29604a5a7..1471d340cec 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2340,15 +2340,41 @@ std::tuple, rmm::device_uvector> k_hop_nbr * handles to various CUDA libraries) to run graph algorithms. * @param graph_view Graph view object. * @param rng_state The RngState instance holding pseudo-random number generator state. - * @return A device vector containing vertices found in the maximal independent set + * @return A device vector containing vertices in the maximal independent set. */ - template rmm::device_uvector maximal_independent_set( raft::handle_t const& handle, graph_view_t const& graph_view, raft::random::RngState& rng_state); +/* + * @brief Find a Greedy Vertex Coloring + * + * A vertex coloring is an assignment of colors or labels to each vertex of a graph so that + * no two adjacent vertices have the same color or label. Finding the minimum number of colors + * needed to color the vertices of a graph is an NP-hard problem and therefore for practical + * use cases greedy coloring is used. Here we provide an implementation of greedy vertex + * coloring based on maximal independent set. + * See + * https://research.nvidia.com/sites/default/files/pubs/2015-05_Parallel-Graph-Coloring/nvr-2015-001.pdf + * for further information. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param rng_state The RngState instance holding pseudo-random number generator state. + * @return A device vector containing color for each vertex. + */ +template +rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state); + } // namespace cugraph /** diff --git a/cpp/src/community/detail/maximal_independent_moves.cuh b/cpp/src/community/detail/maximal_independent_moves.cuh new file mode 100644 index 00000000000..82d20a04203 --- /dev/null +++ b/cpp/src/community/detail/maximal_independent_moves.cuh @@ -0,0 +1,313 @@ + +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "maximal_independent_moves.hpp" +#include "prims/fill_edge_src_dst_property.cuh" +#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" +#include "prims/update_edge_src_dst_property.cuh" + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cugraph { + +namespace detail { + +template +rmm::device_uvector maximal_independent_moves( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + raft::random::RngState& rng_state) +{ + using GraphViewType = cugraph::graph_view_t; + + vertex_t local_vtx_partitoin_size = graph_view.local_vertex_partition_range_size(); + + rmm::device_uvector remaining_vertices(local_vtx_partitoin_size, handle.get_stream()); + + auto vertex_begin = + thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()); + auto vertex_end = thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()); + + // Compute out-degree + auto out_degrees = graph_view.compute_out_degrees(handle); + + // Only vertices with non-zero out-degree are possible can move + remaining_vertices.resize( + thrust::distance(remaining_vertices.begin(), + thrust::copy_if(handle.get_thrust_policy(), + vertex_begin, + vertex_end, + out_degrees.begin(), + remaining_vertices.begin(), + [] __device__(auto deg) { return deg > 0; })), + handle.get_stream()); + + // Set ID of each vertex as its rank + rmm::device_uvector ranks(local_vtx_partitoin_size, handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin()); + + // Set ranks of zero out-degree vetices to std::numeric_limits::lowest() + thrust::transform_if(handle.get_thrust_policy(), + out_degrees.begin(), + out_degrees.end(), + ranks.begin(), + cuda::proclaim_return_type( + [] __device__(auto) { return std::numeric_limits::lowest(); }), + [] __device__(auto deg) { return deg == 0; }); + + out_degrees.resize(0, handle.get_stream()); + out_degrees.shrink_to_fit(handle.get_stream()); + + size_t loop_counter = 0; + while (true) { + loop_counter++; + + // Copy ranks into temporary vector to begin with + + rmm::device_uvector temporary_ranks(local_vtx_partitoin_size, handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), ranks.begin(), ranks.end(), temporary_ranks.begin()); + + // Select a random set of candidate vertices + + vertex_t nr_remaining_vertices_to_check = remaining_vertices.size(); + if (multi_gpu) { + nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), + nr_remaining_vertices_to_check, + raft::comms::op_t::SUM, + handle.get_stream()); + } + + vertex_t nr_candidates = (nr_remaining_vertices_to_check < 1024) + ? nr_remaining_vertices_to_check + : std::min(static_cast((0.50 + 0.25 * loop_counter) * + nr_remaining_vertices_to_check), + nr_remaining_vertices_to_check); + + // FIXME: Can we improve performance here? + // FIXME: if(nr_remaining_vertices_to_check < 1024), may avoid calling select_random_vertices + auto d_sampled_vertices = + cugraph::select_random_vertices(handle, + graph_view, + std::make_optional(raft::device_span{ + remaining_vertices.data(), remaining_vertices.size()}), + rng_state, + nr_candidates, + false, + true); + + rmm::device_uvector non_candidate_vertices( + remaining_vertices.size() - d_sampled_vertices.size(), handle.get_stream()); + + thrust::set_difference(handle.get_thrust_policy(), + remaining_vertices.begin(), + remaining_vertices.end(), + d_sampled_vertices.begin(), + d_sampled_vertices.end(), + non_candidate_vertices.begin()); + + // Set temporary ranks of non-candidate vertices to std::numeric_limits::lowest() + thrust::for_each( + handle.get_thrust_policy(), + non_candidate_vertices.begin(), + non_candidate_vertices.end(), + [temporary_ranks = + raft::device_span(temporary_ranks.data(), temporary_ranks.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { + // + // if rank of a non-candidate vertex is not std::numeric_limits::max() (i.e. the + // vertex is not already in MIS), set it to std::numeric_limits::lowest() + // + auto v_offset = v - v_first; + if (temporary_ranks[v_offset] < std::numeric_limits::max()) { + temporary_ranks[v_offset] = std::numeric_limits::lowest(); + } + }); + + // Caches for ranks + edge_src_property_t src_rank_cache(handle); + edge_dst_property_t dst_rank_cache(handle); + + // Update rank caches with temporary ranks + if constexpr (multi_gpu) { + src_rank_cache = edge_src_property_t(handle, graph_view); + dst_rank_cache = edge_dst_property_t(handle, graph_view); + update_edge_src_property(handle, graph_view, temporary_ranks.begin(), src_rank_cache); + update_edge_dst_property(handle, graph_view, temporary_ranks.begin(), dst_rank_cache); + } + + // + // Find maximum rank outgoing neighbor for each vertex + // + + rmm::device_uvector max_outgoing_ranks(local_vtx_partitoin_size, handle.get_stream()); + + per_v_transform_reduce_outgoing_e( + handle, + graph_view, + multi_gpu + ? src_rank_cache.view() + : detail::edge_major_property_view_t(temporary_ranks.data()), + multi_gpu ? dst_rank_cache.view() + : detail::edge_minor_property_view_t( + temporary_ranks.data(), vertex_t{0}), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return dst_rank; }, + std::numeric_limits::lowest(), + cugraph::reduce_op::maximum{}, + max_outgoing_ranks.begin()); + + // + // Find maximum rank incoming neighbor for each vertex + // + + rmm::device_uvector max_incoming_ranks(local_vtx_partitoin_size, handle.get_stream()); + + per_v_transform_reduce_incoming_e( + handle, + graph_view, + multi_gpu + ? src_rank_cache.view() + : detail::edge_major_property_view_t(temporary_ranks.data()), + multi_gpu ? dst_rank_cache.view() + : detail::edge_minor_property_view_t( + temporary_ranks.data(), vertex_t{0}), + edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return src_rank; }, + std::numeric_limits::lowest(), + cugraph::reduce_op::maximum{}, + max_incoming_ranks.begin()); + + temporary_ranks.resize(0, handle.get_stream()); + temporary_ranks.shrink_to_fit(handle.get_stream()); + + // + // Compute max of outgoing and incoming neighbors + // + thrust::transform(handle.get_thrust_policy(), + max_incoming_ranks.begin(), + max_incoming_ranks.end(), + max_outgoing_ranks.begin(), + max_outgoing_ranks.begin(), + thrust::maximum()); + + max_incoming_ranks.resize(0, handle.get_stream()); + max_incoming_ranks.shrink_to_fit(handle.get_stream()); + + // + // If the max neighbor of a vertex is already in MIS (i.e. has rank + // std::numeric_limits::max()), discard it, otherwise, + // include the vertex if it has larger rank than its maximum rank neighbor + // + auto last = thrust::remove_if( + handle.get_thrust_policy(), + d_sampled_vertices.begin(), + d_sampled_vertices.end(), + [max_rank_neighbor_first = max_outgoing_ranks.begin(), + ranks = raft::device_span(ranks.data(), ranks.size()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) { + auto v_offset = v - v_first; + auto max_neighbor_rank = *(max_rank_neighbor_first + v_offset); + auto rank_of_v = ranks[v_offset]; + + if (max_neighbor_rank >= std::numeric_limits::max()) { + // Maximum rank neighbor is alreay in MIS + // Discard current vertex by setting its rank to + // std::numeric_limits::lowest() + ranks[v_offset] = std::numeric_limits::lowest(); + return true; + } + + if (rank_of_v >= max_neighbor_rank) { + // Include v and set its rank to std::numeric_limits::max() + ranks[v_offset] = std::numeric_limits::max(); + return true; + } + return false; + }); + + max_outgoing_ranks.resize(0, handle.get_stream()); + max_outgoing_ranks.shrink_to_fit(handle.get_stream()); + + d_sampled_vertices.resize(thrust::distance(d_sampled_vertices.begin(), last), + handle.get_stream()); + d_sampled_vertices.shrink_to_fit(handle.get_stream()); + + remaining_vertices.resize(non_candidate_vertices.size() + d_sampled_vertices.size(), + handle.get_stream()); + remaining_vertices.shrink_to_fit(handle.get_stream()); + + // merge non-candidate and remaining candidate vertices + thrust::merge(handle.get_thrust_policy(), + non_candidate_vertices.begin(), + non_candidate_vertices.end(), + d_sampled_vertices.begin(), + d_sampled_vertices.end(), + remaining_vertices.begin()); + + nr_remaining_vertices_to_check = remaining_vertices.size(); + if (multi_gpu) { + nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(), + nr_remaining_vertices_to_check, + raft::comms::op_t::SUM, + handle.get_stream()); + } + + if (nr_remaining_vertices_to_check == 0) { break; } + } + + // Count number of vertices included in MIS + + vertex_t nr_vertices_included_in_mis = thrust::count_if( + handle.get_thrust_policy(), ranks.begin(), ranks.end(), [] __device__(auto v_rank) { + return v_rank >= std::numeric_limits::max(); + }); + + // Build MIS and return + rmm::device_uvector mis(nr_vertices_included_in_mis, handle.get_stream()); + thrust::copy_if( + handle.get_thrust_policy(), + vertex_begin, + vertex_end, + ranks.begin(), + mis.begin(), + [] __device__(auto v_rank) { return v_rank >= std::numeric_limits::max(); }); + + ranks.resize(0, handle.get_stream()); + ranks.shrink_to_fit(handle.get_stream()); + return mis; +} +} // namespace detail + +} // namespace cugraph diff --git a/cpp/src/community/mis.hpp b/cpp/src/community/detail/maximal_independent_moves.hpp similarity index 91% rename from cpp/src/community/mis.hpp rename to cpp/src/community/detail/maximal_independent_moves.hpp index 83c0d9775f9..b5588b11ef7 100644 --- a/cpp/src/community/mis.hpp +++ b/cpp/src/community/detail/maximal_independent_moves.hpp @@ -23,9 +23,13 @@ #include namespace cugraph { +namespace detail { + template -rmm::device_uvector maximal_independent_set( +rmm::device_uvector maximal_independent_moves( raft::handle_t const& handle, graph_view_t const& graph_view, raft::random::RngState& rng_state); + +} // namespace detail } // namespace cugraph diff --git a/cpp/src/community/detail/maximal_independent_moves_mg.cu b/cpp/src/community/detail/maximal_independent_moves_mg.cu new file mode 100644 index 00000000000..577253cdf58 --- /dev/null +++ b/cpp/src/community/detail/maximal_independent_moves_mg.cu @@ -0,0 +1,38 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "maximal_independent_moves.cuh" + +namespace cugraph { +namespace detail { + +template rmm::device_uvector maximal_independent_moves( + raft::handle_t const& handle, + graph_view_t const& decision_graph_view, + raft::random::RngState& rng_state); + +template rmm::device_uvector maximal_independent_moves( + raft::handle_t const& handle, + graph_view_t const& decision_graph_view, + raft::random::RngState& rng_state); + +template rmm::device_uvector maximal_independent_moves( + raft::handle_t const& handle, + graph_view_t const& decision_graph_view, + raft::random::RngState& rng_state); + +} // namespace detail + +} // namespace cugraph diff --git a/cpp/src/community/detail/maximal_independent_moves_sg.cu b/cpp/src/community/detail/maximal_independent_moves_sg.cu new file mode 100644 index 00000000000..18527c1ce48 --- /dev/null +++ b/cpp/src/community/detail/maximal_independent_moves_sg.cu @@ -0,0 +1,37 @@ +/* + * Copyright (c) 2023-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "maximal_independent_moves.cuh" + +namespace cugraph { +namespace detail { + +template rmm::device_uvector maximal_independent_moves( + raft::handle_t const& handle, + graph_view_t const& decision_graph_view, + raft::random::RngState& rng_state); + +template rmm::device_uvector maximal_independent_moves( + raft::handle_t const& handle, + graph_view_t const& decision_graph_view, + raft::random::RngState& rng_state); + +template rmm::device_uvector maximal_independent_moves( + raft::handle_t const& handle, + graph_view_t const& decision_graph_view, + raft::random::RngState& rng_state); + +} // namespace detail +} // namespace cugraph diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index b767ce7d8bb..ef34ad90584 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -16,8 +16,8 @@ #pragma once #include "common_methods.hpp" -#include "community/mis.hpp" #include "detail/graph_partition_utils.cuh" +#include "maximal_independent_moves.hpp" #include "prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh" #include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" #include "prims/reduce_op.cuh" @@ -660,8 +660,8 @@ refine_clustering( // Determine a set of moves using MIS of the decision_graph // - auto vertices_in_mis = - maximal_independent_set(handle, decision_graph_view, rng_state); + auto vertices_in_mis = maximal_independent_moves( + handle, decision_graph_view, rng_state); rmm::device_uvector numbering_indices((*renumber_map).size(), handle.get_stream()); detail::sequence_fill(handle.get_stream(), diff --git a/cpp/src/community/detail/mis_impl.cuh b/cpp/src/components/mis_impl.cuh similarity index 90% rename from cpp/src/community/detail/mis_impl.cuh rename to cpp/src/components/mis_impl.cuh index 217d64eb1c1..550edf9807a 100644 --- a/cpp/src/community/detail/mis_impl.cuh +++ b/cpp/src/components/mis_impl.cuh @@ -16,11 +16,12 @@ */ #pragma once -#include "community/mis.hpp" +#include "prims/fill_edge_property.cuh" #include "prims/fill_edge_src_dst_property.cuh" #include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" #include "prims/update_edge_src_dst_property.cuh" +#include #include #include #include @@ -60,36 +61,46 @@ rmm::device_uvector maximal_independent_set( thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first()); auto vertex_end = thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last()); - // Compute out-degree auto out_degrees = graph_view.compute_out_degrees(handle); + auto in_degrees = graph_view.compute_in_degrees(handle); - // Vertices with non-zero out-degree are possible candidates for MIS. + // Vertices with degree zero are always part of MIS remaining_vertices.resize( thrust::distance(remaining_vertices.begin(), thrust::copy_if(handle.get_thrust_policy(), vertex_begin, vertex_end, - out_degrees.begin(), + thrust::make_zip_iterator( + thrust::make_tuple(out_degrees.begin(), in_degrees.begin())), remaining_vertices.begin(), - [] __device__(auto deg) { return deg > 0; })), + [] __device__(auto out_deg_and_in_deg) { + return !((thrust::get<0>(out_deg_and_in_deg) == 0) && + (thrust::get<1>(out_deg_and_in_deg) == 0)); + })), handle.get_stream()); // Set ID of each vertex as its rank rmm::device_uvector ranks(local_vtx_partitoin_size, handle.get_stream()); thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin()); - // Set ranks of zero out-degree vetices to std::numeric_limits::lowest() - thrust::transform_if(handle.get_thrust_policy(), - out_degrees.begin(), - out_degrees.end(), - ranks.begin(), - cuda::proclaim_return_type( - [] __device__(auto) { return std::numeric_limits::lowest(); }), - [] __device__(auto deg) { return deg == 0; }); + // Set ranks of zero degree vetices to std::numeric_limits::max() + thrust::transform_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(thrust::make_tuple(out_degrees.begin(), in_degrees.begin())), + thrust::make_zip_iterator(thrust::make_tuple(out_degrees.end(), in_degrees.end())), + ranks.begin(), + cuda::proclaim_return_type( + [] __device__(auto) { return std::numeric_limits::max(); }), + [] __device__(auto in_out_degree) { + return (thrust::get<0>(in_out_degree) == 0) && (thrust::get<1>(in_out_degree) == 0); + }); out_degrees.resize(0, handle.get_stream()); out_degrees.shrink_to_fit(handle.get_stream()); + in_degrees.resize(0, handle.get_stream()); + in_degrees.shrink_to_fit(handle.get_stream()); + size_t loop_counter = 0; while (true) { loop_counter++; diff --git a/cpp/src/community/detail/mis_mg.cu b/cpp/src/components/mis_mg.cu similarity index 83% rename from cpp/src/community/detail/mis_mg.cu rename to cpp/src/components/mis_mg.cu index 0fc5eeb63c0..2418b38dd0b 100644 --- a/cpp/src/community/detail/mis_mg.cu +++ b/cpp/src/components/mis_mg.cu @@ -14,21 +14,21 @@ * limitations under the License. */ #include "mis_impl.cuh" - namespace cugraph { + template rmm::device_uvector maximal_independent_set( raft::handle_t const& handle, - graph_view_t const& decision_graph_view, + graph_view_t const& graph_view, raft::random::RngState& rng_state); template rmm::device_uvector maximal_independent_set( raft::handle_t const& handle, - graph_view_t const& decision_graph_view, + graph_view_t const& graph_view, raft::random::RngState& rng_state); template rmm::device_uvector maximal_independent_set( raft::handle_t const& handle, - graph_view_t const& decision_graph_view, + graph_view_t const& graph_view, raft::random::RngState& rng_state); } // namespace cugraph diff --git a/cpp/src/community/detail/mis_sg.cu b/cpp/src/components/mis_sg.cu similarity index 82% rename from cpp/src/community/detail/mis_sg.cu rename to cpp/src/components/mis_sg.cu index 8a20b31d157..fea4c4f2765 100644 --- a/cpp/src/community/detail/mis_sg.cu +++ b/cpp/src/components/mis_sg.cu @@ -16,19 +16,20 @@ #include "mis_impl.cuh" namespace cugraph { + template rmm::device_uvector maximal_independent_set( raft::handle_t const& handle, - graph_view_t const& decision_graph_view, + graph_view_t const& graph_view, raft::random::RngState& rng_state); template rmm::device_uvector maximal_independent_set( raft::handle_t const& handle, - graph_view_t const& decision_graph_view, + graph_view_t const& graph_view, raft::random::RngState& rng_state); template rmm::device_uvector maximal_independent_set( raft::handle_t const& handle, - graph_view_t const& decision_graph_view, + graph_view_t const& graph_view, raft::random::RngState& rng_state); } // namespace cugraph diff --git a/cpp/src/components/vertex_coloring_impl.cuh b/cpp/src/components/vertex_coloring_impl.cuh new file mode 100644 index 00000000000..ce445ab3809 --- /dev/null +++ b/cpp/src/components/vertex_coloring_impl.cuh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include "prims/fill_edge_property.cuh" +#include "prims/transform_e.cuh" +#include "prims/update_edge_src_dst_property.cuh" + +#include + +#include +#include + +namespace cugraph { + +namespace detail { + +template +rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + raft::random::RngState& rng_state) +{ + using graph_view_t = cugraph::graph_view_t; + graph_view_t current_graph_view(graph_view); + + // edge mask + cugraph::edge_property_t edge_masks_even(handle, current_graph_view); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + + cugraph::edge_property_t edge_masks_odd(handle, current_graph_view); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + + cugraph::transform_e( + handle, + current_graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, thrust::nullopt_t) { + return !(src == dst); // mask out self-loop + }, + edge_masks_even.mutable_view()); + + current_graph_view.attach_edge_mask(edge_masks_even.view()); + + // device vector to store colors of vertices + rmm::device_uvector colors = rmm::device_uvector( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::fill( + handle.get_thrust_policy(), colors.begin(), colors.end(), std::numeric_limits::max()); + + vertex_t color_id = 0; + while (true) { + auto mis = cugraph::maximal_independent_set( + handle, current_graph_view, rng_state); + + using flag_t = uint8_t; + rmm::device_uvector is_vertex_in_mis = rmm::device_uvector( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), is_vertex_in_mis.begin(), is_vertex_in_mis.end(), 0); + + thrust::for_each( + handle.get_thrust_policy(), + mis.begin(), + mis.end(), + [color_id, + colors = colors.data(), + is_vertex_in_mis = is_vertex_in_mis.data(), + v_first = current_graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { + auto v_offset = v - v_first; + is_vertex_in_mis[v_offset] = flag_t{1}; + vertex_t initial_color_id = colors[v_offset]; + colors[v_offset] = (color_id < initial_color_id) ? color_id : initial_color_id; + }); + + if (current_graph_view.compute_number_of_edges(handle) == 0) { break; } + + cugraph::edge_src_property_t src_mis_flags(handle, current_graph_view); + cugraph::edge_dst_property_t dst_mis_flags(handle, current_graph_view); + + cugraph::update_edge_src_property( + handle, current_graph_view, is_vertex_in_mis.begin(), src_mis_flags); + + cugraph::update_edge_dst_property( + handle, current_graph_view, is_vertex_in_mis.begin(), dst_mis_flags); + + if (color_id % 2 == 0) { + cugraph::transform_e( + handle, + current_graph_view, + src_mis_flags.view(), + dst_mis_flags.view(), + cugraph::edge_dummy_property_t{}.view(), + [color_id] __device__( + auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { + return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); + }, + edge_masks_odd.mutable_view()); + + if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_even); + current_graph_view.attach_edge_mask(edge_masks_odd.view()); + } else { + cugraph::transform_e( + handle, + current_graph_view, + src_mis_flags.view(), + dst_mis_flags.view(), + cugraph::edge_dummy_property_t{}.view(), + [color_id] __device__( + auto src, auto dst, auto is_src_in_mis, auto is_dst_in_mis, thrust::nullopt_t) { + return !((is_src_in_mis == uint8_t{true}) || (is_dst_in_mis == uint8_t{true})); + }, + edge_masks_even.mutable_view()); + + if (current_graph_view.has_edge_mask()) current_graph_view.clear_edge_mask(); + cugraph::fill_edge_property(handle, current_graph_view, bool{false}, edge_masks_odd); + current_graph_view.attach_edge_mask(edge_masks_even.view()); + } + + color_id++; + } + return colors; +} +} // namespace detail + +template +rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state) +{ + return detail::vertex_coloring(handle, graph_view, rng_state); +} + +} // namespace cugraph diff --git a/cpp/src/components/vertex_coloring_mg.cu b/cpp/src/components/vertex_coloring_mg.cu new file mode 100644 index 00000000000..8f87e8bd534 --- /dev/null +++ b/cpp/src/components/vertex_coloring_mg.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "vertex_coloring_impl.cuh" + +namespace cugraph { + +template rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state); + +template rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state); + +template rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state); + +} // namespace cugraph diff --git a/cpp/src/components/vertex_coloring_sg.cu b/cpp/src/components/vertex_coloring_sg.cu new file mode 100644 index 00000000000..427bc0b2c81 --- /dev/null +++ b/cpp/src/components/vertex_coloring_sg.cu @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include "vertex_coloring_impl.cuh" + +namespace cugraph { + +template rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state); + +template rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state); + +template rmm::device_uvector vertex_coloring( + raft::handle_t const& handle, + graph_view_t const& graph_view, + raft::random::RngState& rng_state); + +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 46a895536ef..af0dffcbf65 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -372,6 +372,14 @@ ConfigureTest(EDGE_BETWEENNESS_CENTRALITY_TEST centrality/edge_betweenness_centr # - WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------------- ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_components_test.cpp) +############################################################################################### +# - MIS tests ------------------------------------------------------------------------------ +ConfigureTest(MIS_TEST components/mis_test.cu) + +############################################################################################### +# - VERTEX COLORING tests ------------------------------------------------------------------- +ConfigureTest(VERTEX_COLORING_TEST components/vertex_coloring_test.cu) + ################################################################################################### # - SIMILARITY tests ------------------------------------------------------------------------------ ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cu) @@ -535,10 +543,6 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG ECG tests -------------------------------------------------------------------------- ConfigureTestMG(MG_ECG_TEST community/mg_ecg_test.cpp) - ############################################################################################### - # - MG MIS tests ------------------------------------------------------------------------------ - ConfigureTestMG(MG_MIS_TEST community/mg_mis_test.cu) - ############################################################################################### # - MG SELECT RANDOM VERTICES tests ----------------------------------------------------------- ConfigureTestMG(MG_SELECT_RANDOM_VERTICES structure/mg_select_random_vertices_test.cpp) @@ -552,6 +556,14 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureTestMG(MG_WEAKLY_CONNECTED_COMPONENTS_TEST components/mg_weakly_connected_components_test.cpp) + ############################################################################################### + # - MG MIS tests ------------------------------------------------------------------------------ + ConfigureTestMG(MG_MIS_TEST components/mg_mis_test.cu) + + ############################################################################################### + # - MG VERTEX COLORING tests ------------------------------------------------------------------- + ConfigureTestMG(MG_VERTEX_COLORING_TEST components/mg_vertex_coloring_test.cu) + ############################################################################################### # - MG Core Number tests ---------------------------------------------------------------------- ConfigureTestMG(MG_CORE_NUMBER_TEST cores/mg_core_number_test.cpp) diff --git a/cpp/tests/community/mg_mis_test.cu b/cpp/tests/components/mg_mis_test.cu similarity index 95% rename from cpp/tests/community/mg_mis_test.cu rename to cpp/tests/components/mg_mis_test.cu index 1240cf812f9..04c346b0f00 100644 --- a/cpp/tests/community/mg_mis_test.cu +++ b/cpp/tests/components/mg_mis_test.cu @@ -245,18 +245,20 @@ TEST_P(Tests_MGMaximalIndependentSet_Rmat, CheckInt64Int64FloatFloat) override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } +bool constexpr check_correctness = false; INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGMaximalIndependentSet_File, - ::testing::Combine(::testing::Values(MaximalIndependentSet_Usecase{false}, - MaximalIndependentSet_Usecase{false}), + ::testing::Combine(::testing::Values(MaximalIndependentSet_Usecase{check_correctness}, + MaximalIndependentSet_Usecase{check_correctness}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_test, - Tests_MGMaximalIndependentSet_Rmat, - ::testing::Combine(::testing::Values(MaximalIndependentSet_Usecase{false}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 3, 4, 0.57, 0.19, 0.19, 0, true, false)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGMaximalIndependentSet_Rmat, + ::testing::Combine( + ::testing::Values(MaximalIndependentSet_Usecase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(3, 4, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -266,7 +268,8 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGMaximalIndependentSet_Rmat, ::testing::Combine( - ::testing::Values(MaximalIndependentSet_Usecase{false}, MaximalIndependentSet_Usecase{false}), + ::testing::Values(MaximalIndependentSet_Usecase{check_correctness}, + MaximalIndependentSet_Usecase{check_correctness}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/mg_vertex_coloring_test.cu b/cpp/tests/components/mg_vertex_coloring_test.cu new file mode 100644 index 00000000000..ce4dadaa786 --- /dev/null +++ b/cpp/tests/components/mg_vertex_coloring_test.cu @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include "prims/fill_edge_src_dst_property.cuh" +#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" +#include "prims/property_generator.cuh" +#include "prims/reduce_op.cuh" +#include "prims/transform_reduce_e.cuh" +#include "prims/update_edge_src_dst_property.cuh" +#include "utilities/base_fixture.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/test_utilities.hpp" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +struct GraphColoring_UseCase { + bool check_correctness{true}; +}; + +template +class Tests_MGGraphColoring + : public ::testing::TestWithParam> { + public: + Tests_MGGraphColoring() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param) + { + auto [coloring_usecase, input_usecase] = param; + + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + constexpr bool multi_gpu = true; + + auto [mg_graph, mg_edge_weights, mg_renumber_map] = + cugraph::test::construct_graph( + *handle_, input_usecase, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = + mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt; + + raft::random::RngState rng_state(multi_gpu ? handle_->get_comms().get_rank() : 0); + auto d_colors = + cugraph::vertex_coloring(*handle_, mg_graph_view, rng_state); + + // Test Graph Coloring + + if (coloring_usecase.check_correctness) { + std::vector h_colors(d_colors.size()); + raft::update_host(h_colors.data(), d_colors.data(), d_colors.size(), handle_->get_stream()); + + std::for_each(h_colors.begin(), + h_colors.end(), + [num_vertices = mg_graph_view.number_of_vertices()](vertex_t color_id) { + ASSERT_TRUE(color_id <= num_vertices); + }); + + using GraphViewType = cugraph::graph_view_t; + cugraph::edge_src_property_t src_color_cache(*handle_); + cugraph::edge_dst_property_t dst_color_cache(*handle_); + + if constexpr (multi_gpu) { + src_color_cache = + cugraph::edge_src_property_t(*handle_, mg_graph_view); + dst_color_cache = + cugraph::edge_dst_property_t(*handle_, mg_graph_view); + update_edge_src_property(*handle_, mg_graph_view, d_colors.begin(), src_color_cache); + update_edge_dst_property(*handle_, mg_graph_view, d_colors.begin(), dst_color_cache); + } + + rmm::device_uvector d_color_conflicts( + mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); + + per_v_transform_reduce_outgoing_e( + *handle_, + mg_graph_view, + multi_gpu + ? src_color_cache.view() + : cugraph::detail::edge_major_property_view_t(d_colors.data()), + multi_gpu ? dst_color_cache.view() + : cugraph::detail::edge_minor_property_view_t( + d_colors.data(), vertex_t{0}), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + if ((src != dst) && (src_color == dst_color)) { + return uint8_t{1}; + } else { + return uint8_t{0}; + } + }, + uint8_t{0}, + cugraph::reduce_op::maximum{}, + d_color_conflicts.begin()); + + std::vector h_color_conflicts(d_color_conflicts.size()); + raft::update_host(h_color_conflicts.data(), + d_color_conflicts.data(), + d_color_conflicts.size(), + handle_->get_stream()); + + std::vector h_vertices_in_this_proces((*mg_renumber_map).size()); + + raft::update_host(h_vertices_in_this_proces.data(), + (*mg_renumber_map).data(), + (*mg_renumber_map).size(), + handle_->get_stream()); + handle_->sync_stream(); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + size_t nr_conflicts = cugraph::transform_reduce_e( + *handle_, + mg_graph_view, + multi_gpu ? src_color_cache.view() + : cugraph::detail::edge_major_property_view_t( + d_colors.begin()), + multi_gpu ? dst_color_cache.view() + : cugraph::detail::edge_minor_property_view_t( + d_colors.begin(), vertex_t{0}), + cugraph::edge_dummy_property_t{}.view(), + [renumber_map = (*mg_renumber_map).data()] __device__( + auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + if ((src != dst) && (src_color == dst_color)) { + return vertex_t{1}; + } else { + return vertex_t{0}; + } + }, + vertex_t{0}); + + ASSERT_TRUE(nr_conflicts == edge_t{0}) + << "adjacent vertices can't have same color." << std::endl; + + { + thrust::for_each( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple( + h_colors.begin(), h_vertices_in_this_proces.begin(), h_color_conflicts.begin())), + thrust::make_zip_iterator(thrust::make_tuple( + h_colors.end(), h_vertices_in_this_proces.end(), h_color_conflicts.end())), + [](auto color_vetex_and_conflict_flag) { + auto color = thrust::get<0>(color_vetex_and_conflict_flag); + auto v = thrust::get<1>(color_vetex_and_conflict_flag); + auto conflict_flag = thrust::get<2>(color_vetex_and_conflict_flag); + ASSERT_TRUE(conflict_flag == 0) + << v << " got same color as one of its neighbor" << std::endl; + }); + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGGraphColoring::handle_ = nullptr; + +using Tests_MGGraphColoring_File = Tests_MGGraphColoring; +using Tests_MGGraphColoring_Rmat = Tests_MGGraphColoring; + +TEST_P(Tests_MGGraphColoring_File, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGGraphColoring_File, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGGraphColoring_File, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGGraphColoring_Rmat, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGGraphColoring_Rmat, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_MGGraphColoring_Rmat, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +bool constexpr check_correctness = false; + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGGraphColoring_File, + ::testing::Combine(::testing::Values(GraphColoring_UseCase{check_correctness}, + GraphColoring_UseCase{check_correctness}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGGraphColoring_Rmat, + ::testing::Combine( + ::testing::Values(GraphColoring_UseCase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(3, 4, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGGraphColoring_Rmat, + ::testing::Combine( + ::testing::Values(GraphColoring_UseCase{check_correctness}, + GraphColoring_UseCase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/mis_test.cu b/cpp/tests/components/mis_test.cu new file mode 100644 index 00000000000..f3bdd3d0e8b --- /dev/null +++ b/cpp/tests/components/mis_test.cu @@ -0,0 +1,240 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" +#include "prims/reduce_op.cuh" +#include "utilities/base_fixture.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/test_utilities.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +struct MaximalIndependentSet_Usecase { + bool check_correctness{true}; +}; + +template +class Tests_SGMaximalIndependentSet + : public ::testing::TestWithParam> { + public: + Tests_SGMaximalIndependentSet() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param) + { + auto [mis_usecase, input_usecase] = param; + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + hr_timer.start("Construct graph"); + } + + constexpr bool multi_gpu = false; + + auto [sg_graph, sg_edge_weights, sg_renumber_map] = + cugraph::test::construct_graph( + handle, input_usecase, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto sg_graph_view = sg_graph.view(); + auto sg_edge_weight_view = + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; + + raft::random::RngState rng_state(0); + auto d_mis = cugraph::maximal_independent_set( + handle, sg_graph_view, rng_state); + + // Test MIS + if (mis_usecase.check_correctness) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + std::vector h_mis(d_mis.size()); + raft::update_host(h_mis.data(), d_mis.data(), d_mis.size(), handle.get_stream()); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + auto vertex_first = sg_graph_view.local_vertex_partition_range_first(); + auto vertex_last = sg_graph_view.local_vertex_partition_range_last(); + + std::for_each(h_mis.begin(), h_mis.end(), [vertex_first, vertex_last](vertex_t v) { + ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)); + }); + + // If a vertex is included in MIS, then none of its neighbor should be + + vertex_t local_vtx_partitoin_size = sg_graph_view.local_vertex_partition_range_size(); + rmm::device_uvector d_total_outgoing_nbrs_included_mis(local_vtx_partitoin_size, + handle.get_stream()); + + rmm::device_uvector inclusiong_flags(local_vtx_partitoin_size, handle.get_stream()); + + thrust::uninitialized_fill( + handle.get_thrust_policy(), inclusiong_flags.begin(), inclusiong_flags.end(), vertex_t{0}); + + thrust::for_each( + handle.get_thrust_policy(), + d_mis.begin(), + d_mis.end(), + [inclusiong_flags = + raft::device_span(inclusiong_flags.data(), inclusiong_flags.size()), + v_first = sg_graph_view.local_vertex_partition_range_first()] __device__(auto v) { + auto v_offset = v - v_first; + inclusiong_flags[v_offset] = vertex_t{1}; + }); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + per_v_transform_reduce_outgoing_e( + handle, + sg_graph_view, + cugraph::detail::edge_major_property_view_t( + inclusiong_flags.data()), + cugraph::detail::edge_minor_property_view_t( + inclusiong_flags.data(), vertex_t{0}), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_included, auto dst_included, auto wt) { + return (src == dst) ? 0 : dst_included; + }, + vertex_t{0}, + cugraph::reduce_op::plus{}, + d_total_outgoing_nbrs_included_mis.begin()); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + std::vector h_total_outgoing_nbrs_included_mis( + d_total_outgoing_nbrs_included_mis.size()); + raft::update_host(h_total_outgoing_nbrs_included_mis.data(), + d_total_outgoing_nbrs_included_mis.data(), + d_total_outgoing_nbrs_included_mis.size(), + handle.get_stream()); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + { + auto vertex_first = sg_graph_view.local_vertex_partition_range_first(); + auto vertex_last = sg_graph_view.local_vertex_partition_range_last(); + + std::for_each(h_mis.begin(), + h_mis.end(), + [vertex_first, vertex_last, &h_total_outgoing_nbrs_included_mis](vertex_t v) { + ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)) + << v << " is not within vertex parition range" << std::endl; + + ASSERT_TRUE(h_total_outgoing_nbrs_included_mis[v - vertex_first] == 0) + << v << "'s neighbor is included in MIS" << std::endl; + }); + } + } + } +}; + +using Tests_SGMaximalIndependentSet_File = + Tests_SGMaximalIndependentSet; +using Tests_SGMaximalIndependentSet_Rmat = + Tests_SGMaximalIndependentSet; + +TEST_P(Tests_SGMaximalIndependentSet_File, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGMaximalIndependentSet_File, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGMaximalIndependentSet_File, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGMaximalIndependentSet_Rmat, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGMaximalIndependentSet_Rmat, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGMaximalIndependentSet_Rmat, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +bool constexpr check_correctness = false; +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_SGMaximalIndependentSet_File, + ::testing::Combine(::testing::Values(MaximalIndependentSet_Usecase{check_correctness}, + MaximalIndependentSet_Usecase{check_correctness}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_SGMaximalIndependentSet_Rmat, + ::testing::Combine( + ::testing::Values(MaximalIndependentSet_Usecase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(3, 4, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_SGMaximalIndependentSet_Rmat, + ::testing::Combine( + ::testing::Values(MaximalIndependentSet_Usecase{check_correctness}, + MaximalIndependentSet_Usecase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/components/vertex_coloring_test.cu b/cpp/tests/components/vertex_coloring_test.cu new file mode 100644 index 00000000000..27a0c5013bd --- /dev/null +++ b/cpp/tests/components/vertex_coloring_test.cu @@ -0,0 +1,244 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ + +#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" +#include "prims/reduce_op.cuh" +#include "prims/transform_reduce_e.cuh" +#include "utilities/base_fixture.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/test_utilities.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include + +struct GraphColoring_UseCase { + bool check_correctness{true}; +}; + +template +class Tests_SGGraphColoring + : public ::testing::TestWithParam> { + public: + Tests_SGGraphColoring() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param) + { + auto [coloring_usecase, input_usecase] = param; + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + hr_timer.start("Construct graph"); + } + + constexpr bool multi_gpu = false; + + auto [sg_graph, sg_edge_weights, sg_renumber_map] = + cugraph::test::construct_graph( + handle, input_usecase, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto sg_graph_view = sg_graph.view(); + auto sg_edge_weight_view = + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; + + raft::random::RngState rng_state(0); + auto d_colors = + cugraph::vertex_coloring(handle, sg_graph_view, rng_state); + + // Test Graph Coloring + + if (coloring_usecase.check_correctness) { + std::vector h_colors(d_colors.size()); + raft::update_host(h_colors.data(), d_colors.data(), d_colors.size(), handle.get_stream()); + + std::for_each(h_colors.begin(), + h_colors.end(), + [num_vertices = sg_graph_view.number_of_vertices()](vertex_t color_id) { + ASSERT_TRUE(color_id <= num_vertices); + }); + + rmm::device_uvector d_color_conflict_flags( + sg_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + + per_v_transform_reduce_outgoing_e( + handle, + sg_graph_view, + cugraph::detail::edge_major_property_view_t(d_colors.data()), + cugraph::detail::edge_minor_property_view_t(d_colors.data(), + vertex_t{0}), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + if ((src != dst) && (src_color == dst_color)) { + return uint8_t{1}; + } else { + return uint8_t{0}; + } + }, + uint8_t{0}, + cugraph::reduce_op::maximum{}, + d_color_conflict_flags.begin()); + + std::vector h_color_conflict_flags(d_color_conflict_flags.size()); + raft::update_host(h_color_conflict_flags.data(), + d_color_conflict_flags.data(), + d_color_conflict_flags.size(), + handle.get_stream()); + + std::vector h_vertices_in_this_proces((*sg_renumber_map).size()); + + raft::update_host(h_vertices_in_this_proces.data(), + (*sg_renumber_map).data(), + (*sg_renumber_map).size(), + handle.get_stream()); + handle.sync_stream(); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + edge_t nr_conflicts = cugraph::transform_reduce_e( + handle, + sg_graph_view, + cugraph::detail::edge_major_property_view_t(d_colors.begin()), + cugraph::detail::edge_minor_property_view_t(d_colors.begin(), + vertex_t{0}), + cugraph::edge_dummy_property_t{}.view(), + [renumber_map = (*sg_renumber_map).data()] __device__( + auto src, auto dst, auto src_color, auto dst_color, thrust::nullopt_t) { + if ((src != dst) && (src_color == dst_color)) { + return vertex_t{1}; + } else { + return vertex_t{0}; + } + }, + vertex_t{0}); + + ASSERT_TRUE(nr_conflicts == edge_t{0}) + << "adjacent vertices can't have same color." << std::endl; + + if (nr_conflicts >= 0) { + thrust::for_each( + thrust::host, + thrust::make_zip_iterator(thrust::make_tuple( + h_colors.begin(), h_vertices_in_this_proces.begin(), h_color_conflict_flags.begin())), + thrust::make_zip_iterator(thrust::make_tuple( + h_colors.end(), h_vertices_in_this_proces.end(), h_color_conflict_flags.end())), + [](auto color_vetex_and_conflict_flag) { + auto color = thrust::get<0>(color_vetex_and_conflict_flag); + auto v = thrust::get<1>(color_vetex_and_conflict_flag); + auto conflict_flag = thrust::get<2>(color_vetex_and_conflict_flag); + ASSERT_TRUE(conflict_flag == 0) + << v << " got same color as one of its neighbor" << std::endl; + }); + } + } + } +}; + +using Tests_SGGraphColoring_File = Tests_SGGraphColoring; +using Tests_SGGraphColoring_Rmat = Tests_SGGraphColoring; + +TEST_P(Tests_SGGraphColoring_File, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGGraphColoring_File, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGGraphColoring_File, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGGraphColoring_Rmat, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGGraphColoring_Rmat, CheckInt32Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SGGraphColoring_Rmat, CheckInt64Int64FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +bool constexpr check_correctness = false; + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_SGGraphColoring_File, + ::testing::Combine(::testing::Values(GraphColoring_UseCase{check_correctness}, + GraphColoring_UseCase{check_correctness}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_SGGraphColoring_Rmat, + ::testing::Combine( + ::testing::Values(GraphColoring_UseCase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(3, 4, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_SGGraphColoring_Rmat, + ::testing::Combine( + ::testing::Values(GraphColoring_UseCase{check_correctness}, + GraphColoring_UseCase{check_correctness}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() From fdc6aa5acde918e50b6f6f365cc1ec03fcfb18ec Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 12 Mar 2024 19:01:26 -0400 Subject: [PATCH 4/5] Add degrees to C API (#4212) Add new method `cugraph_degrees` to the C API to compute and return the degrees of vertices. Closes #4171 Authors: - Chuck Hastings (https://github.com/ChuckHastings) - Joseph Nke (https://github.com/jnke2016) - Ralph Liu (https://github.com/nv-rliu) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4212 --- cpp/CMakeLists.txt | 2 + cpp/include/cugraph_c/graph_functions.h | 112 +++++ cpp/src/c_api/abstract_functor.hpp | 2 +- cpp/src/c_api/degrees.cu | 225 ++++++++++ cpp/src/c_api/degrees_result.cpp | 63 +++ cpp/src/c_api/degrees_result.hpp | 32 ++ cpp/tests/CMakeLists.txt | 2 + cpp/tests/c_api/c_test_utils.h | 4 +- cpp/tests/c_api/degrees_test.c | 387 +++++++++++++++++ cpp/tests/c_api/mg_degrees_test.c | 407 ++++++++++++++++++ cpp/tests/c_api/test_utils.cpp | 14 +- .../simpleDistributedGraph.py | 307 +++++++------ .../graph_implementation/simpleGraph.py | 198 +++++---- .../centrality/test_degree_centrality_mg.py | 4 +- .../pylibcugraph/pylibcugraph/CMakeLists.txt | 3 +- python/pylibcugraph/pylibcugraph/__init__.py | 2 + .../_cugraph_c/graph_functions.pxd | 55 +++ python/pylibcugraph/pylibcugraph/degrees.pyx | 307 +++++++++++++ 18 files changed, 1905 insertions(+), 221 deletions(-) create mode 100644 cpp/src/c_api/degrees.cu create mode 100644 cpp/src/c_api/degrees_result.cpp create mode 100644 cpp/src/c_api/degrees_result.hpp create mode 100644 cpp/tests/c_api/degrees_test.c create mode 100644 cpp/tests/c_api/mg_degrees_test.c create mode 100644 python/pylibcugraph/pylibcugraph/degrees.pyx diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index b12403710ab..88908ef70ce 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -415,6 +415,8 @@ endif() add_library(cugraph_c src/c_api/resource_handle.cpp src/c_api/array.cpp + src/c_api/degrees.cu + src/c_api/degrees_result.cpp src/c_api/error.cpp src/c_api/graph_sg.cpp src/c_api/graph_mg.cpp diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index 8fe1ea0b958..94b06189796 100644 --- a/cpp/include/cugraph_c/graph_functions.h +++ b/cpp/include/cugraph_c/graph_functions.h @@ -229,6 +229,118 @@ cugraph_error_code_t cugraph_allgather(const cugraph_resource_handle_t* handle, cugraph_induced_subgraph_result_t** result, cugraph_error_t** error); +/** + * @brief Opaque degree result type + */ +typedef struct { + int32_t align_; +} cugraph_degrees_result_t; + +/** + * @brief Compute in degrees + * + * Compute the in degrees for the vertices in the graph. + * + * @param [in] handle Handle for accessing resources. + * @param [in] graph Pointer to graph + * @param [in] source_vertices Device array of vertices we want to compute in degrees for. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * true) + * @param [out] result Opaque pointer to degrees result + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_in_degrees( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error); + +/** + * @brief Compute out degrees + * + * Compute the out degrees for the vertices in the graph. + * + * @param [in] handle Handle for accessing resources. + * @param [in] graph Pointer to graph + * @param [in] source_vertices Device array of vertices we want to compute out degrees for. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * true) + * @param [out] result Opaque pointer to degrees result + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_out_degrees( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error); + +/** + * @brief Compute degrees + * + * Compute the degrees for the vertices in the graph. + * + * @param [in] handle Handle for accessing resources. + * @param [in] graph Pointer to graph + * @param [in] source_vertices Device array of vertices we want to compute degrees for. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * true) + * @param [out] result Opaque pointer to degrees result + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_degrees(const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error); + +/** + * @brief Get the vertex ids + * + * @param [in] degrees_result Opaque pointer to degree result + * @return type erased array view of vertex ids + */ +cugraph_type_erased_device_array_view_t* cugraph_degrees_result_get_vertices( + cugraph_degrees_result_t* degrees_result); + +/** + * @brief Get the in degrees + * + * @param [in] degrees_result Opaque pointer to degree result + * @return type erased array view of vertex ids + */ +cugraph_type_erased_device_array_view_t* cugraph_degrees_result_get_in_degrees( + cugraph_degrees_result_t* degrees_result); + +/** + * @brief Get the out degrees + * + * If the graph is symmetric, in degrees and out degrees will be equal (and + * will be stored in the same memory). + * + * @param [in] degrees_result Opaque pointer to degree result + * @return type erased array view of vertex ids + */ +cugraph_type_erased_device_array_view_t* cugraph_degrees_result_get_out_degrees( + cugraph_degrees_result_t* degrees_result); + +/** + * @brief Free degree result + * + * @param [in] degrees_result Opaque pointer to degree result + */ +void cugraph_degrees_result_free(cugraph_degrees_result_t* degrees_result); + #ifdef __cplusplus } #endif diff --git a/cpp/src/c_api/abstract_functor.hpp b/cpp/src/c_api/abstract_functor.hpp index 219b1256065..8d3ed11341f 100644 --- a/cpp/src/c_api/abstract_functor.hpp +++ b/cpp/src/c_api/abstract_functor.hpp @@ -27,7 +27,7 @@ namespace c_api { struct abstract_functor { // Move to abstract functor... make operator a void, add cugraph_graph_t * result to functor // try that with instantiation questions - std::unique_ptr error_{std::make_unique("")}; + std::unique_ptr error_ = {std::make_unique("")}; cugraph_error_code_t error_code_{CUGRAPH_SUCCESS}; void unsupported() diff --git a/cpp/src/c_api/degrees.cu b/cpp/src/c_api/degrees.cu new file mode 100644 index 00000000000..d6481efa905 --- /dev/null +++ b/cpp/src/c_api/degrees.cu @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c_api/abstract_functor.hpp" +#include "c_api/degrees_result.hpp" +#include "c_api/graph.hpp" +#include "c_api/resource_handle.hpp" +#include "c_api/utils.hpp" + +#include + +#include +#include +#include +#include +#include + +#include + +#include + +namespace { + +struct degrees_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_graph_t* graph_{}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* source_vertices_; + bool in_degrees_{false}; + bool out_degrees_{false}; + bool do_expensive_check_{false}; + cugraph::c_api::cugraph_degrees_result_t* result_{}; + + degrees_functor(cugraph_resource_handle_t const* handle, + cugraph_graph_t* graph, + ::cugraph_type_erased_device_array_view_t const* source_vertices, + bool in_degrees, + bool out_degrees, + bool do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + graph_(reinterpret_cast(graph)), + source_vertices_( + reinterpret_cast( + source_vertices)), + in_degrees_{in_degrees}, + out_degrees_{out_degrees}, + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + // FIXME: Think about how to handle SG vice MG + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + auto graph = + reinterpret_cast*>( + graph_->graph_); + + auto graph_view = graph->view(); + + auto number_map = reinterpret_cast*>(graph_->number_map_); + + std::optional> in_degrees{std::nullopt}; + std::optional> out_degrees{std::nullopt}; + + if (in_degrees_ && out_degrees_ && graph_view.is_symmetric()) { + in_degrees = store_transposed ? graph_view.compute_in_degrees(handle_) + : graph_view.compute_out_degrees(handle_); + // out_degrees will be extracted from in_degrees in the result + } else { + if (in_degrees_) in_degrees = graph_view.compute_in_degrees(handle_); + + if (out_degrees_) out_degrees = graph_view.compute_out_degrees(handle_); + } + + rmm::device_uvector vertex_ids(0, handle_.get_stream()); + + if (source_vertices_) { + // FIXME: Would be more efficient if graph_view.compute_*_degrees could take a vertex + // subset + vertex_ids.resize(source_vertices_->size_, handle_.get_stream()); + raft::copy(vertex_ids.data(), + source_vertices_->as_type(), + vertex_ids.size(), + handle_.get_stream()); + + if constexpr (multi_gpu) { + vertex_ids = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( + handle_, std::move(vertex_ids)); + } + + cugraph::renumber_ext_vertices( + handle_, + vertex_ids.data(), + vertex_ids.size(), + number_map->data(), + graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last(), + do_expensive_check_); + + auto vertex_partition = cugraph::vertex_partition_device_view_t( + graph_view.local_vertex_partition_view()); + + auto vertices_iter = thrust::make_transform_iterator( + vertex_ids.begin(), + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { + return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); + })); + + if (in_degrees && out_degrees) { + rmm::device_uvector tmp_in_degrees(vertex_ids.size(), handle_.get_stream()); + rmm::device_uvector tmp_out_degrees(vertex_ids.size(), handle_.get_stream()); + thrust::gather( + handle_.get_thrust_policy(), + vertices_iter, + vertices_iter + vertex_ids.size(), + thrust::make_zip_iterator(in_degrees->begin(), out_degrees->begin()), + thrust::make_zip_iterator(tmp_in_degrees.begin(), tmp_out_degrees.begin())); + in_degrees = std::move(tmp_in_degrees); + out_degrees = std::move(tmp_out_degrees); + } else if (in_degrees) { + rmm::device_uvector tmp_in_degrees(vertex_ids.size(), handle_.get_stream()); + thrust::gather(handle_.get_thrust_policy(), + vertices_iter, + vertices_iter + vertex_ids.size(), + in_degrees->begin(), + tmp_in_degrees.begin()); + in_degrees = std::move(tmp_in_degrees); + } else { + rmm::device_uvector tmp_out_degrees(vertex_ids.size(), handle_.get_stream()); + thrust::gather(handle_.get_thrust_policy(), + vertices_iter, + vertices_iter + vertex_ids.size(), + out_degrees->begin(), + tmp_out_degrees.begin()); + out_degrees = std::move(tmp_out_degrees); + } + + cugraph::unrenumber_local_int_vertices( + handle_, + vertex_ids.data(), + vertex_ids.size(), + number_map->data(), + graph_view.local_vertex_partition_range_first(), + graph_view.local_vertex_partition_range_last(), + do_expensive_check_); + } else { + vertex_ids.resize(graph_view.local_vertex_partition_range_size(), handle_.get_stream()); + raft::copy(vertex_ids.data(), number_map->data(), vertex_ids.size(), handle_.get_stream()); + } + + result_ = new cugraph::c_api::cugraph_degrees_result_t{ + graph_view.is_symmetric(), + new cugraph::c_api::cugraph_type_erased_device_array_t(vertex_ids, graph_->vertex_type_), + in_degrees + ? new cugraph::c_api::cugraph_type_erased_device_array_t(*in_degrees, graph_->edge_type_) + : nullptr, + out_degrees + ? new cugraph::c_api::cugraph_type_erased_device_array_t(*out_degrees, graph_->edge_type_) + : nullptr}; + } + } +}; + +} // namespace + +extern "C" cugraph_error_code_t cugraph_in_degrees( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error) +{ + degrees_functor functor(handle, graph, source_vertices, true, false, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +extern "C" cugraph_error_code_t cugraph_out_degrees( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error) +{ + degrees_functor functor(handle, graph, source_vertices, false, true, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +extern "C" cugraph_error_code_t cugraph_degrees( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error) +{ + degrees_functor functor(handle, graph, source_vertices, true, true, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/src/c_api/degrees_result.cpp b/cpp/src/c_api/degrees_result.cpp new file mode 100644 index 00000000000..a4649e36d05 --- /dev/null +++ b/cpp/src/c_api/degrees_result.cpp @@ -0,0 +1,63 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c_api/degrees_result.hpp" + +#include + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_degrees_result_get_vertices( + cugraph_degrees_result_t* degrees_result) +{ + auto internal_pointer = + reinterpret_cast(degrees_result); + return reinterpret_cast( + internal_pointer->vertex_ids_->view()); +} + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_degrees_result_get_in_degrees( + cugraph_degrees_result_t* degrees_result) +{ + auto internal_pointer = + reinterpret_cast(degrees_result); + return internal_pointer->in_degrees_ == nullptr + ? nullptr + : reinterpret_cast( + internal_pointer->in_degrees_->view()); +} + +extern "C" cugraph_type_erased_device_array_view_t* cugraph_degrees_result_get_out_degrees( + cugraph_degrees_result_t* degrees_result) +{ + auto internal_pointer = + reinterpret_cast(degrees_result); + return internal_pointer->out_degrees_ != nullptr + ? reinterpret_cast( + internal_pointer->out_degrees_->view()) + : internal_pointer->is_symmetric + ? reinterpret_cast( + internal_pointer->in_degrees_->view()) + : nullptr; +} + +extern "C" void cugraph_degrees_result_free(cugraph_degrees_result_t* degrees_result) +{ + auto internal_pointer = + reinterpret_cast(degrees_result); + delete internal_pointer->vertex_ids_; + delete internal_pointer->in_degrees_; + delete internal_pointer->out_degrees_; + delete internal_pointer; +} diff --git a/cpp/src/c_api/degrees_result.hpp b/cpp/src/c_api/degrees_result.hpp new file mode 100644 index 00000000000..c6e9bffa5a1 --- /dev/null +++ b/cpp/src/c_api/degrees_result.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include "c_api/array.hpp" + +namespace cugraph { +namespace c_api { + +struct cugraph_degrees_result_t { + bool is_symmetric{false}; + cugraph_type_erased_device_array_t* vertex_ids_{}; + cugraph_type_erased_device_array_t* in_degrees_{}; + cugraph_type_erased_device_array_t* out_degrees_{}; +}; + +} // namespace c_api +} // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index af0dffcbf65..c84711e1a69 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -696,6 +696,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ConfigureCTestMG(MG_CAPI_SIMILARITY_TEST c_api/mg_similarity_test.c) ConfigureCTestMG(MG_CAPI_K_CORE_TEST c_api/mg_k_core_test.c) ConfigureCTestMG(MG_CAPI_INDUCED_SUBGRAPH_TEST c_api/mg_induced_subgraph_test.c) + ConfigureCTestMG(MG_CAPI_DEGREES c_api/mg_degrees_test.c) ConfigureCTestMG(MG_CAPI_EGONET_TEST c_api/mg_egonet_test.c) ConfigureCTestMG(MG_CAPI_TWO_HOP_NEIGHBORS_TEST c_api/mg_two_hop_neighbors_test.c) @@ -764,6 +765,7 @@ ConfigureCTest(CAPI_CORE_NUMBER_TEST c_api/core_number_test.c) ConfigureCTest(CAPI_SIMILARITY_TEST c_api/similarity_test.c) ConfigureCTest(CAPI_K_CORE_TEST c_api/k_core_test.c) ConfigureCTest(CAPI_INDUCED_SUBGRAPH_TEST c_api/induced_subgraph_test.c) +ConfigureCTest(CAPI_DEGREES c_api/degrees_test.c) ConfigureCTest(CAPI_EGONET_TEST c_api/egonet_test.c) ConfigureCTest(CAPI_TWO_HOP_NEIGHBORS_TEST c_api/two_hop_neighbors_test.c) ConfigureCTest(CAPI_LEGACY_K_TRUSS_TEST c_api/legacy_k_truss_test.c) diff --git a/cpp/tests/c_api/c_test_utils.h b/cpp/tests/c_api/c_test_utils.h index ab9fbeccd4b..fbbf6333ee3 100644 --- a/cpp/tests/c_api/c_test_utils.h +++ b/cpp/tests/c_api/c_test_utils.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -101,6 +101,8 @@ int create_sg_test_graph(const cugraph_resource_handle_t* handle, cugraph_graph_t** graph, cugraph_error_t** ret_error); +size_t cugraph_size_t_allreduce(const cugraph_resource_handle_t* handle, size_t value); + #ifdef __cplusplus } #endif diff --git a/cpp/tests/c_api/degrees_test.c b/cpp/tests/c_api/degrees_test.c new file mode 100644 index 00000000000..10a038b323b --- /dev/null +++ b/cpp/tests/c_api/degrees_test.c @@ -0,0 +1,387 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "c_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +/* + * Simple check of creating a graph from a COO on device memory. + */ +int generic_degrees_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_vertices, + size_t num_edges, + vertex_t* h_vertices, + size_t num_vertices_to_compute, + bool_t in_degrees, + bool_t out_degrees, + bool_t store_transposed, + bool_t is_symmetric, + edge_t *h_in_degrees, + edge_t *h_out_degrees) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_degrees_result_t* result = NULL; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, is_symmetric, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + if (h_vertices == NULL) { + if (in_degrees && out_degrees) { + ret_code = cugraph_degrees( + handle, graph, NULL, FALSE, &result, &ret_error); + } else if (in_degrees) { + ret_code = cugraph_in_degrees( + handle, graph, NULL, FALSE, &result, &ret_error); + } else { + ret_code = cugraph_out_degrees( + handle, graph, NULL, FALSE, &result, &ret_error); + } + + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_extract_degrees failed."); + } else { + cugraph_type_erased_device_array_t* vertices = NULL; + cugraph_type_erased_device_array_view_t* vertices_view = NULL; + + ret_code = + cugraph_type_erased_device_array_create(handle, num_vertices_to_compute, INT32, &vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds create failed."); + + vertices_view = cugraph_type_erased_device_array_view(vertices); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, vertices_view, (byte_t*)h_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + if (in_degrees && out_degrees) { + ret_code = cugraph_degrees( + handle, graph, vertices_view, FALSE, &result, &ret_error); + } else if (in_degrees) { + ret_code = cugraph_in_degrees( + handle, graph, vertices_view, FALSE, &result, &ret_error); + } else { + ret_code = cugraph_out_degrees( + handle, graph, vertices_view, FALSE, &result, &ret_error); + } + + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_extract_degrees failed."); + } + + cugraph_type_erased_device_array_view_t* result_vertices; + cugraph_type_erased_device_array_view_t* result_in_degrees; + cugraph_type_erased_device_array_view_t* result_out_degrees; + + result_vertices = cugraph_degrees_result_get_vertices(result); + result_in_degrees = cugraph_degrees_result_get_in_degrees(result); + result_out_degrees = cugraph_degrees_result_get_out_degrees(result); + + size_t num_result_vertices = cugraph_type_erased_device_array_view_size(result_vertices); + + vertex_t h_result_vertices[num_result_vertices]; + edge_t h_result_in_degrees[num_result_vertices]; + edge_t h_result_out_degrees[num_result_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_vertices, result_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + if (result_in_degrees != NULL) { + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_in_degrees, result_in_degrees, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + } + + if (result_out_degrees != NULL) { + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_out_degrees, result_out_degrees, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + } + + if (h_vertices != NULL) { + TEST_ASSERT(test_ret_value, num_result_vertices == num_vertices_to_compute, "results not the same size"); + } else { + TEST_ASSERT(test_ret_value, num_result_vertices == num_vertices, "results not the same size"); + } + + for (size_t i = 0; (i < num_result_vertices) && (test_ret_value == 0); ++i) { + if (h_in_degrees != NULL) { + TEST_ASSERT(test_ret_value, h_result_in_degrees[i] == h_in_degrees[h_result_vertices[i]], "in degree did not match"); + } + + if (h_out_degrees != NULL) { + TEST_ASSERT(test_ret_value, h_result_out_degrees[i] == h_out_degrees[h_result_vertices[i]], "out degree did not match"); + } + } + + cugraph_degrees_result_free(result); + cugraph_graph_free(graph); + cugraph_error_free(ret_error); + + return test_ret_value; +} + +int test_degrees() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_in_degrees[] = {1, 2, 0, 2, 1, 2}; + vertex_t h_out_degrees[] = {1, 2, 3, 1, 1, 0}; + + return generic_degrees_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + NULL, + 0, + TRUE, + TRUE, + FALSE, + FALSE, + h_in_degrees, + h_out_degrees); +} + +int test_degrees_symmetric() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_in_degrees[] = {2, 4, 3, 3, 2, 2}; + vertex_t h_out_degrees[] = {2, 4, 3, 3, 2, 2}; + + return generic_degrees_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + NULL, + 0, + TRUE, + TRUE, + FALSE, + TRUE, + h_in_degrees, + h_out_degrees); +} + +int test_in_degrees() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_in_degrees[] = {1, 2, 0, 2, 1, 2}; + + return generic_degrees_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + NULL, + 0, + TRUE, + FALSE, + FALSE, + TRUE, + h_in_degrees, + NULL); +} + +int test_out_degrees() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_out_degrees[] = {1, 2, 3, 1, 1, 0}; + + return generic_degrees_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + NULL, + 0, + FALSE, + TRUE, + FALSE, + TRUE, + NULL, + h_out_degrees); +} + +int test_degrees_subset() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + size_t num_vertices_to_compute = 3; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_vertices[] = {2, 3, 5}; + vertex_t h_in_degrees[] = {-1, -1, 0, 2, -1, 2}; + vertex_t h_out_degrees[] = {-1, -1, 3, 1, -1, 0}; + + return generic_degrees_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + h_vertices, + num_vertices_to_compute, + TRUE, + TRUE, + FALSE, + FALSE, + h_in_degrees, + h_out_degrees); +} + +int test_degrees_symmetric_subset() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_vertices_to_compute = 3; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_vertices[] = {2, 3, 5}; + vertex_t h_in_degrees[] = {-1, -1, 3, 3, -1, 2}; + vertex_t h_out_degrees[] = {-1, -1, 3, 3, -1, 2}; + + return generic_degrees_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + h_vertices, + num_vertices_to_compute, + TRUE, + TRUE, + FALSE, + TRUE, + h_in_degrees, + h_out_degrees); +} + +int test_in_degrees_subset() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + size_t num_vertices_to_compute = 3; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_vertices[] = {2, 3, 5}; + vertex_t h_in_degrees[] = {-1, -1, 0, 2, -1, 2}; + + return generic_degrees_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + h_vertices, + num_vertices_to_compute, + TRUE, + FALSE, + FALSE, + TRUE, + h_in_degrees, + NULL); +} + +int test_out_degrees_subset() +{ + size_t num_edges = 8; + size_t num_vertices = 6; + size_t num_vertices_to_compute = 3; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_vertices[] = {2, 3, 5}; + vertex_t h_out_degrees[] = {-1, -1, 3, 1, -1, 0}; + + return generic_degrees_test(h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + h_vertices, + num_vertices_to_compute, + FALSE, + TRUE, + FALSE, + TRUE, + NULL, + h_out_degrees); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + int result = 0; + result |= RUN_TEST(test_degrees); + result |= RUN_TEST(test_degrees_symmetric); + result |= RUN_TEST(test_in_degrees); + result |= RUN_TEST(test_out_degrees); + result |= RUN_TEST(test_degrees_subset); + result |= RUN_TEST(test_degrees_symmetric_subset); + result |= RUN_TEST(test_in_degrees_subset); + result |= RUN_TEST(test_out_degrees_subset); + return result; +} diff --git a/cpp/tests/c_api/mg_degrees_test.c b/cpp/tests/c_api/mg_degrees_test.c new file mode 100644 index 00000000000..3312dd4f5bb --- /dev/null +++ b/cpp/tests/c_api/mg_degrees_test.c @@ -0,0 +1,407 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "mg_test_utils.h" /* RUN_TEST */ + +#include +#include + +#include + +typedef int32_t vertex_t; +typedef int32_t edge_t; +typedef float weight_t; + +/* + * Simple check of creating a graph from a COO on device memory. + */ +int generic_degrees_test(const cugraph_resource_handle_t* handle, + vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + size_t num_vertices, + size_t num_edges, + vertex_t* h_vertices, + size_t num_vertices_to_compute, + bool_t in_degrees, + bool_t out_degrees, + bool_t store_transposed, + bool_t is_symmetric, + edge_t* h_in_degrees, + edge_t* h_out_degrees) +{ + int test_ret_value = 0; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_graph_t* graph = NULL; + cugraph_degrees_result_t* result = NULL; + + ret_code = create_mg_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, is_symmetric, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + if (h_vertices == NULL) { + if (in_degrees && out_degrees) { + ret_code = cugraph_degrees( + handle, graph, NULL, FALSE, &result, &ret_error); + } else if (in_degrees) { + ret_code = cugraph_in_degrees( + handle, graph, NULL, FALSE, &result, &ret_error); + } else { + ret_code = cugraph_out_degrees( + handle, graph, NULL, FALSE, &result, &ret_error); + } + + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_extract_degrees failed."); + } else { + cugraph_type_erased_device_array_t* vertices = NULL; + cugraph_type_erased_device_array_view_t* vertices_view = NULL; + + int rank = cugraph_resource_handle_get_rank(handle); + + size_t num_to_allocate = 0; + if (rank == 0) num_to_allocate = num_vertices_to_compute; + + ret_code = + cugraph_type_erased_device_array_create(handle, num_to_allocate, INT32, &vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "seeds create failed."); + + vertices_view = cugraph_type_erased_device_array_view(vertices); + + ret_code = cugraph_type_erased_device_array_view_copy_from_host( + handle, vertices_view, (byte_t*)h_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "src copy_from_host failed."); + + if (in_degrees && out_degrees) { + ret_code = cugraph_degrees( + handle, graph, vertices_view, FALSE, &result, &ret_error); + } else if (in_degrees) { + ret_code = cugraph_in_degrees( + handle, graph, vertices_view, FALSE, &result, &ret_error); + } else { + ret_code = cugraph_out_degrees( + handle, graph, vertices_view, FALSE, &result, &ret_error); + } + + TEST_ASSERT( + test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph_extract_degrees failed."); + } + + cugraph_type_erased_device_array_view_t* result_vertices; + cugraph_type_erased_device_array_view_t* result_in_degrees; + cugraph_type_erased_device_array_view_t* result_out_degrees; + + result_vertices = cugraph_degrees_result_get_vertices(result); + result_in_degrees = cugraph_degrees_result_get_in_degrees(result); + result_out_degrees = cugraph_degrees_result_get_out_degrees(result); + + size_t num_result_vertices = cugraph_type_erased_device_array_view_size(result_vertices); + + vertex_t h_result_vertices[num_result_vertices]; + edge_t h_result_in_degrees[num_result_vertices]; + edge_t h_result_out_degrees[num_result_vertices]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_vertices, result_vertices, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + if (result_in_degrees != NULL) { + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_in_degrees, result_in_degrees, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + } + + if (result_out_degrees != NULL) { + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_out_degrees, result_out_degrees, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + } + + if (h_vertices != NULL) { + size_t xxx = cugraph_size_t_allreduce(handle, num_result_vertices); + TEST_ASSERT(test_ret_value, cugraph_size_t_allreduce(handle, num_result_vertices) == num_vertices_to_compute, "results not the same size"); + } else { + size_t xxx = cugraph_size_t_allreduce(handle, num_result_vertices); + TEST_ASSERT(test_ret_value, cugraph_size_t_allreduce(handle, num_result_vertices) == num_vertices, "results not the same size"); + } + + for (size_t i = 0; (i < num_result_vertices) && (test_ret_value == 0); ++i) { + if (h_in_degrees != NULL) { + TEST_ASSERT(test_ret_value, h_result_in_degrees[i] == h_in_degrees[h_result_vertices[i]], "in degree did not match"); + } + + if (h_out_degrees != NULL) { + TEST_ASSERT(test_ret_value, h_result_out_degrees[i] == h_out_degrees[h_result_vertices[i]], "out degree did not match"); + } + } + + cugraph_degrees_result_free(result); + cugraph_graph_free(graph); + cugraph_error_free(ret_error); + return test_ret_value; +} + +int test_degrees(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_in_degrees[] = {1, 2, 0, 2, 1, 2}; + vertex_t h_out_degrees[] = {1, 2, 3, 1, 1, 0}; + + // Pagerank wants store_transposed = TRUE + return generic_degrees_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + NULL, + 0, + TRUE, + TRUE, + TRUE, + FALSE, + h_in_degrees, + h_out_degrees); +} + +int test_degrees_symmetric(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 16; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_in_degrees[] = {2, 4, 3, 3, 2, 2}; + vertex_t h_out_degrees[] = {2, 4, 3, 3, 2, 2}; + + // Pagerank wants store_transposed = TRUE + return generic_degrees_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + NULL, + 0, + TRUE, + TRUE, + TRUE, + TRUE, + h_in_degrees, + h_out_degrees); +} + +int test_in_degrees(const cugraph_resource_handle_t *handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_in_degrees[] = {1, 2, 0, 2, 1, 2}; + + return generic_degrees_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + NULL, + 0, + TRUE, + FALSE, + FALSE, + TRUE, + h_in_degrees, + NULL); +} + +int test_out_degrees(const cugraph_resource_handle_t *handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_out_degrees[] = {1, 2, 3, 1, 1, 0}; + + return generic_degrees_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + NULL, + 0, + FALSE, + TRUE, + FALSE, + TRUE, + NULL, + h_out_degrees); +} + +int test_degrees_subset(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + size_t num_vertices_to_compute = 3; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_vertices[] = {2, 3, 5}; + vertex_t h_in_degrees[] = {-1, -1, 0, 2, -1, 2}; + vertex_t h_out_degrees[] = {-1, -1, 3, 1, -1, 0}; + + return generic_degrees_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + h_vertices, + num_vertices_to_compute, + TRUE, + TRUE, + FALSE, + FALSE, + h_in_degrees, + h_out_degrees); +} + +int test_degrees_symmetric_subset(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_vertices_to_compute = 3; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f, + 0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_vertices[] = {2, 3, 5}; + vertex_t h_in_degrees[] = {-1, -1, 3, 3, -1, 2}; + vertex_t h_out_degrees[] = {-1, -1, 3, 3, -1, 2}; + + return generic_degrees_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + h_vertices, + num_vertices_to_compute, + TRUE, + TRUE, + FALSE, + TRUE, + h_in_degrees, + h_out_degrees); +} + +int test_in_degrees_subset(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + size_t num_vertices_to_compute = 3; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_vertices[] = {2, 3, 5}; + vertex_t h_in_degrees[] = {-1, -1, 0, 2, -1, 2}; + + return generic_degrees_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + h_vertices, + num_vertices_to_compute, + TRUE, + FALSE, + FALSE, + TRUE, + h_in_degrees, + NULL); +} + +int test_out_degrees_subset(const cugraph_resource_handle_t* handle) +{ + size_t num_edges = 8; + size_t num_vertices = 6; + size_t num_vertices_to_compute = 3; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_vertices[] = {2, 3, 5}; + vertex_t h_out_degrees[] = {-1, -1, 3, 1, -1, 0}; + + return generic_degrees_test(handle, + h_src, + h_dst, + h_wgt, + num_vertices, + num_edges, + h_vertices, + num_vertices_to_compute, + FALSE, + TRUE, + FALSE, + TRUE, + NULL, + h_out_degrees); +} + +/******************************************************************************/ + +int main(int argc, char** argv) +{ + void* raft_handle = create_mg_raft_handle(argc, argv); + cugraph_resource_handle_t* handle = cugraph_create_resource_handle(raft_handle); + + int result = 0; + result |= RUN_MG_TEST(test_degrees, handle); + result |= RUN_MG_TEST(test_degrees_symmetric, handle); + result |= RUN_MG_TEST(test_in_degrees, handle); + result |= RUN_MG_TEST(test_out_degrees, handle); + result |= RUN_MG_TEST(test_degrees_subset, handle); + result |= RUN_MG_TEST(test_degrees_symmetric_subset, handle); + result |= RUN_MG_TEST(test_in_degrees_subset, handle); + result |= RUN_MG_TEST(test_out_degrees_subset, handle); + + cugraph_free_resource_handle(handle); + free_mg_raft_handle(raft_handle); + + return result; +} diff --git a/cpp/tests/c_api/test_utils.cpp b/cpp/tests/c_api/test_utils.cpp index e37cc4555dd..3013cbb7cc6 100644 --- a/cpp/tests/c_api/test_utils.cpp +++ b/cpp/tests/c_api/test_utils.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,9 @@ */ #include "c_test_utils.h" +#include "c_api/resource_handle.hpp" + +#include #include @@ -388,3 +391,12 @@ int create_sg_test_graph(const cugraph_resource_handle_t* handle, return test_ret_value; } + +extern "C" size_t cugraph_size_t_allreduce(const cugraph_resource_handle_t* handle, size_t value) +{ + auto internal_handle = reinterpret_cast(handle); + return cugraph::host_scalar_allreduce(internal_handle->handle_->get_comms(), + value, + raft::comms::op_t::SUM, + internal_handle->handle_->get_stream()); +} diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index cdf1e937e67..0ef5eaf1b9e 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -12,7 +12,7 @@ # limitations under the License. import gc -from typing import Union +from typing import Union, Iterable import warnings import cudf @@ -28,10 +28,11 @@ GraphProperties, get_two_hop_neighbors as pylibcugraph_get_two_hop_neighbors, select_random_vertices as pylibcugraph_select_random_vertices, + degrees as pylibcugraph_degrees, + in_degrees as pylibcugraph_in_degrees, + out_degrees as pylibcugraph_out_degrees, ) -from cugraph.structure import graph_primtypes_wrapper -from cugraph.structure.graph_primtypes_wrapper import Direction from cugraph.structure.number_map import NumberMap from cugraph.structure.symmetrize import symmetrize from cugraph.dask.common.part_utils import ( @@ -536,7 +537,158 @@ def number_of_edges(self, directed_edges=False): raise RuntimeError("Graph is Empty") return self.properties.edge_count - def in_degree(self, vertex_subset=None): + def degrees_function( + self, + vertex_subset: Union[cudf.Series, dask_cudf.Series, Iterable] = None, + degree_type: str = "in_degree", + ) -> dask_cudf.DataFrame: + """ + Compute vertex in-degree, out-degree, degree and degrees. + + 1) Vertex in-degree is the number of edges pointing into the vertex. + 2) Vertex out-degree is the number of edges pointing out from the vertex. + 3) Vertex degree, is the total number of edges incident to a vertex + (both in and out edges) + 4) Vertex degrees computes vertex in-degree and out-degree. + + By default, this method computes vertex in-degree, out-degree, degree + or degrees for the entire set of vertices. If vertex_subset is provided, + this method optionally filters out all but those listed in + vertex_subset. + + Parameters + ---------- + vertex_subset : cudf.Series or dask_cudf.Series, iterable container, optional + A container of vertices for displaying corresponding in-degree. + If not set, degrees are computed for the entire set of vertices. + + degree_type : str (default='in_degree') + + Returns + ------- + df : dask_cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the in_degree, out_degrees, + degree or degrees. The ordering is relative to the adjacency list, + or that given by the specified vertex_subset. + + Examples + -------- + >>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ', + ... dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> df = G.degrees_function([0,9,12], "in_degree") + + """ + _client = default_client() + + def _call_plc_degrees_function( + sID: bytes, mg_graph_x, source_vertices: cudf.Series, degree_type: str + ) -> cp.array: + + if degree_type == "in_degree": + results = pylibcugraph_in_degrees( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + source_vertices=source_vertices, + do_expensive_check=False, + ) + elif degree_type == "out_degree": + results = pylibcugraph_out_degrees( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + source_vertices=source_vertices, + do_expensive_check=False, + ) + elif degree_type in ["degree", "degrees"]: + results = pylibcugraph_degrees( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + source_vertices=source_vertices, + do_expensive_check=False, + ) + else: + raise ValueError( + "Incorrect degree type passed, valid values are ", + "'in_degree', 'out_degree', 'degree' and 'degrees' ", + f"got '{degree_type}'", + ) + + return results + + if isinstance(vertex_subset, int): + vertex_subset = [vertex_subset] + + if isinstance(vertex_subset, list): + vertex_subset = cudf.Series(vertex_subset) + + if vertex_subset is not None: + if self.renumbered: + vertex_subset = self.renumber_map.to_internal_vertex_id(vertex_subset) + vertex_subset_type = self.edgelist.edgelist_df.dtypes.iloc[0] + else: + vertex_subset_type = self.input_df.dtypes.iloc[0] + + vertex_subset = vertex_subset.astype(vertex_subset_type) + + cupy_result = [ + _client.submit( + _call_plc_degrees_function, + Comms.get_session_id(), + self._plc_graph[w], + vertex_subset, + degree_type, + workers=[w], + allow_other_workers=False, + ) + for w in Comms.get_workers() + ] + + wait(cupy_result) + + def convert_to_cudf(cp_arrays: cp.ndarray, degree_type: bool) -> cudf.DataFrame: + """ + Creates a cudf DataFrame from cupy arrays from pylibcugraph wrapper + """ + df = cudf.DataFrame() + df["vertex"] = cp_arrays[0] + if degree_type in ["in_degree", "out_degree"]: + df["degree"] = cp_arrays[1] + # degree_type must be either 'degree' or 'degrees' + else: + if degree_type == "degrees": + df["in_degree"] = cp_arrays[1] + df["out_degree"] = cp_arrays[2] + else: + df["degree"] = cp_arrays[1] + cp_arrays[2] + return df + + cudf_result = [ + _client.submit( + convert_to_cudf, + cp_arrays, + degree_type, + workers=_client.who_has(cp_arrays)[cp_arrays.key], + ) + for cp_arrays in cupy_result + ] + + wait(cudf_result) + ddf = dask_cudf.from_delayed(cudf_result).persist() + wait(ddf) + + # Wait until the inactive futures are released + wait([(r.release(), c_r.release()) for r, c_r in zip(cupy_result, cudf_result)]) + + if self.properties.renumbered: + ddf = self.renumber_map.unrenumber(ddf, "vertex") + + return ddf + + def in_degree( + self, vertex_subset: Union[cudf.Series, dask_cudf.Series, Iterable] = None + ) -> dask_cudf.DataFrame: """ Compute vertex in-degree. Vertex in-degree is the number of edges pointing into the vertex. By default, this method computes vertex @@ -572,61 +724,11 @@ def in_degree(self, vertex_subset=None): >>> df = G.in_degree([0,9,12]) """ - src_col_name = self.source_columns - dst_col_name = self.destination_columns - - # select only the vertex columns - if not isinstance(src_col_name, list) and not isinstance(dst_col_name, list): - vertex_col_names = [src_col_name] + [dst_col_name] - - df = self.input_df[vertex_col_names] - df = df.drop(columns=src_col_name) - - nodes = self.nodes() - if isinstance(nodes, dask_cudf.Series): - nodes = nodes.to_frame() - - if not isinstance(dst_col_name, list): - df = df.rename(columns={dst_col_name: "vertex"}) - dst_col_name = "vertex" - - vertex_col_names = df.columns - nodes.columns = vertex_col_names - - df["degree"] = 1 - - # FIXME: leverage the C++ in_degree for optimal performance - in_degree = ( - df.groupby(dst_col_name) - .degree.count(split_out=df.npartitions) - .reset_index() - ) - - # Add vertices with zero in_degree - in_degree = nodes.merge(in_degree, how="outer").fillna(0) - - # Convert vertex_subset to dataframe. - if vertex_subset is not None: - if not isinstance(vertex_subset, (dask_cudf.DataFrame, cudf.DataFrame)): - if isinstance(vertex_subset, dask_cudf.Series): - vertex_subset = vertex_subset.to_frame() - else: - df = cudf.DataFrame() - if isinstance(vertex_subset, (cudf.Series, list)): - df["vertex"] = vertex_subset - vertex_subset = df - if isinstance(vertex_subset, (dask_cudf.DataFrame, cudf.DataFrame)): - vertex_subset.columns = vertex_col_names - in_degree = in_degree.merge(vertex_subset, how="inner") - else: - raise TypeError( - f"Expected type are: cudf, dask_cudf objects, " - f"iterable container, got " - f"{type(vertex_subset)}" - ) - return in_degree + return self.degrees_function(vertex_subset, "in_degree") - def out_degree(self, vertex_subset=None): + def out_degree( + self, vertex_subset: Union[cudf.Series, dask_cudf.Series, Iterable] = None + ) -> dask_cudf.DataFrame: """ Compute vertex out-degree. Vertex out-degree is the number of edges pointing out from the vertex. By default, this method computes vertex @@ -662,62 +764,11 @@ def out_degree(self, vertex_subset=None): >>> df = G.out_degree([0,9,12]) """ - src_col_name = self.source_columns - dst_col_name = self.destination_columns - - # select only the vertex columns - if not isinstance(src_col_name, list) and not isinstance(dst_col_name, list): - vertex_col_names = [src_col_name] + [dst_col_name] - - df = self.input_df[vertex_col_names] - df = df.drop(columns=dst_col_name) - - nodes = self.nodes() - if isinstance(nodes, dask_cudf.Series): - nodes = nodes.to_frame() - - if not isinstance(src_col_name, list): - df = df.rename(columns={src_col_name: "vertex"}) - src_col_name = "vertex" - - vertex_col_names = df.columns - - nodes.columns = vertex_col_names - - df["degree"] = 1 - # leverage the C++ out_degree for optimal performance - out_degree = ( - df.groupby(src_col_name) - .degree.count(split_out=df.npartitions) - .reset_index() - ) - - # Add vertices with zero out_degree - out_degree = nodes.merge(out_degree, how="outer").fillna(0) - - # Convert vertex_subset to dataframe. - if vertex_subset is not None: - if not isinstance(vertex_subset, (dask_cudf.DataFrame, cudf.DataFrame)): - if isinstance(vertex_subset, dask_cudf.Series): - vertex_subset = vertex_subset.to_frame() - else: - df = cudf.DataFrame() - if isinstance(vertex_subset, (cudf.Series, list)): - df["vertex"] = vertex_subset - vertex_subset = df - if isinstance(vertex_subset, (dask_cudf.DataFrame, cudf.DataFrame)): - vertex_subset.columns = vertex_col_names - out_degree = out_degree.merge(vertex_subset, how="inner") - else: - raise TypeError( - f"Expected type are: cudf, dask_cudf objects, " - f"iterable container, got " - f"{type(vertex_subset)}" - ) + return self.degrees_function(vertex_subset, "out_degree") - return out_degree - - def degree(self, vertex_subset=None): + def degree( + self, vertex_subset: Union[cudf.Series, dask_cudf.Series, Iterable] = None + ) -> dask_cudf.DataFrame: """ Compute vertex degree, which is the total number of edges incident to a vertex (both in and out edges). By default, this method computes @@ -754,18 +805,12 @@ def degree(self, vertex_subset=None): """ - vertex_in_degree = self.in_degree(vertex_subset) - vertex_out_degree = self.out_degree(vertex_subset) - # FIXME: leverage the C++ degree for optimal performance - vertex_degree = dask_cudf.concat([vertex_in_degree, vertex_out_degree]) - vertex_degree = vertex_degree.groupby(["vertex"], as_index=False).sum( - split_out=self.input_df.npartitions - ) - - return vertex_degree + return self.degrees_function(vertex_subset, "degree") # FIXME: vertex_subset could be a DataFrame for multi-column vertices - def degrees(self, vertex_subset=None): + def degrees( + self, vertex_subset: Union[cudf.Series, dask_cudf.Series, Iterable] = None + ) -> dask_cudf.DataFrame: """ Compute vertex in-degree and out-degree. By default, this method computes vertex degrees for the entire set of vertices. If @@ -802,21 +847,7 @@ def degrees(self, vertex_subset=None): >>> df = G.degrees([0,9,12]) """ - raise NotImplementedError("Not supported for distributed graph") - - def _degree(self, vertex_subset, direction=Direction.ALL): - vertex_col, degree_col = graph_primtypes_wrapper._mg_degree(self, direction) - df = cudf.DataFrame() - df["vertex"] = vertex_col - df["degree"] = degree_col - - if self.renumbered is True: - df = self.renumber_map.unrenumber(df, "vertex") - - if vertex_subset is not None: - df = df[df["vertex"].isin(vertex_subset)] - - return df + return self.degrees_function(vertex_subset, "degrees") def get_two_hop_neighbors(self, start_vertices=None): """ diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 121a4c6245a..99934e02b10 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -12,7 +12,6 @@ # limitations under the License. from cugraph.structure import graph_primtypes_wrapper -from cugraph.structure.graph_primtypes_wrapper import Direction from cugraph.structure.symmetrize import symmetrize from cugraph.structure.number_map import NumberMap import cugraph.dask.common.mg_utils as mg_utils @@ -23,10 +22,13 @@ import numpy as np import warnings from cugraph.dask.structure import replication -from typing import Union, Dict +from typing import Union, Dict, Iterable from pylibcugraph import ( get_two_hop_neighbors as pylibcugraph_get_two_hop_neighbors, select_random_vertices as pylibcugraph_select_random_vertices, + degrees as pylibcugraph_degrees, + in_degrees as pylibcugraph_in_degrees, + out_degrees as pylibcugraph_out_degrees, ) from pylibcugraph import ( @@ -854,7 +856,111 @@ def number_of_edges(self, directed_edges=False): raise ValueError("Graph is Empty") return self.properties.edge_count - def in_degree(self, vertex_subset=None): + def degrees_function( + self, + vertex_subset: Union[cudf.Series, Iterable] = None, + degree_type: str = "in_degree", + ) -> cudf.DataFrame: + """ + Compute vertex in-degree, out-degree, degree and degrees. + + 1) Vertex in-degree is the number of edges pointing into the vertex. + 2) Vertex out-degree is the number of edges pointing out from the vertex. + 3) Vertex degree, is the total number of edges incident to a vertex + (both in and out edges) + 4) Vertex degrees computes vertex in-degree and out-degree. + + By default, this method computes vertex in-degree, out-degree, degree + or degrees for the entire set of vertices. If vertex_subset is provided, + this method optionally filters out all but those listed in + vertex_subset. + + Parameters + ---------- + vertex_subset : cudf.Series or iterable container, optional + A container of vertices for displaying corresponding in-degree. + If not set, degrees are computed for the entire set of vertices. + + degree_type : str (default='in_degree') + + Returns + ------- + df : cudf.DataFrame + GPU DataFrame of size N (the default) or the size of the given + vertices (vertex_subset) containing the in_degree, out_degrees, + degree or degrees. The ordering is relative to the adjacency list, + or that given by the specified vertex_subset. + + Examples + -------- + >>> M = cudf.read_csv(datasets_path / 'karate.csv', delimiter=' ', + ... dtype=['int32', 'int32', 'float32'], header=None) + >>> G = cugraph.Graph() + >>> G.from_cudf_edgelist(M, '0', '1') + >>> df = G.degrees_function([0,9,12], "in_degree") + + """ + if vertex_subset is not None: + if not isinstance(vertex_subset, cudf.Series): + vertex_subset = cudf.Series(vertex_subset) + if self.properties.renumbered is True: + vertex_subset = self.renumber_map.to_internal_vertex_id( + vertex_subset + ) + vertex_subset_type = self.edgelist.edgelist_df.dtypes.iloc[0] + else: + vertex_subset_type = self.input_df.dtypes.iloc[0] + + vertex_subset = vertex_subset.astype(vertex_subset_type) + + do_expensive_check = False + df = cudf.DataFrame() + vertex = None + + if degree_type == "in_degree": + vertex, in_degrees = pylibcugraph_in_degrees( + resource_handle=ResourceHandle(), + graph=self._plc_graph, + source_vertices=vertex_subset, + do_expensive_check=do_expensive_check, + ) + df["degree"] = in_degrees + elif degree_type == "out_degree": + vertex, out_degrees = pylibcugraph_out_degrees( + resource_handle=ResourceHandle(), + graph=self._plc_graph, + source_vertices=vertex_subset, + do_expensive_check=do_expensive_check, + ) + df["degree"] = out_degrees + elif degree_type in ["degree", "degrees"]: + vertex, in_degrees, out_degrees = pylibcugraph_degrees( + resource_handle=ResourceHandle(), + graph=self._plc_graph, + source_vertices=vertex_subset, + do_expensive_check=do_expensive_check, + ) + if degree_type == "degrees": + df["in_degree"] = in_degrees + df["out_degree"] = out_degrees + + else: + df["degree"] = in_degrees + out_degrees + else: + raise ValueError( + "Incorrect degree type passed, valid values are ", + "'in_degree', 'out_degree', 'degree' and 'degrees' ", + f"got '{degree_type}'", + ) + df["vertex"] = vertex + if self.properties.renumbered is True: + df = self.renumber_map.unrenumber(df, "vertex") + + return df + + def in_degree( + self, vertex_subset: Union[cudf.Series, Iterable] = None + ) -> cudf.DataFrame: """ Compute vertex in-degree. Vertex in-degree is the number of edges pointing into the vertex. By default, this method computes vertex @@ -892,11 +998,11 @@ def in_degree(self, vertex_subset=None): >>> df = G.in_degree([0,9,12]) """ - in_degree = self._degree(vertex_subset, direction=Direction.IN) - - return in_degree + return self.degrees_function(vertex_subset, "in_degree") - def out_degree(self, vertex_subset=None): + def out_degree( + self, vertex_subset: Union[cudf.Series, Iterable] = None + ) -> cudf.DataFrame: """ Compute vertex out-degree. Vertex out-degree is the number of edges pointing out from the vertex. By default, this method computes vertex @@ -934,10 +1040,11 @@ def out_degree(self, vertex_subset=None): >>> df = G.out_degree([0,9,12]) """ - out_degree = self._degree(vertex_subset, direction=Direction.OUT) - return out_degree + return self.degrees_function(vertex_subset, "out_degree") - def degree(self, vertex_subset=None): + def degree( + self, vertex_subset: Union[cudf.Series, Iterable] = None + ) -> cudf.DataFrame: """ Compute vertex degree, which is the total number of edges incident to a vertex (both in and out edges). By default, this method computes @@ -976,10 +1083,12 @@ def degree(self, vertex_subset=None): >>> subset_df = G.degree([0,9,12]) """ - return self._degree(vertex_subset) + return self.degrees_function(vertex_subset, "degree") # FIXME: vertex_subset could be a DataFrame for multi-column vertices - def degrees(self, vertex_subset=None): + def degrees( + self, vertex_subset: Union[cudf.Series, Iterable] = None + ) -> cudf.DataFrame: """ Compute vertex in-degree and out-degree. By default, this method computes vertex degrees for the entire set of vertices. If @@ -1019,70 +1128,7 @@ def degrees(self, vertex_subset=None): >>> df = G.degrees([0,9,12]) """ - ( - vertex_col, - in_degree_col, - out_degree_col, - ) = graph_primtypes_wrapper._degrees(self) - - df = cudf.DataFrame() - df["vertex"] = vertex_col - df["in_degree"] = in_degree_col - df["out_degree"] = out_degree_col - - if self.properties.renumbered: - # Get the internal vertex IDs - nodes = self.renumber_map.df_internal_to_external["id"] - else: - nodes = self.nodes() - # If the vertex IDs are not contiguous, remove results for the - # isolated vertices - df = df[df["vertex"].isin(nodes.to_cupy())] - - if vertex_subset is not None: - if not isinstance(vertex_subset, cudf.Series): - vertex_subset = cudf.Series(vertex_subset) - if self.properties.renumbered: - vertex_subset = self.renumber_map.to_internal_vertex_id( - vertex_subset - ) - vertex_subset = vertex_subset.to_cupy() - df = df[df["vertex"].isin(vertex_subset)] - - if self.properties.renumbered: - df = self.renumber_map.unrenumber(df, "vertex") - - return df - - def _degree(self, vertex_subset, direction=Direction.ALL): - vertex_col, degree_col = graph_primtypes_wrapper._degree(self, direction) - df = cudf.DataFrame() - df["vertex"] = vertex_col - df["degree"] = degree_col - - if self.properties.renumbered: - # Get the internal vertex IDs - nodes = self.renumber_map.df_internal_to_external["id"] - else: - nodes = self.nodes() - # If the vertex IDs are not contiguous, remove results for the - # isolated vertices - df = df[df["vertex"].isin(nodes.to_cupy())] - - if vertex_subset is not None: - if not isinstance(vertex_subset, cudf.Series): - vertex_subset = cudf.Series(vertex_subset) - if self.properties.renumbered: - vertex_subset = self.renumber_map.to_internal_vertex_id( - vertex_subset - ) - vertex_subset = vertex_subset.to_cupy() - df = df[df["vertex"].isin(vertex_subset)] - - if self.properties.renumbered: - df = self.renumber_map.unrenumber(df, "vertex") - - return df + return self.degrees_function(vertex_subset, "degrees") def _make_plc_graph( self, diff --git a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py index a46f4b9463b..1bef1e0872b 100644 --- a/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_degree_centrality_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018-2023, NVIDIA CORPORATION. +# Copyright (c) 2018-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -18,7 +18,6 @@ import cudf import dask_cudf import cugraph -from cugraph.dask.common.mg_utils import is_single_gpu from cugraph.testing.utils import RAPIDS_DATASET_ROOT_DIR_PATH from cudf.testing import assert_series_equal @@ -41,7 +40,6 @@ def setup_function(): @pytest.mark.mg -@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize("directed", IS_DIRECTED) @pytest.mark.parametrize("data_file", DATA_PATH) def test_dask_mg_degree(dask_client, directed, data_file): diff --git a/python/pylibcugraph/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/pylibcugraph/CMakeLists.txt index c2e22fc1ff7..7cc90145949 100644 --- a/python/pylibcugraph/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/pylibcugraph/CMakeLists.txt @@ -1,5 +1,5 @@ # ============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except # in compliance with the License. You may obtain a copy of the License at @@ -57,6 +57,7 @@ set(cython_sources utils.pyx weakly_connected_components.pyx replicate_edgelist.pyx + degrees.pyx ) set(linked_libraries cugraph::cugraph;cugraph::cugraph_c) diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index ab518e24cae..dcdef05e106 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -95,6 +95,8 @@ from pylibcugraph.sorensen_coefficients import sorensen_coefficients +from pylibcugraph.degrees import in_degrees, out_degrees, degrees + from pylibcugraph import exceptions diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd index 90bc041e5f0..6f1ac1f640b 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/graph_functions.pxd @@ -182,3 +182,58 @@ cdef extern from "cugraph_c/graph_functions.h": cugraph_induced_subgraph_result_t** result, cugraph_error_t** error ) + + ########################################################################### + # degrees + ctypedef struct cugraph_degrees_result_t: + pass + + cdef cugraph_error_code_t \ + cugraph_in_degrees( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error + ) + + cdef cugraph_error_code_t \ + cugraph_out_degrees( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error + ) + + cdef cugraph_error_code_t \ + cugraph_degrees( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* source_vertices, + bool_t do_expensive_check, + cugraph_degrees_result_t** result, + cugraph_error_t** error + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_degrees_result_get_vertices( + cugraph_degrees_result_t* degrees_result + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_degrees_result_get_in_degrees( + cugraph_degrees_result_t* degrees_result + ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_degrees_result_get_out_degrees( + cugraph_degrees_result_t* degrees_result + ) + + cdef void \ + cugraph_degrees_result_free( + cugraph_degrees_result_t* degrees_result + ) diff --git a/python/pylibcugraph/pylibcugraph/degrees.pyx b/python/pylibcugraph/pylibcugraph/degrees.pyx new file mode 100644 index 00000000000..7818da441bd --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/degrees.pyx @@ -0,0 +1,307 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# Have cython use python 3 syntax +# cython: language_level = 3 + +from libc.stdint cimport uintptr_t + +from pylibcugraph._cugraph_c.resource_handle cimport ( + bool_t, + data_type_id_t, + cugraph_resource_handle_t, +) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, +) +from pylibcugraph._cugraph_c.graph cimport ( + cugraph_graph_t, +) +from pylibcugraph._cugraph_c.graph_functions cimport ( + cugraph_degrees_result_t, + cugraph_degrees, + cugraph_in_degrees, + cugraph_out_degrees, + cugraph_degrees_result_get_vertices, + cugraph_degrees_result_get_in_degrees, + cugraph_degrees_result_get_out_degrees, + cugraph_degrees_result_free, +) +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.graphs cimport ( + _GPUGraph, +) +from pylibcugraph.utils cimport ( + assert_success, + copy_to_cupy_array, + assert_CAI_type, + create_cugraph_type_erased_device_array_view_from_py_obj, +) + + +def in_degrees(ResourceHandle resource_handle, + _GPUGraph graph, + source_vertices, + bool_t do_expensive_check): + """ + Compute the in degrees for the nodes of the graph. + + Parameters + ---------- + resource_handle : ResourceHandle + Handle to the underlying device resources needed for referencing data + and running algorithms. + + graph : SGGraph or MGGraph + The input graph, for either Single or Multi-GPU operations. + + source_vertices : cupy array + The nodes for which we will compute degrees. + + do_expensive_check : bool_t + A flag to run expensive checks for input arguments if True. + + Returns + ------- + A tuple of device arrays, where the first item in the tuple is a device + array containing the vertices, the second item in the tuple is a device + array containing the in degrees for the vertices. + + Examples + -------- + >>> import pylibcugraph, cupy, numpy + >>> srcs = cupy.asarray([0, 1, 2], dtype=numpy.int32) + >>> dsts = cupy.asarray([1, 2, 3], dtype=numpy.int32) + >>> weights = cupy.asarray([1.0, 1.0, 1.0], dtype=numpy.float32) + >>> resource_handle = pylibcugraph.ResourceHandle() + >>> graph_props = pylibcugraph.GraphProperties( + ... is_symmetric=False, is_multigraph=False) + >>> G = pylibcugraph.SGGraph( + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, + ... store_transposed=True, renumber=False, do_expensive_check=False) + >>> (vertices, in_degrees) = pylibcugraph.in_degrees( + resource_handle, G, None, False) + + """ + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ + resource_handle.c_resource_handle_ptr + cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr + + cdef cugraph_degrees_result_t* result_ptr + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + assert_CAI_type(source_vertices, "source_vertices", True) + + cdef cugraph_type_erased_device_array_view_t* \ + source_vertices_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + source_vertices) + + error_code = cugraph_in_degrees(c_resource_handle_ptr, + c_graph_ptr, + source_vertices_ptr, + do_expensive_check, + &result_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "cugraph_in_degrees") + + # Extract individual device array pointers from result and copy to cupy + # arrays for returning. + cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \ + cugraph_degrees_result_get_vertices(result_ptr) + cdef cugraph_type_erased_device_array_view_t* in_degrees_ptr = \ + cugraph_degrees_result_get_in_degrees(result_ptr) + + cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) + cupy_in_degrees = copy_to_cupy_array(c_resource_handle_ptr, in_degrees_ptr) + + cugraph_degrees_result_free(result_ptr) + + return (cupy_vertices, cupy_in_degrees) + +def out_degrees(ResourceHandle resource_handle, + _GPUGraph graph, + source_vertices, + bool_t do_expensive_check): + """ + Compute the out degrees for the nodes of the graph. + + Parameters + ---------- + resource_handle : ResourceHandle + Handle to the underlying device resources needed for referencing data + and running algorithms. + + graph : SGGraph or MGGraph + The input graph, for either Single or Multi-GPU operations. + + source_vertices : cupy array + The nodes for which we will compute degrees. + + do_expensive_check : bool_t + A flag to run expensive checks for input arguments if True. + + Returns + ------- + A tuple of device arrays, where the first item in the tuple is a device + array containing the vertices, the second item in the tuple is a device + array containing the out degrees for the vertices. + + Examples + -------- + >>> import pylibcugraph, cupy, numpy + >>> srcs = cupy.asarray([0, 1, 2], dtype=numpy.int32) + >>> dsts = cupy.asarray([1, 2, 3], dtype=numpy.int32) + >>> weights = cupy.asarray([1.0, 1.0, 1.0], dtype=numpy.float32) + >>> resource_handle = pylibcugraph.ResourceHandle() + >>> graph_props = pylibcugraph.GraphProperties( + ... is_symmetric=False, is_multigraph=False) + >>> G = pylibcugraph.SGGraph( + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, + ... store_transposed=True, renumber=False, do_expensive_check=False) + >>> (vertices, out_degrees) = pylibcugraph.out_degrees( + resource_handle, G, None, False) + + """ + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ + resource_handle.c_resource_handle_ptr + cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr + + cdef cugraph_degrees_result_t* result_ptr + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + assert_CAI_type(source_vertices, "source_vertices", True) + + cdef cugraph_type_erased_device_array_view_t* \ + source_vertices_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + source_vertices) + + error_code = cugraph_out_degrees(c_resource_handle_ptr, + c_graph_ptr, + source_vertices_ptr, + do_expensive_check, + &result_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "cugraph_out_degrees") + + # Extract individual device array pointers from result and copy to cupy + # arrays for returning. + cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \ + cugraph_degrees_result_get_vertices(result_ptr) + cdef cugraph_type_erased_device_array_view_t* out_degrees_ptr = \ + cugraph_degrees_result_get_out_degrees(result_ptr) + + cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) + cupy_out_degrees = copy_to_cupy_array(c_resource_handle_ptr, out_degrees_ptr) + + cugraph_degrees_result_free(result_ptr) + + return (cupy_vertices, cupy_out_degrees) + + +def degrees(ResourceHandle resource_handle, + _GPUGraph graph, + source_vertices, + bool_t do_expensive_check): + """ + Compute the degrees for the nodes of the graph. + + Parameters + ---------- + resource_handle : ResourceHandle + Handle to the underlying device resources needed for referencing data + and running algorithms. + + graph : SGGraph or MGGraph + The input graph, for either Single or Multi-GPU operations. + + source_vertices : cupy array + The nodes for which we will compute degrees. + + do_expensive_check : bool_t + A flag to run expensive checks for input arguments if True. + + Returns + ------- + A tuple of device arrays, where the first item in the tuple is a device + array containing the vertices, the second item in the tuple is a device + array containing the in degrees for the vertices, the third item in the + tuple is a device array containing the out degrees for the vertices. + + Examples + -------- + >>> import pylibcugraph, cupy, numpy + >>> srcs = cupy.asarray([0, 1, 2], dtype=numpy.int32) + >>> dsts = cupy.asarray([1, 2, 3], dtype=numpy.int32) + >>> weights = cupy.asarray([1.0, 1.0, 1.0], dtype=numpy.float32) + >>> resource_handle = pylibcugraph.ResourceHandle() + >>> graph_props = pylibcugraph.GraphProperties( + ... is_symmetric=False, is_multigraph=False) + >>> G = pylibcugraph.SGGraph( + ... resource_handle, graph_props, srcs, dsts, weight_array=weights, + ... store_transposed=True, renumber=False, do_expensive_check=False) + >>> (vertices, in_degrees, out_degrees) = pylibcugraph.degrees( + resource_handle, G, None, False) + + """ + + cdef cugraph_resource_handle_t* c_resource_handle_ptr = \ + resource_handle.c_resource_handle_ptr + cdef cugraph_graph_t* c_graph_ptr = graph.c_graph_ptr + + cdef cugraph_degrees_result_t* result_ptr + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + + assert_CAI_type(source_vertices, "source_vertices", True) + + cdef cugraph_type_erased_device_array_view_t* \ + source_vertices_ptr = \ + create_cugraph_type_erased_device_array_view_from_py_obj( + source_vertices) + + error_code = cugraph_degrees(c_resource_handle_ptr, + c_graph_ptr, + source_vertices_ptr, + do_expensive_check, + &result_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "cugraph_degrees") + + # Extract individual device array pointers from result and copy to cupy + # arrays for returning. + cdef cugraph_type_erased_device_array_view_t* vertices_ptr = \ + cugraph_degrees_result_get_vertices(result_ptr) + cdef cugraph_type_erased_device_array_view_t* in_degrees_ptr = \ + cugraph_degrees_result_get_in_degrees(result_ptr) + cdef cugraph_type_erased_device_array_view_t* out_degrees_ptr = \ + cugraph_degrees_result_get_out_degrees(result_ptr) + + cupy_vertices = copy_to_cupy_array(c_resource_handle_ptr, vertices_ptr) + cupy_in_degrees = copy_to_cupy_array(c_resource_handle_ptr, in_degrees_ptr) + cupy_out_degrees = copy_to_cupy_array(c_resource_handle_ptr, out_degrees_ptr) + + cugraph_degrees_result_free(result_ptr) + + return (cupy_vertices, cupy_in_degrees, cupy_out_degrees) From 120e5b892d5396f10ceb5d957de01c8d74a2e781 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 12 Mar 2024 20:10:13 -0700 Subject: [PATCH 5/5] Enable edge masking in the remaining primitives (#4186) This PR will update all the remaining primitives to support edge masking. This PR pulls updates from https://github.com/rapidsai/cugraph/pull/4126 and better be reviewed/merged after PR https://github.com/rapidsai/cugraph/pull/4126. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) - Joseph Nke (https://github.com/jnke2016) URL: https://github.com/rapidsai/cugraph/pull/4186 --- cpp/include/cugraph/graph_view.hpp | 4 +- cpp/include/cugraph/utilities/misc_utils.cuh | 8 + cpp/src/prims/detail/nbr_intersection.cuh | 11 +- ...m_reduce_dst_key_aggregated_outgoing_e.cuh | 529 +++++++++++----- ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 24 +- .../transform_reduce_e_by_src_dst_key.cuh | 292 +++++++-- cpp/tests/CMakeLists.txt | 33 +- ...rm_reduce_dst_key_aggregated_outgoing_e.cu | 599 ++++++++++++++++++ ..._v_transform_reduce_incoming_outgoing_e.cu | 84 +-- cpp/tests/prims/mg_reduce_v.cu | 47 +- ...st_nbr_intersection_of_e_endpoints_by_v.cu | 289 +++++++++ cpp/tests/prims/mg_transform_reduce_e.cu | 41 +- .../mg_transform_reduce_e_by_src_dst_key.cu | 495 +++++++++++++++ cpp/tests/prims/mg_transform_reduce_v.cu | 47 +- cpp/tests/prims/result_compare.cuh | 143 +++++ 15 files changed, 2189 insertions(+), 457 deletions(-) create mode 100644 cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu create mode 100644 cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu create mode 100644 cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu create mode 100644 cpp/tests/prims/result_compare.cuh diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index 3f3514179bf..cbb52ef3b1e 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -613,7 +613,7 @@ class graph_view_tnumber_of_vertices()); } - // FIXME: deprecated, replaced with copmute_number_of_edges (which works with or without edge + // FIXME: deprecated, replaced with compute_number_of_edges (which works with or without edge // masking) edge_t number_of_edges() const { diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index d3917a3e851..633dabe5b40 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -94,6 +94,14 @@ thrust::optional to_thrust_optional(std::optional val) return ret; } +template +std::optional to_std_optional(thrust::optional val) +{ + std::optional ret{std::nullopt}; + if (val) { ret = *val; } + return ret; +} + template rmm::device_uvector expand_sparse_offsets(raft::device_span offsets, idx_t base_idx, diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index e0a04eb59da..847c1db6937 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -50,6 +50,7 @@ #include #include #include +#include #include #include #include @@ -1232,9 +1233,11 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_sizes.size() + 1, handle.get_stream()); rx_v_pair_nbr_intersection_offsets.set_element_to_zero_async(size_t{0}, handle.get_stream()); + auto size_first = thrust::make_transform_iterator( + rx_v_pair_nbr_intersection_sizes.begin(), cugraph::detail::typecast_t{}); thrust::inclusive_scan(handle.get_thrust_policy(), - rx_v_pair_nbr_intersection_sizes.begin(), - rx_v_pair_nbr_intersection_sizes.end(), + size_first, + size_first + rx_v_pair_nbr_intersection_sizes.size(), rx_v_pair_nbr_intersection_offsets.begin() + 1); rx_v_pair_nbr_intersection_indices.resize( @@ -1344,8 +1347,8 @@ nbr_intersection(raft::handle_t const& handle, } thrust::inclusive_scan(handle.get_thrust_policy(), - rx_v_pair_nbr_intersection_sizes.begin(), - rx_v_pair_nbr_intersection_sizes.end(), + size_first, + size_first + rx_v_pair_nbr_intersection_sizes.size(), rx_v_pair_nbr_intersection_offsets.begin() + 1); std::vector h_rx_v_pair_lasts(rx_v_pair_counts.size()); diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 3b25ae50773..5e4cd81513e 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -16,6 +16,7 @@ #pragma once #include "detail/graph_partition_utils.cuh" +#include "prims/detail/optional_dataframe_buffer.hpp" #include "prims/kv_store.cuh" #include "utilities/collect_comm.cuh" @@ -83,15 +84,23 @@ struct rebase_offset_t { // a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used template -struct triplet_to_minor_comm_rank_t { +struct tuple_to_minor_comm_rank_t { compute_vertex_partition_id_from_ext_vertex_t key_func{}; int minor_comm_size{}; - __device__ int operator()( + template + __device__ std::enable_if_t, int> operator()( thrust::tuple val /* major, minor key, edge value */) const { return key_func(thrust::get<1>(val)) % minor_comm_size; } + + template + __device__ std::enable_if_t, int> operator()( + thrust::tuple val /* major, minor key */) const + { + return key_func(thrust::get<1>(val)) % minor_comm_size; + } }; // a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used @@ -106,6 +115,7 @@ struct pair_to_binary_partition_id_t { // a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used template - val /* major, minor key, aggregated edge value */) const + template + __device__ std::enable_if_t, e_op_result_t> + operator()(thrust::tuple + val /* major, minor key, aggregated edge value */) const { auto major = thrust::get<0>(val); auto minor_key = thrust::get<1>(val); @@ -131,6 +143,20 @@ struct call_key_aggregated_e_op_t { return key_aggregated_e_op( major, minor_key, major_val, edge_minor_key_value_map.find(minor_key), aggregated_edge_value); } + + template + __device__ std::enable_if_t, e_op_result_t> + operator()(thrust::tuple val /* major, minor key */) const + { + auto major = thrust::get<0>(val); + auto minor_key = thrust::get<1>(val); + auto major_val = edge_major_value_map + ? (*edge_major_value_map).find(major) + : edge_partition_major_value_input.get( + edge_partition.major_offset_from_major_nocheck(major)); + return key_aggregated_e_op( + major, minor_key, major_val, edge_minor_key_value_map.find(minor_key), thrust::nullopt); + } }; // a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used @@ -182,9 +208,8 @@ struct reduce_with_init_t { * @tparam EdgeSrcValueInputWrapper Type of the wrapper for edge source property values. * @tparam EdgeValueInputWrapper Type of the wrapper for edge property values. * @tparam EdgeDstKeyInputWrapper Type of the wrapper for edge destination key values. - * @tparam VertexIterator Type of the iterator for keys in (key, value) pairs (key type should - * coincide with vertex type). - * @tparam ValueIterator Type of the iterator for values in (key, value) pairs. + * @tparam KVStoreViewType Type of the (key, value) store. Key type should coincide with vertex + * type. * @tparam KeyAggregatedEdgeOp Type of the quinary key-aggregated edge operator. * @tparam ReduceOp Type of the binary reduction operator. * @tparam T Type of the initial value for per-vertex reduction. @@ -204,15 +229,10 @@ struct reduce_with_init_t { * @param edge_dst_key_input Wrapper used to access destination input key values (for the edge * destinations assigned to this process in multi-GPU). Use cugraph::edge_dst_property_t::view(). * Use update_edge_dst_property to fill the wrapper. - * @param map_unique_key_first Iterator pointing to the first (inclusive) key in (key, value) pairs - * (assigned to this process in multi-GPU, `cugraph::detail::compute_gpu_id_from_ext_vertex_t` is - * used to map keys to processes). (Key, value) pairs may be provided by - * transform_reduce_by_src_key_e() or transform_reduce_by_dst_key_e(). - * @param map_unique_key_last Iterator pointing to the last (exclusive) key in (key, value) pairs - * (assigned to this process in multi-GPU). - * @param map_value_first Iterator pointing to the first (inclusive) value in (key, value) pairs - * (assigned to this process in multi-GPU). `map_value_last` (exclusive) is deduced as @p - * map_value_first + thrust::distance(@p map_unique_key_first, @p map_unique_key_last). + * @param kv_store_view view object of the (key, value) store (for the keys assigned to this process + * in multi-GPU). `cugraph::detail::compute_gpu_id_from_ext_vertex_t` is used to map keys to + * processes). (Key, value) pairs may be provided by transform_reduce_e_by_src_key() or + * transform_reduce_e_by_dst_key(). * @param key_aggregated_e_op Quinary operator takes 1) edge source, 2) key, 3) *(@p * edge_partition_src_value_input_first + i), 4) value for the key stored in the input (key, value) * pairs provided by @p map_unique_key_first, @p map_unique_key_last, and @p map_value_first @@ -263,8 +283,11 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( using edge_src_value_t = typename EdgeSrcValueInputWrapper::value_type; using edge_value_t = typename EdgeValueInputWrapper::value_type; using kv_pair_value_t = typename KVStoreViewType::value_type; + using optional_edge_value_buffer_value_type = + std::conditional_t, edge_value_t, void>; + static_assert( - std::is_arithmetic_v, + std::is_same_v || std::is_arithmetic_v, "Currently only scalar values are supported, should be extended to support thrust::tuple of " "arithmetic types and void (for dummy property values) to be consistent with other " "primitives."); // this will also require a custom edge value aggregation op. @@ -284,16 +307,15 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( detail::edge_partition_edge_dummy_property_device_view_t, detail::edge_partition_edge_property_device_view_t< edge_t, - typename EdgeValueInputWrapper::value_iterator>>; - - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + typename EdgeValueInputWrapper::value_iterator, + typename EdgeValueInputWrapper::value_type>>; if (do_expensive_check) { /* currently, nothing to do */ } auto total_global_mem = handle.get_device_properties().totalGlobalMem; size_t element_size = sizeof(vertex_t) * 2; // major + minor keys - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { static_assert(is_arithmetic_or_thrust_tuple_of_arithmetic::value); if constexpr (is_thrust_tuple_of_arithmetic::value) { element_size += sum_thrust_tuple_element_sizes(); @@ -317,24 +339,78 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( // 1. aggregate each vertex out-going edges based on keys and transform-reduce. + auto edge_mask_view = graph_view.edge_mask_view(); + rmm::device_uvector majors(0, handle.get_stream()); auto e_op_result_buffer = allocate_dataframe_buffer(0, handle.get_stream()); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; auto edge_partition_src_value_input = edge_partition_src_input_device_view_t(edge_src_value_input, i); auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); - rmm::device_uvector tmp_majors(edge_partition.number_of_edges(), handle.get_stream()); + std::optional> offsets_with_mask{std::nullopt}; + if (edge_partition_e_mask) { + rmm::device_uvector degrees_with_mask(0, handle.get_stream()); + if (edge_partition.dcs_nzd_vertices()) { + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); + + auto major_sparse_range_size = + (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition]; + degrees_with_mask = rmm::device_uvector( + major_sparse_range_size + *(edge_partition.dcs_nzd_vertex_count()), handle.get_stream()); + auto major_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(vertex_t{0}), + cuda::proclaim_return_type( + [major_sparse_range_size, + major_range_first = edge_partition.major_range_first(), + dcs_nzd_vertices = *(edge_partition.dcs_nzd_vertices())] __device__(vertex_t i) { + if (i < major_sparse_range_size) { // sparse + return major_range_first + i; + } else { // hypersparse + return *(dcs_nzd_vertices + (i - major_sparse_range_size)); + } + })); + degrees_with_mask = + edge_partition.compute_local_degrees_with_mask((*edge_partition_e_mask).value_first(), + major_first, + major_first + degrees_with_mask.size(), + handle.get_stream()); + } else { + degrees_with_mask = edge_partition.compute_local_degrees_with_mask( + (*edge_partition_e_mask).value_first(), + thrust::make_counting_iterator(edge_partition.major_range_first()), + thrust::make_counting_iterator(edge_partition.major_range_last()), + handle.get_stream()); + } + offsets_with_mask = + rmm::device_uvector(degrees_with_mask.size() + 1, handle.get_stream()); + (*offsets_with_mask).set_element_to_zero_async(0, handle.get_stream()); + thrust::inclusive_scan(handle.get_thrust_policy(), + degrees_with_mask.begin(), + degrees_with_mask.end(), + (*offsets_with_mask).begin() + 1); + } + + rmm::device_uvector tmp_majors( + edge_partition_e_mask ? (*offsets_with_mask).back_element(handle.get_stream()) + : edge_partition.number_of_edges(), + handle.get_stream()); rmm::device_uvector tmp_minor_keys(tmp_majors.size(), handle.get_stream()); - // FIXME: this doesn't work if edge_value_t is thrust::tuple or void - rmm::device_uvector tmp_key_aggregated_edge_values(tmp_majors.size(), - handle.get_stream()); + auto tmp_key_aggregated_edge_values = + detail::allocate_optional_dataframe_buffer( + tmp_majors.size(), handle.get_stream()); - if (edge_partition.number_of_edges() > 0) { + if (tmp_majors.size() > 0) { auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); detail::decompress_edge_partition_to_fill_edgelist_majors( handle, edge_partition, - std::nullopt, + detail::to_std_optional(edge_partition_e_mask), raft::device_span(tmp_majors.data(), tmp_majors.size()), segment_offsets); @@ -357,14 +433,14 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - raft::device_span{ - edge_partition.offsets(), - 1 + static_cast( - edge_partition.dcs_nzd_vertices() - ? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] + - *(edge_partition.dcs_nzd_vertex_count()) - : edge_partition.major_range_size())}, - edge_partition.number_of_edges(), + raft::device_span( + offsets_with_mask ? (*offsets_with_mask).data() : edge_partition.offsets(), + (edge_partition.dcs_nzd_vertices() + ? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] + + *(edge_partition.dcs_nzd_vertex_count()) + : edge_partition.major_range_size()) + + 1), + static_cast(tmp_majors.size()), approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; @@ -376,30 +452,69 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( rmm::device_uvector unreduced_majors(max_chunk_size, handle.get_stream()); rmm::device_uvector unreduced_minor_keys(unreduced_majors.size(), handle.get_stream()); - // FIXME: this doesn't work if edge_value_t is thrust::tuple or void - rmm::device_uvector unreduced_key_aggregated_edge_values( - unreduced_majors.size(), handle.get_stream()); + auto unreduced_key_aggregated_edge_values = + detail::allocate_optional_dataframe_buffer( + unreduced_majors.size(), handle.get_stream()); rmm::device_uvector d_tmp_storage(0, handle.get_stream()); size_t reduced_size{0}; for (size_t j = 0; j < num_chunks; ++j) { - thrust::copy(handle.get_thrust_policy(), - minor_key_first + h_edge_offsets[j], - minor_key_first + h_edge_offsets[j + 1], - tmp_minor_keys.begin() + h_edge_offsets[j]); + if (edge_partition_e_mask) { + std::array unmasked_ranges{}; + raft::update_host(unmasked_ranges.data(), + edge_partition.offsets() + h_vertex_offsets[j], + 1, + handle.get_stream()); + raft::update_host(unmasked_ranges.data() + 1, + edge_partition.offsets() + h_vertex_offsets[j + 1], + 1, + handle.get_stream()); + handle.sync_stream(); + if constexpr (!std::is_same_v) { + detail::copy_if_mask_set( + handle, + thrust::make_zip_iterator(minor_key_first, + edge_partition_e_value_input.value_first()) + + unmasked_ranges[0], + thrust::make_zip_iterator(minor_key_first, + edge_partition_e_value_input.value_first()) + + unmasked_ranges[1], + (*edge_partition_e_mask).value_first() + unmasked_ranges[0], + thrust::make_zip_iterator(tmp_minor_keys.begin(), + detail::get_optional_dataframe_buffer_begin( + tmp_key_aggregated_edge_values)) + + h_edge_offsets[j]); + } else { + detail::copy_if_mask_set(handle, + minor_key_first + unmasked_ranges[0], + minor_key_first + unmasked_ranges[1], + (*edge_partition_e_mask).value_first() + unmasked_ranges[0], + tmp_minor_keys.begin() + h_edge_offsets[j]); + } + } else { + thrust::copy(handle.get_thrust_policy(), + minor_key_first + h_edge_offsets[j], + minor_key_first + h_edge_offsets[j + 1], + tmp_minor_keys.begin() + h_edge_offsets[j]); + } size_t tmp_storage_bytes{0}; - auto offset_first = - thrust::make_transform_iterator(edge_partition.offsets() + h_vertex_offsets[j], - detail::rebase_offset_t{h_edge_offsets[j]}); - if constexpr (!std::is_same_v) { + auto offset_first = thrust::make_transform_iterator( + (offsets_with_mask ? (*offsets_with_mask).data() : edge_partition.offsets()) + + h_vertex_offsets[j], + detail::rebase_offset_t{h_edge_offsets[j]}); + if constexpr (!std::is_same_v) { cub::DeviceSegmentedSort::SortPairs( static_cast(nullptr), tmp_storage_bytes, tmp_minor_keys.begin() + h_edge_offsets[j], unreduced_minor_keys.begin(), - edge_partition_e_value_input.value_first() + h_edge_offsets[j], - unreduced_key_aggregated_edge_values.begin(), + (edge_partition_e_mask ? detail::get_optional_dataframe_buffer_begin( + tmp_key_aggregated_edge_values) + : edge_partition_e_value_input.value_first()) + + h_edge_offsets[j], + detail::get_optional_dataframe_buffer_begin( + unreduced_key_aggregated_edge_values), h_edge_offsets[j + 1] - h_edge_offsets[j], h_vertex_offsets[j + 1] - h_vertex_offsets[j], offset_first, @@ -419,14 +534,18 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( if (tmp_storage_bytes > d_tmp_storage.size()) { d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); } - if constexpr (!std::is_same_v) { + if constexpr (!std::is_same_v) { cub::DeviceSegmentedSort::SortPairs( d_tmp_storage.data(), tmp_storage_bytes, tmp_minor_keys.begin() + h_edge_offsets[j], unreduced_minor_keys.begin(), - edge_partition_e_value_input.value_first() + h_edge_offsets[j], - unreduced_key_aggregated_edge_values.begin(), + (edge_partition_e_mask ? detail::get_optional_dataframe_buffer_begin( + tmp_key_aggregated_edge_values) + : edge_partition_e_value_input.value_first()) + + h_edge_offsets[j], + detail::get_optional_dataframe_buffer_begin( + unreduced_key_aggregated_edge_values), h_edge_offsets[j + 1] - h_edge_offsets[j], h_vertex_offsets[j + 1] - h_vertex_offsets[j], offset_first, @@ -448,39 +567,44 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( tmp_majors.begin() + h_edge_offsets[j], tmp_majors.begin() + h_edge_offsets[j + 1], unreduced_majors.begin()); - auto input_key_first = thrust::make_zip_iterator( - thrust::make_tuple(unreduced_majors.begin(), unreduced_minor_keys.begin())); + auto input_key_first = + thrust::make_zip_iterator(unreduced_majors.begin(), unreduced_minor_keys.begin()); auto output_key_first = - thrust::make_zip_iterator(thrust::make_tuple(tmp_majors.begin(), tmp_minor_keys.begin())); - if constexpr (!std::is_same_v) { + thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin()); + if constexpr (!std::is_same_v) { reduced_size += thrust::distance(output_key_first + reduced_size, thrust::get<0>(thrust::reduce_by_key( handle.get_thrust_policy(), input_key_first, input_key_first + (h_edge_offsets[j + 1] - h_edge_offsets[j]), - unreduced_key_aggregated_edge_values.begin(), + detail::get_optional_dataframe_buffer_begin( + unreduced_key_aggregated_edge_values), output_key_first + reduced_size, - tmp_key_aggregated_edge_values.begin() + reduced_size))); + detail::get_optional_dataframe_buffer_begin( + tmp_key_aggregated_edge_values) + + reduced_size))); } else { - reduced_size += - thrust::distance(output_key_first + reduced_size, - thrust::get<0>(thrust::unique( - handle.get_thrust_policy(), - input_key_first, - input_key_first + (h_edge_offsets[j + 1] - h_edge_offsets[j]), - output_key_first + reduced_size))); + reduced_size += thrust::distance( + output_key_first + reduced_size, + thrust::copy_if( + handle.get_thrust_policy(), + input_key_first, + input_key_first + (h_edge_offsets[j + 1] - h_edge_offsets[j]), + thrust::make_counting_iterator(size_t{0}), + output_key_first + reduced_size, + cugraph::detail::is_first_in_run_t{input_key_first})); } } tmp_majors.resize(reduced_size, handle.get_stream()); tmp_minor_keys.resize(tmp_majors.size(), handle.get_stream()); - // FIXME: this doesn't work if edge_value_t is thrust::tuple or void - tmp_key_aggregated_edge_values.resize(tmp_majors.size(), handle.get_stream()); + detail::resize_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, tmp_majors.size(), handle.get_stream()); } tmp_majors.shrink_to_fit(handle.get_stream()); tmp_minor_keys.shrink_to_fit(handle.get_stream()); - // FIXME: this doesn't work if edge_value_t is thrust::tuple or void - tmp_key_aggregated_edge_values.shrink_to_fit(handle.get_stream()); + detail::shrink_to_fit_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, handle.get_stream()); std::unique_ptr< kv_store_t{ - detail::compute_vertex_partition_id_from_ext_vertex_t{comm_size}, - minor_comm_size}, - minor_comm_size, - mem_frugal_threshold, - handle.get_stream()); + rmm::device_uvector d_tx_value_counts(0, handle.get_stream()); + if constexpr (!std::is_same_v) { + auto triplet_first = + thrust::make_zip_iterator(tmp_majors.begin(), + tmp_minor_keys.begin(), + detail::get_optional_dataframe_buffer_begin( + tmp_key_aggregated_edge_values)); + d_tx_value_counts = cugraph::groupby_and_count( + triplet_first, + triplet_first + tmp_majors.size(), + detail::tuple_to_minor_comm_rank_t{ + detail::compute_vertex_partition_id_from_ext_vertex_t{comm_size}, + minor_comm_size}, + minor_comm_size, + mem_frugal_threshold, + handle.get_stream()); + } else { + auto pair_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin()); + d_tx_value_counts = cugraph::groupby_and_count( + pair_first, + pair_first + tmp_majors.size(), + detail::tuple_to_minor_comm_rank_t{ + detail::compute_vertex_partition_id_from_ext_vertex_t{comm_size}, + minor_comm_size}, + minor_comm_size, + mem_frugal_threshold, + handle.get_stream()); + } std::vector h_tx_value_counts(d_tx_value_counts.size()); raft::update_host(h_tx_value_counts.data(), @@ -544,8 +684,7 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( thrust::copy( handle.get_thrust_policy(), tmp_majors.begin(), tmp_majors.end(), majors.begin()); - auto pair_first = - thrust::make_zip_iterator(thrust::make_tuple(minor_comm_ranks.begin(), majors.begin())); + auto pair_first = thrust::make_zip_iterator(minor_comm_ranks.begin(), majors.begin()); thrust::sort( handle.get_thrust_policy(), pair_first, pair_first + minor_comm_ranks.size()); auto unique_pair_last = thrust::unique( @@ -622,7 +761,9 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( rmm::device_uvector rx_majors(0, handle.get_stream()); rmm::device_uvector rx_minor_keys(0, handle.get_stream()); - rmm::device_uvector rx_key_aggregated_edge_values(0, handle.get_stream()); + auto rx_key_aggregated_edge_values = + detail::allocate_optional_dataframe_buffer( + 0, handle.get_stream()); auto mem_frugal_flag = host_scalar_allreduce(minor_comm, tmp_majors.size() > mem_frugal_threshold ? int{1} : int{0}, @@ -639,66 +780,120 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( tmp_minor_keys.resize(0, handle.get_stream()); tmp_minor_keys.shrink_to_fit(handle.get_stream()); - std::tie(rx_key_aggregated_edge_values, std::ignore) = - shuffle_values(minor_comm, - tmp_key_aggregated_edge_values.begin(), - h_tx_value_counts, - handle.get_stream()); - tmp_key_aggregated_edge_values.resize(0, handle.get_stream()); - tmp_key_aggregated_edge_values.shrink_to_fit(handle.get_stream()); + if constexpr (!std::is_same_v) { + std::tie(rx_key_aggregated_edge_values, std::ignore) = + shuffle_values(minor_comm, + detail::get_optional_dataframe_buffer_begin( + tmp_key_aggregated_edge_values), + h_tx_value_counts, + handle.get_stream()); + } + detail::resize_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, 0, handle.get_stream()); + detail::shrink_to_fit_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, handle.get_stream()); } else { - std::forward_as_tuple(std::tie(rx_majors, rx_minor_keys, rx_key_aggregated_edge_values), - std::ignore) = - shuffle_values(minor_comm, triplet_first, h_tx_value_counts, handle.get_stream()); + if constexpr (!std::is_same_v) { + auto triplet_first = + thrust::make_zip_iterator(tmp_majors.begin(), + tmp_minor_keys.begin(), + detail::get_optional_dataframe_buffer_begin( + tmp_key_aggregated_edge_values)); + std::forward_as_tuple(std::tie(rx_majors, rx_minor_keys, rx_key_aggregated_edge_values), + std::ignore) = + shuffle_values(minor_comm, triplet_first, h_tx_value_counts, handle.get_stream()); + } else { + auto pair_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin()); + std::forward_as_tuple(std::tie(rx_majors, rx_minor_keys), std::ignore) = + shuffle_values(minor_comm, pair_first, h_tx_value_counts, handle.get_stream()); + } tmp_majors.resize(0, handle.get_stream()); tmp_majors.shrink_to_fit(handle.get_stream()); tmp_minor_keys.resize(0, handle.get_stream()); tmp_minor_keys.shrink_to_fit(handle.get_stream()); - tmp_key_aggregated_edge_values.resize(0, handle.get_stream()); - tmp_key_aggregated_edge_values.shrink_to_fit(handle.get_stream()); + detail::resize_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, 0, handle.get_stream()); + detail::shrink_to_fit_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, handle.get_stream()); } - auto key_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(rx_majors.begin(), rx_minor_keys.begin())); - if (rx_majors.size() > mem_frugal_threshold) { // trade-off parallelism to lower peak memory - auto second_first = - detail::mem_frugal_partition(key_pair_first, - key_pair_first + rx_majors.size(), - rx_key_aggregated_edge_values.begin(), - detail::pair_to_binary_partition_id_t{}, - int{1}, - handle.get_stream()); - - thrust::sort_by_key(handle.get_thrust_policy(), - key_pair_first, - std::get<0>(second_first), - rx_key_aggregated_edge_values.begin()); - - thrust::sort_by_key(handle.get_thrust_policy(), - std::get<0>(second_first), - key_pair_first + rx_majors.size(), - std::get<1>(second_first)); + auto key_pair_first = thrust::make_zip_iterator(rx_majors.begin(), rx_minor_keys.begin()); + if constexpr (!std::is_same_v) { + if (rx_majors.size() > + mem_frugal_threshold) { // trade-off parallelism to lower peak memory + auto second_first = + detail::mem_frugal_partition(key_pair_first, + key_pair_first + rx_majors.size(), + detail::get_optional_dataframe_buffer_begin( + rx_key_aggregated_edge_values), + detail::pair_to_binary_partition_id_t{}, + int{1}, + handle.get_stream()); + + thrust::sort_by_key(handle.get_thrust_policy(), + key_pair_first, + std::get<0>(second_first), + detail::get_optional_dataframe_buffer_begin( + rx_key_aggregated_edge_values)); + + thrust::sort_by_key(handle.get_thrust_policy(), + std::get<0>(second_first), + key_pair_first + rx_majors.size(), + std::get<1>(second_first)); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + key_pair_first, + key_pair_first + rx_majors.size(), + detail::get_optional_dataframe_buffer_begin( + rx_key_aggregated_edge_values)); + } + + auto num_uniques = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(rx_majors.size()), + detail::is_first_in_run_t{key_pair_first}); + tmp_majors.resize(num_uniques, handle.get_stream()); + tmp_minor_keys.resize(tmp_majors.size(), handle.get_stream()); + detail::resize_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, tmp_majors.size(), handle.get_stream()); + thrust::reduce_by_key( + handle.get_thrust_policy(), + key_pair_first, + key_pair_first + rx_majors.size(), + detail::get_optional_dataframe_buffer_begin(rx_key_aggregated_edge_values), + thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin()), + detail::get_optional_dataframe_buffer_begin( + tmp_key_aggregated_edge_values)); } else { - thrust::sort_by_key(handle.get_thrust_policy(), - key_pair_first, - key_pair_first + rx_majors.size(), - rx_key_aggregated_edge_values.begin()); + if (rx_majors.size() > + mem_frugal_threshold) { // trade-off parallelism to lower peak memory + auto second_first = + detail::mem_frugal_partition(key_pair_first, + key_pair_first + rx_majors.size(), + detail::pair_to_binary_partition_id_t{}, + int{1}, + handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), key_pair_first, second_first); + + thrust::sort(handle.get_thrust_policy(), second_first, key_pair_first + rx_majors.size()); + } else { + thrust::sort( + handle.get_thrust_policy(), key_pair_first, key_pair_first + rx_majors.size()); + } + + auto num_uniques = thrust::distance( + key_pair_first, + thrust::unique( + handle.get_thrust_policy(), key_pair_first, key_pair_first + rx_majors.size())); + tmp_majors.resize(num_uniques, handle.get_stream()); + tmp_minor_keys.resize(tmp_majors.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + key_pair_first, + key_pair_first + num_uniques, + thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin())); } - auto num_uniques = - thrust::count_if(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(rx_majors.size()), - detail::is_first_in_run_t{key_pair_first}); - tmp_majors.resize(num_uniques, handle.get_stream()); - tmp_minor_keys.resize(tmp_majors.size(), handle.get_stream()); - tmp_key_aggregated_edge_values.resize(tmp_majors.size(), handle.get_stream()); - thrust::reduce_by_key( - handle.get_thrust_policy(), - key_pair_first, - key_pair_first + rx_majors.size(), - rx_key_aggregated_edge_values.begin(), - thrust::make_zip_iterator(thrust::make_tuple(tmp_majors.begin(), tmp_minor_keys.begin())), - tmp_key_aggregated_edge_values.begin()); } std::unique_ptr> @@ -756,8 +951,6 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( auto tmp_e_op_result_buffer = allocate_dataframe_buffer(tmp_majors.size(), handle.get_stream()); - auto triplet_first = thrust::make_zip_iterator(thrust::make_tuple( - tmp_majors.begin(), tmp_minor_keys.begin(), tmp_key_aggregated_edge_values.begin())); auto major_value_map_device_view = (GraphViewType::is_multi_gpu && edge_src_value_input.keys()) ? thrust::make_optional> dst_key_value_map_device_view( GraphViewType::is_multi_gpu ? multi_gpu_minor_key_value_map_ptr->view() : kv_store_view); - thrust::transform(handle.get_thrust_policy(), - triplet_first, - triplet_first + tmp_majors.size(), - get_dataframe_buffer_begin(tmp_e_op_result_buffer), - detail::call_key_aggregated_e_op_t< - vertex_t, - edge_value_t, - decltype(edge_partition), - std::remove_reference_t, - edge_partition_src_input_device_view_t, - decltype(dst_key_value_map_device_view), - KeyAggregatedEdgeOp>{edge_partition, - major_value_map_device_view, - edge_partition_src_value_input, - dst_key_value_map_device_view, - key_aggregated_e_op}); + if constexpr (!std::is_same_v) { + auto triplet_first = thrust::make_zip_iterator( + tmp_majors.begin(), + tmp_minor_keys.begin(), + detail::get_optional_dataframe_buffer_begin(tmp_key_aggregated_edge_values)); + thrust::transform(handle.get_thrust_policy(), + triplet_first, + triplet_first + tmp_majors.size(), + get_dataframe_buffer_begin(tmp_e_op_result_buffer), + detail::call_key_aggregated_e_op_t< + vertex_t, + edge_value_t, + T, + decltype(edge_partition), + std::remove_reference_t, + edge_partition_src_input_device_view_t, + decltype(dst_key_value_map_device_view), + KeyAggregatedEdgeOp>{edge_partition, + major_value_map_device_view, + edge_partition_src_value_input, + dst_key_value_map_device_view, + key_aggregated_e_op}); + } else { + auto pair_first = thrust::make_zip_iterator(tmp_majors.begin(), tmp_minor_keys.begin()); + thrust::transform(handle.get_thrust_policy(), + pair_first, + pair_first + tmp_majors.size(), + get_dataframe_buffer_begin(tmp_e_op_result_buffer), + detail::call_key_aggregated_e_op_t< + vertex_t, + edge_value_t, + T, + decltype(edge_partition), + std::remove_reference_t, + edge_partition_src_input_device_view_t, + decltype(dst_key_value_map_device_view), + KeyAggregatedEdgeOp>{edge_partition, + major_value_map_device_view, + edge_partition_src_value_input, + dst_key_value_map_device_view, + key_aggregated_e_op}); + } if constexpr (GraphViewType::is_multi_gpu) { multi_gpu_minor_key_value_map_ptr.reset(); } tmp_minor_keys.resize(0, handle.get_stream()); tmp_minor_keys.shrink_to_fit(handle.get_stream()); - tmp_key_aggregated_edge_values.resize(0, handle.get_stream()); - tmp_key_aggregated_edge_values.shrink_to_fit(handle.get_stream()); + detail::resize_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, 0, handle.get_stream()); + detail::shrink_to_fit_optional_dataframe_buffer( + tmp_key_aggregated_edge_values, handle.get_stream()); { auto num_uniques = diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index b63b014ed05..244586e6d9e 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -19,6 +19,7 @@ #include "prims/detail/nbr_intersection.cuh" #include "prims/property_op_utils.cuh" +#include #include #include #include @@ -130,7 +131,9 @@ std::tuple, ValueBuffer> sort_and_reduce_by_vertic vertices.end(), get_dataframe_buffer_begin(value_buffer), reduced_vertices.begin(), - get_dataframe_buffer_begin(reduced_value_buffer)); + get_dataframe_buffer_begin(reduced_value_buffer), + thrust::equal_to{}, + property_op{}); vertices.resize(size_t{0}, handle.get_stream()); resize_dataframe_buffer(value_buffer, size_t{0}, handle.get_stream()); @@ -201,14 +204,14 @@ struct accumulate_vertex_property_t { * @param graph_view Non-owning graph object. * @param edge_src_value_input Wrapper used to access source input property values (for the edge * sources assigned to this process in multi-GPU). Use either cugraph::edge_src_property_t::view() - * (if @p e_op needs to access source property values) or cugraph::edge_src_dummy_property_t::view() - * (if @p e_op does not access source property values). Use update_edge_src_property to fill the - * wrapper. + * (if @p intersection_op needs to access source property values) or + * cugraph::edge_src_dummy_property_t::view() (if @p intersection_op does not access source property + * values). Use update_edge_src_property to fill the wrapper. * @param edge_dst_value_input Wrapper used to access destination input property values (for the * edge destinations assigned to this process in multi-GPU). Use either - * cugraph::edge_dst_property_t::view() (if @p e_op needs to access destination property values) or - * cugraph::edge_dst_dummy_property_t::view() (if @p e_op does not access destination property - * values). Use update_edge_dst_property to fill the wrapper. + * cugraph::edge_dst_property_t::view() (if @p intersection_op needs to access destination property + * values) or cugraph::edge_dst_dummy_property_t::view() (if @p intersection_op does not access + * destination property values). Use update_edge_dst_property to fill the wrapper. * @param intersection_op quinary operator takes edge source, edge destination, property values for * the source, property values for the destination, and a list of vertices in the intersection of * edge source & destination vertices' destination neighbors and returns a thrust::tuple of three @@ -260,8 +263,6 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( typename EdgeDstValueInputWrapper::value_iterator, typename EdgeDstValueInputWrapper::value_type>>; - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (do_expensive_check) { // currently, nothing to do. } @@ -272,6 +273,7 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( init); auto edge_mask_view = graph_view.edge_mask_view(); + for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( @@ -484,7 +486,9 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( merged_vertices.end(), get_dataframe_buffer_begin(merged_value_buffer), reduced_vertices.begin(), - get_dataframe_buffer_begin(reduced_value_buffer)); + get_dataframe_buffer_begin(reduced_value_buffer), + thrust::equal_to{}, + property_op{}); merged_vertices.resize(size_t{0}, handle.get_stream()); merged_vertices.shrink_to_fit(handle.get_stream()); resize_dataframe_buffer(merged_value_buffer, size_t{0}, handle.get_stream()); diff --git a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh index eee0ed03d1c..00876012906 100644 --- a/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh +++ b/cpp/src/prims/transform_reduce_e_by_src_dst_key.cuh @@ -95,6 +95,7 @@ template __global__ static void transform_reduce_by_src_dst_key_hypersparse( @@ -105,6 +106,9 @@ __global__ static void transform_reduce_by_src_dst_key_hypersparse( EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, + EdgePartitionEdgeMaskWrapper edge_partition_e_mask, + thrust::optional> + edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, ValueIterator value_iter) @@ -129,19 +133,42 @@ __global__ static void transform_reduce_by_src_dst_key_hypersparse( edge_t local_degree{}; thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(static_cast(major_idx)); - auto local_offset = edge_partition.local_offset(major_idx); - for (edge_t i = 0; i < local_degree; ++i) { - update_buffer_element(edge_partition, - major, - indices[i], - edge_offset + i, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_src_dst_key_input, - e_op, - keys + local_offset + i, - value_iter + local_offset + i); + if (edge_partition_e_mask) { + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + auto edge_offset_with_mask = (*edge_offsets_with_mask)[major_offset]; + edge_t counter{0}; + for (edge_t i = 0; i < local_degree; ++i) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { + update_buffer_element( + edge_partition, + major, + indices[i], + edge_offset + i, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_src_dst_key_input, + e_op, + keys + edge_offset_with_mask + counter, + value_iter + edge_offset_with_mask + counter); + ++counter; + } + } + } else { + for (edge_t i = 0; i < local_degree; ++i) { + update_buffer_element( + edge_partition, + major, + indices[i], + edge_offset + i, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_src_dst_key_input, + e_op, + keys + edge_offset + i, + value_iter + edge_offset + i); + } } idx += gridDim.x * blockDim.x; @@ -154,6 +181,7 @@ template __global__ static void transform_reduce_by_src_dst_key_low_degree( @@ -166,6 +194,9 @@ __global__ static void transform_reduce_by_src_dst_key_low_degree( EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, + EdgePartitionEdgeMaskWrapper edge_partition_e_mask, + thrust::optional> + edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, ValueIterator value_iter) @@ -187,19 +218,41 @@ __global__ static void transform_reduce_by_src_dst_key_low_degree( edge_t local_degree{}; thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(static_cast(major_offset)); - auto local_offset = edge_partition.local_offset(major_offset); - for (edge_t i = 0; i < local_degree; ++i) { - update_buffer_element(edge_partition, - major, - indices[i], - edge_offset + i, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_src_dst_key_input, - e_op, - keys + local_offset + i, - value_iter + local_offset + i); + if (edge_partition_e_mask) { + auto edge_offset_with_mask = (*edge_offsets_with_mask)[major_offset]; + edge_t counter{0}; + for (edge_t i = 0; i < local_degree; ++i) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { + update_buffer_element( + edge_partition, + major, + indices[i], + edge_offset + i, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_src_dst_key_input, + e_op, + keys + edge_offset_with_mask + counter, + value_iter + edge_offset_with_mask + counter); + ++counter; + } + } + } else { + for (edge_t i = 0; i < local_degree; ++i) { + update_buffer_element( + edge_partition, + major, + indices[i], + edge_offset + i, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_src_dst_key_input, + e_op, + keys + edge_offset + i, + value_iter + edge_offset + i); + } } idx += gridDim.x * blockDim.x; @@ -212,6 +265,7 @@ template __global__ static void transform_reduce_by_src_dst_key_mid_degree( @@ -224,6 +278,9 @@ __global__ static void transform_reduce_by_src_dst_key_mid_degree( EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, + EdgePartitionEdgeMaskWrapper edge_partition_e_mask, + thrust::optional> + edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, ValueIterator value_iter) @@ -238,6 +295,9 @@ __global__ static void transform_reduce_by_src_dst_key_mid_degree( static_cast(major_range_first - edge_partition.major_range_first()); size_t idx = static_cast(tid / raft::warp_size()); + using WarpScan = cub::WarpScan; + __shared__ typename WarpScan::TempStorage temp_storage; + while (idx < static_cast(major_range_last - major_range_first)) { auto major_offset = major_start_offset + idx; auto major = @@ -247,19 +307,49 @@ __global__ static void transform_reduce_by_src_dst_key_mid_degree( edge_t local_degree{}; thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(static_cast(major_offset)); - auto local_offset = edge_partition.local_offset(major_offset); - for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { - update_buffer_element(edge_partition, - major, - indices[i], - edge_offset + i, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_src_dst_key_input, - e_op, - keys + local_offset + i, - value_iter + local_offset + i); + if (edge_partition_e_mask) { + // FIXME: it might be faster to update in warp-sync way + auto edge_offset_with_mask = (*edge_offsets_with_mask)[major_offset]; + edge_t counter{0}; + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { ++counter; } + } + edge_t offset_within_warp{}; + WarpScan(temp_storage).ExclusiveSum(counter, offset_within_warp); + edge_offset_with_mask += offset_within_warp; + counter = 0; + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { + update_buffer_element( + edge_partition, + major, + indices[i], + edge_offset + i, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_src_dst_key_input, + e_op, + keys + edge_offset_with_mask + counter, + value_iter + edge_offset_with_mask + counter); + ++counter; + } + } + } else { + for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) { + update_buffer_element( + edge_partition, + major, + indices[i], + edge_offset + i, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_src_dst_key_input, + e_op, + keys + edge_offset + i, + value_iter + edge_offset + i); + } } idx += gridDim.x * (blockDim.x / raft::warp_size()); @@ -272,6 +362,7 @@ template __global__ static void transform_reduce_by_src_dst_key_high_degree( @@ -284,6 +375,9 @@ __global__ static void transform_reduce_by_src_dst_key_high_degree( EdgePartitionDstValueInputWrapper edge_partition_dst_value_input, EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input, EdgePartitionSrcDstKeyInputWrapper edge_partition_src_dst_key_input, + EdgePartitionEdgeMaskWrapper edge_partition_e_mask, + thrust::optional> + edge_offsets_with_mask, EdgeOp e_op, typename GraphViewType::vertex_type* keys, ValueIterator value_iter) @@ -295,6 +389,9 @@ __global__ static void transform_reduce_by_src_dst_key_high_degree( static_cast(major_range_first - edge_partition.major_range_first()); auto idx = static_cast(blockIdx.x); + using BlockScan = cub::BlockScan; + __shared__ typename BlockScan::TempStorage temp_storage; + while (idx < static_cast(major_range_last - major_range_first)) { auto major_offset = major_start_offset + idx; auto major = @@ -304,19 +401,49 @@ __global__ static void transform_reduce_by_src_dst_key_high_degree( edge_t local_degree{}; thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(static_cast(major_offset)); - auto local_offset = edge_partition.local_offset(major_offset); - for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { - update_buffer_element(edge_partition, - major, - indices[i], - edge_offset + i, - edge_partition_src_value_input, - edge_partition_dst_value_input, - edge_partition_e_value_input, - edge_partition_src_dst_key_input, - e_op, - keys + local_offset + i, - value_iter + local_offset + i); + if (edge_partition_e_mask) { + // FIXME: it might be faster to update in block-sync way + auto edge_offset_with_mask = (*edge_offsets_with_mask)[major_offset]; + edge_t counter{0}; + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { ++counter; } + } + edge_t offset_within_block{}; + BlockScan(temp_storage).ExclusiveSum(counter, offset_within_block); + edge_offset_with_mask += offset_within_block; + counter = 0; + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + if ((*edge_partition_e_mask).get(edge_offset + i)) { + update_buffer_element( + edge_partition, + major, + indices[i], + edge_offset + i, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_src_dst_key_input, + e_op, + keys + edge_offset_with_mask + counter, + value_iter + edge_offset_with_mask + counter); + ++counter; + } + } + } else { + for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) { + update_buffer_element( + edge_partition, + major, + indices[i], + edge_offset + i, + edge_partition_src_value_input, + edge_partition_dst_value_input, + edge_partition_e_value_input, + edge_partition_src_dst_key_input, + e_op, + keys + edge_offset + i, + value_iter + edge_offset + i); + } } idx += gridDim.x; @@ -410,19 +537,41 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, typename EdgeSrcDstKeyInputWrapper::value_iterator, typename EdgeSrcDstKeyInputWrapper::value_type>; + auto edge_mask_view = graph_view.edge_mask_view(); + rmm::device_uvector keys(0, handle.get_stream()); auto value_buffer = allocate_dataframe_buffer(0, handle.get_stream()); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); - - auto num_edges = edge_partition.number_of_edges(); - - rmm::device_uvector tmp_keys(num_edges, handle.get_stream()); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; + + rmm::device_uvector tmp_keys(0, handle.get_stream()); + std::optional> edge_offsets_with_mask{std::nullopt}; + if (edge_partition_e_mask) { + auto local_degrees = edge_partition.compute_local_degrees_with_mask( + (*edge_partition_e_mask).value_first(), handle.get_stream()); + edge_offsets_with_mask = + rmm::device_uvector(edge_partition.major_range_size() + 1, handle.get_stream()); + (*edge_offsets_with_mask).set_element_to_zero_async(0, handle.get_stream()); + thrust::inclusive_scan(handle.get_thrust_policy(), + local_degrees.begin(), + local_degrees.end(), + (*edge_offsets_with_mask).begin() + 1); + tmp_keys.resize((*edge_offsets_with_mask).back_element(handle.get_stream()), + handle.get_stream()); + } else { + tmp_keys.resize(edge_partition.number_of_edges(), handle.get_stream()); + } auto tmp_value_buffer = allocate_dataframe_buffer(tmp_keys.size(), handle.get_stream()); - if (num_edges > 0) { + if (tmp_keys.size() > 0) { edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; if constexpr (GraphViewType::is_storage_transposed) { @@ -467,6 +616,11 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_dst_value_input, edge_partition_e_value_input, edge_partition_src_dst_key_input, + edge_partition_e_mask, + edge_offsets_with_mask + ? thrust::make_optional>( + (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) + : thrust::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -485,6 +639,11 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_dst_value_input, edge_partition_e_value_input, edge_partition_src_dst_key_input, + edge_partition_e_mask, + edge_offsets_with_mask + ? thrust::make_optional>( + (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) + : thrust::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -503,6 +662,11 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_dst_value_input, edge_partition_e_value_input, edge_partition_src_dst_key_input, + edge_partition_e_mask, + edge_offsets_with_mask + ? thrust::make_optional>( + (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) + : thrust::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -520,6 +684,11 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_dst_value_input, edge_partition_e_value_input, edge_partition_src_dst_key_input, + edge_partition_e_mask, + edge_offsets_with_mask + ? thrust::make_optional>( + (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) + : thrust::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -539,6 +708,11 @@ transform_reduce_e_by_src_dst_key(raft::handle_t const& handle, edge_partition_dst_value_input, edge_partition_e_value_input, edge_partition_src_dst_key_input, + edge_partition_e_mask, + edge_offsets_with_mask + ? thrust::make_optional>( + (*edge_offsets_with_mask).data(), (*edge_offsets_with_mask).size()) + : thrust::nullopt, e_op, tmp_keys.data(), get_dataframe_buffer_begin(tmp_value_buffer)); @@ -682,8 +856,6 @@ auto transform_reduce_e_by_src_key(raft::handle_t const& handle, typename GraphViewType::vertex_type>::value); static_assert(ReduceOp::pure_function, "ReduceOp should be a pure function."); - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (do_expensive_check) { // currently, nothing to do } @@ -772,8 +944,6 @@ auto transform_reduce_e_by_dst_key(raft::handle_t const& handle, typename GraphViewType::vertex_type>::value); static_assert(ReduceOp::pure_function, "ReduceOp should be a pure function."); - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (do_expensive_check) { // currently, nothing to do } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index c84711e1a69..4d37c93326d 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -396,12 +396,10 @@ ConfigureTest(RANDOM_WALKS_TEST sampling/sg_random_walks_test.cpp) ################################################################################################### # - NBR SAMPLING tests ---------------------------------------------------------------------------- ConfigureTest(UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/sg_uniform_neighbor_sampling.cu) -target_link_libraries(UNIFORM_NEIGHBOR_SAMPLING_TEST PRIVATE cuco::cuco) ################################################################################################### # - SAMPLING_POST_PROCESSING tests ---------------------------------------------------------------- ConfigureTest(SAMPLING_POST_PROCESSING_TEST sampling/sampling_post_processing_test.cu) -target_link_libraries(SAMPLING_POST_PROCESSING_TEST PRIVATE cuco::cuco) ################################################################################################### # - Renumber tests -------------------------------------------------------------------------------- @@ -583,78 +581,79 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG PRIMS COUNT_IF_V tests ----------------------------------------------------------------- ConfigureTestMG(MG_COUNT_IF_V_TEST prims/mg_count_if_v.cu) - target_link_libraries(MG_COUNT_IF_V_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST tests ------------------------------ ConfigureTestMG(MG_TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST_TEST prims/mg_transform_reduce_v_frontier_outgoing_e_by_dst.cu) - target_link_libraries(MG_TRANSFORM_REDUCE_V_FRONTIER_OUTGOING_E_BY_DST_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS REDUCE_V tests ------------------------------------------------------------------- ConfigureTestMG(MG_REDUCE_V_TEST prims/mg_reduce_v.cu) - target_link_libraries(MG_REDUCE_V_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS TRANSFORM_REDUCE_V tests --------------------------------------------------------- ConfigureTestMG(MG_TRANSFORM_REDUCE_V_TEST prims/mg_transform_reduce_v.cu) - target_link_libraries(MG_TRANSFORM_REDUCE_V_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS TRANSFORM_REDUCE_E tests --------------------------------------------------------- ConfigureTestMG(MG_TRANSFORM_REDUCE_E_TEST prims/mg_transform_reduce_e.cu) - target_link_libraries(MG_TRANSFORM_REDUCE_E_TEST PRIVATE cuco::cuco) + + ############################################################################################### + # - MG PRIMS TRANSFORM_REDUCE_E _BY_SRC_DST_KEY tests ----------------------------------------- + ConfigureTestMG(MG_TRANSFORM_REDUCE_E_BY_SRC_DST_KEY_TEST + prims/mg_transform_reduce_e_by_src_dst_key.cu) ############################################################################################### # - MG PRIMS TRANSFORM_E tests ---------------------------------------------------------------- ConfigureTestMG(MG_TRANSFORM_E_TEST prims/mg_transform_e.cu) - target_link_libraries(MG_TRANSFORM_E_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS COUNT_IF_E tests ----------------------------------------------------------------- ConfigureTestMG(MG_COUNT_IF_E_TEST prims/mg_count_if_e.cu) - target_link_libraries(MG_COUNT_IF_E_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS PER_V_TRANSFORM_REDUCE_INCOMING_OUTGOING_E tests --------------------------------- ConfigureTestMG(MG_PER_V_TRANSFORM_REDUCE_INCOMING_OUTGOING_E_TEST prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu) - target_link_libraries(MG_PER_V_TRANSFORM_REDUCE_INCOMING_OUTGOING_E_TEST PRIVATE cuco::cuco) + + ############################################################################################### + # - MG PRIMS PER_V_TRANSFORM_REDUCE_DST_KEY_AGGREGATED_OUTGOING_E tests ----------------------- + ConfigureTestMG(MG_PER_V_TRANSFORM_REDUCE_DST_KEY_AGGREGATED_OUTGOING_E_TEST + prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu) ############################################################################################### # - MG PRIMS EXTRACT_TRANSFORM_E tests -------------------------------------------------------- ConfigureTestMG(MG_EXTRACT_TRANSFORM_E_TEST prims/mg_extract_transform_e.cu) - target_link_libraries(MG_EXTRACT_TRANSFORM_E_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E tests ------------------------------------ ConfigureTestMG(MG_EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E_TEST prims/mg_extract_transform_v_frontier_outgoing_e.cu) - target_link_libraries(MG_EXTRACT_TRANSFORM_V_FRONTIER_OUTGOING_E_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E tests ----------------------------------- ConfigureTestMG(MG_PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E_TEST prims/mg_per_v_random_select_transform_outgoing_e.cu) - target_link_libraries(MG_PER_V_RANDOM_SELECT_TRANSFORM_OUTGOING_E_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG PRIMS PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION tests ---------------------------------- ConfigureTestMG(MG_PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION_TEST prims/mg_per_v_pair_transform_dst_nbr_intersection.cu) - target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_INTERSECTION_TEST PRIVATE cuco::cuco) + + ############################################################################################### + # - MG PRIMS TRANSFORM_REDUCE_DST_NBR_INTERSECTION OF_E_ENDPOINTS_BY_V tests ------------------ + ConfigureTestMG(MG_TRANSFORM_REDUCE_DST_NBR_INTERSECTION_BY_E_ENDPOINTS_BY_V_TEST + prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu) ############################################################################################### # - MG PRIMS PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION tests ------------------------- ConfigureTestMG(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST - prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu) - target_link_libraries(MG_PER_V_PAIR_TRANSFORM_DST_NBR_WEIGHTED_INTERSECTION_TEST PRIVATE cuco::cuco) + prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu) ############################################################################################### # - MG NBR SAMPLING tests --------------------------------------------------------------------- ConfigureTestMG(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_uniform_neighbor_sampling.cu) - target_link_libraries(MG_UNIFORM_NEIGHBOR_SAMPLING_TEST PRIVATE cuco::cuco) ############################################################################################### # - MG RANDOM_WALKS tests --------------------------------------------------------------------- diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu new file mode 100644 index 00000000000..af56807746a --- /dev/null +++ b/cpp/tests/prims/mg_per_v_transform_reduce_dst_key_aggregated_outgoing_e.cu @@ -0,0 +1,599 @@ +/* + * Copyright (c) 2021-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh" +#include "prims/reduce_op.cuh" +#include "prims/update_edge_src_dst_property.cuh" +#include "property_generator.cuh" +#include "result_compare.cuh" +#include "utilities/base_fixture.hpp" +#include "utilities/device_comm_wrapper.hpp" +#include "utilities/mg_utilities.hpp" +#include "utilities/test_graphs.hpp" +#include "utilities/test_utilities.hpp" +#include "utilities/thrust_wrapper.hpp" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include + +template +struct key_aggregated_e_op_t { + __device__ result_t operator()(vertex_t src, + vertex_t key, + result_t src_property, + result_t key_property, + edge_value_t edge_property) const + { + if (src_property < key_property) { + return src_property; + } else { + return key_property; + } + } +}; + +struct Prims_Usecase { + bool test_weighted{false}; + bool edge_masking{false}; + bool check_correctness{true}; +}; + +template +class Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE + : public ::testing::TestWithParam> { + public: + Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of per_v_transform_reduce_incoming|outgoing_e primitive + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + HighResTimer hr_timer{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + + // 1. create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + auto [mg_graph, mg_edge_weights, mg_renumber_map] = + cugraph::test::construct_graph( + *handle_, input_usecase, prims_usecase.test_weighted, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + auto mg_edge_weight_view = + mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt; + + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + edge_mask = + cugraph::test::generate::edge_property(*handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + + // 2. run MG per_v_transform_reduce_dst_key_aggregated_outgoing_e + + const int vertex_prop_hash_bin_count = 5; + const int key_hash_bin_count = 10; + const int key_prop_hash_bin_count = 20; + const int initial_value = 4; + + auto property_initial_value = + cugraph::test::generate::initial_value(initial_value); + + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *mg_renumber_map, vertex_prop_hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + + auto mg_vertex_key = cugraph::test::generate::vertex_property( + *handle_, *mg_renumber_map, key_hash_bin_count); + auto mg_dst_key = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_key); + + rmm::device_uvector mg_kv_store_keys(comm_rank == 0 ? key_hash_bin_count : int{0}, + handle_->get_stream()); + thrust::sequence( + handle_->get_thrust_policy(), mg_kv_store_keys.begin(), mg_kv_store_keys.end(), vertex_t{0}); + mg_kv_store_keys = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( + *handle_, std::move(mg_kv_store_keys)); + auto mg_kv_store_values = cugraph::test::generate::vertex_property( + *handle_, mg_kv_store_keys, key_prop_hash_bin_count); + + static_assert(std::is_same_v || + std::is_same_v>); + result_t invalid_value{}; + if constexpr (std::is_same_v) { + invalid_value = std::numeric_limits::max(); + } else { + invalid_value = + thrust::make_tuple(std::numeric_limits::max(), std::numeric_limits::max()); + } + cugraph::kv_store_t mg_kv_store( + mg_kv_store_keys.begin(), + mg_kv_store_keys.end(), + cugraph::get_dataframe_buffer_begin(mg_kv_store_values), + cugraph::invalid_vertex_id::value, + invalid_value, + handle_->get_stream()); + + enum class reduction_type_t { PLUS, ELEMWISE_MIN, ELEMWISE_MAX }; + std::array reduction_types = { + reduction_type_t::PLUS, reduction_type_t::ELEMWISE_MIN, reduction_type_t::ELEMWISE_MAX}; + + std::vector(0, rmm::cuda_stream_view{}))> + mg_results{}; + mg_results.reserve(reduction_types.size()); + + for (size_t i = 0; i < reduction_types.size(); ++i) { + mg_results.push_back(cugraph::allocate_dataframe_buffer( + mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream())); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG per_v_transform_reduce_outgoing_e"); + } + + switch (reduction_types[i]) { + case reduction_type_t::PLUS: + if (mg_edge_weight_view) { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + mg_graph_view, + mg_src_prop.view(), + *mg_edge_weight_view, + mg_dst_key.view(), + mg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::plus{}, + cugraph::get_dataframe_buffer_begin(mg_results[i])); + } else { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + mg_graph_view, + mg_src_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + mg_dst_key.view(), + mg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::plus{}, + cugraph::get_dataframe_buffer_begin(mg_results[i])); + } + break; + case reduction_type_t::ELEMWISE_MIN: + if (mg_edge_weight_view) { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + mg_graph_view, + mg_src_prop.view(), + *mg_edge_weight_view, + mg_dst_key.view(), + mg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::elementwise_minimum{}, + cugraph::get_dataframe_buffer_begin(mg_results[i])); + } else { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + mg_graph_view, + mg_src_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + mg_dst_key.view(), + mg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::elementwise_minimum{}, + cugraph::get_dataframe_buffer_begin(mg_results[i])); + } + break; + case reduction_type_t::ELEMWISE_MAX: + if (mg_edge_weight_view) { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + mg_graph_view, + mg_src_prop.view(), + *mg_edge_weight_view, + mg_dst_key.view(), + mg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::elementwise_maximum{}, + cugraph::get_dataframe_buffer_begin(mg_results[i])); + } else { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + mg_graph_view, + mg_src_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + mg_dst_key.view(), + mg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::elementwise_maximum{}, + cugraph::get_dataframe_buffer_begin(mg_results[i])); + } + break; + default: FAIL() << "should not be reached."; + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + } + + // 3. compare SG & MG results + + if (prims_usecase.check_correctness) { + cugraph::graph_t sg_graph(*handle_); + std::optional< + cugraph::edge_property_t, weight_t>> + sg_edge_weights{std::nullopt}; + std::tie(sg_graph, sg_edge_weights, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + for (size_t i = 0; i < reduction_types.size(); ++i) { + auto mg_aggregate_results = + cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); + + static_assert(cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); + if constexpr (std::is_arithmetic_v) { + std::tie(std::ignore, mg_aggregate_results) = + cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( + *handle_, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + mg_graph_view.local_vertex_partition_range(), + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + raft::device_span(mg_results[i].data(), mg_results[i].size())); + } else { + std::tie(std::ignore, std::get<0>(mg_aggregate_results)) = + cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( + *handle_, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + mg_graph_view.local_vertex_partition_range(), + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + raft::device_span::type const>( + std::get<0>(mg_results[i]).data(), std::get<0>(mg_results[i]).size())); + + std::tie(std::ignore, std::get<1>(mg_aggregate_results)) = + cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( + *handle_, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + mg_graph_view.local_vertex_partition_range(), + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + raft::device_span::type const>( + std::get<1>(mg_results[i]).data(), std::get<1>(mg_results[i]).size())); + } + + if (handle_->get_comms().get_rank() == int{0}) { + auto sg_graph_view = sg_graph.view(); + auto sg_edge_weight_view = + sg_edge_weights ? std::make_optional((*sg_edge_weights).view()) : std::nullopt; + + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), + vertex_prop_hash_bin_count); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); + + auto sg_vertex_key = cugraph::test::generate::vertex_property( + *handle_, + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), + key_hash_bin_count); + auto sg_dst_key = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_key); + + rmm::device_uvector sg_kv_store_keys(key_hash_bin_count, handle_->get_stream()); + thrust::sequence(handle_->get_thrust_policy(), + sg_kv_store_keys.begin(), + sg_kv_store_keys.end(), + vertex_t{0}); + auto sg_kv_store_values = cugraph::test::generate::vertex_property( + *handle_, sg_kv_store_keys, key_prop_hash_bin_count); + + cugraph::kv_store_t sg_kv_store( + sg_kv_store_keys.begin(), + sg_kv_store_keys.end(), + cugraph::get_dataframe_buffer_begin(sg_kv_store_values), + cugraph::invalid_vertex_id::value, + invalid_value, + handle_->get_stream()); + + cugraph::test::vector_result_compare compare{*handle_}; + + auto global_result = cugraph::allocate_dataframe_buffer( + sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); + + switch (reduction_types[i]) { + case reduction_type_t::PLUS: + if (sg_edge_weight_view) { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + sg_graph_view, + sg_src_prop.view(), + *sg_edge_weight_view, + sg_dst_key.view(), + sg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::plus{}, + cugraph::get_dataframe_buffer_begin(global_result)); + } else { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + sg_graph_view, + sg_src_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + sg_dst_key.view(), + sg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::plus{}, + cugraph::get_dataframe_buffer_begin(global_result)); + } + break; + case reduction_type_t::ELEMWISE_MIN: + if (sg_edge_weight_view) { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + sg_graph_view, + sg_src_prop.view(), + *sg_edge_weight_view, + sg_dst_key.view(), + sg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::elementwise_minimum{}, + cugraph::get_dataframe_buffer_begin(global_result)); + } else { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + sg_graph_view, + sg_src_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + sg_dst_key.view(), + sg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::elementwise_minimum{}, + cugraph::get_dataframe_buffer_begin(global_result)); + } + break; + case reduction_type_t::ELEMWISE_MAX: + if (sg_edge_weight_view) { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + sg_graph_view, + sg_src_prop.view(), + *sg_edge_weight_view, + sg_dst_key.view(), + sg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::elementwise_maximum{}, + cugraph::get_dataframe_buffer_begin(global_result)); + } else { + per_v_transform_reduce_dst_key_aggregated_outgoing_e( + *handle_, + sg_graph_view, + sg_src_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + sg_dst_key.view(), + sg_kv_store.view(), + key_aggregated_e_op_t{}, + property_initial_value, + cugraph::reduce_op::elementwise_maximum{}, + cugraph::get_dataframe_buffer_begin(global_result)); + } + break; + default: FAIL() << "should not be reached."; + } + + ASSERT_TRUE(compare(mg_aggregate_results, global_result)); + } + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr + Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE::handle_ = nullptr; + +using Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_File = + Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE; +using Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat = + Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE; + +// FIXME: this tests do not build as cugrpah::kv_store_t has a build error when use_binary_search = +// false and value_t is thrust::tuple, this will be fixed in a separate PR +#if 0 +TEST_P(Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_File, + CheckInt32Int32FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat, + CheckInt32Int32FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat, + CheckInt32Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat, + CheckInt64Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} +#endif + +TEST_P(Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_File, + CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat, + CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat, + CheckInt32Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat, + CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_File, + ::testing::Combine( + ::testing::Values(Prims_Usecase{false, false, true}, + Prims_Usecase{false, true, true}, + Prims_Usecase{true, false, true}, + Prims_Usecase{true, true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{false, false, true}, + Prims_Usecase{false, true, true}, + Prims_Usecase{true, false, true}, + Prims_Usecase{true, true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGPerVTransformReduceDstKeyAggregatedOutgoingE_Rmat, + ::testing::Combine( + ::testing::Values(Prims_Usecase{false, false, false}, + Prims_Usecase{false, true, false}, + Prims_Usecase{true, false, false}, + Prims_Usecase{true, true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu index efab53f89e6..a459a677569 100644 --- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu @@ -18,6 +18,7 @@ #include "prims/reduce_op.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "property_generator.cuh" +#include "result_compare.cuh" #include "utilities/base_fixture.hpp" #include "utilities/device_comm_wrapper.hpp" #include "utilities/mg_utilities.hpp" @@ -72,83 +73,6 @@ struct e_op_t { } }; -template -__host__ __device__ bool compare_scalar(T val0, T val1, thrust::optional threshold_ratio) -{ - if (threshold_ratio) { - return std::abs(val0 - val1) <= (std::max(std::abs(val0), std::abs(val1)) * *threshold_ratio); - } else { - return val0 == val1; - } -} - -template -struct comparator { - static constexpr double threshold_ratio{1e-2}; - - __host__ __device__ bool operator()(T t0, T t1) const - { - static_assert(cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); - if constexpr (std::is_arithmetic_v) { - return compare_scalar( - t0, - t1, - std::is_floating_point_v ? thrust::optional{threshold_ratio} : thrust::nullopt); - } else { - auto val0 = thrust::get<0>(t0); - auto val1 = thrust::get<0>(t1); - auto passed = compare_scalar(val0, - val1, - std::is_floating_point_v - ? thrust::optional{threshold_ratio} - : thrust::nullopt); - if (!passed) return false; - - if constexpr (thrust::tuple_size::value >= 2) { - auto val0 = thrust::get<1>(t0); - auto val1 = thrust::get<1>(t1); - auto passed = compare_scalar(val0, - val1, - std::is_floating_point_v - ? thrust::optional{threshold_ratio} - : thrust::nullopt); - if (!passed) return false; - } - if constexpr (thrust::tuple_size::value >= 3) { - assert(false); // should not be reached. - } - return true; - } - } -}; - -struct result_compare { - const raft::handle_t& handle_; - result_compare(raft::handle_t const& handle) : handle_(handle) {} - - template - auto operator()(const std::tuple...>& t1, - const std::tuple...>& t2) - { - using type = thrust::tuple; - return equality_impl(t1, t2, std::make_index_sequence::value>()); - } - - template - auto operator()(const rmm::device_uvector& t1, const rmm::device_uvector& t2) - { - return thrust::equal( - handle_.get_thrust_policy(), t1.begin(), t1.end(), t2.begin(), comparator()); - } - - private: - template - auto equality_impl(T& t1, T& t2, std::index_sequence) - { - return (... && (result_compare::operator()(std::get(t1), std::get(t2)))); - } -}; - struct Prims_Usecase { bool test_weighted{false}; bool edge_masking{false}; @@ -440,7 +364,7 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE *handle_, sg_graph_view, sg_vertex_prop); auto sg_dst_prop = cugraph::test::generate::dst_property( *handle_, sg_graph_view, sg_vertex_prop); - result_compare comp{*handle_}; + cugraph::test::vector_result_compare compare{*handle_}; auto global_in_result = cugraph::allocate_dataframe_buffer( sg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); @@ -528,8 +452,8 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE default: FAIL() << "should not be reached."; } - ASSERT_TRUE(comp(mg_aggregate_in_results, global_in_result)); - ASSERT_TRUE(comp(mg_aggregate_out_results, global_out_result)); + ASSERT_TRUE(compare(mg_aggregate_in_results, global_in_result)); + ASSERT_TRUE(compare(mg_aggregate_out_results, global_out_result)); } } } diff --git a/cpp/tests/prims/mg_reduce_v.cu b/cpp/tests/prims/mg_reduce_v.cu index da3354b77d9..783e17b6d8f 100644 --- a/cpp/tests/prims/mg_reduce_v.cu +++ b/cpp/tests/prims/mg_reduce_v.cu @@ -17,6 +17,7 @@ #include "prims/property_op_utils.cuh" #include "prims/reduce_v.cuh" #include "property_generator.cuh" +#include "result_compare.cuh" #include "utilities/base_fixture.hpp" #include "utilities/device_comm_wrapper.hpp" #include "utilities/mg_utilities.hpp" @@ -49,50 +50,6 @@ #include -template -struct result_compare { - static constexpr double threshold_ratio{1e-2}; - constexpr auto operator()(const T& t1, const T& t2) - { - if constexpr (std::is_floating_point_v) { - bool passed = (t1 == t2) // when t1 == t2 == 0 - || - (std::abs(t1 - t2) < (std::max(std::abs(t1), std::abs(t2)) * threshold_ratio)); - return passed; - } - return t1 == t2; - } -}; - -template -struct result_compare> { - static constexpr double threshold_ratio{1e-3}; - - using Type = thrust::tuple; - constexpr auto operator()(const Type& t1, const Type& t2) - { - return equality_impl(t1, t2, std::make_index_sequence::value>()); - } - - private: - template - constexpr bool equal(T t1, T t2) - { - if constexpr (std::is_floating_point_v) { - bool passed = (t1 == t2) // when t1 == t2 == 0 - || - (std::abs(t1 - t2) < (std::max(std::abs(t1), std::abs(t2)) * threshold_ratio)); - return passed; - } - return t1 == t2; - } - template - constexpr auto equality_impl(T& t1, T& t2, std::index_sequence) - { - return (... && (equal(thrust::get(t1), thrust::get(t2)))); - } -}; - struct Prims_Usecase { bool check_correctness{true}; }; @@ -249,7 +206,7 @@ class Tests_MGReduceV break; default: FAIL() << "should not be reached."; } - result_compare compare{}; + cugraph::test::scalar_result_compare compare{}; ASSERT_TRUE(compare(expected_result, results[reduction_type])); } } diff --git a/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu b/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu new file mode 100644 index 00000000000..5fa37250e21 --- /dev/null +++ b/cpp/tests/prims/mg_transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cu @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "property_generator.cuh" + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +template +struct intersection_op_t { + __device__ thrust::tuple operator()( + vertex_t v0, + vertex_t v1, + edge_t v0_prop, + edge_t v1_prop, + raft::device_span intersection) const + { + return thrust::make_tuple( + v0_prop + v1_prop, v0_prop + v1_prop, static_cast(intersection.size())); + } +}; + +struct Prims_Usecase { + bool edge_masking{false}; + bool check_correctness{true}; +}; + +template +class Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV + : public ::testing::TestWithParam> { + public: + Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Verify the results of transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v primitive + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + HighResTimer hr_timer{}; + + auto const comm_rank = handle_->get_comms().get_rank(); + auto const comm_size = handle_->get_comms().get_size(); + + // 1. create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + cugraph::graph_t mg_graph(*handle_); + std::optional> mg_renumber_map{std::nullopt}; + std::tie(mg_graph, std::ignore, mg_renumber_map) = + cugraph::test::construct_graph( + *handle_, input_usecase, false, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + edge_mask = + cugraph::test::generate::edge_property(*handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + + // 2. run MG transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v primitive + + const int hash_bin_count = 5; + const int initial_value = 4; + + auto property_initial_value = + cugraph::test::generate::initial_value(initial_value); + + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *mg_renumber_map, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); + + auto mg_result_buffer = rmm::device_uvector( + mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v"); + } + + cugraph::transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( + *handle_, + mg_graph_view, + mg_src_prop.view(), + mg_dst_prop.view(), + intersection_op_t{}, + property_initial_value, + mg_result_buffer.begin()); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. validate MG results + + if (prims_usecase.check_correctness) { + rmm::device_uvector mg_aggregate_result_buffer(0, handle_->get_stream()); + std::tie(std::ignore, mg_aggregate_result_buffer) = + cugraph::test::mg_vertex_property_values_to_sg_vertex_property_values( + *handle_, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + mg_graph_view.local_vertex_partition_range(), + std::optional>{std::nullopt}, + std::optional>{std::nullopt}, + raft::device_span(mg_result_buffer.data(), mg_result_buffer.size())); + + cugraph::graph_t sg_graph(*handle_); + std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + if (handle_->get_comms().get_rank() == 0) { + auto sg_graph_view = sg_graph.view(); + + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), + hash_bin_count); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); + auto sg_dst_prop = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_prop); + + auto sg_result_buffer = cugraph::allocate_dataframe_buffer( + sg_graph_view.number_of_vertices(), handle_->get_stream()); + + cugraph::transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( + *handle_, + sg_graph_view, + sg_src_prop.view(), + sg_dst_prop.view(), + intersection_op_t{}, + property_initial_value, + sg_result_buffer.begin()); + + bool valid = thrust::equal(handle_->get_thrust_policy(), + mg_aggregate_result_buffer.begin(), + mg_aggregate_result_buffer.end(), + sg_result_buffer.begin()); + + ASSERT_TRUE(valid); + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr + Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV::handle_ = nullptr; + +using Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_File = + Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV; +using Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_Rmat = + Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV; + +TEST_P(Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_File, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_Rmat, CheckInt32Int32Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_Rmat, CheckInt32Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_Rmat, CheckInt64Int64Float) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_File, + ::testing::Combine( + ::testing::Values(Prims_Usecase{false, true}, Prims_Usecase{true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{false, true}, + Prims_Usecase{true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGTransformReduceDstNbrIntersectionOfEEndpointsByV_Rmat, + ::testing::Combine( + ::testing::Values(Prims_Usecase{false, false}, Prims_Usecase{true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_transform_reduce_e.cu b/cpp/tests/prims/mg_transform_reduce_e.cu index c8ce9fc3a47..53f37e83b30 100644 --- a/cpp/tests/prims/mg_transform_reduce_e.cu +++ b/cpp/tests/prims/mg_transform_reduce_e.cu @@ -17,6 +17,7 @@ #include "prims/transform_reduce_e.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "property_generator.cuh" +#include "result_compare.cuh" #include "utilities/base_fixture.hpp" #include "utilities/device_comm_wrapper.hpp" #include "utilities/mg_utilities.hpp" @@ -52,44 +53,6 @@ #include -template -struct result_compare { - static constexpr double threshold_ratio{1e-3}; - constexpr auto operator()(const T& t1, const T& t2) - { - if constexpr (std::is_floating_point_v) { - return std::abs(t1 - t2) < (std::max(t1, t2) * threshold_ratio); - } - return t1 == t2; - } -}; - -template -struct result_compare> { - static constexpr double threshold_ratio{1e-3}; - - using type = thrust::tuple; - constexpr auto operator()(const type& t1, const type& t2) - { - return equality_impl(t1, t2, std::make_index_sequence::value>()); - } - - private: - template - constexpr bool equal(T t1, T t2) - { - if constexpr (std::is_floating_point_v) { - return std::abs(t1 - t2) < (std::max(t1, t2) * threshold_ratio); - } - return t1 == t2; - } - template - constexpr auto equality_impl(T& t1, T& t2, std::index_sequence) - { - return (... && (equal(thrust::get(t1), thrust::get(t2)))); - } -}; - struct Prims_Usecase { bool test_weighted{false}; bool edge_masking{false}; @@ -231,7 +194,7 @@ class Tests_MGTransformReduceE } }, property_initial_value); - result_compare compare{}; + cugraph::test::scalar_result_compare compare{}; ASSERT_TRUE(compare(expected_result, result)); } } diff --git a/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu new file mode 100644 index 00000000000..457e6b5ab93 --- /dev/null +++ b/cpp/tests/prims/mg_transform_reduce_e_by_src_dst_key.cu @@ -0,0 +1,495 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include "property_generator.cuh" +#include "result_compare.cuh" + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +struct Prims_Usecase { + bool test_weighted{false}; + bool edge_masking{false}; + bool check_correctness{true}; +}; + +template +class Tests_MGTransformReduceEBySrcDstKey + : public ::testing::TestWithParam> { + public: + Tests_MGTransformReduceEBySrcDstKey() {} + + static void SetUpTestCase() { handle_ = cugraph::test::initialize_mg_handle(); } + + static void TearDownTestCase() { handle_.reset(); } + + virtual void SetUp() {} + virtual void TearDown() {} + + // Compare the results of transform_reduce_e_by_src|dst_key primitive + template + void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) + { + HighResTimer hr_timer{}; + + // 1. create MG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG Construct graph"); + } + + cugraph::graph_t mg_graph(*handle_); + std::optional> mg_renumber_map{std::nullopt}; + std::tie(mg_graph, std::ignore, mg_renumber_map) = + cugraph::test::construct_graph( + *handle_, input_usecase, prims_usecase.test_weighted, true); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto mg_graph_view = mg_graph.view(); + + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + edge_mask = + cugraph::test::generate::edge_property(*handle_, mg_graph_view, 2); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + + // 2. run MG transform reduce + + const int hash_bin_count = 5; + const int initial_value = 4; + + auto property_initial_value = + cugraph::test::generate::initial_value(initial_value); + + auto mg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, *mg_renumber_map, hash_bin_count); + auto mg_src_prop = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_prop); + auto mg_dst_prop = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_prop); + + auto mg_vertex_key = cugraph::test::generate::vertex_property( + *handle_, *mg_renumber_map, hash_bin_count); + auto mg_src_key = cugraph::test::generate::src_property( + *handle_, mg_graph_view, mg_vertex_key); + auto mg_dst_key = cugraph::test::generate::dst_property( + *handle_, mg_graph_view, mg_vertex_key); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG transform_reduce_e_by_src_key"); + } + + auto [by_src_keys, by_src_values] = transform_reduce_e_by_src_key( + *handle_, + mg_graph_view, + mg_src_prop.view(), + mg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + mg_src_key.view(), + [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + if (src_property < dst_property) { + return src_property; + } else { + return dst_property; + } + }, + property_initial_value, + cugraph::reduce_op::plus{}); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.start("MG transform_reduce_e_by_dst_key"); + } + + auto [by_dst_keys, by_dst_values] = transform_reduce_e_by_dst_key( + *handle_, + mg_graph_view, + mg_src_prop.view(), + mg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + mg_dst_key.view(), + [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + if (src_property < dst_property) { + return src_property; + } else { + return dst_property; + } + }, + property_initial_value, + cugraph::reduce_op::plus{}); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + handle_->get_comms().barrier(); + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. compare SG & MG results + + if (prims_usecase.check_correctness) { + auto mg_aggregate_by_src_keys = + cugraph::test::device_gatherv(*handle_, by_src_keys.data(), by_src_keys.size()); + auto mg_aggregate_by_src_values = + cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); + if constexpr (std::is_arithmetic_v) { + mg_aggregate_by_src_values = + cugraph::test::device_gatherv(*handle_, by_src_values.data(), by_src_values.size()); + } else { + std::get<0>(mg_aggregate_by_src_values) = cugraph::test::device_gatherv( + *handle_, std::get<0>(by_src_values).data(), std::get<0>(by_src_values).size()); + std::get<1>(mg_aggregate_by_src_values) = cugraph::test::device_gatherv( + *handle_, std::get<1>(by_src_values).data(), std::get<1>(by_src_values).size()); + } + thrust::sort_by_key(handle_->get_thrust_policy(), + mg_aggregate_by_src_keys.begin(), + mg_aggregate_by_src_keys.end(), + cugraph::get_dataframe_buffer_begin(mg_aggregate_by_src_values)); + + auto mg_aggregate_by_dst_keys = + cugraph::test::device_gatherv(*handle_, by_dst_keys.data(), by_dst_keys.size()); + auto mg_aggregate_by_dst_values = + cugraph::allocate_dataframe_buffer(0, handle_->get_stream()); + if constexpr (std::is_arithmetic_v) { + mg_aggregate_by_dst_values = + cugraph::test::device_gatherv(*handle_, by_dst_values.data(), by_dst_values.size()); + } else { + std::get<0>(mg_aggregate_by_dst_values) = cugraph::test::device_gatherv( + *handle_, std::get<0>(by_dst_values).data(), std::get<0>(by_dst_values).size()); + std::get<1>(mg_aggregate_by_dst_values) = cugraph::test::device_gatherv( + *handle_, std::get<1>(by_dst_values).data(), std::get<1>(by_dst_values).size()); + } + thrust::sort_by_key(handle_->get_thrust_policy(), + mg_aggregate_by_dst_keys.begin(), + mg_aggregate_by_dst_keys.end(), + cugraph::get_dataframe_buffer_begin(mg_aggregate_by_dst_values)); + + cugraph::graph_t sg_graph(*handle_); + std::tie(sg_graph, std::ignore, std::ignore) = cugraph::test::mg_graph_to_sg_graph( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + std::make_optional>((*mg_renumber_map).data(), + (*mg_renumber_map).size()), + false); + + if (handle_->get_comms().get_rank() == 0) { + auto sg_graph_view = sg_graph.view(); + + auto sg_vertex_prop = cugraph::test::generate::vertex_property( + *handle_, + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), + hash_bin_count); + auto sg_src_prop = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_prop); + auto sg_dst_prop = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_prop); + + auto sg_vertex_key = cugraph::test::generate::vertex_property( + *handle_, + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_first()), + thrust::make_counting_iterator(sg_graph_view.local_vertex_partition_range_last()), + hash_bin_count); + auto sg_src_key = cugraph::test::generate::src_property( + *handle_, sg_graph_view, sg_vertex_key); + auto sg_dst_key = cugraph::test::generate::dst_property( + *handle_, sg_graph_view, sg_vertex_key); + + auto [sg_by_src_keys, sg_by_src_values] = transform_reduce_e_by_src_key( + *handle_, + sg_graph_view, + sg_src_prop.view(), + sg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + sg_src_key.view(), + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + if (src_property < dst_property) { + return src_property; + } else { + return dst_property; + } + }, + property_initial_value, + cugraph::reduce_op::plus{}); + thrust::sort_by_key(handle_->get_thrust_policy(), + sg_by_src_keys.begin(), + sg_by_src_keys.end(), + cugraph::get_dataframe_buffer_begin(sg_by_src_values)); + + auto [sg_by_dst_keys, sg_by_dst_values] = transform_reduce_e_by_dst_key( + *handle_, + sg_graph_view, + sg_src_prop.view(), + sg_dst_prop.view(), + cugraph::edge_dummy_property_t{}.view(), + sg_dst_key.view(), + [] __device__( + auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + if (src_property < dst_property) { + return src_property; + } else { + return dst_property; + } + }, + property_initial_value, + cugraph::reduce_op::plus{}); + thrust::sort_by_key(handle_->get_thrust_policy(), + sg_by_dst_keys.begin(), + sg_by_dst_keys.end(), + cugraph::get_dataframe_buffer_begin(sg_by_dst_values)); + + cugraph::test::vector_result_compare compare{*handle_}; + + ASSERT_TRUE(compare(sg_by_src_keys, mg_aggregate_by_src_keys)); + ASSERT_TRUE(compare(sg_by_src_values, mg_aggregate_by_src_values)); + + ASSERT_TRUE(compare(sg_by_dst_keys, mg_aggregate_by_dst_keys)); + ASSERT_TRUE(compare(sg_by_dst_values, mg_aggregate_by_dst_values)); + } + } + } + + private: + static std::unique_ptr handle_; +}; + +template +std::unique_ptr Tests_MGTransformReduceEBySrcDstKey::handle_ = + nullptr; + +using Tests_MGTransformReduceEBySrcDstKey_File = + Tests_MGTransformReduceEBySrcDstKey; +using Tests_MGTransformReduceEBySrcDstKey_Rmat = + Tests_MGTransformReduceEBySrcDstKey; + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_File, CheckInt32Int32FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test, false>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_File, CheckInt32Int32FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>(std::get<0>(param), + std::get<1>(param)); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt32Int32FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt32Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt64Int64FloatTupleIntFloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test, true>( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_File, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt32Int32FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt32Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt64Int64FloatTransposeFalse) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_File, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test(std::get<0>(param), std::get<1>(param)); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt32Int32FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt32Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +TEST_P(Tests_MGTransformReduceEBySrcDstKey_Rmat, CheckInt64Int64FloatTransposeTrue) +{ + auto param = GetParam(); + run_current_test( + std::get<0>(param), + cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_MGTransformReduceEBySrcDstKey_File, + ::testing::Combine( + ::testing::Values(Prims_Usecase{false, false, true}, + Prims_Usecase{false, true, true}, + Prims_Usecase{true, false, true}, + Prims_Usecase{true, true, true}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx")))); + +INSTANTIATE_TEST_SUITE_P(rmat_small_test, + Tests_MGTransformReduceEBySrcDstKey_Rmat, + ::testing::Combine(::testing::Values(Prims_Usecase{false, false, true}, + Prims_Usecase{false, true, true}, + Prims_Usecase{true, false, true}, + Prims_Usecase{true, true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase( + 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_MGTransformReduceEBySrcDstKey_Rmat, + ::testing::Combine( + ::testing::Values(Prims_Usecase{false, false, false}, + Prims_Usecase{false, true, false}, + Prims_Usecase{true, false, false}, + Prims_Usecase{true, true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_transform_reduce_v.cu b/cpp/tests/prims/mg_transform_reduce_v.cu index c0d44bc94f1..c954f31d0f9 100644 --- a/cpp/tests/prims/mg_transform_reduce_v.cu +++ b/cpp/tests/prims/mg_transform_reduce_v.cu @@ -16,6 +16,7 @@ #include "prims/transform_reduce_v.cuh" #include "property_generator.cuh" +#include "result_compare.cuh" #include "utilities/base_fixture.hpp" #include "utilities/device_comm_wrapper.hpp" #include "utilities/mg_utilities.hpp" @@ -56,50 +57,6 @@ struct v_op_t { } }; -template -struct result_compare { - static constexpr double threshold_ratio{1e-3}; - constexpr auto operator()(const T& t1, const T& t2) - { - if constexpr (std::is_floating_point_v) { - bool passed = (t1 == t2) // when t1 == t2 == 0 - || - (std::abs(t1 - t2) < (std::max(std::abs(t1), std::abs(t2)) * threshold_ratio)); - return passed; - } - return t1 == t2; - } -}; - -template -struct result_compare> { - static constexpr double threshold_ratio{1e-3}; - - using Type = thrust::tuple; - constexpr auto operator()(const Type& t1, const Type& t2) - { - return equality_impl(t1, t2, std::make_index_sequence::value>()); - } - - private: - template - constexpr bool equal(T t1, T t2) - { - if constexpr (std::is_floating_point_v) { - bool passed = (t1 == t2) // when t1 == t2 == 0 - || - (std::abs(t1 - t2) < (std::max(std::abs(t1), std::abs(t2)) * threshold_ratio)); - return passed; - } - return t1 == t2; - } - template - constexpr auto equality_impl(T& t1, T& t2, std::index_sequence) - { - return (... && (equal(thrust::get(t1), thrust::get(t2)))); - } -}; - struct Prims_Usecase { bool check_correctness{true}; }; @@ -254,7 +211,7 @@ class Tests_MGTransformReduceV break; default: FAIL() << "should not be reached."; } - result_compare compare{}; + cugraph::test::scalar_result_compare compare{}; ASSERT_TRUE(compare(expected_result, results[reduction_type])); } } diff --git a/cpp/tests/prims/result_compare.cuh b/cpp/tests/prims/result_compare.cuh new file mode 100644 index 00000000000..5a1abb90e3c --- /dev/null +++ b/cpp/tests/prims/result_compare.cuh @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once +#include + +#include + +#include +#include +#include + +#include +#include +#include +#include + +namespace cugraph { +namespace test { + +namespace detail { + +template +__host__ __device__ bool compare_arithmetic_scalar(T val0, + T val1, + thrust::optional threshold_ratio) +{ + if (threshold_ratio) { + return std::abs(val0 - val1) <= (std::max(std::abs(val0), std::abs(val1)) * *threshold_ratio); + } else { + return val0 == val1; + } +} + +} // namespace detail + +template +struct comparator { + static constexpr double threshold_ratio{1e-2}; + + __host__ __device__ bool operator()(T t0, T t1) const + { + static_assert(cugraph::is_arithmetic_or_thrust_tuple_of_arithmetic::value); + if constexpr (std::is_arithmetic_v) { + return detail::compare_arithmetic_scalar( + t0, + t1, + std::is_floating_point_v ? thrust::optional{threshold_ratio} : thrust::nullopt); + } else { + auto val0 = thrust::get<0>(t0); + auto val1 = thrust::get<0>(t1); + auto passed = detail::compare_arithmetic_scalar( + val0, + val1, + std::is_floating_point_v ? thrust::optional{threshold_ratio} + : thrust::nullopt); + if (!passed) return false; + + if constexpr (thrust::tuple_size::value >= 2) { + auto val0 = thrust::get<1>(t0); + auto val1 = thrust::get<1>(t1); + auto passed = + detail::compare_arithmetic_scalar(val0, + val1, + std::is_floating_point_v + ? thrust::optional{threshold_ratio} + : thrust::nullopt); + if (!passed) return false; + } + if constexpr (thrust::tuple_size::value >= 3) { + assert(false); // should not be reached. + } + return true; + } + } +}; + +struct scalar_result_compare { + template + auto operator()(thrust::tuple t1, thrust::tuple t2) + { + using type = thrust::tuple; + return equality_impl(t1, t2, std::make_index_sequence::value>()); + } + + template + auto operator()(T t1, T t2) + { + comparator comp{}; + return comp(t1, t2); + } + + private: + template + auto equality_impl(T t1, T t2, std::index_sequence) + { + return (... && (scalar_result_compare::operator()(thrust::get(t1), thrust::get(t2)))); + } +}; + +struct vector_result_compare { + const raft::handle_t& handle_; + + vector_result_compare(raft::handle_t const& handle) : handle_(handle) {} + + template + auto operator()(std::tuple...> const& t1, + std::tuple...> const& t2) + { + using type = thrust::tuple; + return equality_impl(t1, t2, std::make_index_sequence::value>()); + } + + template + auto operator()(rmm::device_uvector const& t1, rmm::device_uvector const& t2) + { + return thrust::equal( + handle_.get_thrust_policy(), t1.begin(), t1.end(), t2.begin(), comparator()); + } + + private: + template + auto equality_impl(T& t1, T& t2, std::index_sequence) + { + return (... && (vector_result_compare::operator()(std::get(t1), std::get(t2)))); + } +}; + +} // namespace test +} // namespace cugraph