From 880f1d6ad855f7427e6fd21cfa5fd8fbff2aefee Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Thu, 8 Feb 2024 08:55:37 -0800 Subject: [PATCH] add new all-pairs similarity algorithm --- cpp/include/cugraph/algorithms.hpp | 120 +++++ cpp/include/cugraph_c/graph_functions.h | 2 +- cpp/include/cugraph_c/similarity_algorithms.h | 126 ++++- cpp/src/c_api/graph_functions.cpp | 2 +- cpp/src/c_api/similarity.cpp | 219 +++++++- cpp/src/link_prediction/jaccard_impl.cuh | 24 +- cpp/src/link_prediction/jaccard_mg.cu | 62 ++- cpp/src/link_prediction/jaccard_sg.cu | 62 ++- cpp/src/link_prediction/overlap_impl.cuh | 24 +- cpp/src/link_prediction/overlap_mg.cu | 62 ++- cpp/src/link_prediction/overlap_sg.cu | 62 ++- cpp/src/link_prediction/similarity_impl.cuh | 298 +++++++++++ cpp/src/link_prediction/sorensen_impl.cuh | 24 +- cpp/src/link_prediction/sorensen_mg.cu | 62 ++- cpp/src/link_prediction/sorensen_sg.cu | 62 ++- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/c_api/mg_two_hop_neighbors_test.c | 4 +- cpp/tests/c_api/similarity_test.c | 480 +++++++++++++++++- cpp/tests/c_api/two_hop_neighbors_test.c | 4 +- .../link_prediction/similarity_compare.hpp | 38 +- cpp/tests/link_prediction/similarity_test.cu | 350 +++++++++++++ cpp/tests/mtmg/threaded_test_louvain.cu | 4 +- 22 files changed, 2062 insertions(+), 31 deletions(-) create mode 100644 cpp/tests/link_prediction/similarity_test.cu diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index bb721468106..2f4d4d8eea5 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2137,6 +2137,126 @@ rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check = false); +/** + * @brief Compute Jaccard all pairs similarity coefficient + * + * Similarity is computed for all pairs of vertices. If the vertices + * variable is specified it will be all pairs based on two hop neighbors + * of these seeds. 
If the vertices variable is not specified it will be + * all pairs of all two hop neighbors. + * + * If topk is specified only the top scoring vertex pairs will be returned, + * if not specified then all vertex pairs will be returned. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. + * @param topk optional specification of how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing the tuples (t1, t2, similarity score) + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + +/** + * @brief Compute Sorensen all pairs similarity coefficient + * + * Similarity is computed for all pairs of vertices. If the vertices + * variable is specified it will be all pairs based on two hop neighbors + * of these seeds.
If the vertices variable is not specified it will be + * all pairs of all two hop neighbors. + * + * If topk is specified only the top scoring vertex pairs will be returned, + * if not specified then all vertex pairs will be returned. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. + * @param topk optional specification of how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing the tuples (t1, t2, similarity score) + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + +/** + * @brief Compute overlap all pairs similarity coefficient + * + * Similarity is computed for all pairs of vertices. If the vertices + * variable is specified it will be all pairs based on two hop neighbors + * of these seeds.
If the vertices variable is not specified it will be + * all pairs of all two hop neighbors. + * + * If topk is specified only the top scoring vertex pairs will be returned, + * if not specified then all vertex pairs will be returned. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) or multi-GPU (true). + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. + * @param topk optional specification of how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing the tuples (t1, t2, similarity score) + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + /* * @brief Enumerate K-hop neighbors * diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index 19b69922fa5..8fe1ea0b958 100644 --- a/cpp/include/cugraph_c/graph_functions.h +++ b/cpp/include/cugraph_c/graph_functions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cugraph_c/similarity_algorithms.h b/cpp/include/cugraph_c/similarity_algorithms.h index b8f61b46545..8d23182628b 100644 --- a/cpp/include/cugraph_c/similarity_algorithms.h +++ b/cpp/include/cugraph_c/similarity_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,6 +36,16 @@ typedef struct { int32_t align_; } cugraph_similarity_result_t; +/** + * @ingroup similarity + * @brief Get the vertex pairs from the similarity result.
+ * + * @param [in] result The result from a similarity algorithm + * @return vertex pairs + */ +cugraph_vertex_pairs_t* cugraph_similarity_result_get_vertex_pairs( + cugraph_similarity_result_t* result); + /** * @ingroup similarity * @brief Get the similarity coefficient array @@ -135,6 +145,120 @@ cugraph_error_code_t cugraph_overlap_coefficients(const cugraph_resource_handle_ cugraph_similarity_result_t** result, cugraph_error_t** error); +/** + * @brief Perform All-Pairs Jaccard similarity computation + * + * Compute the similarity for all vertex pairs derived from an optional specified + * vertex list. This function will identify the two-hop neighbors of the specified + * vertices (all vertices in the graph if not specified) and compute similarity + * for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that Jaccard similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. + * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + +/** + * @brief Perform All Pairs Sorensen similarity computation + * + * Compute the similarity for all vertex pairs derived from an optional specified + * vertex list. This function will identify the two-hop neighbors of the specified + * vertices (all vertices in the graph if not specified) and compute similarity + * for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that Sorensen similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. + * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_sorensen_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + +/** + * @brief Perform All Pairs overlap similarity computation + * + * Compute the similarity for all vertex pairs derived from an optional specified + * vertex list. This function will identify the two-hop neighbors of the specified + * vertices (all vertices in the graph if not specified) and compute similarity + * for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that overlap similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. + * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_overlap_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + #ifdef __cplusplus } #endif diff --git a/cpp/src/c_api/graph_functions.cpp b/cpp/src/c_api/graph_functions.cpp index aedb8f8e287..cd68490aa5f 100644 --- a/cpp/src/c_api/graph_functions.cpp +++ b/cpp/src/c_api/graph_functions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/c_api/similarity.cpp b/cpp/src/c_api/similarity.cpp index 730416abd7b..29b27f68ee4 100644 --- a/cpp/src/c_api/similarity.cpp +++ b/cpp/src/c_api/similarity.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -34,6 +34,7 @@ namespace c_api { struct cugraph_similarity_result_t { cugraph_type_erased_device_array_t* similarity_coefficients_; + cugraph_vertex_pairs_t* vertex_pairs_; }; } // namespace c_api @@ -131,12 +132,92 @@ struct similarity_functor : public cugraph::c_api::abstract_functor { graph_view, use_weight_ ? 
std::make_optional(edge_weights->view()) : std::nullopt, std::make_tuple(raft::device_span{v1.data(), v1.size()}, - raft::device_span{v2.data(), v2.size()}), - use_weight_); + raft::device_span{v2.data(), v2.size()})); result_ = new cugraph::c_api::cugraph_similarity_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients, - graph_->weight_type_)}; + graph_->weight_type_), + nullptr}; + } + } +}; + +template +struct all_pairs_similarity_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_graph_t* graph_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertices_; + call_similarity_functor_t call_similarity_; + bool use_weight_; + size_t topk_; + bool do_expensive_check_; + + cugraph::c_api::cugraph_similarity_result_t* result_{}; + + all_pairs_similarity_functor(::cugraph_resource_handle_t const* handle, + ::cugraph_graph_t* graph, + ::cugraph_type_erased_device_array_view_t const* vertices, + call_similarity_functor_t call_similarity, + bool use_weight, + size_t topk, + bool do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + graph_(reinterpret_cast(graph)), + vertices_( + reinterpret_cast(vertices)), + call_similarity_(call_similarity), + use_weight_(use_weight), + topk_(topk), + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + // similarity algorithms expect store_transposed == false + if constexpr (store_transposed) { + error_code_ = cugraph::c_api:: + transpose_storage( + handle_, graph_, error_.get()); + if (error_code_ != CUGRAPH_SUCCESS) return; + } + + auto graph = + reinterpret_cast*>(graph_->graph_); + + auto graph_view = graph->view(); + + auto edge_weights = reinterpret_cast< + cugraph::edge_property_t, + weight_t>*>(graph_->edge_weights_); + + auto number_map = 
reinterpret_cast*>(graph_->number_map_); + + auto [v1, v2, similarity_coefficients] = + call_similarity_(handle_, + graph_view, + use_weight_ ? std::make_optional(edge_weights->view()) : std::nullopt, + vertices_ ? std::make_optional(raft::device_span{ + vertices_->as_type(), vertices_->size_}) + : std::nullopt, + topk_ != SIZE_MAX ? std::make_optional(topk_) : std::nullopt); + + result_ = new cugraph::c_api::cugraph_similarity_result_t{ + new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients, + graph_->weight_type_), + new cugraph::c_api::cugraph_vertex_pairs_t{ + new cugraph::c_api::cugraph_type_erased_device_array_t(v1, graph_->vertex_type_), + new cugraph::c_api::cugraph_type_erased_device_array_t(v2, graph_->vertex_type_)}}; } } }; @@ -147,11 +228,24 @@ struct jaccard_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return cugraph::jaccard_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::jaccard_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct sorensen_functor { @@ -160,11 +254,24 @@ struct sorensen_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return cugraph::sorensen_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + 
cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::sorensen_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct overlap_functor { @@ -173,11 +280,24 @@ struct overlap_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return cugraph::overlap_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::overlap_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; } // namespace @@ -185,11 +305,19 @@ struct overlap_functor { extern "C" cugraph_type_erased_device_array_view_t* cugraph_similarity_result_get_similarity( cugraph_similarity_result_t* result) { - auto internal_pointer = reinterpret_cast(result); + auto internal_pointer = + reinterpret_cast(result); return reinterpret_cast( internal_pointer->similarity_coefficients_->view()); } +extern "C" cugraph_vertex_pairs_t* cugraph_similarity_result_get_vertex_pairs( + cugraph_similarity_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return reinterpret_cast(internal_pointer->vertex_pairs_); +} + extern "C" void cugraph_similarity_result_free(cugraph_similarity_result_t* result) { auto internal_pointer = reinterpret_cast(result); @@ -262,3 +390,72 @@ extern "C" cugraph_error_code_t cugraph_overlap_coefficients( return cugraph::c_api::run_algorithm(graph, functor, result, error); } + +extern "C" cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients( + const 
cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, jaccard_functor{}, use_weight, topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +extern "C" cugraph_error_code_t cugraph_all_pairs_sorensen_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, sorensen_functor{}, use_weight, topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +extern "C" cugraph_error_code_t cugraph_all_pairs_overlap_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, overlap_functor{}, use_weight, 
topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/src/link_prediction/jaccard_impl.cuh b/cpp/src/link_prediction/jaccard_impl.cuh index bd4e2d5e58e..cfc62a68c97 100644 --- a/cpp/src/link_prediction/jaccard_impl.cuh +++ b/cpp/src/link_prediction/jaccard_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,4 +56,26 @@ rmm::device_uvector jaccard_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + source_vertices, + topk, + detail::jaccard_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/jaccard_mg.cu b/cpp/src/link_prediction/jaccard_mg.cu index 3207e2a8d6f..3978e86d676 100644 --- a/cpp/src/link_prediction/jaccard_mg.cu +++ b/cpp/src/link_prediction/jaccard_mg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -59,4 +59,64 @@ template rmm::device_uvector jaccard_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/jaccard_sg.cu b/cpp/src/link_prediction/jaccard_sg.cu index 
74d5b3d429e..b3ed28ac69c 100644 --- a/cpp/src/link_prediction/jaccard_sg.cu +++ b/cpp/src/link_prediction/jaccard_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,4 +59,64 @@ template rmm::device_uvector jaccard_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, 
rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_impl.cuh b/cpp/src/link_prediction/overlap_impl.cuh index 1810df2f76b..571ee1949cf 100644 --- a/cpp/src/link_prediction/overlap_impl.cuh +++ b/cpp/src/link_prediction/overlap_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,4 +56,26 @@ rmm::device_uvector overlap_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + source_vertices, + topk, + detail::overlap_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_mg.cu b/cpp/src/link_prediction/overlap_mg.cu index bbc464375ba..e3bfda7aad7 100644 --- a/cpp/src/link_prediction/overlap_mg.cu +++ b/cpp/src/link_prediction/overlap_mg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -59,4 +59,64 @@ template rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_sg.cu b/cpp/src/link_prediction/overlap_sg.cu index 
ac3a3bc7d2b..1fc386c166e 100644 --- a/cpp/src/link_prediction/overlap_sg.cu +++ b/cpp/src/link_prediction/overlap_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,4 +59,64 @@ template rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, 
rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 7ac294d7719..26b96ffe8f7 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -17,6 +17,7 @@ #include #include +#include #include #include @@ -162,5 +163,302 @@ rmm::device_uvector similarity( } } +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +all_pairs_similarity(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + functor_t functor, + bool do_expensive_check = false) +{ + using GraphViewType = graph_view_t; + + CUGRAPH_EXPECTS(graph_view.is_symmetric(), + "similarity algorithms require an undirected(symmetric) graph"); + + if (do_expensive_check) { + if (source_vertices) { + auto vertex_partition = vertex_partition_device_view_t( + graph_view.local_vertex_partition_view()); + auto num_invalid_vertices = + thrust::count_if(handle.get_thrust_policy(), + source_vertices->begin(), + source_vertices->end(), + [vertex_partition] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + vertex_partition.in_local_vertex_partition_range_nocheck(val)); + }); + + if constexpr (multi_gpu) { + num_invalid_vertices = cugraph::host_scalar_allreduce( + handle.get_comms(), num_invalid_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } + + CUGRAPH_EXPECTS(num_invalid_vertices == 0, + "Invalid input arguments: there are invalid input vertices."); + } + + if (edge_weight_view) { + auto num_negative_edge_weights = + count_if_e(handle, + graph_view, + edge_src_dummy_property_t{}.view(), + 
edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + [] __device__(vertex_t, vertex_t, auto, auto, weight_t w) { return w < 0.0; }); + + if constexpr (multi_gpu) { + num_negative_edge_weights = cugraph::host_scalar_allreduce(handle.get_comms(), + num_negative_edge_weights, + raft::comms::op_t::SUM, + handle.get_stream()); + } + + CUGRAPH_EXPECTS( + num_negative_edge_weights == 0, + "Invalid input argument: input edge weights should have non-negative values."); + } + } + + rmm::device_uvector sources(0, handle.get_stream()); + + if (source_vertices) { + sources.resize(source_vertices->size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + source_vertices->begin(), + source_vertices->end(), + sources.begin()); + } else { + sources.resize(graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), + sources.begin(), + sources.end(), + graph_view.local_vertex_partition_range_first()); + } + + if (topk) { + // We can reduce memory footprint by doing work in batches and + // computing/updating topk with each batch + rmm::device_uvector top_v1(0, handle.get_stream()); + rmm::device_uvector top_v2(0, handle.get_stream()); + rmm::device_uvector top_score(0, handle.get_stream()); + + top_v1.reserve(*topk, handle.get_stream()); + top_v2.reserve(*topk, handle.get_stream()); + top_score.reserve(*topk, handle.get_stream()); + + // FIXME: Think about what this should be + edge_t const MAX_PAIRS{2 << 20}; + + rmm::device_uvector degrees = graph_view.compute_out_degrees(handle); + rmm::device_uvector two_hop_degrees(degrees.size(), handle.get_stream()); + + // Let's compute the maximum size of the 2-hop neighborhood of each vertex + // FIXME: If sources is specified, this could be done on a subset of the vertices + // + edge_dst_property_t edge_dst_degrees(handle, graph_view); + update_edge_dst_property(handle, graph_view, degrees.begin(), edge_dst_degrees); + + per_v_transform_reduce_incoming_e( + 
handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_degrees.view(), + edge_dummy_property_t{}.view(), + [] __device__(vertex_t, vertex_t, auto, auto dst_degree, auto) { return dst_degree; }, + edge_t{0}, + reduce_op::plus{}, + two_hop_degrees.begin()); + + if (source_vertices) { + rmm::device_uvector gathered_degrees(sources.size(), handle.get_stream()); + + thrust::gather( + handle.get_thrust_policy(), + thrust::make_transform_iterator( + sources.begin(), + cugraph::detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + thrust::make_transform_iterator( + sources.end(), + cugraph::detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + two_hop_degrees.begin(), + gathered_degrees.begin()); + + two_hop_degrees = std::move(gathered_degrees); + } + + thrust::sort_by_key(handle.get_thrust_policy(), + two_hop_degrees.begin(), + two_hop_degrees.end(), + sources.begin(), + thrust::greater{}); + + thrust::inclusive_scan(handle.get_thrust_policy(), + two_hop_degrees.begin(), + two_hop_degrees.end(), + two_hop_degrees.begin()); + + size_t current_pos{0}; + size_t next_pos{0}; + + while (true) { + if (current_pos < two_hop_degrees.size()) { + edge_t next_boundary; + raft::update_host( + &next_boundary, two_hop_degrees.data() + current_pos, 1, handle.get_stream()); + next_boundary += MAX_PAIRS; + + next_pos = + current_pos + thrust::distance(two_hop_degrees.begin() + current_pos, + thrust::upper_bound(handle.get_thrust_policy(), + two_hop_degrees.begin() + current_pos, + two_hop_degrees.end(), + next_boundary)); + + if (next_pos == current_pos) next_pos++; + } + + size_t batch_size = next_pos - current_pos; + + if constexpr (multi_gpu) { + batch_size = cugraph::host_scalar_allreduce( + handle.get_comms(), batch_size, raft::comms::op_t::SUM, handle.get_stream()); + } + + if (batch_size == 0) break; + + auto [offsets, v2] = k_hop_nbrs( + handle, + graph_view, + raft::device_span{sources.data() + current_pos, next_pos - 
current_pos}, + 2, + do_expensive_check); + + auto v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices(handle, + v1.data(), + v1.size(), + sources.data() + current_pos, + vertex_t{0}, + static_cast(next_pos - current_pos), + do_expensive_check); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + auto score = + similarity(handle, + graph_view, + edge_weight_view, + std::make_tuple(raft::device_span{v1.data(), v1.size()}, + raft::device_span{v2.data(), v2.size()}), + functor, + do_expensive_check); + + thrust::sort_by_key(handle.get_thrust_policy(), + score.begin(), + score.end(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::greater{}); + + size_t v1_keep = std::min(*topk, v1.size()); + + if (score.size() < (top_v1.size() + v1_keep)) { + score.resize(top_v1.size() + v1_keep, handle.get_stream()); + v1.resize(score.size(), handle.get_stream()); + v2.resize(score.size(), handle.get_stream()); + } + + thrust::copy(handle.get_thrust_policy(), top_v1.begin(), top_v1.end(), v1.begin() + v1_keep); + thrust::copy(handle.get_thrust_policy(), top_v2.begin(), top_v2.end(), v2.begin() + v1_keep); + thrust::copy( + handle.get_thrust_policy(), top_score.begin(), top_score.end(), score.begin() + v1_keep); + + thrust::sort_by_key(handle.get_thrust_policy(), + score.begin(), + score.end(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::greater{}); + + if (top_v1.size() < std::min(*topk, v1.size())) { + top_v1.resize(std::min(*topk, v1.size()), 
handle.get_stream()); + top_v2.resize(top_v1.size(), handle.get_stream()); + top_score.resize(top_v1.size(), handle.get_stream()); + } + + thrust::copy( + handle.get_thrust_policy(), v1.begin(), v1.begin() + top_v1.size(), top_v1.begin()); + thrust::copy( + handle.get_thrust_policy(), v2.begin(), v2.begin() + top_v1.size(), top_v2.begin()); + thrust::copy(handle.get_thrust_policy(), + score.begin(), + score.begin() + top_v1.size(), + top_score.begin()); + + current_pos = next_pos; + } + + return std::make_tuple(std::move(top_v1), std::move(top_v2), std::move(top_score)); + } else { + auto [offsets, v2] = + k_hop_nbrs(handle, + graph_view, + raft::device_span{sources.data(), sources.size()}, + 2, + do_expensive_check); + + auto v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices(handle, + v1.data(), + v1.size(), + sources.data(), + vertex_t{0}, + static_cast(sources.size()), + do_expensive_check); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + auto score = + similarity(handle, + graph_view, + edge_weight_view, + std::make_tuple(raft::device_span{v1.data(), v1.size()}, + raft::device_span{v2.data(), v2.size()}), + functor, + do_expensive_check); + + return std::make_tuple(std::move(v1), std::move(v2), std::move(score)); + } +} + } // namespace detail } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_impl.cuh b/cpp/src/link_prediction/sorensen_impl.cuh index 00c9a8107f3..77fa1ebeedb 100644 --- a/cpp/src/link_prediction/sorensen_impl.cuh +++ 
b/cpp/src/link_prediction/sorensen_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -56,4 +56,26 @@ rmm::device_uvector sorensen_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + source_vertices, + topk, + detail::sorensen_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_mg.cu b/cpp/src/link_prediction/sorensen_mg.cu index 68e976fb114..93c1fad82e5 100644 --- a/cpp/src/link_prediction/sorensen_mg.cu +++ b/cpp/src/link_prediction/sorensen_mg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -59,4 +59,64 @@ template rmm::device_uvector sorensen_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_sg.cu b/cpp/src/link_prediction/sorensen_sg.cu index 
3eaf1d7c84f..57050a52968 100644 --- a/cpp/src/link_prediction/sorensen_sg.cu +++ b/cpp/src/link_prediction/sorensen_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -59,4 +59,64 @@ template rmm::device_uvector sorensen_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, 
rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> source_vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 3df979fe5c2..62ed15d249c 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -374,7 +374,7 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_compo ################################################################################################### # - SIMILARITY tests ------------------------------------------------------------------------------ -ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cpp) +ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cu) ################################################################################################### # - WEIGHTED_SIMILARITY tests ------------------------------------------------------------------------------ diff --git a/cpp/tests/c_api/mg_two_hop_neighbors_test.c b/cpp/tests/c_api/mg_two_hop_neighbors_test.c index 37ae191c6b2..056da2bcc45 100644 --- a/cpp/tests/c_api/mg_two_hop_neighbors_test.c +++ b/cpp/tests/c_api/mg_two_hop_neighbors_test.c @@ -76,8 +76,8 @@ int generic_two_hop_nbr_test(const cugraph_resource_handle_t* resource_handle, ret_code = cugraph_two_hop_neighbors( resource_handle, graph, start_vertices_view, FALSE, &result, &ret_error); - cugraph_type_erased_device_array_view_t* v1; - cugraph_type_erased_device_array_view_t* v2; + cugraph_type_erased_device_array_view_t const* v1; + cugraph_type_erased_device_array_view_t const* v2; v1 = cugraph_vertex_pairs_get_first(result); v2 = cugraph_vertex_pairs_get_second(result); diff --git a/cpp/tests/c_api/similarity_test.c b/cpp/tests/c_api/similarity_test.c index 52f849ccd28..5637a9d2b01 100644 --- a/cpp/tests/c_api/similarity_test.c 
+++ b/cpp/tests/c_api/similarity_test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -131,6 +131,112 @@ int generic_similarity_test(vertex_t* h_src, return test_ret_value; } +int generic_all_pairs_similarity_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + vertex_t* h_first, + vertex_t* h_second, + weight_t* h_result, + size_t num_vertices, + size_t num_edges, + size_t num_pairs, + bool_t store_transposed, + bool_t use_weight, + size_t topk, + similarity_t test_type) +{ + int test_ret_value = 0; + data_type_id_t vertex_tid = INT32; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_similarity_result_t* result = NULL; + cugraph_type_erased_device_array_t* vertices = NULL; + cugraph_type_erased_device_array_view_t* vertices_view = NULL; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, TRUE, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + switch (test_type) { + case JACCARD: + ret_code = cugraph_all_pairs_jaccard_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, &result, &ret_error); + break; + case SORENSEN: + ret_code = cugraph_all_pairs_sorensen_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, &result, &ret_error); + break; + case OVERLAP: + ret_code = cugraph_all_pairs_overlap_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, 
&result, &ret_error); + break; + } + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph similarity failed."); + + cugraph_type_erased_device_array_view_t* similarity_coefficient; + + cugraph_vertex_pairs_t * vertex_pairs; + vertex_pairs = cugraph_similarity_result_get_vertex_pairs(result); + similarity_coefficient = cugraph_similarity_result_get_similarity(result); + + cugraph_type_erased_device_array_view_t *result_v1; + cugraph_type_erased_device_array_view_t *result_v2; + + result_v1 = cugraph_vertex_pairs_get_first(vertex_pairs); + result_v2 = cugraph_vertex_pairs_get_second(vertex_pairs); + size_t result_num_pairs = cugraph_type_erased_device_array_view_size(result_v1); + + TEST_ASSERT(test_ret_value, result_num_pairs == num_pairs, "Incorrect number of results"); + + vertex_t h_result_v1[result_num_pairs]; + vertex_t h_result_v2[result_num_pairs]; + weight_t h_similarity_coefficient[result_num_pairs]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_v1, result_v1, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_v2, result_v2, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_similarity_coefficient, similarity_coefficient, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + weight_t result_matrix[num_vertices][num_vertices]; + for (int i = 0 ; i < num_vertices ; ++i) + for (int j = 0 ; j < num_vertices ; ++j) + result_matrix[i][j] = 0; + + for (int i = 0 ; i < num_pairs ; ++i) + result_matrix[h_result_v1[i]][h_result_v2[i]] = h_similarity_coefficient[i]; + + for (int i = 0; (i < 
num_pairs) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(result_matrix[h_first[i]][h_second[i]], h_result[i], 0.001), + "similarity results don't match"); + } + + if (result != NULL) cugraph_similarity_result_free(result); + cugraph_sg_graph_free(graph); + cugraph_free_resource_handle(handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int test_jaccard() { size_t num_edges = 16; @@ -296,6 +402,366 @@ int test_weighted_overlap() OVERLAP); } +int test_all_pairs_jaccard() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.2,0.25,0.666667,0.333333,0.2,0.4,0.166667,0.5,0.25,0.4,0.2,0.25,0.25,0.666667,0.166667,0.2,0.666667,0.333333,0.25,0.666667,0.5,0.25}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + JACCARD); +} + +int test_weighted_all_pairs_jaccard() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.357143, 0.208333, 0.357143, 0.411765, 0.208333, 0.411765, 0.4, 0.0833333, 0.272727, 0.4, 
0.222222, 0.352941, 0.0833333, 0.222222, 0.272727, 0.352941}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + SIZE_MAX, + JACCARD); +} + +int test_all_pairs_sorensen() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.333333, 0.4, 0.8, 0.5, 0.333333, 0.571429, 0.285714, 0.666667, 0.4, 0.571429, 0.333333, 0.4, 0.4, 0.8, 0.285714, 0.333333, 0.8, 0.5, 0.4, 0.8, 0.666667, 0.4}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + SORENSEN); +} + +int test_weighted_all_pairs_sorensen() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.526316, 0.344828, 0.526316, 0.583333, 0.344828, 0.583333, 0.571429, 0.153846, 0.428571, 0.571429, 0.363636, 0.521739, 0.153846, 0.363636, 0.428571, 0.521739}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + 
FALSE, + TRUE, + SIZE_MAX, + SORENSEN); +} + +int test_all_pairs_overlap() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.5, 0.5, 1, 0.5, 0.5, 0.666667, 0.333333, 1, 0.5, 0.666667, 0.333333, 0.5, 0.5, 1, 0.333333, 0.333333, 1, 0.5, 0.5, 1, 1, 0.5}; + + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + OVERLAP); +} + +int test_weighted_all_pairs_overlap() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.714286, 0.416667, 0.714286, 1, 0.416667, 1, 1, 0.166667, 0.5, 1, 0.571429, 0.75, 0.166667, 0.571429, 0.5, 0.75}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + SIZE_MAX, + OVERLAP); +} + +int test_all_pairs_jaccard_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t topk = 6; + size_t num_pairs = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 
3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {0.666667, 0.5, 0.666667, 0.666667, 0.666667, 0.5}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + JACCARD); +} + +int test_weighted_all_pairs_jaccard_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 1, 1, 2, 3, 4}; + vertex_t h_second[] = {1, 0, 2, 1, 4, 3}; + weight_t h_result[] = {0.357143, 0.357143, 0.411765, 0.411765, 0.4, 0.4}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + topk, + JACCARD); +} + +int test_all_pairs_sorensen_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {0.8, 0.666667, 0.8, 0.8, 0.8, 0.666667}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + SORENSEN); +} + +int test_weighted_all_pairs_sorensen_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + 
size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 1, 1, 2, 3, 4}; + vertex_t h_second[] = {1, 0, 2, 1, 4, 3}; + weight_t h_result[] = {0.526316, 0.526316, 0.583333, 0.583333, 0.571429, 0.571429}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + topk, + SORENSEN); +} + +int test_all_pairs_overlap_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {1, 1, 1, 1, 1, 1}; + + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + OVERLAP); +} + +int test_weighted_all_pairs_overlap_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {1, 2, 3, 4, 4, 6}; + vertex_t h_second[] = {2, 1, 4, 3, 6, 4}; + weight_t h_result[] = {1, 1, 1, 1, 0.75, 0.75}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + 
num_pairs, + FALSE, + TRUE, + topk, + OVERLAP); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -307,5 +773,17 @@ int main(int argc, char** argv) result |= RUN_TEST(test_weighted_jaccard); result |= RUN_TEST(test_weighted_sorensen); result |= RUN_TEST(test_weighted_overlap); + result |= RUN_TEST(test_all_pairs_jaccard); + result |= RUN_TEST(test_all_pairs_sorensen); + result |= RUN_TEST(test_all_pairs_overlap); + result |= RUN_TEST(test_weighted_all_pairs_jaccard); + result |= RUN_TEST(test_weighted_all_pairs_sorensen); + result |= RUN_TEST(test_weighted_all_pairs_overlap); + result |= RUN_TEST(test_all_pairs_jaccard_topk); + result |= RUN_TEST(test_all_pairs_sorensen_topk); + result |= RUN_TEST(test_all_pairs_overlap_topk); + result |= RUN_TEST(test_weighted_all_pairs_jaccard_topk); + result |= RUN_TEST(test_weighted_all_pairs_sorensen_topk); + result |= RUN_TEST(test_weighted_all_pairs_overlap_topk); return result; } diff --git a/cpp/tests/c_api/two_hop_neighbors_test.c b/cpp/tests/c_api/two_hop_neighbors_test.c index d47280276c5..bc95db3932b 100644 --- a/cpp/tests/c_api/two_hop_neighbors_test.c +++ b/cpp/tests/c_api/two_hop_neighbors_test.c @@ -81,8 +81,8 @@ int generic_two_hop_nbr_test(vertex_t* h_src, ret_code = cugraph_two_hop_neighbors( resource_handle, graph, start_vertices_view, FALSE, &result, &ret_error); - cugraph_type_erased_device_array_view_t* v1; - cugraph_type_erased_device_array_view_t* v2; + cugraph_type_erased_device_array_view_t const* v1; + cugraph_type_erased_device_array_view_t const* v2; v1 = cugraph_vertex_pairs_get_first(result); v2 = cugraph_vertex_pairs_get_second(result); diff --git a/cpp/tests/link_prediction/similarity_compare.hpp b/cpp/tests/link_prediction/similarity_compare.hpp index 5c312a768d0..b414df1ee35 100644 --- a/cpp/tests/link_prediction/similarity_compare.hpp +++ b/cpp/tests/link_prediction/similarity_compare.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 
2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -52,6 +52,18 @@ struct test_jaccard_t { { return cugraph::jaccard_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::jaccard_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct test_sorensen_t { @@ -81,6 +93,18 @@ struct test_sorensen_t { { return cugraph::sorensen_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::sorensen_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct test_overlap_t { @@ -110,6 +134,18 @@ struct test_overlap_t { { return cugraph::overlap_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::overlap_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; template diff --git a/cpp/tests/link_prediction/similarity_test.cu b/cpp/tests/link_prediction/similarity_test.cu new file mode 100644 index 00000000000..49df9f377a4 --- /dev/null +++ b/cpp/tests/link_prediction/similarity_test.cu @@ -0,0 +1,350 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. 
+ * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include + +struct Similarity_Usecase { + bool use_weights{false}; + bool check_correctness{true}; + bool all_pairs{false}; + std::optional max_seeds{std::nullopt}; + std::optional max_vertex_pairs_to_check{std::nullopt}; + std::optional topk{std::nullopt}; +}; + +template +class Tests_Similarity + : public ::testing::TestWithParam> { + public: + Tests_Similarity() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param, + test_functor_t const& test_functor) + { + constexpr bool renumber = true; + auto [similarity_usecase, input_usecase] = param; + + // 1. initialize handle + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + // 2. create SG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + auto [graph, edge_weights, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, similarity_usecase.use_weights, renumber); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. 
run similarity + + auto graph_view = graph.view(); + auto edge_weight_view = + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Similarity test"); + } + + rmm::device_uvector v1(0, handle.get_stream()); + rmm::device_uvector v2(0, handle.get_stream()); + rmm::device_uvector result_score(0, handle.get_stream()); + + raft::random::RngState rng_state{0}; + + rmm::device_uvector sources(0, handle.get_stream()); + std::optional> sources_span{std::nullopt}; + + if (similarity_usecase.max_seeds) { + sources = cugraph::select_random_vertices( + handle, + graph_view, + std::optional>{std::nullopt}, + rng_state, + std::min(*similarity_usecase.max_seeds, + static_cast(graph_view.number_of_vertices())), + false, + false); + sources_span = raft::device_span{sources.data(), sources.size()}; + } + + if (similarity_usecase.all_pairs) { + std::tie(v1, v2, result_score) = test_functor.run(handle, + graph_view, + edge_weight_view, + sources_span, + similarity_usecase.use_weights, + similarity_usecase.topk); + } else { + if (!sources_span) { + sources.resize(graph_view.number_of_vertices(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), sources.begin(), sources.end(), vertex_t{0}); + sources_span = raft::device_span{sources.data(), sources.size()}; + } + + rmm::device_uvector offsets(0, handle.get_stream()); + + std::tie(offsets, v2) = k_hop_nbrs(handle, graph_view, *sources_span, 2, true); + + v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices(handle, + v1.data(), + v1.size(), + sources.data(), + vertex_t{0}, + static_cast(sources.size()), + true); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + 
handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + // FIXME: Need to add some tests that specify actual vertex pairs + std::tuple, raft::device_span> vertex_pairs{ + {v1.data(), v1.size()}, {v2.data(), v2.size()}}; + + result_score = test_functor.run( + handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (similarity_usecase.check_correctness) { + auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view); + + size_t check_size = similarity_usecase.max_vertex_pairs_to_check + ? std::min(v1.size(), *similarity_usecase.max_vertex_pairs_to_check) + : v1.size(); + + // + // FIXME: Need to reorder here. thrust::shuffle on the tuples (vertex_pairs_1, + // vertex_pairs_2, result_score) would + // be sufficient. 
+ // + std::vector h_vertex_pair_1(check_size); + std::vector h_vertex_pair_2(check_size); + std::vector h_result_score(check_size); + + raft::update_host(h_vertex_pair_1.data(), v1.data(), check_size, handle.get_stream()); + raft::update_host(h_vertex_pair_2.data(), v2.data(), check_size, handle.get_stream()); + raft::update_host( + h_result_score.data(), result_score.data(), check_size, handle.get_stream()); + + if (similarity_usecase.use_weights) { + weighted_similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } else { + similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } + } + } +}; + +using Tests_Similarity_File = Tests_Similarity; +using Tests_Similarity_Rmat = Tests_Similarity; + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), 
cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_Similarity_File, + ::testing::Combine(::testing::Values(Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100, 10}, + Similarity_Usecase{true, true, false, 20, 100, 10}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100, 10}, + Similarity_Usecase{true, true, true, 20, 100, 10}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + 
cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Similarity_Rmat, + ::testing::Combine( + ::testing::Values(Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 1000, 100, 10}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100, 10}, +#if 0 + // FIXME: See Issue #4132... these tests don't work for multi-graph right now + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100, 10}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100, 10}, +#endif + Similarity_Usecase{false, true, true, std::nullopt, std::nullopt, 100}, + Similarity_Usecase{false, true, true, std::nullopt, std::nullopt, 10}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with + --gtest_filter to select only the file_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one File_Usecase that differ only in filename + (to avoid running same benchmarks more than once) */ + Tests_Similarity_File, + ::testing::Combine( + // disable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{false, false, false}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking 
(with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Similarity_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/mtmg/threaded_test_louvain.cu b/cpp/tests/mtmg/threaded_test_louvain.cu index c1395037646..cca67921c04 100644 --- a/cpp/tests/mtmg/threaded_test_louvain.cu +++ b/cpp/tests/mtmg/threaded_test_louvain.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -277,6 +277,7 @@ class Tests_Multithreaded std::tie(std::ignore, modularity) = cugraph::louvain( thread_handle.raft_handle(), + std::nullopt, graph_view.get(thread_handle), edge_weights ? std::make_optional(edge_weights->get(thread_handle).view()) : std::nullopt, local_louvain_clusters.data(), @@ -406,6 +407,7 @@ class Tests_Multithreaded std::tie(std::ignore, modularity) = cugraph::louvain( handle, + std::nullopt, sg_graph.view(), sg_edge_weights ? std::make_optional(sg_edge_weights->view()) : std::nullopt, sg_clusters.data(),