diff --git a/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py b/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py index 157c64b0b20..face22c9283 100644 --- a/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py +++ b/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -39,7 +39,7 @@ uniform_neighbor_sample, ) from cugraph.generators import rmat -from cugraph.experimental import datasets +from cugraph import datasets from cugraph.dask import uniform_neighbor_sample as uniform_neighbor_sample_mg from cugraph_benchmarking import params diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 30a1c98c106..9de1750de81 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -54,12 +54,12 @@ cd "${package_dir}" python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check -# pure-python packages should not have auditwheel run on them. +# pure-python packages should be marked as pure, and not have auditwheel run on them. if [[ ${package_name} == "nx-cugraph" ]] || \ [[ ${package_name} == "cugraph-dgl" ]] || \ [[ ${package_name} == "cugraph-pyg" ]] || \ [[ ${package_name} == "cugraph-equivariant" ]]; then - RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 dist + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 dist else mkdir -p final_dist python -m auditwheel repair -w final_dist dist/* diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 8c5832e412f..158704e08d1 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -11,8 +11,13 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g') mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" +# nx-cugraph is a pure wheel, which is part of generating the download path +if [[ "${package_name}" == "nx-cugraph" ]]; then + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist +else + RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +fi # use 'ls' to expand wildcard before adding `[extra]` requires for pip -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh index 367b169bd13..827ad487115 100755 --- a/ci/test_wheel_cugraph-dgl.sh +++ b/ci/test_wheel_cugraph-dgl.sh @@ -17,7 +17,7 @@ RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f python -m pip install ./local-deps/*.whl # use 'ls' to expand wildcard before adding `[extra]` requires for pip -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] diff --git a/ci/test_wheel_cugraph-equivariant.sh b/ci/test_wheel_cugraph-equivariant.sh index f054780b03a..cb952055f06 100755 --- a/ci/test_wheel_cugraph-equivariant.sh +++ b/ci/test_wheel_cugraph-equivariant.sh @@ -12,7 +12,7 @@ mkdir -p ./dist RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" # use 'ls' to expand wildcard before adding `[extra]` requires for pip -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 6e44e1ad958..50cbfb3e1fe 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -17,7 +17,7 @@ RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f python -m pip install ./local-deps/*.whl # use 'ls' to expand wildcard before adding `[extra]` requires for pip -RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist # pip creates wheels using python package names python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index cd2dafc5217..5c29604a5a7 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -2135,6 +2135,172 @@ rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check = false); +/** + * @brief Compute Jaccard all pairs similarity coefficient + * + * Similarity is computed for all pairs of vertices. Note that in a sparse + * graph, many of the vertex pairs will have a score of zero. We actually + * compute similarity only for vertices that are two hop neighbors within + * the graph, since vertices that are not two hop neighbors will have + * a score of 0. + * + * If @p vertices is specified we will compute similarity on two hop + * neighbors the @p vertices. If @p vertices is not specified it will + * compute similarity on all two hop neighbors in the graph. + * + * If @p topk is specified only the top @p topk scoring vertex pairs + * will be returned, if not specified then scores for all computed vertex pairs + * will be returned. + * + * Note the list of two hop neighbors in the entire graph might be a large + * number of vertex pairs. If the graph is dense enough it could be as large + * as the the number of vertices squared, which might run out of memory. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. In a multi-gpu context the + * vertices should be local to this GPU. + * @param topk optional specification of the how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing three device vectors (v1, v2, score) of the same length. Corresponding + * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying + * one of v1's two hop neighors, and the score identifying the similarity score between v1 and v2. + * If @p topk was specified then the vectors will be no longer than @p topk elements. In a + * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they + * will be returned on the local GPU for vertex v1. + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + +/** + * @brief Compute Sorensen similarity coefficient + * + * Similarity is computed for all pairs of vertices. Note that in a sparse + * graph, many of the vertex pairs will have a score of zero. We actually + * compute similarity only for vertices that are two hop neighbors within + * the graph, since vertices that are not two hop neighbors will have + * a score of 0. + * + * If @p vertices is specified we will compute similarity on two hop + * neighbors the @p vertices. If @p vertices is not specified it will + * compute similarity on all two hop neighbors in the graph. + * + * If @p topk is specified only the top @p topk scoring vertex pairs + * will be returned, if not specified then scores for all computed vertex pairs + * will be returned. + * + * Note the list of two hop neighbors in the entire graph might be a large + * number of vertex pairs. If the graph is dense enough it could be as large + * as the the number of vertices squared, which might run out of memory. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. + * @param topk optional specification of the how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing three device vectors (v1, v2, score) of the same length. Corresponding + * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying + * one of v1's two hop neighors, and the score identifying the similarity score between v1 and v2. + * If @p topk was specified then the vectors will be no longer than @p topk elements. In a + * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they + * will be returned on the local GPU for vertex v1. + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + +/** + * @brief Compute overlap similarity coefficient + * + * Similarity is computed for all pairs of vertices. Note that in a sparse + * graph, many of the vertex pairs will have a score of zero. We actually + * compute similarity only for vertices that are two hop neighbors within + * the graph, since vertices that are not two hop neighbors will have + * a score of 0. + * + * If @p vertices is specified we will compute similarity on two hop + * neighbors the @p vertices. If @p vertices is not specified it will + * compute similarity on all two hop neighbors in the graph. + * + * If @p topk is specified only the top @p topk scoring vertex pairs + * will be returned, if not specified then scores for all computed vertex pairs + * will be returned. + * + * Note the list of two hop neighbors in the entire graph might be a large + * number of vertex pairs. If the graph is dense enough it could be as large + * as the the number of vertices squared, which might run out of memory. + * + * @throws cugraph::logic_error when an error occurs. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param graph_view Graph view object. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p + * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume + * a weight of 1 for all edges. + * @param vertices optional device span defining the seed vertices. + * @param topk optional specification of the how many of the top scoring vertex pairs should be + * returned + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple containing three device vectors (v1, v2, score) of the same length. Corresponding + * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying + * one of v1's two hop neighors, and the score identifying the similarity score between v1 and v2. + * If @p topk was specified then the vectors will be no longer than @p topk elements. In a + * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they + * will be returned on the local GPU for vertex v1. + */ +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check = false); + /* * @brief Enumerate K-hop neighbors * diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 5b66d978b54..d3917a3e851 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -37,51 +37,52 @@ namespace cugraph { namespace detail { -template -std::tuple, std::vector> compute_offset_aligned_edge_chunks( +template +std::tuple, std::vector> compute_offset_aligned_element_chunks( raft::handle_t const& handle, - edge_t const* offsets, - vertex_t num_vertices, - edge_t num_edges, - size_t approx_edge_chunk_size) + raft::device_span offsets, + offset_t num_elements, + vertex_t approx_element_chunk_size) { auto search_offset_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{1}), cuda::proclaim_return_type( - [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; })); - auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size; + [approx_element_chunk_size] __device__(auto i) { return i * approx_element_chunk_size; })); + auto num_chunks = (num_elements + approx_element_chunk_size - 1) / approx_element_chunk_size; if (num_chunks > 1) { - rmm::device_uvector d_vertex_offsets(num_chunks - 1, handle.get_stream()); + rmm::device_uvector d_chunk_offsets(num_chunks - 1, handle.get_stream()); thrust::lower_bound(handle.get_thrust_policy(), - offsets, - offsets + num_vertices + 1, + offsets.begin(), + offsets.end(), search_offset_first, - search_offset_first + d_vertex_offsets.size(), - d_vertex_offsets.begin()); - rmm::device_uvector d_edge_offsets(d_vertex_offsets.size(), handle.get_stream()); + search_offset_first + d_chunk_offsets.size(), + d_chunk_offsets.begin()); + rmm::device_uvector d_element_offsets(d_chunk_offsets.size(), handle.get_stream()); thrust::gather(handle.get_thrust_policy(), - d_vertex_offsets.begin(), - d_vertex_offsets.end(), - offsets, - d_edge_offsets.begin()); - std::vector h_edge_offsets(num_chunks + 1, edge_t{0}); - h_edge_offsets.back() = num_edges; - raft::update_host( - h_edge_offsets.data() + 1, d_edge_offsets.data(), d_edge_offsets.size(), handle.get_stream()); - std::vector h_vertex_offsets(num_chunks + 1, vertex_t{0}); - h_vertex_offsets.back() = num_vertices; - raft::update_host(h_vertex_offsets.data() + 1, - d_vertex_offsets.data(), - d_vertex_offsets.size(), + d_chunk_offsets.begin(), + d_chunk_offsets.end(), + offsets.begin(), + d_element_offsets.begin()); + std::vector h_element_offsets(num_chunks + 1, offset_t{0}); + h_element_offsets.back() = num_elements; + raft::update_host(h_element_offsets.data() + 1, + d_element_offsets.data(), + d_element_offsets.size(), + handle.get_stream()); + std::vector h_chunk_offsets(num_chunks + 1, vertex_t{0}); + h_chunk_offsets.back() = offsets.size() - 1; + raft::update_host(h_chunk_offsets.data() + 1, + d_chunk_offsets.data(), + d_chunk_offsets.size(), handle.get_stream()); handle.sync_stream(); - return std::make_tuple(h_vertex_offsets, h_edge_offsets); + return std::make_tuple(h_chunk_offsets, h_element_offsets); } else { - return std::make_tuple(std::vector{{0, num_vertices}}, - std::vector{{0, num_edges}}); + return std::make_tuple(std::vector{{0, offsets.size() - 1}}, + std::vector{{0, num_elements}}); } } diff --git a/cpp/include/cugraph_c/graph_functions.h b/cpp/include/cugraph_c/graph_functions.h index 19b69922fa5..8fe1ea0b958 100644 --- a/cpp/include/cugraph_c/graph_functions.h +++ b/cpp/include/cugraph_c/graph_functions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/include/cugraph_c/similarity_algorithms.h b/cpp/include/cugraph_c/similarity_algorithms.h index b8f61b46545..5b8462a1666 100644 --- a/cpp/include/cugraph_c/similarity_algorithms.h +++ b/cpp/include/cugraph_c/similarity_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -36,6 +36,16 @@ typedef struct { int32_t align_; } cugraph_similarity_result_t; +/** + * @ingroup similarity + * @brief Get vertex pair from the similarity result. + * + * @param [in] result The result from a similarity algorithm + * @return vertex pairs + */ +cugraph_vertex_pairs_t* cugraph_similarity_result_get_vertex_pairs( + cugraph_similarity_result_t* result); + /** * @ingroup similarity * @brief Get the similarity coefficient array @@ -135,6 +145,120 @@ cugraph_error_code_t cugraph_overlap_coefficients(const cugraph_resource_handle_ cugraph_similarity_result_t** result, cugraph_error_t** error); +/** + * @brief Perform All-Pairs Jaccard similarity computation + * + * Compute the similarity for all vertex pairs derived from the two-hop neighbors + * of an optional specified vertex list. This function will identify the two-hop + * neighbors of the specified vertices (all vertices in the graph if not specified) + * and compute similarity for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that Jaccard similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. + * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + +/** + * @brief Perform All Pairs Sorensen similarity computation + * + * Compute the similarity for all vertex pairs derived from the two-hop neighbors + * of an optional specified vertex list. This function will identify the two-hop + * neighbors of the specified vertices (all vertices in the graph if not specified) + * and compute similarity for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that Sorensen similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. + * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_sorensen_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + +/** + * @brief Perform All Pairs overlap similarity computation + * + * Compute the similarity for all vertex pairs derived from the two-hop neighbors + * of an optional specified vertex list. This function will identify the two-hop + * neighbors of the specified vertices (all vertices in the graph if not specified) + * and compute similarity for those vertices. + * + * If the topk parameter is specified then the result will only contain the top k + * highest scoring results. + * + * Note that overlap similarity must run on a symmetric graph. + * + * @param [in] handle Handle for accessing resources + * @param [in] graph Pointer to graph + * @param [in] vertices Vertex list for input. If null then compute based on + * all vertices in the graph. + * @param [in] use_weight If true consider the edge weight in the graph, if false use an + * edge weight of 1 + * @param [in] topk Specify how many answers to return. Specifying SIZE_MAX + * will return all values. + * @param [in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). + * @param [out] result Opaque pointer to similarity results + * @param [out] error Pointer to an error object storing details of any error. Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_all_pairs_overlap_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error); + #ifdef __cplusplus } #endif diff --git a/cpp/src/c_api/similarity.cpp b/cpp/src/c_api/similarity.cpp index f456c957f8e..aa54fc6dee7 100644 --- a/cpp/src/c_api/similarity.cpp +++ b/cpp/src/c_api/similarity.cpp @@ -34,6 +34,7 @@ namespace c_api { struct cugraph_similarity_result_t { cugraph_type_erased_device_array_t* similarity_coefficients_; + cugraph_vertex_pairs_t* vertex_pairs_; }; } // namespace c_api @@ -131,12 +132,92 @@ struct similarity_functor : public cugraph::c_api::abstract_functor { graph_view, use_weight_ ? std::make_optional(edge_weights->view()) : std::nullopt, std::make_tuple(raft::device_span{v1.data(), v1.size()}, - raft::device_span{v2.data(), v2.size()}), - use_weight_); + raft::device_span{v2.data(), v2.size()})); result_ = new cugraph::c_api::cugraph_similarity_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients, - graph_->weight_type_)}; + graph_->weight_type_), + nullptr}; + } + } +}; + +template +struct all_pairs_similarity_functor : public cugraph::c_api::abstract_functor { + raft::handle_t const& handle_; + cugraph::c_api::cugraph_graph_t* graph_; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertices_; + call_similarity_functor_t call_similarity_; + bool use_weight_; + size_t topk_; + bool do_expensive_check_; + + cugraph::c_api::cugraph_similarity_result_t* result_{}; + + all_pairs_similarity_functor(::cugraph_resource_handle_t const* handle, + ::cugraph_graph_t* graph, + ::cugraph_type_erased_device_array_view_t const* vertices, + call_similarity_functor_t call_similarity, + bool use_weight, + size_t topk, + bool do_expensive_check) + : abstract_functor(), + handle_(*reinterpret_cast(handle)->handle_), + graph_(reinterpret_cast(graph)), + vertices_( + reinterpret_cast(vertices)), + call_similarity_(call_similarity), + use_weight_(use_weight), + topk_(topk), + do_expensive_check_(do_expensive_check) + { + } + + template + void operator()() + { + if constexpr (!cugraph::is_candidate::value) { + unsupported(); + } else { + // similarity algorithms expect store_transposed == false + if constexpr (store_transposed) { + error_code_ = cugraph::c_api:: + transpose_storage( + handle_, graph_, error_.get()); + if (error_code_ != CUGRAPH_SUCCESS) return; + } + + auto graph = + reinterpret_cast*>(graph_->graph_); + + auto graph_view = graph->view(); + + auto edge_weights = reinterpret_cast< + cugraph::edge_property_t, + weight_t>*>(graph_->edge_weights_); + + auto number_map = reinterpret_cast*>(graph_->number_map_); + + auto [v1, v2, similarity_coefficients] = + call_similarity_(handle_, + graph_view, + use_weight_ ? std::make_optional(edge_weights->view()) : std::nullopt, + vertices_ ? std::make_optional(raft::device_span{ + vertices_->as_type(), vertices_->size_}) + : std::nullopt, + topk_ != SIZE_MAX ? std::make_optional(topk_) : std::nullopt); + + result_ = new cugraph::c_api::cugraph_similarity_result_t{ + new cugraph::c_api::cugraph_type_erased_device_array_t(similarity_coefficients, + graph_->weight_type_), + new cugraph::c_api::cugraph_vertex_pairs_t{ + new cugraph::c_api::cugraph_type_erased_device_array_t(v1, graph_->vertex_type_), + new cugraph::c_api::cugraph_type_erased_device_array_t(v2, graph_->vertex_type_)}}; } } }; @@ -147,11 +228,24 @@ struct jaccard_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return cugraph::jaccard_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::jaccard_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct sorensen_functor { @@ -160,11 +254,24 @@ struct sorensen_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return cugraph::sorensen_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::sorensen_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct overlap_functor { @@ -173,11 +280,24 @@ struct overlap_functor { raft::handle_t const& handle, cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, - std::tuple, raft::device_span> vertex_pairs, - bool use_weights) + std::tuple, raft::device_span> vertex_pairs) { return cugraph::overlap_coefficients(handle, graph_view, edge_weight_view, vertex_pairs); } + + template + std::tuple, + rmm::device_uvector, + rmm::device_uvector> + operator()(raft::handle_t const& handle, + cugraph::graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk) + { + return cugraph::overlap_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; } // namespace @@ -185,11 +305,19 @@ struct overlap_functor { extern "C" cugraph_type_erased_device_array_view_t* cugraph_similarity_result_get_similarity( cugraph_similarity_result_t* result) { - auto internal_pointer = reinterpret_cast(result); + auto internal_pointer = + reinterpret_cast(result); return reinterpret_cast( internal_pointer->similarity_coefficients_->view()); } +extern "C" cugraph_vertex_pairs_t* cugraph_similarity_result_get_vertex_pairs( + cugraph_similarity_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return reinterpret_cast(internal_pointer->vertex_pairs_); +} + extern "C" void cugraph_similarity_result_free(cugraph_similarity_result_t* result) { auto internal_pointer = reinterpret_cast(result); @@ -262,3 +390,72 @@ extern "C" cugraph_error_code_t cugraph_overlap_coefficients( return cugraph::c_api::run_algorithm(graph, functor, result, error); } + +extern "C" cugraph_error_code_t cugraph_all_pairs_jaccard_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, jaccard_functor{}, use_weight, topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +extern "C" cugraph_error_code_t cugraph_all_pairs_sorensen_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, sorensen_functor{}, use_weight, topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +extern "C" cugraph_error_code_t cugraph_all_pairs_overlap_coefficients( + const cugraph_resource_handle_t* handle, + cugraph_graph_t* graph, + const cugraph_type_erased_device_array_view_t* vertices, + bool_t use_weight, + size_t topk, + bool_t do_expensive_check, + cugraph_similarity_result_t** result, + cugraph_error_t** error) +{ + if (use_weight) { + CAPI_EXPECTS( + reinterpret_cast(graph)->edge_weights_ != nullptr, + CUGRAPH_INVALID_INPUT, + "use_weight is true but edge weights are not provided.", + *error); + } + all_pairs_similarity_functor functor( + handle, graph, vertices, overlap_functor{}, use_weight, topk, do_expensive_check); + + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} diff --git a/cpp/src/link_prediction/jaccard_impl.cuh b/cpp/src/link_prediction/jaccard_impl.cuh index bdc80f4f6ac..d8cfcf19b4f 100644 --- a/cpp/src/link_prediction/jaccard_impl.cuh +++ b/cpp/src/link_prediction/jaccard_impl.cuh @@ -57,4 +57,26 @@ rmm::device_uvector jaccard_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + vertices, + topk, + detail::jaccard_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/jaccard_mg.cu b/cpp/src/link_prediction/jaccard_mg.cu index 8e91bb9f3e1..f53d173c6dd 100644 --- a/cpp/src/link_prediction/jaccard_mg.cu +++ b/cpp/src/link_prediction/jaccard_mg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector jaccard_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/jaccard_sg.cu b/cpp/src/link_prediction/jaccard_sg.cu index e25d2d72d3b..6bb89a21368 100644 --- a/cpp/src/link_prediction/jaccard_sg.cu +++ b/cpp/src/link_prediction/jaccard_sg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector jaccard_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + jaccard_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_impl.cuh b/cpp/src/link_prediction/overlap_impl.cuh index 95542a83ff5..38e654453ff 100644 --- a/cpp/src/link_prediction/overlap_impl.cuh +++ b/cpp/src/link_prediction/overlap_impl.cuh @@ -57,4 +57,26 @@ rmm::device_uvector overlap_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + vertices, + topk, + detail::overlap_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_mg.cu b/cpp/src/link_prediction/overlap_mg.cu index 54f7912aa14..a88159e682b 100644 --- a/cpp/src/link_prediction/overlap_mg.cu +++ b/cpp/src/link_prediction/overlap_mg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/overlap_sg.cu b/cpp/src/link_prediction/overlap_sg.cu index 1b169570e57..17e4bafdcbe 100644 --- a/cpp/src/link_prediction/overlap_sg.cu +++ b/cpp/src/link_prediction/overlap_sg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector overlap_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + overlap_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh index 4344262e453..c13259f0da7 100644 --- a/cpp/src/link_prediction/similarity_impl.cuh +++ b/cpp/src/link_prediction/similarity_impl.cuh @@ -17,6 +17,7 @@ #include "prims/count_if_e.cuh" #include "prims/per_v_pair_transform_dst_nbr_intersection.cuh" +#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh" #include "prims/update_edge_src_dst_property.cuh" #include "utilities/error_check_utils.cuh" @@ -162,5 +163,392 @@ rmm::device_uvector similarity( } } +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector> +all_pairs_similarity(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + functor_t functor, + bool do_expensive_check = false) +{ + using GraphViewType = graph_view_t; + + CUGRAPH_EXPECTS(graph_view.is_symmetric(), + "similarity algorithms require an undirected(symmetric) graph"); + + // FIXME: See https://github.com/rapidsai/cugraph/issues/4132 + // Once that issue is resolved we can drop this check + CUGRAPH_EXPECTS(!graph_view.is_multigraph() || !edge_weight_view, + "Weighted implementation currently fails on multi-graph"); + + if (do_expensive_check) { + if (vertices) { + auto vertex_partition = vertex_partition_device_view_t( + graph_view.local_vertex_partition_view()); + auto num_invalid_vertices = + thrust::count_if(handle.get_thrust_policy(), + vertices->begin(), + vertices->end(), + [vertex_partition] __device__(auto val) { + return !(vertex_partition.is_valid_vertex(val) && + vertex_partition.in_local_vertex_partition_range_nocheck(val)); + }); + + if constexpr (multi_gpu) { + num_invalid_vertices = cugraph::host_scalar_allreduce( + handle.get_comms(), num_invalid_vertices, raft::comms::op_t::SUM, handle.get_stream()); + } + + CUGRAPH_EXPECTS(num_invalid_vertices == 0, + "Invalid input arguments: there are invalid input vertices."); + } + + if (edge_weight_view) { + auto num_negative_edge_weights = + count_if_e(handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_dummy_property_t{}.view(), + *edge_weight_view, + [] __device__(vertex_t, vertex_t, auto, auto, weight_t w) { return w < 0.0; }); + + if constexpr (multi_gpu) { + num_negative_edge_weights = cugraph::host_scalar_allreduce(handle.get_comms(), + num_negative_edge_weights, + raft::comms::op_t::SUM, + handle.get_stream()); + } + + CUGRAPH_EXPECTS( + num_negative_edge_weights == 0, + "Invalid input argument: input edge weights should have non-negative values."); + } + } + + if (topk) { + rmm::device_uvector tmp_vertices(0, handle.get_stream()); + + if (vertices) { + tmp_vertices.resize(vertices->size(), handle.get_stream()); + thrust::copy( + handle.get_thrust_policy(), vertices->begin(), vertices->end(), tmp_vertices.begin()); + } else { + tmp_vertices.resize(graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), + tmp_vertices.begin(), + tmp_vertices.end(), + graph_view.local_vertex_partition_range_first()); + } + + // We can reduce memory footprint by doing work in batches and + // computing/updating topk with each batch + + // FIXME: Experiment with this and adjust as necessary + // size_t const + // MAX_PAIRS_PER_BATCH{static_cast(handle.get_device_properties().multiProcessorCount) * + // (1 << 15)}; + size_t const MAX_PAIRS_PER_BATCH{100}; + + rmm::device_uvector degrees = graph_view.compute_out_degrees(handle); + rmm::device_uvector two_hop_degrees(degrees.size() + 1, handle.get_stream()); + + // Let's compute the maximum size of the 2-hop neighborhood of each vertex + // FIXME: If vertices is specified, this could be done on a subset of the vertices + // + edge_dst_property_t edge_dst_degrees(handle, graph_view); + update_edge_dst_property(handle, graph_view, degrees.begin(), edge_dst_degrees); + + per_v_transform_reduce_incoming_e( + handle, + graph_view, + edge_src_dummy_property_t{}.view(), + edge_dst_degrees.view(), + edge_dummy_property_t{}.view(), + [] __device__(vertex_t, vertex_t, auto, auto dst_degree, auto) { + return static_cast(dst_degree); + }, + size_t{0}, + reduce_op::plus{}, + two_hop_degrees.begin()); + + if (vertices) { + rmm::device_uvector gathered_two_hop_degrees(tmp_vertices.size() + 1, + handle.get_stream()); + + thrust::gather( + handle.get_thrust_policy(), + thrust::make_transform_iterator( + tmp_vertices.begin(), + cugraph::detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + thrust::make_transform_iterator( + tmp_vertices.end(), + cugraph::detail::shift_left_t{graph_view.local_vertex_partition_range_first()}), + two_hop_degrees.begin(), + gathered_two_hop_degrees.begin()); + + two_hop_degrees = std::move(gathered_two_hop_degrees); + } + + thrust::sort_by_key(handle.get_thrust_policy(), + two_hop_degrees.begin(), + two_hop_degrees.end() - 1, + tmp_vertices.begin(), + thrust::greater{}); + + thrust::exclusive_scan(handle.get_thrust_policy(), + two_hop_degrees.begin(), + two_hop_degrees.end(), + two_hop_degrees.begin()); + + auto two_hop_degree_offsets = std::move(two_hop_degrees); + + rmm::device_uvector top_v1(0, handle.get_stream()); + rmm::device_uvector top_v2(0, handle.get_stream()); + rmm::device_uvector top_score(0, handle.get_stream()); + + top_v1.reserve(*topk, handle.get_stream()); + top_v2.reserve(*topk, handle.get_stream()); + top_score.reserve(*topk, handle.get_stream()); + + size_t sum_two_hop_degrees{0}; + weight_t similarity_threshold{0}; + std::vector batch_offsets; + + raft::update_host(&sum_two_hop_degrees, + two_hop_degree_offsets.data() + two_hop_degree_offsets.size() - 1, + 1, + handle.get_stream()); + + std::tie(batch_offsets, std::ignore) = compute_offset_aligned_element_chunks( + handle, + raft::device_span{two_hop_degree_offsets.data(), two_hop_degree_offsets.size()}, + sum_two_hop_degrees, + MAX_PAIRS_PER_BATCH); + + for (size_t batch_number = 0; batch_number < (batch_offsets.size() - 1); ++batch_number) { + if (batch_offsets[batch_number + 1] > batch_offsets[batch_number]) { + auto [offsets, v2] = + k_hop_nbrs(handle, + graph_view, + raft::device_span{ + tmp_vertices.data() + batch_offsets[batch_number], + batch_offsets[batch_number + 1] - batch_offsets[batch_number]}, + 2, + do_expensive_check); + + auto v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices( + handle, + v1.data(), + v1.size(), + tmp_vertices.data() + batch_offsets[batch_number], + vertex_t{0}, + static_cast(batch_offsets[batch_number + 1] - batch_offsets[batch_number]), + do_expensive_check); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + auto score = + similarity(handle, + graph_view, + edge_weight_view, + std::make_tuple(raft::device_span{v1.data(), v1.size()}, + raft::device_span{v2.data(), v2.size()}), + functor, + do_expensive_check); + + // Add a remove_if to remove items that are less than the last topk element + new_size = thrust::distance( + thrust::make_zip_iterator(score.begin(), v1.begin(), v2.begin()), + thrust::remove_if(handle.get_thrust_policy(), + thrust::make_zip_iterator(score.begin(), v1.begin(), v2.begin()), + thrust::make_zip_iterator(score.end(), v1.end(), v2.end()), + [similarity_threshold] __device__(auto tuple) { + return thrust::get<0>(tuple) < similarity_threshold; + })); + + score.resize(new_size, handle.get_stream()); + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + thrust::sort_by_key(handle.get_thrust_policy(), + score.begin(), + score.end(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::greater{}); + + size_t v1_keep = std::min(*topk, v1.size()); + + if (score.size() < (top_v1.size() + v1_keep)) { + score.resize(top_v1.size() + v1_keep, handle.get_stream()); + v1.resize(score.size(), handle.get_stream()); + v2.resize(score.size(), handle.get_stream()); + } + + thrust::copy( + handle.get_thrust_policy(), top_v1.begin(), top_v1.end(), v1.begin() + v1_keep); + thrust::copy( + handle.get_thrust_policy(), top_v2.begin(), top_v2.end(), v2.begin() + v1_keep); + thrust::copy( + handle.get_thrust_policy(), top_score.begin(), top_score.end(), score.begin() + v1_keep); + + thrust::sort_by_key(handle.get_thrust_policy(), + score.begin(), + score.end(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::greater{}); + + if (top_v1.size() < std::min(*topk, v1.size())) { + top_v1.resize(std::min(*topk, v1.size()), handle.get_stream()); + top_v2.resize(top_v1.size(), handle.get_stream()); + top_score.resize(top_v1.size(), handle.get_stream()); + } + + thrust::copy( + handle.get_thrust_policy(), v1.begin(), v1.begin() + top_v1.size(), top_v1.begin()); + thrust::copy( + handle.get_thrust_policy(), v2.begin(), v2.begin() + top_v1.size(), top_v2.begin()); + thrust::copy(handle.get_thrust_policy(), + score.begin(), + score.begin() + top_v1.size(), + top_score.begin()); + + if constexpr (multi_gpu) { + bool is_root = handle.get_comms().get_rank() == int{0}; + auto rx_sizes = cugraph::host_scalar_gather( + handle.get_comms(), top_v1.size(), int{0}, handle.get_stream()); + std::vector rx_displs; + size_t gathered_size{0}; + + if (is_root) { + rx_displs.resize(handle.get_comms().get_size()); + rx_displs[0] = 0; + std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1); + gathered_size = std::reduce(rx_sizes.begin(), rx_sizes.end()); + } + + rmm::device_uvector gathered_v1(gathered_size, handle.get_stream()); + rmm::device_uvector gathered_v2(gathered_size, handle.get_stream()); + rmm::device_uvector gathered_score(gathered_size, handle.get_stream()); + + cugraph::device_gatherv( + handle.get_comms(), + thrust::make_zip_iterator(top_v1.begin(), top_v2.begin(), top_score.begin()), + thrust::make_zip_iterator( + gathered_v1.begin(), gathered_v2.begin(), gathered_score.begin()), + + top_v1.size(), + rx_sizes, + rx_displs, + int{0}, + handle.get_stream()); + + if (is_root) { + thrust::sort_by_key(handle.get_thrust_policy(), + gathered_score.begin(), + gathered_score.end(), + thrust::make_zip_iterator(gathered_v1.begin(), gathered_v2.begin()), + thrust::greater{}); + + if (gathered_v1.size() > *topk) { + gathered_v1.resize(*topk, handle.get_stream()); + gathered_v2.resize(*topk, handle.get_stream()); + gathered_score.resize(*topk, handle.get_stream()); + } + + top_v1 = std::move(gathered_v1); + top_v2 = std::move(gathered_v2); + top_score = std::move(gathered_score); + } else { + top_v1.resize(0, handle.get_stream()); + top_v2.resize(0, handle.get_stream()); + top_score.resize(0, handle.get_stream()); + } + } + + if (top_score.size() == *topk) { + raft::update_host( + &similarity_threshold, top_score.data() + *topk - 1, 1, handle.get_stream()); + + if constexpr (multi_gpu) { + similarity_threshold = host_scalar_bcast( + handle.get_comms(), similarity_threshold, int{0}, handle.get_stream()); + } + } + } + } + + return std::make_tuple(std::move(top_v1), std::move(top_v2), std::move(top_score)); + } else { + rmm::device_uvector tmp_vertices(0, handle.get_stream()); + raft::device_span vertices_span{nullptr, size_t{0}}; + + if (vertices) { + vertices_span = raft::device_span{vertices->data(), vertices->size()}; + } else { + tmp_vertices.resize(graph_view.local_vertex_partition_range_size(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), + tmp_vertices.begin(), + tmp_vertices.end(), + graph_view.local_vertex_partition_range_first()); + vertices_span = raft::device_span{tmp_vertices.data(), tmp_vertices.size()}; + } + + auto [offsets, v2] = k_hop_nbrs(handle, graph_view, vertices_span, 2, do_expensive_check); + + auto v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices(handle, + v1.data(), + v1.size(), + vertices_span.data(), + vertex_t{0}, + static_cast(vertices_span.size()), + do_expensive_check); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + auto score = + similarity(handle, + graph_view, + edge_weight_view, + std::make_tuple(raft::device_span{v1.data(), v1.size()}, + raft::device_span{v2.data(), v2.size()}), + functor, + do_expensive_check); + + return std::make_tuple(std::move(v1), std::move(v2), std::move(score)); + } +} + } // namespace detail } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_impl.cuh b/cpp/src/link_prediction/sorensen_impl.cuh index 994d824b849..af99732a45e 100644 --- a/cpp/src/link_prediction/sorensen_impl.cuh +++ b/cpp/src/link_prediction/sorensen_impl.cuh @@ -57,4 +57,26 @@ rmm::device_uvector sorensen_coefficients( do_expensive_check); } +template +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); + + return detail::all_pairs_similarity(handle, + graph_view, + edge_weight_view, + vertices, + topk, + detail::sorensen_functor_t{}, + do_expensive_check); +} + } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_mg.cu b/cpp/src/link_prediction/sorensen_mg.cu index 0a67a871b87..c3d010f8503 100644 --- a/cpp/src/link_prediction/sorensen_mg.cu +++ b/cpp/src/link_prediction/sorensen_mg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector sorensen_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/link_prediction/sorensen_sg.cu b/cpp/src/link_prediction/sorensen_sg.cu index 2edfb92bb0f..c129cd40ca4 100644 --- a/cpp/src/link_prediction/sorensen_sg.cu +++ b/cpp/src/link_prediction/sorensen_sg.cu @@ -59,4 +59,64 @@ template rmm::device_uvector sorensen_coefficients( std::tuple, raft::device_span> vertex_pairs, bool do_expensive_check); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + sorensen_all_pairs_coefficients( + raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + std::optional topk, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 8e627392555..3b25ae50773 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -355,13 +355,15 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - edge_partition.offsets(), - edge_partition.dcs_nzd_vertices() - ? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] + - *(edge_partition.dcs_nzd_vertex_count()) - : edge_partition.major_range_size(), + raft::device_span{ + edge_partition.offsets(), + 1 + static_cast( + edge_partition.dcs_nzd_vertices() + ? (*segment_offsets)[detail::num_sparse_segments_per_vertex_partition] + + *(edge_partition.dcs_nzd_vertex_count()) + : edge_partition.major_range_size())}, edge_partition.number_of_edges(), approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; diff --git a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh index 0f128eb8410..f5bc3ef6d2e 100644 --- a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh +++ b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh @@ -107,12 +107,8 @@ compute_min_hop_for_unique_label_vertex_pairs( rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - auto [h_label_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - (*label_offsets).data(), - num_labels, - vertices.size(), - approx_edges_to_sort_per_iteration); + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, *label_offsets, vertices.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; for (size_t i = 0; i < num_chunks; ++i) { @@ -599,10 +595,10 @@ renumber_sampled_edgelist( static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - (*renumber_map_label_offsets).data(), - static_cast((*renumber_map_label_offsets).size() - 1), + raft::device_span{(*renumber_map_label_offsets).data(), + (*renumber_map_label_offsets).size()}, renumber_map.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index f506e4bd04c..299aae13718 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -286,12 +286,8 @@ compute_min_hop_for_unique_label_vertex_pairs( rmm::device_uvector d_tmp_storage(0, handle.get_stream()); - auto [h_label_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - (*label_offsets).data(), - num_labels, - vertices.size(), - approx_edges_to_sort_per_iteration); + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, *label_offsets, vertices.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; for (size_t i = 0; i < num_chunks; ++i) { @@ -741,10 +737,10 @@ renumber_sampled_edgelist( static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20) /* tuning parameter */; // for segmented sort - auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( handle, - (*renumber_map_label_offsets).data(), - static_cast((*renumber_map_label_offsets).size() - 1), + raft::device_span{(*renumber_map_label_offsets).data(), + (*renumber_map_label_offsets).size()}, renumber_map.size(), approx_edges_to_sort_per_iteration); auto num_chunks = h_label_offsets.size() - 1; @@ -910,11 +906,10 @@ sort_sampled_edge_tuples( (1 << 20) /* tuning parameter */; // for sorts in chunks std::tie(h_label_offsets, h_edge_offsets) = - detail::compute_offset_aligned_edge_chunks(handle, - std::get<0>(*edgelist_label_offsets).data(), - std::get<1>(*edgelist_label_offsets), - edgelist_majors.size(), - approx_edges_to_sort_per_iteration); + detail::compute_offset_aligned_element_chunks(handle, + std::get<0>(*edgelist_label_offsets), + edgelist_majors.size(), + approx_edges_to_sort_per_iteration); } else { h_label_offsets = {0, 1}; h_edge_offsets = {0, edgelist_majors.size()}; diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh index a96467ce06b..1ef975c1dec 100644 --- a/cpp/src/structure/detail/structure_utils.cuh +++ b/cpp/src/structure/detail/structure_utils.cuh @@ -316,12 +316,8 @@ void sort_adjacency_list(raft::handle_t const& handle, // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - offsets.data(), - static_cast(offsets.size() - 1), - num_edges, - approx_edges_to_sort_per_iteration); + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, offsets, num_edges, approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; // 3. Segmented sort each vertex's neighbors @@ -451,12 +447,8 @@ void sort_adjacency_list(raft::handle_t const& handle, // to limit memory footprint ((1 << 20) is a tuning parameter) auto approx_edges_to_sort_per_iteration = static_cast(handle.get_device_properties().multiProcessorCount) * (1 << 20); - auto [h_vertex_offsets, h_edge_offsets] = - detail::compute_offset_aligned_edge_chunks(handle, - offsets.data(), - static_cast(offsets.size() - 1), - num_edges, - approx_edges_to_sort_per_iteration); + auto [h_vertex_offsets, h_edge_offsets] = detail::compute_offset_aligned_element_chunks( + handle, offsets, num_edges, approx_edges_to_sort_per_iteration); auto num_chunks = h_vertex_offsets.size() - 1; // 3. Segmented sort each vertex's neighbors diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index cb57b96f955..46a895536ef 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -374,7 +374,7 @@ ConfigureTest(WEAKLY_CONNECTED_COMPONENTS_TEST components/weakly_connected_compo ################################################################################################### # - SIMILARITY tests ------------------------------------------------------------------------------ -ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cpp) +ConfigureTest(SIMILARITY_TEST link_prediction/similarity_test.cu) ################################################################################################### # - WEIGHTED_SIMILARITY tests ------------------------------------------------------------------------------ diff --git a/cpp/tests/c_api/mg_two_hop_neighbors_test.c b/cpp/tests/c_api/mg_two_hop_neighbors_test.c index 37ae191c6b2..056da2bcc45 100644 --- a/cpp/tests/c_api/mg_two_hop_neighbors_test.c +++ b/cpp/tests/c_api/mg_two_hop_neighbors_test.c @@ -76,8 +76,8 @@ int generic_two_hop_nbr_test(const cugraph_resource_handle_t* resource_handle, ret_code = cugraph_two_hop_neighbors( resource_handle, graph, start_vertices_view, FALSE, &result, &ret_error); - cugraph_type_erased_device_array_view_t* v1; - cugraph_type_erased_device_array_view_t* v2; + cugraph_type_erased_device_array_view_t const* v1; + cugraph_type_erased_device_array_view_t const* v2; v1 = cugraph_vertex_pairs_get_first(result); v2 = cugraph_vertex_pairs_get_second(result); diff --git a/cpp/tests/c_api/similarity_test.c b/cpp/tests/c_api/similarity_test.c index 52f849ccd28..5637a9d2b01 100644 --- a/cpp/tests/c_api/similarity_test.c +++ b/cpp/tests/c_api/similarity_test.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -131,6 +131,112 @@ int generic_similarity_test(vertex_t* h_src, return test_ret_value; } +int generic_all_pairs_similarity_test(vertex_t* h_src, + vertex_t* h_dst, + weight_t* h_wgt, + vertex_t* h_first, + vertex_t* h_second, + weight_t* h_result, + size_t num_vertices, + size_t num_edges, + size_t num_pairs, + bool_t store_transposed, + bool_t use_weight, + size_t topk, + similarity_t test_type) +{ + int test_ret_value = 0; + data_type_id_t vertex_tid = INT32; + + cugraph_error_code_t ret_code = CUGRAPH_SUCCESS; + cugraph_error_t* ret_error; + + cugraph_resource_handle_t* handle = NULL; + cugraph_graph_t* graph = NULL; + cugraph_similarity_result_t* result = NULL; + cugraph_type_erased_device_array_t* vertices = NULL; + cugraph_type_erased_device_array_view_t* vertices_view = NULL; + + handle = cugraph_create_resource_handle(NULL); + TEST_ASSERT(test_ret_value, handle != NULL, "resource handle creation failed."); + + ret_code = create_test_graph( + handle, h_src, h_dst, h_wgt, num_edges, store_transposed, FALSE, TRUE, &graph, &ret_error); + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "create_test_graph failed."); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + + switch (test_type) { + case JACCARD: + ret_code = cugraph_all_pairs_jaccard_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, &result, &ret_error); + break; + case SORENSEN: + ret_code = cugraph_all_pairs_sorensen_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, &result, &ret_error); + break; + case OVERLAP: + ret_code = cugraph_all_pairs_overlap_coefficients( + handle, graph, vertices_view, use_weight, topk, FALSE, &result, &ret_error); + break; + } + + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "cugraph similarity failed."); + + cugraph_type_erased_device_array_view_t* similarity_coefficient; + + cugraph_vertex_pairs_t * vertex_pairs; + vertex_pairs = cugraph_similarity_result_get_vertex_pairs(result); + similarity_coefficient = cugraph_similarity_result_get_similarity(result); + + cugraph_type_erased_device_array_view_t *result_v1; + cugraph_type_erased_device_array_view_t *result_v2; + + result_v1 = cugraph_vertex_pairs_get_first(vertex_pairs); + result_v2 = cugraph_vertex_pairs_get_second(vertex_pairs); + size_t result_num_pairs = cugraph_type_erased_device_array_view_size(result_v1); + + TEST_ASSERT(test_ret_value, result_num_pairs == num_pairs, "Incorrect number of results"); + + vertex_t h_result_v1[result_num_pairs]; + vertex_t h_result_v2[result_num_pairs]; + weight_t h_similarity_coefficient[result_num_pairs]; + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_v1, result_v1, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_result_v2, result_v2, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + ret_code = cugraph_type_erased_device_array_view_copy_to_host( + handle, (byte_t*)h_similarity_coefficient, similarity_coefficient, &ret_error); + TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, "copy_to_host failed."); + + weight_t result_matrix[num_vertices][num_vertices]; + for (int i = 0 ; i < num_vertices ; ++i) + for (int j = 0 ; j < num_vertices ; ++j) + result_matrix[i][j] = 0; + + for (int i = 0 ; i < num_pairs ; ++i) + result_matrix[h_result_v1[i]][h_result_v2[i]] = h_similarity_coefficient[i]; + + for (int i = 0; (i < num_pairs) && (test_ret_value == 0); ++i) { + TEST_ASSERT(test_ret_value, + nearlyEqual(result_matrix[h_first[i]][h_second[i]], h_result[i], 0.001), + "similarity results don't match"); + } + + if (result != NULL) cugraph_similarity_result_free(result); + cugraph_sg_graph_free(graph); + cugraph_free_resource_handle(handle); + cugraph_error_free(ret_error); + + return test_ret_value; +} + int test_jaccard() { size_t num_edges = 16; @@ -296,6 +402,366 @@ int test_weighted_overlap() OVERLAP); } +int test_all_pairs_jaccard() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.2,0.25,0.666667,0.333333,0.2,0.4,0.166667,0.5,0.25,0.4,0.2,0.25,0.25,0.666667,0.166667,0.2,0.666667,0.333333,0.25,0.666667,0.5,0.25}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + JACCARD); +} + +int test_weighted_all_pairs_jaccard() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.357143, 0.208333, 0.357143, 0.411765, 0.208333, 0.411765, 0.4, 0.0833333, 0.272727, 0.4, 0.222222, 0.352941, 0.0833333, 0.222222, 0.272727, 0.352941}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + SIZE_MAX, + JACCARD); +} + +int test_all_pairs_sorensen() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.333333, 0.4, 0.8, 0.5, 0.333333, 0.571429, 0.285714, 0.666667, 0.4, 0.571429, 0.333333, 0.4, 0.4, 0.8, 0.285714, 0.333333, 0.8, 0.5, 0.4, 0.8, 0.666667, 0.4}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + SORENSEN); +} + +int test_weighted_all_pairs_sorensen() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.526316, 0.344828, 0.526316, 0.583333, 0.344828, 0.583333, 0.571429, 0.153846, 0.428571, 0.571429, 0.363636, 0.521739, 0.153846, 0.363636, 0.428571, 0.521739}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + SIZE_MAX, + SORENSEN); +} + +int test_all_pairs_overlap() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 22; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + + vertex_t h_first[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 5, 5}; + vertex_t h_second[] = {1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 3, 4, 5, 0, 1, 2, 4, 0, 2, 3, 1, 2}; + weight_t h_result[] = {0.5, 0.5, 1, 0.5, 0.5, 0.666667, 0.333333, 1, 0.5, 0.666667, 0.333333, 0.5, 0.5, 1, 0.333333, 0.333333, 1, 0.5, 0.5, 1, 1, 0.5}; + + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + SIZE_MAX, + OVERLAP); +} + +int test_weighted_all_pairs_overlap() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 16; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 0, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 6, 6}; + vertex_t h_second[] = {1, 2, 0, 2, 0, 1, 4, 5, 6, 3, 5, 6, 3, 4, 3, 4}; + weight_t h_result[] = {0.714286, 0.416667, 0.714286, 1, 0.416667, 1, 1, 0.166667, 0.5, 1, 0.571429, 0.75, 0.166667, 0.571429, 0.5, 0.75}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + SIZE_MAX, + OVERLAP); +} + +int test_all_pairs_jaccard_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t topk = 6; + size_t num_pairs = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {0.666667, 0.5, 0.666667, 0.666667, 0.666667, 0.5}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + JACCARD); +} + +int test_weighted_all_pairs_jaccard_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 1, 1, 2, 3, 4}; + vertex_t h_second[] = {1, 0, 2, 1, 4, 3}; + weight_t h_result[] = {0.357143, 0.357143, 0.411765, 0.411765, 0.4, 0.4}; + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + topk, + JACCARD); +} + +int test_all_pairs_sorensen_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {0.8, 0.666667, 0.8, 0.8, 0.8, 0.666667}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + SORENSEN); +} + +int test_weighted_all_pairs_sorensen_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {0, 1, 1, 2, 3, 4}; + vertex_t h_second[] = {1, 0, 2, 1, 4, 3}; + weight_t h_result[] = {0.526316, 0.526316, 0.583333, 0.583333, 0.571429, 0.571429}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + topk, + SORENSEN); +} + +int test_all_pairs_overlap_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 6; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; + vertex_t h_dst[] = {1, 3, 4, 0, 1, 3, 5, 5, 0, 1, 1, 2, 2, 2, 3, 4}; + weight_t h_wgt[] = {0.1f, 2.1f, 1.1f, 5.1f, 3.1f, 4.1f, 7.2f, 3.2f}; + + vertex_t h_first[] = {0, 1, 3, 3, 4, 5}; + vertex_t h_second[] = {3, 5, 0, 4, 3, 1}; + weight_t h_result[] = {1, 1, 1, 1, 1, 1}; + + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + FALSE, + topk, + OVERLAP); +} + +int test_weighted_all_pairs_overlap_topk() +{ + size_t num_edges = 16; + size_t num_vertices = 7; + size_t num_pairs = 6; + size_t topk = 6; + + vertex_t h_src[] = {0, 1, 2, 0, 1, 2, 3, 3, 3, 4, 4, 4, 0, 5, 2, 6}; + vertex_t h_dst[] = {3, 3, 3, 4, 4, 4, 0, 1, 2, 0, 1, 2, 5, 0, 6, 2}; + weight_t h_wgt[] = {0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 0.5, 1.0, 1.5, 2.0, 2.5, 3.0, 3.5, 3.5, 4.0, 4.0}; + + vertex_t h_first[] = {1, 2, 3, 4, 4, 6}; + vertex_t h_second[] = {2, 1, 4, 3, 6, 4}; + weight_t h_result[] = {1, 1, 1, 1, 0.75, 0.75}; + + + return generic_all_pairs_similarity_test(h_src, + h_dst, + h_wgt, + h_first, + h_second, + h_result, + num_vertices, + num_edges, + num_pairs, + FALSE, + TRUE, + topk, + OVERLAP); +} + /******************************************************************************/ int main(int argc, char** argv) @@ -307,5 +773,17 @@ int main(int argc, char** argv) result |= RUN_TEST(test_weighted_jaccard); result |= RUN_TEST(test_weighted_sorensen); result |= RUN_TEST(test_weighted_overlap); + result |= RUN_TEST(test_all_pairs_jaccard); + result |= RUN_TEST(test_all_pairs_sorensen); + result |= RUN_TEST(test_all_pairs_overlap); + result |= RUN_TEST(test_weighted_all_pairs_jaccard); + result |= RUN_TEST(test_weighted_all_pairs_sorensen); + result |= RUN_TEST(test_weighted_all_pairs_overlap); + result |= RUN_TEST(test_all_pairs_jaccard_topk); + result |= RUN_TEST(test_all_pairs_sorensen_topk); + result |= RUN_TEST(test_all_pairs_overlap_topk); + result |= RUN_TEST(test_weighted_all_pairs_jaccard_topk); + result |= RUN_TEST(test_weighted_all_pairs_sorensen_topk); + result |= RUN_TEST(test_weighted_all_pairs_overlap_topk); return result; } diff --git a/cpp/tests/c_api/two_hop_neighbors_test.c b/cpp/tests/c_api/two_hop_neighbors_test.c index d47280276c5..bc95db3932b 100644 --- a/cpp/tests/c_api/two_hop_neighbors_test.c +++ b/cpp/tests/c_api/two_hop_neighbors_test.c @@ -81,8 +81,8 @@ int generic_two_hop_nbr_test(vertex_t* h_src, ret_code = cugraph_two_hop_neighbors( resource_handle, graph, start_vertices_view, FALSE, &result, &ret_error); - cugraph_type_erased_device_array_view_t* v1; - cugraph_type_erased_device_array_view_t* v2; + cugraph_type_erased_device_array_view_t const* v1; + cugraph_type_erased_device_array_view_t const* v2; v1 = cugraph_vertex_pairs_get_first(result); v2 = cugraph_vertex_pairs_get_second(result); diff --git a/cpp/tests/link_prediction/similarity_compare.hpp b/cpp/tests/link_prediction/similarity_compare.hpp index bbd942e2664..eed0a82fe7e 100644 --- a/cpp/tests/link_prediction/similarity_compare.hpp +++ b/cpp/tests/link_prediction/similarity_compare.hpp @@ -53,6 +53,18 @@ struct test_jaccard_t { { return cugraph::jaccard_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::jaccard_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct test_sorensen_t { @@ -82,6 +94,18 @@ struct test_sorensen_t { { return cugraph::sorensen_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::sorensen_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; struct test_overlap_t { @@ -111,6 +135,18 @@ struct test_overlap_t { { return cugraph::overlap_coefficients(handle, graph_view, edge_weight_view, vertex_pairs, true); } + + template + auto run(raft::handle_t const& handle, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> vertices, + bool use_weights, + std::optional topk) const + { + return cugraph::overlap_all_pairs_coefficients( + handle, graph_view, edge_weight_view, vertices, topk); + } }; template diff --git a/cpp/tests/link_prediction/similarity_test.cu b/cpp/tests/link_prediction/similarity_test.cu new file mode 100644 index 00000000000..f5c15c760e2 --- /dev/null +++ b/cpp/tests/link_prediction/similarity_test.cu @@ -0,0 +1,352 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governin_from_mtxg permissions and + * limitations under the License. + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +struct Similarity_Usecase { + bool use_weights{false}; + bool check_correctness{true}; + bool all_pairs{false}; + std::optional max_seeds{std::nullopt}; + std::optional max_vertex_pairs_to_check{std::nullopt}; + std::optional topk{std::nullopt}; +}; + +template +class Tests_Similarity + : public ::testing::TestWithParam> { + public: + Tests_Similarity() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test(std::tuple const& param, + test_functor_t const& test_functor) + { + constexpr bool renumber = true; + auto [similarity_usecase, input_usecase] = param; + + // 1. initialize handle + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + // 2. create SG graph + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + auto [graph, edge_weights, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, similarity_usecase.use_weights, renumber); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + // 3. run similarity + + auto graph_view = graph.view(); + auto edge_weight_view = + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Similarity test"); + } + + rmm::device_uvector v1(0, handle.get_stream()); + rmm::device_uvector v2(0, handle.get_stream()); + rmm::device_uvector result_score(0, handle.get_stream()); + + raft::random::RngState rng_state{0}; + + rmm::device_uvector sources(0, handle.get_stream()); + std::optional> sources_span{std::nullopt}; + + if (similarity_usecase.max_seeds) { + sources = cugraph::select_random_vertices( + handle, + graph_view, + std::optional>{std::nullopt}, + rng_state, + std::min(*similarity_usecase.max_seeds, + static_cast(graph_view.number_of_vertices())), + false, + false); + sources_span = raft::device_span{sources.data(), sources.size()}; + } + + if (similarity_usecase.all_pairs) { + std::tie(v1, v2, result_score) = test_functor.run(handle, + graph_view, + edge_weight_view, + sources_span, + similarity_usecase.use_weights, + similarity_usecase.topk); + } else { + if (!sources_span) { + sources.resize(graph_view.number_of_vertices(), handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), sources.begin(), sources.end(), vertex_t{0}); + sources_span = raft::device_span{sources.data(), sources.size()}; + } + + rmm::device_uvector offsets(0, handle.get_stream()); + + std::tie(offsets, v2) = k_hop_nbrs(handle, graph_view, *sources_span, 2, true); + + v1 = cugraph::detail::expand_sparse_offsets( + raft::device_span{offsets.data(), offsets.size()}, + vertex_t{0}, + handle.get_stream()); + + cugraph::unrenumber_local_int_vertices(handle, + v1.data(), + v1.size(), + sources.data(), + vertex_t{0}, + static_cast(sources.size()), + true); + + auto new_size = thrust::distance( + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::remove_if( + handle.get_thrust_policy(), + thrust::make_zip_iterator(v1.begin(), v2.begin()), + thrust::make_zip_iterator(v1.end(), v2.end()), + [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); })); + + v1.resize(new_size, handle.get_stream()); + v2.resize(new_size, handle.get_stream()); + + // FIXME: Need to add some tests that specify actual vertex pairs + std::tuple, raft::device_span> vertex_pairs{ + {v1.data(), v1.size()}, {v2.data(), v2.size()}}; + + result_score = test_functor.run( + handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights); + } + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (similarity_usecase.check_correctness) { + auto [src, dst, wgt] = cugraph::test::graph_to_host_coo(handle, graph_view, edge_weight_view); + + size_t check_size = similarity_usecase.max_vertex_pairs_to_check + ? std::min(v1.size(), *similarity_usecase.max_vertex_pairs_to_check) + : v1.size(); + + // + // FIXME: Need to reorder here. thrust::shuffle on the tuples (vertex_pairs_1, + // vertex_pairs_2, result_score) would + // be sufficient. + // + std::vector h_vertex_pair_1(check_size); + std::vector h_vertex_pair_2(check_size); + std::vector h_result_score(check_size); + + raft::update_host(h_vertex_pair_1.data(), v1.data(), check_size, handle.get_stream()); + raft::update_host(h_vertex_pair_2.data(), v2.data(), check_size, handle.get_stream()); + raft::update_host( + h_result_score.data(), result_score.data(), check_size, handle.get_stream()); + + if (similarity_usecase.use_weights) { + weighted_similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } else { + similarity_compare(graph_view.number_of_vertices(), + std::tie(src, dst, wgt), + std::tie(h_vertex_pair_1, h_vertex_pair_2), + h_result_score, + test_functor); + } + } + } +}; + +using Tests_Similarity_File = Tests_Similarity; +using Tests_Similarity_Rmat = Tests_Similarity; + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatJaccard) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_jaccard_t{}); +} + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatSorensen) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_sorensen_t{}); +} + +TEST_P(Tests_Similarity_File, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int32FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt32Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +TEST_P(Tests_Similarity_Rmat, CheckInt64Int64FloatOverlap) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), cugraph::test::test_overlap_t{}); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_Similarity_File, + ::testing::Combine(::testing::Values(Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100, 10}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100, 10}), +#if 0 + // FIXME: See Issue #4132... these tests don't work for multi-graph right now + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100, 10}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100, 10}), +#endif + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Similarity_Rmat, + ::testing::Combine( + ::testing::Values(Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 20, 100}, + Similarity_Usecase{false, true, false, 1000, 100, 10}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 20, 100}, + Similarity_Usecase{false, true, true, 10000, 10000, 10}, +#if 0 + // FIXME: See Issue #4132... these tests don't work for multi-graph right now + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100, 10}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, false, 20, 100}, + Similarity_Usecase{true, true, true, 20, 100, 10}, +#endif + Similarity_Usecase{false, true, true, std::nullopt, std::nullopt, 100}, + Similarity_Usecase{false, true, true, std::nullopt, std::nullopt, 10}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +INSTANTIATE_TEST_SUITE_P( + file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with + --gtest_filter to select only the file_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one File_Usecase that differ only in filename + (to avoid running same benchmarks more than once) */ + Tests_Similarity_File, + ::testing::Combine( + // disable correctness checks + // Disable weighted computation testing in 22.10 + //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{false, false, false}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Similarity_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs + //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{false, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/mtmg/threaded_test_louvain.cu b/cpp/tests/mtmg/threaded_test_louvain.cu index d1e12057230..ff9641d59f8 100644 --- a/cpp/tests/mtmg/threaded_test_louvain.cu +++ b/cpp/tests/mtmg/threaded_test_louvain.cu @@ -276,6 +276,7 @@ class Tests_Multithreaded std::tie(std::ignore, modularity) = cugraph::louvain( thread_handle.raft_handle(), + std::nullopt, graph_view.get(thread_handle), edge_weights ? std::make_optional(edge_weights->get(thread_handle).view()) : std::nullopt, local_louvain_clusters.data(), @@ -405,6 +406,7 @@ class Tests_Multithreaded std::tie(std::ignore, modularity) = cugraph::louvain( handle, + std::nullopt, sg_graph.view(), sg_edge_weights ? std::make_optional(sg_edge_weights->view()) : std::nullopt, sg_clusters.data(), diff --git a/python/cugraph/cugraph/layout/force_atlas2.py b/python/cugraph/cugraph/layout/force_atlas2.py index 0e15eee718f..639801c9b59 100644 --- a/python/cugraph/cugraph/layout/force_atlas2.py +++ b/python/cugraph/cugraph/layout/force_atlas2.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,6 +13,7 @@ from cugraph.layout import force_atlas2_wrapper from cugraph.utilities import ensure_cugraph_obj_for_nx +import cudf def force_atlas2( @@ -55,8 +56,8 @@ def force_atlas2( Above 1000 iterations is discouraged. pos_list: cudf.DataFrame, optional (default=None) - Data frame with initial vertex positions containing two columns: - 'x' and 'y' positions. + Data frame with initial vertex positions containing three columns: + 'vertex', 'x' and 'y' positions. outbound_attraction_distribution: bool, optional (default=True) Distributes attraction along outbound edges. @@ -131,6 +132,10 @@ def on_train_end(self, positions): input_graph, isNx = ensure_cugraph_obj_for_nx(input_graph) if pos_list is not None: + if not isinstance(pos_list, cudf.DataFrame): + raise TypeError("pos_list should be a cudf.DataFrame") + if set(pos_list.columns) != set(["x", "y", "vertex"]): + raise ValueError("pos_list has wrong column names") if input_graph.renumbered is True: if input_graph.vertex_column_size() > 1: cols = pos_list.columns[:-2].to_list()