Merge remote-tracking branch 'upstream/branch-24.04' into trim-traili…

…ng-whitespace
rapidsai · Mar 7, 2024 · 0862d89 · 0862d89
2 parents 917c5b8 + a9ec503
commit 0862d89
Show file tree

Hide file tree

Showing 56 changed files with 3,004 additions and 214 deletions.
diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml
@@ -162,7 +162,7 @@ jobs:
     with:
       build_type: pull-request
       script: ci/test_wheel_cugraph-pyg.sh
-      matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0"))
+      matrix_filter: map(select(.ARCH == "amd64"))
   wheel-build-cugraph-equivariant:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]

diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml
@@ -76,7 +76,7 @@ jobs:
       date: ${{ inputs.date }}
       sha: ${{ inputs.sha }}
       script: ci/test_wheel_cugraph-pyg.sh
-      matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0"))
+      matrix_filter: map(select(.ARCH == "amd64"))
   wheel-tests-cugraph-equivariant:
     secrets: inherit
     uses: rapidsai/shared-workflows/.github/workflows/[email protected]

diff --git a/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py b/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -39,7 +39,7 @@
     uniform_neighbor_sample,
 )
 from cugraph.generators import rmat
-from cugraph.experimental import datasets
+from cugraph import datasets
 from cugraph.dask import uniform_neighbor_sample as uniform_neighbor_sample_mg
 
 from cugraph_benchmarking import params

diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh
@@ -54,12 +54,12 @@ cd "${package_dir}"
 
 python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check
 
-# pure-python packages should not have auditwheel run on them.
+# pure-python packages should be marked as pure, and not have auditwheel run on them.
 if [[ ${package_name} == "nx-cugraph" ]] || \
    [[ ${package_name} == "cugraph-dgl" ]] || \
    [[ ${package_name} == "cugraph-pyg" ]] || \
    [[ ${package_name} == "cugraph-equivariant" ]]; then
-    RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 dist
+    RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-upload-wheels-to-s3 dist
 else
     mkdir -p final_dist
     python -m auditwheel repair -w final_dist dist/*

diff --git a/ci/test_python.sh b/ci/test_python.sh
@@ -166,7 +166,7 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then
       pylibcugraphops \
       cugraph \
       cugraph-dgl \
-      'dgl>=1.1.0.cu*' \
+      'dgl>=1.1.0.cu*,<=2.0.0.cu*' \
       'pytorch>=2.0' \
       'pytorch-cuda>=11.8'
 

diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh
@@ -11,8 +11,13 @@ python_package_name=$(echo ${package_name}|sed 's/-/_/g')
 mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
+# nx-cugraph is a pure wheel, and wheel purity is part of how the download path is generated
+if [[ "${package_name}" == "nx-cugraph" ]]; then
+    RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
+else
+    RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+fi
 # use 'ls' to expand wildcard before adding `[extra]` requires for pip
-RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
 # pip creates wheels using python package names
 python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]
 

diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh
@@ -17,7 +17,7 @@ RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
 python -m pip install ./local-deps/*.whl
 
 # use 'ls' to expand wildcard before adding `[extra]` requires for pip
-RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
 # pip creates wheels using python package names
 python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]
 
@@ -34,6 +34,6 @@ DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html"
 
 rapids-logger "Installing PyTorch and DGL"
 rapids-retry python -m pip install torch --index-url ${PYTORCH_URL}
-rapids-retry python -m pip install dgl --find-links ${DGL_URL}
+rapids-retry python -m pip install dgl==2.0.0 --find-links ${DGL_URL}
 
 python -m pytest python/cugraph-dgl/tests
diff --git a/ci/test_wheel_cugraph-equivariant.sh b/ci/test_wheel_cugraph-equivariant.sh
@@ -12,7 +12,7 @@ mkdir -p ./dist
 RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})"
 
 # use 'ls' to expand wildcard before adding `[extra]` requires for pip
-RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
 # pip creates wheels using python package names
 python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]
 

diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh
@@ -17,7 +17,7 @@ RAPIDS_PY_WHEEL_NAME="cugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-f
 python -m pip install ./local-deps/*.whl
 
 # use 'ls' to expand wildcard before adding `[extra]` requires for pip
-RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist
+RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" RAPIDS_PY_WHEEL_PURE="1" rapids-download-wheels-from-s3 ./dist
 # pip creates wheels using python package names
 python -m pip install $(ls ./dist/${python_package_name}*.whl)[test]
 

diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt
@@ -423,6 +423,7 @@ add_library(cugraph_c
         src/c_api/core_number.cpp
         src/c_api/core_result.cpp
         src/c_api/extract_ego.cpp
+        src/c_api/ecg.cpp
         src/c_api/k_core.cpp
         src/c_api/hierarchical_clustering_result.cpp
         src/c_api/induced_subgraph.cpp

diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp
@@ -2135,6 +2135,172 @@ rmm::device_uvector<weight_t> overlap_coefficients(
   std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs,
   bool do_expensive_check = false);
 
+/**
+ * @brief     Compute Jaccard all pairs similarity coefficient
+ *
+ * Similarity is computed for all pairs of vertices.  Note that in a sparse
+ * graph, many of the vertex pairs will have a score of zero.  We actually
+ * compute similarity only for vertices that are two hop neighbors within
+ * the graph, since vertices that are not two hop neighbors will have
+ * a score of 0.
+ *
+ * If @p vertices is specified we will compute similarity on two hop
+ * neighbors of the @p vertices.  If @p vertices is not specified it will
+ * compute similarity on all two hop neighbors in the graph.
+ *
+ * If @p topk is specified only the top @p topk scoring vertex pairs
+ * will be returned, if not specified then scores for all computed vertex pairs
+ * will be returned.
+ *
+ * Note the list of two hop neighbors in the entire graph might be a large
+ * number of vertex pairs.  If the graph is dense enough it could be as large
+ * as the number of vertices squared, which might run out of memory.
+ *
+ * @throws                 cugraph::logic_error when an error occurs.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
+ * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
+ * a weight of 1 for all edges.
+ * @param vertices optional device span defining the seed vertices. In a multi-gpu context the
+ * vertices should be local to this GPU.
+ * @param topk optional specification of how many of the top scoring vertex pairs should be
+ * returned
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing three device vectors (v1, v2, score) of the same length.  Corresponding
+ * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying
+ * one of v1's two hop neighbors, and the score identifying the similarity score between v1 and v2.
+ * If @p topk was specified then the vectors will be no longer than @p topk elements.  In a
+ * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they
+ * will be returned on the local GPU for vertex v1.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::
+  tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
+  jaccard_all_pairs_coefficients(
+    raft::handle_t const& handle,
+    graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+    std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+    std::optional<raft::device_span<vertex_t const>> vertices,
+    std::optional<size_t> topk,
+    bool do_expensive_check = false);
+
+/**
+ * @brief     Compute Sorensen all pairs similarity coefficient
+ *
+ * Similarity is computed for all pairs of vertices.  Note that in a sparse
+ * graph, many of the vertex pairs will have a score of zero.  We actually
+ * compute similarity only for vertices that are two hop neighbors within
+ * the graph, since vertices that are not two hop neighbors will have
+ * a score of 0.
+ *
+ * If @p vertices is specified we will compute similarity on two hop
+ * neighbors of the @p vertices.  If @p vertices is not specified it will
+ * compute similarity on all two hop neighbors in the graph.
+ *
+ * If @p topk is specified only the top @p topk scoring vertex pairs
+ * will be returned, if not specified then scores for all computed vertex pairs
+ * will be returned.
+ *
+ * Note the list of two hop neighbors in the entire graph might be a large
+ * number of vertex pairs.  If the graph is dense enough it could be as large
+ * as the number of vertices squared, which might run out of memory.
+ *
+ * @throws                 cugraph::logic_error when an error occurs.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
+ * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
+ * a weight of 1 for all edges.
+ * @param vertices optional device span defining the seed vertices.
+ * @param topk optional specification of how many of the top scoring vertex pairs should be
+ * returned
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing three device vectors (v1, v2, score) of the same length.  Corresponding
+ * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying
+ * one of v1's two hop neighbors, and the score identifying the similarity score between v1 and v2.
+ * If @p topk was specified then the vectors will be no longer than @p topk elements.  In a
+ * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they
+ * will be returned on the local GPU for vertex v1.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::
+  tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
+  sorensen_all_pairs_coefficients(
+    raft::handle_t const& handle,
+    graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+    std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+    std::optional<raft::device_span<vertex_t const>> vertices,
+    std::optional<size_t> topk,
+    bool do_expensive_check = false);
+
+/**
+ * @brief     Compute overlap all pairs similarity coefficient
+ *
+ * Similarity is computed for all pairs of vertices.  Note that in a sparse
+ * graph, many of the vertex pairs will have a score of zero.  We actually
+ * compute similarity only for vertices that are two hop neighbors within
+ * the graph, since vertices that are not two hop neighbors will have
+ * a score of 0.
+ *
+ * If @p vertices is specified we will compute similarity on two hop
+ * neighbors of the @p vertices.  If @p vertices is not specified it will
+ * compute similarity on all two hop neighbors in the graph.
+ *
+ * If @p topk is specified only the top @p topk scoring vertex pairs
+ * will be returned, if not specified then scores for all computed vertex pairs
+ * will be returned.
+ *
+ * Note the list of two hop neighbors in the entire graph might be a large
+ * number of vertex pairs.  If the graph is dense enough it could be as large
+ * as the number of vertices squared, which might run out of memory.
+ *
+ * @throws                 cugraph::logic_error when an error occurs.
+ *
+ * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
+ * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
+ * @tparam weight_t Type of edge weights. Needs to be a floating point type.
+ * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
+ * or multi-GPU (true).
+ * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
+ * handles to various CUDA libraries) to run graph algorithms.
+ * @param graph_view Graph view object.
+ * @param edge_weight_view Optional view object holding edge weights for @p graph_view. If @p
+ * edge_weight_view.has_value() == true, use the weights associated with the graph. If false, assume
+ * a weight of 1 for all edges.
+ * @param vertices optional device span defining the seed vertices.
+ * @param topk optional specification of how many of the top scoring vertex pairs should be
+ * returned
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
+ * @return tuple containing three device vectors (v1, v2, score) of the same length.  Corresponding
+ * elements in the vectors identify a result, v1 identifying a vertex in the graph, v2 identifying
+ * one of v1's two hop neighbors, and the score identifying the similarity score between v1 and v2.
+ * If @p topk was specified then the vectors will be no longer than @p topk elements.  In a
+ * multi-gpu context, if @p topk is specified all results will return on GPU rank 0, otherwise they
+ * will be returned on the local GPU for vertex v1.
+ */
+template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
+std::
+  tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>, rmm::device_uvector<weight_t>>
+  overlap_all_pairs_coefficients(
+    raft::handle_t const& handle,
+    graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
+    std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
+    std::optional<raft::device_span<vertex_t const>> vertices,
+    std::optional<size_t> topk,
+    bool do_expensive_check = false);
+
 /*
  * @brief Enumerate K-hop neighbors
  *

diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh
@@ -37,51 +37,52 @@ namespace cugraph {
 
 namespace detail {
 
-template <typename vertex_t, typename edge_t>
-std::tuple<std::vector<vertex_t>, std::vector<edge_t>> compute_offset_aligned_edge_chunks(
+template <typename vertex_t, typename offset_t>
+std::tuple<std::vector<vertex_t>, std::vector<offset_t>> compute_offset_aligned_element_chunks(
   raft::handle_t const& handle,
-  edge_t const* offsets,
-  vertex_t num_vertices,
-  edge_t num_edges,
-  size_t approx_edge_chunk_size)
+  raft::device_span<offset_t const> offsets,
+  offset_t num_elements,
+  vertex_t approx_element_chunk_size)
 {
   auto search_offset_first = thrust::make_transform_iterator(
     thrust::make_counting_iterator(size_t{1}),
     cuda::proclaim_return_type<size_t>(
-      [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }));
-  auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size;
+      [approx_element_chunk_size] __device__(auto i) { return i * approx_element_chunk_size; }));
+  auto num_chunks = (num_elements + approx_element_chunk_size - 1) / approx_element_chunk_size;
 
   if (num_chunks > 1) {
-    rmm::device_uvector<vertex_t> d_vertex_offsets(num_chunks - 1, handle.get_stream());
+    rmm::device_uvector<vertex_t> d_chunk_offsets(num_chunks - 1, handle.get_stream());
     thrust::lower_bound(handle.get_thrust_policy(),
-                        offsets,
-                        offsets + num_vertices + 1,
+                        offsets.begin(),
+                        offsets.end(),
                         search_offset_first,
-                        search_offset_first + d_vertex_offsets.size(),
-                        d_vertex_offsets.begin());
-    rmm::device_uvector<edge_t> d_edge_offsets(d_vertex_offsets.size(), handle.get_stream());
+                        search_offset_first + d_chunk_offsets.size(),
+                        d_chunk_offsets.begin());
+    rmm::device_uvector<offset_t> d_element_offsets(d_chunk_offsets.size(), handle.get_stream());
     thrust::gather(handle.get_thrust_policy(),
-                   d_vertex_offsets.begin(),
-                   d_vertex_offsets.end(),
-                   offsets,
-                   d_edge_offsets.begin());
-    std::vector<edge_t> h_edge_offsets(num_chunks + 1, edge_t{0});
-    h_edge_offsets.back() = num_edges;
-    raft::update_host(
-      h_edge_offsets.data() + 1, d_edge_offsets.data(), d_edge_offsets.size(), handle.get_stream());
-    std::vector<vertex_t> h_vertex_offsets(num_chunks + 1, vertex_t{0});
-    h_vertex_offsets.back() = num_vertices;
-    raft::update_host(h_vertex_offsets.data() + 1,
-                      d_vertex_offsets.data(),
-                      d_vertex_offsets.size(),
+                   d_chunk_offsets.begin(),
+                   d_chunk_offsets.end(),
+                   offsets.begin(),
+                   d_element_offsets.begin());
+    std::vector<offset_t> h_element_offsets(num_chunks + 1, offset_t{0});
+    h_element_offsets.back() = num_elements;
+    raft::update_host(h_element_offsets.data() + 1,
+                      d_element_offsets.data(),
+                      d_element_offsets.size(),
+                      handle.get_stream());
+    std::vector<vertex_t> h_chunk_offsets(num_chunks + 1, vertex_t{0});
+    h_chunk_offsets.back() = offsets.size() - 1;
+    raft::update_host(h_chunk_offsets.data() + 1,
+                      d_chunk_offsets.data(),
+                      d_chunk_offsets.size(),
                       handle.get_stream());
 
     handle.sync_stream();
 
-    return std::make_tuple(h_vertex_offsets, h_edge_offsets);
+    return std::make_tuple(h_chunk_offsets, h_element_offsets);
   } else {
-    return std::make_tuple(std::vector<vertex_t>{{0, num_vertices}},
-                           std::vector<edge_t>{{0, num_edges}});
+    return std::make_tuple(std::vector<vertex_t>{{0, offsets.size() - 1}},
+                           std::vector<offset_t>{{0, num_elements}});
   }
 }