Commit 6e433ab

Merge remote-tracking branch 'upstream/branch-24.10' into branch-24.10_refactor_symmetrize

jnke2016 committed Sep 23, 2024
2 parents bd7eede + 7e058e2 commit 6e433ab
Showing 66 changed files with 1,282 additions and 593 deletions.
4 changes: 2 additions & 2 deletions .pre-commit-config.yaml
@@ -21,7 +21,7 @@ repos:
files: ^(python/.*|benchmarks/.*)$
exclude: ^python/nx-cugraph/
- repo: https://github.com/PyCQA/flake8
- rev: 6.0.0
+ rev: 7.1.1
hooks:
- id: flake8
args: ["--config=.flake8"]
@@ -34,7 +34,7 @@ repos:
hooks:
- id: yesqa
additional_dependencies:
- - flake8==6.0.0
+ - flake8==7.1.1
- repo: https://github.com/pre-commit/mirrors-clang-format
rev: v16.0.6
hooks:
2 changes: 1 addition & 1 deletion benchmarks/cugraph-dgl/notebooks/get_node_storage.ipynb
@@ -18,7 +18,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
"/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
@@ -176,7 +176,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n",
"/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n",
" dgl_warning(f'Dataloader CPU affinity opt is not enabled, consider switching it on '\n"
]
},
@@ -26,7 +26,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
"/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
@@ -190,7 +190,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n",
"/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n",
" dgl_warning(f'Dataloader CPU affinity opt is not enabled, consider switching it on '\n"
]
},
@@ -278,7 +278,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/distributed/worker.py:2988: UserWarning: Large object of size 1.42 MiB detected in task graph: \n",
"/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/distributed/worker.py:2988: UserWarning: Large object of size 1.42 MiB detected in task graph: \n",
" [b'\\xad\\xd1\\xe3\\x9c\\x96\\x83O\\xb3\\xba1\\x86\\x94\\xb6\\ ... =int32), False]\n",
"Consider scattering large objects ahead of time\n",
"with client.scatter to reduce scheduler burden and \n",
4 changes: 2 additions & 2 deletions benchmarks/cugraph-dgl/python-script/ogbn_mag_benchmark.py
@@ -1,4 +1,4 @@
- # Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ # Copyright (c) 2022-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -126,4 +126,4 @@ def sampling_func(g, seed_nodes, labels, train_loader):
st = time.time()
sampling_func(g, subset_split_idx["train"], labels, train_loader)
et = time.time()
print(f"Sampling time taken = {et-st} s")
print(f"Sampling time taken = {et - st} s")
2 changes: 1 addition & 1 deletion benchmarks/cugraph/notebooks/feature_storage.ipynb
@@ -18,7 +18,7 @@
"name": "stderr",
"output_type": "stream",
"text": [
"/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
"/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
}
17 changes: 13 additions & 4 deletions benchmarks/nx-cugraph/pytest-based/README.md
@@ -19,13 +19,22 @@ Our current benchmarks provide the following datasets:
### Scripts

#### 1. `run-main-benchmarks.sh`
- This script allows users to run selected algorithms across multiple datasets and backends. All results are stored inside a sub-directory (`logs/`) and output files are named based on the combination of parameters for that benchmark.
+ This script allows users to run a small set of commonly-used algorithms across multiple datasets and backends. All results are stored inside a sub-directory (`logs/`) and output files are named based on the combination of parameters for that benchmark.

- NOTE: If running with all algorithms, datasets, and backends, this script may take a few hours to finish running.
+ NOTE: If running with all algorithms and datasets using NetworkX without an accelerated backend, this script may take a few hours to finish running.

**Usage:**
+ - Run with `--cpu-only`:
```bash
- bash run-main-benchmarks.sh # edit this script directly
+ ./run-main-benchmarks.sh --cpu-only
```
+ - Run with `--gpu-only`:
+ ```bash
+ ./run-main-benchmarks.sh --gpu-only
+ ```
+ - Run without any arguments (all backends):
+ ```bash
+ ./run-main-benchmarks.sh
+ ```

#### 2. `get_graph_bench_dataset.py`
@@ -37,7 +46,7 @@ This script downloads the specified dataset using `cugraph.datasets`.
```

#### 3. `create_results_summary_page.py`
- This script is designed to be run after `run-gap-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system.
+ This script is designed to be run after `run-gap-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system, so it should be run on the machine on which benchmarks were run.

**Usage:**
18 changes: 17 additions & 1 deletion benchmarks/nx-cugraph/pytest-based/run-main-benchmarks.sh
@@ -40,10 +40,26 @@ backends="
None
cugraph-preconverted
"
+ # check for --cpu-only or --gpu-only args
+ if [[ "$#" -eq 1 ]]; then
+     case $1 in
+         --cpu-only)
+             backends="None"
+             ;;
+         --gpu-only)
+             backends="cugraph-preconverted"
+             ;;
+         *)
+             echo "Unknown option: $1"
+             exit 1
+             ;;
+     esac
+ fi

for algo in $algos; do
for dataset in $datasets; do
- python get_graph_bench_dataset.py $dataset
+ # this script can be used to download benchmarking datasets by name via cugraph.datasets
+ python get_graph_bench_dataset.py $dataset
for backend in $backends; do
name="${backend}__${algo}__${dataset}"
echo "Running: $backend, $dataset, bench_$algo"
18 changes: 11 additions & 7 deletions cpp/include/cugraph/algorithms.hpp
@@ -1579,11 +1579,11 @@ std::
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>, std::optional<rmm::device_uvector<weight_t>>>
uniform_random_walks(raft::handle_t const& handle,
raft::random::RngState& rng_state,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
raft::device_span<vertex_t const> start_vertices,
- size_t max_length,
- uint64_t seed = std::numeric_limits<uint64_t>::max());
+ size_t max_length);

/**
* @brief returns biased random walks from starting sources, where each path is of given
@@ -1623,11 +1623,11 @@ uniform_random_walks(raft::handle_t const& handle,
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>, std::optional<rmm::device_uvector<weight_t>>>
biased_random_walks(raft::handle_t const& handle,
+ raft::random::RngState& rng_state,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
edge_property_view_t<edge_t, weight_t const*> edge_weight_view,
raft::device_span<vertex_t const> start_vertices,
- size_t max_length,
- uint64_t seed = std::numeric_limits<uint64_t>::max());
+ size_t max_length);

/**
* @brief returns biased random walks with node2vec biases from starting sources,
@@ -1670,13 +1670,13 @@ biased_random_walks(raft::handle_t const& handle,
template <typename vertex_t, typename edge_t, typename weight_t, bool multi_gpu>
std::tuple<rmm::device_uvector<vertex_t>, std::optional<rmm::device_uvector<weight_t>>>
node2vec_random_walks(raft::handle_t const& handle,
+ raft::random::RngState& rng_state,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
std::optional<edge_property_view_t<edge_t, weight_t const*>> edge_weight_view,
raft::device_span<vertex_t const> start_vertices,
size_t max_length,
weight_t p,
- weight_t q,
- uint64_t seed = std::numeric_limits<uint64_t>::max());
+ weight_t q);

#ifndef NO_CUGRAPH_OPS
/**
@@ -1873,12 +1873,16 @@ void triangle_count(raft::handle_t const& handle,
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph view object.
+ * @param do_expensive_check A flag to run expensive checks for input arguments (if set to
+ * `true`).
*
* @return edge_property_t containing the edge triangle count
*/
template <typename vertex_t, typename edge_t, bool multi_gpu>
edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, edge_t> edge_triangle_count(
raft::handle_t const& handle, graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view);
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
bool do_expensive_check = false);

/*
* @brief Compute K-Truss.
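To make the signature changes above concrete: the three random walk entry points now take a caller-owned `raft::random::RngState&` in place of the removed trailing `seed` parameter. The following is a minimal, hypothetical sketch of the new calling convention, not code from this commit; the wrapper name, seed value, and walk length are invented, and a prebuilt single-GPU `graph_view` plus a device vector of start vertices are assumed.

```cpp
#include <cugraph/algorithms.hpp>
#include <cugraph/graph_view.hpp>

#include <raft/core/device_span.hpp>
#include <raft/core/handle.hpp>
#include <raft/random/rng_state.hpp>

#include <rmm/device_uvector.hpp>

#include <optional>

// Hypothetical caller: seed the RngState once and reuse it across calls,
// instead of passing a raw uint64_t seed to every invocation.
template <typename vertex_t, typename edge_t, typename weight_t>
auto run_uniform_walks(raft::handle_t const& handle,
                       cugraph::graph_view_t<vertex_t, edge_t, false, false> const& graph_view,
                       rmm::device_uvector<vertex_t> const& start_vertices)
{
  raft::random::RngState rng_state{7};  // illustrative seed, owned by the caller

  return cugraph::uniform_random_walks<vertex_t, edge_t, weight_t, false>(
    handle,
    rng_state,  // replaces the removed `seed` parameter
    graph_view,
    std::nullopt,  // no edge weights: walk on an unweighted view
    raft::device_span<vertex_t const>{start_vertices.data(), start_vertices.size()},
    size_t{5});  // max_length
}
```

Because the state lives with the caller, back-to-back invocations continue the same random stream rather than restarting from a fixed seed, which appears to be the motivation for threading `rng_state` through the API.
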
34 changes: 25 additions & 9 deletions cpp/src/c_api/random_walks.cpp
@@ -16,6 +16,7 @@

#include "c_api/abstract_functor.hpp"
#include "c_api/graph.hpp"
#include "c_api/random.hpp"
#include "c_api/resource_handle.hpp"
#include "c_api/utils.hpp"

@@ -153,10 +154,11 @@

struct uniform_random_walks_functor : public cugraph::c_api::abstract_functor {
raft::handle_t const& handle_;
+ // FIXME: rng_state_ should be passed as a parameter
+ cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
cugraph::c_api::cugraph_graph_t* graph_{nullptr};
cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr};
size_t max_length_{0};
- size_t seed_{0};
cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr};

uniform_random_walks_functor(cugraph_resource_handle_t const* handle,
@@ -222,13 +224,17 @@
graph_view.local_vertex_partition_range_last(),
false);

+ // FIXME: remove once rng_state passed as parameter
+ rng_state_ = reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(
+ new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}});
+
auto [paths, weights] = cugraph::uniform_random_walks(
handle_,
+ rng_state_->rng_state_,
graph_view,
(edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt,
raft::device_span<vertex_t const>{start_vertices.data(), start_vertices.size()},
- max_length_,
- seed_);
+ max_length_);

//
// Need to unrenumber the vertices in the resulting paths
@@ -255,11 +261,12 @@

struct biased_random_walks_functor : public cugraph::c_api::abstract_functor {
raft::handle_t const& handle_;
+ // FIXME: rng_state_ should be passed as a parameter
+ cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
cugraph::c_api::cugraph_graph_t* graph_{nullptr};
cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr};
size_t max_length_{0};
cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr};
- uint64_t seed_{0};

biased_random_walks_functor(cugraph_resource_handle_t const* handle,
cugraph_graph_t* graph,
@@ -326,13 +333,17 @@
graph_view.local_vertex_partition_range_last(),
false);

+ // FIXME: remove once rng_state passed as parameter
+ rng_state_ = reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(
+ new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}});
+
auto [paths, weights] = cugraph::biased_random_walks(
handle_,
+ rng_state_->rng_state_,
graph_view,
edge_weights->view(),
raft::device_span<vertex_t const>{start_vertices.data(), start_vertices.size()},
- max_length_,
- seed_);
+ max_length_);

//
// Need to unrenumber the vertices in the resulting paths
@@ -354,12 +365,13 @@

struct node2vec_random_walks_functor : public cugraph::c_api::abstract_functor {
raft::handle_t const& handle_;
+ // FIXME: rng_state_ should be passed as a parameter
+ cugraph::c_api::cugraph_rng_state_t* rng_state_{nullptr};
cugraph::c_api::cugraph_graph_t* graph_{nullptr};
cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr};
size_t max_length_{0};
double p_{0};
double q_{0};
- uint64_t seed_{0};
cugraph::c_api::cugraph_random_walk_result_t* result_{nullptr};

node2vec_random_walks_functor(cugraph_resource_handle_t const* handle,
@@ -431,15 +443,19 @@
graph_view.local_vertex_partition_range_last(),
false);

+ // FIXME: remove once rng_state passed as parameter
+ rng_state_ = reinterpret_cast<cugraph::c_api::cugraph_rng_state_t*>(
+ new cugraph::c_api::cugraph_rng_state_t{raft::random::RngState{0}});
+
auto [paths, weights] = cugraph::node2vec_random_walks(
handle_,
+ rng_state_->rng_state_,
graph_view,
(edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt,
raft::device_span<vertex_t const>{start_vertices.data(), start_vertices.size()},
max_length_,
static_cast<weight_t>(p_),
- static_cast<weight_t>(q_),
- seed_);
+ static_cast<weight_t>(q_));

// FIXME: Need to fix invalid_vtx issue here. We can't unrenumber max_vertex_id+1
// properly...
24 changes: 12 additions & 12 deletions cpp/src/community/edge_triangle_count_impl.cuh
@@ -18,8 +18,8 @@

#include "detail/graph_partition_utils.cuh"
#include "prims/edge_bucket.cuh"
#include "prims/per_v_pair_dst_nbr_intersection.cuh"
#include "prims/transform_e.cuh"
#include "prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh"

#include <cugraph/detail/shuffle_wrappers.hpp>
#include <cugraph/graph_functions.hpp>
@@ -124,7 +124,8 @@ struct extract_q_r {
template <typename vertex_t, typename edge_t, bool store_transposed, bool multi_gpu>
edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, edge_t> edge_triangle_count_impl(
raft::handle_t const& handle,
- graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu> const& graph_view)
+ graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu> const& graph_view,
+ bool do_expensive_check)
{
using weight_t = float;
rmm::device_uvector<vertex_t> edgelist_srcs(0, handle.get_stream());
@@ -158,14 +159,11 @@ edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, edge_t> edge_t
num_remaining_edges -= chunk_size;
// Perform 'nbr_intersection' in chunks to reduce peak memory.
auto [intersection_offsets, intersection_indices] =
- detail::nbr_intersection(handle,
- graph_view,
- cugraph::edge_dummy_property_t{}.view(),
- edge_first + prev_chunk_size,
- edge_first + prev_chunk_size + chunk_size,
- std::array<bool, 2>{true, true},
- false /*FIXME: pass 'do_expensive_check' as argument*/);
+ per_v_pair_dst_nbr_intersection(handle,
+ graph_view,
+ edge_first + prev_chunk_size,
+ edge_first + prev_chunk_size + chunk_size,
+ do_expensive_check);
// Update the number of triangles of each (p, q) edges by looking at their intersection
// size
thrust::for_each(
@@ -365,9 +363,11 @@

template <typename vertex_t, typename edge_t, bool multi_gpu>
edge_property_t<graph_view_t<vertex_t, edge_t, false, multi_gpu>, edge_t> edge_triangle_count(
raft::handle_t const& handle, graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view)
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
bool do_expensive_check)
{
- return detail::edge_triangle_count_impl(handle, graph_view);
+ return detail::edge_triangle_count_impl(handle, graph_view, do_expensive_check);
}

} // namespace cugraph
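
For context on the new `do_expensive_check` parameter: it defaults to `false`, so existing call sites compile and behave unchanged, while callers can opt into input validation. A minimal, hypothetical sketch follows (the wrapper name is invented for illustration, and an already-built single-GPU `graph_view` is assumed):

```cpp
#include <cugraph/algorithms.hpp>
#include <cugraph/graph_view.hpp>

#include <raft/core/handle.hpp>

// Hypothetical helper: request expensive input validation (useful while
// debugging); omit the third argument to keep the default of no extra checks.
template <typename vertex_t, typename edge_t>
auto debug_edge_triangle_count(
  raft::handle_t const& handle,
  cugraph::graph_view_t<vertex_t, edge_t, false, false> const& graph_view)
{
  return cugraph::edge_triangle_count<vertex_t, edge_t, false>(
    handle, graph_view, /* do_expensive_check = */ true);
}
```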
3 changes: 2 additions & 1 deletion cpp/src/community/edge_triangle_count_mg_v32_e32.cu
@@ -20,6 +20,7 @@ namespace cugraph {
// SG instantiation
template edge_property_t<graph_view_t<int32_t, int32_t, false, true>, int32_t> edge_triangle_count(
raft::handle_t const& handle,
- cugraph::graph_view_t<int32_t, int32_t, false, true> const& graph_view);
+ cugraph::graph_view_t<int32_t, int32_t, false, true> const& graph_view,
+ bool do_expensive_check);

} // namespace cugraph
3 changes: 2 additions & 1 deletion cpp/src/community/edge_triangle_count_mg_v32_e64.cu
@@ -20,6 +20,7 @@ namespace cugraph {
// SG instantiation
template edge_property_t<graph_view_t<int32_t, int64_t, false, true>, int64_t> edge_triangle_count(
raft::handle_t const& handle,
- cugraph::graph_view_t<int32_t, int64_t, false, true> const& graph_view);
+ cugraph::graph_view_t<int32_t, int64_t, false, true> const& graph_view,
+ bool do_expensive_check);

} // namespace cugraph