Merge branch 'branch-24.10' into publish_xml

rapidsai · Sep 23, 2024 · cd8714a · cd8714a
2 parents 02c1e41 + 7e058e2
commit cd8714a
Show file tree

Hide file tree

Showing 191 changed files with 12,948 additions and 3,730 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,11 +17,11 @@ repos:
     hooks:
       - id: black
         language_version: python3
-        args: [--target-version=py39]
+        args: [--target-version=py310]
         files: ^(python/.*|benchmarks/.*)$
         exclude: ^python/nx-cugraph/
   - repo: https://github.com/PyCQA/flake8
-    rev: 6.0.0
+    rev: 7.1.1
     hooks:
       - id: flake8
         args: ["--config=.flake8"]
@@ -34,15 +34,15 @@ repos:
     hooks:
       - id: yesqa
         additional_dependencies:
-          - flake8==6.0.0
+          - flake8==7.1.1
   - repo: https://github.com/pre-commit/mirrors-clang-format
     rev: v16.0.6
     hooks:
       - id: clang-format
         types_or: [c, c++, cuda]
         args: ["-fallback-style=none", "-style=file", "-i"]
   - repo: https://github.com/rapidsai/pre-commit-hooks
-    rev: v0.2.0
+    rev: v0.4.0
     hooks:
       - id: verify-copyright
         files: |

diff --git a/benchmarks/cugraph-dgl/notebooks/get_node_storage.ipynb b/benchmarks/cugraph-dgl/notebooks/get_node_storage.ipynb
@@ -18,7 +18,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
       "  from .autonotebook import tqdm as notebook_tqdm\n"
      ]
     }

diff --git a/benchmarks/cugraph-dgl/notebooks/heterogeneous_dataloader_benchmark.ipynb b/benchmarks/cugraph-dgl/notebooks/heterogeneous_dataloader_benchmark.ipynb
@@ -176,7 +176,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n",
+      "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n",
       "  dgl_warning(f'Dataloader CPU affinity opt is not enabled, consider switching it on '\n"
      ]
     },

diff --git a/benchmarks/cugraph-dgl/notebooks/homogenous_dataloader_benchmark.ipynb b/benchmarks/cugraph-dgl/notebooks/homogenous_dataloader_benchmark.ipynb
@@ -26,7 +26,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
       "  from .autonotebook import tqdm as notebook_tqdm\n"
      ]
     }
@@ -190,7 +190,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n",
+      "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/dgl/dataloading/dataloader.py:859: DGLWarning: Dataloader CPU affinity opt is not enabled, consider switching it on (see enable_cpu_affinity() or CPU best practices for DGL [https://docs.dgl.ai/tutorials/cpu/cpu_best_practises.html])\n",
       "  dgl_warning(f'Dataloader CPU affinity opt is not enabled, consider switching it on '\n"
      ]
     },
@@ -278,7 +278,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/distributed/worker.py:2988: UserWarning: Large object of size 1.42 MiB detected in task graph: \n",
+      "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/distributed/worker.py:2988: UserWarning: Large object of size 1.42 MiB detected in task graph: \n",
       "  [b'\\xad\\xd1\\xe3\\x9c\\x96\\x83O\\xb3\\xba1\\x86\\x94\\xb6\\ ... =int32), False]\n",
       "Consider scattering large objects ahead of time\n",
       "with client.scatter to reduce scheduler burden and \n",

diff --git a/benchmarks/cugraph-dgl/python-script/ogbn_mag_benchmark.py b/benchmarks/cugraph-dgl/python-script/ogbn_mag_benchmark.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -126,4 +126,4 @@ def sampling_func(g, seed_nodes, labels, train_loader):
     st = time.time()
     sampling_func(g, subset_split_idx["train"], labels, train_loader)
     et = time.time()
-    print(f"Sampling time taken  = {et-st} s")
+    print(f"Sampling time taken  = {et - st} s")
diff --git a/benchmarks/cugraph/notebooks/feature_storage.ipynb b/benchmarks/cugraph/notebooks/feature_storage.ipynb
@@ -18,7 +18,7 @@
      "name": "stderr",
      "output_type": "stream",
      "text": [
-      "/datasets/vjawa/miniconda3/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
+      "/datasets/vjawa/miniforge/envs/all_cuda-115_arch-x86_64/lib/python3.9/site-packages/tqdm/auto.py:22: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
       "  from .autonotebook import tqdm as notebook_tqdm\n"
      ]
     }

diff --git a/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py b/benchmarks/cugraph/pytest-based/bench_cugraph_uniform_neighbor_sample.py
@@ -266,7 +266,7 @@ def uns_func(*args, **kwargs):
 @pytest.mark.managedmem_off
 @pytest.mark.poolallocator_on
 @pytest.mark.parametrize("batch_size", params.batch_sizes.values())
-@pytest.mark.parametrize("fanout", [params.fanout_10_25, params.fanout_5_10_15])
+@pytest.mark.parametrize("fanout", [params.fanout_10_25])
 @pytest.mark.parametrize(
     "with_replacement", [False], ids=lambda v: f"with_replacement={v}"
 )
@@ -287,6 +287,8 @@ def bench_cugraph_uniform_neighbor_sample(
         start_list=uns_args["start_list"],
         fanout_vals=uns_args["fanout"],
         with_replacement=uns_args["with_replacement"],
+        use_legacy_names=False,
+        with_edge_properties=True,
     )
     """
     dtmap = {"int32": 32 // 8, "int64": 64 // 8}

diff --git a/benchmarks/nx-cugraph/pytest-based/README.md b/benchmarks/nx-cugraph/pytest-based/README.md
@@ -0,0 +1,54 @@
+## `nx-cugraph` Benchmarks
+
+### Overview
+
+This directory contains a set of scripts designed to benchmark NetworkX with the `nx-cugraph` backend and deliver a report that summarizes the speed-up and runtime deltas over default NetworkX.
+
+Our current benchmarks provide the following datasets:
+
+| Dataset     | Nodes | Edges | Directed |
+| --------    | ------- | ------- | ------- |
+| netscience  | 1,461    | 5,484 | Yes |
+| email-Eu-core  | 1,005    | 25,571 | Yes |
+| cit-Patents  | 3,774,768    | 16,518,948 | Yes |
+| hollywood  | 1,139,905    | 57,515,616 | No |
+| soc-LiveJournal1  | 4,847,571    | 68,993,773 | Yes |
+
+
+
+### Scripts
+
+#### 1. `run-main-benchmarks.sh`
+This script allows users to run a small set of commonly-used algorithms across multiple datasets and backends. All results are stored inside a sub-directory (`logs/`) and output files are named based on the combination of parameters for that benchmark.
+
+NOTE: If running with all algorithms and datasets using NetworkX without an accelerated backend, this script may take a few hours to finish running.
+
+**Usage:**
+ - Run with `--cpu-only`:
+  ```bash
+  ./run-main-benchmarks.sh --cpu-only
+  ```
+ - Run with `--gpu-only`:
+  ```bash
+  ./run-main-benchmarks.sh --gpu-only
+  ```
+ - Run without any arguments (all backends):
+  ```bash
+  ./run-main-benchmarks.sh
+  ```
+
+#### 2. `get_graph_bench_dataset.py`
+This script downloads the specified dataset using `cugraph.datasets`.
+
+**Usage:**
+  ```bash
+  python get_graph_bench_dataset.py [dataset]
+  ```
+
+#### 3. `create_results_summary_page.py`
+This script is designed to be run after `run-main-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system, so it should be run on the machine on which benchmarks were run.
+
+**Usage:**
+  ```bash
+  python create_results_summary_page.py > report.html
+  ```
diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py
@@ -271,9 +271,8 @@ def bench_from_networkx(benchmark, graph_obj):
 
 
 # normalized_param_values = [True, False]
-# k_param_values = [10, 100]
 normalized_param_values = [True]
-k_param_values = [10]
+k_param_values = [10, 100, 1000]
 
 
 @pytest.mark.parametrize(
@@ -282,6 +281,10 @@ def bench_from_networkx(benchmark, graph_obj):
 @pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}")
 def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k):
     G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
+
+    if k > G.number_of_nodes():
+        pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")
+
     result = benchmark.pedantic(
         target=backend_wrapper(nx.betweenness_centrality),
         args=(G,),
@@ -305,6 +308,10 @@ def bench_edge_betweenness_centrality(
     benchmark, graph_obj, backend_wrapper, normalized, k
 ):
     G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
+
+    if k > G.number_of_nodes():
+        pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")
+
     result = benchmark.pedantic(
         target=backend_wrapper(nx.edge_betweenness_centrality),
         args=(G,),
@@ -473,6 +480,26 @@ def bench_pagerank_personalized(benchmark, graph_obj, backend_wrapper):
     assert type(result) is dict
 
 
+def bench_shortest_path(benchmark, graph_obj, backend_wrapper):
+    """
+    This passes in the source node with the highest degree, but no target.
+    """
+    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
+    node = get_highest_degree_node(graph_obj)
+
+    result = benchmark.pedantic(
+        target=backend_wrapper(nx.shortest_path),
+        args=(G,),
+        kwargs=dict(
+            source=node,
+        ),
+        rounds=rounds,
+        iterations=iterations,
+        warmup_rounds=warmup_rounds,
+    )
+    assert type(result) is dict
+
+
 def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper):
     G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
     node = get_highest_degree_node(graph_obj)