Merge branch 'branch-24.10' into my_new_branch

rapidsai · Sep 5, 2024 · 6fa8d19 · 6fa8d19
2 parents ba6cbab + 52c4457
commit 6fa8d19
Show file tree

Hide file tree

Showing 40 changed files with 5,469 additions and 1,901 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -17,7 +17,7 @@ repos:
     hooks:
       - id: black
         language_version: python3
-        args: [--target-version=py39]
+        args: [--target-version=py310]
         files: ^(python/.*|benchmarks/.*)$
         exclude: ^python/nx-cugraph/
   - repo: https://github.com/PyCQA/flake8

diff --git a/benchmarks/nx-cugraph/pytest-based/README.md b/benchmarks/nx-cugraph/pytest-based/README.md
@@ -0,0 +1,45 @@
+## `nx-cugraph` Benchmarks
+
+### Overview
+
+This directory contains a set of scripts designed to benchmark NetworkX with the `nx-cugraph` backend and deliver a report that summarizes the speed-up and runtime deltas over default NetworkX.
+
+Our current benchmarks provide the following datasets:
+
+| Dataset     | Nodes | Edges | Directed |
+| --------    | ------- | ------- | ------- |
+| netscience  | 1,461    | 5,484 | Yes |
+| email-Eu-core  | 1,005    | 25,571 | Yes |
+| cit-Patents  | 3,774,768    | 16,518,948 | Yes |
+| hollywood  | 1,139,905    | 57,515,616 | No |
+| soc-LiveJournal1  | 4,847,571    | 68,993,773 | Yes |
+
+
+
+### Scripts
+
+#### 1. `run-main-benchmarks.sh`
+This script allows users to run selected algorithms across multiple datasets and backends. All results are stored inside a sub-directory (`logs/`) and output files are named based on the combination of parameters for that benchmark.
+
+NOTE: If running with all algorithms, datasets, and backends, this script may take a few hours to finish running.
+
+**Usage:**
+  ```bash
+  bash run-main-benchmarks.sh  # edit this script directly
+  ```
+
+#### 2. `get_graph_bench_dataset.py`
+This script downloads the specified dataset using `cugraph.datasets`.
+
+**Usage:**
+  ```bash
+  python get_graph_bench_dataset.py [dataset]
+  ```
+
+#### 3. `create_results_summary_page.py`
+This script is designed to be run after `run-main-benchmarks.sh` in order to generate an HTML page displaying a results table comparing default NetworkX to nx-cugraph. The script also provides information about the current system.
+
+**Usage:**
+  ```bash
+  python create_results_summary_page.py > report.html
+  ```
diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py
@@ -271,9 +271,8 @@ def bench_from_networkx(benchmark, graph_obj):
 
 
 # normalized_param_values = [True, False]
-# k_param_values = [10, 100]
 normalized_param_values = [True]
-k_param_values = [10]
+k_param_values = [10, 100, 1000]
 
 
 @pytest.mark.parametrize(
@@ -282,6 +281,10 @@ def bench_from_networkx(benchmark, graph_obj):
 @pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}")
 def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k):
     G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
+
+    if k > G.number_of_nodes():
+        pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")
+
     result = benchmark.pedantic(
         target=backend_wrapper(nx.betweenness_centrality),
         args=(G,),
@@ -305,6 +308,10 @@ def bench_edge_betweenness_centrality(
     benchmark, graph_obj, backend_wrapper, normalized, k
 ):
     G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
+
+    if k > G.number_of_nodes():
+        pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")
+
     result = benchmark.pedantic(
         target=backend_wrapper(nx.edge_betweenness_centrality),
         args=(G,),
@@ -473,6 +480,26 @@ def bench_pagerank_personalized(benchmark, graph_obj, backend_wrapper):
     assert type(result) is dict
 
 
+def bench_shortest_path(benchmark, graph_obj, backend_wrapper):
+    """Benchmark ``nx.shortest_path`` given a source node but no target.
+    The source is the node returned by ``get_highest_degree_node(graph_obj)``.
+    """
+    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
+    node = get_highest_degree_node(graph_obj)
+
+    result = benchmark.pedantic(
+        target=backend_wrapper(nx.shortest_path),
+        args=(G,),
+        kwargs=dict(
+            source=node,
+        ),
+        rounds=rounds,
+        iterations=iterations,
+        warmup_rounds=warmup_rounds,
+    )
+    assert type(result) is dict
+
+
 def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper):
     G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
     node = get_highest_degree_node(graph_obj)