From cbaff2049231f523ab122292ca1b0ab882870503 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Sat, 17 Aug 2024 21:27:27 -0500 Subject: [PATCH 1/2] Adds shortest_path benchmark, adds checks to ensure k is not > number of nodes. --- .../nx-cugraph/pytest-based/bench_algos.py | 31 +++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py index d40b5130827..f88d93c3f17 100644 --- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py +++ b/benchmarks/nx-cugraph/pytest-based/bench_algos.py @@ -271,9 +271,8 @@ def bench_from_networkx(benchmark, graph_obj): # normalized_param_values = [True, False] -# k_param_values = [10, 100] normalized_param_values = [True] -k_param_values = [10] +k_param_values = [10, 100, 1000] @pytest.mark.parametrize( @@ -282,6 +281,10 @@ def bench_from_networkx(benchmark, graph_obj): @pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}") def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + if k > G.number_of_nodes(): + pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}") + result = benchmark.pedantic( target=backend_wrapper(nx.betweenness_centrality), args=(G,), @@ -305,6 +308,10 @@ def bench_edge_betweenness_centrality( benchmark, graph_obj, backend_wrapper, normalized, k ): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + if k > G.number_of_nodes(): + pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}") + result = benchmark.pedantic( target=backend_wrapper(nx.edge_betweenness_centrality), args=(G,), @@ -473,6 +480,26 @@ def bench_pagerank_personalized(benchmark, graph_obj, backend_wrapper): assert type(result) is dict +def bench_shortest_path(benchmark, graph_obj, backend_wrapper): + """ + This passes in the source node with the highest degree, but no target. 
+ """ + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + + result = benchmark.pedantic( + target=backend_wrapper(nx.shortest_path), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) node = get_highest_degree_node(graph_obj) From f294951b6719ec80e13aa75b2229754071149d75 Mon Sep 17 00:00:00 2001 From: Rick Ratzel Date: Mon, 19 Aug 2024 14:13:25 -0500 Subject: [PATCH 2/2] Initial version of nx-cugraph benchmark tools for running pytest with bench_algos.py. --- .../pytest-based/ensure_dataset_accessible.py | 22 +++ .../nx-cugraph/pytest-based/gen_table.py | 177 ++++++++++++++++++ .../nx-cugraph/pytest-based/run-main.sh | 46 +++++ 3 files changed, 245 insertions(+) create mode 100644 benchmarks/nx-cugraph/pytest-based/ensure_dataset_accessible.py create mode 100644 benchmarks/nx-cugraph/pytest-based/gen_table.py create mode 100755 benchmarks/nx-cugraph/pytest-based/run-main.sh diff --git a/benchmarks/nx-cugraph/pytest-based/ensure_dataset_accessible.py b/benchmarks/nx-cugraph/pytest-based/ensure_dataset_accessible.py new file mode 100644 index 00000000000..d8fc18e166c --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/ensure_dataset_accessible.py @@ -0,0 +1,22 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +import sys + +import cugraph.datasets as cgds + +dataset = sys.argv[1].replace("-", "_") +dataset_obj = getattr(cgds, dataset) + +if not dataset_obj.get_path().exists(): + dataset_obj.get_edgelist(download=True) diff --git a/benchmarks/nx-cugraph/pytest-based/gen_table.py b/benchmarks/nx-cugraph/pytest-based/gen_table.py new file mode 100644 index 00000000000..ef7440c1d77 --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/gen_table.py @@ -0,0 +1,177 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 

import re
import pathlib
import json

# Directory that is searched for benchmark result files ("*.json").
logs_dir = pathlib.Path("logs")

# Patterns applied to a full benchmark name, for example
# "bench_triangles[ds=netscience-backend=cugraph-preconverted]".
# Raw strings are used so that "\w" reaches the regex engine untouched instead
# of being parsed as an (invalid) string escape sequence.
#
# NOTE: "[\w-]+" is greedy, so when "-backend=" directly follows the dataset id
# the captured group ends in "-backend"; that suffix is stripped after matching.
dataset_patt = re.compile(r".*ds=([\w-]+).*")
backend_patt = re.compile(r".*backend=(\w+).*")
# Only matches k values written as a 1 followed by zeros (1, 10, 100, ...).
k_patt = re.compile(r".*k=(10*).*")

# Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
benchmarks = {}


def compute_perf_vals(cugraph_runtime, networkx_runtime):
    """Return the speedup and runtime delta of cugraph relative to networkx.

    Both runtimes are treated as seconds: a delta of 1 or more is printed
    unscaled with an "s" suffix, smaller deltas are scaled to "ms" or "us".

    Parameters
    ----------
    cugraph_runtime : number
        Runtime of the cugraph run. Must be non-zero, since it is the divisor
        of the speedup ratio (a zero raises ZeroDivisionError).
    networkx_runtime : number
        Runtime of the networkx run.

    Returns
    -------
    tuple of (str, str)
        ``(speedup_string, delta_string)`` where ``speedup_string`` is
        ``networkx_runtime / cugraph_runtime`` formatted like ``"3.000X"`` and
        ``delta_string`` is ``networkx_runtime - cugraph_runtime`` formatted
        like ``"250.000ms"``. The delta is negative when networkx was faster.
    """
    speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X"
    delta = networkx_runtime - cugraph_runtime

    # Pick the unit from the magnitude so that negative deltas scale as well.
    magnitude = abs(delta)
    if magnitude < 0.001:
        units, scale = "us", 1e6
    elif magnitude < 1:
        units, scale = "ms", 1e3
    else:
        units, scale = "s", 1
    delta_string = f"{delta * scale:.3f}{units}"

    return (speedup_string, delta_string)


def _parse_benchmark_name(name, json_file):
    """Split a benchmark name into ``(algo_name, dataset, backend)``.

    ``json_file`` is only used to build the error message.

    Raises RuntimeError if ``name`` has no "ds=" or no "backend=" component.
    """
    # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
    algo_name = name.split("[")[0]
    if algo_name.startswith("bench_"):
        algo_name = algo_name[len("bench_"):]
    # special case for betweenness_centrality: k becomes part of the algo label
    k_match = k_patt.match(name)
    if k_match is not None:
        algo_name += f", k={k_match.group(1)}"

    dataset_match = dataset_patt.match(name)
    backend_match = backend_patt.match(name)
    if dataset_match is None or backend_match is None:
        raise RuntimeError(
            f"benchmark name {name} in file {json_file} has an unexpected format"
        )

    dataset = dataset_match.group(1)
    if dataset.endswith("-backend"):
        # Drop the suffix swallowed by the greedy dataset pattern.
        dataset = dataset[: -len("-backend")]

    backend = backend_match.group(1)
    if backend == "None":
        # A backend id of "None" is reported under the name "networkx".
        backend = "networkx"

    return (algo_name, dataset, backend)


# Populate the benchmarks dict from the .json files. The files are visited in
# sorted order so that the insertion order of the dict (and the result kept
# when two files report the same algo/backend/dataset) does not depend on the
# order in which the filesystem happens to list them.
for json_file in sorted(logs_dir.glob("*.json")):
    try:
        with open(json_file, encoding="utf-8") as json_fobj:
            data = json.load(json_fobj)
    except json.decoder.JSONDecodeError:
        # File content is not valid JSON (e.g. it is empty): skip it.
        continue

    for benchmark_run in data["benchmarks"]:
        name = benchmark_run["name"]
        (algo_name, dataset, backend) = _parse_benchmark_name(name, json_file)
        runtime = benchmark_run["stats"]["mean"]
        benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[dataset] = runtime


# dump HTML table
+ordered_datasets = [ + "netscience", + "email_Eu_core", + "cit-patents", + "hollywood", + "soc-livejournal1", +] + +print( + """ + + + + + + + + """ +) +for ds in ordered_datasets: + print(f" ") +print( + """ + + +""" +) + + +for algo_name in benchmarks: + algo_runs = benchmarks[algo_name] + print(" ") + print(f" ") + + # Proceed only if any results are present for both cugraph and NX + if "cugraph" in algo_runs and "networkx" in algo_runs: + cugraph_algo_runs = algo_runs["cugraph"] + networkx_algo_runs = algo_runs["networkx"] + datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs) + + # populate the table with speedup results for each dataset in the order + # specified in ordered_datasets. If results for a run using a dataset + # are not present for both cugraph and NX, output an empty cell. + for dataset in ordered_datasets: + if dataset in datasets_in_both: + cugraph_runtime = cugraph_algo_runs[dataset] + networkx_runtime = networkx_algo_runs[dataset] + (speedup, runtime_delta) = compute_perf_vals( + cugraph_runtime=cugraph_runtime, networkx_runtime=networkx_runtime + ) + print(f" ") + else: + print(f" ") + + # If a comparison between cugraph and NX cannot be made, output empty cells + # for each dataset + else: + for _ in range(len(ordered_datasets)): + print(" ") + + print(" ") + +print( + """ + +
{ds}
{algo_name}{speedup}
{runtime_delta}
+ +""" +) diff --git a/benchmarks/nx-cugraph/pytest-based/run-main.sh b/benchmarks/nx-cugraph/pytest-based/run-main.sh new file mode 100755 index 00000000000..47e39ce85e2 --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/run-main.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Runs benchmarks for the 24.02 algos. +# Pass either a or b or both. This is useful for separating batches of runs on different GPUs: +# CUDA_VISIBLE_DEVICES=1 run-2402.sh b + +export RAPIDS_DATASET_ROOT_DIR=/datasets/cugraph +mkdir -p logs + +algos=" + pagerank + betweenness_centrality + louvain + shortest_path + weakly_connected_components + triangles + bfs_predecessors +" + +datasets=" + netscience + email_Eu_core + cit_patents + hollywood + soc-livejournal +" + +# None backend is default networkx +# cugraph-preconvert backend is nx-cugraph +backends=" + None + cugraph-preconverted +" + +for dataset in $datasets; do + python ensure_dataset_accessible.py $dataset + for backend in $backends; do + for algo in $algos; do + name="${backend}__${algo}__${dataset}" + echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo and not 1000\" --benchmark-json=\"logs/${name}.json\" bench_algos.py" + pytest -sv -k "$backend and $dataset and bench_$algo and not 1000" --benchmark-json="logs/${name}.json" bench_algos.py 2>&1 | tee "logs/${name}.out" + done + done +done