Commit
Merge pull request #19 from rlratzel/branch-24.10-nxcg_benchmarking
Merge `rlratzel:branch-24.10-nxcg_benchmarking` into Forked Branch
nv-rliu authored Aug 19, 2024
2 parents 4c26e7a + b626c3c commit f2a0f77
Showing 4 changed files with 274 additions and 2 deletions.
31 changes: 29 additions & 2 deletions benchmarks/nx-cugraph/pytest-based/bench_algos.py
@@ -271,9 +271,8 @@ def bench_from_networkx(benchmark, graph_obj):


# normalized_param_values = [True, False]
# k_param_values = [10, 100]
normalized_param_values = [True]
-k_param_values = [10]
+k_param_values = [10, 100, 1000]


@pytest.mark.parametrize(
@@ -282,6 +281,10 @@ def bench_from_networkx(benchmark, graph_obj):
@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}")
def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k):
    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)

    if k > G.number_of_nodes():
        pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")

    result = benchmark.pedantic(
        target=backend_wrapper(nx.betweenness_centrality),
        args=(G,),
@@ -305,6 +308,10 @@ def bench_edge_betweenness_centrality(
    benchmark, graph_obj, backend_wrapper, normalized, k
):
    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)

    if k > G.number_of_nodes():
        pytest.skip(reason=f"{k=} > {G.number_of_nodes()=}")

    result = benchmark.pedantic(
        target=backend_wrapper(nx.edge_betweenness_centrality),
        args=(G,),
@@ -473,6 +480,26 @@ def bench_pagerank_personalized(benchmark, graph_obj, backend_wrapper):
    assert type(result) is dict


def bench_shortest_path(benchmark, graph_obj, backend_wrapper):
    """
    Benchmarks nx.shortest_path using the highest-degree node as the source,
    with no target specified.
    """
    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
    node = get_highest_degree_node(graph_obj)

    result = benchmark.pedantic(
        target=backend_wrapper(nx.shortest_path),
        args=(G,),
        kwargs=dict(
            source=node,
        ),
        rounds=rounds,
        iterations=iterations,
        warmup_rounds=warmup_rounds,
    )
    assert type(result) is dict


def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper):
    G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper)
    node = get_highest_degree_node(graph_obj)
22 changes: 22 additions & 0 deletions benchmarks/nx-cugraph/pytest-based/ensure_dataset_accessible.py
@@ -0,0 +1,22 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
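
# Usage: python ensure_dataset_accessible.py <dataset-name>
# Hyphens in <dataset-name> are converted to underscores to match the
# attribute names in cugraph.datasets (e.g. "email-Eu-core" -> email_Eu_core).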

import sys

import cugraph.datasets as cgds

dataset = sys.argv[1].replace("-", "_")
dataset_obj = getattr(cgds, dataset)

if not dataset_obj.get_path().exists():
    dataset_obj.get_edgelist(download=True)
177 changes: 177 additions & 0 deletions benchmarks/nx-cugraph/pytest-based/gen_table.py
@@ -0,0 +1,177 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
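
# Reads the pytest-benchmark .json result files from the local "logs"
# directory (as written by run-main.sh) and prints an HTML table comparing
# cugraph and networkx runtimes to stdout.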

import re
import pathlib
import json

logs_dir = pathlib.Path("logs")

dataset_patt = re.compile(r".*ds=([\w-]+).*")
backend_patt = re.compile(r".*backend=(\w+).*")
# k values are powers of ten (10, 100, 1000), so match a "1" followed by zeros
k_patt = re.compile(r".*k=(10*).*")

# Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
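# e.g. benchmarks["pagerank"]["cugraph"]["netscience"] = <mean runtime in seconds>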
benchmarks = {}


def compute_perf_vals(cugraph_runtime, networkx_runtime):
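    """
    Return (speedup, delta) display strings comparing a cugraph and a
    networkx runtime: speedup is the networkx/cugraph runtime ratio, and
    delta is the absolute time saved, scaled to s, ms, or us for readability.
    """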
    speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X"
    delta = networkx_runtime - cugraph_runtime
    if abs(delta) < 1:
        if abs(delta) < 0.001:
            units = "us"
            delta *= 1e6
        else:
            units = "ms"
            delta *= 1e3
    else:
        units = "s"
    delta_string = f"{delta:.3f}{units}"

    return (speedup_string, delta_string)
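
# e.g. compute_perf_vals(cugraph_runtime=0.5, networkx_runtime=2.0)
# returns ("4.000X", "1.500s")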


# Populate the benchmarks dict from the .json files in logs_dir
for json_file in logs_dir.glob("*.json"):
    # print(f"READING {json_file}")
    try:
        data = json.loads(json_file.read_text())
    except json.decoder.JSONDecodeError:
        # print(f"PROBLEM READING {json_file}, skipping.")
        continue

    for benchmark_run in data["benchmarks"]:
        # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
        name = benchmark_run["name"]

        algo_name = name.split("[")[0]
        if algo_name.startswith("bench_"):
            algo_name = algo_name[6:]
        # special case for the betweenness_centrality benchmarks, which are
        # also parametrized by k
        match = k_patt.match(name)
        if match is not None:
            algo_name += f", k={match.group(1)}"

        match = dataset_patt.match(name)
        if match is None:
            raise RuntimeError(
                f"benchmark name {name} in file {json_file} has an unexpected format"
            )
        dataset = match.group(1)
        if dataset.endswith("-backend"):
            dataset = dataset[:-8]

        match = backend_patt.match(name)
        if match is None:
            raise RuntimeError(
                f"benchmark name {name} in file {json_file} has an unexpected format"
            )
        backend = match.group(1)
        if backend == "None":
            backend = "networkx"

        runtime = benchmark_run["stats"]["mean"]
        benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[dataset] = runtime


# dump HTML table
ordered_datasets = [
    "netscience",
    "email_Eu_core",
    "cit-patents",
    "hollywood",
    "soc-livejournal1",
]

print(
    """
<html>
<head>
<style>
table {
    table-layout: fixed;
    width: 100%;
    border-collapse: collapse;
}
tbody tr:nth-child(odd) {
    background-color: #ffffff;
}
tbody tr:nth-child(even) {
    background-color: #d3d3d3;
}
tbody td {
    text-align: center;
}
th,
td {
    padding: 10px;
}
</style>
</head>
<table>
<thead>
<tr>
<th></th>"""
)
for ds in ordered_datasets:
    print(f" <th>{ds}</th>")
print(
""" </tr>
</thead>
<tbody>
"""
)


for algo_name in benchmarks:
    algo_runs = benchmarks[algo_name]
    print(" <tr>")
    print(f" <td>{algo_name}</td>")

    # Proceed only if results are present for both cugraph and NX
    if "cugraph" in algo_runs and "networkx" in algo_runs:
        cugraph_algo_runs = algo_runs["cugraph"]
        networkx_algo_runs = algo_runs["networkx"]
        datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs)

        # populate the table with speedup results for each dataset in the order
        # specified in ordered_datasets. If results for a run using a dataset
        # are not present for both cugraph and NX, output an empty cell.
        for dataset in ordered_datasets:
            if dataset in datasets_in_both:
                cugraph_runtime = cugraph_algo_runs[dataset]
                networkx_runtime = networkx_algo_runs[dataset]
                (speedup, runtime_delta) = compute_perf_vals(
                    cugraph_runtime=cugraph_runtime, networkx_runtime=networkx_runtime
                )
                print(f" <td>{speedup}<br>{runtime_delta}</td>")
            else:
                print(" <td></td>")

    # If a comparison between cugraph and NX cannot be made, output empty cells
    # for each dataset
    else:
        for _ in range(len(ordered_datasets)):
            print(" <td></td>")

    print(" </tr>")

print(
"""
</tbody>
</table>
</html>
"""
)
46 changes: 46 additions & 0 deletions benchmarks/nx-cugraph/pytest-based/run-main.sh
@@ -0,0 +1,46 @@
#!/bin/bash
#
# Copyright (c) 2024, NVIDIA CORPORATION.
#
# Runs the nx-cugraph benchmarks in bench_algos.py for every combination of
# backend, algo, and dataset listed below, writing pytest-benchmark .json
# results and captured output to ./logs.

export RAPIDS_DATASET_ROOT_DIR=/datasets/cugraph
mkdir -p logs

algos="
pagerank
betweenness_centrality
louvain
shortest_path
weakly_connected_components
triangles
bfs_predecessors
"

datasets="
netscience
email_Eu_core
cit_patents
hollywood
soc-livejournal
"

# The "None" backend is default NetworkX;
# the "cugraph-preconverted" backend is nx-cugraph.
backends="
None
cugraph-preconverted
"

for dataset in $datasets; do
    python ensure_dataset_accessible.py $dataset
    for backend in $backends; do
        for algo in $algos; do
            name="${backend}__${algo}__${dataset}"
            echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo and not 1000\" --benchmark-json=\"logs/${name}.json\" bench_algos.py\""
            pytest -sv -k "$backend and $dataset and bench_$algo and not 1000" --benchmark-json="logs/${name}.json" bench_algos.py 2>&1 | tee "logs/${name}.out"
        done
    done
done
