Commit 52e13fa: "Add files"
nv-rliu committed Aug 26, 2024 · 1 parent: 08520a0

Showing 3 changed files with 194 additions and 150 deletions.
benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py (301 changes: 161 additions & 140 deletions)

@@ -11,19 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.


 import re
 import pathlib
 import json

-logs_dir = pathlib.Path("logs")
-
-dataset_patt = re.compile(".*ds=([\w-]+).*")
-backend_patt = re.compile(".*backend=(\w+).*")
-k_patt = re.compile(".*k=(10*).*")
-
-# Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
-benchmarks = {}

 def compute_perf_vals(cugraph_runtime, networkx_runtime):
     speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X"

@@ -42,136 +34,165 @@ def compute_perf_vals(cugraph_runtime, networkx_runtime):
     return (speedup_string, delta_string)


-# Populate benchmarks dir from .json files
-for json_file in logs_dir.glob("*.json"):
-    # print(f"READING {json_file}")
-    try:
-        data = json.loads(open(json_file).read())
-    except json.decoder.JSONDecodeError:
-        # print(f"PROBLEM READING {json_file}, skipping.")
-        continue
-
-    for benchmark_run in data["benchmarks"]:
-        # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
-        name = benchmark_run["name"]
-
-        algo_name = name.split("[")[0]
-        if algo_name.startswith("bench_"):
-            algo_name = algo_name[6:]
-        # special case for betweenness_centrality
-        match = k_patt.match(name)
-        if match is not None:
-            algo_name += f", k={match.group(1)}"
-
-        match = dataset_patt.match(name)
-        if match is None:
-            raise RuntimeError(
-                f"benchmark name {name} in file {json_file} has an unexpected format"
-            )
-        dataset = match.group(1)
-        if dataset.endswith("-backend"):
-            dataset = dataset[:-8]
-
-        match = backend_patt.match(name)
-        if match is None:
-            raise RuntimeError(
-                f"benchmark name {name} in file {json_file} has an unexpected format"
-            )
-        backend = match.group(1)
-        if backend == "None":
-            backend = "networkx"
-
-        runtime = benchmark_run["stats"]["mean"]
-        benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[dataset] = runtime
-
-
-# dump HTML table
-ordered_datasets = [
-    "netscience",
-    "email_Eu_core",
-    "cit-patents",
-    "hollywood",
-    "soc-livejournal1",
-]
-
-print(
-    """
-<html>
-<head>
-<style>
-table {
-table-layout: fixed;
-width: 100%;
-border-collapse: collapse;
-}
-tbody tr:nth-child(odd) {
-background-color: #ffffff;
-}
-tbody tr:nth-child(even) {
-background-color: #d3d3d3;
-}
-tbody td {
-text-align: center;
-}
-th,
-td {
-padding: 10px;
-}
-</style>
-</head>
-<table>
-<thead>
-<tr>
-<th></th>"""
-)
-for ds in ordered_datasets:
-    print(f" <th>{ds}</th>")
-print(
-    """ </tr>
-</thead>
-<tbody>
-"""
-)
-
-
-for algo_name in benchmarks:
-    algo_runs = benchmarks[algo_name]
-    print(" <tr>")
-    print(f" <td>{algo_name}</td>")
-
-    # Proceed only if any results are present for both cugraph and NX
-    if "cugraph" in algo_runs and "networkx" in algo_runs:
-        cugraph_algo_runs = algo_runs["cugraph"]
-        networkx_algo_runs = algo_runs["networkx"]
-        datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs)
-
-        # populate the table with speedup results for each dataset in the order
-        # specified in ordered_datasets. If results for a run using a dataset
-        # are not present for both cugraph and NX, output an empty cell.
-        for dataset in ordered_datasets:
-            if dataset in datasets_in_both:
-                cugraph_runtime = cugraph_algo_runs[dataset]
-                networkx_runtime = networkx_algo_runs[dataset]
-                (speedup, runtime_delta) = compute_perf_vals(
-                    cugraph_runtime=cugraph_runtime, networkx_runtime=networkx_runtime
-                )
-                print(f" <td>{speedup}<br>{runtime_delta}</td>")
-            else:
-                print(f" <td></td>")
-
-    # If a comparison between cugraph and NX cannot be made, output empty cells
-    # for each dataset
-    else:
-        for _ in range(len(ordered_datasets)):
-            print(" <td></td>")
-
-    print(" </tr>")
-
-print(
-    """
-</tbody>
-</table>
-</html>
-"""
-)
+if __name__ == "__main__":
+    logs_dir = pathlib.Path("logs")
+
+    dataset_patt = re.compile(".*ds=([\w-]+).*")
+    backend_patt = re.compile(".*backend=(\w+).*")
+    k_patt = re.compile(".*k=(10*).*")
+
+    # Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
+    benchmarks = {}
+
+    # Populate benchmarks dir from .json files
+    for json_file in logs_dir.glob("*.json"):
+        # print(f"READING {json_file}")
+        try:
+            data = json.loads(open(json_file).read())
+        except json.decoder.JSONDecodeError:
+            # print(f"PROBLEM READING {json_file}, skipping.")
+            continue
+
+        for benchmark_run in data["benchmarks"]:
+            # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
+            name = benchmark_run["name"]
+
+            algo_name = name.split("[")[0]
+            if algo_name.startswith("bench_"):
+                algo_name = algo_name[6:]
+            # special case for betweenness_centrality
+            match = k_patt.match(name)
+            if match is not None:
+                algo_name += f", k={match.group(1)}"
+
+            match = dataset_patt.match(name)
+            if match is None:
+                raise RuntimeError(
+                    f"benchmark name {name} in file {json_file} has an unexpected format"
+                )
+            dataset = match.group(1)
+            if dataset.endswith("-backend"):
+                dataset = dataset[:-8]
+
+            match = backend_patt.match(name)
+            if match is None:
+                raise RuntimeError(
+                    f"benchmark name {name} in file {json_file} has an unexpected format"
+                )
+            backend = match.group(1)
+            if backend == "None":
+                backend = "networkx"
+
+            runtime = benchmark_run["stats"]["mean"]
+            benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[dataset] = runtime
+    # breakpoint()
+
+    # dump HTML table
+    ordered_datasets = [
+        "netscience",
+        "email_Eu_core",
+        "cit-patents",
+        "hollywood",
+        "soc-livejournal1",
+    ]
+
+    print(
+        """
+<html>
+<head>
+<style>
+table {
+table-layout: fixed;
+width: 100%;
+border-collapse: collapse;
+}
+tbody tr:nth-child(odd) {
+background-color: #ffffff;
+}
+tbody tr:nth-child(even) {
+background-color: #d3d3d3;
+}
+tbody td {
+text-align: center;
+color: black;
+}
+th,
+td {
+padding: 10px;
+}
+.footer {
+background-color: #f1f1f1;
+padding: 10px;
+text-align: center;
+font-size: 14px;
+color: #333;
+left: 0;
+bottom: 0;
+width: 100%;
+}
+.footer a {
+color: #007bff;
+text-decoration: none;
+}
+.footer a:hover {
+text-decoration: underline;
+}
+</style>
+</head>
+<table>
+<thead>
+<tr>
+<th></th>"""
+    )
+    for ds in ordered_datasets:
+        print(f" <th>{ds}</th>")
+    print(
+        """ </tr>
+</thead>
+<tbody>
+"""
+    )
+
+
+    for algo_name in benchmarks:
+        algo_runs = benchmarks[algo_name]
+        print(" <tr>")
+        print(f" <td>{algo_name}</td>")
+        # Proceed only if any results are present for both cugraph and NX
+        if "cugraph" in algo_runs and "networkx" in algo_runs:
+            cugraph_algo_runs = algo_runs["cugraph"]
+            networkx_algo_runs = algo_runs["networkx"]
+            datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs)
+
+            # populate the table with speedup results for each dataset in the order
+            # specified in ordered_datasets. If results for a run using a dataset
+            # are not present for both cugraph and NX, output an empty cell.
+            for dataset in ordered_datasets:
+                if dataset in datasets_in_both:
+                    cugraph_runtime = cugraph_algo_runs[dataset]
+                    networkx_runtime = networkx_algo_runs[dataset]
+                    (speedup, runtime_delta) = compute_perf_vals(
+                        cugraph_runtime=cugraph_runtime, networkx_runtime=networkx_runtime
+                    )
+                    print(f" <td>{speedup}<br>{runtime_delta}</td>")
+                else:
+                    print(f" <td></td>")
+
+        # If a comparison between cugraph and NX cannot be made, output empty cells
+        # for each dataset
+        else:
+            for _ in range(len(ordered_datasets)):
+                print(" <td></td>")
+
+        print(" </tr>")
+
+    print(
+        """
+</tbody>
+</table>
+<div class="footer">
+</div>
+</html>
+"""
+    )
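To make the name-parsing above concrete, here is a small standalone walk-through of the logic the script applies to each pytest-benchmark record. The sample names are illustrative, the r-prefixes on the regexes are an editorial addition to silence Python's invalid-escape-sequence warning (the patterns are otherwise copied verbatim), and the body of compute_perf_vals is collapsed in the diff above, so the delta computation sketched here is an assumption, not the repo's code:

    import re

    # Patterns from create_results_summary_page.py (r-prefix added; matching
    # behavior is identical).
    dataset_patt = re.compile(r".*ds=([\w-]+).*")
    backend_patt = re.compile(r".*backend=(\w+).*")
    k_patt = re.compile(r".*k=(10*).*")

    name = "bench_triangles[ds=netscience-backend=cugraph-preconverted]"

    algo_name = name.split("[")[0]
    if algo_name.startswith("bench_"):
        algo_name = algo_name[6:]                  # -> "triangles"

    dataset = dataset_patt.match(name).group(1)    # greedy [\w-]+ -> "netscience-backend"
    if dataset.endswith("-backend"):               # trim what the regex over-captured
        dataset = dataset[:-8]                     # -> "netscience"

    backend = backend_patt.match(name).group(1)    # \w+ stops at "-" -> "cugraph"
    if backend == "None":
        backend = "networkx"

    print(algo_name, backend, dataset)             # triangles cugraph netscience

    # betweenness_centrality runs carry a k= parameter (hypothetical sample name):
    k_match = k_patt.match("bench_betweenness_centrality[ds=hollywood-backend=cugraph-k=100]")
    if k_match is not None:
        print(f"k={k_match.group(1)}")             # k=100

    def compute_perf_vals(cugraph_runtime, networkx_runtime):
        # Only the first and last lines of this function appear in the diff;
        # the delta formatting in between is an assumption.
        speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X"
        delta_string = f"{networkx_runtime - cugraph_runtime:.3f}s"  # hypothetical
        return (speedup_string, delta_string)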
benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py (12 changes: 7 additions & 5 deletions)

@@ -25,9 +25,11 @@

 import cugraph.datasets as cgds

-# download and store dataset (csv) by using the Datasets API
-dataset = sys.argv[1].replace("-", "_")
-dataset_obj = getattr(cgds, dataset)
-
-if not dataset_obj.get_path().exists():
-    dataset_obj.get_edgelist(download=True)
+if __name__ == "__main__":
+    # download and store dataset (csv) by using the Datasets API
+    dataset = sys.argv[1].replace("-", "_")
+    dataset_obj = getattr(cgds, dataset)
+
+    if not dataset_obj.get_path().exists():
+        dataset_obj.get_edgelist(download=True)
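The dash-to-underscore substitution is what lets callers pass dataset names in either spelling (e.g. "soc-livejournal" in the run script below resolves to cgds.soc_livejournal). A rough sketch of the script's effect over the run script's dataset list, assuming these dataset objects exist in the installed cugraph (the run script implies they do):

    import cugraph.datasets as cgds

    for cli_name in ["netscience", "email_Eu_core", "cit_patents", "hollywood", "soc-livejournal"]:
        dataset_obj = getattr(cgds, cli_name.replace("-", "_"))  # AttributeError for unknown names
        if not dataset_obj.get_path().exists():      # skip the download when already cached
            dataset_obj.get_edgelist(download=True)  # fetch and store the csv edge list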
benchmarks/nx-cugraph/pytest-based/run-gap-benchmarks.sh (31 changes: 26 additions & 5 deletions)

@@ -27,27 +27,48 @@ algos="
 triangles
 bfs_predecessors
 "
+algos="
+weakly_connected_components
+"
 datasets="
 netscience
 email_Eu_core
 cit_patents
 hollywood
 soc-livejournal
 "
+datasets="
+netscience
+email_Eu_core
+cit_patents
+hollywood
+soc-livejournal
+"
+datasets="
+hollywood
+"

 # None backend is default networkx
 # cugraph-preconvert backend is nx-cugraph
 backends="
 None
 cugraph-preconverted
 "
+backends="
+cugraph-preconverted
+"

-for dataset in $datasets; do
-    python ensure_dataset_accessible.py $dataset
-    for backend in $backends; do
-        for algo in $algos; do
+for algo in $algos; do
+    for dataset in $datasets; do
+        python get_graph_bench_dataset.py $dataset
+        for backend in $backends; do
             name="${backend}__${algo}__${dataset}"
             # echo "Running: $backend, $dataset, bench_$algo"
             echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo and not 1000\" --benchmark-json=\"logs/${name}.json\" bench_algos.py"
-            pytest -sv -k "$backend and $dataset and bench_$algo and not 1000" --benchmark-json="logs/${name}.json" bench_algos.py 2>&1 | tee "logs/${name}.out"
+            pytest -sv \
+                -k "$backend and $dataset and bench_$algo and not 1000" \
+                --benchmark-json="logs/${name}.json" \
+                bench_algos.py 2>&1 | tee "logs/${name}.out"
         done
     done
 done
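The file names are what tie the pieces together: each pytest invocation above writes logs/<backend>__<algo>__<dataset>.json, which is exactly the set of files create_results_summary_page.py globs and parses. A minimal single-run driver in Python, for illustration only; it mirrors the pytest flags the shell loop builds, and redirecting the summary script's stdout to a file is my suggestion rather than something the repo scripts do:

    import pathlib
    import subprocess

    logs_dir = pathlib.Path("logs")
    logs_dir.mkdir(exist_ok=True)

    backend, algo, dataset = "cugraph-preconverted", "triangles", "hollywood"
    name = f"{backend}__{algo}__{dataset}"

    # Same selection expression as the shell loop; "not 1000" presumably
    # filters out the k=1000 benchmark variants.
    subprocess.run(
        [
            "pytest", "-sv",
            "-k", f"{backend} and {dataset} and bench_{algo} and not 1000",
            f"--benchmark-json=logs/{name}.json",
            "bench_algos.py",
        ],
        check=False,  # pytest exits non-zero when selected benchmarks fail
    )

    # Afterwards, render the HTML summary from whatever landed in logs/:
    #   python create_results_summary_page.py > results.html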
