diff --git a/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py b/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py
index ef7440c1d77..5ed16a5cd75 100644
--- a/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py
+++ b/benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py
@@ -11,19 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
 import re
 import pathlib
 import json
 
-logs_dir = pathlib.Path("logs")
-
-dataset_patt = re.compile(".*ds=([\w-]+).*")
-backend_patt = re.compile(".*backend=(\w+).*")
-k_patt = re.compile(".*k=(10*).*")
-
-# Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
-benchmarks = {}
-
 
 def compute_perf_vals(cugraph_runtime, networkx_runtime):
     speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X"
@@ -42,136 +34,165 @@ def compute_perf_vals(cugraph_runtime, networkx_runtime):
     return (speedup_string, delta_string)
 
 
-# Populate benchmarks dir from .json files
-for json_file in logs_dir.glob("*.json"):
-    # print(f"READING {json_file}")
-    try:
-        data = json.loads(open(json_file).read())
-    except json.decoder.JSONDecodeError:
-        # print(f"PROBLEM READING {json_file}, skipping.")
-        continue
-
-    for benchmark_run in data["benchmarks"]:
-        # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
-        name = benchmark_run["name"]
-
-        algo_name = name.split("[")[0]
-        if algo_name.startswith("bench_"):
-            algo_name = algo_name[6:]
-        # special case for betweenness_centrality
-        match = k_patt.match(name)
-        if match is not None:
-            algo_name += f", k={match.group(1)}"
-
-        match = dataset_patt.match(name)
-        if match is None:
-            raise RuntimeError(
-                f"benchmark name {name} in file {json_file} has an unexpected format"
-            )
-        dataset = match.group(1)
-        if dataset.endswith("-backend"):
-            dataset = dataset[:-8]
-
-        match = backend_patt.match(name)
-        if match is None:
-            raise RuntimeError(
-                f"benchmark name {name} in file {json_file} has an unexpected format"
-            )
-        backend = match.group(1)
-        if backend == "None":
-            backend = "networkx"
-
-        runtime = benchmark_run["stats"]["mean"]
-        benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[dataset] = runtime
-
-
-# dump HTML table
-ordered_datasets = [
-    "netscience",
-    "email_Eu_core",
-    "cit-patents",
-    "hollywood",
-    "soc-livejournal1",
-]
-
-print(
-    """
-<html>
-<table border="1">
-   <tr>
-      <td></td>
-"""
-)
-for ds in ordered_datasets:
-    print(f"      <td>{ds}</td>")
-print(
-    """
-   </tr>
-"""
-)
-
-
-for algo_name in benchmarks:
-    algo_runs = benchmarks[algo_name]
-    print("   <tr>")
-    print(f"      <td>{algo_name}</td>")
-
-    # Proceed only if any results are present for both cugraph and NX
-    if "cugraph" in algo_runs and "networkx" in algo_runs:
-        cugraph_algo_runs = algo_runs["cugraph"]
-        networkx_algo_runs = algo_runs["networkx"]
-        datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs)
-
-        # populate the table with speedup results for each dataset in the order
-        # specified in ordered_datasets. If results for a run using a dataset
-        # are not present for both cugraph and NX, output an empty cell.
-        for dataset in ordered_datasets:
-            if dataset in datasets_in_both:
-                cugraph_runtime = cugraph_algo_runs[dataset]
-                networkx_runtime = networkx_algo_runs[dataset]
-                (speedup, runtime_delta) = compute_perf_vals(
-                    cugraph_runtime=cugraph_runtime, networkx_runtime=networkx_runtime
-                )
-                print(f"      <td>{speedup}<br>{runtime_delta}</td>")
-            else:
-                print(f"      <td></td>")
-
-    # If a comparison between cugraph and NX cannot be made, output empty cells
-    # for each dataset
-    else:
-        for _ in range(len(ordered_datasets)):
-            print("      <td></td>")
-
-    print("   </tr>")
-
-print(
-    """
-</table>
-</html>
-"""
-)
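+# NOTE: this file is meant to be run as a script; the __main__ guard below
+# keeps the report generation from running as an import side effect.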
+if __name__ == "__main__":
+    logs_dir = pathlib.Path("logs")
+
+    dataset_patt = re.compile(r".*ds=([\w-]+).*")
+    backend_patt = re.compile(r".*backend=(\w+).*")
+    k_patt = re.compile(r".*k=(10*).*")
+
+    # Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
+    benchmarks = {}
+
+    # Populate benchmarks dict from .json files
+    for json_file in logs_dir.glob("*.json"):
+        # print(f"READING {json_file}")
+        try:
+            data = json.loads(open(json_file).read())
+        except json.decoder.JSONDecodeError:
+            # print(f"PROBLEM READING {json_file}, skipping.")
+            continue
+
+        for benchmark_run in data["benchmarks"]:
+            # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
+            name = benchmark_run["name"]
+
+            algo_name = name.split("[")[0]
+            if algo_name.startswith("bench_"):
+                algo_name = algo_name[6:]
+            # special case for betweenness_centrality
+            match = k_patt.match(name)
+            if match is not None:
+                algo_name += f", k={match.group(1)}"
+
+            match = dataset_patt.match(name)
+            if match is None:
+                raise RuntimeError(
+                    f"benchmark name {name} in file {json_file} has an unexpected format"
+                )
+            dataset = match.group(1)
+            if dataset.endswith("-backend"):
+                dataset = dataset[:-8]
+
+            match = backend_patt.match(name)
+            if match is None:
+                raise RuntimeError(
+                    f"benchmark name {name} in file {json_file} has an unexpected format"
+                )
+            backend = match.group(1)
+            if backend == "None":
+                backend = "networkx"
+
+            runtime = benchmark_run["stats"]["mean"]
+            benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[dataset] = runtime
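+
+    # The resulting structure maps algo -> backend -> dataset -> mean runtime,
+    # e.g. (illustrative values only):
+    #   {"triangles": {"cugraph": {"netscience": 0.012},
+    #                  "networkx": {"netscience": 4.1}}}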
+
+    # dump HTML table
+    ordered_datasets = [
+        "netscience",
+        "email_Eu_core",
+        "cit-patents",
+        "hollywood",
+        "soc-livejournal1",
+    ]
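+
+    # Header row: an empty corner cell followed by one cell per dataset, in the
+    # fixed order given by ordered_datasets.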
+    print(
+        """
+<html>
+<table border="1">
+   <tr>
+      <td></td>
+"""
+    )
+    for ds in ordered_datasets:
+        print(f"      <td>{ds}</td>")
+    print(
+        """
+   </tr>
+"""
+    )
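+
+    # One row per algo: the algo name, then one cell per dataset containing the
+    # speedup string ("<networkx runtime / cugraph runtime>X") stacked over the
+    # absolute runtime delta from compute_perf_vals(); a cell is left empty if
+    # either backend lacks a result for that dataset.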
+    for algo_name in benchmarks:
+        algo_runs = benchmarks[algo_name]
+        print("   <tr>")
+        print(f"      <td>{algo_name}</td>")
+
+        # Proceed only if any results are present for both cugraph and NX
+        if "cugraph" in algo_runs and "networkx" in algo_runs:
+            cugraph_algo_runs = algo_runs["cugraph"]
+            networkx_algo_runs = algo_runs["networkx"]
+            datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs)
+
+            # populate the table with speedup results for each dataset in the order
+            # specified in ordered_datasets. If results for a run using a dataset
+            # are not present for both cugraph and NX, output an empty cell.
+            for dataset in ordered_datasets:
+                if dataset in datasets_in_both:
+                    cugraph_runtime = cugraph_algo_runs[dataset]
+                    networkx_runtime = networkx_algo_runs[dataset]
+                    (speedup, runtime_delta) = compute_perf_vals(
+                        cugraph_runtime=cugraph_runtime, networkx_runtime=networkx_runtime
+                    )
+                    print(f"      <td>{speedup}<br>{runtime_delta}</td>")
+                else:
+                    print(f"      <td></td>")
+
+        # If a comparison between cugraph and NX cannot be made, output empty cells
+        # for each dataset
+        else:
+            for _ in range(len(ordered_datasets)):
+                print("      <td></td>")
+
+        print("   </tr>")
+
+    print(
+        """
+</table>
+</html>
+"""
+    )
diff --git a/benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py b/benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py
index 259a78cb7c0..5a0a15da8ee 100644
--- a/benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py
+++ b/benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py
@@ -25,9 +25,11 @@
 import cugraph.datasets as cgds
 
 
-# download and store dataset (csv) by using the Datasets API
-dataset = sys.argv[1].replace("-", "_")
-dataset_obj = getattr(cgds, dataset)
-if not dataset_obj.get_path().exists():
-    dataset_obj.get_edgelist(download=True)
+if __name__ == "__main__":
+    # download and store dataset (csv) by using the Datasets API
+    dataset = sys.argv[1].replace("-", "_")
+    dataset_obj = getattr(cgds, dataset)
+
+    if not dataset_obj.get_path().exists():
+        dataset_obj.get_edgelist(download=True)
diff --git a/benchmarks/nx-cugraph/pytest-based/run-gap-benchmarks.sh b/benchmarks/nx-cugraph/pytest-based/run-gap-benchmarks.sh
index 6a7102f762b..7099362426e 100755
--- a/benchmarks/nx-cugraph/pytest-based/run-gap-benchmarks.sh
+++ b/benchmarks/nx-cugraph/pytest-based/run-gap-benchmarks.sh
@@ -34,20 +34,23 @@
 hollywood
 soc-livejournal
 "
 
 # None backend is default networkx
 # cugraph-preconvert backend is nx-cugraph
 backends="
 None
 cugraph-preconverted
 "
-for dataset in $datasets; do
-    python ensure_dataset_accessible.py $dataset
-    for backend in $backends; do
-        for algo in $algos; do
+for algo in $algos; do
+    for dataset in $datasets; do
+        python get_graph_bench_dataset.py $dataset
+        for backend in $backends; do
             name="${backend}__${algo}__${dataset}"
             echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo and not 1000\" --benchmark-json=\"logs/${name}.json\" bench_algos.py"
-            pytest -sv -k "$backend and $dataset and bench_$algo and not 1000" --benchmark-json="logs/${name}.json" bench_algos.py 2>&1 | tee "logs/${name}.out"
+            pytest -sv \
+                -k "$backend and $dataset and bench_$algo and not 1000" \
+                --benchmark-json="logs/${name}.json" \
+                bench_algos.py 2>&1 | tee "logs/${name}.out"
         done
     done
 done
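+
+# Example of a single generated invocation (illustrative; actual values depend
+# on the algos/datasets/backends lists above):
+#   pytest -sv -k "cugraph-preconverted and hollywood and bench_triangles and not 1000" \
+#       --benchmark-json="logs/cugraph-preconverted__triangles__hollywood.json" bench_algos.py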