Commit 52e13fa: "Add files"
nv-rliu committed Aug 26, 2024 · 1 parent: 08520a0

Showing 3 changed files with 194 additions and 150 deletions.
benchmarks/nx-cugraph/pytest-based/create_results_summary_page.py (301 changes: 161 additions & 140 deletions)

@@ -11,19 +11,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.


 import re
 import pathlib
 import json

-logs_dir = pathlib.Path("logs")
-
-dataset_patt = re.compile(".*ds=([\w-]+).*")
-backend_patt = re.compile(".*backend=(\w+).*")
-k_patt = re.compile(".*k=(10*).*")
-
-# Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
-benchmarks = {}

 def compute_perf_vals(cugraph_runtime, networkx_runtime):
     speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X"

@@ -42,136 +34,165 @@ def compute_perf_vals(cugraph_runtime, networkx_runtime):
     return (speedup_string, delta_string)


-# Populate benchmarks dir from .json files
-for json_file in logs_dir.glob("*.json"):
-    # print(f"READING {json_file}")
-    try:
-        data = json.loads(open(json_file).read())
-    except json.decoder.JSONDecodeError:
-        # print(f"PROBLEM READING {json_file}, skipping.")
-        continue
-
-    for benchmark_run in data["benchmarks"]:
-        # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
-        name = benchmark_run["name"]
-
-        algo_name = name.split("[")[0]
-        if algo_name.startswith("bench_"):
-            algo_name = algo_name[6:]
-        # special case for betweenness_centrality
-        match = k_patt.match(name)
-        if match is not None:
-            algo_name += f", k={match.group(1)}"
-
-        match = dataset_patt.match(name)
-        if match is None:
-            raise RuntimeError(
-                f"benchmark name {name} in file {json_file} has an unexpected format"
-            )
-        dataset = match.group(1)
-        if dataset.endswith("-backend"):
-            dataset = dataset[:-8]
-
-        match = backend_patt.match(name)
-        if match is None:
-            raise RuntimeError(
-                f"benchmark name {name} in file {json_file} has an unexpected format"
-            )
-        backend = match.group(1)
-        if backend == "None":
-            backend = "networkx"
-
-        runtime = benchmark_run["stats"]["mean"]
-        benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[dataset] = runtime
-
-
-# dump HTML table
-ordered_datasets = [
-    "netscience",
-    "email_Eu_core",
-    "cit-patents",
-    "hollywood",
-    "soc-livejournal1",
-]
-
-print(
-    """
-<html>
-<head>
-<style>
-table {
-table-layout: fixed;
-width: 100%;
-border-collapse: collapse;
-}
-tbody tr:nth-child(odd) {
-background-color: #ffffff;
-}
-tbody tr:nth-child(even) {
-background-color: #d3d3d3;
-}
-tbody td {
-text-align: center;
-}
-th,
-td {
-padding: 10px;
-}
-</style>
-</head>
-<table>
-<thead>
-<tr>
-<th></th>"""
-)
-for ds in ordered_datasets:
-    print(f" <th>{ds}</th>")
-print(
-    """ </tr>
-</thead>
-<tbody>
-"""
-)
-
-
-for algo_name in benchmarks:
-    algo_runs = benchmarks[algo_name]
-    print(" <tr>")
-    print(f" <td>{algo_name}</td>")
-
-    # Proceed only if any results are present for both cugraph and NX
-    if "cugraph" in algo_runs and "networkx" in algo_runs:
-        cugraph_algo_runs = algo_runs["cugraph"]
-        networkx_algo_runs = algo_runs["networkx"]
-        datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs)
-
-        # populate the table with speedup results for each dataset in the order
-        # specified in ordered_datasets. If results for a run using a dataset
-        # are not present for both cugraph and NX, output an empty cell.
-        for dataset in ordered_datasets:
-            if dataset in datasets_in_both:
-                cugraph_runtime = cugraph_algo_runs[dataset]
-                networkx_runtime = networkx_algo_runs[dataset]
-                (speedup, runtime_delta) = compute_perf_vals(
-                    cugraph_runtime=cugraph_runtime, networkx_runtime=networkx_runtime
-                )
-                print(f" <td>{speedup}<br>{runtime_delta}</td>")
-            else:
-                print(f" <td></td>")
-
-    # If a comparison between cugraph and NX cannot be made, output empty cells
-    # for each dataset
-    else:
-        for _ in range(len(ordered_datasets)):
-            print(" <td></td>")
-
-    print(" </tr>")
-
-print(
-    """
-</tbody>
-</table>
-</html>
-"""
-)
+if __name__ == "__main__":
+    logs_dir = pathlib.Path("logs")
+
+    dataset_patt = re.compile(".*ds=([\w-]+).*")
+    backend_patt = re.compile(".*backend=(\w+).*")
+    k_patt = re.compile(".*k=(10*).*")
+
+    # Organize all benchmark runs by the following hierarchy: algo -> backend -> dataset
+    benchmarks = {}
+
+    # Populate benchmarks dir from .json files
+    for json_file in logs_dir.glob("*.json"):
+        # print(f"READING {json_file}")
+        try:
+            data = json.loads(open(json_file).read())
+        except json.decoder.JSONDecodeError:
+            # print(f"PROBLEM READING {json_file}, skipping.")
+            continue
+
+        for benchmark_run in data["benchmarks"]:
+            # example name: "bench_triangles[ds=netscience-backend=cugraph-preconverted]"
+            name = benchmark_run["name"]
+
+            algo_name = name.split("[")[0]
+            if algo_name.startswith("bench_"):
+                algo_name = algo_name[6:]
+            # special case for betweenness_centrality
+            match = k_patt.match(name)
+            if match is not None:
+                algo_name += f", k={match.group(1)}"
+
+            match = dataset_patt.match(name)
+            if match is None:
+                raise RuntimeError(
+                    f"benchmark name {name} in file {json_file} has an unexpected format"
+                )
+            dataset = match.group(1)
+            if dataset.endswith("-backend"):
+                dataset = dataset[:-8]
+
+            match = backend_patt.match(name)
+            if match is None:
+                raise RuntimeError(
+                    f"benchmark name {name} in file {json_file} has an unexpected format"
+                )
+            backend = match.group(1)
+            if backend == "None":
+                backend = "networkx"
+
+            runtime = benchmark_run["stats"]["mean"]
+            benchmarks.setdefault(algo_name, {}).setdefault(backend, {})[dataset] = runtime
+    # breakpoint()
+
+    # dump HTML table
+    ordered_datasets = [
+        "netscience",
+        "email_Eu_core",
+        "cit-patents",
+        "hollywood",
+        "soc-livejournal1",
+    ]
+
+    print(
+        """
+<html>
+<head>
+<style>
+table {
+table-layout: fixed;
+width: 100%;
+border-collapse: collapse;
+}
+tbody tr:nth-child(odd) {
+background-color: #ffffff;
+}
+tbody tr:nth-child(even) {
+background-color: #d3d3d3;
+}
+tbody td {
+text-align: center;
+color: black;
+}
+th,
+td {
+padding: 10px;
+}
+.footer {
+background-color: #f1f1f1;
+padding: 10px;
+text-align: center;
+font-size: 14px;
+color: #333;
+left: 0;
+bottom: 0;
+width: 100%;
+}
+.footer a {
+color: #007bff;
+text-decoration: none;
+}
+.footer a:hover {
+text-decoration: underline;
+}
+</style>
+</head>
+<table>
+<thead>
+<tr>
+<th></th>"""
+    )
+    for ds in ordered_datasets:
+        print(f" <th>{ds}</th>")
+    print(
+        """ </tr>
+</thead>
+<tbody>
+"""
+    )
+
+
+    for algo_name in benchmarks:
+        algo_runs = benchmarks[algo_name]
+        print(" <tr>")
+        print(f" <td>{algo_name}</td>")
+        # Proceed only if any results are present for both cugraph and NX
+        if "cugraph" in algo_runs and "networkx" in algo_runs:
+            cugraph_algo_runs = algo_runs["cugraph"]
+            networkx_algo_runs = algo_runs["networkx"]
+            datasets_in_both = set(cugraph_algo_runs).intersection(networkx_algo_runs)
+
+            # populate the table with speedup results for each dataset in the order
+            # specified in ordered_datasets. If results for a run using a dataset
+            # are not present for both cugraph and NX, output an empty cell.
+            for dataset in ordered_datasets:
+                if dataset in datasets_in_both:
+                    cugraph_runtime = cugraph_algo_runs[dataset]
+                    networkx_runtime = networkx_algo_runs[dataset]
+                    (speedup, runtime_delta) = compute_perf_vals(
+                        cugraph_runtime=cugraph_runtime, networkx_runtime=networkx_runtime
+                    )
+                    print(f" <td>{speedup}<br>{runtime_delta}</td>")
+                else:
+                    print(f" <td></td>")
+
+        # If a comparison between cugraph and NX cannot be made, output empty cells
+        # for each dataset
+        else:
+            for _ in range(len(ordered_datasets)):
+                print(" <td></td>")
+
+        print(" </tr>")
+
+    print(
+        """
+</tbody>
+</table>
+<div class="footer">
+</div>
+</html>
+"""
+    )
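To make the name-parsing above concrete, here is a small standalone walk-through of the logic the script applies to each pytest-benchmark record. The sample names are illustrative, the r-prefixes on the regexes are an editorial addition to silence Python's invalid-escape-sequence warning (the patterns are otherwise copied verbatim), and the body of compute_perf_vals is collapsed in the diff above, so the delta computation sketched here is an assumption, not the repo's code:

    import re

    # Patterns from create_results_summary_page.py (r-prefix added; matching
    # behavior is identical).
    dataset_patt = re.compile(r".*ds=([\w-]+).*")
    backend_patt = re.compile(r".*backend=(\w+).*")
    k_patt = re.compile(r".*k=(10*).*")

    name = "bench_triangles[ds=netscience-backend=cugraph-preconverted]"

    algo_name = name.split("[")[0]
    if algo_name.startswith("bench_"):
        algo_name = algo_name[6:]                  # -> "triangles"

    dataset = dataset_patt.match(name).group(1)    # greedy [\w-]+ -> "netscience-backend"
    if dataset.endswith("-backend"):               # trim what the regex over-captured
        dataset = dataset[:-8]                     # -> "netscience"

    backend = backend_patt.match(name).group(1)    # \w+ stops at "-" -> "cugraph"
    if backend == "None":
        backend = "networkx"

    print(algo_name, backend, dataset)             # triangles cugraph netscience

    # betweenness_centrality runs carry a k= parameter (hypothetical sample name):
    k_match = k_patt.match("bench_betweenness_centrality[ds=hollywood-backend=cugraph-k=100]")
    if k_match is not None:
        print(f"k={k_match.group(1)}")             # k=100

    def compute_perf_vals(cugraph_runtime, networkx_runtime):
        # Only the first and last lines of this function appear in the diff;
        # the delta formatting in between is an assumption.
        speedup_string = f"{networkx_runtime / cugraph_runtime:.3f}X"
        delta_string = f"{networkx_runtime - cugraph_runtime:.3f}s"  # hypothetical
        return (speedup_string, delta_string)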
benchmarks/nx-cugraph/pytest-based/get_graph_bench_dataset.py (12 changes: 7 additions & 5 deletions)

@@ -25,9 +25,11 @@

 import cugraph.datasets as cgds

-# download and store dataset (csv) by using the Datasets API
-dataset = sys.argv[1].replace("-", "_")
-dataset_obj = getattr(cgds, dataset)
-
-if not dataset_obj.get_path().exists():
-    dataset_obj.get_edgelist(download=True)
+if __name__ == "__main__":
+    # download and store dataset (csv) by using the Datasets API
+    dataset = sys.argv[1].replace("-", "_")
+    dataset_obj = getattr(cgds, dataset)
+
+    if not dataset_obj.get_path().exists():
+        dataset_obj.get_edgelist(download=True)
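The dash-to-underscore substitution is what lets callers pass dataset names in either spelling (e.g. "soc-livejournal" in the run script below resolves to cgds.soc_livejournal). A rough sketch of the script's effect over the run script's dataset list, assuming these dataset objects exist in the installed cugraph (the run script implies they do):

    import cugraph.datasets as cgds

    for cli_name in ["netscience", "email_Eu_core", "cit_patents", "hollywood", "soc-livejournal"]:
        dataset_obj = getattr(cgds, cli_name.replace("-", "_"))  # AttributeError for unknown names
        if not dataset_obj.get_path().exists():      # skip the download when already cached
            dataset_obj.get_edgelist(download=True)  # fetch and store the csv edge list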
benchmarks/nx-cugraph/pytest-based/run-gap-benchmarks.sh (31 changes: 26 additions & 5 deletions)

@@ -27,27 +27,48 @@ algos="
 triangles
 bfs_predecessors
 "
+algos="
+weakly_connected_components
+"
 datasets="
 netscience
 email_Eu_core
 cit_patents
 hollywood
 soc-livejournal
 "
+datasets="
+netscience
+email_Eu_core
+cit_patents
+hollywood
+soc-livejournal
+"
+datasets="
+hollywood
+"

 # None backend is default networkx
 # cugraph-preconvert backend is nx-cugraph
 backends="
 None
 cugraph-preconverted
 "
+backends="
+cugraph-preconverted
+"

-for dataset in $datasets; do
-    python ensure_dataset_accessible.py $dataset
-    for backend in $backends; do
-        for algo in $algos; do
+for algo in $algos; do
+    for dataset in $datasets; do
+        python get_graph_bench_dataset.py $dataset
+        for backend in $backends; do
             name="${backend}__${algo}__${dataset}"
             # echo "Running: $backend, $dataset, bench_$algo"
             echo "RUNNING: \"pytest -sv -k \"$backend and $dataset and bench_$algo and not 1000\" --benchmark-json=\"logs/${name}.json\" bench_algos.py"
-            pytest -sv -k "$backend and $dataset and bench_$algo and not 1000" --benchmark-json="logs/${name}.json" bench_algos.py 2>&1 | tee "logs/${name}.out"
+            pytest -sv \
+                -k "$backend and $dataset and bench_$algo and not 1000" \
+                --benchmark-json="logs/${name}.json" \
+                bench_algos.py 2>&1 | tee "logs/${name}.out"
         done
     done
 done
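The file names are what tie the pieces together: each pytest invocation above writes logs/<backend>__<algo>__<dataset>.json, which is exactly the set of files create_results_summary_page.py globs and parses. A minimal single-run driver in Python, for illustration only; it mirrors the pytest flags the shell loop builds, and redirecting the summary script's stdout to a file is my suggestion rather than something the repo scripts do:

    import pathlib
    import subprocess

    logs_dir = pathlib.Path("logs")
    logs_dir.mkdir(exist_ok=True)

    backend, algo, dataset = "cugraph-preconverted", "triangles", "hollywood"
    name = f"{backend}__{algo}__{dataset}"

    # Same selection expression as the shell loop; "not 1000" presumably
    # filters out the k=1000 benchmark variants.
    subprocess.run(
        [
            "pytest", "-sv",
            "-k", f"{backend} and {dataset} and bench_{algo} and not 1000",
            f"--benchmark-json=logs/{name}.json",
            "bench_algos.py",
        ],
        check=False,  # pytest exits non-zero when selected benchmarks fail
    )

    # Afterwards, render the HTML summary from whatever landed in logs/:
    #   python create_results_summary_page.py > results.html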
