From a0947995eb1556bf46957804bdbd02c3ee4cb0a9 Mon Sep 17 00:00:00 2001
From: Naim <110031745+naimnv@users.noreply.github.com>
Date: Thu, 15 Feb 2024 14:47:30 +0100
Subject: [PATCH] Update SG notebook (#4169)

Update SG notebook to handle a couple of cases where the current notebook
would fail: when the graph is unweighted, and when the graph doesn't have a
node with id 0.

**Changes for SG notebook:**
- For graphs without weights, run BFS instead of SSSP.
- If the input graph doesn't have a node with id 0, pick an existing node id
  as the seed for BFS and SSSP.
- Simplify bookkeeping for run statistics.

**Changes for MG notebook:**
- Call SSSP or BFS based on the weighted attribute.

Authors:
  - Naim (https://github.com/naimnv)

Approvers:
  - Brad Rees (https://github.com/BradReesWork)
  - Don Acosta (https://github.com/acostadon)

URL: https://github.com/rapidsai/cugraph/pull/4169
---
 ...e.ipynb => synth_release_single_gpu.ipynb} | 276 ++++++++----------
 .../synth_release_single_node_multi_gpu.ipynb |  43 +--
 2 files changed, 154 insertions(+), 165 deletions(-)
 rename notebooks/cugraph_benchmarks/{synth_release.ipynb => synth_release_single_gpu.ipynb} (77%)

diff --git a/notebooks/cugraph_benchmarks/synth_release.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb
similarity index 77%
rename from notebooks/cugraph_benchmarks/synth_release.ipynb
rename to notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb
index 18979f3ecee..1acef5d558b 100644
--- a/notebooks/cugraph_benchmarks/synth_release.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release_single_gpu.ipynb
@@ -40,7 +40,8 @@
     "| Author | Date | Update | cuGraph Version | Test Hardware |\n",
     "| --------------|------------|---------------------|-----------------|------------------------|\n",
     "| Don Acosta | 1/12/2023 | Created | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
-    "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n"
+    "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
+    "| Naim, Md | 2/12/2024 | Modified | 24.04 nightly | RTX A6000, CUDA 12.0 |\n"
    ]
   },
   {
@@ -124,12 +125,11 @@
     "import gc\n",
     "import os\n",
     "from time import perf_counter\n",
-    "import numpy as np\n",
-    "import math\n",
+    "import pandas as pd\n",
+    "from collections import defaultdict\n",
     "\n",
     "# rapids\n",
     "import cugraph\n",
-    "import cudf\n",
     "\n",
     "# NetworkX libraries\n",
     "import networkx as nx\n",
@@ -212,7 +212,7 @@
     "\n",
     "\n",
     "# Which dataset is to be used\n",
-    "data = data_full\n"
+    "data = data_quick\n"
    ]
   },
   {
@@ -518,16 +518,13 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def nx_bfs(_G):\n",
-    "    seed = 0\n",
+    "def nx_bfs(_G, seed):\n",
     "    t1 = perf_counter()\n",
-    "    nb = nx.bfs_edges(_G, seed)\n",
-    "    nb_list = list(nb) # gen -> list\n",
+    "    _ = nx.bfs_edges(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n",
     "\n",
-    "def cu_bfs(_G):\n",
-    "    seed = 0\n",
+    "def cu_bfs(_G, seed):\n",
     "    t1 = perf_counter()\n",
     "    _ = cugraph.bfs(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
@@ -547,17 +544,21 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "def nx_sssp(_G):\n",
-    "    seed = 0\n",
+    "def nx_sssp(_G, seed):\n",
     "    t1 = perf_counter()\n",
-    "    _ = nx.shortest_path(_G, seed)\n",
+    "    if nx.is_weighted(_G):\n",
+    "        _ = nx.shortest_path(_G, seed)\n",
+    "    else:\n",
+    "        _ = nx.bfs_edges(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n",
     "\n",
-    "def cu_sssp(_G):\n",
-    "    seed = 0\n",
+    "def cu_sssp(_G, seed):\n",
     "    t1 = perf_counter()\n",
-    "    _ = cugraph.sssp(_G, seed)\n",
+    "    if _G.weighted:\n",
+    "
_ = cugraph.sssp(_G, seed)\n", + " else:\n", + " _ = cugraph.bfs(_G, seed)\n", " t2 = perf_counter() - t1\n", " return t2\n" ] @@ -571,6 +572,27 @@ "# Benchmark" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Placeholders to collect algorithm run statistics" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "cugraph_algo_run_times = defaultdict(defaultdict)\n", + "nx_algo_run_times = defaultdict(defaultdict)\n", + "cugraph_graph_creation_times = defaultdict()\n", + "nx_graph_creation_times = defaultdict()\n", + "perf_algos = defaultdict(defaultdict)\n", + "perf = defaultdict(defaultdict)" + ] + }, { "cell_type": "code", "execution_count": null, @@ -587,79 +609,50 @@ "metadata": {}, "outputs": [], "source": [ - "# arrays to capture performance gains\n", - "names = []\n", - "algos = []\n", - "graph_create_cu = []\n", - "graph_create_nx = []\n", - "\n", - "# Two dimension data [file, perf]\n", - "time_algo_nx = [] # NetworkX\n", - "time_algo_cu = [] # cuGraph\n", - "perf = []\n", - "perf_algo = []\n", "\n", - "algos.append(\" \")\n", - "\n", - "i = 0\n", - "for k,v in data.items():\n", - " # init all the 2-d arrays\n", - " time_algo_nx.append([])\n", - " time_algo_cu.append([])\n", - " perf.append([])\n", - " perf_algo.append([])\n", - "\n", - " # Saved the file Name\n", - " names.append(k)\n", + "for dataset, scale in data.items():\n", "\n", " # generate data\n", " print(\"------------------------------\")\n", - " print(f'Creating Graph of Scale = {v}')\n", - "\n", - " gdf = generate_data(v)\n", + " print(f'Creating Graph of Scale = {scale}')\n", + " \n", + " gdf = generate_data(scale)\n", " pdf = gdf.to_pandas()\n", - " print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n", "\n", - " # create the graphs\n", + " print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n", + " \n", + " # create cuGraph and NX graphs\n", " g_cu, tcu = create_cu_graph(gdf)\n", " g_nx, tnx = create_nx_graph(pdf)\n", - " graph_create_cu.append(tcu)\n", - " graph_create_nx.append(tnx)\n", + " cugraph_graph_creation_times[dataset] = tcu\n", + " nx_graph_creation_times[dataset] = tnx\n", " del gdf, pdf\n", "\n", " # prep\n", " deg = g_cu.degree()\n", " deg_max = deg['degree'].max()\n", - "\n", " alpha = 1 / deg_max\n", " num_nodes = g_cu.number_of_vertices()\n", - "\n", " del deg\n", " gc.collect()\n", "\n", - " #----- Algorithm order is same as defined at top ----\n", - "\n", " #-- Katz \n", - " print(\"\\tKatz \", end = '')\n", - " if i == 0: \n", - " algos.append(\"Katz\")\n", - "\n", + " algorithm = \"Katz\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_katz(g_nx, alpha)\n", " print(\"c.\", end='')\n", " tc = cu_katz(g_cu, alpha)\n", " print(\"\")\n", - "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " \n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- BC\n", - " print(\"\\tBC k=100 \", end='')\n", - " if i == 0:\n", - " algos.append(\"BC Estimate fixed\")\n", - "\n", + " algorithm = \"BC\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " k = 100\n", " if k > num_nodes:\n", " k = int(num_nodes)\n", @@ -668,80 +661,70 @@ " print(\"c.\", end='')\n", " tc = 
cu_bc(g_cu, k)\n", " print(\" \")\n", - "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- Louvain\n", - " print(\"\\tLouvain \", end='')\n", - " if i == 0:\n", - " algos.append(\"Louvain\")\n", - "\n", + " algorithm = \"Louvain\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_louvain(g_nx)\n", " print(\"c.\", end='')\n", " tc = cu_louvain(g_cu)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- TC\n", - " print(\"\\tTC \", end='')\n", - " if i == 0:\n", - " algos.append(\"TC\")\n", - "\n", + " algorithm = \"TC\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_tc(g_nx)\n", " print(\"c.\", end='')\n", " tc = cu_tc(g_cu)\n", " print(\" \")\n", - "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " \n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- Core Number\n", - " print(\"\\tCore Number \", end='')\n", - " if i == 0:\n", - " algos.append(\"Core Number\")\n", - "\n", + " algorithm = \"Core Number\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_core_num(g_nx)\n", " print(\"c.\", end='')\n", " tc = cu_core_num(g_cu)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- PageRank\n", - " print(\"\\tPageRank \", end='')\n", - " if i == 0:\n", - " algos.append(\"PageRank\")\n", - "\n", + " algorithm = \"PageRank\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", " tx = nx_pagerank(g_nx)\n", " print(\"c.\", end='')\n", " tc = cu_pagerank(g_cu)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- Jaccard\n", - " print(\"\\tJaccard \", end='')\n", - " if i == 0:\n", - " algos.append(\"Jaccard\")\n", + " algorithm = \"Jaccard\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", "\n", " print(\"n.\", end='')\n", " tx = nx_jaccard(g_nx)\n", @@ -749,46 +732,44 @@ 
" tc = cu_jaccard(g_cu)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", - " #-- BFS\n", - " print(\"\\tBFS \", end='')\n", - " if i == 0:\n", - " algos.append(\"BFS\")\n", + " # Seed for BFS and SSSP\n", + " nx_seed = list(g_nx.nodes)[0]\n", + " cu_seed = g_cu.nodes().to_pandas().iloc[0]\n", "\n", + " #-- BFS\n", + " algorithm = \"BFS\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", - " tx = nx_bfs(g_nx)\n", + " tx = nx_bfs(g_nx, seed=nx_seed)\n", " print(\"c.\", end='')\n", - " tc = cu_bfs(g_cu)\n", + " tc = cu_bfs(g_cu, seed=cu_seed)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", " #-- SSSP\n", - " print(\"\\tSSSP \", end='')\n", - " if i == 0:\n", - " algos.append(\"SSP\")\n", - "\n", + " algorithm = \"SSSP\"\n", + " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", - " tx = nx_sssp(g_nx)\n", + " tx = nx_sssp(g_nx, seed=nx_seed)\n", + "\n", " print(\"c.\", end='')\n", - " tc = cu_sssp(g_cu)\n", + " tc = cu_sssp(g_cu, seed=cu_seed)\n", " print(\" \")\n", "\n", - " time_algo_nx[i].append(tx)\n", - " time_algo_cu[i].append(tc)\n", - " perf_algo[i].append ( (tx/tc) )\n", - " perf[i].append( (tx + tnx) / (tc + tcu) )\n", + " nx_algo_run_times[dataset][algorithm] = tx\n", + " cugraph_algo_run_times[dataset][algorithm] = tc\n", + " perf_algos[dataset][algorithm] = tx/tc \n", + " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", - " # increament count\n", - " i = i + 1\n", - " \n", " del g_cu, g_nx\n", " gc.collect()\n" ] @@ -799,13 +780,11 @@ "metadata": {}, "outputs": [], "source": [ - "#Print results\n", - "print(algos)\n", - "\n", - "for i in range(num_datasets):\n", - " print(f\"{names[i]}\")\n", - " print(f\"{perf[i]}\")\n", - " print(f\"{perf_algo[i]}\")" + "# Speedup\n", + "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX)------\\n\")\n", + "print(pd.DataFrame(perf))\n", + "print(\"\\n\\t------Speedup (cuGraph w.r.t. 
NX, excluding graph creation time)------\\n\")\n",
+    "print(pd.DataFrame(perf_algos))"
   ]
  },
  {
@@ -814,15 +793,16 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "#Print results\n",
-    "print(\"\\n------------------------------\")\n",
-    "print(\"\\tAlgorithm Run times (NX then cuGraph)\\n\")\n",
+    "# Nx and cuGraph execution times for different algorithms\n",
+    "nx_and_cugraph_run_times = pd.DataFrame()\n",
+    "for dataset in cugraph_algo_run_times.keys():\n",
+    "    temp_df = pd.DataFrame({'NX': nx_algo_run_times[dataset], 'cuGraph': cugraph_algo_run_times[dataset]})\n",
+    "    columns = [(dataset, 'cuGraph'), (dataset, 'NX')]\n",
+    "    temp_df.columns = pd.MultiIndex.from_tuples(columns)\n",
+    "    nx_and_cugraph_run_times = pd.concat([temp_df, nx_and_cugraph_run_times], axis=1)\n",
     "\n",
-    "print(algos)\n",
-    "for i in range(num_datasets):\n",
-    "    print(f\"{names[i]}\")\n",
-    "    print(f\"{time_algo_nx[i]}\")\n",
-    "    print(f\"{time_algo_cu[i]}\")"
+    "print(\"\\n\\t------cuGraph and NX execution times for different algorithms-----\\n\")\n",
+    "print(nx_and_cugraph_run_times)"
    ]
   },
   {
@@ -855,7 +835,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.9.15 | packaged by conda-forge | (main, Nov 22 2022, 15:55:03) \n[GCC 10.4.0]"
+   "version": "3.1.0"
   },
   "vscode": {
    "interpreter": {
diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
index c44f475c441..dd09830160f 100644
--- a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
@@ -39,9 +39,9 @@
     " \n",
     "| Author | Date | Update | cuGraph Version | Test Hardware |\n",
     "| --------------|------------|---------------------|-----------------|------------------------|\n",
-    "| Don Acosta | 1/12/2023 | Created | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
-    "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
-    "| Naim, Md | 2/08/2024 | Modified for SNMG | 24.04 nightly | RTX A6000, CUDA 12.0 |\n"
+    "| Don Acosta | 1/12/2023 | Created | 23.02 nightly | 2xRTX A6000, CUDA 11.7 |\n",
+    "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | 2xRTX A6000, CUDA 11.7 |\n",
+    "| Naim, Md | 2/08/2024 | Modified for SNMG | 24.04 nightly | 2xRTX A6000, CUDA 12.0 |\n"
    ]
   },
   {
@@ -590,7 +590,10 @@
    "source": [
     "def nx_sssp(_G, seed):\n",
     "    t1 = perf_counter()\n",
-    "    _ = nx.shortest_path(_G, seed)\n",
+    "    if nx.is_weighted(_G):\n",
+    "        _ = nx.shortest_path(_G, seed)\n",
+    "    else:\n",
+    "        _ = nx.bfs_edges(_G, seed)\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n",
     "\n",
@@ -599,9 +602,17 @@
     "    t1 = perf_counter()\n",
     "    # SSSP requires weighted graph\n",
     "    if mg:\n",
-    "        _ = cugraph.dask.bfs(_G, seed)\n",
+    "        if _G.weighted: \n",
+    "            _ = cugraph.dask.sssp(_G, seed)\n",
+    "        else:\n",
+    "            _ = cugraph.dask.bfs(_G, seed)\n",
+    "\n",
     "    else:\n",
-    "        _ = cugraph.bfs(_G, seed)\n",
+    "        if _G.weighted:\n",
+    "            _ = cugraph.sssp(_G, seed)\n",
+    "        else:\n",
+    "            _ = cugraph.bfs(_G, seed)\n",
+    "\n",
     "    t2 = perf_counter() - t1\n",
     "    return t2\n"
    ]
@@ -646,14 +657,12 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "\n",
-    "\n",
-    "nx_algo_run_times = defaultdict(defaultdict)\n",
     "cugraph_algo_run_times = defaultdict(defaultdict)\n",
-    "perf_algos = defaultdict(defaultdict)\n",
-    "perf = defaultdict(defaultdict)\n",
+    "nx_algo_run_times = defaultdict(defaultdict)\n",
     "cugraph_graph_creation_times = defaultdict()\n",
-
"nx_graph_creation_times = defaultdict()\n" + "nx_graph_creation_times = defaultdict()\n", + "perf_algos = defaultdict(defaultdict)\n", + "perf = defaultdict(defaultdict)" ] }, { @@ -811,9 +820,9 @@ " algorithm = \"BFS\"\n", " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", - " tx = nx_bfs(g_nx, nx_seed)\n", + " tx = nx_bfs(g_nx, seed=nx_seed)\n", " print(\"c.\", end='')\n", - " tc = cu_bfs(g_cu, seed = cu_seed, mg=True)\n", + " tc = cu_bfs(g_cu, seed=cu_seed, mg=True)\n", " print(\" \")\n", "\n", " nx_algo_run_times[dataset][algorithm] = tx\n", @@ -825,10 +834,10 @@ " algorithm = \"SSSP\"\n", " print(f\"\\t{algorithm} \", end = '')\n", " print(\"n.\", end='')\n", - " tx = nx_sssp(g_nx, nx_seed)\n", + " tx = nx_sssp(g_nx, seed=nx_seed)\n", "\n", " print(\"c.\", end='')\n", - " tc = cu_sssp(g_cu, seed = cu_seed, mg=True)\n", + " tc = cu_sssp(g_cu, seed=cu_seed, mg=True)\n", " print(\" \")\n", "\n", " nx_algo_run_times[dataset][algorithm] = tx\n", @@ -856,7 +865,7 @@ "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX)------\\n\")\n", "print(pd.DataFrame(perf))\n", "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX, excluding graph creation time)------\\n\")\n", - "print(pd.DataFrame(perf_algos))\n" + "print(pd.DataFrame(perf_algos))" ] }, {