From e7b42a456ad17771465ce7280c45001956af9e36 Mon Sep 17 00:00:00 2001
From: Naim
Date: Sat, 13 Jan 2024 02:08:55 +0100
Subject: [PATCH 1/8] Update notebook to include example MG runs
---
.../cugraph_benchmarks/synth_release.ipynb | 402 +++++++++++++++---
1 file changed, 349 insertions(+), 53 deletions(-)
diff --git a/notebooks/cugraph_benchmarks/synth_release.ipynb b/notebooks/cugraph_benchmarks/synth_release.ipynb
index 18979f3ecee..395a65ce73c 100644
--- a/notebooks/cugraph_benchmarks/synth_release.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release.ipynb
@@ -116,7 +116,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
@@ -131,6 +131,11 @@
"import cugraph\n",
"import cudf\n",
"\n",
+ "# to parallelize with dask\n",
+ "import dask_cudf\n",
+ "from cugraph.dask.common.mg_utils import get_visible_devices\n",
+ "from cugraph.testing.mg_utils import start_dask_client, stop_dask_client\n",
+ "\n",
"# NetworkX libraries\n",
"import networkx as nx\n",
"\n",
@@ -141,7 +146,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
@@ -184,7 +189,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
@@ -212,7 +217,7 @@
"\n",
"\n",
"# Which dataset is to be used\n",
- "data = data_full\n"
+ "data = data_quick\n"
]
},
{
@@ -225,13 +230,13 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"# Data generator \n",
"# The result is an edgelist of the size determined by the scale and edge factor\n",
- "def generate_data(scale, edgefactor=16):\n",
+ "def generate_data(scale, edgefactor=16, mg=False):\n",
" _gdf = rmat(\n",
" scale,\n",
" (2 ** scale) * edgefactor,\n",
@@ -242,12 +247,17 @@
" clip_and_flip=False,\n",
" scramble_vertex_ids=True,\n",
" create_using=None, # return edgelist instead of Graph instance\n",
- " mg=False # determines whether generated data will be used on one or multiple GPUs\n",
+ " mg=mg # determines whether generated data will be used on one or multiple GPUs\n",
" )\n",
"\n",
" clean_coo = NumberMap.renumber(_gdf, src_col_names=\"src\", dst_col_names=\"dst\")[0]\n",
- " clean_coo.rename(columns={\"renumbered_src\": \"src\", \"renumbered_dst\": \"dst\"}, inplace=True)\n",
- " print(f'Generated a dataframe of {len(clean_coo)} edges')\n",
+ " if mg:\n",
+ " clean_coo.rename(columns={\"renumbered_src\": \"src\", \"renumbered_dst\": \"dst\"})\n",
+ " else:\n",
+ " clean_coo.rename(columns={\"renumbered_src\": \"src\", \"renumbered_dst\": \"dst\"}, inplace=True)\n",
+ "\n",
+ " print(f'Generated a dataframe of type {type(clean_coo)}, with {len(clean_coo)} edges')\n",
+ " \n",
" return clean_coo"
]
},
@@ -263,7 +273,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
@@ -284,17 +294,23 @@
"\n",
" return _gnx, t2\n",
"\n",
- "\n",
"# cuGraph\n",
- "def create_cu_graph(_df,transpose=False, directed=False):\n",
+ "def create_cu_graph(_df,transpose=False, directed=False, mg=False):\n",
" t1 = perf_counter()\n",
" _g = cugraph.Graph(directed=directed)\n",
- " _g.from_cudf_edgelist(_df,\n",
- " source='src',\n",
- " destination='dst',\n",
- " edge_attr=None,\n",
- " renumber=False,\n",
- " store_transposed=transpose)\n",
+ "\n",
+ " if mg:\n",
+ " # Set the number of partition to #GPUs\n",
+ " npartitions = len(get_visible_devices())\n",
+ " _ddf = dask_cudf.from_cudf(_df.compute(), npartitions=npartitions)\n",
+ " _g.from_dask_cudf_edgelist(_ddf, source=\"src\", destination=\"dst\", edge_attr=None)\n",
+ " else:\n",
+ " _g.from_cudf_edgelist(_df,\n",
+ " source='src',\n",
+ " destination='dst',\n",
+ " edge_attr=None,\n",
+ " renumber=False,\n",
+ " store_transposed=transpose)\n",
" t2 = perf_counter() - t1\n",
"\n",
" return _g, t2"
@@ -316,7 +332,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
@@ -326,9 +342,13 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_katz(_G, alpha):\n",
+ "def cu_katz(_G, alpha, mg=False):\n",
" t1 = perf_counter()\n",
- " _ = cugraph.katz_centrality(_G, alpha)\n",
+ " if mg:\n",
+ " _ = cugraph.dask.katz_centrality(_G, alpha)\n",
+ " else:\n",
+ "\n",
+ " _ = cugraph.katz_centrality(_G, alpha)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -342,7 +362,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
@@ -352,9 +372,12 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_bc(_G, _k):\n",
+ "def cu_bc(_G, _k, mg=False):\n",
" t1 = perf_counter()\n",
- " _ = cugraph.betweenness_centrality(_G, k=_k)\n",
+ " if mg:\n",
+ " _ = cugraph.dask.betweenness_centrality(_G, k=_k)\n",
+ " else: \n",
+ " _ = cugraph.betweenness_centrality(_G, k=_k)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -368,7 +391,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
@@ -382,11 +405,16 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_louvain(_G):\n",
+ "def cu_louvain(_G, mg=False):\n",
" t1 = perf_counter()\n",
- " _,_ = cugraph.louvain(_G)\n",
+ " if mg:\n",
+ " _, modularity = cugraph.dask.louvain(_G)\n",
+ " print (f'modularity: {modularity}')\n",
+ " else:\n",
+ " _,_ = cugraph.louvain(_G)\n",
" t2 = perf_counter() - t1\n",
- " return t2\n"
+ " return t2\n",
+ "\n"
]
},
{
@@ -398,7 +426,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
@@ -414,9 +442,12 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_tc(_G):\n",
+ "def cu_tc(_G, mg=False):\n",
" t1 = perf_counter()\n",
- " _ = cugraph.triangle_count(_G)\n",
+ " if mg:\n",
+ " _ = cugraph.dask.triangle_count(_G)\n",
+ " else:\n",
+ " _ = cugraph.triangle_count(_G)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -430,7 +461,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
@@ -446,9 +477,12 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_core_num(_G):\n",
+ "def cu_core_num(_G, mg=False):\n",
" t1 = perf_counter()\n",
- " _ = cugraph.core_number(_G)\n",
+ " if mg:\n",
+ " _ = cugraph.dask.core_number(_G)\n",
+ " else:\n",
+ " _ = cugraph.core_number(_G)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -462,7 +496,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
@@ -472,9 +506,12 @@
" t2 = perf_counter() - t1\n",
" return t2 \n",
"\n",
- "def cu_pagerank(_G):\n",
+ "def cu_pagerank(_G, mg=False):\n",
" t1 = perf_counter()\n",
- " _ = cugraph.pagerank(_G)\n",
+ " if mg:\n",
+ " _ = cugraph.dask.pagerank(_G)\n",
+ " else:\n",
+ " _ = cugraph.pagerank(_G)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -488,7 +525,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
@@ -498,9 +535,13 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_jaccard(_G):\n",
+ "def cu_jaccard(_G, mg=False):\n",
+ " t1 = perf_counter()\n",
" t1 = perf_counter()\n",
- " _ = cugraph.jaccard_coefficient(_G)\n",
+ " if mg:\n",
+ " _ = cugraph.dask.jaccard(_G)\n",
+ " else:\n",
+ " _ = cugraph.jaccard_coefficient(_G)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -514,7 +555,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
@@ -526,10 +567,13 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_bfs(_G):\n",
+ "def cu_bfs(_G, mg=False):\n",
" seed = 0\n",
" t1 = perf_counter()\n",
- " _ = cugraph.bfs(_G, seed)\n",
+ " if mg:\n",
+ " _ = cugraph.dask.bfs(_G, seed)\n",
+ " else:\n",
+ " _ = cugraph.bfs(_G, seed)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -543,7 +587,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
@@ -554,10 +598,14 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_sssp(_G):\n",
+ "def cu_sssp(_G, mg=False):\n",
" seed = 0\n",
" t1 = perf_counter()\n",
- " _ = cugraph.sssp(_G, seed)\n",
+ " # SSSP requires weighted graph\n",
+ " if mg:\n",
+ " _ = cugraph.dask.bfs(_G, seed)\n",
+ " else:\n",
+ " _ = cugraph.bfs(_G, seed)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -573,7 +621,7 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
@@ -583,9 +631,137 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 16,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "------------------------------\n",
+ "Creating Graph of Scale = 9\n",
+ "Generated a dataframe of type , with 8192 edges\n",
+ "\tdata in gdf 8192 and data in pandas 8192\n",
+ "\tKatz n.c.\n",
+ "\tBC k=100 n.c."
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/centrality/katz_centrality.py:121: UserWarning: Katz centrality expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
+ " warnings.warn(warning_msg, UserWarning)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ " \n",
+ "\tLouvain n.c. \n",
+ "\tTC n.c. \n",
+ "\tCore Number n.c. \n",
+ "\tPageRank n.c. \n",
+ "\tJaccard n.c. \n",
+ "\tBFS n.c. \n",
+ "\tSSSP n.c. \n",
+ "------------------------------\n",
+ "Creating Graph of Scale = 10\n",
+ "Generated a dataframe of type , with 16384 edges\n",
+ "\tdata in gdf 16384 and data in pandas 16384\n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/link_analysis/pagerank.py:227: UserWarning: Pagerank expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
+ " warnings.warn(warning_msg, UserWarning)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\tKatz n.c.\n",
+ "\tBC k=100 n."
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/centrality/katz_centrality.py:121: UserWarning: Katz centrality expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
+ " warnings.warn(warning_msg, UserWarning)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "c. \n",
+ "\tLouvain n.c. \n",
+ "\tTC n.c. \n",
+ "\tCore Number n.c. \n",
+ "\tPageRank n.c. \n",
+ "\tJaccard n.c. \n",
+ "\tBFS n.c. \n",
+ "\tSSSP n.c. \n",
+ "------------------------------\n",
+ "Creating Graph of Scale = 11\n",
+ "Generated a dataframe of type , with 32768 edges\n",
+ "\tdata in gdf 32768 and data in pandas 32768\n",
+ "\tKatz n."
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/link_analysis/pagerank.py:227: UserWarning: Pagerank expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
+ " warnings.warn(warning_msg, UserWarning)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "c.\n",
+ "\tBC k=100 n."
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/centrality/katz_centrality.py:121: UserWarning: Katz centrality expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
+ " warnings.warn(warning_msg, UserWarning)\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "c. \n",
+ "\tLouvain n.c. \n",
+ "\tTC n.c. \n",
+ "\tCore Number n.c. \n",
+ "\tPageRank n.c. \n",
+ "\tJaccard n.c. \n",
+ "\tBFS n.c. \n",
+ "\tSSSP n.c. \n"
+ ]
+ },
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/link_analysis/pagerank.py:227: UserWarning: Pagerank expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
+ " warnings.warn(warning_msg, UserWarning)\n"
+ ]
+ }
+ ],
"source": [
"# arrays to capture performance gains\n",
"names = []\n",
@@ -795,9 +971,26 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 17,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[' ', 'Katz', 'BC Estimate fixed', 'Louvain', 'TC', 'Core Number', 'PageRank', 'Jaccard', 'BFS', 'SSP']\n",
+ "data_scale_9\n",
+ "[0.30174025119666154, 0.5252496066735675, 0.4815963183980694, 0.10059243531734213, 0.06274260384596582, 0.08087691640849742, 0.04259226789052634, 0.04541648848958121, 0.04630903277395244]\n",
+ "[10.57018736844198, 1.0236825289491465, 3.4565544669058315, 1.7519735735549575, 0.20360754388242536, 1.996866060869272, 0.017142060522749525, 0.07449551793212239, 0.10215939753145187]\n",
+ "data_scale_10\n",
+ "[10.422613622706065, 2.2596945327623845, 5.400742833397456, 2.670122216953594, 0.682892670528767, 2.511963848371226, 0.5618378459026585, 1.0027274366915082, 1.0377674050792682]\n",
+ "[27.48959075860895, 2.275587863157422, 6.2873877384903345, 3.4689708464108686, 0.3821397055328311, 3.8512864713871493, 0.0005884210429041734, 0.1674301749913076, 0.19472820374933392]\n",
+ "data_scale_11\n",
+ "[18.597918818212044, 4.662328129832614, 9.282539915015656, 5.826916762148887, 1.2560074010281037, 4.831561094416702, 0.6815771129976337, 1.956713995982869, 2.0017224534087275]\n",
+ "[50.381630992178266, 4.706337822317938, 10.371674476982982, 8.016961514792193, 0.7960839977603095, 7.554931432029648, 0.000286892032694886, 0.36503329352194, 0.3936930402823485]\n"
+ ]
+ }
+ ],
"source": [
"#Print results\n",
"print(algos)\n",
@@ -810,9 +1003,30 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 18,
"metadata": {},
- "outputs": [],
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "------------------------------\n",
+ "\tAlgorithm Run times (NX then cuGraph)\n",
+ "\n",
+ "[' ', 'Katz', 'BC Estimate fixed', 'Louvain', 'TC', 'Core Number', 'PageRank', 'Jaccard', 'BFS', 'SSP']\n",
+ "data_scale_9\n",
+ "[0.03168975654989481, 0.11808181460946798, 0.0607406310737133, 0.007159244269132614, 0.003216283395886421, 0.004577603191137314, 0.00012310687452554703, 0.00039636343717575073, 0.0004781009629368782]\n",
+ "[0.0029980316758155823, 0.11535003408789635, 0.017572594806551933, 0.004086388275027275, 0.015796484425663948, 0.002292393706738949, 0.007181568071246147, 0.005320634692907333, 0.004679950885474682]\n",
+ "data_scale_10\n",
+ "[0.07129270676523447, 0.26657472364604473, 0.1288990117609501, 0.01734251156449318, 0.008041061460971832, 0.009121773764491081, 7.294118404388428e-06, 0.0009355107322335243, 0.0010360050946474075]\n",
+ "[0.0025934437289834023, 0.11714543215930462, 0.020501202903687954, 0.004999324679374695, 0.021042203530669212, 0.002368500456213951, 0.012396086938679218, 0.005587467923760414, 0.005320262163877487]\n",
+ "data_scale_11\n",
+ "[0.13116520550101995, 0.5985189070925117, 0.296876666136086, 0.04408951010555029, 0.022953813895583153, 0.018125338479876518, 6.923452019691467e-06, 0.0021516336128115654, 0.0022359658032655716]\n",
+ "[0.002603433094918728, 0.1271729590371251, 0.0286237932741642, 0.005499528720974922, 0.02883340697735548, 0.0023991400375962257, 0.024132604710757732, 0.005894348956644535, 0.005679464899003506]\n"
+ ]
+ }
+ ],
"source": [
"#Print results\n",
"print(\"\\n------------------------------\")\n",
@@ -825,6 +1039,88 @@
" print(f\"{time_algo_cu[i]}\")"
]
},
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Example MG runs"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 22,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n",
+ "Perhaps you already have a cluster running?\n",
+ "Hosting the HTTP server on port 37567 instead\n",
+ " warnings.warn(\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "\n",
+ "Dask client/cluster created using LocalCUDACluster\n"
+ ]
+ },
+ {
+ "ename": "Exception",
+ "evalue": "Communicator is already initialized",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mException\u001b[0m Traceback (most recent call last)",
+ "Cell \u001b[0;32mIn[22], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Setting up cluter\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m setup_objs \u001b[38;5;241m=\u001b[39m \u001b[43mstart_dask_client\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m gdf \u001b[38;5;241m=\u001b[39m generate_data(\u001b[38;5;241m10\u001b[39m, mg\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# create graph\u001b[39;00m\n",
+ "File \u001b[0;32m~/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/testing/mg_utils.py:167\u001b[0m, in \u001b[0;36mstart_dask_client\u001b[0;34m(protocol, rmm_async, rmm_pool_size, dask_worker_devices, jit_unspill, worker_class, device_memory_limit)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;66;03m# FIXME: use proper logging, INFO or DEBUG level\u001b[39;00m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mDask client/cluster created using LocalCUDACluster\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 167\u001b[0m \u001b[43mComms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mp2p\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (client, cluster)\n",
+ "File \u001b[0;32m~/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/dask/comms/comms.py:158\u001b[0m, in \u001b[0;36minitialize\u001b[0;34m(comms, p2p, prows, pcols, partition_type)\u001b[0m\n\u001b[1;32m 156\u001b[0m __instance \u001b[38;5;241m=\u001b[39m comms\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCommunicator is already initialized\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
+ "\u001b[0;31mException\u001b[0m: Communicator is already initialized"
+ ]
+ }
+ ],
+ "source": [
+ "\n",
+ "# Setting up cluter\n",
+ "setup_objs = start_dask_client()\n",
+ "\n",
+ "gdf = generate_data(10, mg=True)\n",
+ "# create graph\n",
+ "g_cu, tcu = create_cu_graph(gdf, mg=True)\n",
+ "del gdf\n",
+ "\n",
+ "# prep\n",
+ "deg = g_cu.degree()\n",
+ "deg_max = deg['degree'].max().compute()\n",
+ "\n",
+ "alpha = 1 / deg_max\n",
+ "num_nodes = g_cu.number_of_vertices()\n",
+ "k = 100 if num_nodes > 100 else num_nodes\n",
+ "\n",
+ "tc = cu_katz(g_cu, alpha, mg=True)\n",
+ "tc = cu_bc(g_cu, k, mg=True)\n",
+ "tc = cu_louvain(g_cu, mg=True)\n",
+ "tc = cu_tc(g_cu, mg=True)\n",
+ "tc = cu_core_num(g_cu, mg=True)\n",
+ "tc = cu_pagerank(g_cu, mg=True)\n",
+ "tc = cu_jaccard(g_cu, mg=True)\n",
+ "tc = cu_bfs(g_cu, mg=True)\n",
+ "\n",
+ "\n",
+ "\n",
+ "# Tearing down the cluster\n",
+ "stop_dask_client(*setup_objs)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ },
{
"cell_type": "markdown",
"metadata": {},
From 049d83352d7ed5091e6b8571c650139051dae439 Mon Sep 17 00:00:00 2001
From: Naim
Date: Sat, 13 Jan 2024 02:14:24 +0100
Subject: [PATCH 2/8] Clear output
---
.../cugraph_benchmarks/synth_release.ipynb | 264 +++---------------
1 file changed, 32 insertions(+), 232 deletions(-)
diff --git a/notebooks/cugraph_benchmarks/synth_release.ipynb b/notebooks/cugraph_benchmarks/synth_release.ipynb
index 395a65ce73c..a04fa90ee3a 100644
--- a/notebooks/cugraph_benchmarks/synth_release.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release.ipynb
@@ -116,7 +116,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -146,7 +146,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -189,7 +189,7 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -230,7 +230,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -273,7 +273,7 @@
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -332,7 +332,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -362,7 +362,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -391,7 +391,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -426,7 +426,7 @@
},
{
"cell_type": "code",
- "execution_count": 9,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -461,7 +461,7 @@
},
{
"cell_type": "code",
- "execution_count": 10,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -496,7 +496,7 @@
},
{
"cell_type": "code",
- "execution_count": 11,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -525,7 +525,7 @@
},
{
"cell_type": "code",
- "execution_count": 12,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -555,7 +555,7 @@
},
{
"cell_type": "code",
- "execution_count": 13,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -587,7 +587,7 @@
},
{
"cell_type": "code",
- "execution_count": 14,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -621,7 +621,7 @@
},
{
"cell_type": "code",
- "execution_count": 15,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -631,137 +631,9 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "------------------------------\n",
- "Creating Graph of Scale = 9\n",
- "Generated a dataframe of type , with 8192 edges\n",
- "\tdata in gdf 8192 and data in pandas 8192\n",
- "\tKatz n.c.\n",
- "\tBC k=100 n.c."
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/centrality/katz_centrality.py:121: UserWarning: Katz centrality expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
- " warnings.warn(warning_msg, UserWarning)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " \n",
- "\tLouvain n.c. \n",
- "\tTC n.c. \n",
- "\tCore Number n.c. \n",
- "\tPageRank n.c. \n",
- "\tJaccard n.c. \n",
- "\tBFS n.c. \n",
- "\tSSSP n.c. \n",
- "------------------------------\n",
- "Creating Graph of Scale = 10\n",
- "Generated a dataframe of type , with 16384 edges\n",
- "\tdata in gdf 16384 and data in pandas 16384\n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/link_analysis/pagerank.py:227: UserWarning: Pagerank expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
- " warnings.warn(warning_msg, UserWarning)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\tKatz n.c.\n",
- "\tBC k=100 n."
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/centrality/katz_centrality.py:121: UserWarning: Katz centrality expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
- " warnings.warn(warning_msg, UserWarning)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "c. \n",
- "\tLouvain n.c. \n",
- "\tTC n.c. \n",
- "\tCore Number n.c. \n",
- "\tPageRank n.c. \n",
- "\tJaccard n.c. \n",
- "\tBFS n.c. \n",
- "\tSSSP n.c. \n",
- "------------------------------\n",
- "Creating Graph of Scale = 11\n",
- "Generated a dataframe of type , with 32768 edges\n",
- "\tdata in gdf 32768 and data in pandas 32768\n",
- "\tKatz n."
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/link_analysis/pagerank.py:227: UserWarning: Pagerank expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
- " warnings.warn(warning_msg, UserWarning)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "c.\n",
- "\tBC k=100 n."
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/centrality/katz_centrality.py:121: UserWarning: Katz centrality expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
- " warnings.warn(warning_msg, UserWarning)\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "c. \n",
- "\tLouvain n.c. \n",
- "\tTC n.c. \n",
- "\tCore Number n.c. \n",
- "\tPageRank n.c. \n",
- "\tJaccard n.c. \n",
- "\tBFS n.c. \n",
- "\tSSSP n.c. \n"
- ]
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/link_analysis/pagerank.py:227: UserWarning: Pagerank expects the 'store_transposed' flag to be set to 'True' for optimal performance during the graph creation\n",
- " warnings.warn(warning_msg, UserWarning)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"# arrays to capture performance gains\n",
"names = []\n",
@@ -971,26 +843,9 @@
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "[' ', 'Katz', 'BC Estimate fixed', 'Louvain', 'TC', 'Core Number', 'PageRank', 'Jaccard', 'BFS', 'SSP']\n",
- "data_scale_9\n",
- "[0.30174025119666154, 0.5252496066735675, 0.4815963183980694, 0.10059243531734213, 0.06274260384596582, 0.08087691640849742, 0.04259226789052634, 0.04541648848958121, 0.04630903277395244]\n",
- "[10.57018736844198, 1.0236825289491465, 3.4565544669058315, 1.7519735735549575, 0.20360754388242536, 1.996866060869272, 0.017142060522749525, 0.07449551793212239, 0.10215939753145187]\n",
- "data_scale_10\n",
- "[10.422613622706065, 2.2596945327623845, 5.400742833397456, 2.670122216953594, 0.682892670528767, 2.511963848371226, 0.5618378459026585, 1.0027274366915082, 1.0377674050792682]\n",
- "[27.48959075860895, 2.275587863157422, 6.2873877384903345, 3.4689708464108686, 0.3821397055328311, 3.8512864713871493, 0.0005884210429041734, 0.1674301749913076, 0.19472820374933392]\n",
- "data_scale_11\n",
- "[18.597918818212044, 4.662328129832614, 9.282539915015656, 5.826916762148887, 1.2560074010281037, 4.831561094416702, 0.6815771129976337, 1.956713995982869, 2.0017224534087275]\n",
- "[50.381630992178266, 4.706337822317938, 10.371674476982982, 8.016961514792193, 0.7960839977603095, 7.554931432029648, 0.000286892032694886, 0.36503329352194, 0.3936930402823485]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"#Print results\n",
"print(algos)\n",
@@ -1003,30 +858,9 @@
},
{
"cell_type": "code",
- "execution_count": 18,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "------------------------------\n",
- "\tAlgorithm Run times (NX then cuGraph)\n",
- "\n",
- "[' ', 'Katz', 'BC Estimate fixed', 'Louvain', 'TC', 'Core Number', 'PageRank', 'Jaccard', 'BFS', 'SSP']\n",
- "data_scale_9\n",
- "[0.03168975654989481, 0.11808181460946798, 0.0607406310737133, 0.007159244269132614, 0.003216283395886421, 0.004577603191137314, 0.00012310687452554703, 0.00039636343717575073, 0.0004781009629368782]\n",
- "[0.0029980316758155823, 0.11535003408789635, 0.017572594806551933, 0.004086388275027275, 0.015796484425663948, 0.002292393706738949, 0.007181568071246147, 0.005320634692907333, 0.004679950885474682]\n",
- "data_scale_10\n",
- "[0.07129270676523447, 0.26657472364604473, 0.1288990117609501, 0.01734251156449318, 0.008041061460971832, 0.009121773764491081, 7.294118404388428e-06, 0.0009355107322335243, 0.0010360050946474075]\n",
- "[0.0025934437289834023, 0.11714543215930462, 0.020501202903687954, 0.004999324679374695, 0.021042203530669212, 0.002368500456213951, 0.012396086938679218, 0.005587467923760414, 0.005320262163877487]\n",
- "data_scale_11\n",
- "[0.13116520550101995, 0.5985189070925117, 0.296876666136086, 0.04408951010555029, 0.022953813895583153, 0.018125338479876518, 6.923452019691467e-06, 0.0021516336128115654, 0.0022359658032655716]\n",
- "[0.002603433094918728, 0.1271729590371251, 0.0286237932741642, 0.005499528720974922, 0.02883340697735548, 0.0023991400375962257, 0.024132604710757732, 0.005894348956644535, 0.005679464899003506]\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"#Print results\n",
"print(\"\\n------------------------------\")\n",
@@ -1048,48 +882,16 @@
},
{
"cell_type": "code",
- "execution_count": 22,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "/home/mnaim/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/distributed/node.py:182: UserWarning: Port 8787 is already in use.\n",
- "Perhaps you already have a cluster running?\n",
- "Hosting the HTTP server on port 37567 instead\n",
- " warnings.warn(\n"
- ]
- },
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "\n",
- "Dask client/cluster created using LocalCUDACluster\n"
- ]
- },
- {
- "ename": "Exception",
- "evalue": "Communicator is already initialized",
- "output_type": "error",
- "traceback": [
- "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
- "\u001b[0;31mException\u001b[0m Traceback (most recent call last)",
- "Cell \u001b[0;32mIn[22], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;66;03m# Setting up cluter\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m setup_objs \u001b[38;5;241m=\u001b[39m \u001b[43mstart_dask_client\u001b[49m\u001b[43m(\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 4\u001b[0m gdf \u001b[38;5;241m=\u001b[39m generate_data(\u001b[38;5;241m10\u001b[39m, mg\u001b[38;5;241m=\u001b[39m\u001b[38;5;28;01mTrue\u001b[39;00m)\n\u001b[1;32m 5\u001b[0m \u001b[38;5;66;03m# create graph\u001b[39;00m\n",
- "File \u001b[0;32m~/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/testing/mg_utils.py:167\u001b[0m, in \u001b[0;36mstart_dask_client\u001b[0;34m(protocol, rmm_async, rmm_pool_size, dask_worker_devices, jit_unspill, worker_class, device_memory_limit)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[38;5;66;03m# FIXME: use proper logging, INFO or DEBUG level\u001b[39;00m\n\u001b[1;32m 165\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;130;01m\\n\u001b[39;00m\u001b[38;5;124mDask client/cluster created using LocalCUDACluster\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m--> 167\u001b[0m \u001b[43mComms\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minitialize\u001b[49m\u001b[43m(\u001b[49m\u001b[43mp2p\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m)\u001b[49m\n\u001b[1;32m 169\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m (client, cluster)\n",
- "File \u001b[0;32m~/miniconda3/envs/cuda_12_new_2/lib/python3.10/site-packages/cugraph/dask/comms/comms.py:158\u001b[0m, in \u001b[0;36minitialize\u001b[0;34m(comms, p2p, prows, pcols, partition_type)\u001b[0m\n\u001b[1;32m 156\u001b[0m __instance \u001b[38;5;241m=\u001b[39m comms\n\u001b[1;32m 157\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m--> 158\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mException\u001b[39;00m(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mCommunicator is already initialized\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n",
- "\u001b[0;31mException\u001b[0m: Communicator is already initialized"
- ]
- }
- ],
+ "outputs": [],
"source": [
"\n",
"# Setting up cluter\n",
"setup_objs = start_dask_client()\n",
"\n",
- "gdf = generate_data(10, mg=True)\n",
"# create graph\n",
+ "gdf = generate_data(10, mg=True)\n",
"g_cu, tcu = create_cu_graph(gdf, mg=True)\n",
"del gdf\n",
"\n",
@@ -1101,16 +903,14 @@
"num_nodes = g_cu.number_of_vertices()\n",
"k = 100 if num_nodes > 100 else num_nodes\n",
"\n",
- "tc = cu_katz(g_cu, alpha, mg=True)\n",
- "tc = cu_bc(g_cu, k, mg=True)\n",
- "tc = cu_louvain(g_cu, mg=True)\n",
- "tc = cu_tc(g_cu, mg=True)\n",
- "tc = cu_core_num(g_cu, mg=True)\n",
- "tc = cu_pagerank(g_cu, mg=True)\n",
- "tc = cu_jaccard(g_cu, mg=True)\n",
- "tc = cu_bfs(g_cu, mg=True)\n",
- "\n",
- "\n",
+ "cu_katz(g_cu, alpha, mg=True)\n",
+ "cu_bc(g_cu, k, mg=True)\n",
+ "cu_louvain(g_cu, mg=True)\n",
+ "cu_tc(g_cu, mg=True)\n",
+ "cu_core_num(g_cu, mg=True)\n",
+ "cu_pagerank(g_cu, mg=True)\n",
+ "cu_jaccard(g_cu, mg=True)\n",
+ "cu_bfs(g_cu, mg=True)\n",
"\n",
"# Tearing down the cluster\n",
"stop_dask_client(*setup_objs)"
From 8b3165c1790020426dbd97a2d492134226d90a77 Mon Sep 17 00:00:00 2001
From: Naim
Date: Wed, 17 Jan 2024 19:19:37 +0100
Subject: [PATCH 3/8] Add MG Benchmark runs
---
.../cugraph_benchmarks/synth_release.ipynb | 286 ++++++++++++++++--
1 file changed, 264 insertions(+), 22 deletions(-)
diff --git a/notebooks/cugraph_benchmarks/synth_release.ipynb b/notebooks/cugraph_benchmarks/synth_release.ipynb
index a04fa90ee3a..76b43d23266 100644
--- a/notebooks/cugraph_benchmarks/synth_release.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release.ipynb
@@ -877,7 +877,7 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "## Example MG runs"
+ "## MG Benchmark"
]
},
{
@@ -887,32 +887,274 @@
"outputs": [],
"source": [
"\n",
- "# Setting up cluter\n",
- "setup_objs = start_dask_client()\n",
+ "# Create dask client and cluter\n",
+ "setup_objs = start_dask_client()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# arrays to capture performance gains\n",
+ "names = []\n",
+ "algos = []\n",
+ "graph_create_cu_mg = []\n",
+ "\n",
+ "\n",
+ "# Two dimension data [file, perf]\n",
+ "\n",
+ "time_algo_cu_mg = [] # cuGraph mg\n",
+ "perf = []\n",
+ "perf_algo = []\n",
+ "\n",
+ "algos.append(\" \")\n",
+ "\n",
+ "i = 0\n",
+ "for k,v in data.items():\n",
+ " # init all the 2-d arrays\n",
+ " \n",
+ " time_algo_cu_mg.append([])\n",
+ " perf.append([])\n",
+ " perf_algo.append([])\n",
+ "\n",
+ " # Saved the file Name\n",
+ " names.append(k)\n",
+ "\n",
+ " # generate data\n",
+ " print(\"------------------------------\")\n",
+ " print(f'Creating Graph of Scale = {v}')\n",
+ "\n",
+ " gdf = generate_data(v, mg=True)\n",
+ " \n",
+ " print(f\"\\tdata in gdf {len(gdf)}\")\n",
+ "\n",
+ " # create the graphs\n",
+ " g_cu, tcu = create_cu_graph(gdf, mg=True)\n",
+ " \n",
+ " graph_create_cu_mg.append(tcu)\n",
+ " \n",
+ " del gdf\n",
+ "\n",
+ " # prep\n",
+ " deg = g_cu.degree()\n",
+ " deg_max = deg['degree'].max().compute()\n",
+ "\n",
+ " alpha = 1 / deg_max\n",
+ " num_nodes = g_cu.number_of_vertices()\n",
+ "\n",
+ " del deg\n",
+ " gc.collect()\n",
+ "\n",
+ " #----- Algorithm order is same as defined at top ----\n",
+ "\n",
+ " tnx = graph_create_nx[i]\n",
+ " j = 0\n",
+ "\n",
+ " #-- Katz \n",
+ " print(\"\\tKatz \", end = '')\n",
+ " if i == 0: \n",
+ " algos.append(\"Katz\")\n",
+ "\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1 \n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_katz(g_cu, alpha, mg=True)\n",
+ " print(\"\")\n",
+ "\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
+ "\n",
+ " #-- BC\n",
+ " print(\"\\tBC k=100 \", end='')\n",
+ " if i == 0:\n",
+ " algos.append(\"BC Estimate fixed\")\n",
+ "\n",
+ " k = 100\n",
+ " if k > num_nodes:\n",
+ " k = int(num_nodes)\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_bc(g_cu, k, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
+ "\n",
+ " #-- Louvain\n",
+ " print(\"\\tLouvain \", end='')\n",
+ " if i == 0:\n",
+ " algos.append(\"Louvain\")\n",
+ "\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_louvain(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
+ "\n",
+ " #-- TC\n",
+ " print(\"\\tTC \", end='')\n",
+ " if i == 0:\n",
+ " algos.append(\"TC\")\n",
+ "\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_tc(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
"\n",
- "# create graph\n",
- "gdf = generate_data(10, mg=True)\n",
- "g_cu, tcu = create_cu_graph(gdf, mg=True)\n",
- "del gdf\n",
+ " #-- Core Number\n",
+ " print(\"\\tCore Number \", end='')\n",
+ " if i == 0:\n",
+ " algos.append(\"Core Number\")\n",
"\n",
- "# prep\n",
- "deg = g_cu.degree()\n",
- "deg_max = deg['degree'].max().compute()\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_core_num(g_cu, mg=True)\n",
+ " print(\" \")\n",
"\n",
- "alpha = 1 / deg_max\n",
- "num_nodes = g_cu.number_of_vertices()\n",
- "k = 100 if num_nodes > 100 else num_nodes\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
"\n",
- "cu_katz(g_cu, alpha, mg=True)\n",
- "cu_bc(g_cu, k, mg=True)\n",
- "cu_louvain(g_cu, mg=True)\n",
- "cu_tc(g_cu, mg=True)\n",
- "cu_core_num(g_cu, mg=True)\n",
- "cu_pagerank(g_cu, mg=True)\n",
- "cu_jaccard(g_cu, mg=True)\n",
- "cu_bfs(g_cu, mg=True)\n",
+ " #-- PageRank\n",
+ " print(\"\\tPageRank \", end='')\n",
+ " if i == 0:\n",
+ " algos.append(\"PageRank\")\n",
+ "\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_pagerank(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
+ "\n",
+ " #-- Jaccard\n",
+ " print(\"\\tJaccard \", end='')\n",
+ " if i == 0:\n",
+ " algos.append(\"Jaccard\")\n",
+ "\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_jaccard(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
+ "\n",
+ " #-- BFS\n",
+ " print(\"\\tBFS \", end='')\n",
+ " if i == 0:\n",
+ " algos.append(\"BFS\")\n",
+ "\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_bfs(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
+ "\n",
+ " #-- SSSP\n",
+ " print(\"\\tSSSP \", end='')\n",
+ " if i == 0:\n",
+ " algos.append(\"SSP\")\n",
+ "\n",
+ " print(\"n.\", end='')\n",
+ " tx = time_algo_nx[i][j]\n",
+ " j = j + 1\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_sssp(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " \n",
+ " time_algo_cu_mg[i].append(tc)\n",
+ " perf_algo[i].append ( (tx/tc) )\n",
+ " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
+ "\n",
+ " # increament count\n",
+ " i = i + 1\n",
+ " \n",
+ " del g_cu\n",
+ " gc.collect()\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Print results\n",
+ "print(algos)\n",
+ "\n",
+ "for i in range(num_datasets):\n",
+ " print(f\"{names[i]}\")\n",
+ " print(f\"{perf[i]}\")\n",
+ " print(f\"{perf_algo[i]}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "#Print results\n",
+ "print(\"\\n------------------------------\")\n",
+ "print(\"\\tAlgorithm Run times (NX then cuGraph MG)\\n\")\n",
+ "\n",
+ "print(algos)\n",
+ "for i in range(num_datasets):\n",
+ " print(f\"{names[i]}\")\n",
+ " print(f\"{time_algo_nx[i]}\")\n",
+ " print(f\"{time_algo_cu_mg[i]}\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
"\n",
- "# Tearing down the cluster\n",
+ "# Cleanup client and cluster\n",
"stop_dask_client(*setup_objs)"
]
},
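Patch 3 expands the single MG example cell into a full benchmark loop that mirrors the earlier single-GPU loop: for each scale it records the MG graph-creation time (tcu) and per-algorithm times (tc), then reuses the previously measured NetworkX timings (tnx, tx) to report two speedups. A small illustration of those two metrics, with made-up timings:

    # Hypothetical timings in seconds, for illustration only.
    tnx, tx = 2.0, 1.5     # NetworkX: graph creation, algorithm run
    tcu, tc = 0.40, 0.05   # cuGraph MG: graph creation, algorithm run

    speedup_algo = tx / tc                    # algorithm-only speedup (perf_algo)
    speedup_total = (tx + tnx) / (tc + tcu)   # creation + algorithm speedup (perf)
    print(f"algorithm-only: {speedup_algo:.1f}x, end-to-end: {speedup_total:.1f}x")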
From 53894ddcd91f859f5cca9518eb46be40bfaeaf45 Mon Sep 17 00:00:00 2001
From: Naim
Date: Fri, 9 Feb 2024 03:38:40 +0100
Subject: [PATCH 4/8] Add notebook for SNMG benchmark runs
---
.../cugraph_benchmarks/synth_release.ipynb | 402 +-------
.../synth_release_single_node_multi_gpu.ipynb | 962 ++++++++++++++++++
2 files changed, 994 insertions(+), 370 deletions(-)
create mode 100644 notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
diff --git a/notebooks/cugraph_benchmarks/synth_release.ipynb b/notebooks/cugraph_benchmarks/synth_release.ipynb
index 76b43d23266..18979f3ecee 100644
--- a/notebooks/cugraph_benchmarks/synth_release.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release.ipynb
@@ -131,11 +131,6 @@
"import cugraph\n",
"import cudf\n",
"\n",
- "# to parallelize with dask\n",
- "import dask_cudf\n",
- "from cugraph.dask.common.mg_utils import get_visible_devices\n",
- "from cugraph.testing.mg_utils import start_dask_client, stop_dask_client\n",
- "\n",
"# NetworkX libraries\n",
"import networkx as nx\n",
"\n",
@@ -217,7 +212,7 @@
"\n",
"\n",
"# Which dataset is to be used\n",
- "data = data_quick\n"
+ "data = data_full\n"
]
},
{
@@ -236,7 +231,7 @@
"source": [
"# Data generator \n",
"# The result is an edgelist of the size determined by the scale and edge factor\n",
- "def generate_data(scale, edgefactor=16, mg=False):\n",
+ "def generate_data(scale, edgefactor=16):\n",
" _gdf = rmat(\n",
" scale,\n",
" (2 ** scale) * edgefactor,\n",
@@ -247,17 +242,12 @@
" clip_and_flip=False,\n",
" scramble_vertex_ids=True,\n",
" create_using=None, # return edgelist instead of Graph instance\n",
- " mg=mg # determines whether generated data will be used on one or multiple GPUs\n",
+ " mg=False # determines whether generated data will be used on one or multiple GPUs\n",
" )\n",
"\n",
" clean_coo = NumberMap.renumber(_gdf, src_col_names=\"src\", dst_col_names=\"dst\")[0]\n",
- " if mg:\n",
- " clean_coo.rename(columns={\"renumbered_src\": \"src\", \"renumbered_dst\": \"dst\"})\n",
- " else:\n",
- " clean_coo.rename(columns={\"renumbered_src\": \"src\", \"renumbered_dst\": \"dst\"}, inplace=True)\n",
- "\n",
- " print(f'Generated a dataframe of type {type(clean_coo)}, with {len(clean_coo)} edges')\n",
- " \n",
+ " clean_coo.rename(columns={\"renumbered_src\": \"src\", \"renumbered_dst\": \"dst\"}, inplace=True)\n",
+ " print(f'Generated a dataframe of {len(clean_coo)} edges')\n",
" return clean_coo"
]
},
@@ -294,23 +284,17 @@
"\n",
" return _gnx, t2\n",
"\n",
+ "\n",
"# cuGraph\n",
- "def create_cu_graph(_df,transpose=False, directed=False, mg=False):\n",
+ "def create_cu_graph(_df,transpose=False, directed=False):\n",
" t1 = perf_counter()\n",
" _g = cugraph.Graph(directed=directed)\n",
- "\n",
- " if mg:\n",
- " # Set the number of partition to #GPUs\n",
- " npartitions = len(get_visible_devices())\n",
- " _ddf = dask_cudf.from_cudf(_df.compute(), npartitions=npartitions)\n",
- " _g.from_dask_cudf_edgelist(_ddf, source=\"src\", destination=\"dst\", edge_attr=None)\n",
- " else:\n",
- " _g.from_cudf_edgelist(_df,\n",
- " source='src',\n",
- " destination='dst',\n",
- " edge_attr=None,\n",
- " renumber=False,\n",
- " store_transposed=transpose)\n",
+ " _g.from_cudf_edgelist(_df,\n",
+ " source='src',\n",
+ " destination='dst',\n",
+ " edge_attr=None,\n",
+ " renumber=False,\n",
+ " store_transposed=transpose)\n",
" t2 = perf_counter() - t1\n",
"\n",
" return _g, t2"
@@ -342,13 +326,9 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_katz(_G, alpha, mg=False):\n",
+ "def cu_katz(_G, alpha):\n",
" t1 = perf_counter()\n",
- " if mg:\n",
- " _ = cugraph.dask.katz_centrality(_G, alpha)\n",
- " else:\n",
- "\n",
- " _ = cugraph.katz_centrality(_G, alpha)\n",
+ " _ = cugraph.katz_centrality(_G, alpha)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -372,12 +352,9 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_bc(_G, _k, mg=False):\n",
+ "def cu_bc(_G, _k):\n",
" t1 = perf_counter()\n",
- " if mg:\n",
- " _ = cugraph.dask.betweenness_centrality(_G, k=_k)\n",
- " else: \n",
- " _ = cugraph.betweenness_centrality(_G, k=_k)\n",
+ " _ = cugraph.betweenness_centrality(_G, k=_k)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -405,16 +382,11 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_louvain(_G, mg=False):\n",
+ "def cu_louvain(_G):\n",
" t1 = perf_counter()\n",
- " if mg:\n",
- " _, modularity = cugraph.dask.louvain(_G)\n",
- " print (f'modularity: {modularity}')\n",
- " else:\n",
- " _,_ = cugraph.louvain(_G)\n",
+ " _,_ = cugraph.louvain(_G)\n",
" t2 = perf_counter() - t1\n",
- " return t2\n",
- "\n"
+ " return t2\n"
]
},
{
@@ -442,12 +414,9 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_tc(_G, mg=False):\n",
+ "def cu_tc(_G):\n",
" t1 = perf_counter()\n",
- " if mg:\n",
- " _ = cugraph.dask.triangle_count(_G)\n",
- " else:\n",
- " _ = cugraph.triangle_count(_G)\n",
+ " _ = cugraph.triangle_count(_G)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -477,12 +446,9 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_core_num(_G, mg=False):\n",
+ "def cu_core_num(_G):\n",
" t1 = perf_counter()\n",
- " if mg:\n",
- " _ = cugraph.dask.core_number(_G)\n",
- " else:\n",
- " _ = cugraph.core_number(_G)\n",
+ " _ = cugraph.core_number(_G)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -506,12 +472,9 @@
" t2 = perf_counter() - t1\n",
" return t2 \n",
"\n",
- "def cu_pagerank(_G, mg=False):\n",
+ "def cu_pagerank(_G):\n",
" t1 = perf_counter()\n",
- " if mg:\n",
- " _ = cugraph.dask.pagerank(_G)\n",
- " else:\n",
- " _ = cugraph.pagerank(_G)\n",
+ " _ = cugraph.pagerank(_G)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -535,13 +498,9 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_jaccard(_G, mg=False):\n",
+ "def cu_jaccard(_G):\n",
" t1 = perf_counter()\n",
- " t1 = perf_counter()\n",
- " if mg:\n",
- " _ = cugraph.dask.jaccard(_G)\n",
- " else:\n",
- " _ = cugraph.jaccard_coefficient(_G)\n",
+ " _ = cugraph.jaccard_coefficient(_G)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -567,13 +526,10 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_bfs(_G, mg=False):\n",
+ "def cu_bfs(_G):\n",
" seed = 0\n",
" t1 = perf_counter()\n",
- " if mg:\n",
- " _ = cugraph.dask.bfs(_G, seed)\n",
- " else:\n",
- " _ = cugraph.bfs(_G, seed)\n",
+ " _ = cugraph.bfs(_G, seed)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -598,14 +554,10 @@
" t2 = perf_counter() - t1\n",
" return t2\n",
"\n",
- "def cu_sssp(_G, mg=False):\n",
+ "def cu_sssp(_G):\n",
" seed = 0\n",
" t1 = perf_counter()\n",
- " # SSSP requires weighted graph\n",
- " if mg:\n",
- " _ = cugraph.dask.bfs(_G, seed)\n",
- " else:\n",
- " _ = cugraph.bfs(_G, seed)\n",
+ " _ = cugraph.sssp(_G, seed)\n",
" t2 = perf_counter() - t1\n",
" return t2\n"
]
@@ -873,296 +825,6 @@
" print(f\"{time_algo_cu[i]}\")"
]
},
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "## MG Benchmark"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
- "# Create dask client and cluter\n",
- "setup_objs = start_dask_client()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# arrays to capture performance gains\n",
- "names = []\n",
- "algos = []\n",
- "graph_create_cu_mg = []\n",
- "\n",
- "\n",
- "# Two dimension data [file, perf]\n",
- "\n",
- "time_algo_cu_mg = [] # cuGraph mg\n",
- "perf = []\n",
- "perf_algo = []\n",
- "\n",
- "algos.append(\" \")\n",
- "\n",
- "i = 0\n",
- "for k,v in data.items():\n",
- " # init all the 2-d arrays\n",
- " \n",
- " time_algo_cu_mg.append([])\n",
- " perf.append([])\n",
- " perf_algo.append([])\n",
- "\n",
- " # Saved the file Name\n",
- " names.append(k)\n",
- "\n",
- " # generate data\n",
- " print(\"------------------------------\")\n",
- " print(f'Creating Graph of Scale = {v}')\n",
- "\n",
- " gdf = generate_data(v, mg=True)\n",
- " \n",
- " print(f\"\\tdata in gdf {len(gdf)}\")\n",
- "\n",
- " # create the graphs\n",
- " g_cu, tcu = create_cu_graph(gdf, mg=True)\n",
- " \n",
- " graph_create_cu_mg.append(tcu)\n",
- " \n",
- " del gdf\n",
- "\n",
- " # prep\n",
- " deg = g_cu.degree()\n",
- " deg_max = deg['degree'].max().compute()\n",
- "\n",
- " alpha = 1 / deg_max\n",
- " num_nodes = g_cu.number_of_vertices()\n",
- "\n",
- " del deg\n",
- " gc.collect()\n",
- "\n",
- " #----- Algorithm order is same as defined at top ----\n",
- "\n",
- " tnx = graph_create_nx[i]\n",
- " j = 0\n",
- "\n",
- " #-- Katz \n",
- " print(\"\\tKatz \", end = '')\n",
- " if i == 0: \n",
- " algos.append(\"Katz\")\n",
- "\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1 \n",
- " print(\"c.\", end='')\n",
- " tc = cu_katz(g_cu, alpha, mg=True)\n",
- " print(\"\")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " #-- BC\n",
- " print(\"\\tBC k=100 \", end='')\n",
- " if i == 0:\n",
- " algos.append(\"BC Estimate fixed\")\n",
- "\n",
- " k = 100\n",
- " if k > num_nodes:\n",
- " k = int(num_nodes)\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1\n",
- " print(\"c.\", end='')\n",
- " tc = cu_bc(g_cu, k, mg=True)\n",
- " print(\" \")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " #-- Louvain\n",
- " print(\"\\tLouvain \", end='')\n",
- " if i == 0:\n",
- " algos.append(\"Louvain\")\n",
- "\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1\n",
- " print(\"c.\", end='')\n",
- " tc = cu_louvain(g_cu, mg=True)\n",
- " print(\" \")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " #-- TC\n",
- " print(\"\\tTC \", end='')\n",
- " if i == 0:\n",
- " algos.append(\"TC\")\n",
- "\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1\n",
- " print(\"c.\", end='')\n",
- " tc = cu_tc(g_cu, mg=True)\n",
- " print(\" \")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " #-- Core Number\n",
- " print(\"\\tCore Number \", end='')\n",
- " if i == 0:\n",
- " algos.append(\"Core Number\")\n",
- "\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1\n",
- " print(\"c.\", end='')\n",
- " tc = cu_core_num(g_cu, mg=True)\n",
- " print(\" \")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " #-- PageRank\n",
- " print(\"\\tPageRank \", end='')\n",
- " if i == 0:\n",
- " algos.append(\"PageRank\")\n",
- "\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1\n",
- " print(\"c.\", end='')\n",
- " tc = cu_pagerank(g_cu, mg=True)\n",
- " print(\" \")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " #-- Jaccard\n",
- " print(\"\\tJaccard \", end='')\n",
- " if i == 0:\n",
- " algos.append(\"Jaccard\")\n",
- "\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1\n",
- " print(\"c.\", end='')\n",
- " tc = cu_jaccard(g_cu, mg=True)\n",
- " print(\" \")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " #-- BFS\n",
- " print(\"\\tBFS \", end='')\n",
- " if i == 0:\n",
- " algos.append(\"BFS\")\n",
- "\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1\n",
- " print(\"c.\", end='')\n",
- " tc = cu_bfs(g_cu, mg=True)\n",
- " print(\" \")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " #-- SSSP\n",
- " print(\"\\tSSSP \", end='')\n",
- " if i == 0:\n",
- " algos.append(\"SSP\")\n",
- "\n",
- " print(\"n.\", end='')\n",
- " tx = time_algo_nx[i][j]\n",
- " j = j + 1\n",
- " print(\"c.\", end='')\n",
- " tc = cu_sssp(g_cu, mg=True)\n",
- " print(\" \")\n",
- "\n",
- " \n",
- " time_algo_cu_mg[i].append(tc)\n",
- " perf_algo[i].append ( (tx/tc) )\n",
- " perf[i].append( (tx + tnx) / (tc + tcu) )\n",
- "\n",
- " # increament count\n",
- " i = i + 1\n",
- " \n",
- " del g_cu\n",
- " gc.collect()\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Print results\n",
- "print(algos)\n",
- "\n",
- "for i in range(num_datasets):\n",
- " print(f\"{names[i]}\")\n",
- " print(f\"{perf[i]}\")\n",
- " print(f\"{perf_algo[i]}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "#Print results\n",
- "print(\"\\n------------------------------\")\n",
- "print(\"\\tAlgorithm Run times (NX then cuGraph MG)\\n\")\n",
- "\n",
- "print(algos)\n",
- "for i in range(num_datasets):\n",
- " print(f\"{names[i]}\")\n",
- " print(f\"{time_algo_nx[i]}\")\n",
- " print(f\"{time_algo_cu_mg[i]}\")"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "\n",
- "# Cleanup client and cluster\n",
- "stop_dask_client(*setup_objs)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": []
- },
{
"cell_type": "markdown",
"metadata": {},
diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
new file mode 100644
index 00000000000..1796bc489d3
--- /dev/null
+++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
@@ -0,0 +1,962 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Skip notebook test\n",
+ "-----\n",
+ "\n",
+ "#### NOTE: This notebook will take hours to run.\n",
+ "-----\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "# Comparing NetworkX vs cuGraph using synthetic data on various algorithms on single node multi GPU cluster\n",
+ "\n",
+ "\n",
+ "This notebook compares the execution times of many of the cuGraph and NetworkX algorithms when run against identical synthetic data at multiple scales.\n",
+ "\n",
+ "This notebook uses the RMAT data generator which allows the creation of graphs at various scales. The notebook, by default, runs on a set of selected sizes but users are free to change or add to that list."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Notebook Credits\n",
+ "\n",
+ " \n",
+ "| Author | Date | Update | cuGraph Version | Test Hardware |\n",
+ "| --------------|------------|---------------------|-----------------|------------------------|\n",
+ "| Don Acosta | 1/12/2023 | Created | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
+ "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "### Timing "
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "When looking at the overall workflow, NetworkX and cuGraph do things differently. For example, NetworkX spends a lot of time creating the graph data structure. cuGraph on the other hand does a lazy creation of the data structure when an algorithm is called. To further complicate the comparison problem, NetworkX does not always return the answer. In some cases, it returns a generator that is then called to produce the data. \n",
+ "\n",
+ "This benchmark produces two performance metrics:\n",
+ " - (1)\tJust the algorithm run time \n",
+ " - (2)\tThe algorithm plus graph creation time\n",
+ "\n",
+ "Since GPU memory is a precious resource, having a lot of temporary data laying around is avoided. So once a graph is created, the raw data is dropped. \n",
+ " \n",
+ "__What is not timed__: Generating the data with R-MAT
\n",
+ "__What is timed__: (1) creating a Graph, (2) running the algorithm (3) run any generators\n"
+ ]
+ },
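+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative sketch only, not part of the timed benchmark: how the two\n",
+ "# reported metrics combine hypothetical timings for a single algorithm.\n",
+ "tx, tnx = 2.0, 5.0 # NetworkX: algorithm time, graph-creation time (made-up values)\n",
+ "tc, tcu = 0.1, 0.4 # cuGraph: algorithm time, graph-creation time (made-up values)\n",
+ "speedup_algo_only = tx / tc # metric (1): algorithm run time only\n",
+ "speedup_with_creation = (tx + tnx) / (tc + tcu) # metric (2): algorithm plus graph creation\n",
+ "print(speedup_algo_only, speedup_with_creation)"
+ ]
+ },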
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Algorithms"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "| Algorithm | Type | Undirected Graph | Directed Graph | Notes\n",
+ "| ------------------------|---------------|------ | ------- |-------------\n",
+ "| Katz | Centrality | X | | \n",
+ "| Betweenness Centrality | Centrality | X | | Estimated, k = 100\n",
+ "| Louvain | Community | X | | Uses python-louvain for comparison\n",
+ "| Triangle Counting | Community | X | |\n",
+ "| Core Number | Core | X | |\n",
+ "| PageRank | Link Analysis | | X |\n",
+ "| Jaccard | Similarity | X | |\n",
+ "| BFS | Traversal | X | | No depth limit\n",
+ "| SSSP | Traversal | X | | \n",
+ "\n",
+ "\n",
+ "### Test Data\n",
+ "Data is generated using a Recursive MATrix (R-MAT) graph generation algorithm. \n",
+ "The generator specifics are documented [here](https://docs.rapids.ai/api/cugraph/stable/api_docs/generator.html)\n",
+ "\n",
+ "\n",
+ "\n",
+ "### Notes\n",
+ "* Running Betweenness Centrality on the full graph is prohibitive using NetworkX. Anything over k=100 can explode runtime to days\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {
+ "tags": []
+ },
+ "source": [
+ "## Import Modules"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# system and other\n",
+ "import gc\n",
+ "import os\n",
+ "from time import perf_counter\n",
+ "import numpy as np\n",
+ "import math\n",
+ "import pandas as pd\n",
+ "from collections import defaultdict\n",
+ "\n",
+ "# rapids\n",
+ "import cugraph\n",
+ "import cudf\n",
+ "\n",
+ "# to parallelize with dask\n",
+ "import dask_cudf\n",
+ "from cugraph.dask.common.mg_utils import get_visible_devices\n",
+ "\n",
+ "# liblibraries to setup dask cluster and client\n",
+ "from dask.distributed import Client, wait\n",
+ "from dask_cuda import LocalCUDACluster\n",
+ "from cugraph.dask.comms import comms as Comms\n",
+ "\n",
+ "# NetworkX libraries\n",
+ "import networkx as nx\n",
+ "\n",
+ "# RMAT data generator\n",
+ "from cugraph.generators import rmat\n",
+ "from cugraph.structure import NumberMap"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "try: \n",
+ " import community\n",
+ "except ModuleNotFoundError:\n",
+ " os.system('pip install python-louvain')\n",
+ " import community"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Determine the scale of the test data\n",
+ "RMAT generates graph where the number of vertices is a power of 2 and the number of edges is based on an edge factor times the number vertices.\n",
+ "\n",
+ "Since RMAT tends to generate about 50% isolated vertices, those vertices are dropped from the graph data. Hence the number of vertices is closer to (2 ** scale) / 2\n",
+ "\n",
+ "\n",
+ "| Scale | Vertices (est) | Edges |\n",
+ "| ------|----------------|--------|\n",
+ "| 10 | 512 | 16,384 | \n",
+ "| 11 | 1,024 | 32,768| \n",
+ "| 12 | 2,048 | 65,536| \n",
+ "| 13 | 4,096 | 131,072| \n",
+ "| 14 | 8,192 | 262,144| \n",
+ "| 15 | 16,384 | 524,288 | \n",
+ "| 16 | 32,768 | 1,048,576 | \n",
+ "| 17 | 65,536 | 2,097,152 | \n",
+ "| 18 | 131,072 | 4,194,304 | \n",
+ "| 19 | 262,144 | 8,388,608 | \n",
+ "| 20 | 524,288 | 16,777,216 | \n",
+ "| 21 | 1,048,576 | 33,554,432 | \n",
+ "| 22 | 2,097,152 | 67,108,864 | \n",
+ "| 23 | 4,194,304 | 134,217,728 | \n",
+ "| 24 | 8,388,608 | 268,435,456 | \n",
+ "| 25 | 16,777,216 | 536,870,912 | \n"
+ ]
+ },
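+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Quick sanity check of the table above (illustrative only): R-MAT produces\n",
+ "# (2 ** scale) * edgefactor edges, and roughly half of the 2 ** scale vertices\n",
+ "# end up isolated and are dropped, leaving about (2 ** scale) / 2 vertices.\n",
+ "scale, edgefactor = 14, 16\n",
+ "est_vertices = (2 ** scale) // 2\n",
+ "num_edges = (2 ** scale) * edgefactor\n",
+ "print(f'scale {scale}: ~{est_vertices:,} vertices, {num_edges:,} edges')"
+ ]
+ },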
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Test Data Sizes\n",
+ "# Here you can create an array of test data sizes. Then set the \"data\" variable to the array you want\n",
+ "# the dictionary format is 'name' : scale\n",
+ "\n",
+ "\n",
+ "# These scales are used by R-MAT to determine the number of vertices/edges in the synthetic data graph.\n",
+ "data_full = {\n",
+ " 'data_scale_10' : 10,\n",
+ " 'data_scale_12' : 12,\n",
+ " 'data_scale_14' : 14,\n",
+ " 'data_scale_16' : 16,\n",
+ " 'data_scale_18' : 18,\n",
+ " 'data_scale_20' : 20,\n",
+ "}\n",
+ "\n",
+ "# for quick testing\n",
+ "data_quick = {\n",
+ " 'data_scale_9' : 9,\n",
+ " 'data_scale_10' : 10,\n",
+ " 'data_scale_11' : 11,\n",
+ "}\n",
+ "\n",
+ "\n",
+ "# Which dataset is to be used\n",
+ "data = data_quick\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Generate data\n",
+ "The data is generated once for each size."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Data generator \n",
+ "# The result is an edgelist of the size determined by the scale and edge factor\n",
+ "def generate_data(scale, edgefactor=16, mg=False):\n",
+ " _gdf = rmat(\n",
+ " scale,\n",
+ " (2 ** scale) * edgefactor,\n",
+ " 0.57,\n",
+ " 0.19,\n",
+ " 0.19,\n",
+ " 42,\n",
+ " clip_and_flip=False,\n",
+ " scramble_vertex_ids=True,\n",
+ " create_using=None, # return edgelist instead of Graph instance\n",
+ " mg=mg # determines whether generated data will be used on one or multiple GPUs\n",
+ " )\n",
+ "\n",
+ " clean_coo = NumberMap.renumber(_gdf, src_col_names=\"src\", dst_col_names=\"dst\")[0]\n",
+ " if mg:\n",
+ " clean_coo.rename(columns={\"renumbered_src\": \"src\", \"renumbered_dst\": \"dst\"})\n",
+ " else:\n",
+ " clean_coo.rename(columns={\"renumbered_src\": \"src\", \"renumbered_dst\": \"dst\"}, inplace=True)\n",
+ "\n",
+ " print(f'Generated a dataframe of type {type(clean_coo)}, with {len(clean_coo)} edges')\n",
+ " \n",
+ " return clean_coo"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Create Graph functions\n",
+ "There are two types of graphs created:\n",
+ "* Directed Graphs - calls to create_nx_digraph, create_cu_directed_graph.\n",
+ "* Undirected Graphs - calls to create_xx_ugraph <- fully symmeterized"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# NetworkX\n",
+ "def create_nx_graph(_df , directed=False):\n",
+ " t1 = perf_counter()\n",
+ " if directed:\n",
+ " g_type = nx.DiGraph\n",
+ " else:\n",
+ " g_type = nx.Graph\n",
+ " \n",
+ " _gnx = nx.from_pandas_edgelist(_df,\n",
+ " source='src',\n",
+ " target='dst',\n",
+ " edge_attr=None,\n",
+ " create_using=g_type)\n",
+ " t2 = perf_counter() - t1\n",
+ "\n",
+ " return _gnx, t2\n",
+ "\n",
+ "\n",
+ "\n",
+ "# cuGraph\n",
+ "def create_cu_graph(_df, transpose=False, directed=False, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " _g = cugraph.Graph(directed=directed)\n",
+ "\n",
+ " if mg:\n",
+ " # Set the number of partition to #GPUs\n",
+ " npartitions = 3 * len(get_visible_devices())\n",
+ " _ddf = dask_cudf.from_cudf(_df.compute(), npartitions=npartitions)\n",
+ " _g.from_dask_cudf_edgelist(_ddf, source=\"src\", destination=\"dst\", edge_attr=None)\n",
+ " else:\n",
+ " _g.from_cudf_edgelist(_df,\n",
+ " source='src',\n",
+ " destination='dst',\n",
+ " edge_attr=None,\n",
+ " renumber=False,\n",
+ " store_transposed=transpose)\n",
+ " t2 = perf_counter() - t1\n",
+ "\n",
+ " return _g, t2"
+ ]
+ },
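+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative usage sketch (not part of the timed benchmark): build a small\n",
+ "# single-GPU graph end to end with the helpers above. The multi-GPU path\n",
+ "# (mg=True) additionally requires the dask cluster that is set up further below.\n",
+ "_demo_df = generate_data(scale=10, edgefactor=16, mg=False)\n",
+ "_demo_g, _demo_t = create_cu_graph(_demo_df, directed=False, mg=False)\n",
+ "print(f'Built a graph with {_demo_g.number_of_vertices()} vertices in {_demo_t:.4f} s')\n",
+ "del _demo_df, _demo_g\n",
+ "gc.collect()"
+ ]
+ },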
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Algorithm Execution"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Katz"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_katz(_G, alpha):\n",
+ " t1 = perf_counter()\n",
+ " _ = nx.katz_centrality(_G, alpha)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n",
+ "def cu_katz(_G, alpha, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " if mg:\n",
+ " _ = cugraph.dask.katz_centrality(_G, alpha)\n",
+ " else:\n",
+ "\n",
+ " _ = cugraph.katz_centrality(_G, alpha)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Betweenness Centrality"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_bc(_G, _k):\n",
+ " t1 = perf_counter()\n",
+ " _ = nx.betweenness_centrality(_G, k=_k)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n",
+ "def cu_bc(_G, _k, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " if mg:\n",
+ " _ = cugraph.dask.betweenness_centrality(_G, k=_k)\n",
+ " else: \n",
+ " _ = cugraph.betweenness_centrality(_G, k=_k)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Louvain"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_louvain(_G):\n",
+ " t1 = perf_counter()\n",
+ " parts = community.best_partition(_G)\n",
+ " \n",
+ " # Calculating modularity scores for comparison\n",
+ " _ = community.modularity(parts, _G)\n",
+ " \n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n",
+ "def cu_louvain(_G, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " if mg:\n",
+ " _, modularity = cugraph.dask.louvain(_G)\n",
+ " print (f'modularity: {modularity}')\n",
+ " else:\n",
+ " _,_ = cugraph.louvain(_G)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Triangle Counting"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_tc(_G):\n",
+ " t1 = perf_counter()\n",
+ " nx_count = nx.triangles(_G)\n",
+ "\n",
+ " # To get the number of triangles, we would need to loop through the array and add up each count\n",
+ " count = 0\n",
+ " for key, value in nx_count.items():\n",
+ " count = count + value\n",
+ " \n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n",
+ "def cu_tc(_G, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " if mg:\n",
+ " _ = cugraph.dask.triangle_count(_G)\n",
+ " else:\n",
+ " _ = cugraph.triangle_count(_G)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Core Number"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_core_num(_G):\n",
+ " t1 = perf_counter()\n",
+ " _G.remove_edges_from(nx.selfloop_edges(_G))\n",
+ " nx_count = nx.core_number(_G)\n",
+ " \n",
+ " count = 0\n",
+ " for key, value in nx_count.items():\n",
+ " count = count + value\n",
+ " \n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n",
+ "def cu_core_num(_G, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " if mg:\n",
+ " _ = cugraph.dask.core_number(_G)\n",
+ " else:\n",
+ " _ = cugraph.core_number(_G)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### PageRank"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_pagerank(_G):\n",
+ " t1 = perf_counter()\n",
+ " _ = nx.pagerank(_G)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2 \n",
+ "\n",
+ "def cu_pagerank(_G, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " if mg:\n",
+ " _ = cugraph.dask.pagerank(_G)\n",
+ " else:\n",
+ " _ = cugraph.pagerank(_G)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Jaccard"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_jaccard(_G):\n",
+ " t1 = perf_counter()\n",
+ " nj = nx.jaccard_coefficient(_G)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n",
+ "def cu_jaccard(_G, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " t1 = perf_counter()\n",
+ " if mg:\n",
+ " _ = cugraph.dask.jaccard(_G)\n",
+ " else:\n",
+ " _ = cugraph.jaccard_coefficient(_G)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### BFS"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_bfs(_G, seed):\n",
+ " t1 = perf_counter()\n",
+ " nb = nx.bfs_edges(_G, seed)\n",
+ " nb_list = list(nb) # gen -> list\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n",
+ "def cu_bfs(_G, seed=0, mg=False):\n",
+ " t1 = perf_counter()\n",
+ " if mg:\n",
+ " _ = cugraph.dask.bfs(_G, seed)\n",
+ " else:\n",
+ " _ = cugraph.bfs(_G, seed)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### SSSP"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "def nx_sssp(_G, seed):\n",
+ " t1 = perf_counter()\n",
+ " _ = nx.shortest_path(_G, seed)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n",
+ "\n",
+ "def cu_sssp(_G, seed = 0, mg=False):\n",
+ " \n",
+ " t1 = perf_counter()\n",
+ " # SSSP requires weighted graph\n",
+ " if mg:\n",
+ " _ = cugraph.dask.bfs(_G, seed)\n",
+ " else:\n",
+ " _ = cugraph.bfs(_G, seed)\n",
+ " t2 = perf_counter() - t1\n",
+ " return t2\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## MG Benchmark"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Initialize multi-GPU environment\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Setup a local dask cluster of workers, and a client\n",
+ "\n",
+ "cluster = LocalCUDACluster()\n",
+ "client = Client(cluster)\n",
+ "Comms.initialize(p2p=True)"
+ ]
+ },
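+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Optional sanity check (an illustrative addition, not required by the benchmark):\n",
+ "# the LocalCUDACluster should start one dask worker per visible GPU.\n",
+ "workers = client.scheduler_info()['workers']\n",
+ "print(f'Number of dask workers (one per GPU): {len(workers)}')"
+ ]
+ },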
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Placeholders to collect execution run statistics "
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "\n",
+ "nx_algo_run_times = defaultdict(defaultdict)\n",
+ "cugraph_algo_run_times = defaultdict(defaultdict)\n",
+ "perf_algos = defaultdict(defaultdict)\n",
+ "perf = defaultdict(defaultdict)\n",
+ "cugraph_graph_creation_times = defaultdict()\n",
+ "nx_graph_creation_times = defaultdict()\n"
+ ]
+ },
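+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# Illustrative sketch (not part of the benchmark): each placeholder is a two-level\n",
+ "# mapping keyed by dataset name and then algorithm name, which is what lets the\n",
+ "# result cells below convert them directly into pandas DataFrames.\n",
+ "_demo = defaultdict(defaultdict)\n",
+ "_demo['data_scale_10']['Katz'] = 0.123 # hypothetical run time in seconds\n",
+ "print(pd.DataFrame(_demo))\n",
+ "del _demo"
+ ]
+ },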
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Run NX and cuGraph algorithms for all datasets"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "for dataset, scale in data.items():\n",
+ " \n",
+ " # generate data\n",
+ " print(\"------------------------------\")\n",
+ " print(f'Creating Graph of Scale = {scale}')\n",
+ " \n",
+ " gdf = generate_data(scale, edgefactor=16, mg=True)\n",
+ " gdf = gdf.repartition(gdf.npartitions * 3)\n",
+ "\n",
+ " # Copy data to host to create NX graph\n",
+ " pdf = pd.DataFrame(columns=['src', 'dst'])\n",
+ " for part_idx in range(gdf.npartitions):\n",
+ " computed_df = gdf.partitions[part_idx].compute().to_pandas()\n",
+ " pdf = pd.concat([pdf, computed_df], ignore_index=True, sort=False)\n",
+ "\n",
+ " print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n",
+ " # create the graphs\n",
+ " g_cu, tcu = create_cu_graph(gdf, mg=True)\n",
+ " g_nx, tnx = create_nx_graph(pdf)\n",
+ " cugraph_graph_creation_times[dataset] = tcu\n",
+ " nx_graph_creation_times[dataset] = tnx\n",
+ " del gdf, pdf\n",
+ " # prep\n",
+ " deg = g_cu.degree()\n",
+ " deg_max = deg['degree'].max().compute()\n",
+ " alpha = 1 / deg_max\n",
+ " num_nodes = g_cu.number_of_vertices()\n",
+ " del deg\n",
+ " gc.collect()\n",
+ "\n",
+ " #-- Katz \n",
+ " algorithm = \"Katz\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_katz(g_nx, alpha)\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_katz(g_cu, alpha, mg=True)\n",
+ " print(\"\")\n",
+ " \n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ " #-- BC\n",
+ " algorithm = \"BC\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ " k = 100\n",
+ " if k > num_nodes:\n",
+ " k = int(num_nodes)\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_bc(g_nx, k)\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_bc(g_cu, k, mg=True)\n",
+ " print(\" \")\n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ " #-- Louvain\n",
+ " algorithm = \"Louvain\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_louvain(g_nx)\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_louvain(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ " \n",
+ " #-- TC\n",
+ " algorithm = \"TC\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_tc(g_nx)\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_tc(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ " \n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ " #-- Core Number\n",
+ " algorithm = \"Core Number\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_core_num(g_nx)\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_core_num(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ "\n",
+ " #-- PageRank\n",
+ " algorithm = \"PageRank\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_pagerank(g_nx)\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_pagerank(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ "\n",
+ " #-- Jaccard\n",
+ " algorithm = \"Jaccard\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ "\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_jaccard(g_nx)\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_jaccard(g_cu, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ " # Seed for BFS and SSSP\n",
+ " nx_seed = list(g_nx.nodes)[0]\n",
+ " cu_seed = g_cu.nodes().compute().to_pandas().iloc[0]\n",
+ "\n",
+ " #-- BFS\n",
+ " algorithm = \"BFS\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_bfs(g_nx, nx_seed)\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_bfs(g_cu, seed = cu_seed, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " \n",
+ "\n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ " #-- SSSP\n",
+ " algorithm = \"SSSP\"\n",
+ " print(f\"\\t{algorithm} \", end = '')\n",
+ " print(\"n.\", end='')\n",
+ " tx = nx_sssp(g_nx, nx_seed)\n",
+ "\n",
+ " print(\"c.\", end='')\n",
+ " tc = cu_sssp(g_cu, seed = cu_seed, mg=True)\n",
+ " print(\" \")\n",
+ "\n",
+ " nx_algo_run_times[dataset][algorithm] = tx\n",
+ " cugraph_algo_run_times[dataset][algorithm] = tc\n",
+ " perf_algos[dataset][algorithm] = tx/tc \n",
+ " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
+ "\n",
+ " del g_cu, g_nx\n",
+ " gc.collect()\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### cuGraph speedup of different algorithms w.r.t. NX"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX)------\\n\")\n",
+ "print(pd.DataFrame(perf))\n",
+ "print(\"\\n\\t------Speedup (cuGraph w.r.t. NX, excluding graph creation time)------\\n\")\n",
+ "print(pd.DataFrame(perf_algos))\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Nx and cuGraph execution times for different algorithms"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "nx_and_cugraph_run_times = pd.DataFrame()\n",
+ "for dataset in cugraph_algo_run_times.keys():\n",
+ " temp_df = pd.DataFrame({'NX': nx_algo_run_times[dataset], 'cuGraph': cugraph_algo_run_times[dataset]})\n",
+ " columns = [(dataset, 'NX'), (dataset, 'cuGraph')]\n",
+ " temp_df.columns = pd.MultiIndex.from_tuples(columns)\n",
+ " nx_and_cugraph_run_times = pd.concat([temp_df, nx_and_cugraph_run_times], axis=1)\n",
+ "\n",
+ "print(\"\\n\\t------Nx and cuGraph execution times for different algorithms-----\\n\")\n",
+ "print(nx_and_cugraph_run_times)\n"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "### Clean up multi-GPU environment"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "Comms.destroy()\n",
+ "client.close()\n",
+ "cluster.close()"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": []
+ },
+ {
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "___\n",
+ "Copyright (c) 2020-2023, NVIDIA CORPORATION.\n",
+ "\n",
+ "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0\n",
+ "\n",
+ "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.\n",
+ "___"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "cudfdev",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.13"
+ },
+ "vscode": {
+ "interpreter": {
+ "hash": "587ff963ecd34554a9da41c94362e2baa062d9a57502e220f049e10816826984"
+ }
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
From 6273101f45e632dc6848257082ca62889f5c8236 Mon Sep 17 00:00:00 2001
From: Naim
Date: Fri, 9 Feb 2024 04:29:27 +0100
Subject: [PATCH 5/8] Remove redundant computation, and a few debug statements
---
.../synth_release_single_node_multi_gpu.ipynb | 22 ++++++-------------
1 file changed, 7 insertions(+), 15 deletions(-)
diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
index 1796bc489d3..727bcd84efd 100644
--- a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
@@ -40,7 +40,8 @@
"| Author | Date | Update | cuGraph Version | Test Hardware |\n",
"| --------------|------------|---------------------|-----------------|------------------------|\n",
"| Don Acosta | 1/12/2023 | Created | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
- "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n"
+ "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
+ "| Naim, Md | 2/08/2024 | Modified | 24.04 nightly | RTX A6000, CUDA 12.0 |\n"
]
},
{
@@ -124,21 +125,18 @@
"import gc\n",
"import os\n",
"from time import perf_counter\n",
- "import numpy as np\n",
- "import math\n",
"import pandas as pd\n",
"from collections import defaultdict\n",
"\n",
"# rapids\n",
"import cugraph\n",
- "import cudf\n",
"\n",
"# to parallelize with dask\n",
"import dask_cudf\n",
"from cugraph.dask.common.mg_utils import get_visible_devices\n",
"\n",
"# liblibraries to setup dask cluster and client\n",
- "from dask.distributed import Client, wait\n",
+ "from dask.distributed import Client\n",
"from dask_cuda import LocalCUDACluster\n",
"from cugraph.dask.comms import comms as Comms\n",
"\n",
@@ -308,10 +306,7 @@
" _g = cugraph.Graph(directed=directed)\n",
"\n",
" if mg:\n",
- " # Set the number of partition to #GPUs\n",
- " npartitions = 3 * len(get_visible_devices())\n",
- " _ddf = dask_cudf.from_cudf(_df.compute(), npartitions=npartitions)\n",
- " _g.from_dask_cudf_edgelist(_ddf, source=\"src\", destination=\"dst\", edge_attr=None)\n",
+ " _g.from_dask_cudf_edgelist(_df, source=\"src\", destination=\"dst\", edge_attr=None)\n",
" else:\n",
" _g.from_cudf_edgelist(_df,\n",
" source='src',\n",
@@ -695,12 +690,14 @@
" pdf = pd.concat([pdf, computed_df], ignore_index=True, sort=False)\n",
"\n",
" print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n",
- " # create the graphs\n",
+ " \n",
+ " # create cuGraph and NX graphs\n",
" g_cu, tcu = create_cu_graph(gdf, mg=True)\n",
" g_nx, tnx = create_nx_graph(pdf)\n",
" cugraph_graph_creation_times[dataset] = tcu\n",
" nx_graph_creation_times[dataset] = tnx\n",
" del gdf, pdf\n",
+ "\n",
" # prep\n",
" deg = g_cu.degree()\n",
" deg_max = deg['degree'].max().compute()\n",
@@ -753,7 +750,6 @@
" perf_algos[dataset][algorithm] = tx/tc \n",
" perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
"\n",
- " \n",
" #-- TC\n",
" algorithm = \"TC\"\n",
" print(f\"\\t{algorithm} \", end = '')\n",
@@ -782,7 +778,6 @@
" perf_algos[dataset][algorithm] = tx/tc \n",
" perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
"\n",
- "\n",
" #-- PageRank\n",
" algorithm = \"PageRank\"\n",
" print(f\"\\t{algorithm} \", end = '')\n",
@@ -797,7 +792,6 @@
" perf_algos[dataset][algorithm] = tx/tc \n",
" perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n",
"\n",
- "\n",
" #-- Jaccard\n",
" algorithm = \"Jaccard\"\n",
" print(f\"\\t{algorithm} \", end = '')\n",
@@ -826,8 +820,6 @@
" tc = cu_bfs(g_cu, seed = cu_seed, mg=True)\n",
" print(\" \")\n",
"\n",
- " \n",
- "\n",
" nx_algo_run_times[dataset][algorithm] = tx\n",
" cugraph_algo_run_times[dataset][algorithm] = tc\n",
" perf_algos[dataset][algorithm] = tx/tc \n",
From 6796abe232cdfe8315b6c85dad21a4804ac16b3a Mon Sep 17 00:00:00 2001
From: Naim
Date: Fri, 9 Feb 2024 04:33:32 +0100
Subject: [PATCH 6/8] Minor update to markup
---
.../synth_release_single_node_multi_gpu.ipynb | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
index 727bcd84efd..427cabece6b 100644
--- a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
@@ -21,7 +21,7 @@
"tags": []
},
"source": [
- "# Comparing NetworkX vs cuGraph using synthetic data on various algorithms on single node multi GPU cluster\n",
+ "# Comparing NetworkX vs cuGraph using synthetic data on various algorithms on single node multi GPU (SNMG) cluster\n",
"\n",
"\n",
"This notebook compares the execution times of many of the cuGraph and NetworkX algorithms when run against identical synthetic data at multiple scales.\n",
@@ -41,7 +41,7 @@
"| --------------|------------|---------------------|-----------------|------------------------|\n",
"| Don Acosta | 1/12/2023 | Created | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
"| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n",
- "| Naim, Md | 2/08/2024 | Modified | 24.04 nightly | RTX A6000, CUDA 12.0 |\n"
+ "| Naim, Md | 2/08/2024 | Modified for SNMG | 24.04 nightly | RTX A6000, CUDA 12.0 |\n"
]
},
{
From f3645f326898955985fecac9d967371a81fce67d Mon Sep 17 00:00:00 2001
From: Naim
Date: Fri, 9 Feb 2024 04:47:58 +0100
Subject: [PATCH 7/8] Remove unused imports
---
.../synth_release_single_node_multi_gpu.ipynb | 6 +-----
1 file changed, 1 insertion(+), 5 deletions(-)
diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
index 427cabece6b..1e371ce4bca 100644
--- a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
@@ -131,10 +131,6 @@
"# rapids\n",
"import cugraph\n",
"\n",
- "# to parallelize with dask\n",
- "import dask_cudf\n",
- "from cugraph.dask.common.mg_utils import get_visible_devices\n",
- "\n",
"# liblibraries to setup dask cluster and client\n",
"from dask.distributed import Client\n",
"from dask_cuda import LocalCUDACluster\n",
@@ -880,7 +876,7 @@
"nx_and_cugraph_run_times = pd.DataFrame()\n",
"for dataset in cugraph_algo_run_times.keys():\n",
" temp_df = pd.DataFrame({'NX': nx_algo_run_times[dataset], 'cuGraph': cugraph_algo_run_times[dataset]})\n",
- " columns = [(dataset, 'NX'), (dataset, 'cuGraph')]\n",
+ " columns = [(dataset, 'cuGraph'), (dataset, 'NX')]\n",
" temp_df.columns = pd.MultiIndex.from_tuples(columns)\n",
" nx_and_cugraph_run_times = pd.concat([temp_df, nx_and_cugraph_run_times], axis=1)\n",
"\n",
From 73ed7d2acff8a62e9c1bfaf22e354d8d5d60e142 Mon Sep 17 00:00:00 2001
From: Naim
Date: Fri, 9 Feb 2024 17:04:55 +0100
Subject: [PATCH 8/8] Add description of local dask cluster
---
.../synth_release_single_node_multi_gpu.ipynb | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
index 1e371ce4bca..c44f475c441 100644
--- a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
+++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb
@@ -617,7 +617,8 @@
"cell_type": "markdown",
"metadata": {},
"source": [
- "### Initialize multi-GPU environment\n"
+ "### Initialize multi-GPU environment\n",
+ "Before we get started, we need to set up a dask (local) cluster of workers to execute our work, and a client to coordinate and schedule work for that cluster.\n"
]
},
{
@@ -627,7 +628,6 @@
"outputs": [],
"source": [
"# Setup a local dask cluster of workers, and a client\n",
- "\n",
"cluster = LocalCUDACluster()\n",
"client = Client(cluster)\n",
"Comms.initialize(p2p=True)"
@@ -937,7 +937,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.10.13"
+ "version": "3.1.0"
},
"vscode": {
"interpreter": {