From 6273101f45e632dc6848257082ca62889f5c8236 Mon Sep 17 00:00:00 2001 From: Naim Date: Fri, 9 Feb 2024 04:29:27 +0100 Subject: [PATCH] Remove redundant computation, and a few debug statements --- .../synth_release_single_node_multi_gpu.ipynb | 22 ++++++------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb index 1796bc489d3..727bcd84efd 100644 --- a/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb +++ b/notebooks/cugraph_benchmarks/synth_release_single_node_multi_gpu.ipynb @@ -40,7 +40,8 @@ "| Author | Date | Update | cuGraph Version | Test Hardware |\n", "| --------------|------------|---------------------|-----------------|------------------------|\n", "| Don Acosta | 1/12/2023 | Created | 23.02 nightly | RTX A6000, CUDA 11.7 |\n", - "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n" + "| Brad Rees | 1/27/2023 | Modified | 23.02 nightly | RTX A6000, CUDA 11.7 |\n", + "| Naim, Md | 2/08/2024 | Modified | 24.04 nightly | RTX A6000, CUDA 12.0 |\n" ] }, { @@ -124,21 +125,18 @@ "import gc\n", "import os\n", "from time import perf_counter\n", - "import numpy as np\n", - "import math\n", "import pandas as pd\n", "from collections import defaultdict\n", "\n", "# rapids\n", "import cugraph\n", - "import cudf\n", "\n", "# to parallelize with dask\n", "import dask_cudf\n", "from cugraph.dask.common.mg_utils import get_visible_devices\n", "\n", "# liblibraries to setup dask cluster and client\n", - "from dask.distributed import Client, wait\n", + "from dask.distributed import Client\n", "from dask_cuda import LocalCUDACluster\n", "from cugraph.dask.comms import comms as Comms\n", "\n", @@ -308,10 +306,7 @@ " _g = cugraph.Graph(directed=directed)\n", "\n", " if mg:\n", - " # Set the number of partition to #GPUs\n", - " npartitions = 3 * len(get_visible_devices())\n", - " _ddf = dask_cudf.from_cudf(_df.compute(), npartitions=npartitions)\n", - " _g.from_dask_cudf_edgelist(_ddf, source=\"src\", destination=\"dst\", edge_attr=None)\n", + " _g.from_dask_cudf_edgelist(_df, source=\"src\", destination=\"dst\", edge_attr=None)\n", " else:\n", " _g.from_cudf_edgelist(_df,\n", " source='src',\n", @@ -695,12 +690,14 @@ " pdf = pd.concat([pdf, computed_df], ignore_index=True, sort=False)\n", "\n", " print(f\"\\tdata in gdf {len(gdf)} and data in pandas {len(pdf)}\")\n", - " # create the graphs\n", + " \n", + " # create cuGraph and NX graphs\n", " g_cu, tcu = create_cu_graph(gdf, mg=True)\n", " g_nx, tnx = create_nx_graph(pdf)\n", " cugraph_graph_creation_times[dataset] = tcu\n", " nx_graph_creation_times[dataset] = tnx\n", " del gdf, pdf\n", + "\n", " # prep\n", " deg = g_cu.degree()\n", " deg_max = deg['degree'].max().compute()\n", @@ -753,7 +750,6 @@ " perf_algos[dataset][algorithm] = tx/tc \n", " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", - " \n", " #-- TC\n", " algorithm = \"TC\"\n", " print(f\"\\t{algorithm} \", end = '')\n", @@ -782,7 +778,6 @@ " perf_algos[dataset][algorithm] = tx/tc \n", " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", - "\n", " #-- PageRank\n", " algorithm = \"PageRank\"\n", " print(f\"\\t{algorithm} \", end = '')\n", @@ -797,7 +792,6 @@ " perf_algos[dataset][algorithm] = tx/tc \n", " perf[dataset][algorithm] = (tx + tnx) / (tc + tcu)\n", "\n", - "\n", " #-- Jaccard\n", " algorithm = \"Jaccard\"\n", " print(f\"\\t{algorithm} \", end = '')\n", @@ -826,8 +820,6 @@ " tc = cu_bfs(g_cu, seed = cu_seed, mg=True)\n", " print(\" \")\n", "\n", - " \n", - "\n", " nx_algo_run_times[dataset][algorithm] = tx\n", " cugraph_algo_run_times[dataset][algorithm] = tc\n", " perf_algos[dataset][algorithm] = tx/tc \n",