From 4ee227c7c84ef487828ecadd5fe86934f1fce4eb Mon Sep 17 00:00:00 2001
From: Joseph Nke <76006812+jnke2016@users.noreply.github.com>
Date: Thu, 7 Sep 2023 16:39:22 -0500
Subject: [PATCH 1/3] Remove the assumption made on the client data's keys (#3835)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When calling `client.has_what()`, which returns the data keys held in each
worker's memory, those keys used to be returned as strings, but a recent
change in `dask` changed their type to tuples:

From `{worker_ip_address: ("('from-delayed-190587f1b2318dc54d5f92a79e59b71a', 0)", "('from-delayed-190587f1b2318dc54d5f92a79e59b71a', 1)")}`
to `{worker_ip_address: (('from-delayed-c3d92b2cc9948634e82a0b2b62453a6c', 0), ('from-delayed-c3d92b2cc9948634e82a0b2b62453a6c', 1))}`

When mapping workers to persisted data in the function
`get_persisted_df_worker_map`, an assumption was made about the type of
those keys, thereby breaking our MG tests. This PR removes that assumption.

Closes #3834

Authors:
  - Joseph Nke (https://github.com/jnke2016)
  - Alex Barghi (https://github.com/alexbarghi-nv)

Approvers:
  - Alex Barghi (https://github.com/alexbarghi-nv)

URL: https://github.com/rapidsai/cugraph/pull/3835
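For illustration, here is a minimal, self-contained sketch of the comparison this patch moves to, assuming a local `dask.distributed` cluster; the collection, chunk size, and task names are placeholders and not part of the patch:

```python
import dask.array as da
from dask.distributed import Client, futures_of, wait

client = Client()  # small local cluster, for illustration only

# Persist a tiny collection so its chunks are materialized on workers.
arr = client.persist(da.ones(10, chunks=5))
futures = futures_of(arr)
wait(futures)  # make sure the chunks have actually landed in worker memory

# client.has_what() maps each worker address to the task keys it holds.
# Newer dask returns those keys as tuples such as ('ones-...', 0) rather
# than their str() form, so futures are matched by comparing future.key
# directly instead of str(future.key).
worker_map = {}
for worker, keys in client.has_what().items():
    worker_map[worker] = [f for f in futures if f.key in keys]
```

This is the same string-free membership test that `get_persisted_df_worker_map` performs after this patch.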
---
 .../cugraph/cugraph/dask/common/part_utils.py | 20 +++++++++----------
 1 file changed, 10 insertions(+), 10 deletions(-)

diff --git a/python/cugraph/cugraph/dask/common/part_utils.py b/python/cugraph/cugraph/dask/common/part_utils.py
index fda7e257367..7c0aad6c3ee 100644
--- a/python/cugraph/cugraph/dask/common/part_utils.py
+++ b/python/cugraph/cugraph/dask/common/part_utils.py
@@ -73,7 +73,7 @@ def persist_distributed_data(dask_df, client):
     _keys = dask_df.__dask_keys__()
     worker_dict = {}
     for i, key in enumerate(_keys):
-        worker_dict[str(key)] = tuple([worker_addresses[i]])
+        worker_dict[key] = tuple([worker_addresses[i]])
     persisted = client.persist(dask_df, workers=worker_dict)
     parts = futures_of(persisted)
     return parts
@@ -89,7 +89,7 @@ def get_persisted_df_worker_map(dask_df, client):
     ddf_keys = futures_of(dask_df)
     output_map = {}
     for w, w_keys in client.has_what().items():
-        output_map[w] = [ddf_k for ddf_k in ddf_keys if str(ddf_k.key) in w_keys]
+        output_map[w] = [ddf_k for ddf_k in ddf_keys if ddf_k.key in w_keys]
         if len(output_map[w]) == 0:
             output_map[w] = _create_empty_dask_df_future(dask_df._meta, client, w)
     return output_map
@@ -157,7 +157,7 @@ async def _extract_partitions(
         # NOTE: We colocate (X, y) here by zipping delayed
         # n partitions of them as (X1, y1), (X2, y2)...
         # and asking client to compute a single future for
-        # each tuple in the list
+        # each tuple in the list.
         dela = [np.asarray(d.to_delayed()) for d in dask_obj]
 
     # TODO: ravel() is causing strange behavior w/ delayed Arrays which are
@@ -167,7 +167,7 @@ async def _extract_partitions(
     parts = client.compute([p for p in zip(*raveled)])
 
     await wait(parts)
-    key_to_part = [(str(part.key), part) for part in parts]
+    key_to_part = [(part.key, part) for part in parts]
     who_has = await client.who_has(parts)
     return [(first(who_has[key]), part) for key, part in key_to_part]
 
@@ -229,7 +229,7 @@ def load_balance_func(ddf_, by, client=None):
     wait(parts)
 
     who_has = client.who_has(parts)
-    key_to_part = [(str(part.key), part) for part in parts]
+    key_to_part = [(part.key, part) for part in parts]
     gpu_fututres = [
         (first(who_has[key]), part.key[1], part) for key, part in key_to_part
     ]
@@ -245,7 +245,7 @@ def load_balance_func(ddf_, by, client=None):
     for cumsum in cumsum_parts:
         num_rows.append(cumsum.iloc[-1])
 
-    # Calculate current partition divisions
+    # Calculate current partition divisions.
     divisions = [sum(num_rows[0:x:1]) for x in range(0, len(num_rows) + 1)]
     divisions[-1] = divisions[-1] - 1
     divisions = tuple(divisions)
@@ -271,7 +271,7 @@ def load_balance_func(ddf_, by, client=None):
 
 def concat_dfs(df_list):
     """
-    Concat a list of cudf dataframes
+    Concat a list of cudf dataframes.
     """
     return cudf.concat(df_list)
 
@@ -279,17 +279,17 @@ def concat_dfs(df_list):
 def get_delayed_dict(ddf):
     """
     Returns a dicitionary with the dataframe tasks as keys and
-    the dataframe delayed objects as values
+    the dataframe delayed objects as values.
     """
     df_delayed = {}
     for delayed_obj in ddf.to_delayed():
-        df_delayed[str(delayed_obj.key)] = delayed_obj
+        df_delayed[delayed_obj.key] = delayed_obj
     return df_delayed
 
 
 def concat_within_workers(client, ddf):
     """
-    Concats all partitions within workers without transfers
+    Concats all partitions within workers without transfers.
     """
     df_delayed = get_delayed_dict(ddf)

From 6779e896edf310f5bcaad5acb8673995041c2801 Mon Sep 17 00:00:00 2001
From: ralph <137829296+nv-rliu@users.noreply.github.com>
Date: Fri, 8 Sep 2023 09:58:44 -0400
Subject: [PATCH 2/3] Adding metadata getter methods to datasets API (#3821)

Closes #3820

This PR adds simple getter methods to the `dataset` class, which allow users
to easily get information about a dataset without needing to access its
`metadata` dict or look in the directory.

```python
from cugraph.datasets import karate

# users now call
karate.number_of_nodes()
# instead of
karate.metadata['number_of_nodes']
```

Authors:
  - ralph (https://github.com/nv-rliu)

Approvers:
  - Alex Barghi (https://github.com/alexbarghi-nv)

URL: https://github.com/rapidsai/cugraph/pull/3821
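To make the new surface concrete, here is a short usage sketch that mirrors the test added below; it assumes only the bundled `karate` dataset and the `metadata` dict described above:

```python
from cugraph.datasets import karate

# Each getter is a thin wrapper over the dataset's metadata dict, so no
# download or graph construction is needed to answer these questions.
assert karate.is_directed() == karate.metadata["is_directed"]
assert karate.is_multigraph() == karate.metadata["is_multigraph"]
assert karate.is_symmetric() == karate.metadata["is_symmetric"]
assert karate.number_of_edges() == karate.metadata["number_of_edges"]

# number_of_nodes() is an alias of number_of_vertices(); both read the
# "number_of_nodes" metadata entry.
assert karate.number_of_nodes() == karate.number_of_vertices()
```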
+        """
+        return self.metadata["is_symmetric"]
+
+    def number_of_nodes(self):
+        """
+        An alias of number_of_vertices()
+        """
+        return self.number_of_vertices()
+
+    def number_of_vertices(self):
+        """
+        Get the number of vertices in the graph.
+        """
+        return self.metadata["number_of_nodes"]
+
+    def number_of_edges(self):
+        """
+        Get the number of edges in the graph.
+        """
+        return self.metadata["number_of_edges"]
+
 
 def download_all(force=False):
     """

diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py
index 643d0468d46..c2a4f7c6072 100644
--- a/python/cugraph/cugraph/tests/utils/test_dataset.py
+++ b/python/cugraph/cugraph/tests/utils/test_dataset.py
@@ -328,6 +328,16 @@ def test_is_multigraph(dataset):
     assert G.is_multigraph() == dataset.metadata["is_multigraph"]
 
 
+@pytest.mark.parametrize("dataset", ALL_DATASETS)
+def test_object_getters(dataset):
+    assert dataset.is_directed() == dataset.metadata["is_directed"]
+    assert dataset.is_multigraph() == dataset.metadata["is_multigraph"]
+    assert dataset.is_symmetric() == dataset.metadata["is_symmetric"]
+    assert dataset.number_of_nodes() == dataset.metadata["number_of_nodes"]
+    assert dataset.number_of_vertices() == dataset.metadata["number_of_nodes"]
+    assert dataset.number_of_edges() == dataset.metadata["number_of_edges"]
+
+
 #
 # Test experimental for DeprecationWarnings
 #

From 17b34479094e42e1401d0e5354d8da98672ba291 Mon Sep 17 00:00:00 2001
From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com>
Date: Fri, 8 Sep 2023 13:38:22 -0500
Subject: [PATCH 3/3] Uses `conda mambabuild` rather than `mamba mambabuild` (#3853)

Applies the same changes, for the same reasons, as cuDF PR
https://github.com/rapidsai/cudf/pull/14067 to cuGraph.

Authors:
  - Rick Ratzel (https://github.com/rlratzel)

Approvers:
  - Ray Douglass (https://github.com/raydouglass)

URL: https://github.com/rapidsai/cugraph/pull/3853
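The substitution itself is mechanical, shown here on the simplest invocation from `ci/build_cpp.sh`; the `rapids-*-retry` scripts are RAPIDS CI wrappers around the underlying tool, and this sketch only restates the one-word change made by the diffs that follow:

```sh
# Before: the retry wrapper drove `mamba mambabuild`
rapids-mamba-retry mambabuild conda/recipes/libcugraph

# After: the retry wrapper drives `conda mambabuild` instead
rapids-conda-retry mambabuild conda/recipes/libcugraph
```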
---
 ci/build_cpp.sh    |  2 +-
 ci/build_python.sh | 12 ++++++------
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh
index 3fd57f24c40..3fb72cac08b 100755
--- a/ci/build_cpp.sh
+++ b/ci/build_cpp.sh
@@ -11,6 +11,6 @@ rapids-print-env
 
 rapids-logger "Begin cpp build"
 
-rapids-mamba-retry mambabuild conda/recipes/libcugraph
+rapids-conda-retry mambabuild conda/recipes/libcugraph
 
 rapids-upload-conda-to-s3 cpp

diff --git a/ci/build_python.sh b/ci/build_python.sh
index 429ba649d1d..62eb6c2ccec 100755
--- a/ci/build_python.sh
+++ b/ci/build_python.sh
@@ -15,12 +15,12 @@ rapids-logger "Begin py build"
 # TODO: Remove `--no-test` flags once importing on a CPU
 # node works correctly
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
   conda/recipes/pylibcugraph
 
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
   --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
   conda/recipes/cugraph
 
 # platform to ensure it is included in each set of artifacts, since test
 # scripts only install from one set of artifacts based on the CUDA version used
 # for the test run.
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
   --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
 
@@ -40,7 +40,7 @@
 # built on each CUDA platform to ensure they are included in each set of
 # artifacts, since test scripts only install from one set of artifacts based on
 # the CUDA version used for the test run.
-rapids-mamba-retry mambabuild \
+rapids-conda-retry mambabuild \
   --no-test \
   --channel "${CPP_CHANNEL}" \
   --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
 
 RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}"
 
 if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then
   # Only CUDA 11 is supported right now due to PyTorch requirement.
-  rapids-mamba-retry mambabuild \
+  rapids-conda-retry mambabuild \
     --no-test \
     --channel "${CPP_CHANNEL}" \
     --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \
     conda/recipes/cugraph-pyg
 
   # Only CUDA 11 is supported right now due to PyTorch requirement.
-  rapids-mamba-retry mambabuild \
+  rapids-conda-retry mambabuild \
     --no-test \
     --channel "${CPP_CHANNEL}" \
     --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \