Commit

Merge remote-tracking branch 'upstream/branch-23.10' into branch-23.10_refactor-legacy-k-truss

jnke2016 committed Sep 29, 2023
2 parents 4127838 + 6e5e066 commit 8f18b43
Showing 28 changed files with 400 additions and 89 deletions.
1 change: 1 addition & 0 deletions ci/release/update-version.sh
@@ -62,6 +62,7 @@ sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugr
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/cugraph-service/server/cugraph_service_server/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/pylibcugraph/pylibcugraph/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/nx_cugraph/__init__.py
sed_runner "s/__version__ = .*/__version__ = \"${NEXT_FULL_TAG}\"/g" python/nx-cugraph/_nx_cugraph/__init__.py

# Python pyproject.toml updates
sed_runner "s/^version = .*/version = \"${NEXT_FULL_TAG}\"/g" python/cugraph/pyproject.toml
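The `sed_runner` calls above all apply the same substitution to different `__init__.py` files; a minimal Python sketch of that pattern (the `bump_version` helper name is ours, not part of the repo's script):

```python
import re

def bump_version(text: str, next_full_tag: str) -> str:
    # Same substitution as the sed expression above: rewrite any
    # __version__ assignment line to point at the new release tag.
    return re.sub(r'__version__ = .*', f'__version__ = "{next_full_tag}"', text)

print(bump_version('__version__ = "23.08.00"', "23.10.00"))
# → __version__ = "23.10.00"
```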
2 changes: 1 addition & 1 deletion ci/test_wheel_cugraph.sh
@@ -9,6 +9,6 @@ RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-whe
python -m pip install --no-deps ./local-pylibcugraph-dep/pylibcugraph*.whl

# Always install latest dask for testing
python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main
python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2

./ci/test_wheel.sh cugraph python/cugraph
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-118_arch-x86_64.yaml
@@ -19,11 +19,11 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.7.1
- dask-core==2023.9.2
- dask-cuda==23.10.*
- dask-cudf==23.10.*
- dask>=2023.7.1
- distributed>=2023.7.1
- dask==2023.9.2
- distributed==2023.9.2
- doxygen
- fsspec>=0.6.0
- gcc_linux-64=11.*
6 changes: 3 additions & 3 deletions conda/environments/all_cuda-120_arch-x86_64.yaml
@@ -19,11 +19,11 @@ dependencies:
- cupy>=12.0.0
- cxx-compiler
- cython>=3.0.0
- dask-core>=2023.7.1
- dask-core==2023.9.2
- dask-cuda==23.10.*
- dask-cudf==23.10.*
- dask>=2023.7.1
- distributed>=2023.7.1
- dask==2023.9.2
- distributed==2023.9.2
- doxygen
- fsspec>=0.6.0
- gcc_linux-64=11.*
2 changes: 1 addition & 1 deletion conda/recipes/cugraph-pyg/meta.yaml
@@ -26,7 +26,7 @@ requirements:
- python
- scikit-build >=0.13.1
run:
- distributed >=2023.7.1
- distributed ==2023.9.2
- numba >=0.57
- numpy >=1.21
- python
2 changes: 1 addition & 1 deletion conda/recipes/cugraph-service/meta.yaml
@@ -59,7 +59,7 @@ outputs:
- cupy >=12.0.0
- dask-cuda ={{ minor_version }}
- dask-cudf ={{ minor_version }}
- distributed >=2023.7.1
- distributed ==2023.9.2
- numba >=0.57
- numpy >=1.21
- python
6 changes: 3 additions & 3 deletions conda/recipes/cugraph/meta.yaml
@@ -76,9 +76,9 @@ requirements:
- cupy >=12.0.0
- dask-cuda ={{ minor_version }}
- dask-cudf ={{ minor_version }}
- dask >=2023.7.1
- dask-core >=2023.7.1
- distributed >=2023.7.1
- dask ==2023.9.2
- dask-core ==2023.9.2
- distributed ==2023.9.2
- fsspec>=0.6.0
- libcugraph ={{ version }}
- pylibcugraph ={{ version }}
6 changes: 3 additions & 3 deletions dependencies.yaml
@@ -373,15 +373,15 @@ dependencies:
common:
- output_types: [conda, pyproject]
packages:
- &dask dask>=2023.7.1
- &distributed distributed>=2023.7.1
- &dask dask==2023.9.2
- &distributed distributed==2023.9.2
- &dask_cuda dask-cuda==23.10.*
- &numba numba>=0.57
- &ucx_py ucx-py==0.34.*
- output_types: conda
packages:
- aiohttp
- &dask-core_conda dask-core>=2023.7.1
- &dask-core_conda dask-core==2023.9.2
- fsspec>=0.6.0
- libcudf==23.10.*
- requests
13 changes: 9 additions & 4 deletions docs/cugraph/source/installation/source_build.md
@@ -6,10 +6,10 @@ The cuGraph package include both a C/C++ CUDA portion and a python portion. Bot

## Prerequisites

__Compiler__:
* `gcc` version 9.3+
* `nvcc` version 11.0+
* `cmake` version 3.20.1+
__Compiler:__
* `gcc` version 9.3+
* `nvcc` version 11.0+
* `cmake` version 3.20.1+

__CUDA:__
* CUDA 11.0+
@@ -18,6 +18,11 @@ __CUDA:__

You can obtain CUDA from [https://developer.nvidia.com/cuda-downloads](https://developer.nvidia.com/cuda-downloads).

__Packages:__
* `cmake` version 3.20.1+
* `libcugraphops` (version matching source branch version, eg. `23.10`)

You can obtain `libcugraphops` using `conda`/`mamba` from the `nvidia` channel, or using `pip` with the `--extra-index-url=https://pypi.nvidia.com` option. See the [RAPIDS docs](https://docs.rapids.ai/install#environment) for more details.

## Building cuGraph
To install cuGraph from source, ensure the dependencies are met.
4 changes: 2 additions & 2 deletions python/cugraph-service/server/pyproject.toml
@@ -25,8 +25,8 @@ dependencies = [
"cupy-cuda11x>=12.0.0",
"dask-cuda==23.10.*",
"dask-cudf==23.10.*",
"dask>=2023.7.1",
"distributed>=2023.7.1",
"dask==2023.9.2",
"distributed==2023.9.2",
"numba>=0.57",
"numpy>=1.21",
"rmm==23.10.*",
91 changes: 73 additions & 18 deletions python/cugraph/cugraph/community/louvain.py
@@ -11,7 +11,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.

from typing import Union, Tuple
from cugraph.structure import Graph
from cugraph.utilities import (
is_nx_graph_type,
ensure_cugraph_obj_for_nx,
df_score_to_dictionary,
)
@@ -21,9 +24,26 @@
from pylibcugraph import louvain as pylibcugraph_louvain
from pylibcugraph import ResourceHandle

from cugraph.utilities.utils import import_optional

# FIXME: the networkx.Graph type used in type annotations is specified
# using a string literal to avoid depending on and importing networkx.
# Instead, networkx is imported optionally, which may cause a problem
# for a type checker if run in an environment where networkx is not installed.
networkx = import_optional("networkx")

VERTEX_COL_NAME = "vertex"
CLUSTER_ID_COL_NAME = "partition"


# FIXME: max_level should default to 100 once max_iter is removed
def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
def louvain(
G: Union[Graph, "networkx.Graph"],
max_level: Union[int, None] = None,
max_iter: Union[int, None] = None,
resolution: float = 1.0,
threshold: float = 1e-7,
) -> Tuple[Union[cudf.DataFrame, dict], float]:
"""
Compute the modularity optimizing partition of the input graph using the
Louvain method
@@ -48,6 +68,9 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
than the specified number of levels. No error occurs when the
algorithm terminates early in this manner.
If max_level > 500, it will be set to 500 and a warning is emitted
in order to prevent excessive runtime.
max_iter : integer, optional (default=None)
This parameter is deprecated in favor of max_level. Previously
it was used to control the maximum number of levels of the Louvain
@@ -68,18 +91,21 @@
Returns
-------
parts : cudf.DataFrame
GPU data frame of size V containing two columns the vertex id and the
partition id it is assigned to.
result: cudf.DataFrame or dict
If input graph G is of type cugraph.Graph, a GPU dataframe
with two columns.
result[VERTEX_COL_NAME] : cudf.Series
Contains the vertex identifiers
result[CLUSTER_ID_COL_NAME] : cudf.Series
Contains the partition assigned to the vertices
df['vertex'] : cudf.Series
Contains the vertex identifiers
df['partition'] : cudf.Series
Contains the partition assigned to the vertices
If input graph G is of type networkx.Graph, a dict
Dictionary of vertices and their partition ids.
modularity_score : float
a floating point number containing the global modularity score of the
partitioning.
A floating point number containing the global modularity score
of the partitioning.
Examples
--------
@@ -89,6 +115,17 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
"""

# FIXME: Once the graph construction calls support isolated vertices through
# the C API (the C++ interface already supports this) then there will be
# no need to compute isolated vertices here.

isolated_vertices = list()
if is_nx_graph_type(type(G)):
isolated_vertices = [v for v in range(G.number_of_nodes()) if G.degree[v] == 0]
else:
# FIXME: Gather isolated vertices of G
pass

G, isNx = ensure_cugraph_obj_for_nx(G)

if G.is_directed():
@@ -112,7 +149,12 @@ def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7):
if max_level is None:
max_level = 100

vertex, partition, mod_score = pylibcugraph_louvain(
if max_level > 500:
w_msg = "max_level is set too high, clamping it down to 500."
warnings.warn(w_msg)
max_level = 500

vertex, partition, modularity_score = pylibcugraph_louvain(
resource_handle=ResourceHandle(),
graph=G._plc_graph,
max_level=max_level,
@@ -121,14 +163,27 @@
do_expensive_check=False,
)

df = cudf.DataFrame()
df["vertex"] = vertex
df["partition"] = partition
result = cudf.DataFrame()
result[VERTEX_COL_NAME] = vertex
result[CLUSTER_ID_COL_NAME] = partition

if len(isolated_vertices) > 0:
unique_cids = result[CLUSTER_ID_COL_NAME].unique()
max_cluster_id = -1 if len(result) == 0 else unique_cids.max()

isolated_vtx_and_cids = cudf.DataFrame()
isolated_vtx_and_cids[VERTEX_COL_NAME] = isolated_vertices
isolated_vtx_and_cids[CLUSTER_ID_COL_NAME] = [
(max_cluster_id + i + 1) for i in range(len(isolated_vertices))
]
result = cudf.concat(
[result, isolated_vtx_and_cids], ignore_index=True, sort=False
)

if G.renumbered:
df = G.unrenumber(df, "vertex")
if G.renumbered and len(G.input_df) > 0:
result = G.unrenumber(result, VERTEX_COL_NAME)

if isNx is True:
df = df_score_to_dictionary(df, "partition")
result = df_score_to_dictionary(result, CLUSTER_ID_COL_NAME)

return df, mod_score
return result, modularity_score
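The isolated-vertex handling added above gives each isolated vertex a fresh cluster id starting one past the current maximum. That logic can be sketched with plain Python dicts — a simplified stand-in for the cudf concat, using a hypothetical helper name:

```python
def assign_isolated_clusters(partition, isolated_vertices):
    """Assign each isolated vertex its own new cluster id, starting one
    past the largest id already in use (mirrors the cudf logic above)."""
    max_cluster_id = max(partition.values(), default=-1)
    result = dict(partition)
    for i, v in enumerate(isolated_vertices):
        result[v] = max_cluster_id + 1 + i
    return result

print(assign_isolated_clusters({0: 0, 1: 0, 2: 1}, [3, 4]))
# → {0: 0, 1: 0, 2: 1, 3: 2, 4: 3}
```

As in the diff, an empty partition starts numbering at 0, so every isolated vertex ends up in its own singleton cluster.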
@@ -14,7 +14,6 @@
import gc
from typing import Union
import warnings
import random

import cudf
import cupy as cp
@@ -182,10 +181,7 @@ def __from_edgelist(
workers = _client.scheduler_info()["workers"]
# Repartition to 2 partitions per GPU for memory efficient process
input_ddf = input_ddf.repartition(npartitions=len(workers) * 2)
# FIXME: Make a copy of the input ddf before implicitly altering it.
input_ddf = input_ddf.map_partitions(
lambda df: df.copy(), token="custom-" + str(random.random())
)
input_ddf = input_ddf.map_partitions(lambda df: df.copy())
# The dataframe will be symmetrized iff the graph is undirected
# otherwise, the initial dataframe will be returned
if edge_attr is not None:
@@ -337,7 +333,7 @@ def __from_edgelist(
)
for w, edata in ddf.items()
}
del ddf
# FIXME: For now, don't delete the copied dataframe to avoid crash
self._plc_graph = {
w: _client.compute(delayed_task, workers=w, allow_other_workers=False)
for w, delayed_task in delayed_tasks_d.items()
@@ -1196,7 +1192,5 @@ def _get_column_from_ls_dfs(lst_df, col_name):
if len_df == 0:
return lst_df[0][col_name]
output_col = cudf.concat([df[col_name] for df in lst_df], ignore_index=True)
for df in lst_df:
df.drop(columns=[col_name], inplace=True)
gc.collect()
# FIXME: For now, don't delete the copied dataframe to avoid crash
return output_col
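`_get_column_from_ls_dfs` above concatenates a single named column across a list of dataframes. A CPU-only sketch of the same idea using dicts of lists (hypothetical names, no cudf required):

```python
def get_column_from_frames(frames, col_name):
    # Concatenate one named column across a list of "frames",
    # mirroring cudf.concat(..., ignore_index=True) in the helper above.
    out = []
    for frame in frames:
        out.extend(frame.get(col_name, []))
    return out

print(get_column_from_frames([{"v": [1, 2]}, {"v": [3]}], "v"))  # → [1, 2, 3]
```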
16 changes: 16 additions & 0 deletions python/cugraph/cugraph/tests/community/test_louvain.py
@@ -142,3 +142,19 @@ def test_louvain_csr_graph(is_weighted):

assert len(parition_diffs) == 0
assert mod_csr == mod_coo


@pytest.mark.sg
def test_louvain_nx_graph_with_isolated_nodes():
# Cluster IDs are expected to be unique if all nodes are isolated
G = nx.Graph()
G.add_nodes_from(range(5))
result, _ = cugraph.louvain(G)
assert set(result.keys()) == set(G.nodes)
assert len(set(result.values())) == G.number_of_nodes()

# A graph with 5 nodes, where 3 of the nodes are isolated
G.add_edge(1, 2)
result, _ = cugraph.louvain(G)
assert set(result.keys()) == set(G.nodes)
assert len(set(result.values())) == G.number_of_nodes() - 1
4 changes: 2 additions & 2 deletions python/cugraph/cugraph/utilities/utils.py
@@ -364,8 +364,8 @@ def is_matrix_type(m):
return is_cp_matrix_type(m) or is_sp_matrix_type(m)


def is_nx_graph_type(g):
return g in __nx_graph_types
def is_nx_graph_type(graph_type):
return graph_type in __nx_graph_types


def is_cugraph_graph_type(g):
4 changes: 2 additions & 2 deletions python/cugraph/pyproject.toml
@@ -33,8 +33,8 @@ dependencies = [
"cupy-cuda11x>=12.0.0",
"dask-cuda==23.10.*",
"dask-cudf==23.10.*",
"dask>=2023.7.1",
"distributed>=2023.7.1",
"dask==2023.9.2",
"distributed==2023.9.2",
"fsspec[http]>=0.6.0",
"numba>=0.57",
"pylibcugraph==23.10.*",
1 change: 1 addition & 0 deletions python/nx-cugraph/.flake8
@@ -11,3 +11,4 @@ extend-ignore =
per-file-ignores =
nx_cugraph/tests/*.py:T201,
__init__.py:F401,F403,
_nx_cugraph/__init__.py:E501,
10 changes: 10 additions & 0 deletions python/nx-cugraph/Makefile
@@ -1,7 +1,17 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
SHELL= /bin/bash

.PHONY: all
all: plugin-info lint

.PHONY: lint
lint:
git ls-files | xargs pre-commit run --config lint.yaml --files

.PHONY: lint-update
lint-update:
pre-commit autoupdate --config lint.yaml

.PHONY: plugin-info
plugin-info:
python _nx_cugraph/__init__.py