From 5ce3ee1b11db62647337514a200845bbb392d351 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 3 Oct 2023 16:50:10 -0500 Subject: [PATCH] nx-cugraph: handle louvain with isolated nodes (#3897) This handles isolated nodes in `louvain_communities` similar to what is done in #3886. This is expected to be a temporary fix until pylibcugraph can handle isolated nodes. As a bonus, I added `isolates` algorithm :tada: CC @naimnv @rlratzel Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3897 --- python/nx-cugraph/_nx_cugraph/__init__.py | 7 ++- python/nx-cugraph/lint.yaml | 2 +- .../nx_cugraph/algorithms/__init__.py | 1 + .../algorithms/community/louvain.py | 28 +++++++-- .../nx_cugraph/algorithms/isolate.py | 63 +++++++++++++++++++ .../nx-cugraph/nx_cugraph/classes/digraph.py | 2 +- python/nx-cugraph/nx_cugraph/classes/graph.py | 6 +- python/nx-cugraph/nx_cugraph/convert.py | 2 +- .../nx_cugraph/tests/test_community.py | 53 ++++++++++++++++ 9 files changed, 151 insertions(+), 13 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/isolate.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_community.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 9b3332106ec..ebd13daded0 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -31,20 +31,23 @@ # BEGIN: functions "betweenness_centrality", "edge_betweenness_centrality", + "is_isolate", + "isolates", "louvain_communities", + "number_of_isolates", # END: functions }, "extra_docstrings": { # BEGIN: extra_docstrings "betweenness_centrality": "`weight` parameter is not yet supported.", "edge_betweenness_centrality": "`weight` parameter is not yet supported.", - "louvain_communities": "`threshold` and `seed` parameters are currently ignored.", + "louvain_communities": "`seed` parameter is currently ignored.", # END: extra_docstrings }, "extra_parameters": { # BEGIN: extra_parameters "louvain_communities": { - "max_level : int, optional": "Upper limit of the number of macro-iterations.", + "max_level : int, optional": "Upper limit of the number of macro-iterations (max: 500).", }, # END: extra_parameters }, diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index 6a462a6af79..338ca7b230e 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -63,7 +63,7 @@ repos: # These versions need updated manually - flake8==6.1.0 - flake8-bugbear==23.9.16 - - flake8-simplify==0.20.0 + - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa rev: v1.5.0 hooks: diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 3a585452d6d..dfd9adfc61a 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -12,3 +12,4 @@ # limitations under the License. from . import centrality, community from .centrality import * +from .isolate import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index a183b59fe1d..dc209870c89 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -10,7 +10,7 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -import sys +import warnings import pylibcugraph as plc @@ -22,19 +22,23 @@ not_implemented_for, ) +from ..isolate import _isolates + __all__ = ["louvain_communities"] @not_implemented_for("directed") @networkx_algorithm( extra_params={ - "max_level : int, optional": "Upper limit of the number of macro-iterations." + "max_level : int, optional": ( + "Upper limit of the number of macro-iterations (max: 500)." + ) } ) def louvain_communities( G, weight="weight", resolution=1, threshold=0.0000001, seed=None, *, max_level=None ): - """`threshold` and `seed` parameters are currently ignored.""" + """`seed` parameter is currently ignored.""" # NetworkX allows both directed and undirected, but cugraph only allows undirected. seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility G = _to_undirected_graph(G, weight) @@ -42,7 +46,14 @@ def louvain_communities( # TODO: PLC doesn't handle empty graphs gracefully! return [{key} for key in G._nodeiter_to_iter(range(len(G)))] if max_level is None: - max_level = sys.maxsize + max_level = 500 + elif max_level > 500: + warnings.warn( + f"max_level is set too high (={max_level}), setting it to 500.", + UserWarning, + stacklevel=2, + ) + max_level = 500 vertices, clusters, modularity = plc.louvain( resource_handle=plc.ResourceHandle(), graph=G._get_plc_graph(), @@ -52,7 +63,14 @@ def louvain_communities( do_expensive_check=False, ) groups = _groupby(clusters, vertices) - return [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()] + rv = [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()] + # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix + isolates = _isolates(G) + if isolates.size > 0: + isolates = isolates[isolates > vertices.max()] + if isolates.size > 0: + rv.extend({node} for node in G._nodearray_to_list(isolates)) + return rv @louvain_communities._can_run diff --git a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py new file mode 100644 index 00000000000..774627e84f6 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py @@ -0,0 +1,63 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from __future__ import annotations + +from typing import TYPE_CHECKING + +import cupy as cp + +from nx_cugraph.convert import _to_graph +from nx_cugraph.utils import networkx_algorithm + +if TYPE_CHECKING: # pragma: no cover + from nx_cugraph.typing import IndexValue + +__all__ = ["is_isolate", "isolates", "number_of_isolates"] + + +@networkx_algorithm +def is_isolate(G, n): + G = _to_graph(G) + index = n if G.key_to_id is None else G.key_to_id[n] + return not ( + (G.row_indices == index).any().tolist() + or G.is_directed() + and (G.col_indices == index).any().tolist() + ) + + +def _mark_isolates(G) -> cp.ndarray[bool]: + """Return a boolean mask array indicating indices of isolated nodes.""" + mark_isolates = cp.ones(len(G), bool) + mark_isolates[G.row_indices] = False + if G.is_directed(): + mark_isolates[G.col_indices] = False + return mark_isolates + + +def _isolates(G) -> cp.ndarray[IndexValue]: + """Like isolates, but return an array of indices instead of an iterator of nodes.""" + G = _to_graph(G) + return cp.nonzero(_mark_isolates(G))[0] + + +@networkx_algorithm +def isolates(G): + G = _to_graph(G) + return G._nodeiter_to_iter(iter(_isolates(G).tolist())) + + +@networkx_algorithm +def number_of_isolates(G): + G = _to_graph(G) + return _mark_isolates(G).sum().tolist() diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 0aaf88fd793..72a1bff21a9 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -20,7 +20,7 @@ from .graph import Graph -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from nx_cugraph.typing import NodeKey __all__ = ["DiGraph"] diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 1432f68c752..ded4cc3943f 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -23,7 +23,7 @@ import nx_cugraph as nxcg -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from collections.abc import Iterable, Iterator from nx_cugraph.typing import ( @@ -245,9 +245,9 @@ def from_dcsc( def __new__(cls, incoming_graph_data=None, **attr) -> Graph: if incoming_graph_data is None: new_graph = cls.from_coo(0, cp.empty(0, np.int32), cp.empty(0, np.int32)) - elif incoming_graph_data.__class__ is new_graph.__class__: + elif incoming_graph_data.__class__ is cls: new_graph = incoming_graph_data.copy() - elif incoming_graph_data.__class__ is new_graph.to_networkx_class(): + elif incoming_graph_data.__class__ is cls.to_networkx_class(): new_graph = nxcg.from_networkx(incoming_graph_data, preserve_all_attrs=True) else: raise NotImplementedError diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py index 9be8cac7877..1240ea71db7 100644 --- a/python/nx-cugraph/nx_cugraph/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -24,7 +24,7 @@ import nx_cugraph as nxcg -if TYPE_CHECKING: +if TYPE_CHECKING: # pragma: no cover from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue __all__ = [ diff --git a/python/nx-cugraph/nx_cugraph/tests/test_community.py b/python/nx-cugraph/nx_cugraph/tests/test_community.py new file mode 100644 index 00000000000..126f45c14ae --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_community.py @@ -0,0 +1,53 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pytest + +import nx_cugraph as nxcg + + +def test_louvain_isolated_nodes(): + is_nx_30_or_31 = hasattr(nx.classes, "backends") + + def check(left, right): + assert len(left) == len(right) + assert set(map(frozenset, left)) == set(map(frozenset, right)) + + # Empty graph (no nodes) + G = nx.Graph() + if is_nx_30_or_31: + with pytest.raises(ZeroDivisionError): + nx.community.louvain_communities(G) + else: + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) + # Graph with no edges + G.add_nodes_from(range(5)) + if is_nx_30_or_31: + with pytest.raises(ZeroDivisionError): + nx.community.louvain_communities(G) + else: + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) + # Graph with isolated nodes + G.add_edge(1, 2) + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result) + # Another one + G.add_edge(4, 4) + nx_result = nx.community.louvain_communities(G) + cg_result = nxcg.community.louvain_communities(G) + check(nx_result, cg_result)