Skip to content

Commit

Permalink
nx-cugraph: PLC now handles isolated nodes; clean up our workarounds
Browse files Browse the repository at this point in the history
  • Loading branch information
eriknw committed Jan 14, 2024
1 parent aa66a32 commit dcbac93
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 64 deletions.
10 changes: 5 additions & 5 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down Expand Up @@ -107,21 +107,21 @@
},
"extra_docstrings": {
# BEGIN: extra_docstrings
"betweenness_centrality": "`weight` parameter is not yet supported.",
"betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.",
"bfs_edges": "`sort_neighbors` parameter is not yet supported.",
"bfs_predecessors": "`sort_neighbors` parameter is not yet supported.",
"bfs_successors": "`sort_neighbors` parameter is not yet supported.",
"bfs_tree": "`sort_neighbors` parameter is not yet supported.",
"edge_betweenness_centrality": "`weight` parameter is not yet supported.",
"edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.",
"eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.",
"from_pandas_edgelist": "cudf.DataFrame inputs also supported.",
"from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.",
"generic_bfs_edges": "`neighbors` and `sort_neighbors` parameters are not yet supported.",
"k_truss": (
"Currently raises `NotImplementedError` for graphs with more than one connected\n"
"component when k >= 3. We expect to fix this soon."
),
"katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.",
"louvain_communities": "`seed` parameter is currently ignored.",
"louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.",
"pagerank": "`dangling` parameter is not supported, but it is checked for validity.",
# END: extra_docstrings
},
Expand Down
6 changes: 2 additions & 4 deletions python/nx-cugraph/nx_cugraph/_version.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
Expand All @@ -11,12 +11,10 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#

import importlib.resources

# Read VERSION file from the module that is symlinked to VERSION file
# in the root of the repo at build time or copied to the moudle at
# in the root of the repo at build time or copied to the module at
# installation. VERSION is a separate file that allows CI build-time scripts
# to update version info (including commit hashes) without modifying
# source files.
Expand Down
12 changes: 1 addition & 11 deletions python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
not_implemented_for,
)

from ..isolate import _isolates

__all__ = ["louvain_communities"]


Expand Down Expand Up @@ -56,7 +54,6 @@ def louvain_communities(
seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility
G = _to_undirected_graph(G, weight)
if G.src_indices.size == 0:
# TODO: PLC doesn't handle empty graphs gracefully!
return [{key} for key in G._nodeiter_to_iter(range(len(G)))]
if max_level is None:
max_level = 500
Expand All @@ -76,14 +73,7 @@ def louvain_communities(
do_expensive_check=False,
)
groups = _groupby(clusters, node_ids, groups_are_canonical=True)
rv = [set(G._nodearray_to_list(ids)) for ids in groups.values()]
# TODO: PLC doesn't handle isolated node_ids yet, so this is a temporary fix
isolates = _isolates(G)
if isolates.size > 0:
isolates = isolates[isolates > node_ids.max()]
if isolates.size > 0:
rv.extend({node} for node in G._nodearray_to_list(isolates))
return rv
return [set(G._nodearray_to_list(ids)) for ids in groups.values()]


@louvain_communities._can_run
Expand Down
64 changes: 22 additions & 42 deletions python/nx-cugraph/nx_cugraph/algorithms/components/connected.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,17 +10,13 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools

import cupy as cp
import networkx as nx
import pylibcugraph as plc

from nx_cugraph.convert import _to_undirected_graph
from nx_cugraph.utils import _groupby, networkx_algorithm, not_implemented_for

from ..isolate import _isolates

__all__ = [
"number_connected_components",
"connected_components",
Expand All @@ -32,19 +28,17 @@
@not_implemented_for("directed")
@networkx_algorithm(plc="weakly_connected_components", version_added="23.12")
def number_connected_components(G):
return sum(1 for _ in connected_components(G))
# PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL
# G = _to_undirected_graph(G)
# unused_node_ids, labels = plc.weakly_connected_components(
# resource_handle=plc.ResourceHandle(),
# graph=G._get_plc_graph(),
# offsets=None,
# indices=None,
# weights=None,
# labels=None,
# do_expensive_check=False,
# )
# return cp.unique(labels).size
G = _to_undirected_graph(G)
unused_node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
offsets=None,
indices=None,
weights=None,
labels=None,
do_expensive_check=False,
)
return cp.unique(labels).size


@number_connected_components._can_run
Expand All @@ -61,7 +55,6 @@ def _(G):
def connected_components(G):
G = _to_undirected_graph(G)
if G.src_indices.size == 0:
# TODO: PLC doesn't handle empty graphs (or isolated nodes) gracefully!
return [{key} for key in G._nodeiter_to_iter(range(len(G)))]
node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
Expand All @@ -73,16 +66,7 @@ def connected_components(G):
do_expensive_check=False,
)
groups = _groupby(labels, node_ids)
it = (G._nodearray_to_set(connected_ids) for connected_ids in groups.values())
# TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
isolates = _isolates(G)
if isolates.size > 0:
isolates = isolates[isolates > node_ids.max()]
if isolates.size > 0:
it = itertools.chain(
it, ({node} for node in G._nodearray_to_list(isolates))
)
return it
return (G._nodearray_to_set(connected_ids) for connected_ids in groups.values())


@not_implemented_for("directed")
Expand All @@ -93,20 +77,16 @@ def is_connected(G):
raise nx.NetworkXPointlessConcept(
"Connectivity is undefined for the null graph."
)
for community in connected_components(G):
return len(community) == len(G)
raise RuntimeError # pragma: no cover
# PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL
# unused_node_ids, labels = plc.weakly_connected_components(
# resource_handle=plc.ResourceHandle(),
# graph=G._get_plc_graph(),
# offsets=None,
# indices=None,
# weights=None,
# labels=None,
# do_expensive_check=False,
# )
# return labels.size == len(G) and cp.unique(labels).size == 1
unused_node_ids, labels = plc.weakly_connected_components(
resource_handle=plc.ResourceHandle(),
graph=G._get_plc_graph(),
offsets=None,
indices=None,
weights=None,
labels=None,
do_expensive_check=False,
)
return bool((labels == labels[0]).all())


@not_implemented_for("directed")
Expand Down
3 changes: 1 addition & 2 deletions python/nx-cugraph/nx_cugraph/tests/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
Expand Down
30 changes: 30 additions & 0 deletions python/nx-cugraph/nx_cugraph/tests/test_connected.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx

import nx_cugraph as nxcg


def test_connected_isolated_nodes():
G = nx.complete_graph(4)
G.add_node(max(G) + 1)
assert nx.is_connected(G) is False
assert nxcg.is_connected(G) is False
assert nx.number_connected_components(G) == 2
assert nxcg.number_connected_components(G) == 2
assert sorted(nx.connected_components(G)) == [{0, 1, 2, 3}, {4}]
assert sorted(nxcg.connected_components(G)) == [{0, 1, 2, 3}, {4}]
assert nx.node_connected_component(G, 0) == {0, 1, 2, 3}
assert nxcg.node_connected_component(G, 0) == {0, 1, 2, 3}
assert nx.node_connected_component(G, 4) == {4}
assert nxcg.node_connected_component(G, 4) == {4}

0 comments on commit dcbac93

Please sign in to comment.