From 8bee0e9d4d9465299baf77f8da5899bef8658a6a Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 15 Jan 2024 09:48:03 -0800 Subject: [PATCH 1/4] nx-cugraph: add triangles and clustering algorithms --- python/nx-cugraph/_nx_cugraph/__init__.py | 17 ++- .../nx_cugraph/algorithms/__init__.py | 4 +- .../nx_cugraph/algorithms/cluster.py | 139 ++++++++++++++++++ 3 files changed, 154 insertions(+), 6 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/cluster.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index d02c9c3e940..6f754fa9e4b 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -30,6 +30,7 @@ "functions": { # BEGIN: functions "ancestors", + "average_clustering", "barbell_graph", "betweenness_centrality", "bfs_edges", @@ -41,6 +42,7 @@ "caveman_graph", "chvatal_graph", "circular_ladder_graph", + "clustering", "complete_bipartite_graph", "complete_graph", "complete_multipartite_graph", @@ -97,6 +99,8 @@ "star_graph", "tadpole_graph", "tetrahedral_graph", + "transitivity", + "triangles", "trivial_graph", "truncated_cube_graph", "truncated_tetrahedron_graph", @@ -107,22 +111,25 @@ }, "extra_docstrings": { # BEGIN: extra_docstrings - "betweenness_centrality": "`weight` parameter is not yet supported.", + "average_clustering": "Directed graphs and `weight` parameter are not yet supported.", + "betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "bfs_edges": "`sort_neighbors` parameter is not yet supported.", "bfs_predecessors": "`sort_neighbors` parameter is not yet supported.", "bfs_successors": "`sort_neighbors` parameter is not yet supported.", "bfs_tree": "`sort_neighbors` parameter is not yet supported.", - "edge_betweenness_centrality": "`weight` parameter is not yet supported.", + "clustering": "Directed graphs and `weight` parameter are not yet supported.", + "edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", - "from_pandas_edgelist": "cudf.DataFrame inputs also supported.", + "from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.", "generic_bfs_edges": "`neighbors` and `sort_neighbors` parameters are not yet supported.", "k_truss": ( "Currently raises `NotImplementedError` for graphs with more than one connected\n" "component when k >= 3. We expect to fix this soon." ), "katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.", - "louvain_communities": "`seed` parameter is currently ignored.", + "louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.", "pagerank": "`dangling` parameter is not supported, but it is checked for validity.", + "transitivity": "Directed graphs are not yet supported.", # END: extra_docstrings }, "extra_parameters": { diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index d28a629fe63..8da09637003 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,6 +13,7 @@ from . import ( bipartite, centrality, + cluster, community, components, link_analysis, @@ -21,6 +22,7 @@ ) from .bipartite import complete_bipartite_graph from .centrality import * +from .cluster import * from .components import * from .core import * from .dag import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/cluster.py b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py new file mode 100644 index 00000000000..85c1e6de859 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py @@ -0,0 +1,139 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp +import pylibcugraph as plc + +from nx_cugraph.convert import _to_undirected_graph +from nx_cugraph.utils import networkx_algorithm, not_implemented_for + +__all__ = [ + "triangles", + "average_clustering", + "clustering", + "transitivity", +] + + +def _triangles(G, nodes): + if nodes is not None: + if is_single_node := (nodes in G): + nodes = [nodes if G.key_to_id is None else G.key_to_id[nodes]] + else: + nodes = list(nodes) + nodes = G._list_to_nodearray(nodes) + else: + is_single_node = False + if len(G) == 0: + return None, None, is_single_node + node_ids, triangles = plc.triangle_count( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + start_list=nodes, + do_expensive_check=False, + ) + return node_ids, triangles, is_single_node + + +@not_implemented_for("directed") +@networkx_algorithm(plc="triangle_count", version_added="24.02") +def triangles(G, nodes=None): + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + return {} + if is_single_node: + return int(triangles[0]) + return G._nodearrays_to_dict(node_ids, triangles) + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def clustering(G, nodes=None, weight=None): + """Directed graphs and `weight` parameter are not yet supported.""" + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + return {} + if is_single_node: + numer = int(triangles[0]) + if numer == 0: + return 0 + degree = int((G.src_indices == nodes).sum()) + return 2 * numer / (degree * (degree - 1)) + # What about self-edges? + degrees = G._degrees_array()[node_ids] + denom = degrees * (degrees - 1) + results = 2 * triangles / denom + results = cp.where(denom, results, 0) # 0 where we divided by 0 + return G._nodearrays_to_dict(node_ids, results) + + +@clustering._can_run +def _(G, nodes=None, weight=None): + return weight is None and not G.is_directed() + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def average_clustering(G, nodes=None, weight=None, count_zeros=True): + """Directed graphs and `weight` parameter are not yet supported.""" + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + raise ZeroDivisionError + # What about self-edges? + degrees = G._degrees_array()[node_ids] + if not count_zeros: + mask = triangles != 0 + triangles = triangles[mask] + if triangles.size == 0: + raise ZeroDivisionError + degrees = degrees[mask] + denom = degrees * (degrees - 1) + results = 2 * triangles / denom + if count_zeros: + results = cp.where(denom, results, 0) # 0 where we divided by 0 + return float(results.mean()) + + +@average_clustering._can_run +def _(G, nodes=None, weight=None, count_zeros=True): + return weight is None and not G.is_directed() + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def transitivity(G): + """Directed graphs are not yet supported.""" + G = _to_undirected_graph(G) + if len(G) == 0: + return 0 + node_ids, triangles = plc.triangle_count( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + start_list=None, + do_expensive_check=False, + ) + numer = int(triangles.sum()) + if numer == 0: + return 0 + # What about self-edges? + degrees = G._degrees_array()[node_ids] + denom = int((degrees * (degrees - 1)).sum()) + return 2 * numer / denom + + +@transitivity._can_run +def _(G): + # Is transitivity supposed to work on directed graphs? + return not G.is_directed() From ce806540569ee6c2926593669ff36187c141b255 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 17 Jan 2024 06:42:16 -0800 Subject: [PATCH 2/4] Add `is_bipartite` --- python/nx-cugraph/_nx_cugraph/__init__.py | 1 + .../nx_cugraph/algorithms/__init__.py | 2 +- .../algorithms/bipartite/__init__.py | 3 +- .../nx_cugraph/algorithms/bipartite/basic.py | 31 +++++++++++++++++++ .../nx_cugraph/algorithms/cluster.py | 4 +-- 5 files changed, 37 insertions(+), 4 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index e4997d89c4e..1f7d985f78a 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -70,6 +70,7 @@ "house_x_graph", "icosahedral_graph", "in_degree_centrality", + "is_bipartite", "is_connected", "is_isolate", "is_strongly_connected", diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 8da09637003..b9f0b76147f 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -20,7 +20,7 @@ shortest_paths, traversal, ) -from .bipartite import complete_bipartite_graph +from .bipartite import complete_bipartite_graph, is_bipartite from .centrality import * from .cluster import * from .components import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py index 062be973d55..e028299c675 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,4 +10,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from .basic import * from .generators import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py new file mode 100644 index 00000000000..2930af84784 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py @@ -0,0 +1,31 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp + +from nx_cugraph.algorithms.cluster import _triangles +from nx_cugraph.convert import _to_graph +from nx_cugraph.utils import networkx_algorithm + +__all__ = [ + "is_bipartite", +] + + +@networkx_algorithm(plc="triangle_count", version_added="24.02") +def is_bipartite(G): + G = _to_graph(G) + # Counting triangles may not be the fastest way to do this, but it is simple. + node_ids, triangles, is_single_node = _triangles( + G, None, symmetrize="union" if G.is_directed else None + ) + return int(cp.count_nonzero(triangles)) == 0 diff --git a/python/nx-cugraph/nx_cugraph/algorithms/cluster.py b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py index 85c1e6de859..e1ac64e9b5e 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/cluster.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py @@ -24,7 +24,7 @@ ] -def _triangles(G, nodes): +def _triangles(G, nodes, symmetrize=None): if nodes is not None: if is_single_node := (nodes in G): nodes = [nodes if G.key_to_id is None else G.key_to_id[nodes]] @@ -37,7 +37,7 @@ def _triangles(G, nodes): return None, None, is_single_node node_ids, triangles = plc.triangle_count( resource_handle=plc.ResourceHandle(), - graph=G._get_plc_graph(), + graph=G._get_plc_graph(symmetrize=symmetrize), start_list=nodes, do_expensive_check=False, ) From f04ebc1649909c9abf7e5394cb6b922f8c925116 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 18 Jan 2024 16:44:31 -0800 Subject: [PATCH 3/4] ignore selfloops and compare results with selfloops to networkx --- .../nx_cugraph/algorithms/cluster.py | 9 ++-- .../nx-cugraph/nx_cugraph/classes/digraph.py | 18 +++++-- python/nx-cugraph/nx_cugraph/classes/graph.py | 13 +++-- .../nx_cugraph/tests/test_cluster.py | 48 +++++++++++++++++++ 4 files changed, 74 insertions(+), 14 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_cluster.py diff --git a/python/nx-cugraph/nx_cugraph/algorithms/cluster.py b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py index e1ac64e9b5e..951c358ff26 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/cluster.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py @@ -70,8 +70,7 @@ def clustering(G, nodes=None, weight=None): return 0 degree = int((G.src_indices == nodes).sum()) return 2 * numer / (degree * (degree - 1)) - # What about self-edges? - degrees = G._degrees_array()[node_ids] + degrees = G._degrees_array(ignore_selfloops=True)[node_ids] denom = degrees * (degrees - 1) results = 2 * triangles / denom results = cp.where(denom, results, 0) # 0 where we divided by 0 @@ -91,8 +90,7 @@ def average_clustering(G, nodes=None, weight=None, count_zeros=True): node_ids, triangles, is_single_node = _triangles(G, nodes) if len(G) == 0: raise ZeroDivisionError - # What about self-edges? - degrees = G._degrees_array()[node_ids] + degrees = G._degrees_array(ignore_selfloops=True)[node_ids] if not count_zeros: mask = triangles != 0 triangles = triangles[mask] @@ -127,8 +125,7 @@ def transitivity(G): numer = int(triangles.sum()) if numer == 0: return 0 - # What about self-edges? - degrees = G._degrees_array()[node_ids] + degrees = G._degrees_array(ignore_selfloops=True)[node_ids] denom = int((degrees * (degrees - 1)).sum()) return 2 * numer / denom diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 3392f336201..f8217a2c79f 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -177,8 +177,16 @@ def to_undirected(self, reciprocal=False, as_view=False): # Private methods # ################### - def _in_degrees_array(self): - return cp.bincount(self.dst_indices, minlength=self._N) - - def _out_degrees_array(self): - return cp.bincount(self.src_indices, minlength=self._N) + def _in_degrees_array(self, *, ignore_selfloops=False): + dst_indices = self.dst_indices + if ignore_selfloops: + not_selfloops = self.src_indices != dst_indices + dst_indices = dst_indices[not_selfloops] + return cp.bincount(dst_indices, minlength=self._N) + + def _out_degrees_array(self, *, ignore_selfloops=False): + src_indices = self.src_indices + if ignore_selfloops: + not_selfloops = src_indices != self.dst_indices + src_indices = src_indices[not_selfloops] + return cp.bincount(src_indices, minlength=self._N) diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 45f81ad1117..4aa2de1538e 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -732,10 +732,17 @@ def _become(self, other: Graph): self.graph = graph return self - def _degrees_array(self): - degrees = cp.bincount(self.src_indices, minlength=self._N) + def _degrees_array(self, *, ignore_selfloops=False): + src_indices = self.src_indices + dst_indices = self.dst_indices + if ignore_selfloops: + not_selfloops = src_indices != dst_indices + src_indices = src_indices[not_selfloops] + if self.is_directed(): + dst_indices = dst_indices[not_selfloops] + degrees = cp.bincount(src_indices, minlength=self._N) if self.is_directed(): - degrees += cp.bincount(self.dst_indices, minlength=self._N) + degrees += cp.bincount(dst_indices, minlength=self._N) return degrees _in_degrees_array = _degrees_array diff --git a/python/nx-cugraph/nx_cugraph/tests/test_cluster.py b/python/nx-cugraph/nx_cugraph/tests/test_cluster.py new file mode 100644 index 00000000000..ad4770f1ab8 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_cluster.py @@ -0,0 +1,48 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pytest +from packaging.version import parse + +nxver = parse(nx.__version__) + +if nxver.major == 3 and nxver.minor < 2: + pytest.skip("Need NetworkX >=3.2 to test clustering", allow_module_level=True) + + +def test_selfloops(): + G = nx.complete_graph(5) + H = nx.complete_graph(5) + H.add_edge(0, 0) + H.add_edge(1, 1) + H.add_edge(2, 2) + # triangles + expected = nx.triangles(G) + assert expected == nx.triangles(H) + assert expected == nx.triangles(G, backend="cugraph") + assert expected == nx.triangles(H, backend="cugraph") + # average_clustering + expected = nx.average_clustering(G) + assert expected == nx.average_clustering(H) + assert expected == nx.average_clustering(G, backend="cugraph") + assert expected == nx.average_clustering(H, backend="cugraph") + # clustering + expected = nx.clustering(G) + assert expected == nx.clustering(H) + assert expected == nx.clustering(G, backend="cugraph") + assert expected == nx.clustering(H, backend="cugraph") + # transitivity + expected = nx.transitivity(G) + assert expected == nx.transitivity(H) + assert expected == nx.transitivity(G, backend="cugraph") + assert expected == nx.transitivity(H, backend="cugraph") From eb55793cb13bb8fad742c9a4a865227724ac1a70 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 18 Jan 2024 17:23:08 -0800 Subject: [PATCH 4/4] oops! --- python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py index 2930af84784..d0e9a5c7f1b 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py @@ -26,6 +26,6 @@ def is_bipartite(G): G = _to_graph(G) # Counting triangles may not be the fastest way to do this, but it is simple. node_ids, triangles, is_single_node = _triangles( - G, None, symmetrize="union" if G.is_directed else None + G, None, symmetrize="union" if G.is_directed() else None ) return int(cp.count_nonzero(triangles)) == 0