From 8bee0e9d4d9465299baf77f8da5899bef8658a6a Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 15 Jan 2024 09:48:03 -0800 Subject: [PATCH] nx-cugraph: add triangles and clustering algorithms --- python/nx-cugraph/_nx_cugraph/__init__.py | 17 ++- .../nx_cugraph/algorithms/__init__.py | 4 +- .../nx_cugraph/algorithms/cluster.py | 139 ++++++++++++++++++ 3 files changed, 154 insertions(+), 6 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/cluster.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index d02c9c3e940..6f754fa9e4b 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -30,6 +30,7 @@ "functions": { # BEGIN: functions "ancestors", + "average_clustering", "barbell_graph", "betweenness_centrality", "bfs_edges", @@ -41,6 +42,7 @@ "caveman_graph", "chvatal_graph", "circular_ladder_graph", + "clustering", "complete_bipartite_graph", "complete_graph", "complete_multipartite_graph", @@ -97,6 +99,8 @@ "star_graph", "tadpole_graph", "tetrahedral_graph", + "transitivity", + "triangles", "trivial_graph", "truncated_cube_graph", "truncated_tetrahedron_graph", @@ -107,22 +111,25 @@ }, "extra_docstrings": { # BEGIN: extra_docstrings - "betweenness_centrality": "`weight` parameter is not yet supported.", + "average_clustering": "Directed graphs and `weight` parameter are not yet supported.", + "betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "bfs_edges": "`sort_neighbors` parameter is not yet supported.", "bfs_predecessors": "`sort_neighbors` parameter is not yet supported.", "bfs_successors": "`sort_neighbors` parameter is not yet supported.", "bfs_tree": "`sort_neighbors` parameter is not yet supported.", - "edge_betweenness_centrality": "`weight` parameter is not yet supported.", + "clustering": "Directed graphs and `weight` parameter are not yet supported.", + "edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", - "from_pandas_edgelist": "cudf.DataFrame inputs also supported.", + "from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.", "generic_bfs_edges": "`neighbors` and `sort_neighbors` parameters are not yet supported.", "k_truss": ( "Currently raises `NotImplementedError` for graphs with more than one connected\n" "component when k >= 3. We expect to fix this soon." ), "katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.", - "louvain_communities": "`seed` parameter is currently ignored.", + "louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.", "pagerank": "`dangling` parameter is not supported, but it is checked for validity.", + "transitivity": "Directed graphs are not yet supported.", # END: extra_docstrings }, "extra_parameters": { diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index d28a629fe63..8da09637003 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,6 +13,7 @@ from . import ( bipartite, centrality, + cluster, community, components, link_analysis, @@ -21,6 +22,7 @@ ) from .bipartite import complete_bipartite_graph from .centrality import * +from .cluster import * from .components import * from .core import * from .dag import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/cluster.py b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py new file mode 100644 index 00000000000..85c1e6de859 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py @@ -0,0 +1,139 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp +import pylibcugraph as plc + +from nx_cugraph.convert import _to_undirected_graph +from nx_cugraph.utils import networkx_algorithm, not_implemented_for + +__all__ = [ + "triangles", + "average_clustering", + "clustering", + "transitivity", +] + + +def _triangles(G, nodes): + if nodes is not None: + if is_single_node := (nodes in G): + nodes = [nodes if G.key_to_id is None else G.key_to_id[nodes]] + else: + nodes = list(nodes) + nodes = G._list_to_nodearray(nodes) + else: + is_single_node = False + if len(G) == 0: + return None, None, is_single_node + node_ids, triangles = plc.triangle_count( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + start_list=nodes, + do_expensive_check=False, + ) + return node_ids, triangles, is_single_node + + +@not_implemented_for("directed") +@networkx_algorithm(plc="triangle_count", version_added="24.02") +def triangles(G, nodes=None): + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + return {} + if is_single_node: + return int(triangles[0]) + return G._nodearrays_to_dict(node_ids, triangles) + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def clustering(G, nodes=None, weight=None): + """Directed graphs and `weight` parameter are not yet supported.""" + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + return {} + if is_single_node: + numer = int(triangles[0]) + if numer == 0: + return 0 + degree = int((G.src_indices == nodes).sum()) + return 2 * numer / (degree * (degree - 1)) + # What about self-edges? + degrees = G._degrees_array()[node_ids] + denom = degrees * (degrees - 1) + results = 2 * triangles / denom + results = cp.where(denom, results, 0) # 0 where we divided by 0 + return G._nodearrays_to_dict(node_ids, results) + + +@clustering._can_run +def _(G, nodes=None, weight=None): + return weight is None and not G.is_directed() + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def average_clustering(G, nodes=None, weight=None, count_zeros=True): + """Directed graphs and `weight` parameter are not yet supported.""" + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + raise ZeroDivisionError + # What about self-edges? + degrees = G._degrees_array()[node_ids] + if not count_zeros: + mask = triangles != 0 + triangles = triangles[mask] + if triangles.size == 0: + raise ZeroDivisionError + degrees = degrees[mask] + denom = degrees * (degrees - 1) + results = 2 * triangles / denom + if count_zeros: + results = cp.where(denom, results, 0) # 0 where we divided by 0 + return float(results.mean()) + + +@average_clustering._can_run +def _(G, nodes=None, weight=None, count_zeros=True): + return weight is None and not G.is_directed() + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def transitivity(G): + """Directed graphs are not yet supported.""" + G = _to_undirected_graph(G) + if len(G) == 0: + return 0 + node_ids, triangles = plc.triangle_count( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + start_list=None, + do_expensive_check=False, + ) + numer = int(triangles.sum()) + if numer == 0: + return 0 + # What about self-edges? + degrees = G._degrees_array()[node_ids] + denom = int((degrees * (degrees - 1)).sum()) + return 2 * numer / denom + + +@transitivity._can_run +def _(G): + # Is transitivity supposed to work on directed graphs? + return not G.is_directed()