From 52ab54ffd83f81753741435ca3b165271e3a4ddb Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 19 Jan 2024 09:15:36 -0600 Subject: [PATCH 1/3] nx-cugraph: add triangles and clustering algorithms (#4093) NetworkX tests are somewhat underspecified regarding how to handle self-loops for these algorithms. Also, I'm not sure if transitivity is supposed to work on directed graphs. Once #4071 is merged, it should be easy to add `is_bipartite` function (and maybe others?). Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4093 --- python/nx-cugraph/_nx_cugraph/__init__.py | 8 ++ .../nx_cugraph/algorithms/__init__.py | 4 +- .../algorithms/bipartite/__init__.py | 3 +- .../nx_cugraph/algorithms/bipartite/basic.py | 31 ++++ .../nx_cugraph/algorithms/cluster.py | 136 ++++++++++++++++++ .../nx-cugraph/nx_cugraph/classes/digraph.py | 18 ++- python/nx-cugraph/nx_cugraph/classes/graph.py | 13 +- .../nx_cugraph/tests/test_cluster.py | 48 +++++++ 8 files changed, 251 insertions(+), 10 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/cluster.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_cluster.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 69320e6b55c..5bfbf082cdd 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -30,6 +30,7 @@ "functions": { # BEGIN: functions "ancestors", + "average_clustering", "barbell_graph", "betweenness_centrality", "bfs_edges", @@ -41,6 +42,7 @@ "caveman_graph", "chvatal_graph", "circular_ladder_graph", + "clustering", "complete_bipartite_graph", "complete_graph", "complete_multipartite_graph", @@ -68,6 +70,7 @@ "house_x_graph", "icosahedral_graph", "in_degree_centrality", + "is_bipartite", "is_connected", "is_isolate", "is_strongly_connected", @@ -104,6 +107,8 @@ "strongly_connected_components", "tadpole_graph", "tetrahedral_graph", + "transitivity", + "triangles", "trivial_graph", "truncated_cube_graph", "truncated_tetrahedron_graph", @@ -115,11 +120,13 @@ }, "extra_docstrings": { # BEGIN: extra_docstrings + "average_clustering": "Directed graphs and `weight` parameter are not yet supported.", "betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "bfs_edges": "`sort_neighbors` parameter is not yet supported.", "bfs_predecessors": "`sort_neighbors` parameter is not yet supported.", "bfs_successors": "`sort_neighbors` parameter is not yet supported.", "bfs_tree": "`sort_neighbors` parameter is not yet supported.", + "clustering": "Directed graphs and `weight` parameter are not yet supported.", "edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", "from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.", @@ -131,6 +138,7 @@ "katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.", "louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.", "pagerank": "`dangling` parameter is not supported, but it is checked for validity.", + "transitivity": "Directed graphs are not yet supported.", # END: extra_docstrings }, "extra_parameters": { diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 65700838f41..de4e9466ba0 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -13,14 +13,16 @@ from . import ( bipartite, centrality, + cluster, community, components, link_analysis, shortest_paths, traversal, ) -from .bipartite import complete_bipartite_graph +from .bipartite import complete_bipartite_graph, is_bipartite from .centrality import * +from .cluster import * from .components import * from .core import * from .dag import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py index 062be973d55..e028299c675 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -10,4 +10,5 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. +from .basic import * from .generators import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py new file mode 100644 index 00000000000..d0e9a5c7f1b --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py @@ -0,0 +1,31 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp + +from nx_cugraph.algorithms.cluster import _triangles +from nx_cugraph.convert import _to_graph +from nx_cugraph.utils import networkx_algorithm + +__all__ = [ + "is_bipartite", +] + + +@networkx_algorithm(plc="triangle_count", version_added="24.02") +def is_bipartite(G): + G = _to_graph(G) + # Counting triangles may not be the fastest way to do this, but it is simple. + node_ids, triangles, is_single_node = _triangles( + G, None, symmetrize="union" if G.is_directed() else None + ) + return int(cp.count_nonzero(triangles)) == 0 diff --git a/python/nx-cugraph/nx_cugraph/algorithms/cluster.py b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py new file mode 100644 index 00000000000..951c358ff26 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/cluster.py @@ -0,0 +1,136 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp +import pylibcugraph as plc + +from nx_cugraph.convert import _to_undirected_graph +from nx_cugraph.utils import networkx_algorithm, not_implemented_for + +__all__ = [ + "triangles", + "average_clustering", + "clustering", + "transitivity", +] + + +def _triangles(G, nodes, symmetrize=None): + if nodes is not None: + if is_single_node := (nodes in G): + nodes = [nodes if G.key_to_id is None else G.key_to_id[nodes]] + else: + nodes = list(nodes) + nodes = G._list_to_nodearray(nodes) + else: + is_single_node = False + if len(G) == 0: + return None, None, is_single_node + node_ids, triangles = plc.triangle_count( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(symmetrize=symmetrize), + start_list=nodes, + do_expensive_check=False, + ) + return node_ids, triangles, is_single_node + + +@not_implemented_for("directed") +@networkx_algorithm(plc="triangle_count", version_added="24.02") +def triangles(G, nodes=None): + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + return {} + if is_single_node: + return int(triangles[0]) + return G._nodearrays_to_dict(node_ids, triangles) + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def clustering(G, nodes=None, weight=None): + """Directed graphs and `weight` parameter are not yet supported.""" + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + return {} + if is_single_node: + numer = int(triangles[0]) + if numer == 0: + return 0 + degree = int((G.src_indices == nodes).sum()) + return 2 * numer / (degree * (degree - 1)) + degrees = G._degrees_array(ignore_selfloops=True)[node_ids] + denom = degrees * (degrees - 1) + results = 2 * triangles / denom + results = cp.where(denom, results, 0) # 0 where we divided by 0 + return G._nodearrays_to_dict(node_ids, results) + + +@clustering._can_run +def _(G, nodes=None, weight=None): + return weight is None and not G.is_directed() + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def average_clustering(G, nodes=None, weight=None, count_zeros=True): + """Directed graphs and `weight` parameter are not yet supported.""" + G = _to_undirected_graph(G) + node_ids, triangles, is_single_node = _triangles(G, nodes) + if len(G) == 0: + raise ZeroDivisionError + degrees = G._degrees_array(ignore_selfloops=True)[node_ids] + if not count_zeros: + mask = triangles != 0 + triangles = triangles[mask] + if triangles.size == 0: + raise ZeroDivisionError + degrees = degrees[mask] + denom = degrees * (degrees - 1) + results = 2 * triangles / denom + if count_zeros: + results = cp.where(denom, results, 0) # 0 where we divided by 0 + return float(results.mean()) + + +@average_clustering._can_run +def _(G, nodes=None, weight=None, count_zeros=True): + return weight is None and not G.is_directed() + + +@not_implemented_for("directed") +@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02") +def transitivity(G): + """Directed graphs are not yet supported.""" + G = _to_undirected_graph(G) + if len(G) == 0: + return 0 + node_ids, triangles = plc.triangle_count( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + start_list=None, + do_expensive_check=False, + ) + numer = int(triangles.sum()) + if numer == 0: + return 0 + degrees = G._degrees_array(ignore_selfloops=True)[node_ids] + denom = int((degrees * (degrees - 1)).sum()) + return 2 * numer / denom + + +@transitivity._can_run +def _(G): + # Is transitivity supposed to work on directed graphs? + return not G.is_directed() diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 3392f336201..f8217a2c79f 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -177,8 +177,16 @@ def to_undirected(self, reciprocal=False, as_view=False): # Private methods # ################### - def _in_degrees_array(self): - return cp.bincount(self.dst_indices, minlength=self._N) - - def _out_degrees_array(self): - return cp.bincount(self.src_indices, minlength=self._N) + def _in_degrees_array(self, *, ignore_selfloops=False): + dst_indices = self.dst_indices + if ignore_selfloops: + not_selfloops = self.src_indices != dst_indices + dst_indices = dst_indices[not_selfloops] + return cp.bincount(dst_indices, minlength=self._N) + + def _out_degrees_array(self, *, ignore_selfloops=False): + src_indices = self.src_indices + if ignore_selfloops: + not_selfloops = src_indices != self.dst_indices + src_indices = src_indices[not_selfloops] + return cp.bincount(src_indices, minlength=self._N) diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 45f81ad1117..4aa2de1538e 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -732,10 +732,17 @@ def _become(self, other: Graph): self.graph = graph return self - def _degrees_array(self): - degrees = cp.bincount(self.src_indices, minlength=self._N) + def _degrees_array(self, *, ignore_selfloops=False): + src_indices = self.src_indices + dst_indices = self.dst_indices + if ignore_selfloops: + not_selfloops = src_indices != dst_indices + src_indices = src_indices[not_selfloops] + if self.is_directed(): + dst_indices = dst_indices[not_selfloops] + degrees = cp.bincount(src_indices, minlength=self._N) if self.is_directed(): - degrees += cp.bincount(self.dst_indices, minlength=self._N) + degrees += cp.bincount(dst_indices, minlength=self._N) return degrees _in_degrees_array = _degrees_array diff --git a/python/nx-cugraph/nx_cugraph/tests/test_cluster.py b/python/nx-cugraph/nx_cugraph/tests/test_cluster.py new file mode 100644 index 00000000000..ad4770f1ab8 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_cluster.py @@ -0,0 +1,48 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pytest +from packaging.version import parse + +nxver = parse(nx.__version__) + +if nxver.major == 3 and nxver.minor < 2: + pytest.skip("Need NetworkX >=3.2 to test clustering", allow_module_level=True) + + +def test_selfloops(): + G = nx.complete_graph(5) + H = nx.complete_graph(5) + H.add_edge(0, 0) + H.add_edge(1, 1) + H.add_edge(2, 2) + # triangles + expected = nx.triangles(G) + assert expected == nx.triangles(H) + assert expected == nx.triangles(G, backend="cugraph") + assert expected == nx.triangles(H, backend="cugraph") + # average_clustering + expected = nx.average_clustering(G) + assert expected == nx.average_clustering(H) + assert expected == nx.average_clustering(G, backend="cugraph") + assert expected == nx.average_clustering(H, backend="cugraph") + # clustering + expected = nx.clustering(G) + assert expected == nx.clustering(H) + assert expected == nx.clustering(G, backend="cugraph") + assert expected == nx.clustering(H, backend="cugraph") + # transitivity + expected = nx.transitivity(G) + assert expected == nx.transitivity(H) + assert expected == nx.transitivity(G, backend="cugraph") + assert expected == nx.transitivity(H, backend="cugraph") From ec65907a164fd909da5ecb8066fa840787e977ed Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Fri, 19 Jan 2024 12:46:12 -0600 Subject: [PATCH 2/3] nx-cugraph: add `core_number` (undirected graphs only) (#4100) Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4100 --- python/nx-cugraph/_nx_cugraph/__init__.py | 2 ++ .../nx-cugraph/nx_cugraph/algorithms/core.py | 30 ++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 5bfbf082cdd..8deac55f4ad 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -47,6 +47,7 @@ "complete_graph", "complete_multipartite_graph", "connected_components", + "core_number", "cubical_graph", "cycle_graph", "davis_southern_women_graph", @@ -127,6 +128,7 @@ "bfs_successors": "`sort_neighbors` parameter is not yet supported.", "bfs_tree": "`sort_neighbors` parameter is not yet supported.", "clustering": "Directed graphs and `weight` parameter are not yet supported.", + "core_number": "Directed graphs are not yet supported.", "edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.", "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.", "from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.", diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index e4520c2713b..f323cdf6004 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -15,6 +15,7 @@ import pylibcugraph as plc import nx_cugraph as nxcg +from nx_cugraph.convert import _to_undirected_graph from nx_cugraph.utils import ( _get_int_dtype, index_dtype, @@ -22,7 +23,34 @@ not_implemented_for, ) -__all__ = ["k_truss"] +__all__ = ["core_number", "k_truss"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@networkx_algorithm(is_incomplete=True, plc="core_number", version_added="24.02") +def core_number(G): + """Directed graphs are not yet supported.""" + G = _to_undirected_graph(G) + if len(G) == 0: + return {} + if nxcg.number_of_selfloops(G) > 0: + raise nx.NetworkXNotImplemented( + "Input graph has self loops which is not permitted; " + "Consider using G.remove_edges_from(nx.selfloop_edges(G))." + ) + node_ids, core_numbers = plc.core_number( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + degree_type="bidirectional", + do_expensive_check=False, + ) + return G._nodearrays_to_dict(node_ids, core_numbers) + + +@core_number._can_run +def _(G): + return not G.is_directed() @not_implemented_for("directed") From 77d833add6cc493bd29ef2eaef09fa5d7417ead7 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Sat, 20 Jan 2024 07:27:47 -0600 Subject: [PATCH 3/3] Fix Jaccard hang (#4080) This PR leverages `client.map` to simultaneously launch processes in order to avoid hangs closes #3926 Authors: - Joseph Nke (https://github.com/jnke2016) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4080 --- .../simpleDistributedGraph.py | 52 +++++++++---------- 1 file changed, 26 insertions(+), 26 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 319435575cc..8fed467bf6d 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -35,11 +35,9 @@ from cugraph.structure.number_map import NumberMap from cugraph.structure.symmetrize import symmetrize from cugraph.dask.common.part_utils import ( - get_persisted_df_worker_map, persist_dask_df_equal_parts_per_worker, ) from cugraph.dask.common.mg_utils import run_gc_on_dask_cluster -from cugraph.dask import get_n_workers import cugraph.dask.comms.comms as Comms @@ -825,12 +823,13 @@ def get_two_hop_neighbors(self, start_vertices=None): _client = default_client() def _call_plc_two_hop_neighbors(sID, mg_graph_x, start_vertices): - return pylibcugraph_get_two_hop_neighbors( + results_ = pylibcugraph_get_two_hop_neighbors( resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), graph=mg_graph_x, start_vertices=start_vertices, do_expensive_check=False, ) + return results_ if isinstance(start_vertices, int): start_vertices = [start_vertices] @@ -845,31 +844,31 @@ def _call_plc_two_hop_neighbors(sID, mg_graph_x, start_vertices): else: start_vertices_type = self.input_df.dtypes[0] - if not isinstance(start_vertices, (dask_cudf.Series)): - start_vertices = dask_cudf.from_cudf( + start_vertices = start_vertices.astype(start_vertices_type) + + def create_iterable_args( + session_id, input_graph, start_vertices=None, npartitions=None + ): + session_id_it = [session_id] * npartitions + graph_it = input_graph.values() + start_vertices = cp.array_split(start_vertices.values, npartitions) + return [ + session_id_it, + graph_it, start_vertices, - npartitions=min(self._npartitions, len(start_vertices)), - ) - start_vertices = start_vertices.astype(start_vertices_type) + ] - n_workers = get_n_workers() - start_vertices = start_vertices.repartition(npartitions=n_workers) - start_vertices = persist_dask_df_equal_parts_per_worker( - start_vertices, _client + result = _client.map( + _call_plc_two_hop_neighbors, + *create_iterable_args( + Comms.get_session_id(), + self._plc_graph, + start_vertices, + self._npartitions, + ), + pure=False, ) - start_vertices = get_persisted_df_worker_map(start_vertices, _client) - result = [ - _client.submit( - _call_plc_two_hop_neighbors, - Comms.get_session_id(), - self._plc_graph[w], - start_vertices[w][0], - workers=[w], - allow_other_workers=False, - ) - for w in start_vertices.keys() - ] else: result = [ _client.submit( @@ -896,7 +895,8 @@ def convert_to_cudf(cp_arrays): return df cudf_result = [ - _client.submit(convert_to_cudf, cp_arrays) for cp_arrays in result + _client.submit(convert_to_cudf, cp_arrays, pure=False) + for cp_arrays in result ] wait(cudf_result)