Skip to content

Commit

Permalink
nx-cugraph: add triangles and clustering algorithms (#4093)
Browse files Browse the repository at this point in the history
NetworkX tests are somewhat underspecified regarding how to handle self-loops for these algorithms. Also, I'm not sure if transitivity is supposed to work on directed graphs.

Once #4071 is merged, it should be easy to add an `is_bipartite` function (and maybe others?).

Authors:
  - Erik Welch (https://github.com/eriknw)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: #4093
eriknw authored Jan 19, 2024
1 parent c5d2a9a commit 52ab54f
Showing 8 changed files with 251 additions and 10 deletions.
8 changes: 8 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
@@ -30,6 +30,7 @@
"functions": {
# BEGIN: functions
"ancestors",
"average_clustering",
"barbell_graph",
"betweenness_centrality",
"bfs_edges",
@@ -41,6 +42,7 @@
"caveman_graph",
"chvatal_graph",
"circular_ladder_graph",
"clustering",
"complete_bipartite_graph",
"complete_graph",
"complete_multipartite_graph",
@@ -68,6 +70,7 @@
"house_x_graph",
"icosahedral_graph",
"in_degree_centrality",
"is_bipartite",
"is_connected",
"is_isolate",
"is_strongly_connected",
@@ -104,6 +107,8 @@
"strongly_connected_components",
"tadpole_graph",
"tetrahedral_graph",
"transitivity",
"triangles",
"trivial_graph",
"truncated_cube_graph",
"truncated_tetrahedron_graph",
@@ -115,11 +120,13 @@
},
"extra_docstrings": {
# BEGIN: extra_docstrings
"average_clustering": "Directed graphs and `weight` parameter are not yet supported.",
"betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.",
"bfs_edges": "`sort_neighbors` parameter is not yet supported.",
"bfs_predecessors": "`sort_neighbors` parameter is not yet supported.",
"bfs_successors": "`sort_neighbors` parameter is not yet supported.",
"bfs_tree": "`sort_neighbors` parameter is not yet supported.",
"clustering": "Directed graphs and `weight` parameter are not yet supported.",
"edge_betweenness_centrality": "`weight` parameter is not yet supported, and RNG with seed may be different.",
"eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.",
"from_pandas_edgelist": "cudf.DataFrame inputs also supported; value columns with str is unsuppported.",
@@ -131,6 +138,7 @@
"katz_centrality": "`nstart` isn't used (but is checked), and `normalized=False` is not supported.",
"louvain_communities": "`seed` parameter is currently ignored, and self-loops are not yet supported.",
"pagerank": "`dangling` parameter is not supported, but it is checked for validity.",
"transitivity": "Directed graphs are not yet supported.",
# END: extra_docstrings
},
"extra_parameters": {
4 changes: 3 additions & 1 deletion python/nx-cugraph/nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
@@ -13,14 +13,16 @@
from . import (
bipartite,
centrality,
cluster,
community,
components,
link_analysis,
shortest_paths,
traversal,
)
from .bipartite import complete_bipartite_graph
from .bipartite import complete_bipartite_graph, is_bipartite
from .centrality import *
from .cluster import *
from .components import *
from .core import *
from .dag import *
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Copyright (c) 2023-2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
@@ -10,4 +10,5 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .basic import *
from .generators import *
31 changes: 31 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/bipartite/basic.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp

from nx_cugraph.algorithms.cluster import _triangles
from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import networkx_algorithm

__all__ = [
"is_bipartite",
]


def _edges_admit_two_coloring(num_nodes, src, dst):
    """Return True if the edge list ``zip(src, dst)`` contains no odd cycle.

    Edge direction is ignored. A union-find structure tracks, for every node,
    its colour parity relative to the representative of its component; an edge
    whose endpoints already share a component *and* a parity closes an odd
    cycle (a self-loop is the length-1 case).

    Parameters
    ----------
    num_nodes : int
        Number of nodes; every index in ``src``/``dst`` must be < ``num_nodes``.
    src, dst : sequence of int
        Host-side endpoint indices of each edge.
    """
    parent = list(range(num_nodes))
    parity = [0] * num_nodes  # parity of each node relative to ``parent[node]``

    def find(node):
        # Walk up to the root, then compress the path while accumulating the
        # parity of each visited node relative to that root.
        trail = []
        while parent[node] != node:
            trail.append(node)
            node = parent[node]
        root = node
        offset = 0
        for visited in reversed(trail):
            offset ^= parity[visited]
            parent[visited] = root
            parity[visited] = offset
        return root, offset

    for u, v in zip(src, dst):
        root_u, parity_u = find(u)
        root_v, parity_v = find(v)
        if root_u != root_v:
            parent[root_u] = root_v
            # Choose the link parity so that u and v get opposite colours.
            parity[root_u] = parity_u ^ parity_v ^ 1
        elif parity_u == parity_v:
            return False
    return True


@networkx_algorithm(plc="triangle_count", version_added="24.02")
def is_bipartite(G):
    # No docstring on purpose: other decorated functions in this package have
    # their docstring mirrored in `extra_docstrings`, so adding one here could
    # change generated metadata.
    G = _to_graph(G)
    if len(G) == 0:
        # `_triangles` returns `None` arrays for an empty graph; an empty graph
        # is trivially bipartite.
        return True
    # A triangle is a cheap, GPU-side proof of an odd cycle, so use it to
    # reject quickly...
    _, triangles, _ = _triangles(
        G, None, symmetrize="union" if G.is_directed() else None
    )
    if int(cp.count_nonzero(triangles)) != 0:
        return False
    # ...but the absence of triangles is NOT sufficient (e.g. a 5-cycle or a
    # lone self-loop), so finish with an exact two-colouring check. This runs
    # on the host; it favours correctness over speed.
    return _edges_admit_two_coloring(
        len(G), G.src_indices.tolist(), G.dst_indices.tolist()
    )
136 changes: 136 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp
import pylibcugraph as plc

from nx_cugraph.convert import _to_undirected_graph
from nx_cugraph.utils import networkx_algorithm, not_implemented_for

__all__ = [
"triangles",
"average_clustering",
"clustering",
"transitivity",
]


def _triangles(G, nodes, symmetrize=None):
    """Run ``plc.triangle_count`` on ``G``, optionally restricted to ``nodes``.

    Parameters
    ----------
    G : graph
        Graph exposing ``key_to_id``, ``_list_to_nodearray`` and
        ``_get_plc_graph``.
    nodes : node, iterable of nodes, or None
        ``None`` passes ``start_list=None`` to ``plc.triangle_count``. A value
        that is contained in ``G`` is treated as a single node; anything else
        is materialised with ``list(nodes)``.
    symmetrize : optional
        Forwarded unchanged to ``G._get_plc_graph``.

    Returns
    -------
    (node_ids, triangles, is_single_node)
        The two arrays returned by ``plc.triangle_count`` (both ``None`` when
        ``G`` is empty) and a flag telling whether ``nodes`` was a single node.
    """
    is_single_node = False
    if nodes is not None:
        is_single_node = nodes in G
        if is_single_node:
            key_to_id = G.key_to_id
            # NOTE(review): the key is translated to an id here and the result
            # is then handed to `_list_to_nodearray`; confirm that helper does
            # not translate keys a second time.
            node_list = [nodes] if key_to_id is None else [key_to_id[nodes]]
        else:
            node_list = list(nodes)
        nodes = G._list_to_nodearray(node_list)
    if len(G) == 0:
        # Nothing to count; callers check `len(G)` themselves.
        return None, None, is_single_node
    handle = plc.ResourceHandle()
    plc_graph = G._get_plc_graph(symmetrize=symmetrize)
    node_ids, triangles = plc.triangle_count(
        resource_handle=handle,
        graph=plc_graph,
        start_list=nodes,
        do_expensive_check=False,
    )
    return node_ids, triangles, is_single_node


@not_implemented_for("directed")
@networkx_algorithm(plc="triangle_count", version_added="24.02")
def triangles(G, nodes=None):
    # Returns `{}` for an empty graph, an `int` when `nodes` is a single node
    # of `G`, and otherwise the mapping built by `G._nodearrays_to_dict`.
    G = _to_undirected_graph(G)
    node_ids, counts, single = _triangles(G, nodes)
    if not len(G):
        return {}
    return int(counts[0]) if single else G._nodearrays_to_dict(node_ids, counts)


@not_implemented_for("directed")
@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02")
def clustering(G, nodes=None, weight=None):
    """Directed graphs and `weight` parameter are not yet supported."""
    # Returns `{}` for an empty graph, a single number when `nodes` is one node
    # of `G`, and otherwise the mapping built by `G._nodearrays_to_dict`.
    G = _to_undirected_graph(G)
    node_ids, triangles, is_single_node = _triangles(G, nodes)
    if len(G) == 0:
        return {}
    if is_single_node:
        numer = int(triangles[0])
        if numer == 0:
            return 0
        # Look the degree up by node *id* (`node_ids`), not by the caller's
        # node key: keys and ids differ whenever `G.key_to_id` is set. Use the
        # same self-loop-free degree as the multi-node path below so both
        # paths agree on graphs with self-loops.
        degree = int(G._degrees_array(ignore_selfloops=True)[node_ids][0])
        return 2 * numer / (degree * (degree - 1))
    degrees = G._degrees_array(ignore_selfloops=True)[node_ids]
    denom = degrees * (degrees - 1)
    results = 2 * triangles / denom
    results = cp.where(denom, results, 0)  # 0 where we divided by 0
    return G._nodearrays_to_dict(node_ids, results)


@clustering._can_run
def _(G, nodes=None, weight=None):
    # Runnable only for unweighted requests on undirected graphs.
    if weight is not None:
        return False
    return not G.is_directed()


@not_implemented_for("directed")
@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02")
def average_clustering(G, nodes=None, weight=None, count_zeros=True):
    """Directed graphs and `weight` parameter are not yet supported."""
    # Raises ZeroDivisionError for an empty graph, and also when
    # `count_zeros` is false and no selected node has a triangle.
    G = _to_undirected_graph(G)
    node_ids, tri, _single = _triangles(G, nodes)
    if not len(G):
        raise ZeroDivisionError
    deg = G._degrees_array(ignore_selfloops=True)[node_ids]
    if count_zeros:
        pairs = deg * (deg - 1)
        # Nodes with fewer than two neighbours divide by zero; report 0 there.
        coeffs = cp.where(pairs, 2 * tri / pairs, 0)
    else:
        # Average only over the nodes that take part in a triangle.
        has_triangle = tri != 0
        tri = tri[has_triangle]
        if tri.size == 0:
            raise ZeroDivisionError
        deg = deg[has_triangle]
        pairs = deg * (deg - 1)
        coeffs = 2 * tri / pairs
    return float(coeffs.mean())


@average_clustering._can_run
def _(G, nodes=None, weight=None, count_zeros=True):
    # Runnable only for unweighted requests on undirected graphs.
    if weight is not None:
        return False
    return not G.is_directed()


@not_implemented_for("directed")
@networkx_algorithm(is_incomplete=True, plc="triangle_count", version_added="24.02")
def transitivity(G):
    """Directed graphs are not yet supported."""
    # Returns the integer 0 for an empty graph or when no triangle exists;
    # otherwise 2 * (sum of per-node triangle counts) / sum(d * (d - 1)).
    G = _to_undirected_graph(G)
    if not len(G):
        return 0
    handle = plc.ResourceHandle()
    node_ids, tri = plc.triangle_count(
        resource_handle=handle,
        graph=G._get_plc_graph(),
        start_list=None,
        do_expensive_check=False,
    )
    total = int(tri.sum())
    if not total:
        return 0
    deg = G._degrees_array(ignore_selfloops=True)[node_ids]
    pairs = int((deg * (deg - 1)).sum())
    return 2 * total / pairs


@transitivity._can_run
def _(G):
    # Open question from the author: is transitivity supposed to work on
    # directed graphs? Until that is settled, only undirected graphs qualify.
    directed = G.is_directed()
    return not directed
18 changes: 13 additions & 5 deletions python/nx-cugraph/nx_cugraph/classes/digraph.py
Original file line number Diff line number Diff line change
@@ -177,8 +177,16 @@ def to_undirected(self, reciprocal=False, as_view=False):
# Private methods #
###################

def _in_degrees_array(self, *, ignore_selfloops=False):
    """Count occurrences of each node id in ``dst_indices``.

    With ``ignore_selfloops=True``, edges whose source equals their
    destination are dropped before counting. The result has at least
    ``self._N`` entries.
    """
    targets = self.dst_indices
    if ignore_selfloops:
        targets = targets[self.src_indices != targets]
    return cp.bincount(targets, minlength=self._N)

def _out_degrees_array(self, *, ignore_selfloops=False):
    """Count occurrences of each node id in ``src_indices``.

    With ``ignore_selfloops=True``, edges whose source equals their
    destination are dropped before counting. The result has at least
    ``self._N`` entries.
    """
    sources = self.src_indices
    if ignore_selfloops:
        sources = sources[sources != self.dst_indices]
    return cp.bincount(sources, minlength=self._N)
13 changes: 10 additions & 3 deletions python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
@@ -732,10 +732,17 @@ def _become(self, other: Graph):
self.graph = graph
return self

def _degrees_array(self, *, ignore_selfloops=False):
    """Return per-node edge counts as an array with at least ``self._N`` entries.

    Occurrences in ``src_indices`` are always counted; for directed graphs the
    occurrences in ``dst_indices`` are added on top. With
    ``ignore_selfloops=True``, edges whose source equals their destination are
    removed before counting.
    """
    sources = self.src_indices
    targets = self.dst_indices
    directed = self.is_directed()
    if ignore_selfloops:
        keep = sources != targets
        sources = sources[keep]
        if directed:
            targets = targets[keep]
    degrees = cp.bincount(sources, minlength=self._N)
    if directed:
        degrees += cp.bincount(targets, minlength=self._N)
    return degrees

_in_degrees_array = _degrees_array
48 changes: 48 additions & 0 deletions python/nx-cugraph/nx_cugraph/tests/test_cluster.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
# Copyright (c) 2024, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx
import pytest
from packaging.version import parse

nxver = parse(nx.__version__)

# Skip the whole module on NetworkX 3.0 and 3.1; other major versions are not
# matched by this check.
if (3, 0) <= (nxver.major, nxver.minor) < (3, 2):
    pytest.skip("Need NetworkX >=3.2 to test clustering", allow_module_level=True)


def test_selfloops():
    # Self-loops must not change any of the triangle-based measures, either in
    # NetworkX itself or when dispatching to the "cugraph" backend.
    G = nx.complete_graph(5)
    H = nx.complete_graph(5)
    H.add_edges_from((node, node) for node in range(3))
    for func in (
        nx.triangles,
        nx.average_clustering,
        nx.clustering,
        nx.transitivity,
    ):
        expected = func(G)
        assert expected == func(H)
        assert expected == func(G, backend="cugraph")
        assert expected == func(H, backend="cugraph")

0 comments on commit 52ab54f

Please sign in to comment.