Skip to content

Commit

Permalink
nx-cugraph: add CC for undirected graphs to fix k-truss
Browse files Browse the repository at this point in the history
  • Loading branch information
eriknw committed Oct 31, 2023
1 parent 9755022 commit 447cdfd
Show file tree
Hide file tree
Showing 8 changed files with 192 additions and 16 deletions.
4 changes: 4 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,13 +30,17 @@
"functions": {
# BEGIN: functions
"betweenness_centrality",
"connected_components",
"degree_centrality",
"edge_betweenness_centrality",
"in_degree_centrality",
"is_connected",
"is_isolate",
"isolates",
"k_truss",
"louvain_communities",
"node_connected_component",
"number_connected_components",
"number_of_isolates",
"number_of_selfloops",
"out_degree_centrality",
Expand Down
3 changes: 2 additions & 1 deletion python/nx-cugraph/nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import centrality, community
from . import centrality, community, components
from .centrality import *
from .components import *
from .core import *
from .isolate import *
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def louvain_communities(
resolution=resolution,
do_expensive_check=False,
)
groups = _groupby(clusters, vertices)
groups = _groupby(clusters, vertices, groups_are_canonical=True)
rv = [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()]
# TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
isolates = _isolates(G)
Expand Down
13 changes: 13 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/components/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .connected import *
130 changes: 130 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/components/connected.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools

import cupy as cp
import networkx as nx
import pylibcugraph as plc

from nx_cugraph.convert import _to_undirected_graph
from nx_cugraph.utils import _groupby, networkx_algorithm, not_implemented_for

from ..isolate import _isolates

# Public names of this module, i.e. what ``from .connected import *`` exposes.
__all__ = [
"number_connected_components",
"connected_components",
"is_connected",
"node_connected_component",
]


@not_implemented_for("directed")
@networkx_algorithm
def number_connected_components(G):
    # Return the number of connected components of the undirected graph ``G``.
    #
    # The count is obtained by exhausting ``connected_components(G)`` instead of
    # counting the unique labels returned by PLC, so the isolated-vertex
    # workaround implemented in ``connected_components`` is applied here too.
    return sum(1 for _ in connected_components(G))
    # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL
    # G = _to_undirected_graph(G)
    # unused_node_ids, labels = plc.weakly_connected_components(
    #     resource_handle=plc.ResourceHandle(),
    #     graph=G._get_plc_graph(),
    #     offsets=None,
    #     indices=None,
    #     weights=None,
    #     labels=None,
    #     do_expensive_check=False,
    # )
    # return cp.unique(labels).size


@number_connected_components._can_run
def _(G):
    # ``_can_run`` hook for ``number_connected_components``: answer True only
    # for undirected graphs.
    # NetworkX <= 3.2.1 does not check directedness for us
    try:
        return not G.is_directed()
    except Exception:
        # Deliberate best-effort probe: if ``G`` cannot even be asked whether
        # it is directed, report that this implementation cannot run on it.
        return False


@not_implemented_for("directed")
@networkx_algorithm
def connected_components(G):
    # Return the connected components of the undirected graph ``G``, each one
    # as a ``set`` of node keys.
    #
    # The result is a list when ``G`` has no edges and a lazy iterator
    # otherwise; callers should only rely on it being iterable.
    G = _to_undirected_graph(G)
    if G.src_indices.size == 0:
        # TODO: PLC doesn't handle empty graphs (or isolated nodes) gracefully!
        # With no edges at all, every node is its own component.
        return [{key} for key in G._nodeiter_to_iter(range(len(G)))]
    node_ids, labels = plc.weakly_connected_components(
        resource_handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        offsets=None,
        indices=None,
        weights=None,
        labels=None,
        do_expensive_check=False,
    )
    # One entry per component label, holding the node IDs carrying that label.
    groups = _groupby(labels, node_ids)
    it = (G._nodearray_to_set(connected_ids) for connected_ids in groups.values())
    # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
    isolates = _isolates(G)
    if isolates.size > 0:
        # Only isolates whose ID is larger than every ID returned by PLC are
        # appended as singleton components.
        # NOTE(review): presumably PLC already reports isolates with smaller
        # IDs itself -- confirm against pylibcugraph.
        isolates = isolates[isolates > node_ids.max()]
        if isolates.size > 0:
            it = itertools.chain(
                it, ({node} for node in G._nodearray_to_list(isolates))
            )
    return it


@not_implemented_for("directed")
@networkx_algorithm
def is_connected(G):
    # Return True if the undirected graph ``G`` consists of exactly one
    # connected component.
    #
    # Raises ``nx.NetworkXPointlessConcept`` when ``G`` has no nodes.
    G = _to_undirected_graph(G)
    if len(G) == 0:
        raise nx.NetworkXPointlessConcept(
            "Connectivity is undefined for the null graph."
        )
    # Only the first component is needed: the graph is connected exactly when
    # that component already contains every node.
    for community in connected_components(G):
        return len(community) == len(G)
    # A non-empty graph always yields at least one component.
    raise RuntimeError  # pragma: no cover
    # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL
    # unused_node_ids, labels = plc.weakly_connected_components(
    #     resource_handle=plc.ResourceHandle(),
    #     graph=G._get_plc_graph(),
    #     offsets=None,
    #     indices=None,
    #     weights=None,
    #     labels=None,
    #     do_expensive_check=False,
    # )
    # return labels.size == len(G) and cp.unique(labels).size == 1


@not_implemented_for("directed")
@networkx_algorithm
def node_connected_component(G, n):
    # Return the set of node keys in the connected component that contains
    # node ``n`` of the undirected graph ``G``.
    #
    # When ``G`` maps keys to IDs, an unknown ``n`` fails in the
    # ``G.key_to_id[n]`` lookup below.
    #
    # We could also do plain BFS from n
    G = _to_undirected_graph(G)
    node_id = n if G.key_to_id is None else G.key_to_id[n]
    if G.src_indices.size == 0:
        # PLC doesn't handle empty graphs gracefully (same guard as in
        # ``connected_components``). Without edges, ``n`` is alone in its
        # component, which is also what the fallback below would return.
        return {n}
    node_ids, labels = plc.weakly_connected_components(
        resource_handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        offsets=None,
        indices=None,
        weights=None,
        labels=None,
        do_expensive_check=False,
    )
    # Position(s) of ``n`` in the PLC result.
    indices = cp.nonzero(node_ids == node_id)[0]
    if indices.size == 0:
        # PLC did not report ``n`` at all (e.g. an isolated vertex), so it
        # forms a component by itself.
        return {n}
    # Every node sharing the label of ``n`` belongs to the same component.
    return G._nodearray_to_set(node_ids[labels == labels[indices[0]]])
6 changes: 6 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,12 @@ def k_truss(G, k):
"Input graph has self loops which is not permitted; "
"Consider using G.remove_edges_from(nx.selfloop_edges(G))."
)
if (ncc := nxcg.number_connected_components(G)) > 1:
raise NotImplementedError(
"nx_cugraph.k_truss does not yet work on graphs with more than one "
f"connected component (this graph has {ncc}). We expect to fix this soon."
)

# TODO: create renumbering helper function(s)
if k < 3:
# k-truss graph is comprised of nodes incident on k-2 triangles, so k<3 is a
Expand Down
5 changes: 5 additions & 0 deletions python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -558,6 +558,11 @@ def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]:
return node_ids.tolist()
return list(self._nodeiter_to_iter(node_ids.tolist()))

def _nodearray_to_set(self, node_ids: cp.ndarray[IndexValue]) -> set[NodeKey]:
    """Convert an array of node IDs into a Python ``set``.

    When ``self.key_to_id`` is ``None`` the IDs are returned unchanged;
    otherwise they are passed through ``self._nodeiter_to_iter`` first
    (presumably translating IDs to node keys -- confirm against that helper).
    """
    if self.key_to_id is None:
        return set(node_ids.tolist())
    return set(self._nodeiter_to_iter(node_ids.tolist()))

def _nodearray_to_dict(
self, values: cp.ndarray[NodeValue]
) -> dict[NodeKey, NodeValue]:
Expand Down
45 changes: 31 additions & 14 deletions python/nx-cugraph/nx_cugraph/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,43 +12,60 @@
# limitations under the License.
from __future__ import annotations

import itertools
import operator as op
import sys
from random import Random

import cupy as cp

try:
    from itertools import pairwise  # Python >=3.10
except ImportError:

    def pairwise(it):
        """Yield overlapping pairs ``(x0, x1), (x1, x2), ...`` from ``it``.

        Fallback for Python versions without ``itertools.pairwise``.
        Yields nothing when ``it`` has fewer than two items.
        """
        it = iter(it)
        # Both loops share one iterator: the outer loop only ever takes the
        # first item, the inner loop consumes all the rest, and the outer
        # loop then ends because the iterator is exhausted.
        for prev in it:
            for cur in it:
                yield (prev, cur)
                prev = cur


# Names exposed by a star-import of this module.
__all__ = ["_groupby", "_seed_to_int"]


def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]:
def _groupby(
groups: cp.ndarray, values: cp.ndarray, groups_are_canonical: bool = False
) -> dict[int, cp.ndarray]:
"""Perform a groupby operation given an array of group IDs and array of values.
Parameters
----------
groups : cp.ndarray
Array that holds the group IDs.
Group IDs are assumed to be consecutive integers from 0.
values : cp.ndarray
Array of values to be grouped according to groups.
Must be the same size as groups array.
groups_are_canonical : bool, default False
Whether the group IDs are consecutive integers beginning with 0.
Returns
-------
dict with group IDs as keys and cp.ndarray as values.
"""
# It would actually be easy to support groups that aren't consecutive integers,
# but let's wait until we need it to implement it.
sorted_groups = cp.argsort(groups)
sorted_values = values[sorted_groups]
rv = {}
start = 0
for i, end in enumerate(
[*(cp.nonzero(cp.diff(groups[sorted_groups]))[0] + 1).tolist(), groups.size]
):
rv[i] = sorted_values[start:end]
start = end
return rv
if groups.size == 0:
return {}
sort_indices = cp.argsort(groups)
sorted_groups = groups[sort_indices]
sorted_values = values[sort_indices]
prepend = 1 if groups_are_canonical else sorted_groups[0] + 1
left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0]
boundaries = pairwise(itertools.chain(left_bounds.tolist(), [groups.size]))
if groups_are_canonical:
it = enumerate(boundaries)
else:
it = zip(sorted_groups[left_bounds].tolist(), boundaries)
return {group: sorted_values[start:end] for group, (start, end) in it}


def _seed_to_int(seed: int | Random | None) -> int:
Expand Down

0 comments on commit 447cdfd

Please sign in to comment.