Skip to content

Commit

Permalink
nx-cugraph: add CC for undirected graphs to fix k-truss (#3965)
Browse files Browse the repository at this point in the history
Fixes #3963 and adds `connected_components`, `is_connected`, `node_connected_component`, and `number_connected_components`.

Also updated `_groupby` to handle groups that are not consecutive integers starting with 0.

Also, `plc.weakly_connected_components` does not handle isolated nodes well, so I needed to handle them at the Python layer, as was done in #3897.

Authors:
  - Erik Welch (https://github.com/eriknw)

Approvers:
  - Rick Ratzel (https://github.com/rlratzel)

URL: #3965
  • Loading branch information
eriknw authored Oct 31, 2023
1 parent 24845ca commit eb1e515
Show file tree
Hide file tree
Showing 12 changed files with 243 additions and 17 deletions.
8 changes: 8 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
"complete_bipartite_graph",
"complete_graph",
"complete_multipartite_graph",
"connected_components",
"cubical_graph",
"cycle_graph",
"davis_southern_women_graph",
Expand All @@ -56,6 +57,7 @@
"house_x_graph",
"icosahedral_graph",
"in_degree_centrality",
"is_connected",
"is_isolate",
"isolates",
"k_truss",
Expand All @@ -66,7 +68,9 @@
"lollipop_graph",
"louvain_communities",
"moebius_kantor_graph",
"node_connected_component",
"null_graph",
"number_connected_components",
"number_of_isolates",
"number_of_selfloops",
"octahedral_graph",
Expand All @@ -91,6 +95,10 @@
"betweenness_centrality": "`weight` parameter is not yet supported.",
"edge_betweenness_centrality": "`weight` parameter is not yet supported.",
"from_pandas_edgelist": "cudf.DataFrame inputs also supported.",
"k_truss": (
"Currently raises `NotImplementedError` for graphs with more than one connected\n"
"component when k >= 3. We expect to fix this soon."
),
"louvain_communities": "`seed` parameter is currently ignored.",
# END: extra_docstrings
},
Expand Down
3 changes: 2 additions & 1 deletion python/nx-cugraph/nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,9 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from . import bipartite, centrality, community
from . import bipartite, centrality, community, components
from .bipartite import complete_bipartite_graph
from .centrality import *
from .components import *
from .core import *
from .isolate import *
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,7 @@ def louvain_communities(
resolution=resolution,
do_expensive_check=False,
)
groups = _groupby(clusters, vertices)
groups = _groupby(clusters, vertices, groups_are_canonical=True)
rv = [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()]
# TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
isolates = _isolates(G)
Expand Down
13 changes: 13 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/components/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .connected import *
130 changes: 130 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/components/connected.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import itertools

import cupy as cp
import networkx as nx
import pylibcugraph as plc

from nx_cugraph.convert import _to_undirected_graph
from nx_cugraph.utils import _groupby, networkx_algorithm, not_implemented_for

from ..isolate import _isolates

__all__ = [
"number_connected_components",
"connected_components",
"is_connected",
"node_connected_component",
]


@not_implemented_for("directed")
@networkx_algorithm
def number_connected_components(G):
    # Count the components by draining the iterable produced by
    # `connected_components`.
    #
    # The preferred implementation would call `plc.weakly_connected_components`
    # directly on `_to_undirected_graph(G)._get_plc_graph()` and return
    # `cp.unique(labels).size`, but PLC does not handle isolated vertices well,
    # so we go through `connected_components` instead.
    num_components = 0
    for _ in connected_components(G):
        num_components += 1
    return num_components


@number_connected_components._can_run
def _(G):
    # NetworkX <= 3.2.1 does not check directedness for us, so report here
    # whether `G` is undirected. Any failure while asking (e.g. `G` has no
    # usable `is_directed`) is deliberately treated as "cannot run".
    try:
        undirected = not G.is_directed()
    except Exception:
        undirected = False
    return undirected


@not_implemented_for("directed")
@networkx_algorithm
def connected_components(G):
    # Produce the connected components of `G`, each as a set of node keys.
    G = _to_undirected_graph(G)
    if G.src_indices.size == 0:
        # TODO: PLC doesn't handle empty graphs (or isolated nodes) gracefully!
        # With no edges, every node is its own component.
        return [{key} for key in G._nodeiter_to_iter(range(len(G)))]

    node_ids, labels = plc.weakly_connected_components(
        resource_handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        offsets=None,
        indices=None,
        weights=None,
        labels=None,
        do_expensive_check=False,
    )
    # Bucket the node ids by component label; each bucket is one component.
    members_by_label = _groupby(labels, node_ids)
    components = (
        G._nodearray_to_set(member_ids) for member_ids in members_by_label.values()
    )

    # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix
    isolates = _isolates(G)
    if isolates.size == 0:
        return components
    # Only keep the isolates whose id lies beyond the largest id PLC returned.
    isolates = isolates[isolates > node_ids.max()]
    if isolates.size == 0:
        return components
    singletons = ({node} for node in G._nodearray_to_list(isolates))
    return itertools.chain(components, singletons)


@not_implemented_for("directed")
@networkx_algorithm
def is_connected(G):
    # `G` is connected exactly when the first component reported by
    # `connected_components` already contains every node.
    #
    # The preferred implementation would inspect the labels returned by
    # `plc.weakly_connected_components` directly
    # (`labels.size == len(G) and cp.unique(labels).size == 1`), but PLC does
    # not handle isolated vertices well.
    G = _to_undirected_graph(G)
    num_nodes = len(G)
    if num_nodes == 0:
        raise nx.NetworkXPointlessConcept(
            "Connectivity is undefined for the null graph."
        )
    first_component = next(iter(connected_components(G)), None)
    if first_component is None:  # pragma: no cover
        # A non-null graph always has at least one component.
        raise RuntimeError
    return len(first_component) == num_nodes


@not_implemented_for("directed")
@networkx_algorithm
def node_connected_component(G, n):
    # Return the set of node keys in the connected component containing `n`.
    # Raises KeyError when `G` maps keys to ids and `n` is not one of its keys.
    # We could also do plain BFS from n
    G = _to_undirected_graph(G)
    node_id = n if G.key_to_id is None else G.key_to_id[n]
    if G.src_indices.size == 0:
        # TODO: PLC doesn't handle empty graphs gracefully (`connected_components`
        # guards against this the same way). Without any edges, the component
        # of `n` is just `n` itself, so skip the PLC call entirely.
        return {n}
    node_ids, labels = plc.weakly_connected_components(
        resource_handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        offsets=None,
        indices=None,
        weights=None,
        labels=None,
        do_expensive_check=False,
    )
    matches = cp.nonzero(node_ids == node_id)[0]
    if matches.size == 0:
        # PLC did not report `n` at all (presumably an isolated vertex, which
        # PLC does not handle yet), so it forms a component by itself.
        return {n}
    # Select every node that shares the component label assigned to `n`.
    return G._nodearray_to_set(node_ids[labels == labels[matches[0]]])
10 changes: 10 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,13 +24,18 @@
@not_implemented_for("multigraph")
@networkx_algorithm
def k_truss(G, k):
"""
Currently raises `NotImplementedError` for graphs with more than one connected
component when k >= 3. We expect to fix this soon.
"""
if is_nx := isinstance(G, nx.Graph):
G = nxcg.from_networkx(G, preserve_all_attrs=True)
if nxcg.number_of_selfloops(G) > 0:
raise nx.NetworkXError(
"Input graph has self loops which is not permitted; "
"Consider using G.remove_edges_from(nx.selfloop_edges(G))."
)

# TODO: create renumbering helper function(s)
if k < 3:
# k-truss graph is comprised of nodes incident on k-2 triangles, so k<3 is a
Expand All @@ -49,6 +54,11 @@ def k_truss(G, k):
# Renumber step 1: edge values (no changes needed)
edge_values = {key: val.copy() for key, val in G.edge_values.items()}
edge_masks = {key: val.copy() for key, val in G.edge_masks.items()}
elif (ncc := nxcg.number_connected_components(G)) > 1:
raise NotImplementedError(
"nx_cugraph.k_truss does not yet work on graphs with more than one "
f"connected component (this graph has {ncc}). We expect to fix this soon."
)
else:
edge_dtype = _get_int_dtype(G.src_indices.size - 1)
edge_indices = cp.arange(G.src_indices.size, dtype=edge_dtype)
Expand Down
5 changes: 5 additions & 0 deletions python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -692,6 +692,11 @@ def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]:
return node_ids.tolist()
return list(self._nodeiter_to_iter(node_ids.tolist()))

def _nodearray_to_set(self, node_ids: cp.ndarray[IndexValue]) -> set[NodeKey]:
if self.key_to_id is None:
return set(node_ids.tolist())
return set(self._nodeiter_to_iter(node_ids.tolist()))

def _nodearray_to_dict(
self, values: cp.ndarray[NodeValue]
) -> dict[NodeKey, NodeValue]:
Expand Down
9 changes: 9 additions & 0 deletions python/nx-cugraph/nx_cugraph/interface.py
Original file line number Diff line number Diff line change
Expand Up @@ -223,11 +223,20 @@ def key(testpath):
}
)

too_slow = "Too slow to run"
skip = {
key("test_tree_isomorphism.py:test_positive"): too_slow,
key("test_tree_isomorphism.py:test_negative"): too_slow,
}

for item in items:
kset = set(item.keywords)
for (test_name, keywords), reason in xfail.items():
if item.name == test_name and keywords.issubset(kset):
item.add_marker(pytest.mark.xfail(reason=reason))
for (test_name, keywords), reason in skip.items():
if item.name == test_name and keywords.issubset(kset):
item.add_marker(pytest.mark.skip(reason=reason))

@classmethod
def can_run(cls, name, args, kwargs):
Expand Down
30 changes: 30 additions & 0 deletions python/nx-cugraph/nx_cugraph/tests/test_ktruss.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import networkx as nx
import pytest

import nx_cugraph as nxcg


@pytest.mark.parametrize(
    "get_graph", [nx.florentine_families_graph, nx.les_miserables_graph]
)
def test_k_truss(get_graph):
    """Compare nx-cugraph's ``k_truss`` against NetworkX's for increasing ``k``."""
    nx_graph = get_graph()
    cg_graph = nxcg.from_networkx(nx_graph, preserve_all_attrs=True)
    for k in range(10):
        expected = nx.k_truss(nx_graph, k)
        truss = nxcg.k_truss(cg_graph, k)
        assert nx.utils.graphs_equal(expected, nxcg.to_networkx(truss))
        # Once the reference truss has no edges, larger k adds nothing new.
        if expected.number_of_edges() == 0:
            break
5 changes: 4 additions & 1 deletion python/nx-cugraph/nx_cugraph/utils/decorators.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from __future__ import annotations

from functools import partial, update_wrapper
from textwrap import dedent

from networkx.utils.decorators import nodes_or_number, not_implemented_for

Expand Down Expand Up @@ -65,7 +66,9 @@ def __new__(
)
instance.extra_params = extra_params
# The docstring on our function is added to the NetworkX docstring.
instance.extra_doc = func.__doc__
instance.extra_doc = (
dedent(func.__doc__.lstrip("\n").rstrip()) if func.__doc__ else None
)
# Copy __doc__ from NetworkX
if instance.name in _registered_algorithms:
instance.__doc__ = _registered_algorithms[instance.name].__doc__
Expand Down
44 changes: 30 additions & 14 deletions python/nx-cugraph/nx_cugraph/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,40 +21,56 @@
import cupy as cp
import numpy as np

try:
    from itertools import pairwise  # Python >=3.10
except ImportError:

    def pairwise(it):
        """Yield successive overlapping pairs ``(it[0], it[1]), (it[1], it[2]), ...``.

        Fallback for Python versions without ``itertools.pairwise``; yields
        nothing when ``it`` has fewer than two items.
        """
        iterator = iter(it)
        try:
            previous = next(iterator)
        except StopIteration:
            return
        for current in iterator:
            yield (previous, current)
            previous = current


__all__ = ["index_dtype", "_groupby", "_seed_to_int", "_get_int_dtype"]

# This may switch to np.uint32 at some point
index_dtype = np.int32


def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]:
def _groupby(
groups: cp.ndarray, values: cp.ndarray, groups_are_canonical: bool = False
) -> dict[int, cp.ndarray]:
"""Perform a groupby operation given an array of group IDs and array of values.
Parameters
----------
groups : cp.ndarray
Array that holds the group IDs.
Group IDs are assumed to be consecutive integers from 0.
values : cp.ndarray
Array of values to be grouped according to groups.
Must be the same size as groups array.
groups_are_canonical : bool, default False
Whether the group IDs are consecutive integers beginning with 0.
Returns
-------
dict with group IDs as keys and cp.ndarray as values.
"""
# It would actually be easy to support groups that aren't consecutive integers,
# but let's wait until we need it to implement it.
sorted_groups = cp.argsort(groups)
sorted_values = values[sorted_groups]
rv = {}
start = 0
for i, end in enumerate(
[*(cp.nonzero(cp.diff(groups[sorted_groups]))[0] + 1).tolist(), groups.size]
):
rv[i] = sorted_values[start:end]
start = end
return rv
if groups.size == 0:
return {}
sort_indices = cp.argsort(groups)
sorted_groups = groups[sort_indices]
sorted_values = values[sort_indices]
prepend = 1 if groups_are_canonical else sorted_groups[0] + 1
left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0]
boundaries = pairwise(itertools.chain(left_bounds.tolist(), [groups.size]))
if groups_are_canonical:
it = enumerate(boundaries)
else:
it = zip(sorted_groups[left_bounds].tolist(), boundaries)
return {group: sorted_values[start:end] for group, (start, end) in it}


def _seed_to_int(seed: int | Random | None) -> int:
Expand Down
1 change: 1 addition & 0 deletions python/nx-cugraph/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,7 @@ ignore = [
# Allow assert, print, RNG, and no docstring
"nx_cugraph/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"]
"_nx_cugraph/__init__.py" = ["E501"]
"nx_cugraph/algorithms/**/*py" = ["D205", "D401"] # Allow flexible docstrings for algorithms

[tool.ruff.flake8-annotations]
mypy-init-return = true
Expand Down

0 comments on commit eb1e515

Please sign in to comment.