Skip to content

Commit

Permalink
Add bfs_successors and bfs_predecessors; is there a bug in `plc.b…
Browse files Browse the repository at this point in the history
…fs`?!
  • Loading branch information
eriknw committed Dec 1, 2023
1 parent 22e317c commit da1c3a1
Show file tree
Hide file tree
Showing 7 changed files with 152 additions and 22 deletions.
4 changes: 4 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@
"ancestors",
"barbell_graph",
"betweenness_centrality",
"bfs_predecessors",
"bfs_successors",
"bull_graph",
"caveman_graph",
"chvatal_graph",
Expand Down Expand Up @@ -101,6 +103,8 @@
"extra_docstrings": {
# BEGIN: extra_docstrings
"betweenness_centrality": "`weight` parameter is not yet supported.",
"bfs_predecessors": "`sort_neighbors` parameter is not yet supported.",
"bfs_successors": "`sort_neighbors` parameter is not yet supported.",
"edge_betweenness_centrality": "`weight` parameter is not yet supported.",
"eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.",
"from_pandas_edgelist": "cudf.DataFrame inputs also supported.",
Expand Down
2 changes: 2 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
components,
link_analysis,
shortest_paths,
traversal,
)
from .bipartite import complete_bipartite_graph
from .centrality import *
Expand All @@ -26,3 +27,4 @@
from .isolate import *
from .link_analysis import *
from .shortest_paths import *
from .traversal import *
26 changes: 10 additions & 16 deletions python/nx-cugraph/nx_cugraph/algorithms/dag.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,24 +28,18 @@ def _ancestors_and_descendants(G, source, *, is_ancestors):
G = _to_graph(G)
if source not in G:
hash(source) # To raise TypeError if appropriate
raise nx.NetworkXError(f"The node {source} is not in the graph.")
raise nx.NetworkXError(
f"The node {source} is not in the {G.__class__.__name__.lower()}."
)
src_index = source if G.key_to_id is None else G.key_to_id[source]
distances, predecessors, node_ids = plc.bfs(
# XXX: why can't I pass arguments as keywords?!
plc.ResourceHandle(),
G._get_plc_graph(switch_indices=is_ancestors),
cp.array([src_index], dtype=index_dtype),
False,
-1,
False,
False,
# resource_handle=plc.ResourceHandle(),
# graph = G._get_plc_graph(switch_indices=is_ancestors),
# sources=cp.array([src_index], dtype=index_dtype),
# direction_optimizing=False,
# depth_limit=-1,
# compute_predecessors=False,
# do_expensive_check=False,
handle=plc.ResourceHandle(),
graph=G._get_plc_graph(switch_indices=is_ancestors),
sources=cp.array([src_index], dtype=index_dtype),
direction_optimizing=False,
depth_limit=-1,
compute_predecessors=False,
do_expensive_check=False,
)
mask = (distances != np.iinfo(distances.dtype).max) & (distances != 0)
return G._nodearray_to_set(node_ids[mask])
Expand Down
13 changes: 13 additions & 0 deletions python/nx-cugraph/nx_cugraph/algorithms/traversal/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .breadth_first_search import *
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import cupy as cp
import networkx as nx
import pylibcugraph as plc

from nx_cugraph.convert import _to_graph
from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm

__all__ = [
"bfs_predecessors",
"bfs_successors",
]


@networkx_algorithm
def bfs_successors(G, source, depth_limit=None, sort_neighbors=None):
    """`sort_neighbors` parameter is not yet supported."""
    # NOTE(review): the one-line docstring above is kept verbatim on purpose;
    # the same text appears in the package's `extra_docstrings` metadata, so it
    # is presumably consumed at runtime -- confirm before rewording it.
    #
    # Generator yielding ``(parent, [children])`` pairs of a breadth-first
    # search started at `source`. Only parents that discovered at least one
    # child are yielded, ordered by depth (ties broken by internal node id).
    #
    # Parameters
    # ----------
    # G : graph convertible by `_to_graph`
    # source : node key at which the search starts
    # depth_limit : int or None; values below 1 short-circuit to
    #     ``(source, [])``; None is forwarded to `plc.bfs` as -1
    # sort_neighbors : must be None (enforced by the `_can_run` hook)
    #
    # Raises
    # ------
    # TypeError : if `source` is unhashable (triggered by ``hash(source)``)
    # nx.NetworkXError : if `source` is not a node of `G`
    G = _to_graph(G)
    if source not in G:
        hash(source)  # To raise TypeError if appropriate
        raise nx.NetworkXError(
            f"The node {source} is not in the {G.__class__.__name__.lower()}."
        )
    if depth_limit is not None and depth_limit < 1:
        yield (source, [])
        return

    src_index = source if G.key_to_id is None else G.key_to_id[source]
    distances, predecessors, node_ids = plc.bfs(
        handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        sources=cp.array([src_index], dtype=index_dtype),
        direction_optimizing=False,
        depth_limit=-1 if depth_limit is None else depth_limit,
        compute_predecessors=True,
        do_expensive_check=False,
    )
    # Keep only nodes that were discovered through some parent; this drops the
    # source itself as well as nodes the search never reached.
    mask = predecessors >= 0
    distances = distances[mask]
    predecessors = predecessors[mask]
    node_ids = node_ids[mask]
    if node_ids.size == 0:
        # Nothing is reachable from the source. NetworkX still reports the
        # source with an empty child list, as the `depth_limit < 1` branch does.
        yield (source, [])
        return

    # Group children by their *parent*. Grouping by distance alone (and taking
    # the first parent of each level) would attribute every node of a level to
    # a single parent whenever that level has more than one parent.
    groups = _groupby(predecessors, [distances, node_ids])
    # All children of one parent share the same distance, so the first entry
    # identifies the level; sorting on (level, parent id) yields BFS level order.
    parent_order = sorted(
        (child_distances[0].tolist(), parent_id)
        for parent_id, (child_distances, _) in groups.items()
    )
    id_to_key = G.id_to_key
    for _, parent_id in parent_order:
        children_ids = groups[parent_id][1]
        parent = id_to_key[parent_id] if id_to_key is not None else parent_id
        children = G._nodearray_to_list(children_ids)
        yield (parent, children)


@bfs_successors._can_run
def _(G, source, depth_limit=None, sort_neighbors=None):
    # Registered through `bfs_successors._can_run`: answers True only when
    # `sort_neighbors` is left unset, because that parameter is not supported.
    if sort_neighbors is not None:
        return False
    return True


@networkx_algorithm
def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None):
    """`sort_neighbors` parameter is not yet supported."""
    # NOTE(review): the one-line docstring above is kept verbatim on purpose;
    # the same text appears in the package's `extra_docstrings` metadata, so it
    # is presumably consumed at runtime -- confirm before rewording it.
    #
    # Generator yielding ``(node, predecessor)`` pairs for the nodes discovered
    # by a breadth-first search from `source`, one BFS level at a time.
    # Raises nx.NetworkXError when `source` is not in `G` (TypeError when it is
    # unhashable). `sort_neighbors` must be None (see the `_can_run` hook).
    G = _to_graph(G)
    if source not in G:
        hash(source)  # To raise TypeError if appropriate
        raise nx.NetworkXError(
            f"The node {source} is not in the {G.__class__.__name__.lower()}."
        )
    # With a depth limit below 1 there is nothing to report.
    if depth_limit is not None and depth_limit < 1:
        return

    # Translate the node key into the internal integer id when keys are mapped.
    if G.key_to_id is None:
        start_id = source
    else:
        start_id = G.key_to_id[source]
    # -1 is what gets passed to `plc.bfs` when no depth limit was requested.
    max_depth = -1 if depth_limit is None else depth_limit
    distances, predecessors, node_ids = plc.bfs(
        handle=plc.ResourceHandle(),
        graph=G._get_plc_graph(),
        sources=cp.array([start_id], dtype=index_dtype),
        direction_optimizing=False,
        depth_limit=max_depth,
        compute_predecessors=True,
        do_expensive_check=False,
    )
    # Keep only entries that have a recorded predecessor.
    reached = predecessors >= 0
    by_depth = _groupby(
        distances[reached], [predecessors[reached], node_ids[reached]]
    )
    # Levels are looked up as consecutive distances 1, 2, ..., len(by_depth).
    for depth in range(1, len(by_depth) + 1):
        parent_ids, child_ids = by_depth[depth]
        children = G._nodeiter_to_iter(child_ids.tolist())
        parents = G._nodeiter_to_iter(parent_ids.tolist())
        yield from zip(children, parents)


@bfs_predecessors._can_run
def _(G, source, depth_limit=None, sort_neighbors=None):
    # Registered through `bfs_predecessors._can_run`: answers True only when
    # `sort_neighbors` is left unset, because that parameter is not supported.
    if sort_neighbors is not None:
        return False
    return True
4 changes: 3 additions & 1 deletion python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -461,7 +461,9 @@ def has_edge(self, u: NodeKey, v: NodeKey) -> bool:
def _neighbors(self, n: NodeKey) -> cp.ndarray[NodeValue]:
if n not in self:
hash(n) # To raise TypeError if appropriate
raise nx.NetworkXError(f"The node {n} is not in the graph.")
raise nx.NetworkXError(
f"The node {n} is not in the {self.__class__.__name__.lower()}."
)
if self.key_to_id is not None:
n = self.key_to_id[n]
nbrs = self.dst_indices[self.src_indices == n]
Expand Down
20 changes: 15 additions & 5 deletions python/nx-cugraph/nx_cugraph/utils/misc.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,16 +58,18 @@ def pairwise(it):


def _groupby(
groups: cp.ndarray, values: cp.ndarray, groups_are_canonical: bool = False
groups: cp.ndarray,
values: cp.ndarray | list[cp.ndarray],
groups_are_canonical: bool = False,
) -> dict[int, cp.ndarray]:
"""Perform a groupby operation given an array of group IDs and array of values.
Parameters
----------
groups : cp.ndarray
Array that holds the group IDs.
values : cp.ndarray
Array of values to be grouped according to groups.
values : cp.ndarray or list of cp.ndarray
Array or list of arrays of values to be grouped according to groups.
Must be the same size as groups array.
groups_are_canonical : bool, default False
Whether the group IDs are consecutive integers beginning with 0.
Expand All @@ -80,15 +82,23 @@ def _groupby(
return {}
sort_indices = cp.argsort(groups)
sorted_groups = groups[sort_indices]
sorted_values = values[sort_indices]
if not isinstance(values, list):
sorted_values = values[sort_indices]
else:
sorted_values = [vals[sort_indices] for vals in values]
prepend = 1 if groups_are_canonical else sorted_groups[0] + 1
left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0]
boundaries = pairwise(itertools.chain(left_bounds.tolist(), [groups.size]))
if groups_are_canonical:
it = enumerate(boundaries)
else:
it = zip(sorted_groups[left_bounds].tolist(), boundaries)
return {group: sorted_values[start:end] for group, (start, end) in it}
if not isinstance(values, list):
return {group: sorted_values[start:end] for group, (start, end) in it}
return {
group: [sorted_vals[start:end] for sorted_vals in sorted_values]
for group, (start, end) in it
}


def _seed_to_int(seed: int | Random | None) -> int:
Expand Down

0 comments on commit da1c3a1

Please sign in to comment.