Add bfs_successors and bfs_predecessors; is there a bug in `plc.bfs`?!
rapidsai · Dec 1, 2023 · da1c3a1 · da1c3a1
1 parent 22e317c
commit da1c3a1
Show file tree

Hide file tree

Showing 7 changed files with 152 additions and 22 deletions.
diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py
@@ -32,6 +32,8 @@
         "ancestors",
         "barbell_graph",
         "betweenness_centrality",
+        "bfs_predecessors",
+        "bfs_successors",
         "bull_graph",
         "caveman_graph",
         "chvatal_graph",
@@ -101,6 +103,8 @@
     "extra_docstrings": {
         # BEGIN: extra_docstrings
         "betweenness_centrality": "`weight` parameter is not yet supported.",
+        "bfs_predecessors": "`sort_neighbors` parameter is not yet supported.",
+        "bfs_successors": "`sort_neighbors` parameter is not yet supported.",
         "edge_betweenness_centrality": "`weight` parameter is not yet supported.",
         "eigenvector_centrality": "`nstart` parameter is not used, but it is checked for validity.",
         "from_pandas_edgelist": "cudf.DataFrame inputs also supported.",

diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py
@@ -17,6 +17,7 @@
     components,
     link_analysis,
     shortest_paths,
+    traversal,
 )
 from .bipartite import complete_bipartite_graph
 from .centrality import *
@@ -26,3 +27,4 @@
 from .isolate import *
 from .link_analysis import *
 from .shortest_paths import *
+from .traversal import *
diff --git a/python/nx-cugraph/nx_cugraph/algorithms/dag.py b/python/nx-cugraph/nx_cugraph/algorithms/dag.py
@@ -28,24 +28,18 @@ def _ancestors_and_descendants(G, source, *, is_ancestors):
     G = _to_graph(G)
     if source not in G:
         hash(source)  # To raise TypeError if appropriate
-        raise nx.NetworkXError(f"The node {source} is not in the graph.")
+        raise nx.NetworkXError(
+            f"The node {source} is not in the {G.__class__.__name__.lower()}."
+        )
     src_index = source if G.key_to_id is None else G.key_to_id[source]
     distances, predecessors, node_ids = plc.bfs(
-        # XXX: why can't I pass arguments as keywords?!
-        plc.ResourceHandle(),
-        G._get_plc_graph(switch_indices=is_ancestors),
-        cp.array([src_index], dtype=index_dtype),
-        False,
-        -1,
-        False,
-        False,
-        # resource_handle=plc.ResourceHandle(),
-        # graph = G._get_plc_graph(switch_indices=is_ancestors),
-        # sources=cp.array([src_index], dtype=index_dtype),
-        # direction_optimizing=False,
-        # depth_limit=-1,
-        # compute_predecessors=False,
-        # do_expensive_check=False,
+        handle=plc.ResourceHandle(),
+        graph=G._get_plc_graph(switch_indices=is_ancestors),
+        sources=cp.array([src_index], dtype=index_dtype),
+        direction_optimizing=False,
+        depth_limit=-1,
+        compute_predecessors=False,
+        do_expensive_check=False,
     )
     mask = (distances != np.iinfo(distances.dtype).max) & (distances != 0)
     return G._nodearray_to_set(node_ids[mask])

diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .breadth_first_search import *
diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py
@@ -0,0 +1,105 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import cupy as cp
+import networkx as nx
+import pylibcugraph as plc
+
+from nx_cugraph.convert import _to_graph
+from nx_cugraph.utils import _groupby, index_dtype, networkx_algorithm
+
+__all__ = [
+    "bfs_predecessors",
+    "bfs_successors",
+]
+
+
+@networkx_algorithm
+def bfs_successors(G, source, depth_limit=None, sort_neighbors=None):
+    """`sort_neighbors` parameter is not yet supported."""
+    # Yield ``(parent, children)`` pairs of the breadth-first search tree rooted
+    # at `source`, where `children` is the list of nodes first reached from
+    # `parent`.  Raises nx.NetworkXError if `source` is not a node of `G`.
+    # NOTE(review): the docstring text above also appears under
+    # "extra_docstrings" in `_nx_cugraph/__init__.py`; keep them in sync.
+    G = _to_graph(G)
+    if source not in G:
+        hash(source)  # To raise TypeError if appropriate
+        raise nx.NetworkXError(
+            f"The node {source} is not in the {G.__class__.__name__.lower()}."
+        )
+    if depth_limit is not None and depth_limit < 1:
+        yield (source, [])
+        return
+
+    src_index = source if G.key_to_id is None else G.key_to_id[source]
+    distances, predecessors, node_ids = plc.bfs(
+        handle=plc.ResourceHandle(),
+        graph=G._get_plc_graph(),
+        sources=cp.array([src_index], dtype=index_dtype),
+        direction_optimizing=False,
+        depth_limit=-1 if depth_limit is None else depth_limit,
+        compute_predecessors=True,
+        do_expensive_check=False,
+    )
+    # Keep only nodes that were assigned a predecessor; this drops the source
+    # itself and (presumably) every node the search did not reach.
+    mask = predecessors >= 0
+    distances = distances[mask]
+    predecessors = predecessors[mask]
+    node_ids = node_ids[mask]
+    groups = _groupby(distances, [predecessors, node_ids])
+    if not groups:
+        # Nothing was reached from `source`.  Yield the same single pair as the
+        # `depth_limit < 1` branch above instead of yielding nothing at all.
+        yield (source, [])
+        return
+    id_to_key = G.id_to_key
+    # Walk the tree one depth level at a time, shallowest level first.
+    for depth in sorted(groups):
+        parent_ids, children_ids = groups[depth]
+        # A level generally holds children of several different parents, so
+        # split it by parent instead of attributing every child in the level
+        # to `parent_ids[0]`.
+        for parent_id, child_ids in _groupby(parent_ids, children_ids).items():
+            parent = id_to_key[parent_id] if id_to_key is not None else parent_id
+            yield (parent, G._nodearray_to_list(child_ids))
+
+
+@bfs_successors._can_run
+def _(G, source, depth_limit=None, sort_neighbors=None):
+    return sort_neighbors is None  # `sort_neighbors` is not yet supported
+
+
+@networkx_algorithm
+def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None):
+    """`sort_neighbors` parameter is not yet supported."""
+    # Yield ``(node, predecessor)`` pairs of the breadth-first search started at
+    # `source`, one depth level at a time.  Raises nx.NetworkXError if `source`
+    # is not a node of `G`.
+    G = _to_graph(G)
+    if source not in G:
+        hash(source)  # To raise TypeError if appropriate
+        raise nx.NetworkXError(
+            f"The node {source} is not in the {G.__class__.__name__.lower()}."
+        )
+
+    # Translate the NetworkX-style limit into the value handed to `plc.bfs`
+    # (-1 when no limit was given); a limit below 1 means there is nothing
+    # to yield.
+    if depth_limit is None:
+        plc_depth_limit = -1
+    elif depth_limit < 1:
+        return
+    else:
+        plc_depth_limit = depth_limit
+
+    if G.key_to_id is None:
+        src_index = source
+    else:
+        src_index = G.key_to_id[source]
+    dists, preds, nodes = plc.bfs(
+        handle=plc.ResourceHandle(),
+        graph=G._get_plc_graph(),
+        sources=cp.array([src_index], dtype=index_dtype),
+        direction_optimizing=False,
+        depth_limit=plc_depth_limit,
+        compute_predecessors=True,
+        do_expensive_check=False,
+    )
+    # Only entries with a non-negative predecessor take part in the output.
+    has_pred = preds >= 0
+    levels = _groupby(dists[has_pred], [preds[has_pred], nodes[has_pred]])
+    for depth in range(1, len(levels) + 1):
+        level_parents, level_children = levels[depth]
+        child_keys = G._nodeiter_to_iter(level_children.tolist())
+        parent_keys = G._nodeiter_to_iter(level_parents.tolist())
+        yield from zip(child_keys, parent_keys)
+
+
+@bfs_predecessors._can_run
+def _(G, source, depth_limit=None, sort_neighbors=None):
+    return sort_neighbors is None  # `sort_neighbors` is not yet supported
diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py
@@ -461,7 +461,9 @@ def has_edge(self, u: NodeKey, v: NodeKey) -> bool:
     def _neighbors(self, n: NodeKey) -> cp.ndarray[NodeValue]:
         if n not in self:
             hash(n)  # To raise TypeError if appropriate
-            raise nx.NetworkXError(f"The node {n} is not in the graph.")
+            raise nx.NetworkXError(
+                f"The node {n} is not in the {self.__class__.__name__.lower()}."
+            )
         if self.key_to_id is not None:
             n = self.key_to_id[n]
         nbrs = self.dst_indices[self.src_indices == n]

diff --git a/python/nx-cugraph/nx_cugraph/utils/misc.py b/python/nx-cugraph/nx_cugraph/utils/misc.py
@@ -58,16 +58,18 @@ def pairwise(it):
 
 
 def _groupby(
-    groups: cp.ndarray, values: cp.ndarray, groups_are_canonical: bool = False
+    groups: cp.ndarray,
+    values: cp.ndarray | list[cp.ndarray],
+    groups_are_canonical: bool = False,
 ) -> dict[int, cp.ndarray]:
     """Perform a groupby operation given an array of group IDs and array of values.
 
     Parameters
     ----------
     groups : cp.ndarray
         Array that holds the group IDs.
-    values : cp.ndarray
-        Array of values to be grouped according to groups.
+    values : cp.ndarray or list of cp.ndarray
+        Array or list of arrays of values to be grouped according to groups.
         Must be the same size as groups array.
     groups_are_canonical : bool, default False
         Whether the group IDs are consecutive integers beginning with 0.
@@ -80,15 +82,23 @@ def _groupby(
         return {}
     sort_indices = cp.argsort(groups)
     sorted_groups = groups[sort_indices]
-    sorted_values = values[sort_indices]
+    if not isinstance(values, list):
+        sorted_values = values[sort_indices]
+    else:
+        sorted_values = [vals[sort_indices] for vals in values]
     prepend = 1 if groups_are_canonical else sorted_groups[0] + 1
     left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0]
     boundaries = pairwise(itertools.chain(left_bounds.tolist(), [groups.size]))
     if groups_are_canonical:
         it = enumerate(boundaries)
     else:
         it = zip(sorted_groups[left_bounds].tolist(), boundaries)
-    return {group: sorted_values[start:end] for group, (start, end) in it}
+    if not isinstance(values, list):
+        return {group: sorted_values[start:end] for group, (start, end) in it}
+    return {
+        group: [sorted_vals[start:end] for sorted_vals in sorted_values]
+        for group, (start, end) in it
+    }
 
 
 def _seed_to_int(seed: int | Random | None) -> int: