Add a few (mostly "classic") graph generators to nx-cugraph

Also, handle dtypes better for edge values passed to pylibcugraph, which currently accepts only float32 and float64.
rapidsai · Oct 24, 2023 · b4fb8df · b4fb8df
1 parent 9b28458
commit b4fb8df
Show file tree

Hide file tree

Showing 15 changed files with 924 additions and 24 deletions.
diff --git a/python/nx-cugraph/.flake8 b/python/nx-cugraph/.flake8
@@ -10,5 +10,6 @@ extend-ignore =
 # E203 whitespace before ':' (to be compatible with black)
 per-file-ignores =
     nx_cugraph/tests/*.py:T201,
+    nx_cugraph/generators/community.py:E741,
     __init__.py:F401,F403,
     _nx_cugraph/__init__.py:E501,
diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py
@@ -29,12 +29,26 @@
     # "description": "TODO",
     "functions": {
         # BEGIN: functions
+        "barbell_graph",
         "betweenness_centrality",
+        "caveman_graph",
+        "circular_ladder_graph",
+        "complete_graph",
+        "cycle_graph",
         "edge_betweenness_centrality",
+        "empty_graph",
         "is_isolate",
         "isolates",
+        "karate_club_graph",
+        "ladder_graph",
+        "lollipop_graph",
         "louvain_communities",
+        "null_graph",
         "number_of_isolates",
+        "path_graph",
+        "star_graph",
+        "trivial_graph",
+        "wheel_graph",
         # END: functions
     },
     "extra_docstrings": {

diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml
@@ -45,20 +45,20 @@ repos:
       - id: pyupgrade
         args: [--py39-plus]
   - repo: https://github.com/psf/black
-    rev: 23.9.1
+    rev: 23.10.1
     hooks:
       - id: black
       # - id: black-jupyter
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.292
+    rev: v0.1.1
     hooks:
       - id: ruff
         args: [--fix-only, --show-fixes]
   - repo: https://github.com/PyCQA/flake8
     rev: 6.1.0
     hooks:
       - id: flake8
-        args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501']  # Why is this necessary?
+        args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501', '--extend-ignore=SIM105']  # Why is this necessary?
         additional_dependencies: &flake8_dependencies
           # These versions need updated manually
           - flake8==6.1.0
@@ -77,7 +77,7 @@ repos:
         additional_dependencies: [tomli]
         files: ^(nx_cugraph|docs)/
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.0.292
+    rev: v0.1.1
     hooks:
       - id: ruff
   - repo: https://github.com/pre-commit/pre-commit-hooks

diff --git a/python/nx-cugraph/nx_cugraph/__init__.py b/python/nx-cugraph/nx_cugraph/__init__.py
@@ -23,8 +23,8 @@
 # from . import convert_matrix
 # from .convert_matrix import *
 
-# from . import generators
-# from .generators import *
+from . import generators
+from .generators import *
 
 from . import algorithms
 from .algorithms import *

diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py
@@ -23,6 +23,8 @@
 
 import nx_cugraph as nxcg
 
+from ..utils import index_dtype
+
 if TYPE_CHECKING:  # pragma: no cover
     from collections.abc import Iterable, Iterator
 
@@ -62,6 +64,27 @@ class Graph:
     _id_to_key: list[NodeKey] | None
     _N: int
 
+    # Used by graph._get_plc_graph
+    # Maps edge-value dtypes that are not passed to pylibcugraph as-is to the
+    # float dtype they are cast to first. Small ints fit exactly in float32;
+    # 32-bit and 64-bit ints need float64.
+    _plc_type_map: ClassVar[dict[np.dtype, np.dtype]] = {
+        # signed int
+        np.dtype(np.int8): np.dtype(np.float32),
+        np.dtype(np.int16): np.dtype(np.float32),
+        np.dtype(np.int32): np.dtype(np.float64),
+        np.dtype(np.int64): np.dtype(np.float64),  # raise if abs(x) > 2**53
+        # unsigned int
+        np.dtype(np.uint8): np.dtype(np.float32),
+        np.dtype(np.uint16): np.dtype(np.float32),
+        np.dtype(np.uint32): np.dtype(np.float64),
+        np.dtype(np.uint64): np.dtype(np.float64),  # raise if x > 2**53
+        # other
+        # float16 is not accepted by pylibcugraph, so bool must go to float32
+        np.dtype(np.bool_): np.dtype(np.float32),
+        np.dtype(np.float16): np.dtype(np.float32),
+    }
+    # Edge-value dtypes that are passed to pylibcugraph without casting
+    _plc_allowed_edge_types: ClassVar[set[np.dtype]] = {
+        np.dtype(np.float32),
+        np.dtype(np.float64),
+    }
+
+
     ####################
     # Creation methods #
     ####################
@@ -111,6 +134,11 @@ def from_coo(
             raise ValueError
         if new_graph._id_to_key is not None and len(new_graph._id_to_key) != N:
             raise ValueError
+        if new_graph._id_to_key is not None and new_graph.key_to_id is None:
+            try:
+                new_graph.key_to_id = dict(zip(new_graph._id_to_key, range(N)))
+            except TypeError as exc:
+                raise ValueError("Bad type of a node value") from exc
         return new_graph
 
     @classmethod
@@ -130,7 +158,7 @@ def from_csr(
         N = indptr.size - 1
         row_indices = cp.array(
             # cp.repeat is slow to use here, so use numpy instead
-            np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get())
+            np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get())
         )
         return cls.from_coo(
             N,
@@ -162,7 +190,7 @@ def from_csc(
         N = indptr.size - 1
         col_indices = cp.array(
             # cp.repeat is slow to use here, so use numpy instead
-            np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get())
+            np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get())
         )
         return cls.from_coo(
             N,
@@ -245,7 +273,9 @@ def from_dcsc(
 
     def __new__(cls, incoming_graph_data=None, **attr) -> Graph:
         if incoming_graph_data is None:
-            new_graph = cls.from_coo(0, cp.empty(0, np.int32), cp.empty(0, np.int32))
+            new_graph = cls.from_coo(
+                0, cp.empty(0, index_dtype), cp.empty(0, index_dtype)
+            )
         elif incoming_graph_data.__class__ is cls:
             new_graph = incoming_graph_data.copy()
         elif incoming_graph_data.__class__ is cls.to_networkx_class():
@@ -521,11 +551,36 @@ def _get_plc_graph(
             # Mask is all True; don't need anymore
             del self.edge_masks[edge_attr]
             edge_array = self.edge_values[edge_attr]
+        if edge_array is not None:
+            if edge_dtype is not None:
+                edge_dtype = np.dtype(edge_dtype)
+                if edge_array.dtype != edge_dtype:
+                    edge_array = edge_array.astype(edge_dtype)
+            # PLC doesn't handle int edge weights right now, so cast int to float
+            if edge_array.dtype in self._plc_type_map:
+                if edge_array.dtype == np.int64:
+                    if (val := edge_array.max().tolist()) > 2**53:
+                        raise ValueError(
+                            f"Integer value of value is too large (> 2**53): {val}; "
+                            "pylibcugraph only supports float32 and float64 dtypes."
+                        )
+                    if (val := edge_array.min().tolist()) < -(2**53):
+                        raise ValueError(
+                            f"Integer value of value is too small (< -2**53): {val}; "
+                            "pylibcugraph only supports float32 and float64 dtypes."
+                        )
+                elif edge_array.dtype == np.uint64:
+                    # Bind `val` so the error message below can report it
+                    if (val := edge_array.max().tolist()) > 2**53:
+                        raise ValueError(
+                            f"Integer value of value is too large (> 2**53): {val}; "
+                            "pylibcugraph only supports float32 and float64 dtypes."
+                        )
+                # Should we warn?
+                edge_array = edge_array.astype(self._plc_type_map[edge_array.dtype])
+            elif edge_array.dtype not in self._plc_allowed_edge_types:
+                raise TypeError(f"Unsupported edge dtype: {edge_array.dtype}")
         # Should we cache PLC graph?
-        if edge_dtype is not None:
-            edge_dtype = np.dtype(edge_dtype)
-            if edge_array.dtype != edge_dtype:
-                edge_array = edge_array.astype(edge_dtype)
         return plc.SGGraph(
             resource_handle=plc.ResourceHandle(),
             graph_properties=plc.GraphProperties(
@@ -540,6 +595,54 @@ def _get_plc_graph(
             do_expensive_check=False,
         )
 
+    def _sort_edge_indices(self, primary="src"):
+        """Sort the stored edges in place.
+
+        With ``primary="src"`` edges are ordered by ``row_indices`` and then by
+        ``col_indices``; with ``primary="dst"`` the two keys swap roles. The
+        same permutation is applied to every array in ``edge_values`` and
+        ``edge_masks``. Nothing is reassigned when the edges are already in
+        order.
+
+        Raises
+        ------
+        ValueError
+            If ``primary`` is neither "src" nor "dst".
+        """
+        # TODO: what about multigraph edge_indices and edge_keys?
+        if primary not in ("src", "dst"):
+            raise ValueError(
+                f'Bad `primary` argument; expected "src" or "dst", got {primary!r}'
+            )
+        # lexsort uses the LAST row of its input as the primary sort key
+        minor_key, major_key = self.col_indices, self.row_indices
+        if primary == "dst":
+            minor_key, major_key = major_key, minor_key
+        order = cp.lexsort(cp.vstack((minor_key, major_key)))
+        if (cp.diff(order) > 0).all():
+            return  # Already sorted
+        self.row_indices = self.row_indices[order]
+        self.col_indices = self.col_indices[order]
+        for per_edge in (self.edge_values, self.edge_masks):
+            per_edge.update({key: arr[order] for key, arr in per_edge.items()})
+
+    def _become(self, other: Graph):
+        """Turn this graph into ``other`` in place and return ``self``.
+
+        ``self`` is cleared first. Its existing ``edge_values``, ``edge_masks``,
+        ``node_values``, ``node_masks`` and ``graph`` containers are kept as the
+        same objects and refilled from ``other``; all remaining instance
+        attributes are taken from ``other.__dict__``.
+
+        Raises
+        ------
+        TypeError
+            If ``other`` is not exactly the same class as ``self``.
+        """
+        if self.__class__ is not other.__class__:
+            raise TypeError(
+                "Attempting to update graph inplace with graph of different type!"
+            )
+        self.clear()
+        kept_names = (
+            "edge_values",
+            "edge_masks",
+            "node_values",
+            "node_masks",
+            "graph",
+        )
+        # Hold on to our own containers so their identity survives the
+        # wholesale __dict__ update below.
+        kept = {name: getattr(self, name) for name in kept_names}
+        for name, container in kept.items():
+            container.update(getattr(other, name))
+        self.__dict__.update(other.__dict__)
+        for name, container in kept.items():
+            setattr(self, name, container)
+        return self
+
+    # Data conversions
     def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]:
         """Convert an iterable of node IDs to an iterable of node keys."""
         if (id_to_key := self.id_to_key) is not None:
@@ -582,7 +685,7 @@ def _dict_to_nodearrays(
             indices_iter = d
         else:
             indices_iter = map(self.key_to_id.__getitem__, d)
-        node_ids = cp.fromiter(indices_iter, np.int32)
+        node_ids = cp.fromiter(indices_iter, index_dtype)
         if dtype is None:
             values = cp.array(list(d.values()))
         else:

diff --git a/python/nx-cugraph/nx_cugraph/classes/multidigraph.py b/python/nx-cugraph/nx_cugraph/classes/multidigraph.py
@@ -25,6 +25,11 @@
 
 
 class MultiDiGraph(MultiGraph, DiGraph):
+    @classmethod
+    @networkx_api
+    def is_directed(cls) -> bool:
+        """Report that this graph class is directed; always returns True."""
+        return True
+
     @classmethod
     def to_networkx_class(cls) -> type[nx.MultiDiGraph]:
         """Return ``nx.MultiDiGraph``, the NetworkX class paired with this one."""
         return nx.MultiDiGraph
diff --git a/python/nx-cugraph/nx_cugraph/classes/multigraph.py b/python/nx-cugraph/nx_cugraph/classes/multigraph.py
@@ -21,6 +21,7 @@
 
 import nx_cugraph as nxcg
 
+from ..utils import index_dtype
 from .graph import Graph
 
 if TYPE_CHECKING:
@@ -121,7 +122,7 @@ def from_csr(
         N = indptr.size - 1
         row_indices = cp.array(
             # cp.repeat is slow to use here, so use numpy instead
-            np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get())
+            np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get())
         )
         return cls.from_coo(
             N,
@@ -157,7 +158,7 @@ def from_csc(
         N = indptr.size - 1
         col_indices = cp.array(
             # cp.repeat is slow to use here, so use numpy instead
-            np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get())
+            np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get())
         )
         return cls.from_coo(
             N,

diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py
@@ -24,6 +24,8 @@
 
 import nx_cugraph as nxcg
 
+from .utils import index_dtype
+
 if TYPE_CHECKING:  # pragma: no cover
     from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue
 
@@ -266,22 +268,22 @@ def from_networkx(
     else:
         col_iter = map(key_to_id.__getitem__, col_iter)
     if graph.is_multigraph():
-        col_indices = np.fromiter(col_iter, np.int32)
+        col_indices = np.fromiter(col_iter, index_dtype)
         num_multiedges = np.fromiter(
-            map(len, concat(map(dict.values, adj.values()))), np.int32
+            map(len, concat(map(dict.values, adj.values()))), index_dtype
         )
         # cp.repeat is slow to use here, so use numpy instead
         col_indices = cp.array(np.repeat(col_indices, num_multiedges))
         # Determine edge keys and edge ids for multigraphs
         edge_keys = list(concat(concat(map(dict.values, adj.values()))))
         edge_indices = cp.fromiter(
             concat(map(range, map(len, concat(map(dict.values, adj.values()))))),
-            np.int32,
+            index_dtype,
         )
         if edge_keys == edge_indices.tolist():
             edge_keys = None  # Prefer edge_indices
     else:
-        col_indices = cp.fromiter(col_iter, np.int32)
+        col_indices = cp.fromiter(col_iter, index_dtype)
 
     edge_values = {}
     edge_masks = {}
@@ -354,7 +356,8 @@ def from_networkx(
 
     # cp.repeat is slow to use here, so use numpy instead
     row_indices = np.repeat(
-        np.arange(N, dtype=np.int32), np.fromiter(map(len, adj.values()), np.int32)
+        np.arange(N, dtype=index_dtype),
+        np.fromiter(map(len, adj.values()), index_dtype),
     )
     if graph.is_multigraph():
         row_indices = np.repeat(row_indices, num_multiedges)
@@ -500,12 +503,13 @@ def to_networkx(G: nxcg.Graph) -> nx.Graph:
     col_indices = G.col_indices
     edge_values = G.edge_values
     edge_masks = G.edge_masks
-    if edge_values and not G.is_directed():
+    if not G.is_directed():
         # Only add upper triangle of the adjacency matrix so we don't double-add edges
         mask = row_indices <= col_indices
         row_indices = row_indices[mask]
         col_indices = col_indices[mask]
-        edge_values = {k: v[mask] for k, v in edge_values.items()}
+        if edge_values:
+            edge_values = {k: v[mask] for k, v in edge_values.items()}
         if edge_masks:
             edge_masks = {k: v[mask] for k, v in edge_masks.items()}
     row_indices = row_iter = row_indices.tolist()

diff --git a/python/nx-cugraph/nx_cugraph/generators/__init__.py b/python/nx-cugraph/nx_cugraph/generators/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2023, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from .classic import *
+from .community import *
+from .social import *