Skip to content

Commit

Permalink
Add a few (mostly "classic") graph generators to nx-cugraph
Browse files Browse the repository at this point in the history
Also, better handle dtypes for edge values passed to pylibcugraph,
which only takes float32 and float64 atm.
  • Loading branch information
eriknw committed Oct 24, 2023
1 parent 9b28458 commit b4fb8df
Show file tree
Hide file tree
Showing 15 changed files with 924 additions and 24 deletions.
1 change: 1 addition & 0 deletions python/nx-cugraph/.flake8
Original file line number Diff line number Diff line change
Expand Up @@ -10,5 +10,6 @@ extend-ignore =
# E203 whitespace before ':' (to be compatible with black)
per-file-ignores =
nx_cugraph/tests/*.py:T201,
nx_cugraph/generators/community.py:E741,
__init__.py:F401,F403,
_nx_cugraph/__init__.py:E501,
14 changes: 14 additions & 0 deletions python/nx-cugraph/_nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,12 +29,26 @@
# "description": "TODO",
"functions": {
# BEGIN: functions
"barbell_graph",
"betweenness_centrality",
"caveman_graph",
"circular_ladder_graph",
"complete_graph",
"cycle_graph",
"edge_betweenness_centrality",
"empty_graph",
"is_isolate",
"isolates",
"karate_club_graph",
"ladder_graph",
"lollipop_graph",
"louvain_communities",
"null_graph",
"number_of_isolates",
"path_graph",
"star_graph",
"trivial_graph",
"wheel_graph",
# END: functions
},
"extra_docstrings": {
Expand Down
8 changes: 4 additions & 4 deletions python/nx-cugraph/lint.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,20 @@ repos:
- id: pyupgrade
args: [--py39-plus]
- repo: https://github.com/psf/black
rev: 23.9.1
rev: 23.10.1
hooks:
- id: black
# - id: black-jupyter
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.292
rev: v0.1.1
hooks:
- id: ruff
args: [--fix-only, --show-fixes]
- repo: https://github.com/PyCQA/flake8
rev: 6.1.0
hooks:
- id: flake8
args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501'] # Why is this necessary?
args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501', '--extend-ignore=SIM105'] # Why is this necessary?
additional_dependencies: &flake8_dependencies
# These versions need to be updated manually
- flake8==6.1.0
Expand All @@ -77,7 +77,7 @@ repos:
additional_dependencies: [tomli]
files: ^(nx_cugraph|docs)/
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.0.292
rev: v0.1.1
hooks:
- id: ruff
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand Down
4 changes: 2 additions & 2 deletions python/nx-cugraph/nx_cugraph/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@
# from . import convert_matrix
# from .convert_matrix import *

# from . import generators
# from .generators import *
from . import generators
from .generators import *

from . import algorithms
from .algorithms import *
Expand Down
119 changes: 111 additions & 8 deletions python/nx-cugraph/nx_cugraph/classes/graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@

import nx_cugraph as nxcg

from ..utils import index_dtype

if TYPE_CHECKING: # pragma: no cover
from collections.abc import Iterable, Iterator

Expand Down Expand Up @@ -62,6 +64,27 @@ class Graph:
_id_to_key: list[NodeKey] | None
_N: int

# Used by graph._get_plc_graph
# Maps edge-value dtypes that pylibcugraph cannot consume directly to the float
# dtype they are cast to before the PLC graph is built. Every target dtype here
# must be a member of `_plc_allowed_edge_types` below.
_plc_type_map: ClassVar[dict[np.dtype, np.dtype]] = {
    # signed int
    np.dtype(np.int8): np.dtype(np.float32),
    np.dtype(np.int16): np.dtype(np.float32),
    np.dtype(np.int32): np.dtype(np.float64),
    np.dtype(np.int64): np.dtype(np.float64),  # raise if abs(x) > 2**53
    # unsigned int
    np.dtype(np.uint8): np.dtype(np.float32),
    np.dtype(np.uint16): np.dtype(np.float32),
    np.dtype(np.uint32): np.dtype(np.float64),
    np.dtype(np.uint64): np.dtype(np.float64),  # raise if x > 2**53
    # other
    # bool and float16 are widened to float32: float16 is not an accepted
    # edge dtype, so it must never be the target of a cast.
    np.dtype(np.bool_): np.dtype(np.float32),
    np.dtype(np.float16): np.dtype(np.float32),
}
# Edge-value dtypes that are passed through unchanged. float64 must be listed
# here, otherwise float64 edge values (and nothing else could ever produce
# them, since the map above casts *to* float64) would be rejected as
# unsupported.
_plc_allowed_edge_types: ClassVar[set[np.dtype]] = {
    np.dtype(np.float32),
    np.dtype(np.float64),
}

####################
# Creation methods #
####################
Expand Down Expand Up @@ -111,6 +134,11 @@ def from_coo(
raise ValueError
if new_graph._id_to_key is not None and len(new_graph._id_to_key) != N:
raise ValueError
if new_graph._id_to_key is not None and new_graph.key_to_id is None:
try:
new_graph.key_to_id = dict(zip(new_graph._id_to_key, range(N)))
except TypeError as exc:
raise ValueError("Bad type of a node value") from exc
return new_graph

@classmethod
Expand All @@ -130,7 +158,7 @@ def from_csr(
N = indptr.size - 1
row_indices = cp.array(
# cp.repeat is slow to use here, so use numpy instead
np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get())
np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get())
)
return cls.from_coo(
N,
Expand Down Expand Up @@ -162,7 +190,7 @@ def from_csc(
N = indptr.size - 1
col_indices = cp.array(
# cp.repeat is slow to use here, so use numpy instead
np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get())
np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get())
)
return cls.from_coo(
N,
Expand Down Expand Up @@ -245,7 +273,9 @@ def from_dcsc(

def __new__(cls, incoming_graph_data=None, **attr) -> Graph:
if incoming_graph_data is None:
new_graph = cls.from_coo(0, cp.empty(0, np.int32), cp.empty(0, np.int32))
new_graph = cls.from_coo(
0, cp.empty(0, index_dtype), cp.empty(0, index_dtype)
)
elif incoming_graph_data.__class__ is cls:
new_graph = incoming_graph_data.copy()
elif incoming_graph_data.__class__ is cls.to_networkx_class():
Expand Down Expand Up @@ -521,11 +551,36 @@ def _get_plc_graph(
# Mask is all True; don't need anymore
del self.edge_masks[edge_attr]
edge_array = self.edge_values[edge_attr]
if edge_array is not None:
if edge_dtype is not None:
edge_dtype = np.dtype(edge_dtype)
if edge_array.dtype != edge_dtype:
edge_array = edge_array.astype(edge_dtype)
# PLC doesn't handle int edge weights right now, so cast int to float
if edge_array.dtype in self._plc_type_map:
if edge_array.dtype == np.int64:
if (val := edge_array.max().tolist()) > 2**53:
raise ValueError(
f"Integer value of value is too large (> 2**53): {val}; "
"pylibcugraph only supports float16 and float32 dtypes."
)
if (val := edge_array.min().tolist()) < -(2**53):
raise ValueError(
f"Integer value of value is small large (< -2**53): {val}; "
"pylibcugraph only supports float16 and float32 dtypes."
)
elif edge_array.dtype == np.uint64:
if edge_array.max().tolist() > 2**53:
raise ValueError(
f"Integer value of value is too large (> 2**53): {val}; "
"pylibcugraph only supports float16 and float32 dtypes."
)
...
# Should we warn?
edge_array = edge_array.astype(self._plc_type_map[edge_array.dtype])
elif edge_array.dtype not in self._plc_allowed_edge_types:
raise TypeError
# Should we cache PLC graph?
if edge_dtype is not None:
edge_dtype = np.dtype(edge_dtype)
if edge_array.dtype != edge_dtype:
edge_array = edge_array.astype(edge_dtype)
return plc.SGGraph(
resource_handle=plc.ResourceHandle(),
graph_properties=plc.GraphProperties(
Expand All @@ -540,6 +595,54 @@ def _get_plc_graph(
do_expensive_check=False,
)

def _sort_edge_indices(self, primary="src"):
    """Reorder the edge arrays of this graph in place into sorted order.

    Parameters
    ----------
    primary : {"src", "dst"}, default "src"
        ``"src"`` sorts by ``row_indices`` first and ``col_indices`` second;
        ``"dst"`` sorts by ``col_indices`` first and ``row_indices`` second.

    Raises
    ------
    ValueError
        If ``primary`` is neither ``"src"`` nor ``"dst"``.

    Notes
    -----
    ``row_indices``, ``col_indices`` and every array stored in
    ``edge_values`` and ``edge_masks`` are permuted together. Nothing is
    reassigned when the edges are already in the requested order.
    """
    # TODO: what about multigraph edge_indices and edge_keys?
    # lexsort treats the LAST key as the primary one, so the primary array
    # is placed second in each pair.
    if primary == "src":
        sort_keys = (self.col_indices, self.row_indices)
    elif primary == "dst":
        sort_keys = (self.row_indices, self.col_indices)
    else:
        raise ValueError(
            f'Bad `primary` argument; expected "src" or "dst", got {primary!r}'
        )
    order = cp.lexsort(cp.vstack(sort_keys))
    already_sorted = (cp.diff(order) > 0).all()
    if already_sorted:
        # A strictly increasing permutation is the identity: nothing to do.
        return
    self.row_indices = self.row_indices[order]
    self.col_indices = self.col_indices[order]
    # Apply the same permutation to every per-edge array.
    for per_edge in (self.edge_values, self.edge_masks):
        per_edge.update({name: arr[order] for name, arr in per_edge.items()})

def _become(self, other: Graph):
    """Replace the contents of this graph with those of ``other``, in place.

    The ``edge_values``, ``edge_masks``, ``node_values``, ``node_masks`` and
    ``graph`` dict objects already held by ``self`` are kept (they are
    cleared via ``self.clear()`` and refilled from ``other``), so existing
    references to those dicts stay valid. Every other instance attribute is
    copied from ``other.__dict__``.

    Parameters
    ----------
    other : Graph
        Graph whose state is copied; must be of exactly the same class.

    Returns
    -------
    Graph
        ``self``.

    Raises
    ------
    TypeError
        If ``other`` is not of exactly the same class as ``self``.
    """
    if self.__class__ is not other.__class__:
        raise TypeError(
            "Attempting to update graph inplace with graph of different type!"
        )
    self.clear()
    preserved_names = (
        "edge_values",
        "edge_masks",
        "node_values",
        "node_masks",
        "graph",
    )
    # Grab our own containers before __dict__ is overwritten below.
    preserved = {name: getattr(self, name) for name in preserved_names}
    for name, container in preserved.items():
        container.update(getattr(other, name))
    # Take over all remaining state, then put our own containers back so
    # that their identity is unchanged.
    self.__dict__.update(other.__dict__)
    for name, container in preserved.items():
        setattr(self, name, container)
    return self

# Data conversions
def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]:
"""Convert an iterable of node IDs to an iterable of node keys."""
if (id_to_key := self.id_to_key) is not None:
Expand Down Expand Up @@ -582,7 +685,7 @@ def _dict_to_nodearrays(
indices_iter = d
else:
indices_iter = map(self.key_to_id.__getitem__, d)
node_ids = cp.fromiter(indices_iter, np.int32)
node_ids = cp.fromiter(indices_iter, index_dtype)
if dtype is None:
values = cp.array(list(d.values()))
else:
Expand Down
5 changes: 5 additions & 0 deletions python/nx-cugraph/nx_cugraph/classes/multidigraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,11 @@


class MultiDiGraph(MultiGraph, DiGraph):
@classmethod
@networkx_api
def is_directed(cls) -> bool:
    """Return True: this graph class always reports itself as directed."""
    return True

@classmethod
def to_networkx_class(cls) -> type[nx.MultiDiGraph]:
    """Return the ``nx.MultiDiGraph`` class that corresponds to this class."""
    return nx.MultiDiGraph
5 changes: 3 additions & 2 deletions python/nx-cugraph/nx_cugraph/classes/multigraph.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

import nx_cugraph as nxcg

from ..utils import index_dtype
from .graph import Graph

if TYPE_CHECKING:
Expand Down Expand Up @@ -121,7 +122,7 @@ def from_csr(
N = indptr.size - 1
row_indices = cp.array(
# cp.repeat is slow to use here, so use numpy instead
np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get())
np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get())
)
return cls.from_coo(
N,
Expand Down Expand Up @@ -157,7 +158,7 @@ def from_csc(
N = indptr.size - 1
col_indices = cp.array(
# cp.repeat is slow to use here, so use numpy instead
np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get())
np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get())
)
return cls.from_coo(
N,
Expand Down
18 changes: 11 additions & 7 deletions python/nx-cugraph/nx_cugraph/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@

import nx_cugraph as nxcg

from .utils import index_dtype

if TYPE_CHECKING: # pragma: no cover
from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue

Expand Down Expand Up @@ -266,22 +268,22 @@ def from_networkx(
else:
col_iter = map(key_to_id.__getitem__, col_iter)
if graph.is_multigraph():
col_indices = np.fromiter(col_iter, np.int32)
col_indices = np.fromiter(col_iter, index_dtype)
num_multiedges = np.fromiter(
map(len, concat(map(dict.values, adj.values()))), np.int32
map(len, concat(map(dict.values, adj.values()))), index_dtype
)
# cp.repeat is slow to use here, so use numpy instead
col_indices = cp.array(np.repeat(col_indices, num_multiedges))
# Determine edge keys and edge ids for multigraphs
edge_keys = list(concat(concat(map(dict.values, adj.values()))))
edge_indices = cp.fromiter(
concat(map(range, map(len, concat(map(dict.values, adj.values()))))),
np.int32,
index_dtype,
)
if edge_keys == edge_indices.tolist():
edge_keys = None # Prefer edge_indices
else:
col_indices = cp.fromiter(col_iter, np.int32)
col_indices = cp.fromiter(col_iter, index_dtype)

edge_values = {}
edge_masks = {}
Expand Down Expand Up @@ -354,7 +356,8 @@ def from_networkx(

# cp.repeat is slow to use here, so use numpy instead
row_indices = np.repeat(
np.arange(N, dtype=np.int32), np.fromiter(map(len, adj.values()), np.int32)
np.arange(N, dtype=index_dtype),
np.fromiter(map(len, adj.values()), index_dtype),
)
if graph.is_multigraph():
row_indices = np.repeat(row_indices, num_multiedges)
Expand Down Expand Up @@ -500,12 +503,13 @@ def to_networkx(G: nxcg.Graph) -> nx.Graph:
col_indices = G.col_indices
edge_values = G.edge_values
edge_masks = G.edge_masks
if edge_values and not G.is_directed():
if not G.is_directed():
# Only add upper triangle of the adjacency matrix so we don't double-add edges
mask = row_indices <= col_indices
row_indices = row_indices[mask]
col_indices = col_indices[mask]
edge_values = {k: v[mask] for k, v in edge_values.items()}
if edge_values:
edge_values = {k: v[mask] for k, v in edge_values.items()}
if edge_masks:
edge_masks = {k: v[mask] for k, v in edge_masks.items()}
row_indices = row_iter = row_indices.tolist()
Expand Down
15 changes: 15 additions & 0 deletions python/nx-cugraph/nx_cugraph/generators/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# Copyright (c) 2023, NVIDIA CORPORATION.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .classic import *
from .community import *
from .social import *
Loading

0 comments on commit b4fb8df

Please sign in to comment.