From 975502209991c1abe7b1878585fec9e1978f5b07 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 30 Oct 2023 14:01:25 -0500 Subject: [PATCH 1/7] nx-cugraph: handle seed argument in edge_betweenness_centrality (#3943) CC @rlratzel who brought this to my attention and can test this fix Authors: - Erik Welch (https://github.com/eriknw) - Brad Rees (https://github.com/BradReesWork) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3943 --- .../nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py index 104ac87414c..210e1f0a2b2 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py @@ -53,6 +53,7 @@ def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=No raise NotImplementedError( "Weighted implementation of betweenness centrality not currently supported" ) + seed = _seed_to_int(seed) G = _to_graph(G, weight) src_ids, dst_ids, values, _edge_ids = plc.edge_betweenness_centrality( resource_handle=plc.ResourceHandle(), From 35875a84b95e9d8ba9c691f33521c45845d3e084 Mon Sep 17 00:00:00 2001 From: Chuck Hastings <45364586+ChuckHastings@users.noreply.github.com> Date: Tue, 31 Oct 2023 09:01:07 -0400 Subject: [PATCH 2/7] Errors compiling for DLFW on CUDA 12.3 (#3952) DLFW compilation found a few issues building cugraph 23.10. This PR addresses these issues. Two main issues: * We were brace initializing `raft::device_span` objects with only `nullptr`. The device span already defaults to `nullptr`, and the class hierarchy doesn't support just passing `nullptr`. * We were brace initializing the `raft::random::DeviceState`, but there's no default constructor. 
Changed it to not brace initialize and require a reference to make it clear that you must initialize the DeviceState. Authors: - Chuck Hastings (https://github.com/ChuckHastings) - Ralph Liu (https://github.com/nv-rliu) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Robert Maynard (https://github.com/robertmaynard) URL: https://github.com/rapidsai/cugraph/pull/3952 --- cpp/src/community/detail/refine_impl.cuh | 2 +- cpp/src/prims/detail/nbr_intersection.cuh | 25 +++++++++++------------ 2 files changed, 13 insertions(+), 14 deletions(-) diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index e811aafc776..6b6470991bb 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -64,7 +64,7 @@ struct leiden_key_aggregated_edge_op_t { weight_t total_edge_weight{}; weight_t resolution{}; // resolution parameter weight_t theta{}; // scaling factor - raft::random::DeviceState device_state{}; + raft::random::DeviceState& device_state; __device__ auto operator()( vertex_t src, vertex_t neighboring_leiden_cluster, diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 32247ca3466..cefc1836fa6 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -138,7 +138,7 @@ struct update_rx_major_local_degree_t { size_t local_edge_partition_idx{}; raft::device_span rx_reordered_group_lasts{}; - raft::device_span rx_group_firsts{nullptr}; + raft::device_span rx_group_firsts{}; raft::device_span rx_majors{}; raft::device_span local_degrees_for_rx_majors{}; @@ -200,7 +200,7 @@ struct update_rx_major_local_nbrs_t { size_t local_edge_partition_idx{}; raft::device_span rx_reordered_group_lasts{}; - raft::device_span rx_group_firsts{nullptr}; + raft::device_span rx_group_firsts{}; raft::device_span rx_majors{}; raft::device_span local_nbr_offsets_for_rx_majors{}; raft::device_span 
local_nbrs_for_rx_majors{}; @@ -311,10 +311,10 @@ template struct pick_min_degree_t { FirstElementToIdxMap first_element_to_idx_map{}; - raft::device_span first_element_offsets{nullptr}; + raft::device_span first_element_offsets{}; SecondElementToIdxMap second_element_to_idx_map{}; - raft::device_span second_element_offsets{nullptr}; + raft::device_span second_element_offsets{}; edge_partition_device_view_t edge_partition{}; thrust::optional> @@ -473,12 +473,12 @@ template first_element_offsets{}; - raft::device_span first_element_indices{nullptr}; + raft::device_span first_element_indices{}; optional_property_buffer_view_t first_element_edge_property_values{}; SecondElementToIdxMap second_element_to_idx_map{}; raft::device_span second_element_offsets{}; - raft::device_span second_element_indices{nullptr}; + raft::device_span second_element_indices{}; optional_property_buffer_view_t second_element_edge_property_values{}; edge_partition_device_view_t edge_partition{}; @@ -487,8 +487,8 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { edge_partition_e_mask{}; VertexPairIterator vertex_pair_first; - raft::device_span nbr_intersection_offsets{nullptr}; - raft::device_span nbr_intersection_indices{nullptr}; + raft::device_span nbr_intersection_offsets{}; + raft::device_span nbr_intersection_indices{}; optional_property_buffer_mutable_view_t nbr_intersection_e_property_values0{}; optional_property_buffer_mutable_view_t nbr_intersection_e_property_values1{}; @@ -499,12 +499,11 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; auto pair = *(vertex_pair_first + i); - - vertex_t const* indices0{nullptr}; + vertex_t const* indices0{}; std::conditional_t, edge_property_value_t const*, void*> - edge_property_values0{nullptr}; + edge_property_values0{}; edge_t local_edge_offset0{0}; edge_t local_degree0{0}; @@ -548,11 +547,11 @@ struct 
copy_intersecting_nbrs_and_update_intersection_size_t { local_degree0 = static_cast(first_element_offsets[idx + 1] - local_edge_offset0); } - vertex_t const* indices1{nullptr}; + vertex_t const* indices1{}; std::conditional_t, edge_property_value_t const*, void*> - edge_property_values1{nullptr}; + edge_property_values1{}; edge_t local_edge_offset1{0}; edge_t local_degree1{0}; From d6c7fa131ea5bb901ca4fde3094a29b4681b77bd Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 31 Oct 2023 14:10:20 -0500 Subject: [PATCH 3/7] Add many graph generators to nx-cugraph (#3954) Also, better handle dtypes for edge values passed to pylibcugraph, which only takes float32 and float64 atm. I also defined `index_dtype` (currently int32) to globally control the dtype of indices. Authors: - Erik Welch (https://github.com/eriknw) - Ralph Liu (https://github.com/nv-rliu) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3954 --- python/nx-cugraph/_nx_cugraph/__init__.py | 45 ++ python/nx-cugraph/lint.yaml | 8 +- python/nx-cugraph/nx_cugraph/__init__.py | 8 +- .../nx_cugraph/algorithms/__init__.py | 3 +- .../algorithms/bipartite/__init__.py | 13 + .../algorithms/bipartite/generators.py | 62 ++ .../nx-cugraph/nx_cugraph/algorithms/core.py | 11 +- python/nx-cugraph/nx_cugraph/classes/graph.py | 188 +++++- .../nx_cugraph/classes/multidigraph.py | 5 + .../nx_cugraph/classes/multigraph.py | 74 ++- python/nx-cugraph/nx_cugraph/convert.py | 78 ++- .../nx-cugraph/nx_cugraph/convert_matrix.py | 146 ++++ .../nx_cugraph/generators/__init__.py | 16 + .../nx_cugraph/generators/_utils.py | 136 ++++ .../nx_cugraph/generators/classic.py | 423 ++++++++++++ .../nx_cugraph/generators/community.py | 45 ++ .../nx-cugraph/nx_cugraph/generators/small.py | 622 ++++++++++++++++++ .../nx_cugraph/generators/social.py | 294 +++++++++ python/nx-cugraph/nx_cugraph/interface.py | 35 + .../nx_cugraph/tests/test_generators.py | 277 ++++++++ 
.../nx-cugraph/nx_cugraph/tests/test_utils.py | 87 +++ python/nx-cugraph/nx_cugraph/typing.py | 8 + python/nx-cugraph/nx_cugraph/utils/misc.py | 72 +- python/nx-cugraph/pyproject.toml | 3 +- 24 files changed, 2573 insertions(+), 86 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py create mode 100644 python/nx-cugraph/nx_cugraph/convert_matrix.py create mode 100644 python/nx-cugraph/nx_cugraph/generators/__init__.py create mode 100644 python/nx-cugraph/nx_cugraph/generators/_utils.py create mode 100644 python/nx-cugraph/nx_cugraph/generators/classic.py create mode 100644 python/nx-cugraph/nx_cugraph/generators/community.py create mode 100644 python/nx-cugraph/nx_cugraph/generators/small.py create mode 100644 python/nx-cugraph/nx_cugraph/generators/social.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_generators.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_utils.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 965b5b232ab..af1df04644c 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -29,23 +29,68 @@ # "description": "TODO", "functions": { # BEGIN: functions + "barbell_graph", "betweenness_centrality", + "bull_graph", + "caveman_graph", + "chvatal_graph", + "circular_ladder_graph", + "complete_bipartite_graph", + "complete_graph", + "complete_multipartite_graph", + "cubical_graph", + "cycle_graph", + "davis_southern_women_graph", "degree_centrality", + "desargues_graph", + "diamond_graph", + "dodecahedral_graph", "edge_betweenness_centrality", + "empty_graph", + "florentine_families_graph", + "from_pandas_edgelist", + "from_scipy_sparse_array", + "frucht_graph", + "heawood_graph", + "house_graph", + "house_x_graph", + "icosahedral_graph", "in_degree_centrality", "is_isolate", "isolates", "k_truss", 
+ "karate_club_graph", + "krackhardt_kite_graph", + "ladder_graph", + "les_miserables_graph", + "lollipop_graph", "louvain_communities", + "moebius_kantor_graph", + "null_graph", "number_of_isolates", "number_of_selfloops", + "octahedral_graph", "out_degree_centrality", + "pappus_graph", + "path_graph", + "petersen_graph", + "sedgewick_maze_graph", + "star_graph", + "tadpole_graph", + "tetrahedral_graph", + "trivial_graph", + "truncated_cube_graph", + "truncated_tetrahedron_graph", + "turan_graph", + "tutte_graph", + "wheel_graph", # END: functions }, "extra_docstrings": { # BEGIN: extra_docstrings "betweenness_centrality": "`weight` parameter is not yet supported.", "edge_betweenness_centrality": "`weight` parameter is not yet supported.", + "from_pandas_edgelist": "cudf.DataFrame inputs also supported.", "louvain_communities": "`seed` parameter is currently ignored.", # END: extra_docstrings }, diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index fef2cebc7f5..01a806e6162 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -45,12 +45,12 @@ repos: - id: pyupgrade args: [--py39-plus] - repo: https://github.com/psf/black - rev: 23.10.0 + rev: 23.10.1 hooks: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.1 + rev: v0.1.3 hooks: - id: ruff args: [--fix-only, --show-fixes] @@ -58,7 +58,7 @@ repos: rev: 6.1.0 hooks: - id: flake8 - args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501'] # Why is this necessary? + args: ['--per-file-ignores=_nx_cugraph/__init__.py:E501', '--extend-ignore=SIM105'] # Why is this necessary? 
additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==6.1.0 @@ -77,7 +77,7 @@ repos: additional_dependencies: [tomli] files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.1 + rev: v0.1.3 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/nx-cugraph/nx_cugraph/__init__.py b/python/nx-cugraph/nx_cugraph/__init__.py index 1eaf3ec6bbb..25d44212264 100644 --- a/python/nx-cugraph/nx_cugraph/__init__.py +++ b/python/nx-cugraph/nx_cugraph/__init__.py @@ -20,11 +20,11 @@ from . import convert from .convert import * -# from . import convert_matrix -# from .convert_matrix import * +from . import convert_matrix +from .convert_matrix import * -# from . import generators -# from .generators import * +from . import generators +from .generators import * from . import algorithms from .algorithms import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 22600bfdc2d..69feb8f6437 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -10,7 +10,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from . import centrality, community +from . import bipartite, centrality, community +from .bipartite import complete_bipartite_graph from .centrality import * from .core import * from .isolate import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py new file mode 100644 index 00000000000..062be973d55 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .generators import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py new file mode 100644 index 00000000000..1d3e762b4fd --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py @@ -0,0 +1,62 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from numbers import Integral + +import cupy as cp +import networkx as nx +import numpy as np + +from nx_cugraph.generators._utils import _create_using_class, _number_and_nodes +from nx_cugraph.utils import index_dtype, networkx_algorithm, nodes_or_number + +__all__ = [ + "complete_bipartite_graph", +] + + +@nodes_or_number([0, 1]) +@networkx_algorithm +def complete_bipartite_graph(n1, n2, create_using=None): + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + orig_n1, unused_nodes1 = n1 + orig_n2, unused_nodes2 = n2 + n1, nodes1 = _number_and_nodes(n1) + n2, nodes2 = _number_and_nodes(n2) + all_indices = cp.indices((n1, n2), dtype=index_dtype) + indices0 = all_indices[0].ravel() + indices1 = all_indices[1].ravel() + n1 + del all_indices + src_indices = cp.hstack((indices0, indices1)) + dst_indices = cp.hstack((indices1, indices0)) + bipartite = cp.zeros(n1 + n2, np.int8) + bipartite[n1:] = 1 + if isinstance(orig_n1, Integral) and isinstance(orig_n2, Integral): + nodes = None + else: + nodes = list(range(n1)) if nodes1 is None else nodes1 + nodes.extend(range(n2) if nodes2 is None else nodes2) + if len(set(nodes)) != len(nodes): + raise nx.NetworkXError("Inputs n1 and n2 must contain distinct nodes") + G = graph_class.from_coo( + n1 + n2, + src_indices, + dst_indices, + node_values={"bipartite": bipartite}, + id_to_key=nodes, + name=f"complete_bipartite_graph({orig_n1}, {orig_n2})", + ) + if inplace: + return create_using._become(G) + return G diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index 0a64dd71c69..33e79793553 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -12,11 +12,10 @@ # limitations under the License. 
import cupy as cp import networkx as nx -import numpy as np import pylibcugraph as plc import nx_cugraph as nxcg -from nx_cugraph.utils import networkx_algorithm, not_implemented_for +from nx_cugraph.utils import _get_int_dtype, networkx_algorithm, not_implemented_for __all__ = ["k_truss"] @@ -51,8 +50,8 @@ def k_truss(G, k): edge_values = {key: val.copy() for key, val in G.edge_values.items()} edge_masks = {key: val.copy() for key, val in G.edge_masks.items()} else: - # int dtype for edge_indices would be preferred - edge_indices = cp.arange(G.src_indices.size, dtype=np.float64) + edge_dtype = _get_int_dtype(G.src_indices.size - 1) + edge_indices = cp.arange(G.src_indices.size, dtype=edge_dtype) src_indices, dst_indices, edge_indices, _ = plc.k_truss_subgraph( resource_handle=plc.ResourceHandle(), graph=G._get_plc_graph(edge_array=edge_indices), @@ -62,7 +61,9 @@ def k_truss(G, k): # Renumber step 0: node indices node_indices = cp.unique(cp.concatenate([src_indices, dst_indices])) # Renumber step 1: edge values - edge_indices = edge_indices.astype(np.int64) + if edge_indices.dtype != edge_dtype: + # The returned edge_indices may have different dtype (and float) + edge_indices = edge_indices.astype(edge_dtype) edge_values = {key: val[edge_indices] for key, val in G.edge_values.items()} edge_masks = {key: val[edge_indices] for key, val in G.edge_masks.items()} # Renumber step 2: edge indices diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 166b6b9dc6b..2048c4c3d72 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -14,7 +14,7 @@ import operator as op from copy import deepcopy -from typing import TYPE_CHECKING, ClassVar +from typing import TYPE_CHECKING import cupy as cp import networkx as nx @@ -23,8 +23,11 @@ import nx_cugraph as nxcg +from ..utils import index_dtype + if TYPE_CHECKING: # pragma: no cover from collections.abc import 
Iterable, Iterator + from typing import ClassVar from nx_cugraph.typing import ( AttrKey, @@ -34,6 +37,7 @@ IndexValue, NodeKey, NodeValue, + any_ndarray, ) __all__ = ["Graph"] @@ -51,17 +55,38 @@ class Graph: graph_attr_dict_factory: ClassVar[type] = dict # Not networkx properties - # We store edge data in COO format with {row,col}_indices and edge_values. + # We store edge data in COO format with {src,dst}_indices and edge_values. src_indices: cp.ndarray[IndexValue] dst_indices: cp.ndarray[IndexValue] edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] edge_masks: dict[AttrKey, cp.ndarray[bool]] - node_values: dict[AttrKey, cp.ndarray[NodeValue]] - node_masks: dict[AttrKey, cp.ndarray[bool]] + node_values: dict[AttrKey, any_ndarray[NodeValue]] + node_masks: dict[AttrKey, any_ndarray[bool]] key_to_id: dict[NodeKey, IndexValue] | None _id_to_key: list[NodeKey] | None _N: int + # Used by graph._get_plc_graph + _plc_type_map: ClassVar[dict[np.dtype, np.dtype]] = { + # signed int + np.dtype(np.int8): np.dtype(np.float32), + np.dtype(np.int16): np.dtype(np.float32), + np.dtype(np.int32): np.dtype(np.float64), + np.dtype(np.int64): np.dtype(np.float64), # raise if abs(x) > 2**53 + # unsigned int + np.dtype(np.uint8): np.dtype(np.float32), + np.dtype(np.uint16): np.dtype(np.float32), + np.dtype(np.uint32): np.dtype(np.float64), + np.dtype(np.uint64): np.dtype(np.float64), # raise if x > 2**53 + # other + np.dtype(np.bool_): np.dtype(np.float16), + np.dtype(np.float16): np.dtype(np.float32), + } + _plc_allowed_edge_types: ClassVar[set[np.dtype]] = { + np.dtype(np.float32), + np.dtype(np.float64), + } + #################### # Creation methods # #################### @@ -74,8 +99,8 @@ def from_coo( dst_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = 
None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, @@ -111,6 +136,27 @@ def from_coo( raise ValueError if new_graph._id_to_key is not None and len(new_graph._id_to_key) != N: raise ValueError + if new_graph._id_to_key is not None and new_graph.key_to_id is None: + try: + new_graph.key_to_id = dict(zip(new_graph._id_to_key, range(N))) + except TypeError as exc: + raise ValueError("Bad type of a node value") from exc + if new_graph.src_indices.dtype != index_dtype: + src_indices = new_graph.src_indices.astype(index_dtype) + if not (new_graph.src_indices == src_indices).all(): + raise ValueError( + f"Unable to convert src_indices to {src_indices.dtype.name} " + f"(got {new_graph.src_indices.dtype.name})." + ) + new_graph.src_indices = src_indices + if new_graph.dst_indices.dtype != index_dtype: + dst_indices = new_graph.dst_indices.astype(index_dtype) + if not (new_graph.dst_indices == dst_indices).all(): + raise ValueError( + f"Unable to convert dst_indices to {dst_indices.dtype.name} " + f"(got {new_graph.dst_indices.dtype.name})." 
+ ) + new_graph.dst_indices = dst_indices return new_graph @classmethod @@ -120,8 +166,8 @@ def from_csr( dst_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, @@ -130,7 +176,7 @@ def from_csr( N = indptr.size - 1 src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead - np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get()) + np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get()) ) return cls.from_coo( N, @@ -152,8 +198,8 @@ def from_csc( src_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, @@ -162,7 +208,7 @@ def from_csc( N = indptr.size - 1 dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead - np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get()) + np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get()) ) return cls.from_coo( N, @@ -181,13 +227,13 @@ def from_csc( def from_dcsr( cls, N: int, - compressed_rows: cp.ndarray[IndexValue], + compressed_srcs: cp.ndarray[IndexValue], indptr: cp.ndarray[IndexValue], dst_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, 
cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, @@ -195,7 +241,7 @@ def from_dcsr( ) -> Graph: src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead - np.repeat(compressed_rows.get(), cp.diff(indptr).get()) + np.repeat(compressed_srcs.get(), cp.diff(indptr).get()) ) return cls.from_coo( N, @@ -214,13 +260,13 @@ def from_dcsr( def from_dcsc( cls, N: int, - compressed_cols: cp.ndarray[IndexValue], + compressed_dsts: cp.ndarray[IndexValue], indptr: cp.ndarray[IndexValue], src_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, @@ -228,7 +274,7 @@ def from_dcsc( ) -> Graph: dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead - np.repeat(compressed_cols.get(), cp.diff(indptr).get()) + np.repeat(compressed_dsts.get(), cp.diff(indptr).get()) ) return cls.from_coo( N, @@ -245,7 +291,9 @@ def from_dcsc( def __new__(cls, incoming_graph_data=None, **attr) -> Graph: if incoming_graph_data is None: - new_graph = cls.from_coo(0, cp.empty(0, np.int32), cp.empty(0, np.int32)) + new_graph = cls.from_coo( + 0, cp.empty(0, index_dtype), cp.empty(0, index_dtype) + ) elif 
incoming_graph_data.__class__ is cls: new_graph = incoming_graph_data.copy() elif incoming_graph_data.__class__ is cls.to_networkx_class(): @@ -336,6 +384,17 @@ def __len__(self) -> int: # NetworkX graph methods # ########################## + @networkx_api + def add_nodes_from(self, nodes_for_adding: Iterable[NodeKey], **attr) -> None: + if self._N != 0: + raise NotImplementedError( + "add_nodes_from is not implemented for graph that already has nodes." + ) + G = self.to_networkx_class()() + G.add_nodes_from(nodes_for_adding, **attr) + G = nxcg.from_networkx(G, preserve_node_attrs=True) + self._become(G) + @networkx_api def clear(self) -> None: self.edge_values.clear() @@ -522,11 +581,38 @@ def _get_plc_graph( # Mask is all True; don't need anymore del self.edge_masks[edge_attr] edge_array = self.edge_values[edge_attr] + if edge_array is not None: + if edge_dtype is not None: + edge_dtype = np.dtype(edge_dtype) + if edge_array.dtype != edge_dtype: + edge_array = edge_array.astype(edge_dtype) + # PLC doesn't handle int edge weights right now, so cast int to float + if edge_array.dtype in self._plc_type_map: + if edge_array.dtype == np.int64: + if (val := edge_array.max().tolist()) > 2**53: + raise ValueError( + f"Integer value of value is too large (> 2**53): {val}; " + "pylibcugraph only supports float16 and float32 dtypes." + ) + if (val := edge_array.min().tolist()) < -(2**53): + raise ValueError( + f"Integer value of value is small large (< -2**53): {val}; " + "pylibcugraph only supports float16 and float32 dtypes." + ) + elif ( + edge_array.dtype == np.uint64 + and edge_array.max().tolist() > 2**53 + ): + raise ValueError( + f"Integer value of value is too large (> 2**53): {val}; " + "pylibcugraph only supports float16 and float32 dtypes." + ) + # Consider warning here if we add algorithms that may + # introduce roundoff errors when using floats as ints. 
+ edge_array = edge_array.astype(self._plc_type_map[edge_array.dtype]) + elif edge_array.dtype not in self._plc_allowed_edge_types: + raise TypeError(edge_array.dtype) # Should we cache PLC graph? - if edge_dtype is not None: - edge_dtype = np.dtype(edge_dtype) - if edge_array.dtype != edge_dtype: - edge_array = edge_array.astype(edge_dtype) return plc.SGGraph( resource_handle=plc.ResourceHandle(), graph_properties=plc.GraphProperties( @@ -541,12 +627,60 @@ def _get_plc_graph( do_expensive_check=False, ) + def _sort_edge_indices(self, primary="src"): + # DRY warning: see also MultiGraph._sort_edge_indices + if primary == "src": + stacked = cp.vstack((self.dst_indices, self.src_indices)) + elif primary == "dst": + stacked = cp.vstack((self.src_indices, self.dst_indices)) + else: + raise ValueError( + f'Bad `primary` argument; expected "src" or "dst", got {primary!r}' + ) + indices = cp.lexsort(stacked) + if (cp.diff(indices) > 0).all(): + # Already sorted + return + self.src_indices = self.src_indices[indices] + self.dst_indices = self.dst_indices[indices] + self.edge_values.update( + {key: val[indices] for key, val in self.edge_values.items()} + ) + self.edge_masks.update( + {key: val[indices] for key, val in self.edge_masks.items()} + ) + + def _become(self, other: Graph): + if self.__class__ is not other.__class__: + raise TypeError( + "Attempting to update graph inplace with graph of different type!" 
+ ) + self.clear() + edge_values = self.edge_values + edge_masks = self.edge_masks + node_values = self.node_values + node_masks = self.node_masks + graph = self.graph + edge_values.update(other.edge_values) + edge_masks.update(other.edge_masks) + node_values.update(other.node_values) + node_masks.update(other.node_masks) + graph.update(other.graph) + self.__dict__.update(other.__dict__) + self.edge_values = edge_values + self.edge_masks = edge_masks + self.node_values = node_values + self.node_masks = node_masks + self.graph = graph + return self + def _degrees_array(self): degrees = cp.bincount(self.src_indices, minlength=self._N) if self.is_directed(): degrees += cp.bincount(self.dst_indices, minlength=self._N) return degrees + # Data conversions def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]: """Convert an iterable of node IDs to an iterable of node keys.""" if (id_to_key := self.id_to_key) is not None: @@ -567,7 +701,7 @@ def _nodearray_to_dict( return dict(it) def _nodearrays_to_dict( - self, node_ids: cp.ndarray[IndexValue], values: cp.ndarray[NodeValue] + self, node_ids: cp.ndarray[IndexValue], values: any_ndarray[NodeValue] ) -> dict[NodeKey, NodeValue]: it = zip(node_ids.tolist(), values.tolist()) if (id_to_key := self.id_to_key) is not None: @@ -597,7 +731,7 @@ def _dict_to_nodearrays( indices_iter = d else: indices_iter = map(self.key_to_id.__getitem__, d) - node_ids = cp.fromiter(indices_iter, np.int32) + node_ids = cp.fromiter(indices_iter, index_dtype) if dtype is None: values = cp.array(list(d.values())) else: diff --git a/python/nx-cugraph/nx_cugraph/classes/multidigraph.py b/python/nx-cugraph/nx_cugraph/classes/multidigraph.py index 5629e2c9c06..2c7bfc00752 100644 --- a/python/nx-cugraph/nx_cugraph/classes/multidigraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/multidigraph.py @@ -25,6 +25,11 @@ class MultiDiGraph(MultiGraph, DiGraph): + @classmethod + @networkx_api + def is_directed(cls) -> bool: + return True 
+ @classmethod def to_networkx_class(cls) -> type[nx.MultiDiGraph]: return nx.MultiDiGraph diff --git a/python/nx-cugraph/nx_cugraph/classes/multigraph.py b/python/nx-cugraph/nx_cugraph/classes/multigraph.py index 3d90861a328..23466dc7dd4 100644 --- a/python/nx-cugraph/nx_cugraph/classes/multigraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/multigraph.py @@ -21,6 +21,7 @@ import nx_cugraph as nxcg +from ..utils import index_dtype from .graph import Graph if TYPE_CHECKING: @@ -31,6 +32,7 @@ IndexValue, NodeKey, NodeValue, + any_ndarray, ) __all__ = ["MultiGraph"] @@ -43,11 +45,11 @@ class MultiGraph(Graph): # Not networkx properties - # In a MultiGraph, each edge has a unique `(row, col, key)` key. + # In a MultiGraph, each edge has a unique `(src, dst, key)` key. # By default, `key` is 0 if possible, else 1, else 2, etc. # This key can be any hashable Python object in NetworkX. # We don't use a dict for our data structure here, because - # that would require a `(row, col, key)` key. + # that would require a `(src, dst, key)` key. # Instead, we keep `edge_keys` and/or `edge_indices`. # `edge_keys` is the list of Python objects for each edge. 
# `edge_indices` is for the common case of default multiedge keys, @@ -72,8 +74,8 @@ def from_coo( edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, @@ -110,8 +112,8 @@ def from_csr( edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, @@ -121,7 +123,7 @@ def from_csr( N = indptr.size - 1 src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead - np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get()) + np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get()) ) return cls.from_coo( N, @@ -146,8 +148,8 @@ def from_csc( edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = 
None, id_to_key: list[NodeKey] | None = None, @@ -157,7 +159,7 @@ def from_csc( N = indptr.size - 1 dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead - np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get()) + np.repeat(np.arange(N, dtype=index_dtype), cp.diff(indptr).get()) ) return cls.from_coo( N, @@ -178,14 +180,14 @@ def from_csc( def from_dcsr( cls, N: int, - compressed_rows: cp.ndarray[IndexValue], + compressed_srcs: cp.ndarray[IndexValue], indptr: cp.ndarray[IndexValue], dst_indices: cp.ndarray[IndexValue], edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | None = None, *, key_to_id: dict[NodeKey, IndexValue] | None = None, id_to_key: list[NodeKey] | None = None, @@ -194,7 +196,7 @@ def from_dcsr( ) -> MultiGraph: src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead - np.repeat(compressed_rows.get(), cp.diff(indptr).get()) + np.repeat(compressed_srcs.get(), cp.diff(indptr).get()) ) return cls.from_coo( N, @@ -215,14 +217,14 @@ def from_dcsr( def from_dcsc( cls, N: int, - compressed_cols: cp.ndarray[IndexValue], + compressed_dsts: cp.ndarray[IndexValue], indptr: cp.ndarray[IndexValue], src_indices: cp.ndarray[IndexValue], edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, - node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, - node_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, + node_values: dict[AttrKey, any_ndarray[NodeValue]] | None = None, + node_masks: dict[AttrKey, any_ndarray[bool]] | 
None = None,
         *,
         key_to_id: dict[NodeKey, IndexValue] | None = None,
         id_to_key: list[NodeKey] | None = None,
@@ -231,7 +233,7 @@ def from_dcsc(
     ) -> Graph:
         dst_indices = cp.array(
             # cp.repeat is slow to use here, so use numpy instead
-            np.repeat(compressed_cols.get(), cp.diff(indptr).get())
+            np.repeat(compressed_dsts.get(), cp.diff(indptr).get())
         )
         return cls.from_coo(
             N,
@@ -449,3 +451,50 @@ def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False):
         else:
             rv.graph.update(deepcopy(self.graph))
         return rv
+
+    def _sort_edge_indices(self, primary="src"):
+        """Sort the edge arrays of this multigraph in place.
+
+        Edges are ordered by ``(src, dst)`` when ``primary`` is "src" and by
+        ``(dst, src)`` when it is "dst"; ``edge_indices``, when present, is the
+        least significant key.  Edge values, masks, indices, and keys are
+        permuted together with the endpoints.
+
+        Raises ``ValueError`` for any other ``primary`` when edge indices or keys
+        exist; without them the call is delegated to the parent class.
+        """
+        # DRY warning: see also Graph._sort_edge_indices
+        if self.edge_indices is None and self.edge_keys is None:
+            return super()._sort_edge_indices(primary=primary)
+        # cp.lexsort treats the LAST key as the primary sort key.
+        if primary == "src":
+            if self.edge_indices is None:
+                stacked = (self.dst_indices, self.src_indices)
+            else:
+                stacked = (self.edge_indices, self.dst_indices, self.src_indices)
+        elif primary == "dst":
+            if self.edge_indices is None:
+                stacked = (self.src_indices, self.dst_indices)
+            else:
+                stacked = (self.edge_indices, self.src_indices, self.dst_indices)
+        else:
+            raise ValueError(
+                f'Bad `primary` argument; expected "src" or "dst", got {primary!r}'
+            )
+        indices = cp.lexsort(cp.vstack(stacked))
+        if (cp.diff(indices) > 0).all():
+            # Already sorted
+            return
+        self.src_indices = self.src_indices[indices]
+        self.dst_indices = self.dst_indices[indices]
+        self.edge_values.update(
+            {key: val[indices] for key, val in self.edge_values.items()}
+        )
+        self.edge_masks.update(
+            {key: val[indices] for key, val in self.edge_masks.items()}
+        )
+        if self.edge_indices is not None:
+            self.edge_indices = self.edge_indices[indices]
+        if self.edge_keys is not None:
+            edge_keys = self.edge_keys
+            self.edge_keys = [edge_keys[i] for i in indices.tolist()]
diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py
index d117c8e5c03..3c0814370d3 100644
--- a/python/nx-cugraph/nx_cugraph/convert.py
+++
b/python/nx-cugraph/nx_cugraph/convert.py @@ -24,8 +24,10 @@ import nx_cugraph as nxcg +from .utils import index_dtype + if TYPE_CHECKING: # pragma: no cover - from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue + from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue, any_ndarray __all__ = [ "from_networkx", @@ -256,7 +258,7 @@ def from_networkx( node_attrs[attr] = REQUIRED key_to_id = dict(zip(adj, range(N))) - col_iter = concat(adj.values()) + dst_iter = concat(adj.values()) try: no_renumber = all(k == v for k, v in key_to_id.items()) except Exception: @@ -264,11 +266,11 @@ def from_networkx( if no_renumber: key_to_id = None else: - col_iter = map(key_to_id.__getitem__, col_iter) + dst_iter = map(key_to_id.__getitem__, dst_iter) if graph.is_multigraph(): - dst_indices = np.fromiter(col_iter, np.int32) + dst_indices = np.fromiter(dst_iter, index_dtype) num_multiedges = np.fromiter( - map(len, concat(map(dict.values, adj.values()))), np.int32 + map(len, concat(map(dict.values, adj.values()))), index_dtype ) # cp.repeat is slow to use here, so use numpy instead dst_indices = cp.array(np.repeat(dst_indices, num_multiedges)) @@ -276,12 +278,12 @@ def from_networkx( edge_keys = list(concat(concat(map(dict.values, adj.values())))) edge_indices = cp.fromiter( concat(map(range, map(len, concat(map(dict.values, adj.values()))))), - np.int32, + index_dtype, ) if edge_keys == edge_indices.tolist(): edge_keys = None # Prefer edge_indices else: - dst_indices = cp.fromiter(col_iter, np.int32) + dst_indices = cp.fromiter(dst_iter, index_dtype) edge_values = {} edge_masks = {} @@ -354,7 +356,8 @@ def from_networkx( # cp.repeat is slow to use here, so use numpy instead src_indices = np.repeat( - np.arange(N, dtype=np.int32), np.fromiter(map(len, adj.values()), np.int32) + np.arange(N, dtype=index_dtype), + np.fromiter(map(len, adj.values()), index_dtype), ) if graph.is_multigraph(): src_indices = np.repeat(src_indices, num_multiedges) @@ -383,8 +386,18 
@@ def from_networkx( or present for node_id in adj ) - node_masks[node_attr] = cp.fromiter(iter_mask, bool) - node_values[node_attr] = cp.array(vals, dtype) + # Node values may be numpy or cupy arrays (useful for str, object, etc). + # Someday we'll let the user choose np or cp, and support edge values. + node_mask = np.fromiter(iter_mask, bool) + node_value = np.array(vals, dtype) + try: + node_value = cp.array(node_value) + except ValueError: + pass + else: + node_mask = cp.array(node_mask) + node_values[node_attr] = node_value + node_masks[node_attr] = node_mask # if vals.ndim > 1: ... else: if node_default is REQUIRED: @@ -393,10 +406,17 @@ def from_networkx( iter_values = ( nodes[node_id].get(node_attr, node_default) for node_id in adj ) + # Node values may be numpy or cupy arrays (useful for str, object, etc). + # Someday we'll let the user choose np or cp, and support edge values. if dtype is None: - node_values[node_attr] = cp.array(list(iter_values)) + node_value = np.array(list(iter_values)) else: - node_values[node_attr] = cp.fromiter(iter_values, dtype) + node_value = np.fromiter(iter_values, dtype) + try: + node_value = cp.array(node_value) + except ValueError: + pass + node_values[node_attr] = node_value # if vals.ndim > 1: ... if graph.is_multigraph(): if graph.is_directed() or as_directed: @@ -436,8 +456,8 @@ def from_networkx( def _iter_attr_dicts( - values: dict[AttrKey, cp.ndarray[EdgeValue | NodeValue]], - masks: dict[AttrKey, cp.ndarray[bool]], + values: dict[AttrKey, any_ndarray[EdgeValue | NodeValue]], + masks: dict[AttrKey, any_ndarray[bool]], ): full_attrs = list(values.keys() - masks.keys()) if full_attrs: @@ -463,7 +483,7 @@ def _iter_attr_dicts( return full_dicts -def to_networkx(G: nxcg.Graph) -> nx.Graph: +def to_networkx(G: nxcg.Graph, *, sort_edges: bool = False) -> nx.Graph: """Convert a nx_cugraph graph to networkx graph. All edge and node attributes and ``G.graph`` properties are converted. 
@@ -471,6 +491,11 @@ def to_networkx(G: nxcg.Graph) -> nx.Graph: Parameters ---------- G : nx_cugraph.Graph + sort_edges : bool, default False + Whether to sort the edge data of the input graph by (src, dst) indices + before converting. This can be useful to convert to networkx graphs + that iterate over edges consistently since edges are stored in dicts + in the order they were added. Returns ------- @@ -482,6 +507,8 @@ def to_networkx(G: nxcg.Graph) -> nx.Graph: """ rv = G.to_networkx_class()() id_to_key = G.id_to_key + if sort_edges: + G._sort_edge_indices() node_values = G.node_values node_masks = G.node_masks @@ -500,19 +527,20 @@ def to_networkx(G: nxcg.Graph) -> nx.Graph: dst_indices = G.dst_indices edge_values = G.edge_values edge_masks = G.edge_masks - if edge_values and not G.is_directed(): + if not G.is_directed(): # Only add upper triangle of the adjacency matrix so we don't double-add edges mask = src_indices <= dst_indices src_indices = src_indices[mask] dst_indices = dst_indices[mask] - edge_values = {k: v[mask] for k, v in edge_values.items()} + if edge_values: + edge_values = {k: v[mask] for k, v in edge_values.items()} if edge_masks: edge_masks = {k: v[mask] for k, v in edge_masks.items()} - src_indices = row_iter = src_indices.tolist() - dst_indices = col_iter = dst_indices.tolist() + src_indices = src_iter = src_indices.tolist() + dst_indices = dst_iter = dst_indices.tolist() if id_to_key is not None: - row_iter = map(id_to_key.__getitem__, src_indices) - col_iter = map(id_to_key.__getitem__, dst_indices) + src_iter = map(id_to_key.__getitem__, src_indices) + dst_iter = map(id_to_key.__getitem__, dst_indices) if G.is_multigraph() and (G.edge_keys is not None or G.edge_indices is not None): if G.edge_keys is not None: edge_keys = G.edge_keys @@ -520,14 +548,14 @@ def to_networkx(G: nxcg.Graph) -> nx.Graph: edge_keys = G.edge_indices.tolist() if edge_values: full_edge_dicts = _iter_attr_dicts(edge_values, edge_masks) - 
rv.add_edges_from(zip(row_iter, col_iter, edge_keys, full_edge_dicts)) + rv.add_edges_from(zip(src_iter, dst_iter, edge_keys, full_edge_dicts)) else: - rv.add_edges_from(zip(row_iter, col_iter, edge_keys)) + rv.add_edges_from(zip(src_iter, dst_iter, edge_keys)) elif edge_values: full_edge_dicts = _iter_attr_dicts(edge_values, edge_masks) - rv.add_edges_from(zip(row_iter, col_iter, full_edge_dicts)) + rv.add_edges_from(zip(src_iter, dst_iter, full_edge_dicts)) else: - rv.add_edges_from(zip(row_iter, col_iter)) + rv.add_edges_from(zip(src_iter, dst_iter)) rv.graph.update(G.graph) return rv diff --git a/python/nx-cugraph/nx_cugraph/convert_matrix.py b/python/nx-cugraph/nx_cugraph/convert_matrix.py new file mode 100644 index 00000000000..6c8b8fb4a1d --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/convert_matrix.py @@ -0,0 +1,146 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import cupy as cp
+import networkx as nx
+import numpy as np
+
+from .generators._utils import _create_using_class
+from .utils import index_dtype, networkx_algorithm
+
+__all__ = [
+    "from_pandas_edgelist",
+    "from_scipy_sparse_array",
+]
+
+
+@networkx_algorithm
+def from_pandas_edgelist(
+    df,
+    source="source",
+    target="target",
+    edge_attr=None,
+    create_using=None,
+    edge_key=None,
+):
+    """cudf.DataFrame inputs also supported."""
+    graph_class, inplace = _create_using_class(create_using)
+    src_array = df[source].to_numpy()
+    dst_array = df[target].to_numpy()
+    # Renumber step 0: node keys
+    nodes = np.unique(np.concatenate([src_array, dst_array]))
+    N = nodes.size
+    kwargs = {}
+    if N > 0 and (
+        nodes[0] != 0
+        or nodes[N - 1] != N - 1
+        or (
+            nodes.dtype.kind not in {"i", "u"}
+            and not (nodes == np.arange(N, dtype=np.int64)).all()
+        )
+    ):
+        # We need to renumber indices--np.searchsorted to the rescue!
+        kwargs["id_to_key"] = nodes.tolist()
+        src_indices = cp.array(np.searchsorted(nodes, src_array), index_dtype)
+        dst_indices = cp.array(np.searchsorted(nodes, dst_array), index_dtype)
+    else:
+        src_indices = cp.array(src_array)
+        dst_indices = cp.array(dst_array)
+
+    if not graph_class.is_directed():
+        # Symmetrize the edges
+        mask = src_indices != dst_indices
+        if mask.all():
+            mask = None
+        src_indices, dst_indices = (
+            cp.hstack(
+                (src_indices, dst_indices[mask] if mask is not None else dst_indices)
+            ),
+            cp.hstack(
+                (dst_indices, src_indices[mask] if mask is not None else src_indices)
+            ),
+        )
+
+    if edge_attr is not None:
+        # Additional columns requested for edge data
+        if edge_attr is True:
+            attr_col_headings = df.columns.difference({source, target}).to_list()
+        elif isinstance(edge_attr, (list, tuple)):
+            attr_col_headings = edge_attr
+        else:
+            attr_col_headings = [edge_attr]
+        if len(attr_col_headings) == 0:
+            raise nx.NetworkXError(
+                "Invalid edge_attr argument: No columns found with name: "
+                f"{attr_col_headings}"
+            )
+        try:
+            edge_values = {
+                key: cp.array(val.to_numpy())
+                for key, val in df[attr_col_headings].items()
+            }
+        except (KeyError, TypeError) as exc:
+            raise nx.NetworkXError(f"Invalid edge_attr argument: {edge_attr}") from exc
+
+        if not graph_class.is_directed():
+            # Symmetrize the edges
+            edge_values = {
+                key: cp.hstack((val, val[mask] if mask is not None else val))
+                for key, val in edge_values.items()
+            }
+        kwargs["edge_values"] = edge_values
+
+    if graph_class.is_multigraph() and edge_key is not None:
+        try:
+            edge_keys = df[edge_key].to_list()
+        except (KeyError, TypeError) as exc:
+            raise nx.NetworkXError(
+                f"Invalid edge_key argument: {edge_key}"
+            ) from exc
+        if not graph_class.is_directed():
+            # Symmetrize the edges; `edge_keys` is a list, so filter it in Python
+            keep = mask.tolist() if mask is not None else [True] * len(edge_keys)
+            mirrored = [key for key, flag in zip(edge_keys, keep) if flag]
+            edge_keys = edge_keys + mirrored
+        kwargs["edge_keys"] = edge_keys
+
+    G = graph_class.from_coo(N, src_indices, dst_indices, **kwargs)
+    if inplace:
+        return create_using._become(G)
+    return G
+
+
+@networkx_algorithm
+def from_scipy_sparse_array(
+    A, parallel_edges=False, create_using=None, edge_attribute="weight"
+):
+    graph_class, inplace = _create_using_class(create_using)
+    m, n = A.shape
+    if m != n:
+        raise nx.NetworkXError(f"Adjacency matrix not square: nx,ny={A.shape}")
+    if A.format != "coo":
+        A = A.tocoo()
+    if A.dtype.kind in {"i", "u"} and graph_class.is_multigraph() and parallel_edges:
+        src_indices = cp.array(np.repeat(A.row, A.data), index_dtype)
+        dst_indices = cp.array(np.repeat(A.col, A.data), index_dtype)
+        weight = cp.empty(src_indices.size, A.data.dtype)
+        weight[:] = 1
+    else:
+        src_indices = cp.array(A.row, index_dtype)
+        dst_indices = cp.array(A.col, index_dtype)
+        weight = cp.array(A.data)
+    G = graph_class.from_coo(
+        n, src_indices, dst_indices, edge_values={edge_attribute: weight}
+    )
+    if inplace:
+        return create_using._become(G)
+    return G
diff --git a/python/nx-cugraph/nx_cugraph/generators/__init__.py b/python/nx-cugraph/nx_cugraph/generators/__init__.py new
file mode 100644 index 00000000000..c1834a4dec7 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/generators/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .classic import * +from .community import * +from .small import * +from .social import * diff --git a/python/nx-cugraph/nx_cugraph/generators/_utils.py b/python/nx-cugraph/nx_cugraph/generators/_utils.py new file mode 100644 index 00000000000..e38ace5b28d --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/generators/_utils.py @@ -0,0 +1,136 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import operator as op + +import cupy as cp +import networkx as nx + +import nx_cugraph as nxcg + +from ..utils import index_dtype + +# 3.2.1 fixed some issues in generators that occur in 3.2 and earlier +_IS_NX32_OR_LESS = (nxver := nx.__version__)[:3] <= "3.2" and ( + len(nxver) <= 3 or nxver[3] != "." 
and not nxver[3].isdigit() +) + + +def _ensure_int(n): + """Ensure n is integral.""" + return op.index(n) + + +def _ensure_nonnegative_int(n): + """Ensure n is a nonnegative integer.""" + n = op.index(n) + if n < 0: + raise nx.NetworkXError(f"Negative number of nodes not valid: {n}") + return n + + +def _complete_graph_indices(n): + all_indices = cp.indices((n, n), dtype=index_dtype) + src_indices = all_indices[0].ravel() + dst_indices = all_indices[1].ravel() + del all_indices + mask = src_indices != dst_indices + return (src_indices[mask], dst_indices[mask]) + + +def _common_small_graph(n, nodes, create_using, *, allow_directed=True): + """Create a "common graph" for small n. + + n == 0: empty graph + n == 1: empty graph + n == 2: complete graph + n > 2: undefined + """ + graph_class, inplace = _create_using_class(create_using) + if not allow_directed and graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + if n < 2: + G = graph_class.from_coo( + n, cp.empty(0, index_dtype), cp.empty(0, index_dtype), id_to_key=nodes + ) + else: + G = graph_class.from_coo( + n, + cp.arange(2, dtype=index_dtype), + cp.array([1, 0], index_dtype), + id_to_key=nodes, + ) + if inplace: + return create_using._become(G) + return G + + +def _create_using_class(create_using, *, default=nxcg.Graph): + """Handle ``create_using`` argument and return a Graph type from nx_cugraph.""" + inplace = False + if create_using is None: + G = default() + elif isinstance(create_using, type): + G = create_using() + elif not hasattr(create_using, "is_directed") or not hasattr( + create_using, "is_multigraph" + ): + raise TypeError("create_using is not a valid graph type or instance") + elif not isinstance(create_using, nxcg.Graph): + raise NotImplementedError( + f"create_using with object of type {type(create_using)} is not supported " + "by the cugraph backend; only nx_cugraph.Graph objects are allowed." 
+ ) + else: + inplace = True + G = create_using + G.clear() + if not isinstance(G, nxcg.Graph): + if G.is_multigraph(): + if G.is_directed(): + graph_class = nxcg.MultiDiGraph + else: + graph_class = nxcg.MultiGraph + elif G.is_directed(): + graph_class = nxcg.DiGraph + else: + graph_class = nxcg.Graph + if G.__class__ not in {nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph}: + raise NotImplementedError( + f"create_using with type {type(G)} is not supported by the cugraph " + "backend; only standard networkx or nx_cugraph Graph objects are " + "allowed (but not customized subclasses derived from them)." + ) + else: + graph_class = G.__class__ + return graph_class, inplace + + +def _number_and_nodes(n_and_nodes): + n, nodes = n_and_nodes + try: + n = op.index(n) + except TypeError: + n = len(nodes) + if n < 0: + raise nx.NetworkXError(f"Negative number of nodes not valid: {n}") + if not isinstance(nodes, list): + nodes = list(nodes) + if not nodes: + return (n, None) + if nodes[0] == 0 and nodes[n - 1] == n - 1: + try: + if nodes == list(range(n)): + return (n, None) + except Exception: + pass + return (n, nodes) diff --git a/python/nx-cugraph/nx_cugraph/generators/classic.py b/python/nx-cugraph/nx_cugraph/generators/classic.py new file mode 100644 index 00000000000..b196c232320 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/generators/classic.py @@ -0,0 +1,423 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import itertools +from numbers import Integral + +import cupy as cp +import networkx as nx +import numpy as np + +import nx_cugraph as nxcg + +from ..utils import _get_int_dtype, index_dtype, networkx_algorithm, nodes_or_number +from ._utils import ( + _IS_NX32_OR_LESS, + _common_small_graph, + _complete_graph_indices, + _create_using_class, + _ensure_int, + _ensure_nonnegative_int, + _number_and_nodes, +) + +__all__ = [ + "barbell_graph", + "circular_ladder_graph", + "complete_graph", + "complete_multipartite_graph", + "cycle_graph", + "empty_graph", + "ladder_graph", + "lollipop_graph", + "null_graph", + "path_graph", + "star_graph", + "tadpole_graph", + "trivial_graph", + "turan_graph", + "wheel_graph", +] + +concat = itertools.chain.from_iterable + + +@networkx_algorithm +def barbell_graph(m1, m2, create_using=None): + # Like two complete graphs and a path_graph + m1 = _ensure_nonnegative_int(m1) + if m1 < 2: + raise nx.NetworkXError("Invalid graph description, m1 should be >=2") + m2 = _ensure_nonnegative_int(m2) + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + src_bell1, dst_bell1 = _complete_graph_indices(m1) + src_bell2 = src_bell1 + (m1 + m2) + dst_bell2 = dst_bell1 + (m1 + m2) + if m2 == 0: + src_bar = cp.array([m1 - 1, m1], index_dtype) + dst_bar = cp.array([m1, m1 - 1], index_dtype) + else: + src_bar = cp.arange(2 * m1 - 1, 2 * m1 + 2 * m2 + 1, dtype=index_dtype) // 2 + dst_bar = ( + cp.arange(m1 - 1, m1 + m2 + 1, dtype=index_dtype)[:, None] + + cp.array([-1, 1], index_dtype) + ).ravel()[1:-1] + src_indices = cp.hstack((src_bell1, src_bar, src_bell2)) + dst_indices = cp.hstack((dst_bell1, dst_bar, dst_bell2)) + G = graph_class.from_coo(2 * m1 + m2, src_indices, dst_indices) + if inplace: + return create_using._become(G) + return G + + +@networkx_algorithm +def circular_ladder_graph(n, create_using=None): + return _ladder_graph(n, create_using, 
is_circular=True) + + +@nodes_or_number(0) +@networkx_algorithm +def complete_graph(n, create_using=None): + n, nodes = _number_and_nodes(n) + if n < 3: + return _common_small_graph(n, nodes, create_using) + graph_class, inplace = _create_using_class(create_using) + src_indices, dst_indices = _complete_graph_indices(n) + G = graph_class.from_coo(n, src_indices, dst_indices, id_to_key=nodes) + if inplace: + return create_using._become(G) + return G + + +@networkx_algorithm +def complete_multipartite_graph(*subset_sizes): + if not subset_sizes: + return nxcg.Graph() + try: + subset_sizes = [_ensure_int(size) for size in subset_sizes] + except TypeError: + subsets = [list(subset) for subset in subset_sizes] + subset_sizes = [len(subset) for subset in subsets] + nodes = list(concat(subsets)) + else: + subsets = nodes = None + try: + subset_sizes = [_ensure_nonnegative_int(size) for size in subset_sizes] + except nx.NetworkXError: + if _IS_NX32_OR_LESS: + raise NotImplementedError("Negative number of nodes is not supported") + raise + L1 = [] + L2 = [] + total = 0 + for size in subset_sizes: + all_indices = cp.indices((total, size), dtype=index_dtype) + L1.append(all_indices[0].ravel()) + L2.append(all_indices[1].ravel() + total) + total += size + src_indices = cp.hstack(L1 + L2) + dst_indices = cp.hstack(L2 + L1) + subsets_array = cp.array( + np.repeat( + np.arange(len(subset_sizes), dtype=_get_int_dtype(len(subset_sizes) - 1)), + subset_sizes, + ) + ) + return nxcg.Graph.from_coo( + subsets_array.size, + src_indices, + dst_indices, + node_values={"subset": subsets_array}, + id_to_key=nodes, + ) + + +@nodes_or_number(0) +@networkx_algorithm +def cycle_graph(n, create_using=None): + n, nodes = _number_and_nodes(n) + graph_class, inplace = _create_using_class(create_using) + if n == 1: + src_indices = cp.zeros(1, index_dtype) + dst_indices = cp.zeros(1, index_dtype) + elif n == 2 and graph_class.is_multigraph() and not graph_class.is_directed(): + # This is kind of a 
peculiar edge case + src_indices = cp.array([0, 0, 1, 1], index_dtype) + dst_indices = cp.array([1, 1, 0, 0], index_dtype) + elif n < 3: + return _common_small_graph(n, nodes, create_using) + elif graph_class.is_directed(): + src_indices = cp.arange(n, dtype=index_dtype) + dst_indices = cp.arange(1, n + 1, dtype=index_dtype) + dst_indices[-1] = 0 + else: + src_indices = cp.arange(2 * n, dtype=index_dtype) // 2 + dst_indices = ( + cp.arange(n, dtype=index_dtype)[:, None] + cp.array([-1, 1], index_dtype) + ).ravel() + dst_indices[0] = n - 1 + dst_indices[-1] = 0 + G = graph_class.from_coo(n, src_indices, dst_indices, id_to_key=nodes) + if inplace: + return create_using._become(G) + return G + + +@nodes_or_number(0) +@networkx_algorithm +def empty_graph(n=0, create_using=None, default=nx.Graph): + n, nodes = _number_and_nodes(n) + graph_class, inplace = _create_using_class(create_using, default=default) + G = graph_class.from_coo( + n, cp.empty(0, index_dtype), cp.empty(0, index_dtype), id_to_key=nodes + ) + if inplace: + return create_using._become(G) + return G + + +def _ladder_graph(n, create_using, *, is_circular=False): + # Like path path_graph with extra arange, and middle link missing + n = _ensure_nonnegative_int(n) + if n < 2: + if not is_circular: + return _common_small_graph(2 * n, None, create_using, allow_directed=False) + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + if n == 1: + src_indices = cp.array([0, 1, 0, 1], index_dtype) + dst_indices = cp.array([0, 0, 1, 1], index_dtype) + nodes = None + elif graph_class.is_multigraph(): + src_indices = cp.array([0, 0, 1, 1], index_dtype) + dst_indices = cp.array([1, 1, 0, 0], index_dtype) + nodes = [0, -1] + else: + src_indices = cp.array([0, 1], index_dtype) + dst_indices = cp.array([1, 0], index_dtype) + nodes = [0, -1] + G = graph_class.from_coo(2, src_indices, dst_indices, id_to_key=nodes) + if inplace: + 
return create_using._become(G) + return G + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + path_src = cp.arange(1, 2 * n - 1, dtype=index_dtype) // 2 + path_dst = ( + cp.arange(n, dtype=index_dtype)[:, None] + cp.array([-1, 1], index_dtype) + ).ravel()[1:-1] + srcs = [path_src, path_src + n, cp.arange(2 * n, dtype=index_dtype)] + dsts = [ + path_dst, + path_dst + n, + cp.arange(n, 2 * n, dtype=index_dtype), + cp.arange(0, n, dtype=index_dtype), + ] + if is_circular and (n > 2 or graph_class.is_multigraph()): + srcs.append(cp.array([0, n - 1, n, 2 * n - 1], index_dtype)) + dsts.append(cp.array([n - 1, 0, 2 * n - 1, n], index_dtype)) + src_indices = cp.hstack(srcs) + dst_indices = cp.hstack(dsts) + G = graph_class.from_coo(2 * n, src_indices, dst_indices) + if inplace: + return create_using._become(G) + return G + + +@networkx_algorithm +def ladder_graph(n, create_using=None): + return _ladder_graph(n, create_using) + + +@nodes_or_number([0, 1]) +@networkx_algorithm +def lollipop_graph(m, n, create_using=None): + # Like complete_graph then path_graph + orig_m, unused_nodes_m = m + orig_n, unused_nodes_n = n + m, m_nodes = _number_and_nodes(m) + if m < 2: + raise nx.NetworkXError( + "Invalid description: m should indicate at least 2 nodes" + ) + n, n_nodes = _number_and_nodes(n) + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + msrc_indices, mdst_indices = _complete_graph_indices(m) + nsrc_indices = cp.arange(2 * m - 1, 2 * m + 2 * n - 1, dtype=index_dtype) // 2 + ndst_indices = ( + cp.arange(m - 1, m + n, dtype=index_dtype)[:, None] + + cp.array([-1, 1], index_dtype) + ).ravel()[1:-1] + src_indices = cp.hstack((msrc_indices, nsrc_indices)) + dst_indices = cp.hstack((mdst_indices, ndst_indices)) + if isinstance(orig_m, Integral) and isinstance(orig_n, Integral): + nodes 
= None + else: + nodes = list(range(m)) if m_nodes is None else m_nodes + nodes.extend(range(n) if n_nodes is None else n_nodes) + if len(set(nodes)) != len(nodes): + raise nx.NetworkXError("Nodes must be distinct in containers m and n") + G = graph_class.from_coo(m + n, src_indices, dst_indices, id_to_key=nodes) + if inplace: + return create_using._become(G) + return G + + +@networkx_algorithm +def null_graph(create_using=None): + return _common_small_graph(0, None, create_using) + + +@nodes_or_number(0) +@networkx_algorithm +def path_graph(n, create_using=None): + n, nodes = _number_and_nodes(n) + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + src_indices = cp.arange(n - 1, dtype=index_dtype) + dst_indices = cp.arange(1, n, dtype=index_dtype) + elif n < 3: + return _common_small_graph(n, nodes, create_using) + else: + src_indices = cp.arange(1, 2 * n - 1, dtype=index_dtype) // 2 + dst_indices = ( + cp.arange(n, dtype=index_dtype)[:, None] + cp.array([-1, 1], index_dtype) + ).ravel()[1:-1] + G = graph_class.from_coo(n, src_indices, dst_indices, id_to_key=nodes) + if inplace: + return create_using._become(G) + return G + + +@nodes_or_number(0) +@networkx_algorithm +def star_graph(n, create_using=None): + orig_n, orig_nodes = n + n, nodes = _number_and_nodes(n) + # star_graph behaves differently whether the input was an int or iterable + if isinstance(orig_n, Integral): + if nodes is not None: + nodes.append(n) + n += 1 + if n < 3: + return _common_small_graph(n, nodes, create_using, allow_directed=False) + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + flat = cp.zeros(n - 1, index_dtype) + ramp = cp.arange(1, n, dtype=index_dtype) + src_indices = cp.hstack((flat, ramp)) + dst_indices = cp.hstack((ramp, flat)) + G = graph_class.from_coo(n, src_indices, dst_indices, id_to_key=nodes) + if inplace: + return 
create_using._become(G) + return G + + +@nodes_or_number([0, 1]) +@networkx_algorithm +def tadpole_graph(m, n, create_using=None): + orig_m, unused_nodes_m = m + orig_n, unused_nodes_n = n + m, m_nodes = _number_and_nodes(m) + if m < 2: + raise nx.NetworkXError( + "Invalid description: m should indicate at least 2 nodes" + ) + n, n_nodes = _number_and_nodes(n) + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + if isinstance(orig_m, Integral) and isinstance(orig_n, Integral): + nodes = None + else: + nodes = list(range(m)) if m_nodes is None else m_nodes + nodes.extend(range(n) if n_nodes is None else n_nodes) + if m == 2 and not graph_class.is_multigraph(): + src_indices = cp.arange(1, 2 * (m + n) - 1, dtype=index_dtype) // 2 + dst_indices = ( + cp.arange((m + n), dtype=index_dtype)[:, None] + + cp.array([-1, 1], index_dtype) + ).ravel()[1:-1] + else: + src_indices = cp.arange(2 * (m + n), dtype=index_dtype) // 2 + dst_indices = ( + cp.arange((m + n), dtype=index_dtype)[:, None] + + cp.array([-1, 1], index_dtype) + ).ravel() + dst_indices[0] = m - 1 + dst_indices[-1] = 0 + G = graph_class.from_coo(m + n, src_indices, dst_indices, id_to_key=nodes) + if inplace: + return create_using._become(G) + return G + + +@networkx_algorithm +def trivial_graph(create_using=None): + return _common_small_graph(1, None, create_using) + + +@networkx_algorithm +def turan_graph(n, r): + if not 1 <= r <= n: + raise nx.NetworkXError("Must satisfy 1 <= r <= n") + n_div_r, n_mod_r = divmod(n, r) + partitions = [n_div_r] * (r - n_mod_r) + [n_div_r + 1] * n_mod_r + return complete_multipartite_graph(*partitions) + + +@nodes_or_number(0) +@networkx_algorithm +def wheel_graph(n, create_using=None): + n, nodes = _number_and_nodes(n) + graph_class, inplace = _create_using_class(create_using) + if graph_class.is_directed(): + raise nx.NetworkXError("Directed Graph not supported") + if n < 2: + G = 
graph_class.from_coo( + n, cp.empty(0, index_dtype), cp.empty(0, index_dtype), id_to_key=nodes + ) + else: + # Like star_graph + flat = cp.zeros(n - 1, index_dtype) + ramp = cp.arange(1, n, dtype=index_dtype) + # Like cycle_graph + if n < 3: + src_indices = cp.empty(0, index_dtype) + dst_indices = cp.empty(0, index_dtype) + elif n > 3: + src_indices = cp.arange(2, 2 * n, dtype=index_dtype) // 2 + dst_indices = ( + cp.arange(1, n, dtype=index_dtype)[:, None] + + cp.array([-1, 1], index_dtype) + ).ravel() + dst_indices[-1] = 1 + dst_indices[0] = n - 1 + elif graph_class.is_multigraph(): + src_indices = cp.array([1, 1, 2, 2], index_dtype) + dst_indices = cp.array([2, 2, 1, 1], index_dtype) + else: + src_indices = cp.array([1, 2], index_dtype) + dst_indices = cp.array([2, 1], index_dtype) + src_indices = cp.hstack((flat, ramp, src_indices)) + dst_indices = cp.hstack((ramp, flat, dst_indices)) + G = graph_class.from_coo(n, src_indices, dst_indices, id_to_key=nodes) + if inplace: + return create_using._become(G) + return G diff --git a/python/nx-cugraph/nx_cugraph/generators/community.py b/python/nx-cugraph/nx_cugraph/generators/community.py new file mode 100644 index 00000000000..e5cb03e8cc0 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/generators/community.py @@ -0,0 +1,45 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import cupy as cp + +import nx_cugraph as nxcg + +from ..utils import networkx_algorithm +from ._utils import ( + _common_small_graph, + _complete_graph_indices, + _ensure_int, + _ensure_nonnegative_int, +) + +__all__ = [ + "caveman_graph", +] + + +@networkx_algorithm +def caveman_graph(l, k): # noqa: E741 + l = _ensure_int(l) # noqa: E741 + k = _ensure_int(k) + N = _ensure_nonnegative_int(k * l) + if l == 0 or k < 1: + return _common_small_graph(N, None, None) + k = _ensure_nonnegative_int(k) + src_clique, dst_clique = _complete_graph_indices(k) + src_cliques = [src_clique] + dst_cliques = [dst_clique] + src_cliques.extend(src_clique + i * k for i in range(1, l)) + dst_cliques.extend(dst_clique + i * k for i in range(1, l)) + src_indices = cp.hstack(src_cliques) + dst_indices = cp.hstack(dst_cliques) + return nxcg.Graph.from_coo(l * k, src_indices, dst_indices) diff --git a/python/nx-cugraph/nx_cugraph/generators/small.py b/python/nx-cugraph/nx_cugraph/generators/small.py new file mode 100644 index 00000000000..b9a189c31d5 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/generators/small.py @@ -0,0 +1,622 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
import cupy as cp
import networkx as nx

import nx_cugraph as nxcg

from ..utils import index_dtype, networkx_algorithm
from ._utils import _IS_NX32_OR_LESS, _create_using_class

__all__ = [
    "bull_graph",
    "chvatal_graph",
    "cubical_graph",
    "desargues_graph",
    "diamond_graph",
    "dodecahedral_graph",
    "frucht_graph",
    "heawood_graph",
    "house_graph",
    "house_x_graph",
    "icosahedral_graph",
    "krackhardt_kite_graph",
    "moebius_kantor_graph",
    "octahedral_graph",
    "pappus_graph",
    "petersen_graph",
    "sedgewick_maze_graph",
    "tetrahedral_graph",
    "truncated_cube_graph",
    "truncated_tetrahedron_graph",
    "tutte_graph",
]


@networkx_algorithm
def bull_graph(create_using=None):
    # Fixed-table generator: builds "Bull Graph" via `from_coo(5, ...)` from the
    # hard-coded source/destination index arrays below.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    src_indices = cp.array([0, 0, 1, 1, 1, 2, 2, 2, 3, 4], index_dtype)
    dst_indices = cp.array([1, 2, 0, 2, 3, 0, 1, 4, 1, 2], index_dtype)
    G = graph_class.from_coo(5, src_indices, dst_indices, name="Bull Graph")
    if inplace:
        # Hand the freshly built graph over to the caller-supplied object.
        return create_using._become(G)
    return G


@networkx_algorithm
def chvatal_graph(create_using=None):
    # Fixed-table generator: builds "Chvatal Graph" via `from_coo(12, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5,
            6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11,
            11, 11,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 4, 6, 9, 0, 2, 5, 7, 1, 3, 6, 8, 2, 4, 7, 9, 0, 3, 5, 8, 1, 4, 10, 11,
            0, 2, 10, 11, 1, 3, 8, 11, 2, 4, 7, 10, 0, 3, 10, 11, 5, 6, 8, 9, 5, 6,
            7, 9,
        ],
        index_dtype,
    )
    # fmt: on
    G = graph_class.from_coo(12, src_indices, dst_indices, name="Chvatal Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def cubical_graph(create_using=None):
    # Fixed-table generator built via `from_coo(8, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    src_indices = cp.array(
        [0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7],
        index_dtype,
    )
    dst_indices = cp.array(
        [1, 3, 4, 0, 2, 7, 1, 3, 6, 0, 2, 5, 0, 5, 7, 3, 4, 6, 2, 5, 7, 1, 4, 6],
        index_dtype,
    )
    # The name is a 1-tuple when `_IS_NX32_OR_LESS` is set and a plain string
    # otherwise (presumably mirroring what those NetworkX versions return).
    name = ("Platonic Cubical Graph",) if _IS_NX32_OR_LESS else "Platonic Cubical Graph"
    G = graph_class.from_coo(8, src_indices, dst_indices, name=name)
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def desargues_graph(create_using=None):
    # This can also be defined w.r.t. LCF_graph
    #
    # Fixed-table generator: builds "Desargues Graph" via `from_coo(20, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
            8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14,
            14, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 5, 19, 0, 2, 16, 1, 3, 11, 2, 4, 14, 3, 5, 9, 0, 4, 6, 5, 7, 15, 6, 8,
            18, 7, 9, 13, 4, 8, 10, 9, 11, 19, 2, 10, 12, 11, 13, 17, 8, 12, 14, 3,
            13, 15, 6, 14, 16, 1, 15, 17, 12, 16, 18, 7, 17, 19, 0, 10, 18,
        ],
        index_dtype,
    )
    # fmt: on
    if graph_class.is_multigraph():
        # Multigraph classes get one additional entry per node appended.
        src_indices_extra = cp.array(
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
            index_dtype,
        )
        dst_indices_extra = cp.array(
            [5, 16, 11, 14, 9, 0, 15, 18, 13, 4, 19, 2, 17, 8, 3, 6, 1, 12, 7, 10],
            index_dtype,
        )
        src_indices = cp.hstack((src_indices, src_indices_extra))
        dst_indices = cp.hstack((dst_indices, dst_indices_extra))
    G = graph_class.from_coo(20, src_indices, dst_indices, name="Desargues Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def diamond_graph(create_using=None):
    # Fixed-table generator: builds "Diamond Graph" via `from_coo(4, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    src_indices = cp.array([0, 0, 1, 1, 1, 2, 2, 2, 3, 3], index_dtype)
    dst_indices = cp.array([1, 2, 0, 2, 3, 0, 1, 3, 1, 2], index_dtype)
    G = graph_class.from_coo(4, src_indices, dst_indices, name="Diamond Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def dodecahedral_graph(create_using=None):
    # This can also be defined w.r.t. LCF_graph
    #
    # Fixed-table generator: builds "Dodecahedral Graph" via `from_coo(20, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
            8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14,
            14, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 10, 19, 0, 2, 8, 1, 3, 6, 2, 4, 19, 3, 5, 17, 4, 6, 15, 2, 5, 7, 6, 8,
            14, 1, 7, 9, 8, 10, 13, 0, 9, 11, 10, 12, 18, 11, 13, 16, 9, 12, 14, 7,
            13, 15, 5, 14, 16, 12, 15, 17, 4, 16, 18, 11, 17, 19, 0, 3, 18,
        ],
        index_dtype,
    )
    # fmt: on
    if graph_class.is_multigraph():
        # Multigraph classes get one additional entry per node appended.
        src_indices_extra = cp.array(
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
            index_dtype,
        )
        dst_indices_extra = cp.array(
            [10, 8, 6, 19, 17, 15, 2, 14, 1, 13, 0, 18, 16, 9, 7, 5, 12, 4, 11, 3],
            index_dtype,
        )
        src_indices = cp.hstack((src_indices, src_indices_extra))
        dst_indices = cp.hstack((dst_indices, dst_indices_extra))
    G = graph_class.from_coo(20, src_indices, dst_indices, name="Dodecahedral Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def frucht_graph(create_using=None):
    # Fixed-table generator: builds "Frucht Graph" via `from_coo(12, ...)`.
    # Unlike most generators in this module, directed classes are accepted and
    # get their own (shorter) index table.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        src_indices = cp.array(
            [0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 8, 10],
            index_dtype,
        )
        dst_indices = cp.array(
            [1, 7, 2, 7, 3, 8, 4, 9, 5, 9, 6, 10, 0, 10, 11, 9, 11, 11],
            index_dtype,
        )
    else:
        # fmt: off
        src_indices = cp.array(
            [
                0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7,
                7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11,
            ],
            index_dtype,
        )
        dst_indices = cp.array(
            [
                1, 6, 7, 0, 2, 7, 1, 3, 8, 2, 4, 9, 3, 5, 9, 4, 6, 10, 0, 5, 10, 0,
                1, 11, 2, 9, 11, 3, 4, 8, 5, 6, 11, 7, 8, 10,
            ],
            index_dtype,
        )
        # fmt: on
    G = graph_class.from_coo(12, src_indices, dst_indices, name="Frucht Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def heawood_graph(create_using=None):
    # This can also be defined w.r.t. LCF_graph
    #
    # Fixed-table generator: builds "Heawood Graph" via `from_coo(14, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
            8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 5, 13, 0, 2, 10, 1, 3, 7, 2, 4, 12, 3, 5, 9, 0, 4, 6, 5, 7, 11, 2, 6,
            8, 7, 9, 13, 4, 8, 10, 1, 9, 11, 6, 10, 12, 3, 11, 13, 0, 8, 12,
        ],
        index_dtype,
    )
    # fmt: on
    if graph_class.is_multigraph():
        # Multigraph classes get one additional entry per node appended.
        src_indices_extra = cp.array(
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13],
            index_dtype,
        )
        dst_indices_extra = cp.array(
            [5, 10, 7, 12, 9, 0, 11, 2, 13, 4, 1, 6, 3, 8],
            index_dtype,
        )
        src_indices = cp.hstack((src_indices, src_indices_extra))
        dst_indices = cp.hstack((dst_indices, dst_indices_extra))
    G = graph_class.from_coo(14, src_indices, dst_indices, name="Heawood Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def house_graph(create_using=None):
    # Fixed-table generator: builds "House Graph" via `from_coo(5, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    src_indices = cp.array([0, 0, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4], index_dtype)
    dst_indices = cp.array([1, 2, 0, 3, 0, 3, 4, 1, 2, 4, 2, 3], index_dtype)
    G = graph_class.from_coo(5, src_indices, dst_indices, name="House Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def house_x_graph(create_using=None):
    # Fixed-table generator: builds "House-with-X-inside Graph" via
    # `from_coo(5, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    src_indices = cp.array(
        [0, 0, 0, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4], index_dtype
    )
    dst_indices = cp.array(
        [1, 2, 3, 0, 2, 3, 0, 1, 3, 4, 0, 1, 2, 4, 2, 3], index_dtype
    )
    G = graph_class.from_coo(
        5, src_indices, dst_indices, name="House-with-X-inside Graph"
    )
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def icosahedral_graph(create_using=None):
    # Fixed-table generator: builds "Platonic Icosahedral Graph" via
    # `from_coo(12, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4,
            4, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 9,
            9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11, 11,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 5, 7, 8, 11, 0, 2, 5, 6, 8, 1, 3, 6, 8, 9, 2, 4, 6, 9, 10, 3, 5, 6,
            10, 11, 0, 1, 4, 6, 11, 1, 2, 3, 4, 5, 0, 8, 9, 10, 11, 0, 1, 2, 7, 9, 2,
            3, 7, 8, 10, 3, 4, 7, 9, 11, 0, 4, 5, 7, 10,
        ],
        index_dtype,
    )
    # fmt: on
    G = graph_class.from_coo(
        12, src_indices, dst_indices, name="Platonic Icosahedral Graph"
    )
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def krackhardt_kite_graph(create_using=None):
    # Fixed-table generator: builds "Krackhardt Kite Social Network" via
    # `from_coo(10, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5,
            5, 6, 6, 6, 6, 6, 7, 7, 7, 8, 8, 9,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 2, 3, 5, 0, 3, 4, 6, 0, 3, 5, 0, 1, 2, 4, 5, 6, 1, 3, 6, 0, 2, 3, 6,
            7, 1, 3, 4, 5, 7, 5, 6, 8, 7, 9, 8,
        ],
        index_dtype,
    )
    # fmt: on
    G = graph_class.from_coo(
        10, src_indices, dst_indices, name="Krackhardt Kite Social Network"
    )
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def moebius_kantor_graph(create_using=None):
    # This can also be defined w.r.t. LCF_graph
    #
    # Fixed-table generator: builds "Moebius-Kantor Graph" via
    # `from_coo(16, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
            8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14,
            14, 14, 15, 15, 15,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 5, 15, 0, 2, 12, 1, 3, 7, 2, 4, 14, 3, 5, 9, 0, 4, 6, 5, 7, 11, 2, 6,
            8, 7, 9, 13, 4, 8, 10, 9, 11, 15, 6, 10, 12, 1, 11, 13, 8, 12, 14, 3, 13,
            15, 0, 10, 14,
        ],
        index_dtype,
    )
    # fmt: on
    if graph_class.is_multigraph():
        # Multigraph classes get one additional entry per node appended.
        src_indices_extra = cp.array(
            [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
            index_dtype,
        )
        dst_indices_extra = cp.array(
            [5, 12, 7, 14, 9, 0, 11, 2, 13, 4, 15, 6, 1, 8, 3, 10],
            index_dtype,
        )
        src_indices = cp.hstack((src_indices, src_indices_extra))
        dst_indices = cp.hstack((dst_indices, dst_indices_extra))
    G = graph_class.from_coo(16, src_indices, dst_indices, name="Moebius-Kantor Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def octahedral_graph(create_using=None):
    # Fixed-table generator: builds "Platonic Octahedral Graph" via
    # `from_coo(6, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    src_indices = cp.array(
        [0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5],
        index_dtype,
    )
    dst_indices = cp.array(
        [1, 2, 3, 4, 0, 2, 3, 5, 0, 1, 4, 5, 0, 1, 4, 5, 0, 2, 3, 5, 1, 2, 3, 4],
        index_dtype,
    )
    G = graph_class.from_coo(
        6, src_indices, dst_indices, name="Platonic Octahedral Graph"
    )
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def pappus_graph():
    # This can also be defined w.r.t. LCF_graph
    #
    # Fixed-table generator: takes no `create_using` and always returns an
    # `nxcg.Graph` named "Pappus Graph" built via `from_coo(18, ...)`.
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
            8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14,
            14, 15, 15, 15, 16, 16, 16, 17, 17, 17,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 5, 17, 0, 2, 8, 1, 3, 13, 2, 4, 10, 3, 5, 15, 0, 4, 6, 5, 7, 11, 6, 8,
            14, 1, 7, 9, 8, 10, 16, 3, 9, 11, 6, 10, 12, 11, 13, 17, 2, 12, 14, 7,
            13, 15, 4, 14, 16, 9, 15, 17, 0, 12, 16,
        ],
        index_dtype,
    )
    # fmt: on
    return nxcg.Graph.from_coo(18, src_indices, dst_indices, name="Pappus Graph")


@networkx_algorithm
def petersen_graph(create_using=None):
    # Fixed-table generator: builds "Petersen Graph" via `from_coo(10, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
            8, 8, 8, 9, 9, 9,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 4, 5, 0, 2, 6, 1, 3, 7, 2, 4, 8, 0, 3, 9, 0, 7, 8, 1, 8, 9, 2, 5, 9,
            3, 5, 6, 4, 6, 7,
        ],
        index_dtype,
    )
    # fmt: on
    G = graph_class.from_coo(10, src_indices, dst_indices, name="Petersen Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def sedgewick_maze_graph(create_using=None):
    # Fixed-table generator: builds "Sedgewick Maze" via `from_coo(8, ...)`.
    # Directed classes are accepted and get their own (shorter) index table.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        src_indices = cp.array([0, 0, 0, 1, 2, 3, 3, 4, 4, 4], index_dtype)
        dst_indices = cp.array([2, 5, 7, 7, 6, 4, 5, 5, 6, 7], index_dtype)
    else:
        src_indices = cp.array(
            [0, 0, 0, 1, 2, 2, 3, 3, 4, 4, 4, 4, 5, 5, 5, 6, 6, 7, 7, 7],
            index_dtype,
        )
        dst_indices = cp.array(
            [2, 5, 7, 7, 0, 6, 4, 5, 3, 5, 6, 7, 0, 3, 4, 2, 4, 0, 1, 4],
            index_dtype,
        )
    G = graph_class.from_coo(8, src_indices, dst_indices, name="Sedgewick Maze")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def tetrahedral_graph(create_using=None):
    # This can also be defined w.r.t. complete_graph
    #
    # Fixed-table generator built via `from_coo(4, ...)`.  There is no
    # directed-class check here: the same table is used for every graph class.
    graph_class, inplace = _create_using_class(create_using)
    src_indices = cp.array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], index_dtype)
    dst_indices = cp.array([1, 2, 3, 0, 2, 3, 0, 1, 3, 0, 1, 2], index_dtype)
    # The two names differ only in the capitalisation of "graph"/"Graph",
    # selected by `_IS_NX32_OR_LESS`.
    name = (
        "Platonic Tetrahedral graph"
        if _IS_NX32_OR_LESS
        else "Platonic Tetrahedral Graph"
    )
    G = graph_class.from_coo(4, src_indices, dst_indices, name=name)
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def truncated_cube_graph(create_using=None):
    # Fixed-table generator: builds "Truncated Cube Graph" via
    # `from_coo(24, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
            8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14,
            14, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20,
            20, 21, 21, 21, 22, 22, 22, 23, 23, 23,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 2, 4, 0, 11, 14, 0, 3, 4, 2, 6, 8, 0, 2, 5, 4, 16, 18, 3, 7, 8, 6, 10,
            12, 3, 6, 9, 8, 17, 20, 7, 11, 12, 1, 10, 14, 7, 10, 13, 12, 21, 22, 1,
            11, 15, 14, 19, 23, 5, 17, 18, 9, 16, 20, 5, 16, 19, 15, 18, 23, 9, 17,
            21, 13, 20, 22, 13, 21, 23, 15, 19, 22,
        ],
        index_dtype,
    )
    # fmt: on
    G = graph_class.from_coo(24, src_indices, dst_indices, name="Truncated Cube Graph")
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def truncated_tetrahedron_graph(create_using=None):
    # Fixed-table generator: builds "Truncated Tetrahedron Graph" via
    # `from_coo(12, ...)`.  Directed classes are accepted and get their own
    # (shorter) index table.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        src_indices = cp.array(
            [0, 0, 0, 1, 1, 2, 3, 3, 4, 4, 5, 5, 6, 7, 8, 8, 9, 10], index_dtype
        )
        dst_indices = cp.array(
            [1, 2, 9, 2, 6, 3, 4, 11, 5, 11, 6, 7, 7, 8, 9, 10, 10, 11], index_dtype
        )
    else:
        # fmt: off
        src_indices = cp.array(
            [
                0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7,
                7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11,
            ],
            index_dtype,
        )
        dst_indices = cp.array(
            [
                1, 2, 9, 0, 2, 6, 0, 1, 3, 2, 4, 11, 3, 5, 11, 4, 6, 7, 1, 5, 7, 5,
                6, 8, 7, 9, 10, 0, 8, 10, 8, 9, 11, 3, 4, 10,
            ],
            index_dtype,
        )
        # fmt: on
    G = graph_class.from_coo(
        12, src_indices, dst_indices, name="Truncated Tetrahedron Graph"
    )
    if inplace:
        return create_using._become(G)
    return G


@networkx_algorithm
def tutte_graph(create_using=None):
    # Fixed-table generator: builds "Tutte's Graph" via `from_coo(46, ...)`.
    # Raises nx.NetworkXError when the selected graph class is directed.
    graph_class, inplace = _create_using_class(create_using)
    if graph_class.is_directed():
        raise nx.NetworkXError("Directed Graph not supported")
    # fmt: off
    src_indices = cp.array(
        [
            0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3, 4, 4, 4, 5, 5, 5, 6, 6, 6, 7, 7, 7,
            8, 8, 8, 9, 9, 9, 10, 10, 10, 11, 11, 11, 12, 12, 12, 13, 13, 13, 14, 14,
            14, 15, 15, 15, 16, 16, 16, 17, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20,
            20, 21, 21, 21, 22, 22, 22, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26,
            26, 27, 27, 27, 28, 28, 28, 29, 29, 29, 30, 30, 30, 31, 31, 31, 32, 32,
            32, 33, 33, 33, 34, 34, 34, 35, 35, 35, 36, 36, 36, 37, 37, 37, 38, 38,
            38, 39, 39, 39, 40, 40, 40, 41, 41, 41, 42, 42, 42, 43, 43, 43, 44, 44,
            44, 45, 45, 45,
        ],
        index_dtype,
    )
    dst_indices = cp.array(
        [
            1, 2, 3, 0, 4, 26, 0, 10, 11, 0, 18, 19, 1, 5, 33, 4, 6, 29, 5, 7, 27, 6,
            8, 14, 7, 9, 38, 8, 10, 37, 2, 9, 39, 2, 12, 39, 11, 13, 35, 12, 14, 15,
            7, 13, 34, 13, 16, 22, 15, 17, 44, 16, 18, 43, 3, 17, 45, 3, 20, 45, 19,
            21, 41, 20, 22, 23, 15, 21, 40, 21, 24, 27, 23, 25, 32, 24, 26, 31, 1,
            25, 33, 6, 23, 28, 27, 29, 32, 5, 28, 30, 29, 31, 33, 25, 30, 32, 24, 28,
            31, 4, 26, 30, 14, 35, 38, 12, 34, 36, 35, 37, 39, 9, 36, 38, 8, 34, 37,
            10, 11, 36, 22, 41, 44, 20, 40, 42, 41, 43, 45, 17, 42, 44, 16, 40, 43,
            18, 19, 42,
        ],
        index_dtype,
    )
    # fmt: on
    G = graph_class.from_coo(46, src_indices, dst_indices, name="Tutte's Graph")
    if inplace:
        return create_using._become(G)
    return G
diff --git a/python/nx-cugraph/nx_cugraph/generators/social.py b/python/nx-cugraph/nx_cugraph/generators/social.py new file mode 100644 index 00000000000..3c936d07af3 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/generators/social.py @@ -0,0 +1,294 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License.
+import cupy as cp +import numpy as np + +import nx_cugraph as nxcg + +from ..utils import index_dtype, networkx_algorithm + +__all__ = [ + "davis_southern_women_graph", + "florentine_families_graph", + "karate_club_graph", + "les_miserables_graph", +] + + +@networkx_algorithm +def davis_southern_women_graph(): + # fmt: off + src_indices = cp.array( + [ + 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 3, + 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 7, 7, 7, 8, 8, 8, + 8, 9, 9, 9, 9, 10, 10, 10, 10, 11, 11, 11, 11, 11, 11, 12, 12, 12, 12, + 12, 12, 12, 13, 13, 13, 13, 13, 13, 13, 13, 14, 14, 14, 14, 14, 15, 15, + 16, 16, 17, 17, 18, 18, 18, 19, 19, 19, 20, 20, 20, 20, 20, 20, 21, 21, + 21, 21, 22, 22, 22, 22, 22, 22, 22, 22, 23, 23, 23, 23, 23, 23, 23, 23, + 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 25, 25, 25, 25, 25, 25, 25, 25, + 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, 26, + 27, 27, 27, 27, 27, 28, 28, 28, 28, 29, 29, 29, 29, 29, 29, 30, 30, 30, + 31, 31, 31, + ], + index_dtype, + ) + dst_indices = cp.array( + [ + 18, 19, 20, 21, 22, 23, 25, 26, 18, 19, 20, 22, 23, 24, 25, 19, 20, 21, + 22, 23, 24, 25, 26, 18, 20, 21, 22, 23, 24, 25, 20, 21, 22, 24, 20, 22, + 23, 25, 22, 23, 24, 25, 23, 25, 26, 22, 24, 25, 26, 24, 25, 26, 29, 25, + 26, 27, 29, 25, 26, 27, 29, 30, 31, 24, 25, 26, 27, 29, 30, 31, 23, 24, + 26, 27, 28, 29, 30, 31, 24, 25, 27, 28, 29, 25, 26, 26, 28, 26, 28, 0, 1, + 3, 0, 1, 2, 0, 1, 2, 3, 4, 5, 0, 2, 3, 4, 0, 1, 2, 3, 4, 5, 6, 8, 0, 1, + 2, 3, 5, 6, 7, 13, 1, 2, 3, 4, 6, 8, 9, 12, 13, 14, 0, 1, 2, 3, 5, 6, 7, + 8, 9, 10, 11, 12, 14, 15, 0, 2, 7, 8, 9, 10, 11, 12, 13, 15, 16, 17, 10, + 11, 12, 13, 14, 13, 14, 16, 17, 9, 10, 11, 12, 13, 14, 11, 12, 13, 11, + 12, 13, + ], + index_dtype, + ) + bipartite = cp.array( + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 1, 1, 1, 1, 1, + ], + np.int8, + ) + women = [ + "Evelyn Jefferson", "Laura Mandeville", 
"Theresa Anderson", "Brenda Rogers", + "Charlotte McDowd", "Frances Anderson", "Eleanor Nye", "Pearl Oglethorpe", + "Ruth DeSand", "Verne Sanderson", "Myra Liddel", "Katherina Rogers", + "Sylvia Avondale", "Nora Fayette", "Helen Lloyd", "Dorothy Murchison", + "Olivia Carleton", "Flora Price", + ] + events = [ + "E1", "E2", "E3", "E4", "E5", "E6", "E7", "E8", "E9", "E10", "E11", "E12", + "E13", "E14", + ] + # fmt: on + return nxcg.Graph.from_coo( + 32, + src_indices, + dst_indices, + node_values={"bipartite": bipartite}, + id_to_key=women + events, + top=women, + bottom=events, + ) + + +@networkx_algorithm +def florentine_families_graph(): + # fmt: off + src_indices = cp.array( + [ + 0, 1, 1, 1, 2, 2, 3, 3, 3, 4, 4, 4, 5, 6, 6, 6, 6, 7, 8, 8, 8, 8, 8, 8, + 9, 10, 10, 10, 11, 11, 11, 12, 12, 13, 13, 13, 13, 14, 14, 14, + ], + index_dtype, + ) + dst_indices = cp.array( + [ + 8, 5, 6, 8, 4, 8, 6, 10, 13, 2, 10, 13, 1, 1, 3, 7, 14, 6, 0, 1, 2, 11, + 12, 14, 12, 3, 4, 13, 8, 13, 14, 8, 9, 3, 4, 10, 11, 6, 8, 11, + ], + index_dtype, + ) + nodes = [ + "Acciaiuoli", "Albizzi", "Barbadori", "Bischeri", "Castellani", "Ginori", + "Guadagni", "Lamberteschi", "Medici", "Pazzi", "Peruzzi", "Ridolfi", + "Salviati", "Strozzi", "Tornabuoni" + ] + # fmt: on + return nxcg.Graph.from_coo(15, src_indices, dst_indices, id_to_key=nodes) + + +@networkx_algorithm +def karate_club_graph(): + # fmt: off + src_indices = cp.array( + [ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, + 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 3, 3, 3, 3, 3, 4, 4, 4, 5, 5, 5, 5, + 6, 6, 6, 6, 7, 7, 7, 7, 8, 8, 8, 8, 8, 9, 9, 10, 10, 10, 11, 12, 12, 13, + 13, 13, 13, 13, 14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 19, 20, + 20, 21, 21, 22, 22, 23, 23, 23, 23, 23, 24, 24, 24, 25, 25, 25, 26, 26, + 27, 27, 27, 27, 28, 28, 28, 29, 29, 29, 29, 30, 30, 30, 30, 31, 31, 31, + 31, 31, 31, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 32, 33, 33, 33, + 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 33, 
33, 33, + ], + index_dtype, + ) + dst_indices = cp.array( + [ + 1, 2, 3, 4, 5, 6, 7, 8, 10, 11, 12, 13, 17, 19, 21, 31, 0, 2, 3, 7, 13, + 17, 19, 21, 30, 0, 1, 3, 7, 8, 9, 13, 27, 28, 32, 0, 1, 2, 7, 12, 13, 0, + 6, 10, 0, 6, 10, 16, 0, 4, 5, 16, 0, 1, 2, 3, 0, 2, 30, 32, 33, 2, 33, + 0, 4, 5, 0, 0, 3, 0, 1, 2, 3, 33, 32, 33, 32, 33, 5, 6, 0, 1, 32, 33, 0, + 1, 33, 32, 33, 0, 1, 32, 33, 25, 27, 29, 32, 33, 25, 27, 31, 23, 24, 31, + 29, 33, 2, 23, 24, 33, 2, 31, 33, 23, 26, 32, 33, 1, 8, 32, 33, 0, 24, + 25, 28, 32, 33, 2, 8, 14, 15, 18, 20, 22, 23, 29, 30, 31, 33, 8, 9, 13, + 14, 15, 18, 19, 20, 22, 23, 26, 27, 28, 29, 30, 31, 32, + ], + index_dtype, + ) + weights = cp.array( + [ + 4, 5, 3, 3, 3, 3, 2, 2, 2, 3, 1, 3, 2, 2, 2, 2, 4, 6, 3, 4, 5, 1, 2, 2, + 2, 5, 6, 3, 4, 5, 1, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 2, 3, 3, 5, 3, 3, + 3, 2, 5, 3, 2, 4, 4, 3, 2, 5, 3, 3, 4, 1, 2, 2, 3, 3, 3, 1, 3, 3, 5, 3, + 3, 3, 3, 2, 3, 4, 3, 3, 2, 1, 1, 2, 2, 2, 1, 3, 1, 2, 2, 2, 3, 5, 4, 3, + 5, 4, 2, 3, 2, 5, 2, 7, 4, 2, 2, 4, 3, 4, 2, 2, 2, 3, 4, 4, 2, 2, 3, 3, + 3, 2, 2, 7, 2, 4, 4, 2, 3, 3, 3, 1, 3, 2, 5, 4, 3, 4, 5, 4, 2, 3, 2, 4, + 2, 1, 1, 3, 4, 2, 4, 2, 2, 3, 4, 5, + ], + np.int8, + ) + # For now, cupy doesn't handle str dtypes and we primarily handle cupy arrays. + # We try to support numpy arrays for node values, so let's use numpy here. + clubs = np.array([ + "Mr. Hi", "Mr. Hi", "Mr. Hi", "Mr. Hi", "Mr. Hi", "Mr. Hi", "Mr. Hi", + "Mr. Hi", "Mr. Hi", "Officer", "Mr. Hi", "Mr. Hi", "Mr. Hi", "Mr. Hi", + "Officer", "Officer", "Mr. Hi", "Mr. Hi", "Officer", "Mr. Hi", "Officer", + "Mr. 
Hi", "Officer", "Officer", "Officer", "Officer", "Officer", "Officer", + "Officer", "Officer", "Officer", "Officer", "Officer", "Officer", + ]) + # fmt: on + return nxcg.Graph.from_coo( + 34, + src_indices, + dst_indices, + edge_values={"weight": weights}, + node_values={"club": clubs}, + name="Zachary's Karate Club", + ) + + +@networkx_algorithm +def les_miserables_graph(): + # fmt: off + src_indices = cp.array( + [ + 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, + 2, 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 5, 5, 5, 5, 5, 5, 5, 6, 6, 6, 6, 6, 6, + 6, 6, 6, 6, 6, 6, 6, 7, 8, 8, 8, 8, 8, 8, 9, 9, 9, 9, 9, 9, 9, 10, 10, + 10, 10, 10, 10, 11, 12, 12, 12, 12, 12, 12, 13, 13, 14, 14, 15, 15, 15, + 15, 15, 15, 15, 15, 15, 15, 16, 16, 16, 16, 16, 16, 17, 17, 17, 17, 17, + 17, 17, 17, 17, 17, 17, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 18, 19, + 20, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 21, 22, 23, 23, 23, + 23, 23, 23, 23, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, 24, + 24, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 26, 26, 26, 26, 26, 26, + 26, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 27, 28, 28, + 28, 28, 29, 29, 29, 29, 29, 29, 29, 30, 30, 30, 30, 30, 30, 30, 30, 30, + 30, 30, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, 31, + 31, 31, 31, 31, 31, 31, 32, 33, 34, 34, 34, 34, 34, 34, 34, 35, 35, 35, + 35, 35, 35, 35, 35, 35, 35, 36, 37, 37, 37, 37, 37, 37, 37, 37, 37, 37, + 38, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, 39, + 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 40, 41, 42, 42, 42, 42, 42, + 42, 43, 44, 44, 44, 44, 44, 44, 44, 45, 45, 45, 45, 46, 46, 46, 46, 46, + 46, 46, 46, 46, 46, 46, 47, 47, 48, 48, 49, 49, 49, 49, 49, 49, 49, 49, + 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 49, 50, 50, 50, 51, 51, 51, 51, + 51, 51, 51, 52, 53, 53, 54, 55, 55, 55, 55, 55, 55, 55, 56, 56, 56, 57, + 57, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 58, 59, 59, 59, 59, 59, 59, + 59, 59, 59, 60, 60, 61, 
62, 62, 62, 62, 62, 62, 62, 62, 62, 62, 63, 64, + 65, 65, 66, 66, 66, 67, 67, 67, 67, 67, 67, 67, 67, 67, 68, 69, 69, 69, + 69, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 70, 71, + 71, 71, 71, 71, 71, 71, 71, 71, 72, 72, 72, 73, 73, 73, 73, 73, 73, 73, + 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, + 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 73, 74, 74, 75, 75, 75, 76, 76, + 76, 76, 76, 76, 76, + ], + index_dtype, + ) + dst_indices = cp.array( + [ + 25, 58, 70, 9, 15, 25, 31, 37, 39, 58, 59, 70, 73, 6, 17, 21, 24, 30, 31, + 35, 40, 46, 49, 55, 67, 8, 10, 12, 16, 27, 39, 42, 73, 34, 49, 23, 26, + 27, 29, 44, 71, 76, 2, 17, 21, 24, 30, 31, 35, 40, 46, 49, 55, 67, 73, + 70, 3, 10, 12, 16, 42, 73, 1, 15, 25, 31, 37, 59, 70, 3, 8, 12, 16, 42, + 73, 62, 3, 8, 10, 16, 42, 73, 14, 31, 13, 31, 1, 9, 24, 25, 37, 39, 58, + 59, 70, 73, 3, 8, 10, 12, 42, 73, 2, 6, 21, 24, 30, 31, 35, 40, 46, 49, + 67, 34, 39, 45, 49, 51, 58, 70, 71, 72, 73, 75, 62, 62, 2, 6, 17, 24, 25, + 30, 31, 35, 40, 46, 49, 55, 67, 62, 5, 26, 27, 29, 44, 71, 76, 2, 6, 15, + 17, 21, 30, 31, 35, 39, 40, 46, 49, 55, 67, 73, 0, 1, 9, 15, 21, 37, 46, + 49, 58, 59, 70, 5, 23, 27, 29, 44, 71, 76, 3, 5, 23, 26, 29, 39, 44, 48, + 58, 65, 69, 70, 71, 73, 76, 36, 39, 60, 73, 5, 23, 26, 27, 44, 71, 76, 2, + 6, 17, 21, 24, 31, 35, 40, 46, 49, 67, 1, 2, 6, 9, 13, 14, 17, 21, 24, + 30, 35, 37, 39, 40, 46, 49, 53, 55, 59, 67, 70, 73, 62, 73, 4, 18, 45, + 47, 49, 51, 73, 2, 6, 17, 21, 24, 30, 31, 40, 55, 67, 28, 1, 9, 15, 25, + 31, 39, 58, 59, 70, 73, 73, 1, 3, 15, 18, 24, 27, 28, 31, 37, 58, 59, 69, + 70, 72, 73, 74, 75, 2, 6, 17, 21, 24, 30, 31, 35, 46, 49, 55, 67, 53, 3, + 8, 10, 12, 16, 73, 73, 5, 23, 26, 27, 29, 71, 76, 18, 34, 49, 51, 2, 6, + 17, 21, 24, 25, 30, 31, 40, 49, 61, 34, 58, 27, 73, 2, 4, 6, 17, 18, 21, + 24, 25, 30, 31, 34, 40, 45, 46, 51, 66, 70, 71, 73, 56, 62, 73, 18, 34, + 45, 49, 52, 57, 73, 51, 31, 41, 73, 2, 6, 21, 24, 31, 35, 40, 50, 62, 73, + 51, 66, 0, 
1, 15, 18, 25, 27, 37, 39, 47, 70, 73, 1, 9, 15, 25, 31, 37, + 39, 70, 73, 28, 73, 46, 11, 19, 20, 22, 32, 50, 56, 63, 64, 73, 62, 62, + 27, 69, 49, 57, 70, 2, 6, 17, 21, 24, 30, 31, 35, 40, 73, 27, 39, 65, 73, + 0, 1, 7, 9, 15, 18, 25, 27, 31, 37, 39, 49, 58, 59, 66, 73, 5, 18, 23, + 26, 27, 29, 44, 49, 76, 18, 39, 73, 1, 3, 6, 8, 10, 12, 15, 16, 18, 24, + 27, 28, 31, 33, 34, 37, 38, 39, 42, 43, 48, 49, 50, 51, 54, 56, 58, 59, + 60, 62, 68, 69, 70, 72, 74, 75, 39, 73, 18, 39, 73, 5, 23, 26, 27, 29, + 44, 71, + ], + index_dtype, + ) + weights = cp.array( + [ + 2, 1, 2, 3, 4, 1, 1, 6, 2, 1, 2, 6, 1, 4, 5, 6, 4, 3, 5, 1, 5, 2, 1, 1, + 2, 1, 2, 1, 1, 1, 1, 2, 2, 1, 1, 3, 4, 3, 4, 4, 4, 3, 4, 9, 12, 10, 6, 5, + 3, 7, 1, 5, 1, 2, 1, 1, 1, 2, 2, 2, 2, 2, 3, 1, 1, 1, 3, 1, 3, 2, 2, 2, + 2, 3, 3, 1, 1, 2, 2, 2, 2, 2, 3, 2, 3, 2, 4, 1, 1, 1, 4, 1, 1, 2, 4, 1, + 1, 2, 2, 2, 2, 2, 5, 9, 13, 15, 5, 6, 1, 5, 2, 5, 2, 3, 1, 1, 21, 2, 4, + 1, 1, 2, 31, 1, 2, 1, 6, 12, 13, 17, 1, 6, 7, 2, 5, 2, 9, 1, 3, 1, 3, 3, + 4, 5, 3, 3, 4, 4, 10, 1, 15, 17, 6, 7, 3, 6, 5, 1, 7, 1, 4, 4, 2, 1, 1, + 1, 1, 1, 1, 5, 2, 1, 3, 4, 3, 3, 3, 4, 4, 3, 1, 3, 4, 3, 4, 5, 3, 2, 2, + 1, 2, 1, 3, 9, 4, 2, 1, 3, 8, 4, 5, 3, 4, 3, 3, 4, 3, 6, 5, 6, 6, 2, 1, + 5, 1, 1, 2, 1, 5, 5, 1, 2, 2, 6, 7, 7, 2, 1, 1, 1, 3, 1, 4, 2, 1, 1, 1, + 1, 1, 1, 1, 1, 3, 1, 1, 12, 9, 2, 1, 3, 1, 2, 3, 1, 1, 2, 1, 1, 2, 6, 3, + 4, 1, 1, 1, 1, 2, 5, 1, 1, 2, 1, 1, 1, 6, 5, 1, 1, 1, 1, 1, 1, 5, 1, 17, + 1, 1, 5, 7, 5, 5, 5, 5, 3, 2, 1, 2, 1, 2, 1, 2, 2, 3, 2, 2, 3, 1, 4, 3, + 4, 3, 3, 4, 3, 1, 1, 1, 2, 2, 1, 2, 2, 1, 1, 1, 1, 1, 1, 3, 1, 1, 2, 1, + 1, 1, 5, 5, 21, 9, 7, 5, 1, 4, 12, 2, 1, 1, 6, 1, 2, 1, 19, 6, 8, 3, 2, + 9, 2, 6, 1, 1, 2, 1, 2, 1, 1, 1, 1, 1, 1, 1, 1, 1, 6, 10, 3, 1, 1, 1, 1, + 1, 4, 2, 2, 1, 1, 1, 13, 7, 2, 1, 2, 1, 1, 2, 1, 1, 1, 3, 1, 3, 1, 2, 1, + 1, 1, 8, 10, 1, 1, 5, 1, 1, 1, 2, 1, 1, 1, 2, 2, 2, 3, 4, 2, 1, 1, 2, 1, + 2, 1, 2, 3, 2, 6, 1, 3, 4, 1, 3, 1, 1, 5, 5, 2, 13, 1, 1, 12, 4, 1, 3, 4, + 3, 3, 4, 1, 3, 
2, 1, 1, 1, 2, 1, 2, 3, 2, 1, 2, 31, 4, 9, 8, 1, 1, 2, 1, + 1, 17, 3, 1, 1, 19, 3, 2, 1, 3, 7, 1, 1, 5, 1, 3, 12, 1, 2, 3, 1, 2, 1, + 1, 3, 3, 4, 3, 4, 4, 3, 3, + ], + np.int8, + ) + nodes = [ + "Anzelma", "Babet", "Bahorel", "Bamatabois", "BaronessT", "Blacheville", + "Bossuet", "Boulatruelle", "Brevet", "Brujon", "Champmathieu", + "Champtercier", "Chenildieu", "Child1", "Child2", "Claquesous", + "Cochepaille", "Combeferre", "Cosette", "Count", "CountessDeLo", + "Courfeyrac", "Cravatte", "Dahlia", "Enjolras", "Eponine", "Fameuil", + "Fantine", "Fauchelevent", "Favourite", "Feuilly", "Gavroche", "Geborand", + "Gervais", "Gillenormand", "Grantaire", "Gribier", "Gueulemer", "Isabeau", + "Javert", "Joly", "Jondrette", "Judge", "Labarre", "Listolier", + "LtGillenormand", "Mabeuf", "Magnon", "Marguerite", "Marius", + "MlleBaptistine", "MlleGillenormand", "MlleVaubois", "MmeBurgon", "MmeDeR", + "MmeHucheloup", "MmeMagloire", "MmePontmercy", "MmeThenardier", + "Montparnasse", "MotherInnocent", "MotherPlutarch", "Myriel", "Napoleon", + "OldMan", "Perpetue", "Pontmercy", "Prouvaire", "Scaufflaire", "Simplice", + "Thenardier", "Tholomyes", "Toussaint", "Valjean", "Woman1", "Woman2", + "Zephine", + ] + # fmt: on + return nxcg.Graph.from_coo( + 77, src_indices, dst_indices, edge_values={"weight": weights}, id_to_key=nodes + ) diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index a7b88b72ec5..fd0b1483d73 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -65,6 +65,7 @@ def key(testpath): no_weights = "weighted implementation not currently supported" no_multigraph = "multigraphs not currently supported" louvain_different = "Louvain may be different due to RNG" + no_string_dtype = "string edge values not currently supported" xfail = {} @@ -187,6 +188,40 @@ def key(testpath): xfail[ key("test_louvain.py:test_threshold") ] = "Louvain does not support seed parameter" + if 
nxver.major == 3 and nxver.minor >= 2: + xfail.update( + { + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_multi_attr_incl_target" + ): no_string_dtype, + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_multidigraph_and_edge_attr" + ): no_string_dtype, + key( + "test_convert_pandas.py:TestConvertPandas." + "test_from_edgelist_int_attr_name" + ): no_string_dtype, + } + ) + if nxver.minor == 2: + different_iteration_order = "Different graph data iteration order" + xfail.update( + { + key( + "test_cycles.py:TestMinimumCycleBasis." + "test_gh6787_and_edge_attribute_names" + ): different_iteration_order, + key( + "test_euler.py:TestEulerianCircuit." + "test_eulerian_circuit_cycle" + ): different_iteration_order, + key( + "test_gml.py:TestGraph.test_special_float_label" + ): different_iteration_order, + } + ) for item in items: kset = set(item.keywords) diff --git a/python/nx-cugraph/nx_cugraph/tests/test_generators.py b/python/nx-cugraph/nx_cugraph/tests/test_generators.py new file mode 100644 index 00000000000..511f8dcd8e2 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_generators.py @@ -0,0 +1,277 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import networkx as nx +import numpy as np +import pytest +from packaging.version import parse + +import nx_cugraph as nxcg + +nxver = parse(nx.__version__) + + +def assert_graphs_equal(Gnx, Gcg): + assert isinstance(Gnx, nx.Graph) + assert isinstance(Gcg, nxcg.Graph) + assert Gnx.number_of_nodes() == Gcg.number_of_nodes() + assert Gnx.number_of_edges() == Gcg.number_of_edges() + assert Gnx.is_directed() == Gcg.is_directed() + assert Gnx.is_multigraph() == Gcg.is_multigraph() + G = nxcg.to_networkx(Gcg) + rv = nx.utils.graphs_equal(G, Gnx) + if not rv: + print("GRAPHS ARE NOT EQUAL!") + assert sorted(G) == sorted(Gnx) + assert sorted(G._adj) == sorted(Gnx._adj) + assert sorted(G._node) == sorted(Gnx._node) + for k in sorted(G._adj): + print(k, sorted(G._adj[k]), sorted(Gnx._adj[k])) + print(nx.to_scipy_sparse_array(G).todense()) + print(nx.to_scipy_sparse_array(Gnx).todense()) + print(G.graph) + print(Gnx.graph) + assert rv + + +if nxver.major == 3 and nxver.minor < 2: + pytest.skip("Need NetworkX >=3.2 to test generators", allow_module_level=True) + + +def compare(name, create_using, *args, is_vanilla=False): + exc1 = exc2 = None + func = getattr(nx, name) + if isinstance(create_using, nxcg.Graph): + nx_create_using = nxcg.to_networkx(create_using) + elif isinstance(create_using, type) and issubclass(create_using, nxcg.Graph): + nx_create_using = create_using.to_networkx_class() + elif isinstance(create_using, nx.Graph): + nx_create_using = create_using.copy() + else: + nx_create_using = create_using + try: + if is_vanilla: + G = func(*args) + else: + G = func(*args, create_using=nx_create_using) + except Exception as exc: + exc1 = exc + try: + if is_vanilla: + Gcg = func(*args, backend="cugraph") + else: + Gcg = func(*args, create_using=create_using, backend="cugraph") + except ZeroDivisionError: + raise + except NotImplementedError as exc: + if name in {"complete_multipartite_graph"}: # nx.__version__[:3] <= "3.2" + return + exc2 = exc + except Exception as exc: 
+ if exc1 is None: # pragma: no cover (debug) + raise + exc2 = exc + if exc1 is not None or exc2 is not None: + assert type(exc1) is type(exc2) + else: + assert_graphs_equal(G, Gcg) + + +N = list(range(-1, 5)) +CREATE_USING = [nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph] +COMPLETE_CREATE_USING = [ + nx.Graph, + nx.DiGraph, + nx.MultiGraph, + nx.MultiDiGraph, + nxcg.Graph, + nxcg.DiGraph, + nxcg.MultiGraph, + nxcg.MultiDiGraph, + # These raise NotImplementedError + # nx.Graph(), + # nx.DiGraph(), + # nx.MultiGraph(), + # nx.MultiDiGraph(), + nxcg.Graph(), + nxcg.DiGraph(), + nxcg.MultiGraph(), + nxcg.MultiDiGraph(), + None, + object, # Bad input + 7, # Bad input +] +GENERATORS_NOARG = [ + # classic + "null_graph", + "trivial_graph", + # small + "bull_graph", + "chvatal_graph", + "cubical_graph", + "desargues_graph", + "diamond_graph", + "dodecahedral_graph", + "frucht_graph", + "heawood_graph", + "house_graph", + "house_x_graph", + "icosahedral_graph", + "krackhardt_kite_graph", + "moebius_kantor_graph", + "octahedral_graph", + "petersen_graph", + "sedgewick_maze_graph", + "tetrahedral_graph", + "truncated_cube_graph", + "truncated_tetrahedron_graph", + "tutte_graph", +] +GENERATORS_NOARG_VANILLA = [ + # classic + "complete_multipartite_graph", + # small + "pappus_graph", + # social + "davis_southern_women_graph", + "florentine_families_graph", + "karate_club_graph", + "les_miserables_graph", +] +GENERATORS_N = [ + # classic + "circular_ladder_graph", + "complete_graph", + "cycle_graph", + "empty_graph", + "ladder_graph", + "path_graph", + "star_graph", + "wheel_graph", +] +GENERATORS_M_N = [ + # classic + "barbell_graph", + "lollipop_graph", + "tadpole_graph", + # bipartite + "complete_bipartite_graph", +] +GENERATORS_M_N_VANILLA = [ + # classic + "complete_multipartite_graph", + "turan_graph", + # community + "caveman_graph", +] + + +@pytest.mark.parametrize("name", GENERATORS_NOARG) +@pytest.mark.parametrize("create_using", COMPLETE_CREATE_USING) +def 
test_generator_noarg(name, create_using): + print(name, create_using, type(create_using)) + if isinstance(create_using, nxcg.Graph) and name in { + # fmt: off + "bull_graph", "chvatal_graph", "cubical_graph", "diamond_graph", + "house_graph", "house_x_graph", "icosahedral_graph", "krackhardt_kite_graph", + "octahedral_graph", "petersen_graph", "truncated_cube_graph", "tutte_graph", + # fmt: on + }: + # The _raise_on_directed decorator used in networkx doesn't like our graphs. + if create_using.is_directed(): + with pytest.raises(AssertionError): + compare(name, create_using) + else: + with pytest.raises(TypeError): + compare(name, create_using) + else: + compare(name, create_using) + + +@pytest.mark.parametrize("name", GENERATORS_NOARG_VANILLA) +def test_generator_noarg_vanilla(name): + print(name) + compare(name, None, is_vanilla=True) + + +@pytest.mark.parametrize("name", GENERATORS_N) +@pytest.mark.parametrize("n", N) +@pytest.mark.parametrize("create_using", CREATE_USING) +def test_generator_n(name, n, create_using): + print(name, n, create_using) + compare(name, create_using, n) + + +@pytest.mark.parametrize("name", GENERATORS_N) +@pytest.mark.parametrize("n", [1, 4]) +@pytest.mark.parametrize("create_using", COMPLETE_CREATE_USING) +def test_generator_n_complete(name, n, create_using): + print(name, n, create_using) + compare(name, create_using, n) + + +@pytest.mark.parametrize("name", GENERATORS_M_N) +@pytest.mark.parametrize("create_using", CREATE_USING) +@pytest.mark.parametrize("m", N) +@pytest.mark.parametrize("n", N) +def test_generator_m_n(name, create_using, m, n): + print(name, m, n, create_using) + compare(name, create_using, m, n) + + +@pytest.mark.parametrize("name", GENERATORS_M_N_VANILLA) +@pytest.mark.parametrize("m", N) +@pytest.mark.parametrize("n", N) +def test_generator_m_n_vanilla(name, m, n): + print(name, m, n) + compare(name, None, m, n, is_vanilla=True) + + +@pytest.mark.parametrize("name", GENERATORS_M_N) 
+@pytest.mark.parametrize("create_using", COMPLETE_CREATE_USING) +@pytest.mark.parametrize("m", [4]) +@pytest.mark.parametrize("n", [4]) +def test_generator_m_n_complete(name, create_using, m, n): + print(name, m, n, create_using) + compare(name, create_using, m, n) + + +@pytest.mark.parametrize("name", GENERATORS_M_N_VANILLA) +@pytest.mark.parametrize("m", [4]) +@pytest.mark.parametrize("n", [4]) +def test_generator_m_n_complete_vanilla(name, m, n): + print(name, m, n) + compare(name, None, m, n, is_vanilla=True) + + +def test_bad_lollipop_graph(): + compare("lollipop_graph", None, [0, 1], [1, 2]) + + +def test_can_convert_karate_club(): + # Karate club graph has string node values. + # This really tests conversions, but it's here so we can use `assert_graphs_equal`. + G = nx.karate_club_graph() + G.add_node(0, foo="bar") # string dtype with a mask + G.add_node(1, object=object()) # haha + Gcg = nxcg.from_networkx(G, preserve_all_attrs=True) + assert_graphs_equal(G, Gcg) + Gnx = nxcg.to_networkx(Gcg) + assert nx.utils.graphs_equal(G, Gnx) + assert isinstance(Gcg.node_values["club"], np.ndarray) + assert Gcg.node_values["club"].dtype.kind == "U" + assert isinstance(Gcg.node_values["foo"], np.ndarray) + assert isinstance(Gcg.node_masks["foo"], np.ndarray) + assert Gcg.node_values["foo"].dtype.kind == "U" + assert isinstance(Gcg.node_values["object"], np.ndarray) + assert Gcg.node_values["object"].dtype.kind == "O" + assert isinstance(Gcg.node_masks["object"], np.ndarray) diff --git a/python/nx-cugraph/nx_cugraph/tests/test_utils.py b/python/nx-cugraph/nx_cugraph/tests/test_utils.py new file mode 100644 index 00000000000..fdd0c91995c --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_utils.py @@ -0,0 +1,87 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import numpy as np +import pytest + +from nx_cugraph.utils import _get_int_dtype + + +def test_get_int_dtype(): + uint8 = np.dtype(np.uint8) + uint16 = np.dtype(np.uint16) + uint32 = np.dtype(np.uint32) + uint64 = np.dtype(np.uint64) + # signed + cur = np.iinfo(np.int8) + for val in [cur.min, cur.min + 1, -1, 0, 1, cur.max - 1, cur.max]: + assert _get_int_dtype(val) == np.int8 + assert _get_int_dtype(val, signed=True) == np.int8 + if val >= 0: + assert _get_int_dtype(val, unsigned=True) == np.uint8 + assert _get_int_dtype(val + 1, unsigned=True) == np.uint8 + prev = cur + cur = np.iinfo(np.int16) + for val in [cur.min, cur.min + 1, prev.min - 1, prev.max + 1, cur.max - 1, cur.max]: + assert _get_int_dtype(val) != prev.dtype + assert _get_int_dtype(val, signed=True) == np.int16 + if val >= 0: + assert _get_int_dtype(val, unsigned=True) in {uint8, uint16} + assert _get_int_dtype(val + 1, unsigned=True) in {uint8, uint16} + prev = cur + cur = np.iinfo(np.int32) + for val in [cur.min, cur.min + 1, prev.min - 1, prev.max + 1, cur.max - 1, cur.max]: + assert _get_int_dtype(val) != prev.dtype + assert _get_int_dtype(val, signed=True) == np.int32 + if val >= 0: + assert _get_int_dtype(val, unsigned=True) in {uint16, uint32} + assert _get_int_dtype(val + 1, unsigned=True) in {uint16, uint32} + prev = cur + cur = np.iinfo(np.int64) + for val in [cur.min, cur.min + 1, prev.min - 1, prev.max + 1, cur.max - 1, cur.max]: + assert _get_int_dtype(val) != prev.dtype + assert _get_int_dtype(val, signed=True) == np.int64 + if val >= 0: + assert _get_int_dtype(val, 
unsigned=True) in {uint32, uint64} + assert _get_int_dtype(val + 1, unsigned=True) in {uint32, uint64} + with pytest.raises(ValueError, match="Value is too"): + _get_int_dtype(cur.min - 1, signed=True) + with pytest.raises(ValueError, match="Value is too"): + _get_int_dtype(cur.max + 1, signed=True) + + # unsigned + cur = np.iinfo(np.uint8) + for val in [0, 1, cur.max - 1, cur.max]: + assert _get_int_dtype(val) == (np.uint8 if val > 1 else np.int8) + assert _get_int_dtype(val, unsigned=True) == np.uint8 + assert _get_int_dtype(cur.max + 1) == np.int16 + cur = np.iinfo(np.uint16) + for val in [cur.max - 1, cur.max]: + assert _get_int_dtype(val, unsigned=True) == np.uint16 + assert _get_int_dtype(cur.max + 1) == np.int32 + cur = np.iinfo(np.uint32) + for val in [cur.max - 1, cur.max]: + assert _get_int_dtype(val, unsigned=True) == np.uint32 + assert _get_int_dtype(cur.max + 1) == np.int64 + cur = np.iinfo(np.uint64) + for val in [cur.max - 1, cur.max]: + assert _get_int_dtype(val, unsigned=True) == np.uint64 + with pytest.raises(ValueError, match="Value is incompatible"): + _get_int_dtype(cur.min - 1, unsigned=True) + with pytest.raises(ValueError, match="Value is too"): + _get_int_dtype(cur.max + 1, unsigned=True) + + # API + with pytest.raises(TypeError, match="incompatible"): + _get_int_dtype(7, signed=True, unsigned=True) + assert _get_int_dtype(7, signed=True, unsigned=False) == np.int8 + assert _get_int_dtype(7, signed=False, unsigned=True) == np.uint8 diff --git a/python/nx-cugraph/nx_cugraph/typing.py b/python/nx-cugraph/nx_cugraph/typing.py index d3045ab4656..b419a9085e0 100644 --- a/python/nx-cugraph/nx_cugraph/typing.py +++ b/python/nx-cugraph/nx_cugraph/typing.py @@ -15,6 +15,9 @@ from collections.abc import Hashable from typing import TypeVar +import cupy as cp +import numpy as np + AttrKey = TypeVar("AttrKey", bound=Hashable) EdgeKey = TypeVar("EdgeKey", bound=Hashable) NodeKey = TypeVar("NodeKey", bound=Hashable) @@ -23,3 +26,8 @@ NodeValue = 
TypeVar("NodeValue") IndexValue = TypeVar("IndexValue") Dtype = TypeVar("Dtype") + + +class any_ndarray: + def __class_getitem__(cls, item): + return cp.ndarray[item] | np.ndarray[item] diff --git a/python/nx-cugraph/nx_cugraph/utils/misc.py b/python/nx-cugraph/nx_cugraph/utils/misc.py index 72e4094b8b7..9683df5e7f9 100644 --- a/python/nx-cugraph/nx_cugraph/utils/misc.py +++ b/python/nx-cugraph/nx_cugraph/utils/misc.py @@ -12,13 +12,19 @@ # limitations under the License. from __future__ import annotations +import itertools import operator as op import sys from random import Random +from typing import SupportsIndex import cupy as cp +import numpy as np -__all__ = ["_groupby", "_seed_to_int"] +__all__ = ["index_dtype", "_groupby", "_seed_to_int", "_get_int_dtype"] + +# This may switch to np.uint32 at some point +index_dtype = np.int32 def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]: @@ -58,3 +64,67 @@ def _seed_to_int(seed: int | Random | None) -> int: if isinstance(seed, Random): return seed.randint(0, sys.maxsize) return op.index(seed) # Ensure seed is integral + + +def _get_int_dtype( + val: SupportsIndex, *, signed: bool | None = None, unsigned: bool | None = None +): + """Determine the smallest integer dtype that can store the integer ``val``. + + If signed or unsigned are unspecified, then signed integers are preferred + unless the value can be represented by a smaller unsigned integer. + + Raises + ------ + ValueError : If the value cannot be represented with an int dtype. + """ + # This is similar in spirit to `np.min_scalar_type` + if signed is not None: + if unsigned is not None and (not signed) is (not unsigned): + raise TypeError( + f"signed (={signed}) and unsigned (={unsigned}) keyword arguments " + "are incompatible." 
+ ) + signed = bool(signed) + unsigned = not signed + elif unsigned is not None: + unsigned = bool(unsigned) + signed = not unsigned + + val = op.index(val) # Ensure val is integral + if val < 0: + if unsigned: + raise ValueError(f"Value is incompatible with unsigned int: {val}.") + signed = True + unsigned = False + + if signed is not False: + # Number of bytes (and a power of two) + signed_nbytes = (val + (val < 0)).bit_length() // 8 + 1 + signed_nbytes = next( + filter( + signed_nbytes.__le__, + itertools.accumulate(itertools.repeat(2), op.mul, initial=1), + ) + ) + if unsigned is not False: + # Number of bytes (and a power of two) + unsigned_nbytes = (val.bit_length() + 7) // 8 + unsigned_nbytes = next( + filter( + unsigned_nbytes.__le__, + itertools.accumulate(itertools.repeat(2), op.mul, initial=1), + ) + ) + if signed is None and unsigned is None: + # Prefer signed int if same size + signed = signed_nbytes <= unsigned_nbytes + + if signed: + dtype_string = f"i{signed_nbytes}" + else: + dtype_string = f"u{unsigned_nbytes}" + try: + return np.dtype(dtype_string) + except TypeError as exc: + raise ValueError(f"Value is too large to store as integer: {val}") from exc diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml index 9fec8fa0242..3d3029da232 100644 --- a/python/nx-cugraph/pyproject.toml +++ b/python/nx-cugraph/pyproject.toml @@ -160,7 +160,8 @@ ignore = [ # "SIM300", # Yoda conditions are discouraged, use ... instead (Note: we're not this picky) # "SIM401", # Use dict.get ... 
instead of if-else-block (Note: if-else better for coverage and sometimes clearer) # "TRY004", # Prefer `TypeError` exception for invalid type (Note: good advice, but not worth the nuisance) - # "TRY200", # Use `raise from` to specify exception cause (Note: sometimes okay to raise original exception) + "B904", # Bare `raise` inside exception clause (like TRY200; sometimes okay) + "TRY200", # Use `raise from` to specify exception cause (Note: sometimes okay to raise original exception) # Intentionally ignored "A003", # Class attribute ... is shadowing a python builtin From 24845ca319fb3922f514ef98844f1e1acb5e0123 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Tue, 31 Oct 2023 15:38:05 -0400 Subject: [PATCH 4/7] Skip certain `cugraph-pyg` tests when `torch_sparse` is not available (#3962) The CSC (CSR) code path in cugraph-pyg requires `torch_sparse` package. However, `torch_sparse` does not seem to work out of box for rockylinux8. This PR fixes such [CI failures](https://github.com/rapidsai/cugraph/actions/runs/6691094105/job/18177667677). 
Closes https://github.com/rapidsai/graph_dl/issues/343 Authors: - Tingyu Wang (https://github.com/tingyu66) Approvers: - Naim (https://github.com/naimnv) - Alex Barghi (https://github.com/alexbarghi-nv) URL: https://github.com/rapidsai/cugraph/pull/3962 --- .../cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py index 03274948158..836b30c9df7 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/test_cugraph_loader.py @@ -29,6 +29,7 @@ torch = import_optional("torch") torch_geometric = import_optional("torch_geometric") +torch_sparse = import_optional("torch_sparse") trim_to_layer = import_optional("torch_geometric.utils.trim_to_layer") @@ -200,6 +201,9 @@ def test_cugraph_loader_from_disk_subset(): @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif( + isinstance(torch_sparse, MissingModule), reason="torch-sparse not available" +) def test_cugraph_loader_from_disk_subset_csr(): m = [2, 9, 99, 82, 11, 13] n = torch.arange(1, 1 + len(m), dtype=torch.int32) @@ -332,6 +336,9 @@ def test_cugraph_loader_e2e_coo(): @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif( + isinstance(torch_sparse, MissingModule), reason="torch-sparse not available" +) @pytest.mark.parametrize("framework", ["pyg", "cugraph-ops"]) def test_cugraph_loader_e2e_csc(framework): m = [2, 9, 99, 82, 9, 3, 18, 1, 12] From eb1e515553ef133f0ea484a663b1b3ff82b1d5c6 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 31 Oct 2023 17:50:52 -0500 Subject: [PATCH 5/7] nx-cugraph: add CC for undirected graphs to fix k-truss (#3965) Fixes #3963 and add `connected_components`, `is_connected`, `node_connected_component`, and `number_connected_components`. 
Also updated `_groupby` to handle groups that are not consecutive integers starting with 0. Also, `plc.weakly_connected_components` does not handle isolated nodes well, and I needed to handle this at the Python layer as was done in #3897 Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3965 --- python/nx-cugraph/_nx_cugraph/__init__.py | 8 ++ .../nx_cugraph/algorithms/__init__.py | 3 +- .../algorithms/community/louvain.py | 2 +- .../algorithms/components/__init__.py | 13 ++ .../algorithms/components/connected.py | 130 ++++++++++++++++++ .../nx-cugraph/nx_cugraph/algorithms/core.py | 10 ++ python/nx-cugraph/nx_cugraph/classes/graph.py | 5 + python/nx-cugraph/nx_cugraph/interface.py | 9 ++ .../nx_cugraph/tests/test_ktruss.py | 30 ++++ .../nx-cugraph/nx_cugraph/utils/decorators.py | 5 +- python/nx-cugraph/nx_cugraph/utils/misc.py | 44 ++++-- python/nx-cugraph/pyproject.toml | 1 + 12 files changed, 243 insertions(+), 17 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/components/__init__.py create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/components/connected.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_ktruss.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index af1df04644c..8ef976aabf1 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -38,6 +38,7 @@ "complete_bipartite_graph", "complete_graph", "complete_multipartite_graph", + "connected_components", "cubical_graph", "cycle_graph", "davis_southern_women_graph", @@ -56,6 +57,7 @@ "house_x_graph", "icosahedral_graph", "in_degree_centrality", + "is_connected", "is_isolate", "isolates", "k_truss", @@ -66,7 +68,9 @@ "lollipop_graph", "louvain_communities", "moebius_kantor_graph", + "node_connected_component", "null_graph", + "number_connected_components", 
"number_of_isolates", "number_of_selfloops", "octahedral_graph", @@ -91,6 +95,10 @@ "betweenness_centrality": "`weight` parameter is not yet supported.", "edge_betweenness_centrality": "`weight` parameter is not yet supported.", "from_pandas_edgelist": "cudf.DataFrame inputs also supported.", + "k_truss": ( + "Currently raises `NotImplementedError` for graphs with more than one connected\n" + "component when k >= 3. We expect to fix this soon." + ), "louvain_communities": "`seed` parameter is currently ignored.", # END: extra_docstrings }, diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 69feb8f6437..87b1967fa93 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -10,8 +10,9 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -from . import bipartite, centrality, community +from . 
import bipartite, centrality, community, components from .bipartite import complete_bipartite_graph from .centrality import * +from .components import * from .core import * from .isolate import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index 62261d109a2..45a3429d2ee 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -62,7 +62,7 @@ def louvain_communities( resolution=resolution, do_expensive_check=False, ) - groups = _groupby(clusters, vertices) + groups = _groupby(clusters, vertices, groups_are_canonical=True) rv = [set(G._nodearray_to_list(node_ids)) for node_ids in groups.values()] # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix isolates = _isolates(G) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/components/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/components/__init__.py new file mode 100644 index 00000000000..26816ef3692 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/components/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+from .connected import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py b/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py new file mode 100644 index 00000000000..41f3457d542 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py @@ -0,0 +1,130 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import itertools + +import cupy as cp +import networkx as nx +import pylibcugraph as plc + +from nx_cugraph.convert import _to_undirected_graph +from nx_cugraph.utils import _groupby, networkx_algorithm, not_implemented_for + +from ..isolate import _isolates + +__all__ = [ + "number_connected_components", + "connected_components", + "is_connected", + "node_connected_component", +] + + +@not_implemented_for("directed") +@networkx_algorithm +def number_connected_components(G): + return sum(1 for _ in connected_components(G)) + # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL + # G = _to_undirected_graph(G) + # unused_node_ids, labels = plc.weakly_connected_components( + # resource_handle=plc.ResourceHandle(), + # graph=G._get_plc_graph(), + # offsets=None, + # indices=None, + # weights=None, + # labels=None, + # do_expensive_check=False, + # ) + # return cp.unique(labels).size + + +@number_connected_components._can_run +def _(G): + # NetworkX <= 3.2.1 does not check directedness for us + try: + return not G.is_directed() + except Exception: 
+ return False + + +@not_implemented_for("directed") +@networkx_algorithm +def connected_components(G): + G = _to_undirected_graph(G) + if G.src_indices.size == 0: + # TODO: PLC doesn't handle empty graphs (or isolated nodes) gracefully! + return [{key} for key in G._nodeiter_to_iter(range(len(G)))] + node_ids, labels = plc.weakly_connected_components( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + offsets=None, + indices=None, + weights=None, + labels=None, + do_expensive_check=False, + ) + groups = _groupby(labels, node_ids) + it = (G._nodearray_to_set(connected_ids) for connected_ids in groups.values()) + # TODO: PLC doesn't handle isolated vertices yet, so this is a temporary fix + isolates = _isolates(G) + if isolates.size > 0: + isolates = isolates[isolates > node_ids.max()] + if isolates.size > 0: + it = itertools.chain( + it, ({node} for node in G._nodearray_to_list(isolates)) + ) + return it + + +@not_implemented_for("directed") +@networkx_algorithm +def is_connected(G): + G = _to_undirected_graph(G) + if len(G) == 0: + raise nx.NetworkXPointlessConcept( + "Connectivity is undefined for the null graph." 
+ ) + for community in connected_components(G): + return len(community) == len(G) + raise RuntimeError # pragma: no cover + # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL + # unused_node_ids, labels = plc.weakly_connected_components( + # resource_handle=plc.ResourceHandle(), + # graph=G._get_plc_graph(), + # offsets=None, + # indices=None, + # weights=None, + # labels=None, + # do_expensive_check=False, + # ) + # return labels.size == len(G) and cp.unique(labels).size == 1 + + +@not_implemented_for("directed") +@networkx_algorithm +def node_connected_component(G, n): + # We could also do plain BFS from n + G = _to_undirected_graph(G) + node_id = n if G.key_to_id is None else G.key_to_id[n] + node_ids, labels = plc.weakly_connected_components( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(), + offsets=None, + indices=None, + weights=None, + labels=None, + do_expensive_check=False, + ) + indices = cp.nonzero(node_ids == node_id)[0] + if indices.size == 0: + return {n} + return G._nodearray_to_set(node_ids[labels == labels[indices[0]]]) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index 33e79793553..2219388bc58 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -24,6 +24,10 @@ @not_implemented_for("multigraph") @networkx_algorithm def k_truss(G, k): + """ + Currently raises `NotImplementedError` for graphs with more than one connected + component when k >= 3. We expect to fix this soon. + """ if is_nx := isinstance(G, nx.Graph): G = nxcg.from_networkx(G, preserve_all_attrs=True) if nxcg.number_of_selfloops(G) > 0: @@ -31,6 +35,7 @@ def k_truss(G, k): "Input graph has self loops which is not permitted; " "Consider using G.remove_edges_from(nx.selfloop_edges(G))." 
) + # TODO: create renumbering helper function(s) if k < 3: # k-truss graph is comprised of nodes incident on k-2 triangles, so k<3 is a @@ -49,6 +54,11 @@ def k_truss(G, k): # Renumber step 1: edge values (no changes needed) edge_values = {key: val.copy() for key, val in G.edge_values.items()} edge_masks = {key: val.copy() for key, val in G.edge_masks.items()} + elif (ncc := nxcg.number_connected_components(G)) > 1: + raise NotImplementedError( + "nx_cugraph.k_truss does not yet work on graphs with more than one " + f"connected component (this graph has {ncc}). We expect to fix this soon." + ) else: edge_dtype = _get_int_dtype(G.src_indices.size - 1) edge_indices = cp.arange(G.src_indices.size, dtype=edge_dtype) diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index 2048c4c3d72..23004651fc5 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -692,6 +692,11 @@ def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]: return node_ids.tolist() return list(self._nodeiter_to_iter(node_ids.tolist())) + def _nodearray_to_set(self, node_ids: cp.ndarray[IndexValue]) -> set[NodeKey]: + if self.key_to_id is None: + return set(node_ids.tolist()) + return set(self._nodeiter_to_iter(node_ids.tolist())) + def _nodearray_to_dict( self, values: cp.ndarray[NodeValue] ) -> dict[NodeKey, NodeValue]: diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index fd0b1483d73..875f8621021 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -223,11 +223,20 @@ def key(testpath): } ) + too_slow = "Too slow to run" + skip = { + key("test_tree_isomorphism.py:test_positive"): too_slow, + key("test_tree_isomorphism.py:test_negative"): too_slow, + } + for item in items: kset = set(item.keywords) for (test_name, keywords), reason in xfail.items(): if item.name 
== test_name and keywords.issubset(kset): item.add_marker(pytest.mark.xfail(reason=reason)) + for (test_name, keywords), reason in skip.items(): + if item.name == test_name and keywords.issubset(kset): + item.add_marker(pytest.mark.skip(reason=reason)) @classmethod def can_run(cls, name, args, kwargs): diff --git a/python/nx-cugraph/nx_cugraph/tests/test_ktruss.py b/python/nx-cugraph/nx_cugraph/tests/test_ktruss.py new file mode 100644 index 00000000000..a3e4cee3124 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_ktruss.py @@ -0,0 +1,30 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+import networkx as nx +import pytest + +import nx_cugraph as nxcg + + +@pytest.mark.parametrize( + "get_graph", [nx.florentine_families_graph, nx.les_miserables_graph] +) +def test_k_truss(get_graph): + Gnx = get_graph() + Gcg = nxcg.from_networkx(Gnx, preserve_all_attrs=True) + for k in range(10): + Hnx = nx.k_truss(Gnx, k) + Hcg = nxcg.k_truss(Gcg, k) + assert nx.utils.graphs_equal(Hnx, nxcg.to_networkx(Hcg)) + if Hnx.number_of_edges() == 0: + break diff --git a/python/nx-cugraph/nx_cugraph/utils/decorators.py b/python/nx-cugraph/nx_cugraph/utils/decorators.py index 0f15d236ecd..0048aee51bb 100644 --- a/python/nx-cugraph/nx_cugraph/utils/decorators.py +++ b/python/nx-cugraph/nx_cugraph/utils/decorators.py @@ -13,6 +13,7 @@ from __future__ import annotations from functools import partial, update_wrapper +from textwrap import dedent from networkx.utils.decorators import nodes_or_number, not_implemented_for @@ -65,7 +66,9 @@ def __new__( ) instance.extra_params = extra_params # The docstring on our function is added to the NetworkX docstring. 
- instance.extra_doc = func.__doc__ + instance.extra_doc = ( + dedent(func.__doc__.lstrip("\n").rstrip()) if func.__doc__ else None + ) # Copy __doc__ from NetworkX if instance.name in _registered_algorithms: instance.__doc__ = _registered_algorithms[instance.name].__doc__ diff --git a/python/nx-cugraph/nx_cugraph/utils/misc.py b/python/nx-cugraph/nx_cugraph/utils/misc.py index 9683df5e7f9..26f023bdcec 100644 --- a/python/nx-cugraph/nx_cugraph/utils/misc.py +++ b/python/nx-cugraph/nx_cugraph/utils/misc.py @@ -21,40 +21,56 @@ import cupy as cp import numpy as np +try: + from itertools import pairwise # Python >=3.10 +except ImportError: + + def pairwise(it): + it = iter(it) + for prev in it: + for cur in it: + yield (prev, cur) + prev = cur + + __all__ = ["index_dtype", "_groupby", "_seed_to_int", "_get_int_dtype"] # This may switch to np.uint32 at some point index_dtype = np.int32 -def _groupby(groups: cp.ndarray, values: cp.ndarray) -> dict[int, cp.ndarray]: +def _groupby( + groups: cp.ndarray, values: cp.ndarray, groups_are_canonical: bool = False +) -> dict[int, cp.ndarray]: """Perform a groupby operation given an array of group IDs and array of values. Parameters ---------- groups : cp.ndarray Array that holds the group IDs. - Group IDs are assumed to be consecutive integers from 0. values : cp.ndarray Array of values to be grouped according to groups. Must be the same size as groups array. + groups_are_canonical : bool, default False + Whether the group IDs are consecutive integers beginning with 0. Returns ------- dict with group IDs as keys and cp.ndarray as values. """ - # It would actually be easy to support groups that aren't consecutive integers, - # but let's wait until we need it to implement it. 
- sorted_groups = cp.argsort(groups) - sorted_values = values[sorted_groups] - rv = {} - start = 0 - for i, end in enumerate( - [*(cp.nonzero(cp.diff(groups[sorted_groups]))[0] + 1).tolist(), groups.size] - ): - rv[i] = sorted_values[start:end] - start = end - return rv + if groups.size == 0: + return {} + sort_indices = cp.argsort(groups) + sorted_groups = groups[sort_indices] + sorted_values = values[sort_indices] + prepend = 1 if groups_are_canonical else sorted_groups[0] + 1 + left_bounds = cp.nonzero(cp.diff(sorted_groups, prepend=prepend))[0] + boundaries = pairwise(itertools.chain(left_bounds.tolist(), [groups.size])) + if groups_are_canonical: + it = enumerate(boundaries) + else: + it = zip(sorted_groups[left_bounds].tolist(), boundaries) + return {group: sorted_values[start:end] for group, (start, end) in it} def _seed_to_int(seed: int | Random | None) -> int: diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml index 3d3029da232..7e51efd4fe4 100644 --- a/python/nx-cugraph/pyproject.toml +++ b/python/nx-cugraph/pyproject.toml @@ -218,6 +218,7 @@ ignore = [ # Allow assert, print, RNG, and no docstring "nx_cugraph/**/tests/*py" = ["S101", "S311", "T201", "D103", "D100"] "_nx_cugraph/__init__.py" = ["E501"] +"nx_cugraph/algorithms/**/*py" = ["D205", "D401"] # Allow flexible docstrings for algorithms [tool.ruff.flake8-annotations] mypy-init-return = true From 0a905630c990235783f77c461691a983f97afc9f Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 1 Nov 2023 11:21:45 -0700 Subject: [PATCH 6/7] Cut peak memory footprint in graph creation (#3966) This limits memory footprint (especially in single-GPU or multi-GPU with a small number of GPUs) to the size of edge list * 1.5 + alpha (alpha to store O(V) data, V: # vertices). 
Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Naim (https://github.com/naimnv) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/3966 --- cpp/src/c_api/capi_helper.cu | 2 +- .../create_graph_from_edgelist_impl.cuh | 425 ++++++++---------- cpp/src/structure/detail/structure_utils.cuh | 281 ++++++------ cpp/src/structure/induced_subgraph_impl.cuh | 2 + cpp/src/structure/renumber_edgelist_impl.cuh | 100 +++-- cpp/tests/community/mg_egonet_test.cu | 1 + .../structure/mg_induced_subgraph_test.cu | 1 + cpp/tests/utilities/test_utilities_impl.cuh | 61 ++- 8 files changed, 415 insertions(+), 458 deletions(-) diff --git a/cpp/src/c_api/capi_helper.cu b/cpp/src/c_api/capi_helper.cu index af0163b0512..0ee49f87265 100644 --- a/cpp/src/c_api/capi_helper.cu +++ b/cpp/src/c_api/capi_helper.cu @@ -44,7 +44,7 @@ shuffle_vertex_ids_and_offsets(raft::handle_t const& handle, thrust::make_zip_iterator(ids.end(), vertices.end())); auto return_offsets = cugraph::detail::compute_sparse_offsets( - ids.begin(), ids.end(), size_t{0}, size_t{offsets.size() - 1}, handle.get_stream()); + ids.begin(), ids.end(), size_t{0}, size_t{offsets.size() - 1}, true, handle.get_stream()); return std::make_tuple(std::move(vertices), std::move(return_offsets)); } diff --git a/cpp/src/structure/create_graph_from_edgelist_impl.cuh b/cpp/src/structure/create_graph_from_edgelist_impl.cuh index 0d4b12a3e38..8dd587e1661 100644 --- a/cpp/src/structure/create_graph_from_edgelist_impl.cuh +++ b/cpp/src/structure/create_graph_from_edgelist_impl.cuh @@ -510,7 +510,18 @@ create_graph_from_edgelist_impl( auto use_dcs = num_segments_per_vertex_partition > (detail::num_sparse_segments_per_vertex_partition + 2); - // 4. compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid + // 4. 
sort and compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid + + auto total_global_mem = handle.get_device_properties().totalGlobalMem; + size_t element_size = sizeof(vertex_t) * 2; + if (edgelist_weights) { element_size += sizeof(weight_t); } + if (edgelist_edge_ids) { element_size += sizeof(edge_id_t); } + if (edgelist_edge_types) { element_size += sizeof(edge_type_t); } + auto constexpr mem_frugal_ratio = + 0.25; // if the expected temporary buffer size exceeds the mem_frugal_ratio of the + // total_global_mem, switch to the memory frugal approach + auto mem_frugal_threshold = + static_cast(static_cast(total_global_mem / element_size) * mem_frugal_ratio); std::vector> edge_partition_offsets; std::vector> edge_partition_indices; @@ -559,154 +570,139 @@ create_graph_from_edgelist_impl( if (edgelist_weights) { if (edgelist_edge_ids) { if (edgelist_edge_types) { - auto edge_value_first = - thrust::make_zip_iterator((*edge_partition_edgelist_weights)[i].begin(), - (*edge_partition_edgelist_edge_ids)[i].begin(), - (*edge_partition_edgelist_edge_types)[i].begin()); std::forward_as_tuple( offsets, indices, std::tie(weights, edge_ids, edge_types), dcs_nzd_vertices) = - detail::compress_edgelist( - edge_partition_edgelist_srcs[i].begin(), - edge_partition_edgelist_srcs[i].end(), - edge_partition_edgelist_dsts[i].begin(), - edge_value_first, + detail::sort_and_compress_edgelist, + store_transposed>( + std::move(edge_partition_edgelist_srcs[i]), + std::move(edge_partition_edgelist_dsts[i]), + std::make_tuple(std::move((*edge_partition_edgelist_weights)[i]), + std::move((*edge_partition_edgelist_edge_ids)[i]), + std::move((*edge_partition_edgelist_edge_types)[i])), major_range_first, major_hypersparse_first, major_range_last, minor_range_first, minor_range_last, + mem_frugal_threshold, handle.get_stream()); } else { - auto edge_value_first = - thrust::make_zip_iterator((*edge_partition_edgelist_weights)[i].begin(), - 
(*edge_partition_edgelist_edge_ids)[i].begin()); std::forward_as_tuple(offsets, indices, std::tie(weights, edge_ids), dcs_nzd_vertices) = - detail::compress_edgelist( - edge_partition_edgelist_srcs[i].begin(), - edge_partition_edgelist_srcs[i].end(), - edge_partition_edgelist_dsts[i].begin(), - edge_value_first, + detail::sort_and_compress_edgelist, + store_transposed>( + std::move(edge_partition_edgelist_srcs[i]), + std::move(edge_partition_edgelist_dsts[i]), + std::make_tuple(std::move((*edge_partition_edgelist_weights)[i]), + std::move((*edge_partition_edgelist_edge_ids)[i])), major_range_first, major_hypersparse_first, major_range_last, minor_range_first, minor_range_last, + mem_frugal_threshold, handle.get_stream()); } } else { if (edgelist_edge_types) { - auto edge_value_first = - thrust::make_zip_iterator((*edge_partition_edgelist_weights)[i].begin(), - (*edge_partition_edgelist_edge_types)[i].begin()); std::forward_as_tuple(offsets, indices, std::tie(weights, edge_types), dcs_nzd_vertices) = - detail::compress_edgelist( - edge_partition_edgelist_srcs[i].begin(), - edge_partition_edgelist_srcs[i].end(), - edge_partition_edgelist_dsts[i].begin(), - edge_value_first, + detail::sort_and_compress_edgelist, + store_transposed>( + std::move(edge_partition_edgelist_srcs[i]), + std::move(edge_partition_edgelist_dsts[i]), + std::make_tuple(std::move((*edge_partition_edgelist_weights)[i]), + std::move((*edge_partition_edgelist_edge_types)[i])), major_range_first, major_hypersparse_first, major_range_last, minor_range_first, minor_range_last, + mem_frugal_threshold, handle.get_stream()); } else { - auto edge_value_first = (*edge_partition_edgelist_weights)[i].begin(); std::forward_as_tuple(offsets, indices, weights, dcs_nzd_vertices) = - detail::compress_edgelist( - edge_partition_edgelist_srcs[i].begin(), - edge_partition_edgelist_srcs[i].end(), - edge_partition_edgelist_dsts[i].begin(), - edge_value_first, + detail::sort_and_compress_edgelist( + 
std::move(edge_partition_edgelist_srcs[i]), + std::move(edge_partition_edgelist_dsts[i]), + std::move((*edge_partition_edgelist_weights)[i]), major_range_first, major_hypersparse_first, major_range_last, minor_range_first, minor_range_last, + mem_frugal_threshold, handle.get_stream()); } } } else { if (edgelist_edge_ids) { if (edgelist_edge_types) { - auto edge_value_first = - thrust::make_zip_iterator((*edge_partition_edgelist_edge_ids)[i].begin(), - (*edge_partition_edgelist_edge_types)[i].begin()); std::forward_as_tuple( offsets, indices, std::tie(edge_ids, edge_types), dcs_nzd_vertices) = - detail::compress_edgelist( - edge_partition_edgelist_srcs[i].begin(), - edge_partition_edgelist_srcs[i].end(), - edge_partition_edgelist_dsts[i].begin(), - edge_value_first, + detail::sort_and_compress_edgelist, + store_transposed>( + std::move(edge_partition_edgelist_srcs[i]), + std::move(edge_partition_edgelist_dsts[i]), + std::make_tuple(std::move((*edge_partition_edgelist_edge_ids)[i]), + std::move((*edge_partition_edgelist_edge_types)[i])), major_range_first, major_hypersparse_first, major_range_last, minor_range_first, minor_range_last, + mem_frugal_threshold, handle.get_stream()); } else { - auto edge_value_first = (*edge_partition_edgelist_edge_ids)[i].begin(); std::forward_as_tuple(offsets, indices, edge_ids, dcs_nzd_vertices) = - detail::compress_edgelist( - edge_partition_edgelist_srcs[i].begin(), - edge_partition_edgelist_srcs[i].end(), - edge_partition_edgelist_dsts[i].begin(), - edge_value_first, + detail::sort_and_compress_edgelist( + std::move(edge_partition_edgelist_srcs[i]), + std::move(edge_partition_edgelist_dsts[i]), + std::move((*edge_partition_edgelist_edge_ids)[i]), major_range_first, major_hypersparse_first, major_range_last, minor_range_first, minor_range_last, + mem_frugal_threshold, handle.get_stream()); } } else { if (edgelist_edge_types) { - auto edge_value_first = (*edge_partition_edgelist_edge_types)[i].begin(); std::forward_as_tuple(offsets, 
indices, edge_types, dcs_nzd_vertices) = - detail::compress_edgelist( - edge_partition_edgelist_srcs[i].begin(), - edge_partition_edgelist_srcs[i].end(), - edge_partition_edgelist_dsts[i].begin(), - edge_value_first, + detail::sort_and_compress_edgelist( + std::move(edge_partition_edgelist_srcs[i]), + std::move(edge_partition_edgelist_dsts[i]), + std::move((*edge_partition_edgelist_edge_types)[i]), major_range_first, major_hypersparse_first, major_range_last, minor_range_first, minor_range_last, + mem_frugal_threshold, handle.get_stream()); } else { std::forward_as_tuple(offsets, indices, dcs_nzd_vertices) = - detail::compress_edgelist( - edge_partition_edgelist_srcs[i].begin(), - edge_partition_edgelist_srcs[i].end(), - edge_partition_edgelist_dsts[i].begin(), + detail::sort_and_compress_edgelist( + std::move(edge_partition_edgelist_srcs[i]), + std::move(edge_partition_edgelist_dsts[i]), major_range_first, major_hypersparse_first, major_range_last, minor_range_first, minor_range_last, + mem_frugal_threshold, handle.get_stream()); } } } - edge_partition_edgelist_srcs[i].resize(0, handle.get_stream()); - edge_partition_edgelist_srcs[i].shrink_to_fit(handle.get_stream()); - edge_partition_edgelist_dsts[i].resize(0, handle.get_stream()); - edge_partition_edgelist_dsts[i].shrink_to_fit(handle.get_stream()); - if (edge_partition_edgelist_weights) { - (*edge_partition_edgelist_weights)[i].resize(0, handle.get_stream()); - (*edge_partition_edgelist_weights)[i].shrink_to_fit(handle.get_stream()); - } - if (edge_partition_edgelist_edge_ids) { - (*edge_partition_edgelist_edge_ids)[i].resize(0, handle.get_stream()); - (*edge_partition_edgelist_edge_ids)[i].shrink_to_fit(handle.get_stream()); - } - if (edge_partition_edgelist_edge_types) { - (*edge_partition_edgelist_edge_types)[i].resize(0, handle.get_stream()); - (*edge_partition_edgelist_edge_types)[i].shrink_to_fit(handle.get_stream()); - } edge_partition_offsets.push_back(std::move(offsets)); 
edge_partition_indices.push_back(std::move(indices)); if (edge_partition_weights) { (*edge_partition_weights).push_back(std::move(*weights)); } @@ -954,6 +950,17 @@ create_graph_from_edgelist_impl( // convert edge list (COO) to compressed sparse format (CSR or CSC) + auto total_global_mem = handle.get_device_properties().totalGlobalMem; + size_t element_size = sizeof(vertex_t) * 2; + if (edgelist_weights) { element_size += sizeof(weight_t); } + if (edgelist_edge_ids) { element_size += sizeof(edge_id_t); } + if (edgelist_edge_types) { element_size += sizeof(edge_type_t); } + auto constexpr mem_frugal_ratio = + 0.25; // if the expected temporary buffer size exceeds the mem_frugal_ratio of the + // total_global_mem, switch to the memory frugal approach + auto mem_frugal_threshold = + static_cast(static_cast(total_global_mem / element_size) * mem_frugal_ratio); + rmm::device_uvector offsets(size_t{0}, handle.get_stream()); rmm::device_uvector indices(size_t{0}, handle.get_stream()); std::optional> weights{std::nullopt}; @@ -963,202 +970,130 @@ create_graph_from_edgelist_impl( if (edgelist_weights) { if (edgelist_edge_ids) { if (edgelist_edge_types) { - auto edge_value_first = thrust::make_zip_iterator((*edgelist_weights).begin(), - (*edgelist_edge_ids).begin(), - (*edgelist_edge_types).begin()); std::forward_as_tuple(offsets, indices, std::tie(weights, ids, types), std::ignore) = - detail::compress_edgelist(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin(), - edge_value_first, - vertex_t{0}, - std::optional{std::nullopt}, - num_vertices, - vertex_t{0}, - num_vertices, - handle.get_stream()); + detail::sort_and_compress_edgelist, + store_transposed>( + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_weights), + std::move(*edgelist_edge_ids), + std::move(*edgelist_edge_types)), + vertex_t{0}, + std::optional{std::nullopt}, + num_vertices, + vertex_t{0}, + num_vertices, + mem_frugal_threshold, + 
handle.get_stream()); } else { - auto edge_value_first = - thrust::make_zip_iterator((*edgelist_weights).begin(), (*edgelist_edge_ids).begin()); std::forward_as_tuple(offsets, indices, std::tie(weights, ids), std::ignore) = - detail::compress_edgelist(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin(), - edge_value_first, - vertex_t{0}, - std::optional{std::nullopt}, - num_vertices, - vertex_t{0}, - num_vertices, - handle.get_stream()); + detail::sort_and_compress_edgelist, + store_transposed>( + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)), + vertex_t{0}, + std::optional{std::nullopt}, + num_vertices, + vertex_t{0}, + num_vertices, + mem_frugal_threshold, + handle.get_stream()); } } else { if (edgelist_edge_types) { - auto edge_value_first = - thrust::make_zip_iterator((*edgelist_weights).begin(), (*edgelist_edge_types).begin()); std::forward_as_tuple(offsets, indices, std::tie(weights, types), std::ignore) = - detail::compress_edgelist(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin(), - edge_value_first, - vertex_t{0}, - std::optional{std::nullopt}, - num_vertices, - vertex_t{0}, - num_vertices, - handle.get_stream()); + detail::sort_and_compress_edgelist, + store_transposed>( + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)), + vertex_t{0}, + std::optional{std::nullopt}, + num_vertices, + vertex_t{0}, + num_vertices, + mem_frugal_threshold, + handle.get_stream()); } else { - auto edge_value_first = (*edgelist_weights).begin(); std::forward_as_tuple(offsets, indices, weights, std::ignore) = - detail::compress_edgelist(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin(), - edge_value_first, - vertex_t{0}, - std::optional{std::nullopt}, - num_vertices, - vertex_t{0}, - num_vertices, - handle.get_stream()); + 
detail::sort_and_compress_edgelist( + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(*edgelist_weights), + vertex_t{0}, + std::optional{std::nullopt}, + num_vertices, + vertex_t{0}, + num_vertices, + mem_frugal_threshold, + handle.get_stream()); } } } else { if (edgelist_edge_ids) { if (edgelist_edge_types) { - auto edge_value_first = - thrust::make_zip_iterator((*edgelist_edge_ids).begin(), (*edgelist_edge_types).begin()); std::forward_as_tuple(offsets, indices, std::tie(ids, types), std::ignore) = - detail::compress_edgelist(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin(), - edge_value_first, - vertex_t{0}, - std::optional{std::nullopt}, - num_vertices, - vertex_t{0}, - num_vertices, - handle.get_stream()); + detail::sort_and_compress_edgelist, + store_transposed>( + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), + vertex_t{0}, + std::optional{std::nullopt}, + num_vertices, + vertex_t{0}, + num_vertices, + mem_frugal_threshold, + handle.get_stream()); } else { - auto edge_value_first = (*edgelist_edge_ids).begin(); std::forward_as_tuple(offsets, indices, ids, std::ignore) = - detail::compress_edgelist(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin(), - edge_value_first, - vertex_t{0}, - std::optional{std::nullopt}, - num_vertices, - vertex_t{0}, - num_vertices, - handle.get_stream()); + detail::sort_and_compress_edgelist( + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(*edgelist_edge_ids), + vertex_t{0}, + std::optional{std::nullopt}, + num_vertices, + vertex_t{0}, + num_vertices, + mem_frugal_threshold, + handle.get_stream()); } } else { if (edgelist_edge_types) { - auto edge_value_first = (*edgelist_edge_types).begin(); std::forward_as_tuple(offsets, indices, types, std::ignore) = - detail::compress_edgelist(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin(), - 
edge_value_first, - vertex_t{0}, - std::optional{std::nullopt}, - num_vertices, - vertex_t{0}, - num_vertices, - handle.get_stream()); + detail::sort_and_compress_edgelist( + std::move(edgelist_srcs), + std::move(edgelist_dsts), + std::move(*edgelist_edge_types), + vertex_t{0}, + std::optional{std::nullopt}, + num_vertices, + vertex_t{0}, + num_vertices, + mem_frugal_threshold, + handle.get_stream()); } else { std::forward_as_tuple(offsets, indices, std::ignore) = - detail::compress_edgelist(edgelist_srcs.begin(), - edgelist_srcs.end(), - edgelist_dsts.begin(), - vertex_t{0}, - std::optional{std::nullopt}, - num_vertices, - vertex_t{0}, - num_vertices, - handle.get_stream()); - } - } - } - - edgelist_srcs.resize(0, handle.get_stream()); - edgelist_srcs.shrink_to_fit(handle.get_stream()); - edgelist_dsts.resize(0, handle.get_stream()); - edgelist_dsts.shrink_to_fit(handle.get_stream()); - if (edgelist_weights) { - (*edgelist_weights).resize(0, handle.get_stream()); - (*edgelist_weights).shrink_to_fit(handle.get_stream()); - } - if (edgelist_edge_ids) { - (*edgelist_edge_ids).resize(0, handle.get_stream()); - (*edgelist_edge_ids).shrink_to_fit(handle.get_stream()); - } - if (edgelist_edge_types) { - (*edgelist_edge_types).resize(0, handle.get_stream()); - (*edgelist_edge_types).shrink_to_fit(handle.get_stream()); - } - - // segmented sort neighbors - - if (weights) { - if (ids) { - if (types) { - detail::sort_adjacency_list( - handle, - raft::device_span(offsets.data(), offsets.size()), - indices.begin(), - indices.end(), - thrust::make_zip_iterator((*weights).begin(), (*ids).begin(), (*types).begin())); - } else { - detail::sort_adjacency_list(handle, - raft::device_span(offsets.data(), offsets.size()), - indices.begin(), - indices.end(), - thrust::make_zip_iterator((*weights).begin(), (*ids).begin())); - } - } else { - if (types) { - detail::sort_adjacency_list( - handle, - raft::device_span(offsets.data(), offsets.size()), - indices.begin(), - indices.end(), - 
thrust::make_zip_iterator((*weights).begin(), (*types).begin())); - } else { - detail::sort_adjacency_list(handle, - raft::device_span(offsets.data(), offsets.size()), - indices.begin(), - indices.end(), - (*weights).begin()); - } - } - } else { - if (ids) { - if (types) { - detail::sort_adjacency_list(handle, - raft::device_span(offsets.data(), offsets.size()), - indices.begin(), - indices.end(), - thrust::make_zip_iterator((*ids).begin(), (*types).begin())); - } else { - detail::sort_adjacency_list(handle, - raft::device_span(offsets.data(), offsets.size()), - indices.begin(), - indices.end(), - (*ids).begin()); - } - } else { - if (types) { - detail::sort_adjacency_list(handle, - raft::device_span(offsets.data(), offsets.size()), - indices.begin(), - indices.end(), - (*types).begin()); - } else { - detail::sort_adjacency_list(handle, - raft::device_span(offsets.data(), offsets.size()), - indices.begin(), - indices.end()); + detail::sort_and_compress_edgelist( + std::move(edgelist_srcs), + std::move(edgelist_dsts), + vertex_t{0}, + std::optional{std::nullopt}, + num_vertices, + vertex_t{0}, + num_vertices, + mem_frugal_threshold, + handle.get_stream()); } } } diff --git a/cpp/src/structure/detail/structure_utils.cuh b/cpp/src/structure/detail/structure_utils.cuh index f57b549e1ef..01fbccaa53e 100644 --- a/cpp/src/structure/detail/structure_utils.cuh +++ b/cpp/src/structure/detail/structure_utils.cuh @@ -47,57 +47,38 @@ namespace cugraph { namespace detail { -template -struct update_edge_t { - raft::device_span offsets{}; - raft::device_span indices{}; - EdgeValueIterator edge_value_first{}; - vertex_t major_range_first{}; - - __device__ void operator()(typename thrust::iterator_traits::value_type e) const - { - auto s = thrust::get<0>(e); - auto d = thrust::get<1>(e); - auto major = store_transposed ? d : s; - auto minor = store_transposed ? 
s : d; - auto start = offsets[major - major_range_first]; - auto degree = offsets[(major - major_range_first) + 1] - start; - auto idx = - atomicAdd(&indices[start + degree - 1], vertex_t{1}); // use the last element as a counter - // FIXME: we can actually store minor - minor_range_first instead of minor to save memory if - // minor can be larger than 32 bit but minor - minor_range_first fits within 32 bit - indices[start + idx] = minor; // overwrite the counter only if idx == degree - 1 (no race) - if constexpr (!std::is_same_v) { - auto value = thrust::get<2>(e); - *(edge_value_first + (start + idx)) = value; - } - } -}; - template rmm::device_uvector compute_sparse_offsets( VertexIterator edgelist_major_first, VertexIterator edgelist_major_last, typename thrust::iterator_traits::value_type major_range_first, typename thrust::iterator_traits::value_type major_range_last, + bool edgelist_major_sorted, rmm::cuda_stream_view stream_view) { rmm::device_uvector offsets((major_range_last - major_range_first) + 1, stream_view); - thrust::fill(rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), edge_t{0}); - - auto offset_view = raft::device_span(offsets.data(), offsets.size()); - thrust::for_each(rmm::exec_policy(stream_view), - edgelist_major_first, - edgelist_major_last, - [offset_view, major_range_first] __device__(auto v) { - atomicAdd(&offset_view[v - major_range_first], edge_t{1}); - }); - thrust::exclusive_scan( - rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), offsets.begin()); + if (edgelist_major_sorted) { + offsets.set_element_to_zero_async(0, stream_view); + thrust::upper_bound(rmm::exec_policy(stream_view), + edgelist_major_first, + edgelist_major_last, + thrust::make_counting_iterator(major_range_first), + thrust::make_counting_iterator(major_range_last), + offsets.begin() + 1); + } else { + thrust::fill(rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), edge_t{0}); + + auto offset_view = 
raft::device_span(offsets.data(), offsets.size()); + thrust::for_each(rmm::exec_policy(stream_view), + edgelist_major_first, + edgelist_major_last, + [offset_view, major_range_first] __device__(auto v) { + atomicAdd(&offset_view[v - major_range_first], edge_t{1}); + }); + + thrust::exclusive_scan( + rmm::exec_policy(stream_view), offsets.begin(), offsets.end(), offsets.begin()); + } return offsets; } @@ -156,61 +137,77 @@ std::tuple, rmm::device_uvector> compress_ } // compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid -template -std::tuple< - rmm::device_uvector, - rmm::device_uvector::value_type>, - decltype(allocate_dataframe_buffer::value_type>(size_t{0}, rmm::cuda_stream_view{})), - std::optional::value_type>>> -compress_edgelist( - VertexIterator edgelist_src_first, - VertexIterator edgelist_src_last, - VertexIterator edgelist_dst_first, - EdgeValueIterator edge_value_first, - typename thrust::iterator_traits::value_type major_range_first, - std::optional::value_type> - major_hypersparse_first, - typename thrust::iterator_traits::value_type major_range_last, - typename thrust::iterator_traits::value_type /* minor_range_first */, - typename thrust::iterator_traits::value_type /* minor_range_last */, +template +std::tuple, + rmm::device_uvector, + decltype(allocate_dataframe_buffer(size_t{0}, rmm::cuda_stream_view{})), + std::optional>> +sort_and_compress_edgelist( + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& edgelist_values, + vertex_t major_range_first, + std::optional major_hypersparse_first, + vertex_t major_range_last, + vertex_t /* minor_range_first */, + vertex_t /* minor_range_last */, + size_t mem_frugal_threshold, rmm::cuda_stream_view stream_view) { - using vertex_t = std::remove_cv_t::value_type>; - using edge_value_t = - std::remove_cv_t::value_type>; - - auto number_of_edges = - 
static_cast(thrust::distance(edgelist_src_first, edgelist_src_last)); - - auto offsets = compute_sparse_offsets( - store_transposed ? edgelist_dst_first : edgelist_src_first, - store_transposed ? edgelist_dst_first + number_of_edges : edgelist_src_last, - major_range_first, - major_range_last, - stream_view); - - rmm::device_uvector indices(number_of_edges, stream_view); - thrust::fill(rmm::exec_policy(stream_view), indices.begin(), indices.end(), vertex_t{0}); - auto values = allocate_dataframe_buffer(number_of_edges, stream_view); - - auto offset_view = raft::device_span(offsets.data(), offsets.size()); - auto index_view = raft::device_span(indices.data(), indices.size()); - auto edge_first = thrust::make_zip_iterator( - thrust::make_tuple(edgelist_src_first, edgelist_dst_first, edge_value_first)); - thrust::for_each( - rmm::exec_policy(stream_view), - edge_first, - edge_first + number_of_edges, - update_edge_t{ - offset_view, index_view, get_dataframe_buffer_begin(values), major_range_first}); + auto edgelist_majors = std::move(store_transposed ? edgelist_dsts : edgelist_srcs); + auto edgelist_minors = std::move(store_transposed ? 
edgelist_srcs : edgelist_dsts); + + rmm::device_uvector offsets(0, stream_view); + rmm::device_uvector indices(0, stream_view); + auto values = allocate_dataframe_buffer(0, stream_view); + auto pair_first = thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin()); + if (edgelist_minors.size() > mem_frugal_threshold) { + offsets = compute_sparse_offsets(edgelist_majors.begin(), + edgelist_majors.end(), + major_range_first, + major_range_last, + false, + stream_view); + + auto pivot = major_range_first + static_cast(thrust::distance( + offsets.begin(), + thrust::lower_bound(rmm::exec_policy(stream_view), + offsets.begin(), + offsets.end(), + edgelist_minors.size() / 2))); + auto second_first = + detail::mem_frugal_partition(pair_first, + pair_first + edgelist_minors.size(), + get_dataframe_buffer_begin(edgelist_values), + thrust_tuple_get, 0>{}, + pivot, + stream_view); + thrust::sort_by_key(rmm::exec_policy(stream_view), + pair_first, + std::get<0>(second_first), + get_dataframe_buffer_begin(edgelist_values)); + thrust::sort_by_key(rmm::exec_policy(stream_view), + std::get<0>(second_first), + pair_first + edgelist_minors.size(), + std::get<1>(second_first)); + } else { + thrust::sort_by_key(rmm::exec_policy(stream_view), + pair_first, + pair_first + edgelist_minors.size(), + get_dataframe_buffer_begin(edgelist_values)); + + offsets = compute_sparse_offsets(edgelist_majors.begin(), + edgelist_majors.end(), + major_range_first, + major_range_last, + true, + stream_view); + } + indices = std::move(edgelist_minors); + values = std::move(edgelist_values); + + edgelist_majors.resize(0, stream_view); + edgelist_majors.shrink_to_fit(stream_view); std::optional> dcs_nzd_vertices{std::nullopt}; if (major_hypersparse_first) { @@ -226,47 +223,61 @@ compress_edgelist( } // compress edge list (COO) to CSR (or CSC) or CSR + DCSR (CSC + DCSC) hybrid -template -std::tuple< - rmm::device_uvector, - rmm::device_uvector::value_type>, - std::optional::value_type>>> 
-compress_edgelist( - VertexIterator edgelist_src_first, - VertexIterator edgelist_src_last, - VertexIterator edgelist_dst_first, - typename thrust::iterator_traits::value_type major_range_first, - std::optional::value_type> - major_hypersparse_first, - typename thrust::iterator_traits::value_type major_range_last, - typename thrust::iterator_traits::value_type /* minor_range_first */, - typename thrust::iterator_traits::value_type /* minor_range_last */, - rmm::cuda_stream_view stream_view) +template +std::tuple, + rmm::device_uvector, + std::optional>> +sort_and_compress_edgelist(rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + vertex_t major_range_first, + std::optional major_hypersparse_first, + vertex_t major_range_last, + vertex_t /* minor_range_first */, + vertex_t /* minor_range_last */, + size_t mem_frugal_threshold, + rmm::cuda_stream_view stream_view) { - using vertex_t = std::remove_cv_t::value_type>; - - auto number_of_edges = - static_cast(thrust::distance(edgelist_src_first, edgelist_src_last)); - - auto offsets = compute_sparse_offsets( - store_transposed ? edgelist_dst_first : edgelist_src_first, - store_transposed ? edgelist_dst_first + number_of_edges : edgelist_src_last, - major_range_first, - major_range_last, - stream_view); - - rmm::device_uvector indices(number_of_edges, stream_view); - thrust::fill(rmm::exec_policy(stream_view), indices.begin(), indices.end(), vertex_t{0}); - - auto offset_view = raft::device_span(offsets.data(), offsets.size()); - auto index_view = raft::device_span(indices.data(), indices.size()); - auto edge_first = - thrust::make_zip_iterator(thrust::make_tuple(edgelist_src_first, edgelist_dst_first)); - thrust::for_each(rmm::exec_policy(stream_view), - edge_first, - edge_first + number_of_edges, - update_edge_t{ - offset_view, index_view, static_cast(nullptr), major_range_first}); + auto edgelist_majors = std::move(store_transposed ? 
edgelist_dsts : edgelist_srcs); + auto edgelist_minors = std::move(store_transposed ? edgelist_srcs : edgelist_dsts); + + rmm::device_uvector offsets(0, stream_view); + rmm::device_uvector indices(0, stream_view); + auto edge_first = thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin()); + if (edgelist_minors.size() > mem_frugal_threshold) { + offsets = compute_sparse_offsets(edgelist_majors.begin(), + edgelist_majors.end(), + major_range_first, + major_range_last, + false, + stream_view); + + auto pivot = major_range_first + static_cast(thrust::distance( + offsets.begin(), + thrust::lower_bound(rmm::exec_policy(stream_view), + offsets.begin(), + offsets.end(), + edgelist_minors.size() / 2))); + auto second_first = + detail::mem_frugal_partition(edge_first, + edge_first + edgelist_minors.size(), + thrust_tuple_get, 0>{}, + pivot, + stream_view); + thrust::sort(rmm::exec_policy(stream_view), edge_first, second_first); + thrust::sort(rmm::exec_policy(stream_view), second_first, edge_first + edgelist_minors.size()); + } else { + thrust::sort(rmm::exec_policy(stream_view), edge_first, edge_first + edgelist_minors.size()); + offsets = compute_sparse_offsets(edgelist_majors.begin(), + edgelist_majors.end(), + major_range_first, + major_range_last, + true, + stream_view); + } + indices = std::move(edgelist_minors); + + edgelist_majors.resize(0, stream_view); + edgelist_majors.shrink_to_fit(stream_view); std::optional> dcs_nzd_vertices{std::nullopt}; if (major_hypersparse_first) { diff --git a/cpp/src/structure/induced_subgraph_impl.cuh b/cpp/src/structure/induced_subgraph_impl.cuh index 950cca5828d..18e1af32a71 100644 --- a/cpp/src/structure/induced_subgraph_impl.cuh +++ b/cpp/src/structure/induced_subgraph_impl.cuh @@ -196,6 +196,7 @@ extract_induced_subgraphs( graph_ids_v.end(), size_t{0}, size_t{subgraph_offsets.size() - 1}, + true, handle.get_stream()); dst_subgraph_offsets = @@ -290,6 +291,7 @@ extract_induced_subgraphs( 
subgraph_edge_graph_ids.end(), size_t{0}, size_t{subgraph_offsets.size() - 1}, + true, handle.get_stream()); #ifdef TIMING diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh index 6bc19ff4fe1..09a4dae6c64 100644 --- a/cpp/src/structure/renumber_edgelist_impl.cuh +++ b/cpp/src/structure/renumber_edgelist_impl.cuh @@ -367,18 +367,19 @@ std::tuple, std::vector, vertex_t> compu rmm::device_uvector sorted_local_vertex_degrees(0, handle.get_stream()); std::optional> stream_pool_indices{ std::nullopt}; // FIXME: move this inside the if statement + + auto constexpr num_chunks = size_t{ + 2}; // tuning parameter, this trade-offs # binary searches (up to num_chunks times more binary + // searches can be necessary if num_unique_majors << edgelist_edge_counts[i]) and temporary + // buffer requirement (cut by num_chunks times), currently set to 2 to avoid peak memory + // usage happening in this part (especially when minor_comm_size is small) + if constexpr (multi_gpu) { auto& comm = handle.get_comms(); auto& minor_comm = handle.get_subcomm(cugraph::partition_manager::minor_comm_name()); auto const minor_comm_rank = minor_comm.get_rank(); auto const minor_comm_size = minor_comm.get_size(); - auto constexpr num_chunks = size_t{ - 2}; // tuning parameter, this trade-offs # binary searches (up to num_chunks times more - // binary searches can be necessary if num_unique_majors << edgelist_edge_counts[i]) and - // temporary buffer requirement (cut by num_chunks times), currently set to 2 to avoid - // peak memory usage happening in this part (especially when minor_comm_size is small) - assert(edgelist_majors.size() == minor_comm_size); auto edge_partition_major_range_sizes = @@ -433,29 +434,30 @@ std::tuple, std::vector, vertex_t> compu sorted_major_degrees.end(), edge_t{0}); - rmm::device_uvector tmp_majors( + rmm::device_uvector tmp_majors(0, loop_stream); + tmp_majors.reserve( (static_cast(edgelist_edge_counts[i]) + 
(num_chunks - 1)) / num_chunks, - handle.get_stream()); + loop_stream); size_t offset{0}; for (size_t j = 0; j < num_chunks; ++j) { size_t this_chunk_size = - std::min(tmp_majors.size(), static_cast(edgelist_edge_counts[i]) - offset); + std::min(tmp_majors.capacity(), static_cast(edgelist_edge_counts[i]) - offset); + tmp_majors.resize(this_chunk_size, loop_stream); thrust::copy(rmm::exec_policy(loop_stream), edgelist_majors[i] + offset, - edgelist_majors[i] + offset + this_chunk_size, + edgelist_majors[i] + offset + tmp_majors.size(), tmp_majors.begin()); - thrust::sort( - rmm::exec_policy(loop_stream), tmp_majors.begin(), tmp_majors.begin() + this_chunk_size); + thrust::sort(rmm::exec_policy(loop_stream), tmp_majors.begin(), tmp_majors.end()); auto num_unique_majors = thrust::count_if(rmm::exec_policy(loop_stream), thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(this_chunk_size), + thrust::make_counting_iterator(tmp_majors.size()), is_first_in_run_t{tmp_majors.data()}); rmm::device_uvector tmp_keys(num_unique_majors, loop_stream); rmm::device_uvector tmp_values(num_unique_majors, loop_stream); thrust::reduce_by_key(rmm::exec_policy(loop_stream), tmp_majors.begin(), - tmp_majors.begin() + this_chunk_size, + tmp_majors.end(), thrust::make_constant_iterator(edge_t{1}), tmp_keys.begin(), tmp_values.begin()); @@ -486,44 +488,50 @@ std::tuple, std::vector, vertex_t> compu } else { assert(edgelist_majors.size() == 1); - rmm::device_uvector tmp_majors(edgelist_edge_counts[0], handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - edgelist_majors[0], - edgelist_majors[0] + edgelist_edge_counts[0], - tmp_majors.begin()); - thrust::sort(handle.get_thrust_policy(), tmp_majors.begin(), tmp_majors.end()); - auto num_unique_majors = - thrust::count_if(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(tmp_majors.size()), - is_first_in_run_t{tmp_majors.data()}); - rmm::device_uvector 
tmp_keys(num_unique_majors, handle.get_stream()); - rmm::device_uvector tmp_values(num_unique_majors, handle.get_stream()); - thrust::reduce_by_key(handle.get_thrust_policy(), - tmp_majors.begin(), - tmp_majors.end(), - thrust::make_constant_iterator(edge_t{1}), - tmp_keys.begin(), - tmp_values.begin()); - - tmp_majors.resize(0, handle.get_stream()); - tmp_majors.shrink_to_fit(handle.get_stream()); - sorted_local_vertex_degrees.resize(sorted_local_vertices.size(), handle.get_stream()); thrust::fill(handle.get_thrust_policy(), sorted_local_vertex_degrees.begin(), sorted_local_vertex_degrees.end(), edge_t{0}); - auto kv_pair_first = - thrust::make_zip_iterator(thrust::make_tuple(tmp_keys.begin(), tmp_values.begin())); - thrust::for_each(handle.get_thrust_policy(), - kv_pair_first, - kv_pair_first + tmp_keys.size(), - search_and_increment_degree_t{ - sorted_local_vertices.data(), - static_cast(sorted_local_vertices.size()), - sorted_local_vertex_degrees.data()}); + rmm::device_uvector tmp_majors(0, handle.get_stream()); + tmp_majors.reserve(static_cast(edgelist_edge_counts[0] + (num_chunks - 1)) / num_chunks, + handle.get_stream()); + size_t offset{0}; + for (size_t i = 0; i < num_chunks; ++i) { + size_t this_chunk_size = + std::min(tmp_majors.capacity(), static_cast(edgelist_edge_counts[0]) - offset); + tmp_majors.resize(this_chunk_size, handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + edgelist_majors[0] + offset, + edgelist_majors[0] + offset + tmp_majors.size(), + tmp_majors.begin()); + thrust::sort(handle.get_thrust_policy(), tmp_majors.begin(), tmp_majors.end()); + auto num_unique_majors = + thrust::count_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(tmp_majors.size()), + is_first_in_run_t{tmp_majors.data()}); + rmm::device_uvector tmp_keys(num_unique_majors, handle.get_stream()); + rmm::device_uvector tmp_values(num_unique_majors, handle.get_stream()); + 
thrust::reduce_by_key(handle.get_thrust_policy(), + tmp_majors.begin(), + tmp_majors.end(), + thrust::make_constant_iterator(edge_t{1}), + tmp_keys.begin(), + tmp_values.begin()); + + auto kv_pair_first = + thrust::make_zip_iterator(thrust::make_tuple(tmp_keys.begin(), tmp_values.begin())); + thrust::for_each(handle.get_thrust_policy(), + kv_pair_first, + kv_pair_first + tmp_keys.size(), + search_and_increment_degree_t{ + sorted_local_vertices.data(), + static_cast(sorted_local_vertices.size()), + sorted_local_vertex_degrees.data()}); + offset += this_chunk_size; + } } // 4. sort local vertices by degree (descending) diff --git a/cpp/tests/community/mg_egonet_test.cu b/cpp/tests/community/mg_egonet_test.cu index 42a2bba1181..6660eac3cad 100644 --- a/cpp/tests/community/mg_egonet_test.cu +++ b/cpp/tests/community/mg_egonet_test.cu @@ -215,6 +215,7 @@ class Tests_MGEgonet graph_ids_v.end(), size_t{0}, d_mg_edgelist_offsets.size() - 1, + true, handle_->get_stream()); auto [d_reference_src, d_reference_dst, d_reference_wgt, d_reference_offsets] = diff --git a/cpp/tests/structure/mg_induced_subgraph_test.cu b/cpp/tests/structure/mg_induced_subgraph_test.cu index 3f3db7c5278..b7bd22dfa63 100644 --- a/cpp/tests/structure/mg_induced_subgraph_test.cu +++ b/cpp/tests/structure/mg_induced_subgraph_test.cu @@ -210,6 +210,7 @@ class Tests_MGInducedSubgraph graph_ids_v.end(), size_t{0}, size_t{d_subgraph_offsets.size() - 1}, + true, handle_->get_stream()); auto [sg_graph, sg_edge_weights, sg_number_map] = cugraph::test::mg_graph_to_sg_graph( diff --git a/cpp/tests/utilities/test_utilities_impl.cuh b/cpp/tests/utilities/test_utilities_impl.cuh index 3025ca7908b..856c50ad35f 100644 --- a/cpp/tests/utilities/test_utilities_impl.cuh +++ b/cpp/tests/utilities/test_utilities_impl.cuh @@ -183,43 +183,42 @@ graph_to_host_csr( } } + auto total_global_mem = handle.get_device_properties().totalGlobalMem; + size_t element_size = sizeof(vertex_t) * 2; + if (d_wgt) { element_size += 
sizeof(weight_t); } + auto constexpr mem_frugal_ratio = + 0.25; // if the expected temporary buffer size exceeds the mem_frugal_ratio of the + // total_global_mem, switch to the memory frugal approach + auto mem_frugal_threshold = + static_cast(static_cast(total_global_mem / element_size) * mem_frugal_ratio); + rmm::device_uvector d_offsets(0, handle.get_stream()); if (d_wgt) { std::tie(d_offsets, d_dst, *d_wgt, std::ignore) = - detail::compress_edgelist(d_src.begin(), - d_src.end(), - d_dst.begin(), - d_wgt->begin(), - vertex_t{0}, - std::optional{std::nullopt}, - graph_view.number_of_vertices(), - vertex_t{0}, - graph_view.number_of_vertices(), - handle.get_stream()); - - // segmented sort neighbors - detail::sort_adjacency_list(handle, - raft::device_span(d_offsets.data(), d_offsets.size()), - d_dst.begin(), - d_dst.end(), - d_wgt->begin()); + detail::sort_and_compress_edgelist( + std::move(d_src), + std::move(d_dst), + std::move(*d_wgt), + vertex_t{0}, + std::optional{std::nullopt}, + graph_view.number_of_vertices(), + vertex_t{0}, + graph_view.number_of_vertices(), + mem_frugal_threshold, + handle.get_stream()); } else { std::tie(d_offsets, d_dst, std::ignore) = - detail::compress_edgelist(d_src.begin(), - d_src.end(), - d_dst.begin(), - vertex_t{0}, - std::optional{std::nullopt}, - graph_view.number_of_vertices(), - vertex_t{0}, - graph_view.number_of_vertices(), - handle.get_stream()); - // segmented sort neighbors - detail::sort_adjacency_list(handle, - raft::device_span(d_offsets.data(), d_offsets.size()), - d_dst.begin(), - d_dst.end()); + detail::sort_and_compress_edgelist( + std::move(d_src), + std::move(d_dst), + vertex_t{0}, + std::optional{std::nullopt}, + graph_view.number_of_vertices(), + vertex_t{0}, + graph_view.number_of_vertices(), + mem_frugal_threshold, + handle.get_stream()); } return std::make_tuple( From 5c0bc8a19fc3f9904541de6fb9bde95495298eb4 Mon Sep 17 00:00:00 2001 From: Alex Barghi <105237337+alexbarghi-nv@users.noreply.github.com> 
Date: Wed, 1 Nov 2023 15:41:03 -0400 Subject: [PATCH 7/7] [BUG] Check if Dask has quit to avoid throwing an exception and triggering a segfault on ddp exit (#3961) Currently, when training with ddp, if dask exits before the `CuGraphStore` is cleaned up, an exception is thrown, which causes ddp to quit with an error, which then causes a segfault, making users think that the workflow has failed when it has actually succeeded. This fix gracefully displays a warning if the dask dataset can't be deleted, which resolves this issue. Authors: - Alex Barghi (https://github.com/alexbarghi-nv) Approvers: - Vibhu Jawa (https://github.com/VibhuJawa) - Tingyu Wang (https://github.com/tingyu66) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3961 --- python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py index fd2172e6ade..6192cd621d5 100644 --- a/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/data/cugraph_store.py @@ -320,7 +320,13 @@ def __init__( def __del__(self): if self.__is_graph_owner: if isinstance(self.__graph._plc_graph, dict): - distributed.get_client().unpublish_dataset("cugraph_graph") + try: + distributed.get_client().unpublish_dataset("cugraph_graph") + except TypeError: + warnings.warn( + "Could not unpublish graph dataset, most likely because" + " dask has already shut down." + ) del self.__graph def __make_offsets(self, input_dict):