From 50be7baf2547af391085573dbc8891b86a6340c5 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Mon, 15 Jul 2024 04:17:39 -0700 Subject: [PATCH] nx-cugraph: add `from_dict_of_lists` and `to_dict_of_lists` --- python/nx-cugraph/README.md | 3 + python/nx-cugraph/_nx_cugraph/__init__.py | 2 + python/nx-cugraph/nx_cugraph/convert.py | 102 +++++++++++++++++- .../nx_cugraph/tests/test_convert.py | 50 ++++++++- 4 files changed, 154 insertions(+), 3 deletions(-) diff --git a/python/nx-cugraph/README.md b/python/nx-cugraph/README.md index 27825585c28..088f2fd2072 100644 --- a/python/nx-cugraph/README.md +++ b/python/nx-cugraph/README.md @@ -253,6 +253,9 @@ Below is the list of algorithms that are currently supported in nx-cugraph. classes └─ function └─ is_negatively_weighted +convert + ├─ from_dict_of_lists + └─ to_dict_of_lists convert_matrix ├─ from_pandas_edgelist └─ from_scipy_sparse_array diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index f57b90eb402..3d27e4b9e9d 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -81,6 +81,7 @@ "eigenvector_centrality", "empty_graph", "florentine_families_graph", + "from_dict_of_lists", "from_pandas_edgelist", "from_scipy_sparse_array", "frucht_graph", @@ -138,6 +139,7 @@ "star_graph", "tadpole_graph", "tetrahedral_graph", + "to_dict_of_lists", "transitivity", "triangles", "trivial_graph", diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py index b34245d5031..9e6c080d6ef 100644 --- a/python/nx-cugraph/nx_cugraph/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -14,7 +14,7 @@ import itertools import operator as op -from collections import Counter +from collections import Counter, defaultdict from collections.abc import Mapping from typing import TYPE_CHECKING @@ -24,7 +24,8 @@ import nx_cugraph as nxcg -from .utils import index_dtype +from .utils import index_dtype, 
networkx_algorithm +from .utils.misc import pairwise if TYPE_CHECKING: # pragma: no cover from nx_cugraph.typing import AttrKey, Dtype, EdgeValue, NodeValue, any_ndarray @@ -32,6 +33,8 @@ __all__ = [ "from_networkx", "to_networkx", + "from_dict_of_lists", + "to_dict_of_lists", ] concat = itertools.chain.from_iterable @@ -653,3 +656,98 @@ def _to_undirected_graph( ) # TODO: handle cugraph.Graph raise TypeError + + +@networkx_algorithm(version_added="24.08") +def from_dict_of_lists(d, create_using=None): + from .generators._utils import _create_using_class + + graph_class, inplace = _create_using_class(create_using) + key_to_id = defaultdict(itertools.count().__next__) + src_indices = cp.array( + # cp.repeat is slow to use here, so use numpy instead + np.repeat( + np.fromiter(map(key_to_id.__getitem__, d), index_dtype), + np.fromiter(map(len, d.values()), index_dtype), + ) + ) + dst_indices = cp.fromiter( + map(key_to_id.__getitem__, concat(d.values())), index_dtype + ) + # Initialize as directed first, then symmetrize if undirected. + G = graph_class.to_directed_class().from_coo( + len(key_to_id), + src_indices, + dst_indices, + key_to_id=key_to_id, + ) + if not graph_class.is_directed(): + G = G.to_undirected() + if inplace: + return create_using._become(G) + return G + + +@networkx_algorithm(version_added="24.08") +def to_dict_of_lists(G, nodelist=None): + G = _to_graph(G) + src_indices = G.src_indices + dst_indices = G.dst_indices + if nodelist is not None: + try: + node_ids = G._nodekeys_to_nodearray(nodelist) + except KeyError as exc: + gname = "digraph" if G.is_directed() else "graph" + raise nx.NetworkXError( + f"The node {exc.args[0]} is not in the {gname}." + ) from exc + mask = cp.isin(src_indices, node_ids) & cp.isin(dst_indices, node_ids) + src_indices = src_indices[mask] + dst_indices = dst_indices[mask] + # Sort indices so we can use `cp.unique` to determine boundaries. + # This is like exporting to DCSR. 
+ if G.is_multigraph(): + stacked = cp.unique(cp.vstack((src_indices, dst_indices)), axis=1) + src_indices = stacked[0] + dst_indices = stacked[1] + else: + stacked = cp.vstack((dst_indices, src_indices)) + indices = cp.lexsort(stacked) + src_indices = src_indices[indices] + dst_indices = dst_indices[indices] + compressed_srcs, left_bounds = cp.unique(src_indices, return_index=True) + # Ensure we include isolate nodes in the result (and in proper order) + rv = None + if nodelist is not None: + if compressed_srcs.size != len(nodelist): + if G.key_to_id is None: + # `G._nodekeys_to_nodearray` does not check for valid node keys. + container = range(G._N) + for key in nodelist: + if key not in container: + gname = "digraph" if G.is_directed() else "graph" + raise nx.NetworkXError(f"The node {key} is not in the {gname}.") + rv = {key: [] for key in nodelist} + elif compressed_srcs.size != G._N: + rv = {key: [] for key in G} + # We use `boundaries` like this in `_groupby` too + boundaries = pairwise(itertools.chain(left_bounds.tolist(), [src_indices.size])) + dst_indices = dst_indices.tolist() + if G.key_to_id is None: + it = zip(compressed_srcs.tolist(), boundaries) + if rv is None: + return {src: dst_indices[start:end] for src, (start, end) in it} + rv.update((src, dst_indices[start:end]) for src, (start, end) in it) + return rv + to_key = G.id_to_key.__getitem__ + it = zip(compressed_srcs.tolist(), boundaries) + if rv is None: + return { + to_key(src): list(map(to_key, dst_indices[start:end])) + for src, (start, end) in it + } + rv.update( + (to_key(src), list(map(to_key, dst_indices[start:end]))) + for src, (start, end) in it + ) + return rv diff --git a/python/nx-cugraph/nx_cugraph/tests/test_convert.py b/python/nx-cugraph/nx_cugraph/tests/test_convert.py index 1a71b796861..634b28e961c 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_convert.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_convert.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -13,10 +13,13 @@ import cupy as cp import networkx as nx import pytest +from packaging.version import parse import nx_cugraph as nxcg from nx_cugraph import interface +nxver = parse(nx.__version__) + @pytest.mark.parametrize( "graph_class", [nx.Graph, nx.DiGraph, nx.MultiGraph, nx.MultiDiGraph] @@ -224,3 +227,48 @@ def test_multigraph(graph_class): H = nxcg.to_networkx(Gcg) assert type(G) is type(H) assert nx.utils.graphs_equal(G, H) + + +def test_to_dict_of_lists(): + G = nx.MultiGraph() + G.add_edge("a", "b") + G.add_edge("a", "c") + G.add_edge("a", "b") + expected = nx.to_dict_of_lists(G) + result = nxcg.to_dict_of_lists(G) + assert expected == result + expected = nx.to_dict_of_lists(G, nodelist=["a", "b"]) + result = nxcg.to_dict_of_lists(G, nodelist=["a", "b"]) + assert expected == result + with pytest.raises(nx.NetworkXError, match="The node d is not in the graph"): + nx.to_dict_of_lists(G, nodelist=["a", "d"]) + with pytest.raises(nx.NetworkXError, match="The node d is not in the graph"): + nxcg.to_dict_of_lists(G, nodelist=["a", "d"]) + G.add_node("d") # No edges + expected = nx.to_dict_of_lists(G) + result = nxcg.to_dict_of_lists(G) + assert expected == result + expected = nx.to_dict_of_lists(G, nodelist=["a", "d"]) + result = nxcg.to_dict_of_lists(G, nodelist=["a", "d"]) + assert expected == result + # Now try with default node ids + G = nx.DiGraph() + G.add_edge(0, 1) + G.add_edge(0, 2) + expected = nx.to_dict_of_lists(G) + result = nxcg.to_dict_of_lists(G) + assert expected == result + expected = nx.to_dict_of_lists(G, nodelist=[0, 1]) + result = nxcg.to_dict_of_lists(G, nodelist=[0, 1]) + assert expected == result + with pytest.raises(nx.NetworkXError, match="The node 3 is not in the digraph"): + nx.to_dict_of_lists(G, 
nodelist=[0, 3]) + with pytest.raises(nx.NetworkXError, match="The node 3 is not in the digraph"): + nxcg.to_dict_of_lists(G, nodelist=[0, 3]) + G.add_node(3) # No edges + expected = nx.to_dict_of_lists(G) + result = nxcg.to_dict_of_lists(G) + assert expected == result + expected = nx.to_dict_of_lists(G, nodelist=[0, 3]) + result = nxcg.to_dict_of_lists(G, nodelist=[0, 3]) + assert expected == result