From 629e63c164732fe8da690fa42d2bd9b990131b09 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Wed, 25 Oct 2023 16:16:00 -0500 Subject: [PATCH 1/5] nx-cugraph: add k_truss and degree centralities (#3945) New algorithms: - `degree_centrality` - `in_degree_centrality` - `k_truss` - `number_of_selfloops` - `out_degree_centrality` Also, rename `row_indices, col_indices` to `src_indices, dst_indices` Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/3945 --- python/nx-cugraph/_nx_cugraph/__init__.py | 5 + python/nx-cugraph/lint.yaml | 6 +- .../nx_cugraph/algorithms/__init__.py | 1 + .../algorithms/centrality/__init__.py | 1 + .../algorithms/centrality/degree_alg.py | 48 ++++++++ .../algorithms/community/louvain.py | 2 +- .../nx-cugraph/nx_cugraph/algorithms/core.py | 96 ++++++++++++++++ .../nx_cugraph/algorithms/isolate.py | 8 +- .../nx-cugraph/nx_cugraph/classes/__init__.py | 5 +- .../nx-cugraph/nx_cugraph/classes/digraph.py | 13 ++- .../nx-cugraph/nx_cugraph/classes/function.py | 23 ++++ python/nx-cugraph/nx_cugraph/classes/graph.py | 105 ++++++++++-------- .../nx_cugraph/classes/multigraph.py | 64 ++++++----- python/nx-cugraph/nx_cugraph/convert.py | 38 +++---- .../nx_cugraph/tests/test_convert.py | 32 +++--- .../nx_cugraph/tests/test_match_api.py | 3 + python/nx-cugraph/pyproject.toml | 5 +- 17 files changed, 330 insertions(+), 125 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/centrality/degree_alg.py create mode 100644 python/nx-cugraph/nx_cugraph/algorithms/core.py create mode 100644 python/nx-cugraph/nx_cugraph/classes/function.py diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 886d7a19f74..965b5b232ab 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -30,11 +30,16 @@ "functions": { # BEGIN: functions "betweenness_centrality", + "degree_centrality", "edge_betweenness_centrality", + "in_degree_centrality", "is_isolate", "isolates", + "k_truss", "louvain_communities", "number_of_isolates", + "number_of_selfloops", + "out_degree_centrality", # END: functions }, "extra_docstrings": { diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index 4f604fbeb6e..fef2cebc7f5 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -45,12 +45,12 @@ repos: - id: pyupgrade args: [--py39-plus] - repo: https://github.com/psf/black - rev: 23.9.1 + rev: 23.10.0 hooks: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.292 + rev: v0.1.1 hooks: - id: ruff args: [--fix-only, --show-fixes] @@ -77,7 +77,7 @@ repos: additional_dependencies: [tomli] files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.0.292 + rev: v0.1.1 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index dfd9adfc61a..22600bfdc2d 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -12,4 +12,5 @@ # limitations under the License. from . import centrality, community from .centrality import * +from .core import * from .isolate import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/centrality/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/__init__.py index 2ac6242e8a4..af91f227843 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/centrality/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/__init__.py @@ -11,3 +11,4 @@ # See the License for the specific language governing permissions and # limitations under the License. from .betweenness import * +from .degree_alg import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/centrality/degree_alg.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/degree_alg.py new file mode 100644 index 00000000000..0b2fd24af79 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/degree_alg.py @@ -0,0 +1,48 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from nx_cugraph.convert import _to_directed_graph, _to_graph +from nx_cugraph.utils import networkx_algorithm, not_implemented_for + +__all__ = ["degree_centrality", "in_degree_centrality", "out_degree_centrality"] + + +@networkx_algorithm +def degree_centrality(G): + G = _to_graph(G) + if len(G) <= 1: + return dict.fromkeys(G, 1) + deg = G._degrees_array() + centrality = deg * (1 / (len(G) - 1)) + return G._nodearray_to_dict(centrality) + + +@not_implemented_for("undirected") +@networkx_algorithm +def in_degree_centrality(G): + G = _to_directed_graph(G) + if len(G) <= 1: + return dict.fromkeys(G, 1) + deg = G._in_degrees_array() + centrality = deg * (1 / (len(G) - 1)) + return G._nodearray_to_dict(centrality) + + +@not_implemented_for("undirected") +@networkx_algorithm +def out_degree_centrality(G): + G = _to_directed_graph(G) + if len(G) <= 1: + return dict.fromkeys(G, 1) + deg = G._out_degrees_array() + centrality = deg * (1 / (len(G) - 1)) + return G._nodearray_to_dict(centrality) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index dc209870c89..62261d109a2 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -42,7 +42,7 @@ def louvain_communities( # NetworkX allows both directed and undirected, but cugraph only allows undirected. seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility G = _to_undirected_graph(G, weight) - if G.row_indices.size == 0: + if G.src_indices.size == 0: # TODO: PLC doesn't handle empty graphs gracefully! return [{key} for key in G._nodeiter_to_iter(range(len(G)))] if max_level is None: diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py new file mode 100644 index 00000000000..0a64dd71c69 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -0,0 +1,96 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import cupy as cp +import networkx as nx +import numpy as np +import pylibcugraph as plc + +import nx_cugraph as nxcg +from nx_cugraph.utils import networkx_algorithm, not_implemented_for + +__all__ = ["k_truss"] + + +@not_implemented_for("directed") +@not_implemented_for("multigraph") +@networkx_algorithm +def k_truss(G, k): + if is_nx := isinstance(G, nx.Graph): + G = nxcg.from_networkx(G, preserve_all_attrs=True) + if nxcg.number_of_selfloops(G) > 0: + raise nx.NetworkXError( + "Input graph has self loops which is not permitted; " + "Consider using G.remove_edges_from(nx.selfloop_edges(G))." + ) + # TODO: create renumbering helper function(s) + if k < 3: + # k-truss graph is comprised of nodes incident on k-2 triangles, so k<3 is a + # boundary condition. Here, all we need to do is drop nodes with zero degree. + # Technically, it would be okay to delete this branch of code, because + # plc.k_truss_subgraph behaves the same for 0 <= k < 3. We keep this branch b/c + # it's faster and has an "early return" if there are no nodes with zero degree. + degrees = G._degrees_array() + # Renumber step 0: node indices + node_indices = degrees.nonzero()[0] + if degrees.size == node_indices.size: + # No change + return G if is_nx else G.copy() + src_indices = G.src_indices + dst_indices = G.dst_indices + # Renumber step 1: edge values (no changes needed) + edge_values = {key: val.copy() for key, val in G.edge_values.items()} + edge_masks = {key: val.copy() for key, val in G.edge_masks.items()} + else: + # int dtype for edge_indices would be preferred + edge_indices = cp.arange(G.src_indices.size, dtype=np.float64) + src_indices, dst_indices, edge_indices, _ = plc.k_truss_subgraph( + resource_handle=plc.ResourceHandle(), + graph=G._get_plc_graph(edge_array=edge_indices), + k=k, + do_expensive_check=False, + ) + # Renumber step 0: node indices + node_indices = cp.unique(cp.concatenate([src_indices, dst_indices])) + # Renumber step 1: edge values + edge_indices = edge_indices.astype(np.int64) + edge_values = {key: val[edge_indices] for key, val in G.edge_values.items()} + edge_masks = {key: val[edge_indices] for key, val in G.edge_masks.items()} + # Renumber step 2: edge indices + mapper = cp.zeros(len(G), src_indices.dtype) + mapper[node_indices] = cp.arange(node_indices.size, dtype=mapper.dtype) + src_indices = mapper[src_indices] + dst_indices = mapper[dst_indices] + # Renumber step 3: node values + node_values = {key: val[node_indices] for key, val in G.node_values.items()} + node_masks = {key: val[node_indices] for key, val in G.node_masks.items()} + # Renumber step 4: key_to_id + if (id_to_key := G.id_to_key) is not None: + key_to_id = { + id_to_key[old_index]: new_index + for new_index, old_index in enumerate(node_indices.tolist()) + } + else: + key_to_id = None + # Same as calling `G.from_coo`, but use __class__ to indicate it's a classmethod. + new_graph = G.__class__.from_coo( + node_indices.size, + src_indices, + dst_indices, + edge_values, + edge_masks, + node_values, + node_masks, + key_to_id=key_to_id, + ) + new_graph.graph.update(G.graph) + return new_graph diff --git a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py index 774627e84f6..d32223fb3ed 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py @@ -30,18 +30,18 @@ def is_isolate(G, n): G = _to_graph(G) index = n if G.key_to_id is None else G.key_to_id[n] return not ( - (G.row_indices == index).any().tolist() + (G.src_indices == index).any().tolist() or G.is_directed() - and (G.col_indices == index).any().tolist() + and (G.dst_indices == index).any().tolist() ) def _mark_isolates(G) -> cp.ndarray[bool]: """Return a boolean mask array indicating indices of isolated nodes.""" mark_isolates = cp.ones(len(G), bool) - mark_isolates[G.row_indices] = False + mark_isolates[G.src_indices] = False if G.is_directed(): - mark_isolates[G.col_indices] = False + mark_isolates[G.dst_indices] = False return mark_isolates diff --git a/python/nx-cugraph/nx_cugraph/classes/__init__.py b/python/nx-cugraph/nx_cugraph/classes/__init__.py index 9916bcbe241..19a5357da55 100644 --- a/python/nx-cugraph/nx_cugraph/classes/__init__.py +++ b/python/nx-cugraph/nx_cugraph/classes/__init__.py @@ -11,7 +11,8 @@ # See the License for the specific language governing permissions and # limitations under the License. from .graph import Graph +from .digraph import DiGraph from .multigraph import MultiGraph +from .multidigraph import MultiDiGraph -from .digraph import DiGraph # isort:skip -from .multidigraph import MultiDiGraph # isort:skip +from .function import * diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 72a1bff21a9..52ea2334c85 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -14,6 +14,7 @@ from typing import TYPE_CHECKING +import cupy as cp import networkx as nx import nx_cugraph as nxcg @@ -48,7 +49,7 @@ def number_of_edges( ) -> int: if u is not None or v is not None: raise NotImplementedError - return self.row_indices.size + return self.src_indices.size ########################## # NetworkX graph methods # @@ -59,3 +60,13 @@ def reverse(self, copy: bool = True) -> DiGraph: return self._copy(not copy, self.__class__, reverse=True) # Many more methods to implement... + + ################### + # Private methods # + ################### + + def _in_degrees_array(self): + return cp.bincount(self.dst_indices, minlength=self._N) + + def _out_degrees_array(self): + return cp.bincount(self.src_indices, minlength=self._N) diff --git a/python/nx-cugraph/nx_cugraph/classes/function.py b/python/nx-cugraph/nx_cugraph/classes/function.py new file mode 100644 index 00000000000..633e4abd7f4 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/classes/function.py @@ -0,0 +1,23 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from nx_cugraph.convert import _to_graph +from nx_cugraph.utils import networkx_algorithm + +__all__ = ["number_of_selfloops"] + + +@networkx_algorithm +def number_of_selfloops(G): + G = _to_graph(G) + is_selfloop = G.src_indices == G.dst_indices + return is_selfloop.sum().tolist() diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index f1e85c836e3..166b6b9dc6b 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -52,8 +52,8 @@ class Graph: # Not networkx properties # We store edge data in COO format with {row,col}_indices and edge_values. - row_indices: cp.ndarray[IndexValue] - col_indices: cp.ndarray[IndexValue] + src_indices: cp.ndarray[IndexValue] + dst_indices: cp.ndarray[IndexValue] edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] edge_masks: dict[AttrKey, cp.ndarray[bool]] node_values: dict[AttrKey, cp.ndarray[NodeValue]] @@ -70,8 +70,8 @@ class Graph: def from_coo( cls, N: int, - row_indices: cp.ndarray[IndexValue], - col_indices: cp.ndarray[IndexValue], + src_indices: cp.ndarray[IndexValue], + dst_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, @@ -82,8 +82,8 @@ def from_coo( **attr, ) -> Graph: new_graph = object.__new__(cls) - new_graph.row_indices = row_indices - new_graph.col_indices = col_indices + new_graph.src_indices = src_indices + new_graph.dst_indices = dst_indices new_graph.edge_values = {} if edge_values is None else dict(edge_values) new_graph.edge_masks = {} if edge_masks is None else dict(edge_masks) new_graph.node_values = {} if node_values is None else dict(node_values) @@ -93,9 +93,9 @@ def from_coo( new_graph._N = op.index(N) # Ensure N is integral new_graph.graph = new_graph.graph_attr_dict_factory() new_graph.graph.update(attr) - size = new_graph.row_indices.size + size = new_graph.src_indices.size # Easy and fast sanity checks - if size != new_graph.col_indices.size: + if size != new_graph.dst_indices.size: raise ValueError for attr in ["edge_values", "edge_masks"]: if datadict := getattr(new_graph, attr): @@ -117,7 +117,7 @@ def from_coo( def from_csr( cls, indptr: cp.ndarray[IndexValue], - col_indices: cp.ndarray[IndexValue], + dst_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, @@ -128,14 +128,14 @@ def from_csr( **attr, ) -> Graph: N = indptr.size - 1 - row_indices = cp.array( + src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get()) ) return cls.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_values, edge_masks, node_values, @@ -149,7 +149,7 @@ def from_csr( def from_csc( cls, indptr: cp.ndarray[IndexValue], - row_indices: cp.ndarray[IndexValue], + src_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, @@ -160,14 +160,14 @@ def from_csc( **attr, ) -> Graph: N = indptr.size - 1 - col_indices = cp.array( + dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get()) ) return cls.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_values, edge_masks, node_values, @@ -183,7 +183,7 @@ def from_dcsr( N: int, compressed_rows: cp.ndarray[IndexValue], indptr: cp.ndarray[IndexValue], - col_indices: cp.ndarray[IndexValue], + dst_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, @@ -193,14 +193,14 @@ def from_dcsr( id_to_key: list[NodeKey] | None = None, **attr, ) -> Graph: - row_indices = cp.array( + src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(compressed_rows.get(), cp.diff(indptr).get()) ) return cls.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_values, edge_masks, node_values, @@ -216,7 +216,7 @@ def from_dcsc( N: int, compressed_cols: cp.ndarray[IndexValue], indptr: cp.ndarray[IndexValue], - row_indices: cp.ndarray[IndexValue], + src_indices: cp.ndarray[IndexValue], edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, node_values: dict[AttrKey, cp.ndarray[NodeValue]] | None = None, @@ -226,14 +226,14 @@ def from_dcsc( id_to_key: list[NodeKey] | None = None, **attr, ) -> Graph: - col_indices = cp.array( + dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(compressed_cols.get(), cp.diff(indptr).get()) ) return cls.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_values, edge_masks, node_values, @@ -343,8 +343,8 @@ def clear(self) -> None: self.node_values.clear() self.node_masks.clear() self.graph.clear() - self.row_indices = cp.empty(0, self.row_indices.dtype) - self.col_indices = cp.empty(0, self.col_indices.dtype) + self.src_indices = cp.empty(0, self.src_indices.dtype) + self.dst_indices = cp.empty(0, self.dst_indices.dtype) self._N = 0 self.key_to_id = None self._id_to_key = None @@ -353,8 +353,8 @@ def clear(self) -> None: def clear_edges(self) -> None: self.edge_values.clear() self.edge_masks.clear() - self.row_indices = cp.empty(0, self.row_indices.dtype) - self.col_indices = cp.empty(0, self.col_indices.dtype) + self.src_indices = cp.empty(0, self.src_indices.dtype) + self.dst_indices = cp.empty(0, self.dst_indices.dtype) @networkx_api def copy(self, as_view: bool = False) -> Graph: @@ -377,7 +377,7 @@ def get_edge_data( return default except TypeError: return default - index = cp.nonzero((self.row_indices == u) & (self.col_indices == v))[0] + index = cp.nonzero((self.src_indices == u) & (self.dst_indices == v))[0] if index.size == 0: return default [index] = index.tolist() @@ -397,7 +397,7 @@ def has_edge(self, u: NodeKey, v: NodeKey) -> bool: v = self.key_to_id[v] except KeyError: return False - return bool(((self.row_indices == u) & (self.col_indices == v)).any()) + return bool(((self.src_indices == u) & (self.dst_indices == v)).any()) @networkx_api def has_node(self, n: NodeKey) -> bool: @@ -431,8 +431,8 @@ def order(self) -> int: def size(self, weight: AttrKey | None = None) -> int: if weight is not None: raise NotImplementedError - # If no self-edges, then `self.row_indices.size // 2` - return int((self.row_indices <= self.col_indices).sum()) + # If no self-edges, then `self.src_indices.size // 2` + return int((self.src_indices <= self.dst_indices).sum()) @networkx_api def to_directed(self, as_view: bool = False) -> nxcg.DiGraph: @@ -455,9 +455,8 @@ def to_undirected(self, as_view: bool = False) -> Graph: def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): # DRY warning: see also MultiGraph._copy - indptr = self.indptr - row_indices = self.row_indices - col_indices = self.col_indices + src_indices = self.src_indices + dst_indices = self.dst_indices edge_values = self.edge_values edge_masks = self.edge_masks node_values = self.node_values @@ -465,9 +464,8 @@ def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): key_to_id = self.key_to_id id_to_key = None if key_to_id is None else self._id_to_key if not as_view: - indptr = indptr.copy() - row_indices = row_indices.copy() - col_indices = col_indices.copy() + src_indices = src_indices.copy() + dst_indices = dst_indices.copy() edge_values = {key: val.copy() for key, val in edge_values.items()} edge_masks = {key: val.copy() for key, val in edge_masks.items()} node_values = {key: val.copy() for key, val in node_values.items()} @@ -477,11 +475,11 @@ def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): if id_to_key is not None: id_to_key = id_to_key.copy() if reverse: - row_indices, col_indices = col_indices, row_indices + src_indices, dst_indices = dst_indices, src_indices rv = cls.from_coo( - indptr, - row_indices, - col_indices, + self._N, + src_indices, + dst_indices, edge_values, edge_masks, node_values, @@ -502,8 +500,11 @@ def _get_plc_graph( edge_dtype: Dtype | None = None, *, store_transposed: bool = False, + edge_array: cp.ndarray[EdgeValue] | None = None, ): - if edge_attr is None: + if edge_array is not None: + pass + elif edge_attr is None: edge_array = None elif edge_attr not in self.edge_values: raise KeyError("Graph has no edge attribute {edge_attr!r}") @@ -532,14 +533,20 @@ def _get_plc_graph( is_multigraph=self.is_multigraph(), is_symmetric=not self.is_directed(), ), - src_or_offset_array=self.row_indices, - dst_or_index_array=self.col_indices, + src_or_offset_array=self.src_indices, + dst_or_index_array=self.dst_indices, weight_array=edge_array, store_transposed=store_transposed, renumber=False, do_expensive_check=False, ) + def _degrees_array(self): + degrees = cp.bincount(self.src_indices, minlength=self._N) + if self.is_directed(): + degrees += cp.bincount(self.dst_indices, minlength=self._N) + return degrees + def _nodeiter_to_iter(self, node_ids: Iterable[IndexValue]) -> Iterable[NodeKey]: """Convert an iterable of node IDs to an iterable of node keys.""" if (id_to_key := self.id_to_key) is not None: @@ -551,6 +558,14 @@ def _nodearray_to_list(self, node_ids: cp.ndarray[IndexValue]) -> list[NodeKey]: return node_ids.tolist() return list(self._nodeiter_to_iter(node_ids.tolist())) + def _nodearray_to_dict( + self, values: cp.ndarray[NodeValue] + ) -> dict[NodeKey, NodeValue]: + it = enumerate(values.tolist()) + if (id_to_key := self.id_to_key) is not None: + return {id_to_key[key]: val for key, val in it} + return dict(it) + def _nodearrays_to_dict( self, node_ids: cp.ndarray[IndexValue], values: cp.ndarray[NodeValue] ) -> dict[NodeKey, NodeValue]: diff --git a/python/nx-cugraph/nx_cugraph/classes/multigraph.py b/python/nx-cugraph/nx_cugraph/classes/multigraph.py index 499ca7ce212..3d90861a328 100644 --- a/python/nx-cugraph/nx_cugraph/classes/multigraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/multigraph.py @@ -67,8 +67,8 @@ class MultiGraph(Graph): def from_coo( cls, N: int, - row_indices: cp.ndarray[IndexValue], - col_indices: cp.ndarray[IndexValue], + src_indices: cp.ndarray[IndexValue], + dst_indices: cp.ndarray[IndexValue], edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, @@ -82,8 +82,8 @@ def from_coo( ) -> MultiGraph: new_graph = super().from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_values, edge_masks, node_values, @@ -97,7 +97,7 @@ def from_coo( # Easy and fast sanity checks if ( new_graph.edge_keys is not None - and len(new_graph.edge_keys) != row_indices.size + and len(new_graph.edge_keys) != src_indices.size ): raise ValueError return new_graph @@ -106,7 +106,7 @@ def from_coo( def from_csr( cls, indptr: cp.ndarray[IndexValue], - col_indices: cp.ndarray[IndexValue], + dst_indices: cp.ndarray[IndexValue], edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, @@ -119,14 +119,14 @@ def from_csr( **attr, ) -> MultiGraph: N = indptr.size - 1 - row_indices = cp.array( + src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get()) ) return cls.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_indices, edge_values, edge_masks, @@ -142,7 +142,7 @@ def from_csr( def from_csc( cls, indptr: cp.ndarray[IndexValue], - row_indices: cp.ndarray[IndexValue], + src_indices: cp.ndarray[IndexValue], edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, @@ -155,14 +155,14 @@ def from_csc( **attr, ) -> MultiGraph: N = indptr.size - 1 - col_indices = cp.array( + dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(np.arange(N, dtype=np.int32), cp.diff(indptr).get()) ) return cls.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_indices, edge_values, edge_masks, @@ -180,7 +180,7 @@ def from_dcsr( N: int, compressed_rows: cp.ndarray[IndexValue], indptr: cp.ndarray[IndexValue], - col_indices: cp.ndarray[IndexValue], + dst_indices: cp.ndarray[IndexValue], edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, @@ -192,14 +192,14 @@ def from_dcsr( edge_keys: list[EdgeKey] | None = None, **attr, ) -> MultiGraph: - row_indices = cp.array( + src_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(compressed_rows.get(), cp.diff(indptr).get()) ) return cls.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_indices, edge_values, edge_masks, @@ -217,7 +217,7 @@ def from_dcsc( N: int, compressed_cols: cp.ndarray[IndexValue], indptr: cp.ndarray[IndexValue], - row_indices: cp.ndarray[IndexValue], + src_indices: cp.ndarray[IndexValue], edge_indices: cp.ndarray[IndexValue] | None = None, edge_values: dict[AttrKey, cp.ndarray[EdgeValue]] | None = None, edge_masks: dict[AttrKey, cp.ndarray[bool]] | None = None, @@ -229,14 +229,14 @@ def from_dcsc( edge_keys: list[EdgeKey] | None = None, **attr, ) -> Graph: - col_indices = cp.array( + dst_indices = cp.array( # cp.repeat is slow to use here, so use numpy instead np.repeat(compressed_cols.get(), cp.diff(indptr).get()) ) return cls.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_indices, edge_values, edge_masks, @@ -330,7 +330,7 @@ def get_edge_data( return default except TypeError: return default - mask = (self.row_indices == u) & (self.col_indices == v) + mask = (self.src_indices == u) & (self.dst_indices == v) if not mask.any(): return default if self.edge_keys is None: @@ -376,7 +376,7 @@ def has_edge(self, u: NodeKey, v: NodeKey, key: EdgeKey | None = None) -> bool: v = self.key_to_id[v] except KeyError: return False - mask = (self.row_indices == u) & (self.col_indices == v) + mask = (self.src_indices == u) & (self.dst_indices == v) if key is None or (self.edge_indices is None and self.edge_keys is None): return bool(mask.any()) if self.edge_keys is None: @@ -405,9 +405,8 @@ def to_undirected(self, as_view: bool = False) -> MultiGraph: def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): # DRY warning: see also Graph._copy - indptr = self.indptr - row_indices = self.row_indices - col_indices = self.col_indices + src_indices = self.src_indices + dst_indices = self.dst_indices edge_indices = self.edge_indices edge_values = self.edge_values edge_masks = self.edge_masks @@ -417,9 +416,8 @@ def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): id_to_key = None if key_to_id is None else self._id_to_key edge_keys = self.edge_keys if not as_view: - indptr = indptr.copy() - row_indices = row_indices.copy() - col_indices = col_indices.copy() + src_indices = src_indices.copy() + dst_indices = dst_indices.copy() edge_indices = edge_indices.copy() edge_values = {key: val.copy() for key, val in edge_values.items()} edge_masks = {key: val.copy() for key, val in edge_masks.items()} @@ -432,11 +430,11 @@ def _copy(self, as_view: bool, cls: type[Graph], reverse: bool = False): if edge_keys is not None: edge_keys = edge_keys.copy() if reverse: - row_indices, col_indices = col_indices, row_indices + src_indices, dst_indices = dst_indices, src_indices rv = cls.from_coo( - indptr, - row_indices, - col_indices, + self._N, + src_indices, + dst_indices, edge_indices, edge_values, edge_masks, diff --git a/python/nx-cugraph/nx_cugraph/convert.py b/python/nx-cugraph/nx_cugraph/convert.py index e82286e5e29..d117c8e5c03 100644 --- a/python/nx-cugraph/nx_cugraph/convert.py +++ b/python/nx-cugraph/nx_cugraph/convert.py @@ -266,12 +266,12 @@ def from_networkx( else: col_iter = map(key_to_id.__getitem__, col_iter) if graph.is_multigraph(): - col_indices = np.fromiter(col_iter, np.int32) + dst_indices = np.fromiter(col_iter, np.int32) num_multiedges = np.fromiter( map(len, concat(map(dict.values, adj.values()))), np.int32 ) # cp.repeat is slow to use here, so use numpy instead - col_indices = cp.array(np.repeat(col_indices, num_multiedges)) + dst_indices = cp.array(np.repeat(dst_indices, num_multiedges)) # Determine edge keys and edge ids for multigraphs edge_keys = list(concat(concat(map(dict.values, adj.values())))) edge_indices = cp.fromiter( @@ -281,7 +281,7 @@ def from_networkx( if edge_keys == edge_indices.tolist(): edge_keys = None # Prefer edge_indices else: - col_indices = cp.fromiter(col_iter, np.int32) + dst_indices = cp.fromiter(col_iter, np.int32) edge_values = {} edge_masks = {} @@ -353,12 +353,12 @@ def from_networkx( # if vals.ndim > 1: ... # cp.repeat is slow to use here, so use numpy instead - row_indices = np.repeat( + src_indices = np.repeat( np.arange(N, dtype=np.int32), np.fromiter(map(len, adj.values()), np.int32) ) if graph.is_multigraph(): - row_indices = np.repeat(row_indices, num_multiedges) - row_indices = cp.array(row_indices) + src_indices = np.repeat(src_indices, num_multiedges) + src_indices = cp.array(src_indices) node_values = {} node_masks = {} @@ -405,8 +405,8 @@ def from_networkx( klass = nxcg.MultiGraph rv = klass.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_indices, edge_values, edge_masks, @@ -422,8 +422,8 @@ def from_networkx( klass = nxcg.Graph rv = klass.from_coo( N, - row_indices, - col_indices, + src_indices, + dst_indices, edge_values, edge_masks, node_values, @@ -496,23 +496,23 @@ def to_networkx(G: nxcg.Graph) -> nx.Graph: else: rv.add_nodes_from(range(len(G))) - row_indices = G.row_indices - col_indices = G.col_indices + src_indices = G.src_indices + dst_indices = G.dst_indices edge_values = G.edge_values edge_masks = G.edge_masks if edge_values and not G.is_directed(): # Only add upper triangle of the adjacency matrix so we don't double-add edges - mask = row_indices <= col_indices - row_indices = row_indices[mask] - col_indices = col_indices[mask] + mask = src_indices <= dst_indices + src_indices = src_indices[mask] + dst_indices = dst_indices[mask] edge_values = {k: v[mask] for k, v in edge_values.items()} if edge_masks: edge_masks = {k: v[mask] for k, v in edge_masks.items()} - row_indices = row_iter = row_indices.tolist() - col_indices = col_iter = col_indices.tolist() + src_indices = row_iter = src_indices.tolist() + dst_indices = col_iter = dst_indices.tolist() if id_to_key is not None: - row_iter = map(id_to_key.__getitem__, row_indices) - col_iter = map(id_to_key.__getitem__, col_indices) + row_iter = map(id_to_key.__getitem__, src_indices) + col_iter = map(id_to_key.__getitem__, dst_indices) if G.is_multigraph() and (G.edge_keys is not None or G.edge_indices is not None): if G.edge_keys is not None: edge_keys = G.edge_keys diff --git a/python/nx-cugraph/nx_cugraph/tests/test_convert.py b/python/nx-cugraph/nx_cugraph/tests/test_convert.py index 83820f7621f..1a71b796861 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_convert.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_convert.py @@ -71,8 +71,8 @@ def test_convert(graph_class): ]: # All edges have "x" attribute, so all kwargs are equivalent Gcg = nxcg.from_networkx(G, **kwargs) - cp.testing.assert_array_equal(Gcg.row_indices, [0, 1]) - cp.testing.assert_array_equal(Gcg.col_indices, [1, 0]) + cp.testing.assert_array_equal(Gcg.src_indices, [0, 1]) + cp.testing.assert_array_equal(Gcg.dst_indices, [1, 0]) cp.testing.assert_array_equal(Gcg.edge_values["x"], [2, 2]) assert len(Gcg.edge_values) == 1 assert Gcg.edge_masks == {} @@ -86,8 +86,8 @@ def test_convert(graph_class): # Structure-only graph (no edge attributes) Gcg = nxcg.from_networkx(G, preserve_node_attrs=True) - cp.testing.assert_array_equal(Gcg.row_indices, [0, 1]) - cp.testing.assert_array_equal(Gcg.col_indices, [1, 0]) + cp.testing.assert_array_equal(Gcg.src_indices, [0, 1]) + cp.testing.assert_array_equal(Gcg.dst_indices, [1, 0]) cp.testing.assert_array_equal(Gcg.node_values["foo"], [10, 20]) assert Gcg.edge_values == Gcg.edge_masks == {} H = nxcg.to_networkx(Gcg) @@ -99,8 +99,8 @@ def test_convert(graph_class): # Fill completely missing attribute with default value Gcg = nxcg.from_networkx(G, edge_attrs={"y": 0}) - cp.testing.assert_array_equal(Gcg.row_indices, [0, 1]) - cp.testing.assert_array_equal(Gcg.col_indices, [1, 0]) + cp.testing.assert_array_equal(Gcg.src_indices, [0, 1]) + cp.testing.assert_array_equal(Gcg.dst_indices, [1, 0]) cp.testing.assert_array_equal(Gcg.edge_values["y"], [0, 0]) assert len(Gcg.edge_values) == 1 assert Gcg.edge_masks == Gcg.node_values == Gcg.node_masks == {} @@ -111,8 +111,8 @@ def test_convert(graph_class): # If attribute is completely missing (and no default), then just ignore it Gcg = nxcg.from_networkx(G, edge_attrs={"y": None}) - cp.testing.assert_array_equal(Gcg.row_indices, [0, 1]) - cp.testing.assert_array_equal(Gcg.col_indices, [1, 0]) + cp.testing.assert_array_equal(Gcg.src_indices, [0, 1]) + cp.testing.assert_array_equal(Gcg.dst_indices, [1, 0]) assert sorted(Gcg.edge_values) == sorted(Gcg.edge_masks) == [] H = nxcg.to_networkx(Gcg) assert list(H.edges(data=True)) == [(0, 1, {})] @@ -123,8 +123,8 @@ def test_convert(graph_class): # Some edges are missing 'x' attribute; need to use a mask for kwargs in [{"preserve_edge_attrs": True}, {"edge_attrs": {"x": None}}]: Gcg = nxcg.from_networkx(G, **kwargs) - cp.testing.assert_array_equal(Gcg.row_indices, [0, 0, 1, 2]) - cp.testing.assert_array_equal(Gcg.col_indices, [1, 2, 0, 0]) + cp.testing.assert_array_equal(Gcg.src_indices, [0, 0, 1, 2]) + cp.testing.assert_array_equal(Gcg.dst_indices, [1, 2, 0, 0]) assert sorted(Gcg.edge_values) == sorted(Gcg.edge_masks) == ["x"] cp.testing.assert_array_equal(Gcg.edge_masks["x"], [True, False, True, False]) cp.testing.assert_array_equal(Gcg.edge_values["x"][Gcg.edge_masks["x"]], [2, 2]) @@ -160,8 +160,8 @@ def test_convert(graph_class): ]: Gcg = nxcg.from_networkx(G, **kwargs) assert Gcg.id_to_key == [10, 20, 30] # Remap node IDs to 0, 1, ... - cp.testing.assert_array_equal(Gcg.row_indices, [0, 0, 1, 2]) - cp.testing.assert_array_equal(Gcg.col_indices, [1, 2, 0, 0]) + cp.testing.assert_array_equal(Gcg.src_indices, [0, 0, 1, 2]) + cp.testing.assert_array_equal(Gcg.dst_indices, [1, 2, 0, 0]) cp.testing.assert_array_equal(Gcg.edge_values["x"], [1, 2, 1, 2]) assert sorted(Gcg.edge_masks) == ["y"] cp.testing.assert_array_equal(Gcg.edge_masks["y"], [False, True, False, True]) @@ -181,8 +181,8 @@ def test_convert(graph_class): ]: Gcg = nxcg.from_networkx(G, **kwargs) assert Gcg.id_to_key == [10, 20, 30] # Remap node IDs to 0, 1, ... - cp.testing.assert_array_equal(Gcg.row_indices, [0, 0, 1, 2]) - cp.testing.assert_array_equal(Gcg.col_indices, [1, 2, 0, 0]) + cp.testing.assert_array_equal(Gcg.src_indices, [0, 0, 1, 2]) + cp.testing.assert_array_equal(Gcg.dst_indices, [1, 2, 0, 0]) cp.testing.assert_array_equal(Gcg.node_values["foo"], [100, 200, 300]) assert sorted(Gcg.node_masks) == ["bar"] cp.testing.assert_array_equal(Gcg.node_masks["bar"], [False, True, False]) @@ -202,8 +202,8 @@ def test_convert(graph_class): ]: Gcg = nxcg.from_networkx(G, **kwargs) assert Gcg.id_to_key == [10, 20, 30] # Remap node IDs to 0, 1, ... - cp.testing.assert_array_equal(Gcg.row_indices, [0, 0, 1, 2]) - cp.testing.assert_array_equal(Gcg.col_indices, [1, 2, 0, 0]) + cp.testing.assert_array_equal(Gcg.src_indices, [0, 0, 1, 2]) + cp.testing.assert_array_equal(Gcg.dst_indices, [1, 2, 0, 0]) cp.testing.assert_array_equal(Gcg.node_values["bar"], [0, 1000, 0]) assert Gcg.node_masks == {} diff --git a/python/nx-cugraph/nx_cugraph/tests/test_match_api.py b/python/nx-cugraph/nx_cugraph/tests/test_match_api.py index ecfda1397db..a654ff343ed 100644 --- a/python/nx-cugraph/nx_cugraph/tests/test_match_api.py +++ b/python/nx-cugraph/nx_cugraph/tests/test_match_api.py @@ -31,6 +31,9 @@ def test_match_signature_and_names(): if is_nx_30_or_31 and name in {"louvain_communities"}: continue + if name not in nx_backends._registered_algorithms: + print(f"{name} not dispatched from networkx") + continue dispatchable_func = nx_backends._registered_algorithms[name] # nx version >=3.2 uses orig_func, version >=3.0,<3.2 uses _orig_func if is_nx_30_or_31: diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml index 2478a02df9b..9fec8fa0242 100644 --- a/python/nx-cugraph/pyproject.toml +++ b/python/nx-cugraph/pyproject.toml @@ -81,7 +81,10 @@ float_to_top = true default_section = "THIRDPARTY" known_first_party = "nx_cugraph" line_length = 88 -extend_skip_glob = ["nx_cugraph/__init__.py"] +extend_skip_glob = [ + "nx_cugraph/__init__.py", + "nx_cugraph/classes/__init__.py", +] [tool.pytest.ini_options] minversion = "6.0" From c90fd9480644c2ef68cf67041b97160fc174313e Mon Sep 17 00:00:00 2001 From: GALI PREM SAGAR Date: Thu, 26 Oct 2023 07:40:03 -0500 Subject: [PATCH 2/5] Unpin `dask` and `distributed` for `23.12` development (#3953) This PR relaxes `dask` and `distributed` versions pinning for `23.12` development. xref: https://github.com/rapidsai/cudf/pull/14320 Authors: - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Brad Rees (https://github.com/BradReesWork) - Ray Douglass (https://github.com/raydouglass) - https://github.com/jakirkham URL: https://github.com/rapidsai/cugraph/pull/3953 --- ci/test_wheel_cugraph.sh | 2 +- conda/environments/all_cuda-118_arch-x86_64.yaml | 2 +- conda/environments/all_cuda-120_arch-x86_64.yaml | 2 +- conda/recipes/cugraph-pyg/meta.yaml | 2 +- conda/recipes/cugraph-service/meta.yaml | 2 +- conda/recipes/cugraph/meta.yaml | 6 +++--- dependencies.yaml | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/ci/test_wheel_cugraph.sh b/ci/test_wheel_cugraph.sh index ac18459128a..f9e2aa6d8da 100755 --- a/ci/test_wheel_cugraph.sh +++ b/ci/test_wheel_cugraph.sh @@ -9,6 +9,6 @@ RAPIDS_PY_WHEEL_NAME="pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-whe python -m pip install --no-deps ./local-pylibcugraph-dep/pylibcugraph*.whl # Always install latest dask for testing -python -m pip install git+https://github.com/dask/dask.git@2023.9.2 git+https://github.com/dask/distributed.git@2023.9.2 +python -m pip install git+https://github.com/dask/dask.git@main git+https://github.com/dask/distributed.git@main ./ci/test_wheel.sh cugraph python/cugraph diff --git a/conda/environments/all_cuda-118_arch-x86_64.yaml b/conda/environments/all_cuda-118_arch-x86_64.yaml index 71a8d10b935..2f3a9c988cf 100644 --- a/conda/environments/all_cuda-118_arch-x86_64.yaml +++ b/conda/environments/all_cuda-118_arch-x86_64.yaml @@ -20,7 +20,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core==2023.9.2 +- dask-core>=2023.9.2 - dask-cuda==23.12.* - dask-cudf==23.12.* - dask>=2023.7.1 diff --git a/conda/environments/all_cuda-120_arch-x86_64.yaml b/conda/environments/all_cuda-120_arch-x86_64.yaml index 9179fba788e..31ff503e682 100644 --- a/conda/environments/all_cuda-120_arch-x86_64.yaml +++ b/conda/environments/all_cuda-120_arch-x86_64.yaml @@ -20,7 +20,7 @@ dependencies: - cupy>=12.0.0 - cxx-compiler - cython>=3.0.0 -- dask-core==2023.9.2 +- dask-core>=2023.9.2 - dask-cuda==23.12.* - dask-cudf==23.12.* - dask>=2023.7.1 diff --git a/conda/recipes/cugraph-pyg/meta.yaml b/conda/recipes/cugraph-pyg/meta.yaml index 1dc5a75c41b..f4b2e9a4ee9 100644 --- a/conda/recipes/cugraph-pyg/meta.yaml +++ b/conda/recipes/cugraph-pyg/meta.yaml @@ -26,7 +26,7 @@ requirements: - python - scikit-build >=0.13.1 run: - - distributed ==2023.9.2 + - distributed >=2023.9.2 - numba >=0.57 - numpy >=1.21 - python diff --git a/conda/recipes/cugraph-service/meta.yaml b/conda/recipes/cugraph-service/meta.yaml index 2daf0438351..3d001e83e1e 100644 --- a/conda/recipes/cugraph-service/meta.yaml +++ b/conda/recipes/cugraph-service/meta.yaml @@ -59,7 +59,7 @@ outputs: - cupy >=12.0.0 - dask-cuda ={{ minor_version }} - dask-cudf ={{ minor_version }} - - distributed ==2023.9.2 + - distributed >=2023.9.2 - numba >=0.57 - numpy >=1.21 - python diff --git a/conda/recipes/cugraph/meta.yaml b/conda/recipes/cugraph/meta.yaml index f9bf54a2ef4..3691db508d9 100644 --- a/conda/recipes/cugraph/meta.yaml +++ b/conda/recipes/cugraph/meta.yaml @@ -76,9 +76,9 @@ requirements: - cupy >=12.0.0 - dask-cuda ={{ minor_version }} - dask-cudf ={{ minor_version }} - - dask ==2023.9.2 - - dask-core ==2023.9.2 - - distributed ==2023.9.2 + - dask >=2023.9.2 + - dask-core >=2023.9.2 + - distributed >=2023.9.2 - fsspec>=0.6.0 - libcugraph ={{ version }} - pylibcugraph ={{ version }} diff --git a/dependencies.yaml b/dependencies.yaml index 736aa2e019f..b127d9bd29e 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -385,7 +385,7 @@ dependencies: - output_types: conda packages: - aiohttp - - &dask-core_conda dask-core==2023.9.2 + - &dask-core_conda dask-core>=2023.9.2 - fsspec>=0.6.0 - libcudf==23.12.* - requests From 4e39f209392ad604c747dc92a8e28b398a4ef1c7 Mon Sep 17 00:00:00 2001 From: Jake Awe <50372925+AyodeAwe@users.noreply.github.com> Date: Fri, 27 Oct 2023 02:25:07 -0500 Subject: [PATCH 3/5] Download `xml` docs artifact through CloudFront endpoint (#3955) * local docs build workaround * pipe output --- ci/build_docs.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ci/build_docs.sh b/ci/build_docs.sh index e774a6f9871..3f97f652d41 100755 --- a/ci/build_docs.sh +++ b/ci/build_docs.sh @@ -44,7 +44,7 @@ for PROJECT in libcugraphops libwholegraph; do rapids-logger "Download ${PROJECT} xml_tar" TMP_DIR=$(mktemp -d) export XML_DIR_${PROJECT^^}="$TMP_DIR" - aws s3 cp --only-show-errors "s3://rapidsai-docs/${PROJECT}/xml_tar/${RAPIDS_VERSION_NUMBER}/xml.tar.gz" - | tar xzf - -C "${TMP_DIR}" + curl "https://d1664dvumjb44w.cloudfront.net/${PROJECT}/xml_tar/${RAPIDS_VERSION_NUMBER}/xml.tar.gz" | tar -xzf - -C "${TMP_DIR}" done rapids-logger "Build CPP docs" From a57f779dc1c1ab8ea2575210747eb1066db5f9cf Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 27 Oct 2023 11:59:57 -0700 Subject: [PATCH 4/5] Update the neighbor intersection primitive to support edge masking. (#3550) This PR updates the detail::nbr_intersection() primitive and the per_v_pair_transform_dst_nbr_intersection primitive (which calls the detail::nbr_intersection primitive) to work with edge masking (graph_view_t object with attached edge mask). Several utility functions are updated to support edge masking as well to support primitive updates and testing. This PR is necessary to implement K-truss with the cuGraph C++ primitives. See https://github.com/rapidsai/cugraph/issues/3446#issuecomment-1499649664 for additional details. Authors: - Seunghwa Kang (https://github.com/seunghwak) - Brad Rees (https://github.com/BradReesWork) - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) - Joseph Nke (https://github.com/jnke2016) URL: https://github.com/rapidsai/cugraph/pull/3550 --- .../detail/decompress_edge_partition.cuh | 138 ++- ...ge_partition_edge_property_device_view.cuh | 12 +- cpp/include/cugraph/graph_mask.hpp | 382 -------- cpp/include/cugraph/graph_view.hpp | 24 +- .../cugraph/utilities/device_functors.cuh | 55 +- cpp/include/cugraph/utilities/mask_utils.cuh | 150 ++++ .../cugraph/utilities/packed_bool_utils.hpp | 7 + cpp/src/prims/detail/nbr_intersection.cuh | 835 +++++++++++------- .../detail/optional_dataframe_buffer.hpp | 1 + cpp/src/prims/kv_store.cuh | 6 +- ..._v_pair_transform_dst_nbr_intersection.cuh | 50 +- ...r_v_random_select_transform_outgoing_e.cuh | 2 +- ...m_reduce_dst_key_aggregated_outgoing_e.cuh | 10 +- ...t_nbr_intersection_of_e_endpoints_by_v.cuh | 35 +- cpp/src/structure/coarsen_graph_impl.cuh | 16 +- .../structure/decompress_to_edgelist_impl.cuh | 56 +- cpp/src/structure/graph_impl.cuh | 15 +- cpp/src/structure/graph_view_impl.cuh | 79 +- cpp/tests/CMakeLists.txt | 4 - ...r_v_pair_transform_dst_nbr_intersection.cu | 128 ++- ...transform_dst_nbr_weighted_intersection.cu | 77 +- cpp/tests/structure/graph_mask_test.cpp | 64 -- 22 files changed, 1085 insertions(+), 1061 deletions(-) delete mode 100644 cpp/include/cugraph/graph_mask.hpp create mode 100644 cpp/include/cugraph/utilities/mask_utils.cuh delete mode 100644 cpp/tests/structure/graph_mask_test.cpp diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh index cd8739114f2..4b256a0413a 100644 --- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh +++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -46,7 +47,7 @@ __global__ void decompress_to_edgelist_mid_degree( edge_partition_device_view_t edge_partition, vertex_t major_range_first, vertex_t major_range_last, - vertex_t* majors) + raft::device_span majors) { auto const tid = threadIdx.x + blockIdx.x * blockDim.x; static_assert(decompress_edge_partition_block_size % raft::warp_size() == 0); @@ -76,7 +77,7 @@ __global__ void decompress_to_edgelist_high_degree( edge_partition_device_view_t edge_partition, vertex_t major_range_first, vertex_t major_range_last, - vertex_t* majors) + raft::device_span majors) { auto major_start_offset = static_cast(major_range_first - edge_partition.major_range_first()); @@ -103,10 +104,19 @@ template void decompress_edge_partition_to_fill_edgelist_majors( raft::handle_t const& handle, edge_partition_device_view_t edge_partition, - vertex_t* majors, + std::optional> + edge_partition_mask_view, + raft::device_span majors, std::optional> const& segment_offsets) { - auto execution_policy = handle.get_thrust_policy(); + auto tmp_buffer = edge_partition_mask_view + ? std::make_optional>( + edge_partition.number_of_edges(), handle.get_stream()) + : std::nullopt; + + auto output_buffer = + tmp_buffer ? raft::device_span((*tmp_buffer).data(), (*tmp_buffer).size()) : majors; + if (segment_offsets) { // FIXME: we may further improve performance by 1) concurrently running kernels on different // segments; 2) individually tuning block sizes for different segments; and 3) adding one more @@ -124,7 +134,7 @@ void decompress_edge_partition_to_fill_edgelist_majors( edge_partition, edge_partition.major_range_first(), edge_partition.major_range_first() + (*segment_offsets)[1], - majors); + output_buffer); } if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], @@ -138,49 +148,63 @@ void decompress_edge_partition_to_fill_edgelist_majors( edge_partition, edge_partition.major_range_first() + (*segment_offsets)[1], edge_partition.major_range_first() + (*segment_offsets)[2], - majors); + output_buffer); } if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { thrust::for_each( - execution_policy, + handle.get_thrust_policy(), thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2], thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3], - [edge_partition, majors] __device__(auto major) { + [edge_partition, output_buffer] __device__(auto major) { auto major_offset = edge_partition.major_offset_from_major_nocheck(major); auto local_degree = edge_partition.local_degree(major_offset); auto local_offset = edge_partition.local_offset(major_offset); - thrust::fill( - thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); + thrust::fill(thrust::seq, + output_buffer.begin() + local_offset, + output_buffer.begin() + local_offset + local_degree, + major); }); } if (edge_partition.dcs_nzd_vertex_count() && (*(edge_partition.dcs_nzd_vertex_count()) > 0)) { thrust::for_each( - execution_policy, + handle.get_thrust_policy(), thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())), - [edge_partition, major_start_offset = (*segment_offsets)[3], majors] __device__(auto idx) { + [edge_partition, major_start_offset = (*segment_offsets)[3], output_buffer] __device__( + auto idx) { auto major = *(edge_partition.major_from_major_hypersparse_idx_nocheck(idx)); auto major_idx = major_start_offset + idx; // major_offset != major_idx in the hypersparse region auto local_degree = edge_partition.local_degree(major_idx); auto local_offset = edge_partition.local_offset(major_idx); - thrust::fill( - thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); + thrust::fill(thrust::seq, + output_buffer.begin() + local_offset, + output_buffer.begin() + local_offset + local_degree, + major); }); } } else { - thrust::for_each( - execution_policy, - thrust::make_counting_iterator(edge_partition.major_range_first()), - thrust::make_counting_iterator(edge_partition.major_range_first()) + - edge_partition.major_range_size(), - [edge_partition, majors] __device__(auto major) { - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - auto local_degree = edge_partition.local_degree(major_offset); - auto local_offset = edge_partition.local_offset(major_offset); - thrust::fill( - thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); - }); + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(edge_partition.major_range_first()), + thrust::make_counting_iterator(edge_partition.major_range_first()) + + edge_partition.major_range_size(), + [edge_partition, output_buffer] __device__(auto major) { + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + auto local_degree = edge_partition.local_degree(major_offset); + auto local_offset = edge_partition.local_offset(major_offset); + thrust::fill(thrust::seq, + output_buffer.begin() + local_offset, + output_buffer.begin() + local_offset + local_degree, + major); + }); + } + + if (tmp_buffer) { + copy_if_mask_set(handle, + (*tmp_buffer).begin(), + (*tmp_buffer).end(), + (*edge_partition_mask_view).value_first(), + majors.begin()); } } @@ -192,33 +216,59 @@ void decompress_edge_partition_to_edgelist( edge_partition_weight_view, std::optional> edge_partition_id_view, - vertex_t* edgelist_majors /* [OUT] */, - vertex_t* edgelist_minors /* [OUT] */, - std::optional edgelist_weights /* [OUT] */, - std::optional edgelist_ids /* [OUT] */, + std::optional> + edge_partition_mask_view, + raft::device_span edgelist_majors /* [OUT] */, + raft::device_span edgelist_minors /* [OUT] */, + std::optional> edgelist_weights /* [OUT] */, + std::optional> edgelist_ids /* [OUT] */, std::optional> const& segment_offsets) { auto number_of_edges = edge_partition.number_of_edges(); decompress_edge_partition_to_fill_edgelist_majors( - handle, edge_partition, edgelist_majors, segment_offsets); - thrust::copy(handle.get_thrust_policy(), - edge_partition.indices(), - edge_partition.indices() + number_of_edges, - edgelist_minors); - if (edge_partition_id_view) { - assert(edgelist_ids.has_value()); + handle, edge_partition, edge_partition_mask_view, edgelist_majors, segment_offsets); + if (edge_partition_mask_view) { + copy_if_mask_set(handle, + edge_partition.indices(), + edge_partition.indices() + number_of_edges, + (*edge_partition_mask_view).value_first(), + edgelist_minors.begin()); + } else { thrust::copy(handle.get_thrust_policy(), - (*edge_partition_id_view).value_first(), - (*edge_partition_id_view).value_first() + number_of_edges, - (*edgelist_ids)); + edge_partition.indices(), + edge_partition.indices() + number_of_edges, + edgelist_minors.begin()); } if (edge_partition_weight_view) { assert(edgelist_weights.has_value()); - thrust::copy(handle.get_thrust_policy(), - (*edge_partition_weight_view).value_first(), - (*edge_partition_weight_view).value_first() + number_of_edges, - (*edgelist_weights)); + if (edge_partition_mask_view) { + copy_if_mask_set(handle, + (*edge_partition_weight_view).value_first(), + (*edge_partition_weight_view).value_first() + number_of_edges, + (*edge_partition_mask_view).value_first(), + (*edgelist_weights).begin()); + } else { + thrust::copy(handle.get_thrust_policy(), + (*edge_partition_weight_view).value_first(), + (*edge_partition_weight_view).value_first() + number_of_edges, + (*edgelist_weights).begin()); + } + } + if (edge_partition_id_view) { + assert(edgelist_ids.has_value()); + if (edge_partition_mask_view) { + copy_if_mask_set(handle, + (*edge_partition_id_view).value_first(), + (*edge_partition_id_view).value_first() + number_of_edges, + (*edge_partition_mask_view).value_first(), + (*edgelist_ids).begin()); + } else { + thrust::copy(handle.get_thrust_policy(), + (*edge_partition_id_view).value_first(), + (*edge_partition_id_view).value_first() + number_of_edges, + (*edgelist_ids).begin()); + } } } diff --git a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh index 18091567e38..e5b64b1e02f 100644 --- a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh @@ -33,8 +33,9 @@ template ::value_type> class edge_partition_edge_property_device_view_t { public: - using edge_type = edge_t; - using value_type = value_t; + using edge_type = edge_t; + using value_type = value_t; + static constexpr bool is_packed_bool = cugraph::is_packed_bool(); static constexpr bool has_packed_bool_element = cugraph::has_packed_bool_element(); @@ -53,7 +54,7 @@ class edge_partition_edge_property_device_view_t { value_first_ = view.value_firsts()[partition_idx]; } - __host__ __device__ ValueIterator value_first() { return value_first_; } + __host__ __device__ ValueIterator value_first() const { return value_first_; } __device__ value_t get(edge_t offset) const { @@ -173,8 +174,9 @@ class edge_partition_edge_property_device_view_t { template class edge_partition_edge_dummy_property_device_view_t { public: - using edge_type = edge_t; - using value_type = thrust::nullopt_t; + using edge_type = edge_t; + using value_type = thrust::nullopt_t; + static constexpr bool is_packed_bool = false; static constexpr bool has_packed_bool_element = false; diff --git a/cpp/include/cugraph/graph_mask.hpp b/cpp/include/cugraph/graph_mask.hpp deleted file mode 100644 index 2048d3692c7..00000000000 --- a/cpp/include/cugraph/graph_mask.hpp +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -#include -#include -#include -#include - -namespace cugraph { - -/** - * Compile-time fast lookup of log2(num_bits(mask_t)) to eliminate - * the log2 computation for powers of 2. - * @tparam mask_t - */ -template -__host__ __device__ constexpr int log_bits() -{ - switch (std::numeric_limits::digits) { - case 8: return 3; - case 16: return 4; - case 32: return 5; - case 64: return 6; - default: return log2(std::numeric_limits::digits); - } -} - -/** - * Uses bit-shifting to perform a fast mod operation. This - * is used to compute the index of a specific bit - * @tparam mask_t - * @tparam T - */ -template -__host__ __device__ int bit_mod(T numerator) -{ - return numerator & (std::numeric_limits::digits - 1); -} - -namespace detail { - -/** - * Sets the bit at location h in a one-hot encoded 32-bit int array - */ -template -__device__ __host__ inline void _set_bit(mask_type* arr, mask_type h) -{ - mask_type bit = bit_mod(h); - mask_type idx = h >> log_bits(); - atomicOr(arr + idx, 1 << bit); -} - -/** - * Unsets the bit at location h in a one-hot encoded 32-bit int array - */ -template -__device__ __host__ inline void _unset_bit(mask_type* arr, mask_type h) -{ - mask_type bit = bit_mod(h); - mask_type idx = h >> log_bits(); - atomicAnd(arr + idx, ~(1 << bit)); -} - -/** - * Returns whether or not bit at location h is nonzero in a one-hot - * encoded 32-bit in array. - */ -template -__device__ __host__ inline bool _is_set(mask_type* arr, mask_type h) -{ - mask_type bit = bit_mod(h); - mask_type idx = h >> log_bits(); - return arr[idx] >> bit & 1U; -} -}; // namespace detail - -/** - * Mask view to be used in device functions for reading and updating existing mask. - * This assumes the appropriate masks (vertex/edge) have already been initialized, - * since that will need to be done from the owning object. - * @tparam vertex_t - * @tparam edge_t - * @tparam mask_t - */ -template -struct graph_mask_view_t { - public: - graph_mask_view_t() = delete; - - graph_mask_view_t(vertex_t n_vertices, - edge_t n_edges, - std::optional>& vertices, - std::optional>& edges, - bool complement = false) - : n_vertices_(n_vertices), - n_edges_(n_edges), - complement_(complement), - vertices_(vertices), - edges_(edges) - { - } - - graph_mask_view_t(graph_mask_view_t const& other) = default; - using vertex_type = vertex_t; - using edge_type = edge_t; - using mask_type = mask_t; - using size_type = std::size_t; - - ~graph_mask_view_t() = default; - graph_mask_view_t(graph_mask_view_t&&) noexcept = default; - - graph_mask_view_t& operator=(graph_mask_view_t&&) noexcept = default; - graph_mask_view_t& operator=(graph_mask_view_t const& other) = default; - - /** - * Are masks complemeneted? - * - * - !complemented means masks are inclusive (masking in) - * - complemented means masks are exclusive (masking out) - */ - __host__ __device__ bool is_complemented() const { return complement_; } - - /** - * Has the edge mask been initialized? - */ - __host__ __device__ bool has_edge_mask() const { return edges_.has_value(); } - - /** - * Has the vertex mask been initialized? - */ - __host__ __device__ bool has_vertex_mask() const { return vertices_.has_value(); } - - /** - * Get the vertex mask - */ - __host__ __device__ std::optional> get_vertex_mask() const - { - return vertices_; - } - - /** - * Get the edge mask - */ - __host__ __device__ std::optional> get_edge_mask() const - { - return edges_; - } - - __host__ __device__ edge_t get_edge_mask_size() const { return n_edges_ >> log_bits(); } - - __host__ __device__ vertex_t get_vertex_mask_size() const - { - return n_vertices_ >> log_bits(); - } - - protected: - vertex_t n_vertices_; - edge_t n_edges_; - bool complement_{false}; - std::optional> vertices_{std::nullopt}; - std::optional> edges_{std::nullopt}; -}; // struct graph_mask_view_t - -/** - * An owning container object to manage separate bitmasks for - * filtering vertices and edges. A compliment setting - * determines whether the value of 1 for corresponding - * items means they should be masked in (included) or - * masked out (excluded). - * - * Creating this object does not allocate any memory on device. - * In order to start using and querying the masks, they will - * need to first be initialized. - * - * @tparam vertex_t - * @tparam edge_t - * @tparam mask_t - */ -template -struct graph_mask_t { - public: - using vertex_type = vertex_t; - using edge_type = edge_t; - using mask_type = mask_t; - using size_type = std::size_t; - - ~graph_mask_t() = default; - graph_mask_t(graph_mask_t&&) noexcept = default; - - graph_mask_t() = delete; - - explicit graph_mask_t(raft::handle_t const& handle, - vertex_t n_vertices, - edge_t n_edges, - bool complement = false) - : handle_(handle), - n_vertices_(n_vertices), - n_edges_(n_edges), - edges_(0, handle.get_stream()), - vertices_(0, handle.get_stream()), - complement_(complement) - { - } - - explicit graph_mask_t(graph_mask_t const& other) - : handle_(other.handle_), - n_vertices_(other.n_vertices_), - n_edges_(other.n_edges_), - edges_(other.edges_, other.handle_.get_stream()), - vertices_(other.vertices_, other.handle_.get_stream()), - complement_(other.complement_) - { - } - - graph_mask_t& operator=(graph_mask_t&&) noexcept = default; - graph_mask_t& operator=(graph_mask_t const& other) = default; - - /** - * Determines whether the 1 bit in a vertex or edge position - * represents an inclusive mask or exclusive mask. Default is - * an inclusive mask (e.g. 1 bit means the corresponding vertex - * or edge should be included in computations). - * @return - */ - bool is_complemented() const { return complement_; } - - /** - * Whether or not the current mask object has been initialized - * with an edge mask. - * @return - */ - bool has_edge_mask() const { return get_edge_mask().has_value(); } - - /** - * Whether or not the current mask object has been initialized - * with a vertex mask. - * @return - */ - bool has_vertex_mask() const { return get_vertex_mask().has_value(); } - - /** - * Returns the edge mask if it has been initialized on the instance - * @return - */ - std::optional get_edge_mask() const - { - return edges_.size() > 0 ? std::make_optional(edges_.data()) : std::nullopt; - } - - /** - * Retuns the vertex mask if it has been initialized on the instance - * @return - */ - std::optional get_vertex_mask() const - { - return vertices_.size() > 0 ? std::make_optional(vertices_.data()) - : std::nullopt; - } - - vertex_t get_n_vertices() { return n_vertices_; } - - edge_t get_n_edges() { return n_edges_; } - - edge_t get_edge_mask_size() const { return n_edges_ >> log_bits(); } - - vertex_t get_vertex_mask_size() const { return n_vertices_ >> log_bits(); } - - void initialize_edge_mask(bool init = 0) - { - if (!has_edge_mask()) { - allocate_edge_mask(); - RAFT_CUDA_TRY(cudaMemsetAsync(edges_.data(), - edges_.size() * sizeof(mask_t), - std::numeric_limits::max() * init, - handle_.get_stream())); - } - } - - void initialize_vertex_mask(bool init = 0) - { - if (!has_vertex_mask()) { - allocate_vertex_mask(); - RAFT_CUDA_TRY(cudaMemsetAsync(vertices_.data(), - vertices_.size() * sizeof(mask_t), - std::numeric_limits::max() * init, - handle_.get_stream())); - } - } - - /** - * Initializes an edge mask by allocating the device memory - */ - void allocate_edge_mask() - { - if (edges_.size() == 0) { - edges_.resize(get_edge_mask_size(), handle_.get_stream()); - clear_edge_mask(); - } - } - - /** - * Initializes a vertex mask by allocating the device memory - */ - void allocate_vertex_mask() - { - if (vertices_.size() == 0) { - vertices_.resize(get_vertex_mask_size(), handle_.get_stream()); - clear_vertex_mask(); - } - } - - /** - * Clears out all the masked bits of the edge mask - */ - void clear_edge_mask() - { - if (edges_.size() > 0) { - RAFT_CUDA_TRY( - cudaMemsetAsync(edges_.data(), edges_.size() * sizeof(mask_t), 0, handle_.get_stream())); - } - } - - /** - * Clears out all the masked bits of the vertex mask - */ - void clear_vertex_mask() - { - if (vertices_.size() > 0) { - RAFT_CUDA_TRY(cudaMemsetAsync( - vertices_.data(), vertices_.size() * sizeof(mask_t), 0, handle_.get_stream())); - } - } - - /** - * Returns a view of the current mask object which can safely be used - * in device functions. - * - * Note that the view will not be able to initialize the underlying - * masks so they will need to be initialized before this method is - * invoked. - */ - auto view() - { - auto vspan = has_vertex_mask() ? std::make_optional>(vertices_.data(), - vertices_.size()) - : std::nullopt; - auto espan = has_edge_mask() - ? std::make_optional>(edges_.data(), edges_.size()) - : std::nullopt; - return graph_mask_view_t( - n_vertices_, n_edges_, vspan, espan, complement_); - } - - protected: - raft::handle_t const& handle_; - vertex_t n_vertices_; - edge_t n_edges_; - bool complement_ = false; - rmm::device_uvector vertices_; - rmm::device_uvector edges_; - -}; // struct graph_mask_t -}; // namespace cugraph \ No newline at end of file diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index 2d10b435224..f30a8b7e2af 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -444,12 +444,6 @@ class graph_view_t local_edge_partition_view( size_t partition_idx) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - vertex_t major_range_first{}; vertex_t major_range_last{}; vertex_t minor_range_first{}; @@ -748,6 +740,11 @@ class graph_view_t> edge_mask_view() const + { + return edge_mask_view_; + } + private: std::vector edge_partition_offsets_{}; std::vector edge_partition_indices_{}; @@ -856,12 +853,6 @@ class graph_view_tnumber_of_edges(); - } - vertex_t local_edge_partition_src_range_size(size_t partition_idx = 0) const { assert(partition_idx == 0); @@ -1030,6 +1021,11 @@ class graph_view_t> edge_mask_view() const + { + return edge_mask_view_; + } + private: edge_t const* offsets_{nullptr}; vertex_t const* indices_{nullptr}; diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index 501e74cf47b..3af8ed1dd19 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -15,18 +15,11 @@ */ #pragma once -#include -#include -#include +#include -#include -#include -#include -#include +#include #include -#include -#include namespace cugraph { @@ -44,12 +37,12 @@ struct pack_bool_t { __device__ uint32_t operator()(size_t i) const { - auto first = i * (sizeof(uint32_t) * 8); - auto last = std::min((i + 1) * (sizeof(uint32_t) * 8), num_bools); + auto first = i * packed_bools_per_word(); + auto last = std::min((i + 1) * packed_bools_per_word(), num_bools); uint32_t ret{0}; for (auto j = first; j < last; ++j) { if (*(bool_first + j)) { - auto mask = uint32_t{1} << (j % (sizeof(uint32_t) * 8)); + auto mask = packed_bool_mask(j); ret |= mask; } } @@ -57,6 +50,22 @@ struct pack_bool_t { } }; +template +struct check_bit_set_t { + PackedBoolIterator bitmap_first{}; + T idx_first{}; + + static_assert( + std::is_same_v::value_type, uint32_t>); + + __device__ bool operator()(T idx) const + { + auto offset = idx - idx_first; + return static_cast(*(bitmap_first + packed_bool_offset(offset)) & + packed_bool_mask(offset)); + } +}; + template struct indirection_t { Iterator first{}; @@ -80,7 +89,14 @@ struct indirection_if_idx_valid_t { }; template -struct not_equal_t { +struct is_equal_t { + T compare{}; + + __device__ bool operator()(T val) const { return val == compare; } +}; + +template +struct is_not_equal_t { T compare{}; __device__ bool operator()(T val) const { return val != compare; } @@ -96,19 +112,6 @@ struct is_first_in_run_t { } }; -template -struct check_bit_set_t { - uint32_t const* bitmaps{nullptr}; - T idx_first{}; - - __device__ bool operator()(T idx) const - { - auto offset = idx - idx_first; - auto mask = uint32_t{1} << (offset % (sizeof(uint32_t) * 8)); - return (*(bitmaps + (offset / (sizeof(uint32_t) * 8))) & mask) > uint32_t{0}; - } -}; - template struct check_in_range_t { T min{}; // inclusive diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh new file mode 100644 index 00000000000..ab1403d019b --- /dev/null +++ b/cpp/include/cugraph/utilities/mask_utils.cuh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace cugraph { + +namespace detail { + +template // should be packed bool +__device__ size_t count_set_bits(MaskIterator mask_first, size_t start_offset, size_t num_bits) +{ + static_assert( + std::is_same_v::value_type, uint32_t>); + + size_t ret{0}; + + mask_first = mask_first + packed_bool_offset(start_offset); + start_offset = start_offset % packed_bools_per_word(); + if (start_offset != 0) { + auto mask = ~packed_bool_partial_mask(start_offset); + if (start_offset + num_bits < packed_bools_per_word()) { + mask &= packed_bool_partial_mask(start_offset + num_bits); + } + ret += __popc(*mask_first & mask); + num_bits -= __popc(mask); + ++mask_first; + } + + return thrust::transform_reduce( + thrust::seq, + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(packed_bool_size(num_bits)), + [mask_first, num_bits] __device__(size_t i) { + auto word = *(mask_first + i); + if ((i + 1) * packed_bools_per_word() > num_bits) { + word &= packed_bool_partial_mask(num_bits % packed_bools_per_word()); + } + return static_cast(__popc(word)); + }, + ret, + thrust::plus{}); +} + +template ::value_type, // for packed bool support + typename output_value_type = typename thrust::iterator_traits< + OutputIterator>::value_type> // for packed bool support +__device__ size_t copy_if_mask_set(InputIterator input_first, + MaskIterator mask_first, + OutputIterator output_first, + size_t input_start_offset, + size_t output_start_offset, + size_t num_items) +{ + static_assert( + std::is_same_v::value_type, uint32_t>); + static_assert( + std::is_same_v::value_type, input_value_type> || + cugraph::has_packed_bool_element()); + static_assert(std::is_same_v::value_type, + output_value_type> || + cugraph::has_packed_bool_element()); + + static_assert(!cugraph::has_packed_bool_element() && + !cugraph::has_packed_bool_element(), + "unimplemented."); + + return static_cast(thrust::distance( + output_first + output_start_offset, + thrust::copy_if(thrust::seq, + input_first + input_start_offset, + input_first + (input_start_offset + num_items), + thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + check_bit_set_t{mask_first, size_t{0}}) + + input_start_offset, + output_first + output_start_offset, + is_equal_t{true}))); +} + +template // should be packed bool +size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, size_t num_bits) +{ + static_assert( + std::is_same_v::value_type, uint32_t>); + + return thrust::transform_reduce( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(packed_bool_size(num_bits)), + [mask_first, num_bits] __device__(size_t i) { + auto word = *(mask_first + i); + if ((i + 1) * packed_bools_per_word() > num_bits) { + word &= packed_bool_partial_mask(num_bits % packed_bools_per_word()); + } + return static_cast(__popc(word)); + }, + size_t{0}, + thrust::plus{}); +} + +template +OutputIterator copy_if_mask_set(raft::handle_t const& handle, + InputIterator input_first, + InputIterator input_last, + MaskIterator mask_first, + OutputIterator output_first) +{ + return thrust::copy_if( + handle.get_thrust_policy(), + input_first, + input_last, + thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), + check_bit_set_t{mask_first, size_t{0}}), + output_first, + is_equal_t{true}); +} + +} // namespace detail + +} // namespace cugraph diff --git a/cpp/include/cugraph/utilities/packed_bool_utils.hpp b/cpp/include/cugraph/utilities/packed_bool_utils.hpp index 0be5711d90c..b418d5afc35 100644 --- a/cpp/include/cugraph/utilities/packed_bool_utils.hpp +++ b/cpp/include/cugraph/utilities/packed_bool_utils.hpp @@ -92,6 +92,13 @@ constexpr uint32_t packed_bool_mask(T bool_offset) constexpr uint32_t packed_bool_full_mask() { return uint32_t{0xffffffff}; } +template +constexpr uint32_t packed_bool_partial_mask(T num_set_bits) +{ + assert(static_cast(num_set_bits) <= sizeof(uint32_t) * 8); + return uint32_t{0xffffffff} >> (sizeof(uint32_t) * 8 - num_set_bits); +} + constexpr uint32_t packed_bool_empty_mask() { return uint32_t{0x0}; } template diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 2f30faebb3e..32247ca3466 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,8 @@ struct update_rx_major_local_degree_t { int minor_comm_size{}; edge_partition_device_view_t edge_partition{}; + thrust::optional> + edge_partition_e_mask{}; size_t reordered_idx_first{}; size_t local_edge_partition_idx{}; @@ -151,19 +154,28 @@ struct update_rx_major_local_degree_t { auto major = rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition]; - edge_t local_degree{}; + vertex_t major_idx{0}; + edge_t local_degree{0}; if (multi_gpu && (edge_partition.major_hypersparse_first() && (major >= *(edge_partition.major_hypersparse_first())))) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); - local_degree = major_hypersparse_idx - ? edge_partition.local_degree((*(edge_partition.major_hypersparse_first()) - - edge_partition.major_range_first()) + - *major_hypersparse_idx) - : edge_t{0}; + if (major_hypersparse_idx) { + major_idx = + (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + + *major_hypersparse_idx; + local_degree = edge_partition.local_degree(major_idx); + } } else { - local_degree = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major)); + major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_degree = edge_partition.local_degree(major_idx); } + + if (edge_partition_e_mask && (local_degree > edge_t{0})) { + auto local_offset = edge_partition.local_offset(major_idx); + local_degree = static_cast( + count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree)); + } + local_degrees_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition] = local_degree; @@ -173,7 +185,7 @@ struct update_rx_major_local_degree_t { template struct update_rx_major_local_nbrs_t { int major_comm_size{}; @@ -181,6 +193,8 @@ struct update_rx_major_local_nbrs_t { edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; + thrust::optional> + edge_partition_e_mask{}; size_t reordered_idx_first{}; size_t local_edge_partition_idx{}; @@ -190,12 +204,13 @@ struct update_rx_major_local_nbrs_t { raft::device_span rx_majors{}; raft::device_span local_nbr_offsets_for_rx_majors{}; raft::device_span local_nbrs_for_rx_majors{}; - optional_property_buffer_view_t local_nbrs_properties_for_rx_majors{}; + optional_property_buffer_mutable_view_t local_e_property_values_for_rx_majors{}; __device__ void operator()(size_t idx) { using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; - auto it = thrust::upper_bound( + + auto it = thrust::upper_bound( thrust::seq, rx_reordered_group_lasts.begin(), rx_reordered_group_lasts.end(), idx); auto major_comm_rank = static_cast(thrust::distance(rx_reordered_group_lasts.begin(), it)); auto offset_in_local_edge_partition = @@ -204,39 +219,76 @@ struct update_rx_major_local_nbrs_t { auto major = rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition]; - vertex_t const* indices{nullptr}; - [[maybe_unused]] edge_t edge_offset{0}; + + edge_t edge_offset{0}; edge_t local_degree{0}; if (multi_gpu && (edge_partition.major_hypersparse_first() && (major >= *(edge_partition.major_hypersparse_first())))) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); if (major_hypersparse_idx) { - thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges( + auto major_idx = (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx); + *major_hypersparse_idx; + edge_offset = edge_partition.local_offset(major_idx); + local_degree = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices, edge_offset, local_degree) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + edge_offset = edge_partition.local_offset(major_idx); + local_degree = edge_partition.local_degree(major_idx); } - // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree - // vertices in a single warp (better optimize if this becomes a performance - // bottleneck) - size_t start_offset = + auto indices = edge_partition.indices(); + size_t output_start_offset = local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition]; - thrust::copy(thrust::seq, - indices, - indices + local_degree, - local_nbrs_for_rx_majors.begin() + start_offset); - if constexpr (!std::is_same_v) { - thrust::copy(thrust::seq, - edge_partition_e_value_input.value_first() + edge_offset, - edge_partition_e_value_input.value_first() + (edge_offset + local_degree), - local_nbrs_properties_for_rx_majors.begin() + start_offset); + // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree + // vertices in a single warp (better optimize if this becomes a performance + // bottleneck) + + static_assert(!edge_partition_e_input_device_view_t::has_packed_bool_element, "unimplemented."); + if (local_degree > 0) { + if (edge_partition_e_mask) { + auto mask_first = (*edge_partition_e_mask).value_first(); + if constexpr (!std::is_same_v) { + auto input_first = + thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()) + + edge_offset; + copy_if_mask_set(input_first, + mask_first, + thrust::make_zip_iterator(local_nbrs_for_rx_majors.begin(), + local_e_property_values_for_rx_majors), + edge_offset, + output_start_offset, + local_degree); + } else { + copy_if_mask_set(indices, + mask_first, + local_nbrs_for_rx_majors.begin(), + edge_offset, + output_start_offset, + local_degree); + } + } else { + if constexpr (!std::is_same_v) { + auto input_first = + thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()) + + edge_offset; + thrust::copy(thrust::seq, + input_first, + input_first + local_degree, + thrust::make_zip_iterator(local_nbrs_for_rx_majors.begin(), + local_e_property_values_for_rx_majors) + + output_start_offset); + } else { + thrust::copy(thrust::seq, + indices + edge_offset, + indices + (edge_offset + local_degree), + local_nbrs_for_rx_majors.begin() + output_start_offset); + } + } } } }; @@ -259,36 +311,45 @@ template struct pick_min_degree_t { FirstElementToIdxMap first_element_to_idx_map{}; - raft::device_span first_element_offsets{nullptr}; + raft::device_span first_element_offsets{nullptr}; SecondElementToIdxMap second_element_to_idx_map{}; - raft::device_span second_element_offsets{nullptr}; + raft::device_span second_element_offsets{nullptr}; edge_partition_device_view_t edge_partition{}; + thrust::optional> + edge_partition_e_mask{}; __device__ edge_t operator()(thrust::tuple pair) const { edge_t local_degree0{0}; vertex_t major0 = thrust::get<0>(pair); if constexpr (std::is_same_v) { + vertex_t major_idx{0}; if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && (major0 >= *(edge_partition.major_hypersparse_first()))) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major0); - local_degree0 = - major_hypersparse_idx - ? edge_partition.local_degree((*(edge_partition.major_hypersparse_first()) - - edge_partition.major_range_first()) + - *major_hypersparse_idx) - : edge_t{0}; + if (major_hypersparse_idx) { + major_idx = + (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + + *major_hypersparse_idx; + local_degree0 = edge_partition.local_degree(major_idx); + } } else { - local_degree0 = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major0)); + major_idx = edge_partition.major_offset_from_major_nocheck(major0); + local_degree0 = edge_partition.local_degree(major_idx); } } else { + major_idx = edge_partition.major_offset_from_major_nocheck(major0); + local_degree0 = edge_partition.local_degree(major_idx); + } + + if (edge_partition_e_mask && (local_degree0 > edge_t{0})) { + auto local_offset = edge_partition.local_offset(major_idx); local_degree0 = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major0)); + count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree0); } } else { auto idx = first_element_to_idx_map.find(major0); @@ -299,24 +360,31 @@ struct pick_min_degree_t { edge_t local_degree1{0}; vertex_t major1 = thrust::get<1>(pair); if constexpr (std::is_same_v) { + vertex_t major_idx{0}; if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && (major1 >= *(edge_partition.major_hypersparse_first()))) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major1); - local_degree1 = - major_hypersparse_idx - ? edge_partition.local_degree((*(edge_partition.major_hypersparse_first()) - - edge_partition.major_range_first()) + - *major_hypersparse_idx) - : edge_t{0}; + if (major_hypersparse_idx) { + major_idx = + (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + + *major_hypersparse_idx; + local_degree1 = edge_partition.local_degree(major_idx); + } } else { - local_degree1 = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major1)); + major_idx = edge_partition.major_offset_from_major_nocheck(major1); + local_degree1 = edge_partition.local_degree(major_idx); } } else { + major_idx = edge_partition.major_offset_from_major_nocheck(major1); + local_degree1 = edge_partition.local_degree(major_idx); + } + + if (edge_partition_e_mask && (local_degree1 > edge_t{0})) { + auto local_offset = edge_partition.local_offset(major_idx); local_degree1 = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major1)); + count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree1); } } else { auto idx = second_element_to_idx_map.find(major1); @@ -328,6 +396,71 @@ struct pick_min_degree_t { } }; +template +__device__ edge_t set_intersection_by_key_with_mask(InputKeyIterator0 input_key_first0, + InputKeyIterator1 input_key_first1, + InputValueIterator0 input_value_first0, + InputValueIterator1 input_value_first1, + MaskIterator mask_first, + OutputKeyIterator output_key_first, + OutputValueIterator0 output_value_first0, + OutputValueIterator1 output_value_first1, + edge_t input_start_offset0, + edge_t input_size0, + bool apply_mask0, + edge_t input_start_offset1, + edge_t input_size1, + bool apply_mask1, + size_t output_start_offset) +{ + static_assert( + std::is_same_v::value_type, uint32_t>); + static_assert(std::is_same_v == + std::is_same_v); + + check_bit_set_t check_bit_set{mask_first, edge_t{0}}; + + auto idx0 = input_start_offset0; + auto idx1 = input_start_offset1; + auto output_idx = output_start_offset; + while ((idx0 < (input_start_offset0 + input_size0)) && + (idx1 < (input_start_offset1 + input_size1))) { + bool valid0 = apply_mask0 ? check_bit_set(idx0) : true; + bool valid1 = apply_mask1 ? check_bit_set(idx1) : true; + if (!valid0) { ++idx0; } + if (!valid1) { ++idx1; } + + if (valid0 && valid1) { + auto key0 = *(input_key_first0 + idx0); + auto key1 = *(input_key_first1 + idx1); + if (key0 < key1) { + ++idx0; + } else if (key0 > key1) { + ++idx1; + } else { + *(output_key_first + output_idx) = key0; + if constexpr (!std::is_same_v) { + *(output_value_first0 + output_idx) = *(input_value_first0 + idx0); + *(output_value_first1 + output_idx) = *(input_value_first1 + idx1); + } + ++idx0; + ++idx1; + ++output_idx; + } + } + } + + return (output_idx - output_start_offset); +} + template struct copy_intersecting_nbrs_and_update_intersection_size_t { FirstElementToIdxMap first_element_to_idx_map{}; - raft::device_span first_element_offsets{}; + raft::device_span first_element_offsets{}; raft::device_span first_element_indices{nullptr}; - optional_property_buffer_view_t first_element_properties{}; + optional_property_buffer_view_t first_element_edge_property_values{}; SecondElementToIdxMap second_element_to_idx_map{}; - raft::device_span second_element_offsets{}; + raft::device_span second_element_offsets{}; raft::device_span second_element_indices{nullptr}; - optional_property_buffer_view_t second_element_properties{}; + optional_property_buffer_view_t second_element_edge_property_values{}; edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; + thrust::optional> + edge_partition_e_mask{}; VertexPairIterator vertex_pair_first; raft::device_span nbr_intersection_offsets{nullptr}; raft::device_span nbr_intersection_indices{nullptr}; - optional_property_buffer_view_t nbr_intersection_properties0{}; - optional_property_buffer_view_t nbr_intersection_properties1{}; + optional_property_buffer_mutable_view_t nbr_intersection_e_property_values0{}; + optional_property_buffer_mutable_view_t nbr_intersection_e_property_values1{}; vertex_t invalid_id{}; + __device__ edge_t operator()(size_t i) { using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; - using optional_const_property_buffer_view_t = - std::conditional_t, - raft::device_span, - std::byte /* dummy */>; auto pair = *(vertex_pair_first + i); + vertex_t const* indices0{nullptr}; - optional_const_property_buffer_view_t properties0{}; + std::conditional_t, + edge_property_value_t const*, + void*> + edge_property_values0{nullptr}; edge_t local_edge_offset0{0}; edge_t local_degree0{0}; if constexpr (std::is_same_v) { + indices0 = edge_partition.indices(); + if constexpr (!std::is_same_v) { + edge_property_values0 = edge_partition_e_value_input.value_first(); + } + vertex_t major = thrust::get<0>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -379,42 +521,47 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); if (major_hypersparse_idx) { - thrust::tie(indices0, local_edge_offset0, local_degree0) = edge_partition.local_edges( + auto major_idx = (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx); + *major_hypersparse_idx; + local_edge_offset0 = edge_partition.local_offset(major_idx); + local_degree0 = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices0, local_edge_offset0, local_degree0) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_edge_offset0 = edge_partition.local_offset(major_idx); + local_degree0 = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices0, local_edge_offset0, local_degree0) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_edge_offset0 = edge_partition.local_offset(major_idx); + local_degree0 = edge_partition.local_degree(major_idx); } - + } else { + indices0 = first_element_indices.begin(); if constexpr (!std::is_same_v) { - properties0 = raft::device_span( - edge_partition_e_value_input.value_first() + local_edge_offset0, local_degree0); + edge_property_values0 = first_element_edge_property_values; } - } else { auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); local_edge_offset0 = first_element_offsets[idx]; local_degree0 = static_cast(first_element_offsets[idx + 1] - local_edge_offset0); - indices0 = first_element_indices.begin() + local_edge_offset0; - - if constexpr (!std::is_same_v) { - properties0 = raft::device_span( - first_element_properties.begin() + local_edge_offset0, local_degree0); - } } vertex_t const* indices1{nullptr}; - optional_const_property_buffer_view_t properties1{}; + std::conditional_t, + edge_property_value_t const*, + void*> + edge_property_values1{nullptr}; - [[maybe_unused]] edge_t local_edge_offset1{0}; + edge_t local_edge_offset1{0}; edge_t local_degree1{0}; if constexpr (std::is_same_v) { + indices1 = edge_partition.indices(); + if constexpr (!std::is_same_v) { + edge_property_values1 = edge_partition_e_value_input.value_first(); + } + vertex_t major = thrust::get<1>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -422,83 +569,63 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); if (major_hypersparse_idx) { - thrust::tie(indices1, local_edge_offset1, local_degree1) = edge_partition.local_edges( + auto major_idx = (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx); + *major_hypersparse_idx; + local_edge_offset1 = edge_partition.local_offset(major_idx); + local_degree1 = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices1, local_edge_offset1, local_degree1) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_edge_offset1 = edge_partition.local_offset(major_idx); + local_degree1 = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices1, local_edge_offset1, local_degree1) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_edge_offset1 = edge_partition.local_offset(major_idx); + local_degree1 = edge_partition.local_degree(major_idx); } - + } else { + indices1 = second_element_indices.begin(); if constexpr (!std::is_same_v) { - properties1 = raft::device_span( - edge_partition_e_value_input.value_first() + local_edge_offset1, local_degree1); + edge_property_values1 = second_element_edge_property_values; } - } else { auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); local_edge_offset1 = second_element_offsets[idx]; local_degree1 = static_cast(second_element_offsets[idx + 1] - local_edge_offset1); - indices1 = second_element_indices.begin() + local_edge_offset1; - - if constexpr (!std::is_same_v) { - properties1 = raft::device_span( - second_element_properties.begin() + local_edge_offset1, local_degree1); - } } // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree // vertices in a single warp (better optimize if this becomes a performance // bottleneck) - auto nbr_intersection_first = nbr_intersection_indices.begin() + nbr_intersection_offsets[i]; - - auto nbr_intersection_last = thrust::set_intersection(thrust::seq, - indices0, - indices0 + local_degree0, - indices1, - indices1 + local_degree1, - nbr_intersection_first); - thrust::fill(thrust::seq, - nbr_intersection_last, - nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1], - invalid_id); - - auto insection_size = - static_cast(thrust::distance(nbr_intersection_first, nbr_intersection_last)); - if constexpr (!std::is_same_v) { - auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; - - // copy edge properties from first vertex to common neighbors - thrust::transform(thrust::seq, - nbr_intersection_first, - nbr_intersection_last, - ip0_start, - [indices0, local_degree0, properties0] __device__(auto v) { - auto position = - thrust::lower_bound(thrust::seq, indices0, indices0 + local_degree0, v); - return properties0[thrust::distance(indices0, position)]; - }); - - auto ip1_start = nbr_intersection_properties1.begin() + nbr_intersection_offsets[i]; - - // copy edge properties from second vertex to common neighbors - thrust::transform(thrust::seq, - nbr_intersection_first, - nbr_intersection_last, - ip1_start, - [indices1, local_degree1, properties1] __device__(auto v) { - auto position = - thrust::lower_bound(thrust::seq, indices1, indices1 + local_degree1, v); - return properties1[thrust::distance(indices1, position)]; - }); - } - return insection_size; + auto mask_first = edge_partition_e_mask ? (*edge_partition_e_mask).value_first() + : static_cast(nullptr); + auto intersection_size = set_intersection_by_key_with_mask( + indices0, + indices1, + edge_property_values0, + edge_property_values1, + mask_first, + nbr_intersection_indices.begin(), + nbr_intersection_e_property_values0, + nbr_intersection_e_property_values1, + local_edge_offset0, + local_degree0, + (std::is_same_v && edge_partition_e_mask), + local_edge_offset1, + local_degree1, + (std::is_same_v && edge_partition_e_mask), + nbr_intersection_offsets[i]); + + thrust::fill( + thrust::seq, + nbr_intersection_indices.begin() + (nbr_intersection_offsets[i] + intersection_size), + nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1], + invalid_id); + + return intersection_size; } }; @@ -520,7 +647,8 @@ struct strided_accumulate_t { template + typename optional_property_buffer_view_t, + typename optional_property_buffer_mutable_view_t> struct gatherv_indices_t { size_t output_size{}; int minor_comm_size{}; @@ -530,10 +658,10 @@ struct gatherv_indices_t { raft::device_span combined_nbr_intersection_offsets{}; raft::device_span combined_nbr_intersection_indices{}; - optional_property_buffer_view_t gathered_nbr_intersection_properties0{}; - optional_property_buffer_view_t gathered_nbr_intersection_properties1{}; - optional_property_buffer_view_t combined_nbr_intersection_properties0{}; - optional_property_buffer_view_t combined_nbr_intersection_properties1{}; + optional_property_buffer_view_t gathered_nbr_intersection_e_property_values0{}; + optional_property_buffer_view_t gathered_nbr_intersection_e_property_values1{}; + optional_property_buffer_mutable_view_t combined_nbr_intersection_e_property_values0{}; + optional_property_buffer_mutable_view_t combined_nbr_intersection_e_property_values1{}; __device__ void operator()(size_t i) const { @@ -546,13 +674,13 @@ struct gatherv_indices_t { if constexpr (!std::is_same_v) { auto zipped_gathered_begin = thrust::make_zip_iterator( thrust::make_tuple(gathered_intersection_indices.begin(), - gathered_nbr_intersection_properties0.begin(), - gathered_nbr_intersection_properties1.begin())); + gathered_nbr_intersection_e_property_values0, + gathered_nbr_intersection_e_property_values1)); auto zipped_combined_begin = thrust::make_zip_iterator( thrust::make_tuple(combined_nbr_intersection_indices.begin(), - combined_nbr_intersection_properties0.begin(), - combined_nbr_intersection_properties1.begin())); + combined_nbr_intersection_e_property_values0, + combined_nbr_intersection_e_property_values1)); thrust::copy(thrust::seq, zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i], @@ -694,13 +822,12 @@ nbr_intersection(raft::handle_t const& handle, using optional_property_buffer_view_t = std::conditional_t, - raft::device_span, - std::byte /* dummy */>; - - using optional_nbr_intersected_edge_partitions_t = + edge_property_value_t const*, + void*>; + using optional_property_buffer_mutable_view_t = std::conditional_t, - std::vector>, - std::byte /* dummy */>; + edge_property_value_t*, + void*>; static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -729,19 +856,20 @@ nbr_intersection(raft::handle_t const& handle, "Invalid input arguments: there are invalid input vertex pairs."); } - // 2. Collect neighbor lists for unique second pair elements (for the neighbors within the minor - // range for this GPU); Note that no need to collect for first pair elements as they already - // locally reside. + // 2. Collect neighbor lists (within the minor range for this GPU in multi-GPU) for unique second + // pair elements (all-gathered over minor_comm in multi-GPU); Note that no need to collect for + // first pair elements as they already locally reside. + + auto edge_mask_view = graph_view.edge_mask_view(); std::optional>> major_to_idx_map_ptr{ std::nullopt}; - std::optional> major_nbr_offsets{std::nullopt}; + std::optional> major_nbr_offsets{std::nullopt}; std::optional> major_nbr_indices{std::nullopt}; - [[maybe_unused]] auto major_nbr_properties = + [[maybe_unused]] auto major_e_property_values = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); - optional_property_buffer_view_t optional_major_nbr_properties{}; if constexpr (GraphViewType::is_multi_gpu) { if (intersect_minor_nbr[1]) { @@ -805,7 +933,7 @@ nbr_intersection(raft::handle_t const& handle, } } - // 2.2 Send majors and group (major_comm_rank, edge_partition_idx) counts + // 2.2 Send majors and group (major_comm_rank, local edge_partition_idx) counts rmm::device_uvector rx_majors(0, handle.get_stream()); std::vector rx_major_counts{}; @@ -859,7 +987,7 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector local_degrees_for_rx_majors(size_t{0}, handle.get_stream()); rmm::device_uvector local_nbrs_for_rx_majors(size_t{0}, handle.get_stream()); - [[maybe_unused]] auto local_nbrs_properties_for_rx_majors = + [[maybe_unused]] auto local_e_property_values_for_rx_majors = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); @@ -901,6 +1029,13 @@ nbr_intersection(raft::handle_t const& handle, auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail:: + edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = (i == size_t{0}) ? size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1]; @@ -913,6 +1048,7 @@ nbr_intersection(raft::handle_t const& handle, major_comm_size, minor_comm_size, edge_partition, + edge_partition_e_mask, reordered_idx_first, i, raft::device_span( @@ -936,22 +1072,28 @@ nbr_intersection(raft::handle_t const& handle, local_nbrs_for_rx_majors.resize( local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_view_t optional_local_nbrs_properties{}; + optional_property_buffer_mutable_view_t optional_local_e_property_values{}; if constexpr (!std::is_same_v) { - local_nbrs_properties_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), - handle.get_stream()); - optional_local_nbrs_properties = raft::device_span( - local_nbrs_properties_for_rx_majors.data(), local_nbrs_properties_for_rx_majors.size()); + local_e_property_values_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), + handle.get_stream()); + optional_local_e_property_values = local_e_property_values_for_rx_majors.data(); } for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); - auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail:: + edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = (i == size_t{0}) ? size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1]; @@ -964,12 +1106,13 @@ nbr_intersection(raft::handle_t const& handle, update_rx_major_local_nbrs_t{ major_comm_size, minor_comm_size, edge_partition, edge_partition_e_value_input, + edge_partition_e_mask, reordered_idx_first, i, raft::device_span( @@ -980,7 +1123,7 @@ nbr_intersection(raft::handle_t const& handle, local_nbr_offsets_for_rx_majors.size()), raft::device_span(local_nbrs_for_rx_majors.data(), local_nbrs_for_rx_majors.size()), - optional_local_nbrs_properties}); + optional_local_e_property_values}); } std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); @@ -1012,7 +1155,7 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector local_degrees_for_unique_majors(size_t{0}, handle.get_stream()); std::tie(local_degrees_for_unique_majors, std::ignore) = shuffle_values( major_comm, local_degrees_for_rx_majors.begin(), rx_major_counts, handle.get_stream()); - major_nbr_offsets = rmm::device_uvector(local_degrees_for_unique_majors.size() + 1, + major_nbr_offsets = rmm::device_uvector(local_degrees_for_unique_majors.size() + 1, handle.get_stream()); (*major_nbr_offsets).set_element_to_zero_async(size_t{0}, handle.get_stream()); auto degree_first = thrust::make_transform_iterator(local_degrees_for_unique_majors.begin(), @@ -1027,14 +1170,11 @@ nbr_intersection(raft::handle_t const& handle, major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); if constexpr (!std::is_same_v) { - std::tie(major_nbr_properties, std::ignore) = + std::tie(major_e_property_values, std::ignore) = shuffle_values(major_comm, - local_nbrs_properties_for_rx_majors.begin(), + local_e_property_values_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); - - optional_major_nbr_properties = raft::device_span( - major_nbr_properties.data(), major_nbr_properties.size()); } major_to_idx_map_ptr = std::make_unique>( @@ -1065,11 +1205,11 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector nbr_intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector nbr_intersection_indices(size_t{0}, handle.get_stream()); - [[maybe_unused]] auto nbr_intersection_properties0 = + [[maybe_unused]] auto nbr_intersection_e_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); - [[maybe_unused]] auto nbr_intersection_properties1 = + [[maybe_unused]] auto nbr_intersection_e_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); @@ -1116,15 +1256,19 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); - [[maybe_unused]] optional_nbr_intersected_edge_partitions_t - edge_partition_nbr_intersection_property0{}; - [[maybe_unused]] optional_nbr_intersected_edge_partitions_t - edge_partition_nbr_intersection_property1{}; + [[maybe_unused]] std::conditional_t, + std::vector>, + std::byte /* dummy */> + edge_partition_nbr_intersection_e_property_values0{}; + [[maybe_unused]] std::conditional_t, + std::vector>, + std::byte /* dummy */> + edge_partition_nbr_intersection_e_property_values1{}; if constexpr (!std::is_same_v) { - edge_partition_nbr_intersection_property0.reserve( + edge_partition_nbr_intersection_e_property_values0.reserve( graph_view.number_of_local_edge_partitions()); - edge_partition_nbr_intersection_property1.reserve( + edge_partition_nbr_intersection_e_property_values1.reserve( graph_view.number_of_local_edge_partitions()); } @@ -1144,11 +1288,11 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector rx_v_pair_nbr_intersection_indices(size_t{0}, handle.get_stream()); - [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties0 = + [[maybe_unused]] auto rx_v_pair_nbr_intersection_e_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); - [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties1 = + [[maybe_unused]] auto rx_v_pair_nbr_intersection_e_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); @@ -1174,9 +1318,15 @@ nbr_intersection(raft::handle_t const& handle, auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); - auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); rx_v_pair_nbr_intersection_sizes.resize( @@ -1193,11 +1343,12 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_sizes.begin(), pick_min_degree_t{ nullptr, - raft::device_span(), + raft::device_span(), second_element_to_idx_map, - raft::device_span((*major_nbr_offsets).data(), + raft::device_span((*major_nbr_offsets).data(), (*major_nbr_offsets).size()), - edge_partition}); + edge_partition, + edge_partition_e_mask}); } else { CUGRAPH_FAIL("unimplemented."); } @@ -1215,25 +1366,30 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties0{}; - optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties1{}; + optional_property_buffer_mutable_view_t + rx_v_pair_optional_nbr_intersection_e_property_values0{}; + optional_property_buffer_mutable_view_t + rx_v_pair_optional_nbr_intersection_e_property_values1{}; if constexpr (!std::is_same_v) { - rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.resize( + rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.resize( + rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); - rx_v_pair_optional_nbr_intersection_properties0 = - raft::device_span(rx_v_pair_nbr_intersection_properties0.data(), - rx_v_pair_nbr_intersection_properties0.size()); + rx_v_pair_optional_nbr_intersection_e_property_values0 = + rx_v_pair_nbr_intersection_e_property_values0.data(); - rx_v_pair_optional_nbr_intersection_properties1 = - raft::device_span(rx_v_pair_nbr_intersection_properties1.data(), - rx_v_pair_nbr_intersection_properties1.size()); + rx_v_pair_optional_nbr_intersection_e_property_values1 = + rx_v_pair_nbr_intersection_e_property_values1.data(); } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { + optional_property_buffer_view_t optional_major_e_property_values{}; + if constexpr (!std::is_same_v) { + optional_major_e_property_values = major_e_property_values.data(); + } + auto second_element_to_idx_map = detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); thrust::tabulate( @@ -1248,28 +1404,29 @@ nbr_intersection(raft::handle_t const& handle, edge_t, edge_partition_e_input_device_view_t, optional_property_buffer_view_t, + optional_property_buffer_mutable_view_t, true>{nullptr, - raft::device_span(), + raft::device_span(), raft::device_span(), optional_property_buffer_view_t{}, second_element_to_idx_map, - raft::device_span((*major_nbr_offsets).data(), + raft::device_span((*major_nbr_offsets).data(), (*major_nbr_offsets).size()), raft::device_span((*major_nbr_indices).data(), (*major_nbr_indices).size()), - optional_major_nbr_properties, + optional_major_e_property_values, edge_partition, edge_partition_e_value_input, + edge_partition_e_mask, get_dataframe_buffer_begin(vertex_pair_buffer), raft::device_span(rx_v_pair_nbr_intersection_offsets.data(), rx_v_pair_nbr_intersection_offsets.size()), raft::device_span(rx_v_pair_nbr_intersection_indices.data(), rx_v_pair_nbr_intersection_indices.size()), - rx_v_pair_optional_nbr_intersection_properties0, - rx_v_pair_optional_nbr_intersection_properties1, + rx_v_pair_optional_nbr_intersection_e_property_values0, + rx_v_pair_optional_nbr_intersection_e_property_values1, invalid_vertex_id::value}); - } else { CUGRAPH_FAIL("unimplemented."); } @@ -1284,31 +1441,31 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); } else { - auto common_nbr_and_properties_begin = thrust::make_zip_iterator( + auto common_nbr_and_e_property_values_begin = thrust::make_zip_iterator( thrust::make_tuple(rx_v_pair_nbr_intersection_indices.begin(), - rx_v_pair_nbr_intersection_properties0.begin(), - rx_v_pair_nbr_intersection_properties1.begin())); + rx_v_pair_nbr_intersection_e_property_values0.begin(), + rx_v_pair_nbr_intersection_e_property_values1.begin())); auto last = thrust::remove_if( handle.get_thrust_policy(), - common_nbr_and_properties_begin, - common_nbr_and_properties_begin + rx_v_pair_nbr_intersection_indices.size(), + common_nbr_and_e_property_values_begin, + common_nbr_and_e_property_values_begin + rx_v_pair_nbr_intersection_indices.size(), [] __device__(auto nbr_p0_p1) { return thrust::get<0>(nbr_p0_p1) == invalid_vertex_id::value; }); rx_v_pair_nbr_intersection_indices.resize( - thrust::distance(common_nbr_and_properties_begin, last), handle.get_stream()); + thrust::distance(common_nbr_and_e_property_values_begin, last), handle.get_stream()); rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); - rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); - rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.resize( + rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.shrink_to_fit(handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.resize( + rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.shrink_to_fit(handle.get_stream()); } thrust::inclusive_scan(handle.get_thrust_policy(), @@ -1427,11 +1584,11 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector combined_nbr_intersection_indices(size_t{0}, handle.get_stream()); - [[maybe_unused]] auto combined_nbr_intersection_properties0 = + [[maybe_unused]] auto combined_nbr_intersection_e_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( size_t{0}, handle.get_stream()); - [[maybe_unused]] auto combined_nbr_intersection_properties1 = + [[maybe_unused]] auto combined_nbr_intersection_e_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( size_t{0}, handle.get_stream()); @@ -1470,47 +1627,47 @@ nbr_intersection(raft::handle_t const& handle, combined_nbr_intersection_indices.resize(gathered_nbr_intersection_indices.size(), handle.get_stream()); - [[maybe_unused]] auto gathered_nbr_intersection_properties0 = + [[maybe_unused]] auto gathered_nbr_intersection_e_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), handle.get_stream()); - [[maybe_unused]] auto gathered_nbr_intersection_properties1 = + [[maybe_unused]] auto gathered_nbr_intersection_e_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), handle.get_stream()); if constexpr (!std::is_same_v) { device_multicast_sendrecv(minor_comm, - rx_v_pair_nbr_intersection_properties0.begin(), + rx_v_pair_nbr_intersection_e_property_values0.begin(), rx_v_pair_nbr_intersection_index_tx_counts, tx_displacements, ranks, - gathered_nbr_intersection_properties0.begin(), + gathered_nbr_intersection_e_property_values0.begin(), gathered_nbr_intersection_index_rx_counts, rx_displacements, ranks, handle.get_stream()); - rx_v_pair_nbr_intersection_properties0.resize(size_t{0}, handle.get_stream()); - rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.resize(size_t{0}, handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.shrink_to_fit(handle.get_stream()); - combined_nbr_intersection_properties0.resize(gathered_nbr_intersection_properties0.size(), - handle.get_stream()); + combined_nbr_intersection_e_property_values0.resize( + gathered_nbr_intersection_e_property_values0.size(), handle.get_stream()); device_multicast_sendrecv(minor_comm, - rx_v_pair_nbr_intersection_properties1.begin(), + rx_v_pair_nbr_intersection_e_property_values1.begin(), rx_v_pair_nbr_intersection_index_tx_counts, tx_displacements, ranks, - gathered_nbr_intersection_properties1.begin(), + gathered_nbr_intersection_e_property_values1.begin(), gathered_nbr_intersection_index_rx_counts, rx_displacements, ranks, handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.resize(size_t{0}, handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream()); - combined_nbr_intersection_properties1.resize(gathered_nbr_intersection_properties1.size(), - handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.resize(size_t{0}, handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.shrink_to_fit(handle.get_stream()); + combined_nbr_intersection_e_property_values1.resize( + gathered_nbr_intersection_e_property_values1.size(), handle.get_stream()); } if constexpr (!std::is_same_v) { @@ -1518,7 +1675,10 @@ nbr_intersection(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1529,25 +1689,19 @@ nbr_intersection(raft::handle_t const& handle, combined_nbr_intersection_offsets.size()), raft::device_span(combined_nbr_intersection_indices.data(), combined_nbr_intersection_indices.size()), - raft::device_span( - gathered_nbr_intersection_properties0.data(), - gathered_nbr_intersection_properties0.size()), - raft::device_span( - gathered_nbr_intersection_properties1.data(), - gathered_nbr_intersection_properties1.size()), - raft::device_span( - combined_nbr_intersection_properties0.data(), - combined_nbr_intersection_properties0.size()), - raft::device_span( - combined_nbr_intersection_properties1.data(), - combined_nbr_intersection_properties1.size())}); - + gathered_nbr_intersection_e_property_values0.data(), + gathered_nbr_intersection_e_property_values1.data(), + combined_nbr_intersection_e_property_values0.data(), + combined_nbr_intersection_e_property_values1.data()}); } else { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1567,10 +1721,10 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_indices.push_back( std::move(combined_nbr_intersection_indices)); if constexpr (!std::is_same_v) { - edge_partition_nbr_intersection_property0.push_back( - std::move(combined_nbr_intersection_properties0)); - edge_partition_nbr_intersection_property1.push_back( - std::move(combined_nbr_intersection_properties1)); + edge_partition_nbr_intersection_e_property_values0.push_back( + std::move(combined_nbr_intersection_e_property_values0)); + edge_partition_nbr_intersection_e_property_values1.push_back( + std::move(combined_nbr_intersection_e_property_values1)); } } @@ -1581,8 +1735,10 @@ nbr_intersection(raft::handle_t const& handle, } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); if constexpr (!std::is_same_v) { - nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); - nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), + handle.get_stream()); + nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), + handle.get_stream()); } size_t size_offset{0}; size_t index_offset{0}; @@ -1599,14 +1755,14 @@ nbr_intersection(raft::handle_t const& handle, if constexpr (!std::is_same_v) { thrust::copy(handle.get_thrust_policy(), - edge_partition_nbr_intersection_property0[i].begin(), - edge_partition_nbr_intersection_property0[i].end(), - nbr_intersection_properties0.begin() + index_offset); + edge_partition_nbr_intersection_e_property_values0[i].begin(), + edge_partition_nbr_intersection_e_property_values0[i].end(), + nbr_intersection_e_property_values0.begin() + index_offset); thrust::copy(handle.get_thrust_policy(), - edge_partition_nbr_intersection_property1[i].begin(), - edge_partition_nbr_intersection_property1[i].end(), - nbr_intersection_properties1.begin() + index_offset); + edge_partition_nbr_intersection_e_property_values1[i].begin(), + edge_partition_nbr_intersection_e_property_values1[i].end(), + nbr_intersection_e_property_values1.begin() + index_offset); } index_offset += edge_partition_nbr_intersection_indices[i].size(); @@ -1619,13 +1775,18 @@ nbr_intersection(raft::handle_t const& handle, size_first, size_first + nbr_intersection_sizes.size(), nbr_intersection_offsets.begin() + 1); - } else { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(size_t{0})); - auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, 0); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, 0) + : thrust::nullopt; + rmm::device_uvector nbr_intersection_sizes( input_size, handle.get_stream()); // initially store minimum degrees (upper bound for intersection sizes) @@ -1636,10 +1797,11 @@ nbr_intersection(raft::handle_t const& handle, vertex_pair_first + input_size, nbr_intersection_sizes.begin(), pick_min_degree_t{nullptr, - raft::device_span(), + raft::device_span(), nullptr, - raft::device_span(), - edge_partition}); + raft::device_span(), + edge_partition, + edge_partition_e_mask}); } else { CUGRAPH_FAIL("unimplemented."); } @@ -1656,51 +1818,51 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_view_t optional_nbr_intersection_properties0{}; - optional_property_buffer_view_t optional_nbr_intersection_properties1{}; + optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values0{}; + optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values1{}; if constexpr (!std::is_same_v) { - nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); - nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); - - optional_nbr_intersection_properties0 = raft::device_span( - nbr_intersection_properties0.data(), nbr_intersection_properties0.size()); + nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), + handle.get_stream()); + nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), + handle.get_stream()); - optional_nbr_intersection_properties1 = raft::device_span( - nbr_intersection_properties1.data(), nbr_intersection_properties1.size()); + optional_nbr_intersection_e_property_values0 = nbr_intersection_e_property_values0.data(); + optional_nbr_intersection_e_property_values1 = nbr_intersection_e_property_values1.data(); } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { - thrust::tabulate( - handle.get_thrust_policy(), - nbr_intersection_sizes.begin(), - nbr_intersection_sizes.end(), - copy_intersecting_nbrs_and_update_intersection_size_t{ - nullptr, - raft::device_span(), - raft::device_span(), - optional_property_buffer_view_t{}, - nullptr, - raft::device_span(), - raft::device_span(), - optional_property_buffer_view_t{}, - edge_partition, - edge_partition_e_value_input, - vertex_pair_first, - raft::device_span(nbr_intersection_offsets.data(), - nbr_intersection_offsets.size()), - raft::device_span(nbr_intersection_indices.data(), - nbr_intersection_indices.size()), - optional_nbr_intersection_properties0, - optional_nbr_intersection_properties1, - invalid_vertex_id::value}); + thrust::tabulate(handle.get_thrust_policy(), + nbr_intersection_sizes.begin(), + nbr_intersection_sizes.end(), + copy_intersecting_nbrs_and_update_intersection_size_t< + void*, + void*, + decltype(vertex_pair_first), + vertex_t, + edge_t, + edge_partition_e_input_device_view_t, + optional_property_buffer_view_t, + optional_property_buffer_mutable_view_t, + false>{nullptr, + raft::device_span(), + raft::device_span(), + optional_property_buffer_view_t{}, + nullptr, + raft::device_span(), + raft::device_span(), + optional_property_buffer_view_t{}, + edge_partition, + edge_partition_e_value_input, + edge_partition_e_mask, + vertex_pair_first, + raft::device_span(nbr_intersection_offsets.data(), + nbr_intersection_offsets.size()), + raft::device_span(nbr_intersection_indices.data(), + nbr_intersection_indices.size()), + optional_nbr_intersection_e_property_values0, + optional_nbr_intersection_e_property_values1, + invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); } @@ -1711,14 +1873,14 @@ nbr_intersection(raft::handle_t const& handle, thrust::count_if(handle.get_thrust_policy(), nbr_intersection_indices.begin(), nbr_intersection_indices.end(), - detail::not_equal_t{invalid_vertex_id::value}), + detail::is_not_equal_t{invalid_vertex_id::value}), handle.get_stream()); - [[maybe_unused]] auto tmp_properties0 = + [[maybe_unused]] auto tmp_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( tmp_indices.size(), handle.get_stream()); - [[maybe_unused]] auto tmp_properties1 = + [[maybe_unused]] auto tmp_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( tmp_indices.size(), handle.get_stream()); @@ -1737,39 +1899,38 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.begin() + num_scanned, nbr_intersection_indices.begin() + num_scanned + this_scan_size, tmp_indices.begin() + num_copied, - detail::not_equal_t{invalid_vertex_id::value}))); + detail::is_not_equal_t{invalid_vertex_id::value}))); } else { - auto zipped_itr_to_indices_and_properties_begin = - thrust::make_zip_iterator(thrust::make_tuple(nbr_intersection_indices.begin(), - nbr_intersection_properties0.begin(), - nbr_intersection_properties1.begin())); + auto zipped_itr_to_indices_and_e_property_values_begin = thrust::make_zip_iterator( + thrust::make_tuple(nbr_intersection_indices.begin(), + nbr_intersection_e_property_values0.begin(), + nbr_intersection_e_property_values1.begin())); auto zipped_itr_to_tmps_begin = thrust::make_zip_iterator(thrust::make_tuple( - tmp_indices.begin(), tmp_properties0.begin(), tmp_properties1.begin())); + tmp_indices.begin(), tmp_property_values0.begin(), tmp_property_values1.begin())); num_copied += static_cast(thrust::distance( zipped_itr_to_tmps_begin + num_copied, - thrust::copy_if(handle.get_thrust_policy(), - zipped_itr_to_indices_and_properties_begin + num_scanned, - zipped_itr_to_indices_and_properties_begin + num_scanned + this_scan_size, - zipped_itr_to_tmps_begin + num_copied, - [] __device__(auto nbr_p0_p1) { - auto nbr = thrust::get<0>(nbr_p0_p1); - auto p0 = thrust::get<1>(nbr_p0_p1); - auto p1 = thrust::get<2>(nbr_p0_p1); - return thrust::get<0>(nbr_p0_p1) != invalid_vertex_id::value; - }))); + thrust::copy_if( + handle.get_thrust_policy(), + zipped_itr_to_indices_and_e_property_values_begin + num_scanned, + zipped_itr_to_indices_and_e_property_values_begin + num_scanned + this_scan_size, + zipped_itr_to_tmps_begin + num_copied, + [] __device__(auto nbr_p0_p1) { + auto nbr = thrust::get<0>(nbr_p0_p1); + auto p0 = thrust::get<1>(nbr_p0_p1); + auto p1 = thrust::get<2>(nbr_p0_p1); + return thrust::get<0>(nbr_p0_p1) != invalid_vertex_id::value; + }))); } num_scanned += this_scan_size; } nbr_intersection_indices = std::move(tmp_indices); if constexpr (!std::is_same_v) { - nbr_intersection_properties0 = std::move(tmp_properties0); - nbr_intersection_properties1 = std::move(tmp_properties1); + nbr_intersection_e_property_values0 = std::move(tmp_property_values0); + nbr_intersection_e_property_values1 = std::move(tmp_property_values1); } - #else - if constexpr (std::is_same_v) { nbr_intersection_indices.resize( thrust::distance(nbr_intersection_indices.begin(), @@ -1780,10 +1941,10 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); } else { nbr_intersection_indices.resize( - thrust::distance(zipped_itr_to_indices_and_properties_begin, + thrust::distance(zipped_itr_to_indices_and_e_property_values_begin, thrust::remove_if(handle.get_thrust_policy(), - zipped_itr_to_indices_and_properties_begin, - zipped_itr_to_indices_and_properties_begin + + zipped_itr_to_indices_and_e_property_values_begin, + zipped_itr_to_indices_and_e_property_values_begin + nbr_intersection_indices.size(), [] __device__(auto nbr_p0_p1) { return thrust::get<0>(nbr_p0_p1) == @@ -1791,8 +1952,10 @@ nbr_intersection(raft::handle_t const& handle, })), handle.get_stream()); - nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); - nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), + handle.get_stream()); + nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), + handle.get_stream()); } #endif @@ -1811,8 +1974,8 @@ nbr_intersection(raft::handle_t const& handle, } else { return std::make_tuple(std::move(nbr_intersection_offsets), std::move(nbr_intersection_indices), - std::move(nbr_intersection_properties0), - std::move(nbr_intersection_properties1)); + std::move(nbr_intersection_e_property_values0), + std::move(nbr_intersection_e_property_values1)); } } diff --git a/cpp/src/prims/detail/optional_dataframe_buffer.hpp b/cpp/src/prims/detail/optional_dataframe_buffer.hpp index dd40e6932e4..62b2245a651 100644 --- a/cpp/src/prims/detail/optional_dataframe_buffer.hpp +++ b/cpp/src/prims/detail/optional_dataframe_buffer.hpp @@ -97,6 +97,7 @@ void shrink_to_fit_optional_dataframe_buffer( { return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view); } + } // namespace detail } // namespace cugraph diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index c46e83aa5da..f17441ad6ab 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -604,7 +604,7 @@ class kv_cuco_store_t { store_value_offsets.begin() /* map */, store_value_offsets.begin() /* stencil */, get_dataframe_buffer_begin(store_values_), - not_equal_t{std::numeric_limits::max()}); + is_not_equal_t{std::numeric_limits::max()}); } } @@ -649,7 +649,7 @@ class kv_cuco_store_t { store_value_offsets.begin() /* map */, store_value_offsets.begin() /* stencil */, get_dataframe_buffer_begin(store_values_), - not_equal_t{std::numeric_limits::max()}); + is_not_equal_t{std::numeric_limits::max()}); } } @@ -695,7 +695,7 @@ class kv_cuco_store_t { store_value_offsets.begin() /* map */, store_value_offsets.begin() /* stencil */, get_dataframe_buffer_begin(store_values_), - not_equal_t{std::numeric_limits::max()}); + is_not_equal_t{std::numeric_limits::max()}); // now perform assigns (for k,v pairs that failed to insert) diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index 640c3c04bfd..201c08325d7 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -112,8 +112,8 @@ struct call_intersection_op_t { IntersectionOp intersection_op{}; size_t const* nbr_offsets{nullptr}; typename GraphViewType::vertex_type const* nbr_indices{nullptr}; - EdgeValueInputIterator nbr_intersection_properties0{nullptr}; - EdgeValueInputIterator nbr_intersection_properties1{nullptr}; + EdgeValueInputIterator nbr_intersection_property_values0{nullptr}; + EdgeValueInputIterator nbr_intersection_property_values1{nullptr}; VertexPairIndexIterator major_minor_pair_index_first{}; VertexPairIterator major_minor_pair_first{}; VertexPairValueOutputIterator major_minor_pair_value_output_first{}; @@ -136,20 +136,20 @@ struct call_intersection_op_t { std::conditional_t, raft::device_span, std::byte /* dummy */> - properties0{}; + property_values0{}; std::conditional_t, raft::device_span, std::byte /* dummy */> - properties1{}; + property_values1{}; if constexpr (!std::is_same_v) { - properties0 = raft::device_span( - nbr_intersection_properties0 + nbr_offsets[i], - nbr_intersection_properties0 + +nbr_offsets[i + 1]); - properties1 = raft::device_span( - nbr_intersection_properties1 + nbr_offsets[i], - nbr_intersection_properties1 + +nbr_offsets[i + 1]); + property_values0 = raft::device_span( + nbr_intersection_property_values0 + nbr_offsets[i], + nbr_intersection_property_values0 + +nbr_offsets[i + 1]); + property_values1 = raft::device_span( + nbr_intersection_property_values1 + nbr_offsets[i], + nbr_intersection_property_values1 + +nbr_offsets[i + 1]); } property_t src_prop{}; @@ -174,8 +174,8 @@ struct call_intersection_op_t { dst_prop = *(vertex_property_first + dst_offset); } - *(major_minor_pair_value_output_first + index) = - intersection_op(src, dst, src_prop, dst_prop, intersection, properties0, properties1); + *(major_minor_pair_value_output_first + index) = intersection_op( + src, dst, src_prop, dst_prop, intersection, property_values0, property_values1); } }; @@ -240,8 +240,6 @@ void per_v_pair_transform_dst_nbr_intersection( using edge_property_value_t = typename EdgeValueInputIterator::value_type; using result_t = typename thrust::iterator_traits::value_type; - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (do_expensive_check) { auto num_invalids = detail::count_invalid_vertex_pairs(handle, graph_view, vertex_pair_first, vertex_pair_last); @@ -384,16 +382,16 @@ void per_v_pair_transform_dst_nbr_intersection( rmm::device_uvector intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector intersection_indices(size_t{0}, handle.get_stream()); - [[maybe_unused]] rmm::device_uvector r_nbr_intersection_properties0( - size_t{0}, handle.get_stream()); - [[maybe_unused]] rmm::device_uvector r_nbr_intersection_properties1( - size_t{0}, handle.get_stream()); + [[maybe_unused]] rmm::device_uvector + r_nbr_intersection_property_values0(size_t{0}, handle.get_stream()); + [[maybe_unused]] rmm::device_uvector + r_nbr_intersection_property_values1(size_t{0}, handle.get_stream()); if constexpr (!std::is_same_v) { std::tie(intersection_offsets, intersection_indices, - r_nbr_intersection_properties0, - r_nbr_intersection_properties1) = + r_nbr_intersection_property_values0, + r_nbr_intersection_property_values1) = detail::nbr_intersection(handle, graph_view, edge_value_input, @@ -422,7 +420,7 @@ void per_v_pair_transform_dst_nbr_intersection( detail::call_intersection_op_t< GraphViewType, decltype(vertex_value_input_for_unique_vertices_first), - typename decltype(r_nbr_intersection_properties0)::const_pointer, + typename decltype(r_nbr_intersection_property_values0)::const_pointer, IntersectionOp, decltype(chunk_vertex_pair_index_first), VertexPairIterator, @@ -433,8 +431,8 @@ void per_v_pair_transform_dst_nbr_intersection( intersection_op, intersection_offsets.data(), intersection_indices.data(), - r_nbr_intersection_properties0.data(), - r_nbr_intersection_properties1.data(), + r_nbr_intersection_property_values0.data(), + r_nbr_intersection_property_values1.data(), chunk_vertex_pair_index_first, vertex_pair_first, vertex_pair_value_output_first}); @@ -445,7 +443,7 @@ void per_v_pair_transform_dst_nbr_intersection( detail::call_intersection_op_t< GraphViewType, VertexValueInputIterator, - typename decltype(r_nbr_intersection_properties0)::const_pointer, + typename decltype(r_nbr_intersection_property_values0)::const_pointer, IntersectionOp, decltype(chunk_vertex_pair_index_first), VertexPairIterator, @@ -456,8 +454,8 @@ void per_v_pair_transform_dst_nbr_intersection( intersection_op, intersection_offsets.data(), intersection_indices.data(), - r_nbr_intersection_properties0.data(), - r_nbr_intersection_properties1.data(), + r_nbr_intersection_property_values0.data(), + r_nbr_intersection_property_values1.data(), chunk_vertex_pair_index_first, vertex_pair_first, vertex_pair_value_output_first}); diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index e6db21f1c7c..5fee97790f1 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -941,7 +941,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, minor_comm_ranks.begin(), thrust::make_zip_iterator( thrust::make_tuple(tmp_sample_local_nbr_indices.begin(), tmp_sample_key_indices.begin())), - not_equal_t{-1}); + is_not_equal_t{-1}); sample_local_nbr_indices = std::move(tmp_sample_local_nbr_indices); sample_key_indices = std::move(tmp_sample_key_indices); diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 2e19adc34c4..d4f8606257e 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -336,8 +336,14 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( if (edge_partition.number_of_edges() > 0) { auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); - detail::decompress_edge_partition_to_fill_edgelist_majors( - handle, edge_partition, tmp_majors.data(), segment_offsets); + detail::decompress_edge_partition_to_fill_edgelist_majors( + handle, + edge_partition, + std::nullopt, + raft::device_span(tmp_majors.data(), tmp_majors.size()), + segment_offsets); auto minor_key_first = thrust::make_transform_iterator( edge_partition.indices(), diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index f773a102959..bcf7606c423 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -269,10 +270,17 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( vertex_value_output_first + graph_view.local_vertex_partition_range_size(), init); + auto edge_mask_view = graph_view.edge_mask_view(); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : std::nullopt; edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; @@ -286,22 +294,29 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( edge_partition_dst_value_input = edge_partition_dst_input_device_view_t(edge_dst_value_input); } - rmm::device_uvector majors(edge_partition.number_of_edges(), handle.get_stream()); + rmm::device_uvector majors( + edge_partition_e_mask + ? detail::count_set_bits( + handle, (*edge_partition_e_mask).value_first(), edge_partition.number_of_edges()) + : static_cast(edge_partition.number_of_edges()), + handle.get_stream()); rmm::device_uvector minors(majors.size(), handle.get_stream()); auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); detail::decompress_edge_partition_to_edgelist(handle, - edge_partition, - std::nullopt, - std::nullopt, - majors.data(), - minors.data(), - std::nullopt, - std::nullopt, - segment_offsets); + GraphViewType::is_multi_gpu>( + handle, + edge_partition, + std::nullopt, + std::nullopt, + edge_partition_e_mask, + raft::device_span(majors.data(), majors.size()), + raft::device_span(minors.data(), minors.size()), + std::nullopt, + std::nullopt, + segment_offsets); auto vertex_pair_first = thrust::make_zip_iterator(thrust::make_tuple(majors.begin(), minors.begin())); diff --git a/cpp/src/structure/coarsen_graph_impl.cuh b/cpp/src/structure/coarsen_graph_impl.cuh index b8dc28d563e..0704bc6f23b 100644 --- a/cpp/src/structure/coarsen_graph_impl.cuh +++ b/cpp/src/structure/coarsen_graph_impl.cuh @@ -164,16 +164,18 @@ decompress_edge_partition_to_relabeled_and_grouped_and_coarsened_edgelist( ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; - detail::decompress_edge_partition_to_edgelist( + detail::decompress_edge_partition_to_edgelist( handle, edge_partition, edge_partition_weight_view, - std::optional>{ - std::nullopt}, - edgelist_majors.data(), - edgelist_minors.data(), - edgelist_weights ? std::optional{(*edgelist_weights).data()} : std::nullopt, - std::optional{std::nullopt}, + std::nullopt, + std::nullopt, + raft::device_span(edgelist_majors.data(), edgelist_majors.size()), + raft::device_span(edgelist_minors.data(), edgelist_minors.size()), + edgelist_weights ? std::make_optional>((*edgelist_weights).data(), + (*edgelist_weights).size()) + : std::nullopt, + std::nullopt, segment_offsets); auto pair_first = diff --git a/cpp/src/structure/decompress_to_edgelist_impl.cuh b/cpp/src/structure/decompress_to_edgelist_impl.cuh index d653307c620..e9e84f5bf15 100644 --- a/cpp/src/structure/decompress_to_edgelist_impl.cuh +++ b/cpp/src/structure/decompress_to_edgelist_impl.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -80,8 +81,11 @@ decompress_to_edgelist_impl( std::vector edgelist_edge_counts(graph_view.number_of_local_edge_partitions(), size_t{0}); for (size_t i = 0; i < edgelist_edge_counts.size(); ++i) { - edgelist_edge_counts[i] = - static_cast(graph_view.number_of_local_edge_partition_edges(i)); + edgelist_edge_counts[i] = graph_view.local_edge_partition_view(i).number_of_edges(); + if (graph_view.has_edge_mask()) { + edgelist_edge_counts[i] = detail::count_set_bits( + handle, (*(graph_view.edge_mask_view())).value_firsts()[i], edgelist_edge_counts[i]); + } } auto number_of_local_edges = std::reduce(edgelist_edge_counts.begin(), edgelist_edge_counts.end()); @@ -97,7 +101,7 @@ decompress_to_edgelist_impl( size_t cur_size{0}; for (size_t i = 0; i < edgelist_edge_counts.size(); ++i) { - detail::decompress_edge_partition_to_edgelist( + detail::decompress_edge_partition_to_edgelist( handle, edge_partition_device_view_t( graph_view.local_edge_partition_view(i)), @@ -110,11 +114,19 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_id_view), i) : std::nullopt, - edgelist_majors.data() + cur_size, - edgelist_minors.data() + cur_size, - edgelist_weights ? std::optional{(*edgelist_weights).data() + cur_size} + graph_view.has_edge_mask() + ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *(graph_view.edge_mask_view()), i) + : std::nullopt, + raft::device_span(edgelist_majors.data() + cur_size, edgelist_edge_counts[i]), + raft::device_span(edgelist_minors.data() + cur_size, edgelist_edge_counts[i]), + edgelist_weights ? std::make_optional>( + (*edgelist_weights).data() + cur_size, edgelist_edge_counts[i]) : std::nullopt, - edgelist_ids ? std::optional{(*edgelist_ids).data() + cur_size} : std::nullopt, + edgelist_ids ? std::make_optional>( + (*edgelist_ids).data() + cur_size, edgelist_edge_counts[i]) + : std::nullopt, graph_view.local_edge_partition_segment_offsets(i)); cur_size += edgelist_edge_counts[i]; } @@ -231,8 +243,13 @@ decompress_to_edgelist_impl( if (do_expensive_check) { /* currently, nothing to do */ } - rmm::device_uvector edgelist_majors(graph_view.number_of_local_edge_partition_edges(), - handle.get_stream()); + auto num_edges = graph_view.local_edge_partition_view().number_of_edges(); + if (graph_view.has_edge_mask()) { + num_edges = + detail::count_set_bits(handle, (*(graph_view.edge_mask_view())).value_firsts()[0], num_edges); + } + + rmm::device_uvector edgelist_majors(num_edges, handle.get_stream()); rmm::device_uvector edgelist_minors(edgelist_majors.size(), handle.get_stream()); auto edgelist_weights = edge_weight_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) @@ -240,7 +257,7 @@ decompress_to_edgelist_impl( auto edgelist_ids = edge_id_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; - detail::decompress_edge_partition_to_edgelist( + detail::decompress_edge_partition_to_edgelist( handle, edge_partition_device_view_t( graph_view.local_edge_partition_view()), @@ -253,10 +270,19 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_id_view), 0) : std::nullopt, - edgelist_majors.data(), - edgelist_minors.data(), - edgelist_weights ? std::optional{(*edgelist_weights).data()} : std::nullopt, - edgelist_ids ? std::optional{(*edgelist_ids).data()} : std::nullopt, + graph_view.has_edge_mask() + ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *(graph_view.edge_mask_view()), 0) + : std::nullopt, + raft::device_span(edgelist_majors.data(), edgelist_majors.size()), + raft::device_span(edgelist_minors.data(), edgelist_minors.size()), + edgelist_weights ? std::make_optional>((*edgelist_weights).data(), + (*edgelist_weights).size()) + : std::nullopt, + edgelist_ids ? std::make_optional>((*edgelist_ids).data(), + (*edgelist_ids).size()) + : std::nullopt, graph_view.local_edge_partition_segment_offsets()); if (renumber_map) { @@ -294,8 +320,6 @@ decompress_to_edgelist( std::optional> renumber_map, bool do_expensive_check) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - return decompress_to_edgelist_impl( handle, graph_view, edge_weight_view, edge_id_view, renumber_map, do_expensive_check); } diff --git a/cpp/src/structure/graph_impl.cuh b/cpp/src/structure/graph_impl.cuh index 97975897e08..75862266789 100644 --- a/cpp/src/structure/graph_impl.cuh +++ b/cpp/src/structure/graph_impl.cuh @@ -213,16 +213,17 @@ update_local_sorted_unique_edge_majors_minors( thrust::make_counting_iterator(minor_range_first + num_scanned), thrust::make_counting_iterator(minor_range_first + num_scanned + this_scan_size), unique_edge_minors.begin() + num_copied, - cugraph::detail::check_bit_set_t{minor_bitmaps.data(), minor_range_first}))); + cugraph::detail::check_bit_set_t{minor_bitmaps.data(), + minor_range_first}))); num_scanned += this_scan_size; } #else - thrust::copy_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(minor_range_first), - thrust::make_counting_iterator(minor_range_last), - unique_edge_minors.begin(), - cugraph::detail::check_bit_set_t{minor_bitmaps.data(), minor_range_first}); + thrust::copy_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(minor_range_first), + thrust::make_counting_iterator(minor_range_last), + unique_edge_minors.begin(), + cugraph::detail::check_bit_set_t{ + minor_bitmaps.data(), minor_range_first}); #endif auto num_chunks = diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 7626784c13c..64a8a3212b3 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,7 @@ rmm::device_uvector compute_major_degrees( std::vector const& edge_partition_offsets, std::optional> const& edge_partition_dcs_nzd_vertices, std::optional> const& edge_partition_dcs_nzd_vertex_counts, + std::optional> const& edge_partition_masks, partition_t const& partition, std::vector const& edge_partition_segment_offsets) { @@ -142,7 +144,9 @@ rmm::device_uvector compute_major_degrees( vertex_t major_range_last{}; std::tie(major_range_first, major_range_last) = partition.vertex_partition_range(major_range_vertex_partition_id); - auto p_offsets = edge_partition_offsets[i]; + auto offsets = edge_partition_offsets[i]; + auto masks = + edge_partition_masks ? thrust::make_optional((*edge_partition_masks)[i]) : thrust::nullopt; auto segment_offset_size_per_partition = edge_partition_segment_offsets.size() / static_cast(minor_comm_size); auto major_hypersparse_first = @@ -155,9 +159,16 @@ rmm::device_uvector compute_major_degrees( thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_hypersparse_first - major_range_first), local_degrees.begin(), - [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); + [offsets, masks] __device__(auto i) { + auto local_degree = offsets[i + 1] - offsets[i]; + if (masks) { + local_degree = static_cast( + detail::count_set_bits(*masks, offsets[i], local_degree)); + } + return local_degree; + }); if (use_dcs) { - auto p_dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; + auto dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; auto dcs_nzd_vertex_count = (*edge_partition_dcs_nzd_vertex_counts)[i]; thrust::fill(execution_policy, local_degrees.begin() + (major_hypersparse_first - major_range_first), @@ -166,15 +177,20 @@ rmm::device_uvector compute_major_degrees( thrust::for_each(execution_policy, thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(dcs_nzd_vertex_count), - [p_offsets, - p_dcs_nzd_vertices, + [offsets, + dcs_nzd_vertices, + masks, major_range_first, major_hypersparse_first, local_degrees = local_degrees.data()] __device__(auto i) { - auto d = p_offsets[(major_hypersparse_first - major_range_first) + i + 1] - - p_offsets[(major_hypersparse_first - major_range_first) + i]; - auto v = p_dcs_nzd_vertices[i]; - local_degrees[v - major_range_first] = d; + auto major_idx = (major_hypersparse_first - major_range_first) + i; + auto local_degree = offsets[major_idx + 1] - offsets[major_idx]; + if (masks) { + local_degree = static_cast( + detail::count_set_bits(*masks, offsets[major_idx], local_degree)); + } + auto v = dcs_nzd_vertices[i]; + local_degrees[v - major_range_first] = local_degree; }); } minor_comm.reduce(local_degrees.data(), @@ -193,12 +209,21 @@ rmm::device_uvector compute_major_degrees( template rmm::device_uvector compute_major_degrees(raft::handle_t const& handle, edge_t const* offsets, + std::optional masks, vertex_t number_of_vertices) { rmm::device_uvector degrees(number_of_vertices, handle.get_stream()); thrust::tabulate( - handle.get_thrust_policy(), degrees.begin(), degrees.end(), [offsets] __device__(auto i) { - return offsets[i + 1] - offsets[i]; + handle.get_thrust_policy(), + degrees.begin(), + degrees.end(), + [offsets, masks = masks ? thrust::make_optional(*masks) : thrust::nullopt] __device__(auto i) { + auto local_degree = offsets[i + 1] - offsets[i]; + if (masks) { + local_degree = + static_cast(detail::count_set_bits(*masks, offsets[i], local_degree)); + } + return local_degree; }); return degrees; } @@ -512,16 +537,18 @@ rmm::device_uvector graph_view_t>:: compute_in_degrees(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - if (store_transposed) { return compute_major_degrees(handle, this->edge_partition_offsets_, this->edge_partition_dcs_nzd_vertices_, this->edge_partition_dcs_nzd_vertex_counts_, + this->has_edge_mask() + ? std::make_optional((*(this->edge_mask_view())).value_firsts()) + : std::nullopt, this->partition_, this->edge_partition_segment_offsets_); } else { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); return compute_minor_degrees(handle, *this); } } @@ -531,11 +558,15 @@ rmm::device_uvector graph_view_t>:: compute_in_degrees(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - if (store_transposed) { - return compute_major_degrees(handle, this->offsets_, this->local_vertex_partition_range_size()); + return compute_major_degrees( + handle, + this->offsets_, + this->has_edge_mask() ? std::make_optional((*(this->edge_mask_view())).value_firsts()[0]) + : std::nullopt, + this->local_vertex_partition_range_size()); } else { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); return compute_minor_degrees(handle, *this); } } @@ -545,15 +576,17 @@ rmm::device_uvector graph_view_t>:: compute_out_degrees(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - if (store_transposed) { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); return compute_minor_degrees(handle, *this); } else { return compute_major_degrees(handle, this->edge_partition_offsets_, this->edge_partition_dcs_nzd_vertices_, this->edge_partition_dcs_nzd_vertex_counts_, + this->has_edge_mask() + ? std::make_optional((*(this->edge_mask_view())).value_firsts()) + : std::nullopt, this->partition_, this->edge_partition_segment_offsets_); } @@ -564,12 +597,16 @@ rmm::device_uvector graph_view_t>:: compute_out_degrees(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - if (store_transposed) { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); return compute_minor_degrees(handle, *this); } else { - return compute_major_degrees(handle, this->offsets_, this->local_vertex_partition_range_size()); + return compute_major_degrees( + handle, + this->offsets_, + this->has_edge_mask() ? std::make_optional((*(this->edge_mask_view())).value_firsts()[0]) + : std::nullopt, + this->local_vertex_partition_range_size()); } } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2a4bb8ab2a5..6775ed2eb16 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -288,10 +288,6 @@ ConfigureTest(GENERATE_RMAT_TEST generators/generate_rmat_test.cpp) ConfigureTest(GENERATE_BIPARTITE_RMAT_TEST generators/generate_bipartite_rmat_test.cpp GPUS 1 PERCENT 100) -################################################################################################### -# - Graph mask tests ------------------------------------------------------------------------------ -ConfigureTest(GRAPH_MASK_TEST structure/graph_mask_test.cpp) - ################################################################################################### # - Symmetrize tests ------------------------------------------------------------------------------ ConfigureTest(SYMMETRIZE_TEST structure/symmetrize_test.cpp) diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index a7cd8a989b0..a3edb1f6372 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -21,9 +21,13 @@ #include #include +#include +#include #include +#include #include +#include #include #include #include @@ -61,6 +65,7 @@ struct intersection_op_t { struct Prims_Usecase { size_t num_vertex_pairs{0}; + bool edge_masking{false}; bool check_correctness{true}; }; @@ -78,7 +83,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection virtual void TearDown() {} // Verify the results of per_v_pair_transform_dst_nbr_intersection primitive - template + template void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) { HighResTimer hr_timer{}; @@ -109,6 +114,34 @@ class Tests_MGPerVPairTransformDstNbrIntersection auto mg_graph_view = mg_graph.view(); + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + cugraph::edge_src_property_t edge_src_renumber_map( + *handle_, mg_graph_view); + cugraph::edge_dst_property_t edge_dst_renumber_map( + *handle_, mg_graph_view); + cugraph::update_edge_src_property( + *handle_, mg_graph_view, (*mg_renumber_map).begin(), edge_src_renumber_map); + cugraph::update_edge_dst_property( + *handle_, mg_graph_view, (*mg_renumber_map).begin(), edge_dst_renumber_map); + + edge_mask = cugraph::edge_property_t(*handle_, mg_graph_view); + + cugraph::transform_e( + *handle_, + mg_graph_view, + edge_src_renumber_map.view(), + edge_dst_renumber_map.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + return ((src_property % 2 == 0) && (dst_property % 2 == 0)) + ? false + : true; // mask out the edges with even unrenumbered src & dst vertex IDs + }, + (*edge_mask).mutable_view()); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + // 2. run MG per_v_pair_transform_dst_nbr_intersection primitive ASSERT_TRUE( @@ -224,6 +257,42 @@ class Tests_MGPerVPairTransformDstNbrIntersection if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); + if (prims_usecase.edge_masking) { + rmm::device_uvector srcs(0, handle_->get_stream()); + rmm::device_uvector dsts(0, handle_->get_stream()); + std::tie(srcs, dsts, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, sg_graph_view, std::nullopt, std::nullopt, std::nullopt); + auto edge_first = thrust::make_zip_iterator(srcs.begin(), dsts.begin()); + srcs.resize(thrust::distance(edge_first, + thrust::remove_if(handle_->get_thrust_policy(), + edge_first, + edge_first + srcs.size(), + [] __device__(auto pair) { + return (thrust::get<0>(pair) % 2 == 0) && + (thrust::get<1>(pair) % 2 == 0); + })), + handle_->get_stream()); + dsts.resize(srcs.size(), handle_->get_stream()); + rmm::device_uvector vertices(sg_graph_view.number_of_vertices(), + handle_->get_stream()); + thrust::sequence( + handle_->get_thrust_policy(), vertices.begin(), vertices.end(), vertex_t{0}); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore, std::ignore) = cugraph:: + create_graph_from_edgelist( + *handle_, + std::move(vertices), + std::move(srcs), + std::move(dsts), + std::nullopt, + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{sg_graph_view.is_symmetric(), + sg_graph_view.is_multigraph()}, + false); + sg_graph_view = sg_graph.view(); + } + auto sg_result_buffer = cugraph::allocate_dataframe_buffer>( cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); auto sg_out_degrees = sg_graph_view.compute_out_degrees(*handle_); @@ -262,47 +331,16 @@ using Tests_MGPerVPairTransformDstNbrIntersection_File = using Tests_MGPerVPairTransformDstNbrIntersection_Rmat = Tests_MGPerVPairTransformDstNbrIntersection; -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>(std::get<0>(param), - std::get<1>(param)); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float) { auto param = GetParam(); - run_current_test(std::get<0>(param), std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -310,7 +348,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -318,7 +356,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -327,15 +365,18 @@ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVPairTransformDstNbrIntersection_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{1024}, true}), + ::testing::Values(Prims_Usecase{size_t{1024}, false, true}, + Prims_Usecase{size_t{1024}, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_test, - Tests_MGPerVPairTransformDstNbrIntersection_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{1024}, false, true}, + Prims_Usecase{size_t{1024}, true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -345,7 +386,8 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGPerVPairTransformDstNbrIntersection_Rmat, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), + ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false, false}, + Prims_Usecase{size_t{1024 * 1024}, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 3b6a6b9c4c5..4d05b0c9e65 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -50,14 +50,14 @@ template struct intersection_op_t { - __device__ thrust::tuple operator()( + __device__ thrust::tuple operator()( vertex_t a, vertex_t b, weight_t weight_a /* weighted out degree */, weight_t weight_b /* weighted out degree */, raft::device_span intersection, - raft::device_span intersected_properties_a, - raft::device_span intersected_properties_b) const + raft::device_span intersected_property_values_a, + raft::device_span intersected_property_values_b) const { weight_t min_weight_a_intersect_b = weight_t{0}; weight_t max_weight_a_intersect_b = weight_t{0}; @@ -65,10 +65,12 @@ struct intersection_op_t { weight_t sum_of_intersected_b = weight_t{0}; for (size_t k = 0; k < intersection.size(); k++) { - min_weight_a_intersect_b += min(intersected_properties_a[k], intersected_properties_b[k]); - max_weight_a_intersect_b += max(intersected_properties_a[k], intersected_properties_b[k]); - sum_of_intersected_a += intersected_properties_a[k]; - sum_of_intersected_b += intersected_properties_b[k]; + min_weight_a_intersect_b += + min(intersected_property_values_a[k], intersected_property_values_b[k]); + max_weight_a_intersect_b += + max(intersected_property_values_a[k], intersected_property_values_b[k]); + sum_of_intersected_a += intersected_property_values_a[k]; + sum_of_intersected_b += intersected_property_values_b[k]; } weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a; @@ -99,7 +101,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection virtual void TearDown() {} // Verify the results of per_v_pair_transform_dst_nbr_intersection primitive - template + template void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) { HighResTimer hr_timer{}; @@ -189,7 +191,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); - auto mg_out_degrees = mg_graph_view.compute_out_degrees(*handle_); + auto mg_out_weight_sums = compute_out_weight_sums(*handle_, mg_graph_view, mg_edge_weight_view); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -197,9 +199,6 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.start("MG per_v_pair_transform_dst_nbr_intersection"); } - rmm::device_uvector mg_out_weight_sums = - compute_out_weight_sums(*handle_, mg_graph_view, mg_edge_weight_view); - cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, mg_graph_view, @@ -272,7 +271,6 @@ class Tests_MGPerVPairTransformDstNbrIntersection if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); - auto sg_result_buffer = cugraph::allocate_dataframe_buffer>( cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); @@ -290,11 +288,21 @@ class Tests_MGPerVPairTransformDstNbrIntersection */), sg_out_weight_sums.begin(), intersection_op_t{}, cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + auto threshold_ratio = weight_t{1e-4}; + auto threshold_magnitude = std::numeric_limits::min(); + auto nearly_equal = [threshold_ratio, threshold_magnitude] __device__(auto lhs, auto rhs) { + return (fabs(thrust::get<0>(lhs) - thrust::get<0>(rhs)) < + max(max(thrust::get<0>(lhs), thrust::get<0>(rhs)) * threshold_ratio, + threshold_magnitude)) && + (fabs(thrust::get<1>(lhs) - thrust::get<1>(rhs)) < + max(max(thrust::get<1>(lhs), thrust::get<1>(rhs)) * threshold_ratio, + threshold_magnitude)); + }; bool valid = thrust::equal(handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_aggregate_result_buffer), cugraph::get_dataframe_buffer_end(mg_aggregate_result_buffer), - cugraph::get_dataframe_buffer_begin(sg_result_buffer)); - + cugraph::get_dataframe_buffer_begin(sg_result_buffer), + nearly_equal); ASSERT_TRUE(valid); } } @@ -313,47 +321,16 @@ using Tests_MGPerVPairTransformDstNbrIntersection_File = using Tests_MGPerVPairTransformDstNbrIntersection_Rmat = Tests_MGPerVPairTransformDstNbrIntersection; -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>(std::get<0>(param), - std::get<1>(param)); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float) { auto param = GetParam(); - run_current_test(std::get<0>(param), std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -361,7 +338,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -369,7 +346,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } diff --git a/cpp/tests/structure/graph_mask_test.cpp b/cpp/tests/structure/graph_mask_test.cpp deleted file mode 100644 index fc704d683a7..00000000000 --- a/cpp/tests/structure/graph_mask_test.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governin_from_mtxg permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -TEST(Test_GraphMask, BasicGraphMaskTestInt64) -{ - raft::handle_t handle; - - int number_of_vertices = 500; - int number_of_edges = 1000; - - cugraph::graph_mask_t mask( - handle, number_of_vertices, number_of_edges); - - auto mask_view = mask.view(); - - ASSERT_EQ(false, mask.has_vertex_mask()); - ASSERT_EQ(false, mask.has_edge_mask()); - ASSERT_EQ(false, mask_view.has_vertex_mask()); - ASSERT_EQ(false, mask_view.has_edge_mask()); - - mask.initialize_vertex_mask(); - mask.initialize_edge_mask(); - - auto mask_view2 = mask.view(); - - ASSERT_EQ(true, mask.has_vertex_mask()); - ASSERT_EQ(true, mask.has_edge_mask()); - ASSERT_EQ(true, mask_view2.has_vertex_mask()); - ASSERT_EQ(true, mask_view2.has_edge_mask()); -} \ No newline at end of file From e0c3a9391851528407bc085eeab202e6c260c9f4 Mon Sep 17 00:00:00 2001 From: Andrei Ivanov <32910461+drivanov@users.noreply.github.com> Date: Mon, 30 Oct 2023 06:10:52 -0700 Subject: [PATCH 5/5] Fix an issue occurring in the cuGraph-DGL example for "mixed" mode. (#3927) Fixing the following bug: ``` Training in mixed mode. Loading data Traceback (most recent call last): File "/opt/rapids/cugraph/python/cugraph-dgl/examples/graphsage/node-classification.py", line 249, in g.get_node_storage(key="feat", ntype="_N") File "/usr/local/lib/python3.10/dist-packages/dgl/frame.py", line 530, in fetch return super().fetch(indices, device, pin_memory=pin_memory, **kwargs) File "/usr/local/lib/python3.10/dist-packages/dgl/storages/pytorch_tensor.py", line 40, in fetch raise ValueError( ValueError: Got indices on device cuda:0 whereas the feature tensor is on cpu. Please either (1) move the graph to GPU with to() method, or (2) pin the graph with pin_memory_() method. ``` which appears in `mixed` mode. NOTE: the option `(1) move the graph to GPU with to()` is not available because in mixed mode the graph must be on the CPU. Authors: - Andrei Ivanov (https://github.com/drivanov) - Brad Rees (https://github.com/BradReesWork) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) - Vibhu Jawa (https://github.com/VibhuJawa) URL: https://github.com/rapidsai/cugraph/pull/3927 --- python/cugraph-dgl/examples/graphsage/node-classification.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/python/cugraph-dgl/examples/graphsage/node-classification.py b/python/cugraph-dgl/examples/graphsage/node-classification.py index 320890b0312..539fd86d136 100644 --- a/python/cugraph-dgl/examples/graphsage/node-classification.py +++ b/python/cugraph-dgl/examples/graphsage/node-classification.py @@ -243,7 +243,9 @@ def train(args, device, g, dataset, model): else: g = g.to("cuda" if args.mode == "gpu_dgl" else "cpu") - device = torch.device("cpu" if args.mode == "cpu" else "cuda") + device = torch.device( + "cpu" if args.mode == "cpu" or args.mode == "mixed" else "cuda" + ) # create GraphSAGE model feat_shape = (