From 7b811738bac92d4a7ba176e5784a81f663f9db6b Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Wed, 31 Jul 2024 15:27:40 +0100 Subject: [PATCH] MG Implementation K-Truss (#4438) This PR adds an MG implementation of K-Truss leveraging the C API Authors: - Joseph Nke (https://github.com/jnke2016) - Ralph Liu (https://github.com/nv-rliu) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cugraph/pull/4438 --- .../cugraph/community/ktruss_subgraph.py | 62 ++------- python/cugraph/cugraph/dask/__init__.py | 1 + .../cugraph/dask/community/__init__.py | 3 +- .../cugraph/dask/community/ktruss_subgraph.py | 119 ++++++++++++++++++ .../tests/community/test_k_truss_subgraph.py | 34 ----- .../community/test_k_truss_subgraph_mg.py | 105 ++++++++++++++++ .../pylibcugraph/k_truss_subgraph.pyx | 2 +- 7 files changed, 238 insertions(+), 88 deletions(-) create mode 100644 python/cugraph/cugraph/dask/community/ktruss_subgraph.py create mode 100644 python/cugraph/cugraph/tests/community/test_k_truss_subgraph_mg.py diff --git a/python/cugraph/cugraph/community/ktruss_subgraph.py b/python/cugraph/cugraph/community/ktruss_subgraph.py index 1799c50252f..bcf8527e17b 100644 --- a/python/cugraph/cugraph/community/ktruss_subgraph.py +++ b/python/cugraph/cugraph/community/ktruss_subgraph.py @@ -11,19 +11,16 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from cugraph.structure.graph_classes import Graph from typing import Union + +import cudf +from pylibcugraph import k_truss_subgraph as pylibcugraph_k_truss_subgraph +from pylibcugraph import ResourceHandle +from cugraph.structure.graph_classes import Graph from cugraph.utilities import ( ensure_cugraph_obj_for_nx, cugraph_to_nx, ) - -from pylibcugraph import k_truss_subgraph as pylibcugraph_k_truss_subgraph -from pylibcugraph import ResourceHandle -import warnings - -from numba import cuda -import cudf from cugraph.utilities.utils import import_optional # FIXME: the networkx.Graph type used in the type annotation for @@ -34,37 +31,17 @@ networkx = import_optional("networkx") -# FIXME: special case for ktruss on CUDA 11.4: an 11.4 bug causes ktruss to -# crash in that environment. Allow ktruss to import on non-11.4 systems, but -# raise an exception if ktruss is directly imported on 11.4. -def _ensure_compatible_cuda_version(): - try: - cuda_version = cuda.runtime.get_version() - except cuda.cudadrv.runtime.CudaRuntimeAPIError: - cuda_version = "n/a" - - unsupported_cuda_version = (11, 4) - - if cuda_version == unsupported_cuda_version: - ver_string = ".".join([str(n) for n in unsupported_cuda_version]) - raise NotImplementedError( - "k_truss is not currently supported in CUDA" f" {ver_string} environments." - ) - - def k_truss( G: Union[Graph, "networkx.Graph"], k: int ) -> Union[Graph, "networkx.Graph"]: """ Returns the K-Truss subgraph of a graph for a specific k. - NOTE: this function is currently not available on CUDA 11.4 systems. - - The k-truss of a graph is a subgraph where each edge is part of at least - (k−2) triangles. K-trusses are used for finding tighlty knit groups of - vertices in a graph. A k-truss is a relaxation of a k-clique in the graph - and was define in [1]. Finding cliques is computationally demanding and - finding the maximal k-clique is known to be NP-Hard. 
+ The k-truss of a graph is a subgraph where each edge is incident to at + least (k−2) triangles. K-trusses are used for finding tightly knit groups + of vertices in a graph. A k-truss is a relaxation of a k-clique in the graph. + Finding cliques is computationally demanding and finding the maximal + k-clique is known to be NP-Hard. Parameters ---------- @@ -89,9 +66,6 @@ def k_truss( >>> k_subgraph = cugraph.k_truss(G, 3) """ - - _ensure_compatible_cuda_version() - G, isNx = ensure_cugraph_obj_for_nx(G) if isNx is True: @@ -159,12 +133,6 @@ def ktruss_subgraph( k : int The desired k to be used for extracting the k-truss subgraph. - use_weights : bool, optional (default=True) - Whether the output should contain the edge weights if G has them. - - Deprecated: If 'weights' were passed at the graph creation, they will - be used. - Returns ------- G_truss : cuGraph.Graph @@ -177,20 +145,10 @@ def ktruss_subgraph( >>> k_subgraph = cugraph.ktruss_subgraph(G, 3, use_weights=False) """ - _ensure_compatible_cuda_version() - KTrussSubgraph = Graph() if G.is_directed(): raise ValueError("input graph must be undirected") - if use_weights: - warning_msg = ( - "The use_weights flag is deprecated " - "and will be removed in the next release. if weights " - "were passed at the graph creation, they will be used." 
- ) - warnings.warn(warning_msg, FutureWarning) - sources, destinations, edge_weights, _ = pylibcugraph_k_truss_subgraph( resource_handle=ResourceHandle(), graph=G._plc_graph, diff --git a/python/cugraph/cugraph/dask/__init__.py b/python/cugraph/cugraph/dask/__init__.py index 6d86982142b..b1588008bc6 100644 --- a/python/cugraph/cugraph/dask/__init__.py +++ b/python/cugraph/cugraph/dask/__init__.py @@ -23,6 +23,7 @@ from .community.triangle_count import triangle_count from .community.egonet import ego_graph from .community.induced_subgraph import induced_subgraph +from .community.ktruss_subgraph import ktruss_subgraph from .centrality.katz_centrality import katz_centrality from .components.connectivity import weakly_connected_components from .sampling.uniform_neighbor_sample import uniform_neighbor_sample diff --git a/python/cugraph/cugraph/dask/community/__init__.py b/python/cugraph/cugraph/dask/community/__init__.py index 657d9df101b..9b5301d0e42 100644 --- a/python/cugraph/cugraph/dask/community/__init__.py +++ b/python/cugraph/cugraph/dask/community/__init__.py @@ -1,4 +1,4 @@ -# Copyright (c) 2020-2023, NVIDIA CORPORATION. +# Copyright (c) 2020-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,3 +15,4 @@ from .triangle_count import triangle_count from .induced_subgraph import induced_subgraph from .leiden import leiden +from .ktruss_subgraph import ktruss_subgraph diff --git a/python/cugraph/cugraph/dask/community/ktruss_subgraph.py b/python/cugraph/cugraph/dask/community/ktruss_subgraph.py new file mode 100644 index 00000000000..2ecca069ea5 --- /dev/null +++ b/python/cugraph/cugraph/dask/community/ktruss_subgraph.py @@ -0,0 +1,119 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +from typing import Tuple + +import cudf +import cupy as cp +from dask.distributed import wait, default_client +import dask_cudf + +from pylibcugraph import ( + ResourceHandle, + k_truss_subgraph as pylibcugraph_k_truss_subgraph, +) +import cugraph.dask.comms.comms as Comms + + +def _call_k_truss_subgraph( + sID: bytes, + mg_graph_x, + k: int, + do_expensive_check: bool, +) -> Tuple[cp.ndarray, cp.ndarray, cp.ndarray]: + + return pylibcugraph_k_truss_subgraph( + resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), + graph=mg_graph_x, + k=k, + do_expensive_check=do_expensive_check, + ) + + +def convert_to_cudf(cp_arrays: cp.ndarray) -> cudf.DataFrame: + cp_src, cp_dst, cp_weight, _ = cp_arrays + + df = cudf.DataFrame() + if cp_src is not None: + df["src"] = cp_src + df["dst"] = cp_dst + if cp_weight is not None: + df["weight"] = cp_weight + + return df + + +def ktruss_subgraph(input_graph, k: int) -> dask_cudf.DataFrame: + """ + Returns the K-Truss subgraph of a graph for a specific k. + + The k-truss of a graph is a subgraph where each edge is incident to at + least (k−2) triangles. K-trusses are used for finding tightly knit groups + of vertices in a graph. A k-truss is a relaxation of a k-clique in the graph. + Finding cliques is computationally demanding and finding the maximal + k-clique is known to be NP-Hard. 
+ + Parameters + ---------- + input_graph : cugraph.Graph + Graph or matrix object, which should contain the connectivity + information. Edge weights, if present, should be single or double + precision floating point values + + k : int + The desired k to be used for extracting the k-truss subgraph. + + + Returns + ------- + k_truss_edge_lists : dask_cudf.DataFrame + Distributed GPU data frame containing all source identifiers, + destination identifiers, and edge weights belonging to the truss. + """ + if input_graph.is_directed(): + raise ValueError("input graph must be undirected") + # Initialize dask client + client = default_client() + + do_expensive_check = False + + result = [ + client.submit( + _call_k_truss_subgraph, + Comms.get_session_id(), + input_graph._plc_graph[w], + k, + do_expensive_check, + workers=[w], + allow_other_workers=False, + ) + for w in Comms.get_workers() + ] + wait(result) + + cudf_result = [client.submit(convert_to_cudf, cp_arrays) for cp_arrays in result] + + wait(cudf_result) + + ddf = dask_cudf.from_delayed(cudf_result).persist() + wait(ddf) + # Wait until the inactive futures are released + wait([(r.release(), c_r.release()) for r, c_r in zip(result, cudf_result)]) + + if input_graph.renumbered: + ddf = input_graph.unrenumber(ddf, "src") + ddf = input_graph.unrenumber(ddf, "dst") + + return ddf diff --git a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py index 063d7fc735f..bbd2866b5df 100644 --- a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py +++ b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph.py @@ -20,7 +20,6 @@ import cugraph from cugraph.testing import utils from cugraph.datasets import polbooks, karate_asymmetric -from numba import cuda # ============================================================================= @@ -67,32 +66,7 @@ def compare_k_truss(k_truss_cugraph, k, ground_truth_file): return True 
-__cuda_version = cuda.runtime.get_version() -__unsupported_cuda_version = (11, 4) - - -# FIXME: remove when ktruss is supported on CUDA 11.4 -@pytest.mark.sg -def test_unsupported_cuda_version(): - """ - Ensures the proper exception is raised when ktruss is called in an - unsupported env, and not when called in a supported env. - """ - k = 5 - - G = polbooks.get_graph(download=True) - if __cuda_version == __unsupported_cuda_version: - with pytest.raises(NotImplementedError): - cugraph.k_truss(G, k) - else: - cugraph.k_truss(G, k) - - @pytest.mark.sg -@pytest.mark.skipif( - (__cuda_version == __unsupported_cuda_version), - reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.", -) @pytest.mark.parametrize("_, nx_ground_truth", utils.DATASETS_KTRUSS) def test_ktruss_subgraph_Graph(_, nx_ground_truth): @@ -104,10 +78,6 @@ def test_ktruss_subgraph_Graph(_, nx_ground_truth): @pytest.mark.sg -@pytest.mark.skipif( - (__cuda_version == __unsupported_cuda_version), - reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.", -) def test_ktruss_subgraph_Graph_nx(): k = 5 dataset_path = polbooks.get_path() @@ -122,10 +92,6 @@ def test_ktruss_subgraph_Graph_nx(): @pytest.mark.sg -@pytest.mark.skipif( - (__cuda_version == __unsupported_cuda_version), - reason="skipping on unsupported CUDA " f"{__unsupported_cuda_version} environment.", -) def test_ktruss_subgraph_directed_Graph(): k = 5 edgevals = True diff --git a/python/cugraph/cugraph/tests/community/test_k_truss_subgraph_mg.py b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph_mg.py new file mode 100644 index 00000000000..12e5146c2de --- /dev/null +++ b/python/cugraph/cugraph/tests/community/test_k_truss_subgraph_mg.py @@ -0,0 +1,105 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import gc + +import pytest + +import cugraph +import cugraph.dask as dcg +from cudf.testing.testing import assert_frame_equal +from cugraph.datasets import karate, dolphins, netscience + + +# ============================================================================= +# Pytest Setup / Teardown - called for each test function +# ============================================================================= + + +def setup_function(): + gc.collect() + + +# ============================================================================= +# Parameters +# ============================================================================= + + +DATASETS = [karate, dolphins, netscience] +IS_DIRECTED = [True, False] +K_VALUE = [4, 6, 8] + + +# ============================================================================= +# Helper functions +# ============================================================================= + + +def get_sg_graph(dataset, directed): + G = dataset.get_graph(create_using=cugraph.Graph(directed=directed)) + + return G + + +def get_mg_graph(dataset, directed): + ddf = dataset.get_dask_edgelist() + dg = cugraph.Graph(directed=directed) + dg.from_dask_cudf_edgelist( + ddf, + source="src", + destination="dst", + edge_attr="wgt", + renumber=True, + store_transposed=True, + ) + + return dg + + +# ============================================================================= +# Tests +# ============================================================================= + + +@pytest.mark.mg +@pytest.mark.parametrize("dataset", DATASETS) 
+@pytest.mark.parametrize("is_directed", IS_DIRECTED) +@pytest.mark.parametrize("k", K_VALUE) +def test_mg_ktruss_subgraph(dask_client, benchmark, dataset, is_directed, k): + # Create SG and MG Graphs + g = get_sg_graph(dataset, is_directed) + dg = get_mg_graph(dataset, is_directed) + + if is_directed: + with pytest.raises(ValueError): + result_ktruss_subgraph = benchmark(dcg.ktruss_subgraph, dg, k) + else: + sg_ktruss_subgraph = cugraph.ktruss_subgraph(g, k=k) + result_ktruss_subgraph = benchmark(dcg.ktruss_subgraph, dg, k) + + mg_df = result_ktruss_subgraph + + if len(mg_df) != 0 and len(sg_ktruss_subgraph.input_df) != 0: + # FIXME: 'edges()' or 'view_edgelist()' takes half the edges out if + # 'directed=False'. + sg_result = sg_ktruss_subgraph.input_df + + sg_df = sg_result.sort_values(["src", "dst"]).reset_index(drop=True) + mg_df = mg_df.compute().sort_values(["src", "dst"]).reset_index(drop=True) + + assert_frame_equal(sg_df, mg_df, check_dtype=False, check_like=True) + + else: + # There is no edge left when extracting the K-Truss + assert len(sg_ktruss_subgraph.input_df) == 0 + assert len(mg_df) == 0 diff --git a/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx b/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx index 6e4cd2e282a..9ea533c9f28 100644 --- a/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx +++ b/python/pylibcugraph/pylibcugraph/k_truss_subgraph.pyx @@ -65,7 +65,7 @@ def k_truss_subgraph(ResourceHandle resource_handle, Handle to the underlying device resources needed for referencing data and running algorithms. - graph : SGGraph + graph : SGGraph or MGGraph The input graph. k: size_t