From c7b720d87deb59f934dcc922644e8bd9ced0cdd9 Mon Sep 17 00:00:00 2001 From: Huiyu Xie Date: Tue, 9 Jan 2024 08:56:10 -0800 Subject: [PATCH 1/7] [FEA]: Add DASK edgelist and graph support to the Dataset API (#4035) Hi! I choose to go further with some simple work other than docs. This PR is going to close #3218. Here is what I have done in this PR: 1. Added `get_dask_edgelist()` and `get_dask_graph()` (and another internal helper function `__download_dask_csv()`) to Dataset API. 2. Executed all necessary tests for these new functions. 3. Improved existing functions in the Dataset API and conducted tests to verify improvements. Here are some additional details regarding this PR: 1. The building and testing were conducted using version 23.12 instead of the default 24.02. Since Cugraph-ops library is no longer open, I failed to build from source using version 24.02. I built and tested the code in version 23.12 and then transferred the updated file to 24.02 before creating this PR. (I would appreciate any guidance on how to build from version 24.02 for external contributors). 2. 
All tests from the test file have passed, but some warnings remain, as shown below ```bash ============================================================ warnings summary ============================================================ cugraph/tests/utils/test_dataset.py::test_get_dask_graph[dataset0] cugraph/tests/utils/test_dataset.py::test_get_dask_graph[dataset0] cugraph/tests/utils/test_dataset.py::test_get_dask_graph[dataset0] cugraph/tests/utils/test_dataset.py::test_weights_dask[dataset0] cugraph/tests/utils/test_dataset.py::test_weights_dask[dataset0] cugraph/tests/utils/test_dataset.py::test_weights_dask[dataset0] cugraph/tests/utils/test_dataset.py::test_weights_dask[dataset0] cugraph/tests/utils/test_dataset.py::test_weights_dask[dataset0] cugraph/tests/utils/test_dataset.py::test_weights_dask[dataset0] /home/ubuntu/miniconda3/envs/cugraph_dev/lib/python3.10/site-packages/cudf/core/index.py:3284: FutureWarning: cudf.StringIndex is deprecated and will be removed from cudf in a future version. Use cudf.Index with the appropriate dtype instead. warnings.warn( -- Docs: https://docs.pytest.org/en/stable/how-to/capture-warnings.html ``` I think above warnings came from the function call `from_dask_cudf_edgelist` but currently I have no idea how to remove them. I will do my best to address it if anyone has any ideas about it. 3. The `get_edgelist()` function returns a deep copy of the object, but this is not supported for `get_dask_edgelist()` since only shallow copy is allowed for Dask cuDF dataframe (see [docs](https://docs.rapids.ai/api/dask-cudf/legacy/api/#dask_cudf.DataFrame.copy)). This will lead to a problem where if a user modifies the dataframe, the changes will be reflected in the internal `self._edgelist` object. So when `get_dask_graph()` is called later, the resulting graph will differ from the one directly constructed from the data file. 4. I am uncertain about the requirements for (1) Identifying datasets and (2) Adding them to Dataset. 
If there is a need to add another function for determining whether a dataset requires MG handling based on its size, or to tag the dataset metadata (.yaml file) to indicate the necessity for MG processing, please let me know. Also, I welcome any suggestions for further features. 5. When I ran pytest on other test files, the most common warnings were ```bash /home/ubuntu/miniconda3/envs/cugraph_dev/lib/python3.10/site-packages/dask_cudf/io/csv.py:79: FutureWarning: `chunksize` is deprecated and will be removed in the future. Please use `blocksize` instead. ``` The keyword `chunksize` is no longer in use (check [docs](https://docs.rapids.ai/api/dask-cudf/legacy/api/#dask_cudf.read_csv) here). I have checked all related functions in the repository and found that they currently use `chunksize`. If there is a need to change them to `blocksize`, I will create another PR to address this issue. Any comments and suggestions are welcome! Authors: - Huiyu Xie (https://github.com/huiyuxie) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4035 --- python/cugraph/cugraph/datasets/dataset.py | 126 ++++++++++++++++-- .../cugraph/tests/utils/test_dataset.py | 77 ++++++++++- 2 files changed, 191 insertions(+), 12 deletions(-) diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py index dd7aa0df00a..9817d15dacb 100644 --- a/python/cugraph/cugraph/datasets/dataset.py +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,10 +12,13 @@ # limitations under the License. 
import cudf +import dask_cudf import yaml import os import pandas as pd +import cugraph.dask as dcg from pathlib import Path +import urllib.request from cugraph.structure.graph_classes import Graph @@ -138,9 +141,8 @@ def __download_csv(self, url): filename = self.metadata["name"] + self.metadata["file_type"] if self._dl_path.path.is_dir(): - df = cudf.read_csv(url) self._path = self._dl_path.path / filename - df.to_csv(self._path, index=False) + urllib.request.urlretrieve(url, str(self._path)) else: raise RuntimeError( @@ -149,7 +151,6 @@ def __download_csv(self, url): return self._path def unload(self): - """ Remove all saved internal objects, forcing them to be re-created when accessed. @@ -162,7 +163,7 @@ def unload(self): def get_edgelist(self, download=False, reader="cudf"): """ - Return an Edgelist + Return an Edgelist. Parameters ---------- @@ -212,6 +213,47 @@ def get_edgelist(self, download=False, reader="cudf"): return self._edgelist.copy() + def get_dask_edgelist(self, download=False): + """ + Return a distributed Edgelist. + + Parameters + ---------- + download : Boolean (default=False) + Automatically download the dataset from the 'url' location within + the YAML file. + """ + if self._edgelist is None: + full_path = self.get_path() + if not full_path.is_file(): + if download: + full_path = self.__download_csv(self.metadata["url"]) + else: + raise RuntimeError( + f"The datafile {full_path} does not" + " exist. 
Try setting download=True" + " to download the datafile" + ) + + header = None + if isinstance(self.metadata["header"], int): + header = self.metadata["header"] + + blocksize = dcg.get_chunksize(full_path) + self._edgelist = dask_cudf.read_csv( + path=full_path, + blocksize=blocksize, + delimiter=self.metadata["delim"], + names=self.metadata["col_names"], + dtype={ + self.metadata["col_names"][i]: self.metadata["col_types"][i] + for i in range(len(self.metadata["col_types"])) + }, + header=header, + ) + + return self._edgelist.copy() + def get_graph( self, download=False, @@ -249,10 +291,10 @@ def get_graph( if create_using is None: G = Graph() elif isinstance(create_using, Graph): - # what about BFS if trnaposed is True + # what about BFS if transposed is True attrs = {"directed": create_using.is_directed()} G = type(create_using)(**attrs) - elif type(create_using) is type: + elif issubclass(create_using, Graph): G = create_using() else: raise TypeError( @@ -277,9 +319,74 @@ def get_graph( ) return G + def get_dask_graph( + self, + download=False, + create_using=Graph, + ignore_weights=False, + store_transposed=False, + ): + """ + Return a distributed Graph object. + + Parameters + ---------- + download : Boolean (default=False) + Downloads the dataset from the web. + + create_using: cugraph.Graph (instance or class), optional + (default=Graph) + Specify the type of Graph to create. Can pass in an instance to + create a Graph instance with specified 'directed' attribute. + + ignore_weights : Boolean (default=False) + Ignores weights in the dataset if True, resulting in an + unweighted Graph. If False (the default), weights from the + dataset -if present- will be applied to the Graph. If the + dataset does not contain weights, the Graph returned will + be unweighted regardless of ignore_weights. + + store_transposed : bool, optional (default=False) + If True, stores the transpose of the adjacency matrix. Required + for certain algorithms. 
+ """ + if self._edgelist is None: + self.get_dask_edgelist(download) + + if create_using is None: + G = Graph() + elif isinstance(create_using, Graph): + attrs = {"directed": create_using.is_directed()} + G = type(create_using)(**attrs) + elif issubclass(create_using, Graph): + G = create_using() + else: + raise TypeError( + "create_using must be a cugraph.Graph " + "(or subclass) type or instance, got: " + f"{type(create_using)}" + ) + + if len(self.metadata["col_names"]) > 2 and not (ignore_weights): + G.from_dask_cudf_edgelist( + self._edgelist, + source=self.metadata["col_names"][0], + destination=self.metadata["col_names"][1], + edge_attr=self.metadata["col_names"][2], + store_transposed=store_transposed, + ) + else: + G.from_dask_cudf_edgelist( + self._edgelist, + source=self.metadata["col_names"][0], + destination=self.metadata["col_names"][1], + store_transposed=store_transposed, + ) + return G + def get_path(self): """ - Returns the location of the stored dataset file + Returns the location of the stored dataset file. """ if self._path is None: self._path = self._dl_path.path / ( @@ -347,8 +454,7 @@ def download_all(force=False): filename = meta["name"] + meta["file_type"] save_to = default_download_dir.path / filename if not save_to.is_file() or force: - df = cudf.read_csv(meta["url"]) - df.to_csv(save_to, index=False) + urllib.request.urlretrieve(meta["url"], str(save_to)) def set_download_dir(path): diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py index 60bc6dbb45a..39f7ed8850b 100644 --- a/python/cugraph/cugraph/tests/utils/test_dataset.py +++ b/python/cugraph/cugraph/tests/utils/test_dataset.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -20,6 +20,7 @@ import pytest import cudf +import dask_cudf from cugraph.structure import Graph from cugraph.testing import ( RAPIDS_DATASET_ROOT_DIR_PATH, @@ -29,6 +30,7 @@ BENCHMARKING_DATASETS, ) from cugraph import datasets +from cugraph.dask.common.mg_utils import is_single_gpu # Add the sg marker to all tests in this module. pytestmark = pytest.mark.sg @@ -37,6 +39,7 @@ ############################################################################### # Fixtures + # module fixture - called once for this module @pytest.fixture(scope="module") def tmpdir(): @@ -77,6 +80,7 @@ def setup(tmpdir): ############################################################################### # Helpers + # check if there is a row where src == dst def has_selfloop(dataset): if not dataset.metadata["is_directed"]: @@ -115,6 +119,7 @@ def is_symmetric(dataset): ############################################################################### # Tests + # setting download_dir to None effectively re-initialized the default def test_env_var(): os.environ["RAPIDS_DATASET_ROOT_DIR"] = "custom_storage_location" @@ -150,9 +155,19 @@ def test_download(dataset): assert dataset.get_path().is_file() +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_download_dask(dask_client, dataset): + E = dataset.get_dask_edgelist(download=True) + + assert E is not None + assert dataset.get_path().is_file() + + @pytest.mark.parametrize("dataset", SMALL_DATASETS) def test_reader(dataset): - # defaults to using cudf.read_csv + # defaults to using cudf E = dataset.get_edgelist(download=True) assert E is not None @@ -171,18 +186,46 @@ def test_reader(dataset): dataset.get_edgelist(reader=None) +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on 
CI") +@pytest.mark.parametrize("dataset", SMALL_DATASETS) +def test_reader_dask(dask_client, dataset): + # using dask_cudf + E = dataset.get_dask_edgelist(download=True) + + assert E is not None + assert isinstance(E, dask_cudf.core.DataFrame) + dataset.unload() + + @pytest.mark.parametrize("dataset", ALL_DATASETS) def test_get_edgelist(dataset): E = dataset.get_edgelist(download=True) assert E is not None +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_get_dask_edgelist(dask_client, dataset): + E = dataset.get_dask_edgelist(download=True) + assert E is not None + + @pytest.mark.parametrize("dataset", ALL_DATASETS) def test_get_graph(dataset): G = dataset.get_graph(download=True) assert G is not None +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_get_dask_graph(dask_client, dataset): + G = dataset.get_dask_graph(download=True) + assert G is not None + + @pytest.mark.parametrize("dataset", ALL_DATASETS) def test_metadata(dataset): M = dataset.metadata @@ -207,6 +250,16 @@ def test_weights(dataset): assert not G.is_weighted() +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", WEIGHTED_DATASETS) +def test_weights_dask(dask_client, dataset): + G = dataset.get_dask_graph(download=True) + assert G.is_weighted() + G = dataset.get_dask_graph(download=True, ignore_weights=True) + assert not G.is_weighted() + + @pytest.mark.parametrize("dataset", SMALL_DATASETS) def test_create_using(dataset): G = dataset.get_graph(download=True) @@ -216,6 +269,26 @@ def test_create_using(dataset): G = dataset.get_graph(download=True, 
create_using=Graph(directed=True)) assert G.is_directed() + # using a non-Graph type should raise an error + with pytest.raises(TypeError): + dataset.get_graph(download=True, create_using=set) + + +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", SMALL_DATASETS) +def test_create_using_dask(dask_client, dataset): + G = dataset.get_dask_graph(download=True) + assert not G.is_directed() + G = dataset.get_dask_graph(download=True, create_using=Graph) + assert not G.is_directed() + G = dataset.get_dask_graph(download=True, create_using=Graph(directed=True)) + assert G.is_directed() + + # using a non-Graph type should raise an error + with pytest.raises(TypeError): + dataset.get_dask_graph(download=True, create_using=set) + def test_ctor_with_datafile(): from cugraph.datasets import karate From cd5fc6f6510056804af39f9deaad8885e632458a Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Tue, 9 Jan 2024 15:19:27 -0500 Subject: [PATCH 2/7] build wheels for `cugraph-dgl` and `cugraph-pyg` (#4075) Closes #4061 Authors: - Tingyu Wang (https://github.com/tingyu66) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cugraph/pull/4075 --- .github/workflows/build.yaml | 40 ++++++++++++++++++++++++++++ .github/workflows/pr.yaml | 34 ++++++++++++++++++++++++ .github/workflows/test.yaml | 18 +++++++++++++ ci/build_wheel.sh | 8 +++--- ci/build_wheel_cugraph-dgl.sh | 6 +++++ ci/build_wheel_cugraph-pyg.sh | 6 +++++ ci/test_wheel_cugraph-dgl.sh | 34 ++++++++++++++++++++++++ ci/test_wheel_cugraph-pyg.sh | 43 +++++++++++++++++++++++++++++++ dependencies.yaml | 18 +++++++++++++ python/cugraph-dgl/pyproject.toml | 21 +++++++++++---- python/cugraph-pyg/pyproject.toml | 13 +++++++++- 11 files changed, 232 insertions(+), 9 deletions(-) create mode 100755 ci/build_wheel_cugraph-dgl.sh create 
mode 100755 ci/build_wheel_cugraph-pyg.sh create mode 100755 ci/test_wheel_cugraph-dgl.sh create mode 100755 ci/test_wheel_cugraph-pyg.sh diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 85ac682daf4..273a8902eae 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -133,3 +133,43 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: nx-cugraph + wheel-build-cugraph-dgl: + needs: wheel-publish-cugraph + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_cugraph-dgl.sh + wheel-publish-cugraph-dgl: + needs: wheel-build-cugraph-dgl + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: cugraph-dgl + wheel-build-cugraph-pyg: + needs: wheel-publish-cugraph + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_cugraph-pyg.sh + wheel-publish-cugraph-pyg: + needs: wheel-build-cugraph-pyg + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: cugraph-pyg diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 82c71efffdb..84d22f8e896 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,6 +25,10 @@ jobs: - wheel-tests-cugraph - 
wheel-build-nx-cugraph - wheel-tests-nx-cugraph + - wheel-build-cugraph-dgl + - wheel-tests-cugraph-dgl + - wheel-build-cugraph-pyg + - wheel-tests-cugraph-pyg - devcontainer secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 @@ -127,6 +131,36 @@ jobs: with: build_type: pull-request script: ci/test_wheel_nx-cugraph.sh + wheel-build-cugraph-dgl: + needs: wheel-tests-cugraph + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/build_wheel_cugraph-dgl.sh + wheel-tests-cugraph-dgl: + needs: wheel-build-cugraph-dgl + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/test_wheel_cugraph-dgl.sh + matrix_filter: map(select(.ARCH == "amd64")) + wheel-build-cugraph-pyg: + needs: wheel-tests-cugraph + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/build_wheel_cugraph-pyg.sh + wheel-tests-cugraph-pyg: + needs: wheel-build-cugraph-pyg + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/test_wheel_cugraph-pyg.sh + matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0")) devcontainer: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 0d9f4d291c3..773358ede8d 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -57,3 +57,21 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_nx-cugraph.sh + wheel-tests-cugraph-dgl: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: nightly + branch: ${{ 
inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/test_wheel_cugraph-dgl.sh + wheel-tests-cugraph-pyg: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/test_wheel_cugraph-pyg.sh diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 163520ea1da..828d8948143 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -euo pipefail @@ -36,7 +36,7 @@ if ! rapids-is-release-build; then alpha_spec=',>=0.0.0a0' fi -for dep in rmm cudf raft-dask pylibcugraph pylibraft ucx-py; do +for dep in rmm cudf cugraph raft-dask pylibcugraph pylibcugraphops pylibraft ucx-py; do sed -r -i "s/${dep}==(.*)\"/${dep}${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} done @@ -55,7 +55,9 @@ cd "${package_dir}" python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check # pure-python packages should not have auditwheel run on them. -if [[ ${package_name} == "nx-cugraph" ]]; then +if [[ ${package_name} == "nx-cugraph" ]] || \ + [[ ${package_name} == "cugraph-dgl" ]] || \ + [[ ${package_name} == "cugraph-pyg" ]]; then RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 dist else mkdir -p final_dist diff --git a/ci/build_wheel_cugraph-dgl.sh b/ci/build_wheel_cugraph-dgl.sh new file mode 100755 index 00000000000..d62f810cba4 --- /dev/null +++ b/ci/build_wheel_cugraph-dgl.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -euo pipefail + +./ci/build_wheel.sh cugraph-dgl python/cugraph-dgl diff --git a/ci/build_wheel_cugraph-pyg.sh b/ci/build_wheel_cugraph-pyg.sh new file mode 100755 index 00000000000..97baa243f73 --- /dev/null +++ b/ci/build_wheel_cugraph-pyg.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +./ci/build_wheel.sh cugraph-pyg python/cugraph-pyg diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh new file mode 100755 index 00000000000..90c86af95fe --- /dev/null +++ b/ci/test_wheel_cugraph-dgl.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -eoxu pipefail + +package_name="cugraph-dgl" +package_dir="python/cugraph-dgl" + +python_package_name=$(echo ${package_name}|sed 's/-/_/g') + +mkdir -p ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +# use 'ls' to expand wildcard before adding `[extra]` requires for pip +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# pip creates wheels using python package names +python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] + + +PKG_CUDA_VER="$(echo ${CUDA_VERSION} | cut -d '.' 
-f1,2 | tr -d '.')" +PKG_CUDA_VER_MAJOR=${PKG_CUDA_VER:0:2} +if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then + PYTORCH_CUDA_VER="121" +else + PYTORCH_CUDA_VER=$PKG_CUDA_VER +fi +PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}" +DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html" + +rapids-logger "Installing PyTorch and DGL" +rapids-retry python -m pip install torch --index-url ${PYTORCH_URL} +rapids-retry python -m pip install dgl --find-links ${DGL_URL} + +python -m pytest python/cugraph-dgl/tests diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh new file mode 100755 index 00000000000..9a211c81886 --- /dev/null +++ b/ci/test_wheel_cugraph-pyg.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -eoxu pipefail + +package_name="cugraph-pyg" +package_dir="python/cugraph-pyg" + +python_package_name=$(echo ${package_name}|sed 's/-/_/g') + +mkdir -p ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +# use 'ls' to expand wildcard before adding `[extra]` requires for pip +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# pip creates wheels using python package names +python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] + +# RAPIDS_DATASET_ROOT_DIR is used by test scripts +export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" + +if [[ "${CUDA_VERSION}" == "11.8.0" ]]; then + rapids-logger "Installing PyTorch and PyG dependencies" + PYTORCH_URL="https://download.pytorch.org/whl/cu118" + rapids-retry python -m pip install torch==2.1.0 --index-url ${PYTORCH_URL} + rapids-retry python -m pip install torch-geometric==2.4.0 + rapids-retry python -m pip install \ + pyg_lib \ + torch_scatter \ + torch_sparse \ + torch_cluster \ + torch_spline_conv \ + -f https://data.pyg.org/whl/torch-2.1.0+cu118.html + + rapids-logger "pytest cugraph-pyg (single GPU)" + python -m pytest \ + --cache-clear 
\ + --ignore=tests/int \ + --ignore=tests/mg \ + python/cugraph-pyg/cugraph_pyg/tests +else + rapids-logger "skipping cugraph-pyg wheel test on CUDA!=11.8" +fi diff --git a/dependencies.yaml b/dependencies.yaml index 579acec3996..3eed525bfe4 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -165,6 +165,15 @@ files: table: project includes: - python_run_cugraph_dgl + - depends_on_pylibcugraphops + py_test_cugraph_dgl: + output: pyproject + pyproject_dir: python/cugraph-dgl + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common py_build_cugraph_pyg: output: pyproject pyproject_dir: python/cugraph-pyg @@ -179,6 +188,15 @@ files: table: project includes: - python_run_cugraph_pyg + - depends_on_pylibcugraphops + py_test_cugraph_pyg: + output: pyproject + pyproject_dir: python/cugraph-pyg + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common py_build_cugraph_service_client: output: pyproject pyproject_dir: python/cugraph-service/client diff --git a/python/cugraph-dgl/pyproject.toml b/python/cugraph-dgl/pyproject.toml index 62fa8ab6368..65ee414da44 100644 --- a/python/cugraph-dgl/pyproject.toml +++ b/python/cugraph-dgl/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. [build-system] @@ -18,15 +18,26 @@ authors = [ ] license = { text = "Apache 2.0" } requires-python = ">=3.9" +classifiers = [ + "Intended Audience :: Developers", + "Programming Language :: Python", +] dependencies = [ "cugraph==24.2.*", "numba>=0.57", "numpy>=1.21", + "pylibcugraphops==24.2.*", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
+ +[project.optional-dependencies] +test = [ + "pandas", + "pytest", + "pytest-benchmark", + "pytest-cov", + "pytest-xdist", + "scipy", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -classifiers = [ - "Intended Audience :: Developers", - "Programming Language :: Python", -] [project.urls] Homepage = "https://github.com/rapidsai/cugraph" diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index b0671644982..c4bd00bb86c 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. [build-system] @@ -29,12 +29,23 @@ dependencies = [ "cugraph==24.2.*", "numba>=0.57", "numpy>=1.21", + "pylibcugraphops==24.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] Homepage = "https://github.com/rapidsai/cugraph" Documentation = "https://docs.rapids.ai/api/cugraph/stable/" +[project.optional-dependencies] +test = [ + "pandas", + "pytest", + "pytest-benchmark", + "pytest-cov", + "pytest-xdist", + "scipy", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + [tool.setuptools] license-files = ["LICENSE"] From 5e8e9b546f413fdbfd0cb1886b9ec5e715346019 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 10 Jan 2024 08:00:43 -0800 Subject: [PATCH 3/7] Fix MG weighted similarity test failure (#4054) MG weighted similarity tests assume symmetric graphs (undirected). When we remove multi-edges, we pick arbitrary edges and this can lead to an asymmetry in edge weights. 
This PR adds an additional flag to keep minimum value edges in `remove_multi_edges` and use this function if the input graph is symmetric to maintain weight symmetry. Applying the non-breaking label as this PR does not change existing code behavior if `keep_min_value_edge` is not provided. Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) URL: https://github.com/rapidsai/cugraph/pull/4054 --- cpp/include/cugraph/graph_functions.hpp | 21 +++++-- cpp/src/c_api/graph_mg.cpp | 7 ++- cpp/src/c_api/graph_sg.cpp | 7 ++- cpp/src/structure/remove_multi_edges.cu | 20 +++--- cpp/src/structure/remove_multi_edges_impl.cuh | 63 ++++++++++++------- .../weighted_similarity_test.cpp | 14 +++-- cpp/tests/utilities/test_graphs.hpp | 16 ++--- 7 files changed, 97 insertions(+), 51 deletions(-) diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 6a75a420bf8..6684d31d8fd 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1005,9 +1005,14 @@ remove_self_loops(raft::handle_t const& handle, std::optional>&& edgelist_edge_types); /** - * @brief Remove all but one edge when a multi-edge exists. Note that this function does not use - * stable methods. When a multi-edge exists, one of the edges will remain, there is no - * guarantee on which one will remain. + * @brief Remove all but one edge when a multi-edge exists. + * + * When a multi-edge exists, one of the edges will remain. If @p keep_min_value_edge is false, an + * arbitrary edge will be selected among the edges in the multi-edge. 
If @p keep_min_value_edge is + * true, the edge with the minimum value will be selected. The edge weights will be first compared + * (if @p edgelist_weights.has_value() is true); edge IDs will be compared next (if @p + * edgelist_edge_ids.has_value() is true); and edge types (if @p edgelist_edge_types.has_value() is + * true) will be compared last. * * In an MG context it is assumed that edges have been shuffled to the proper GPU, * in which case any multi-edges will be on the same GPU. @@ -1024,6 +1029,11 @@ remove_self_loops(raft::handle_t const& handle, * @param edgelist_weights Optional list of edge weights * @param edgelist_edge_ids Optional list of edge ids * @param edgelist_edge_types Optional list of edge types + * @param keep_min_value_edge Flag indicating whether to keep an arbitrary edge (false) or the + * minimum value edge (true) among the edges in a multi-edge. Relevant only if @p + * edgelist_weights.has_value() | @p edgelist_edge_ids.has_value() | @p + * edgelist_edge_types.has_value() is true. Setting this to true incurs performance overhead as this + * requires more comparisons. * @return Tuple of vectors storing edge sources, destinations, optional weights, * optional edge ids, optional edge types. */ @@ -1038,6 +1048,7 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge = false); } // namespace cugraph diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index 326022a3fa9..57a589caf02 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -217,7 +217,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_dsts), std::move(edgelist_weights), std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + std::move(edgelist_edge_types), + properties_->is_symmetric + ? true /* keep minimum weight edges to maintain symmetry */ + : false); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index 7793458b53a..6745be01f95 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -200,7 +200,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_dsts), std::move(edgelist_weights), std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + std::move(edgelist_edge_types), + properties_->is_symmetric + ? true /* keep minimum weight edges to maintain symmetry */ + : false); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/structure/remove_multi_edges.cu b/cpp/src/structure/remove_multi_edges.cu index ba07d068c0e..54403f0b034 100644 --- a/cpp/src/structure/remove_multi_edges.cu +++ b/cpp/src/structure/remove_multi_edges.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,7 +27,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -39,7 +40,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -51,7 +53,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -63,7 +66,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -75,7 +79,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -87,6 +92,7 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); } // namespace cugraph diff --git 
a/cpp/src/structure/remove_multi_edges_impl.cuh b/cpp/src/structure/remove_multi_edges_impl.cuh index fdd3059f874..651876ac8b1 100644 --- a/cpp/src/structure/remove_multi_edges_impl.cuh +++ b/cpp/src/structure/remove_multi_edges_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -104,10 +104,12 @@ group_multi_edges( rmm::device_uvector&& edgelist_srcs, rmm::device_uvector&& edgelist_dsts, decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& edgelist_values, - size_t mem_frugal_threshold) + size_t mem_frugal_threshold, + bool keep_min_value_edge) { auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); auto value_first = get_dataframe_buffer_begin(edgelist_values); + auto edge_first = thrust::make_zip_iterator(pair_first, value_first); if (edgelist_srcs.size() > mem_frugal_threshold) { // FIXME: Tuning parameter to address high frequency multi-edges @@ -128,19 +130,28 @@ group_multi_edges( raft::update_host( h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream()); - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first, - pair_first + h_group_counts[0], - get_dataframe_buffer_begin(edgelist_values)); - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first + h_group_counts[0], - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values) + h_group_counts[0]); + if (keep_min_value_edge) { + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + h_group_counts[0]); + thrust::sort(handle.get_thrust_policy(), + edge_first + h_group_counts[0], + edge_first + edgelist_srcs.size()); + } else { + thrust::sort_by_key( + handle.get_thrust_policy(), pair_first, pair_first + h_group_counts[0], value_first); + 
thrust::sort_by_key(handle.get_thrust_policy(), + pair_first + h_group_counts[0], + pair_first + edgelist_srcs.size(), + value_first + h_group_counts[0]); + } } else { - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first, - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values)); + if (keep_min_value_edge) { + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_srcs.size(), + get_dataframe_buffer_begin(edgelist_values)); + } } return std::make_tuple( @@ -160,7 +171,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types) + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge) { auto total_global_mem = handle.get_device_properties().totalGlobalMem; size_t element_size = sizeof(vertex_t) * 2; @@ -187,7 +199,8 @@ remove_multi_edges(raft::handle_t const& handle, std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple( edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids)) = @@ -196,7 +209,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } else { if (edgelist_edge_types) { @@ -207,7 +221,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { 
std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights)) = detail::group_multi_edges>( @@ -215,7 +230,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } } else { @@ -228,7 +244,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids)) = detail::group_multi_edges>( @@ -236,7 +253,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_ids)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } else { if (edgelist_edge_types) { @@ -246,7 +264,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::tie(edgelist_srcs, edgelist_dsts) = detail::group_multi_edges( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), mem_frugal_threshold); diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp b/cpp/tests/link_prediction/weighted_similarity_test.cpp index ca644b76c5a..99e752c0b02 100644 --- a/cpp/tests/link_prediction/weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,9 +27,9 @@ struct Similarity_Usecase { bool use_weights{false}; - bool check_correctness{true}; size_t max_seeds{std::numeric_limits::max()}; size_t max_vertex_pairs_to_check{std::numeric_limits::max()}; + bool check_correctness{true}; }; template @@ -293,7 +293,7 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, 20, //: 100}), - ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(Similarity_Usecase{true, 20, 100, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); @@ -305,7 +305,7 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20, 100}, //: Similarity_Usecase{false,true,20,100}), - ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(Similarity_Usecase{true, 20, 100, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( @@ -319,7 +319,8 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), - ::testing::Values(Similarity_Usecase{true, true}), + ::testing::Values(Similarity_Usecase{ + true, std::numeric_limits::max(), std::numeric_limits::max(), true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( @@ -332,7 +333,8 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( // disable correctness checks for large graphs //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), - ::testing::Values(Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{ + true, std::numeric_limits::max(), 
std::numeric_limits::max(), false}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index 8cc87b26f1d..5a9dc9c90d4 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -633,12 +633,14 @@ construct_graph(raft::handle_t const& handle, if (drop_multi_edges) { std::tie(d_src_v, d_dst_v, d_weights_v, std::ignore, std::ignore) = - cugraph::remove_multi_edges(handle, - std::move(d_src_v), - std::move(d_dst_v), - std::move(d_weights_v), - std::nullopt, - std::nullopt); + cugraph::remove_multi_edges( + handle, + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + std::nullopt, + std::nullopt, + is_symmetric ? 
true /* keep minimum weight edges to maintain symmetry */ : false); } graph_t graph(handle); From ae25ea1765734728b66fa104d018979b01db2234 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Wed, 10 Jan 2024 11:55:13 -0600 Subject: [PATCH 4/7] Adds `nx-cugraph` benchmarks for 23.12 algos (SSSP, pagerank, hits, katz_centrality, degree_centrality, eigenvector_centrality) (#4065) closes rapidsai/graph_dl#404 * Adds benchmarks for algos added in the 23.12 release * SSSP * pagerank * hits * katz_centrality * degree_centrality * eigenvector_centrality * Refactors fixtures for easier usage and maintenance * uses `benchmark.pedantic` instead of `benchmark` to provide complete control over how to benchmark the algos, since `benchmark` will result in no fewer than 3 runs (calibrate timer, timed run, run to generate func return value) which can be too time consuming for slower runs. * Removes code to create `Dataset` objects for larger datasets and replaces with the equivalent objects that are now part of `cugraph.datasets` Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Erik Welch (https://github.com/eriknw) URL: https://github.com/rapidsai/cugraph/pull/4065 --- .../nx-cugraph/pytest-based/bench_algos.py | 440 ++++++++++++++---- .../python/cugraph_benchmarking/params.py | 36 +- 2 files changed, 348 insertions(+), 128 deletions(-) diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py index 971c3ff1032..a8ed18a20fc 100644 --- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py +++ b/benchmarks/nx-cugraph/pytest-based/bench_algos.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License.
# You may obtain a copy of the License at @@ -11,29 +11,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random + import networkx as nx import pandas as pd import pytest from cugraph import datasets - -# FIXME: promote these to cugraph.datasets so the following steps aren't -# necessary -# -# These datasets can be downloaded using the script in the 'datasets' dir: -# -# cd /datasets -# ./get_test_data.sh --benchmark -# -# Then set the following env var so the dataset utils can find their location: -# -# export RAPIDS_DATASET_ROOT_DIR=/datasets -# -from cugraph_benchmarking.params import ( - hollywood, - europe_osm, - cit_patents, - soc_livejournal, -) +import nx_cugraph as nxcg # Attempt to import the NetworkX dispatching module, which is only needed when # testing with NX <3.2 in order to dynamically switch backends. NX >=3.2 allows @@ -45,22 +29,76 @@ ################################################################################ -# Fixtures and helpers -backend_params = ["cugraph", None] +# Fixtures and params + +# See https://pytest-benchmark.readthedocs.io/en/latest/glossary.html for how +# these variables are used. 
+rounds = 1 +iterations = 1 +warmup_rounds = 1 -dataset_params = [ +dataset_param_values = [ pytest.param(datasets.karate, marks=[pytest.mark.small, pytest.mark.undirected]), pytest.param(datasets.netscience, marks=[pytest.mark.small, pytest.mark.directed]), pytest.param( datasets.email_Eu_core, marks=[pytest.mark.small, pytest.mark.directed] ), - pytest.param(cit_patents, marks=[pytest.mark.medium, pytest.mark.directed]), - pytest.param(hollywood, marks=[pytest.mark.medium, pytest.mark.undirected]), - pytest.param(europe_osm, marks=[pytest.mark.medium, pytest.mark.undirected]), - pytest.param(soc_livejournal, marks=[pytest.mark.large, pytest.mark.directed]), + pytest.param( + datasets.cit_patents, marks=[pytest.mark.medium, pytest.mark.directed] + ), + pytest.param( + datasets.hollywood, marks=[pytest.mark.medium, pytest.mark.undirected] + ), + pytest.param( + datasets.soc_livejournal, marks=[pytest.mark.medium, pytest.mark.directed] + ), + pytest.param( + datasets.europe_osm, marks=[pytest.mark.large, pytest.mark.undirected] + ), ] +backend_param_values = ["cugraph", "cugraph-preconverted", None] + + +def setup_module(module): + """ + Trivial conversion call to force various one-time CUDA initialization + operations to happen outside of benchmarks. + """ + G = nx.karate_club_graph() + nxcg.from_networkx(G) + + +# Test IDs are generated using the lambda assigned to the ids arg to provide an +# easier-to-read name. This is especially helpful for Dataset objs (see +# https://docs.pytest.org/en/stable/reference/reference.html#pytest-fixture) +@pytest.fixture( + scope="module", params=dataset_param_values, ids=lambda ds: f"ds={str(ds)}" +) +def graph_obj(request): + """ + Returns a NX Graph or DiGraph obj from the dataset instance parameter. 
+ """ + dataset = request.param + return nx_graph_from_dataset(dataset) + + +@pytest.fixture( + scope="module", + params=backend_param_values, + ids=lambda backend: f"backend={backend}", +) +def backend(request): + """ + Returns the backend name to use. This is done as a fixture for consistency + and simplicity when creating benchmarks (no need to mark the benchmark as + parametrized). + """ + return request.param + +################################################################################ +# Helpers def nx_graph_from_dataset(dataset_obj): """ Read the dataset specified by the dataset_obj and create and return a @@ -87,126 +125,334 @@ def nx_graph_from_dataset(dataset_obj): return G -# Test IDs are generated using the lambda assigned to the ids arg to provide an -# easier-to-read name from the Dataset obj string repr. -# See: https://docs.pytest.org/en/stable/reference/reference.html#pytest-fixture -@pytest.fixture(scope="module", params=dataset_params, ids=lambda ds: f"ds={str(ds)}") -def graph_obj(request): - """ - Returns a NX Graph or DiGraph obj from the dataset instance parameter. - """ - dataset = request.param - return nx_graph_from_dataset(dataset) - - -def get_legacy_backend_selector(backend_name): +def get_legacy_backend_wrapper(backend_name): """ Returns a callable that wraps an algo function with either the default - dispatch decorator, or the "testing" decorator which unconditionally - dispatches. + dispatcher (which dispatches based on input graph type), or the "testing" + dispatcher (which autoconverts and unconditionally dispatches). This is only supported for NetworkX <3.2 """ backends.plugin_name = "cugraph" orig_dispatch = backends._dispatch testing_dispatch = backends.test_override_dispatch - # Testing with the networkx <3.2 dispatch mechanism is based on decorating - # networkx APIs. 
The decorator is either one that only uses a backend if - # the input graph type is for that backend (the default decorator), or the - # "testing" decorator, which unconditionally converts a graph type to the - # type needed by the backend then calls the backend. If the cugraph backend - # is specified, create a callable that decorates the benchmarked function - # with the testing decorator. - # - # Because both the default and testing decorators assume they are only - # applied once and do bookkeeping to ensure algos are not registered - # multiple times, the callable also clears bookkeeping so the decorators - # can be reapplied multiple times. This is obviously a hack and networkx - # >=3.2 makes this use case properly supported. if backend_name == "cugraph": - - def wrapper(*args, **kwargs): - backends._registered_algorithms = {} - return testing_dispatch(*args, **kwargs) - + dispatch = testing_dispatch else: + dispatch = orig_dispatch + + def wrap_callable_for_dispatch(func, exhaust_returned_iterator=False): + # Networkx <3.2 registers functions when the dispatch decorator is + # applied (called) and errors if re-registered, so clear bookkeeping to + # allow it to be called repeatedly. + backends._registered_algorithms = {} + actual_func = dispatch(func) # returns the func the dispatcher picks def wrapper(*args, **kwargs): - backends._registered_algorithms = {} - return orig_dispatch(*args, **kwargs) + retval = actual_func(*args, **kwargs) + if exhaust_returned_iterator: + retval = list(retval) + return retval - return wrapper + return wrapper + + return wrap_callable_for_dispatch -def get_backend_selector(backend_name): +def get_backend_wrapper(backend_name): """ Returns a callable that wraps an algo function in order to set the "backend" kwarg on it. 
This is only supported for NetworkX >= 3.2 """ - def get_callable_for_func(func): + def wrap_callable_for_dispatch(func, exhaust_returned_iterator=False): def wrapper(*args, **kwargs): kwargs["backend"] = backend_name - return func(*args, **kwargs) + retval = func(*args, **kwargs) + if exhaust_returned_iterator: + retval = list(retval) + return retval return wrapper - return get_callable_for_func + return wrap_callable_for_dispatch @pytest.fixture( - scope="module", params=backend_params, ids=lambda backend: f"backend={backend}" + scope="module", + params=backend_param_values, + ids=lambda backend: f"backend={backend}", ) -def backend_selector(request): +def backend_wrapper(request): """ Returns a callable that takes a function algo and wraps it in another function that calls the algo using the appropriate backend. + + For example: if the backend to test is "cugraph", this will return a + function that calls nx.pagerank(..., backend='cugraph') """ backend_name = request.param + actual_backend_name = backend_name + + # Special case: cugraph-preconverted may be specified as a backend but this + # name is reserved to indicate a cugraph backend is to be used with a + # preconverted graph obj (rather than having the backend do the + # conversion). + if backend_name == "cugraph-preconverted": + actual_backend_name = "cugraph" + + # NX <3.2 does not support the backends= kwarg, so the backend must be + # enabled differently if backends is not None: - return get_legacy_backend_selector(backend_name) + wrapper = get_legacy_backend_wrapper(actual_backend_name) else: - return get_backend_selector(backend_name) + wrapper = get_backend_wrapper(actual_backend_name) + + wrapper.backend_name = backend_name + return wrapper + + +def get_graph_obj_for_benchmark(graph_obj, backend_wrapper): + """ + Given a Graph object and a backend name, return a converted Graph or the + original Graph object based on the backend to use. 
+ + This is needed because some backend names are actually used as descriptions + for combinations of backends and converted/non-converted graphs. For + example, a benchmark may specify the "cugraph-preconverted" backend, which + is not an installed backend but instead refers to the "cugraph" backend + passed a NX Graph that has been converted to a nx-cugraph Graph object. + """ + G = graph_obj + if backend_wrapper.backend_name == "cugraph-preconverted": + G = nxcg.from_networkx(G) + return G ################################################################################ # Benchmarks -normalized_params = [True, False] -k_params = [10, 100] - - -@pytest.mark.parametrize("normalized", normalized_params, ids=lambda norm: f"{norm=}") -@pytest.mark.parametrize("k", k_params, ids=lambda k: f"{k=}") -def bench_betweenness_centrality(benchmark, graph_obj, backend_selector, normalized, k): - result = benchmark( - backend_selector(nx.betweenness_centrality), - graph_obj, - weight=None, - normalized=normalized, - k=k, +# normalized_param_values = [True, False] +# k_param_values = [10, 100] +normalized_param_values = [True] +k_param_values = [10] + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}") +def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.betweenness_centrality), + args=(G,), + kwargs=dict( + weight=None, + normalized=normalized, + k=k, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, ) assert type(result) is dict -@pytest.mark.parametrize("normalized", normalized_params, ids=lambda norm: f"{norm=}") +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +@pytest.mark.parametrize("k", k_param_values, ids=lambda k: 
f"{k=}") def bench_edge_betweenness_centrality( - benchmark, graph_obj, backend_selector, normalized + benchmark, graph_obj, backend_wrapper, normalized, k ): - result = benchmark( - backend_selector(nx.edge_betweenness_centrality), - graph_obj, - weight=None, - normalized=normalized, + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.edge_betweenness_centrality), + args=(G,), + kwargs=dict( + weight=None, + normalized=normalized, + k=k, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, ) assert type(result) is dict -def bench_louvain_communities(benchmark, graph_obj, backend_selector): +def bench_louvain_communities(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) # The cugraph backend for louvain_communities only supports undirected graphs - if isinstance(graph_obj, nx.DiGraph): - G = graph_obj.to_undirected() - else: - G = graph_obj - result = benchmark(backend_selector(nx.community.louvain_communities), G) + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.community.louvain_communities), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_eigenvector_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.eigenvector_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert 
type(result) is dict + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +def bench_hits(benchmark, graph_obj, backend_wrapper, normalized): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.hits), + args=(G,), + kwargs=dict( + normalized=normalized, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is tuple + assert len(result) == 2 + assert type(result[0]) is dict + assert type(result[1]) is dict + + +def bench_in_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.in_degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +def bench_katz_centrality(benchmark, graph_obj, backend_wrapper, normalized): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.katz_centrality), + args=(G,), + kwargs=dict( + normalized=normalized, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_k_truss(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.k_truss), + args=(G,), + kwargs=dict( + k=2, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # Check that this at least appears to be some kind of NX-like Graph + assert hasattr(result, "has_node") + + +def bench_out_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = 
get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.out_degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_pagerank(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.pagerank), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper): + # Use the node with the highest degree + degrees = graph_obj.degree() # list of tuples of (node, degree) + node = max(degrees, key=lambda t: t[1])[0] + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + result = benchmark.pedantic( + target=backend_wrapper(nx.single_source_shortest_path_length), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapper): + # Use the node with the highest degree + degrees = graph_obj.degree() # list of tuples of (node, degree) + node = max(degrees, key=lambda t: t[1])[0] + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + result = benchmark.pedantic( + target=backend_wrapper( + nx.single_target_shortest_path_length, exhaust_returned_iterator=True + ), + args=(G,), + kwargs=dict( + target=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # exhaust_returned_iterator=True forces the result to a list, but is not + # needed for this algo in NX 3.3+ which returns a dict instead of an + # iterator. Forcing to a list does not change the benchmark timing. 
assert type(result) is list diff --git a/benchmarks/shared/python/cugraph_benchmarking/params.py b/benchmarks/shared/python/cugraph_benchmarking/params.py index d82cfd26117..034e22ffc37 100644 --- a/benchmarks/shared/python/cugraph_benchmarking/params.py +++ b/benchmarks/shared/python/cugraph_benchmarking/params.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,42 +14,16 @@ import pytest from pylibcugraph.testing.utils import gen_fixture_params -from cugraph.testing import RAPIDS_DATASET_ROOT_DIR_PATH from cugraph.datasets import ( - Dataset, karate, netscience, email_Eu_core, + hollywood, + europe_osm, + cit_patents, + soc_livejournal, ) -# Create Dataset objects from .csv files. -# Once the cugraph.dataset package is updated to include the metadata files for -# these (like karate), these will no longer need to be explicitly instantiated. 
-hollywood = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/undirected/hollywood.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -hollywood.metadata["is_directed"] = False -europe_osm = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/undirected/europe_osm.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -europe_osm.metadata["is_directed"] = False -cit_patents = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/directed/cit-Patents.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -cit_patents.metadata["is_directed"] = True -soc_livejournal = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/directed/soc-LiveJournal1.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -soc_livejournal.metadata["is_directed"] = True - # Assume all "file_data" (.csv file on disk) datasets are too small to be useful for MG. undirected_datasets = [ pytest.param( From 35ae8ef46ed45862cd89f3e56284f3974653a7c7 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Wed, 10 Jan 2024 16:51:13 -0500 Subject: [PATCH 5/7] Correct `cugraph-pyg` package name used in wheels and fix test script (#4083) This is an oversight from #4075. With this fix, the generated wheels should have a proper CUDA suffix (e.g. `cugraph-pyg-cu12`). Edit: This PR also fixes https://github.com/rapidsai/graph_dl/issues/421 The mg tests were not skipped as expected due to a wrong relative path in `test_wheel_cugraph-pyg.sh`. 
Authors: - Tingyu Wang (https://github.com/tingyu66) Approvers: - Ray Douglass (https://github.com/raydouglass) - Alex Barghi (https://github.com/alexbarghi-nv) URL: https://github.com/rapidsai/cugraph/pull/4083 --- ci/test_python.sh | 3 +-- ci/test_wheel_cugraph-pyg.sh | 5 +++-- .../cugraph_pyg/tests/mg/test_mg_cugraph_store.py | 4 ++-- python/cugraph-pyg/pyproject.toml | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 500bc2f3467..d8288758f3c 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. set -euo pipefail @@ -230,7 +230,6 @@ if [[ "${RAPIDS_CUDA_VERSION}" == "11.8.0" ]]; then # rmat is not tested because of multi-GPU testing pytest \ --cache-clear \ - --ignore=tests/int \ --ignore=tests/mg \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph-pyg.xml" \ --cov-config=../../.coveragerc \ diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh index 9a211c81886..acd42224387 100755 --- a/ci/test_wheel_cugraph-pyg.sh +++ b/ci/test_wheel_cugraph-pyg.sh @@ -33,11 +33,12 @@ if [[ "${CUDA_VERSION}" == "11.8.0" ]]; then -f https://data.pyg.org/whl/torch-2.1.0+cu118.html rapids-logger "pytest cugraph-pyg (single GPU)" + pushd python/cugraph-pyg/cugraph_pyg python -m pytest \ --cache-clear \ - --ignore=tests/int \ --ignore=tests/mg \ - python/cugraph-pyg/cugraph_pyg/tests + tests + popd else rapids-logger "skipping cugraph-pyg wheel test on CUDA!=11.8" fi diff --git a/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py b/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py index be8f8245807..7047c62250b 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/mg/test_mg_cugraph_store.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -385,7 +385,7 @@ def test_get_input_nodes(karate_gnn, dask_client): def test_mg_frame_handle(graph, dask_client): F, G, N = graph cugraph_store = CuGraphStore(F, G, N, multi_gpu=True) - assert isinstance(cugraph_store._EXPERIMENTAL__CuGraphStore__graph._plc_graph, dict) + assert isinstance(cugraph_store._CuGraphStore__graph._plc_graph, dict) @pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index c4bd00bb86c..b8666c0d806 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -11,9 +11,9 @@ requires = [ testpaths = ["cugraph_pyg/tests"] [project] -name = "cugraph_pyg" +name = "cugraph-pyg" dynamic = ["version"] -description = "cugraph_pyg - PyG support for cuGraph massive-scale, ultra-fast GPU graph analytics." +description = "cugraph-pyg - PyG support for cuGraph massive-scale, ultra-fast GPU graph analytics." authors = [ { name = "NVIDIA Corporation" }, ] From b22dd99d9ed329b8ea95442a50daff5085d2b82c Mon Sep 17 00:00:00 2001 From: James Lamb Date: Thu, 11 Jan 2024 11:23:40 -0600 Subject: [PATCH 6/7] refactor CUDA versions in dependencies.yaml (#4084) Contributes to https://github.com/rapidsai/build-planning/issues/7. Proposes splitting the `cuda-version` dependency in `dependencies.yaml` out to its own thing, separate from the bits of the CUDA Toolkit this project needs. 
### Benefits of this change * prevents accidental inclusion of multiple `cuda-version` versions in environments * reduces update effort (via enabling more use of globs like `"12.*"`) * improves the chance that errors like "`conda` recipe is missing a dependency" are caught in CI Authors: - James Lamb (https://github.com/jameslamb) - Bradley Dice (https://github.com/bdice) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - Bradley Dice (https://github.com/bdice) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/4084 --- .pre-commit-config.yaml | 2 +- dependencies.yaml | 47 +++++++++++++++++++++++------------------ 2 files changed, 28 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index bab39557c99..188ea1a266a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -52,7 +52,7 @@ repos: pass_filenames: false additional_dependencies: [gitpython] - repo: https://github.com/rapidsai/dependency-file-generator - rev: v1.5.1 + rev: v1.8.0 hooks: - id: rapids-dependency-file-generator args: ["--clean"] diff --git a/dependencies.yaml b/dependencies.yaml index 3eed525bfe4..18ddb6c51dd 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -9,7 +9,8 @@ files: - checks - common_build - cpp_build - - cudatoolkit + - cuda + - cuda_version - docs - python_build_wheel - python_build_cythonize @@ -37,19 +38,19 @@ files: docs: output: none includes: - - cudatoolkit + - cuda_version - docs - py_version - depends_on_pylibcugraphops test_cpp: output: none includes: - - cudatoolkit + - cuda_version - test_cpp test_notebooks: output: none includes: - - cudatoolkit + - cuda_version - py_version - test_notebook - test_python_common @@ -57,7 +58,7 @@ files: test_python: output: none includes: - - cudatoolkit + - cuda_version - depends_on_cudf - py_version - test_python_common @@ -273,33 +274,40 @@ dependencies: - output_types: [conda, requirements] packages: - pre-commit - 
cudatoolkit: + cuda_version: specific: - - output_types: [conda] + - output_types: conda matrices: - matrix: - cuda: "12.0" + cuda: "11.2" packages: - - cuda-version=12.0 + - cuda-version=11.2 - matrix: - cuda: "11.8" + cuda: "11.4" packages: - - cuda-version=11.8 - - cudatoolkit + - cuda-version=11.4 - matrix: cuda: "11.5" packages: - cuda-version=11.5 - - cudatoolkit - matrix: - cuda: "11.4" + cuda: "11.8" packages: - - cuda-version=11.4 - - cudatoolkit + - cuda-version=11.8 - matrix: - cuda: "11.2" + cuda: "12.0" + packages: + - cuda-version=12.0 + cuda: + specific: + - output_types: [conda] + matrices: + - matrix: + cuda: "12.*" + packages: + - matrix: + cuda: "11.*" packages: - - cuda-version=11.2 - cudatoolkit common_build: common: @@ -344,9 +352,8 @@ dependencies: packages: - nvcc_linux-aarch64=11.8 - matrix: - cuda: "12.0" + cuda: "12.*" packages: - - cuda-version=12.0 - cuda-nvcc docs: common: From 88c388442c4765ebb67f8d28961701d2c32c8a2f Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 11 Jan 2024 15:11:00 -0600 Subject: [PATCH 7/7] nx-cugraph: indicate which plc algorithms are used and version_added (#4069) Pretty simple PR. I would like for us to use this metadata when creating tables of supported algorithms. 
Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4069 --- .../algorithms/bipartite/generators.py | 4 +- .../algorithms/centrality/betweenness.py | 20 +- .../algorithms/centrality/degree_alg.py | 8 +- .../algorithms/centrality/eigenvector.py | 9 +- .../nx_cugraph/algorithms/centrality/katz.py | 9 +- .../algorithms/community/louvain.py | 10 +- .../algorithms/components/connected.py | 10 +- .../nx-cugraph/nx_cugraph/algorithms/core.py | 4 +- .../nx-cugraph/nx_cugraph/algorithms/dag.py | 6 +- .../nx_cugraph/algorithms/isolate.py | 8 +- .../algorithms/link_analysis/hits_alg.py | 6 +- .../algorithms/link_analysis/pagerank_alg.py | 11 +- .../algorithms/shortest_paths/unweighted.py | 6 +- .../traversal/breadth_first_search.py | 16 +- .../nx-cugraph/nx_cugraph/classes/function.py | 4 +- .../nx-cugraph/nx_cugraph/convert_matrix.py | 9 +- .../nx_cugraph/generators/classic.py | 32 +-- .../nx_cugraph/generators/community.py | 4 +- .../nx-cugraph/nx_cugraph/generators/small.py | 44 ++-- .../nx_cugraph/generators/social.py | 10 +- .../nx-cugraph/nx_cugraph/scripts/__init__.py | 12 + .../nx-cugraph/nx_cugraph/scripts/__main__.py | 38 +++ .../nx_cugraph/scripts/print_table.py | 78 ++++++ .../nx_cugraph/scripts/print_tree.py | 241 ++++++++++++++++++ .../nx-cugraph/nx_cugraph/utils/decorators.py | 31 ++- python/nx-cugraph/pyproject.toml | 8 + 26 files changed, 538 insertions(+), 100 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/scripts/__init__.py create mode 100755 python/nx-cugraph/nx_cugraph/scripts/__main__.py create mode 100755 python/nx-cugraph/nx_cugraph/scripts/print_table.py create mode 100755 python/nx-cugraph/nx_cugraph/scripts/print_tree.py diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py index 25b9b39554b..5a0c970c984 100644 --- 
a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -24,7 +24,7 @@ ] -@networkx_algorithm(nodes_or_number=[0, 1]) +@networkx_algorithm(nodes_or_number=[0, 1], version_added="23.12") def complete_bipartite_graph(n1, n2, create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): diff --git a/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py index 210e1f0a2b2..ba2b3d9c895 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/betweenness.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -18,11 +18,16 @@ __all__ = ["betweenness_centrality", "edge_betweenness_centrality"] -@networkx_algorithm +@networkx_algorithm( + is_incomplete=True, # weight not supported + is_different=True, # RNG with seed is different + plc="betweenness_centrality", + version_added="23.10", +) def betweenness_centrality( G, k=None, normalized=True, weight=None, endpoints=False, seed=None ): - """`weight` parameter is not yet supported.""" + """`weight` parameter is not yet supported, and RNG with seed may be different.""" if weight is not None: raise NotImplementedError( "Weighted implementation of betweenness centrality not currently supported" @@ -46,9 +51,14 @@ def _(G, k=None, normalized=True, weight=None, endpoints=False, seed=None): return weight is None -@networkx_algorithm +@networkx_algorithm( + is_incomplete=True, # weight not supported + is_different=True, # RNG with seed is different + plc="edge_betweenness_centrality", + version_added="23.10", +) def edge_betweenness_centrality(G, k=None, normalized=True, weight=None, seed=None): - """`weight` parameter is not yet supported.""" + """`weight` parameter is not yet supported, and RNG with seed may be different.""" if weight is not None: raise NotImplementedError( "Weighted implementation of betweenness centrality not currently supported" diff --git a/python/nx-cugraph/nx_cugraph/algorithms/centrality/degree_alg.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/degree_alg.py index 0b2fd24af79..a319eb3a12c 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/centrality/degree_alg.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/degree_alg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -16,7 +16,7 @@ __all__ = ["degree_centrality", "in_degree_centrality", "out_degree_centrality"] -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def degree_centrality(G): G = _to_graph(G) if len(G) <= 1: @@ -27,7 +27,7 @@ def degree_centrality(G): @not_implemented_for("undirected") -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def in_degree_centrality(G): G = _to_directed_graph(G) if len(G) <= 1: @@ -38,7 +38,7 @@ def in_degree_centrality(G): @not_implemented_for("undirected") -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def out_degree_centrality(G): G = _to_directed_graph(G) if len(G) <= 1: diff --git a/python/nx-cugraph/nx_cugraph/algorithms/centrality/eigenvector.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/eigenvector.py index c0f02a6258e..9e615955a8b 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/centrality/eigenvector.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/eigenvector.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -26,7 +26,12 @@ @not_implemented_for("multigraph") -@networkx_algorithm(extra_params=_dtype_param) +@networkx_algorithm( + extra_params=_dtype_param, + is_incomplete=True, # nstart not supported + plc="eigenvector_centrality", + version_added="23.12", +) def eigenvector_centrality( G, max_iter=100, tol=1.0e-6, nstart=None, weight=None, *, dtype=None ): diff --git a/python/nx-cugraph/nx_cugraph/algorithms/centrality/katz.py b/python/nx-cugraph/nx_cugraph/algorithms/centrality/katz.py index b61b811b8fa..a2fb950c1aa 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/centrality/katz.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/centrality/katz.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,7 +26,12 @@ @not_implemented_for("multigraph") -@networkx_algorithm(extra_params=_dtype_param) +@networkx_algorithm( + extra_params=_dtype_param, + is_incomplete=True, # nstart and normalized=False not supported + plc="katz_centrality", + version_added="23.12", +) def katz_centrality( G, alpha=0.1, diff --git a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py index 936d837dacd..d023bab1a47 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/community/louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -35,7 +35,11 @@ "Upper limit of the number of macro-iterations (max: 500)." 
), **_dtype_param, - } + }, + is_incomplete=True, # seed not supported; self-loops not supported + is_different=True, # RNG different + plc="louvain", + version_added="23.10", ) def louvain_communities( G, @@ -47,7 +51,7 @@ def louvain_communities( max_level=None, dtype=None, ): - """`seed` parameter is currently ignored.""" + """`seed` parameter is currently ignored, and self-loops are not yet supported.""" # NetworkX allows both directed and undirected, but cugraph only allows undirected. seed = _seed_to_int(seed) # Unused, but ensure it's valid for future compatibility G = _to_undirected_graph(G, weight) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py b/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py index 41f3457d542..cb12aed1d39 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/components/connected.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -30,7 +30,7 @@ @not_implemented_for("directed") -@networkx_algorithm +@networkx_algorithm(plc="weakly_connected_components", version_added="23.12") def number_connected_components(G): return sum(1 for _ in connected_components(G)) # PREFERRED IMPLEMENTATION, BUT PLC DOES NOT HANDLE ISOLATED VERTICES WELL @@ -57,7 +57,7 @@ def _(G): @not_implemented_for("directed") -@networkx_algorithm +@networkx_algorithm(plc="weakly_connected_components", version_added="23.12") def connected_components(G): G = _to_undirected_graph(G) if G.src_indices.size == 0: @@ -86,7 +86,7 @@ def connected_components(G): @not_implemented_for("directed") -@networkx_algorithm +@networkx_algorithm(plc="weakly_connected_components", version_added="23.12") def is_connected(G): G = _to_undirected_graph(G) if len(G) == 0: @@ -110,7 +110,7 @@ def is_connected(G): @not_implemented_for("directed") -@networkx_algorithm +@networkx_algorithm(plc="weakly_connected_components", version_added="23.12") def node_connected_component(G, n): # We could also do plain BFS from n G = _to_undirected_graph(G) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index c00df2d832f..e4520c2713b 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -27,7 +27,7 @@ @not_implemented_for("directed") @not_implemented_for("multigraph") -@networkx_algorithm +@networkx_algorithm(is_incomplete=True, plc="k_truss_subgraph", version_added="23.12") def k_truss(G, k): """ Currently raises `NotImplementedError` for graphs with more than one connected diff --git a/python/nx-cugraph/nx_cugraph/algorithms/dag.py b/python/nx-cugraph/nx_cugraph/algorithms/dag.py index 067cfed9101..ad5b7594aa1 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/dag.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/dag.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -45,11 +45,11 @@ def _ancestors_and_descendants(G, source, *, is_ancestors): return G._nodearray_to_set(node_ids[mask]) -@networkx_algorithm +@networkx_algorithm(plc="bfs", version_added="24.02") def descendants(G, source): return _ancestors_and_descendants(G, source, is_ancestors=False) -@networkx_algorithm +@networkx_algorithm(plc="bfs", version_added="24.02") def ancestors(G, source): return _ancestors_and_descendants(G, source, is_ancestors=True) diff --git a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py index d32223fb3ed..c7e5d7113de 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/isolate.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/isolate.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -25,7 +25,7 @@ __all__ = ["is_isolate", "isolates", "number_of_isolates"] -@networkx_algorithm +@networkx_algorithm(version_added="23.10") def is_isolate(G, n): G = _to_graph(G) index = n if G.key_to_id is None else G.key_to_id[n] @@ -51,13 +51,13 @@ def _isolates(G) -> cp.ndarray[IndexValue]: return cp.nonzero(_mark_isolates(G))[0] -@networkx_algorithm +@networkx_algorithm(version_added="23.10") def isolates(G): G = _to_graph(G) return G._nodeiter_to_iter(iter(_isolates(G).tolist())) -@networkx_algorithm +@networkx_algorithm(version_added="23.10") def number_of_isolates(G): G = _to_graph(G) return _mark_isolates(G).sum().tolist() diff --git a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py index 1c8a47c24b1..caa01327a56 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/hits_alg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -32,7 +32,9 @@ "The edge attribute to use as the edge weight." ), **_dtype_param, - } + }, + plc="hits", + version_added="23.12", ) def hits( G, diff --git a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/pagerank_alg.py b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/pagerank_alg.py index 63f6e89c33a..d45d019c1b7 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/pagerank_alg.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/link_analysis/pagerank_alg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
# Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,7 +26,12 @@ __all__ = ["pagerank"] -@networkx_algorithm(extra_params=_dtype_param) +@networkx_algorithm( + extra_params=_dtype_param, + is_incomplete=True, # dangling not supported + plc={"pagerank", "personalized_pagerank"}, + version_added="23.12", +) def pagerank( G, alpha=0.85, @@ -97,7 +102,7 @@ def pagerank( @pagerank._can_run -def pagerank( +def _( G, alpha=0.85, personalization=None, diff --git a/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py index 3413a637b32..b1032a8236b 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/shortest_paths/unweighted.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -21,12 +21,12 @@ __all__ = ["single_source_shortest_path_length", "single_target_shortest_path_length"] -@networkx_algorithm +@networkx_algorithm(plc="bfs", version_added="23.12") def single_source_shortest_path_length(G, source, cutoff=None): return _single_shortest_path_length(G, source, cutoff, "Source") -@networkx_algorithm +@networkx_algorithm(plc="bfs", version_added="23.12") def single_target_shortest_path_length(G, target, cutoff=None): return _single_shortest_path_length(G, target, cutoff, "Target") diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py index e2a7d46f462..aa671bbb7d4 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -57,7 +57,7 @@ def _bfs(G, source, *, depth_limit=None, reverse=False): return distances[mask], predecessors[mask], node_ids[mask] -@networkx_algorithm +@networkx_algorithm(is_incomplete=True, plc="bfs", version_added="24.02") def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): """`neighbors` and `sort_neighbors` parameters are not yet supported.""" return bfs_edges(source, depth_limit=depth_limit) @@ -68,7 +68,7 @@ def _(G, source, neighbors=None, depth_limit=None, sort_neighbors=None): return neighbors is None and sort_neighbors is None -@networkx_algorithm +@networkx_algorithm(is_incomplete=True, plc="bfs", version_added="24.02") def bfs_edges(G, source, reverse=False, depth_limit=None, sort_neighbors=None): """`sort_neighbors` parameter is not yet supported.""" G = _check_G_and_source(G, source) @@ -95,7 +95,7 @@ def _(G, source, reverse=False, depth_limit=None, sort_neighbors=None): return sort_neighbors is None -@networkx_algorithm +@networkx_algorithm(is_incomplete=True, plc="bfs", version_added="24.02") def bfs_tree(G, source, reverse=False, depth_limit=None, sort_neighbors=None): """`sort_neighbors` parameter is not yet supported.""" G = _check_G_and_source(G, source) @@ -149,7 +149,7 @@ def _(G, source, reverse=False, depth_limit=None, sort_neighbors=None): return sort_neighbors is None -@networkx_algorithm +@networkx_algorithm(is_incomplete=True, plc="bfs", version_added="24.02") def bfs_successors(G, source, depth_limit=None, sort_neighbors=None): """`sort_neighbors` parameter is not yet supported.""" G = _check_G_and_source(G, source) @@ -173,7 +173,7 @@ def _(G, source, depth_limit=None, sort_neighbors=None): return sort_neighbors is None -@networkx_algorithm +@networkx_algorithm(plc="bfs", version_added="24.02") def bfs_layers(G, sources): G = _to_graph(G) if sources in G: @@ -201,7 +201,7 @@ def bfs_layers(G, sources): return (G._nodearray_to_list(groups[key]) for key 
in range(len(groups))) -@networkx_algorithm +@networkx_algorithm(is_incomplete=True, plc="bfs", version_added="24.02") def bfs_predecessors(G, source, depth_limit=None, sort_neighbors=None): """`sort_neighbors` parameter is not yet supported.""" G = _check_G_and_source(G, source) @@ -227,7 +227,7 @@ def _(G, source, depth_limit=None, sort_neighbors=None): return sort_neighbors is None -@networkx_algorithm +@networkx_algorithm(plc="bfs", version_added="24.02") def descendants_at_distance(G, source, distance): G = _check_G_and_source(G, source) if distance is None or distance < 0: diff --git a/python/nx-cugraph/nx_cugraph/classes/function.py b/python/nx-cugraph/nx_cugraph/classes/function.py index 633e4abd7f4..435dfe37239 100644 --- a/python/nx-cugraph/nx_cugraph/classes/function.py +++ b/python/nx-cugraph/nx_cugraph/classes/function.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -16,7 +16,7 @@ __all__ = ["number_of_selfloops"] -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def number_of_selfloops(G): G = _to_graph(G) is_selfloop = G.src_indices == G.dst_indices diff --git a/python/nx-cugraph/nx_cugraph/convert_matrix.py b/python/nx-cugraph/nx_cugraph/convert_matrix.py index 80ca0c2fa4b..1a2ecde9b8c 100644 --- a/python/nx-cugraph/nx_cugraph/convert_matrix.py +++ b/python/nx-cugraph/nx_cugraph/convert_matrix.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -23,7 +23,8 @@ ] -@networkx_algorithm +# Value columns with string dtype is not supported +@networkx_algorithm(is_incomplete=True, version_added="23.12") def from_pandas_edgelist( df, source="source", @@ -32,7 +33,7 @@ def from_pandas_edgelist( create_using=None, edge_key=None, ): - """cudf.DataFrame inputs also supported.""" + """cudf.DataFrame inputs also supported; value columns with str is unsuppported.""" graph_class, inplace = _create_using_class(create_using) src_array = df[source].to_numpy() dst_array = df[target].to_numpy() @@ -120,7 +121,7 @@ def from_pandas_edgelist( return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def from_scipy_sparse_array( A, parallel_edges=False, create_using=None, edge_attribute="weight" ): diff --git a/python/nx-cugraph/nx_cugraph/generators/classic.py b/python/nx-cugraph/nx_cugraph/generators/classic.py index 4213e6dd2a0..a548beea34f 100644 --- a/python/nx-cugraph/nx_cugraph/generators/classic.py +++ b/python/nx-cugraph/nx_cugraph/generators/classic.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -51,7 +51,7 @@ concat = itertools.chain.from_iterable -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def barbell_graph(m1, m2, create_using=None): # Like two complete graphs and a path_graph m1 = _ensure_nonnegative_int(m1) @@ -81,12 +81,12 @@ def barbell_graph(m1, m2, create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def circular_ladder_graph(n, create_using=None): return _ladder_graph(n, create_using, is_circular=True) -@networkx_algorithm(nodes_or_number=0) +@networkx_algorithm(nodes_or_number=0, version_added="23.12") def complete_graph(n, create_using=None): n, nodes = _number_and_nodes(n) if n < 3: @@ -99,7 +99,7 @@ def complete_graph(n, create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def complete_multipartite_graph(*subset_sizes): if not subset_sizes: return nxcg.Graph() @@ -142,7 +142,7 @@ def complete_multipartite_graph(*subset_sizes): ) -@networkx_algorithm(nodes_or_number=0) +@networkx_algorithm(nodes_or_number=0, version_added="23.12") def cycle_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) @@ -172,7 +172,7 @@ def cycle_graph(n, create_using=None): return G -@networkx_algorithm(nodes_or_number=0) +@networkx_algorithm(nodes_or_number=0, version_added="23.12") def empty_graph(n=0, create_using=None, default=nx.Graph): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using, default=default) @@ -234,12 +234,12 @@ def _ladder_graph(n, create_using, *, is_circular=False): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def ladder_graph(n, create_using=None): return _ladder_graph(n, create_using) -@networkx_algorithm(nodes_or_number=[0, 1]) +@networkx_algorithm(nodes_or_number=[0, 1], version_added="23.12") def lollipop_graph(m, n, create_using=None): # Like complete_graph then 
path_graph orig_m, unused_nodes_m = m @@ -274,12 +274,12 @@ def lollipop_graph(m, n, create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def null_graph(create_using=None): return _common_small_graph(0, None, create_using) -@networkx_algorithm(nodes_or_number=0) +@networkx_algorithm(nodes_or_number=0, version_added="23.12") def path_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) @@ -299,7 +299,7 @@ def path_graph(n, create_using=None): return G -@networkx_algorithm(nodes_or_number=0) +@networkx_algorithm(nodes_or_number=0, version_added="23.12") def star_graph(n, create_using=None): orig_n, orig_nodes = n n, nodes = _number_and_nodes(n) @@ -323,7 +323,7 @@ def star_graph(n, create_using=None): return G -@networkx_algorithm(nodes_or_number=[0, 1]) +@networkx_algorithm(nodes_or_number=[0, 1], version_added="23.12") def tadpole_graph(m, n, create_using=None): orig_m, unused_nodes_m = m orig_n, unused_nodes_n = n @@ -361,12 +361,12 @@ def tadpole_graph(m, n, create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def trivial_graph(create_using=None): return _common_small_graph(1, None, create_using) -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def turan_graph(n, r): if not 1 <= r <= n: raise nx.NetworkXError("Must satisfy 1 <= r <= n") @@ -375,7 +375,7 @@ def turan_graph(n, r): return complete_multipartite_graph(*partitions) -@networkx_algorithm(nodes_or_number=0) +@networkx_algorithm(nodes_or_number=0, version_added="23.12") def wheel_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) diff --git a/python/nx-cugraph/nx_cugraph/generators/community.py b/python/nx-cugraph/nx_cugraph/generators/community.py index e5cb03e8cc0..9b0e0848de9 100644 --- a/python/nx-cugraph/nx_cugraph/generators/community.py +++ 
b/python/nx-cugraph/nx_cugraph/generators/community.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -27,7 +27,7 @@ ] -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def caveman_graph(l, k): # noqa: E741 l = _ensure_int(l) # noqa: E741 k = _ensure_int(k) diff --git a/python/nx-cugraph/nx_cugraph/generators/small.py b/python/nx-cugraph/nx_cugraph/generators/small.py index b9a189c31d5..45487571cda 100644 --- a/python/nx-cugraph/nx_cugraph/generators/small.py +++ b/python/nx-cugraph/nx_cugraph/generators/small.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -43,7 +43,7 @@ ] -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def bull_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -56,7 +56,7 @@ def bull_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def chvatal_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -85,7 +85,7 @@ def chvatal_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def cubical_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -105,7 +105,7 @@ def cubical_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def desargues_graph(create_using=None): # This can also be defined w.r.t. 
LCF_graph graph_class, inplace = _create_using_class(create_using) @@ -146,7 +146,7 @@ def desargues_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def diamond_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -159,7 +159,7 @@ def diamond_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def dodecahedral_graph(create_using=None): # This can also be defined w.r.t. LCF_graph graph_class, inplace = _create_using_class(create_using) @@ -200,7 +200,7 @@ def dodecahedral_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def frucht_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -235,7 +235,7 @@ def frucht_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def heawood_graph(create_using=None): # This can also be defined w.r.t. 
LCF_graph graph_class, inplace = _create_using_class(create_using) @@ -274,7 +274,7 @@ def heawood_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def house_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -287,7 +287,7 @@ def house_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def house_x_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -306,7 +306,7 @@ def house_x_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def icosahedral_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -337,7 +337,7 @@ def icosahedral_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def krackhardt_kite_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -366,7 +366,7 @@ def krackhardt_kite_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def moebius_kantor_graph(create_using=None): # This can also be defined w.r.t. LCF_graph graph_class, inplace = _create_using_class(create_using) @@ -407,7 +407,7 @@ def moebius_kantor_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def octahedral_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -428,7 +428,7 @@ def octahedral_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def pappus_graph(): # This can also be defined w.r.t. 
LCF_graph # fmt: off @@ -452,7 +452,7 @@ def pappus_graph(): return nxcg.Graph.from_coo(18, src_indices, dst_indices, name="Pappus Graph") -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def petersen_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -479,7 +479,7 @@ def petersen_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def sedgewick_maze_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -500,7 +500,7 @@ def sedgewick_maze_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def tetrahedral_graph(create_using=None): # This can also be defined w.r.t. complete_graph graph_class, inplace = _create_using_class(create_using) @@ -517,7 +517,7 @@ def tetrahedral_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def truncated_cube_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -548,7 +548,7 @@ def truncated_cube_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def truncated_tetrahedron_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): @@ -583,7 +583,7 @@ def truncated_tetrahedron_graph(create_using=None): return G -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def tutte_graph(create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): diff --git a/python/nx-cugraph/nx_cugraph/generators/social.py b/python/nx-cugraph/nx_cugraph/generators/social.py index 3c936d07af3..07e82c63fbf 100644 --- a/python/nx-cugraph/nx_cugraph/generators/social.py +++ b/python/nx-cugraph/nx_cugraph/generators/social.py @@ -1,4 +1,4 @@ -# Copyright (c) 
2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -25,7 +25,7 @@ ] -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def davis_southern_women_graph(): # fmt: off src_indices = cp.array( @@ -88,7 +88,7 @@ def davis_southern_women_graph(): ) -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def florentine_families_graph(): # fmt: off src_indices = cp.array( @@ -114,7 +114,7 @@ def florentine_families_graph(): return nxcg.Graph.from_coo(15, src_indices, dst_indices, id_to_key=nodes) -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def karate_club_graph(): # fmt: off src_indices = cp.array( @@ -175,7 +175,7 @@ def karate_club_graph(): ) -@networkx_algorithm +@networkx_algorithm(version_added="23.12") def les_miserables_graph(): # fmt: off src_indices = cp.array( diff --git a/python/nx-cugraph/nx_cugraph/scripts/__init__.py b/python/nx-cugraph/nx_cugraph/scripts/__init__.py new file mode 100644 index 00000000000..aeae6078111 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/scripts/__init__.py @@ -0,0 +1,12 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
diff --git a/python/nx-cugraph/nx_cugraph/scripts/__main__.py b/python/nx-cugraph/nx_cugraph/scripts/__main__.py
new file mode 100755
index 00000000000..c0963e64cc5
--- /dev/null
+++ b/python/nx-cugraph/nx_cugraph/scripts/__main__.py
@@ -0,0 +1,45 @@
+#!/usr/bin/env python
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# Command-line entry point: ``python -m nx_cugraph.scripts {print_table,print_tree}``.
+if __name__ == "__main__":
+    import argparse
+
+    from nx_cugraph.scripts import print_table, print_tree
+
+    # Reuse the option definitions of both sub-scripts; their own ``-h`` handling is
+    # disabled (add_help=False) so that only this combined parser provides help.
+    parser = argparse.ArgumentParser(
+        parents=[
+            print_table.get_argumentparser(add_help=False),
+            print_tree.get_argumentparser(add_help=False),
+        ],
+        description="Print info about functions implemented by nx-cugraph",
+    )
+    parser.add_argument("action", choices=["print_table", "print_tree"])
+    args = parser.parse_args()
+    if args.action == "print_table":
+        print_table.main()
+    else:
+        # Forward every print_tree option, including --incomplete/--different,
+        # which the inherited parser accepts.
+        print_tree.main(
+            by=args.by,
+            networkx_path=args.networkx_path,
+            dispatch_name=args.dispatch_name or args.dispatch_name_always,
+            version_added=args.version_added,
+            plc=args.plc,
+            dispatch_name_if_different=not args.dispatch_name_always,
+            incomplete=args.incomplete,
+            different=args.different,
+        )
diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_table.py b/python/nx-cugraph/nx_cugraph/scripts/print_table.py
new file mode 100755
index 00000000000..7e69de63dc1
--- /dev/null
+++ b/python/nx-cugraph/nx_cugraph/scripts/print_table.py
@@ -0,0 +1,78 @@
+#!/usr/bin/env python
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import sys
+from collections import namedtuple
+
+from networkx.utils.backends import _registered_algorithms as algos
+
+from _nx_cugraph import get_info
+from nx_cugraph.interface import BackendInterface
+
+
+def get_funcpath(func):
+    return f"{func.__module__}.{func.__name__}"
+
+
+def get_path_to_name():
+    return {
+        get_funcpath(algos[funcname]): funcname
+        for funcname in get_info()["functions"].keys() & algos.keys()
+    }
+
+
+Info = namedtuple(
+    "Info",
+    "networkx_path, dispatch_name, version_added, plc, is_incomplete, is_different",
+)
+
+
+def get_path_to_info(path_to_name=None, version_added_sep=".", plc_sep="/"):
+    if path_to_name is None:
+        path_to_name = get_path_to_name()
+    rv = {}
+    for funcpath in sorted(path_to_name):
+        funcname = path_to_name[funcpath]
+        cufunc = getattr(BackendInterface, funcname)
+        plc = plc_sep.join(sorted(cufunc._plc_names)) if cufunc._plc_names else ""
+        version_added = cufunc.version_added.replace(".", version_added_sep)
+        is_incomplete = cufunc.is_incomplete
+        is_different = cufunc.is_different
+        rv[funcpath] = Info(
+            funcpath, funcname, version_added, plc, is_incomplete, is_different
+        )
+    return rv
+
+
+def main(path_to_info=None, *, file=sys.stdout):
+    if path_to_info is None:
+        path_to_info = get_path_to_info(version_added_sep=".")
+    lines = ["networkx_path,dispatch_name,version_added,plc,is_incomplete,is_different"]
+    # Info carries bools (is_incomplete, is_different); str.join only accepts str.
+    lines.extend(",".join(map(str, info)) for info in path_to_info.values())
+    text = "\n".join(lines)
+    print(text, file=file)
+    return text
+
+
+def get_argumentparser(add_help=True):
+    return argparse.ArgumentParser(
+        description="Print info about functions implemented by nx-cugraph as CSV",
+        add_help=add_help,
+    )
+
+
+if __name__ == "__main__":
+    get_argumentparser().parse_args()  # no options; handles --help, rejects extras
+    main()
diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_tree.py b/python/nx-cugraph/nx_cugraph/scripts/print_tree.py
new file mode 100755
index 00000000000..bb75d735c31
--- /dev/null
+++ b/python/nx-cugraph/nx_cugraph/scripts/print_tree.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python
+# Copyright (c) 2024, NVIDIA CORPORATION.
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import argparse
+import re
+import sys
+
+import networkx as nx
+
+from nx_cugraph.scripts.print_table import get_path_to_info
+
+
+def add_branch(G, path, extra="", *, skip=0):
+    branch = path.split(".")
+    prev = ".".join(branch[: skip + 1])
+    for i in range(skip + 2, len(branch)):
+        cur = ".".join(branch[:i])
+        G.add_edge(prev, cur)
+        prev = cur
+    if extra:
+        if not isinstance(extra, str):
+            extra = ", ".join(extra)
+        path += f" ({extra})"
+    G.add_edge(prev, path)
+
+
+def get_extra(
+    info,
+    *,
+    networkx_path=False,
+    dispatch_name=False,
+    version_added=False,
+    plc=False,
+    dispatch_name_if_different=False,
+    incomplete=False,
+    different=False,
+):
+    extra = []
+    if networkx_path:
+        extra.append(info.networkx_path)
+    if dispatch_name and (
+        not dispatch_name_if_different
+        or info.dispatch_name != info.networkx_path.rsplit(".", 1)[-1]
+    ):
+        extra.append(info.dispatch_name)
+    if version_added:
+        v = info.version_added
+        if len(v) != 5:
+            raise ValueError(f"Is there something wrong with version: {v!r}?")
+        extra.append(v[:2] + "." + v[-2:])
+    if plc and info.plc:
+        extra.append(info.plc)
+    if incomplete and info.is_incomplete:
+        extra.append("is-incomplete")
+    if different and info.is_different:
+        extra.append("is-different")
+    return extra
+
+
+def create_tree(
+    path_to_info=None,
+    *,
+    by="networkx_path",
+    skip=0,
+    networkx_path=False,
+    dispatch_name=False,
+    version_added=False,
+    plc=False,
+    dispatch_name_if_different=False,
+    incomplete=False,
+    different=False,
+    prefix="",
+):
+    if path_to_info is None:
+        path_to_info = get_path_to_info()
+    if isinstance(by, str):
+        by = [by]
+    G = nx.DiGraph()
+    for info in sorted(
+        path_to_info.values(),
+        key=lambda x: (*(getattr(x, b) for b in by), x.networkx_path),
+    ):
+        if not all(getattr(info, b) for b in by):
+            continue
+        path = prefix + ".".join(getattr(info, b) for b in by)
+        extra = get_extra(
+            info,
+            networkx_path=networkx_path,
+            dispatch_name=dispatch_name,
+            version_added=version_added,
+            plc=plc,
+            dispatch_name_if_different=dispatch_name_if_different,
+            incomplete=incomplete,
+            different=different,
+        )
+        add_branch(G, path, extra=extra, skip=skip)
+    return G
+
+
+def main(
+    path_to_info=None,
+    *,
+    by="networkx_path",
+    networkx_path=False,
+    dispatch_name=False,
+    version_added=False,
+    plc=False,
+    dispatch_name_if_different=True,
+    incomplete=False,
+    different=False,
+    file=sys.stdout,
+):
+    if path_to_info is None:
+        path_to_info = get_path_to_info(version_added_sep="-")
+    kwargs = {
+        "networkx_path": networkx_path,
+        "dispatch_name": dispatch_name,
+        "version_added": version_added,
+        "plc": plc,
+        "dispatch_name_if_different": dispatch_name_if_different,
+        "incomplete": incomplete,
+        "different": different,
+    }
+    if by == "networkx_path":
+        G = create_tree(path_to_info, by="networkx_path", **kwargs)
+        text = re.sub(r"[A-Za-z_\./]+\.", "", ("\n".join(nx.generate_network_text(G))))
+    elif by == "plc":
+        G = create_tree(
+            path_to_info, by=["plc", "networkx_path"], prefix="plc-", **kwargs
+        )
+        text = re.sub(
+            "plc-",
+            "plc.",
+            re.sub(
+                r" plc-[A-Za-z_\./]*\.",
+                " ",
+                "\n".join(nx.generate_network_text(G)),
+            ),
+        )
+    elif by == "version_added":
+        G = create_tree(
+            path_to_info,
+            by=["version_added", "networkx_path"],
+            prefix="version_added-",
+            **kwargs,
+        )
+        text = re.sub(
+            r"version_added-(\S*)",  # only the per-version root labels remain here
+            lambda m: "version: " + m.group(1).replace("-", "."),  # keep "is-*" tags
+            re.sub(
+                r" version_added-[-0-9A-Za-z_\./]*\.",
+                " ",
+                "\n".join(nx.generate_network_text(G)),
+            ),
+        )
+    else:
+        raise ValueError(
+            "`by` argument should be one of {'networkx_path', 'plc', 'version_added'}; "
+            f"got: {by}"
+        )
+    print(text, file=file)
+    return text
+
+
+def get_argumentparser(add_help=True):
+    parser = argparse.ArgumentParser(
+        description="Print a tree showing NetworkX functions implemented by nx-cugraph",
+        add_help=add_help,
+    )
+    parser.add_argument(
+        "--by",
+        choices=["networkx_path", "plc", "version_added"],
+        default="networkx_path",
+        help="How to group functions",
+    )
+    parser.add_argument(
+        "--dispatch-name",
+        "--dispatch_name",
+        action="store_true",
+        help="Show the dispatch name in parentheses if different from NetworkX name",
+    )
+    parser.add_argument(
+        "--dispatch-name-always",
+        "--dispatch_name_always",
+        action="store_true",
+        help="Always show the dispatch name in parentheses",
+    )
+    parser.add_argument(
+        "--plc",
+        "--pylibcugraph",
+        action="store_true",
+        help="Show the used pylibcugraph function in parentheses",
+    )
+    parser.add_argument(
+        "--version-added",
+        "--version_added",
+        action="store_true",
+        help="Show the version added in parentheses",
+    )
+    parser.add_argument(
+        "--networkx-path",
+        "--networkx_path",
+        action="store_true",
+        help="Show the full networkx path in parentheses",
+    )
+    parser.add_argument(
+        "--incomplete",
+        action="store_true",
+        help="Show which functions are incomplete",
+    )
+    parser.add_argument(
+        "--different",
+        action="store_true",
+        help="Show which functions are different",
+    )
+    return parser
+
+
+if __name__ == "__main__":
+    parser = get_argumentparser()
+    args = parser.parse_args()
+    main(
+        by=args.by,
+        networkx_path=args.networkx_path,
+        dispatch_name=args.dispatch_name or args.dispatch_name_always,
+        version_added=args.version_added,
+        plc=args.plc,
+        dispatch_name_if_different=not args.dispatch_name_always,
+        incomplete=args.incomplete,
+        different=args.different,
+    )
diff --git a/python/nx-cugraph/nx_cugraph/utils/decorators.py b/python/nx-cugraph/nx_cugraph/utils/decorators.py
index a0dbfcec890..d09a9e9617a 100644
--- a/python/nx-cugraph/nx_cugraph/utils/decorators.py
+++ b/python/nx-cugraph/nx_cugraph/utils/decorators.py
@@ -1,4 +1,4 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
@@ -41,14 +41,25 @@ class networkx_algorithm:
     name: str
     extra_doc: str | None
     extra_params: dict[str, str] | None
+    version_added: str
+    is_incomplete: bool
+    is_different: bool
+    _plc_names: set[str] | None
 
     def __new__(
         cls,
         func=None,
         *,
         name: str | None = None,
+        # Extra parameter info that is added to NetworkX docstring
         extra_params: dict[str, str] | str | None = None,
+        # Applies `nodes_or_number` decorator compatibly across versions (3.3 changed)
         nodes_or_number: list[int] | int | None = None,
+        # Metadata (for introspection only)
+        version_added: str,  # Required
+        is_incomplete: bool = False,  # See self.extra_doc for details if True
+        is_different: bool = False,  # See self.extra_doc for details if True
+        plc: str | set[str] | None = None,  # Hidden from user, may be removed someday
     ):
         if func is None:
             return partial(
@@ -56,6 +67,10 @@ def __new__(
                 name=name,
                 extra_params=extra_params,
                 nodes_or_number=nodes_or_number,
+                plc=plc,
+                version_added=version_added,
+                is_incomplete=is_incomplete,
+                is_different=is_different,
             )
         instance = object.__new__(cls)
         if nodes_or_number is not None and nx.__version__[:3] > "3.2":
@@
-74,6 +89,15 @@ def __new__( f"extra_params must be dict, str, or None; got {type(extra_params)}" ) instance.extra_params = extra_params + if plc is None or isinstance(plc, set): + instance._plc_names = plc + elif isinstance(plc, str): + instance._plc_names = {plc} + else: + raise TypeError(f"plc argument must be str, set, or None; got {type(plc)}") + instance.version_added = version_added + instance.is_incomplete = is_incomplete + instance.is_different = is_different # The docstring on our function is added to the NetworkX docstring. instance.extra_doc = ( dedent(func.__doc__.lstrip("\n").rstrip()) if func.__doc__ else None @@ -91,6 +115,11 @@ def __new__( def _can_run(self, func): """Set the `can_run` attribute to the decorated function.""" + if not func.__name__.startswith("_"): + raise ValueError( + "The name of the function used by `_can_run` must begin with '_'; " + f"got: {func.__name__!r}" + ) self.can_run = func def __call__(self, /, *args, **kwargs): diff --git a/python/nx-cugraph/pyproject.toml b/python/nx-cugraph/pyproject.toml index b29578b036f..63ac115918f 100644 --- a/python/nx-cugraph/pyproject.toml +++ b/python/nx-cugraph/pyproject.toml @@ -52,12 +52,20 @@ test = [ Homepage = "https://github.com/rapidsai/cugraph" Documentation = "https://docs.rapids.ai/api/cugraph/stable/" +# "plugin" used in nx version < 3.2 [project.entry-points."networkx.plugins"] cugraph = "nx_cugraph.interface:BackendInterface" [project.entry-points."networkx.plugin_info"] cugraph = "_nx_cugraph:get_info" +# "backend" used in nx version >= 3.2 +[project.entry-points."networkx.backends"] +cugraph = "nx_cugraph.interface:BackendInterface" + +[project.entry-points."networkx.backend_info"] +cugraph = "_nx_cugraph:get_info" + [tool.setuptools] license-files = ["LICENSE"]