diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 85ac682daf4..273a8902eae 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -133,3 +133,43 @@ jobs: sha: ${{ inputs.sha }} date: ${{ inputs.date }} package-name: nx-cugraph + wheel-build-cugraph-dgl: + needs: wheel-publish-cugraph + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_cugraph-dgl.sh + wheel-publish-cugraph-dgl: + needs: wheel-build-cugraph-dgl + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: cugraph-dgl + wheel-build-cugraph-pyg: + needs: wheel-publish-cugraph + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + script: ci/build_wheel_cugraph-pyg.sh + wheel-publish-cugraph-pyg: + needs: wheel-build-cugraph-pyg + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + with: + build_type: ${{ inputs.build_type || 'branch' }} + branch: ${{ inputs.branch }} + sha: ${{ inputs.sha }} + date: ${{ inputs.date }} + package-name: cugraph-pyg diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 82c71efffdb..84d22f8e896 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -25,6 +25,10 @@ jobs: - wheel-tests-cugraph - wheel-build-nx-cugraph - wheel-tests-nx-cugraph + - wheel-build-cugraph-dgl + - wheel-tests-cugraph-dgl + - wheel-build-cugraph-pyg + - wheel-tests-cugraph-pyg - devcontainer secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/pr-builder.yaml@branch-24.02 @@ -127,6 +131,36 @@ jobs: with: build_type: pull-request script: ci/test_wheel_nx-cugraph.sh + wheel-build-cugraph-dgl: + needs: wheel-tests-cugraph + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/build_wheel_cugraph-dgl.sh + wheel-tests-cugraph-dgl: + needs: wheel-build-cugraph-dgl + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/test_wheel_cugraph-dgl.sh + matrix_filter: map(select(.ARCH == "amd64")) + wheel-build-cugraph-pyg: + needs: wheel-tests-cugraph + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/build_wheel_cugraph-pyg.sh + wheel-tests-cugraph-pyg: + needs: wheel-build-cugraph-pyg + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: pull-request + script: ci/test_wheel_cugraph-pyg.sh + matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0")) devcontainer: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/build-in-devcontainer.yaml@branch-24.02 diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 0d9f4d291c3..773358ede8d 100644 --- a/.github/workflows/test.yaml +++ 
b/.github/workflows/test.yaml @@ -57,3 +57,21 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_nx-cugraph.sh + wheel-tests-cugraph-dgl: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/test_wheel_cugraph-dgl.sh + wheel-tests-cugraph-pyg: + secrets: inherit + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + with: + build_type: nightly + branch: ${{ inputs.branch }} + date: ${{ inputs.date }} + sha: ${{ inputs.sha }} + script: ci/test_wheel_cugraph-pyg.sh diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py index 971c3ff1032..a8ed18a20fc 100644 --- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py +++ b/benchmarks/nx-cugraph/pytest-based/bench_algos.py @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -11,29 +11,13 @@ # See the License for the specific language governing permissions and # limitations under the License. +import random + import networkx as nx import pandas as pd import pytest from cugraph import datasets - -# FIXME: promote these to cugraph.datasets so the following steps aren't -# necessary -# -# These datasets can be downloaded using the script in the 'datasets' dir: -# -# cd /datasets -# ./get_test_data.sh --benchmark -# -# Then set the following env var so the dataset utils can find their location: -# -# export RAPIDS_DATASET_ROOT_DIR=/datasets -# -from cugraph_benchmarking.params import ( - hollywood, - europe_osm, - cit_patents, - soc_livejournal, -) +import nx_cugraph as nxcg # Attempt to import the NetworkX dispatching module, which is only needed when # testing with NX <3.2 in order to dynamically switch backends. NX >=3.2 allows @@ -45,22 +29,76 @@ ################################################################################ -# Fixtures and helpers -backend_params = ["cugraph", None] +# Fixtures and params + +# See https://pytest-benchmark.readthedocs.io/en/latest/glossary.html for how +# these variables are used. 
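For context, a minimal sketch (not part of this diff) of how these pytest-benchmark settings are consumed: `benchmark.pedantic` receives the rounds/iterations/warmup_rounds values defined just below. `bench_example` and `slow_op` are illustrative names only.

```python
# Illustrative sketch, assuming pytest-benchmark is installed; the `benchmark`
# fixture is provided by the plugin when run under pytest.
def bench_example(benchmark):
    def slow_op(n):
        return sum(range(n))

    result = benchmark.pedantic(
        target=slow_op,
        args=(10_000,),
        rounds=1,         # one measured round
        iterations=1,     # a single call per round
        warmup_rounds=1,  # plus one unmeasured warm-up call
    )
    # pedantic() returns the target's return value
    assert result == sum(range(10_000))
```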
+rounds = 1 +iterations = 1 +warmup_rounds = 1 -dataset_params = [ +dataset_param_values = [ pytest.param(datasets.karate, marks=[pytest.mark.small, pytest.mark.undirected]), pytest.param(datasets.netscience, marks=[pytest.mark.small, pytest.mark.directed]), pytest.param( datasets.email_Eu_core, marks=[pytest.mark.small, pytest.mark.directed] ), - pytest.param(cit_patents, marks=[pytest.mark.medium, pytest.mark.directed]), - pytest.param(hollywood, marks=[pytest.mark.medium, pytest.mark.undirected]), - pytest.param(europe_osm, marks=[pytest.mark.medium, pytest.mark.undirected]), - pytest.param(soc_livejournal, marks=[pytest.mark.large, pytest.mark.directed]), + pytest.param( + datasets.cit_patents, marks=[pytest.mark.medium, pytest.mark.directed] + ), + pytest.param( + datasets.hollywood, marks=[pytest.mark.medium, pytest.mark.undirected] + ), + pytest.param( + datasets.soc_livejournal, marks=[pytest.mark.medium, pytest.mark.directed] + ), + pytest.param( + datasets.europe_osm, marks=[pytest.mark.large, pytest.mark.undirected] + ), ] +backend_param_values = ["cugraph", "cugraph-preconverted", None] + + +def setup_module(module): + """ + Trivial conversion call to force various one-time CUDA initialization + operations to happen outside of benchmarks. + """ + G = nx.karate_club_graph() + nxcg.from_networkx(G) + + +# Test IDs are generated using the lambda assigned to the ids arg to provide an +# easier-to-read name. This is especially helpful for Dataset objs (see +# https://docs.pytest.org/en/stable/reference/reference.html#pytest-fixture) +@pytest.fixture( + scope="module", params=dataset_param_values, ids=lambda ds: f"ds={str(ds)}" +) +def graph_obj(request): + """ + Returns a NX Graph or DiGraph obj from the dataset instance parameter. + """ + dataset = request.param + return nx_graph_from_dataset(dataset) + + +@pytest.fixture( + scope="module", + params=backend_param_values, + ids=lambda backend: f"backend={backend}", +) +def backend(request): + """ + Returns the backend name to use. This is done as a fixture for consistency + and simplicity when creating benchmarks (no need to mark the benchmark as + parametrized). + """ + return request.param + +################################################################################ +# Helpers def nx_graph_from_dataset(dataset_obj): """ Read the dataset specified by the dataset_obj and create and return a @@ -87,126 +125,334 @@ def nx_graph_from_dataset(dataset_obj): return G -# Test IDs are generated using the lambda assigned to the ids arg to provide an -# easier-to-read name from the Dataset obj string repr. -# See: https://docs.pytest.org/en/stable/reference/reference.html#pytest-fixture -@pytest.fixture(scope="module", params=dataset_params, ids=lambda ds: f"ds={str(ds)}") -def graph_obj(request): - """ - Returns a NX Graph or DiGraph obj from the dataset instance parameter. - """ - dataset = request.param - return nx_graph_from_dataset(dataset) - - -def get_legacy_backend_selector(backend_name): +def get_legacy_backend_wrapper(backend_name): """ Returns a callable that wraps an algo function with either the default - dispatch decorator, or the "testing" decorator which unconditionally - dispatches. + dispatcher (which dispatches based on input graph type), or the "testing" + dispatcher (which autoconverts and unconditionally dispatches). 
This is only supported for NetworkX <3.2 """ backends.plugin_name = "cugraph" orig_dispatch = backends._dispatch testing_dispatch = backends.test_override_dispatch - # Testing with the networkx <3.2 dispatch mechanism is based on decorating - # networkx APIs. The decorator is either one that only uses a backend if - # the input graph type is for that backend (the default decorator), or the - # "testing" decorator, which unconditionally converts a graph type to the - # type needed by the backend then calls the backend. If the cugraph backend - # is specified, create a callable that decorates the benchmarked function - # with the testing decorator. - # - # Because both the default and testing decorators assume they are only - # applied once and do bookkeeping to ensure algos are not registered - # multiple times, the callable also clears bookkeeping so the decorators - # can be reapplied multiple times. This is obviously a hack and networkx - # >=3.2 makes this use case properly supported. if backend_name == "cugraph": - - def wrapper(*args, **kwargs): - backends._registered_algorithms = {} - return testing_dispatch(*args, **kwargs) - + dispatch = testing_dispatch else: + dispatch = orig_dispatch + + def wrap_callable_for_dispatch(func, exhaust_returned_iterator=False): + # Networkx <3.2 registers functions when the dispatch decorator is + # applied (called) and errors if re-registered, so clear bookkeeping to + # allow it to be called repeatedly. + backends._registered_algorithms = {} + actual_func = dispatch(func) # returns the func the dispatcher picks def wrapper(*args, **kwargs): - backends._registered_algorithms = {} - return orig_dispatch(*args, **kwargs) + retval = actual_func(*args, **kwargs) + if exhaust_returned_iterator: + retval = list(retval) + return retval - return wrapper + return wrapper + + return wrap_callable_for_dispatch -def get_backend_selector(backend_name): +def get_backend_wrapper(backend_name): """ Returns a callable that wraps an algo function in order to set the "backend" kwarg on it. This is only supported for NetworkX >= 3.2 """ - def get_callable_for_func(func): + def wrap_callable_for_dispatch(func, exhaust_returned_iterator=False): def wrapper(*args, **kwargs): kwargs["backend"] = backend_name - return func(*args, **kwargs) + retval = func(*args, **kwargs) + if exhaust_returned_iterator: + retval = list(retval) + return retval return wrapper - return get_callable_for_func + return wrap_callable_for_dispatch @pytest.fixture( - scope="module", params=backend_params, ids=lambda backend: f"backend={backend}" + scope="module", + params=backend_param_values, + ids=lambda backend: f"backend={backend}", ) -def backend_selector(request): +def backend_wrapper(request): """ Returns a callable that takes a function algo and wraps it in another function that calls the algo using the appropriate backend. + + For example: if the backend to test is "cugraph", this will return a + function that calls nx.pagerank(..., backend='cugraph') """ backend_name = request.param + actual_backend_name = backend_name + + # Special case: cugraph-preconverted may be specified as a backend but this + # name is reserved to indicate a cugraph backend is to be used with a + # preconverted graph obj (rather than having the backend do the + # conversion). 
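To make the two dispatch paths above concrete, an illustrative sketch (not part of this module) of what the wrappers resolve to for NetworkX >= 3.2, assuming nx-cugraph is installed:

```python
# Illustrative only — mirrors what backend_wrapper/get_backend_wrapper produce.
import networkx as nx
import nx_cugraph as nxcg

G = nx.karate_club_graph()

# backend="cugraph": the wrapper forwards the backend kwarg, and NetworkX
# converts G and dispatches to nx-cugraph on every call.
scores = nx.pagerank(G, backend="cugraph")

# backend="cugraph-preconverted": the same "cugraph" backend, but the graph is
# converted once up front so conversion cost is excluded from the timing.
G_pre = nxcg.from_networkx(G)
scores_pre = nx.pagerank(G_pre, backend="cugraph")

assert isinstance(scores, dict) and isinstance(scores_pre, dict)
```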
+ if backend_name == "cugraph-preconverted": + actual_backend_name = "cugraph" + + # NX <3.2 does not support the backends= kwarg, so the backend must be + # enabled differently if backends is not None: - return get_legacy_backend_selector(backend_name) + wrapper = get_legacy_backend_wrapper(actual_backend_name) else: - return get_backend_selector(backend_name) + wrapper = get_backend_wrapper(actual_backend_name) + + wrapper.backend_name = backend_name + return wrapper + + +def get_graph_obj_for_benchmark(graph_obj, backend_wrapper): + """ + Given a Graph object and a backend name, return a converted Graph or the + original Graph object based on the backend to use. + + This is needed because some backend names are actually used as descriptions + for combinations of backends and converted/non-converted graphs. For + example, a benchmark may specify the "cugraph-preconverted" backend, which + is not an installed backend but instead refers to the "cugraph" backend + passed a NX Graph that has been converted to a nx-cugraph Graph object. + """ + G = graph_obj + if backend_wrapper.backend_name == "cugraph-preconverted": + G = nxcg.from_networkx(G) + return G ################################################################################ # Benchmarks -normalized_params = [True, False] -k_params = [10, 100] - - -@pytest.mark.parametrize("normalized", normalized_params, ids=lambda norm: f"{norm=}") -@pytest.mark.parametrize("k", k_params, ids=lambda k: f"{k=}") -def bench_betweenness_centrality(benchmark, graph_obj, backend_selector, normalized, k): - result = benchmark( - backend_selector(nx.betweenness_centrality), - graph_obj, - weight=None, - normalized=normalized, - k=k, +# normalized_param_values = [True, False] +# k_param_values = [10, 100] +normalized_param_values = [True] +k_param_values = [10] + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}") +def bench_betweenness_centrality(benchmark, graph_obj, backend_wrapper, normalized, k): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.betweenness_centrality), + args=(G,), + kwargs=dict( + weight=None, + normalized=normalized, + k=k, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, ) assert type(result) is dict -@pytest.mark.parametrize("normalized", normalized_params, ids=lambda norm: f"{norm=}") +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +@pytest.mark.parametrize("k", k_param_values, ids=lambda k: f"{k=}") def bench_edge_betweenness_centrality( - benchmark, graph_obj, backend_selector, normalized + benchmark, graph_obj, backend_wrapper, normalized, k ): - result = benchmark( - backend_selector(nx.edge_betweenness_centrality), - graph_obj, - weight=None, - normalized=normalized, + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.edge_betweenness_centrality), + args=(G,), + kwargs=dict( + weight=None, + normalized=normalized, + k=k, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, ) assert type(result) is dict -def bench_louvain_communities(benchmark, graph_obj, backend_selector): +def bench_louvain_communities(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) # The cugraph backend for louvain_communities only supports 
undirected graphs - if isinstance(graph_obj, nx.DiGraph): - G = graph_obj.to_undirected() - else: - G = graph_obj - result = benchmark(backend_selector(nx.community.louvain_communities), G) + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.community.louvain_communities), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_eigenvector_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.eigenvector_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +def bench_hits(benchmark, graph_obj, backend_wrapper, normalized): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.hits), + args=(G,), + kwargs=dict( + normalized=normalized, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is tuple + assert len(result) == 2 + assert type(result[0]) is dict + assert type(result[1]) is dict + + +def bench_in_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.in_degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +@pytest.mark.parametrize( + "normalized", normalized_param_values, ids=lambda norm: f"{norm=}" +) +def bench_katz_centrality(benchmark, graph_obj, backend_wrapper, normalized): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.katz_centrality), + args=(G,), + kwargs=dict( + normalized=normalized, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_k_truss(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.k_truss), + args=(G,), + kwargs=dict( + k=2, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # Check that this at least appears to be some kind of NX-like Graph + assert hasattr(result, "has_node") + + +def bench_out_degree_centrality(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.out_degree_centrality), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_pagerank(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.pagerank), + args=(G,), 
+ rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper): + # Use the node with the highest degree + degrees = graph_obj.degree() # list of tuples of (node, degree) + node = max(degrees, key=lambda t: t[1])[0] + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + result = benchmark.pedantic( + target=backend_wrapper(nx.single_source_shortest_path_length), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapper): + # Use the node with the highest degree + degrees = graph_obj.degree() # list of tuples of (node, degree) + node = max(degrees, key=lambda t: t[1])[0] + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + + result = benchmark.pedantic( + target=backend_wrapper( + nx.single_target_shortest_path_length, exhaust_returned_iterator=True + ), + args=(G,), + kwargs=dict( + target=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # exhaust_returned_iterator=True forces the result to a list, but is not + # needed for this algo in NX 3.3+ which returns a dict instead of an + # iterator. Forcing to a list does not change the benchmark timing. assert type(result) is list diff --git a/benchmarks/shared/python/cugraph_benchmarking/params.py b/benchmarks/shared/python/cugraph_benchmarking/params.py index d82cfd26117..034e22ffc37 100644 --- a/benchmarks/shared/python/cugraph_benchmarking/params.py +++ b/benchmarks/shared/python/cugraph_benchmarking/params.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -14,42 +14,16 @@ import pytest from pylibcugraph.testing.utils import gen_fixture_params -from cugraph.testing import RAPIDS_DATASET_ROOT_DIR_PATH from cugraph.datasets import ( - Dataset, karate, netscience, email_Eu_core, + hollywood, + europe_osm, + cit_patents, + soc_livejournal, ) -# Create Dataset objects from .csv files. -# Once the cugraph.dataset package is updated to include the metadata files for -# these (like karate), these will no longer need to be explicitly instantiated. -hollywood = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/undirected/hollywood.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -hollywood.metadata["is_directed"] = False -europe_osm = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/undirected/europe_osm.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -europe_osm.metadata["is_directed"] = False -cit_patents = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/directed/cit-Patents.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -cit_patents.metadata["is_directed"] = True -soc_livejournal = Dataset( - csv_file=RAPIDS_DATASET_ROOT_DIR_PATH / "csv/directed/soc-LiveJournal1.csv", - csv_col_names=["src", "dst"], - csv_col_dtypes=["int32", "int32"], -) -soc_livejournal.metadata["is_directed"] = True - # Assume all "file_data" (.csv file on disk) datasets are too small to be useful for MG. 
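With the hand-built `Dataset(...)` definitions removed above, the large benchmark datasets are imported like any other built-in dataset. A short sketch (assuming the promoted datasets carry the same metadata as the removed definitions, e.g. directedness):

```python
# Illustrative sketch of the simplified import path.
from cugraph.datasets import hollywood, europe_osm, cit_patents, soc_livejournal

assert cit_patents.metadata["is_directed"] is True
assert hollywood.metadata["is_directed"] is False
```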
undirected_datasets = [ pytest.param( diff --git a/ci/build_wheel.sh b/ci/build_wheel.sh index 163520ea1da..828d8948143 100755 --- a/ci/build_wheel.sh +++ b/ci/build_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -euo pipefail @@ -36,7 +36,7 @@ if ! rapids-is-release-build; then alpha_spec=',>=0.0.0a0' fi -for dep in rmm cudf raft-dask pylibcugraph pylibraft ucx-py; do +for dep in rmm cudf cugraph raft-dask pylibcugraph pylibcugraphops pylibraft ucx-py; do sed -r -i "s/${dep}==(.*)\"/${dep}${PACKAGE_CUDA_SUFFIX}==\1${alpha_spec}\"/g" ${pyproject_file} done @@ -55,7 +55,9 @@ cd "${package_dir}" python -m pip wheel . -w dist -vvv --no-deps --disable-pip-version-check # pure-python packages should not have auditwheel run on them. -if [[ ${package_name} == "nx-cugraph" ]]; then +if [[ ${package_name} == "nx-cugraph" ]] || \ + [[ ${package_name} == "cugraph-dgl" ]] || \ + [[ ${package_name} == "cugraph-pyg" ]]; then RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-upload-wheels-to-s3 dist else mkdir -p final_dist diff --git a/ci/build_wheel_cugraph-dgl.sh b/ci/build_wheel_cugraph-dgl.sh new file mode 100755 index 00000000000..d62f810cba4 --- /dev/null +++ b/ci/build_wheel_cugraph-dgl.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +./ci/build_wheel.sh cugraph-dgl python/cugraph-dgl diff --git a/ci/build_wheel_cugraph-pyg.sh b/ci/build_wheel_cugraph-pyg.sh new file mode 100755 index 00000000000..97baa243f73 --- /dev/null +++ b/ci/build_wheel_cugraph-pyg.sh @@ -0,0 +1,6 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -euo pipefail + +./ci/build_wheel.sh cugraph-pyg python/cugraph-pyg diff --git a/ci/test_wheel_cugraph-dgl.sh b/ci/test_wheel_cugraph-dgl.sh new file mode 100755 index 00000000000..90c86af95fe --- /dev/null +++ b/ci/test_wheel_cugraph-dgl.sh @@ -0,0 +1,34 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. + +set -eoxu pipefail + +package_name="cugraph-dgl" +package_dir="python/cugraph-dgl" + +python_package_name=$(echo ${package_name}|sed 's/-/_/g') + +mkdir -p ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +# use 'ls' to expand wildcard before adding `[extra]` requires for pip +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# pip creates wheels using python package names +python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] + + +PKG_CUDA_VER="$(echo ${CUDA_VERSION} | cut -d '.' -f1,2 | tr -d '.')" +PKG_CUDA_VER_MAJOR=${PKG_CUDA_VER:0:2} +if [[ "${PKG_CUDA_VER_MAJOR}" == "12" ]]; then + PYTORCH_CUDA_VER="121" +else + PYTORCH_CUDA_VER=$PKG_CUDA_VER +fi +PYTORCH_URL="https://download.pytorch.org/whl/cu${PYTORCH_CUDA_VER}" +DGL_URL="https://data.dgl.ai/wheels/cu${PYTORCH_CUDA_VER}/repo.html" + +rapids-logger "Installing PyTorch and DGL" +rapids-retry python -m pip install torch --index-url ${PYTORCH_URL} +rapids-retry python -m pip install dgl --find-links ${DGL_URL} + +python -m pytest python/cugraph-dgl/tests diff --git a/ci/test_wheel_cugraph-pyg.sh b/ci/test_wheel_cugraph-pyg.sh new file mode 100755 index 00000000000..9a211c81886 --- /dev/null +++ b/ci/test_wheel_cugraph-pyg.sh @@ -0,0 +1,43 @@ +#!/bin/bash +# Copyright (c) 2024, NVIDIA CORPORATION. 
+ +set -eoxu pipefail + +package_name="cugraph-pyg" +package_dir="python/cugraph-pyg" + +python_package_name=$(echo ${package_name}|sed 's/-/_/g') + +mkdir -p ./dist +RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" + +# use 'ls' to expand wildcard before adding `[extra]` requires for pip +RAPIDS_PY_WHEEL_NAME="${package_name}_${RAPIDS_PY_CUDA_SUFFIX}" rapids-download-wheels-from-s3 ./dist +# pip creates wheels using python package names +python -m pip install $(ls ./dist/${python_package_name}*.whl)[test] + +# RAPIDS_DATASET_ROOT_DIR is used by test scripts +export RAPIDS_DATASET_ROOT_DIR="$(realpath datasets)" + +if [[ "${CUDA_VERSION}" == "11.8.0" ]]; then + rapids-logger "Installing PyTorch and PyG dependencies" + PYTORCH_URL="https://download.pytorch.org/whl/cu118" + rapids-retry python -m pip install torch==2.1.0 --index-url ${PYTORCH_URL} + rapids-retry python -m pip install torch-geometric==2.4.0 + rapids-retry python -m pip install \ + pyg_lib \ + torch_scatter \ + torch_sparse \ + torch_cluster \ + torch_spline_conv \ + -f https://data.pyg.org/whl/torch-2.1.0+cu118.html + + rapids-logger "pytest cugraph-pyg (single GPU)" + python -m pytest \ + --cache-clear \ + --ignore=tests/int \ + --ignore=tests/mg \ + python/cugraph-pyg/cugraph_pyg/tests +else + rapids-logger "skipping cugraph-pyg wheel test on CUDA!=11.8" +fi diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 6a75a420bf8..6684d31d8fd 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -1005,9 +1005,14 @@ remove_self_loops(raft::handle_t const& handle, std::optional>&& edgelist_edge_types); /** - * @brief Remove all but one edge when a multi-edge exists. Note that this function does not use - * stable methods. When a multi-edge exists, one of the edges will remain, there is no - * guarantee on which one will remain. + * @brief Remove all but one edge when a multi-edge exists. + * + * When a multi-edge exists, one of the edges will remain. If @p keep_min_value_edge is false, an + * arbitrary edge will be selected among the edges in the multi-edge. If @p keep_min_value_edge is + * true, the edge with the minimum value will be selected. The edge weights will be first compared + * (if @p edgelist_weights.has_value() is true); edge IDs will be compared next (if @p + * edgelist_edge_ids.has_value() is true); and edge types (if @p edgelist_edge_types.has_value() is + * true) will compared last. * * In an MG context it is assumed that edges have been shuffled to the proper GPU, * in which case any multi-edges will be on the same GPU. @@ -1024,6 +1029,11 @@ remove_self_loops(raft::handle_t const& handle, * @param edgelist_weights Optional list of edge weights * @param edgelist_edge_ids Optional list of edge ids * @param edgelist_edge_types Optional list of edge types + * @param keep_min_value_edge Flag indicating whether to keep an arbitrary edge (false) or the + * minimum value edge (true) among the edges in a multi-edge. Relevant only if @p + * edgelist_weights.has_value() | @p edgelist_edge_ids.has_value() | @p + * edgelist_edge_types.has_value() is true. Setting this to true incurs performance overhead as this + * requires more comparisons. 
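A conceptual illustration of the selection rule described above (plain Python, not the C++ API): given a multi-edge whose copies carry different values, `keep_min_value_edge=true` keeps the copy with the minimum value, comparing weights first, then edge IDs, then edge types.

```python
# Two copies of edge (0 -> 1) with weights 5.0 and 2.0.
# keep_min_value_edge=false may keep either copy; keep_min_value_edge=true
# keeps the weight-2.0 copy (ties would fall back to edge ID, then edge type).
edges = [(0, 1, 5.0), (0, 1, 2.0)]
kept = min(edges, key=lambda e: e[2:])  # compare the value fields only
assert kept == (0, 1, 2.0)
```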
* @return Tuple of vectors storing edge sources, destinations, optional weights, * optional edge ids, optional edge types. */ @@ -1038,6 +1048,7 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge = false); } // namespace cugraph diff --git a/cpp/src/c_api/graph_mg.cpp b/cpp/src/c_api/graph_mg.cpp index 326022a3fa9..57a589caf02 100644 --- a/cpp/src/c_api/graph_mg.cpp +++ b/cpp/src/c_api/graph_mg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -217,7 +217,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_dsts), std::move(edgelist_weights), std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + std::move(edgelist_edge_types), + properties_->is_symmetric + ? true /* keep minimum weight edges to maintain symmetry */ + : false); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/c_api/graph_sg.cpp b/cpp/src/c_api/graph_sg.cpp index 7793458b53a..6745be01f95 100644 --- a/cpp/src/c_api/graph_sg.cpp +++ b/cpp/src/c_api/graph_sg.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -200,7 +200,10 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor { std::move(edgelist_dsts), std::move(edgelist_weights), std::move(edgelist_edge_ids), - std::move(edgelist_edge_types)); + std::move(edgelist_edge_types), + properties_->is_symmetric + ? true /* keep minimum weight edges to maintain symmetry */ + : false); } std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) = diff --git a/cpp/src/structure/remove_multi_edges.cu b/cpp/src/structure/remove_multi_edges.cu index ba07d068c0e..54403f0b034 100644 --- a/cpp/src/structure/remove_multi_edges.cu +++ b/cpp/src/structure/remove_multi_edges.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,7 +27,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -39,7 +40,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -51,7 +53,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -63,7 +66,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -75,7 +79,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); template std::tuple, rmm::device_uvector, @@ -87,6 +92,7 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types); + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge); } // namespace cugraph diff --git a/cpp/src/structure/remove_multi_edges_impl.cuh b/cpp/src/structure/remove_multi_edges_impl.cuh index fdd3059f874..651876ac8b1 100644 --- a/cpp/src/structure/remove_multi_edges_impl.cuh +++ b/cpp/src/structure/remove_multi_edges_impl.cuh @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -104,10 +104,12 @@ group_multi_edges( rmm::device_uvector&& edgelist_srcs, rmm::device_uvector&& edgelist_dsts, decltype(allocate_dataframe_buffer(0, rmm::cuda_stream_view{}))&& edgelist_values, - size_t mem_frugal_threshold) + size_t mem_frugal_threshold, + bool keep_min_value_edge) { auto pair_first = thrust::make_zip_iterator(edgelist_srcs.begin(), edgelist_dsts.begin()); auto value_first = get_dataframe_buffer_begin(edgelist_values); + auto edge_first = thrust::make_zip_iterator(pair_first, value_first); if (edgelist_srcs.size() > mem_frugal_threshold) { // FIXME: Tuning parameter to address high frequency multi-edges @@ -128,19 +130,28 @@ group_multi_edges( raft::update_host( h_group_counts.data(), group_counts.data(), group_counts.size(), handle.get_stream()); - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first, - pair_first + h_group_counts[0], - get_dataframe_buffer_begin(edgelist_values)); - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first + h_group_counts[0], - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values) + h_group_counts[0]); + if (keep_min_value_edge) { + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + h_group_counts[0]); + thrust::sort(handle.get_thrust_policy(), + edge_first + h_group_counts[0], + edge_first + edgelist_srcs.size()); + } else { + thrust::sort_by_key( + handle.get_thrust_policy(), pair_first, pair_first + h_group_counts[0], value_first); + thrust::sort_by_key(handle.get_thrust_policy(), + pair_first + h_group_counts[0], + pair_first + edgelist_srcs.size(), + value_first + h_group_counts[0]); + } } else { - thrust::sort_by_key(handle.get_thrust_policy(), - pair_first, - pair_first + edgelist_srcs.size(), - get_dataframe_buffer_begin(edgelist_values)); + if (keep_min_value_edge) { + thrust::sort(handle.get_thrust_policy(), edge_first, edge_first + edgelist_srcs.size()); + } else { + thrust::sort_by_key(handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_srcs.size(), + get_dataframe_buffer_begin(edgelist_values)); + } } return std::make_tuple( @@ -160,7 +171,8 @@ remove_multi_edges(raft::handle_t const& handle, rmm::device_uvector&& edgelist_dsts, std::optional>&& edgelist_weights, std::optional>&& edgelist_edge_ids, - std::optional>&& edgelist_edge_types) + std::optional>&& edgelist_edge_types, + bool keep_min_value_edge) { auto total_global_mem = handle.get_device_properties().totalGlobalMem; size_t element_size = sizeof(vertex_t) * 2; @@ -187,7 +199,8 @@ remove_multi_edges(raft::handle_t const& handle, std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple( edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights, edgelist_edge_ids)) = @@ -196,7 +209,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_ids)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } else { if (edgelist_edge_types) { @@ -207,7 +221,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_weights)) = 
detail::group_multi_edges>( @@ -215,7 +230,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_weights)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } } else { @@ -228,7 +244,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_ids), std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::forward_as_tuple(edgelist_srcs, edgelist_dsts, std::tie(edgelist_edge_ids)) = detail::group_multi_edges>( @@ -236,7 +253,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_ids)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } } else { if (edgelist_edge_types) { @@ -246,7 +264,8 @@ remove_multi_edges(raft::handle_t const& handle, std::move(edgelist_srcs), std::move(edgelist_dsts), std::make_tuple(std::move(*edgelist_edge_types)), - mem_frugal_threshold); + mem_frugal_threshold, + keep_min_value_edge); } else { std::tie(edgelist_srcs, edgelist_dsts) = detail::group_multi_edges( handle, std::move(edgelist_srcs), std::move(edgelist_dsts), mem_frugal_threshold); diff --git a/cpp/tests/link_prediction/weighted_similarity_test.cpp b/cpp/tests/link_prediction/weighted_similarity_test.cpp index ca644b76c5a..99e752c0b02 100644 --- a/cpp/tests/link_prediction/weighted_similarity_test.cpp +++ b/cpp/tests/link_prediction/weighted_similarity_test.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -27,9 +27,9 @@ struct Similarity_Usecase { bool use_weights{false}; - bool check_correctness{true}; size_t max_seeds{std::numeric_limits::max()}; size_t max_vertex_pairs_to_check{std::numeric_limits::max()}; + bool check_correctness{true}; }; template @@ -293,7 +293,7 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20, 100}, Similarity_Usecase{false, true, 20, //: 100}), - ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(Similarity_Usecase{true, 20, 100, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); @@ -305,7 +305,7 @@ INSTANTIATE_TEST_SUITE_P( // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{true, true, 20, 100}, //: Similarity_Usecase{false,true,20,100}), - ::testing::Values(Similarity_Usecase{true, true, 20, 100}), + ::testing::Values(Similarity_Usecase{true, 20, 100, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( @@ -319,7 +319,8 @@ INSTANTIATE_TEST_SUITE_P( // disable correctness checks // Disable weighted computation testing in 22.10 //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), - ::testing::Values(Similarity_Usecase{true, true}), + ::testing::Values(Similarity_Usecase{ + true, std::numeric_limits::max(), std::numeric_limits::max(), true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( @@ -332,7 +333,8 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Combine( // disable correctness checks for large graphs //::testing::Values(Similarity_Usecase{false, false}, Similarity_Usecase{true, false}), - ::testing::Values(Similarity_Usecase{true, false}), + ::testing::Values(Similarity_Usecase{ + true, std::numeric_limits::max(), std::numeric_limits::max(), false}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/test_graphs.hpp b/cpp/tests/utilities/test_graphs.hpp index 8cc87b26f1d..5a9dc9c90d4 100644 --- a/cpp/tests/utilities/test_graphs.hpp +++ b/cpp/tests/utilities/test_graphs.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -633,12 +633,14 @@ construct_graph(raft::handle_t const& handle, if (drop_multi_edges) { std::tie(d_src_v, d_dst_v, d_weights_v, std::ignore, std::ignore) = - cugraph::remove_multi_edges(handle, - std::move(d_src_v), - std::move(d_dst_v), - std::move(d_weights_v), - std::nullopt, - std::nullopt); + cugraph::remove_multi_edges( + handle, + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + std::nullopt, + std::nullopt, + is_symmetric ? 
true /* keep minimum weight edges to maintain symmetry */ : false); } graph_t graph(handle); diff --git a/dependencies.yaml b/dependencies.yaml index 579acec3996..3eed525bfe4 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -165,6 +165,15 @@ files: table: project includes: - python_run_cugraph_dgl + - depends_on_pylibcugraphops + py_test_cugraph_dgl: + output: pyproject + pyproject_dir: python/cugraph-dgl + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common py_build_cugraph_pyg: output: pyproject pyproject_dir: python/cugraph-pyg @@ -179,6 +188,15 @@ files: table: project includes: - python_run_cugraph_pyg + - depends_on_pylibcugraphops + py_test_cugraph_pyg: + output: pyproject + pyproject_dir: python/cugraph-pyg + extras: + table: project.optional-dependencies + key: test + includes: + - test_python_common py_build_cugraph_service_client: output: pyproject pyproject_dir: python/cugraph-service/client diff --git a/python/cugraph-dgl/pyproject.toml b/python/cugraph-dgl/pyproject.toml index 62fa8ab6368..65ee414da44 100644 --- a/python/cugraph-dgl/pyproject.toml +++ b/python/cugraph-dgl/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. [build-system] @@ -18,15 +18,26 @@ authors = [ ] license = { text = "Apache 2.0" } requires-python = ">=3.9" +classifiers = [ + "Intended Audience :: Developers", + "Programming Language :: Python", +] dependencies = [ "cugraph==24.2.*", "numba>=0.57", "numpy>=1.21", + "pylibcugraphops==24.2.*", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + +[project.optional-dependencies] +test = [ + "pandas", + "pytest", + "pytest-benchmark", + "pytest-cov", + "pytest-xdist", + "scipy", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. -classifiers = [ - "Intended Audience :: Developers", - "Programming Language :: Python", -] [project.urls] Homepage = "https://github.com/rapidsai/cugraph" diff --git a/python/cugraph-pyg/pyproject.toml b/python/cugraph-pyg/pyproject.toml index b0671644982..c4bd00bb86c 100644 --- a/python/cugraph-pyg/pyproject.toml +++ b/python/cugraph-pyg/pyproject.toml @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. [build-system] @@ -29,12 +29,23 @@ dependencies = [ "cugraph==24.2.*", "numba>=0.57", "numpy>=1.21", + "pylibcugraphops==24.2.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. [project.urls] Homepage = "https://github.com/rapidsai/cugraph" Documentation = "https://docs.rapids.ai/api/cugraph/stable/" +[project.optional-dependencies] +test = [ + "pandas", + "pytest", + "pytest-benchmark", + "pytest-cov", + "pytest-xdist", + "scipy", +] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. + [tool.setuptools] license-files = ["LICENSE"] diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py index dd7aa0df00a..9817d15dacb 100644 --- a/python/cugraph/cugraph/datasets/dataset.py +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. 
+# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -12,10 +12,13 @@ # limitations under the License. import cudf +import dask_cudf import yaml import os import pandas as pd +import cugraph.dask as dcg from pathlib import Path +import urllib.request from cugraph.structure.graph_classes import Graph @@ -138,9 +141,8 @@ def __download_csv(self, url): filename = self.metadata["name"] + self.metadata["file_type"] if self._dl_path.path.is_dir(): - df = cudf.read_csv(url) self._path = self._dl_path.path / filename - df.to_csv(self._path, index=False) + urllib.request.urlretrieve(url, str(self._path)) else: raise RuntimeError( @@ -149,7 +151,6 @@ def __download_csv(self, url): return self._path def unload(self): - """ Remove all saved internal objects, forcing them to be re-created when accessed. @@ -162,7 +163,7 @@ def unload(self): def get_edgelist(self, download=False, reader="cudf"): """ - Return an Edgelist + Return an Edgelist. Parameters ---------- @@ -212,6 +213,47 @@ def get_edgelist(self, download=False, reader="cudf"): return self._edgelist.copy() + def get_dask_edgelist(self, download=False): + """ + Return a distributed Edgelist. + + Parameters + ---------- + download : Boolean (default=False) + Automatically download the dataset from the 'url' location within + the YAML file. + """ + if self._edgelist is None: + full_path = self.get_path() + if not full_path.is_file(): + if download: + full_path = self.__download_csv(self.metadata["url"]) + else: + raise RuntimeError( + f"The datafile {full_path} does not" + " exist. Try setting download=True" + " to download the datafile" + ) + + header = None + if isinstance(self.metadata["header"], int): + header = self.metadata["header"] + + blocksize = dcg.get_chunksize(full_path) + self._edgelist = dask_cudf.read_csv( + path=full_path, + blocksize=blocksize, + delimiter=self.metadata["delim"], + names=self.metadata["col_names"], + dtype={ + self.metadata["col_names"][i]: self.metadata["col_types"][i] + for i in range(len(self.metadata["col_types"])) + }, + header=header, + ) + + return self._edgelist.copy() + def get_graph( self, download=False, @@ -249,10 +291,10 @@ def get_graph( if create_using is None: G = Graph() elif isinstance(create_using, Graph): - # what about BFS if trnaposed is True + # what about BFS if transposed is True attrs = {"directed": create_using.is_directed()} G = type(create_using)(**attrs) - elif type(create_using) is type: + elif issubclass(create_using, Graph): G = create_using() else: raise TypeError( @@ -277,9 +319,74 @@ def get_graph( ) return G + def get_dask_graph( + self, + download=False, + create_using=Graph, + ignore_weights=False, + store_transposed=False, + ): + """ + Return a distributed Graph object. + + Parameters + ---------- + download : Boolean (default=False) + Downloads the dataset from the web. + + create_using: cugraph.Graph (instance or class), optional + (default=Graph) + Specify the type of Graph to create. Can pass in an instance to + create a Graph instance with specified 'directed' attribute. + + ignore_weights : Boolean (default=False) + Ignores weights in the dataset if True, resulting in an + unweighted Graph. If False (the default), weights from the + dataset -if present- will be applied to the Graph. If the + dataset does not contain weights, the Graph returned will + be unweighted regardless of ignore_weights. 
+ + store_transposed : bool, optional (default=False) + If True, stores the transpose of the adjacency matrix. Required + for certain algorithms. + """ + if self._edgelist is None: + self.get_dask_edgelist(download) + + if create_using is None: + G = Graph() + elif isinstance(create_using, Graph): + attrs = {"directed": create_using.is_directed()} + G = type(create_using)(**attrs) + elif issubclass(create_using, Graph): + G = create_using() + else: + raise TypeError( + "create_using must be a cugraph.Graph " + "(or subclass) type or instance, got: " + f"{type(create_using)}" + ) + + if len(self.metadata["col_names"]) > 2 and not (ignore_weights): + G.from_dask_cudf_edgelist( + self._edgelist, + source=self.metadata["col_names"][0], + destination=self.metadata["col_names"][1], + edge_attr=self.metadata["col_names"][2], + store_transposed=store_transposed, + ) + else: + G.from_dask_cudf_edgelist( + self._edgelist, + source=self.metadata["col_names"][0], + destination=self.metadata["col_names"][1], + store_transposed=store_transposed, + ) + return G + def get_path(self): """ - Returns the location of the stored dataset file + Returns the location of the stored dataset file. """ if self._path is None: self._path = self._dl_path.path / ( @@ -347,8 +454,7 @@ def download_all(force=False): filename = meta["name"] + meta["file_type"] save_to = default_download_dir.path / filename if not save_to.is_file() or force: - df = cudf.read_csv(meta["url"]) - df.to_csv(save_to, index=False) + urllib.request.urlretrieve(meta["url"], str(save_to)) def set_download_dir(path): diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py index 60bc6dbb45a..39f7ed8850b 100644 --- a/python/cugraph/cugraph/tests/utils/test_dataset.py +++ b/python/cugraph/cugraph/tests/utils/test_dataset.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -20,6 +20,7 @@ import pytest import cudf +import dask_cudf from cugraph.structure import Graph from cugraph.testing import ( RAPIDS_DATASET_ROOT_DIR_PATH, @@ -29,6 +30,7 @@ BENCHMARKING_DATASETS, ) from cugraph import datasets +from cugraph.dask.common.mg_utils import is_single_gpu # Add the sg marker to all tests in this module. 
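The MG tests added below exercise the new `get_dask_edgelist`/`get_dask_graph` entry points. A minimal usage sketch (assuming a Dask-CUDA cluster and cugraph comms are already initialized, as the `dask_client` fixture does in CI):

```python
# Illustrative sketch of the new distributed dataset API.
import dask_cudf
from cugraph.datasets import karate

E = karate.get_dask_edgelist(download=True)
assert isinstance(E, dask_cudf.DataFrame)

G = karate.get_dask_graph(download=True)
assert G is not None
```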
pytestmark = pytest.mark.sg @@ -37,6 +39,7 @@ ############################################################################### # Fixtures + # module fixture - called once for this module @pytest.fixture(scope="module") def tmpdir(): @@ -77,6 +80,7 @@ def setup(tmpdir): ############################################################################### # Helpers + # check if there is a row where src == dst def has_selfloop(dataset): if not dataset.metadata["is_directed"]: @@ -115,6 +119,7 @@ def is_symmetric(dataset): ############################################################################### # Tests + # setting download_dir to None effectively re-initialized the default def test_env_var(): os.environ["RAPIDS_DATASET_ROOT_DIR"] = "custom_storage_location" @@ -150,9 +155,19 @@ def test_download(dataset): assert dataset.get_path().is_file() +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_download_dask(dask_client, dataset): + E = dataset.get_dask_edgelist(download=True) + + assert E is not None + assert dataset.get_path().is_file() + + @pytest.mark.parametrize("dataset", SMALL_DATASETS) def test_reader(dataset): - # defaults to using cudf.read_csv + # defaults to using cudf E = dataset.get_edgelist(download=True) assert E is not None @@ -171,18 +186,46 @@ def test_reader(dataset): dataset.get_edgelist(reader=None) +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", SMALL_DATASETS) +def test_reader_dask(dask_client, dataset): + # using dask_cudf + E = dataset.get_dask_edgelist(download=True) + + assert E is not None + assert isinstance(E, dask_cudf.core.DataFrame) + dataset.unload() + + @pytest.mark.parametrize("dataset", ALL_DATASETS) def test_get_edgelist(dataset): E = dataset.get_edgelist(download=True) assert E is not None +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_get_dask_edgelist(dask_client, dataset): + E = dataset.get_dask_edgelist(download=True) + assert E is not None + + @pytest.mark.parametrize("dataset", ALL_DATASETS) def test_get_graph(dataset): G = dataset.get_graph(download=True) assert G is not None +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_get_dask_graph(dask_client, dataset): + G = dataset.get_dask_graph(download=True) + assert G is not None + + @pytest.mark.parametrize("dataset", ALL_DATASETS) def test_metadata(dataset): M = dataset.metadata @@ -207,6 +250,16 @@ def test_weights(dataset): assert not G.is_weighted() +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", WEIGHTED_DATASETS) +def test_weights_dask(dask_client, dataset): + G = dataset.get_dask_graph(download=True) + assert G.is_weighted() + G = dataset.get_dask_graph(download=True, ignore_weights=True) + assert not G.is_weighted() + + @pytest.mark.parametrize("dataset", SMALL_DATASETS) def test_create_using(dataset): G = dataset.get_graph(download=True) @@ -216,6 +269,26 @@ def test_create_using(dataset): 
G = dataset.get_graph(download=True, create_using=Graph(directed=True)) assert G.is_directed() + # using a non-Graph type should raise an error + with pytest.raises(TypeError): + dataset.get_graph(download=True, create_using=set) + + +@pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") +@pytest.mark.skip(reason="MG not supported on CI") +@pytest.mark.parametrize("dataset", SMALL_DATASETS) +def test_create_using_dask(dask_client, dataset): + G = dataset.get_dask_graph(download=True) + assert not G.is_directed() + G = dataset.get_dask_graph(download=True, create_using=Graph) + assert not G.is_directed() + G = dataset.get_dask_graph(download=True, create_using=Graph(directed=True)) + assert G.is_directed() + + # using a non-Graph type should raise an error + with pytest.raises(TypeError): + dataset.get_dask_graph(download=True, create_using=set) + def test_ctor_with_datafile(): from cugraph.datasets import karate
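The new `create_using` checks above reflect the tightened validation in `Dataset.get_graph()`: `issubclass(create_using, Graph)` replaces the old `type(create_using) is type` test, so arbitrary classes are now rejected. A short sketch:

```python
# Illustrative sketch of the stricter create_using validation.
from cugraph.structure import Graph
from cugraph.datasets import karate

G = karate.get_graph(download=True, create_using=Graph(directed=True))
assert G.is_directed()

try:
    karate.get_graph(download=True, create_using=set)  # not a Graph subclass
except TypeError:
    pass  # rejected, matching the new tests above
```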