From b1495edf55746313946ca8020dca02c713fe527a Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 1 Feb 2024 14:14:49 -0600 Subject: [PATCH 1/9] Adds option to rapids_cpm_find for raft to disable hnswlib feature, adds updates for pytest 8 compat, temporarily skips IO intensive test in CI (#4121) * Adds option to rapids_cpm_find for raft to disable hnswlib feature, which is not needed by cugraph. This resolves cmake failures such as the following: ``` CMake Error at /home/coder/cugraph/cpp/build/release/_deps/raft-build/raft-targets.cmake:56 (set_target_properties): The link interface of target "raft::raft" contains: hnswlib::hnswlib but the target was not found. ``` * TEMPORARILY skips bulk sampling IO tests which were hanging on CI runners. These tests pass locally, so we believe this is related to IO issues on CI runners, but will investigate further in 24.04 * Adds updates for pytest >=8 * Adds option to pytest to append source paths to PYTHONPATH instead of prepend (the default). The prepended PYTHONPATH caused the dask `LocalCUDACluster` to import cugraph modules from the source tree instead of the install location, which resulted in `ImportError`s. This appears to be a behavior change in pytest 8. 
* Adds an `xfail` for an [NX test known to be incompatible with pytest>=8](https://github.com/networkx/networkx/pull/7203) Authors: - Rick Ratzel (https://github.com/rlratzel) Approvers: - Divye Gala (https://github.com/divyegala) - Chuck Hastings (https://github.com/ChuckHastings) - Ray Douglass (https://github.com/raydouglass) - Erik Welch (https://github.com/eriknw) URL: https://github.com/rapidsai/cugraph/pull/4121 --- ci/test_python.sh | 14 ++++++++++++-- ci/test_wheel.sh | 17 ++++++++++++++--- cpp/cmake/thirdparty/get_raft.cmake | 3 ++- python/nx-cugraph/nx_cugraph/interface.py | 20 ++++++++++++++------ 4 files changed, 42 insertions(+), 12 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index 5892c37e35b..2b8a6347066 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -63,7 +63,16 @@ pytest \ tests popd -# FIXME: TEMPORARILY disable single-GPU "MG" testing +# Test runs that include tests that use dask require +# --import-mode=append. Those tests start a LocalCUDACluster that inherits +# changes from pytest's modifications to PYTHONPATH (which defaults to +# prepending source tree paths to PYTHONPATH). This causes the +# LocalCUDACluster subprocess to import cugraph from the source tree instead of +# the install location, and in most cases, the source tree does not have +# extensions built in-place and will result in ImportErrors. 
+# +# FIXME: TEMPORARILY disable MG PropertyGraph tests (experimental) tests and +# bulk sampler IO tests (hangs in CI) rapids-logger "pytest cugraph" pushd python/cugraph/cugraph DASK_WORKER_DEVICES="0" \ @@ -72,6 +81,7 @@ DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \ pytest \ -v \ + --import-mode=append \ --benchmark-disable \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph.xml" \ @@ -79,7 +89,7 @@ pytest \ --cov=cugraph \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-coverage.xml" \ --cov-report=term \ - -k "not test_property_graph_mg" \ + -k "not test_property_graph_mg and not test_bulk_sampler_io" \ tests popd diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 428efd4ed21..8c5832e412f 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -eoxu pipefail @@ -21,10 +21,21 @@ arch=$(uname -m) if [[ "${arch}" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then python ./ci/wheel_smoke_test_${package_name}.py else - # FIXME: TEMPORARILY disable single-GPU "MG" testing + # Test runs that include tests that use dask require + # --import-mode=append. See test_python.sh for details. + # FIXME: Adding PY_IGNORE_IMPORTMISMATCH=1 to workaround conftest.py import + # mismatch error seen by nx-cugraph after using pytest 8 and + # --import-mode=append. 
RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets \ + PY_IGNORE_IMPORTMISMATCH=1 \ + DASK_WORKER_DEVICES="0" \ DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL="1000s" \ DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \ - python -m pytest ./python/${package_name}/${python_package_name}/tests + python -m pytest \ + -v \ + --import-mode=append \ + --benchmark-disable \ + -k "not test_property_graph_mg and not test_bulk_sampler_io" \ + ./python/${package_name}/${python_package_name}/tests fi diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 015b5b07920..8f56372c81a 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -52,6 +52,7 @@ function(find_and_configure_raft) "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_RAFT_LIB}" "BUILD_TESTS OFF" "BUILD_BENCH OFF" + "BUILD_CAGRA_HNSWLIB OFF" ) if(raft_ADDED) diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index 04591c0e9e3..a57074aabb0 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -80,18 +80,26 @@ def key(testpath): nxver = parse(nx.__version__) if nxver.major == 3 and nxver.minor <= 2: - # Networkx versions prior to 3.2.1 have tests written to expect - # sp.sparse.linalg.ArpackNoConvergence exceptions raised on no - # convergence in HITS. Newer versions since the merge of - # https://github.com/networkx/networkx/pull/7084 expect - # nx.PowerIterationFailedConvergence, which is what nx_cugraph.hits - # raises, so we mark them as xfail for previous versions of NX. 
xfail.update( { + # NetworkX versions prior to 3.2.1 have tests written to + # expect sp.sparse.linalg.ArpackNoConvergence exceptions + # raised on no convergence in HITS. Newer versions since + # the merge of + # https://github.com/networkx/networkx/pull/7084 expect + # nx.PowerIterationFailedConvergence, which is what + # nx_cugraph.hits raises, so we mark them as xfail for + # previous versions of NX. key( "test_hits.py:TestHITS.test_hits_not_convergent" ): "nx_cugraph.hits raises updated exceptions not caught in " "these tests", + # NetworkX versions 3.2 and older contain tests that fail + # with pytest>=8. Assume pytest>=8 and mark xfail. + key( + "test_strongly_connected.py:" + "TestStronglyConnected.test_connected_raise" + ): "test is incompatible with pytest>=8", } ) From 26efc6d088258dcee0519d6464c2e5211836b88a Mon Sep 17 00:00:00 2001 From: jakirkham Date: Thu, 1 Feb 2024 14:23:18 -0800 Subject: [PATCH 2/9] Rerun `ci/release/update-version.sh 24.04.00` (#4127) Looks like `ci/release/update-version.sh` may not have been run for `24.04.00`. This does that. Also noticed the following errors on macOS. Not sure if these are important. Could someone please double check? ``` sed: 1: "/^ucx_py_version:$/ {n; ...": bad flag in substitute command: '}' sed: 1: "/^ucx_py_version:$/ {n; ...": bad flag in substitute command: '}' sed: 1: "/^ucx_py_version:$/ {n; ...": bad flag in substitute command: '}' ``` Edit: Should add that I do see the right `ucx_py_version` values in these locations. 
So it does appear to be correctly updated already https://github.com/rapidsai/cugraph/blob/00f906f6e97949fd33c85143af4fdf069c791558/conda/recipes/cugraph/conda_build_config.yaml#L19-L20 https://github.com/rapidsai/cugraph/blob/00f906f6e97949fd33c85143af4fdf069c791558/conda/recipes/pylibcugraph/conda_build_config.yaml#L19-L20 https://github.com/rapidsai/cugraph/blob/00f906f6e97949fd33c85143af4fdf069c791558/conda/recipes/cugraph-service/conda_build_config.yaml#L1-L2 Authors: - https://github.com/jakirkham Approvers: - Brad Rees (https://github.com/BradReesWork) - Ray Douglass (https://github.com/raydouglass) --- .github/workflows/build.yaml | 4 ++-- .github/workflows/pr.yaml | 4 ++-- .github/workflows/test.yaml | 2 +- python/cugraph-equivariant/pyproject.toml | 2 +- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 4593962bb1c..5ecff1f8a75 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -175,7 +175,7 @@ jobs: package-name: cugraph-pyg wheel-build-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} @@ -185,7 +185,7 @@ jobs: wheel-publish-cugraph-equivariant: needs: wheel-build-cugraph-equivariant secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-publish.yaml@branch-24.04 with: build_type: ${{ inputs.build_type || 'branch' }} branch: ${{ inputs.branch }} diff --git a/.github/workflows/pr.yaml b/.github/workflows/pr.yaml index 9d0b682f2f5..349d682a59e 100644 --- a/.github/workflows/pr.yaml +++ b/.github/workflows/pr.yaml @@ -165,14 +165,14 @@ jobs: matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == 
"11.8.0")) wheel-build-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-build.yaml@branch-24.04 with: build_type: pull-request script: ci/build_wheel_cugraph-equivariant.sh wheel-tests-cugraph-equivariant: needs: wheel-build-cugraph-equivariant secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: pull-request script: ci/test_wheel_cugraph-equivariant.sh diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index b21229b318e..af68dbdbce9 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -77,7 +77,7 @@ jobs: script: ci/test_wheel_cugraph-pyg.sh wheel-tests-cugraph-equivariant: secrets: inherit - uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 + uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.04 with: build_type: nightly branch: ${{ inputs.branch }} diff --git a/python/cugraph-equivariant/pyproject.toml b/python/cugraph-equivariant/pyproject.toml index f261b0e3535..24db45bd6d6 100644 --- a/python/cugraph-equivariant/pyproject.toml +++ b/python/cugraph-equivariant/pyproject.toml @@ -34,7 +34,7 @@ classifiers = [ "Programming Language :: Python :: 3.10", ] dependencies = [ - "pylibcugraphops==24.2.*", + "pylibcugraphops==24.4.*", ] # This list was generated by `rapids-dependency-file-generator`. To make changes, edit ../../dependencies.yaml and run `rapids-dependency-file-generator`. 
[project.urls] From ba73b02b0ecd556343e0cde85c4988950ecbe8ed Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 1 Feb 2024 17:57:24 -0600 Subject: [PATCH 3/9] Adds benchmarks for additional nx-cugraph 24.02 algos (#4112) * Adds 23 benchmarks for BFS, connected components, triangles, and other related algos * Adds a shell script to run the algo benchmarks individually for easier comparison to NX * Fixes a bug in `nx_cugraph.generic_bfs_edges()` uncovered by these benchmarks Authors: - Rick Ratzel (https://github.com/rlratzel) - Erik Welch (https://github.com/eriknw) - Brad Rees (https://github.com/BradReesWork) Approvers: - Erik Welch (https://github.com/eriknw) URL: https://github.com/rapidsai/cugraph/pull/4112 --- .../nx-cugraph/pytest-based/bench_algos.py | 366 +++++++++++++++++- .../nx-cugraph/pytest-based/run-2402.sh | 46 +++ .../traversal/breadth_first_search.py | 2 +- .../nx-cugraph/nx_cugraph/classes/digraph.py | 12 +- python/nx-cugraph/nx_cugraph/classes/graph.py | 4 +- 5 files changed, 413 insertions(+), 17 deletions(-) create mode 100755 benchmarks/nx-cugraph/pytest-based/run-2402.sh diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py index a8ed18a20fc..97eb32e2aaa 100644 --- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py +++ b/benchmarks/nx-cugraph/pytest-based/bench_algos.py @@ -38,20 +38,27 @@ warmup_rounds = 1 dataset_param_values = [ + # name: karate, nodes: 34, edges: 156 pytest.param(datasets.karate, marks=[pytest.mark.small, pytest.mark.undirected]), + # name: netscience, nodes: 1461, edges: 5484 pytest.param(datasets.netscience, marks=[pytest.mark.small, pytest.mark.directed]), + # name: email-Eu-core, nodes: 1005, edges: 25571 pytest.param( datasets.email_Eu_core, marks=[pytest.mark.small, pytest.mark.directed] ), + # name: cit-Patents, nodes: 3774768, edges: 16518948 pytest.param( datasets.cit_patents, marks=[pytest.mark.medium, 
pytest.mark.directed] ), + # name: hollywood, nodes: 1139905, edges: 57515616 pytest.param( datasets.hollywood, marks=[pytest.mark.medium, pytest.mark.undirected] ), + # name: soc-LiveJournal1, nodes: 4847571, edges: 68993773 pytest.param( datasets.soc_livejournal, marks=[pytest.mark.medium, pytest.mark.directed] ), + # name: europe_osm, nodes: 50912018, edges: 54054660 pytest.param( datasets.europe_osm, marks=[pytest.mark.large, pytest.mark.undirected] ), @@ -226,12 +233,21 @@ def get_graph_obj_for_benchmark(graph_obj, backend_wrapper): """ G = graph_obj if backend_wrapper.backend_name == "cugraph-preconverted": - G = nxcg.from_networkx(G) + G = nxcg.from_networkx(G, preserve_all_attrs=True) return G +def get_highest_degree_node(graph_obj): + degrees = graph_obj.degree() # list of tuples of (node, degree) + return max(degrees, key=lambda t: t[1])[0] + + ################################################################################ # Benchmarks +def bench_from_networkx(benchmark, graph_obj): + benchmark(nxcg.from_networkx, graph_obj) + + # normalized_param_values = [True, False] # k_param_values = [10, 100] normalized_param_values = [True] @@ -284,7 +300,7 @@ def bench_edge_betweenness_centrality( def bench_louvain_communities(benchmark, graph_obj, backend_wrapper): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) - # The cugraph backend for louvain_communities only supports undirected graphs + # DiGraphs are not supported if G.is_directed(): G = G.to_undirected() result = benchmark.pedantic( @@ -416,10 +432,8 @@ def bench_pagerank(benchmark, graph_obj, backend_wrapper): def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper): - # Use the node with the highest degree - degrees = graph_obj.degree() # list of tuples of (node, degree) - node = max(degrees, key=lambda t: t[1])[0] G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) result = benchmark.pedantic( 
target=backend_wrapper(nx.single_source_shortest_path_length), @@ -435,11 +449,8 @@ def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapp def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapper): - # Use the node with the highest degree - degrees = graph_obj.degree() # list of tuples of (node, degree) - node = max(degrees, key=lambda t: t[1])[0] G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) - + node = get_highest_degree_node(graph_obj) result = benchmark.pedantic( target=backend_wrapper( nx.single_target_shortest_path_length, exhaust_returned_iterator=True @@ -456,3 +467,340 @@ def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapp # needed for this algo in NX 3.3+ which returns a dict instead of an # iterator. Forcing to a list does not change the benchmark timing. assert type(result) is list + + +def bench_ancestors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.ancestors), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_average_clustering(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.average_clustering), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_generic_bfs_edges(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.generic_bfs_edges, 
exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_edges(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_edges, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_layers(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_layers, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + sources=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_predecessors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_predecessors, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_successors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_successors, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_tree(benchmark, graph_obj, backend_wrapper): + G 
= get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_tree), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # Check that this at least appears to be some kind of NX-like Graph + assert hasattr(result, "has_node") + + +def bench_clustering(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.clustering), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_core_number(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.core_number), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_descendants(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.descendants), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_descendants_at_distance(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.descendants_at_distance), + args=(G,), + kwargs=dict( + source=node, + distance=1, + ), + rounds=rounds, + 
iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_is_bipartite(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_bipartite), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_is_strongly_connected(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_strongly_connected), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_is_weakly_connected(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_weakly_connected), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_number_strongly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.number_strongly_connected_components), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is int + + +def bench_number_weakly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.number_weakly_connected_components), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is int + + +def bench_overall_reciprocity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + 
target=backend_wrapper(nx.overall_reciprocity), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_reciprocity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.reciprocity), + args=(G,), + kwargs=dict( + nodes=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_strongly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper( + nx.strongly_connected_components, exhaust_returned_iterator=True + ), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_transitivity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.transitivity), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_triangles(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.triangles), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_weakly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper( + 
nx.weakly_connected_components, exhaust_returned_iterator=True + ), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list diff --git a/benchmarks/nx-cugraph/pytest-based/run-2402.sh b/benchmarks/nx-cugraph/pytest-based/run-2402.sh new file mode 100755 index 00000000000..44ed0bda43a --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/run-2402.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Runs benchmarks for the 24.02 algos. +# Pass either a or b or both. This is useful for separating batches of runs on different GPUs: +# CUDA_VISIBLE_DEVICES=1 run-2402.sh b + +mkdir -p logs + +# benches="$benches ..." pattern is easy to comment out individual runs +benches= + +while [[ $1 != "" ]]; do + if [[ $1 == "a" ]]; then + benches="$benches bench_ancestors" + benches="$benches bench_average_clustering" + benches="$benches bench_generic_bfs_edges" + benches="$benches bench_bfs_edges" + benches="$benches bench_bfs_layers" + benches="$benches bench_bfs_predecessors" + benches="$benches bench_bfs_successors" + benches="$benches bench_bfs_tree" + benches="$benches bench_clustering" + benches="$benches bench_core_number" + benches="$benches bench_descendants" + elif [[ $1 == "b" ]]; then + benches="$benches bench_descendants_at_distance" + benches="$benches bench_is_bipartite" + benches="$benches bench_is_strongly_connected" + benches="$benches bench_is_weakly_connected" + benches="$benches bench_number_strongly_connected_components" + benches="$benches bench_number_weakly_connected_components" + benches="$benches bench_overall_reciprocity" + benches="$benches bench_reciprocity" + benches="$benches bench_strongly_connected_components" + benches="$benches bench_transitivity" + benches="$benches bench_triangles" + benches="$benches bench_weakly_connected_components" + fi + shift +done + +for bench in $benches; do + pytest -sv -k "soc-livejournal1" "bench_algos.py::$bench" 2>&1 
| tee "logs/${bench}.log" +done diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py index ef1c011363a..f5d5e2a995d 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -68,7 +68,7 @@ def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbor raise NotImplementedError( "sort_neighbors argument in generic_bfs_edges is not currently supported" ) - return bfs_edges(source, depth_limit=depth_limit) + return bfs_edges(G, source, depth_limit=depth_limit) @generic_bfs_edges._can_run diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 169815eb067..e5cfb8f6815 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -86,9 +86,9 @@ def to_undirected(self, reciprocal=False, as_view=False): key: val[indices].copy() for key, val in self.edge_masks.items() } else: - src_indices, dst_indices = cp.divmod( - src_dst_indices_new, N, dtype=index_dtype - ) + src_indices, dst_indices = cp.divmod(src_dst_indices_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) else: src_dst_indices_old_T = self.src_indices + N * self.dst_indices.astype( np.int64 @@ -116,9 +116,9 @@ def to_undirected(self, reciprocal=False, as_view=False): src_dst_indices_new = cp.union1d( src_dst_indices_old, src_dst_indices_old_T ) - src_indices, dst_indices = cp.divmod( - src_dst_indices_new, N, dtype=index_dtype - ) + src_indices, dst_indices = cp.divmod(src_dst_indices_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) if self.edge_values: recip_indices = cp.lexsort(cp.vstack((src_indices, dst_indices))) diff --git 
a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index f697668750d..0951ee6b135 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -668,7 +668,9 @@ def _get_plc_graph( raise ValueError( f'symmetrize must be "union" or "intersection"; got "{symmetrize}"' ) - src_indices, dst_indices = cp.divmod(src_dst_new, N, dtype=index_dtype) + src_indices, dst_indices = cp.divmod(src_dst_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) return plc.SGGraph( resource_handle=plc.ResourceHandle(), From 20f7dca65e85b71058b0847288a37a0b3c81a913 Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Thu, 1 Feb 2024 17:59:58 -0600 Subject: [PATCH 4/9] nx-cugraph: use coverage to ensure all algorithms were run (#4108) Heh, there is probably a "better" way to do this, but this way was fast and easy enough to do, and I hope is "good enough". Authors: - Erik Welch (https://github.com/eriknw) - Brad Rees (https://github.com/BradReesWork) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/4108 --- ci/test_python.sh | 21 +++++ python/nx-cugraph/lint.yaml | 2 +- .../nx_cugraph/scripts/print_table.py | 2 +- .../nx_cugraph/tests/ensure_algos_covered.py | 84 +++++++++++++++++++ .../nx-cugraph/nx_cugraph/tests/test_bfs.py | 33 ++++++++ python/nx-cugraph/run_nx_tests.sh | 14 +++- 6 files changed, 150 insertions(+), 6 deletions(-) create mode 100644 python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py create mode 100644 python/nx-cugraph/nx_cugraph/tests/test_bfs.py diff --git a/ci/test_python.sh b/ci/test_python.sh index 2b8a6347066..b070143f076 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -120,12 +120,33 @@ popd rapids-logger "pytest networkx using nx-cugraph backend" pushd python/nx-cugraph +# Use editable install to make 
coverage work +pip install -e . --no-deps ./run_nx_tests.sh # run_nx_tests.sh outputs coverage data, so check that total coverage is >0.0% # in case nx-cugraph failed to load but fallback mode allowed the run to pass. _coverage=$(coverage report|grep "^TOTAL") echo "nx-cugraph coverage from networkx tests: $_coverage" echo $_coverage | awk '{ if ($NF == "0.0%") exit 1 }' +# Ensure all algorithms were called by comparing covered lines to function lines. +# Run our tests again (they're fast enough) to add their coverage, then create coverage.json +pytest \ + --pyargs nx_cugraph \ + --config-file=./pyproject.toml \ + --cov-config=./pyproject.toml \ + --cov=nx_cugraph \ + --cov-append \ + --cov-report= +coverage report \ + --include="*/nx_cugraph/algorithms/*" \ + --omit=__init__.py \ + --show-missing \ + --rcfile=./pyproject.toml +coverage json --rcfile=./pyproject.toml +python -m nx_cugraph.tests.ensure_algos_covered +# Exercise (and show results of) scripts that show implemented networkx algorithms +python -m nx_cugraph.scripts.print_tree --dispatch-name --plc --incomplete --different +python -m nx_cugraph.scripts.print_table popd rapids-logger "pytest cugraph-service (single GPU)" diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index 5a4773168b6..8e87fc23592 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -26,7 +26,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.15 + rev: v0.16 hooks: - id: validate-pyproject name: Validate pyproject.toml diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_table.py b/python/nx-cugraph/nx_cugraph/scripts/print_table.py index 7e69de63dc1..117a1444f48 100755 --- a/python/nx-cugraph/nx_cugraph/scripts/print_table.py +++ b/python/nx-cugraph/nx_cugraph/scripts/print_table.py @@ -59,7 +59,7 @@ def main(path_to_info=None, *, file=sys.stdout): if path_to_info is None: path_to_info = 
get_path_to_info(version_added_sep=".") lines = ["networkx_path,dispatch_name,version_added,plc,is_incomplete,is_different"] - lines.extend(",".join(info) for info in path_to_info.values()) + lines.extend(",".join(map(str, info)) for info in path_to_info.values()) text = "\n".join(lines) print(text, file=file) return text diff --git a/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py b/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py new file mode 100644 index 00000000000..7047f0eeafd --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py @@ -0,0 +1,84 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Ensure that all functions wrapped by @networkx_algorithm were called. + +This file is run by CI and should not normally be run manually. 
+""" +import inspect +import json +from pathlib import Path + +from nx_cugraph.interface import BackendInterface +from nx_cugraph.utils import networkx_algorithm + +with Path("coverage.json").open() as f: + coverage = json.load(f) + +filenames_to_executed_lines = { + "nx_cugraph/" + + filename.rsplit("nx_cugraph/", 1)[-1]: set(coverage_info["executed_lines"]) + for filename, coverage_info in coverage["files"].items() +} + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def get_func_filename(func): + return "nx_cugraph" + inspect.getfile(unwrap(func)).rsplit("nx_cugraph", 1)[-1] + + +def get_func_linenos(func): + lines, lineno = inspect.getsourcelines(unwrap(func)) + for i, line in enumerate(lines, lineno): + if ":\n" in line: + return set(range(i + 1, lineno + len(lines))) + raise RuntimeError(f"Could not determine line numbers for function {func}") + + +def has_any_coverage(func): + return bool( + filenames_to_executed_lines[get_func_filename(func)] & get_func_linenos(func) + ) + + +def main(): + no_coverage = set() + for attr, func in vars(BackendInterface).items(): + if not isinstance(func, networkx_algorithm): + continue + if not has_any_coverage(func): + no_coverage.add(attr) + if no_coverage: + msg = "The following algorithms have no coverage: " + ", ".join( + sorted(no_coverage) + ) + # Create a border of "!" + msg = ( + "\n\n" + + "!" * (len(msg) + 6) + + "\n!! " + + msg + + " !!\n" + + "!" * (len(msg) + 6) + + "\n" + ) + raise AssertionError(msg) + print("\nSuccess: coverage determined all algorithms were called!\n") + + +if __name__ == "__main__": + main() diff --git a/python/nx-cugraph/nx_cugraph/tests/test_bfs.py b/python/nx-cugraph/nx_cugraph/tests/test_bfs.py new file mode 100644 index 00000000000..c2b22e98949 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_bfs.py @@ -0,0 +1,33 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pytest +from packaging.version import parse + +nxver = parse(nx.__version__) + +if nxver.major == 3 and nxver.minor < 2: + pytest.skip("Need NetworkX >=3.2 to test clustering", allow_module_level=True) + + +def test_generic_bfs_edges(): + # generic_bfs_edges currently isn't exercised by networkx tests + Gnx = nx.karate_club_graph() + Gcg = nx.karate_club_graph(backend="cugraph") + for depth_limit in (0, 1, 2): + for source in Gnx: + # Some ordering is arbitrary, so I think there's a chance + # this test may fail if networkx or nx-cugraph changes. + nx_result = nx.generic_bfs_edges(Gnx, source, depth_limit=depth_limit) + cg_result = nx.generic_bfs_edges(Gcg, source, depth_limit=depth_limit) + assert sorted(nx_result) == sorted(cg_result), (source, depth_limit) diff --git a/python/nx-cugraph/run_nx_tests.sh b/python/nx-cugraph/run_nx_tests.sh index 07c97cdf947..da7a2014cef 100755 --- a/python/nx-cugraph/run_nx_tests.sh +++ b/python/nx-cugraph/run_nx_tests.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. 
# # NETWORKX_GRAPH_CONVERT=cugraph # Used by networkx versions 3.0 and 3.1 @@ -30,7 +30,13 @@ NETWORKX_TEST_BACKEND=cugraph \ NETWORKX_FALLBACK_TO_NX=True \ pytest \ --pyargs networkx \ - --cov=nx_cugraph.algorithms \ - --cov-report term-missing \ - --no-cov-on-fail \ + --config-file=$(dirname $0)/pyproject.toml \ + --cov-config=$(dirname $0)/pyproject.toml \ + --cov=nx_cugraph \ + --cov-report= \ "$@" +coverage report \ + --include="*/nx_cugraph/algorithms/*" \ + --omit=__init__.py \ + --show-missing \ + --rcfile=$(dirname $0)/pyproject.toml From 3d52f177b3cb8213283544ed3bfe3397be86d20f Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Thu, 1 Feb 2024 18:00:50 -0600 Subject: [PATCH 5/9] Optimize the drop-duplicate functionality (#4095) Our current python API leverages dask to implement the `drop-duplicate` functionality but it carries a lot of overhead as it draws a significant amount of host memory and results in a crash when processing large graphs (4+ billion edges). This PR 1. Leverages the CAPI to internally drop multi edges when creating the PLC graph. 2. Deprecates the parameter `multi` which, when set to False, triggers the dask based `drop-duplicate` functionality 3. 
Add flag `do_expensive_check` to check for `NULL` values in the edgelist Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Vibhu Jawa (https://github.com/VibhuJawa) - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4095 --- .../simpleDistributedGraph.py | 17 ++++++++- .../graph_implementation/simpleGraph.py | 17 +++++++-- .../cugraph/cugraph/structure/symmetrize.py | 37 +++++++++++++++++-- .../test_uniform_neighbor_sample_mg.py | 11 +++++- python/pylibcugraph/pylibcugraph/graphs.pyx | 2 +- 5 files changed, 73 insertions(+), 11 deletions(-) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 8fed467bf6d..cdf1e937e67 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -39,6 +39,7 @@ ) from cugraph.dask.common.mg_utils import run_gc_on_dask_cluster import cugraph.dask.comms.comms as Comms +from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates class simpleDistributedGraphImpl: @@ -95,6 +96,7 @@ def _make_plc_graph( weight_type, edge_id_type, edge_type_id, + drop_multi_edges, ): weights = None edge_ids = None @@ -149,6 +151,7 @@ def _make_plc_graph( num_arrays=num_arrays, store_transposed=store_transposed, do_expensive_check=False, + drop_multi_edges=drop_multi_edges, ) del edata_x gc.collect() @@ -267,7 +270,7 @@ def __from_edgelist( input_ddf, source, destination, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) value_col = None @@ -277,7 +280,7 @@ def __from_edgelist( source, destination, value_col_names, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -364,6 +367,7 @@ def __from_edgelist( 
self.weight_type, self.edge_id_type, self.edge_type_id_type, + not self.properties.multi_edge, ) for w, edata in persisted_keys_d.items() } @@ -455,6 +459,15 @@ def view_edge_list(self): else: is_multi_column = True + if not self.properties.multi_edge: + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + _client = default_client() + workers = _client.scheduler_info()["workers"] + edgelist_df = _memory_efficient_drop_duplicates( + edgelist_df, [srcCol, dstCol], len(workers) + ) + edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[ [srcCol, dstCol] ].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 22d82eb1796..121a4c6245a 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at @@ -264,7 +264,7 @@ def __from_edgelist( source, destination, edge_attr, - multi=self.properties.multi_edge, + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -279,7 +279,7 @@ def __from_edgelist( elist, source, destination, - multi=self.properties.multi_edge, + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -298,7 +298,10 @@ def __from_edgelist( self._replicate_edgelist() self._make_plc_graph( - value_col=value_col, store_transposed=store_transposed, renumber=renumber + value_col=value_col, + store_transposed=store_transposed, + renumber=renumber, + drop_multi_edges=not self.properties.multi_edge, ) def to_pandas_edgelist( @@ -477,6 +480,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] + elif not use_initial_input_df and self.properties.renumbered: # Do not unrenumber the vertices if the initial input df was used if not self.properties.directed: @@ -484,6 +488,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] + edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) @@ -1084,6 +1089,7 @@ def _make_plc_graph( value_col: Dict[str, cudf.DataFrame] = None, store_transposed: bool = False, renumber: bool = True, + drop_multi_edges: bool = False, ): """ Parameters @@ -1100,6 +1106,8 @@ def _make_plc_graph( Whether to renumber the vertices of the graph. Required if inputted vertex ids are not of int32 or int64 type. 
+ drop_multi_edges: bool (default=False) + Whether to drop multi edges """ if value_col is None: @@ -1163,6 +1171,7 @@ def _make_plc_graph( renumber=renumber, do_expensive_check=True, input_array_format=input_array_format, + drop_multi_edges=drop_multi_edges, ) def to_directed(self, DiG, store_transposed=False): diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index b324ff65834..30c6394ade9 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,6 +15,7 @@ import cudf import dask_cudf from dask.distributed import default_client +import warnings def symmetrize_df( @@ -54,6 +55,11 @@ def symmetrize_df( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `cugraph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -84,6 +90,12 @@ def symmetrize_df( if multi: return result else: + warnings.warn( + "Multi is deprecated and the removal of multi edges will no longer be " + "supported from 'symmetrize'. 
Multi edges will be removed upon creation " + "of graph instance.", + FutureWarning, + ) vertex_col_name = src_name + dst_name result = result.groupby(by=[*vertex_col_name], as_index=False).min() return result @@ -128,6 +140,11 @@ def symmetrize_ddf( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `cugraph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -165,8 +182,15 @@ def symmetrize_ddf( else: result = ddf if multi: + result = result.reset_index(drop=True).repartition(npartitions=len(workers) * 2) return result else: + warnings.warn( + "Multi is deprecated and the removal of multi edges will no longer be " + "supported from 'symmetrize'. Multi edges will be removed upon creation " + "of graph instance.", + FutureWarning, + ) vertex_col_name = src_name + dst_name result = _memory_efficient_drop_duplicates( result, vertex_col_name, len(workers) @@ -181,6 +205,7 @@ def symmetrize( value_col_name=None, multi=False, symmetrize=True, + do_expensive_check=False, ): """ Take a dataframe of source destination pairs along with associated @@ -208,6 +233,11 @@ def symmetrize( weights column name. multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `cugraph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. 
@@ -234,8 +264,9 @@ def symmetrize( if "edge_id" in input_df.columns and symmetrize: raise ValueError("Edge IDs are not supported on undirected graphs") - csg.null_check(input_df[source_col_name]) - csg.null_check(input_df[dest_col_name]) + if do_expensive_check: # FIXME: Optimize this check as it is currently expensive + csg.null_check(input_df[source_col_name]) + csg.null_check(input_df[dest_col_name]) if isinstance(input_df, dask_cudf.DataFrame): output_df = symmetrize_ddf( diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 460a25cbd14..371410b8bd5 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,6 +26,7 @@ from cugraph.testing import UNDIRECTED_DATASETS from cugraph.dask import uniform_neighbor_sample from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates from cugraph.datasets import email_Eu_core, small_tree from pylibcugraph.testing.utils import gen_fixture_params_product @@ -135,6 +136,14 @@ def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo): dg = input_combo["MGGraph"] input_df = dg.input_df + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. 
+ vertex_col_name = ["src", "dst"] + workers = dask_client.scheduler_info()["workers"] + input_df = _memory_efficient_drop_duplicates( + input_df, vertex_col_name, len(workers) + ) + result_nbr = uniform_neighbor_sample( dg, input_combo["start_list"], diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx index 76ad7690840..def47390ce5 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -463,9 +463,9 @@ cdef class MGGraph(_GPUGraph): edge_type_view_ptr_ptr, store_transposed, num_arrays, - do_expensive_check, drop_self_loops, drop_multi_edges, + do_expensive_check, &(self.c_graph_ptr), &error_ptr) From acb3add62e67d8357649352e81e3179416ee81ff Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Thu, 1 Feb 2024 18:46:22 -0600 Subject: [PATCH 6/9] Removes the `networkx_algorithm` decorator from all SCC functions to disable dispatching to them (#4120) The current cugraph `strongly_connected_components` is a legacy implementation with known issues, and in most cases should not be used until the cugraph team can provide an update. This PR removes the `networkx_algorithm` decorator from all SCC functions to disable dispatching. Users can still run the SCC functions here by accessing them directly from `nx_cugraph`: ```python >>> import nx_cugraph as nxcg >>> nxcg.strongly_connected_components(...) 
``` Tested by running the `nx_cugraph` tests (`pytest nx_cugraph/tests`) and the NetworkX tests (`run_nx_tests.sh`) _Note: using the "non-breaking" label since this API was only present in nightlies and never released._ Authors: - Rick Ratzel (https://github.com/rlratzel) - Brad Rees (https://github.com/BradReesWork) Approvers: - Erik Welch (https://github.com/eriknw) URL: https://github.com/rapidsai/cugraph/pull/4120 --- python/nx-cugraph/_nx_cugraph/__init__.py | 9 +++---- .../components/strongly_connected.py | 24 ++++++++++++------- python/nx-cugraph/nx_cugraph/interface.py | 12 ++++++---- 3 files changed, 28 insertions(+), 17 deletions(-) diff --git a/python/nx-cugraph/_nx_cugraph/__init__.py b/python/nx-cugraph/_nx_cugraph/__init__.py index 2f283aa153c..8b5c87a63f9 100644 --- a/python/nx-cugraph/_nx_cugraph/__init__.py +++ b/python/nx-cugraph/_nx_cugraph/__init__.py @@ -12,7 +12,11 @@ # limitations under the License. """Tell NetworkX about the cugraph backend. This file can update itself: -$ make plugin-info # Recommended method for development +$ make plugin-info + +or + +$ make all # Recommended - runs 'plugin-info' followed by 'lint' or @@ -78,7 +82,6 @@ "is_connected", "is_forest", "is_isolate", - "is_strongly_connected", "is_tree", "is_weakly_connected", "isolates", @@ -96,7 +99,6 @@ "number_connected_components", "number_of_isolates", "number_of_selfloops", - "number_strongly_connected_components", "number_weakly_connected_components", "octahedral_graph", "out_degree_centrality", @@ -111,7 +113,6 @@ "single_source_shortest_path_length", "single_target_shortest_path_length", "star_graph", - "strongly_connected_components", "tadpole_graph", "tetrahedral_graph", "transitivity", diff --git a/python/nx-cugraph/nx_cugraph/algorithms/components/strongly_connected.py b/python/nx-cugraph/nx_cugraph/algorithms/components/strongly_connected.py index d1713129703..a63b3237dfc 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/components/strongly_connected.py +++ 
b/python/nx-cugraph/nx_cugraph/algorithms/components/strongly_connected.py @@ -15,12 +15,7 @@ import pylibcugraph as plc from nx_cugraph.convert import _to_directed_graph -from nx_cugraph.utils import ( - _groupby, - index_dtype, - networkx_algorithm, - not_implemented_for, -) +from nx_cugraph.utils import _groupby, index_dtype, not_implemented_for __all__ = [ "number_strongly_connected_components", @@ -50,8 +45,19 @@ def _strongly_connected_components(G): return labels +# The networkx_algorithm decorator is (temporarily) removed to disable +# dispatching for this function. The current cugraph +# strongly_connected_components is a legacy implementation with known issues, +# and in most cases should not be used until the cugraph team can provide an +# update. +# +# Users can still call this via the nx_cugraph module directly: +# >>> import nx_cugraph as nxcg +# >>> nxcg.strongly_connected_components(...) + + @not_implemented_for("undirected") -@networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") +# @networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") def strongly_connected_components(G): G = _to_directed_graph(G) if G.src_indices.size == 0: @@ -62,7 +68,7 @@ def strongly_connected_components(G): @not_implemented_for("undirected") -@networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") +# @networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") def number_strongly_connected_components(G): G = _to_directed_graph(G) if G.src_indices.size == 0: @@ -72,7 +78,7 @@ def number_strongly_connected_components(G): @not_implemented_for("undirected") -@networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") +# @networkx_algorithm(version_added="24.02", _plc="strongly_connected_components") def is_strongly_connected(G): G = _to_directed_graph(G) if len(G) == 0: diff --git a/python/nx-cugraph/nx_cugraph/interface.py 
b/python/nx-cugraph/nx_cugraph/interface.py index a57074aabb0..46ea5831b0b 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -69,10 +69,14 @@ def key(testpath): no_string_dtype = "string edge values not currently supported" xfail = { - key( - "test_strongly_connected.py:" - "TestStronglyConnected.test_condensation_mapping_and_members" - ): "Strongly connected groups in different iteration order", + # This is removed while strongly_connected_components() is not + # dispatchable. See algorithms/components/strongly_connected.py for + # details. + # + # key( + # "test_strongly_connected.py:" + # "TestStronglyConnected.test_condensation_mapping_and_members" + # ): "Strongly connected groups in different iteration order", } from packaging.version import parse From 581d3562496c5c9c0b094f3cfd6f5631154e3739 Mon Sep 17 00:00:00 2001 From: Don Acosta <97529984+acostadon@users.noreply.github.com> Date: Thu, 1 Feb 2024 19:58:16 -0500 Subject: [PATCH 7/9] corrected links in C API and added groups for support functions (#4131) Fixes broken links in C API docs in Traversal, Sampling and Community algorithm sections resolves issue #4116 Authors: - Don Acosta (https://github.com/acostadon) - Brad Rees (https://github.com/BradReesWork) Approvers: - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/4131 --- cpp/include/cugraph_c/community_algorithms.h | 14 ++++--- cpp/include/cugraph_c/sampling_algorithms.h | 37 ++++++++++++++++--- cpp/include/cugraph_c/traversal_algorithms.h | 13 ++++--- .../source/api_docs/cugraph_c/community.rst | 10 +---- .../source/api_docs/cugraph_c/labeling.rst | 4 +- .../source/api_docs/cugraph_c/sampling.rst | 13 +++---- .../source/api_docs/cugraph_c/similarity.rst | 4 +- .../source/api_docs/cugraph_c/traversal.rst | 4 +- 8 files changed, 59 insertions(+), 40 deletions(-) diff --git a/cpp/include/cugraph_c/community_algorithms.h 
b/cpp/include/cugraph_c/community_algorithms.h index feab15c7eeb..e8a71a40162 100644 --- a/cpp/include/cugraph_c/community_algorithms.h +++ b/cpp/include/cugraph_c/community_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * Copyright (c) 2022-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -23,7 +23,6 @@ #include /** @defgroup community Community algorithms - * @{ */ #ifdef __cplusplus @@ -60,18 +59,21 @@ cugraph_error_code_t cugraph_triangle_count(const cugraph_resource_handle_t* han cugraph_error_t** error); /** + * @ingroup community * @brief Get triangle counting vertices */ cugraph_type_erased_device_array_view_t* cugraph_triangle_count_result_get_vertices( cugraph_triangle_count_result_t* result); /** + * @ingroup community * @brief Get triangle counting counts */ cugraph_type_erased_device_array_view_t* cugraph_triangle_count_result_get_counts( cugraph_triangle_count_result_t* result); /** + * @ingroup community * @brief Free a triangle count result * * @param [in] result The result from a sampling algorithm @@ -147,24 +149,28 @@ cugraph_error_code_t cugraph_leiden(const cugraph_resource_handle_t* handle, cugraph_error_t** error); /** + * @ingroup community * @brief Get hierarchical clustering vertices */ cugraph_type_erased_device_array_view_t* cugraph_hierarchical_clustering_result_get_vertices( cugraph_hierarchical_clustering_result_t* result); /** + * @ingroup community * @brief Get hierarchical clustering clusters */ cugraph_type_erased_device_array_view_t* cugraph_hierarchical_clustering_result_get_clusters( cugraph_hierarchical_clustering_result_t* result); /** + * @ingroup community * @brief Get modularity */ double cugraph_hierarchical_clustering_result_get_modularity( cugraph_hierarchical_clustering_result_t* result); /** + * @ingroup community * @brief Free a hierarchical clustering result * * 
@param [in] result The result from a sampling algorithm @@ -423,7 +429,3 @@ void cugraph_clustering_result_free(cugraph_clustering_result_t* result); #ifdef __cplusplus } #endif - -/** - * @} - */ diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 782bb5a3790..5760d2098aa 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ #include /** @defgroup samplingC Sampling algorithms - * @{ */ #ifdef __cplusplus @@ -134,6 +133,7 @@ cugraph_error_code_t cugraph_node2vec(const cugraph_resource_handle_t* handle, cugraph_error_t** error); /** + * @ingroup samplingC * @brief Get the max path length from random walk result * * @param [in] result The result from random walks @@ -145,6 +145,7 @@ size_t cugraph_random_walk_result_get_max_path_length(cugraph_random_walk_result // difference at the moment is that RW results contain weights // and extract_paths results don't. But that's probably wrong. 
/** + * @ingroup samplingC * @brief Get the matrix (row major order) of vertices in the paths * * @param [in] result The result from a random walk algorithm @@ -154,6 +155,7 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_paths( cugraph_random_walk_result_t* result); /** + * @ingroup samplingC * @brief Get the matrix (row major order) of edge weights in the paths * * @param [in] result The result from a random walk algorithm @@ -163,6 +165,7 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_weights( cugraph_random_walk_result_t* result); /** + * @ingroup samplingC * @brief If the random walk result is compressed, get the path sizes * @deprecated This call will no longer be relevant once the new node2vec are called * @@ -173,6 +176,7 @@ cugraph_type_erased_device_array_view_t* cugraph_random_walk_result_get_path_siz cugraph_random_walk_result_t* result); /** + * @ingroup samplingC * @brief Free random walks result * * @param [in] result The result from random walks @@ -220,6 +224,7 @@ typedef enum cugraph_compression_type_t { } cugraph_compression_type_t; /** + * @ingroup samplingC * @brief Create sampling options object * * All sampling options set to FALSE @@ -232,6 +237,7 @@ cugraph_error_code_t cugraph_sampling_options_create(cugraph_sampling_options_t* cugraph_error_t** error); /** + * @ingroup samplingC * @brief Set flag to renumber results * * @param options - opaque pointer to the sampling options @@ -240,6 +246,7 @@ cugraph_error_code_t cugraph_sampling_options_create(cugraph_sampling_options_t* void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Set whether to compress per-hop (True) or globally (False) * * @param options - opaque pointer to the sampling options @@ -248,6 +255,7 @@ void cugraph_sampling_set_renumber_results(cugraph_sampling_options_t* options, void 
cugraph_sampling_set_compress_per_hop(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Set flag to sample with_replacement * * @param options - opaque pointer to the sampling options @@ -256,6 +264,7 @@ void cugraph_sampling_set_compress_per_hop(cugraph_sampling_options_t* options, void cugraph_sampling_set_with_replacement(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Set flag to sample return_hops * * @param options - opaque pointer to the sampling options @@ -264,6 +273,7 @@ void cugraph_sampling_set_with_replacement(cugraph_sampling_options_t* options, void cugraph_sampling_set_return_hops(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Set compression type * * @param options - opaque pointer to the sampling options @@ -273,6 +283,7 @@ void cugraph_sampling_set_compression_type(cugraph_sampling_options_t* options, cugraph_compression_type_t value); /** + * @ingroup samplingC * @brief Set prior sources behavior * * @param options - opaque pointer to the sampling options @@ -282,6 +293,7 @@ void cugraph_sampling_set_prior_sources_behavior(cugraph_sampling_options_t* opt cugraph_prior_sources_behavior_t value); /** + * @ingroup samplingC * @brief Set flag to sample dedupe_sources prior to sampling * * @param options - opaque pointer to the sampling options @@ -290,6 +302,7 @@ void cugraph_sampling_set_prior_sources_behavior(cugraph_sampling_options_t* opt void cugraph_sampling_set_dedupe_sources(cugraph_sampling_options_t* options, bool_t value); /** + * @ingroup samplingC * @brief Free sampling options object * * @param [in] options Opaque pointer to sampling object @@ -369,6 +382,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_destinations( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the major vertices from the sampling algorithm result * * @param [in] result The result from a 
sampling algorithm @@ -378,6 +392,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_majors( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the minor vertices from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -387,6 +402,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_minors( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the major offsets from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -396,6 +412,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_major_offsets const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the start labels from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -405,6 +422,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_start_labels( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the edge_id from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -414,6 +432,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_id( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the edge_type from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -423,6 +442,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_type( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the edge_weight from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -432,6 +452,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_edge_weight( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the hop from the sampling algorithm result * * @param [in] result 
The result from a sampling algorithm @@ -441,6 +462,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_hop( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the label-hop offsets from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -450,6 +472,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_hop_off const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the index from the sampling algorithm result * * @param [in] result The result from a sampling algorithm @@ -469,6 +492,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_offsets( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the renumber map * * @param [in] result The result from a sampling algorithm @@ -478,6 +502,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map( const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Get the renumber map offsets * * @param [in] result The result from a sampling algorithm @@ -487,6 +512,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map_ const cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Free a sampling result * * @param [in] result The result from a sampling algorithm @@ -494,6 +520,7 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_renumber_map_ void cugraph_sample_result_free(cugraph_sample_result_t* result); /** + * @ingroup samplingC * @brief Create a sampling result (testing API) * * @param [in] handle Handle for accessing resources @@ -524,6 +551,7 @@ cugraph_error_code_t cugraph_test_sample_result_create( cugraph_error_t** error); /** + * @ingroup samplingC * @brief Create a sampling result (testing API) * * @param [in] handle Handle for accessing resources @@ -554,6 +582,7 @@ cugraph_error_code_t 
cugraph_test_uniform_neighborhood_sample_result_create( cugraph_error_t** error); /** + * @ingroup samplingC * @brief Select random vertices from the graph * * @param [in] handle Handle for accessing resources @@ -576,7 +605,3 @@ cugraph_error_code_t cugraph_select_random_vertices(const cugraph_resource_handl #ifdef __cplusplus } #endif - -/** - * @} - */ diff --git a/cpp/include/cugraph_c/traversal_algorithms.h b/cpp/include/cugraph_c/traversal_algorithms.h index 8959366ac17..e25fa167e43 100644 --- a/cpp/include/cugraph_c/traversal_algorithms.h +++ b/cpp/include/cugraph_c/traversal_algorithms.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -22,7 +22,6 @@ /** @defgroup traversal Traversal Algorithms * @ingroup c_api - * @{ */ #ifdef __cplusplus @@ -40,6 +39,7 @@ typedef struct { } cugraph_paths_result_t; /** + * @ingroup traversal * @brief Get the vertex ids from the paths result * * @param [in] result The result from bfs or sssp @@ -49,6 +49,7 @@ cugraph_type_erased_device_array_view_t* cugraph_paths_result_get_vertices( cugraph_paths_result_t* result); /** + * @ingroup traversal * @brief Get the distances from the paths result * * @param [in] result The result from bfs or sssp @@ -58,6 +59,7 @@ cugraph_type_erased_device_array_view_t* cugraph_paths_result_get_distances( cugraph_paths_result_t* result); /** + * @ingroup traversal * @brief Get the predecessors from the paths result * * @param [in] result The result from bfs or sssp @@ -69,6 +71,7 @@ cugraph_type_erased_device_array_view_t* cugraph_paths_result_get_predecessors( cugraph_paths_result_t* result); /** + * @ingroup traversal * @brief Free paths result * * @param [in] result The result from bfs or sssp @@ -188,6 +191,7 @@ cugraph_error_code_t cugraph_extract_paths( size_t 
cugraph_extract_paths_result_get_max_path_length(cugraph_extract_paths_result_t* result); /** + * @ingroup traversal * @brief Get the matrix (row major order) of paths * * @param [in] result The result from extract_paths @@ -197,6 +201,7 @@ cugraph_type_erased_device_array_view_t* cugraph_extract_paths_result_get_paths( cugraph_extract_paths_result_t* result); /** + * @ingroup traversal * @brief Free extract_paths result * * @param [in] result The result from extract_paths @@ -206,7 +211,3 @@ void cugraph_extract_paths_result_free(cugraph_extract_paths_result_t* result); #ifdef __cplusplus } #endif - -/** - * @} - */ diff --git a/docs/cugraph/source/api_docs/cugraph_c/community.rst b/docs/cugraph/source/api_docs/cugraph_c/community.rst index 0bbfe365c4d..d55325720c4 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/community.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/community.rst @@ -1,12 +1,6 @@ Community ========= -.. role:: py(code) - :language: c - :class: highlight - -``#include `` - Triangle Counting ----------------- .. doxygenfunction:: cugraph_triangle_count @@ -45,8 +39,8 @@ Spectral Clustering - Modularity Maximization .. doxygenfunction:: cugraph_analyze_clustering_modularity :project: libcugraph -Spectral Clusteriong - Edge Cut -------------------------------- +Spectral Clustering - Edge Cut +------------------------------ .. doxygenfunction:: cugraph_analyze_clustering_edge_cut :project: libcugraph diff --git a/docs/cugraph/source/api_docs/cugraph_c/labeling.rst b/docs/cugraph/source/api_docs/cugraph_c/labeling.rst index af105ee8fc9..4ca598c0a06 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/labeling.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/labeling.rst @@ -12,8 +12,8 @@ Strongly Connected Components .. doxygenfunction:: cugraph_strongly_connected_components :project: libcugraph -Support -------- +Labeling Support Functions +-------------------------- .. 
doxygengroup:: labeling :project: libcugraph :members: diff --git a/docs/cugraph/source/api_docs/cugraph_c/sampling.rst b/docs/cugraph/source/api_docs/cugraph_c/sampling.rst index 21b837daf93..3d5af713c33 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/sampling.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/sampling.rst @@ -7,7 +7,7 @@ Uniform Random Walks :project: libcugraph Biased Random Walks --------------------- +------------------- .. doxygenfunction:: cugraph_biased_random_walks :project: libcugraph @@ -21,16 +21,13 @@ Node2Vec .. doxygenfunction:: cugraph_node2vec :project: libcugraph -Uniform Neighborhood Sampling ------------------------------ -.. doxygenfunction:: cugraph_uniform_neighbor_sample_with_edge_properties - :project: libcugraph - +Uniform Neighbor Sampling +------------------------- .. doxygenfunction:: cugraph_uniform_neighbor_sample :project: libcugraph -Support -------- +Sampling Support Functions +-------------------------- .. doxygengroup:: samplingC :project: libcugraph :members: diff --git a/docs/cugraph/source/api_docs/cugraph_c/similarity.rst b/docs/cugraph/source/api_docs/cugraph_c/similarity.rst index fba07ad206c..200ba695781 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/similarity.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/similarity.rst @@ -17,8 +17,8 @@ Overlap .. doxygenfunction:: cugraph_overlap_coefficients :project: libcugraph -Support -------- +Similarity Support Functions +---------------------------- .. doxygengroup:: similarity :project: libcugraph :members: diff --git a/docs/cugraph/source/api_docs/cugraph_c/traversal.rst b/docs/cugraph/source/api_docs/cugraph_c/traversal.rst index c90760e9e79..1578951e05f 100644 --- a/docs/cugraph/source/api_docs/cugraph_c/traversal.rst +++ b/docs/cugraph/source/api_docs/cugraph_c/traversal.rst @@ -22,8 +22,8 @@ Extract Max Path Length ..
doxygenfunction:: cugraph_extract_paths_result_get_max_path_length :project: libcugraph -Support -------- +Traversal Support Functions +--------------------------- .. doxygengroup:: traversal :project: libcugraph :members: From f64df6091ae76a7e7e80cefcf151f169ee16bfb2 Mon Sep 17 00:00:00 2001 From: Rick Ratzel <3039903+rlratzel@users.noreply.github.com> Date: Fri, 2 Feb 2024 00:00:18 -0600 Subject: [PATCH 8/9] Updates nx-cugraph README.md with latest algos (#4135) closes #4079 * Updates nx-cugraph `README.md` with latest algos * Adds script to auto-update `README.md` from `nx_cugraph` metadata directly Authors: - Rick Ratzel (https://github.com/rlratzel) - Erik Welch (https://github.com/eriknw) - Brad Rees (https://github.com/BradReesWork) Approvers: - Don Acosta (https://github.com/acostadon) - Erik Welch (https://github.com/eriknw) URL: https://github.com/rapidsai/cugraph/pull/4135 --- python/nx-cugraph/README.md | 182 ++++++++++++++---- .../nx_cugraph/scripts/print_tree.py | 4 +- 2 files changed, 143 insertions(+), 43 deletions(-) diff --git a/python/nx-cugraph/README.md b/python/nx-cugraph/README.md index f6a9aac1088..5d0554734a8 100644 --- a/python/nx-cugraph/README.md +++ b/python/nx-cugraph/README.md @@ -89,48 +89,146 @@ interface to its CUDA-based graph analytics library) and [CuPy](https://cupy.dev/) (a GPU-accelerated array library) to NetworkX's familiar and easy-to-use API. -Below is the list of algorithms (many listed using pylibcugraph names), -available today in pylibcugraph or implemented using CuPy, that are or will be -supported in nx-cugraph. - -| feature/algo | release/target version | -| ----- | ----- | -| analyze_clustering_edge_cut | ? | -| analyze_clustering_modularity | ? | -| analyze_clustering_ratio_cut | ? | -| balanced_cut_clustering | ? | -| betweenness_centrality | 23.10 | -| bfs | ? | -| connected_components | 23.12 | -| core_number | ? | -| degree_centrality | 23.12 | -| ecg | ? 
| -| edge_betweenness_centrality | 23.10 | -| ego_graph | ? | -| eigenvector_centrality | 23.12 | -| get_two_hop_neighbors | ? | -| hits | 23.12 | -| in_degree_centrality | 23.12 | -| induced_subgraph | ? | -| jaccard_coefficients | ? | -| katz_centrality | 23.12 | -| k_core | ? | -| k_truss_subgraph | 23.12 | -| leiden | ? | -| louvain | 23.10 | -| node2vec | ? | -| out_degree_centrality | 23.12 | -| overlap_coefficients | ? | -| pagerank | 23.12 | -| personalized_pagerank | ? | -| sorensen_coefficients | ? | -| spectral_modularity_maximization | ? | -| sssp | 23.12 | -| strongly_connected_components | ? | -| triangle_count | ? | -| uniform_neighbor_sample | ? | -| uniform_random_walks | ? | -| weakly_connected_components | ? | +Below is the list of algorithms that are currently supported in nx-cugraph. + +### Algorithms + +``` +bipartite + ├─ basic + │ └─ is_bipartite + └─ generators + └─ complete_bipartite_graph +centrality + ├─ betweenness + │ ├─ betweenness_centrality + │ └─ edge_betweenness_centrality + ├─ degree_alg + │ ├─ degree_centrality + │ ├─ in_degree_centrality + │ └─ out_degree_centrality + ├─ eigenvector + │ └─ eigenvector_centrality + └─ katz + └─ katz_centrality +cluster + ├─ average_clustering + ├─ clustering + ├─ transitivity + └─ triangles +community + └─ louvain + └─ louvain_communities +components + ├─ connected + │ ├─ connected_components + │ ├─ is_connected + │ ├─ node_connected_component + │ └─ number_connected_components + └─ weakly_connected + ├─ is_weakly_connected + ├─ number_weakly_connected_components + └─ weakly_connected_components +core + ├─ core_number + └─ k_truss +dag + ├─ ancestors + └─ descendants +isolate + ├─ is_isolate + ├─ isolates + └─ number_of_isolates +link_analysis + ├─ hits_alg + │ └─ hits + └─ pagerank_alg + └─ pagerank +operators + └─ unary + ├─ complement + └─ reverse +reciprocity + ├─ overall_reciprocity + └─ reciprocity +shortest_paths + └─ unweighted + ├─ single_source_shortest_path_length + └─ 
single_target_shortest_path_length +traversal + └─ breadth_first_search + ├─ bfs_edges + ├─ bfs_layers + ├─ bfs_predecessors + ├─ bfs_successors + ├─ bfs_tree + ├─ descendants_at_distance + └─ generic_bfs_edges +tree + └─ recognition + ├─ is_arborescence + ├─ is_branching + ├─ is_forest + └─ is_tree +``` + +### Generators + +``` +classic + ├─ barbell_graph + ├─ circular_ladder_graph + ├─ complete_graph + ├─ complete_multipartite_graph + ├─ cycle_graph + ├─ empty_graph + ├─ ladder_graph + ├─ lollipop_graph + ├─ null_graph + ├─ path_graph + ├─ star_graph + ├─ tadpole_graph + ├─ trivial_graph + ├─ turan_graph + └─ wheel_graph +community + └─ caveman_graph +small + ├─ bull_graph + ├─ chvatal_graph + ├─ cubical_graph + ├─ desargues_graph + ├─ diamond_graph + ├─ dodecahedral_graph + ├─ frucht_graph + ├─ heawood_graph + ├─ house_graph + ├─ house_x_graph + ├─ icosahedral_graph + ├─ krackhardt_kite_graph + ├─ moebius_kantor_graph + ├─ octahedral_graph + ├─ pappus_graph + ├─ petersen_graph + ├─ sedgewick_maze_graph + ├─ tetrahedral_graph + ├─ truncated_cube_graph + ├─ truncated_tetrahedron_graph + └─ tutte_graph +social + ├─ davis_southern_women_graph + ├─ florentine_families_graph + ├─ karate_club_graph + └─ les_miserables_graph +``` + +### Other + +``` +convert_matrix + ├─ from_pandas_edgelist + └─ from_scipy_sparse_array +``` To request nx-cugraph backend support for a NetworkX API that is not listed above, visit the [cuGraph GitHub repo](https://github.com/rapidsai/cugraph). 
diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_tree.py b/python/nx-cugraph/nx_cugraph/scripts/print_tree.py index bb75d735c31..485873a447d 100755 --- a/python/nx-cugraph/nx_cugraph/scripts/print_tree.py +++ b/python/nx-cugraph/nx_cugraph/scripts/print_tree.py @@ -133,7 +133,9 @@ def main( } if by == "networkx_path": G = create_tree(path_to_info, by="networkx_path", **kwargs) - text = re.sub(r"[A-Za-z_\./]+\.", "", ("\n".join(nx.generate_network_text(G)))) + text = re.sub( + r" [A-Za-z_\./]+\.", " ", ("\n".join(nx.generate_network_text(G))) + ) elif by == "plc": G = create_tree( path_to_info, by=["plc", "networkx_path"], prefix="plc-", **kwargs From 51a72fbe8a95e7dc96f773b84fdd8140c9d77e8e Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Fri, 2 Feb 2024 09:02:02 -0500 Subject: [PATCH 9/9] Constraint pytorch-dependent wheel test to only run on amd64 (#4133) Fixes https://github.com/rapidsai/graph_dl/issues/446 fixes https://github.com/rapidsai/graph_dl/issues/447 Cannot test these wheels in CI due to lack of ARM binary builds for pytorch. 
Authors: - Tingyu Wang (https://github.com/tingyu66) - Brad Rees (https://github.com/BradReesWork) Approvers: - Jake Awe (https://github.com/AyodeAwe) URL: https://github.com/rapidsai/cugraph/pull/4133 --- .github/workflows/test.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.github/workflows/test.yaml b/.github/workflows/test.yaml index 71051bcc529..528bc1ca9a2 100644 --- a/.github/workflows/test.yaml +++ b/.github/workflows/test.yaml @@ -66,6 +66,7 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_cugraph-dgl.sh + matrix_filter: map(select(.ARCH == "amd64")) wheel-tests-cugraph-pyg: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 @@ -75,6 +76,7 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_cugraph-pyg.sh + matrix_filter: map(select(.ARCH == "amd64" and .CUDA_VER == "11.8.0")) wheel-tests-cugraph-equivariant: secrets: inherit uses: rapidsai/shared-workflows/.github/workflows/wheels-test.yaml@branch-24.02 @@ -84,3 +86,4 @@ jobs: date: ${{ inputs.date }} sha: ${{ inputs.sha }} script: ci/test_wheel_cugraph-equivariant.sh + matrix_filter: map(select(.ARCH == "amd64"))