diff --git a/benchmarks/nx-cugraph/pytest-based/bench_algos.py b/benchmarks/nx-cugraph/pytest-based/bench_algos.py index a8ed18a20fc..97eb32e2aaa 100644 --- a/benchmarks/nx-cugraph/pytest-based/bench_algos.py +++ b/benchmarks/nx-cugraph/pytest-based/bench_algos.py @@ -38,20 +38,27 @@ warmup_rounds = 1 dataset_param_values = [ + # name: karate, nodes: 34, edges: 156 pytest.param(datasets.karate, marks=[pytest.mark.small, pytest.mark.undirected]), + # name: netscience, nodes: 1461, edges: 5484 pytest.param(datasets.netscience, marks=[pytest.mark.small, pytest.mark.directed]), + # name: email-Eu-core, nodes: 1005, edges: 25571 pytest.param( datasets.email_Eu_core, marks=[pytest.mark.small, pytest.mark.directed] ), + # name: cit-Patents, nodes: 3774768, edges: 16518948 pytest.param( datasets.cit_patents, marks=[pytest.mark.medium, pytest.mark.directed] ), + # name: hollywood, nodes: 1139905, edges: 57515616 pytest.param( datasets.hollywood, marks=[pytest.mark.medium, pytest.mark.undirected] ), + # name: soc-LiveJournal1, nodes: 4847571, edges: 68993773 pytest.param( datasets.soc_livejournal, marks=[pytest.mark.medium, pytest.mark.directed] ), + # name: europe_osm, nodes: 50912018, edges: 54054660 pytest.param( datasets.europe_osm, marks=[pytest.mark.large, pytest.mark.undirected] ), @@ -226,12 +233,21 @@ def get_graph_obj_for_benchmark(graph_obj, backend_wrapper): """ G = graph_obj if backend_wrapper.backend_name == "cugraph-preconverted": - G = nxcg.from_networkx(G) + G = nxcg.from_networkx(G, preserve_all_attrs=True) return G +def get_highest_degree_node(graph_obj): + degrees = graph_obj.degree() # list of tuples of (node, degree) + return max(degrees, key=lambda t: t[1])[0] + + ################################################################################ # Benchmarks +def bench_from_networkx(benchmark, graph_obj): + benchmark(nxcg.from_networkx, graph_obj) + + # normalized_param_values = [True, False] # k_param_values = [10, 100] normalized_param_values = [True] @@ -284,7 +300,7 @@ def bench_edge_betweenness_centrality( def bench_louvain_communities(benchmark, graph_obj, backend_wrapper): G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) - # The cugraph backend for louvain_communities only supports undirected graphs + # DiGraphs are not supported if G.is_directed(): G = G.to_undirected() result = benchmark.pedantic( @@ -416,10 +432,8 @@ def bench_pagerank(benchmark, graph_obj, backend_wrapper): def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapper): - # Use the node with the highest degree - degrees = graph_obj.degree() # list of tuples of (node, degree) - node = max(degrees, key=lambda t: t[1])[0] G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) result = benchmark.pedantic( target=backend_wrapper(nx.single_source_shortest_path_length), @@ -435,11 +449,8 @@ def bench_single_source_shortest_path_length(benchmark, graph_obj, backend_wrapp def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapper): - # Use the node with the highest degree - degrees = graph_obj.degree() # list of tuples of (node, degree) - node = max(degrees, key=lambda t: t[1])[0] G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) - + node = get_highest_degree_node(graph_obj) result = benchmark.pedantic( target=backend_wrapper( nx.single_target_shortest_path_length, exhaust_returned_iterator=True @@ -456,3 +467,340 @@ def bench_single_target_shortest_path_length(benchmark, graph_obj, backend_wrapp # needed for this algo in NX 3.3+ which returns a dict instead of an # iterator. Forcing to a list does not change the benchmark timing. assert type(result) is list + + +def bench_ancestors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.ancestors), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_average_clustering(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.average_clustering), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_generic_bfs_edges(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.generic_bfs_edges, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_edges(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_edges, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_layers(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_layers, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + sources=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_predecessors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_predecessors, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_successors(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_successors, exhaust_returned_iterator=True), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_bfs_tree(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.bfs_tree), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + # Check that this at least appears to be some kind of NX-like Graph + assert hasattr(result, "has_node") + + +def bench_clustering(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.clustering), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_core_number(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.core_number), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_descendants(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.descendants), + args=(G,), + kwargs=dict( + source=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_descendants_at_distance(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.descendants_at_distance), + args=(G,), + kwargs=dict( + source=node, + distance=1, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is set + + +def bench_is_bipartite(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_bipartite), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_is_strongly_connected(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_strongly_connected), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_is_weakly_connected(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.is_weakly_connected), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is bool + + +def bench_number_strongly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.number_strongly_connected_components), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is int + + +def bench_number_weakly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.number_weakly_connected_components), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is int + + +def bench_overall_reciprocity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper(nx.overall_reciprocity), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_reciprocity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + node = get_highest_degree_node(graph_obj) + result = benchmark.pedantic( + target=backend_wrapper(nx.reciprocity), + args=(G,), + kwargs=dict( + nodes=node, + ), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_strongly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper( + nx.strongly_connected_components, exhaust_returned_iterator=True + ), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list + + +def bench_transitivity(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported by nx-cugraph + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.transitivity), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is float + + +def bench_triangles(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + # DiGraphs are not supported + if G.is_directed(): + G = G.to_undirected() + result = benchmark.pedantic( + target=backend_wrapper(nx.triangles), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is dict + + +def bench_weakly_connected_components(benchmark, graph_obj, backend_wrapper): + G = get_graph_obj_for_benchmark(graph_obj, backend_wrapper) + result = benchmark.pedantic( + target=backend_wrapper( + nx.weakly_connected_components, exhaust_returned_iterator=True + ), + args=(G,), + rounds=rounds, + iterations=iterations, + warmup_rounds=warmup_rounds, + ) + assert type(result) is list diff --git a/benchmarks/nx-cugraph/pytest-based/run-2402.sh b/benchmarks/nx-cugraph/pytest-based/run-2402.sh new file mode 100755 index 00000000000..44ed0bda43a --- /dev/null +++ b/benchmarks/nx-cugraph/pytest-based/run-2402.sh @@ -0,0 +1,46 @@ +#!/bin/bash +# +# Copyright (c) 2024, NVIDIA CORPORATION. +# +# Runs benchmarks for the 24.02 algos. +# Pass either a or b or both. This is useful for separating batches of runs on different GPUs: +# CUDA_VISIBLE_DEVICES=1 run-2402.sh b + +mkdir -p logs + +# benches="$benches ..." pattern is easy to comment out individual runs +benches= + +while [[ $1 != "" ]]; do + if [[ $1 == "a" ]]; then + benches="$benches bench_ancestors" + benches="$benches bench_average_clustering" + benches="$benches bench_generic_bfs_edges" + benches="$benches bench_bfs_edges" + benches="$benches bench_bfs_layers" + benches="$benches bench_bfs_predecessors" + benches="$benches bench_bfs_successors" + benches="$benches bench_bfs_tree" + benches="$benches bench_clustering" + benches="$benches bench_core_number" + benches="$benches bench_descendants" + elif [[ $1 == "b" ]]; then + benches="$benches bench_descendants_at_distance" + benches="$benches bench_is_bipartite" + benches="$benches bench_is_strongly_connected" + benches="$benches bench_is_weakly_connected" + benches="$benches bench_number_strongly_connected_components" + benches="$benches bench_number_weakly_connected_components" + benches="$benches bench_overall_reciprocity" + benches="$benches bench_reciprocity" + benches="$benches bench_strongly_connected_components" + benches="$benches bench_transitivity" + benches="$benches bench_triangles" + benches="$benches bench_weakly_connected_components" + fi + shift +done + +for bench in $benches; do + pytest -sv -k "soc-livejournal1" "bench_algos.py::$bench" 2>&1 | tee "logs/${bench}.log" +done diff --git a/ci/test_python.sh b/ci/test_python.sh index 5892c37e35b..b070143f076 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -63,7 +63,16 @@ pytest \ tests popd -# FIXME: TEMPORARILY disable single-GPU "MG" testing +# Test runs that include tests that use dask require +# --import-mode=append. Those tests start a LocalCUDACluster that inherits +# changes from pytest's modifications to PYTHONPATH (which defaults to +# prepending source tree paths to PYTHONPATH). This causes the +# LocalCUDACluster subprocess to import cugraph from the source tree instead of +# the install location, and in most cases, the source tree does not have +# extensions built in-place and will result in ImportErrors. +# +# FIXME: TEMPORARILY disable MG PropertyGraph tests (experimental) tests and +# bulk sampler IO tests (hangs in CI) rapids-logger "pytest cugraph" pushd python/cugraph/cugraph DASK_WORKER_DEVICES="0" \ @@ -72,6 +81,7 @@ DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \ pytest \ -v \ + --import-mode=append \ --benchmark-disable \ --cache-clear \ --junitxml="${RAPIDS_TESTS_DIR}/junit-cugraph.xml" \ @@ -79,7 +89,7 @@ pytest \ --cov=cugraph \ --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-coverage.xml" \ --cov-report=term \ - -k "not test_property_graph_mg" \ + -k "not test_property_graph_mg and not test_bulk_sampler_io" \ tests popd @@ -110,12 +120,33 @@ popd rapids-logger "pytest networkx using nx-cugraph backend" pushd python/nx-cugraph +# Use editable install to make coverage work +pip install -e . --no-deps ./run_nx_tests.sh # run_nx_tests.sh outputs coverage data, so check that total coverage is >0.0% # in case nx-cugraph failed to load but fallback mode allowed the run to pass. _coverage=$(coverage report|grep "^TOTAL") echo "nx-cugraph coverage from networkx tests: $_coverage" echo $_coverage | awk '{ if ($NF == "0.0%") exit 1 }' +# Ensure all algorithms were called by comparing covered lines to function lines. +# Run our tests again (they're fast enough) to add their coverage, then create coverage.json +pytest \ + --pyargs nx_cugraph \ + --config-file=./pyproject.toml \ + --cov-config=./pyproject.toml \ + --cov=nx_cugraph \ + --cov-append \ + --cov-report= +coverage report \ + --include="*/nx_cugraph/algorithms/*" \ + --omit=__init__.py \ + --show-missing \ + --rcfile=./pyproject.toml +coverage json --rcfile=./pyproject.toml +python -m nx_cugraph.tests.ensure_algos_covered +# Exercise (and show results of) scripts that show implemented networkx algorithms +python -m nx_cugraph.scripts.print_tree --dispatch-name --plc --incomplete --different +python -m nx_cugraph.scripts.print_table popd rapids-logger "pytest cugraph-service (single GPU)" diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 428efd4ed21..8c5832e412f 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -1,5 +1,5 @@ #!/bin/bash -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. set -eoxu pipefail @@ -21,10 +21,21 @@ arch=$(uname -m) if [[ "${arch}" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then python ./ci/wheel_smoke_test_${package_name}.py else - # FIXME: TEMPORARILY disable single-GPU "MG" testing + # Test runs that include tests that use dask require + # --import-mode=append. See test_python.sh for details. + # FIXME: Adding PY_IGNORE_IMPORTMISMATCH=1 to workaround conftest.py import + # mismatch error seen by nx-cugraph after using pytest 8 and + # --import-mode=append. RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets \ + PY_IGNORE_IMPORTMISMATCH=1 \ + DASK_WORKER_DEVICES="0" \ DASK_DISTRIBUTED__SCHEDULER__WORKER_TTL="1000s" \ DASK_DISTRIBUTED__COMM__TIMEOUTS__CONNECT="1000s" \ DASK_CUDA_WAIT_WORKERS_MIN_TIMEOUT="1000s" \ - python -m pytest ./python/${package_name}/${python_package_name}/tests + python -m pytest \ + -v \ + --import-mode=append \ + --benchmark-disable \ + -k "not test_property_graph_mg and not test_bulk_sampler_io" \ + ./python/${package_name}/${python_package_name}/tests fi diff --git a/cpp/cmake/thirdparty/get_raft.cmake b/cpp/cmake/thirdparty/get_raft.cmake index 015b5b07920..8f56372c81a 100644 --- a/cpp/cmake/thirdparty/get_raft.cmake +++ b/cpp/cmake/thirdparty/get_raft.cmake @@ -1,5 +1,5 @@ #============================================================================= -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -52,6 +52,7 @@ function(find_and_configure_raft) "RAFT_COMPILE_LIBRARY ${PKG_COMPILE_RAFT_LIB}" "BUILD_TESTS OFF" "BUILD_BENCH OFF" + "BUILD_CAGRA_HNSWLIB OFF" ) if(raft_ADDED) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py index 8fed467bf6d..cdf1e937e67 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleDistributedGraph.py @@ -39,6 +39,7 @@ ) from cugraph.dask.common.mg_utils import run_gc_on_dask_cluster import cugraph.dask.comms.comms as Comms +from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates class simpleDistributedGraphImpl: @@ -95,6 +96,7 @@ def _make_plc_graph( weight_type, edge_id_type, edge_type_id, + drop_multi_edges, ): weights = None edge_ids = None @@ -149,6 +151,7 @@ def _make_plc_graph( num_arrays=num_arrays, store_transposed=store_transposed, do_expensive_check=False, + drop_multi_edges=drop_multi_edges, ) del edata_x gc.collect() @@ -267,7 +270,7 @@ def __from_edgelist( input_ddf, source, destination, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) value_col = None @@ -277,7 +280,7 @@ def __from_edgelist( source, destination, value_col_names, - multi=self.properties.multi_edge, + multi=True, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -364,6 +367,7 @@ def __from_edgelist( self.weight_type, self.edge_id_type, self.edge_type_id_type, + not self.properties.multi_edge, ) for w, edata in persisted_keys_d.items() } @@ -455,6 +459,15 @@ def view_edge_list(self): else: is_multi_column = True + if not self.properties.multi_edge: + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + _client = default_client() + workers = _client.scheduler_info()["workers"] + edgelist_df = _memory_efficient_drop_duplicates( + edgelist_df, [srcCol, dstCol], len(workers) + ) + edgelist_df[srcCol], edgelist_df[dstCol] = edgelist_df[ [srcCol, dstCol] ].min(axis=1), edgelist_df[[srcCol, dstCol]].max(axis=1) diff --git a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py index 22d82eb1796..121a4c6245a 100644 --- a/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py +++ b/python/cugraph/cugraph/structure/graph_implementation/simpleGraph.py @@ -1,4 +1,4 @@ -# Copyright (c) 2021-2023, NVIDIA CORPORATION. +# Copyright (c) 2021-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -264,7 +264,7 @@ def __from_edgelist( source, destination, edge_attr, - multi=self.properties.multi_edge, + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -279,7 +279,7 @@ def __from_edgelist( elist, source, destination, - multi=self.properties.multi_edge, + multi=self.properties.multi_edge, # Deprecated parameter symmetrize=not self.properties.directed, ) @@ -298,7 +298,10 @@ def __from_edgelist( self._replicate_edgelist() self._make_plc_graph( - value_col=value_col, store_transposed=store_transposed, renumber=renumber + value_col=value_col, + store_transposed=store_transposed, + renumber=renumber, + drop_multi_edges=not self.properties.multi_edge, ) def to_pandas_edgelist( @@ -477,6 +480,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] + elif not use_initial_input_df and self.properties.renumbered: # Do not unrenumber the vertices if the initial input df was used if not self.properties.directed: @@ -484,6 +488,7 @@ def view_edge_list(self): edgelist_df[simpleGraphImpl.srcCol] <= edgelist_df[simpleGraphImpl.dstCol] ] + edgelist_df = self.renumber_map.unrenumber( edgelist_df, simpleGraphImpl.srcCol ) @@ -1084,6 +1089,7 @@ def _make_plc_graph( value_col: Dict[str, cudf.DataFrame] = None, store_transposed: bool = False, renumber: bool = True, + drop_multi_edges: bool = False, ): """ Parameters @@ -1100,6 +1106,8 @@ def _make_plc_graph( Whether to renumber the vertices of the graph. Required if inputted vertex ids are not of int32 or int64 type. + drop_multi_edges: bool (default=False) + Whether to drop multi edges """ if value_col is None: @@ -1163,6 +1171,7 @@ def _make_plc_graph( renumber=renumber, do_expensive_check=True, input_array_format=input_array_format, + drop_multi_edges=drop_multi_edges, ) def to_directed(self, DiG, store_transposed=False): diff --git a/python/cugraph/cugraph/structure/symmetrize.py b/python/cugraph/cugraph/structure/symmetrize.py index b324ff65834..30c6394ade9 100644 --- a/python/cugraph/cugraph/structure/symmetrize.py +++ b/python/cugraph/cugraph/structure/symmetrize.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -15,6 +15,7 @@ import cudf import dask_cudf from dask.distributed import default_client +import warnings def symmetrize_df( @@ -54,6 +55,11 @@ def symmetrize_df( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -84,6 +90,12 @@ def symmetrize_df( if multi: return result else: + warnings.warn( + "Multi is deprecated and the removal of multi edges will no longer be " + "supported from 'symmetrize'. Multi edges will be removed upon creation " + "of graph instance.", + FutureWarning, + ) vertex_col_name = src_name + dst_name result = result.groupby(by=[*vertex_col_name], as_index=False).min() return result @@ -128,6 +140,11 @@ def symmetrize_ddf( Name of the column in the data frame containing the weight ids multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -165,8 +182,15 @@ def symmetrize_ddf( else: result = ddf if multi: + result = result.reset_index(drop=True).repartition(npartitions=len(workers) * 2) return result else: + warnings.warn( + "Multi is deprecated and the removal of multi edges will no longer be " + "supported from 'symmetrize'. Multi edges will be removed upon creation " + "of graph instance.", + FutureWarning, + ) vertex_col_name = src_name + dst_name result = _memory_efficient_drop_duplicates( result, vertex_col_name, len(workers) @@ -181,6 +205,7 @@ def symmetrize( value_col_name=None, multi=False, symmetrize=True, + do_expensive_check=False, ): """ Take a dataframe of source destination pairs along with associated @@ -208,6 +233,11 @@ def symmetrize( weights column name. multi : bool, optional (default=False) + [Deprecated, Multi will be removed in future version, and the removal + of multi edges will no longer be supported from 'symmetrize'. + Multi edges will be removed upon creation of graph instance directly + based on if the graph is `curgaph.MultiGraph` or `cugraph.Graph`.] + Set to True if graph is a Multi(Di)Graph. This allows multiple edges instead of dropping them. @@ -234,8 +264,9 @@ def symmetrize( if "edge_id" in input_df.columns and symmetrize: raise ValueError("Edge IDs are not supported on undirected graphs") - csg.null_check(input_df[source_col_name]) - csg.null_check(input_df[dest_col_name]) + if do_expensive_check: # FIXME: Optimize this check as it is currently expensive + csg.null_check(input_df[source_col_name]) + csg.null_check(input_df[dest_col_name]) if isinstance(input_df, dask_cudf.DataFrame): output_df = symmetrize_ddf( diff --git a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py index 460a25cbd14..371410b8bd5 100644 --- a/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_uniform_neighbor_sample_mg.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022-2023, NVIDIA CORPORATION. +# Copyright (c) 2022-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -26,6 +26,7 @@ from cugraph.testing import UNDIRECTED_DATASETS from cugraph.dask import uniform_neighbor_sample from cugraph.dask.common.mg_utils import is_single_gpu +from cugraph.structure.symmetrize import _memory_efficient_drop_duplicates from cugraph.datasets import email_Eu_core, small_tree from pylibcugraph.testing.utils import gen_fixture_params_product @@ -135,6 +136,14 @@ def test_mg_uniform_neighbor_sample_simple(dask_client, input_combo): dg = input_combo["MGGraph"] input_df = dg.input_df + # Drop parallel edges for non MultiGraph + # FIXME: Drop multi edges with the CAPI instead. + vertex_col_name = ["src", "dst"] + workers = dask_client.scheduler_info()["workers"] + input_df = _memory_efficient_drop_duplicates( + input_df, vertex_col_name, len(workers) + ) + result_nbr = uniform_neighbor_sample( dg, input_combo["start_list"], diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index 5a4773168b6..8e87fc23592 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -26,7 +26,7 @@ repos: - id: mixed-line-ending - id: trailing-whitespace - repo: https://github.com/abravalheri/validate-pyproject - rev: v0.15 + rev: v0.16 hooks: - id: validate-pyproject name: Validate pyproject.toml diff --git a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py index ef1c011363a..f5d5e2a995d 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/traversal/breadth_first_search.py @@ -68,7 +68,7 @@ def generic_bfs_edges(G, source, neighbors=None, depth_limit=None, sort_neighbor raise NotImplementedError( "sort_neighbors argument in generic_bfs_edges is not currently supported" ) - return bfs_edges(source, depth_limit=depth_limit) + return bfs_edges(G, source, depth_limit=depth_limit) @generic_bfs_edges._can_run diff --git a/python/nx-cugraph/nx_cugraph/classes/digraph.py b/python/nx-cugraph/nx_cugraph/classes/digraph.py index 169815eb067..e5cfb8f6815 100644 --- a/python/nx-cugraph/nx_cugraph/classes/digraph.py +++ b/python/nx-cugraph/nx_cugraph/classes/digraph.py @@ -86,9 +86,9 @@ def to_undirected(self, reciprocal=False, as_view=False): key: val[indices].copy() for key, val in self.edge_masks.items() } else: - src_indices, dst_indices = cp.divmod( - src_dst_indices_new, N, dtype=index_dtype - ) + src_indices, dst_indices = cp.divmod(src_dst_indices_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) else: src_dst_indices_old_T = self.src_indices + N * self.dst_indices.astype( np.int64 @@ -116,9 +116,9 @@ def to_undirected(self, reciprocal=False, as_view=False): src_dst_indices_new = cp.union1d( src_dst_indices_old, src_dst_indices_old_T ) - src_indices, dst_indices = cp.divmod( - src_dst_indices_new, N, dtype=index_dtype - ) + src_indices, dst_indices = cp.divmod(src_dst_indices_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) if self.edge_values: recip_indices = cp.lexsort(cp.vstack((src_indices, dst_indices))) diff --git a/python/nx-cugraph/nx_cugraph/classes/graph.py b/python/nx-cugraph/nx_cugraph/classes/graph.py index f697668750d..0951ee6b135 100644 --- a/python/nx-cugraph/nx_cugraph/classes/graph.py +++ b/python/nx-cugraph/nx_cugraph/classes/graph.py @@ -668,7 +668,9 @@ def _get_plc_graph( raise ValueError( f'symmetrize must be "union" or "intersection"; got "{symmetrize}"' ) - src_indices, dst_indices = cp.divmod(src_dst_new, N, dtype=index_dtype) + src_indices, dst_indices = cp.divmod(src_dst_new, N) + src_indices = src_indices.astype(index_dtype) + dst_indices = dst_indices.astype(index_dtype) return plc.SGGraph( resource_handle=plc.ResourceHandle(), diff --git a/python/nx-cugraph/nx_cugraph/interface.py b/python/nx-cugraph/nx_cugraph/interface.py index 04591c0e9e3..a57074aabb0 100644 --- a/python/nx-cugraph/nx_cugraph/interface.py +++ b/python/nx-cugraph/nx_cugraph/interface.py @@ -80,18 +80,26 @@ def key(testpath): nxver = parse(nx.__version__) if nxver.major == 3 and nxver.minor <= 2: - # Networkx versions prior to 3.2.1 have tests written to expect - # sp.sparse.linalg.ArpackNoConvergence exceptions raised on no - # convergence in HITS. Newer versions since the merge of - # https://github.com/networkx/networkx/pull/7084 expect - # nx.PowerIterationFailedConvergence, which is what nx_cugraph.hits - # raises, so we mark them as xfail for previous versions of NX. xfail.update( { + # NetworkX versions prior to 3.2.1 have tests written to + # expect sp.sparse.linalg.ArpackNoConvergence exceptions + # raised on no convergence in HITS. Newer versions since + # the merge of + # https://github.com/networkx/networkx/pull/7084 expect + # nx.PowerIterationFailedConvergence, which is what + # nx_cugraph.hits raises, so we mark them as xfail for + # previous versions of NX. key( "test_hits.py:TestHITS.test_hits_not_convergent" ): "nx_cugraph.hits raises updated exceptions not caught in " "these tests", + # NetworkX versions 3.2 and older contain tests that fail + # with pytest>=8. Assume pytest>=8 and mark xfail. + key( + "test_strongly_connected.py:" + "TestStronglyConnected.test_connected_raise" + ): "test is incompatible with pytest>=8", } ) diff --git a/python/nx-cugraph/nx_cugraph/scripts/print_table.py b/python/nx-cugraph/nx_cugraph/scripts/print_table.py index be9cfa31c48..117a1444f48 100755 --- a/python/nx-cugraph/nx_cugraph/scripts/print_table.py +++ b/python/nx-cugraph/nx_cugraph/scripts/print_table.py @@ -59,7 +59,7 @@ def main(path_to_info=None, *, file=sys.stdout): if path_to_info is None: path_to_info = get_path_to_info(version_added_sep=".") lines = ["networkx_path,dispatch_name,version_added,plc,is_incomplete,is_different"] - lines.extend(",".join([str(i) for i in info]) for info in path_to_info.values()) + lines.extend(",".join(map(str, info)) for info in path_to_info.values()) text = "\n".join(lines) print(text, file=file) return text diff --git a/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py b/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py new file mode 100644 index 00000000000..7047f0eeafd --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/ensure_algos_covered.py @@ -0,0 +1,84 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Ensure that all functions wrapped by @networkx_algorithm were called. + +This file is run by CI and should not normally be run manually. +""" +import inspect +import json +from pathlib import Path + +from nx_cugraph.interface import BackendInterface +from nx_cugraph.utils import networkx_algorithm + +with Path("coverage.json").open() as f: + coverage = json.load(f) + +filenames_to_executed_lines = { + "nx_cugraph/" + + filename.rsplit("nx_cugraph/", 1)[-1]: set(coverage_info["executed_lines"]) + for filename, coverage_info in coverage["files"].items() +} + + +def unwrap(func): + while hasattr(func, "__wrapped__"): + func = func.__wrapped__ + return func + + +def get_func_filename(func): + return "nx_cugraph" + inspect.getfile(unwrap(func)).rsplit("nx_cugraph", 1)[-1] + + +def get_func_linenos(func): + lines, lineno = inspect.getsourcelines(unwrap(func)) + for i, line in enumerate(lines, lineno): + if ":\n" in line: + return set(range(i + 1, lineno + len(lines))) + raise RuntimeError(f"Could not determine line numbers for function {func}") + + +def has_any_coverage(func): + return bool( + filenames_to_executed_lines[get_func_filename(func)] & get_func_linenos(func) + ) + + +def main(): + no_coverage = set() + for attr, func in vars(BackendInterface).items(): + if not isinstance(func, networkx_algorithm): + continue + if not has_any_coverage(func): + no_coverage.add(attr) + if no_coverage: + msg = "The following algorithms have no coverage: " + ", ".join( + sorted(no_coverage) + ) + # Create a border of "!" + msg = ( + "\n\n" + + "!" * (len(msg) + 6) + + "\n!! " + + msg + + " !!\n" + + "!" * (len(msg) + 6) + + "\n" + ) + raise AssertionError(msg) + print("\nSuccess: coverage determined all algorithms were called!\n") + + +if __name__ == "__main__": + main() diff --git a/python/nx-cugraph/nx_cugraph/tests/test_bfs.py b/python/nx-cugraph/nx_cugraph/tests/test_bfs.py new file mode 100644 index 00000000000..c2b22e98949 --- /dev/null +++ b/python/nx-cugraph/nx_cugraph/tests/test_bfs.py @@ -0,0 +1,33 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +import networkx as nx +import pytest +from packaging.version import parse + +nxver = parse(nx.__version__) + +if nxver.major == 3 and nxver.minor < 2: + pytest.skip("Need NetworkX >=3.2 to test clustering", allow_module_level=True) + + +def test_generic_bfs_edges(): + # generic_bfs_edges currently isn't exercised by networkx tests + Gnx = nx.karate_club_graph() + Gcg = nx.karate_club_graph(backend="cugraph") + for depth_limit in (0, 1, 2): + for source in Gnx: + # Some ordering is arbitrary, so I think there's a chance + # this test may fail if networkx or nx-cugraph changes. + nx_result = nx.generic_bfs_edges(Gnx, source, depth_limit=depth_limit) + cg_result = nx.generic_bfs_edges(Gcg, source, depth_limit=depth_limit) + assert sorted(nx_result) == sorted(cg_result), (source, depth_limit) diff --git a/python/nx-cugraph/run_nx_tests.sh b/python/nx-cugraph/run_nx_tests.sh index 07c97cdf947..da7a2014cef 100755 --- a/python/nx-cugraph/run_nx_tests.sh +++ b/python/nx-cugraph/run_nx_tests.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # -# Copyright (c) 2023, NVIDIA CORPORATION. +# Copyright (c) 2023-2024, NVIDIA CORPORATION. # # NETWORKX_GRAPH_CONVERT=cugraph # Used by networkx versions 3.0 and 3.1 @@ -30,7 +30,13 @@ NETWORKX_TEST_BACKEND=cugraph \ NETWORKX_FALLBACK_TO_NX=True \ pytest \ --pyargs networkx \ - --cov=nx_cugraph.algorithms \ - --cov-report term-missing \ - --no-cov-on-fail \ + --config-file=$(dirname $0)/pyproject.toml \ + --cov-config=$(dirname $0)/pyproject.toml \ + --cov=nx_cugraph \ + --cov-report= \ "$@" +coverage report \ + --include="*/nx_cugraph/algorithms/*" \ + --omit=__init__.py \ + --show-missing \ + --rcfile=$(dirname $0)/pyproject.toml diff --git a/python/pylibcugraph/pylibcugraph/graphs.pyx b/python/pylibcugraph/pylibcugraph/graphs.pyx index 76ad7690840..def47390ce5 100644 --- a/python/pylibcugraph/pylibcugraph/graphs.pyx +++ b/python/pylibcugraph/pylibcugraph/graphs.pyx @@ -463,9 +463,9 @@ cdef class MGGraph(_GPUGraph): edge_type_view_ptr_ptr, store_transposed, num_arrays, - do_expensive_check, drop_self_loops, drop_multi_edges, + do_expensive_check, &(self.c_graph_ptr), &error_ptr)