From cafded113c9545e5e7211cc965f53c00939307c0 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Fri, 25 Aug 2023 14:49:23 +0100 Subject: [PATCH 1/4] Update the docstrings of the similarity algorithms (#3817) A [PR](https://github.com/rapidsai/cugraph/pull/3002) updating the vertex pair column names was merged a few releases ago; however, a few docstrings weren't updated. This PR updates the docstrings for Jaccard and Sorensen. Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Alex Barghi (https://github.com/alexbarghi-nv) URL: https://github.com/rapidsai/cugraph/pull/3817 --- python/cugraph/cugraph/link_prediction/jaccard.py | 14 +++++++------- python/cugraph/cugraph/link_prediction/sorensen.py | 14 +++++++------- 2 files changed, 14 insertions(+), 14 deletions(-) diff --git a/python/cugraph/cugraph/link_prediction/jaccard.py b/python/cugraph/cugraph/link_prediction/jaccard.py index f1b488c8cca..334d57f9d80 100644 --- a/python/cugraph/cugraph/link_prediction/jaccard.py +++ b/python/cugraph/cugraph/link_prediction/jaccard.py @@ -96,13 +96,13 @@ def jaccard(input_graph, vertex_pair=None, do_expensive_check=True): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified). - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + df['second'] : cudf.Series + the second vertex ID of each pair (will be identical to second if specified). df['jaccard_coeff'] : cudf.Series - The computed jaccard coefficient between the first and the second + The computed Jaccard coefficient between the first and the second vertex ID. 
Examples @@ -182,8 +182,8 @@ def jaccard_coefficient(G, ebunch=None, do_expensive_check=True): the second vertex ID of each pair (will be identical to second if specified). df['jaccard_coeff'] : cudf.Series - The computed Jaccard coefficient between the source and destination - vertices. + The computed Jaccard coefficient between the first and the second + vertex ID. Examples -------- diff --git a/python/cugraph/cugraph/link_prediction/sorensen.py b/python/cugraph/cugraph/link_prediction/sorensen.py index 1d43adb51cd..ef2bd8d674d 100644 --- a/python/cugraph/cugraph/link_prediction/sorensen.py +++ b/python/cugraph/cugraph/link_prediction/sorensen.py @@ -74,8 +74,8 @@ def sorensen(input_graph, vertex_pair=None, do_expensive_check=True): specified) df['sorensen_coeff'] : cudf.Series - The computed Sorensen coefficient between the source and - destination vertices + The computed Sorensen coefficient between the first and the second + vertex ID. Examples -------- @@ -149,13 +149,13 @@ def sorensen_coefficient(G, ebunch=None, do_expensive_check=True): relative to the adjacency list, or that given by the specified vertex pairs. - df['source'] : cudf.Series - The source vertex ID (will be identical to first if specified). - df['destination'] : cudf.Series - The destination vertex ID (will be identical to second if + df['first'] : cudf.Series + The first vertex ID of each pair (will be identical to first if specified). + df['second'] : cudf.Series + the second vertex ID of each pair (will be identical to second if specified). df['sorensen_coeff'] : cudf.Series - The computed sorensen coefficient between the first and the second + The computed Sorensen coefficient between the first and the second vertex ID. 
Examples From a7d988cb20e644e813267c7f0d82c9fa19d48999 Mon Sep 17 00:00:00 2001 From: AJ Schmidt Date: Tue, 29 Aug 2023 10:48:27 -0400 Subject: [PATCH 2/4] Use `copy-pr-bot` (#3827) This PR replaces the `copy_prs` functionality from the `ops-bot` with the new dedicated `copy-pr-bot` GitHub application. Thorough documentation for the new `copy-pr-bot` application can be viewed below. - https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ **Important**: `copy-pr-bot` enforces signed commits. If an organization member opens a PR that contains unsigned commits, it will be deemed untrusted and therefore require an `/ok to test` comment. See the GitHub docs [here](https://docs.github.com/en/authentication/managing-commit-signature-verification/about-commit-signature-verification) for information on how to set up commit signing. Any time a PR is deemed untrusted, it will receive a comment that looks like this: https://github.com/rapidsai/ci-imgs/pull/63#issuecomment-1688973208. Every subsequent commit on an untrusted PR will require an additional `/ok to test` comment. Any existing PRs that have unsigned commits after this change is merged will require an `/ok to test` comment for each subsequent commit _or_ the PR can be rebased to include signed commits as mentioned in the docs below: https://docs.gha-runners.nvidia.com/cpr/contributors. This information is all included on the documentation page linked above. 
_I've skipped CI on this PR since it's not a change that is tested._ [skip ci] --- .github/copy-pr-bot.yaml | 4 ++++ .github/ops-bot.yaml | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 .github/copy-pr-bot.yaml diff --git a/.github/copy-pr-bot.yaml b/.github/copy-pr-bot.yaml new file mode 100644 index 00000000000..895ba83ee54 --- /dev/null +++ b/.github/copy-pr-bot.yaml @@ -0,0 +1,4 @@ +# Configuration file for `copy-pr-bot` GitHub App +# https://docs.gha-runners.nvidia.com/apps/copy-pr-bot/ + +enabled: true diff --git a/.github/ops-bot.yaml b/.github/ops-bot.yaml index 2d1444c595d..9a0b4155035 100644 --- a/.github/ops-bot.yaml +++ b/.github/ops-bot.yaml @@ -5,5 +5,4 @@ auto_merger: true branch_checker: true label_checker: true release_drafter: true -copy_prs: true recently_updated: true From 4656d3ebdf525eed45e821cf29d88e924299cb03 Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Wed, 30 Aug 2023 02:08:19 +0200 Subject: [PATCH 3/4] Disable mg tests (#3833) This PR is on top of the changes from #3831. Temporarily disables single-GPU "MG" tests in CI until https://github.com/rapidsai/cugraph/issues/3790 is closed. This will unblock CI for PRs unrelated to the issue in https://github.com/rapidsai/cugraph/issues/3790 at the risk of removed coverage for MG code paths. Hopefully nightly MG testing will minimize the risk. A followup PR will be submitted that re-enables the tests and must be merged prior to 23.10 burndown. 
Authors: - Naim (https://github.com/naimnv) - Rick Ratzel (https://github.com/rlratzel) Approvers: - Rick Ratzel (https://github.com/rlratzel) - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/3833 --- ci/test_python.sh | 6 +++++- ci/test_wheel.sh | 4 +++- .../tests/centrality/test_betweenness_centrality_mg.py | 2 +- .../tests/centrality/test_edge_betweenness_centrality_mg.py | 2 +- .../tests/centrality/test_eigenvector_centrality_mg.py | 4 ++-- .../cugraph/tests/centrality/test_katz_centrality_mg.py | 6 +++--- python/cugraph/cugraph/tests/comms/test_comms_mg.py | 2 +- .../cugraph/tests/components/test_connectivity_mg.py | 2 +- python/cugraph/cugraph/tests/core/test_k_core_mg.py | 4 ++-- python/cugraph/cugraph/tests/internals/test_renumber_mg.py | 2 +- python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py | 4 ++-- .../cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py | 4 ++-- .../cugraph/tests/link_prediction/test_jaccard_mg.py | 4 ++-- .../cugraph/tests/link_prediction/test_overlap_mg.py | 4 ++-- .../cugraph/tests/link_prediction/test_sorensen_mg.py | 4 ++-- python/cugraph/cugraph/tests/sampling/test_egonet_mg.py | 2 +- .../cugraph/cugraph/tests/sampling/test_random_walks_mg.py | 2 +- python/cugraph/cugraph/tests/traversal/test_bfs_mg.py | 6 +++--- python/cugraph/cugraph/tests/traversal/test_sssp_mg.py | 2 +- 19 files changed, 36 insertions(+), 30 deletions(-) diff --git a/ci/test_python.sh b/ci/test_python.sh index dd205b16049..e650630fa47 100755 --- a/ci/test_python.sh +++ b/ci/test_python.sh @@ -63,6 +63,10 @@ pytest \ tests popd +# FIXME: TEMPORARILY disable single-GPU "MG" testing until +# https://github.com/rapidsai/cugraph/issues/3790 is closed +# When closed, replace -k "not _mg" with +# -k "not test_property_graph_mg" \ rapids-logger "pytest cugraph" pushd python/cugraph/cugraph export DASK_WORKER_DEVICES="0" @@ -75,7 +79,7 @@ pytest \ --cov=cugraph \ 
--cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cugraph-coverage.xml" \ --cov-report=term \ - -k "not test_property_graph_mg" \ + -k "not _mg" \ tests popd diff --git a/ci/test_wheel.sh b/ci/test_wheel.sh index 146186ae2e7..b62635d08b4 100755 --- a/ci/test_wheel.sh +++ b/ci/test_wheel.sh @@ -18,5 +18,7 @@ arch=$(uname -m) if [[ "${arch}" == "aarch64" && ${RAPIDS_BUILD_TYPE} == "pull-request" ]]; then python ./ci/wheel_smoke_test_${package_name}.py else - RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets python -m pytest ./python/${package_name}/${package_name}/tests + # FIXME: TEMPORARILY disable single-GPU "MG" testing until + # https://github.com/rapidsai/cugraph/issues/3790 is closed + RAPIDS_DATASET_ROOT_DIR=`pwd`/datasets python -m pytest -k "not _mg" ./python/${package_name}/${package_name}/tests fi diff --git a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py index 930f80c1bfa..1e20287d1e5 100644 --- a/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_betweenness_centrality_mg.py @@ -159,7 +159,7 @@ def input_expected_output(input_combo): @pytest.mark.mg -def test_dask_betweenness_centrality(dask_client, benchmark, input_expected_output): +def test_dask_mg_betweenness_centrality(dask_client, benchmark, input_expected_output): dg = input_expected_output["MGGraph"] k = input_expected_output["k"] diff --git a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py index 97e503e5428..4277f94a396 100644 --- a/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_edge_betweenness_centrality_mg.py @@ -179,7 +179,7 @@ def input_expected_output(input_combo): # is_single_gpu(), reason="skipping MG testing on Single GPU system" # ) 
@pytest.mark.mg -def test_dask_edge_betweenness_centrality( +def test_dask_mg_edge_betweenness_centrality( dask_client, benchmark, input_expected_output ): if input_expected_output is not None: diff --git a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py index 6828dd3cbd2..e2ce7d2c341 100644 --- a/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_eigenvector_centrality_mg.py @@ -39,7 +39,7 @@ def setup_function(): @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize("directed", IS_DIRECTED) @pytest.mark.parametrize("input_data_path", DATASETS) -def test_dask_eigenvector_centrality(dask_client, directed, input_data_path): +def test_dask_mg_eigenvector_centrality(dask_client, directed, input_data_path): input_data_path = input_data_path.as_posix() print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) @@ -86,7 +86,7 @@ def test_dask_eigenvector_centrality(dask_client, directed, input_data_path): @pytest.mark.mg -def test_dask_eigenvector_centrality_transposed_false(dask_client): +def test_dask_mg_eigenvector_centrality_transposed_false(dask_client): input_data_path = DATASETS[0] chunksize = dcg.get_chunksize(input_data_path) diff --git a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py index 1dcbcbae3cd..72b81ce50bb 100644 --- a/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py +++ b/python/cugraph/cugraph/tests/centrality/test_katz_centrality_mg.py @@ -38,7 +38,7 @@ def setup_function(): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_katz_centrality(dask_client, directed): +def 
test_dask_mg_katz_centrality(dask_client, directed): input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() print(f"dataset={input_data_path}") @@ -96,7 +96,7 @@ def test_dask_katz_centrality(dask_client, directed): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_katz_centrality_nstart(dask_client, directed): +def test_dask_mg_katz_centrality_nstart(dask_client, directed): input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() print(f"dataset={input_data_path}") chunksize = dcg.get_chunksize(input_data_path) @@ -141,7 +141,7 @@ def test_dask_katz_centrality_nstart(dask_client, directed): @pytest.mark.mg -def test_dask_katz_centrality_transposed_false(dask_client): +def test_dask_mg_katz_centrality_transposed_false(dask_client): input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() chunksize = dcg.get_chunksize(input_data_path) diff --git a/python/cugraph/cugraph/tests/comms/test_comms_mg.py b/python/cugraph/cugraph/tests/comms/test_comms_mg.py index d4b33641c1a..747ef935e01 100644 --- a/python/cugraph/cugraph/tests/comms/test_comms_mg.py +++ b/python/cugraph/cugraph/tests/comms/test_comms_mg.py @@ -38,7 +38,7 @@ def setup_function(): # ) @pytest.mark.mg @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_pagerank(dask_client, directed): +def test_dask_mg_pagerank(dask_client, directed): # Initialize and run pagerank on two distributed graphs # with same communicator diff --git a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py index e809ab66438..217c9f0f09f 100644 --- a/python/cugraph/cugraph/tests/components/test_connectivity_mg.py +++ b/python/cugraph/cugraph/tests/components/test_connectivity_mg.py @@ -40,7 +40,7 @@ def setup_function(): # ) @pytest.mark.mg 
@pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_wcc(dask_client, directed): +def test_dask_mg_wcc(dask_client, directed): input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix() print(f"dataset={input_data_path}") diff --git a/python/cugraph/cugraph/tests/core/test_k_core_mg.py b/python/cugraph/cugraph/tests/core/test_k_core_mg.py index 7f4eeeb69d5..32c4f4553a2 100644 --- a/python/cugraph/cugraph/tests/core/test_k_core_mg.py +++ b/python/cugraph/cugraph/tests/core/test_k_core_mg.py @@ -137,7 +137,7 @@ def test_sg_k_core(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_k_core(dask_client, benchmark, input_expected_output): +def test_dask_mg_k_core(dask_client, benchmark, input_expected_output): dg = input_expected_output["MGGraph"] core_number = input_expected_output["core_number"] @@ -159,7 +159,7 @@ def test_dask_k_core(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_k_core_invalid_input(dask_client): +def test_dask_mg_k_core_invalid_input(dask_client): input_data_path = datasets[0] chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py index c0abc61b050..e9521f16594 100644 --- a/python/cugraph/cugraph/tests/internals/test_renumber_mg.py +++ b/python/cugraph/cugraph/tests/internals/test_renumber_mg.py @@ -132,7 +132,7 @@ def test_mg_renumber_add_internal_vertex_id(graph_file, dask_client): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_pagerank(dask_client, directed): +def test_dask_mg_pagerank(dask_client, directed): pandas.set_option("display.max_rows", 10000) input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() diff --git 
a/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py index bfb33ccd619..5590eb17401 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_hits_mg.py @@ -111,7 +111,7 @@ def input_expected_output(input_combo): # is_single_gpu(), reason="skipping MG testing on Single GPU system" # ) @pytest.mark.mg -def test_dask_hits(dask_client, benchmark, input_expected_output): +def test_dask_mg_hits(dask_client, benchmark, input_expected_output): dg = input_expected_output["MGGraph"] @@ -155,7 +155,7 @@ def test_dask_hits(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_hits_transposed_false(dask_client): +def test_dask_mg_hits_transposed_false(dask_client): input_data_path = (utils.RAPIDS_DATASET_ROOT_DIR_PATH / "karate.csv").as_posix() chunksize = dcg.get_chunksize(input_data_path) diff --git a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py index d68aeda4a2f..63dbf31ca5e 100644 --- a/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py +++ b/python/cugraph/cugraph/tests/link_analysis/test_pagerank_mg.py @@ -89,7 +89,7 @@ def setup_function(): @pytest.mark.parametrize("directed", IS_DIRECTED) @pytest.mark.parametrize("has_precomputed_vertex_out_weight", HAS_PRECOMPUTED) @pytest.mark.parametrize("has_guess", HAS_GUESS) -def test_dask_pagerank( +def test_dask_mg_pagerank( dask_client, personalization_perc, directed, @@ -215,7 +215,7 @@ def test_pagerank_invalid_personalization_dtype(dask_client): @pytest.mark.mg -def test_dask_pagerank_transposed_false(dask_client): +def test_dask_mg_pagerank_transposed_false(dask_client): dg = create_distributed_karate_graph(store_transposed=False) warning_msg = ( diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py 
b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py index 1f7c0a9cadb..b56a6baae2b 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard_mg.py @@ -119,7 +119,7 @@ def input_expected_output(input_combo): @pytest.mark.mg -def test_dask_jaccard(dask_client, benchmark, input_expected_output): +def test_dask_mg_jaccard(dask_client, benchmark, input_expected_output): dg = input_expected_output["MGGraph"] @@ -154,7 +154,7 @@ def test_dask_jaccard(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_weighted_jaccard(dask_client): +def test_dask_mg_weighted_jaccard(dask_client): input_data_path = datasets[0] chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py index 220b90cbb47..ce4bf619f47 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py +++ b/python/cugraph/cugraph/tests/link_prediction/test_overlap_mg.py @@ -122,7 +122,7 @@ def input_expected_output(input_combo): # is_single_gpu(), reason="skipping MG testing on Single GPU system" # ) @pytest.mark.mg -def test_dask_overlap(dask_client, benchmark, input_expected_output): +def test_dask_mg_overlap(dask_client, benchmark, input_expected_output): dg = input_expected_output["MGGraph"] @@ -157,7 +157,7 @@ def test_dask_overlap(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_weighted_overlap(): +def test_dask_mg_weighted_overlap(): input_data_path = datasets[0] chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py index d9d013c7e35..af6b60771a0 100644 --- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py +++ 
b/python/cugraph/cugraph/tests/link_prediction/test_sorensen_mg.py @@ -121,7 +121,7 @@ def input_expected_output(input_combo): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -def test_dask_sorensen(dask_client, benchmark, input_expected_output): +def test_dask_mg_sorensen(dask_client, benchmark, input_expected_output): dg = input_expected_output["MGGraph"] @@ -156,7 +156,7 @@ def test_dask_sorensen(dask_client, benchmark, input_expected_output): @pytest.mark.mg -def test_dask_weighted_sorensen(dask_client): +def test_dask_mg_weighted_sorensen(dask_client): input_data_path = datasets[0] chunksize = dcg.get_chunksize(input_data_path) ddf = dask_cudf.read_csv( diff --git a/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py b/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py index 7f5891abdd3..e2f77700958 100644 --- a/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_egonet_mg.py @@ -118,7 +118,7 @@ def input_expected_output(input_combo): @pytest.mark.mg @pytest.mark.skipif(is_single_gpu(), reason="skipping MG testing on Single GPU system") -def test_dask_ego_graphs(dask_client, benchmark, input_expected_output): +def test_dask_mg_ego_graphs(dask_client, benchmark, input_expected_output): dg = input_expected_output["MGGraph"] diff --git a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py index a8aa34710ec..03658c7a06e 100644 --- a/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_random_walks_mg.py @@ -202,7 +202,7 @@ def input_graph(request): @pytest.mark.mg @pytest.mark.cugraph_ops -def test_dask_random_walks(dask_client, benchmark, input_graph): +def test_dask_mg_random_walks(dask_client, benchmark, input_graph): path_data, seeds, max_depth = calc_random_walks(input_graph) df_G = 
input_graph.input_df.compute().reset_index(drop=True) check_random_walks(input_graph, path_data, seeds, max_depth, df_G) diff --git a/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py b/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py index 38b5a2734d6..8ffbecea4fc 100644 --- a/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py +++ b/python/cugraph/cugraph/tests/traversal/test_bfs_mg.py @@ -38,7 +38,7 @@ def setup_function(): # ) @pytest.mark.mg @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_bfs(dask_client, directed): +def test_dask_mg_bfs(dask_client, directed): input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix() @@ -102,7 +102,7 @@ def modify_dataset(df): # ) @pytest.mark.mg @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_bfs_invalid_start(dask_client, directed): +def test_dask_mg_bfs_invalid_start(dask_client, directed): source_vertex = 10 input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix() @@ -138,7 +138,7 @@ def test_dask_bfs_invalid_start(dask_client, directed): # ) @pytest.mark.mg @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_bfs_multi_column_depthlimit(dask_client, directed): +def test_dask_mg_bfs_multi_column_depthlimit(dask_client, directed): gc.collect() input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix() diff --git a/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py b/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py index 1720a051ee7..55bd320c2f1 100644 --- a/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py +++ b/python/cugraph/cugraph/tests/traversal/test_sssp_mg.py @@ -39,7 +39,7 @@ def setup_function(): # ) @pytest.mark.mg @pytest.mark.parametrize("directed", IS_DIRECTED) -def test_dask_sssp(dask_client, directed): +def test_dask_mg_sssp(dask_client, directed): input_data_path = (RAPIDS_DATASET_ROOT_DIR_PATH / "netscience.csv").as_posix() print(f"dataset={input_data_path}") From 
b5d8cbefd28d5560af23a3baa8f0b3bd93627c31 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Tue, 29 Aug 2023 23:42:52 -0700 Subject: [PATCH 4/4] Fix OD shortest distance matrix computation test failures. (#3813) Closing https://github.com/rapidsai/cugraph/issues/3801 I also submitted a minimum reproducer to the slack thrust channel. Authors: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Naim (https://github.com/naimnv) - Joseph Nke (https://github.com/jnke2016) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/3813 --- .../traversal/od_shortest_distances_impl.cuh | 30 ++++++++++--------- .../traversal/od_shortest_distances_test.cpp | 8 ++--- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh index 09e41466393..6a0c5a4a675 100644 --- a/cpp/src/traversal/od_shortest_distances_impl.cuh +++ b/cpp/src/traversal/od_shortest_distances_impl.cuh @@ -210,12 +210,17 @@ size_t compute_kv_store_capacity(size_t new_min_size, int32_t constexpr multi_partition_copy_block_size = 512; // tuning parameter -template +template __global__ void multi_partition_copy( InputIterator input_first, InputIterator input_last, raft::device_span output_buffer_ptrs, PartitionOp partition_op, // returns max_num_partitions to discard + KeyOp key_op, raft::device_span partition_counters) { static_assert(max_num_partitions <= static_cast(std::numeric_limits::max())); @@ -283,7 +288,7 @@ __global__ void multi_partition_copy( if (partition != static_cast(max_num_partitions)) { auto offset = block_start_offsets[partition] + static_cast(tmp_intra_block_offsets[partition] + tmp_offsets[i]); - *(output_buffer_ptrs[partition] + offset) = thrust::get<0>(*(input_first + tmp_idx)); + *(output_buffer_ptrs[partition] + offset) = 
key_op(*(input_first + tmp_idx)); } } tmp_idx += gridDim.x * blockDim.x; @@ -794,6 +799,7 @@ rmm::device_uvector od_shortest_distances( split_thresholds.end(), thrust::get<1>(pair)))); }, + [] __device__(auto pair) { return thrust::get<0>(pair); }, raft::device_span(d_counters.data(), d_counters.size())); std::vector h_counters(d_counters.size()); @@ -912,13 +918,6 @@ rmm::device_uvector od_shortest_distances( thrust::fill( handle.get_thrust_policy(), d_counters.begin(), d_counters.end(), size_t{0}); if (tmp_buffer.size() > 0) { - auto distance_first = thrust::make_transform_iterator( - tmp_buffer.begin(), - [key_to_dist_map = detail::kv_cuco_store_find_device_view_t( - key_to_dist_map.view())] __device__(auto key) { - return key_to_dist_map.find(key); - }); - auto input_first = thrust::make_zip_iterator(tmp_buffer.begin(), distance_first); raft::grid_1d_thread_t update_grid(tmp_buffer.size(), multi_partition_copy_block_size, handle.get_device_properties().maxGridSize[0]); @@ -926,13 +925,15 @@ rmm::device_uvector od_shortest_distances( static_cast(1 /* near queue */ + num_far_buffers); multi_partition_copy <<>>( - input_first, - input_first + tmp_buffer.size(), + tmp_buffer.begin(), + tmp_buffer.end(), raft::device_span(d_buffer_ptrs.data(), d_buffer_ptrs.size()), - [split_thresholds = raft::device_span( + [key_to_dist_map = + detail::kv_cuco_store_find_device_view_t(key_to_dist_map.view()), + split_thresholds = raft::device_span( d_split_thresholds.data(), d_split_thresholds.size()), - invalid_threshold] __device__(auto pair) { - auto dist = thrust::get<1>(pair); + invalid_threshold] __device__(auto key) { + auto dist = key_to_dist_map.find(key); return static_cast( (dist < invalid_threshold) ? 
max_num_partitions /* discard */ @@ -942,6 +943,7 @@ rmm::device_uvector od_shortest_distances( split_thresholds.end(), dist))); }, + thrust::identity{}, raft::device_span(d_counters.data(), d_counters.size())); } std::vector h_counters(d_counters.size()); diff --git a/cpp/tests/traversal/od_shortest_distances_test.cpp b/cpp/tests/traversal/od_shortest_distances_test.cpp index e4fbbdf9275..cc283f24dfd 100644 --- a/cpp/tests/traversal/od_shortest_distances_test.cpp +++ b/cpp/tests/traversal/od_shortest_distances_test.cpp @@ -225,27 +225,27 @@ class Tests_ODShortestDistances using Tests_ODShortestDistances_File = Tests_ODShortestDistances; using Tests_ODShortestDistances_Rmat = Tests_ODShortestDistances; -TEST_P(Tests_ODShortestDistances_File, DISABLED_CheckInt32Int32Float) +TEST_P(Tests_ODShortestDistances_File, CheckInt32Int32Float) { auto param = GetParam(); run_current_test(std::get<0>(param), std::get<1>(param)); } -TEST_P(Tests_ODShortestDistances_Rmat, DISABLED_CheckInt32Int32Float) +TEST_P(Tests_ODShortestDistances_Rmat, CheckInt32Int32Float) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_ODShortestDistances_Rmat, DISABLED_CheckInt32Int64Float) +TEST_P(Tests_ODShortestDistances_Rmat, CheckInt32Int64Float) { auto param = GetParam(); run_current_test( std::get<0>(param), override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } -TEST_P(Tests_ODShortestDistances_Rmat, DISABLED_CheckInt64Int64Float) +TEST_P(Tests_ODShortestDistances_Rmat, CheckInt64Int64Float) { auto param = GetParam(); run_current_test(