From 65df1a271011b65c039eac354259250b1e96b5d1 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:35:17 -0800 Subject: [PATCH 1/9] Fix HITS convergence error. (#4043) We set epsilon in hits call to 1e-6 or 1e-8 in our tests. HITS internally uses max. norm to normalize HITS values after each iteration before computing HITS value changes in two consecutive iterations. Sum of HITS values tends to grow with the number of vertices. Using a fixed epsilon leads to convergence failure in large graphs. This PR updates HITS to compare sum of HITS value changes in two consecutive iterations with `epsilon` * graph_view.number_of_vertices() following networkx documentation (https://networkx.org/documentation/stable/reference/algorithms/generated/networkx.algorithms.link_analysis.hits_alg.hits.html). Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Joseph Nke (https://github.com/jnke2016) - Naim (https://github.com/naimnv) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4043 --- cpp/src/link_analysis/hits_impl.cuh | 3 ++- cpp/tests/c_api/hits_test.c | 6 ++--- cpp/tests/c_api/mg_hits_test.c | 4 ++-- cpp/tests/link_analysis/hits_test.cpp | 28 ++++++++++++++---------- cpp/tests/link_analysis/mg_hits_test.cpp | 18 +++++++-------- 5 files changed, 31 insertions(+), 28 deletions(-) diff --git a/cpp/src/link_analysis/hits_impl.cuh b/cpp/src/link_analysis/hits_impl.cuh index 674046745b1..5cdf1b9dc6a 100644 --- a/cpp/src/link_analysis/hits_impl.cuh +++ b/cpp/src/link_analysis/hits_impl.cuh @@ -80,6 +80,7 @@ std::tuple hits(raft::handle_t const& handle, if (num_vertices == 0) { return std::make_tuple(diff_sum, final_iteration_count); } CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative."); + auto tolerance = static_cast(graph_view.number_of_vertices()) * epsilon; // Check validity of initial guess if supplied if (has_initial_hubs_guess && do_expensive_check) { @@ -171,7 +172,7 @@ std::tuple hits(raft::handle_t const& handle, std::swap(prev_hubs, curr_hubs); iter++; - if (diff_sum < epsilon) { + if (diff_sum < tolerance) { break; } else if (iter >= max_iterations) { CUGRAPH_FAIL("HITS failed to converge."); diff --git a/cpp/tests/c_api/hits_test.c b/cpp/tests/c_api/hits_test.c index c275d883d11..1ebd4f82a51 100644 --- a/cpp/tests/c_api/hits_test.c +++ b/cpp/tests/c_api/hits_test.c @@ -163,7 +163,7 @@ int test_hits() weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0}; weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; // hits wants store_transposed = TRUE @@ -195,7 +195,7 @@ int test_hits_with_transpose() weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0}; weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; // Hits wants store_transposed = TRUE @@ -232,7 +232,7 @@ int test_hits_with_initial() vertex_t h_initial_vertices[] = {0, 1, 2, 3, 4}; weight_t h_initial_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; return generic_hits_test(h_src, diff --git a/cpp/tests/c_api/mg_hits_test.c b/cpp/tests/c_api/mg_hits_test.c index 87371093613..3e10bfc05d6 100644 --- a/cpp/tests/c_api/mg_hits_test.c +++ b/cpp/tests/c_api/mg_hits_test.c @@ -171,7 +171,7 @@ int test_hits(const cugraph_resource_handle_t* handle) weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0}; weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; // hits wants store_transposed = TRUE @@ -203,7 +203,7 @@ int test_hits_with_transpose(const cugraph_resource_handle_t* handle) weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0}; weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; // Hits wants store_transposed = TRUE diff --git a/cpp/tests/link_analysis/hits_test.cpp b/cpp/tests/link_analysis/hits_test.cpp index d0e77769034..6796761e212 100644 --- a/cpp/tests/link_analysis/hits_test.cpp +++ b/cpp/tests/link_analysis/hits_test.cpp @@ -52,9 +52,11 @@ std::tuple, std::vector, double, size_t> hits_re size_t max_iterations, std::optional starting_hub_values, bool normalized, - double tolerance) + double epsilon) { CUGRAPH_EXPECTS(num_vertices > 1, "number of vertices expected to be non-zero"); + auto tolerance = static_cast(num_vertices) * epsilon; + std::vector prev_hubs(num_vertices, result_t{1.0} / num_vertices); std::vector prev_authorities(num_vertices, result_t{1.0} / num_vertices); std::vector curr_hubs(num_vertices); @@ -127,8 +129,8 @@ std::tuple, std::vector, double, size_t> hits_re } struct Hits_Usecase { - bool check_correctness{true}; bool check_initial_input{false}; + bool check_correctness{true}; }; template @@ -175,8 +177,8 @@ class Tests_Hits : public ::testing::TestWithParam d_hubs(graph_view.local_vertex_partition_range_size(), handle.get_stream()); @@ -201,7 +203,7 @@ class Tests_Hits : public ::testing::TestWithParam h_cugraph_hits{}; if (renumber) { @@ -246,8 +248,7 @@ class Tests_Hits : public ::testing::TestWithParam(graph_view.number_of_vertices())) * - threshold_ratio; // skip comparison for low hits vertices (lowly ranked vertices) + 1e-6; // skip comparison for low hits vertices (lowly ranked vertices) auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { return std::abs(lhs - rhs) <= std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); @@ -294,14 +295,17 @@ INSTANTIATE_TEST_SUITE_P( Tests_Hits_File, ::testing::Combine( // enable correctness checks - ::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}), + ::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_Hits_Rmat, // enable correctness checks - ::testing::Combine(::testing::Values(Hits_Usecase{true, false}, + ::testing::Combine(::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase( 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); @@ -315,7 +319,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_Hits_File, ::testing::Combine( // disable correctness checks - ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{false, true}), + ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( @@ -327,7 +331,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_Hits_Rmat, // disable correctness checks for large graphs ::testing::Combine( - ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{false, true}), + ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/link_analysis/mg_hits_test.cpp b/cpp/tests/link_analysis/mg_hits_test.cpp index cf95d03681d..5c89bafd08e 100644 --- a/cpp/tests/link_analysis/mg_hits_test.cpp +++ b/cpp/tests/link_analysis/mg_hits_test.cpp @@ -33,8 +33,8 @@ #include struct Hits_Usecase { - bool check_correctness{true}; bool check_initial_input{false}; + bool check_correctness{true}; }; template @@ -81,7 +81,7 @@ class Tests_MGHits : public ::testing::TestWithParam d_mg_hubs(mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); @@ -110,7 +110,7 @@ class Tests_MGHits : public ::testing::TestWithParam(mg_graph_view.number_of_vertices())) * - threshold_ratio; // skip comparison for low Hits verties (lowly ranked - // vertices) + 1e-6; // skip comparison for low Hits verties (lowly ranked vertices) auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { return std::abs(lhs - rhs) < std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); @@ -274,7 +272,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGHits_File, ::testing::Combine( // enable correctness checks - ::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}), + ::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), @@ -285,7 +283,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGHits_Rmat, ::testing::Combine( // enable correctness checks - ::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}), + ::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( @@ -297,7 +295,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGHits_Rmat, ::testing::Combine( // disable correctness checks for large graphs - ::testing::Values(Hits_Usecase{false, false}), + ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() From c709fc9c4e8d1a434a567b3b2c407f3d51ea9030 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 Dec 2023 12:52:59 -0800 Subject: [PATCH 2/9] Remove CUGRAPH_BUILD_WHEELS and standardize Python builds (#4041) Some minor simplification in advance of the scikit-build-core migration to better align wheel and non-wheel Python builds. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Ray Douglass (https://github.com/raydouglass) - Brad Rees (https://github.com/BradReesWork) URL: https://github.com/rapidsai/cugraph/pull/4041 --- ci/build_wheel_cugraph.sh | 2 +- ci/build_wheel_pylibcugraph.sh | 2 +- python/cugraph/CMakeLists.txt | 25 ++++++++----------------- python/pylibcugraph/CMakeLists.txt | 25 ++++++++----------------- 4 files changed, 18 insertions(+), 36 deletions(-) diff --git a/ci/build_wheel_cugraph.sh b/ci/build_wheel_cugraph.sh index 5b5061f67c2..0a722c88c3e 100755 --- a/ci/build_wheel_cugraph.sh +++ b/ci/build_wheel_cugraph.sh @@ -12,6 +12,6 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME=pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibcugraph export PIP_FIND_LINKS=$(pwd)/local-pylibcugraph -export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" +export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" ./ci/build_wheel.sh cugraph python/cugraph diff --git a/ci/build_wheel_pylibcugraph.sh b/ci/build_wheel_pylibcugraph.sh index 8d365bc250b..9e236c145ce 100755 --- a/ci/build_wheel_pylibcugraph.sh +++ b/ci/build_wheel_pylibcugraph.sh @@ -3,6 +3,6 @@ set -euo pipefail -export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" +export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" ./ci/build_wheel.sh pylibcugraph python/pylibcugraph diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt index a1ec12c6e07..99936b23a8c 100644 --- a/python/cugraph/CMakeLists.txt +++ b/python/cugraph/CMakeLists.txt @@ -38,7 +38,6 @@ project( option(FIND_CUGRAPH_CPP "Search for existing CUGRAPH C++ installations before defaulting to local files" OFF ) -option(CUGRAPH_BUILD_WHEELS "Whether this build is generating a Python wheel." OFF) option(USE_CUGRAPH_OPS "Enable all functions that call cugraph-ops" ON) if(NOT USE_CUGRAPH_OPS) @@ -59,22 +58,14 @@ if(NOT cugraph_FOUND) set(BUILD_TESTS OFF) set(BUILD_CUGRAPH_MG_TESTS OFF) set(BUILD_CUGRAPH_OPS_CPP_TESTS OFF) - - set(_exclude_from_all "") - if(CUGRAPH_BUILD_WHEELS) - # Statically link dependencies if building wheels - set(CUDA_STATIC_RUNTIME ON) - set(USE_RAFT_STATIC ON) - set(CUGRAPH_COMPILE_RAFT_LIB ON) - set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) - set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) - set(ALLOW_CLONE_CUGRAPH_OPS ON) - - # Don't install the cuML C++ targets into wheels - set(_exclude_from_all EXCLUDE_FROM_ALL) - endif() - - add_subdirectory(../../cpp cugraph-cpp ${_exclude_from_all}) + set(CUDA_STATIC_RUNTIME ON) + set(USE_RAFT_STATIC ON) + set(CUGRAPH_COMPILE_RAFT_LIB ON) + set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) + set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) + set(ALLOW_CLONE_CUGRAPH_OPS ON) + + add_subdirectory(../../cpp cugraph-cpp EXCLUDE_FROM_ALL) set(cython_lib_dir cugraph) install(TARGETS cugraph DESTINATION ${cython_lib_dir}) diff --git a/python/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/CMakeLists.txt index 7d5dc790ad0..e1250cb2edb 100644 --- a/python/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/CMakeLists.txt @@ -38,7 +38,6 @@ project( option(FIND_CUGRAPH_CPP "Search for existing CUGRAPH C++ installations before defaulting to local files" OFF ) -option(CUGRAPH_BUILD_WHEELS "Whether we're building a wheel for pypi" OFF) option(USE_CUGRAPH_OPS "Enable all functions that call cugraph-ops" ON) if(NOT USE_CUGRAPH_OPS) @@ -59,22 +58,14 @@ if (NOT cugraph_FOUND) set(BUILD_TESTS OFF) set(BUILD_CUGRAPH_MG_TESTS OFF) set(BUILD_CUGRAPH_OPS_CPP_TESTS OFF) - - set(_exclude_from_all "") - if(CUGRAPH_BUILD_WHEELS) - # Statically link dependencies if building wheels - set(CUDA_STATIC_RUNTIME ON) - set(USE_RAFT_STATIC ON) - set(CUGRAPH_COMPILE_RAFT_LIB ON) - set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) - set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) - set(ALLOW_CLONE_CUGRAPH_OPS ON) - - # Don't install the cuML C++ targets into wheels - set(_exclude_from_all EXCLUDE_FROM_ALL) - endif() - - add_subdirectory(../../cpp cugraph-cpp ${_exclude_from_all}) + set(CUDA_STATIC_RUNTIME ON) + set(USE_RAFT_STATIC ON) + set(CUGRAPH_COMPILE_RAFT_LIB ON) + set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) + set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) + set(ALLOW_CLONE_CUGRAPH_OPS ON) + + add_subdirectory(../../cpp cugraph-cpp EXCLUDE_FROM_ALL) set(cython_lib_dir pylibcugraph) install(TARGETS cugraph DESTINATION ${cython_lib_dir}) From 1df62176c3d5a8addaad2a910c489c7426e8f6a4 Mon Sep 17 00:00:00 2001 From: Vyas Ramasubramani Date: Wed, 6 Dec 2023 16:12:44 -0800 Subject: [PATCH 3/9] Update dependencies.yaml to new pip index (#4045) This PR changes all references to pypi.nvidia.com to pypi.anaconda.org/rapidsai-wheels-nightly. Authors: - Vyas Ramasubramani (https://github.com/vyasr) Approvers: - Ray Douglass (https://github.com/raydouglass) URL: https://github.com/rapidsai/cugraph/pull/4045 --- dependencies.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/dependencies.yaml b/dependencies.yaml index baa08c37413..b5c1fb2fa2d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -511,6 +511,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -536,6 +537,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -561,6 +563,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -586,6 +589,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -611,6 +615,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -636,6 +641,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -661,6 +667,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: From 18ab76b73680e9269be89728613cb55081f9833e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Fri, 8 Dec 2023 13:41:24 -0800 Subject: [PATCH 4/9] Update for CCCL 2.x (#3862) This PR adds `cuda::proclaim_return_type` to device lambdas used in `thrust::transform` and `thrust::make_transform_iterator`. This PR requires libcudacxx 2.1.0, which was provided by https://github.com/rapidsai/rapids-cmake/pull/464. Closes #3863. Authors: - Seunghwa Kang (https://github.com/seunghwak) - Bradley Dice (https://github.com/bdice) Approvers: - Bradley Dice (https://github.com/bdice) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/3862 --- cpp/include/cugraph/utilities/misc_utils.cuh | 5 +- .../cugraph/utilities/shuffle_comm.cuh | 48 ++++++----- cpp/src/community/detail/mis_impl.cuh | 16 ++-- cpp/src/community/detail/refine_impl.cuh | 39 +++++---- cpp/src/detail/collect_local_vertex_values.cu | 5 +- cpp/src/generators/erdos_renyi_generator.cu | 24 +++--- cpp/src/generators/simple_generators.cu | 39 +++++---- cpp/src/mtmg/vertex_result.cu | 10 ++- ...r_v_random_select_transform_outgoing_e.cuh | 12 ++- ...v_transform_reduce_incoming_outgoing_e.cuh | 25 +++--- .../prims/update_edge_src_dst_property.cuh | 82 +++++++++++-------- cpp/src/sampling/random_walks.cuh | 23 ++++-- .../sampling_post_processing_impl.cuh | 12 ++- cpp/src/structure/graph_view_impl.cuh | 6 +- .../sampling/sampling_post_processing_test.cu | 56 +++++++------ 15 files changed, 235 insertions(+), 167 deletions(-) diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 28e2853727f..04aeac49c9d 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -26,6 +26,8 @@ #include #include +#include + #include #include #include @@ -44,7 +46,8 @@ std::tuple, std::vector> compute_offset_aligned_ed { auto search_offset_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{1}), - [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }); + cuda::proclaim_return_type( + [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; })); auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size; if (num_chunks > 1) { diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index ab6a54cc1c0..414d9b36992 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -40,6 +40,8 @@ #include #include +#include + #include #include #include @@ -197,12 +199,13 @@ void multi_partition(ValueIterator value_first, value_last, thrust::make_zip_iterator( thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), - [value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) { - auto group_id = value_to_group_id_op(value); - cuda::std::atomic_ref counter(counts[group_id - group_first]); - return thrust::make_tuple(group_id, - counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); - }); + cuda::proclaim_return_type>( + [value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) { + auto group_id = value_to_group_id_op(value); + cuda::std::atomic_ref counter(counts[group_id - group_first]); + return thrust::make_tuple(group_id, + counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); + })); rmm::device_uvector displacements(num_groups, stream_view); thrust::exclusive_scan( @@ -245,17 +248,19 @@ void multi_partition(KeyIterator key_first, rmm::device_uvector group_ids(num_keys, stream_view); rmm::device_uvector intra_partition_offsets(num_keys, stream_view); thrust::fill(rmm::exec_policy(stream_view), counts.begin(), counts.end(), size_t{0}); - thrust::transform(rmm::exec_policy(stream_view), - key_first, - key_last, - thrust::make_zip_iterator( - thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), - [key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) { - auto group_id = key_to_group_id_op(key); - cuda::std::atomic_ref counter(counts[group_id - group_first]); - return thrust::make_tuple( - group_id, counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); - }); + thrust::transform( + rmm::exec_policy(stream_view), + key_first, + key_last, + thrust::make_zip_iterator( + thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), + cuda::proclaim_return_type>( + [key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) { + auto group_id = key_to_group_id_op(key); + cuda::std::atomic_ref counter(counts[group_id - group_first]); + return thrust::make_tuple(group_id, + counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); + })); rmm::device_uvector displacements(num_groups, stream_view); thrust::exclusive_scan( @@ -761,8 +766,9 @@ rmm::device_uvector groupby_and_count(ValueIterator tx_value_first /* [I stream_view); auto group_id_first = thrust::make_transform_iterator( - tx_value_first, - [value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); }); + tx_value_first, cuda::proclaim_return_type([value_to_group_id_op] __device__(auto value) { + return value_to_group_id_op(value); + })); rmm::device_uvector d_tx_dst_ranks(num_groups, stream_view); rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream_view); auto rank_count_pair_first = thrust::make_zip_iterator( @@ -795,7 +801,9 @@ rmm::device_uvector groupby_and_count(VertexIterator tx_key_first /* [IN stream_view); auto group_id_first = thrust::make_transform_iterator( - tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); }); + tx_key_first, cuda::proclaim_return_type([key_to_group_id_op] __device__(auto key) { + return key_to_group_id_op(key); + })); rmm::device_uvector d_tx_dst_ranks(num_groups, stream_view); rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream_view); auto rank_count_pair_first = thrust::make_zip_iterator( diff --git a/cpp/src/community/detail/mis_impl.cuh b/cpp/src/community/detail/mis_impl.cuh index bcd71af5a08..2659a982183 100644 --- a/cpp/src/community/detail/mis_impl.cuh +++ b/cpp/src/community/detail/mis_impl.cuh @@ -37,6 +37,8 @@ #include #include +#include + #include namespace cugraph { @@ -78,13 +80,13 @@ rmm::device_uvector maximal_independent_set( thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin()); // Set ranks of zero out-degree vetices to std::numeric_limits::lowest() - thrust::transform_if( - handle.get_thrust_policy(), - out_degrees.begin(), - out_degrees.end(), - ranks.begin(), - [] __device__(auto) { return std::numeric_limits::lowest(); }, - [] __device__(auto deg) { return deg == 0; }); + thrust::transform_if(handle.get_thrust_policy(), + out_degrees.begin(), + out_degrees.end(), + ranks.begin(), + cuda::proclaim_return_type( + [] __device__(auto) { return std::numeric_limits::lowest(); }), + [] __device__(auto deg) { return deg == 0; }); out_degrees.resize(0, handle.get_stream()); out_degrees.shrink_to_fit(handle.get_stream()); diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index ebaae498d04..eb874657f01 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -46,6 +46,8 @@ #include #include +#include + CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) // FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. @@ -213,16 +215,17 @@ refine_clustering( : detail::edge_minor_property_view_t( louvain_assignment_of_vertices.data(), vertex_t{0}), *edge_weight_view, - [] __device__(auto src, auto dst, auto src_cluster, auto dst_cluster, auto wt) { - weight_t weighted_cut_contribution{0}; + cuda::proclaim_return_type( + [] __device__(auto src, auto dst, auto src_cluster, auto dst_cluster, auto wt) { + weight_t weighted_cut_contribution{0}; - if (src == dst) // self loop - weighted_cut_contribution = 0; - else if (src_cluster == dst_cluster) - weighted_cut_contribution = wt; + if (src == dst) // self loop + weighted_cut_contribution = 0; + else if (src_cluster == dst_cluster) + weighted_cut_contribution = wt; - return weighted_cut_contribution; - }, + return weighted_cut_contribution; + }), weight_t{0}, cugraph::reduce_op::plus{}, weighted_cut_of_vertices_to_louvain.begin()); @@ -243,13 +246,14 @@ refine_clustering( wcut_deg_and_cluster_vol_triple_begin, wcut_deg_and_cluster_vol_triple_end, singleton_and_connected_flags.begin(), - [resolution, total_edge_weight] __device__(auto wcut_wdeg_and_louvain_volume) { + cuda::proclaim_return_type([resolution, total_edge_weight] __device__( + auto wcut_wdeg_and_louvain_volume) { auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume); auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume); auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume); - return wcut > - (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight); - }); + return static_cast( + wcut > (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight)); + })); edge_src_property_t src_louvain_cluster_weight_cache(handle); edge_src_property_t src_cut_to_louvain_cache(handle); @@ -718,11 +722,12 @@ refine_clustering( vertices_in_mis.begin(), vertices_in_mis.end(), dst_vertices.begin(), - [dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()), - v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { - auto dst = *(dst_first + v - v_first); - return dst; - }); + cuda::proclaim_return_type( + [dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { + auto dst = *(dst_first + v - v_first); + return dst; + })); cugraph::resize_dataframe_buffer(gain_and_dst_output_pairs, 0, handle.get_stream()); cugraph::shrink_to_fit_dataframe_buffer(gain_and_dst_output_pairs, handle.get_stream()); diff --git a/cpp/src/detail/collect_local_vertex_values.cu b/cpp/src/detail/collect_local_vertex_values.cu index 9d5d2cb553b..795902dfd87 100644 --- a/cpp/src/detail/collect_local_vertex_values.cu +++ b/cpp/src/detail/collect_local_vertex_values.cu @@ -19,6 +19,8 @@ #include #include +#include + namespace cugraph { namespace detail { @@ -64,7 +66,8 @@ rmm::device_uvector collect_local_vertex_values_from_ext_vertex_value_p auto vertex_iterator = thrust::make_transform_iterator( d_vertices.begin(), - [local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; }); + cuda::proclaim_return_type( + [local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; })); d_local_values.resize(local_vertex_last - local_vertex_first, handle.get_stream()); thrust::fill( diff --git a/cpp/src/generators/erdos_renyi_generator.cu b/cpp/src/generators/erdos_renyi_generator.cu index 6d847ae0bde..8448eeaf960 100644 --- a/cpp/src/generators/erdos_renyi_generator.cu +++ b/cpp/src/generators/erdos_renyi_generator.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -28,6 +28,8 @@ #include #include +#include + namespace cugraph { template @@ -42,12 +44,13 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, "Implementation cannot support specified value"); auto random_iterator = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [seed] __device__(size_t index) { + thrust::make_counting_iterator(0), + cuda::proclaim_return_type([seed] __device__(size_t index) { thrust::default_random_engine rng(seed); thrust::uniform_real_distribution dist(0.0, 1.0); rng.discard(index); return dist(rng); - }); + })); size_t count = thrust::count_if(handle.get_thrust_policy(), random_iterator, @@ -69,13 +72,14 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, indices_v.begin(), indices_v.end(), thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), src_v.end())), - [num_vertices] __device__(size_t index) { - size_t src = index / num_vertices; - size_t dst = index % num_vertices; - - return thrust::make_tuple(static_cast(src), - static_cast(dst)); - }); + cuda::proclaim_return_type>( + [num_vertices] __device__(size_t index) { + size_t src = index / num_vertices; + size_t dst = index % num_vertices; + + return thrust::make_tuple(static_cast(src), + static_cast(dst)); + })); handle.sync_stream(); diff --git a/cpp/src/generators/simple_generators.cu b/cpp/src/generators/simple_generators.cu index 6dba63909c3..65647be5de0 100644 --- a/cpp/src/generators/simple_generators.cu +++ b/cpp/src/generators/simple_generators.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -27,6 +27,8 @@ #include #include +#include + #include namespace cugraph { @@ -264,23 +266,24 @@ generate_complete_graph_edgelist( auto transform_iter = thrust::make_transform_iterator( thrust::make_counting_iterator(0), - [base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) { - size_t graph_index = index / (num_vertices * num_vertices); - size_t local_index = index % (num_vertices * num_vertices); - - vertex_t src = base_vertex_id + static_cast(local_index / num_vertices); - vertex_t dst = base_vertex_id + static_cast(local_index % num_vertices); - - if (src == dst) { - src = invalid_vertex; - dst = invalid_vertex; - } else { - src += (graph_index * num_vertices); - dst += (graph_index * num_vertices); - } - - return thrust::make_tuple(src, dst); - }); + cuda::proclaim_return_type>( + [base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) { + size_t graph_index = index / (num_vertices * num_vertices); + size_t local_index = index % (num_vertices * num_vertices); + + vertex_t src = base_vertex_id + static_cast(local_index / num_vertices); + vertex_t dst = base_vertex_id + static_cast(local_index % num_vertices); + + if (src == dst) { + src = invalid_vertex; + dst = invalid_vertex; + } else { + src += (graph_index * num_vertices); + dst += (graph_index * num_vertices); + } + + return thrust::make_tuple(src, dst); + })); output_iterator = thrust::copy_if(handle.get_thrust_policy(), transform_iter, diff --git a/cpp/src/mtmg/vertex_result.cu b/cpp/src/mtmg/vertex_result.cu index 5b1825656ff..414f1bdfa88 100644 --- a/cpp/src/mtmg/vertex_result.cu +++ b/cpp/src/mtmg/vertex_result.cu @@ -21,6 +21,7 @@ #include +#include #include namespace cugraph { @@ -91,10 +92,11 @@ rmm::device_uvector vertex_result_view_t::gather( auto vertex_partition = vertex_partition_device_view_t(vertex_partition_view); - auto iter = - thrust::make_transform_iterator(local_vertices.begin(), [vertex_partition] __device__(auto v) { + auto iter = thrust::make_transform_iterator( + local_vertices.begin(), + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); thrust::gather(handle.get_thrust_policy(), iter, @@ -111,7 +113,7 @@ rmm::device_uvector vertex_result_view_t::gather( vertex_gpu_ids.begin(), vertex_gpu_ids.end(), thrust::make_zip_iterator(local_vertices.begin(), vertex_pos.begin(), tmp_result.begin()), - [] __device__(int gpu) { return gpu; }, + thrust::identity{}, handle.get_stream()); // diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index 5fee97790f1..4c5c43c7d1e 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -44,6 +44,8 @@ #include #include +#include + #include #include @@ -596,8 +598,9 @@ rmm::device_uvector get_sampling_index_without_replacement( multiplier_t{high_partition_over_sampling_K}), thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), - [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( - size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }), + cuda::proclaim_return_type( + [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( + size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })), handle.get_stream()); if (tmp_storage_bytes > d_tmp_storage.size()) { d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); @@ -615,8 +618,9 @@ rmm::device_uvector get_sampling_index_without_replacement( multiplier_t{high_partition_over_sampling_K}), thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), - [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( - size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }), + cuda::proclaim_return_type( + [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( + size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })), handle.get_stream()); // copy the neighbor indices back to sample_nbr_indices diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 0b6c6a554bb..5de2dbc18e2 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include #include #include @@ -940,16 +942,19 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, minor_init); auto value_first = thrust::make_transform_iterator( view.value_first(), - [reduce_op, minor_init] __device__(auto val) { return reduce_op(val, minor_init); }); - thrust::scatter( - handle.get_thrust_policy(), - value_first + (*minor_key_offsets)[i], - value_first + (*minor_key_offsets)[i + 1], - thrust::make_transform_iterator( - (*(view.keys())).begin() + (*minor_key_offsets)[i], - [key_first = graph_view.vertex_partition_range_first( - this_segment_vertex_partition_id)] __device__(auto key) { return key - key_first; }), - tx_buffer_first); + cuda::proclaim_return_type( + [reduce_op, minor_init] __device__(auto val) { return reduce_op(val, minor_init); })); + thrust::scatter(handle.get_thrust_policy(), + value_first + (*minor_key_offsets)[i], + value_first + (*minor_key_offsets)[i + 1], + thrust::make_transform_iterator( + (*(view.keys())).begin() + (*minor_key_offsets)[i], + cuda::proclaim_return_type( + [key_first = graph_view.vertex_partition_range_first( + this_segment_vertex_partition_id)] __device__(auto key) { + return key - key_first; + })), + tx_buffer_first); device_reduce(major_comm, tx_buffer_first, vertex_value_output_first, diff --git a/cpp/src/prims/update_edge_src_dst_property.cuh b/cpp/src/prims/update_edge_src_dst_property.cuh index b8621e122c6..18bdf5bcf2d 100644 --- a/cpp/src/prims/update_edge_src_dst_property.cuh +++ b/cpp/src/prims/update_edge_src_dst_property.cuh @@ -44,6 +44,8 @@ #include #include +#include + #include #include #include @@ -181,13 +183,14 @@ void update_edge_major_property(raft::handle_t const& handle, handle.get_stream()); auto bool_first = thrust::make_transform_iterator( (*edge_partition_keys)[i].begin(), - [rx_value_first, - v_first = graph_view.vertex_partition_range_first( - major_range_vertex_partition_id)] __device__(auto v) { - auto v_offset = v - v_first; - return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & - packed_bool_mask(v_offset)); - }); + cuda::proclaim_return_type( + [rx_value_first, + v_first = graph_view.vertex_partition_range_first( + major_range_vertex_partition_id)] __device__(auto v) { + auto v_offset = v - v_first; + return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & + packed_bool_mask(v_offset)); + })); pack_bools(handle, bool_first, bool_first + (*edge_partition_keys)[i].size(), @@ -202,8 +205,9 @@ void update_edge_major_property(raft::handle_t const& handle, auto v_offset_first = thrust::make_transform_iterator( (*edge_partition_keys)[i].begin(), - [v_first = graph_view.vertex_partition_range_first( - major_range_vertex_partition_id)] __device__(auto v) { return v - v_first; }); + cuda::proclaim_return_type( + [v_first = graph_view.vertex_partition_range_first( + major_range_vertex_partition_id)] __device__(auto v) { return v - v_first; })); thrust::gather(handle.get_thrust_policy(), v_offset_first, v_offset_first + (*edge_partition_keys)[i].size(), @@ -312,21 +316,24 @@ void update_edge_major_property(raft::handle_t const& handle, graph_view.local_vertex_partition_view()); if constexpr (packed_bool) { auto bool_first = thrust::make_transform_iterator( - vertex_first, [vertex_property_input_first, vertex_partition] __device__(auto v) { + vertex_first, + cuda::proclaim_return_type([vertex_property_input_first, + vertex_partition] __device__(auto v) { auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); return static_cast( *(vertex_property_input_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_bools(handle, bool_first, bool_first + thrust::distance(vertex_first, vertex_last), rx_value_first); } else { - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + auto map_first = thrust::make_transform_iterator( + vertex_first, + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) thrust::gather(handle.get_thrust_policy(), @@ -391,9 +398,10 @@ void update_edge_major_property(raft::handle_t const& handle, }); } else { auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [edge_partition] __device__(auto v) { + rx_vertices.begin(), + cuda::proclaim_return_type([edge_partition] __device__(auto v) { return edge_partition.major_offset_from_major_nocheck(v); - }); + })); // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), @@ -593,13 +601,14 @@ void update_edge_minor_property(raft::handle_t const& handle, auto bool_first = thrust::make_transform_iterator( (*edge_partition_keys).begin() + key_offsets[j], - [rx_value_first, - v_first = graph_view.vertex_partition_range_first( - minor_range_vertex_partition_id)] __device__(auto v) { - auto v_offset = v - v_first; - return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & - packed_bool_mask(v_offset)); - }); + cuda::proclaim_return_type( + [rx_value_first, + v_first = graph_view.vertex_partition_range_first( + minor_range_vertex_partition_id)] __device__(auto v) { + auto v_offset = v - v_first; + return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & + packed_bool_mask(v_offset)); + })); pack_unaligned_bools( handle, bool_first, @@ -611,10 +620,10 @@ void update_edge_minor_property(raft::handle_t const& handle, std::get>(key_offsets_or_rx_displacements); auto bool_first = thrust::make_transform_iterator( thrust::make_counting_iterator(vertex_t{0}), - [rx_value_first] __device__(vertex_t v_offset) { + cuda::proclaim_return_type([rx_value_first] __device__(vertex_t v_offset) { return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_unaligned_bools( handle, bool_first, @@ -630,8 +639,9 @@ void update_edge_minor_property(raft::handle_t const& handle, auto v_offset_first = thrust::make_transform_iterator( (*edge_partition_keys).begin() + key_offsets[j], - [v_first = graph_view.vertex_partition_range_first( - minor_range_vertex_partition_id)] __device__(auto v) { return v - v_first; }); + cuda::proclaim_return_type( + [v_first = graph_view.vertex_partition_range_first( + minor_range_vertex_partition_id)] __device__(auto v) { return v - v_first; })); thrust::gather(handle.get_thrust_policy(), v_offset_first, v_offset_first + (key_offsets[j + 1] - key_offsets[j]), @@ -718,21 +728,24 @@ void update_edge_minor_property(raft::handle_t const& handle, graph_view.local_vertex_partition_view()); if constexpr (packed_bool) { auto bool_first = thrust::make_transform_iterator( - vertex_first, [vertex_property_input_first, vertex_partition] __device__(auto v) { + vertex_first, + cuda::proclaim_return_type([vertex_property_input_first, + vertex_partition] __device__(auto v) { auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); return static_cast( *(vertex_property_input_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_bools(handle, bool_first, bool_first + thrust::distance(vertex_first, vertex_last), rx_value_first); } else { - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + auto map_first = thrust::make_transform_iterator( + vertex_first, + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) thrust::gather(handle.get_thrust_policy(), @@ -799,9 +812,10 @@ void update_edge_minor_property(raft::handle_t const& handle, }); } else { auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [edge_partition] __device__(auto v) { + rx_vertices.begin(), + cuda::proclaim_return_type([edge_partition] __device__(auto v) { return edge_partition.minor_offset_from_minor_nocheck(v); - }); + })); // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 6a7334e9f1a..5a9ded02009 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -52,6 +52,8 @@ #include #include +#include + #include #include // FIXME: requirement for temporary std::getenv() #include @@ -378,7 +380,8 @@ struct random_walker_t { // scatter d_src_init_v to coalesced vertex vector: // - auto dlambda = [stride = max_depth_] __device__(auto indx) { return indx * stride; }; + auto dlambda = cuda::proclaim_return_type( + [stride = max_depth_] __device__(auto indx) { return indx * stride; }); // use the transform iterator as map: // @@ -539,10 +542,11 @@ struct random_walker_t { // delta = ptr_d_sizes[indx] - 1 // - auto dlambda = [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { - auto delta = ptr_d_sizes[indx] - 1; - return ptr_d_coalesced[indx * stride + delta]; - }; + auto dlambda = cuda::proclaim_return_type( + [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - 1; + return ptr_d_coalesced[indx * stride + delta]; + }); // use the transform iterator as map: // @@ -587,10 +591,11 @@ struct random_walker_t { { index_t const* ptr_d_sizes = original::raw_const_ptr(d_sizes); - auto dlambda = [stride, adjust, ptr_d_sizes] __device__(auto indx) { - auto delta = ptr_d_sizes[indx] - adjust - 1; - return indx * stride + delta; - }; + auto dlambda = + cuda::proclaim_return_type([stride, adjust, ptr_d_sizes] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - adjust - 1; + return indx * stride + delta; + }); // use the transform iterator as map: // diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index 77d4f2d865f..852d82e78ab 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -40,6 +40,8 @@ #include #include +#include + #include namespace cugraph { @@ -1229,10 +1231,12 @@ renumber_and_compress_sampled_edgelist( auto pair_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), (*compressed_hops).begin()); auto value_pair_first = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_t{0}), [num_hops] __device__(size_t i) { - return thrust::make_tuple(static_cast(i / num_hops), - static_cast(i % num_hops)); - }); + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type>( + [num_hops] __device__(size_t i) { + return thrust::make_tuple(static_cast(i / num_hops), + static_cast(i % num_hops)); + })); thrust::upper_bound(handle.get_thrust_policy(), pair_first, pair_first + (*compressed_label_indices).size(), diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 37a553dcdbd..a40747f13f7 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include #include #include @@ -159,14 +161,14 @@ rmm::device_uvector compute_major_degrees( thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_hypersparse_first - major_range_first), local_degrees.begin(), - [offsets, masks] __device__(auto i) { + cuda::proclaim_return_type([offsets, masks] __device__(auto i) { auto local_degree = offsets[i + 1] - offsets[i]; if (masks) { local_degree = static_cast( detail::count_set_bits(*masks, offsets[i], local_degree)); } return local_degree; - }); + })); if (use_dcs) { auto dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; auto dcs_nzd_vertex_count = (*edge_partition_dcs_nzd_vertex_counts)[i]; diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu index e5267d75ac2..6be735c3482 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -38,6 +38,8 @@ #include #include +#include + struct SamplingPostProcessing_Usecase { size_t num_labels{}; size_t num_seeds_per_label{}; @@ -318,15 +320,16 @@ bool check_renumber_map_invariants( auto renumbered_merged_vertex_first = thrust::make_transform_iterator( merged_vertices.begin(), - [sorted_org_vertices = - raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t major) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }); + cuda::proclaim_return_type( + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t major) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + })); thrust::reduce_by_key(handle.get_thrust_policy(), sort_key_first, @@ -1020,23 +1023,24 @@ class Tests_SamplingPostProcessing ? this_label_output_edgelist_srcs.begin() : this_label_output_edgelist_dsts.begin()) + old_size, - [offsets = raft::device_span(d_offsets.data(), d_offsets.size()), - nzd_vertices = - renumbered_and_compressed_nzd_vertices - ? thrust::make_optional>( - (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, - (offset_end_offset - offset_start_offset) - 1) - : thrust::nullopt, - base_v] __device__(size_t i) { - auto idx = static_cast(thrust::distance( - offsets.begin() + 1, - thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), i))); - if (nzd_vertices) { - return (*nzd_vertices)[idx]; - } else { - return base_v + static_cast(idx); - } - }); + cuda::proclaim_return_type( + [offsets = raft::device_span(d_offsets.data(), d_offsets.size()), + nzd_vertices = + renumbered_and_compressed_nzd_vertices + ? thrust::make_optional>( + (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, + (offset_end_offset - offset_start_offset) - 1) + : thrust::nullopt, + base_v] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), i))); + if (nzd_vertices) { + return (*nzd_vertices)[idx]; + } else { + return base_v + static_cast(idx); + } + })); thrust::copy(handle.get_thrust_policy(), renumbered_and_compressed_edgelist_minors.begin() + h_offsets[0], renumbered_and_compressed_edgelist_minors.begin() + h_offsets.back(), From 16550034e5f33868bd492fd7ce051e84d2f91240 Mon Sep 17 00:00:00 2001 From: Tingyu Wang Date: Mon, 11 Dec 2023 13:00:59 -0500 Subject: [PATCH 5/9] Add `HeteroGATConv` to `cugraph-pyg` (#3914) Fixes https://github.com/rapidsai/graph_dl/issues/311 Adding @stadlmax POC code to cugraph-pyg Authors: - Tingyu Wang (https://github.com/tingyu66) - Brad Rees (https://github.com/BradReesWork) Approvers: - Brad Rees (https://github.com/BradReesWork) - Alex Barghi (https://github.com/alexbarghi-nv) URL: https://github.com/rapidsai/cugraph/pull/3914 --- .../cugraph_pyg/nn/conv/__init__.py | 2 + .../cugraph_pyg/nn/conv/hetero_gat_conv.py | 265 ++++++++++++++++++ .../cugraph-pyg/cugraph_pyg/tests/conftest.py | 29 ++ .../tests/nn/test_hetero_gat_conv.py | 132 +++++++++ 4 files changed, 428 insertions(+) create mode 100644 python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py create mode 100644 python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py index 9c9dcdb43bb..bef3a023b93 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py @@ -13,6 +13,7 @@ from .gat_conv import GATConv from .gatv2_conv import GATv2Conv +from .hetero_gat_conv import HeteroGATConv from .rgcn_conv import RGCNConv from .sage_conv import SAGEConv from .transformer_conv import TransformerConv @@ -20,6 +21,7 @@ __all__ = [ "GATConv", "GATv2Conv", + "HeteroGATConv", "RGCNConv", "SAGEConv", "TransformerConv", diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py new file mode 100644 index 00000000000..3b717552a96 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py @@ -0,0 +1,265 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Union +from collections import defaultdict + +from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import mha_gat_n2n + +from .base import BaseConv + +torch = import_optional("torch") +torch_geometric = import_optional("torch_geometric") + + +class HeteroGATConv(BaseConv): + r"""The graph attentional operator on heterogeneous graphs, where a separate + `GATConv` is applied on the homogeneous graph for each edge type. Compared + with directly wrapping `GATConv`s with `HeteroConv`, `HeteroGATConv` fuses + all the linear transformation associated with each node type together into 1 + GEMM call, to improve the performance on GPUs. + + Parameters + ---------- + in_channels : int or Dict[str, int]) + Size of each input sample of every node type. + + out_channels : int + Size of each output sample. + + node_types : List[str] + List of Node types. + + edge_types : List[Tuple[str, str, str]] + List of Edge types. + + heads : int, optional (default=1) + Number of multi-head-attentions. + + concat : bool, optional (default=True): + If set to :obj:`False`, the multi-head attentions are averaged instead + of concatenated. + + negative_slope : float, optional (default=0.2) + LeakyReLU angle of the negative slope. + + bias : bool, optional (default=True) + If set to :obj:`False`, the layer will not learn an additive bias. + + aggr : str, optional (default="sum") + The aggregation scheme to use for grouping node embeddings generated by + different relations. Choose from "sum", "mean", "min", "max". + """ + + def __init__( + self, + in_channels: Union[int, dict[str, int]], + out_channels: int, + node_types: list[str], + edge_types: list[tuple[str, str, str]], + heads: int = 1, + concat: bool = True, + negative_slope: float = 0.2, + bias: bool = True, + aggr: str = "sum", + ): + major, minor, patch = torch_geometric.__version__.split(".")[:3] + pyg_version = tuple(map(int, [major, minor, patch])) + if pyg_version < (2, 4, 0): + raise RuntimeError(f"{self.__class__.__name__} requires pyg >= 2.4.0.") + + super().__init__() + + if isinstance(in_channels, int): + in_channels = dict.fromkeys(node_types, in_channels) + self.in_channels = in_channels + self.out_channels = out_channels + + self.node_types = node_types + self.edge_types = edge_types + self.num_heads = heads + self.concat_heads = concat + + self.negative_slope = negative_slope + self.aggr = aggr + + self.relations_per_ntype = defaultdict(lambda: ([], [])) + + lin_weights = dict.fromkeys(self.node_types) + attn_weights = dict.fromkeys(self.edge_types) + biases = dict.fromkeys(self.edge_types) + + ParameterDict = torch_geometric.nn.parameter_dict.ParameterDict + + for edge_type in self.edge_types: + src_type, _, dst_type = edge_type + self.relations_per_ntype[src_type][0].append(edge_type) + if src_type != dst_type: + self.relations_per_ntype[dst_type][1].append(edge_type) + + attn_weights[edge_type] = torch.empty( + 2 * self.num_heads * self.out_channels + ) + + if bias and concat: + biases[edge_type] = torch.empty(self.num_heads * out_channels) + elif bias: + biases[edge_type] = torch.empty(out_channels) + else: + biases[edge_type] = None + + for ntype in self.node_types: + n_src_rel = len(self.relations_per_ntype[ntype][0]) + n_dst_rel = len(self.relations_per_ntype[ntype][1]) + n_rel = n_src_rel + n_dst_rel + + lin_weights[ntype] = torch.empty( + (n_rel * self.num_heads * self.out_channels, self.in_channels[ntype]) + ) + + self.lin_weights = ParameterDict(lin_weights) + self.attn_weights = ParameterDict(attn_weights) + + if bias: + self.bias = ParameterDict(biases) + else: + self.register_parameter("bias", None) + + self.reset_parameters() + + def split_tensors( + self, x_fused_dict: dict[str, torch.Tensor], dim: int + ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: + """Split fused tensors into chunks based on edge types. + + Parameters + ---------- + x_fused_dict : dict[str, torch.Tensor] + A dictionary to hold node feature for each node type. The key is + node type; the value is a fused tensor that account for all + relations for that node type. + + dim : int + Dimension along which to split the fused tensor. + + Returns + ------- + x_src_dict : dict[str, torch.Tensor] + A dictionary to hold source node feature for each relation graph. + + x_dst_dict : dict[str, torch.Tensor] + A dictionary to hold destination node feature for each relation graph. + """ + x_src_dict = dict.fromkeys(self.edge_types) + x_dst_dict = dict.fromkeys(self.edge_types) + + for ntype, t in x_fused_dict.items(): + n_src_rel = len(self.relations_per_ntype[ntype][0]) + n_dst_rel = len(self.relations_per_ntype[ntype][1]) + n_rel = n_src_rel + n_dst_rel + t_list = torch.chunk(t, chunks=n_rel, dim=dim) + + for i, src_rel in enumerate(self.relations_per_ntype[ntype][0]): + x_src_dict[src_rel] = t_list[i] + + for i, dst_rel in enumerate(self.relations_per_ntype[ntype][1]): + x_dst_dict[dst_rel] = t_list[i + n_src_rel] + + return x_src_dict, x_dst_dict + + def reset_parameters(self, seed: Optional[int] = None): + if seed is not None: + torch.manual_seed(seed) + + w_src, w_dst = self.split_tensors(self.lin_weights, dim=0) + + for edge_type in self.edge_types: + src_type, _, dst_type = edge_type + + # lin_src + torch_geometric.nn.inits.glorot(w_src[edge_type]) + + # lin_dst + if src_type != dst_type: + torch_geometric.nn.inits.glorot(w_dst[edge_type]) + + # attn_weights + torch_geometric.nn.inits.glorot( + self.attn_weights[edge_type].view(-1, self.num_heads, self.out_channels) + ) + + # bias + if self.bias is not None: + torch_geometric.nn.inits.zeros(self.bias[edge_type]) + + def forward( + self, + x_dict: dict[str, torch.Tensor], + edge_index_dict: dict[tuple[str, str, str], torch.Tensor], + ) -> dict[str, torch.Tensor]: + feat_dict = dict.fromkeys(x_dict.keys()) + + for ntype, x in x_dict.items(): + feat_dict[ntype] = x @ self.lin_weights[ntype].T + + x_src_dict, x_dst_dict = self.split_tensors(feat_dict, dim=1) + + out_dict = defaultdict(list) + + for edge_type, edge_index in edge_index_dict.items(): + src_type, _, dst_type = edge_type + + csc = BaseConv.to_csc( + edge_index, (x_dict[src_type].size(0), x_dict[dst_type].size(0)) + ) + + if src_type == dst_type: + graph = self.get_cugraph( + csc, + bipartite=False, + ) + out = mha_gat_n2n( + x_src_dict[edge_type], + self.attn_weights[edge_type], + graph, + num_heads=self.num_heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat_heads, + ) + + else: + graph = self.get_cugraph( + csc, + bipartite=True, + ) + out = mha_gat_n2n( + (x_src_dict[edge_type], x_dst_dict[edge_type]), + self.attn_weights[edge_type], + graph, + num_heads=self.num_heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat_heads, + ) + + if self.bias is not None: + out = out + self.bias[edge_type] + + out_dict[dst_type].append(out) + + for key, value in out_dict.items(): + out_dict[key] = torch_geometric.nn.conv.hetero_conv.group(value, self.aggr) + + return out_dict diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index 1512901822a..30994289f9c 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -284,3 +284,32 @@ def basic_pyg_graph_2(): ) size = (10, 10) return edge_index, size + + +@pytest.fixture +def sample_pyg_hetero_data(): + torch.manual_seed(12345) + raw_data_dict = { + "v0": torch.randn(6, 3), + "v1": torch.randn(7, 2), + "v2": torch.randn(5, 4), + ("v2", "e0", "v1"): torch.tensor([[0, 2, 2, 4, 4], [4, 3, 6, 0, 1]]), + ("v1", "e1", "v1"): torch.tensor( + [[0, 2, 2, 2, 3, 5, 5], [4, 0, 4, 5, 3, 0, 1]] + ), + ("v0", "e2", "v0"): torch.tensor([[0, 2, 2, 3, 5, 5], [1, 1, 5, 1, 1, 2]]), + ("v1", "e3", "v2"): torch.tensor( + [[0, 1, 1, 2, 4, 5, 6], [1, 2, 3, 1, 2, 2, 2]] + ), + ("v0", "e4", "v2"): torch.tensor([[1, 1, 3, 3, 4, 4], [1, 4, 1, 4, 0, 3]]), + } + + # create a nested dictionary to facilitate PyG's HeteroData construction + hetero_data_dict = {} + for key, value in raw_data_dict.items(): + if isinstance(key, tuple): + hetero_data_dict[key] = {"edge_index": value} + else: + hetero_data_dict[key] = {"x": value} + + return hetero_data_dict diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py new file mode 100644 index 00000000000..1c841a17df7 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py @@ -0,0 +1,132 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from cugraph_pyg.nn import HeteroGATConv as CuGraphHeteroGATConv +from cugraph.utilities.utils import import_optional, MissingModule + +torch = import_optional("torch") +torch_geometric = import_optional("torch_geometric") + +ATOL = 1e-6 + + +@pytest.mark.cugraph_ops +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif( + isinstance(torch_geometric, MissingModule), reason="torch_geometric not available" +) +@pytest.mark.parametrize("heads", [1, 3, 10]) +@pytest.mark.parametrize("aggr", ["sum", "mean"]) +def test_hetero_gat_conv_equality(sample_pyg_hetero_data, aggr, heads): + major, minor, patch = torch_geometric.__version__.split(".")[:3] + pyg_version = tuple(map(int, [major, minor, patch])) + if pyg_version < (2, 4, 0): + pytest.skip("Skipping HeteroGATConv test") + + from torch_geometric.data import HeteroData + from torch_geometric.nn import HeteroConv, GATConv + + device = torch.device("cuda:0") + data = HeteroData(sample_pyg_hetero_data).to(device) + + in_channels_dict = {k: v.size(1) for k, v in data.x_dict.items()} + out_channels = 2 + + convs_dict = {} + kwargs1 = dict(heads=heads, add_self_loops=False, bias=False) + for edge_type in data.edge_types: + src_t, _, dst_t = edge_type + in_channels_src, in_channels_dst = data.x_dict[src_t].size(-1), data.x_dict[ + dst_t + ].size(-1) + if src_t == dst_t: + convs_dict[edge_type] = GATConv(in_channels_src, out_channels, **kwargs1) + else: + convs_dict[edge_type] = GATConv( + (in_channels_src, in_channels_dst), out_channels, **kwargs1 + ) + + conv1 = HeteroConv(convs_dict, aggr=aggr).to(device) + kwargs2 = dict( + heads=heads, + aggr=aggr, + node_types=data.node_types, + edge_types=data.edge_types, + bias=False, + ) + conv2 = CuGraphHeteroGATConv(in_channels_dict, out_channels, **kwargs2).to(device) + + # copy over linear and attention weights + w_src, w_dst = conv2.split_tensors(conv2.lin_weights, dim=0) + with torch.no_grad(): + for edge_type in conv2.edge_types: + src_t, _, dst_t = edge_type + w_src[edge_type][:, :] = conv1.convs[edge_type].lin_src.weight[:, :] + if w_dst[edge_type] is not None: + w_dst[edge_type][:, :] = conv1.convs[edge_type].lin_dst.weight[:, :] + + conv2.attn_weights[edge_type][: heads * out_channels] = conv1.convs[ + edge_type + ].att_src.data.flatten() + conv2.attn_weights[edge_type][heads * out_channels :] = conv1.convs[ + edge_type + ].att_dst.data.flatten() + + out1 = conv1(data.x_dict, data.edge_index_dict) + out2 = conv2(data.x_dict, data.edge_index_dict) + + for node_type in data.node_types: + assert torch.allclose(out1[node_type], out2[node_type], atol=ATOL) + + loss1 = 0 + loss2 = 0 + for node_type in data.node_types: + loss1 += out1[node_type].mean() + loss2 += out2[node_type].mean() + + loss1.backward() + loss2.backward() + + # check gradient w.r.t attention weights + out_dim = heads * out_channels + for edge_type in conv2.edge_types: + assert torch.allclose( + conv1.convs[edge_type].att_src.grad.flatten(), + conv2.attn_weights[edge_type].grad[:out_dim], + atol=ATOL, + ) + assert torch.allclose( + conv1.convs[edge_type].att_dst.grad.flatten(), + conv2.attn_weights[edge_type].grad[out_dim:], + atol=ATOL, + ) + + # check gradient w.r.t linear weights + grad_lin_weights_ref = dict.fromkeys(out1.keys()) + for node_t, (rels_as_src, rels_as_dst) in conv2.relations_per_ntype.items(): + grad_list = [] + for rel_t in rels_as_src: + grad_list.append(conv1.convs[rel_t].lin_src.weight.grad.clone()) + for rel_t in rels_as_dst: + grad_list.append(conv1.convs[rel_t].lin_dst.weight.grad.clone()) + assert len(grad_list) > 0 + grad_lin_weights_ref[node_t] = torch.vstack(grad_list) + + for node_type in conv2.lin_weights: + assert torch.allclose( + grad_lin_weights_ref[node_type], + conv2.lin_weights[node_type].grad, + atol=ATOL, + ) From c637b337e97bd1cac926959dae4ab5669a5e693f Mon Sep 17 00:00:00 2001 From: Erik Welch Date: Tue, 12 Dec 2023 10:26:32 -0600 Subject: [PATCH 6/9] nx-cugraph: update usage of `nodes_or_number` for nx compat (#4028) These changes will be necessary when https://github.com/networkx/networkx/pull/7066 is merged. Authors: - Erik Welch (https://github.com/eriknw) Approvers: - Rick Ratzel (https://github.com/rlratzel) URL: https://github.com/rapidsai/cugraph/pull/4028 --- python/nx-cugraph/lint.yaml | 8 +++--- .../nx_cugraph/algorithms/__init__.py | 4 +-- .../algorithms/bipartite/generators.py | 5 ++-- .../nx-cugraph/nx_cugraph/algorithms/core.py | 6 ++++- .../nx_cugraph/generators/classic.py | 26 +++++++------------ .../nx-cugraph/nx_cugraph/utils/decorators.py | 13 +++++++++- 6 files changed, 34 insertions(+), 28 deletions(-) diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index a94aa9f0448..de6f20bc439 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -45,12 +45,12 @@ repos: - id: pyupgrade args: [--py39-plus] - repo: https://github.com/psf/black - rev: 23.10.1 + rev: 23.11.0 hooks: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 + rev: v0.1.7 hooks: - id: ruff args: [--fix-only, --show-fixes] # --unsafe-fixes] @@ -62,7 +62,7 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==6.1.0 - - flake8-bugbear==23.9.16 + - flake8-bugbear==23.12.2 - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa rev: v1.5.0 @@ -77,7 +77,7 @@ repos: additional_dependencies: [tomli] files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 + rev: v0.1.7 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 63841b15bd5..e4947491555 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -15,13 +15,13 @@ centrality, community, components, - shortest_paths, link_analysis, + shortest_paths, ) from .bipartite import complete_bipartite_graph from .centrality import * from .components import * from .core import * from .isolate import * -from .shortest_paths import * from .link_analysis import * +from .shortest_paths import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py index 1d3e762b4fd..25b9b39554b 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py @@ -17,15 +17,14 @@ import numpy as np from nx_cugraph.generators._utils import _create_using_class, _number_and_nodes -from nx_cugraph.utils import index_dtype, networkx_algorithm, nodes_or_number +from nx_cugraph.utils import index_dtype, networkx_algorithm __all__ = [ "complete_bipartite_graph", ] -@nodes_or_number([0, 1]) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=[0, 1]) def complete_bipartite_graph(n1, n2, create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index 2219388bc58..390598d070e 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -31,7 +31,11 @@ def k_truss(G, k): if is_nx := isinstance(G, nx.Graph): G = nxcg.from_networkx(G, preserve_all_attrs=True) if nxcg.number_of_selfloops(G) > 0: - raise nx.NetworkXError( + if nx.__version__[:3] <= "3.2": + exc_class = nx.NetworkXError + else: + exc_class = nx.NetworkXNotImplemented + raise exc_class( "Input graph has self loops which is not permitted; " "Consider using G.remove_edges_from(nx.selfloop_edges(G))." ) diff --git a/python/nx-cugraph/nx_cugraph/generators/classic.py b/python/nx-cugraph/nx_cugraph/generators/classic.py index b196c232320..4213e6dd2a0 100644 --- a/python/nx-cugraph/nx_cugraph/generators/classic.py +++ b/python/nx-cugraph/nx_cugraph/generators/classic.py @@ -19,7 +19,7 @@ import nx_cugraph as nxcg -from ..utils import _get_int_dtype, index_dtype, networkx_algorithm, nodes_or_number +from ..utils import _get_int_dtype, index_dtype, networkx_algorithm from ._utils import ( _IS_NX32_OR_LESS, _common_small_graph, @@ -86,8 +86,7 @@ def circular_ladder_graph(n, create_using=None): return _ladder_graph(n, create_using, is_circular=True) -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def complete_graph(n, create_using=None): n, nodes = _number_and_nodes(n) if n < 3: @@ -143,8 +142,7 @@ def complete_multipartite_graph(*subset_sizes): ) -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def cycle_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) @@ -174,8 +172,7 @@ def cycle_graph(n, create_using=None): return G -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def empty_graph(n=0, create_using=None, default=nx.Graph): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using, default=default) @@ -242,8 +239,7 @@ def ladder_graph(n, create_using=None): return _ladder_graph(n, create_using) -@nodes_or_number([0, 1]) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=[0, 1]) def lollipop_graph(m, n, create_using=None): # Like complete_graph then path_graph orig_m, unused_nodes_m = m @@ -283,8 +279,7 @@ def null_graph(create_using=None): return _common_small_graph(0, None, create_using) -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def path_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) @@ -304,8 +299,7 @@ def path_graph(n, create_using=None): return G -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def star_graph(n, create_using=None): orig_n, orig_nodes = n n, nodes = _number_and_nodes(n) @@ -329,8 +323,7 @@ def star_graph(n, create_using=None): return G -@nodes_or_number([0, 1]) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=[0, 1]) def tadpole_graph(m, n, create_using=None): orig_m, unused_nodes_m = m orig_n, unused_nodes_n = n @@ -382,8 +375,7 @@ def turan_graph(n, r): return complete_multipartite_graph(*partitions) -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def wheel_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) diff --git a/python/nx-cugraph/nx_cugraph/utils/decorators.py b/python/nx-cugraph/nx_cugraph/utils/decorators.py index 0048aee51bb..a0dbfcec890 100644 --- a/python/nx-cugraph/nx_cugraph/utils/decorators.py +++ b/python/nx-cugraph/nx_cugraph/utils/decorators.py @@ -15,6 +15,7 @@ from functools import partial, update_wrapper from textwrap import dedent +import networkx as nx from networkx.utils.decorators import nodes_or_number, not_implemented_for from nx_cugraph.interface import BackendInterface @@ -47,10 +48,18 @@ def __new__( *, name: str | None = None, extra_params: dict[str, str] | str | None = None, + nodes_or_number: list[int] | int | None = None, ): if func is None: - return partial(networkx_algorithm, name=name, extra_params=extra_params) + return partial( + networkx_algorithm, + name=name, + extra_params=extra_params, + nodes_or_number=nodes_or_number, + ) instance = object.__new__(cls) + if nodes_or_number is not None and nx.__version__[:3] > "3.2": + func = nx.utils.decorators.nodes_or_number(nodes_or_number)(func) # update_wrapper sets __wrapped__, which will be used for the signature update_wrapper(instance, func) instance.__defaults__ = func.__defaults__ @@ -76,6 +85,8 @@ def __new__( setattr(BackendInterface, instance.name, instance) # Set methods so they are in __dict__ instance._can_run = instance._can_run + if nodes_or_number is not None and nx.__version__[:3] <= "3.2": + instance = nx.utils.decorators.nodes_or_number(nodes_or_number)(instance) return instance def _can_run(self, func): From 7fe7beab548d66053916fde070d6f6cb58ed2339 Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 13 Dec 2023 03:41:31 +0900 Subject: [PATCH 7/9] Replace graph_view.hpp::number_of_edges with compute_number_of_edges (#4026) Replace graph_view.hpp::number_of_edges (deprecated, throws an exception if an edge mask is attached to the graph view object) with compute_number_of_edges (this function works with or without edge mask) Authors: - Seunghwa Kang (https://github.com/seunghwak) - Naim (https://github.com/naimnv) Approvers: - Joseph Nke (https://github.com/jnke2016) - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) URL: https://github.com/rapidsai/cugraph/pull/4026 --- cpp/include/cugraph/graph.hpp | 47 ++-- cpp/include/cugraph/graph_view.hpp | 136 +++++----- ...v_transform_reduce_incoming_outgoing_e.cuh | 9 +- .../prims/update_edge_src_dst_property.cuh | 3 +- cpp/src/structure/graph_impl.cuh | 24 +- cpp/src/structure/graph_view_impl.cuh | 237 +++++++++--------- .../traversal/od_shortest_distances_impl.cuh | 4 +- cpp/src/traversal/sssp_impl.cuh | 2 +- cpp/src/utilities/cugraph_ops_utils.hpp | 2 +- cpp/tests/link_analysis/hits_test.cpp | 2 +- ...er_v_random_select_transform_outgoing_e.cu | 4 +- 11 files changed, 237 insertions(+), 233 deletions(-) diff --git a/cpp/include/cugraph/graph.hpp b/cpp/include/cugraph/graph.hpp index 60b9f1a4054..a723fde24df 100644 --- a/cpp/include/cugraph/graph.hpp +++ b/cpp/include/cugraph/graph.hpp @@ -90,24 +90,25 @@ class graph_t meta, bool do_expensive_check = false); + edge_t number_of_edges() const { return this->number_of_edges_; } + graph_view_t view() const { - std::vector offsets(edge_partition_offsets_.size(), nullptr); - std::vector indices(edge_partition_indices_.size(), nullptr); - auto dcs_nzd_vertices = edge_partition_dcs_nzd_vertices_ - ? std::make_optional>( - (*edge_partition_dcs_nzd_vertices_).size(), nullptr) - : std::nullopt; - auto dcs_nzd_vertex_counts = edge_partition_dcs_nzd_vertex_counts_ - ? std::make_optional>( - (*edge_partition_dcs_nzd_vertex_counts_).size(), vertex_t{0}) - : std::nullopt; + std::vector> offsets(edge_partition_offsets_.size()); + std::vector> indices(edge_partition_indices_.size()); + auto dcs_nzd_vertices = edge_partition_dcs_nzd_vertices_ + ? std::make_optional>>( + (*edge_partition_dcs_nzd_vertices_).size()) + : std::nullopt; for (size_t i = 0; i < offsets.size(); ++i) { - offsets[i] = edge_partition_offsets_[i].data(); - indices[i] = edge_partition_indices_[i].data(); + offsets[i] = raft::device_span(edge_partition_offsets_[i].data(), + edge_partition_offsets_[i].size()); + indices[i] = raft::device_span(edge_partition_indices_[i].data(), + edge_partition_indices_[i].size()); if (dcs_nzd_vertices) { - (*dcs_nzd_vertices)[i] = (*edge_partition_dcs_nzd_vertices_)[i].data(); - (*dcs_nzd_vertex_counts)[i] = (*edge_partition_dcs_nzd_vertex_counts_)[i]; + (*dcs_nzd_vertices)[i] = + raft::device_span((*edge_partition_dcs_nzd_vertices_)[i].data(), + (*edge_partition_dcs_nzd_vertices_)[i].size()); } } @@ -196,15 +197,13 @@ class graph_t( - *(this->handle_ptr()), offsets, indices, dcs_nzd_vertices, - dcs_nzd_vertex_counts, graph_view_meta_t{ this->number_of_vertices(), this->number_of_edges(), - this->graph_properties(), + this->properties_, partition_, edge_partition_segment_offsets_, local_sorted_unique_edge_srcs, @@ -224,7 +223,6 @@ class graph_t>> edge_partition_dcs_nzd_vertices_{ std::nullopt}; - std::optional> edge_partition_dcs_nzd_vertex_counts_{std::nullopt}; partition_t partition_{}; // segment offsets within the vertex partition based on vertex degree @@ -283,16 +281,15 @@ class graph_t meta, bool do_expensive_check = false); + edge_t number_of_edges() const { return this->number_of_edges_; } + graph_view_t view() const { return graph_view_t( - *(this->handle_ptr()), - offsets_.data(), - indices_.data(), - graph_view_meta_t{this->number_of_vertices(), - this->number_of_edges(), - this->graph_properties(), - segment_offsets_}); + raft::device_span(offsets_.data(), offsets_.size()), + raft::device_span(indices_.data(), indices_.size()), + graph_view_meta_t{ + this->number_of_vertices(), this->number_of_edges(), this->properties_, segment_offsets_}); } private: diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index d79d4635c54..53c66c6483e 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -258,21 +258,12 @@ class graph_base_t { public: graph_base_t() = default; - graph_base_t(raft::handle_t const& handle, - vertex_t number_of_vertices, - edge_t number_of_edges, - graph_properties_t properties) - : handle_ptr_(&handle), - number_of_vertices_(number_of_vertices), + graph_base_t(vertex_t number_of_vertices, edge_t number_of_edges, graph_properties_t properties) + : number_of_vertices_(number_of_vertices), number_of_edges_(number_of_edges), properties_(properties){}; vertex_t number_of_vertices() const { return number_of_vertices_; } - edge_t number_of_edges() const - { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - return number_of_edges_; - } template std::enable_if_t::value, bool> is_valid_vertex(vertex_type v) const @@ -289,33 +280,12 @@ class graph_base_t { bool is_symmetric() const { return properties_.is_symmetric; } bool is_multigraph() const { return properties_.is_multigraph; } - void attach_edge_mask(edge_property_view_t edge_mask_view) - { - edge_mask_view_ = edge_mask_view; - } - - void clear_edge_mask() { edge_mask_view_ = std::nullopt; } - - bool has_edge_mask() const { return edge_mask_view_.has_value(); } - - std::optional> edge_mask_view() const - { - return edge_mask_view_; - } - protected: - raft::handle_t const* handle_ptr() const { return handle_ptr_; }; - graph_properties_t graph_properties() const { return properties_; } - - private: - raft::handle_t const* handle_ptr_{nullptr}; - - vertex_t number_of_vertices_{0}; edge_t number_of_edges_{0}; - graph_properties_t properties_{}; - std::optional> edge_mask_view_{std::nullopt}; + private: + vertex_t number_of_vertices_{0}; }; } // namespace detail @@ -405,11 +375,10 @@ class graph_view_t const& edge_partition_offsets, - std::vector const& edge_partition_indices, - std::optional> const& edge_partition_dcs_nzd_vertices, - std::optional> const& edge_partition_dcs_nzd_vertex_counts, + graph_view_t(std::vector> const& edge_partition_offsets, + std::vector> const& edge_partition_indices, + std::optional>> const& + edge_partition_dcs_nzd_vertices, graph_view_meta_t meta); std::vector vertex_partition_range_offsets() const @@ -624,25 +593,16 @@ class graph_view_tlocal_edge_partition_src_value_start_offset(partition_idx); } std::optional major_hypersparse_first{std::nullopt}; - vertex_t offset_size = (major_range_last - major_range_first) + 1; if (this->use_dcs()) { major_hypersparse_first = major_range_first + (*(this->local_edge_partition_segment_offsets( partition_idx)))[detail::num_sparse_segments_per_vertex_partition]; - offset_size = ((*major_hypersparse_first) - major_range_first) + - (*edge_partition_dcs_nzd_vertex_counts_)[partition_idx] + 1; } return edge_partition_view_t( - raft::device_span(edge_partition_offsets_[partition_idx], - edge_partition_offsets_[partition_idx] + offset_size), - raft::device_span( - edge_partition_indices_[partition_idx], - edge_partition_indices_[partition_idx] + edge_partition_number_of_edges_[partition_idx]), + edge_partition_offsets_[partition_idx], + edge_partition_indices_[partition_idx], edge_partition_dcs_nzd_vertices_ - ? std::make_optional>( - (*edge_partition_dcs_nzd_vertices_)[partition_idx], - (*edge_partition_dcs_nzd_vertices_)[partition_idx] + - (*edge_partition_dcs_nzd_vertex_counts_)[partition_idx]) + ? std::make_optional((*edge_partition_dcs_nzd_vertices_)[partition_idx]) : std::nullopt, major_hypersparse_first, major_range_first, @@ -652,6 +612,16 @@ class graph_view_thas_edge_mask()), "unimplemented."); + return this->number_of_edges_; + } + + edge_t compute_number_of_edges(raft::handle_t const& handle) const; + rmm::device_uvector compute_in_degrees(raft::handle_t const& handle) const; rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; @@ -751,15 +721,26 @@ class graph_view_t edge_mask_view) + { + edge_mask_view_ = edge_mask_view; + } + + void clear_edge_mask() { edge_mask_view_ = std::nullopt; } + + bool has_edge_mask() const { return edge_mask_view_.has_value(); } + + std::optional> edge_mask_view() const + { + return edge_mask_view_; + } + private: - std::vector edge_partition_offsets_{}; - std::vector edge_partition_indices_{}; + std::vector> edge_partition_offsets_{}; + std::vector> edge_partition_indices_{}; // relevant only if we use the CSR + DCSR (or CSC + DCSC) hybrid format - std::optional> edge_partition_dcs_nzd_vertices_{}; - std::optional> edge_partition_dcs_nzd_vertex_counts_{}; - - std::vector edge_partition_number_of_edges_{}; + std::optional>> edge_partition_dcs_nzd_vertices_{}; partition_t partition_{}; @@ -796,6 +777,8 @@ class graph_view_t>, std::optional /* dummy */> local_sorted_unique_edge_dst_vertex_partition_offsets_{std::nullopt}; + + std::optional> edge_mask_view_{std::nullopt}; }; // single-GPU version @@ -808,9 +791,8 @@ class graph_view_t offsets, + raft::device_span indices, graph_view_meta_t meta); std::vector vertex_partition_range_offsets() const @@ -924,11 +906,19 @@ class graph_view_t( - raft::device_span(offsets_, offsets_ + (this->number_of_vertices() + 1)), - raft::device_span(indices_, indices_ + this->number_of_edges()), - this->number_of_vertices()); + offsets_, indices_, this->number_of_vertices()); + } + + // FIXME: deprecated, replaced with copmute_number_of_edges (which works with or without edge + // masking) + edge_t number_of_edges() const + { + CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); + return this->number_of_edges_; } + edge_t compute_number_of_edges(raft::handle_t const& handle) const; + rmm::device_uvector compute_in_degrees(raft::handle_t const& handle) const; rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; @@ -1016,12 +1006,28 @@ class graph_view_t edge_mask_view) + { + edge_mask_view_ = edge_mask_view; + } + + void clear_edge_mask() { edge_mask_view_ = std::nullopt; } + + bool has_edge_mask() const { return edge_mask_view_.has_value(); } + + std::optional> edge_mask_view() const + { + return edge_mask_view_; + } + private: - edge_t const* offsets_{nullptr}; - vertex_t const* indices_{nullptr}; + raft::device_span offsets_{}; + raft::device_span indices_{}; // segment offsets based on vertex degree, relevant only if vertex IDs are renumbered std::optional> segment_offsets_{std::nullopt}; + + std::optional> edge_mask_view_{std::nullopt}; }; } // namespace cugraph diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 5de2dbc18e2..1a7fc0130c4 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -593,10 +593,11 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, value_size = sizeof(T); } - auto avg_vertex_degree = graph_view.number_of_vertices() > 0 - ? (static_cast(graph_view.number_of_edges()) / - static_cast(graph_view.number_of_vertices())) - : double{0.0}; + auto avg_vertex_degree = + graph_view.number_of_vertices() > 0 + ? (static_cast(graph_view.compute_number_of_edges(handle)) / + static_cast(graph_view.number_of_vertices())) + : double{0.0}; num_streams = std::min(static_cast(avg_vertex_degree * (static_cast(sizeof(vertex_t)) / diff --git a/cpp/src/prims/update_edge_src_dst_property.cuh b/cpp/src/prims/update_edge_src_dst_property.cuh index 18bdf5bcf2d..0c7058cccb4 100644 --- a/cpp/src/prims/update_edge_src_dst_property.cuh +++ b/cpp/src/prims/update_edge_src_dst_property.cuh @@ -479,7 +479,8 @@ void update_edge_minor_property(raft::handle_t const& handle, bcast_size *= sizeof(typename EdgeMinorPropertyOutputWrapper::value_type); } auto num_concurrent_bcasts = - (static_cast(graph_view.number_of_edges() / comm_size) * sizeof(vertex_t)) / + (static_cast(graph_view.compute_number_of_edges(handle) / comm_size) * + sizeof(vertex_t)) / std::max(bcast_size, size_t{1}); num_concurrent_bcasts = std::max(num_concurrent_bcasts, size_t{1}); num_concurrent_bcasts = std::min(num_concurrent_bcasts, static_cast(major_comm_size)); diff --git a/cpp/src/structure/graph_impl.cuh b/cpp/src/structure/graph_impl.cuh index 75862266789..6568b5e3b9e 100644 --- a/cpp/src/structure/graph_impl.cuh +++ b/cpp/src/structure/graph_impl.cuh @@ -133,8 +133,7 @@ update_local_sorted_unique_edge_majors_minors( graph_meta_t const& meta, std::vector> const& edge_partition_offsets, std::vector> const& edge_partition_indices, - std::optional>> const& edge_partition_dcs_nzd_vertices, - std::optional> const& edge_partition_dcs_nzd_vertex_counts) + std::optional>> const& edge_partition_dcs_nzd_vertices) { auto& comm = handle.get_comms(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); @@ -341,8 +340,7 @@ update_local_sorted_unique_edge_majors_minors( if (use_dcs) { thrust::copy(handle.get_thrust_policy(), (*edge_partition_dcs_nzd_vertices)[i].begin(), - (*edge_partition_dcs_nzd_vertices)[i].begin() + - (*edge_partition_dcs_nzd_vertex_counts)[i], + (*edge_partition_dcs_nzd_vertices)[i].end(), unique_edge_majors.begin() + cur_size); } @@ -390,7 +388,7 @@ graph_t meta, bool do_expensive_check) : detail::graph_base_t( - handle, meta.number_of_vertices, meta.number_of_edges, meta.properties), + meta.number_of_vertices, meta.number_of_edges, meta.properties), partition_(meta.partition) { CUGRAPH_EXPECTS( @@ -408,14 +406,6 @@ graph_t((*edge_partition_dcs_nzd_vertices_).size()); - for (size_t i = 0; i < (*edge_partition_dcs_nzd_vertex_counts_).size(); ++i) { - (*edge_partition_dcs_nzd_vertex_counts_)[i] = - static_cast((*edge_partition_dcs_nzd_vertices_)[i].size()); - } - } // update local sorted unique edge sources/destinations (only if key, value pair will be used) @@ -432,8 +422,7 @@ graph_t meta, bool do_expensive_check) : detail::graph_base_t( - handle, meta.number_of_vertices, static_cast(indices.size()), meta.properties), + meta.number_of_vertices, static_cast(indices.size()), meta.properties), offsets_(std::move(offsets)), indices_(std::move(indices)), segment_offsets_(meta.segment_offsets) diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index a40747f13f7..da0ecc991df 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -72,44 +72,15 @@ struct out_of_range_t { __device__ bool operator()(vertex_t v) const { return (v < min) || (v >= max); } }; -template -std::vector update_edge_partition_edge_counts( - std::vector const& edge_partition_offsets, - std::optional> const& edge_partition_dcs_nzd_vertex_counts, - partition_t const& partition, - std::vector const& edge_partition_segment_offsets, - cudaStream_t stream) -{ - std::vector edge_partition_edge_counts(partition.number_of_local_edge_partitions(), 0); - auto use_dcs = edge_partition_dcs_nzd_vertex_counts.has_value(); - for (size_t i = 0; i < edge_partition_offsets.size(); ++i) { - auto [major_range_first, major_range_last] = partition.local_edge_partition_major_range(i); - auto segment_offset_size_per_partition = - edge_partition_segment_offsets.size() / edge_partition_offsets.size(); - raft::update_host( - &(edge_partition_edge_counts[i]), - edge_partition_offsets[i] + - (use_dcs - ? (edge_partition_segment_offsets[segment_offset_size_per_partition * i + - detail::num_sparse_segments_per_vertex_partition] + - (*edge_partition_dcs_nzd_vertex_counts)[i]) - : (major_range_last - major_range_first)), - 1, - stream); - } - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - return edge_partition_edge_counts; -} - // compute out-degrees (if we are internally storing edges in the sparse 2D matrix using sources as // major indices) or in-degrees (otherwise) template rmm::device_uvector compute_major_degrees( raft::handle_t const& handle, - std::vector const& edge_partition_offsets, - std::optional> const& edge_partition_dcs_nzd_vertices, - std::optional> const& edge_partition_dcs_nzd_vertex_counts, - std::optional> const& edge_partition_masks, + std::vector> const& edge_partition_offsets, + std::optional>> const& + edge_partition_dcs_nzd_vertices, + std::optional>> const& edge_partition_masks, partition_t const& partition, std::vector const& edge_partition_segment_offsets) { @@ -165,35 +136,35 @@ rmm::device_uvector compute_major_degrees( auto local_degree = offsets[i + 1] - offsets[i]; if (masks) { local_degree = static_cast( - detail::count_set_bits(*masks, offsets[i], local_degree)); + detail::count_set_bits((*masks).begin(), offsets[i], local_degree)); } return local_degree; })); if (use_dcs) { - auto dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; - auto dcs_nzd_vertex_count = (*edge_partition_dcs_nzd_vertex_counts)[i]; + auto dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; thrust::fill(execution_policy, local_degrees.begin() + (major_hypersparse_first - major_range_first), local_degrees.begin() + (major_range_last - major_range_first), edge_t{0}); - thrust::for_each(execution_policy, - thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(dcs_nzd_vertex_count), - [offsets, - dcs_nzd_vertices, - masks, - major_range_first, - major_hypersparse_first, - local_degrees = local_degrees.data()] __device__(auto i) { - auto major_idx = (major_hypersparse_first - major_range_first) + i; - auto local_degree = offsets[major_idx + 1] - offsets[major_idx]; - if (masks) { - local_degree = static_cast( - detail::count_set_bits(*masks, offsets[major_idx], local_degree)); - } - auto v = dcs_nzd_vertices[i]; - local_degrees[v - major_range_first] = local_degree; - }); + thrust::for_each( + execution_policy, + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(static_cast(dcs_nzd_vertices.size())), + [offsets, + dcs_nzd_vertices, + masks, + major_range_first, + major_hypersparse_first, + local_degrees = local_degrees.data()] __device__(auto i) { + auto major_idx = (major_hypersparse_first - major_range_first) + i; + auto local_degree = offsets[major_idx + 1] - offsets[major_idx]; + if (masks) { + local_degree = static_cast( + detail::count_set_bits((*masks).begin(), offsets[major_idx], local_degree)); + } + auto v = dcs_nzd_vertices[i]; + local_degrees[v - major_range_first] = local_degree; + }); } minor_comm.reduce(local_degrees.data(), i == minor_comm_rank ? degrees.data() : static_cast(nullptr), @@ -209,10 +180,11 @@ rmm::device_uvector compute_major_degrees( // compute out-degrees (if we are internally storing edges in the sparse 2D matrix using sources as // major indices) or in-degrees (otherwise) template -rmm::device_uvector compute_major_degrees(raft::handle_t const& handle, - edge_t const* offsets, - std::optional masks, - vertex_t number_of_vertices) +rmm::device_uvector compute_major_degrees( + raft::handle_t const& handle, + raft::device_span offsets, + std::optional> masks, + vertex_t number_of_vertices) { rmm::device_uvector degrees(number_of_vertices, handle.get_stream()); thrust::tabulate( @@ -223,7 +195,7 @@ rmm::device_uvector compute_major_degrees(raft::handle_t const& handle, auto local_degree = offsets[i + 1] - offsets[i]; if (masks) { local_degree = - static_cast(detail::count_set_bits(*masks, offsets[i], local_degree)); + static_cast(detail::count_set_bits((*masks).begin(), offsets[i], local_degree)); } return local_degree; }); @@ -446,24 +418,16 @@ edge_t count_edge_partition_multi_edges( template graph_view_t>:: - graph_view_t(raft::handle_t const& handle, - std::vector const& edge_partition_offsets, - std::vector const& edge_partition_indices, - std::optional> const& edge_partition_dcs_nzd_vertices, - std::optional> const& edge_partition_dcs_nzd_vertex_counts, + graph_view_t(std::vector> const& edge_partition_offsets, + std::vector> const& edge_partition_indices, + std::optional>> const& + edge_partition_dcs_nzd_vertices, graph_view_meta_t meta) : detail::graph_base_t( - handle, meta.number_of_vertices, meta.number_of_edges, meta.properties), + meta.number_of_vertices, meta.number_of_edges, meta.properties), edge_partition_offsets_(edge_partition_offsets), edge_partition_indices_(edge_partition_indices), edge_partition_dcs_nzd_vertices_(edge_partition_dcs_nzd_vertices), - edge_partition_dcs_nzd_vertex_counts_(edge_partition_dcs_nzd_vertex_counts), - edge_partition_number_of_edges_( - update_edge_partition_edge_counts(edge_partition_offsets, - edge_partition_dcs_nzd_vertex_counts, - meta.partition, - meta.edge_partition_segment_offsets, - handle.get_stream())), partition_(meta.partition), edge_partition_segment_offsets_(meta.edge_partition_segment_offsets), local_sorted_unique_edge_srcs_(meta.local_sorted_unique_edge_srcs), @@ -481,51 +445,42 @@ graph_view_thandle_ptr()->get_subcomm(cugraph::partition_manager::minor_comm_name()).get_size(); - auto use_dcs = edge_partition_dcs_nzd_vertices.has_value(); CUGRAPH_EXPECTS(edge_partition_offsets.size() == edge_partition_indices.size(), "Internal Error: edge_partition_offsets.size() and " "edge_partition_indices.size() should coincide."); - CUGRAPH_EXPECTS(edge_partition_dcs_nzd_vertex_counts.has_value() == use_dcs, - "edge_partition_dcs_nzd_vertices.has_value() and " - "edge_partition_dcs_nzd_vertex_counts.has_value() should coincide"); - CUGRAPH_EXPECTS(!use_dcs || ((*edge_partition_dcs_nzd_vertices).size() == - (*edge_partition_dcs_nzd_vertex_counts).size()), - "Internal Error: edge_partition_dcs_nzd_vertices.size() and " - "edge_partition_dcs_nzd_vertex_counts.size() should coincide (if used)."); CUGRAPH_EXPECTS( !use_dcs || ((*edge_partition_dcs_nzd_vertices).size() == edge_partition_offsets.size()), "Internal Error: edge_partition_dcs_nzd_vertices.size() should coincide " "with edge_partition_offsets.size() (if used)."); - CUGRAPH_EXPECTS(edge_partition_offsets.size() == static_cast(minor_comm_size), - "Internal Error: erroneous edge_partition_offsets.size()."); - - CUGRAPH_EXPECTS( - meta.edge_partition_segment_offsets.size() == - minor_comm_size * (detail::num_sparse_segments_per_vertex_partition + (use_dcs ? 3 : 2)), - "Internal Error: invalid edge_partition_segment_offsets.size()."); + CUGRAPH_EXPECTS(meta.edge_partition_segment_offsets.size() == + edge_partition_offsets.size() * + (detail::num_sparse_segments_per_vertex_partition + (use_dcs ? 3 : 2)), + "Internal Error: invalid edge_partition_segment_offsets.size()."); // skip expensive error checks as this function is only called by graph_t } template graph_view_t>:: - graph_view_t(raft::handle_t const& handle, - edge_t const* offsets, - vertex_t const* indices, + graph_view_t(raft::device_span offsets, + raft::device_span indices, graph_view_meta_t meta) : detail::graph_base_t( - handle, meta.number_of_vertices, meta.number_of_edges, meta.properties), + meta.number_of_vertices, meta.number_of_edges, meta.properties), offsets_(offsets), indices_(indices), segment_offsets_(meta.segment_offsets) { // cheap error checks + CUGRAPH_EXPECTS(offsets.size() == static_cast(meta.number_of_vertices + 1), + "Internal Error: offsets.size() returns an invalid value."); + CUGRAPH_EXPECTS(indices.size() == static_cast(meta.number_of_edges), + "Internal Error: indices.size() returns an invalid value."); + CUGRAPH_EXPECTS( !(meta.segment_offsets).has_value() || ((*(meta.segment_offsets)).size() == (detail::num_sparse_segments_per_vertex_partition + 2)), @@ -534,19 +489,62 @@ graph_view_t +edge_t graph_view_t>:: + compute_number_of_edges(raft::handle_t const& handle) const +{ + if (this->has_edge_mask()) { + edge_t ret{}; + auto value_firsts = (*(this->edge_mask_view())).value_firsts(); + auto edge_counts = (*(this->edge_mask_view())).edge_counts(); + for (size_t i = 0; i < value_firsts.size(); ++i) { + ret += static_cast(detail::count_set_bits(handle, value_firsts[i], edge_counts[i])); + } + ret = + host_scalar_allreduce(handle.get_comms(), ret, raft::comms::op_t::SUM, handle.get_stream()); + return ret; + } else { + return this->number_of_edges_; + } +} + +template +edge_t graph_view_t>:: + compute_number_of_edges(raft::handle_t const& handle) const +{ + if (this->has_edge_mask()) { + auto value_firsts = (*(this->edge_mask_view())).value_firsts(); + auto edge_counts = (*(this->edge_mask_view())).edge_counts(); + assert(value_firsts.size() == 0); + assert(edge_counts.size() == 0); + return static_cast(detail::count_set_bits(handle, value_firsts[0], edge_counts[0])); + } else { + return this->number_of_edges_; + } +} + template rmm::device_uvector graph_view_t>:: compute_in_degrees(raft::handle_t const& handle) const { if (store_transposed) { + std::optional>> edge_partition_masks{ + std::nullopt}; + if (this->has_edge_mask()) { + edge_partition_masks = + std::vector>(this->edge_partition_offsets_.size()); + auto value_firsts = (*(this->edge_mask_view())).value_firsts(); + auto edge_counts = (*(this->edge_mask_view())).edge_counts(); + for (size_t i = 0; i < (*edge_partition_masks).size(); ++i) { + (*edge_partition_masks)[i] = + raft::device_span(value_firsts[i], edge_counts[i]); + } + } return compute_major_degrees(handle, this->edge_partition_offsets_, this->edge_partition_dcs_nzd_vertices_, - this->edge_partition_dcs_nzd_vertex_counts_, - this->has_edge_mask() - ? std::make_optional((*(this->edge_mask_view())).value_firsts()) - : std::nullopt, + edge_partition_masks, this->partition_, this->edge_partition_segment_offsets_); } else { @@ -561,12 +559,14 @@ graph_view_toffsets_, - this->has_edge_mask() ? std::make_optional((*(this->edge_mask_view())).value_firsts()[0]) - : std::nullopt, - this->local_vertex_partition_range_size()); + return compute_major_degrees(handle, + this->offsets_, + this->has_edge_mask() + ? std::make_optional(raft::device_span( + (*(this->edge_mask_view())).value_firsts()[0], + (*(this->edge_mask_view())).edge_counts()[0])) + : std::nullopt, + this->local_vertex_partition_range_size()); } else { CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); @@ -582,13 +582,22 @@ graph_view_thas_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); } else { + std::optional>> edge_partition_masks{ + std::nullopt}; + if (this->has_edge_mask()) { + edge_partition_masks = + std::vector>(this->edge_partition_offsets_.size()); + auto value_firsts = (*(this->edge_mask_view())).value_firsts(); + auto edge_counts = (*(this->edge_mask_view())).edge_counts(); + for (size_t i = 0; i < (*edge_partition_masks).size(); ++i) { + (*edge_partition_masks)[i] = + raft::device_span(value_firsts[i], edge_counts[i]); + } + } return compute_major_degrees(handle, this->edge_partition_offsets_, this->edge_partition_dcs_nzd_vertices_, - this->edge_partition_dcs_nzd_vertex_counts_, - this->has_edge_mask() - ? std::make_optional((*(this->edge_mask_view())).value_firsts()) - : std::nullopt, + edge_partition_masks, this->partition_, this->edge_partition_segment_offsets_); } @@ -603,12 +612,14 @@ graph_view_thas_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); } else { - return compute_major_degrees( - handle, - this->offsets_, - this->has_edge_mask() ? std::make_optional((*(this->edge_mask_view())).value_firsts()[0]) - : std::nullopt, - this->local_vertex_partition_range_size()); + return compute_major_degrees(handle, + this->offsets_, + this->has_edge_mask() + ? std::make_optional(raft::device_span( + (*(this->edge_mask_view())).value_firsts()[0], + (*(this->edge_mask_view())).edge_counts()[0])) + : std::nullopt, + this->local_vertex_partition_range_size()); } } diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh index 6a0c5a4a675..cc69cb5f67f 100644 --- a/cpp/src/traversal/od_shortest_distances_impl.cuh +++ b/cpp/src/traversal/od_shortest_distances_impl.cuh @@ -432,7 +432,7 @@ rmm::device_uvector od_shortest_distances( // 1. check input arguments auto const num_vertices = graph_view.number_of_vertices(); - auto const num_edges = graph_view.number_of_edges(); + auto const num_edges = graph_view.compute_number_of_edges(handle); CUGRAPH_EXPECTS(num_vertices != 0 || (origins.size() == 0 && destinations.size() == 0), "Invalid input argument: the input graph is empty but origins.size() > 0 or " @@ -1049,7 +1049,7 @@ rmm::device_uvector od_shortest_distances( CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); auto const num_vertices = graph_view.number_of_vertices(); - auto const num_edges = graph_view.number_of_edges(); + auto const num_edges = graph_view.compute_number_of_edges(handle); weight_t average_vertex_degree = static_cast(num_edges) / static_cast(num_vertices); diff --git a/cpp/src/traversal/sssp_impl.cuh b/cpp/src/traversal/sssp_impl.cuh index c78fa3839e2..5a6d536c6f5 100644 --- a/cpp/src/traversal/sssp_impl.cuh +++ b/cpp/src/traversal/sssp_impl.cuh @@ -93,7 +93,7 @@ void sssp(raft::handle_t const& handle, "GraphViewType should support the push model."); auto const num_vertices = push_graph_view.number_of_vertices(); - auto const num_edges = push_graph_view.number_of_edges(); + auto const num_edges = push_graph_view.compute_number_of_edges(handle); if (num_vertices == 0) { return; } // implements the Near-Far Pile method in diff --git a/cpp/src/utilities/cugraph_ops_utils.hpp b/cpp/src/utilities/cugraph_ops_utils.hpp index 9aea4183866..880a2c8d104 100644 --- a/cpp/src/utilities/cugraph_ops_utils.hpp +++ b/cpp/src/utilities/cugraph_ops_utils.hpp @@ -30,7 +30,7 @@ ops::graph::csc get_graph( ops::graph::csc graph; graph.n_src_nodes = gview.number_of_vertices(); graph.n_dst_nodes = gview.number_of_vertices(); - graph.n_indices = gview.number_of_edges(); + graph.n_indices = gview.local_edge_partition_view().number_of_edges(); // FIXME this is sufficient for now, but if there is a fast (cached) way // of getting max degree, use that instead graph.dst_max_in_degree = std::numeric_limits::max(); diff --git a/cpp/tests/link_analysis/hits_test.cpp b/cpp/tests/link_analysis/hits_test.cpp index 6796761e212..cf35356bb76 100644 --- a/cpp/tests/link_analysis/hits_test.cpp +++ b/cpp/tests/link_analysis/hits_test.cpp @@ -229,7 +229,7 @@ class Tests_Hits : public ::testing::TestWithParam sg_indices(sg_graph_view.number_of_edges(), - handle_->get_stream()); + rmm::device_uvector sg_indices( + sg_graph_view.local_edge_partition_view().indices().size(), handle_->get_stream()); thrust::copy(handle_->get_thrust_policy(), sg_graph_view.local_edge_partition_view().indices().begin(), sg_graph_view.local_edge_partition_view().indices().end(), From 23e70a6f2c81416736ec4d9107088c2b5a4788cb Mon Sep 17 00:00:00 2001 From: Naim <110031745+naimnv@users.noreply.github.com> Date: Tue, 12 Dec 2023 19:42:58 +0100 Subject: [PATCH 8/9] Test select_random_vertices for all possible values of flags (#4042) Test `select_random_vertices` for all possible values of flags. Authors: - Naim (https://github.com/naimnv) Approvers: - Seunghwa Kang (https://github.com/seunghwak) - Chuck Hastings (https://github.com/ChuckHastings) URL: https://github.com/rapidsai/cugraph/pull/4042 --- .../mg_select_random_vertices_test.cpp | 155 ++++++++++-------- 1 file changed, 86 insertions(+), 69 deletions(-) diff --git a/cpp/tests/structure/mg_select_random_vertices_test.cpp b/cpp/tests/structure/mg_select_random_vertices_test.cpp index e49e1ebcb99..8392a6831ca 100644 --- a/cpp/tests/structure/mg_select_random_vertices_test.cpp +++ b/cpp/tests/structure/mg_select_random_vertices_test.cpp @@ -79,6 +79,8 @@ class Tests_MGSelectRandomVertices // std::vector with_replacement_flags = {true, false}; + std::vector sort_vertices_flags = {true, false}; + { // Generate distributed vertex set to sample from std::srand((unsigned)std::chrono::duration_cast( @@ -107,80 +109,95 @@ class Tests_MGSelectRandomVertices ? select_random_vertices_usecase.select_count : std::rand() % (num_of_elements_in_given_set + 1); - for (int idx = 0; idx < with_replacement_flags.size(); idx++) { - bool with_replacement = with_replacement_flags[idx]; - auto d_sampled_vertices = - cugraph::select_random_vertices(*handle_, - mg_graph_view, - std::make_optional(raft::device_span{ - d_given_set.data(), d_given_set.size()}), - rng_state, - select_count, - with_replacement, - true); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); - - if (select_random_vertices_usecase.check_correctness) { - if (!with_replacement) { - std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); - - auto nr_duplicates = - std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), - h_sampled_vertices.end()); - - ASSERT_EQ(nr_duplicates, 0); + for (int i = 0; i < with_replacement_flags.size(); i++) { + for (int j = 0; j < sort_vertices_flags.size(); j++) { + bool with_replacement = with_replacement_flags[i]; + bool sort_vertices = sort_vertices_flags[j]; + + auto d_sampled_vertices = + cugraph::select_random_vertices(*handle_, + mg_graph_view, + std::make_optional(raft::device_span{ + d_given_set.data(), d_given_set.size()}), + rng_state, + select_count, + with_replacement, + sort_vertices); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); + + if (select_random_vertices_usecase.check_correctness) { + if (!with_replacement) { + std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); + + auto nr_duplicates = + std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), + h_sampled_vertices.end()); + + ASSERT_EQ(nr_duplicates, 0); + } + + std::sort(h_given_set.begin(), h_given_set.end()); + if (sort_vertices) { + assert(std::is_sorted(h_sampled_vertices.begin(), h_sampled_vertices.end())); + } else { + std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); + } + std::for_each( + h_sampled_vertices.begin(), h_sampled_vertices.end(), [&h_given_set](vertex_t v) { + ASSERT_TRUE(std::binary_search(h_given_set.begin(), h_given_set.end(), v)); + }); } - - std::sort(h_given_set.begin(), h_given_set.end()); - std::for_each( - h_sampled_vertices.begin(), h_sampled_vertices.end(), [&h_given_set](vertex_t v) { - ASSERT_TRUE(std::binary_search(h_given_set.begin(), h_given_set.end(), v)); - }); } } - } - - // - // Test sampling from [0, V) - // - - for (int idx = 0; idx < with_replacement_flags.size(); idx++) { - bool with_replacement = false; - auto d_sampled_vertices = cugraph::select_random_vertices( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - rng_state, - select_random_vertices_usecase.select_count, - with_replacement, - true); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); - if (select_random_vertices_usecase.check_correctness) { - if (!with_replacement) { - std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); - - auto nr_duplicates = - std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), - h_sampled_vertices.end()); - - ASSERT_EQ(nr_duplicates, 0); + // + // Test sampling from [0, V) + // + + for (int i = 0; i < with_replacement_flags.size(); i++) { + for (int j = 0; j < sort_vertices_flags.size(); j++) { + bool with_replacement = with_replacement_flags[i]; + bool sort_vertices = sort_vertices_flags[j]; + + auto d_sampled_vertices = cugraph::select_random_vertices( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + rng_state, + select_random_vertices_usecase.select_count, + with_replacement, + sort_vertices); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); + + if (select_random_vertices_usecase.check_correctness) { + if (!with_replacement) { + std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); + + auto nr_duplicates = + std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), + h_sampled_vertices.end()); + + ASSERT_EQ(nr_duplicates, 0); + } + if (sort_vertices) { + assert(std::is_sorted(h_sampled_vertices.begin(), h_sampled_vertices.end())); + } + + auto vertex_first = mg_graph_view.local_vertex_partition_range_first(); + auto vertex_last = mg_graph_view.local_vertex_partition_range_last(); + std::for_each(h_sampled_vertices.begin(), + h_sampled_vertices.end(), + [vertex_first, vertex_last](vertex_t v) { + ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)); + }); + } } - - auto vertex_first = mg_graph_view.local_vertex_partition_range_first(); - auto vertex_last = mg_graph_view.local_vertex_partition_range_last(); - - std::for_each(h_sampled_vertices.begin(), - h_sampled_vertices.end(), - [vertex_first, vertex_last](vertex_t v) { - ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)); - }); } } } From 05c78bbbc1d1f7bcb284712b7496e68f9633b07e Mon Sep 17 00:00:00 2001 From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com> Date: Wed, 13 Dec 2023 03:52:26 +0900 Subject: [PATCH 9/9] MG C-API test failure fixes (#4047) This PR fixes MG C-API test failures in egonet (due to a C-API bug), leiden (due to a C++ API bug), graph creation (due to a bug in test code). Authors: - Seunghwa Kang (https://github.com/seunghwak) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Naim (https://github.com/naimnv) URL: https://github.com/rapidsai/cugraph/pull/4047 --- cpp/CMakeLists.txt | 2 +- cpp/src/c_api/capi_helper.cu | 98 ++++++++++++++++++++++++++ cpp/src/c_api/capi_helper.hpp | 12 ++++ cpp/src/c_api/extract_ego.cpp | 46 +++++++++++- cpp/src/community/leiden_impl.cuh | 6 +- cpp/src/detail/shuffle_vertices.cu | 12 ++++ cpp/tests/CMakeLists.txt | 4 +- cpp/tests/c_api/mg_create_graph_test.c | 22 +++--- 8 files changed, 183 insertions(+), 19 deletions(-) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index c09fe3f4004..84a5534facd 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -423,7 +423,7 @@ add_library(cugraph_c src/c_api/core_result.cpp src/c_api/extract_ego.cpp src/c_api/k_core.cpp - src/c_api/hierarchical_clustering_result.cpp + src/c_api/hierarchical_clustering_result.cpp src/c_api/induced_subgraph.cpp src/c_api/capi_helper.cu src/c_api/legacy_spectral.cpp diff --git a/cpp/src/c_api/capi_helper.cu b/cpp/src/c_api/capi_helper.cu index 0ee49f87265..f08af4137db 100644 --- a/cpp/src/c_api/capi_helper.cu +++ b/cpp/src/c_api/capi_helper.cu @@ -74,6 +74,104 @@ template void sort_by_key(raft::handle_t const& handle, raft::device_span keys, raft::device_span values); +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights) +{ + rmm::device_uvector sort_indices(edge_srcs.size(), handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + sort_indices.begin(), + sort_indices.end(), + [offset_lasts = raft::device_span(offsets.begin() + 1, offsets.end()), + source_indices = raft::device_span(source_indices.data(), + source_indices.size())] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + offset_lasts.begin(), + thrust::upper_bound(thrust::seq, offset_lasts.begin(), offset_lasts.end(), i))); + return source_indices[idx]; + }); + source_indices.resize(0, handle.get_stream()); + source_indices.shrink_to_fit(handle.get_stream()); + + auto triplet_first = + thrust::make_zip_iterator(sort_indices.begin(), edge_srcs.begin(), edge_dsts.begin()); + if (edge_weights) { + thrust::sort_by_key(handle.get_thrust_policy(), + triplet_first, + triplet_first + sort_indices.size(), + (*edge_weights).begin()); + } else { + thrust::sort(handle.get_thrust_policy(), triplet_first, triplet_first + sort_indices.size()); + } + + thrust::tabulate( + handle.get_thrust_policy(), + offsets.begin() + 1, + offsets.end(), + [sort_indices = raft::device_span(sort_indices.data(), + sort_indices.size())] __device__(size_t i) { + return static_cast(thrust::distance( + sort_indices.begin(), + thrust::upper_bound(thrust::seq, sort_indices.begin(), sort_indices.end(), i))); + }); + + return std::make_tuple( + std::move(offsets), std::move(edge_srcs), std::move(edge_dsts), std::move(edge_weights)); +} + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + } // namespace detail } // namespace c_api } // namespace cugraph diff --git a/cpp/src/c_api/capi_helper.hpp b/cpp/src/c_api/capi_helper.hpp index ce08e8d90d3..56401606477 100644 --- a/cpp/src/c_api/capi_helper.hpp +++ b/cpp/src/c_api/capi_helper.hpp @@ -36,6 +36,18 @@ void sort_by_key(raft::handle_t const& handle, raft::device_span keys, raft::device_span values); +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + } // namespace detail } // namespace c_api } // namespace cugraph diff --git a/cpp/src/c_api/extract_ego.cpp b/cpp/src/c_api/extract_ego.cpp index 931d58b5185..cbe07af2e77 100644 --- a/cpp/src/c_api/extract_ego.cpp +++ b/cpp/src/c_api/extract_ego.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -26,7 +27,10 @@ #include #include #include +#include +#include +#include #include namespace { @@ -91,9 +95,22 @@ struct extract_ego_functor : public cugraph::c_api::abstract_functor { source_vertices.size(), handle_.get_stream()); + std::optional> source_indices{std::nullopt}; + if constexpr (multi_gpu) { - source_vertices = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( - handle_, std::move(source_vertices)); + auto displacements = cugraph::host_scalar_allgather( + handle_.get_comms(), source_vertices.size(), handle_.get_stream()); + std::exclusive_scan( + displacements.begin(), displacements.end(), displacements.begin(), size_t{0}); + source_indices = rmm::device_uvector(source_vertices.size(), handle_.get_stream()); + cugraph::detail::sequence_fill(handle_.get_stream(), + (*source_indices).data(), + (*source_indices).size(), + displacements[handle_.get_comms().get_rank()]); + + std::tie(source_vertices, source_indices) = + cugraph::detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle_, std::move(source_vertices), std::move(*source_indices)); } cugraph::renumber_ext_vertices( @@ -130,6 +147,31 @@ struct extract_ego_functor : public cugraph::c_api::abstract_functor { graph_view.vertex_partition_range_lasts(), do_expensive_check_); + if constexpr (multi_gpu) { + auto recvcounts = cugraph::host_scalar_allgather( + handle_.get_comms(), (*source_indices).size(), handle_.get_stream()); + std::vector displacements(recvcounts.size()); + std::exclusive_scan(recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); + rmm::device_uvector allgathered_indices(displacements.back() + recvcounts.back(), + handle_.get_stream()); + cugraph::device_allgatherv(handle_.get_comms(), + (*source_indices).begin(), + allgathered_indices.begin(), + recvcounts, + displacements, + handle_.get_stream()); + source_indices = std::move(allgathered_indices); + + std::tie(edge_offsets, src, dst, wgt) = + cugraph::c_api::detail::reorder_extracted_egonets( + handle_, + std::move(*source_indices), + std::move(edge_offsets), + std::move(src), + std::move(dst), + std::move(wgt)); + } + result_ = new cugraph::c_api::cugraph_induced_subgraph_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(src, graph_->vertex_type_), new cugraph::c_api::cugraph_type_erased_device_array_t(dst, graph_->vertex_type_), diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index b6e20272de9..1e2b8f2ad44 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -568,17 +568,17 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle, leiden_partition_at_level( handle, dendrogram, clustering, dendrogram.num_levels()); - rmm::device_uvector unique_cluster_ids(graph_view.number_of_vertices(), + rmm::device_uvector unique_cluster_ids(graph_view.local_vertex_partition_range_size(), handle.get_stream()); thrust::copy(handle.get_thrust_policy(), clustering, - clustering + graph_view.number_of_vertices(), + clustering + graph_view.local_vertex_partition_range_size(), unique_cluster_ids.begin()); remove_duplicates(handle, unique_cluster_ids); relabel_cluster_ids( - handle, unique_cluster_ids, clustering, graph_view.number_of_vertices()); + handle, unique_cluster_ids, clustering, graph_view.local_vertex_partition_range_size()); } } // namespace detail diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/detail/shuffle_vertices.cu index bc450ce3bbf..94729a770f7 100644 --- a/cpp/src/detail/shuffle_vertices.cu +++ b/cpp/src/detail/shuffle_vertices.cu @@ -200,6 +200,12 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); +template std::tuple, rmm::device_uvector> +shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + template std::tuple, rmm::device_uvector> shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, @@ -224,6 +230,12 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); +template std::tuple, rmm::device_uvector> +shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + template std::tuple, rmm::device_uvector> shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d9c88bc179e..e9c6dc446af 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -257,7 +257,7 @@ ConfigureTest(BALANCED_TEST community/balanced_edge_test.cpp) ################################################################################################### # - EGO tests ------------------------------------------------------------------------------------- -ConfigureTest(EGO_TEST community/egonet_test.cpp GPUS 1 PERCENT 75) +ConfigureTest(EGONET_TEST community/egonet_test.cpp GPUS 1 PERCENT 75) ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- @@ -531,7 +531,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG LOUVAIN tests -------------------------------------------------------------------------- - ConfigureTestMG(MG_EGO_TEST community/mg_egonet_test.cu) + ConfigureTestMG(MG_EGONET_TEST community/mg_egonet_test.cu) ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ diff --git a/cpp/tests/c_api/mg_create_graph_test.c b/cpp/tests/c_api/mg_create_graph_test.c index fec319d1881..7156647b025 100644 --- a/cpp/tests/c_api/mg_create_graph_test.c +++ b/cpp/tests/c_api/mg_create_graph_test.c @@ -175,18 +175,18 @@ int test_create_mg_graph_multiple_edge_lists(const cugraph_resource_handle_t* ha int my_rank = cugraph_resource_handle_get_rank(handle); int comm_size = cugraph_resource_handle_get_comm_size(handle); - size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size; + size_t local_num_vertices = num_vertices / comm_size; size_t local_start_vertex = my_rank * local_num_vertices; - size_t local_num_edges = (num_edges + comm_size - 1) / comm_size; + size_t local_num_edges = num_edges / comm_size; size_t local_start_edge = my_rank * local_num_edges; - local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge); - local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex); + local_num_edges = (my_rank != (comm_size - 1)) ? local_num_edges : (num_edges - local_start_edge); + local_num_vertices = (my_rank != (comm_size - 1)) ? local_num_vertices : (num_vertices - local_start_vertex); for (size_t i = 0 ; i < num_local_arrays ; ++i) { - size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; + size_t vertex_count = local_num_vertices / num_local_arrays; size_t vertex_start = i * vertex_count; - vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start); + vertex_count = (i != (num_local_arrays - 1)) ? vertex_count : (local_num_vertices - vertex_start); ret_code = cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error); @@ -363,18 +363,18 @@ int test_create_mg_graph_multiple_edge_lists_multi_edge(const cugraph_resource_h int my_rank = cugraph_resource_handle_get_rank(handle); int comm_size = cugraph_resource_handle_get_comm_size(handle); - size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size; + size_t local_num_vertices = num_vertices / comm_size; size_t local_start_vertex = my_rank * local_num_vertices; - size_t local_num_edges = (num_edges + comm_size - 1) / comm_size; + size_t local_num_edges = num_edges / comm_size; size_t local_start_edge = my_rank * local_num_edges; - local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge); - local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex); + local_num_edges = (my_rank != (comm_size - 1)) ? local_num_edges : (num_edges - local_start_edge); + local_num_vertices = (my_rank != (comm_size - 1)) ? local_num_vertices : (num_vertices - local_start_vertex); for (size_t i = 0 ; i < num_local_arrays ; ++i) { size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; size_t vertex_start = i * vertex_count; - vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start); + vertex_count = (i != (num_local_arrays - 1)) ? vertex_count : (local_num_vertices - vertex_start); ret_code = cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error);