From 2222634688931804414caec2b7821657995cd24d Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Wed, 13 Mar 2024 08:49:51 -0700 Subject: [PATCH] changes to address gather crash --- cpp/include/cugraph/mtmg/instance_manager.hpp | 17 +- .../cugraph/mtmg/vertex_result_view.hpp | 5 +- cpp/src/mtmg/vertex_result.cu | 182 ++++++++++-------- 3 files changed, 125 insertions(+), 79 deletions(-) diff --git a/cpp/include/cugraph/mtmg/instance_manager.hpp b/cpp/include/cugraph/mtmg/instance_manager.hpp index f60063c4101..a2111804997 100644 --- a/cpp/include/cugraph/mtmg/instance_manager.hpp +++ b/cpp/include/cugraph/mtmg/instance_manager.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -73,6 +73,21 @@ class instance_manager_t { return handle_t(*raft_handle_[gpu_id], thread_id, device_ids_[gpu_id]); } + /** + * @brief Get handle for particular GPU + * + * Return a handle for a particular GPU. In a context-free environment + * this lets the caller reconstitute the handle for the right host thread. + * It does assume that the caller will not allow multiple threads to + * concurrently use a gpu_id/thread_id pair. + * + * @return a handle for this thread. + */ + handle_t get_handle(int gpu_id, int thread_id = 0) + { + return handle_t(*raft_handle_[gpu_id], thread_id, device_ids_[gpu_id]); + } + /** * @brief Reset the thread counter * diff --git a/cpp/include/cugraph/mtmg/vertex_result_view.hpp b/cpp/include/cugraph/mtmg/vertex_result_view.hpp index 42b80cea62f..cd22fc98f79 100644 --- a/cpp/include/cugraph/mtmg/vertex_result_view.hpp +++ b/cpp/include/cugraph/mtmg/vertex_result_view.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -45,7 +45,8 @@ class vertex_result_view_t : public detail::device_shared_device_span_t vertices, std::vector const& vertex_partition_range_lasts, cugraph::vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + result_t default_value = 0); }; } // namespace mtmg diff --git a/cpp/src/mtmg/vertex_result.cu b/cpp/src/mtmg/vertex_result.cu index 0339ff10d0a..1dc6b876d52 100644 --- a/cpp/src/mtmg/vertex_result.cu +++ b/cpp/src/mtmg/vertex_result.cu @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -34,58 +35,71 @@ rmm::device_uvector vertex_result_view_t::gather( raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view) + std::optional>& renumber_map_view, + result_t default_value) { - rmm::device_uvector local_vertices(vertices.size(), handle.get_stream()); - rmm::device_uvector vertex_gpu_ids(vertices.size(), handle.get_stream()); - rmm::device_uvector vertex_pos(vertices.size(), handle.get_stream()); - rmm::device_uvector result(vertices.size(), handle.get_stream()); - - raft::copy(local_vertices.data(), vertices.data(), vertices.size(), handle.get_stream()); - cugraph::detail::scalar_fill( - handle.get_stream(), vertex_gpu_ids.data(), vertex_gpu_ids.size(), handle.get_rank()); - cugraph::detail::sequence_fill( - handle.get_stream(), vertex_pos.data(), vertex_pos.size(), size_t{0}); - - rmm::device_uvector d_vertex_partition_range_lasts(vertex_partition_range_lasts.size(), - handle.get_stream()); - raft::update_device(d_vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.data(), - vertex_partition_range_lasts.size(), - handle.get_stream()); + auto stream = handle.raft_handle().get_stream(); + + rmm::device_uvector local_vertices(vertices.size(), stream); + rmm::device_uvector vertex_gpu_ids(multi_gpu ? vertices.size() : 0, stream); + rmm::device_uvector vertex_pos(multi_gpu ? vertices.size() : 0, stream); + + raft::copy(local_vertices.data(), vertices.data(), vertices.size(), stream); + + if constexpr (multi_gpu) { + cugraph::detail::scalar_fill( + stream, vertex_gpu_ids.data(), vertex_gpu_ids.size(), handle.get_rank()); + cugraph::detail::sequence_fill(stream, vertex_pos.data(), vertex_pos.size(), size_t{0}); + + auto const comm_size = handle.raft_handle().get_comms().get_size(); + auto const major_comm_size = + handle.raft_handle().get_subcomm(cugraph::partition_manager::major_comm_name()).get_size(); + auto const minor_comm_size = + handle.raft_handle().get_subcomm(cugraph::partition_manager::minor_comm_name()).get_size(); + + std::forward_as_tuple(local_vertices, std::tie(vertex_gpu_ids, vertex_pos), std::ignore) = + groupby_gpu_id_and_shuffle_kv_pairs( + handle.raft_handle().get_comms(), + local_vertices.begin(), + local_vertices.end(), + thrust::make_zip_iterator(vertex_gpu_ids.begin(), vertex_pos.begin()), + cugraph::detail::compute_gpu_id_from_ext_vertex_t{ + comm_size, major_comm_size, minor_comm_size}, + stream); + } if (renumber_map_view) { - cugraph::renumber_ext_vertices( + cugraph::renumber_local_ext_vertices( handle.raft_handle(), local_vertices.data(), local_vertices.size(), renumber_map_view->get(handle).data(), vertex_partition_view.local_vertex_partition_range_first(), vertex_partition_view.local_vertex_partition_range_last()); - } - auto const major_comm_size = - handle.raft_handle().get_subcomm(cugraph::partition_manager::major_comm_name()).get_size(); - auto const minor_comm_size = - handle.raft_handle().get_subcomm(cugraph::partition_manager::minor_comm_name()).get_size(); - - std::forward_as_tuple(local_vertices, std::tie(vertex_gpu_ids, vertex_pos), std::ignore) = - groupby_gpu_id_and_shuffle_kv_pairs( - handle.raft_handle().get_comms(), - local_vertices.begin(), - local_vertices.end(), - thrust::make_zip_iterator(vertex_gpu_ids.begin(), vertex_pos.begin()), - cugraph::detail::compute_gpu_id_from_int_vertex_t{ - raft::device_span(d_vertex_partition_range_lasts.data(), - d_vertex_partition_range_lasts.size()), - major_comm_size, - minor_comm_size}, - handle.get_stream()); + size_t new_size = thrust::distance( + thrust::make_zip_iterator(local_vertices.begin(), vertex_gpu_ids.begin(), vertex_pos.begin()), + thrust::remove_if( + rmm::exec_policy(stream), + thrust::make_zip_iterator( + local_vertices.begin(), vertex_gpu_ids.begin(), vertex_pos.begin()), + thrust::make_zip_iterator(local_vertices.end(), vertex_gpu_ids.end(), vertex_pos.end()), + [check = cugraph::detail::check_out_of_range_t{ + vertex_partition_view.local_vertex_partition_range_first(), + vertex_partition_view.local_vertex_partition_range_last()}] __device__(auto tuple) { + return check(thrust::get<0>(tuple)); + })); + + local_vertices.resize(new_size, stream); + vertex_gpu_ids.resize(new_size, stream); + vertex_pos.resize(new_size, stream); + } // // Now gather // - rmm::device_uvector tmp_result(local_vertices.size(), handle.get_stream()); + rmm::device_uvector result(local_vertices.size(), stream); + cugraph::detail::scalar_fill(stream, result.data(), result.size(), default_value); auto& wrapped = this->get(handle); @@ -98,32 +112,36 @@ rmm::device_uvector vertex_result_view_t::gather( return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); })); - thrust::gather(handle.get_thrust_policy(), - iter, - iter + local_vertices.size(), - wrapped.begin(), - tmp_result.begin()); - - // - // Shuffle back - // - std::forward_as_tuple(std::ignore, std::tie(std::ignore, vertex_pos, tmp_result), std::ignore) = - groupby_gpu_id_and_shuffle_kv_pairs( - handle.raft_handle().get_comms(), - vertex_gpu_ids.begin(), - vertex_gpu_ids.end(), - thrust::make_zip_iterator(local_vertices.begin(), vertex_pos.begin(), tmp_result.begin()), - thrust::identity{}, - handle.get_stream()); - - // - // Finally, reorder result - // - thrust::scatter(handle.get_thrust_policy(), - tmp_result.begin(), - tmp_result.end(), - vertex_pos.begin(), - result.begin()); + thrust::gather( + rmm::exec_policy(stream), iter, iter + local_vertices.size(), wrapped.begin(), result.begin()); + + if constexpr (multi_gpu) { + rmm::device_uvector tmp_result(0, stream); + + // + // Shuffle back + // + std::forward_as_tuple(std::ignore, std::tie(std::ignore, vertex_pos, tmp_result), std::ignore) = + groupby_gpu_id_and_shuffle_kv_pairs( + handle.raft_handle().get_comms(), + vertex_gpu_ids.begin(), + vertex_gpu_ids.end(), + thrust::make_zip_iterator(local_vertices.begin(), vertex_pos.begin(), result.begin()), + thrust::identity{}, + stream); + + // + // Finally, reorder result + // + result.resize(tmp_result.size(), stream); + cugraph::detail::scalar_fill(stream, result.data(), result.size(), default_value); + + thrust::scatter(rmm::exec_policy(stream), + tmp_result.begin(), + tmp_result.end(), + vertex_pos.begin(), + result.begin()); + } return result; } @@ -133,84 +151,96 @@ template rmm::device_uvector vertex_result_view_t::gather( raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + float default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + float default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + float default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + float default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + double default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + double default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + double default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + double default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + int32_t default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + int32_t default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + int64_t default_value); template rmm::device_uvector vertex_result_view_t::gather( handle_t const& handle, raft::device_span vertices, std::vector const& vertex_partition_range_lasts, vertex_partition_view_t vertex_partition_view, - std::optional>& renumber_map_view); + std::optional>& renumber_map_view, + int64_t default_value); } // namespace mtmg } // namespace cugraph