From ff76a385c4048ac75d1408c3bcf9d4e3018a88b0 Mon Sep 17 00:00:00 2001
From: Seunghwa Kang <45857425+seunghwak@users.noreply.github.com>
Date: Thu, 25 Jan 2024 13:13:58 -0800
Subject: [PATCH] Update per_v_transform_reduce_incoming|outgoing_e to support
 edge masking (#4085)

per_v_transform_reduce_(incoming|outgoing)_e now supports edge masking.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)
  - Naim (https://github.com/naimnv)
  - Joseph Nke (https://github.com/jnke2016)

URL: https://github.com/rapidsai/cugraph/pull/4085
---
 .../eigenvector_centrality_impl.cuh           |   4 +-
 cpp/src/link_analysis/pagerank_impl.cuh       |   4 +-
 cpp/src/prims/detail/prim_functors.cuh        |  60 +++
 ...v_transform_reduce_incoming_outgoing_e.cuh | 442 +++++++++++-------
 ...rm_reduce_v_frontier_outgoing_e_by_dst.cuh |  18 +-
 .../traversal/od_shortest_distances_impl.cuh  |  17 +-
 ..._v_transform_reduce_incoming_outgoing_e.cu |  27 +-
 7 files changed, 381 insertions(+), 191 deletions(-)
 create mode 100644 cpp/src/prims/detail/prim_functors.cuh

diff --git a/cpp/src/centrality/eigenvector_centrality_impl.cuh b/cpp/src/centrality/eigenvector_centrality_impl.cuh
index 8d1bea4004d..2129dca6985 100644
--- a/cpp/src/centrality/eigenvector_centrality_impl.cuh
+++ b/cpp/src/centrality/eigenvector_centrality_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2022-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2022-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -117,7 +117,7 @@ rmm::device_uvector<weight_t> eigenvector_centrality(
       edge_src_centralities.view(),
       edge_dst_dummy_property_t{}.view(),
       edge_dummy_property_t{}.view(),
-      [] __device__(vertex_t, vertex_t, auto src_val, auto, auto) { return src_val * 1.0; },
+      [] __device__(vertex_t, vertex_t, auto src_val, auto, auto) { return src_val; },
       weight_t{0},
       reduce_op::plus<weight_t>{},
       centralities.begin());
diff --git a/cpp/src/link_analysis/pagerank_impl.cuh b/cpp/src/link_analysis/pagerank_impl.cuh
index 92c70fcff20..9a76ba73f92 100644
--- a/cpp/src/link_analysis/pagerank_impl.cuh
+++ b/cpp/src/link_analysis/pagerank_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -288,7 +288,7 @@ centrality_algorithm_metadata_t pagerank(
         edge_dst_dummy_property_t{}.view(),
         edge_dummy_property_t{}.view(),
         [alpha] __device__(vertex_t, vertex_t, auto src_val, auto, auto) {
-          return src_val * 1.0 * alpha;
+          return src_val * alpha;
         },
         unvarying_part,
         reduce_op::plus<result_t>{},
diff --git a/cpp/src/prims/detail/prim_functors.cuh b/cpp/src/prims/detail/prim_functors.cuh
new file mode 100644
index 00000000000..2785ba38dfd
--- /dev/null
+++ b/cpp/src/prims/detail/prim_functors.cuh
@@ -0,0 +1,60 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#pragma once
+
+#include <cugraph/edge_partition_device_view.cuh>
+
+namespace cugraph {
+
+namespace detail {
+
+template <typename GraphViewType,
+          typename EdgePartitionSrcValueInputWrapper,
+          typename EdgePartitionDstValueInputWrapper,
+          typename EdgePartitionEdgeValueInputWrapper,
+          typename EdgeOp>
+struct call_e_op_t {
+  edge_partition_device_view_t<typename GraphViewType::vertex_type,
+                               typename GraphViewType::edge_type,
+                               GraphViewType::is_multi_gpu> const& edge_partition{};
+  EdgePartitionSrcValueInputWrapper const& edge_partition_src_value_input{};
+  EdgePartitionDstValueInputWrapper const& edge_partition_dst_value_input{};
+  EdgePartitionEdgeValueInputWrapper const& edge_partition_e_value_input{};
+  EdgeOp const& e_op{};
+  typename GraphViewType::vertex_type major{};
+  typename GraphViewType::vertex_type major_offset{};
+  typename GraphViewType::vertex_type const* indices{nullptr};
+  typename GraphViewType::edge_type edge_offset{};
+
+  __device__ auto operator()(typename GraphViewType::edge_type i) const
+  {
+    auto minor        = indices[i];
+    auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
+    auto src          = GraphViewType::is_storage_transposed ? minor : major;
+    auto dst          = GraphViewType::is_storage_transposed ? major : minor;
+    auto src_offset   = GraphViewType::is_storage_transposed ? minor_offset : major_offset;
+    auto dst_offset   = GraphViewType::is_storage_transposed ? major_offset : minor_offset;
+    return e_op(src,
+                dst,
+                edge_partition_src_value_input.get(src_offset),
+                edge_partition_dst_value_input.get(dst_offset),
+                edge_partition_e_value_input.get(edge_offset + i));
+  }
+};
+
+}  // namespace detail
+
+}  // namespace cugraph
diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh
index 1a7fc0130c4..24b4f0857b1 100644
--- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh
+++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -16,6 +16,7 @@
 #pragma once
 
 #include 
+#include <prims/detail/prim_functors.cuh>
 #include 
 #include 
 #include 
@@ -63,11 +64,84 @@ namespace detail {
 
 int32_t constexpr per_v_transform_reduce_e_kernel_block_size = 512;
 
+template <typename vertex_t,
+          typename edge_t,
+          bool multi_gpu,
+          typename result_t,
+          typename TransformOp,
+          typename ReduceOp,
+          typename ResultValueOutputIteratorOrWrapper>
+struct transform_and_atomic_reduce_t {
+  edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> const& edge_partition{};
+  result_t identity_element{};
+  vertex_t const* indices{nullptr};
+  TransformOp const& transform_op{};
+  ResultValueOutputIteratorOrWrapper& result_value_output{};
+
+  __device__ void operator()(edge_t i) const
+  {
+    auto e_op_result = transform_op(i);
+    if (e_op_result != identity_element) {
+      auto minor        = indices[i];
+      auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
+      if constexpr (multi_gpu) {
+        reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
+      } else {
+        reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
+      }
+    }
+  }
+};
+
+template <bool update_major,
+          typename vertex_t,
+          typename edge_t,
+          bool multi_gpu,
+          typename result_t,
+          typename TransformOp,
+          typename ReduceOp,
+          typename ResultValueOutputIteratorOrWrapper>
+__device__ void update_result_value_output(
+  edge_partition_device_view_t<vertex_t, edge_t, multi_gpu> const& edge_partition,
+  vertex_t const* indices,
+  edge_t local_degree,
+  TransformOp const& transform_op,
+  result_t init,
+  ReduceOp const& reduce_op,
+  size_t output_idx /* relevant only when update_major == true */,
+  result_t identity_element,
+  ResultValueOutputIteratorOrWrapper& result_value_output)
+{
+  if constexpr (update_major) {
+    *(result_value_output + output_idx) =
+      thrust::transform_reduce(thrust::seq,
+                               thrust::make_counting_iterator(edge_t{0}),
+                               thrust::make_counting_iterator(local_degree),
+                               transform_op,
+                               init,
+                               reduce_op);
+  } else {
+    thrust::for_each(
+      thrust::seq,
+      thrust::make_counting_iterator(edge_t{0}),
+      thrust::make_counting_iterator(local_degree),
+      transform_and_atomic_reduce_t<vertex_t,
+                                    edge_t,
+                                    multi_gpu,
+                                    result_t,
+                                    TransformOp,
+                                    ReduceOp,
+                                    ResultValueOutputIteratorOrWrapper>{
+        edge_partition, identity_element, indices, transform_op, result_value_output});
+  }
+}
+
 template <bool update_major,
           typename GraphViewType,
           typename EdgePartitionSrcValueInputWrapper,
           typename EdgePartitionDstValueInputWrapper,
           typename EdgePartitionEdgeValueInputWrapper,
+          typename EdgePartitionEdgeMaskWrapper,
           typename ResultValueOutputIteratorOrWrapper,
           typename EdgeOp,
           typename ReduceOp,
           typename T>
 __global__ void per_v_transform_reduce_e_hypersparse(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
+  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper result_value_output,
   EdgeOp e_op,
   T init /* relevant only if update_major == true */,
+  T identity_element /* relevant only if update_major == true */,
   ReduceOp reduce_op)
 {
   static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v<
@@ -104,6 +180,7 @@ __global__ void per_v_transform_reduce_e_hypersparse(
   while (idx < static_cast<size_t>(dcs_nzd_vertex_count)) {
     auto major =
       *(edge_partition.major_from_major_hypersparse_idx_nocheck(static_cast<vertex_t>(idx)));
+    auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
     auto major_idx =
       major_start_offset + idx;  // major_offset != major_idx in the hypersparse region
     vertex_t const* indices{nullptr};
@@ -111,60 +188,50 @@ __global__ void per_v_transform_reduce_e_hypersparse(
     edge_t edge_offset{};
     edge_t local_degree{};
     thrust::tie(indices, edge_offset, local_degree) =
       edge_partition.local_edges(static_cast<vertex_t>(major_idx));
-    auto transform_op = [&edge_partition,
-                         &edge_partition_src_value_input,
-                         &edge_partition_dst_value_input,
-                         &edge_partition_e_value_input,
-                         &e_op,
-                         major,
-                         indices,
-                         edge_offset] __device__(auto i) {
-      auto major_offset = edge_partition.major_offset_from_major_nocheck(major);
-      auto minor        = indices[i];
-      auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
-      auto src          = GraphViewType::is_storage_transposed ? minor : major;
-      auto dst          = GraphViewType::is_storage_transposed ? major : minor;
-      auto src_offset   = GraphViewType::is_storage_transposed ? minor_offset : major_offset;
-      auto dst_offset   = GraphViewType::is_storage_transposed ? major_offset : minor_offset;
-      return e_op(src,
-                  dst,
-                  edge_partition_src_value_input.get(src_offset),
-                  edge_partition_dst_value_input.get(dst_offset),
-                  edge_partition_e_value_input.get(edge_offset + i));
-    };
-
-    if constexpr (update_major) {
-      *(result_value_output + (major - *(edge_partition.major_hypersparse_first()))) =
-        thrust::transform_reduce(thrust::seq,
-                                 thrust::make_counting_iterator(edge_t{0}),
-                                 thrust::make_counting_iterator(local_degree),
-                                 transform_op,
-                                 init,
-                                 reduce_op);
+    auto call_e_op = call_e_op_t<GraphViewType,
+                                 EdgePartitionSrcValueInputWrapper,
+                                 EdgePartitionDstValueInputWrapper,
+                                 EdgePartitionEdgeValueInputWrapper,
+                                 EdgeOp>{edge_partition,
+                                         edge_partition_src_value_input,
+                                         edge_partition_dst_value_input,
+                                         edge_partition_e_value_input,
+                                         e_op,
+                                         major,
+                                         major_offset,
+                                         indices,
+                                         edge_offset};
+
+    if (edge_partition_e_mask) {
+      auto transform_op =
+        [&edge_partition_e_mask, &call_e_op, identity_element, edge_offset] __device__(auto i) {
+          if ((*edge_partition_e_mask).get(edge_offset + i)) {
+            return call_e_op(i);
+          } else {
+            return identity_element;
+          }
+        };
+
+      update_result_value_output<update_major>(edge_partition,
+                                               indices,
+                                               local_degree,
+                                               transform_op,
+                                               init,
+                                               reduce_op,
+                                               major - *(edge_partition).major_hypersparse_first(),
+                                               identity_element,
+                                               result_value_output);
     } else {
-      if constexpr (GraphViewType::is_multi_gpu) {
-        thrust::for_each(
-          thrust::seq,
-          thrust::make_counting_iterator(edge_t{0}),
-          thrust::make_counting_iterator(local_degree),
-          [&edge_partition, indices, &result_value_output, &transform_op] __device__(auto i) {
-            auto e_op_result  = transform_op(i);
-            auto minor        = indices[i];
-            auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
-            reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
-          });
-      } else {
-        thrust::for_each(
-          thrust::seq,
-          thrust::make_counting_iterator(edge_t{0}),
-          thrust::make_counting_iterator(local_degree),
-          [&edge_partition, indices, &result_value_output, &transform_op] __device__(auto i) {
-            auto e_op_result  = transform_op(i);
-            auto minor        = indices[i];
-            auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
-            reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
-          });
-      }
+      update_result_value_output<update_major>(edge_partition,
+                                               indices,
+                                               local_degree,
+                                               call_e_op,
+                                               init,
+                                               reduce_op,
+                                               major - *(edge_partition).major_hypersparse_first(),
+                                               identity_element,
+                                               result_value_output);
     }
     idx += gridDim.x * blockDim.x;
   }
@@ -175,6 +242,7 @@ template <bool update_major,
           typename GraphViewType,
           typename EdgePartitionSrcValueInputWrapper,
           typename EdgePartitionDstValueInputWrapper,
           typename EdgePartitionEdgeValueInputWrapper,
+          typename EdgePartitionEdgeMaskWrapper,
           typename ResultValueOutputIteratorOrWrapper,
           typename EdgeOp,
           typename ReduceOp,
           typename T>
 __global__ void per_v_transform_reduce_e_low_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
+  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper result_value_output,
   EdgeOp e_op,
   T init /* relevant only if update_major == true */,
+  T identity_element /* relevant only if update_major == true */,
   ReduceOp reduce_op)
 {
   static_assert(update_major || reduce_op::has_compatible_raft_comms_op_v<
@@ -209,71 +279,57 @@ __global__ void per_v_transform_reduce_e_low_degree(
   auto idx = static_cast<size_t>(tid);
 
   while (idx < static_cast<size_t>(major_range_last - major_range_first)) {
-    auto major_offset = major_start_offset + idx;
+    auto major_offset = static_cast<vertex_t>(major_start_offset + idx);
+    auto major        = edge_partition.major_from_major_offset_nocheck(major_offset);
     vertex_t const* indices{nullptr};
     edge_t edge_offset{};
     edge_t local_degree{};
     thrust::tie(indices, edge_offset, local_degree) =
       edge_partition.local_edges(static_cast<vertex_t>(major_offset));
-    auto transform_op = [&edge_partition,
-                         &edge_partition_src_value_input,
-                         &edge_partition_dst_value_input,
-                         &edge_partition_e_value_input,
-                         &e_op,
-                         major_offset,
-                         indices,
-                         edge_offset] __device__(auto i) {
-      auto minor        = indices[i];
-      auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
-      auto src          = GraphViewType::is_storage_transposed
-                            ? minor
-                            : edge_partition.major_from_major_offset_nocheck(major_offset);
-      auto dst          = GraphViewType::is_storage_transposed
-                            ? edge_partition.major_from_major_offset_nocheck(major_offset)
-                            : minor;
-      auto src_offset =
-        GraphViewType::is_storage_transposed ? minor_offset : static_cast<vertex_t>(major_offset);
-      auto dst_offset =
-        GraphViewType::is_storage_transposed ? static_cast<vertex_t>(major_offset) : minor_offset;
-      return e_op(src,
-                  dst,
-                  edge_partition_src_value_input.get(src_offset),
-                  edge_partition_dst_value_input.get(dst_offset),
-                  edge_partition_e_value_input.get(edge_offset + i));
-    };
-
-    if constexpr (update_major) {
-      *(result_value_output + idx) =
-        thrust::transform_reduce(thrust::seq,
-                                 thrust::make_counting_iterator(edge_t{0}),
-                                 thrust::make_counting_iterator(local_degree),
-                                 transform_op,
-                                 init,
-                                 reduce_op);
+    auto call_e_op = call_e_op_t<GraphViewType,
+                                 EdgePartitionSrcValueInputWrapper,
+                                 EdgePartitionDstValueInputWrapper,
+                                 EdgePartitionEdgeValueInputWrapper,
+                                 EdgeOp>{edge_partition,
+                                         edge_partition_src_value_input,
+                                         edge_partition_dst_value_input,
+                                         edge_partition_e_value_input,
+                                         e_op,
+                                         major,
+                                         major_offset,
+                                         indices,
+                                         edge_offset};
+
+    if (edge_partition_e_mask) {
+      auto transform_op =
+        [&edge_partition_e_mask, &call_e_op, identity_element, edge_offset] __device__(auto i) {
+          if ((*edge_partition_e_mask).get(edge_offset + i)) {
+            return call_e_op(i);
+          } else {
+            return identity_element;
+          }
+        };
+
+      update_result_value_output<update_major>(edge_partition,
+                                               indices,
+                                               local_degree,
+                                               transform_op,
+                                               init,
+                                               reduce_op,
+                                               idx,
+                                               identity_element,
+                                               result_value_output);
     } else {
-      if constexpr (GraphViewType::is_multi_gpu) {
-        thrust::for_each(
-          thrust::seq,
-          thrust::make_counting_iterator(edge_t{0}),
-          thrust::make_counting_iterator(local_degree),
-          [&edge_partition, indices, &result_value_output, &transform_op] __device__(auto i) {
-            auto e_op_result  = transform_op(i);
-            auto minor        = indices[i];
-            auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
-            reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
-          });
-      } else {
-        thrust::for_each(
-          thrust::seq,
-          thrust::make_counting_iterator(edge_t{0}),
-          thrust::make_counting_iterator(local_degree),
-          [&edge_partition, indices, &result_value_output, &transform_op] __device__(auto i) {
-            auto e_op_result  = transform_op(i);
-            auto minor        = indices[i];
-            auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
-            reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
-          });
-      }
+      update_result_value_output<update_major>(edge_partition,
+                                               indices,
+                                               local_degree,
+                                               call_e_op,
+                                               init,
+                                               reduce_op,
+                                               idx,
+                                               identity_element,
+                                               result_value_output);
     }
     idx += gridDim.x * blockDim.x;
   }
@@ -284,6 +340,7 @@ template <bool update_major,
           typename GraphViewType,
           typename EdgePartitionSrcValueInputWrapper,
           typename EdgePartitionDstValueInputWrapper,
           typename EdgePartitionEdgeValueInputWrapper,
+          typename EdgePartitionEdgeMaskWrapper,
           typename ResultValueOutputIteratorOrWrapper,
           typename EdgeOp,
           typename ReduceOp,
           typename T>
 __global__ void per_v_transform_reduce_e_mid_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
+  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper result_value_output,
   EdgeOp e_op,
   T init /* relevant only if update_major == true */,
@@ -327,41 +385,61 @@ __global__ void per_v_transform_reduce_e_mid_degree(
     raft::warp_size()];  // relevant only if update_major == true
 
   while (idx < static_cast<size_t>(major_range_last - major_range_first)) {
-    auto major_offset = major_start_offset + idx;
+    auto major_offset = static_cast<vertex_t>(major_start_offset + idx);
+    auto major        = edge_partition.major_from_major_offset_nocheck(major_offset);
     vertex_t const* indices{nullptr};
     edge_t edge_offset{};
     edge_t local_degree{};
     thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset);
+
+    auto call_e_op = call_e_op_t<GraphViewType,
+                                 EdgePartitionSrcValueInputWrapper,
+                                 EdgePartitionDstValueInputWrapper,
+                                 EdgePartitionEdgeValueInputWrapper,
+                                 EdgeOp>{edge_partition,
+                                         edge_partition_src_value_input,
+                                         edge_partition_dst_value_input,
+                                         edge_partition_e_value_input,
+                                         e_op,
+                                         major,
+                                         major_offset,
+                                         indices,
+                                         edge_offset};
+
     [[maybe_unused]] auto reduced_e_op_result =
       lane_id == 0 ? init : identity_element;  // relevant only if update_major == true
-    for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) {
-      auto minor        = indices[i];
-      auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
-      auto src          = GraphViewType::is_storage_transposed
-                            ? minor
-                            : edge_partition.major_from_major_offset_nocheck(major_offset);
-      auto dst          = GraphViewType::is_storage_transposed
-                            ? edge_partition.major_from_major_offset_nocheck(major_offset)
-                            : minor;
-      auto src_offset =
-        GraphViewType::is_storage_transposed ? minor_offset : static_cast<vertex_t>(major_offset);
-      auto dst_offset =
-        GraphViewType::is_storage_transposed ? static_cast<vertex_t>(major_offset) : minor_offset;
-      auto e_op_result = e_op(src,
-                              dst,
-                              edge_partition_src_value_input.get(src_offset),
-                              edge_partition_dst_value_input.get(dst_offset),
-                              edge_partition_e_value_input.get(edge_offset + i));
-      if constexpr (update_major) {
-        reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result);
-      } else {
-        if constexpr (GraphViewType::is_multi_gpu) {
-          reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
+    if (edge_partition_e_mask) {
+      for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) {
+        if ((*edge_partition_e_mask).get(edge_offset + i)) {
+          auto e_op_result = call_e_op(i);
+          if constexpr (update_major) {
+            reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result);
+          } else {
+            auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]);
+            if constexpr (GraphViewType::is_multi_gpu) {
+              reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
+            } else {
+              reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
+            }
+          }
+        }
+      }
+    } else {
+      for (edge_t i = lane_id; i < local_degree; i += raft::warp_size()) {
+        auto e_op_result = call_e_op(i);
+        if constexpr (update_major) {
+          reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result);
         } else {
-          reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
+          auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]);
+          if constexpr (GraphViewType::is_multi_gpu) {
+            reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
+          } else {
+            reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
+          }
         }
       }
     }
+
     if constexpr (update_major) {
       reduced_e_op_result = WarpReduce(temp_storage[threadIdx.x / raft::warp_size()])
                               .Reduce(reduced_e_op_result, reduce_op);
@@ -377,6 +455,7 @@ template <bool update_major,
           typename GraphViewType,
           typename EdgePartitionSrcValueInputWrapper,
           typename EdgePartitionDstValueInputWrapper,
           typename EdgePartitionEdgeValueInputWrapper,
+          typename EdgePartitionEdgeMaskWrapper,
           typename ResultValueOutputIteratorOrWrapper,
           typename EdgeOp,
           typename ReduceOp,
           typename T>
 __global__ void per_v_transform_reduce_e_high_degree(
   edge_partition_device_view_t<typename GraphViewType::vertex_type,
                                typename GraphViewType::edge_type,
                                GraphViewType::is_multi_gpu> edge_partition,
   EdgePartitionSrcValueInputWrapper edge_partition_src_value_input,
   EdgePartitionDstValueInputWrapper edge_partition_dst_value_input,
   EdgePartitionEdgeValueInputWrapper edge_partition_e_value_input,
+  thrust::optional<EdgePartitionEdgeMaskWrapper> edge_partition_e_mask,
   ResultValueOutputIteratorOrWrapper result_value_output,
   EdgeOp e_op,
   T init /* relevant only if update_major == true */,
@@ -416,41 +496,61 @@ __global__ void per_v_transform_reduce_e_high_degree(
   typename BlockReduce::TempStorage temp_storage;  // relevant only if update_major == true
 
   while (idx < static_cast<size_t>(major_range_last - major_range_first)) {
-    auto major_offset = major_start_offset + idx;
+    auto major_offset = static_cast<vertex_t>(major_start_offset + idx);
+    auto major        = edge_partition.major_from_major_offset_nocheck(major_offset);
     vertex_t const* indices{nullptr};
     edge_t edge_offset{};
     edge_t local_degree{};
     thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges(major_offset);
+
+    auto call_e_op = call_e_op_t<GraphViewType,
+                                 EdgePartitionSrcValueInputWrapper,
+                                 EdgePartitionDstValueInputWrapper,
+                                 EdgePartitionEdgeValueInputWrapper,
+                                 EdgeOp>{edge_partition,
+                                         edge_partition_src_value_input,
+                                         edge_partition_dst_value_input,
+                                         edge_partition_e_value_input,
+                                         e_op,
+                                         major,
+                                         major_offset,
+                                         indices,
+                                         edge_offset};
+
     [[maybe_unused]] auto reduced_e_op_result =
       threadIdx.x == 0 ? init : identity_element;  // relevant only if update_major == true
-    for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
-      auto minor        = indices[i];
-      auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(minor);
-      auto src          = GraphViewType::is_storage_transposed
-                            ? minor
-                            : edge_partition.major_from_major_offset_nocheck(major_offset);
-      auto dst          = GraphViewType::is_storage_transposed
-                            ? edge_partition.major_from_major_offset_nocheck(major_offset)
-                            : minor;
-      auto src_offset =
-        GraphViewType::is_storage_transposed ? minor_offset : static_cast<vertex_t>(major_offset);
-      auto dst_offset =
-        GraphViewType::is_storage_transposed ? static_cast<vertex_t>(major_offset) : minor_offset;
-      auto e_op_result = e_op(src,
-                              dst,
-                              edge_partition_src_value_input.get(src_offset),
-                              edge_partition_dst_value_input.get(dst_offset),
-                              edge_partition_e_value_input.get(edge_offset + i));
-      if constexpr (update_major) {
-        reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result);
-      } else {
-        if constexpr (GraphViewType::is_multi_gpu) {
-          reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
+    if (edge_partition_e_mask) {
+      for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
+        if ((*edge_partition_e_mask).get(edge_offset + i)) {
+          auto e_op_result = call_e_op(i);
+          if constexpr (update_major) {
+            reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result);
+          } else {
+            auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]);
+            if constexpr (GraphViewType::is_multi_gpu) {
+              reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
+            } else {
+              reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
+            }
+          }
+        }
+      }
+    } else {
+      for (edge_t i = threadIdx.x; i < local_degree; i += blockDim.x) {
+        auto e_op_result = call_e_op(i);
+        if constexpr (update_major) {
+          reduced_e_op_result = reduce_op(reduced_e_op_result, e_op_result);
        } else {
-          reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
+          auto minor_offset = edge_partition.minor_offset_from_minor_nocheck(indices[i]);
+          if constexpr (GraphViewType::is_multi_gpu) {
+            reduce_op::atomic_reduce<ReduceOp>(result_value_output, minor_offset, e_op_result);
+          } else {
+            reduce_op::atomic_reduce<ReduceOp>(result_value_output + minor_offset, e_op_result);
+          }
         }
       }
     }
+
     if constexpr (update_major) {
       reduced_e_op_result = BlockReduce(temp_storage).Reduce(reduced_e_op_result, reduce_op);
       if (threadIdx.x == 0) { *(result_value_output + idx) = reduced_e_op_result; }
@@ -656,10 +756,18 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
 
   if (stream_pool_indices) { handle.sync_stream(); }
 
+  auto edge_mask_view = graph_view.edge_mask_view();
+
   for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) {
     auto edge_partition =
       edge_partition_device_view_t<vertex_t, edge_t, GraphViewType::is_multi_gpu>(
         graph_view.local_edge_partition_view(i));
+    auto edge_partition_e_mask =
+      edge_mask_view
+        ? thrust::make_optional<
+            detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
+            *edge_mask_view, i)
+        : thrust::nullopt;
 
     auto major_init = ReduceOp::identity_element;
     if constexpr (update_major) {
@@ -737,9 +845,11 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
             edge_partition_src_value_input,
             edge_partition_dst_value_input,
             edge_partition_e_value_input,
+            edge_partition_e_mask,
             segment_output_buffer,
             e_op,
             major_init,
+            ReduceOp::identity_element,
             reduce_op);
       }
     }
@@ -761,9 +871,11 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
             edge_partition_src_value_input,
             edge_partition_dst_value_input,
             edge_partition_e_value_input,
+            edge_partition_e_mask,
             segment_output_buffer,
             e_op,
             major_init,
+            ReduceOp::identity_element,
             reduce_op);
       }
       if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) {
@@ -784,6 +896,7 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
             edge_partition_src_value_input,
             edge_partition_dst_value_input,
             edge_partition_e_value_input,
+            edge_partition_e_mask,
             segment_output_buffer,
             e_op,
             major_init,
@@ -806,6 +919,7 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
             edge_partition_src_value_input,
             edge_partition_dst_value_input,
             edge_partition_e_value_input,
+            edge_partition_e_mask,
             output_buffer,
             e_op,
             major_init,
@@ -825,9 +939,11 @@ void per_v_transform_reduce_e(raft::handle_t const& handle,
           edge_partition_src_value_input,
           edge_partition_dst_value_input,
           edge_partition_e_value_input,
+          edge_partition_e_mask,
           output_buffer,
           e_op,
           major_init,
+          ReduceOp::identity_element,
           reduce_op);
     }
   }
@@ -1056,8 +1172,6 @@ void per_v_transform_reduce_incoming_e(raft::handle_t const& handle,
                                        VertexValueOutputIterator vertex_value_output_first,
                                        bool do_expensive_check = false)
 {
-  CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
-
   if (do_expensive_check) {
     // currently, nothing to do
   }
@@ -1137,8 +1251,6 @@ void per_v_transform_reduce_outgoing_e(raft::handle_t const& handle,
                                        VertexValueOutputIterator vertex_value_output_first,
                                        bool do_expensive_check = false)
 {
-  CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");
-
   if (do_expensive_check) {
     // currently, nothing to do
   }
diff --git a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh
index 745f1a8fd8e..18e722d62cc 100644
--- a/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh
+++ b/cpp/src/prims/transform_reduce_v_frontier_outgoing_e_by_dst.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2020-2024, NVIDIA CORPORATION.
  *
  * Licensed under the Apache License, Version 2.0 (the "License");
  * you may not use this file except in compliance with the License.
@@ -80,7 +80,7 @@ template <typename key_t,
           typename e_value_t,
           typename EdgeOp>
-struct call_e_op_t {
+struct transform_reduce_v_frontier_call_e_op_t {
   EdgeOp e_op{};
 
   __device__ thrust::optional<
@@ -331,13 +331,13 @@ transform_reduce_v_frontier_outgoing_e_by_dst(raft::handle_t const& handle,
 
   // 1. fill the buffer
 
-  detail::call_e_op_t
+  detail::transform_reduce_v_frontier_call_e_op_t
     e_op_wrapper{e_op};
 
   auto [key_buffer, payload_buffer] =
diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh
index cc69cb5f67f..58fae83bca0 100644
--- a/cpp/src/traversal/od_shortest_distances_impl.cuh
+++ b/cpp/src/traversal/od_shortest_distances_impl.cuh
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023, NVIDIA CORPORATION.
+ * Copyright (c) 2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -639,13 +639,14 @@ rmm::device_uvector<weight_t> od_shortest_distances(
       static_cast<vertex_t>(origins.size()),
       cutoff,
       invalid_distance};
-    detail::call_e_op_t<thrust::tuple<vertex_t, vertex_t>,
-                        weight_t,
-                        vertex_t,
-                        thrust::nullopt_t,
-                        thrust::nullopt_t,
-                        weight_t,
-                        e_op_t>
+    detail::transform_reduce_v_frontier_call_e_op_t<
+      thrust::tuple<vertex_t, vertex_t>,
+      weight_t,
+      vertex_t,
+      thrust::nullopt_t,
+      thrust::nullopt_t,
+      weight_t,
+      e_op_t>
       e_op_wrapper{e_op};
 
     auto new_frontier_tagged_vertex_buffer =
diff --git a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu
index 677d6ce5022..fc8114a4652 100644
--- a/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu
+++ b/cpp/tests/prims/mg_per_v_transform_reduce_incoming_outgoing_e.cu
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2021-2023, NVIDIA CORPORATION.
+ * Copyright (c) 2021-2024, NVIDIA CORPORATION.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
@@ -150,8 +150,9 @@ struct result_compare {
 };
 
 struct Prims_Usecase {
-  bool check_correctness{true};
   bool test_weighted{false};
+  bool edge_masking{false};
+  bool check_correctness{true};
 };
 
 template <typename input_usecase_t>
@@ -200,6 +201,13 @@ class Tests_MGPerVTransformReduceIncomingOutgoingE
 
     auto mg_graph_view = mg_graph.view();
 
+    std::optional<cugraph::edge_property_t<decltype(mg_graph_view), bool>> edge_mask{std::nullopt};
+    if (prims_usecase.edge_masking) {
+      edge_mask = cugraph::test::generate<decltype(mg_graph_view), bool>::edge_property(
+        *handle_, mg_graph_view, 2);
+      mg_graph_view.attach_edge_mask((*edge_mask).view());
+    }
+
     // 2. run MG transform reduce
 
     const int hash_bin_count = 5;
@@ -674,7 +682,10 @@ INSTANTIATE_TEST_SUITE_P(
   file_test,
   Tests_MGPerVTransformReduceIncomingOutgoingE_File,
   ::testing::Combine(
-    ::testing::Values(Prims_Usecase{true}),
+    ::testing::Values(Prims_Usecase{false, false, true},
+                      Prims_Usecase{false, true, true},
+                      Prims_Usecase{true, false, true},
+                      Prims_Usecase{true, true, true}),
     ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
                       cugraph::test::File_Usecase("test/datasets/web-Google.mtx"),
                       cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"),
 
 INSTANTIATE_TEST_SUITE_P(rmat_small_test,
                          Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat,
-                         ::testing::Combine(::testing::Values(Prims_Usecase{true}),
+                         ::testing::Combine(::testing::Values(Prims_Usecase{false, false, true},
+                                                              Prims_Usecase{false, true, true},
+                                                              Prims_Usecase{true, false, true},
+                                                              Prims_Usecase{true, true, true}),
                                             ::testing::Values(cugraph::test::Rmat_Usecase(
                                               10, 16, 0.57, 0.19, 0.19, 0, false, false))));
 
@@ -694,7 +708,10 @@ INSTANTIATE_TEST_SUITE_P(
  rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with
                          --gtest_filter to select only the rmat_benchmark_test with a specific
                          vertex & edge type combination) by command line arguments and do not
                          include more than one Rmat_Usecase that differ only in scale or edge
                          factor (to avoid running same benchmarks more than once) */
   Tests_MGPerVTransformReduceIncomingOutgoingE_Rmat,
   ::testing::Combine(
-    ::testing::Values(Prims_Usecase{false}),
+    ::testing::Values(Prims_Usecase{false, false, false},
+                      Prims_Usecase{false, true, false},
+                      Prims_Usecase{true, false, false},
+                      Prims_Usecase{true, true, false}),
     ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false))));
 
 CUGRAPH_MG_TEST_PROGRAM_MAIN()
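
Usage sketch (editor's addition, not part of the patch): the test changes above show the calling
pattern this PR enables -- build a Boolean edge property, attach it to the graph view as a mask,
then invoke the prim unchanged. The helper below is a minimal, hypothetical example of that
pattern; `masked_out_degrees`, its parameters, and the degree-counting e_op are illustrative and
assume the cugraph 24.02 API, they are not part of this PR.

  // Counts, per local vertex, the outgoing edges that survive the mask.  With a mask
  // attached, edges whose mask bit is 0 simply stop contributing to the reduction.
  #include <cugraph/edge_property.hpp>
  #include <cugraph/graph_view.hpp>

  #include <prims/per_v_transform_reduce_incoming_outgoing_e.cuh>
  #include <prims/reduce_op.cuh>

  #include <raft/core/handle.hpp>
  #include <rmm/device_uvector.hpp>

  #include <cstdint>

  template <typename vertex_t, typename edge_t, bool multi_gpu>
  rmm::device_uvector<edge_t> masked_out_degrees(
    raft::handle_t const& handle,
    cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu>& graph_view,
    cugraph::edge_property_view_t<edge_t, uint32_t const*, bool> edge_mask_view)
  {
    graph_view.attach_edge_mask(edge_mask_view);  // 0-bit edges are skipped from here on

    rmm::device_uvector<edge_t> degrees(graph_view.local_vertex_partition_range_size(),
                                        handle.get_stream());
    cugraph::per_v_transform_reduce_outgoing_e(
      handle,
      graph_view,
      cugraph::edge_src_dummy_property_t{}.view(),
      cugraph::edge_dst_dummy_property_t{}.view(),
      cugraph::edge_dummy_property_t{}.view(),
      [] __device__(vertex_t, vertex_t, auto, auto, auto) { return edge_t{1}; },
      edge_t{0},
      cugraph::reduce_op::plus<edge_t>{},
      degrees.begin());
    return degrees;
  }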
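The masked kernels above substitute identity_element for masked-out edges before handing
transform_op to thrust::transform_reduce, so the reduction result is unaffected by skipped edges.
A host-side analogue of that contract, in plain C++ and for illustration only (the vectors and
values here are made up):

  #include <cassert>
  #include <cstddef>
  #include <cstdint>
  #include <vector>

  int main()
  {
    std::vector<int> edge_values{3, 5, 7, 9};
    std::vector<std::uint8_t> mask{1, 0, 1, 0};  // 1 = edge passes the mask

    int const identity = 0;  // identity of the plus reduction
    int sum            = 0;
    for (std::size_t i = 0; i < edge_values.size(); ++i) {
      // same shape as the masked transform_op: masked-out edges return identity
      sum += mask[i] ? edge_values[i] : identity;
    }
    assert(sum == 10);  // only the unmasked edges (3 + 7) contribute
    return 0;
  }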