diff --git a/cpp/include/cugraph/detail/decompress_edge_partition.cuh b/cpp/include/cugraph/detail/decompress_edge_partition.cuh index cd8739114f2..4b256a0413a 100644 --- a/cpp/include/cugraph/detail/decompress_edge_partition.cuh +++ b/cpp/include/cugraph/detail/decompress_edge_partition.cuh @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -46,7 +47,7 @@ __global__ void decompress_to_edgelist_mid_degree( edge_partition_device_view_t edge_partition, vertex_t major_range_first, vertex_t major_range_last, - vertex_t* majors) + raft::device_span majors) { auto const tid = threadIdx.x + blockIdx.x * blockDim.x; static_assert(decompress_edge_partition_block_size % raft::warp_size() == 0); @@ -76,7 +77,7 @@ __global__ void decompress_to_edgelist_high_degree( edge_partition_device_view_t edge_partition, vertex_t major_range_first, vertex_t major_range_last, - vertex_t* majors) + raft::device_span majors) { auto major_start_offset = static_cast(major_range_first - edge_partition.major_range_first()); @@ -103,10 +104,19 @@ template void decompress_edge_partition_to_fill_edgelist_majors( raft::handle_t const& handle, edge_partition_device_view_t edge_partition, - vertex_t* majors, + std::optional> + edge_partition_mask_view, + raft::device_span majors, std::optional> const& segment_offsets) { - auto execution_policy = handle.get_thrust_policy(); + auto tmp_buffer = edge_partition_mask_view + ? std::make_optional>( + edge_partition.number_of_edges(), handle.get_stream()) + : std::nullopt; + + auto output_buffer = + tmp_buffer ? raft::device_span((*tmp_buffer).data(), (*tmp_buffer).size()) : majors; + if (segment_offsets) { // FIXME: we may further improve performance by 1) concurrently running kernels on different // segments; 2) individually tuning block sizes for different segments; and 3) adding one more @@ -124,7 +134,7 @@ void decompress_edge_partition_to_fill_edgelist_majors( edge_partition, edge_partition.major_range_first(), edge_partition.major_range_first() + (*segment_offsets)[1], - majors); + output_buffer); } if ((*segment_offsets)[2] - (*segment_offsets)[1] > 0) { raft::grid_1d_warp_t update_grid((*segment_offsets)[2] - (*segment_offsets)[1], @@ -138,49 +148,63 @@ void decompress_edge_partition_to_fill_edgelist_majors( edge_partition, edge_partition.major_range_first() + (*segment_offsets)[1], edge_partition.major_range_first() + (*segment_offsets)[2], - majors); + output_buffer); } if ((*segment_offsets)[3] - (*segment_offsets)[2] > 0) { thrust::for_each( - execution_policy, + handle.get_thrust_policy(), thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[2], thrust::make_counting_iterator(edge_partition.major_range_first()) + (*segment_offsets)[3], - [edge_partition, majors] __device__(auto major) { + [edge_partition, output_buffer] __device__(auto major) { auto major_offset = edge_partition.major_offset_from_major_nocheck(major); auto local_degree = edge_partition.local_degree(major_offset); auto local_offset = edge_partition.local_offset(major_offset); - thrust::fill( - thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); + thrust::fill(thrust::seq, + output_buffer.begin() + local_offset, + output_buffer.begin() + local_offset + local_degree, + major); }); } if (edge_partition.dcs_nzd_vertex_count() && (*(edge_partition.dcs_nzd_vertex_count()) > 0)) { thrust::for_each( - execution_policy, + handle.get_thrust_policy(), thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(*(edge_partition.dcs_nzd_vertex_count())), - [edge_partition, major_start_offset = (*segment_offsets)[3], majors] __device__(auto idx) { + [edge_partition, major_start_offset = (*segment_offsets)[3], output_buffer] __device__( + auto idx) { auto major = *(edge_partition.major_from_major_hypersparse_idx_nocheck(idx)); auto major_idx = major_start_offset + idx; // major_offset != major_idx in the hypersparse region auto local_degree = edge_partition.local_degree(major_idx); auto local_offset = edge_partition.local_offset(major_idx); - thrust::fill( - thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); + thrust::fill(thrust::seq, + output_buffer.begin() + local_offset, + output_buffer.begin() + local_offset + local_degree, + major); }); } } else { - thrust::for_each( - execution_policy, - thrust::make_counting_iterator(edge_partition.major_range_first()), - thrust::make_counting_iterator(edge_partition.major_range_first()) + - edge_partition.major_range_size(), - [edge_partition, majors] __device__(auto major) { - auto major_offset = edge_partition.major_offset_from_major_nocheck(major); - auto local_degree = edge_partition.local_degree(major_offset); - auto local_offset = edge_partition.local_offset(major_offset); - thrust::fill( - thrust::seq, majors + local_offset, majors + local_offset + local_degree, major); - }); + thrust::for_each(handle.get_thrust_policy(), + thrust::make_counting_iterator(edge_partition.major_range_first()), + thrust::make_counting_iterator(edge_partition.major_range_first()) + + edge_partition.major_range_size(), + [edge_partition, output_buffer] __device__(auto major) { + auto major_offset = edge_partition.major_offset_from_major_nocheck(major); + auto local_degree = edge_partition.local_degree(major_offset); + auto local_offset = edge_partition.local_offset(major_offset); + thrust::fill(thrust::seq, + output_buffer.begin() + local_offset, + output_buffer.begin() + local_offset + local_degree, + major); + }); + } + + if (tmp_buffer) { + copy_if_mask_set(handle, + (*tmp_buffer).begin(), + (*tmp_buffer).end(), + (*edge_partition_mask_view).value_first(), + majors.begin()); } } @@ -192,33 +216,59 @@ void decompress_edge_partition_to_edgelist( edge_partition_weight_view, std::optional> edge_partition_id_view, - vertex_t* edgelist_majors /* [OUT] */, - vertex_t* edgelist_minors /* [OUT] */, - std::optional edgelist_weights /* [OUT] */, - std::optional edgelist_ids /* [OUT] */, + std::optional> + edge_partition_mask_view, + raft::device_span edgelist_majors /* [OUT] */, + raft::device_span edgelist_minors /* [OUT] */, + std::optional> edgelist_weights /* [OUT] */, + std::optional> edgelist_ids /* [OUT] */, std::optional> const& segment_offsets) { auto number_of_edges = edge_partition.number_of_edges(); decompress_edge_partition_to_fill_edgelist_majors( - handle, edge_partition, edgelist_majors, segment_offsets); - thrust::copy(handle.get_thrust_policy(), - edge_partition.indices(), - edge_partition.indices() + number_of_edges, - edgelist_minors); - if (edge_partition_id_view) { - assert(edgelist_ids.has_value()); + handle, edge_partition, edge_partition_mask_view, edgelist_majors, segment_offsets); + if (edge_partition_mask_view) { + copy_if_mask_set(handle, + edge_partition.indices(), + edge_partition.indices() + number_of_edges, + (*edge_partition_mask_view).value_first(), + edgelist_minors.begin()); + } else { thrust::copy(handle.get_thrust_policy(), - (*edge_partition_id_view).value_first(), - (*edge_partition_id_view).value_first() + number_of_edges, - (*edgelist_ids)); + edge_partition.indices(), + edge_partition.indices() + number_of_edges, + edgelist_minors.begin()); } if (edge_partition_weight_view) { assert(edgelist_weights.has_value()); - thrust::copy(handle.get_thrust_policy(), - (*edge_partition_weight_view).value_first(), - (*edge_partition_weight_view).value_first() + number_of_edges, - (*edgelist_weights)); + if (edge_partition_mask_view) { + copy_if_mask_set(handle, + (*edge_partition_weight_view).value_first(), + (*edge_partition_weight_view).value_first() + number_of_edges, + (*edge_partition_mask_view).value_first(), + (*edgelist_weights).begin()); + } else { + thrust::copy(handle.get_thrust_policy(), + (*edge_partition_weight_view).value_first(), + (*edge_partition_weight_view).value_first() + number_of_edges, + (*edgelist_weights).begin()); + } + } + if (edge_partition_id_view) { + assert(edgelist_ids.has_value()); + if (edge_partition_mask_view) { + copy_if_mask_set(handle, + (*edge_partition_id_view).value_first(), + (*edge_partition_id_view).value_first() + number_of_edges, + (*edge_partition_mask_view).value_first(), + (*edgelist_ids).begin()); + } else { + thrust::copy(handle.get_thrust_policy(), + (*edge_partition_id_view).value_first(), + (*edge_partition_id_view).value_first() + number_of_edges, + (*edgelist_ids).begin()); + } } } diff --git a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh index 18091567e38..e5b64b1e02f 100644 --- a/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh +++ b/cpp/include/cugraph/edge_partition_edge_property_device_view.cuh @@ -33,8 +33,9 @@ template ::value_type> class edge_partition_edge_property_device_view_t { public: - using edge_type = edge_t; - using value_type = value_t; + using edge_type = edge_t; + using value_type = value_t; + static constexpr bool is_packed_bool = cugraph::is_packed_bool(); static constexpr bool has_packed_bool_element = cugraph::has_packed_bool_element(); @@ -53,7 +54,7 @@ class edge_partition_edge_property_device_view_t { value_first_ = view.value_firsts()[partition_idx]; } - __host__ __device__ ValueIterator value_first() { return value_first_; } + __host__ __device__ ValueIterator value_first() const { return value_first_; } __device__ value_t get(edge_t offset) const { @@ -173,8 +174,9 @@ class edge_partition_edge_property_device_view_t { template class edge_partition_edge_dummy_property_device_view_t { public: - using edge_type = edge_t; - using value_type = thrust::nullopt_t; + using edge_type = edge_t; + using value_type = thrust::nullopt_t; + static constexpr bool is_packed_bool = false; static constexpr bool has_packed_bool_element = false; diff --git a/cpp/include/cugraph/graph_mask.hpp b/cpp/include/cugraph/graph_mask.hpp deleted file mode 100644 index 2048d3692c7..00000000000 --- a/cpp/include/cugraph/graph_mask.hpp +++ /dev/null @@ -1,382 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include -#include -#include - -#include -#include -#include -#include - -namespace cugraph { - -/** - * Compile-time fast lookup of log2(num_bits(mask_t)) to eliminate - * the log2 computation for powers of 2. - * @tparam mask_t - */ -template -__host__ __device__ constexpr int log_bits() -{ - switch (std::numeric_limits::digits) { - case 8: return 3; - case 16: return 4; - case 32: return 5; - case 64: return 6; - default: return log2(std::numeric_limits::digits); - } -} - -/** - * Uses bit-shifting to perform a fast mod operation. This - * is used to compute the index of a specific bit - * @tparam mask_t - * @tparam T - */ -template -__host__ __device__ int bit_mod(T numerator) -{ - return numerator & (std::numeric_limits::digits - 1); -} - -namespace detail { - -/** - * Sets the bit at location h in a one-hot encoded 32-bit int array - */ -template -__device__ __host__ inline void _set_bit(mask_type* arr, mask_type h) -{ - mask_type bit = bit_mod(h); - mask_type idx = h >> log_bits(); - atomicOr(arr + idx, 1 << bit); -} - -/** - * Unsets the bit at location h in a one-hot encoded 32-bit int array - */ -template -__device__ __host__ inline void _unset_bit(mask_type* arr, mask_type h) -{ - mask_type bit = bit_mod(h); - mask_type idx = h >> log_bits(); - atomicAnd(arr + idx, ~(1 << bit)); -} - -/** - * Returns whether or not bit at location h is nonzero in a one-hot - * encoded 32-bit in array. - */ -template -__device__ __host__ inline bool _is_set(mask_type* arr, mask_type h) -{ - mask_type bit = bit_mod(h); - mask_type idx = h >> log_bits(); - return arr[idx] >> bit & 1U; -} -}; // namespace detail - -/** - * Mask view to be used in device functions for reading and updating existing mask. - * This assumes the appropriate masks (vertex/edge) have already been initialized, - * since that will need to be done from the owning object. - * @tparam vertex_t - * @tparam edge_t - * @tparam mask_t - */ -template -struct graph_mask_view_t { - public: - graph_mask_view_t() = delete; - - graph_mask_view_t(vertex_t n_vertices, - edge_t n_edges, - std::optional>& vertices, - std::optional>& edges, - bool complement = false) - : n_vertices_(n_vertices), - n_edges_(n_edges), - complement_(complement), - vertices_(vertices), - edges_(edges) - { - } - - graph_mask_view_t(graph_mask_view_t const& other) = default; - using vertex_type = vertex_t; - using edge_type = edge_t; - using mask_type = mask_t; - using size_type = std::size_t; - - ~graph_mask_view_t() = default; - graph_mask_view_t(graph_mask_view_t&&) noexcept = default; - - graph_mask_view_t& operator=(graph_mask_view_t&&) noexcept = default; - graph_mask_view_t& operator=(graph_mask_view_t const& other) = default; - - /** - * Are masks complemeneted? - * - * - !complemented means masks are inclusive (masking in) - * - complemented means masks are exclusive (masking out) - */ - __host__ __device__ bool is_complemented() const { return complement_; } - - /** - * Has the edge mask been initialized? - */ - __host__ __device__ bool has_edge_mask() const { return edges_.has_value(); } - - /** - * Has the vertex mask been initialized? - */ - __host__ __device__ bool has_vertex_mask() const { return vertices_.has_value(); } - - /** - * Get the vertex mask - */ - __host__ __device__ std::optional> get_vertex_mask() const - { - return vertices_; - } - - /** - * Get the edge mask - */ - __host__ __device__ std::optional> get_edge_mask() const - { - return edges_; - } - - __host__ __device__ edge_t get_edge_mask_size() const { return n_edges_ >> log_bits(); } - - __host__ __device__ vertex_t get_vertex_mask_size() const - { - return n_vertices_ >> log_bits(); - } - - protected: - vertex_t n_vertices_; - edge_t n_edges_; - bool complement_{false}; - std::optional> vertices_{std::nullopt}; - std::optional> edges_{std::nullopt}; -}; // struct graph_mask_view_t - -/** - * An owning container object to manage separate bitmasks for - * filtering vertices and edges. A compliment setting - * determines whether the value of 1 for corresponding - * items means they should be masked in (included) or - * masked out (excluded). - * - * Creating this object does not allocate any memory on device. - * In order to start using and querying the masks, they will - * need to first be initialized. - * - * @tparam vertex_t - * @tparam edge_t - * @tparam mask_t - */ -template -struct graph_mask_t { - public: - using vertex_type = vertex_t; - using edge_type = edge_t; - using mask_type = mask_t; - using size_type = std::size_t; - - ~graph_mask_t() = default; - graph_mask_t(graph_mask_t&&) noexcept = default; - - graph_mask_t() = delete; - - explicit graph_mask_t(raft::handle_t const& handle, - vertex_t n_vertices, - edge_t n_edges, - bool complement = false) - : handle_(handle), - n_vertices_(n_vertices), - n_edges_(n_edges), - edges_(0, handle.get_stream()), - vertices_(0, handle.get_stream()), - complement_(complement) - { - } - - explicit graph_mask_t(graph_mask_t const& other) - : handle_(other.handle_), - n_vertices_(other.n_vertices_), - n_edges_(other.n_edges_), - edges_(other.edges_, other.handle_.get_stream()), - vertices_(other.vertices_, other.handle_.get_stream()), - complement_(other.complement_) - { - } - - graph_mask_t& operator=(graph_mask_t&&) noexcept = default; - graph_mask_t& operator=(graph_mask_t const& other) = default; - - /** - * Determines whether the 1 bit in a vertex or edge position - * represents an inclusive mask or exclusive mask. Default is - * an inclusive mask (e.g. 1 bit means the corresponding vertex - * or edge should be included in computations). - * @return - */ - bool is_complemented() const { return complement_; } - - /** - * Whether or not the current mask object has been initialized - * with an edge mask. - * @return - */ - bool has_edge_mask() const { return get_edge_mask().has_value(); } - - /** - * Whether or not the current mask object has been initialized - * with a vertex mask. - * @return - */ - bool has_vertex_mask() const { return get_vertex_mask().has_value(); } - - /** - * Returns the edge mask if it has been initialized on the instance - * @return - */ - std::optional get_edge_mask() const - { - return edges_.size() > 0 ? std::make_optional(edges_.data()) : std::nullopt; - } - - /** - * Retuns the vertex mask if it has been initialized on the instance - * @return - */ - std::optional get_vertex_mask() const - { - return vertices_.size() > 0 ? std::make_optional(vertices_.data()) - : std::nullopt; - } - - vertex_t get_n_vertices() { return n_vertices_; } - - edge_t get_n_edges() { return n_edges_; } - - edge_t get_edge_mask_size() const { return n_edges_ >> log_bits(); } - - vertex_t get_vertex_mask_size() const { return n_vertices_ >> log_bits(); } - - void initialize_edge_mask(bool init = 0) - { - if (!has_edge_mask()) { - allocate_edge_mask(); - RAFT_CUDA_TRY(cudaMemsetAsync(edges_.data(), - edges_.size() * sizeof(mask_t), - std::numeric_limits::max() * init, - handle_.get_stream())); - } - } - - void initialize_vertex_mask(bool init = 0) - { - if (!has_vertex_mask()) { - allocate_vertex_mask(); - RAFT_CUDA_TRY(cudaMemsetAsync(vertices_.data(), - vertices_.size() * sizeof(mask_t), - std::numeric_limits::max() * init, - handle_.get_stream())); - } - } - - /** - * Initializes an edge mask by allocating the device memory - */ - void allocate_edge_mask() - { - if (edges_.size() == 0) { - edges_.resize(get_edge_mask_size(), handle_.get_stream()); - clear_edge_mask(); - } - } - - /** - * Initializes a vertex mask by allocating the device memory - */ - void allocate_vertex_mask() - { - if (vertices_.size() == 0) { - vertices_.resize(get_vertex_mask_size(), handle_.get_stream()); - clear_vertex_mask(); - } - } - - /** - * Clears out all the masked bits of the edge mask - */ - void clear_edge_mask() - { - if (edges_.size() > 0) { - RAFT_CUDA_TRY( - cudaMemsetAsync(edges_.data(), edges_.size() * sizeof(mask_t), 0, handle_.get_stream())); - } - } - - /** - * Clears out all the masked bits of the vertex mask - */ - void clear_vertex_mask() - { - if (vertices_.size() > 0) { - RAFT_CUDA_TRY(cudaMemsetAsync( - vertices_.data(), vertices_.size() * sizeof(mask_t), 0, handle_.get_stream())); - } - } - - /** - * Returns a view of the current mask object which can safely be used - * in device functions. - * - * Note that the view will not be able to initialize the underlying - * masks so they will need to be initialized before this method is - * invoked. - */ - auto view() - { - auto vspan = has_vertex_mask() ? std::make_optional>(vertices_.data(), - vertices_.size()) - : std::nullopt; - auto espan = has_edge_mask() - ? std::make_optional>(edges_.data(), edges_.size()) - : std::nullopt; - return graph_mask_view_t( - n_vertices_, n_edges_, vspan, espan, complement_); - } - - protected: - raft::handle_t const& handle_; - vertex_t n_vertices_; - edge_t n_edges_; - bool complement_ = false; - rmm::device_uvector vertices_; - rmm::device_uvector edges_; - -}; // struct graph_mask_t -}; // namespace cugraph \ No newline at end of file diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index 2d10b435224..f30a8b7e2af 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -444,12 +444,6 @@ class graph_view_t local_edge_partition_view( size_t partition_idx) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - vertex_t major_range_first{}; vertex_t major_range_last{}; vertex_t minor_range_first{}; @@ -748,6 +740,11 @@ class graph_view_t> edge_mask_view() const + { + return edge_mask_view_; + } + private: std::vector edge_partition_offsets_{}; std::vector edge_partition_indices_{}; @@ -856,12 +853,6 @@ class graph_view_tnumber_of_edges(); - } - vertex_t local_edge_partition_src_range_size(size_t partition_idx = 0) const { assert(partition_idx == 0); @@ -1030,6 +1021,11 @@ class graph_view_t> edge_mask_view() const + { + return edge_mask_view_; + } + private: edge_t const* offsets_{nullptr}; vertex_t const* indices_{nullptr}; diff --git a/cpp/include/cugraph/utilities/device_functors.cuh b/cpp/include/cugraph/utilities/device_functors.cuh index 501e74cf47b..3af8ed1dd19 100644 --- a/cpp/include/cugraph/utilities/device_functors.cuh +++ b/cpp/include/cugraph/utilities/device_functors.cuh @@ -15,18 +15,11 @@ */ #pragma once -#include -#include -#include +#include -#include -#include -#include -#include +#include #include -#include -#include namespace cugraph { @@ -44,12 +37,12 @@ struct pack_bool_t { __device__ uint32_t operator()(size_t i) const { - auto first = i * (sizeof(uint32_t) * 8); - auto last = std::min((i + 1) * (sizeof(uint32_t) * 8), num_bools); + auto first = i * packed_bools_per_word(); + auto last = std::min((i + 1) * packed_bools_per_word(), num_bools); uint32_t ret{0}; for (auto j = first; j < last; ++j) { if (*(bool_first + j)) { - auto mask = uint32_t{1} << (j % (sizeof(uint32_t) * 8)); + auto mask = packed_bool_mask(j); ret |= mask; } } @@ -57,6 +50,22 @@ struct pack_bool_t { } }; +template +struct check_bit_set_t { + PackedBoolIterator bitmap_first{}; + T idx_first{}; + + static_assert( + std::is_same_v::value_type, uint32_t>); + + __device__ bool operator()(T idx) const + { + auto offset = idx - idx_first; + return static_cast(*(bitmap_first + packed_bool_offset(offset)) & + packed_bool_mask(offset)); + } +}; + template struct indirection_t { Iterator first{}; @@ -80,7 +89,14 @@ struct indirection_if_idx_valid_t { }; template -struct not_equal_t { +struct is_equal_t { + T compare{}; + + __device__ bool operator()(T val) const { return val == compare; } +}; + +template +struct is_not_equal_t { T compare{}; __device__ bool operator()(T val) const { return val != compare; } @@ -96,19 +112,6 @@ struct is_first_in_run_t { } }; -template -struct check_bit_set_t { - uint32_t const* bitmaps{nullptr}; - T idx_first{}; - - __device__ bool operator()(T idx) const - { - auto offset = idx - idx_first; - auto mask = uint32_t{1} << (offset % (sizeof(uint32_t) * 8)); - return (*(bitmaps + (offset / (sizeof(uint32_t) * 8))) & mask) > uint32_t{0}; - } -}; - template struct check_in_range_t { T min{}; // inclusive diff --git a/cpp/include/cugraph/utilities/mask_utils.cuh b/cpp/include/cugraph/utilities/mask_utils.cuh new file mode 100644 index 00000000000..ab1403d019b --- /dev/null +++ b/cpp/include/cugraph/utilities/mask_utils.cuh @@ -0,0 +1,150 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace cugraph { + +namespace detail { + +template // should be packed bool +__device__ size_t count_set_bits(MaskIterator mask_first, size_t start_offset, size_t num_bits) +{ + static_assert( + std::is_same_v::value_type, uint32_t>); + + size_t ret{0}; + + mask_first = mask_first + packed_bool_offset(start_offset); + start_offset = start_offset % packed_bools_per_word(); + if (start_offset != 0) { + auto mask = ~packed_bool_partial_mask(start_offset); + if (start_offset + num_bits < packed_bools_per_word()) { + mask &= packed_bool_partial_mask(start_offset + num_bits); + } + ret += __popc(*mask_first & mask); + num_bits -= __popc(mask); + ++mask_first; + } + + return thrust::transform_reduce( + thrust::seq, + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(packed_bool_size(num_bits)), + [mask_first, num_bits] __device__(size_t i) { + auto word = *(mask_first + i); + if ((i + 1) * packed_bools_per_word() > num_bits) { + word &= packed_bool_partial_mask(num_bits % packed_bools_per_word()); + } + return static_cast(__popc(word)); + }, + ret, + thrust::plus{}); +} + +template ::value_type, // for packed bool support + typename output_value_type = typename thrust::iterator_traits< + OutputIterator>::value_type> // for packed bool support +__device__ size_t copy_if_mask_set(InputIterator input_first, + MaskIterator mask_first, + OutputIterator output_first, + size_t input_start_offset, + size_t output_start_offset, + size_t num_items) +{ + static_assert( + std::is_same_v::value_type, uint32_t>); + static_assert( + std::is_same_v::value_type, input_value_type> || + cugraph::has_packed_bool_element()); + static_assert(std::is_same_v::value_type, + output_value_type> || + cugraph::has_packed_bool_element()); + + static_assert(!cugraph::has_packed_bool_element() && + !cugraph::has_packed_bool_element(), + "unimplemented."); + + return static_cast(thrust::distance( + output_first + output_start_offset, + thrust::copy_if(thrust::seq, + input_first + input_start_offset, + input_first + (input_start_offset + num_items), + thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + check_bit_set_t{mask_first, size_t{0}}) + + input_start_offset, + output_first + output_start_offset, + is_equal_t{true}))); +} + +template // should be packed bool +size_t count_set_bits(raft::handle_t const& handle, MaskIterator mask_first, size_t num_bits) +{ + static_assert( + std::is_same_v::value_type, uint32_t>); + + return thrust::transform_reduce( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(packed_bool_size(num_bits)), + [mask_first, num_bits] __device__(size_t i) { + auto word = *(mask_first + i); + if ((i + 1) * packed_bools_per_word() > num_bits) { + word &= packed_bool_partial_mask(num_bits % packed_bools_per_word()); + } + return static_cast(__popc(word)); + }, + size_t{0}, + thrust::plus{}); +} + +template +OutputIterator copy_if_mask_set(raft::handle_t const& handle, + InputIterator input_first, + InputIterator input_last, + MaskIterator mask_first, + OutputIterator output_first) +{ + return thrust::copy_if( + handle.get_thrust_policy(), + input_first, + input_last, + thrust::make_transform_iterator(thrust::make_counting_iterator(size_t{0}), + check_bit_set_t{mask_first, size_t{0}}), + output_first, + is_equal_t{true}); +} + +} // namespace detail + +} // namespace cugraph diff --git a/cpp/include/cugraph/utilities/packed_bool_utils.hpp b/cpp/include/cugraph/utilities/packed_bool_utils.hpp index 0be5711d90c..b418d5afc35 100644 --- a/cpp/include/cugraph/utilities/packed_bool_utils.hpp +++ b/cpp/include/cugraph/utilities/packed_bool_utils.hpp @@ -92,6 +92,13 @@ constexpr uint32_t packed_bool_mask(T bool_offset) constexpr uint32_t packed_bool_full_mask() { return uint32_t{0xffffffff}; } +template +constexpr uint32_t packed_bool_partial_mask(T num_set_bits) +{ + assert(static_cast(num_set_bits) <= sizeof(uint32_t) * 8); + return uint32_t{0xffffffff} >> (sizeof(uint32_t) * 8 - num_set_bits); +} + constexpr uint32_t packed_bool_empty_mask() { return uint32_t{0x0}; } template diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index 2f30faebb3e..32247ca3466 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,8 @@ struct update_rx_major_local_degree_t { int minor_comm_size{}; edge_partition_device_view_t edge_partition{}; + thrust::optional> + edge_partition_e_mask{}; size_t reordered_idx_first{}; size_t local_edge_partition_idx{}; @@ -151,19 +154,28 @@ struct update_rx_major_local_degree_t { auto major = rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition]; - edge_t local_degree{}; + vertex_t major_idx{0}; + edge_t local_degree{0}; if (multi_gpu && (edge_partition.major_hypersparse_first() && (major >= *(edge_partition.major_hypersparse_first())))) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); - local_degree = major_hypersparse_idx - ? edge_partition.local_degree((*(edge_partition.major_hypersparse_first()) - - edge_partition.major_range_first()) + - *major_hypersparse_idx) - : edge_t{0}; + if (major_hypersparse_idx) { + major_idx = + (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + + *major_hypersparse_idx; + local_degree = edge_partition.local_degree(major_idx); + } } else { - local_degree = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major)); + major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_degree = edge_partition.local_degree(major_idx); } + + if (edge_partition_e_mask && (local_degree > edge_t{0})) { + auto local_offset = edge_partition.local_offset(major_idx); + local_degree = static_cast( + count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree)); + } + local_degrees_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition] = local_degree; @@ -173,7 +185,7 @@ struct update_rx_major_local_degree_t { template struct update_rx_major_local_nbrs_t { int major_comm_size{}; @@ -181,6 +193,8 @@ struct update_rx_major_local_nbrs_t { edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; + thrust::optional> + edge_partition_e_mask{}; size_t reordered_idx_first{}; size_t local_edge_partition_idx{}; @@ -190,12 +204,13 @@ struct update_rx_major_local_nbrs_t { raft::device_span rx_majors{}; raft::device_span local_nbr_offsets_for_rx_majors{}; raft::device_span local_nbrs_for_rx_majors{}; - optional_property_buffer_view_t local_nbrs_properties_for_rx_majors{}; + optional_property_buffer_mutable_view_t local_e_property_values_for_rx_majors{}; __device__ void operator()(size_t idx) { using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; - auto it = thrust::upper_bound( + + auto it = thrust::upper_bound( thrust::seq, rx_reordered_group_lasts.begin(), rx_reordered_group_lasts.end(), idx); auto major_comm_rank = static_cast(thrust::distance(rx_reordered_group_lasts.begin(), it)); auto offset_in_local_edge_partition = @@ -204,39 +219,76 @@ struct update_rx_major_local_nbrs_t { auto major = rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition]; - vertex_t const* indices{nullptr}; - [[maybe_unused]] edge_t edge_offset{0}; + + edge_t edge_offset{0}; edge_t local_degree{0}; if (multi_gpu && (edge_partition.major_hypersparse_first() && (major >= *(edge_partition.major_hypersparse_first())))) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); if (major_hypersparse_idx) { - thrust::tie(indices, edge_offset, local_degree) = edge_partition.local_edges( + auto major_idx = (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx); + *major_hypersparse_idx; + edge_offset = edge_partition.local_offset(major_idx); + local_degree = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices, edge_offset, local_degree) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + edge_offset = edge_partition.local_offset(major_idx); + local_degree = edge_partition.local_degree(major_idx); } - // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree - // vertices in a single warp (better optimize if this becomes a performance - // bottleneck) - size_t start_offset = + auto indices = edge_partition.indices(); + size_t output_start_offset = local_nbr_offsets_for_rx_majors[rx_group_firsts[major_comm_rank * minor_comm_size + local_edge_partition_idx] + offset_in_local_edge_partition]; - thrust::copy(thrust::seq, - indices, - indices + local_degree, - local_nbrs_for_rx_majors.begin() + start_offset); - if constexpr (!std::is_same_v) { - thrust::copy(thrust::seq, - edge_partition_e_value_input.value_first() + edge_offset, - edge_partition_e_value_input.value_first() + (edge_offset + local_degree), - local_nbrs_properties_for_rx_majors.begin() + start_offset); + // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree + // vertices in a single warp (better optimize if this becomes a performance + // bottleneck) + + static_assert(!edge_partition_e_input_device_view_t::has_packed_bool_element, "unimplemented."); + if (local_degree > 0) { + if (edge_partition_e_mask) { + auto mask_first = (*edge_partition_e_mask).value_first(); + if constexpr (!std::is_same_v) { + auto input_first = + thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()) + + edge_offset; + copy_if_mask_set(input_first, + mask_first, + thrust::make_zip_iterator(local_nbrs_for_rx_majors.begin(), + local_e_property_values_for_rx_majors), + edge_offset, + output_start_offset, + local_degree); + } else { + copy_if_mask_set(indices, + mask_first, + local_nbrs_for_rx_majors.begin(), + edge_offset, + output_start_offset, + local_degree); + } + } else { + if constexpr (!std::is_same_v) { + auto input_first = + thrust::make_zip_iterator(indices, edge_partition_e_value_input.value_first()) + + edge_offset; + thrust::copy(thrust::seq, + input_first, + input_first + local_degree, + thrust::make_zip_iterator(local_nbrs_for_rx_majors.begin(), + local_e_property_values_for_rx_majors) + + output_start_offset); + } else { + thrust::copy(thrust::seq, + indices + edge_offset, + indices + (edge_offset + local_degree), + local_nbrs_for_rx_majors.begin() + output_start_offset); + } + } } } }; @@ -259,36 +311,45 @@ template struct pick_min_degree_t { FirstElementToIdxMap first_element_to_idx_map{}; - raft::device_span first_element_offsets{nullptr}; + raft::device_span first_element_offsets{nullptr}; SecondElementToIdxMap second_element_to_idx_map{}; - raft::device_span second_element_offsets{nullptr}; + raft::device_span second_element_offsets{nullptr}; edge_partition_device_view_t edge_partition{}; + thrust::optional> + edge_partition_e_mask{}; __device__ edge_t operator()(thrust::tuple pair) const { edge_t local_degree0{0}; vertex_t major0 = thrust::get<0>(pair); if constexpr (std::is_same_v) { + vertex_t major_idx{0}; if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && (major0 >= *(edge_partition.major_hypersparse_first()))) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major0); - local_degree0 = - major_hypersparse_idx - ? edge_partition.local_degree((*(edge_partition.major_hypersparse_first()) - - edge_partition.major_range_first()) + - *major_hypersparse_idx) - : edge_t{0}; + if (major_hypersparse_idx) { + major_idx = + (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + + *major_hypersparse_idx; + local_degree0 = edge_partition.local_degree(major_idx); + } } else { - local_degree0 = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major0)); + major_idx = edge_partition.major_offset_from_major_nocheck(major0); + local_degree0 = edge_partition.local_degree(major_idx); } } else { + major_idx = edge_partition.major_offset_from_major_nocheck(major0); + local_degree0 = edge_partition.local_degree(major_idx); + } + + if (edge_partition_e_mask && (local_degree0 > edge_t{0})) { + auto local_offset = edge_partition.local_offset(major_idx); local_degree0 = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major0)); + count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree0); } } else { auto idx = first_element_to_idx_map.find(major0); @@ -299,24 +360,31 @@ struct pick_min_degree_t { edge_t local_degree1{0}; vertex_t major1 = thrust::get<1>(pair); if constexpr (std::is_same_v) { + vertex_t major_idx{0}; if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && (major1 >= *(edge_partition.major_hypersparse_first()))) { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major1); - local_degree1 = - major_hypersparse_idx - ? edge_partition.local_degree((*(edge_partition.major_hypersparse_first()) - - edge_partition.major_range_first()) + - *major_hypersparse_idx) - : edge_t{0}; + if (major_hypersparse_idx) { + major_idx = + (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + + *major_hypersparse_idx; + local_degree1 = edge_partition.local_degree(major_idx); + } } else { - local_degree1 = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major1)); + major_idx = edge_partition.major_offset_from_major_nocheck(major1); + local_degree1 = edge_partition.local_degree(major_idx); } } else { + major_idx = edge_partition.major_offset_from_major_nocheck(major1); + local_degree1 = edge_partition.local_degree(major_idx); + } + + if (edge_partition_e_mask && (local_degree1 > edge_t{0})) { + auto local_offset = edge_partition.local_offset(major_idx); local_degree1 = - edge_partition.local_degree(edge_partition.major_offset_from_major_nocheck(major1)); + count_set_bits((*edge_partition_e_mask).value_first(), local_offset, local_degree1); } } else { auto idx = second_element_to_idx_map.find(major1); @@ -328,6 +396,71 @@ struct pick_min_degree_t { } }; +template +__device__ edge_t set_intersection_by_key_with_mask(InputKeyIterator0 input_key_first0, + InputKeyIterator1 input_key_first1, + InputValueIterator0 input_value_first0, + InputValueIterator1 input_value_first1, + MaskIterator mask_first, + OutputKeyIterator output_key_first, + OutputValueIterator0 output_value_first0, + OutputValueIterator1 output_value_first1, + edge_t input_start_offset0, + edge_t input_size0, + bool apply_mask0, + edge_t input_start_offset1, + edge_t input_size1, + bool apply_mask1, + size_t output_start_offset) +{ + static_assert( + std::is_same_v::value_type, uint32_t>); + static_assert(std::is_same_v == + std::is_same_v); + + check_bit_set_t check_bit_set{mask_first, edge_t{0}}; + + auto idx0 = input_start_offset0; + auto idx1 = input_start_offset1; + auto output_idx = output_start_offset; + while ((idx0 < (input_start_offset0 + input_size0)) && + (idx1 < (input_start_offset1 + input_size1))) { + bool valid0 = apply_mask0 ? check_bit_set(idx0) : true; + bool valid1 = apply_mask1 ? check_bit_set(idx1) : true; + if (!valid0) { ++idx0; } + if (!valid1) { ++idx1; } + + if (valid0 && valid1) { + auto key0 = *(input_key_first0 + idx0); + auto key1 = *(input_key_first1 + idx1); + if (key0 < key1) { + ++idx0; + } else if (key0 > key1) { + ++idx1; + } else { + *(output_key_first + output_idx) = key0; + if constexpr (!std::is_same_v) { + *(output_value_first0 + output_idx) = *(input_value_first0 + idx0); + *(output_value_first1 + output_idx) = *(input_value_first1 + idx1); + } + ++idx0; + ++idx1; + ++output_idx; + } + } + } + + return (output_idx - output_start_offset); +} + template struct copy_intersecting_nbrs_and_update_intersection_size_t { FirstElementToIdxMap first_element_to_idx_map{}; - raft::device_span first_element_offsets{}; + raft::device_span first_element_offsets{}; raft::device_span first_element_indices{nullptr}; - optional_property_buffer_view_t first_element_properties{}; + optional_property_buffer_view_t first_element_edge_property_values{}; SecondElementToIdxMap second_element_to_idx_map{}; - raft::device_span second_element_offsets{}; + raft::device_span second_element_offsets{}; raft::device_span second_element_indices{nullptr}; - optional_property_buffer_view_t second_element_properties{}; + optional_property_buffer_view_t second_element_edge_property_values{}; edge_partition_device_view_t edge_partition{}; edge_partition_e_input_device_view_t edge_partition_e_value_input{}; + thrust::optional> + edge_partition_e_mask{}; VertexPairIterator vertex_pair_first; raft::device_span nbr_intersection_offsets{nullptr}; raft::device_span nbr_intersection_indices{nullptr}; - optional_property_buffer_view_t nbr_intersection_properties0{}; - optional_property_buffer_view_t nbr_intersection_properties1{}; + optional_property_buffer_mutable_view_t nbr_intersection_e_property_values0{}; + optional_property_buffer_mutable_view_t nbr_intersection_e_property_values1{}; vertex_t invalid_id{}; + __device__ edge_t operator()(size_t i) { using edge_property_value_t = typename edge_partition_e_input_device_view_t::value_type; - using optional_const_property_buffer_view_t = - std::conditional_t, - raft::device_span, - std::byte /* dummy */>; auto pair = *(vertex_pair_first + i); + vertex_t const* indices0{nullptr}; - optional_const_property_buffer_view_t properties0{}; + std::conditional_t, + edge_property_value_t const*, + void*> + edge_property_values0{nullptr}; edge_t local_edge_offset0{0}; edge_t local_degree0{0}; if constexpr (std::is_same_v) { + indices0 = edge_partition.indices(); + if constexpr (!std::is_same_v) { + edge_property_values0 = edge_partition_e_value_input.value_first(); + } + vertex_t major = thrust::get<0>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -379,42 +521,47 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); if (major_hypersparse_idx) { - thrust::tie(indices0, local_edge_offset0, local_degree0) = edge_partition.local_edges( + auto major_idx = (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx); + *major_hypersparse_idx; + local_edge_offset0 = edge_partition.local_offset(major_idx); + local_degree0 = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices0, local_edge_offset0, local_degree0) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_edge_offset0 = edge_partition.local_offset(major_idx); + local_degree0 = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices0, local_edge_offset0, local_degree0) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_edge_offset0 = edge_partition.local_offset(major_idx); + local_degree0 = edge_partition.local_degree(major_idx); } - + } else { + indices0 = first_element_indices.begin(); if constexpr (!std::is_same_v) { - properties0 = raft::device_span( - edge_partition_e_value_input.value_first() + local_edge_offset0, local_degree0); + edge_property_values0 = first_element_edge_property_values; } - } else { auto idx = first_element_to_idx_map.find(thrust::get<0>(pair)); local_edge_offset0 = first_element_offsets[idx]; local_degree0 = static_cast(first_element_offsets[idx + 1] - local_edge_offset0); - indices0 = first_element_indices.begin() + local_edge_offset0; - - if constexpr (!std::is_same_v) { - properties0 = raft::device_span( - first_element_properties.begin() + local_edge_offset0, local_degree0); - } } vertex_t const* indices1{nullptr}; - optional_const_property_buffer_view_t properties1{}; + std::conditional_t, + edge_property_value_t const*, + void*> + edge_property_values1{nullptr}; - [[maybe_unused]] edge_t local_edge_offset1{0}; + edge_t local_edge_offset1{0}; edge_t local_degree1{0}; if constexpr (std::is_same_v) { + indices1 = edge_partition.indices(); + if constexpr (!std::is_same_v) { + edge_property_values1 = edge_partition_e_value_input.value_first(); + } + vertex_t major = thrust::get<1>(pair); if constexpr (multi_gpu) { if (edge_partition.major_hypersparse_first() && @@ -422,83 +569,63 @@ struct copy_intersecting_nbrs_and_update_intersection_size_t { auto major_hypersparse_idx = edge_partition.major_hypersparse_idx_from_major_nocheck(major); if (major_hypersparse_idx) { - thrust::tie(indices1, local_edge_offset1, local_degree1) = edge_partition.local_edges( + auto major_idx = (*(edge_partition.major_hypersparse_first()) - edge_partition.major_range_first()) + - *major_hypersparse_idx); + *major_hypersparse_idx; + local_edge_offset1 = edge_partition.local_offset(major_idx); + local_degree1 = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices1, local_edge_offset1, local_degree1) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_edge_offset1 = edge_partition.local_offset(major_idx); + local_degree1 = edge_partition.local_degree(major_idx); } } else { - thrust::tie(indices1, local_edge_offset1, local_degree1) = - edge_partition.local_edges(edge_partition.major_offset_from_major_nocheck(major)); + auto major_idx = edge_partition.major_offset_from_major_nocheck(major); + local_edge_offset1 = edge_partition.local_offset(major_idx); + local_degree1 = edge_partition.local_degree(major_idx); } - + } else { + indices1 = second_element_indices.begin(); if constexpr (!std::is_same_v) { - properties1 = raft::device_span( - edge_partition_e_value_input.value_first() + local_edge_offset1, local_degree1); + edge_property_values1 = second_element_edge_property_values; } - } else { auto idx = second_element_to_idx_map.find(thrust::get<1>(pair)); local_edge_offset1 = second_element_offsets[idx]; local_degree1 = static_cast(second_element_offsets[idx + 1] - local_edge_offset1); - indices1 = second_element_indices.begin() + local_edge_offset1; - - if constexpr (!std::is_same_v) { - properties1 = raft::device_span( - second_element_properties.begin() + local_edge_offset1, local_degree1); - } } // FIXME: this can lead to thread-divergence with a mix of high-degree and low-degree // vertices in a single warp (better optimize if this becomes a performance // bottleneck) - auto nbr_intersection_first = nbr_intersection_indices.begin() + nbr_intersection_offsets[i]; - - auto nbr_intersection_last = thrust::set_intersection(thrust::seq, - indices0, - indices0 + local_degree0, - indices1, - indices1 + local_degree1, - nbr_intersection_first); - thrust::fill(thrust::seq, - nbr_intersection_last, - nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1], - invalid_id); - - auto insection_size = - static_cast(thrust::distance(nbr_intersection_first, nbr_intersection_last)); - if constexpr (!std::is_same_v) { - auto ip0_start = nbr_intersection_properties0.begin() + nbr_intersection_offsets[i]; - - // copy edge properties from first vertex to common neighbors - thrust::transform(thrust::seq, - nbr_intersection_first, - nbr_intersection_last, - ip0_start, - [indices0, local_degree0, properties0] __device__(auto v) { - auto position = - thrust::lower_bound(thrust::seq, indices0, indices0 + local_degree0, v); - return properties0[thrust::distance(indices0, position)]; - }); - - auto ip1_start = nbr_intersection_properties1.begin() + nbr_intersection_offsets[i]; - - // copy edge properties from second vertex to common neighbors - thrust::transform(thrust::seq, - nbr_intersection_first, - nbr_intersection_last, - ip1_start, - [indices1, local_degree1, properties1] __device__(auto v) { - auto position = - thrust::lower_bound(thrust::seq, indices1, indices1 + local_degree1, v); - return properties1[thrust::distance(indices1, position)]; - }); - } - return insection_size; + auto mask_first = edge_partition_e_mask ? (*edge_partition_e_mask).value_first() + : static_cast(nullptr); + auto intersection_size = set_intersection_by_key_with_mask( + indices0, + indices1, + edge_property_values0, + edge_property_values1, + mask_first, + nbr_intersection_indices.begin(), + nbr_intersection_e_property_values0, + nbr_intersection_e_property_values1, + local_edge_offset0, + local_degree0, + (std::is_same_v && edge_partition_e_mask), + local_edge_offset1, + local_degree1, + (std::is_same_v && edge_partition_e_mask), + nbr_intersection_offsets[i]); + + thrust::fill( + thrust::seq, + nbr_intersection_indices.begin() + (nbr_intersection_offsets[i] + intersection_size), + nbr_intersection_indices.begin() + nbr_intersection_offsets[i + 1], + invalid_id); + + return intersection_size; } }; @@ -520,7 +647,8 @@ struct strided_accumulate_t { template + typename optional_property_buffer_view_t, + typename optional_property_buffer_mutable_view_t> struct gatherv_indices_t { size_t output_size{}; int minor_comm_size{}; @@ -530,10 +658,10 @@ struct gatherv_indices_t { raft::device_span combined_nbr_intersection_offsets{}; raft::device_span combined_nbr_intersection_indices{}; - optional_property_buffer_view_t gathered_nbr_intersection_properties0{}; - optional_property_buffer_view_t gathered_nbr_intersection_properties1{}; - optional_property_buffer_view_t combined_nbr_intersection_properties0{}; - optional_property_buffer_view_t combined_nbr_intersection_properties1{}; + optional_property_buffer_view_t gathered_nbr_intersection_e_property_values0{}; + optional_property_buffer_view_t gathered_nbr_intersection_e_property_values1{}; + optional_property_buffer_mutable_view_t combined_nbr_intersection_e_property_values0{}; + optional_property_buffer_mutable_view_t combined_nbr_intersection_e_property_values1{}; __device__ void operator()(size_t i) const { @@ -546,13 +674,13 @@ struct gatherv_indices_t { if constexpr (!std::is_same_v) { auto zipped_gathered_begin = thrust::make_zip_iterator( thrust::make_tuple(gathered_intersection_indices.begin(), - gathered_nbr_intersection_properties0.begin(), - gathered_nbr_intersection_properties1.begin())); + gathered_nbr_intersection_e_property_values0, + gathered_nbr_intersection_e_property_values1)); auto zipped_combined_begin = thrust::make_zip_iterator( thrust::make_tuple(combined_nbr_intersection_indices.begin(), - combined_nbr_intersection_properties0.begin(), - combined_nbr_intersection_properties1.begin())); + combined_nbr_intersection_e_property_values0, + combined_nbr_intersection_e_property_values1)); thrust::copy(thrust::seq, zipped_gathered_begin + gathered_intersection_offsets[output_size * j + i], @@ -694,13 +822,12 @@ nbr_intersection(raft::handle_t const& handle, using optional_property_buffer_view_t = std::conditional_t, - raft::device_span, - std::byte /* dummy */>; - - using optional_nbr_intersected_edge_partitions_t = + edge_property_value_t const*, + void*>; + using optional_property_buffer_mutable_view_t = std::conditional_t, - std::vector>, - std::byte /* dummy */>; + edge_property_value_t*, + void*>; static_assert(std::is_same_v::value_type, thrust::tuple>); @@ -729,19 +856,20 @@ nbr_intersection(raft::handle_t const& handle, "Invalid input arguments: there are invalid input vertex pairs."); } - // 2. Collect neighbor lists for unique second pair elements (for the neighbors within the minor - // range for this GPU); Note that no need to collect for first pair elements as they already - // locally reside. + // 2. Collect neighbor lists (within the minor range for this GPU in multi-GPU) for unique second + // pair elements (all-gathered over minor_comm in multi-GPU); Note that no need to collect for + // first pair elements as they already locally reside. + + auto edge_mask_view = graph_view.edge_mask_view(); std::optional>> major_to_idx_map_ptr{ std::nullopt}; - std::optional> major_nbr_offsets{std::nullopt}; + std::optional> major_nbr_offsets{std::nullopt}; std::optional> major_nbr_indices{std::nullopt}; - [[maybe_unused]] auto major_nbr_properties = + [[maybe_unused]] auto major_e_property_values = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); - optional_property_buffer_view_t optional_major_nbr_properties{}; if constexpr (GraphViewType::is_multi_gpu) { if (intersect_minor_nbr[1]) { @@ -805,7 +933,7 @@ nbr_intersection(raft::handle_t const& handle, } } - // 2.2 Send majors and group (major_comm_rank, edge_partition_idx) counts + // 2.2 Send majors and group (major_comm_rank, local edge_partition_idx) counts rmm::device_uvector rx_majors(0, handle.get_stream()); std::vector rx_major_counts{}; @@ -859,7 +987,7 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector local_degrees_for_rx_majors(size_t{0}, handle.get_stream()); rmm::device_uvector local_nbrs_for_rx_majors(size_t{0}, handle.get_stream()); - [[maybe_unused]] auto local_nbrs_properties_for_rx_majors = + [[maybe_unused]] auto local_e_property_values_for_rx_majors = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); @@ -901,6 +1029,13 @@ nbr_intersection(raft::handle_t const& handle, auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail:: + edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = (i == size_t{0}) ? size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1]; @@ -913,6 +1048,7 @@ nbr_intersection(raft::handle_t const& handle, major_comm_size, minor_comm_size, edge_partition, + edge_partition_e_mask, reordered_idx_first, i, raft::device_span( @@ -936,22 +1072,28 @@ nbr_intersection(raft::handle_t const& handle, local_nbrs_for_rx_majors.resize( local_nbr_offsets_for_rx_majors.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_view_t optional_local_nbrs_properties{}; + optional_property_buffer_mutable_view_t optional_local_e_property_values{}; if constexpr (!std::is_same_v) { - local_nbrs_properties_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), - handle.get_stream()); - optional_local_nbrs_properties = raft::device_span( - local_nbrs_properties_for_rx_majors.data(), local_nbrs_properties_for_rx_majors.size()); + local_e_property_values_for_rx_majors.resize(local_nbrs_for_rx_majors.size(), + handle.get_stream()); + optional_local_e_property_values = local_e_property_values_for_rx_majors.data(); } for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); - auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail:: + edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); auto reordered_idx_first = (i == size_t{0}) ? size_t{0} : h_rx_reordered_group_lasts[i * major_comm_size - 1]; @@ -964,12 +1106,13 @@ nbr_intersection(raft::handle_t const& handle, update_rx_major_local_nbrs_t{ major_comm_size, minor_comm_size, edge_partition, edge_partition_e_value_input, + edge_partition_e_mask, reordered_idx_first, i, raft::device_span( @@ -980,7 +1123,7 @@ nbr_intersection(raft::handle_t const& handle, local_nbr_offsets_for_rx_majors.size()), raft::device_span(local_nbrs_for_rx_majors.data(), local_nbrs_for_rx_majors.size()), - optional_local_nbrs_properties}); + optional_local_e_property_values}); } std::vector h_rx_offsets(rx_major_counts.size() + size_t{1}, size_t{0}); @@ -1012,7 +1155,7 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector local_degrees_for_unique_majors(size_t{0}, handle.get_stream()); std::tie(local_degrees_for_unique_majors, std::ignore) = shuffle_values( major_comm, local_degrees_for_rx_majors.begin(), rx_major_counts, handle.get_stream()); - major_nbr_offsets = rmm::device_uvector(local_degrees_for_unique_majors.size() + 1, + major_nbr_offsets = rmm::device_uvector(local_degrees_for_unique_majors.size() + 1, handle.get_stream()); (*major_nbr_offsets).set_element_to_zero_async(size_t{0}, handle.get_stream()); auto degree_first = thrust::make_transform_iterator(local_degrees_for_unique_majors.begin(), @@ -1027,14 +1170,11 @@ nbr_intersection(raft::handle_t const& handle, major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); if constexpr (!std::is_same_v) { - std::tie(major_nbr_properties, std::ignore) = + std::tie(major_e_property_values, std::ignore) = shuffle_values(major_comm, - local_nbrs_properties_for_rx_majors.begin(), + local_e_property_values_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); - - optional_major_nbr_properties = raft::device_span( - major_nbr_properties.data(), major_nbr_properties.size()); } major_to_idx_map_ptr = std::make_unique>( @@ -1065,11 +1205,11 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector nbr_intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector nbr_intersection_indices(size_t{0}, handle.get_stream()); - [[maybe_unused]] auto nbr_intersection_properties0 = + [[maybe_unused]] auto nbr_intersection_e_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); - [[maybe_unused]] auto nbr_intersection_properties1 = + [[maybe_unused]] auto nbr_intersection_e_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); @@ -1116,15 +1256,19 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_sizes.reserve(graph_view.number_of_local_edge_partitions()); edge_partition_nbr_intersection_indices.reserve(graph_view.number_of_local_edge_partitions()); - [[maybe_unused]] optional_nbr_intersected_edge_partitions_t - edge_partition_nbr_intersection_property0{}; - [[maybe_unused]] optional_nbr_intersected_edge_partitions_t - edge_partition_nbr_intersection_property1{}; + [[maybe_unused]] std::conditional_t, + std::vector>, + std::byte /* dummy */> + edge_partition_nbr_intersection_e_property_values0{}; + [[maybe_unused]] std::conditional_t, + std::vector>, + std::byte /* dummy */> + edge_partition_nbr_intersection_e_property_values1{}; if constexpr (!std::is_same_v) { - edge_partition_nbr_intersection_property0.reserve( + edge_partition_nbr_intersection_e_property_values0.reserve( graph_view.number_of_local_edge_partitions()); - edge_partition_nbr_intersection_property1.reserve( + edge_partition_nbr_intersection_e_property_values1.reserve( graph_view.number_of_local_edge_partitions()); } @@ -1144,11 +1288,11 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector rx_v_pair_nbr_intersection_indices(size_t{0}, handle.get_stream()); - [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties0 = + [[maybe_unused]] auto rx_v_pair_nbr_intersection_e_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); - [[maybe_unused]] auto rx_v_pair_nbr_intersection_properties1 = + [[maybe_unused]] auto rx_v_pair_nbr_intersection_e_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( 0, handle.get_stream()); @@ -1174,9 +1318,15 @@ nbr_intersection(raft::handle_t const& handle, auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); - auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, i); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : thrust::nullopt; + auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); rx_v_pair_nbr_intersection_sizes.resize( @@ -1193,11 +1343,12 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_sizes.begin(), pick_min_degree_t{ nullptr, - raft::device_span(), + raft::device_span(), second_element_to_idx_map, - raft::device_span((*major_nbr_offsets).data(), + raft::device_span((*major_nbr_offsets).data(), (*major_nbr_offsets).size()), - edge_partition}); + edge_partition, + edge_partition_e_mask}); } else { CUGRAPH_FAIL("unimplemented."); } @@ -1215,25 +1366,30 @@ nbr_intersection(raft::handle_t const& handle, rx_v_pair_nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties0{}; - optional_property_buffer_view_t rx_v_pair_optional_nbr_intersection_properties1{}; + optional_property_buffer_mutable_view_t + rx_v_pair_optional_nbr_intersection_e_property_values0{}; + optional_property_buffer_mutable_view_t + rx_v_pair_optional_nbr_intersection_e_property_values1{}; if constexpr (!std::is_same_v) { - rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.resize( + rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.resize( + rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); - rx_v_pair_optional_nbr_intersection_properties0 = - raft::device_span(rx_v_pair_nbr_intersection_properties0.data(), - rx_v_pair_nbr_intersection_properties0.size()); + rx_v_pair_optional_nbr_intersection_e_property_values0 = + rx_v_pair_nbr_intersection_e_property_values0.data(); - rx_v_pair_optional_nbr_intersection_properties1 = - raft::device_span(rx_v_pair_nbr_intersection_properties1.data(), - rx_v_pair_nbr_intersection_properties1.size()); + rx_v_pair_optional_nbr_intersection_e_property_values1 = + rx_v_pair_nbr_intersection_e_property_values1.data(); } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { + optional_property_buffer_view_t optional_major_e_property_values{}; + if constexpr (!std::is_same_v) { + optional_major_e_property_values = major_e_property_values.data(); + } + auto second_element_to_idx_map = detail::kv_cuco_store_find_device_view_t((*major_to_idx_map_ptr)->view()); thrust::tabulate( @@ -1248,28 +1404,29 @@ nbr_intersection(raft::handle_t const& handle, edge_t, edge_partition_e_input_device_view_t, optional_property_buffer_view_t, + optional_property_buffer_mutable_view_t, true>{nullptr, - raft::device_span(), + raft::device_span(), raft::device_span(), optional_property_buffer_view_t{}, second_element_to_idx_map, - raft::device_span((*major_nbr_offsets).data(), + raft::device_span((*major_nbr_offsets).data(), (*major_nbr_offsets).size()), raft::device_span((*major_nbr_indices).data(), (*major_nbr_indices).size()), - optional_major_nbr_properties, + optional_major_e_property_values, edge_partition, edge_partition_e_value_input, + edge_partition_e_mask, get_dataframe_buffer_begin(vertex_pair_buffer), raft::device_span(rx_v_pair_nbr_intersection_offsets.data(), rx_v_pair_nbr_intersection_offsets.size()), raft::device_span(rx_v_pair_nbr_intersection_indices.data(), rx_v_pair_nbr_intersection_indices.size()), - rx_v_pair_optional_nbr_intersection_properties0, - rx_v_pair_optional_nbr_intersection_properties1, + rx_v_pair_optional_nbr_intersection_e_property_values0, + rx_v_pair_optional_nbr_intersection_e_property_values1, invalid_vertex_id::value}); - } else { CUGRAPH_FAIL("unimplemented."); } @@ -1284,31 +1441,31 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); } else { - auto common_nbr_and_properties_begin = thrust::make_zip_iterator( + auto common_nbr_and_e_property_values_begin = thrust::make_zip_iterator( thrust::make_tuple(rx_v_pair_nbr_intersection_indices.begin(), - rx_v_pair_nbr_intersection_properties0.begin(), - rx_v_pair_nbr_intersection_properties1.begin())); + rx_v_pair_nbr_intersection_e_property_values0.begin(), + rx_v_pair_nbr_intersection_e_property_values1.begin())); auto last = thrust::remove_if( handle.get_thrust_policy(), - common_nbr_and_properties_begin, - common_nbr_and_properties_begin + rx_v_pair_nbr_intersection_indices.size(), + common_nbr_and_e_property_values_begin, + common_nbr_and_e_property_values_begin + rx_v_pair_nbr_intersection_indices.size(), [] __device__(auto nbr_p0_p1) { return thrust::get<0>(nbr_p0_p1) == invalid_vertex_id::value; }); rx_v_pair_nbr_intersection_indices.resize( - thrust::distance(common_nbr_and_properties_begin, last), handle.get_stream()); + thrust::distance(common_nbr_and_e_property_values_begin, last), handle.get_stream()); rx_v_pair_nbr_intersection_indices.shrink_to_fit(handle.get_stream()); - rx_v_pair_nbr_intersection_properties0.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); - rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.resize( + rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.shrink_to_fit(handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.resize(rx_v_pair_nbr_intersection_indices.size(), - handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.resize( + rx_v_pair_nbr_intersection_indices.size(), handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.shrink_to_fit(handle.get_stream()); } thrust::inclusive_scan(handle.get_thrust_policy(), @@ -1427,11 +1584,11 @@ nbr_intersection(raft::handle_t const& handle, rmm::device_uvector combined_nbr_intersection_indices(size_t{0}, handle.get_stream()); - [[maybe_unused]] auto combined_nbr_intersection_properties0 = + [[maybe_unused]] auto combined_nbr_intersection_e_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( size_t{0}, handle.get_stream()); - [[maybe_unused]] auto combined_nbr_intersection_properties1 = + [[maybe_unused]] auto combined_nbr_intersection_e_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( size_t{0}, handle.get_stream()); @@ -1470,47 +1627,47 @@ nbr_intersection(raft::handle_t const& handle, combined_nbr_intersection_indices.resize(gathered_nbr_intersection_indices.size(), handle.get_stream()); - [[maybe_unused]] auto gathered_nbr_intersection_properties0 = + [[maybe_unused]] auto gathered_nbr_intersection_e_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), handle.get_stream()); - [[maybe_unused]] auto gathered_nbr_intersection_properties1 = + [[maybe_unused]] auto gathered_nbr_intersection_e_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( rx_displacements.back() + gathered_nbr_intersection_index_rx_counts.back(), handle.get_stream()); if constexpr (!std::is_same_v) { device_multicast_sendrecv(minor_comm, - rx_v_pair_nbr_intersection_properties0.begin(), + rx_v_pair_nbr_intersection_e_property_values0.begin(), rx_v_pair_nbr_intersection_index_tx_counts, tx_displacements, ranks, - gathered_nbr_intersection_properties0.begin(), + gathered_nbr_intersection_e_property_values0.begin(), gathered_nbr_intersection_index_rx_counts, rx_displacements, ranks, handle.get_stream()); - rx_v_pair_nbr_intersection_properties0.resize(size_t{0}, handle.get_stream()); - rx_v_pair_nbr_intersection_properties0.shrink_to_fit(handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.resize(size_t{0}, handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values0.shrink_to_fit(handle.get_stream()); - combined_nbr_intersection_properties0.resize(gathered_nbr_intersection_properties0.size(), - handle.get_stream()); + combined_nbr_intersection_e_property_values0.resize( + gathered_nbr_intersection_e_property_values0.size(), handle.get_stream()); device_multicast_sendrecv(minor_comm, - rx_v_pair_nbr_intersection_properties1.begin(), + rx_v_pair_nbr_intersection_e_property_values1.begin(), rx_v_pair_nbr_intersection_index_tx_counts, tx_displacements, ranks, - gathered_nbr_intersection_properties1.begin(), + gathered_nbr_intersection_e_property_values1.begin(), gathered_nbr_intersection_index_rx_counts, rx_displacements, ranks, handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.resize(size_t{0}, handle.get_stream()); - rx_v_pair_nbr_intersection_properties1.shrink_to_fit(handle.get_stream()); - combined_nbr_intersection_properties1.resize(gathered_nbr_intersection_properties1.size(), - handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.resize(size_t{0}, handle.get_stream()); + rx_v_pair_nbr_intersection_e_property_values1.shrink_to_fit(handle.get_stream()); + combined_nbr_intersection_e_property_values1.resize( + gathered_nbr_intersection_e_property_values1.size(), handle.get_stream()); } if constexpr (!std::is_same_v) { @@ -1518,7 +1675,10 @@ nbr_intersection(raft::handle_t const& handle, handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1529,25 +1689,19 @@ nbr_intersection(raft::handle_t const& handle, combined_nbr_intersection_offsets.size()), raft::device_span(combined_nbr_intersection_indices.data(), combined_nbr_intersection_indices.size()), - raft::device_span( - gathered_nbr_intersection_properties0.data(), - gathered_nbr_intersection_properties0.size()), - raft::device_span( - gathered_nbr_intersection_properties1.data(), - gathered_nbr_intersection_properties1.size()), - raft::device_span( - combined_nbr_intersection_properties0.data(), - combined_nbr_intersection_properties0.size()), - raft::device_span( - combined_nbr_intersection_properties1.data(), - combined_nbr_intersection_properties1.size())}); - + gathered_nbr_intersection_e_property_values0.data(), + gathered_nbr_intersection_e_property_values1.data(), + combined_nbr_intersection_e_property_values0.data(), + combined_nbr_intersection_e_property_values1.data()}); } else { thrust::for_each( handle.get_thrust_policy(), thrust::make_counting_iterator(size_t{0}), thrust::make_counting_iterator(rx_v_pair_counts[minor_comm_rank]), - gatherv_indices_t{ + gatherv_indices_t{ rx_v_pair_counts[minor_comm_rank], minor_comm_size, raft::device_span(gathered_nbr_intersection_offsets.data(), @@ -1567,10 +1721,10 @@ nbr_intersection(raft::handle_t const& handle, edge_partition_nbr_intersection_indices.push_back( std::move(combined_nbr_intersection_indices)); if constexpr (!std::is_same_v) { - edge_partition_nbr_intersection_property0.push_back( - std::move(combined_nbr_intersection_properties0)); - edge_partition_nbr_intersection_property1.push_back( - std::move(combined_nbr_intersection_properties1)); + edge_partition_nbr_intersection_e_property_values0.push_back( + std::move(combined_nbr_intersection_e_property_values0)); + edge_partition_nbr_intersection_e_property_values1.push_back( + std::move(combined_nbr_intersection_e_property_values1)); } } @@ -1581,8 +1735,10 @@ nbr_intersection(raft::handle_t const& handle, } nbr_intersection_indices.resize(num_nbr_intersection_indices, handle.get_stream()); if constexpr (!std::is_same_v) { - nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); - nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), + handle.get_stream()); + nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), + handle.get_stream()); } size_t size_offset{0}; size_t index_offset{0}; @@ -1599,14 +1755,14 @@ nbr_intersection(raft::handle_t const& handle, if constexpr (!std::is_same_v) { thrust::copy(handle.get_thrust_policy(), - edge_partition_nbr_intersection_property0[i].begin(), - edge_partition_nbr_intersection_property0[i].end(), - nbr_intersection_properties0.begin() + index_offset); + edge_partition_nbr_intersection_e_property_values0[i].begin(), + edge_partition_nbr_intersection_e_property_values0[i].end(), + nbr_intersection_e_property_values0.begin() + index_offset); thrust::copy(handle.get_thrust_policy(), - edge_partition_nbr_intersection_property1[i].begin(), - edge_partition_nbr_intersection_property1[i].end(), - nbr_intersection_properties1.begin() + index_offset); + edge_partition_nbr_intersection_e_property_values1[i].begin(), + edge_partition_nbr_intersection_e_property_values1[i].end(), + nbr_intersection_e_property_values1.begin() + index_offset); } index_offset += edge_partition_nbr_intersection_indices[i].size(); @@ -1619,13 +1775,18 @@ nbr_intersection(raft::handle_t const& handle, size_first, size_first + nbr_intersection_sizes.size(), nbr_intersection_offsets.begin() + 1); - } else { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(size_t{0})); - auto edge_partition_e_value_input = edge_partition_e_input_device_view_t(edge_value_input, 0); + auto edge_partition_e_mask = + edge_mask_view + ? thrust::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, 0) + : thrust::nullopt; + rmm::device_uvector nbr_intersection_sizes( input_size, handle.get_stream()); // initially store minimum degrees (upper bound for intersection sizes) @@ -1636,10 +1797,11 @@ nbr_intersection(raft::handle_t const& handle, vertex_pair_first + input_size, nbr_intersection_sizes.begin(), pick_min_degree_t{nullptr, - raft::device_span(), + raft::device_span(), nullptr, - raft::device_span(), - edge_partition}); + raft::device_span(), + edge_partition, + edge_partition_e_mask}); } else { CUGRAPH_FAIL("unimplemented."); } @@ -1656,51 +1818,51 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.resize(nbr_intersection_offsets.back_element(handle.get_stream()), handle.get_stream()); - optional_property_buffer_view_t optional_nbr_intersection_properties0{}; - optional_property_buffer_view_t optional_nbr_intersection_properties1{}; + optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values0{}; + optional_property_buffer_mutable_view_t optional_nbr_intersection_e_property_values1{}; if constexpr (!std::is_same_v) { - nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); - nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); - - optional_nbr_intersection_properties0 = raft::device_span( - nbr_intersection_properties0.data(), nbr_intersection_properties0.size()); + nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), + handle.get_stream()); + nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), + handle.get_stream()); - optional_nbr_intersection_properties1 = raft::device_span( - nbr_intersection_properties1.data(), nbr_intersection_properties1.size()); + optional_nbr_intersection_e_property_values0 = nbr_intersection_e_property_values0.data(); + optional_nbr_intersection_e_property_values1 = nbr_intersection_e_property_values1.data(); } if (intersect_minor_nbr[0] && intersect_minor_nbr[1]) { - thrust::tabulate( - handle.get_thrust_policy(), - nbr_intersection_sizes.begin(), - nbr_intersection_sizes.end(), - copy_intersecting_nbrs_and_update_intersection_size_t{ - nullptr, - raft::device_span(), - raft::device_span(), - optional_property_buffer_view_t{}, - nullptr, - raft::device_span(), - raft::device_span(), - optional_property_buffer_view_t{}, - edge_partition, - edge_partition_e_value_input, - vertex_pair_first, - raft::device_span(nbr_intersection_offsets.data(), - nbr_intersection_offsets.size()), - raft::device_span(nbr_intersection_indices.data(), - nbr_intersection_indices.size()), - optional_nbr_intersection_properties0, - optional_nbr_intersection_properties1, - invalid_vertex_id::value}); + thrust::tabulate(handle.get_thrust_policy(), + nbr_intersection_sizes.begin(), + nbr_intersection_sizes.end(), + copy_intersecting_nbrs_and_update_intersection_size_t< + void*, + void*, + decltype(vertex_pair_first), + vertex_t, + edge_t, + edge_partition_e_input_device_view_t, + optional_property_buffer_view_t, + optional_property_buffer_mutable_view_t, + false>{nullptr, + raft::device_span(), + raft::device_span(), + optional_property_buffer_view_t{}, + nullptr, + raft::device_span(), + raft::device_span(), + optional_property_buffer_view_t{}, + edge_partition, + edge_partition_e_value_input, + edge_partition_e_mask, + vertex_pair_first, + raft::device_span(nbr_intersection_offsets.data(), + nbr_intersection_offsets.size()), + raft::device_span(nbr_intersection_indices.data(), + nbr_intersection_indices.size()), + optional_nbr_intersection_e_property_values0, + optional_nbr_intersection_e_property_values1, + invalid_vertex_id::value}); } else { CUGRAPH_FAIL("unimplemented."); } @@ -1711,14 +1873,14 @@ nbr_intersection(raft::handle_t const& handle, thrust::count_if(handle.get_thrust_policy(), nbr_intersection_indices.begin(), nbr_intersection_indices.end(), - detail::not_equal_t{invalid_vertex_id::value}), + detail::is_not_equal_t{invalid_vertex_id::value}), handle.get_stream()); - [[maybe_unused]] auto tmp_properties0 = + [[maybe_unused]] auto tmp_property_values0 = cugraph::detail::allocate_optional_dataframe_buffer( tmp_indices.size(), handle.get_stream()); - [[maybe_unused]] auto tmp_properties1 = + [[maybe_unused]] auto tmp_property_values1 = cugraph::detail::allocate_optional_dataframe_buffer( tmp_indices.size(), handle.get_stream()); @@ -1737,39 +1899,38 @@ nbr_intersection(raft::handle_t const& handle, nbr_intersection_indices.begin() + num_scanned, nbr_intersection_indices.begin() + num_scanned + this_scan_size, tmp_indices.begin() + num_copied, - detail::not_equal_t{invalid_vertex_id::value}))); + detail::is_not_equal_t{invalid_vertex_id::value}))); } else { - auto zipped_itr_to_indices_and_properties_begin = - thrust::make_zip_iterator(thrust::make_tuple(nbr_intersection_indices.begin(), - nbr_intersection_properties0.begin(), - nbr_intersection_properties1.begin())); + auto zipped_itr_to_indices_and_e_property_values_begin = thrust::make_zip_iterator( + thrust::make_tuple(nbr_intersection_indices.begin(), + nbr_intersection_e_property_values0.begin(), + nbr_intersection_e_property_values1.begin())); auto zipped_itr_to_tmps_begin = thrust::make_zip_iterator(thrust::make_tuple( - tmp_indices.begin(), tmp_properties0.begin(), tmp_properties1.begin())); + tmp_indices.begin(), tmp_property_values0.begin(), tmp_property_values1.begin())); num_copied += static_cast(thrust::distance( zipped_itr_to_tmps_begin + num_copied, - thrust::copy_if(handle.get_thrust_policy(), - zipped_itr_to_indices_and_properties_begin + num_scanned, - zipped_itr_to_indices_and_properties_begin + num_scanned + this_scan_size, - zipped_itr_to_tmps_begin + num_copied, - [] __device__(auto nbr_p0_p1) { - auto nbr = thrust::get<0>(nbr_p0_p1); - auto p0 = thrust::get<1>(nbr_p0_p1); - auto p1 = thrust::get<2>(nbr_p0_p1); - return thrust::get<0>(nbr_p0_p1) != invalid_vertex_id::value; - }))); + thrust::copy_if( + handle.get_thrust_policy(), + zipped_itr_to_indices_and_e_property_values_begin + num_scanned, + zipped_itr_to_indices_and_e_property_values_begin + num_scanned + this_scan_size, + zipped_itr_to_tmps_begin + num_copied, + [] __device__(auto nbr_p0_p1) { + auto nbr = thrust::get<0>(nbr_p0_p1); + auto p0 = thrust::get<1>(nbr_p0_p1); + auto p1 = thrust::get<2>(nbr_p0_p1); + return thrust::get<0>(nbr_p0_p1) != invalid_vertex_id::value; + }))); } num_scanned += this_scan_size; } nbr_intersection_indices = std::move(tmp_indices); if constexpr (!std::is_same_v) { - nbr_intersection_properties0 = std::move(tmp_properties0); - nbr_intersection_properties1 = std::move(tmp_properties1); + nbr_intersection_e_property_values0 = std::move(tmp_property_values0); + nbr_intersection_e_property_values1 = std::move(tmp_property_values1); } - #else - if constexpr (std::is_same_v) { nbr_intersection_indices.resize( thrust::distance(nbr_intersection_indices.begin(), @@ -1780,10 +1941,10 @@ nbr_intersection(raft::handle_t const& handle, handle.get_stream()); } else { nbr_intersection_indices.resize( - thrust::distance(zipped_itr_to_indices_and_properties_begin, + thrust::distance(zipped_itr_to_indices_and_e_property_values_begin, thrust::remove_if(handle.get_thrust_policy(), - zipped_itr_to_indices_and_properties_begin, - zipped_itr_to_indices_and_properties_begin + + zipped_itr_to_indices_and_e_property_values_begin, + zipped_itr_to_indices_and_e_property_values_begin + nbr_intersection_indices.size(), [] __device__(auto nbr_p0_p1) { return thrust::get<0>(nbr_p0_p1) == @@ -1791,8 +1952,10 @@ nbr_intersection(raft::handle_t const& handle, })), handle.get_stream()); - nbr_intersection_properties0.resize(nbr_intersection_indices.size(), handle.get_stream()); - nbr_intersection_properties1.resize(nbr_intersection_indices.size(), handle.get_stream()); + nbr_intersection_e_property_values0.resize(nbr_intersection_indices.size(), + handle.get_stream()); + nbr_intersection_e_property_values1.resize(nbr_intersection_indices.size(), + handle.get_stream()); } #endif @@ -1811,8 +1974,8 @@ nbr_intersection(raft::handle_t const& handle, } else { return std::make_tuple(std::move(nbr_intersection_offsets), std::move(nbr_intersection_indices), - std::move(nbr_intersection_properties0), - std::move(nbr_intersection_properties1)); + std::move(nbr_intersection_e_property_values0), + std::move(nbr_intersection_e_property_values1)); } } diff --git a/cpp/src/prims/detail/optional_dataframe_buffer.hpp b/cpp/src/prims/detail/optional_dataframe_buffer.hpp index dd40e6932e4..62b2245a651 100644 --- a/cpp/src/prims/detail/optional_dataframe_buffer.hpp +++ b/cpp/src/prims/detail/optional_dataframe_buffer.hpp @@ -97,6 +97,7 @@ void shrink_to_fit_optional_dataframe_buffer( { return shrink_to_fit_dataframe_buffer(optional_dataframe_buffer, stream_view); } + } // namespace detail } // namespace cugraph diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index c46e83aa5da..f17441ad6ab 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -604,7 +604,7 @@ class kv_cuco_store_t { store_value_offsets.begin() /* map */, store_value_offsets.begin() /* stencil */, get_dataframe_buffer_begin(store_values_), - not_equal_t{std::numeric_limits::max()}); + is_not_equal_t{std::numeric_limits::max()}); } } @@ -649,7 +649,7 @@ class kv_cuco_store_t { store_value_offsets.begin() /* map */, store_value_offsets.begin() /* stencil */, get_dataframe_buffer_begin(store_values_), - not_equal_t{std::numeric_limits::max()}); + is_not_equal_t{std::numeric_limits::max()}); } } @@ -695,7 +695,7 @@ class kv_cuco_store_t { store_value_offsets.begin() /* map */, store_value_offsets.begin() /* stencil */, get_dataframe_buffer_begin(store_values_), - not_equal_t{std::numeric_limits::max()}); + is_not_equal_t{std::numeric_limits::max()}); // now perform assigns (for k,v pairs that failed to insert) diff --git a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh index 640c3c04bfd..201c08325d7 100644 --- a/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh +++ b/cpp/src/prims/per_v_pair_transform_dst_nbr_intersection.cuh @@ -112,8 +112,8 @@ struct call_intersection_op_t { IntersectionOp intersection_op{}; size_t const* nbr_offsets{nullptr}; typename GraphViewType::vertex_type const* nbr_indices{nullptr}; - EdgeValueInputIterator nbr_intersection_properties0{nullptr}; - EdgeValueInputIterator nbr_intersection_properties1{nullptr}; + EdgeValueInputIterator nbr_intersection_property_values0{nullptr}; + EdgeValueInputIterator nbr_intersection_property_values1{nullptr}; VertexPairIndexIterator major_minor_pair_index_first{}; VertexPairIterator major_minor_pair_first{}; VertexPairValueOutputIterator major_minor_pair_value_output_first{}; @@ -136,20 +136,20 @@ struct call_intersection_op_t { std::conditional_t, raft::device_span, std::byte /* dummy */> - properties0{}; + property_values0{}; std::conditional_t, raft::device_span, std::byte /* dummy */> - properties1{}; + property_values1{}; if constexpr (!std::is_same_v) { - properties0 = raft::device_span( - nbr_intersection_properties0 + nbr_offsets[i], - nbr_intersection_properties0 + +nbr_offsets[i + 1]); - properties1 = raft::device_span( - nbr_intersection_properties1 + nbr_offsets[i], - nbr_intersection_properties1 + +nbr_offsets[i + 1]); + property_values0 = raft::device_span( + nbr_intersection_property_values0 + nbr_offsets[i], + nbr_intersection_property_values0 + +nbr_offsets[i + 1]); + property_values1 = raft::device_span( + nbr_intersection_property_values1 + nbr_offsets[i], + nbr_intersection_property_values1 + +nbr_offsets[i + 1]); } property_t src_prop{}; @@ -174,8 +174,8 @@ struct call_intersection_op_t { dst_prop = *(vertex_property_first + dst_offset); } - *(major_minor_pair_value_output_first + index) = - intersection_op(src, dst, src_prop, dst_prop, intersection, properties0, properties1); + *(major_minor_pair_value_output_first + index) = intersection_op( + src, dst, src_prop, dst_prop, intersection, property_values0, property_values1); } }; @@ -240,8 +240,6 @@ void per_v_pair_transform_dst_nbr_intersection( using edge_property_value_t = typename EdgeValueInputIterator::value_type; using result_t = typename thrust::iterator_traits::value_type; - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - if (do_expensive_check) { auto num_invalids = detail::count_invalid_vertex_pairs(handle, graph_view, vertex_pair_first, vertex_pair_last); @@ -384,16 +382,16 @@ void per_v_pair_transform_dst_nbr_intersection( rmm::device_uvector intersection_offsets(size_t{0}, handle.get_stream()); rmm::device_uvector intersection_indices(size_t{0}, handle.get_stream()); - [[maybe_unused]] rmm::device_uvector r_nbr_intersection_properties0( - size_t{0}, handle.get_stream()); - [[maybe_unused]] rmm::device_uvector r_nbr_intersection_properties1( - size_t{0}, handle.get_stream()); + [[maybe_unused]] rmm::device_uvector + r_nbr_intersection_property_values0(size_t{0}, handle.get_stream()); + [[maybe_unused]] rmm::device_uvector + r_nbr_intersection_property_values1(size_t{0}, handle.get_stream()); if constexpr (!std::is_same_v) { std::tie(intersection_offsets, intersection_indices, - r_nbr_intersection_properties0, - r_nbr_intersection_properties1) = + r_nbr_intersection_property_values0, + r_nbr_intersection_property_values1) = detail::nbr_intersection(handle, graph_view, edge_value_input, @@ -422,7 +420,7 @@ void per_v_pair_transform_dst_nbr_intersection( detail::call_intersection_op_t< GraphViewType, decltype(vertex_value_input_for_unique_vertices_first), - typename decltype(r_nbr_intersection_properties0)::const_pointer, + typename decltype(r_nbr_intersection_property_values0)::const_pointer, IntersectionOp, decltype(chunk_vertex_pair_index_first), VertexPairIterator, @@ -433,8 +431,8 @@ void per_v_pair_transform_dst_nbr_intersection( intersection_op, intersection_offsets.data(), intersection_indices.data(), - r_nbr_intersection_properties0.data(), - r_nbr_intersection_properties1.data(), + r_nbr_intersection_property_values0.data(), + r_nbr_intersection_property_values1.data(), chunk_vertex_pair_index_first, vertex_pair_first, vertex_pair_value_output_first}); @@ -445,7 +443,7 @@ void per_v_pair_transform_dst_nbr_intersection( detail::call_intersection_op_t< GraphViewType, VertexValueInputIterator, - typename decltype(r_nbr_intersection_properties0)::const_pointer, + typename decltype(r_nbr_intersection_property_values0)::const_pointer, IntersectionOp, decltype(chunk_vertex_pair_index_first), VertexPairIterator, @@ -456,8 +454,8 @@ void per_v_pair_transform_dst_nbr_intersection( intersection_op, intersection_offsets.data(), intersection_indices.data(), - r_nbr_intersection_properties0.data(), - r_nbr_intersection_properties1.data(), + r_nbr_intersection_property_values0.data(), + r_nbr_intersection_property_values1.data(), chunk_vertex_pair_index_first, vertex_pair_first, vertex_pair_value_output_first}); diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index e6db21f1c7c..5fee97790f1 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -941,7 +941,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, minor_comm_ranks.begin(), thrust::make_zip_iterator( thrust::make_tuple(tmp_sample_local_nbr_indices.begin(), tmp_sample_key_indices.begin())), - not_equal_t{-1}); + is_not_equal_t{-1}); sample_local_nbr_indices = std::move(tmp_sample_local_nbr_indices); sample_key_indices = std::move(tmp_sample_key_indices); diff --git a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh index 2e19adc34c4..d4f8606257e 100644 --- a/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_dst_key_aggregated_outgoing_e.cuh @@ -336,8 +336,14 @@ void per_v_transform_reduce_dst_key_aggregated_outgoing_e( if (edge_partition.number_of_edges() > 0) { auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); - detail::decompress_edge_partition_to_fill_edgelist_majors( - handle, edge_partition, tmp_majors.data(), segment_offsets); + detail::decompress_edge_partition_to_fill_edgelist_majors( + handle, + edge_partition, + std::nullopt, + raft::device_span(tmp_majors.data(), tmp_majors.size()), + segment_offsets); auto minor_key_first = thrust::make_transform_iterator( edge_partition.indices(), diff --git a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh index f773a102959..bcf7606c423 100644 --- a/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh +++ b/cpp/src/prims/transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v.cuh @@ -25,6 +25,7 @@ #include #include #include +#include #include #include @@ -269,10 +270,17 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( vertex_value_output_first + graph_view.local_vertex_partition_range_size(), init); + auto edge_mask_view = graph_view.edge_mask_view(); for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) { auto edge_partition = edge_partition_device_view_t( graph_view.local_edge_partition_view(i)); + auto edge_partition_e_mask = + edge_mask_view + ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *edge_mask_view, i) + : std::nullopt; edge_partition_src_input_device_view_t edge_partition_src_value_input{}; edge_partition_dst_input_device_view_t edge_partition_dst_value_input{}; @@ -286,22 +294,29 @@ void transform_reduce_dst_nbr_intersection_of_e_endpoints_by_v( edge_partition_dst_value_input = edge_partition_dst_input_device_view_t(edge_dst_value_input); } - rmm::device_uvector majors(edge_partition.number_of_edges(), handle.get_stream()); + rmm::device_uvector majors( + edge_partition_e_mask + ? detail::count_set_bits( + handle, (*edge_partition_e_mask).value_first(), edge_partition.number_of_edges()) + : static_cast(edge_partition.number_of_edges()), + handle.get_stream()); rmm::device_uvector minors(majors.size(), handle.get_stream()); auto segment_offsets = graph_view.local_edge_partition_segment_offsets(i); detail::decompress_edge_partition_to_edgelist(handle, - edge_partition, - std::nullopt, - std::nullopt, - majors.data(), - minors.data(), - std::nullopt, - std::nullopt, - segment_offsets); + GraphViewType::is_multi_gpu>( + handle, + edge_partition, + std::nullopt, + std::nullopt, + edge_partition_e_mask, + raft::device_span(majors.data(), majors.size()), + raft::device_span(minors.data(), minors.size()), + std::nullopt, + std::nullopt, + segment_offsets); auto vertex_pair_first = thrust::make_zip_iterator(thrust::make_tuple(majors.begin(), minors.begin())); diff --git a/cpp/src/structure/coarsen_graph_impl.cuh b/cpp/src/structure/coarsen_graph_impl.cuh index b8dc28d563e..0704bc6f23b 100644 --- a/cpp/src/structure/coarsen_graph_impl.cuh +++ b/cpp/src/structure/coarsen_graph_impl.cuh @@ -164,16 +164,18 @@ decompress_edge_partition_to_relabeled_and_grouped_and_coarsened_edgelist( ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; - detail::decompress_edge_partition_to_edgelist( + detail::decompress_edge_partition_to_edgelist( handle, edge_partition, edge_partition_weight_view, - std::optional>{ - std::nullopt}, - edgelist_majors.data(), - edgelist_minors.data(), - edgelist_weights ? std::optional{(*edgelist_weights).data()} : std::nullopt, - std::optional{std::nullopt}, + std::nullopt, + std::nullopt, + raft::device_span(edgelist_majors.data(), edgelist_majors.size()), + raft::device_span(edgelist_minors.data(), edgelist_minors.size()), + edgelist_weights ? std::make_optional>((*edgelist_weights).data(), + (*edgelist_weights).size()) + : std::nullopt, + std::nullopt, segment_offsets); auto pair_first = diff --git a/cpp/src/structure/decompress_to_edgelist_impl.cuh b/cpp/src/structure/decompress_to_edgelist_impl.cuh index d653307c620..e9e84f5bf15 100644 --- a/cpp/src/structure/decompress_to_edgelist_impl.cuh +++ b/cpp/src/structure/decompress_to_edgelist_impl.cuh @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -80,8 +81,11 @@ decompress_to_edgelist_impl( std::vector edgelist_edge_counts(graph_view.number_of_local_edge_partitions(), size_t{0}); for (size_t i = 0; i < edgelist_edge_counts.size(); ++i) { - edgelist_edge_counts[i] = - static_cast(graph_view.number_of_local_edge_partition_edges(i)); + edgelist_edge_counts[i] = graph_view.local_edge_partition_view(i).number_of_edges(); + if (graph_view.has_edge_mask()) { + edgelist_edge_counts[i] = detail::count_set_bits( + handle, (*(graph_view.edge_mask_view())).value_firsts()[i], edgelist_edge_counts[i]); + } } auto number_of_local_edges = std::reduce(edgelist_edge_counts.begin(), edgelist_edge_counts.end()); @@ -97,7 +101,7 @@ decompress_to_edgelist_impl( size_t cur_size{0}; for (size_t i = 0; i < edgelist_edge_counts.size(); ++i) { - detail::decompress_edge_partition_to_edgelist( + detail::decompress_edge_partition_to_edgelist( handle, edge_partition_device_view_t( graph_view.local_edge_partition_view(i)), @@ -110,11 +114,19 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_id_view), i) : std::nullopt, - edgelist_majors.data() + cur_size, - edgelist_minors.data() + cur_size, - edgelist_weights ? std::optional{(*edgelist_weights).data() + cur_size} + graph_view.has_edge_mask() + ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *(graph_view.edge_mask_view()), i) + : std::nullopt, + raft::device_span(edgelist_majors.data() + cur_size, edgelist_edge_counts[i]), + raft::device_span(edgelist_minors.data() + cur_size, edgelist_edge_counts[i]), + edgelist_weights ? std::make_optional>( + (*edgelist_weights).data() + cur_size, edgelist_edge_counts[i]) : std::nullopt, - edgelist_ids ? std::optional{(*edgelist_ids).data() + cur_size} : std::nullopt, + edgelist_ids ? std::make_optional>( + (*edgelist_ids).data() + cur_size, edgelist_edge_counts[i]) + : std::nullopt, graph_view.local_edge_partition_segment_offsets(i)); cur_size += edgelist_edge_counts[i]; } @@ -231,8 +243,13 @@ decompress_to_edgelist_impl( if (do_expensive_check) { /* currently, nothing to do */ } - rmm::device_uvector edgelist_majors(graph_view.number_of_local_edge_partition_edges(), - handle.get_stream()); + auto num_edges = graph_view.local_edge_partition_view().number_of_edges(); + if (graph_view.has_edge_mask()) { + num_edges = + detail::count_set_bits(handle, (*(graph_view.edge_mask_view())).value_firsts()[0], num_edges); + } + + rmm::device_uvector edgelist_majors(num_edges, handle.get_stream()); rmm::device_uvector edgelist_minors(edgelist_majors.size(), handle.get_stream()); auto edgelist_weights = edge_weight_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) @@ -240,7 +257,7 @@ decompress_to_edgelist_impl( auto edgelist_ids = edge_id_view ? std::make_optional>( edgelist_majors.size(), handle.get_stream()) : std::nullopt; - detail::decompress_edge_partition_to_edgelist( + detail::decompress_edge_partition_to_edgelist( handle, edge_partition_device_view_t( graph_view.local_edge_partition_view()), @@ -253,10 +270,19 @@ decompress_to_edgelist_impl( detail::edge_partition_edge_property_device_view_t>( (*edge_id_view), 0) : std::nullopt, - edgelist_majors.data(), - edgelist_minors.data(), - edgelist_weights ? std::optional{(*edgelist_weights).data()} : std::nullopt, - edgelist_ids ? std::optional{(*edgelist_ids).data()} : std::nullopt, + graph_view.has_edge_mask() + ? std::make_optional< + detail::edge_partition_edge_property_device_view_t>( + *(graph_view.edge_mask_view()), 0) + : std::nullopt, + raft::device_span(edgelist_majors.data(), edgelist_majors.size()), + raft::device_span(edgelist_minors.data(), edgelist_minors.size()), + edgelist_weights ? std::make_optional>((*edgelist_weights).data(), + (*edgelist_weights).size()) + : std::nullopt, + edgelist_ids ? std::make_optional>((*edgelist_ids).data(), + (*edgelist_ids).size()) + : std::nullopt, graph_view.local_edge_partition_segment_offsets()); if (renumber_map) { @@ -294,8 +320,6 @@ decompress_to_edgelist( std::optional> renumber_map, bool do_expensive_check) { - CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); - return decompress_to_edgelist_impl( handle, graph_view, edge_weight_view, edge_id_view, renumber_map, do_expensive_check); } diff --git a/cpp/src/structure/graph_impl.cuh b/cpp/src/structure/graph_impl.cuh index 97975897e08..75862266789 100644 --- a/cpp/src/structure/graph_impl.cuh +++ b/cpp/src/structure/graph_impl.cuh @@ -213,16 +213,17 @@ update_local_sorted_unique_edge_majors_minors( thrust::make_counting_iterator(minor_range_first + num_scanned), thrust::make_counting_iterator(minor_range_first + num_scanned + this_scan_size), unique_edge_minors.begin() + num_copied, - cugraph::detail::check_bit_set_t{minor_bitmaps.data(), minor_range_first}))); + cugraph::detail::check_bit_set_t{minor_bitmaps.data(), + minor_range_first}))); num_scanned += this_scan_size; } #else - thrust::copy_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(minor_range_first), - thrust::make_counting_iterator(minor_range_last), - unique_edge_minors.begin(), - cugraph::detail::check_bit_set_t{minor_bitmaps.data(), minor_range_first}); + thrust::copy_if(handle.get_thrust_policy(), + thrust::make_counting_iterator(minor_range_first), + thrust::make_counting_iterator(minor_range_last), + unique_edge_minors.begin(), + cugraph::detail::check_bit_set_t{ + minor_bitmaps.data(), minor_range_first}); #endif auto num_chunks = diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 7626784c13c..64a8a3212b3 100644 --- a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -106,6 +107,7 @@ rmm::device_uvector compute_major_degrees( std::vector const& edge_partition_offsets, std::optional> const& edge_partition_dcs_nzd_vertices, std::optional> const& edge_partition_dcs_nzd_vertex_counts, + std::optional> const& edge_partition_masks, partition_t const& partition, std::vector const& edge_partition_segment_offsets) { @@ -142,7 +144,9 @@ rmm::device_uvector compute_major_degrees( vertex_t major_range_last{}; std::tie(major_range_first, major_range_last) = partition.vertex_partition_range(major_range_vertex_partition_id); - auto p_offsets = edge_partition_offsets[i]; + auto offsets = edge_partition_offsets[i]; + auto masks = + edge_partition_masks ? thrust::make_optional((*edge_partition_masks)[i]) : thrust::nullopt; auto segment_offset_size_per_partition = edge_partition_segment_offsets.size() / static_cast(minor_comm_size); auto major_hypersparse_first = @@ -155,9 +159,16 @@ rmm::device_uvector compute_major_degrees( thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_hypersparse_first - major_range_first), local_degrees.begin(), - [p_offsets] __device__(auto i) { return p_offsets[i + 1] - p_offsets[i]; }); + [offsets, masks] __device__(auto i) { + auto local_degree = offsets[i + 1] - offsets[i]; + if (masks) { + local_degree = static_cast( + detail::count_set_bits(*masks, offsets[i], local_degree)); + } + return local_degree; + }); if (use_dcs) { - auto p_dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; + auto dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; auto dcs_nzd_vertex_count = (*edge_partition_dcs_nzd_vertex_counts)[i]; thrust::fill(execution_policy, local_degrees.begin() + (major_hypersparse_first - major_range_first), @@ -166,15 +177,20 @@ rmm::device_uvector compute_major_degrees( thrust::for_each(execution_policy, thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(dcs_nzd_vertex_count), - [p_offsets, - p_dcs_nzd_vertices, + [offsets, + dcs_nzd_vertices, + masks, major_range_first, major_hypersparse_first, local_degrees = local_degrees.data()] __device__(auto i) { - auto d = p_offsets[(major_hypersparse_first - major_range_first) + i + 1] - - p_offsets[(major_hypersparse_first - major_range_first) + i]; - auto v = p_dcs_nzd_vertices[i]; - local_degrees[v - major_range_first] = d; + auto major_idx = (major_hypersparse_first - major_range_first) + i; + auto local_degree = offsets[major_idx + 1] - offsets[major_idx]; + if (masks) { + local_degree = static_cast( + detail::count_set_bits(*masks, offsets[major_idx], local_degree)); + } + auto v = dcs_nzd_vertices[i]; + local_degrees[v - major_range_first] = local_degree; }); } minor_comm.reduce(local_degrees.data(), @@ -193,12 +209,21 @@ rmm::device_uvector compute_major_degrees( template rmm::device_uvector compute_major_degrees(raft::handle_t const& handle, edge_t const* offsets, + std::optional masks, vertex_t number_of_vertices) { rmm::device_uvector degrees(number_of_vertices, handle.get_stream()); thrust::tabulate( - handle.get_thrust_policy(), degrees.begin(), degrees.end(), [offsets] __device__(auto i) { - return offsets[i + 1] - offsets[i]; + handle.get_thrust_policy(), + degrees.begin(), + degrees.end(), + [offsets, masks = masks ? thrust::make_optional(*masks) : thrust::nullopt] __device__(auto i) { + auto local_degree = offsets[i + 1] - offsets[i]; + if (masks) { + local_degree = + static_cast(detail::count_set_bits(*masks, offsets[i], local_degree)); + } + return local_degree; }); return degrees; } @@ -512,16 +537,18 @@ rmm::device_uvector graph_view_t>:: compute_in_degrees(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - if (store_transposed) { return compute_major_degrees(handle, this->edge_partition_offsets_, this->edge_partition_dcs_nzd_vertices_, this->edge_partition_dcs_nzd_vertex_counts_, + this->has_edge_mask() + ? std::make_optional((*(this->edge_mask_view())).value_firsts()) + : std::nullopt, this->partition_, this->edge_partition_segment_offsets_); } else { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); return compute_minor_degrees(handle, *this); } } @@ -531,11 +558,15 @@ rmm::device_uvector graph_view_t>:: compute_in_degrees(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - if (store_transposed) { - return compute_major_degrees(handle, this->offsets_, this->local_vertex_partition_range_size()); + return compute_major_degrees( + handle, + this->offsets_, + this->has_edge_mask() ? std::make_optional((*(this->edge_mask_view())).value_firsts()[0]) + : std::nullopt, + this->local_vertex_partition_range_size()); } else { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); return compute_minor_degrees(handle, *this); } } @@ -545,15 +576,17 @@ rmm::device_uvector graph_view_t>:: compute_out_degrees(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - if (store_transposed) { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); return compute_minor_degrees(handle, *this); } else { return compute_major_degrees(handle, this->edge_partition_offsets_, this->edge_partition_dcs_nzd_vertices_, this->edge_partition_dcs_nzd_vertex_counts_, + this->has_edge_mask() + ? std::make_optional((*(this->edge_mask_view())).value_firsts()) + : std::nullopt, this->partition_, this->edge_partition_segment_offsets_); } @@ -564,12 +597,16 @@ rmm::device_uvector graph_view_t>:: compute_out_degrees(raft::handle_t const& handle) const { - CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); - if (store_transposed) { + CUGRAPH_EXPECTS(!has_edge_mask(), "unimplemented."); return compute_minor_degrees(handle, *this); } else { - return compute_major_degrees(handle, this->offsets_, this->local_vertex_partition_range_size()); + return compute_major_degrees( + handle, + this->offsets_, + this->has_edge_mask() ? std::make_optional((*(this->edge_mask_view())).value_firsts()[0]) + : std::nullopt, + this->local_vertex_partition_range_size()); } } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 2a4bb8ab2a5..6775ed2eb16 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -288,10 +288,6 @@ ConfigureTest(GENERATE_RMAT_TEST generators/generate_rmat_test.cpp) ConfigureTest(GENERATE_BIPARTITE_RMAT_TEST generators/generate_bipartite_rmat_test.cpp GPUS 1 PERCENT 100) -################################################################################################### -# - Graph mask tests ------------------------------------------------------------------------------ -ConfigureTest(GRAPH_MASK_TEST structure/graph_mask_test.cpp) - ################################################################################################### # - Symmetrize tests ------------------------------------------------------------------------------ ConfigureTest(SYMMETRIZE_TEST structure/symmetrize_test.cpp) diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu index a7cd8a989b0..a3edb1f6372 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_intersection.cu @@ -21,9 +21,13 @@ #include #include +#include +#include #include +#include #include +#include #include #include #include @@ -61,6 +65,7 @@ struct intersection_op_t { struct Prims_Usecase { size_t num_vertex_pairs{0}; + bool edge_masking{false}; bool check_correctness{true}; }; @@ -78,7 +83,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection virtual void TearDown() {} // Verify the results of per_v_pair_transform_dst_nbr_intersection primitive - template + template void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) { HighResTimer hr_timer{}; @@ -109,6 +114,34 @@ class Tests_MGPerVPairTransformDstNbrIntersection auto mg_graph_view = mg_graph.view(); + std::optional> edge_mask{std::nullopt}; + if (prims_usecase.edge_masking) { + cugraph::edge_src_property_t edge_src_renumber_map( + *handle_, mg_graph_view); + cugraph::edge_dst_property_t edge_dst_renumber_map( + *handle_, mg_graph_view); + cugraph::update_edge_src_property( + *handle_, mg_graph_view, (*mg_renumber_map).begin(), edge_src_renumber_map); + cugraph::update_edge_dst_property( + *handle_, mg_graph_view, (*mg_renumber_map).begin(), edge_dst_renumber_map); + + edge_mask = cugraph::edge_property_t(*handle_, mg_graph_view); + + cugraph::transform_e( + *handle_, + mg_graph_view, + edge_src_renumber_map.view(), + edge_dst_renumber_map.view(), + cugraph::edge_dummy_property_t{}.view(), + [] __device__(auto src, auto dst, auto src_property, auto dst_property, thrust::nullopt_t) { + return ((src_property % 2 == 0) && (dst_property % 2 == 0)) + ? false + : true; // mask out the edges with even unrenumbered src & dst vertex IDs + }, + (*edge_mask).mutable_view()); + mg_graph_view.attach_edge_mask((*edge_mask).view()); + } + // 2. run MG per_v_pair_transform_dst_nbr_intersection primitive ASSERT_TRUE( @@ -224,6 +257,42 @@ class Tests_MGPerVPairTransformDstNbrIntersection if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); + if (prims_usecase.edge_masking) { + rmm::device_uvector srcs(0, handle_->get_stream()); + rmm::device_uvector dsts(0, handle_->get_stream()); + std::tie(srcs, dsts, std::ignore, std::ignore) = + cugraph::decompress_to_edgelist( + *handle_, sg_graph_view, std::nullopt, std::nullopt, std::nullopt); + auto edge_first = thrust::make_zip_iterator(srcs.begin(), dsts.begin()); + srcs.resize(thrust::distance(edge_first, + thrust::remove_if(handle_->get_thrust_policy(), + edge_first, + edge_first + srcs.size(), + [] __device__(auto pair) { + return (thrust::get<0>(pair) % 2 == 0) && + (thrust::get<1>(pair) % 2 == 0); + })), + handle_->get_stream()); + dsts.resize(srcs.size(), handle_->get_stream()); + rmm::device_uvector vertices(sg_graph_view.number_of_vertices(), + handle_->get_stream()); + thrust::sequence( + handle_->get_thrust_policy(), vertices.begin(), vertices.end(), vertex_t{0}); + std::tie(sg_graph, std::ignore, std::ignore, std::ignore, std::ignore) = cugraph:: + create_graph_from_edgelist( + *handle_, + std::move(vertices), + std::move(srcs), + std::move(dsts), + std::nullopt, + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{sg_graph_view.is_symmetric(), + sg_graph_view.is_multigraph()}, + false); + sg_graph_view = sg_graph.view(); + } + auto sg_result_buffer = cugraph::allocate_dataframe_buffer>( cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); auto sg_out_degrees = sg_graph_view.compute_out_degrees(*handle_); @@ -262,47 +331,16 @@ using Tests_MGPerVPairTransformDstNbrIntersection_File = using Tests_MGPerVPairTransformDstNbrIntersection_Rmat = Tests_MGPerVPairTransformDstNbrIntersection; -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>(std::get<0>(param), - std::get<1>(param)); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float) { auto param = GetParam(); - run_current_test(std::get<0>(param), std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -310,7 +348,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -318,7 +356,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -327,15 +365,18 @@ INSTANTIATE_TEST_SUITE_P( file_test, Tests_MGPerVPairTransformDstNbrIntersection_File, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{1024}, true}), + ::testing::Values(Prims_Usecase{size_t{1024}, false, true}, + Prims_Usecase{size_t{1024}, true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/netscience.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_test, - Tests_MGPerVPairTransformDstNbrIntersection_Rmat, - ::testing::Combine(::testing::Values(Prims_Usecase{size_t{1024}, true}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_MGPerVPairTransformDstNbrIntersection_Rmat, + ::testing::Combine( + ::testing::Values(Prims_Usecase{size_t{1024}, false, true}, + Prims_Usecase{size_t{1024}, true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with @@ -345,7 +386,8 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGPerVPairTransformDstNbrIntersection_Rmat, ::testing::Combine( - ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false}), + ::testing::Values(Prims_Usecase{size_t{1024 * 1024}, false, false}, + Prims_Usecase{size_t{1024 * 1024}, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu index 3b6a6b9c4c5..4d05b0c9e65 100644 --- a/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu +++ b/cpp/tests/prims/mg_per_v_pair_transform_dst_nbr_weighted_intersection.cu @@ -50,14 +50,14 @@ template struct intersection_op_t { - __device__ thrust::tuple operator()( + __device__ thrust::tuple operator()( vertex_t a, vertex_t b, weight_t weight_a /* weighted out degree */, weight_t weight_b /* weighted out degree */, raft::device_span intersection, - raft::device_span intersected_properties_a, - raft::device_span intersected_properties_b) const + raft::device_span intersected_property_values_a, + raft::device_span intersected_property_values_b) const { weight_t min_weight_a_intersect_b = weight_t{0}; weight_t max_weight_a_intersect_b = weight_t{0}; @@ -65,10 +65,12 @@ struct intersection_op_t { weight_t sum_of_intersected_b = weight_t{0}; for (size_t k = 0; k < intersection.size(); k++) { - min_weight_a_intersect_b += min(intersected_properties_a[k], intersected_properties_b[k]); - max_weight_a_intersect_b += max(intersected_properties_a[k], intersected_properties_b[k]); - sum_of_intersected_a += intersected_properties_a[k]; - sum_of_intersected_b += intersected_properties_b[k]; + min_weight_a_intersect_b += + min(intersected_property_values_a[k], intersected_property_values_b[k]); + max_weight_a_intersect_b += + max(intersected_property_values_a[k], intersected_property_values_b[k]); + sum_of_intersected_a += intersected_property_values_a[k]; + sum_of_intersected_b += intersected_property_values_b[k]; } weight_t sum_of_uniq_a = weight_a - sum_of_intersected_a; @@ -99,7 +101,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection virtual void TearDown() {} // Verify the results of per_v_pair_transform_dst_nbr_intersection primitive - template + template void run_current_test(Prims_Usecase const& prims_usecase, input_usecase_t const& input_usecase) { HighResTimer hr_timer{}; @@ -189,7 +191,7 @@ class Tests_MGPerVPairTransformDstNbrIntersection auto mg_result_buffer = cugraph::allocate_dataframe_buffer>( cugraph::size_dataframe_buffer(mg_vertex_pair_buffer), handle_->get_stream()); - auto mg_out_degrees = mg_graph_view.compute_out_degrees(*handle_); + auto mg_out_weight_sums = compute_out_weight_sums(*handle_, mg_graph_view, mg_edge_weight_view); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -197,9 +199,6 @@ class Tests_MGPerVPairTransformDstNbrIntersection hr_timer.start("MG per_v_pair_transform_dst_nbr_intersection"); } - rmm::device_uvector mg_out_weight_sums = - compute_out_weight_sums(*handle_, mg_graph_view, mg_edge_weight_view); - cugraph::per_v_pair_transform_dst_nbr_intersection( *handle_, mg_graph_view, @@ -272,7 +271,6 @@ class Tests_MGPerVPairTransformDstNbrIntersection if (handle_->get_comms().get_rank() == 0) { auto sg_graph_view = sg_graph.view(); - auto sg_result_buffer = cugraph::allocate_dataframe_buffer>( cugraph::size_dataframe_buffer(mg_aggregate_vertex_pair_buffer), handle_->get_stream()); @@ -290,11 +288,21 @@ class Tests_MGPerVPairTransformDstNbrIntersection */), sg_out_weight_sums.begin(), intersection_op_t{}, cugraph::get_dataframe_buffer_begin(sg_result_buffer)); + auto threshold_ratio = weight_t{1e-4}; + auto threshold_magnitude = std::numeric_limits::min(); + auto nearly_equal = [threshold_ratio, threshold_magnitude] __device__(auto lhs, auto rhs) { + return (fabs(thrust::get<0>(lhs) - thrust::get<0>(rhs)) < + max(max(thrust::get<0>(lhs), thrust::get<0>(rhs)) * threshold_ratio, + threshold_magnitude)) && + (fabs(thrust::get<1>(lhs) - thrust::get<1>(rhs)) < + max(max(thrust::get<1>(lhs), thrust::get<1>(rhs)) * threshold_ratio, + threshold_magnitude)); + }; bool valid = thrust::equal(handle_->get_thrust_policy(), cugraph::get_dataframe_buffer_begin(mg_aggregate_result_buffer), cugraph::get_dataframe_buffer_end(mg_aggregate_result_buffer), - cugraph::get_dataframe_buffer_begin(sg_result_buffer)); - + cugraph::get_dataframe_buffer_begin(sg_result_buffer), + nearly_equal); ASSERT_TRUE(valid); } } @@ -313,47 +321,16 @@ using Tests_MGPerVPairTransformDstNbrIntersection_File = using Tests_MGPerVPairTransformDstNbrIntersection_Rmat = Tests_MGPerVPairTransformDstNbrIntersection; -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>(std::get<0>(param), - std::get<1>(param)); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - -TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64FloatTupleIntFloat) -{ - auto param = GetParam(); - run_current_test>( - std::get<0>(param), - cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); -} - TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_File, CheckInt32Int32Float) { auto param = GetParam(); - run_current_test(std::get<0>(param), std::get<1>(param)); + run_current_test(std::get<0>(param), std::get<1>(param)); } TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -361,7 +338,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int32Float) TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } @@ -369,7 +346,7 @@ TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt32Int64Float) TEST_P(Tests_MGPerVPairTransformDstNbrIntersection_Rmat, CheckInt64Int64Float) { auto param = GetParam(); - run_current_test( + run_current_test( std::get<0>(param), cugraph::test::override_Rmat_Usecase_with_cmd_line_arguments(std::get<1>(param))); } diff --git a/cpp/tests/structure/graph_mask_test.cpp b/cpp/tests/structure/graph_mask_test.cpp deleted file mode 100644 index fc704d683a7..00000000000 --- a/cpp/tests/structure/graph_mask_test.cpp +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Copyright (c) 2022, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governin_from_mtxg permissions and - * limitations under the License. - */ - -#include -#include -#include - -#include -#include -#include - -#include -#include -#include -#include - -#include - -#include -#include -#include -#include -#include - -TEST(Test_GraphMask, BasicGraphMaskTestInt64) -{ - raft::handle_t handle; - - int number_of_vertices = 500; - int number_of_edges = 1000; - - cugraph::graph_mask_t mask( - handle, number_of_vertices, number_of_edges); - - auto mask_view = mask.view(); - - ASSERT_EQ(false, mask.has_vertex_mask()); - ASSERT_EQ(false, mask.has_edge_mask()); - ASSERT_EQ(false, mask_view.has_vertex_mask()); - ASSERT_EQ(false, mask_view.has_edge_mask()); - - mask.initialize_vertex_mask(); - mask.initialize_edge_mask(); - - auto mask_view2 = mask.view(); - - ASSERT_EQ(true, mask.has_vertex_mask()); - ASSERT_EQ(true, mask.has_edge_mask()); - ASSERT_EQ(true, mask_view2.has_vertex_mask()); - ASSERT_EQ(true, mask_view2.has_edge_mask()); -} \ No newline at end of file