diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 0f05aedf1a1..865d06b20e4 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -33,7 +33,7 @@ repos: additional_dependencies: - flake8==6.0.0 - repo: https://github.com/pre-commit/mirrors-clang-format - rev: v16.0.1 + rev: v16.0.6 hooks: - id: clang-format exclude: | diff --git a/conda/recipes/cugraph-dgl/meta.yaml b/conda/recipes/cugraph-dgl/meta.yaml index 2fbc6360c04..9e9fcd2faf1 100644 --- a/conda/recipes/cugraph-dgl/meta.yaml +++ b/conda/recipes/cugraph-dgl/meta.yaml @@ -26,6 +26,7 @@ requirements: - dgl >=1.1.0.cu* - numba >=0.57 - numpy >=1.21 + - pylibcugraphops ={{ version }} - python - pytorch diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 2eaf4361730..a78c06474c0 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -166,6 +166,7 @@ endif() include(cmake/thirdparty/get_nccl.cmake) include(cmake/thirdparty/get_cuhornet.cmake) +include(cmake/thirdparty/get_ucp.cmake) if(BUILD_TESTS) include(cmake/thirdparty/get_gtest.cmake) @@ -228,6 +229,7 @@ set(CUGRAPH_SOURCES src/sampling/uniform_neighbor_sampling_mg.cpp src/sampling/uniform_neighbor_sampling_sg.cpp src/sampling/renumber_sampled_edgelist_sg.cu + src/sampling/sampling_post_processing_sg.cu src/cores/core_number_sg.cu src/cores/core_number_mg.cu src/cores/k_core_sg.cu @@ -291,6 +293,7 @@ set(CUGRAPH_SOURCES src/community/triangle_count_mg.cu src/traversal/k_hop_nbrs_sg.cu src/traversal/k_hop_nbrs_mg.cu + src/mtmg/vertex_result.cu ) if(USE_CUGRAPH_OPS) diff --git a/cpp/cmake/thirdparty/get_ucp.cmake b/cpp/cmake/thirdparty/get_ucp.cmake new file mode 100644 index 00000000000..dcc4956a34e --- /dev/null +++ b/cpp/cmake/thirdparty/get_ucp.cmake @@ -0,0 +1,35 @@ +#============================================================================= +# Copyright (c) 2023, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+#============================================================================= + +function(find_and_configure_ucp) + + if(TARGET UCP::UCP) + return() + endif() + + rapids_find_generate_module(UCP + HEADER_NAMES ucp.h + LIBRARY_NAMES ucp + INCLUDE_SUFFIXES ucp/api + ) + + # Currently UCP has no CMake build-system so we require + # it built and installed on the machine already + rapids_find_package(UCP REQUIRED) + +endfunction() + +find_and_configure_ucp() diff --git a/cpp/include/cugraph/detail/utility_wrappers.hpp b/cpp/include/cugraph/detail/utility_wrappers.hpp index a15dbf34cf9..faa0fbb841b 100644 --- a/cpp/include/cugraph/detail/utility_wrappers.hpp +++ b/cpp/include/cugraph/detail/utility_wrappers.hpp @@ -37,8 +37,8 @@ namespace detail { * @param[in] stream_view stream view * @param[out] d_value device array to fill * @param[in] size number of elements in array - * @param[in] min_value minimum value - * @param[in] max_value maximum value + * @param[in] min_value minimum value (inclusive) + * @param[in] max_value maximum value (exclusive) * @param[in] rng_state The RngState instance holding pseudo-random number generator state. * */ diff --git a/cpp/include/cugraph/graph_functions.hpp b/cpp/include/cugraph/graph_functions.hpp index 200ee725b7a..5c1e9d5311f 100644 --- a/cpp/include/cugraph/graph_functions.hpp +++ b/cpp/include/cugraph/graph_functions.hpp @@ -919,6 +919,10 @@ rmm::device_uvector select_random_vertices( /** * @brief renumber sampling output * + * @deprecated This API will be deprecated and will be replaced by the + * renumber_and_compress_sampled_edgelist and renumber_and_sort_sampled_edgelist functions in + * sampling_functions.hpp. + * * This function renumbers sampling function (e.g. uniform_neighbor_sample) outputs satisfying the * following requirements. * diff --git a/cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp b/cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp new file mode 100644 index 00000000000..37398891370 --- /dev/null +++ b/cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace mtmg { +namespace detail { + +/** + * @brief Wrap an object to be available for each GPU + * + * In the MTMG environment we need the ability to manage a collection of objects + * that are associated with a particular GPU, and fetch the objects from an + * arbitrary GPU thread. This object will wrap any object and allow it to be + * accessed from different threads. 
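+ *
+ * A brief usage sketch (illustrative only; assumes `spans` is a populated
+ * device_shared_device_span_t<result_t> and `handle` is the mtmg handle of the
+ * calling thread):
+ * @code
+ * raft::device_span<result_t const> my_span = spans.get(handle);
+ * // my_span references the device memory owned by the GPU this thread is mapped to
+ * @endcode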
+ */ +template +using device_shared_device_span_t = device_shared_wrapper_t>; + +} // namespace detail +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp b/cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp new file mode 100644 index 00000000000..7f3992b73bd --- /dev/null +++ b/cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace mtmg { +namespace detail { + +/** + * @brief Wrap an object to be available for each GPU + * + * In the MTMG environment we need the ability to manage a collection of objects + * that are associated with a particular GPU, and fetch the objects from an + * arbitrary GPU thread. This object will wrap any object and allow it to be + * accessed from different threads. + */ +template +class device_shared_device_vector_t : public device_shared_wrapper_t> { + using parent_t = detail::device_shared_wrapper_t>; + + public: + /** + * @brief Create a device_shared_device_span (read only view) + */ + auto view() + { + std::lock_guard lock(parent_t::lock_); + + device_shared_device_span_t result; + + std::for_each(parent_t::objects_.begin(), parent_t::objects_.end(), [&result](auto& p) { + result.set(p.first, raft::device_span{p.second.data(), p.second.size()}); + }); + + return result; + } +}; + +} // namespace detail +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/detail/device_shared_wrapper.hpp b/cpp/include/cugraph/mtmg/detail/device_shared_wrapper.hpp new file mode 100644 index 00000000000..c4cacb401af --- /dev/null +++ b/cpp/include/cugraph/mtmg/detail/device_shared_wrapper.hpp @@ -0,0 +1,123 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +#include +#include + +namespace cugraph { +namespace mtmg { +namespace detail { + +/** + * @brief Wrap an object to be available for each GPU + * + * In the MTMG environment we need the ability to manage a collection of objects + * that are associated with a particular GPU, and fetch the objects from an + * arbitrary GPU thread. This object will wrap any object and allow it to be + * accessed from different threads. 
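+ *
+ * A minimal usage sketch (illustrative only; one object is stored per local GPU rank,
+ * keyed by handle.get_local_rank()):
+ * @code
+ * cugraph::mtmg::detail::device_shared_wrapper_t<int> wrapped;
+ * wrapped.set(handle, 42);           // store this GPU's instance (at most once per rank)
+ * int& value = wrapped.get(handle);  // any thread mapped to the same GPU gets the same object
+ * @endcode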
+ */ +template +class device_shared_wrapper_t { + public: + using wrapped_t = T; + + device_shared_wrapper_t() = default; + device_shared_wrapper_t(device_shared_wrapper_t&& other) : objects_{std::move(other.objects_)} {} + device_shared_wrapper_t& operator=(device_shared_wrapper_t&& other) + { + objects_ = std::move(other.objects_); + return *this; + } + + /** + * @brief Move a wrapped object into the wrapper for this thread + * + * @param handle Handle is used to identify the GPU we associated this object with + * @param obj Wrapped object + */ + void set(cugraph::mtmg::handle_t const& handle, wrapped_t&& obj) + { + std::lock_guard lock(lock_); + + auto pos = objects_.find(handle.get_local_rank()); + CUGRAPH_EXPECTS(pos == objects_.end(), "Cannot overwrite wrapped object"); + + objects_.insert(std::make_pair(handle.get_local_rank(), std::move(obj))); + } + + /** + * @brief Move a wrapped object into the wrapper for this thread + * + * @param local_rank Identify which GPU to associated this object with + * @param obj Wrapped object + */ + void set(int local_rank, wrapped_t&& obj) + { + std::lock_guard lock(lock_); + + auto pos = objects_.find(local_rank); + CUGRAPH_EXPECTS(pos == objects_.end(), "Cannot overwrite wrapped object"); + + objects_.insert(std::make_pair(local_rank, std::move(obj))); + } + + public: + /** + * @brief Get reference to an object for a particular thread + * + * @param handle Handle is used to identify the GPU we associated this object with + * @return Reference to the wrapped object + */ + wrapped_t& get(cugraph::mtmg::handle_t const& handle) + { + std::lock_guard lock(lock_); + + auto pos = objects_.find(handle.get_local_rank()); + CUGRAPH_EXPECTS(pos != objects_.end(), "Uninitialized wrapped object"); + + return pos->second; + } + + /** + * @brief Get the pointer to an object for a particular thread from this wrapper + * + * @param handle Handle is used to identify the GPU we associated this object with + * @return Shared pointer the wrapped object + */ + wrapped_t const& get(cugraph::mtmg::handle_t const& handle) const + { + std::lock_guard lock(lock_); + + auto pos = objects_.find(handle.get_local_rank()); + + CUGRAPH_EXPECTS(pos != objects_.end(), "Uninitialized wrapped object"); + + return pos->second; + } + + protected: + mutable std::mutex lock_{}; + std::map objects_{}; +}; + +} // namespace detail +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp b/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp new file mode 100644 index 00000000000..8011146ee4f --- /dev/null +++ b/cpp/include/cugraph/mtmg/detail/per_device_edgelist.hpp @@ -0,0 +1,275 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +// FIXME: Could use std::span once compiler supports C++20 +#include + +#include + +namespace cugraph { +namespace mtmg { +namespace detail { + +/** + * @brief An edgelist for each GPU + * + * Manages an edge list for edges associated with a particular GPU. Multiple threads + * can call the append() method, possibly concurrently. To avoid constantly copying + * when the buffers fill up, the class will create a device buffer containing a + * number of elements specified in the constructor. When that device buffer is full + * we will create a new buffer. + * + * When we try and use the edgelist we will consolidate the buffers, since at that + * time we know the entire size required. + * + * Important note, the expectation is that this object will be used in two phases: + * 1) The append() method will be used to fill buffers with edges + * 2) The edges will be consumed to create a graph + * + * These two phases are expected to be disjoint. The calling process is expected to + * manage some barrier so that all threads are guaranteed to be completed before changing + * phases. If an append() call (part of the filling phase) overlaps with calls to + * finalize_buffer(), consolidate_and_shuffle(), get_src(), get_dst(), get_wgt(), + * get_edge_id() and get_edge_type() then the behavior is undefined (data might change + * in some non-deterministic way). + */ +template +class per_device_edgelist_t { + public: + per_device_edgelist_t() = delete; + per_device_edgelist_t(per_device_edgelist_t const&) = delete; + per_device_edgelist_t& operator=(per_device_edgelist_t const&) = delete; + per_device_edgelist_t& operator=(per_device_edgelist_t&&) = delete; + + per_device_edgelist_t(cugraph::mtmg::handle_t const& handle, + size_t device_buffer_size, + bool use_weight, + bool use_edge_id, + bool use_edge_type) + : device_buffer_size_{device_buffer_size}, + current_pos_{0}, + src_{}, + dst_{}, + wgt_{std::nullopt}, + edge_id_{std::nullopt}, + edge_type_{std::nullopt} + { + if (use_weight) { wgt_ = std::make_optional(std::vector>()); } + + if (use_edge_id) { edge_id_ = std::make_optional(std::vector>()); } + + if (use_edge_type) { + edge_type_ = std::make_optional(std::vector>()); + } + + create_new_buffers(handle); + } + + per_device_edgelist_t(per_device_edgelist_t&& other) + : device_buffer_size_{other.device_buffer_size_}, + current_pos_{other.current_pos_}, + src_{std::move(other.src_)}, + dst_{std::move(other.dst_)}, + wgt_{std::move(other.wgt_)}, + edge_id_{std::move(other.edge_id_)}, + edge_type_{std::move(other.edge_type_)} + { + } + + /** + * @brief Append a list of edges to the edge list + * + * @param handle The resource handle + * @param src Source vertex id + * @param dst Destination vertex id + * @param wgt Edge weight + * @param edge_id Edge id + * @param edge_type Edge type + */ + void append(handle_t const& handle, + raft::host_span src, + raft::host_span dst, + std::optional> wgt, + std::optional> edge_id, + std::optional> edge_type) + { + // FIXME: This lock guard could be on a smaller region, but it + // would require more careful coding. The raft::update_device + // calls could be done without the lock if we made a local + // of the values of *.back() and did an increment of current_pos_ + // while we hold the lock. 
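+    // Copy the incoming host spans into the current device buffer in chunks; whenever the
+    // current buffer fills up, allocate a fresh device buffer and continue with the remainder.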
+ std::lock_guard lock(lock_); + + size_t count = src.size(); + size_t pos = 0; + + while (count > 0) { + size_t copy_count = std::min(count, (src_.back().size() - current_pos_)); + + raft::update_device( + src_.back().begin() + current_pos_, src.begin() + pos, copy_count, handle.get_stream()); + raft::update_device( + dst_.back().begin() + current_pos_, dst.begin() + pos, copy_count, handle.get_stream()); + if (wgt) + raft::update_device( + wgt_->back().begin() + current_pos_, wgt->begin() + pos, copy_count, handle.get_stream()); + if (edge_id) + raft::update_device(edge_id_->back().begin() + current_pos_, + edge_id->begin() + pos, + copy_count, + handle.get_stream()); + if (edge_type) + raft::update_device(edge_type_->back().begin() + current_pos_, + edge_type->begin() + pos, + copy_count, + handle.get_stream()); + + count -= copy_count; + pos += copy_count; + current_pos_ += copy_count; + + if (current_pos_ == src_.back().size()) { create_new_buffers(handle); } + } + + handle.raft_handle().sync_stream(); + } + + /** + * @brief Mark the edgelist as ready for reading (all writes are complete) + * + * @param handle The resource handle + */ + void finalize_buffer(handle_t const& handle) + { + src_.back().resize(current_pos_, handle.get_stream()); + dst_.back().resize(current_pos_, handle.get_stream()); + if (wgt_) wgt_->back().resize(current_pos_, handle.get_stream()); + if (edge_id_) edge_id_->back().resize(current_pos_, handle.get_stream()); + if (edge_type_) edge_type_->back().resize(current_pos_, handle.get_stream()); + } + + bool use_weight() const { return wgt_.has_value(); } + + bool use_edge_id() const { return edge_id_.has_value(); } + + bool use_edge_type() const { return edge_type_.has_value(); } + + std::vector>& get_src() { return src_; } + std::vector>& get_dst() { return dst_; } + std::optional>>& get_wgt() { return wgt_; } + std::optional>>& get_edge_id() { return edge_id_; } + std::optional>>& get_edge_type() + { + return edge_type_; + } + + /** + * @brief Consolidate edgelists (if necessary) and shuffle to the proper GPU + * + * @param handle The resource handle + */ + void consolidate_and_shuffle(cugraph::mtmg::handle_t const& handle, bool store_transposed) + { + if (src_.size() > 1) { + size_t total_size = std::transform_reduce( + src_.begin(), src_.end(), size_t{0}, std::plus(), [](auto& d_vector) { + return d_vector.size(); + }); + + resize_and_copy_buffers(handle.get_stream(), src_, total_size); + resize_and_copy_buffers(handle.get_stream(), dst_, total_size); + if (wgt_) resize_and_copy_buffers(handle.get_stream(), *wgt_, total_size); + if (edge_id_) resize_and_copy_buffers(handle.get_stream(), *edge_id_, total_size); + if (edge_type_) resize_and_copy_buffers(handle.get_stream(), *edge_type_, total_size); + } + + auto tmp_wgt = wgt_ ? std::make_optional(std::move((*wgt_)[0])) : std::nullopt; + auto tmp_edge_id = edge_id_ ? std::make_optional(std::move((*edge_id_)[0])) : std::nullopt; + auto tmp_edge_type = + edge_type_ ? std::make_optional(std::move((*edge_type_)[0])) : std::nullopt; + + std::tie(store_transposed ? dst_[0] : src_[0], + store_transposed ? src_[0] : dst_[0], + tmp_wgt, + tmp_edge_id, + tmp_edge_type) = + cugraph::detail::shuffle_ext_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning( + handle.raft_handle(), + store_transposed ? std::move(dst_[0]) : std::move(src_[0]), + store_transposed ? 
std::move(src_[0]) : std::move(dst_[0]), + std::move(tmp_wgt), + std::move(tmp_edge_id), + std::move(tmp_edge_type)); + + if (tmp_wgt) ((*wgt_)[0]) = std::move(*tmp_wgt); + if (tmp_edge_id) ((*edge_id_)[0]) = std::move(*tmp_edge_id); + if (tmp_edge_type) ((*edge_type_)[0]) = std::move(*tmp_edge_type); + } + + private: + template + void resize_and_copy_buffers(rmm::cuda_stream_view stream, + std::vector>& buffer, + size_t total_size) + { + size_t pos = buffer[0].size(); + buffer[0].resize(total_size, stream); + + for (size_t i = 1; i < buffer.size(); ++i) { + raft::copy(buffer[0].data() + pos, buffer[i].data(), buffer[i].size(), stream); + pos += buffer[i].size(); + buffer[i].resize(0, stream); + buffer[i].shrink_to_fit(stream); + } + + std::vector> new_buffer; + new_buffer.push_back(std::move(buffer[0])); + buffer = std::move(new_buffer); + } + + void create_new_buffers(cugraph::mtmg::handle_t const& handle) + { + src_.emplace_back(device_buffer_size_, handle.get_stream()); + dst_.emplace_back(device_buffer_size_, handle.get_stream()); + + if (wgt_) { wgt_->emplace_back(device_buffer_size_, handle.get_stream()); } + + if (edge_id_) { edge_id_->emplace_back(device_buffer_size_, handle.get_stream()); } + + if (edge_type_) { edge_type_->emplace_back(device_buffer_size_, handle.get_stream()); } + + current_pos_ = 0; + } + + mutable std::mutex lock_{}; + + size_t current_pos_{0}; + size_t device_buffer_size_{0}; + + std::vector> src_{}; + std::vector> dst_{}; + std::optional>> wgt_{}; + std::optional>> edge_id_{}; + std::optional>> edge_type_{}; +}; + +} // namespace detail +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/edge_property.hpp b/cpp/include/cugraph/mtmg/edge_property.hpp new file mode 100644 index 00000000000..afa72492b9a --- /dev/null +++ b/cpp/include/cugraph/mtmg/edge_property.hpp @@ -0,0 +1,60 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Edge property object for each GPU + */ +template +class edge_property_t : public detail::device_shared_wrapper_t< + cugraph::edge_property_t> { + public: + using parent_t = detail::device_shared_wrapper_t< + cugraph::edge_property_t>; + + /** + * @brief Return a edge_property_view_t (read only) + */ + auto view() + { + std::lock_guard lock(parent_t::lock_); + + using edge_t = typename graph_view_t::wrapped_t::edge_type; + using buffer_t = + typename cugraph::edge_property_t::buffer_type; + std::vector buffers{}; + using const_value_iterator_t = decltype(get_dataframe_buffer_cbegin(buffers[0])); + + edge_property_view_t result; + + std::for_each(parent_t::objects_.begin(), parent_t::objects_.end(), [&result](auto& p) { + result.set(p.first, p.second.view()); + }); + + return result; + } +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/edge_property_view.hpp b/cpp/include/cugraph/mtmg/edge_property_view.hpp new file mode 100644 index 00000000000..c84a6458e1d --- /dev/null +++ b/cpp/include/cugraph/mtmg/edge_property_view.hpp @@ -0,0 +1,33 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Edge property object for each GPU + */ +template +using edge_property_view_t = + detail::device_shared_wrapper_t>; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/edgelist.hpp b/cpp/include/cugraph/mtmg/edgelist.hpp new file mode 100644 index 00000000000..90c53dfbb64 --- /dev/null +++ b/cpp/include/cugraph/mtmg/edgelist.hpp @@ -0,0 +1,65 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Edgelist object for each GPU + */ +template +class edgelist_t : public detail::device_shared_wrapper_t< + detail::per_device_edgelist_t> { + public: + /** + * @brief Create a per_device_edgelist for this GPU + */ + void set(handle_t const& handle, + size_t device_buffer_size, + bool use_weight, + bool use_edge_id, + bool use_edge_type) + { + detail::per_device_edgelist_t tmp( + handle, device_buffer_size, use_weight, use_edge_id, use_edge_type); + + detail::device_shared_wrapper_t< + detail::per_device_edgelist_t>::set(handle, + std::move(tmp)); + } + + /** + * @brief Stop inserting edges into this edgelist so we can use the edges + */ + void finalize_buffer(handle_t const& handle) { this->get(handle).finalize_buffer(handle); } + + /** + * @brief Consolidate for the edgelist edges into a single edgelist and then + * shuffle across GPUs. + */ + void consolidate_and_shuffle(cugraph::mtmg::handle_t const& handle, bool store_transposed) + { + this->get(handle).consolidate_and_shuffle(handle, store_transposed); + } +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/graph.hpp b/cpp/include/cugraph/mtmg/graph.hpp new file mode 100644 index 00000000000..76a2f401425 --- /dev/null +++ b/cpp/include/cugraph/mtmg/graph.hpp @@ -0,0 +1,136 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Graph object for each GPU + */ +template +class graph_t : public detail::device_shared_wrapper_t< + cugraph::graph_t> { + using parent_t = detail::device_shared_wrapper_t< + cugraph::graph_t>; + + public: + /** + * @brief Create an MTMG graph view (read only) + */ + auto view() + { + std::lock_guard lock(parent_t::lock_); + + cugraph::mtmg::graph_view_t result; + + std::for_each(parent_t::objects_.begin(), parent_t::objects_.end(), [&result](auto& p) { + result.set(p.first, std::move(p.second.view())); + }); + + return result; + } +}; + +/** + * @brief Create an MTMG graph from an edgelist + * + * @param[in] handle Resource handle + * @param[in] edgelist Edgelist + * @param[in] graph_properties Graph properties + * @param[in] renumber If true, renumber graph (must be true for MG) + * @param[out] graph MTMG graph is stored here + * @param[out] edge_weights MTMG edge weights is stored here + * @param[out] edge_ids MTMG edge ids is stored here + * @param[out] edge_types MTMG edge types is stored here + * @param[in] renumber_map MTMG renumber_map is stored here + * @param[in] do_expensive_check A flag to run expensive checks for input arguments (if set to + * `true`). 
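+ *
+ * A minimal calling sketch (illustrative only; assumes finalize_buffer() and
+ * consolidate_and_shuffle() have already been called on @p edgelist and that the
+ * template parameters shown here match how the edgelist was constructed):
+ * @code
+ * cugraph::mtmg::graph_t<vertex_t, edge_t, false, true> graph;
+ * std::optional<cugraph::mtmg::edge_property_t<
+ *   cugraph::mtmg::graph_view_t<vertex_t, edge_t, false, true>, weight_t>>
+ *   edge_weights{std::nullopt};
+ * std::optional<cugraph::mtmg::edge_property_t<
+ *   cugraph::mtmg::graph_view_t<vertex_t, edge_t, false, true>, edge_id_t>>
+ *   edge_ids{std::nullopt};
+ * std::optional<cugraph::mtmg::edge_property_t<
+ *   cugraph::mtmg::graph_view_t<vertex_t, edge_t, false, true>, edge_type_t>>
+ *   edge_types{std::nullopt};
+ * std::optional<cugraph::mtmg::renumber_map_t<vertex_t>> renumber_map{std::in_place};
+ *
+ * create_graph_from_edgelist(handle,
+ *                            edgelist,
+ *                            cugraph::graph_properties_t{false, false},
+ *                            true,  // renumber
+ *                            graph,
+ *                            edge_weights,
+ *                            edge_ids,
+ *                            edge_types,
+ *                            renumber_map);
+ * @endcode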
+ */ +template +void create_graph_from_edgelist( + handle_t const& handle, + cugraph::mtmg::edgelist_t& edgelist, + graph_properties_t graph_properties, + bool renumber, + cugraph::mtmg::graph_t& graph, + std::optional, + weight_t>>& edge_weights, + std::optional, + edge_id_t>>& edge_ids, + std::optional, + edge_type_t>>& edge_types, + std::optional>& renumber_map, + bool do_expensive_check = false) +{ + if (handle.get_thread_rank() > 0) return; + + CUGRAPH_EXPECTS(renumber_map.has_value() == renumber, + "Renumbering set to true, but no space for renumber map"); + + auto& my_edgelist = edgelist.get(handle); + + CUGRAPH_EXPECTS(my_edgelist.get_src().size() > 0, "Cannot create graph without an edge list"); + CUGRAPH_EXPECTS(my_edgelist.get_src().size() == 1, + "Must consolidate edges into a single list before creating graph"); + + auto [local_graph, local_edge_weights, local_edge_ids, local_edge_types, local_renumber_map] = + cugraph::create_graph_from_edgelist( + handle.raft_handle(), + std::nullopt, + std::move(my_edgelist.get_src()[0]), + std::move(my_edgelist.get_dst()[0]), + my_edgelist.get_wgt() ? std::make_optional(std::move((*my_edgelist.get_wgt())[0])) + : std::nullopt, + my_edgelist.get_edge_id() ? std::make_optional(std::move((*my_edgelist.get_edge_id())[0])) + : std::nullopt, + my_edgelist.get_edge_type() ? std::make_optional(std::move((*my_edgelist.get_edge_type())[0])) + : std::nullopt, + graph_properties, + renumber, + do_expensive_check); + + graph.set(handle, std::move(local_graph)); + if (edge_weights) edge_weights->set(handle, std::move(*local_edge_weights)); + if (edge_ids) edge_ids->set(handle, std::move(*local_edge_ids)); + if (edge_types) edge_types->set(handle, std::move(*local_edge_types)); + if (renumber) renumber_map->set(handle, std::move(*local_renumber_map)); +} + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/graph_view.hpp b/cpp/include/cugraph/mtmg/graph_view.hpp new file mode 100644 index 00000000000..94347e016ea --- /dev/null +++ b/cpp/include/cugraph/mtmg/graph_view.hpp @@ -0,0 +1,34 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Graph view for each GPU + */ +template +using graph_view_t = detail::device_shared_wrapper_t< + cugraph::graph_view_t>; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/handle.hpp b/cpp/include/cugraph/mtmg/handle.hpp new file mode 100644 index 00000000000..f23bce5aeac --- /dev/null +++ b/cpp/include/cugraph/mtmg/handle.hpp @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Resource handler + * + * Multi-threaded resource handler. Every GPU gets a raft::handle object that provides access to + * the GPU resources. In a multi-threaded environment multiple threads will share a particular GPU. + * Following the MPI model, each thread will be assigned to a thread rank. + * + */ +class handle_t { + public: + /** + * @brief Constructor + * + * @param raft_handle Raft handle for the resources + * @param thread_rank Rank for this thread + */ + handle_t(raft::handle_t const& raft_handle, int thread_rank, size_t device_id) + : raft_handle_(raft_handle), + thread_rank_(thread_rank), + local_rank_(raft_handle.get_comms().get_rank()), // FIXME: update for multi-node + device_id_(device_id) + { + } + + /** + * @brief Get the raft handle + * + * @return const reference to a raft handle + */ + raft::handle_t const& raft_handle() const { return raft_handle_; } + + /** + * @brief Get cuda stream + * + * @return cuda stream + */ + rmm::cuda_stream_view get_stream() const + { + return raft_handle_.is_stream_pool_initialized() + ? raft_handle_.get_stream_from_stream_pool(device_id_) + : raft_handle_.get_stream(); + } + + /** + * @brief Get thread rank + * + * @return thread rank + */ + int get_thread_rank() const { return thread_rank_; } + + /** + * @brief Get number of gpus + * + * @return number of gpus + */ + int get_size() const { return raft_handle_.get_comms().get_size(); } + + /** + * @brief Get number of local gpus + * + * @return number of local gpus + */ + // FIXME: wrong for multi-node + int get_local_size() const { return raft_handle_.get_comms().get_size(); } + + /** + * @brief Get gpu rank + * + * @return gpu rank + */ + int get_rank() const { return raft_handle_.get_comms().get_rank(); } + + /** + * @brief Get local gpu rank + * + * @return local gpu rank + */ + int get_local_rank() const { return local_rank_; } + + private: + raft::handle_t const& raft_handle_; + int thread_rank_; + int local_rank_; + size_t device_id_; +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/instance_manager.hpp b/cpp/include/cugraph/mtmg/instance_manager.hpp new file mode 100644 index 00000000000..8bf62b56f4b --- /dev/null +++ b/cpp/include/cugraph/mtmg/instance_manager.hpp @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include + +#include + +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Manages a subset of the cluster for a set of graph computations + */ +class instance_manager_t { + public: + /** + * @brief Constructor + * + * @param handles Vector of RAFT handles, one for each device on this node + */ + instance_manager_t(std::vector>&& handles, + std::vector>&& nccl_comms, + std::vector&& device_ids, + int local_gpu_count) + : thread_counter_{0}, + raft_handle_{std::move(handles)}, + nccl_comms_{std::move(nccl_comms)}, + device_ids_{std::move(device_ids)}, + local_gpu_count_{local_gpu_count} + { + } + + /** + * @brief Get handle + * + * The instance manager will construct a handle appropriate for the thread making + * the request. Threads will be assigned to GPUs in a round-robin fashion to + * spread requesting threads around the GPU resources. + * + * This function will be CPU thread-safe. + * + * @return a handle for this thread. + */ + handle_t get_handle() + { + int local_id = thread_counter_++; + + RAFT_CUDA_TRY(cudaSetDevice(device_ids_[local_id % raft_handle_.size()].value())); + return handle_t(*raft_handle_[local_id % raft_handle_.size()], + local_id / raft_handle_.size(), + static_cast(local_id % raft_handle_.size())); + } + + /** + * @brief Reset the thread counter + * + * After a parallel activity is completed, we need to reset the thread counter so that + * future threads will round robin around the GPUs properly. + */ + void reset_threads() { thread_counter_.store(0); } + + /** + * @brief Number of local GPUs in the instance + */ + int get_local_gpu_count() { return local_gpu_count_; } + + private: + // FIXME: Should this be an std::map<> where the key is the rank? + // On a multi-node system we might have nodes with fewer + // (or no) GPUs, so mapping rank to a handle might be a challenge + // + std::vector> raft_handle_{}; + std::vector> nccl_comms_{}; + std::vector device_ids_{}; + int local_gpu_count_{}; + + std::atomic thread_counter_{0}; +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/per_thread_edgelist.hpp b/cpp/include/cugraph/mtmg/per_thread_edgelist.hpp new file mode 100644 index 00000000000..b672db48719 --- /dev/null +++ b/cpp/include/cugraph/mtmg/per_thread_edgelist.hpp @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Supports creating an edgelist from individual host threads + * + * A cugraph edgelist needs to contain all of the edges necessary to create the graph + * stored in GPU memory (distributed across multiple GPUs in a multi-GPU configuration). + * + * This class provides a mechanism for populating the edgelist object from independent CPU threads. + * + * Calls to the append() method will take edges (in CPU host memory) and append them to a local + * buffer. 
As the local buffer fills, the buffer will be sent to GPU memory using the flush() + * method. This allows the CPU to GPU transfers to be larger (and consequently more efficient). + */ +template +class per_thread_edgelist_t { + public: + per_thread_edgelist_t() = delete; + per_thread_edgelist_t(per_thread_edgelist_t const&) = delete; + + /** + * @brief Only constructor + * + * @param edgelist The edge list this thread_edgelist_t should be associated with + * @param thread_buffer_size Size of the local buffer for accumulating edges on the CPU + */ + per_thread_edgelist_t( + detail::per_device_edgelist_t& edgelist, + size_t thread_buffer_size) + : edgelist_{edgelist}, + current_pos_{0}, + src_(thread_buffer_size), + dst_(thread_buffer_size), + wgt_{std::nullopt}, + edge_id_{std::nullopt}, + edge_type_{std::nullopt} + { + if (edgelist.use_weight()) wgt_ = std::make_optional(std::vector(thread_buffer_size)); + + if (edgelist.use_edge_id()) + edge_id_ = std::make_optional(std::vector(thread_buffer_size)); + + if (edgelist.use_edge_type()) + edge_type_ = std::make_optional(std::vector(thread_buffer_size)); + } + + /** + * @brief Append an edge to the edge list + * + * @param handle The resource handle + * @param src Source vertex id + * @param dst Destination vertex id + * @param wgt Edge weight + * @param edge_id Edge id + * @param edge_type Edge type + */ + void append(handle_t const& handle, + vertex_t src, + vertex_t dst, + std::optional wgt, + std::optional edge_id, + std::optional edge_type) + { + if (current_pos_ == src_.size()) { flush(handle); } + + src_[current_pos_] = src; + dst_[current_pos_] = dst; + if (wgt) (*wgt_)[current_pos_] = *wgt; + if (edge_id) (*edge_id_)[current_pos_] = *edge_id; + if (edge_type) (*edge_type_)[current_pos_] = *edge_type; + + ++current_pos_; + } + + /** + * @brief Append a list of edges to the edge list + * + * @param handle The resource handle + * @param src Source vertex id + * @param dst Destination vertex id + * @param wgt Edge weight + * @param edge_id Edge id + * @param edge_type Edge type + */ + void append(handle_t const& handle, + raft::host_span src, + raft::host_span dst, + std::optional> wgt, + std::optional> edge_id, + std::optional> edge_type) + { + size_t count = src.size(); + size_t pos = 0; + + while (count > 0) { + size_t copy_count = std::min(count, (src_.size() - current_pos_)); + + std::copy(src.begin() + pos, src.begin() + pos + copy_count, src_.begin() + current_pos_); + std::copy(dst.begin() + pos, dst.begin() + pos + copy_count, dst_.begin() + current_pos_); + if (wgt) + std::copy(wgt.begin() + pos, wgt.begin() + pos + copy_count, wgt_->begin() + current_pos_); + if (edge_id) + std::copy(edge_id.begin() + pos, + edge_id.begin() + pos + copy_count, + edge_id_->begin() + current_pos_); + if (edge_type) + std::copy(edge_type.begin() + pos, + edge_type.begin() + pos + copy_count, + edge_type_->begin() + current_pos_); + + if (current_pos_ == src_.size()) { flush(handle); } + + count -= copy_count; + pos += copy_count; + } + } + + /** + * @brief Flush thread data from host to GPU memory + * + * @param handle The resource handle + */ + void flush(handle_t const& handle) + { + edgelist_.append( + handle, + raft::host_span{src_.data(), current_pos_}, + raft::host_span{dst_.data(), current_pos_}, + wgt_ ? std::make_optional(raft::host_span{wgt_->data(), current_pos_}) + : std::nullopt, + edge_id_ ? std::make_optional(raft::host_span{edge_id_->data(), current_pos_}) + : std::nullopt, + edge_type_ + ? 
std::make_optional(raft::host_span{edge_type_->data(), current_pos_}) + : std::nullopt); + + current_pos_ = 0; + } + + private: + detail::per_device_edgelist_t& edgelist_; + size_t current_pos_{0}; + std::vector src_{}; + std::vector dst_{}; + std::optional> wgt_{}; + std::optional> edge_id_{}; + std::optional> edge_type_{}; +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/renumber_map.hpp b/cpp/include/cugraph/mtmg/renumber_map.hpp new file mode 100644 index 00000000000..da07d61bd96 --- /dev/null +++ b/cpp/include/cugraph/mtmg/renumber_map.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief An MTMG device vector for storing a renumber map + */ +template +class renumber_map_t : public detail::device_shared_device_vector_t { + using parent_t = detail::device_shared_device_vector_t; + + public: + /** + * @brief Return a view (read only) of the renumber map + */ + auto view() { return static_cast>(this->parent_t::view()); } +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/renumber_map_view.hpp b/cpp/include/cugraph/mtmg/renumber_map_view.hpp new file mode 100644 index 00000000000..5ff7ff5e100 --- /dev/null +++ b/cpp/include/cugraph/mtmg/renumber_map_view.hpp @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief An MTMG device span for storing a renumber map + */ +template +using renumber_map_view_t = detail::device_shared_device_span_t; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/resource_manager.hpp b/cpp/include/cugraph/mtmg/resource_manager.hpp new file mode 100644 index 00000000000..b4633626e7c --- /dev/null +++ b/cpp/include/cugraph/mtmg/resource_manager.hpp @@ -0,0 +1,225 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief Class for managing local and remote GPU resources for use in + * multi-threaded multi-GPU interface. + * + * Each process in a multi-GPU configuration should have an instance of this + * class. The resource manager object should be configured by calling + * register_local_gpu (or register_remote_gpu once we support a multi-node + * configuration) to allocate resources that can be used in the mtmg space. + * + * When we want to execute some graph computations, we need to create an instance for execution. + * Based on how big a subset of the desired compute resources is desired, we can allocate some + * number of GPUs to the problem (up to the total set of managed resources). + * + * The returned instance can be used to create a graph, execute one or more algorithms, etc. Once + * we are done the caller can delete the instance. + * + * At the moment, the caller is assumed to be responsible for scheduling use of the resources. + * + * For our first release, we will only consider a single node multi-GPU configuration, so the remote + * GPU methods are currently disabled via ifdef. + */ +class resource_manager_t { + public: + /** + * @brief Default constructor + */ + resource_manager_t() {} + + /** + * @brief add a local GPU to the resource manager. + * + * @param rank The rank to assign to the local GPU + * @param device_id The device_id corresponding to this rank + */ + void register_local_gpu(int rank, rmm::cuda_device_id device_id) + { + std::lock_guard lock(lock_); + + CUGRAPH_EXPECTS(local_rank_map_.find(rank) == local_rank_map_.end(), + "cannot register same rank multiple times"); + + int num_gpus_this_node; + RAFT_CUDA_TRY(cudaGetDeviceCount(&num_gpus_this_node)); + + CUGRAPH_EXPECTS((device_id.value() >= 0) && (device_id.value() < num_gpus_this_node), + "device id out of range"); + + local_rank_map_.insert(std::pair(rank, device_id)); + + RAFT_CUDA_TRY(cudaSetDevice(device_id.value())); + + // FIXME: There is a bug in the cuda_memory_resource that results in a Hang. + // using the pool resource as a work-around. + // + // There is a deprecated environment variable: NCCL_LAUNCH_MODE=GROUP + // which should temporarily work around this problem. 
+ // + // Ultimately there should be some RMM parameters passed into this function + // (or the constructor of the object) to configure this behavior +#if 0 + auto per_device_it = per_device_rmm_resources_.insert( + std::pair{rank, std::make_shared()}); +#else + auto const [free, total] = rmm::detail::available_device_memory(); + auto const min_alloc = + rmm::detail::align_down(std::min(free, total / 6), rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + + auto per_device_it = per_device_rmm_resources_.insert( + std::pair{rank, + rmm::mr::make_owning_wrapper( + std::make_shared(), min_alloc)}); +#endif + + rmm::mr::set_per_device_resource(device_id, per_device_it.first->second.get()); + } + + /** + * @brief Create an instance using a subset of the registered resources + * + * The selected set of resources will be configured as an instance manager. + * If @ranks_to_include is a proper subset of the registered resources, + * ranks will be renumbered into the range [0, @p ranks_to_use.size()), making + * it a proper configuration. + * + * @param ranks_to_use a vector containing the ranks to include in the instance. + * Must be a subset of the entire set of available ranks. + * @param instance_manager_id a ncclUniqueId that is shared by all processes participating + * in this instance. All processes must use the same ID in this call, it is up + * to the calling code to share this ID properly before the call. + * + * @return unique pointer to instance manager + */ + std::unique_ptr create_instance_manager( + std::vector ranks_to_include, ncclUniqueId instance_manager_id) const + { + std::for_each( + ranks_to_include.begin(), ranks_to_include.end(), [local_ranks = local_rank_map_](int rank) { + CUGRAPH_EXPECTS(local_ranks.find(rank) != local_ranks.end(), + "requesting inclusion of an invalid rank"); + }); + + std::vector> nccl_comms{}; + std::vector> handles{}; + std::vector device_ids{}; + + nccl_comms.reserve(ranks_to_include.size()); + handles.reserve(ranks_to_include.size()); + device_ids.reserve(ranks_to_include.size()); + + // FIXME: not quite right for multi-node + auto gpu_row_comm_size = static_cast(sqrt(static_cast(ranks_to_include.size()))); + while (ranks_to_include.size() % gpu_row_comm_size != 0) { + --gpu_row_comm_size; + } + + // FIXME: not quite right for multi-node + for (size_t i = 0; i < ranks_to_include.size(); ++i) { + int rank = ranks_to_include[i]; + auto pos = local_rank_map_.find(rank); + RAFT_CUDA_TRY(cudaSetDevice(pos->second.value())); + + raft::handle_t tmp_handle; + + nccl_comms.push_back(std::make_unique()); + handles.push_back( + std::make_unique(tmp_handle, per_device_rmm_resources_.find(rank)->second)); + device_ids.push_back(pos->second); + } + + std::vector running_threads; + + for (size_t i = 0; i < ranks_to_include.size(); ++i) { + running_threads.emplace_back([instance_manager_id, + idx = i, + gpu_row_comm_size, + comm_size = ranks_to_include.size(), + &ranks_to_include, + &local_rank_map = local_rank_map_, + &nccl_comms, + &handles]() { + int rank = ranks_to_include[idx]; + auto pos = local_rank_map.find(rank); + RAFT_CUDA_TRY(cudaSetDevice(pos->second.value())); + + NCCL_TRY(ncclCommInitRank(nccl_comms[idx].get(), comm_size, instance_manager_id, rank)); + + raft::comms::build_comms_nccl_only(handles[idx].get(), *nccl_comms[idx], comm_size, rank); + + cugraph::partition_manager::init_subcomm(*handles[idx], gpu_row_comm_size); + }); + } + + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + + // FIXME: Update for multi-node 
+ return std::make_unique( + std::move(handles), std::move(nccl_comms), std::move(device_ids), ranks_to_include.size()); + } + + /** + * @brief Get a list of all of the currently registered ranks + * + * @return A copy of the list of ranks. + */ + std::vector registered_ranks() const + { + std::lock_guard lock(lock_); + + // + // C++20 mechanism: + // return std::vector{ std::views::keys(local_rank_map_).begin(), + // std::views::keys(local_rank_map_).end() }; + // Would need a bit more complicated to handle remote_rank_map_ also + // + std::vector registered_ranks(local_rank_map_.size()); + std::transform( + local_rank_map_.begin(), local_rank_map_.end(), registered_ranks.begin(), [](auto pair) { + return pair.first; + }); + + return registered_ranks; + } + + private: + mutable std::mutex lock_{}; + std::map local_rank_map_{}; + std::map> per_device_rmm_resources_{}; +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/vertex_result.hpp b/cpp/include/cugraph/mtmg/vertex_result.hpp new file mode 100644 index 00000000000..e8999b35aa9 --- /dev/null +++ b/cpp/include/cugraph/mtmg/vertex_result.hpp @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief An MTMG device vector for storing vertex results + */ +template +class vertex_result_t : public detail::device_shared_device_vector_t { + using parent_t = detail::device_shared_device_vector_t; + + public: + /** + * @brief Create a vertex result view (read only) + */ + auto view() { return static_cast>(this->parent_t::view()); } +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/mtmg/vertex_result_view.hpp b/cpp/include/cugraph/mtmg/vertex_result_view.hpp new file mode 100644 index 00000000000..7a7070d6f2a --- /dev/null +++ b/cpp/include/cugraph/mtmg/vertex_result_view.hpp @@ -0,0 +1,49 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#pragma once + +#include +#include +#include +#include + +namespace cugraph { +namespace mtmg { + +/** + * @brief An MTMG device span for referencing a vertex result + */ +template +class vertex_result_view_t : public detail::device_shared_device_span_t { + using parent_t = detail::device_shared_device_span_t; + + public: + vertex_result_view_t(parent_t&& other) : parent_t{std::move(other)} {} + + /** + * @brief Gather results from specified vertices into a device vector + */ + template + rmm::device_uvector gather( + handle_t const& handle, + raft::device_span vertices, + cugraph::mtmg::graph_view_t const& graph_view, + std::optional>& renumber_map_view); +}; + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp new file mode 100644 index 00000000000..e42ef9bfcf3 --- /dev/null +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -0,0 +1,296 @@ +/* + * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#pragma once + +#include +#include +#include + +#include +#include + +namespace cugraph { + +/* + * @brief renumber sampled edge list and compress to the (D)CSR|(D)CSC format. + * + * This function renumbers sampling function (e.g. uniform_neighbor_sample) output edges fulfilling + * the following requirements. Assume major = source if @p src_is_major is true, major = destination + * if @p src_is_major is false. + * + * 1. If @p edgelist_hops is valid, we can consider (vertex ID, hop, flag=major) triplets for each + * vertex ID in edge majors (@p edgelist_srcs if @p src_is_major is true, @p edgelist_dsts if false) + * and (vertex ID, hop, flag=minor) triplets for each vertex ID in edge minors. From these triplets, + * we can find the minimum (hop, flag) pairs for every unique vertex ID (hop is the primary key and + * flag is the secondary key, flag=major is considered smaller than flag=minor if hop numbers are + * same). Vertex IDs with smaller (hop, flag) pairs precede vertex IDs with larger (hop, flag) pairs + * in renumbering. Ordering can be arbitrary among the vertices with the same (hop, flag) pairs. + * 2. If @p edgelist_hops is invalid, unique vertex IDs in edge majors precede vertex IDs that + * appear only in edge minors. + * 3. If edgelist_label_offsets.has_value() is true, edge lists for different labels will be + * renumbered separately. + * + * The renumbered edges are compressed based on the following requirements. + * + * 1. If @p compress_per_hop is true, edges are compressed separately for each hop. If @p + * compress_per_hop is false, edges with different hop numbers are compressed altogether. + * 2. Edges are compressed independently for different labels. + * 3. If @p doubly_compress is false, edges are compressed to CSR (if @p src_is_major is true) or + * CSC (if @p src_is_major is false). 
If @p doubly_compress is true, edges are compressed to DCSR + * (if @p src_is_major is true) or DCSC (if @p src_is_major is false). If @p doubly_compress is + * false, the CSR/CSC offset array size is the number of vertices (which is the maximum vertex ID + + * 1) + 1. Here, the maximum vertex ID is the maximum major vertex ID in the edges to compress if @p + * compress_per_hop is false or for hop 0. If @p compress_per_hop is true and hop number is 1 or + * larger, the maximum vertex ID is the larger of the maximum major vertex ID for this hop and the + * maximum vertex ID for the edges in the previous hops. + * + * If both @p compress_per_hop is false and @p edgelist_hops.has_value() is true, majors should be + * non-decreasing within each label after renumbering and sorting by (hop, major, minor). Also, + * majors in hop N should not appear in any of the previous hops. This condition is satisfied if + * majors in hop N + 1 does not have any vertices from the previous hops excluding the minors from + * hop N. + * + * This function is single-GPU only (we are not aware of any practical multi-GPU use cases). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weight. Needs to be floating point type + * @tparam edge_id_t Type of edge id. Needs to be an integral type + * @tparam edge_type_t Type of edge type. Needs to be an integral type, currently only int32_t is + * supported + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_srcs A vector storing edgelist source vertices. + * @param edgelist_dsts A vector storing edgelist destination vertices (size = @p + * edgelist_srcs.size()). + * @param edgelist_weights An optional vector storing edgelist weights (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_edge_ids An optional vector storing edgelist edge IDs (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_edge_types An optional vector storing edgelist edge types (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_hops An optional tuple having a vector storing edge list hop numbers (size = @p + * edgelist_srcs.size() if valid) and the number of hops. + * @param edgelist_label_offsets An optional tuple storing a pointer to the array storing label + * offsets to the input edges (size = std::get<1>(*edgelist_label_offsets) + 1) and the number of + * labels. + * @param src_is_major A flag to determine whether to use the source or destination as the + * major key in renumbering and compression. + * @param compress_per_hop A flag to determine whether to compress edges with different hop numbers + * separately (if true) or altogether (if false). If @p compress_per_hop is true, @p + * edgelist_hops.has_value() should be true and @p doubly_compress should be false. + * @param doubly_compress A flag to determine whether to compress to the CSR/CSC format (if false) + * or the DCSR/DCSC format (if true). + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
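 *
 * Illustrative example (not an additional requirement; single label, 2 hops, @p src_is_major =
 * true, @p compress_per_hop = false, @p doubly_compress = false):
 *   input edges: hop 0: 0->2, 0->5; hop 1: 2->7, 5->0.
 *   Vertex 0 first appears as a hop-0 major, vertices 2 and 5 as hop-0 minors, and vertex 7 as a
 *   hop-1 minor, so one valid renumbering is 0->0, 2->1, 5->2, 7->3 (2 and 5 may be swapped) and
 *   the renumber map is [0, 2, 5, 7]. The renumbered edges sorted by (hop, major, minor) are
 *   (0,1), (0,2), (1,3), (2,0), so the returned CSR offset array is [0, 2, 3, 4] and the minor
 *   (column) index array is [1, 2, 3, 0].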
+ * @return Tuple of vectors storing optional DCSR/DCSC major vertex IDs with one or more neighbors, + * (D)CSR|(D)CSC offset values, edge minor vertex IDs, optional edge weights (valid only if @p + * edgelist_weights.has_value() is true), optional edge IDs (valid only if @p + * edgelist_edge_ids.has_value() is true), optional edge types (valid only if @p + * edgelist_edge_types.has_value() is true), optional (label, hop) offset values to the + * (D)CSR|(D)CSC offset array (size = # labels * # hops + 1, where # labels = + * std::get<1>(*edgelist_label_offsets) if @p edgelist_label_offsets.has_value() is true and 1 + * otherwise and # hops = std::get<1>(*edgelist_hops) if edgelist_hops.has_value() is true and 1 + * otherwise, valid only if at least one of @p edgelist_label_offsets.has_value() or @p + * edgelist_hops.has_value() is rue), renumber_map to query original vertices (size = # unique + * vertices or aggregate # unique vertices for every label), and label offsets to the renumber_map + * (size = std::get<1>(*edgelist_label_offsets) + 1, valid only if @p + * edgelist_label_offsets.has_value() is true). + */ +template +std::tuple>, // dcsr/dcsc major vertices + rmm::device_uvector, // (d)csr/(d)csc offset values + rmm::device_uvector, // minor vertices + std::optional>, // weights + std::optional>, // edge IDs + std::optional>, // edge types + std::optional>, // (label, hop) offsets to the (d)csr/(d)csc + // offset array + rmm::device_uvector, // renumber map + std::optional>> // label offsets to the renumber map +renumber_and_compress_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major = true, + bool compress_per_hop = false, + bool doubly_compress = false, + bool do_expensive_check = false); + +/* + * @brief renumber sampled edge list and sort the renumbered edges. + * + * This function renumbers sampling function (e.g. uniform_neighbor_sample) output edges fulfilling + * the following requirements. Assume major = source if @p src_is_major is true, major = destination + * if @p src_is_major is false. + * + * 1. If @p edgelist_hops is valid, we can consider (vertex ID, hop, flag=major) triplets for each + * vertex ID in edge majors (@p edgelist_srcs if @p src_is_major is true, @p edgelist_dsts if false) + * and (vertex ID, hop, flag=minor) triplets for each vertex ID in edge minors. From these triplets, + * we can find the minimum (hop, flag) pairs for every unique vertex ID (hop is the primary key and + * flag is the secondary key, flag=major is considered smaller than flag=minor if hop numbers are + * same). Vertex IDs with smaller (hop, flag) pairs precede vertex IDs with larger (hop, flag) pairs + * in renumbering. Ordering can be arbitrary among the vertices with the same (hop, flag) pairs. + * 2. If @p edgelist_hops is invalid, unique vertex IDs in edge majors precede vertex IDs that + * appear only in edge minors. + * 3. If edgelist_label_offsets.has_value() is true, edge lists for different labels will be + * renumbered separately. + * + * The renumbered edges are sorted based on the following rules. + * + * 1. If @p src_is_major is true, use ((hop), src, dst) as the key in sorting. If @p src_is_major is + * false, use ((hop), dst, src) instead. 
hop is used only if @p edgelist_hops.has_value() is true. + * 2. Edges in each label are sorted independently if @p edgelist_label_offsets.has_value() is true. + * + * This function is single-GPU only (we are not aware of any practical multi-GPU use cases). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weight. Needs to be floating point type + * @tparam edge_id_t Type of edge id. Needs to be an integral type + * @tparam edge_type_t Type of edge type. Needs to be an integral type, currently only int32_t is + * supported + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_srcs A vector storing edgelist source vertices. + * @param edgelist_dsts A vector storing edgelist destination vertices (size = @p + * edgelist_srcs.size()). + * @param edgelist_weights An optional vector storing edgelist weights (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_edge_ids An optional vector storing edgelist edge IDs (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_edge_types An optional vector storing edgelist edge types (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_hops An optional tuple having a vector storing edge list hop numbers (size = @p + * edgelist_srcs.size() if valid) and the number of hops. The hop vector values should be + * non-decreasing within each label. + * @param edgelist_label_offsets An optional tuple storing a pointer to the array storing label + * offsets to the input edges (size = std::get<1>(*edgelist_label_offsets) + 1) and the number of + * labels. + * @param src_is_major A flag to determine whether to use the source or destination as the + * major key in renumbering and sorting. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return Tuple of vectors storing edge sources, edge destinations, optional edge weights (valid + * only if @p edgelist_weights.has_value() is true), optional edge IDs (valid only if @p + * edgelist_edge_ids.has_value() is true), optional edge types (valid only if @p + * edgelist_edge_types.has_value() is true), optional (label, hop) offset values to the renumbered + * and sorted edges (size = # labels * # hops + 1, where # labels = + * std::get<1>(*edgelist_label_offsets) if @p edgelist_label_offsets.has_value() is true and 1 + * otherwise and # hops = std::get<1>(*edgelist_hops) if edgelist_hops.has_value() is true and 1 + * otherwise, valid only if at least one of @p edgelist_label_offsets.has_value() or @p + * edgelist_hops.has_value() is true), renumber_map to query original vertices (size = # unique + * vertices or aggregate # unique vertices for every label), and label offsets to the renumber_map + * (size = std::get<1>(*edgelist_label_offsets) + 1, valid only if @p + * edgelist_label_offsets.has_value() is true). 
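 *
 * Minimal usage sketch (illustrative only; the variable names, the explicit vertex/weight/edge
 * ID/edge type instantiations, and the single-label, multi-hop setup with num_hops a size_t are
 * assumptions, not part of this API):
 *
 *   auto [srcs, dsts, wgts, ids, types, label_hop_offsets, renumber_map, renumber_map_offsets] =
 *     cugraph::renumber_and_sort_sampled_edgelist<int32_t, float, int64_t, int32_t>(
 *       handle,
 *       std::move(sampled_srcs),
 *       std::move(sampled_dsts),
 *       std::move(sampled_weights),
 *       std::nullopt,  // no edge IDs
 *       std::nullopt,  // no edge types
 *       std::make_optional(std::make_tuple(std::move(sampled_hops), num_hops)),
 *       std::nullopt,  // no labels (single implicit label)
 *       true,          // src_is_major
 *       false);        // do_expensive_check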
+ */ +template +std::tuple, // srcs + rmm::device_uvector, // dsts + std::optional>, // weights + std::optional>, // edge IDs + std::optional>, // edge types + std::optional>, // (label, hop) offsets to the edges + rmm::device_uvector, // renumber map + std::optional>> // label offsets to the renumber map +renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major = true, + bool do_expensive_check = false); + +/* + * @brief sort sampled edge list. + * + * Sampled edges are sorted based on the following rules. + * + * 1. If @p src_is_major is true, use ((hop), src, dst) as the key in sorting. If @p src_is_major is + * false, use ((hop), dst, src) instead. hop is used only if @p edgelist_hops.has_value() is true. + * 2. Edges in each label are sorted independently if @p edgelist_label_offsets.has_value() is true. + * + * This function is single-GPU only (we are not aware of any practical multi-GPU use cases). + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weight. Needs to be floating point type + * @tparam edge_id_t Type of edge id. Needs to be an integral type + * @tparam edge_type_t Type of edge type. Needs to be an integral type, currently only int32_t is + * supported + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param edgelist_srcs A vector storing edgelist source vertices. + * @param edgelist_dsts A vector storing edgelist destination vertices (size = @p + * edgelist_srcs.size()). + * @param edgelist_weights An optional vector storing edgelist weights (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_edge_ids An optional vector storing edgelist edge IDs (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_edge_types An optional vector storing edgelist edge types (size = @p + * edgelist_srcs.size() if valid). + * @param edgelist_hops An optional tuple having a vector storing edge list hop numbers (size = @p + * edgelist_srcs.size() if valid) and the number of hops. The hop vector values should be + * non-decreasing within each label. + * @param edgelist_label_offsets An optional tuple storing a pointer to the array storing label + * offsets to the input edges (size = std::get<1>(*edgelist_label_offsets) + 1) and the number of + * labels. + * @param src_is_major A flag to determine whether to use the source or destination as the + * major key in renumbering and sorting. + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). 
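 *
 * Illustrative example of the resulting order (not an additional requirement; single label, 2
 * hops, @p src_is_major = true):
 *   input (hop, src, dst):  (0, 0, 5), (0, 0, 2), (1, 2, 7), (1, 2, 3)
 *   sorted (hop, src, dst): (0, 0, 2), (0, 0, 5), (1, 2, 3), (1, 2, 7)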
+ * @return Tuple of vectors storing edge sources, edge destinations, optional edge weights (valid + * only if @p edgelist_weights.has_value() is true), optional edge IDs (valid only if @p + * edgelist_edge_ids.has_value() is true), optional edge types (valid only if @p + * edgelist_edge_types.has_value() is true), and optional (label, hop) offset values to the + * renumbered and sorted edges (size = # labels * # hops + 1, where # labels = + * std::get<1>(*edgelist_label_offsets) if @p edgelist_label_offsets.has_value() is true and 1 + * otherwise and # hops = std::get<1>(*edgelist_hops) if edgelist_hops.has_value() is true and 1 + * otherwise, valid only if at least one of @p edgelist_label_offsets.has_value() or @p + * edgelist_hops.has_value() is true) + */ +template +std::tuple, // srcs + rmm::device_uvector, // dsts + std::optional>, // weights + std::optional>, // edge IDs + std::optional>, // edge types + std::optional>> // (label, hop) offsets to the edges +sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major = true, + bool do_expensive_check = false); + +} // namespace cugraph diff --git a/cpp/src/c_api/uniform_neighbor_sampling.cpp b/cpp/src/c_api/uniform_neighbor_sampling.cpp index caaba8e9c8d..f146c331d8c 100644 --- a/cpp/src/c_api/uniform_neighbor_sampling.cpp +++ b/cpp/src/c_api/uniform_neighbor_sampling.cpp @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include diff --git a/cpp/src/components/legacy/scc_matrix.cuh b/cpp/src/components/legacy/scc_matrix.cuh index 3d56bdc5bf4..d044123bed0 100644 --- a/cpp/src/components/legacy/scc_matrix.cuh +++ b/cpp/src/components/legacy/scc_matrix.cuh @@ -68,7 +68,7 @@ struct SCC_Data { SCC_Data(size_t nrows, const IndexT* p_d_r_o, // row_offsets const IndexT* p_d_c_i) - : // column indices + : // column indices nrows_(nrows), p_d_r_o_(p_d_r_o), p_d_c_i_(p_d_c_i), diff --git a/cpp/src/cores/core_number_impl.cuh b/cpp/src/cores/core_number_impl.cuh index b63ae60f052..ea8e2a9c4ee 100644 --- a/cpp/src/cores/core_number_impl.cuh +++ b/cpp/src/cores/core_number_impl.cuh @@ -72,7 +72,7 @@ struct v_to_core_number_t { // a workaround for cudaErrorInvalidDeviceFunction error when device lambda is used template struct mult_degree_by_two_t { - __device__ edge_t operator()(edge_t d) const { return d* edge_t{2}; } + __device__ edge_t operator()(edge_t d) const { return d * edge_t{2}; } }; } // namespace diff --git a/cpp/src/link_analysis/pagerank_impl.cuh b/cpp/src/link_analysis/pagerank_impl.cuh index 3a84cdedfda..92c70fcff20 100644 --- a/cpp/src/link_analysis/pagerank_impl.cuh +++ b/cpp/src/link_analysis/pagerank_impl.cuh @@ -388,9 +388,11 @@ void pagerank(raft::handle_t const& handle, handle, graph_view, edge_weight_view, - std::make_optional(raft::device_span{ - *precomputed_vertex_out_weight_sums, - static_cast(graph_view.local_vertex_partition_range_size())}), + precomputed_vertex_out_weight_sums + ? std::make_optional(raft::device_span{ + *precomputed_vertex_out_weight_sums, + static_cast(graph_view.local_vertex_partition_range_size())}) + : std::nullopt, personalization_vertices ? 
std::make_optional(std::make_tuple( raft::device_span{*personalization_vertices, diff --git a/cpp/src/mtmg/vertex_result.cu b/cpp/src/mtmg/vertex_result.cu new file mode 100644 index 00000000000..a669a127f41 --- /dev/null +++ b/cpp/src/mtmg/vertex_result.cu @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +#include + +#include + +namespace cugraph { +namespace mtmg { + +template +template +rmm::device_uvector vertex_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + cugraph::mtmg::graph_view_t const& graph_view, + std::optional>& renumber_map_view) +{ + auto this_gpu_graph_view = graph_view.get(handle); + + rmm::device_uvector local_vertices(vertices.size(), handle.get_stream()); + rmm::device_uvector vertex_gpu_ids(vertices.size(), handle.get_stream()); + rmm::device_uvector vertex_pos(vertices.size(), handle.get_stream()); + rmm::device_uvector result(vertices.size(), handle.get_stream()); + + raft::copy(local_vertices.data(), vertices.data(), vertices.size(), handle.get_stream()); + cugraph::detail::scalar_fill( + handle.get_stream(), vertex_gpu_ids.data(), vertex_gpu_ids.size(), handle.get_rank()); + cugraph::detail::sequence_fill( + handle.get_stream(), vertex_pos.data(), vertex_pos.size(), size_t{0}); + + rmm::device_uvector d_vertex_partition_range_lasts( + this_gpu_graph_view.vertex_partition_range_lasts().size(), handle.get_stream()); + raft::update_device(d_vertex_partition_range_lasts.data(), + this_gpu_graph_view.vertex_partition_range_lasts().data(), + this_gpu_graph_view.vertex_partition_range_lasts().size(), + handle.get_stream()); + + if (renumber_map_view) { + cugraph::renumber_ext_vertices( + handle.raft_handle(), + local_vertices.data(), + local_vertices.size(), + renumber_map_view->get(handle).data(), + this_gpu_graph_view.local_vertex_partition_range_first(), + this_gpu_graph_view.local_vertex_partition_range_last()); + } + + auto const major_comm_size = + handle.raft_handle().get_subcomm(cugraph::partition_manager::major_comm_name()).get_size(); + auto const minor_comm_size = + handle.raft_handle().get_subcomm(cugraph::partition_manager::minor_comm_name()).get_size(); + + std::forward_as_tuple(local_vertices, std::tie(vertex_gpu_ids, vertex_pos), std::ignore) = + groupby_gpu_id_and_shuffle_kv_pairs( + handle.raft_handle().get_comms(), + local_vertices.begin(), + local_vertices.end(), + thrust::make_zip_iterator(vertex_gpu_ids.begin(), vertex_pos.begin()), + cugraph::detail::compute_gpu_id_from_int_vertex_t{ + raft::device_span(d_vertex_partition_range_lasts.data(), + d_vertex_partition_range_lasts.size()), + major_comm_size, + minor_comm_size}, + handle.get_stream()); + + // + // Now gather + // + rmm::device_uvector tmp_result(local_vertices.size(), handle.get_stream()); + + auto& wrapped = this->get(handle); + + auto vertex_partition = vertex_partition_device_view_t( + 
this_gpu_graph_view.local_vertex_partition_view()); + + auto iter = + thrust::make_transform_iterator(local_vertices.begin(), [vertex_partition] __device__(auto v) { + return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); + }); + + thrust::gather(handle.raft_handle().get_thrust_policy(), + iter, + iter + local_vertices.size(), + wrapped.begin(), + tmp_result.begin()); + + // + // Shuffle back + // + std::forward_as_tuple(std::ignore, std::tie(std::ignore, vertex_pos, tmp_result), std::ignore) = + groupby_gpu_id_and_shuffle_kv_pairs( + handle.raft_handle().get_comms(), + vertex_gpu_ids.begin(), + vertex_gpu_ids.end(), + thrust::make_zip_iterator(local_vertices.begin(), vertex_pos.begin(), tmp_result.begin()), + [] __device__(int gpu) { return gpu; }, + handle.get_stream()); + + // + // Finally, reorder result + // + thrust::scatter(handle.raft_handle().get_thrust_policy(), + tmp_result.begin(), + tmp_result.end(), + vertex_pos.begin(), + result.begin()); + + return result; +} + +template rmm::device_uvector vertex_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + cugraph::mtmg::graph_view_t const& graph_view, + std::optional>& renumber_map_view); + +template rmm::device_uvector vertex_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + cugraph::mtmg::graph_view_t const& graph_view, + std::optional>& renumber_map_view); + +template rmm::device_uvector vertex_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + cugraph::mtmg::graph_view_t const& graph_view, + std::optional>& renumber_map_view); + +template rmm::device_uvector vertex_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + cugraph::mtmg::graph_view_t const& graph_view, + std::optional>& renumber_map_view); + +template rmm::device_uvector vertex_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + cugraph::mtmg::graph_view_t const& graph_view, + std::optional>& renumber_map_view); + +template rmm::device_uvector vertex_result_view_t::gather( + handle_t const& handle, + raft::device_span vertices, + cugraph::mtmg::graph_view_t const& graph_view, + std::optional>& renumber_map_view); + +} // namespace mtmg +} // namespace cugraph diff --git a/cpp/src/prims/kv_store.cuh b/cpp/src/prims/kv_store.cuh index 8490bacfd9c..c46e83aa5da 100644 --- a/cpp/src/prims/kv_store.cuh +++ b/cpp/src/prims/kv_store.cuh @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh index b238b964ede..e6db21f1c7c 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -108,7 +108,7 @@ struct convert_pair_to_quadruplet_t { thrust::seq, displacement_first, displacement_first + minor_comm_size, nbr_idx))) - 1; local_nbr_idx -= *(displacement_first + minor_comm_rank); - cuda::std::atomic_ref counter(tx_counts[minor_comm_rank]); + cuda::atomic_ref counter(tx_counts[minor_comm_rank]); intra_partition_offset = counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); } return thrust::make_tuple(minor_comm_rank, intra_partition_offset, local_nbr_idx, key_idx); @@ -252,7 +252,7 @@ struct count_t { __device__ size_t operator()(size_t key_idx) const { - cuda::std::atomic_ref counter(sample_counts[key_idx]); + cuda::atomic_ref counter(sample_counts[key_idx]); return 
counter.fetch_add(int32_t{1}, cuda::std::memory_order_relaxed); } }; @@ -287,7 +287,7 @@ rmm::device_uvector get_sampling_index_without_replacement( #ifndef NO_CUGRAPH_OPS edge_t mid_partition_degree_range_last = static_cast(K * 10); // tuning parameter assert(mid_partition_degree_range_last > K); - size_t high_partition_over_sampling_K = K * 2; // tuning parameter + size_t high_partition_over_sampling_K = K * 2; // tuning parameter assert(high_partition_over_sampling_K > K); rmm::device_uvector sample_nbr_indices(frontier_degrees.size() * K, handle.get_stream()); @@ -883,7 +883,7 @@ per_v_random_select_transform_e(raft::handle_t const& handle, sample_nbr_indices); // neighbor index within an edge partition (note that each vertex's // neighbors are distributed in minor_comm_size partitions) std::optional> sample_key_indices{ - std::nullopt}; // relevant only when (minor_comm_size > 1) + std::nullopt}; // relevant only when (minor_comm_size > 1) auto local_frontier_sample_counts = std::vector{}; auto local_frontier_sample_displacements = std::vector{}; if (minor_comm_size > 1) { diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 46789c6b8bd..6a7334e9f1a 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -197,19 +197,19 @@ struct col_indx_extract_t { void operator()( original::device_vec_t const& d_coalesced_src_v, // in: coalesced vector of vertices original::device_vec_t const& - d_v_col_indx, // in: column indices, given by stepper's random engine + d_v_col_indx, // in: column indices, given by stepper's random engine original::device_vec_t& d_v_next_vertices, // out: set of destination vertices, for next step original::device_vec_t& - d_v_next_weights) // out: set of weights between src and destination vertices, for next step + d_v_next_weights) // out: set of weights between src and destination vertices, for next step const { thrust::transform_if( handle_.get_thrust_policy(), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(num_paths_), // input1 - d_v_col_indx.begin(), // input2 - out_degs_, // stencil + thrust::make_counting_iterator(num_paths_), // input1 + d_v_col_indx.begin(), // input2 + out_degs_, // stencil thrust::make_zip_iterator( thrust::make_tuple(d_v_next_vertices.begin(), d_v_next_weights.begin())), // output [max_depth = max_depth_, @@ -575,9 +575,9 @@ struct random_walker_t { d_crt_out_degs, // |current set of vertex out degrees| = nelems, // to be used as stencil (don't scatter if 0) original::device_vec_t const& - d_sizes, // paths sizes used to provide delta in coalesced paths; - // pre-condition: assumed as updated to reflect new vertex additions; - // also, this is the number of _vertices_ in each path; + d_sizes, // paths sizes used to provide delta in coalesced paths; + // pre-condition: assumed as updated to reflect new vertex additions; + // also, this is the number of _vertices_ in each path; // hence for scattering weights this needs to be adjusted; hence the `adjust` parameter index_t stride, // stride = coalesce block size (max_depth for vertices; max_depth-1 for weights) @@ -762,7 +762,7 @@ random_walks_impl( // pre-allocate num_paths * max_depth; // original::device_vec_t d_coalesced_v(num_paths * max_depth, - stream); // coalesced vertex set + stream); // coalesced vertex set original::device_vec_t d_coalesced_w(num_paths * (max_depth - 1), stream); // coalesced weight set original::device_vec_t d_paths_sz(num_paths, stream); // paths sizes diff --git 
a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh index 6fdb1c887f2..50f42851a1f 100644 --- a/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh +++ b/cpp/src/sampling/renumber_sampled_edgelist_impl.cuh @@ -19,6 +19,7 @@ #include #include +#include #include #include @@ -41,6 +42,7 @@ #include +// FIXME: deprecated, to be deleted namespace cugraph { namespace { diff --git a/cpp/src/sampling/renumber_sampled_edgelist_sg.cu b/cpp/src/sampling/renumber_sampled_edgelist_sg.cu index 46e2264a0c1..9a5f0d357b2 100644 --- a/cpp/src/sampling/renumber_sampled_edgelist_sg.cu +++ b/cpp/src/sampling/renumber_sampled_edgelist_sg.cu @@ -14,10 +14,11 @@ * limitations under the License. */ -#include +#include #include "renumber_sampled_edgelist_impl.cuh" +// FIXME: deprecated, to be deleted namespace cugraph { template std::tuple, diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh new file mode 100644 index 00000000000..ff8da72ff35 --- /dev/null +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -0,0 +1,1800 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace cugraph { + +namespace { + +template +struct edge_order_t { + thrust::optional> edgelist_label_offsets{thrust::nullopt}; + thrust::optional> edgelist_hops{thrust::nullopt}; + raft::device_span edgelist_majors{}; + raft::device_span edgelist_minors{}; + + __device__ bool operator()(size_t l_idx, size_t r_idx) const + { + if (edgelist_label_offsets) { + auto l_label = thrust::distance((*edgelist_label_offsets).begin() + 1, + thrust::upper_bound(thrust::seq, + (*edgelist_label_offsets).begin() + 1, + (*edgelist_label_offsets).end(), + (*edgelist_label_offsets)[0] + l_idx)); + auto r_label = thrust::distance((*edgelist_label_offsets).begin() + 1, + thrust::upper_bound(thrust::seq, + (*edgelist_label_offsets).begin() + 1, + (*edgelist_label_offsets).end(), + (*edgelist_label_offsets)[0] + r_idx)); + if (l_label != r_label) { return l_label < r_label; } + } + + if (edgelist_hops) { + auto l_hop = (*edgelist_hops)[l_idx]; + auto r_hop = (*edgelist_hops)[r_idx]; + if (l_hop != r_hop) { return l_hop < r_hop; } + } + + auto l_major = edgelist_majors[l_idx]; + auto r_major = edgelist_majors[r_idx]; + if (l_major != r_major) { return l_major < r_major; } + + auto l_minor = edgelist_minors[l_idx]; + auto r_minor = edgelist_minors[r_idx]; + if (l_minor != r_minor) { return l_minor < r_minor; } + + return l_idx < r_idx; + } +}; + +template +struct is_first_in_run_t { + thrust::optional> edgelist_label_offsets{thrust::nullopt}; + thrust::optional> edgelist_hops{thrust::nullopt}; + raft::device_span edgelist_majors{}; + + __device__ 
bool operator()(size_t i) const + { + if (i == 0) return true; + if (edgelist_label_offsets) { + auto prev_label = thrust::distance((*edgelist_label_offsets).begin() + 1, + thrust::upper_bound(thrust::seq, + (*edgelist_label_offsets).begin() + 1, + (*edgelist_label_offsets).end(), + i - 1)); + auto this_label = thrust::distance( + (*edgelist_label_offsets).begin() + 1, + thrust::upper_bound( + thrust::seq, (*edgelist_label_offsets).begin() + 1, (*edgelist_label_offsets).end(), i)); + if (this_label != prev_label) { return true; } + } + if (edgelist_hops) { + auto prev_hop = (*edgelist_hops)[i - 1]; + auto this_hop = (*edgelist_hops)[i]; + if (this_hop != prev_hop) { return true; } + } + return edgelist_majors[i] != edgelist_majors[i - 1]; + } +}; + +template +struct compute_label_index_t { + raft::device_span edgelist_label_offsets{}; + + __device__ label_index_t operator()(size_t i) const + { + return static_cast(thrust::distance( + edgelist_label_offsets.begin() + 1, + thrust::upper_bound( + thrust::seq, edgelist_label_offsets.begin() + 1, edgelist_label_offsets.end(), i))); + } +}; + +template +struct optionally_compute_label_index_t { + thrust::optional> edgelist_label_offsets{thrust::nullopt}; + + __device__ label_index_t operator()(size_t i) const + { + return edgelist_label_offsets ? static_cast(thrust::distance( + (*edgelist_label_offsets).begin() + 1, + thrust::upper_bound(thrust::seq, + (*edgelist_label_offsets).begin() + 1, + (*edgelist_label_offsets).end(), + i))) + : label_index_t{0}; + } +}; + +template +void check_input_edges( + raft::handle_t const& handle, + rmm::device_uvector const& edgelist_srcs, + rmm::device_uvector const& edgelist_dsts, + std::optional> const& edgelist_weights, + std::optional> const& edgelist_edge_ids, + std::optional> const& edgelist_edge_types, + std::optional, size_t>> const& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool do_expensive_check) +{ + CUGRAPH_EXPECTS(!edgelist_label_offsets || (std::get<1>(*edgelist_label_offsets) <= + std::numeric_limits::max()), + "Invalid input arguments: current implementation assumes that the number of " + "unique labels is no larger than std::numeric_limits::max()."); + CUGRAPH_EXPECTS(!edgelist_label_offsets || std::get<1>(*edgelist_label_offsets) > 0, + "Invlaid input arguments: there should be 1 or more labels if " + "edgelist_label_offsets.has_value() is true."); + CUGRAPH_EXPECTS( + !edgelist_label_offsets.has_value() || + (std::get<0>(*edgelist_label_offsets).size() == std::get<1>(*edgelist_label_offsets) + 1), + "Invalid input arguments: if edgelist_label_offsets is valid, " + "std::get<0>(*edgelist_label_offsets).size() (size of the offset array) should be " + "std::get<1>(*edgelist_label_offsets) (number of unique labels) + 1."); + + CUGRAPH_EXPECTS( + !edgelist_hops || (std::get<1>(*edgelist_hops) <= std::numeric_limits::max()), + "Invalid input arguments: current implementation assumes that the number of " + "hops is no larger than std::numeric_limits::max()."); + CUGRAPH_EXPECTS(!edgelist_hops || std::get<1>(*edgelist_hops) > 0, + "Invlaid input arguments: number of hops should be larger than 0 if " + "edgelist_hops.has_value() is true."); + + CUGRAPH_EXPECTS( + edgelist_srcs.size() == edgelist_dsts.size(), + "Invalid input arguments: edgelist_srcs.size() and edgelist_dsts.size() should coincide."); + CUGRAPH_EXPECTS( + !edgelist_weights.has_value() || (edgelist_srcs.size() == (*edgelist_weights).size()), + "Invalid input arguments: if edgelist_weights is valid, 
std::get<0>(*edgelist_weights).size() " + "and edgelist_srcs.size() should coincide."); + CUGRAPH_EXPECTS( + !edgelist_edge_ids.has_value() || (edgelist_srcs.size() == (*edgelist_edge_ids).size()), + "Invalid input arguments: if edgelist_edge_ids is valid, " + "std::get<0>(*edgelist_edge_ids).size() and edgelist_srcs.size() should coincide."); + CUGRAPH_EXPECTS( + !edgelist_edge_types.has_value() || (edgelist_srcs.size() == (*edgelist_edge_types).size()), + "Invalid input arguments: if edgelist_edge_types is valid, " + "std::get<0>(*edgelist_edge_types).size() and edgelist_srcs.size() should coincide."); + CUGRAPH_EXPECTS( + !edgelist_hops.has_value() || (edgelist_srcs.size() == std::get<0>(*edgelist_hops).size()), + "Invalid input arguments: if edgelist_hops is valid, std::get<0>(*edgelist_hops).size() and " + "edgelist_srcs.size() should coincide."); + + if (do_expensive_check) { + if (edgelist_label_offsets) { + CUGRAPH_EXPECTS(thrust::is_sorted(handle.get_thrust_policy(), + std::get<0>(*edgelist_label_offsets).begin(), + std::get<0>(*edgelist_label_offsets).end()), + "Invalid input arguments: if edgelist_label_offsets is valid, " + "std::get<0>(*edgelist_label_offsets) should be sorted."); + size_t back_element{}; + raft::update_host( + &back_element, + std::get<0>(*edgelist_label_offsets).data() + std::get<1>(*edgelist_label_offsets), + size_t{1}, + handle.get_stream()); + handle.get_stream(); + CUGRAPH_EXPECTS( + back_element == edgelist_srcs.size(), + "Invalid input arguments: if edgelist_label_offsets is valid, the last element of " + "std::get<0>(*edgelist_label_offsets) and edgelist_srcs.size() should coincide."); + } + } +} + +// output sorted by (primary key:label_index, secondary key:vertex) +template +std::tuple> /* label indices */, + rmm::device_uvector /* vertices */, + std::optional> /* minimum hops for the vertices */, + std::optional> /* label offsets for the output */> +compute_min_hop_for_unique_label_vertex_pairs( + raft::handle_t const& handle, + raft::device_span vertices, + std::optional> hops, + std::optional> label_indices, + std::optional> label_offsets) +{ + auto approx_edges_to_sort_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * + (1 << 20) /* tuning parameter */; // for segmented sort + + if (label_indices) { + auto num_labels = (*label_offsets).size() - 1; + + rmm::device_uvector tmp_label_indices((*label_indices).size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + (*label_indices).begin(), + (*label_indices).end(), + tmp_label_indices.begin()); + + rmm::device_uvector tmp_vertices(0, handle.get_stream()); + std::optional> tmp_hops{std::nullopt}; + + if (hops) { + tmp_vertices.resize(vertices.size(), handle.get_stream()); + thrust::copy( + handle.get_thrust_policy(), vertices.begin(), vertices.end(), tmp_vertices.begin()); + tmp_hops = rmm::device_uvector((*hops).size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), (*hops).begin(), (*hops).end(), (*tmp_hops).begin()); + + auto triplet_first = thrust::make_zip_iterator( + tmp_label_indices.begin(), tmp_vertices.begin(), (*tmp_hops).begin()); + thrust::sort( + handle.get_thrust_policy(), triplet_first, triplet_first + tmp_label_indices.size()); + auto key_first = thrust::make_zip_iterator(tmp_label_indices.begin(), tmp_vertices.begin()); + auto num_uniques = static_cast( + thrust::distance(key_first, + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + key_first, + key_first + tmp_label_indices.size(), + 
(*tmp_hops).begin())))); + tmp_label_indices.resize(num_uniques, handle.get_stream()); + tmp_vertices.resize(num_uniques, handle.get_stream()); + (*tmp_hops).resize(num_uniques, handle.get_stream()); + tmp_label_indices.shrink_to_fit(handle.get_stream()); + tmp_vertices.shrink_to_fit(handle.get_stream()); + (*tmp_hops).shrink_to_fit(handle.get_stream()); + } else { + rmm::device_uvector segment_sorted_vertices(vertices.size(), handle.get_stream()); + + rmm::device_uvector d_tmp_storage(0, handle.get_stream()); + + auto [h_label_offsets, h_edge_offsets] = + detail::compute_offset_aligned_edge_chunks(handle, + (*label_offsets).data(), + num_labels, + vertices.size(), + approx_edges_to_sort_per_iteration); + auto num_chunks = h_label_offsets.size() - 1; + + for (size_t i = 0; i < num_chunks; ++i) { + size_t tmp_storage_bytes{0}; + + auto offset_first = + thrust::make_transform_iterator((*label_offsets).data() + h_label_offsets[i], + detail::shift_left_t{h_edge_offsets[i]}); + cub::DeviceSegmentedSort::SortKeys(static_cast(nullptr), + tmp_storage_bytes, + vertices.begin() + h_edge_offsets[i], + segment_sorted_vertices.begin() + h_edge_offsets[i], + h_edge_offsets[i + 1] - h_edge_offsets[i], + h_label_offsets[i + 1] - h_label_offsets[i], + offset_first, + offset_first + 1, + handle.get_stream()); + + if (tmp_storage_bytes > d_tmp_storage.size()) { + d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); + } + + cub::DeviceSegmentedSort::SortKeys(d_tmp_storage.data(), + tmp_storage_bytes, + vertices.begin() + h_edge_offsets[i], + segment_sorted_vertices.begin() + h_edge_offsets[i], + h_edge_offsets[i + 1] - h_edge_offsets[i], + h_label_offsets[i + 1] - h_label_offsets[i], + offset_first, + offset_first + 1, + handle.get_stream()); + } + d_tmp_storage.resize(0, handle.get_stream()); + d_tmp_storage.shrink_to_fit(handle.get_stream()); + + auto pair_first = + thrust::make_zip_iterator(tmp_label_indices.begin(), segment_sorted_vertices.begin()); + auto num_uniques = static_cast(thrust::distance( + pair_first, + thrust::unique( + handle.get_thrust_policy(), pair_first, pair_first + tmp_label_indices.size()))); + tmp_label_indices.resize(num_uniques, handle.get_stream()); + segment_sorted_vertices.resize(num_uniques, handle.get_stream()); + tmp_label_indices.shrink_to_fit(handle.get_stream()); + segment_sorted_vertices.shrink_to_fit(handle.get_stream()); + + tmp_vertices = std::move(segment_sorted_vertices); + } + + rmm::device_uvector tmp_label_offsets(num_labels + 1, handle.get_stream()); + tmp_label_offsets.set_element_to_zero_async(0, handle.get_stream()); + thrust::upper_bound(handle.get_thrust_policy(), + tmp_label_indices.begin(), + tmp_label_indices.end(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_labels), + tmp_label_offsets.begin() + 1); + + return std::make_tuple(std::move(tmp_label_indices), + std::move(tmp_vertices), + std::move(tmp_hops), + std::move(tmp_label_offsets)); + } else { + rmm::device_uvector tmp_vertices(vertices.size(), handle.get_stream()); + thrust::copy( + handle.get_thrust_policy(), vertices.begin(), vertices.end(), tmp_vertices.begin()); + + if (hops) { + rmm::device_uvector tmp_hops((*hops).size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), (*hops).begin(), (*hops).end(), tmp_hops.begin()); + + auto pair_first = thrust::make_zip_iterator( + tmp_vertices.begin(), tmp_hops.begin()); // vertex is a primary key, hop is a secondary key + thrust::sort(handle.get_thrust_policy(), pair_first, 
pair_first + tmp_vertices.size()); + tmp_vertices.resize( + thrust::distance(tmp_vertices.begin(), + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + tmp_vertices.begin(), + tmp_vertices.end(), + tmp_hops.begin()))), + handle.get_stream()); + tmp_hops.resize(tmp_vertices.size(), handle.get_stream()); + + return std::make_tuple( + std::nullopt, std::move(tmp_vertices), std::move(tmp_hops), std::nullopt); + } else { + thrust::sort(handle.get_thrust_policy(), tmp_vertices.begin(), tmp_vertices.end()); + tmp_vertices.resize( + thrust::distance( + tmp_vertices.begin(), + thrust::unique(handle.get_thrust_policy(), tmp_vertices.begin(), tmp_vertices.end())), + handle.get_stream()); + tmp_vertices.shrink_to_fit(handle.get_stream()); + + return std::make_tuple(std::nullopt, std::move(tmp_vertices), std::nullopt, std::nullopt); + } + } +} + +template +std::tuple, std::optional>> +compute_renumber_map(raft::handle_t const& handle, + raft::device_span edgelist_majors, + raft::device_span edgelist_minors, + std::optional> edgelist_hops, + std::optional> edgelist_label_offsets) +{ + auto approx_edges_to_sort_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * + (1 << 20) /* tuning parameter */; // for segmented sort + + std::optional> edgelist_label_indices{std::nullopt}; + if (edgelist_label_offsets) { + edgelist_label_indices = + detail::expand_sparse_offsets(*edgelist_label_offsets, label_index_t{0}, handle.get_stream()); + } + + auto [unique_label_major_pair_label_indices, + unique_label_major_pair_vertices, + unique_label_major_pair_hops, + unique_label_major_pair_label_offsets] = + compute_min_hop_for_unique_label_vertex_pairs( + handle, + edgelist_majors, + edgelist_hops, + edgelist_label_indices ? std::make_optional>( + (*edgelist_label_indices).data(), (*edgelist_label_indices).size()) + : std::nullopt, + edgelist_label_offsets); + + auto [unique_label_minor_pair_label_indices, + unique_label_minor_pair_vertices, + unique_label_minor_pair_hops, + unique_label_minor_pair_label_offsets] = + compute_min_hop_for_unique_label_vertex_pairs( + handle, + edgelist_minors, + edgelist_hops, + edgelist_label_indices ? std::make_optional>( + (*edgelist_label_indices).data(), (*edgelist_label_indices).size()) + : std::nullopt, + edgelist_label_offsets); + + edgelist_label_indices = std::nullopt; + + if (edgelist_label_offsets) { + auto num_labels = (*edgelist_label_offsets).size() - 1; + + rmm::device_uvector renumber_map(0, handle.get_stream()); + rmm::device_uvector renumber_map_label_indices(0, handle.get_stream()); + + renumber_map.reserve((*unique_label_major_pair_label_indices).size() + + (*unique_label_minor_pair_label_indices).size(), + handle.get_stream()); + renumber_map_label_indices.reserve(renumber_map.capacity(), handle.get_stream()); + + auto num_chunks = (edgelist_majors.size() + (approx_edges_to_sort_per_iteration - 1)) / + approx_edges_to_sort_per_iteration; + auto chunk_size = (num_chunks > 0) ? 
((num_labels + (num_chunks - 1)) / num_chunks) : 0; + + size_t copy_offset{0}; + for (size_t i = 0; i < num_chunks; ++i) { + auto major_start_offset = + (*unique_label_major_pair_label_offsets).element(chunk_size * i, handle.get_stream()); + auto major_end_offset = + (*unique_label_major_pair_label_offsets) + .element(std::min(chunk_size * (i + 1), num_labels), handle.get_stream()); + auto minor_start_offset = + (*unique_label_minor_pair_label_offsets).element(chunk_size * i, handle.get_stream()); + auto minor_end_offset = + (*unique_label_minor_pair_label_offsets) + .element(std::min(chunk_size * (i + 1), num_labels), handle.get_stream()); + + rmm::device_uvector merged_label_indices( + (major_end_offset - major_start_offset) + (minor_end_offset - minor_start_offset), + handle.get_stream()); + rmm::device_uvector merged_vertices(merged_label_indices.size(), + handle.get_stream()); + rmm::device_uvector merged_flags(merged_label_indices.size(), handle.get_stream()); + + if (edgelist_hops) { + rmm::device_uvector merged_hops(merged_label_indices.size(), handle.get_stream()); + auto major_quad_first = + thrust::make_zip_iterator((*unique_label_major_pair_label_indices).begin(), + unique_label_major_pair_vertices.begin(), + (*unique_label_major_pair_hops).begin(), + thrust::make_constant_iterator(int8_t{0})); + auto minor_quad_first = + thrust::make_zip_iterator((*unique_label_minor_pair_label_indices).begin(), + unique_label_minor_pair_vertices.begin(), + (*unique_label_minor_pair_hops).begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge(handle.get_thrust_policy(), + major_quad_first + major_start_offset, + major_quad_first + major_end_offset, + minor_quad_first + minor_start_offset, + minor_quad_first + minor_end_offset, + thrust::make_zip_iterator(merged_label_indices.begin(), + merged_vertices.begin(), + merged_hops.begin(), + merged_flags.begin())); + + auto unique_key_first = + thrust::make_zip_iterator(merged_label_indices.begin(), merged_vertices.begin()); + merged_label_indices.resize( + thrust::distance( + unique_key_first, + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + unique_key_first, + unique_key_first + merged_label_indices.size(), + thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), + handle.get_stream()); + merged_vertices.resize(merged_label_indices.size(), handle.get_stream()); + merged_hops.resize(merged_label_indices.size(), handle.get_stream()); + merged_flags.resize(merged_label_indices.size(), handle.get_stream()); + auto sort_key_first = thrust::make_zip_iterator( + merged_label_indices.begin(), merged_hops.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_label_indices.size(), + merged_vertices.begin()); + } else { + auto major_triplet_first = + thrust::make_zip_iterator((*unique_label_major_pair_label_indices).begin(), + unique_label_major_pair_vertices.begin(), + thrust::make_constant_iterator(int8_t{0})); + auto minor_triplet_first = + thrust::make_zip_iterator((*unique_label_minor_pair_label_indices).begin(), + unique_label_minor_pair_vertices.begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge( + handle.get_thrust_policy(), + major_triplet_first + major_start_offset, + major_triplet_first + major_end_offset, + minor_triplet_first + minor_start_offset, + minor_triplet_first + minor_end_offset, + thrust::make_zip_iterator( + merged_label_indices.begin(), merged_vertices.begin(), merged_flags.begin())); + + 
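        // Both merge inputs above are sorted by (label_index, vertex) with a constant flag (0 for
        // majors, 1 for minors), so the merged output is ordered by (label_index, vertex, flag);
        // the unique_by_key below then keeps, for each (label_index, vertex) pair, the entry with
        // the smallest flag, i.e. vertices that appear as majors take precedence over minor-only
        // appearances (matching renumbering requirement 2 in sampling_functions.hpp).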
auto unique_key_first = + thrust::make_zip_iterator(merged_label_indices.begin(), merged_vertices.begin()); + merged_label_indices.resize( + thrust::distance( + unique_key_first, + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + unique_key_first, + unique_key_first + merged_label_indices.size(), + merged_flags.begin()))), + handle.get_stream()); + merged_vertices.resize(merged_label_indices.size(), handle.get_stream()); + merged_flags.resize(merged_label_indices.size(), handle.get_stream()); + auto sort_key_first = + thrust::make_zip_iterator(merged_label_indices.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_label_indices.size(), + merged_vertices.begin()); + } + + renumber_map.resize(copy_offset + merged_vertices.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + merged_vertices.begin(), + merged_vertices.end(), + renumber_map.begin() + copy_offset); + renumber_map_label_indices.resize(copy_offset + merged_label_indices.size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + merged_label_indices.begin(), + merged_label_indices.end(), + renumber_map_label_indices.begin() + copy_offset); + + copy_offset += merged_vertices.size(); + } + + renumber_map.shrink_to_fit(handle.get_stream()); + renumber_map_label_indices.shrink_to_fit(handle.get_stream()); + + return std::make_tuple(std::move(renumber_map), std::move(renumber_map_label_indices)); + } else { + if (edgelist_hops) { + rmm::device_uvector merged_vertices( + unique_label_major_pair_vertices.size() + unique_label_minor_pair_vertices.size(), + handle.get_stream()); + rmm::device_uvector merged_hops(merged_vertices.size(), handle.get_stream()); + rmm::device_uvector merged_flags(merged_vertices.size(), handle.get_stream()); + auto major_triplet_first = + thrust::make_zip_iterator(unique_label_major_pair_vertices.begin(), + (*unique_label_major_pair_hops).begin(), + thrust::make_constant_iterator(int8_t{0})); + auto minor_triplet_first = + thrust::make_zip_iterator(unique_label_minor_pair_vertices.begin(), + (*unique_label_minor_pair_hops).begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge(handle.get_thrust_policy(), + major_triplet_first, + major_triplet_first + unique_label_major_pair_vertices.size(), + minor_triplet_first, + minor_triplet_first + unique_label_minor_pair_vertices.size(), + thrust::make_zip_iterator( + merged_vertices.begin(), merged_hops.begin(), merged_flags.begin())); + + unique_label_major_pair_vertices.resize(0, handle.get_stream()); + unique_label_major_pair_vertices.shrink_to_fit(handle.get_stream()); + unique_label_major_pair_hops = std::nullopt; + unique_label_minor_pair_vertices.resize(0, handle.get_stream()); + unique_label_minor_pair_vertices.shrink_to_fit(handle.get_stream()); + unique_label_minor_pair_hops = std::nullopt; + + merged_vertices.resize( + thrust::distance(merged_vertices.begin(), + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + merged_vertices.begin(), + merged_vertices.end(), + thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), + handle.get_stream()); + merged_hops.resize(merged_vertices.size(), handle.get_stream()); + merged_flags.resize(merged_vertices.size(), handle.get_stream()); + + auto sort_key_first = thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + 
merged_vertices.begin()); + + return std::make_tuple(std::move(merged_vertices), std::nullopt); + } else { + rmm::device_uvector output_vertices(unique_label_minor_pair_vertices.size(), + handle.get_stream()); + auto output_last = thrust::set_difference(handle.get_thrust_policy(), + unique_label_minor_pair_vertices.begin(), + unique_label_minor_pair_vertices.end(), + unique_label_major_pair_vertices.begin(), + unique_label_major_pair_vertices.end(), + output_vertices.begin()); + + auto num_unique_majors = unique_label_major_pair_vertices.size(); + auto renumber_map = std::move(unique_label_major_pair_vertices); + renumber_map.resize( + renumber_map.size() + thrust::distance(output_vertices.begin(), output_last), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + output_vertices.begin(), + output_last, + renumber_map.begin() + num_unique_majors); + + return std::make_tuple(std::move(renumber_map), std::nullopt); + } + } +} + +// this function does not reorder edges (the i'th returned edge is the renumbered output of the i'th +// input edge) +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +renumber_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_majors, + rmm::device_uvector&& edgelist_minors, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool do_expensive_check) +{ + // 1. compute renumber_map + + auto [renumber_map, renumber_map_label_indices] = compute_renumber_map( + handle, + raft::device_span(edgelist_majors.data(), edgelist_majors.size()), + raft::device_span(edgelist_minors.data(), edgelist_minors.size()), + edgelist_hops ? std::make_optional>( + std::get<0>(*edgelist_hops).data(), std::get<0>(*edgelist_hops).size()) + : std::nullopt, + edgelist_label_offsets + ? std::make_optional>(std::get<0>(*edgelist_label_offsets)) + : std::nullopt); + + // 2. compute renumber map offsets for each label + + std::optional> renumber_map_label_offsets{}; + if (edgelist_label_offsets) { + auto num_unique_labels = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator((*renumber_map_label_indices).size()), + detail::is_first_in_run_t{(*renumber_map_label_indices).data()}); + rmm::device_uvector unique_label_indices(num_unique_labels, handle.get_stream()); + rmm::device_uvector vertex_counts(num_unique_labels, handle.get_stream()); + thrust::reduce_by_key(handle.get_thrust_policy(), + (*renumber_map_label_indices).begin(), + (*renumber_map_label_indices).end(), + thrust::make_constant_iterator(size_t{1}), + unique_label_indices.begin(), + vertex_counts.begin()); + + renumber_map_label_offsets = + rmm::device_uvector(std::get<1>(*edgelist_label_offsets) + 1, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + (*renumber_map_label_offsets).begin(), + (*renumber_map_label_offsets).end(), + size_t{0}); + thrust::scatter(handle.get_thrust_policy(), + vertex_counts.begin(), + vertex_counts.end(), + unique_label_indices.begin(), + (*renumber_map_label_offsets).begin() + 1); + + thrust::inclusive_scan(handle.get_thrust_policy(), + (*renumber_map_label_offsets).begin(), + (*renumber_map_label_offsets).end(), + (*renumber_map_label_offsets).begin()); + } + + // 3. 
renumber input edges + + if (edgelist_label_offsets) { + rmm::device_uvector new_vertices(renumber_map.size(), handle.get_stream()); + thrust::tabulate(handle.get_thrust_policy(), + new_vertices.begin(), + new_vertices.end(), + [label_indices = raft::device_span( + (*renumber_map_label_indices).data(), (*renumber_map_label_indices).size()), + renumber_map_label_offsets = raft::device_span( + (*renumber_map_label_offsets).data(), + (*renumber_map_label_offsets).size())] __device__(size_t i) { + auto label_index = label_indices[i]; + auto label_start_offset = renumber_map_label_offsets[label_index]; + return static_cast(i - label_start_offset); + }); + + (*renumber_map_label_indices).resize(0, handle.get_stream()); + (*renumber_map_label_indices).shrink_to_fit(handle.get_stream()); + + auto num_labels = std::get<0>(*edgelist_label_offsets).size(); + + rmm::device_uvector segment_sorted_renumber_map(renumber_map.size(), + handle.get_stream()); + rmm::device_uvector segment_sorted_new_vertices(new_vertices.size(), + handle.get_stream()); + + rmm::device_uvector d_tmp_storage(0, handle.get_stream()); + + auto approx_edges_to_sort_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * + (1 << 20) /* tuning parameter */; // for segmented sort + + auto [h_label_offsets, h_edge_offsets] = detail::compute_offset_aligned_edge_chunks( + handle, + (*renumber_map_label_offsets).data(), + static_cast((*renumber_map_label_offsets).size() - 1), + renumber_map.size(), + approx_edges_to_sort_per_iteration); + auto num_chunks = h_label_offsets.size() - 1; + + for (size_t i = 0; i < num_chunks; ++i) { + size_t tmp_storage_bytes{0}; + + auto offset_first = + thrust::make_transform_iterator((*renumber_map_label_offsets).data() + h_label_offsets[i], + detail::shift_left_t{h_edge_offsets[i]}); + cub::DeviceSegmentedSort::SortPairs(static_cast(nullptr), + tmp_storage_bytes, + renumber_map.begin() + h_edge_offsets[i], + segment_sorted_renumber_map.begin() + h_edge_offsets[i], + new_vertices.begin() + h_edge_offsets[i], + segment_sorted_new_vertices.begin() + h_edge_offsets[i], + h_edge_offsets[i + 1] - h_edge_offsets[i], + h_label_offsets[i + 1] - h_label_offsets[i], + offset_first, + offset_first + 1, + handle.get_stream()); + + if (tmp_storage_bytes > d_tmp_storage.size()) { + d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); + } + + cub::DeviceSegmentedSort::SortPairs(d_tmp_storage.data(), + tmp_storage_bytes, + renumber_map.begin() + h_edge_offsets[i], + segment_sorted_renumber_map.begin() + h_edge_offsets[i], + new_vertices.begin() + h_edge_offsets[i], + segment_sorted_new_vertices.begin() + h_edge_offsets[i], + h_edge_offsets[i + 1] - h_edge_offsets[i], + h_label_offsets[i + 1] - h_label_offsets[i], + offset_first, + offset_first + 1, + handle.get_stream()); + } + new_vertices.resize(0, handle.get_stream()); + d_tmp_storage.resize(0, handle.get_stream()); + new_vertices.shrink_to_fit(handle.get_stream()); + d_tmp_storage.shrink_to_fit(handle.get_stream()); + + auto edgelist_label_indices = detail::expand_sparse_offsets( + std::get<0>(*edgelist_label_offsets), label_index_t{0}, handle.get_stream()); + + auto pair_first = + thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_label_indices.begin()); + thrust::transform( + handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_majors.size(), + edgelist_majors.begin(), + [renumber_map_label_offsets = raft::device_span( + (*renumber_map_label_offsets).data(), 
(*renumber_map_label_offsets).size()), + old_vertices = raft::device_span(segment_sorted_renumber_map.data(), + segment_sorted_renumber_map.size()), + new_vertices = raft::device_span( + segment_sorted_new_vertices.data(), + segment_sorted_new_vertices.size())] __device__(auto pair) { + auto old_vertex = thrust::get<0>(pair); + auto label_index = thrust::get<1>(pair); + auto label_start_offset = renumber_map_label_offsets[label_index]; + auto label_end_offset = renumber_map_label_offsets[label_index + 1]; + auto it = thrust::lower_bound(thrust::seq, + old_vertices.begin() + label_start_offset, + old_vertices.begin() + label_end_offset, + old_vertex); + assert(*it == old_vertex); + return *(new_vertices.begin() + thrust::distance(old_vertices.begin(), it)); + }); + + pair_first = thrust::make_zip_iterator(edgelist_minors.begin(), edgelist_label_indices.begin()); + thrust::transform( + handle.get_thrust_policy(), + pair_first, + pair_first + edgelist_minors.size(), + edgelist_minors.begin(), + [renumber_map_label_offsets = raft::device_span( + (*renumber_map_label_offsets).data(), (*renumber_map_label_offsets).size()), + old_vertices = raft::device_span(segment_sorted_renumber_map.data(), + segment_sorted_renumber_map.size()), + new_vertices = raft::device_span( + segment_sorted_new_vertices.data(), + segment_sorted_new_vertices.size())] __device__(auto pair) { + auto old_vertex = thrust::get<0>(pair); + auto label_index = thrust::get<1>(pair); + auto label_start_offset = renumber_map_label_offsets[label_index]; + auto label_end_offset = renumber_map_label_offsets[label_index + 1]; + auto it = thrust::lower_bound(thrust::seq, + old_vertices.begin() + label_start_offset, + old_vertices.begin() + label_end_offset, + old_vertex); + assert(*it == old_vertex); + return new_vertices[thrust::distance(old_vertices.begin(), it)]; + }); + } else { + kv_store_t kv_store(renumber_map.begin(), + renumber_map.end(), + thrust::make_counting_iterator(vertex_t{0}), + std::numeric_limits::max(), + std::numeric_limits::max(), + handle.get_stream()); + auto kv_store_view = kv_store.view(); + + kv_store_view.find( + edgelist_majors.begin(), edgelist_majors.end(), edgelist_majors.begin(), handle.get_stream()); + kv_store_view.find( + edgelist_minors.begin(), edgelist_minors.end(), edgelist_minors.begin(), handle.get_stream()); + } + + return std::make_tuple(std::move(edgelist_majors), + std::move(edgelist_minors), + std::move(renumber_map), + std::move(renumber_map_label_offsets)); +} + +template +void permute_array(raft::handle_t const& handle, + IndexIterator index_first, + IndexIterator index_last, + ValueIterator value_first /* [INOUT] */) +{ + using value_t = typename thrust::iterator_traits::value_type; + + auto tmp_buffer = allocate_dataframe_buffer(thrust::distance(index_first, index_last), + handle.get_stream()); + thrust::gather(handle.get_thrust_policy(), + index_first, + index_last, + value_first, + get_dataframe_buffer_begin(tmp_buffer)); + thrust::copy(handle.get_thrust_policy(), + get_dataframe_buffer_begin(tmp_buffer), + get_dataframe_buffer_end(tmp_buffer), + value_first); +} + +// key: ((label), (hop), major, minor) +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional, size_t>>> +sort_sampled_edge_tuples( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_majors, + rmm::device_uvector&& edgelist_minors, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + 
std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets) +{ + std::vector h_label_offsets{}; + std::vector h_edge_offsets{}; + + if (edgelist_label_offsets) { + auto approx_edges_to_sort_per_iteration = + static_cast(handle.get_device_properties().multiProcessorCount) * + (1 << 20) /* tuning parameter */; // for sorts in chunks + + std::tie(h_label_offsets, h_edge_offsets) = + detail::compute_offset_aligned_edge_chunks(handle, + std::get<0>(*edgelist_label_offsets).data(), + std::get<1>(*edgelist_label_offsets), + edgelist_majors.size(), + approx_edges_to_sort_per_iteration); + } else { + h_label_offsets = {0, 1}; + h_edge_offsets = {0, edgelist_majors.size()}; + } + + auto num_chunks = h_label_offsets.size() - 1; + for (size_t i = 0; i < num_chunks; ++i) { + rmm::device_uvector indices(h_edge_offsets[i + 1] - h_edge_offsets[i], + handle.get_stream()); + thrust::sequence(handle.get_thrust_policy(), indices.begin(), indices.end(), size_t{0}); + edge_order_t edge_order_comp{ + edgelist_label_offsets ? thrust::make_optional>( + std::get<0>(*edgelist_label_offsets).data() + h_label_offsets[i], + (h_label_offsets[i + 1] - h_label_offsets[i]) + 1) + : thrust::nullopt, + edgelist_hops ? thrust::make_optional>( + std::get<0>(*edgelist_hops).data() + h_edge_offsets[i], indices.size()) + : thrust::nullopt, + raft::device_span(edgelist_majors.data() + h_edge_offsets[i], indices.size()), + raft::device_span(edgelist_minors.data() + h_edge_offsets[i], + indices.size())}; + thrust::sort(handle.get_thrust_policy(), indices.begin(), indices.end(), edge_order_comp); + + permute_array(handle, + indices.begin(), + indices.end(), + thrust::make_zip_iterator(edgelist_majors.begin(), edgelist_minors.begin()) + + h_edge_offsets[i]); + + if (edgelist_weights) { + permute_array( + handle, indices.begin(), indices.end(), (*edgelist_weights).begin() + h_edge_offsets[i]); + } + + if (edgelist_edge_ids) { + permute_array( + handle, indices.begin(), indices.end(), (*edgelist_edge_ids).begin() + h_edge_offsets[i]); + } + + if (edgelist_edge_types) { + permute_array( + handle, indices.begin(), indices.end(), (*edgelist_edge_types).begin() + h_edge_offsets[i]); + } + + if (edgelist_hops) { + permute_array(handle, + indices.begin(), + indices.end(), + std::get<0>(*edgelist_hops).begin() + h_edge_offsets[i]); + } + } + + return std::make_tuple(std::move(edgelist_majors), + std::move(edgelist_minors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types), + std::move(edgelist_hops)); +} + +} // namespace + +template +std::tuple>, // dcsr/dcsc major vertices + rmm::device_uvector, // (d)csr/(d)csc offset values + rmm::device_uvector, // minor vertices + std::optional>, // weights + std::optional>, // edge IDs + std::optional>, // edge types + std::optional>, // (label, hop) offsets to the (d)csr/(d)csc + // offset array + rmm::device_uvector, // renumber map + std::optional>> // label offsets to the renumber map +renumber_and_compress_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool compress_per_hop, + bool doubly_compress, + bool do_expensive_check) +{ + using label_index_t = uint32_t; + + auto num_labels = edgelist_label_offsets ? 
std::get<1>(*edgelist_label_offsets) : size_t{1}; + auto num_hops = edgelist_hops ? std::get<1>(*edgelist_hops) : size_t{1}; + + // 1. check input arguments + + check_input_edges(handle, + edgelist_srcs, + edgelist_dsts, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops, + edgelist_label_offsets, + do_expensive_check); + + CUGRAPH_EXPECTS( + !doubly_compress || !compress_per_hop, + "Invalid input arguments: compress_per_hop should be false if doubly_compress is true."); + CUGRAPH_EXPECTS(!compress_per_hop || edgelist_hops, + "Invalid input arguments: edgelist_hops.has_value() should be true if " + "compress_per_hop is true."); + + // 2. renumber + + auto edgelist_majors = src_is_major ? std::move(edgelist_srcs) : std::move(edgelist_dsts); + auto edgelist_minors = src_is_major ? std::move(edgelist_dsts) : std::move(edgelist_srcs); + + rmm::device_uvector renumber_map(0, handle.get_stream()); + std::optional> renumber_map_label_offsets{std::nullopt}; + std::tie(edgelist_majors, edgelist_minors, renumber_map, renumber_map_label_offsets) = + renumber_sampled_edgelist( + handle, + std::move(edgelist_majors), + std::move(edgelist_minors), + edgelist_hops ? std::make_optional(std::make_tuple( + raft::device_span(std::get<0>(*edgelist_hops).data(), + std::get<0>(*edgelist_hops).size()), + num_hops)) + : std::nullopt, + edgelist_label_offsets, + do_expensive_check); + + // 3. sort by ((l), (h), major, minor) + + std::tie(edgelist_majors, + edgelist_minors, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops) = sort_sampled_edge_tuples(handle, + std::move(edgelist_majors), + std::move(edgelist_minors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types), + std::move(edgelist_hops), + edgelist_label_offsets); + + if (do_expensive_check) { + if (!compress_per_hop && edgelist_hops) { + rmm::device_uvector min_vertices(num_labels * num_hops, handle.get_stream()); + rmm::device_uvector max_vertices(min_vertices.size(), handle.get_stream()); + + auto label_index_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + optionally_compute_label_index_t{ + edgelist_label_offsets ? 
thrust::make_optional(std::get<0>(*edgelist_label_offsets)) + : thrust::nullopt}); + auto input_key_first = + thrust::make_zip_iterator(label_index_first, std::get<0>(*edgelist_hops).begin()); + rmm::device_uvector unique_key_label_indices(min_vertices.size(), + handle.get_stream()); + rmm::device_uvector unique_key_hops(min_vertices.size(), handle.get_stream()); + auto output_key_first = + thrust::make_zip_iterator(unique_key_label_indices.begin(), unique_key_hops.begin()); + + auto output_it = + thrust::reduce_by_key(handle.get_thrust_policy(), + input_key_first, + input_key_first + edgelist_majors.size(), + edgelist_majors.begin(), + output_key_first, + min_vertices.begin(), + thrust::equal_to>{}, + thrust::minimum{}); + auto num_unique_keys = + static_cast(thrust::distance(output_key_first, thrust::get<0>(output_it))); + thrust::reduce_by_key(handle.get_thrust_policy(), + input_key_first, + input_key_first + edgelist_majors.size(), + edgelist_majors.begin(), + output_key_first, + max_vertices.begin(), + thrust::equal_to>{}, + thrust::maximum{}); + if (num_unique_keys > 1) { + auto num_invalids = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{1}), + thrust::make_counting_iterator(num_unique_keys), + [output_key_first, + min_vertices = raft::device_span(min_vertices.data(), num_unique_keys), + max_vertices = raft::device_span(max_vertices.data(), + num_unique_keys)] __device__(size_t i) { + auto prev_key = *(output_key_first + (i - 1)); + auto this_key = *(output_key_first + i); + if (thrust::get<0>(prev_key) == thrust::get<0>(this_key)) { + auto this_min = min_vertices[i]; + auto prev_max = max_vertices[i - 1]; + return prev_max >= this_min; + } else { + return false; + } + }); + CUGRAPH_EXPECTS(num_invalids == 0, + "Invalid input arguments: if @p compress_per_hop is false and @p " + "edgelist_hops.has_value() is true, the minimum majors with hop N + 1 " + "should be larger than the maximum majors with hop N after renumbering."); + } + } + } + + // 4. compute offsets for ((l), (h), major) triplets with non zero neighbors (update + // compressed_label_indices, compressed_hops, compressed_nzd_vertices, and compressed_offsets) + + auto num_uniques = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(edgelist_majors.size()), + is_first_in_run_t{ + edgelist_label_offsets ? thrust::make_optional(std::get<0>(*edgelist_label_offsets)) + : thrust::nullopt, + edgelist_hops ? thrust::make_optional>( + std::get<0>(*edgelist_hops).data(), std::get<0>(*edgelist_hops).size()) + : thrust::nullopt, + raft::device_span( + edgelist_majors.data(), + edgelist_majors.size())}); // number of unique ((label), (hop), major) triplets + + auto compressed_label_indices = + edgelist_label_offsets + ? std::make_optional>(num_uniques, handle.get_stream()) + : std::nullopt; + auto compressed_hops = edgelist_hops ? 
std::make_optional>( + num_uniques, handle.get_stream()) + : std::nullopt; + rmm::device_uvector compressed_nzd_vertices(num_uniques, handle.get_stream()); + rmm::device_uvector compressed_offsets(num_uniques + 1, handle.get_stream()); + compressed_offsets.set_element_to_zero_async(num_uniques, handle.get_stream()); + + if (edgelist_label_offsets) { + auto label_index_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), + compute_label_index_t{std::get<0>(*edgelist_label_offsets)}); + + if (edgelist_hops) { + auto input_key_first = thrust::make_zip_iterator( + label_index_first, std::get<0>(*edgelist_hops).begin(), edgelist_majors.begin()); + auto output_key_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), + (*compressed_hops).begin(), + compressed_nzd_vertices.begin()); + thrust::reduce_by_key(handle.get_thrust_policy(), + input_key_first, + input_key_first + edgelist_majors.size(), + thrust::make_constant_iterator(size_t{1}), + output_key_first, + compressed_offsets.begin()); + } else { + auto input_key_first = thrust::make_zip_iterator(label_index_first, edgelist_majors.begin()); + auto output_key_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), + compressed_nzd_vertices.begin()); + thrust::reduce_by_key(handle.get_thrust_policy(), + input_key_first, + input_key_first + edgelist_majors.size(), + thrust::make_constant_iterator(size_t{1}), + output_key_first, + compressed_offsets.begin()); + } + } else { + if (edgelist_hops) { + auto input_key_first = + thrust::make_zip_iterator(std::get<0>(*edgelist_hops).begin(), edgelist_majors.begin()); + auto output_key_first = + thrust::make_zip_iterator((*compressed_hops).begin(), compressed_nzd_vertices.begin()); + thrust::reduce_by_key(handle.get_thrust_policy(), + input_key_first, + input_key_first + edgelist_majors.size(), + thrust::make_constant_iterator(size_t{1}), + output_key_first, + compressed_offsets.begin()); + } else { + auto input_key_first = edgelist_majors.begin(); + auto output_key_first = compressed_nzd_vertices.begin(); + thrust::reduce_by_key(handle.get_thrust_policy(), + input_key_first, + input_key_first + edgelist_majors.size(), + thrust::make_constant_iterator(size_t{1}), + output_key_first, + compressed_offsets.begin()); + } + } + thrust::exclusive_scan(handle.get_thrust_policy(), + compressed_offsets.begin(), + compressed_offsets.end(), + compressed_offsets.begin()); + + // 5. 
update compressed_offsets to include zero degree vertices (if doubly_compress is false) and + // compressed_offset_label_hop_offsets (if edgelist_label_offsets.has_value() or + // edgelist_hops.has_value() is true) + + std::optional> compressed_offset_label_hop_offsets{std::nullopt}; + if (doubly_compress) { + if (edgelist_label_offsets || edgelist_hops) { + rmm::device_uvector offset_array_offsets(num_labels * num_hops + 1, + handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + offset_array_offsets.begin(), + offset_array_offsets.end(), + size_t{0}); + + if (edgelist_label_offsets) { + if (edgelist_hops) { + auto pair_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), + (*compressed_hops).begin()); + auto value_pair_first = thrust::make_transform_iterator( + thrust::make_counting_iterator(size_t{0}), [num_hops] __device__(size_t i) { + return thrust::make_tuple(static_cast(i / num_hops), + static_cast(i % num_hops)); + }); + thrust::upper_bound(handle.get_thrust_policy(), + pair_first, + pair_first + (*compressed_label_indices).size(), + value_pair_first, + value_pair_first + (num_labels * num_hops), + offset_array_offsets.begin() + 1); + } else { + thrust::upper_bound( + handle.get_thrust_policy(), + (*compressed_label_indices).begin(), + (*compressed_label_indices).end(), + thrust::make_counting_iterator(label_index_t{0}), + thrust::make_counting_iterator(static_cast(num_labels)), + offset_array_offsets.begin() + 1); + } + } else { + thrust::upper_bound(handle.get_thrust_policy(), + (*compressed_hops).begin(), + (*compressed_hops).end(), + thrust::make_counting_iterator(int32_t{0}), + thrust::make_counting_iterator(static_cast(num_hops)), + offset_array_offsets.begin() + 1); + } + + compressed_offset_label_hop_offsets = std::move(offset_array_offsets); + } + } else { // !doubly_compress + rmm::device_uvector major_vertex_counts(num_labels * num_hops, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + major_vertex_counts.begin(), + major_vertex_counts.end(), + [edgelist_label_offsets = edgelist_label_offsets + ? thrust::make_optional(std::get<0>(*edgelist_label_offsets)) + : thrust::nullopt, + edgelist_hops = edgelist_hops + ? thrust::make_optional>( + std::get<0>(*edgelist_hops).data(), std::get<0>(*edgelist_hops).size()) + : thrust::nullopt, + edgelist_majors = + raft::device_span(edgelist_majors.data(), edgelist_majors.size()), + num_hops, + compress_per_hop] __device__(size_t i) { + size_t start_offset{0}; + auto end_offset = edgelist_majors.size(); + auto label_start_offset = start_offset; + auto label_end_offset = end_offset; + + if (edgelist_label_offsets) { + auto l_idx = static_cast(i / num_hops); + start_offset = (*edgelist_label_offsets)[l_idx]; + end_offset = (*edgelist_label_offsets)[l_idx + 1]; + label_start_offset = start_offset; + label_end_offset = end_offset; + } + + if (num_hops > 1) { + auto h = static_cast(i % num_hops); + auto lower_it = thrust::lower_bound(thrust::seq, + (*edgelist_hops).begin() + start_offset, + (*edgelist_hops).begin() + end_offset, + h); + auto upper_it = thrust::upper_bound(thrust::seq, + (*edgelist_hops).begin() + start_offset, + (*edgelist_hops).begin() + end_offset, + h); + start_offset = static_cast(thrust::distance((*edgelist_hops).begin(), lower_it)); + end_offset = static_cast(thrust::distance((*edgelist_hops).begin(), upper_it)); + } + if (compress_per_hop) { + return (start_offset < end_offset) ? 
(edgelist_majors[end_offset - 1] + 1) : vertex_t{0}; + } else { + if (end_offset != label_end_offset) { + return edgelist_majors[end_offset]; + } else if (label_start_offset < label_end_offset) { + return edgelist_majors[end_offset - 1] + 1; + } else { + return vertex_t{0}; + } + } + }); + + std::optional> minor_vertex_counts{std::nullopt}; + if (compress_per_hop) { + minor_vertex_counts = + rmm::device_uvector(major_vertex_counts.size(), handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + (*minor_vertex_counts).begin(), + (*minor_vertex_counts).end(), + vertex_t{0}); + if (edgelist_label_offsets) { + auto triplet_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), + (*compressed_hops).begin(), + thrust::make_counting_iterator(size_t{0})); + thrust::for_each(handle.get_thrust_policy(), + triplet_first, + triplet_first + compressed_nzd_vertices.size(), + [edgelist_minors = raft::device_span( + edgelist_minors.data(), edgelist_minors.size()), + compressed_offsets = raft::device_span( + compressed_offsets.data(), compressed_offsets.size()), + minor_vertex_counts = raft::device_span( + (*minor_vertex_counts).data(), (*minor_vertex_counts).size()), + num_hops] __device__(auto triplet) { + auto nzd_v_idx = thrust::get<2>(triplet); + size_t end_offset = compressed_offsets[nzd_v_idx + 1]; + auto l_idx = thrust::get<0>(triplet); + auto h = thrust::get<1>(triplet); + cuda::atomic_ref minor_vertex_count( + minor_vertex_counts[l_idx * num_hops + h]); + minor_vertex_count.fetch_max(edgelist_minors[end_offset - 1] + 1, + cuda::std::memory_order_relaxed); + }); + } else { + auto pair_first = thrust::make_zip_iterator((*compressed_hops).begin(), + thrust::make_counting_iterator(size_t{0})); + thrust::for_each(handle.get_thrust_policy(), + pair_first, + pair_first + compressed_nzd_vertices.size(), + [edgelist_minors = raft::device_span( + edgelist_minors.data(), edgelist_minors.size()), + compressed_offsets = raft::device_span( + compressed_offsets.data(), compressed_offsets.size()), + minor_vertex_counts = raft::device_span( + (*minor_vertex_counts).data(), (*minor_vertex_counts).size()), + num_hops] __device__(auto pair) { + auto nzd_v_idx = thrust::get<1>(pair); + size_t end_offset = compressed_offsets[nzd_v_idx + 1]; + auto h = thrust::get<0>(pair); + cuda::atomic_ref minor_vertex_count( + minor_vertex_counts[h]); + minor_vertex_count.fetch_max(edgelist_minors[end_offset - 1] + 1, + cuda::std::memory_order_relaxed); + }); + } + } + + rmm::device_uvector offset_array_offsets(num_labels * num_hops + 1, + handle.get_stream()); + offset_array_offsets.set_element_to_zero_async(num_labels * num_hops, handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + offset_array_offsets.begin(), + offset_array_offsets.begin() + (num_labels * num_hops), + [major_vertex_counts = + raft::device_span(major_vertex_counts.data(), major_vertex_counts.size()), + minor_vertex_counts = minor_vertex_counts + ? 
thrust::make_optional>( + (*minor_vertex_counts).data(), (*minor_vertex_counts).size()) + : thrust::nullopt, + num_hops, + compress_per_hop] __device__(size_t i) { + auto vertex_count = major_vertex_counts[i]; + if (num_hops > 1) { + if (compress_per_hop) { + for (size_t j = (i - (i % num_hops)); j < i; ++j) { + vertex_count = cuda::std::max(vertex_count, major_vertex_counts[j]); + vertex_count = cuda::std::max(vertex_count, (*minor_vertex_counts)[j]); + } + } else { + if (i % num_hops != 0) { vertex_count -= major_vertex_counts[i - 1]; } + } + } + return vertex_count; + }); + thrust::exclusive_scan(handle.get_thrust_policy(), + offset_array_offsets.begin(), + offset_array_offsets.end(), + offset_array_offsets.begin()); + + auto tmp_compressed_offsets = rmm::device_uvector( + offset_array_offsets.back_element(handle.get_stream()) + 1, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + tmp_compressed_offsets.begin(), + tmp_compressed_offsets.end(), + size_t{0}); + + if (edgelist_label_offsets) { + if (edgelist_hops) { + auto triplet_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), + (*compressed_hops).begin(), + thrust::make_counting_iterator(size_t{0})); + thrust::for_each( + handle.get_thrust_policy(), + triplet_first, + triplet_first + compressed_nzd_vertices.size(), + [compressed_nzd_vertices = raft::device_span( + compressed_nzd_vertices.data(), compressed_nzd_vertices.size()), + offset_array_offsets = raft::device_span(offset_array_offsets.data(), + offset_array_offsets.size()), + compressed_offsets = + raft::device_span(compressed_offsets.data(), compressed_offsets.size()), + tmp_compressed_offsets = raft::device_span(tmp_compressed_offsets.data(), + tmp_compressed_offsets.size()), + compress_per_hop, + num_hops] __device__(auto triplet) { + auto nzd_v_idx = thrust::get<2>(triplet); + size_t start_offset = compressed_offsets[nzd_v_idx]; + size_t end_offset = compressed_offsets[nzd_v_idx + 1]; + auto l_idx = thrust::get<0>(triplet); + auto h = thrust::get<1>(triplet); + tmp_compressed_offsets[offset_array_offsets[l_idx * num_hops + + (compress_per_hop ? 
h : int32_t{0})] + + compressed_nzd_vertices[nzd_v_idx]] = end_offset - start_offset; + }); + } else { + auto pair_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), + thrust::make_counting_iterator(size_t{0})); + thrust::for_each( + handle.get_thrust_policy(), + pair_first, + pair_first + compressed_nzd_vertices.size(), + [compressed_nzd_vertices = raft::device_span( + compressed_nzd_vertices.data(), compressed_nzd_vertices.size()), + offset_array_offsets = raft::device_span(offset_array_offsets.data(), + offset_array_offsets.size()), + compressed_offsets = + raft::device_span(compressed_offsets.data(), compressed_offsets.size()), + tmp_compressed_offsets = raft::device_span( + tmp_compressed_offsets.data(), tmp_compressed_offsets.size())] __device__(auto pair) { + auto nzd_v_idx = thrust::get<1>(pair); + size_t start_offset = compressed_offsets[nzd_v_idx]; + size_t end_offset = compressed_offsets[nzd_v_idx + 1]; + auto l_idx = thrust::get<0>(pair); + tmp_compressed_offsets[offset_array_offsets[l_idx] + + compressed_nzd_vertices[nzd_v_idx]] = end_offset - start_offset; + }); + } + } else { + if (edgelist_hops) { + auto pair_first = thrust::make_zip_iterator((*compressed_hops).begin(), + thrust::make_counting_iterator(size_t{0})); + thrust::for_each( + handle.get_thrust_policy(), + pair_first, + pair_first + compressed_nzd_vertices.size(), + [compressed_nzd_vertices = raft::device_span( + compressed_nzd_vertices.data(), compressed_nzd_vertices.size()), + offset_array_offsets = raft::device_span(offset_array_offsets.data(), + offset_array_offsets.size()), + compressed_offsets = + raft::device_span(compressed_offsets.data(), compressed_offsets.size()), + tmp_compressed_offsets = raft::device_span(tmp_compressed_offsets.data(), + tmp_compressed_offsets.size()), + compress_per_hop] __device__(auto pair) { + auto nzd_v_idx = thrust::get<1>(pair); + size_t start_offset = compressed_offsets[nzd_v_idx]; + size_t end_offset = compressed_offsets[nzd_v_idx + 1]; + auto h = thrust::get<0>(pair); + tmp_compressed_offsets[offset_array_offsets[compress_per_hop ? h : int32_t{0}] + + compressed_nzd_vertices[nzd_v_idx]] = end_offset - start_offset; + }); + } else { + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(compressed_nzd_vertices.size()), + [compressed_nzd_vertices = raft::device_span( + compressed_nzd_vertices.data(), compressed_nzd_vertices.size()), + compressed_offsets = + raft::device_span(compressed_offsets.data(), compressed_offsets.size()), + tmp_compressed_offsets = + raft::device_span(tmp_compressed_offsets.data(), + tmp_compressed_offsets.size())] __device__(auto nzd_v_idx) { + size_t start_offset = compressed_offsets[nzd_v_idx]; + size_t end_offset = compressed_offsets[nzd_v_idx + 1]; + tmp_compressed_offsets[compressed_nzd_vertices[nzd_v_idx]] = end_offset - start_offset; + }); + } + } + + thrust::exclusive_scan(handle.get_thrust_policy(), + tmp_compressed_offsets.begin(), + tmp_compressed_offsets.end(), + tmp_compressed_offsets.begin()); + + compressed_offsets = std::move(tmp_compressed_offsets); + + if (edgelist_label_offsets || edgelist_hops) { + compressed_offset_label_hop_offsets = std::move(offset_array_offsets); + } + } + + edgelist_hops = std::nullopt; + + return std::make_tuple( + doubly_compress ? 
std::make_optional(std::move(compressed_nzd_vertices)) : std::nullopt, + std::move(compressed_offsets), + std::move(edgelist_minors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types), + std::move(compressed_offset_label_hop_offsets), + std::move(renumber_map), + std::move(renumber_map_label_offsets)); +} + +template +std::tuple, // srcs + rmm::device_uvector, // dsts + std::optional>, // weights + std::optional>, // edge IDs + std::optional>, // edge types + std::optional>, // (label, hop) offsets to the edges + rmm::device_uvector, // renumber map + std::optional>> // label offsets to the renumber map +renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check) +{ + using label_index_t = uint32_t; + + auto num_labels = edgelist_label_offsets ? std::get<1>(*edgelist_label_offsets) : size_t{1}; + auto num_hops = edgelist_hops ? std::get<1>(*edgelist_hops) : size_t{1}; + + // 1. check input arguments + + check_input_edges(handle, + edgelist_srcs, + edgelist_dsts, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops, + edgelist_label_offsets, + do_expensive_check); + + // 2. renumber + + auto edgelist_majors = src_is_major ? std::move(edgelist_srcs) : std::move(edgelist_dsts); + auto edgelist_minors = src_is_major ? std::move(edgelist_dsts) : std::move(edgelist_srcs); + + rmm::device_uvector renumber_map(0, handle.get_stream()); + std::optional> renumber_map_label_offsets{std::nullopt}; + std::tie(edgelist_majors, edgelist_minors, renumber_map, renumber_map_label_offsets) = + renumber_sampled_edgelist( + handle, + std::move(edgelist_majors), + std::move(edgelist_minors), + edgelist_hops ? std::make_optional(std::make_tuple( + raft::device_span(std::get<0>(*edgelist_hops).data(), + std::get<0>(*edgelist_hops).size()), + num_hops)) + : std::nullopt, + edgelist_label_offsets, + do_expensive_check); + + // 3. sort by ((l), (h), major, minor) + + std::tie(edgelist_majors, + edgelist_minors, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops) = sort_sampled_edge_tuples(handle, + std::move(edgelist_majors), + std::move(edgelist_minors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types), + std::move(edgelist_hops), + edgelist_label_offsets); + + // 4. compute edgelist_label_hop_offsets + + std::optional> edgelist_label_hop_offsets{std::nullopt}; + if (edgelist_label_offsets || edgelist_hops) { + edgelist_label_hop_offsets = + rmm::device_uvector(num_labels * num_hops + 1, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + (*edgelist_label_hop_offsets).begin(), + (*edgelist_label_hop_offsets).end(), + size_t{0}); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_labels * num_hops), + [edgelist_label_offsets = edgelist_label_offsets + ? thrust::make_optional(std::get<0>(*edgelist_label_offsets)) + : thrust::nullopt, + edgelist_hops = edgelist_hops + ? 
thrust::make_optional>( + std::get<0>(*edgelist_hops).data(), std::get<0>(*edgelist_hops).size()) + : thrust::nullopt, + num_hops, + num_edges = edgelist_majors.size()] __device__(size_t i) { + size_t start_offset{0}; + auto end_offset = num_edges; + + if (edgelist_label_offsets) { + auto l_idx = static_cast(i / num_hops); + start_offset = (*edgelist_label_offsets)[l_idx]; + end_offset = (*edgelist_label_offsets)[l_idx + 1]; + } + + if (edgelist_hops) { + auto h = static_cast(i % num_hops); + auto lower_it = thrust::lower_bound(thrust::seq, + (*edgelist_hops).begin() + start_offset, + (*edgelist_hops).begin() + end_offset, + h); + auto upper_it = thrust::upper_bound(thrust::seq, + (*edgelist_hops).begin() + start_offset, + (*edgelist_hops).begin() + end_offset, + h); + start_offset = static_cast(thrust::distance((*edgelist_hops).begin(), lower_it)); + end_offset = static_cast(thrust::distance((*edgelist_hops).begin(), upper_it)); + } + + return end_offset - start_offset; + }); + thrust::exclusive_scan(handle.get_thrust_policy(), + (*edgelist_label_hop_offsets).begin(), + (*edgelist_label_hop_offsets).end(), + (*edgelist_label_hop_offsets).begin()); + } + + edgelist_hops = std::nullopt; + + return std::make_tuple(std::move(src_is_major ? edgelist_majors : edgelist_minors), + std::move(src_is_major ? edgelist_minors : edgelist_majors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types), + std::move(edgelist_label_hop_offsets), + std::move(renumber_map), + std::move(renumber_map_label_offsets)); +} + +template +std::tuple, // srcs + rmm::device_uvector, // dsts + std::optional>, // weights + std::optional>, // edge IDs + std::optional>, // edge types + std::optional>> // (label, hop) offsets to the edges +sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check) +{ + using label_index_t = uint32_t; + + auto num_labels = edgelist_label_offsets ? std::get<1>(*edgelist_label_offsets) : size_t{1}; + auto num_hops = edgelist_hops ? std::get<1>(*edgelist_hops) : size_t{1}; + + // 1. check input arguments + + check_input_edges(handle, + edgelist_srcs, + edgelist_dsts, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops, + edgelist_label_offsets, + do_expensive_check); + + // 2. sort by ((l), (h), major, minor) + + auto edgelist_majors = src_is_major ? std::move(edgelist_srcs) : std::move(edgelist_dsts); + auto edgelist_minors = src_is_major ? std::move(edgelist_dsts) : std::move(edgelist_srcs); + + std::tie(edgelist_majors, + edgelist_minors, + edgelist_weights, + edgelist_edge_ids, + edgelist_edge_types, + edgelist_hops) = sort_sampled_edge_tuples(handle, + std::move(edgelist_majors), + std::move(edgelist_minors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types), + std::move(edgelist_hops), + edgelist_label_offsets); + + // 3. 
compute edgelist_label_hop_offsets + + std::optional> edgelist_label_hop_offsets{std::nullopt}; + if (edgelist_label_offsets || edgelist_hops) { + edgelist_label_hop_offsets = + rmm::device_uvector(num_labels * num_hops + 1, handle.get_stream()); + thrust::fill(handle.get_thrust_policy(), + (*edgelist_label_hop_offsets).begin(), + (*edgelist_label_hop_offsets).end(), + size_t{0}); + thrust::for_each( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(num_labels * num_hops), + [edgelist_label_offsets = edgelist_label_offsets + ? thrust::make_optional(std::get<0>(*edgelist_label_offsets)) + : thrust::nullopt, + edgelist_hops = edgelist_hops + ? thrust::make_optional>( + std::get<0>(*edgelist_hops).data(), std::get<0>(*edgelist_hops).size()) + : thrust::nullopt, + num_hops, + num_edges = edgelist_majors.size()] __device__(size_t i) { + size_t start_offset{0}; + auto end_offset = num_edges; + + if (edgelist_label_offsets) { + auto l_idx = static_cast(i / num_hops); + start_offset = (*edgelist_label_offsets)[l_idx]; + end_offset = (*edgelist_label_offsets)[l_idx + 1]; + } + + if (edgelist_hops) { + auto h = static_cast(i % num_hops); + auto lower_it = thrust::lower_bound(thrust::seq, + (*edgelist_hops).begin() + start_offset, + (*edgelist_hops).begin() + end_offset, + h); + auto upper_it = thrust::upper_bound(thrust::seq, + (*edgelist_hops).begin() + start_offset, + (*edgelist_hops).begin() + end_offset, + h); + start_offset = static_cast(thrust::distance((*edgelist_hops).begin(), lower_it)); + end_offset = static_cast(thrust::distance((*edgelist_hops).begin(), upper_it)); + } + + return end_offset - start_offset; + }); + thrust::exclusive_scan(handle.get_thrust_policy(), + (*edgelist_label_hop_offsets).begin(), + (*edgelist_label_hop_offsets).end(), + (*edgelist_label_hop_offsets).begin()); + } + + edgelist_hops = std::nullopt; + + return std::make_tuple(std::move(src_is_major ? edgelist_majors : edgelist_minors), + std::move(src_is_major ? edgelist_minors : edgelist_majors), + std::move(edgelist_weights), + std::move(edgelist_edge_ids), + std::move(edgelist_edge_types), + std::move(edgelist_label_hop_offsets)); +} + +} // namespace cugraph diff --git a/cpp/src/sampling/sampling_post_processing_sg.cu b/cpp/src/sampling/sampling_post_processing_sg.cu new file mode 100644 index 00000000000..75e3c5f005a --- /dev/null +++ b/cpp/src/sampling/sampling_post_processing_sg.cu @@ -0,0 +1,389 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +#include + +#include "sampling_post_processing_impl.cuh" + +namespace cugraph { + +template std::tuple>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_compress_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> label_offsets, + bool src_is_major, + bool compress_per_hop, + bool doubly_compress, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_compress_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> label_offsets, + bool src_is_major, + bool compress_per_hop, + bool doubly_compress, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_compress_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> label_offsets, + bool src_is_major, + bool compress_per_hop, + bool doubly_compress, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_compress_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> label_offsets, + bool src_is_major, + bool compress_per_hop, + bool doubly_compress, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_compress_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> label_offsets, + bool src_is_major, + bool compress_per_hop, + bool doubly_compress, + bool do_expensive_check); + +template std::tuple>, + rmm::device_uvector, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_compress_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + 
rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> label_offsets, + bool src_is_major, + bool compress_per_hop, + bool doubly_compress, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + rmm::device_uvector, + std::optional>> +renumber_and_sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& 
edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +sort_sampled_edgelist( + raft::handle_t const& handle, + rmm::device_uvector&& edgelist_srcs, + rmm::device_uvector&& edgelist_dsts, + std::optional>&& edgelist_weights, + std::optional>&& edgelist_edge_ids, + std::optional>&& edgelist_edge_types, + std::optional, size_t>>&& edgelist_hops, + std::optional, size_t>> edgelist_label_offsets, + bool src_is_major, + bool do_expensive_check); + +} // namespace cugraph diff --git a/cpp/src/structure/renumber_edgelist_impl.cuh b/cpp/src/structure/renumber_edgelist_impl.cuh index d7381ba71af..6bc19ff4fe1 100644 --- a/cpp/src/structure/renumber_edgelist_impl.cuh +++ b/cpp/src/structure/renumber_edgelist_impl.cuh @@ -86,7 +86,7 @@ struct find_unused_id_t { for (size_t i = 
worker_id; i < sorted_local_vertices.size() + size_t{1}; i += num_workers) { auto start = (i == size_t{0}) ? std::numeric_limits::lowest() : sorted_local_vertices[i - size_t{1}]; - if (start != std::numeric_limits::max()) { ++start; }; // now inclusive + if (start != std::numeric_limits::max()) { ++start; }; // now inclusive auto end = (i == sorted_local_vertices.size()) ? std::numeric_limits::max() : sorted_local_vertices[i]; // exclusive for (vertex_t v = start; v < end; ++v) { diff --git a/cpp/src/traversal/bfs_impl.cuh b/cpp/src/traversal/bfs_impl.cuh index 0402184bd93..437071569bf 100644 --- a/cpp/src/traversal/bfs_impl.cuh +++ b/cpp/src/traversal/bfs_impl.cuh @@ -73,7 +73,7 @@ struct e_op_t { if (*(prev_visited_flags + packed_bool_offset(dst)) & packed_bool_mask(dst)) { // check if unvisited in previous iterations push = false; - } else { // check if unvisited in this iteration as well + } else { // check if unvisited in this iteration as well auto old = visited_flags.atomic_or(dst, true); push = !old; } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 6f8c727789f..2a4bb8ab2a5 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -390,9 +390,9 @@ ConfigureTest(UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/sg_uniform_neighbor_sampli target_link_libraries(UNIFORM_NEIGHBOR_SAMPLING_TEST PRIVATE cuco::cuco) ################################################################################################### -# - RENUMBER SAMPLED EDGE LIST tests -------------------------------------------------------------- -ConfigureTest(RENUMBER_SAMPLED_EDGELIST_TEST sampling/renumber_sampled_edgelist_test.cu) -target_link_libraries(RENUMBER_SAMPLED_EDGELIST_TEST PRIVATE cuco::cuco) +# - SAMPLING_POST_PROCESSING tests ---------------------------------------------------------------- +ConfigureTest(SAMPLING_POST_PROCESSING_TEST sampling/sampling_post_processing_test.cu) +target_link_libraries(SAMPLING_POST_PROCESSING_TEST PRIVATE cuco::cuco) ################################################################################################### # - Renumber tests -------------------------------------------------------------------------------- @@ -419,6 +419,14 @@ ConfigureTest(K_HOP_NBRS_TEST traversal/k_hop_nbrs_test.cpp) # - install tests --------------------------------------------------------------------------------- rapids_test_install_relocatable(INSTALL_COMPONENT_SET testing DESTINATION bin/gtests/libcugraph) +################################################################################################### +# - MTMG tests ------------------------------------------------------------------------- +ConfigureTest(MTMG_TEST mtmg/threaded_test.cu) +target_link_libraries(MTMG_TEST + PRIVATE + UCP::UCP + ) + ################################################################################################### # - MG tests -------------------------------------------------------------------------------------- diff --git a/cpp/tests/mtmg/threaded_test.cu b/cpp/tests/mtmg/threaded_test.cu new file mode 100644 index 00000000000..c5dc2d3c7ce --- /dev/null +++ b/cpp/tests/mtmg/threaded_test.cu @@ -0,0 +1,459 @@ +/* + * Copyright (c) 2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include + +#include + +#include + +#include +#include + +struct Multithreaded_Usecase { + bool test_weighted{false}; + bool check_correctness{true}; +}; + +template +class Tests_Multithreaded + : public ::testing::TestWithParam> { + public: + Tests_Multithreaded() {} + + static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + std::vector get_gpu_list() + { + int num_gpus_per_node{1}; + RAFT_CUDA_TRY(cudaGetDeviceCount(&num_gpus_per_node)); + + std::vector gpu_list(num_gpus_per_node); + std::iota(gpu_list.begin(), gpu_list.end(), 0); + + return gpu_list; + } + + template + void run_current_test( + std::tuple const& param, + std::vector gpu_list) + { + using edge_type_t = int32_t; + + constexpr bool renumber = true; + constexpr bool do_expensive_check = false; + + auto [multithreaded_usecase, input_usecase] = param; + + raft::handle_t handle{}; + + result_t constexpr alpha{0.85}; + result_t constexpr epsilon{1e-6}; + + size_t device_buffer_size{64 * 1024 * 1024}; + size_t thread_buffer_size{4 * 1024 * 1024}; + + int num_gpus = gpu_list.size(); + int num_threads = num_gpus * 4; + + cugraph::mtmg::resource_manager_t resource_manager; + + std::for_each(gpu_list.begin(), gpu_list.end(), [&resource_manager](int gpu_id) { + resource_manager.register_local_gpu(gpu_id, rmm::cuda_device_id{gpu_id}); + }); + + ncclUniqueId instance_manager_id; + ncclGetUniqueId(&instance_manager_id); + + auto instance_manager = resource_manager.create_instance_manager( + resource_manager.registered_ranks(), instance_manager_id); + + cugraph::mtmg::edgelist_t edgelist; + cugraph::mtmg::graph_t graph; + cugraph::mtmg::graph_view_t graph_view; + cugraph::mtmg::vertex_result_t pageranks; + std::optional> renumber_map = + std::make_optional>(); + + auto edge_weights = multithreaded_usecase.test_weighted + ? 
std::make_optional, + weight_t>>() + : std::nullopt; + + // + // Simulate graph creation by spawning threads to walk through the + // local COO and add edges + // + std::vector running_threads; + + // Initialize shared edgelist object, one per GPU + for (int i = 0; i < num_gpus; ++i) { + running_threads.emplace_back([&instance_manager, + &edgelist, + device_buffer_size, + use_weight = true, + use_edge_id = false, + use_edge_type = false]() { + auto thread_handle = instance_manager->get_handle(); + + edgelist.set(thread_handle, device_buffer_size, use_weight, use_edge_id, use_edge_type); + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + // Load SG edge list + auto [d_src_v, d_dst_v, d_weights_v, d_vertices_v, is_symmetric] = + input_usecase.template construct_edgelist( + handle, multithreaded_usecase.test_weighted, false, false); + + auto h_src_v = cugraph::test::to_host(handle, d_src_v); + auto h_dst_v = cugraph::test::to_host(handle, d_dst_v); + auto h_weights_v = cugraph::test::to_host(handle, d_weights_v); + auto unique_vertices = cugraph::test::to_host(handle, d_vertices_v); + + // Load edgelist from different threads. We'll use more threads than GPUs here + for (int i = 0; i < num_threads; ++i) { + running_threads.emplace_back([&instance_manager, + thread_buffer_size, + &edgelist, + &h_src_v, + &h_dst_v, + &h_weights_v, + i, + num_threads]() { + auto thread_handle = instance_manager->get_handle(); + cugraph::mtmg::per_thread_edgelist_t + per_thread_edgelist(edgelist.get(thread_handle), thread_buffer_size); + + for (size_t j = i; j < h_src_v.size(); j += num_threads) { +#if 0 + if (h_weights_v) { + thread_edgelist.append( + thread_handle, h_src_v[j], h_dst_v[j], (*h_weights_v)[j], std::nullopt, std::nullopt); + } else { + thread_edgelist.append( + thread_handle, h_src_v[j], h_dst_v[j], std::nullopt, std::nullopt, std::nullopt); + } +#endif + per_thread_edgelist.append( + thread_handle, + h_src_v[j], + h_dst_v[j], + h_weights_v ? 
std::make_optional((*h_weights_v)[j]) : std::nullopt, + std::nullopt, + std::nullopt); + } + + per_thread_edgelist.flush(thread_handle); + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + for (int i = 0; i < num_gpus; ++i) { + running_threads.emplace_back([&instance_manager, + &graph, + &edge_weights, + &edgelist, + &renumber_map, + &pageranks, + is_symmetric = is_symmetric, + renumber, + do_expensive_check]() { + auto thread_handle = instance_manager->get_handle(); + + if (thread_handle.get_thread_rank() > 0) return; + + std::optional, + edge_t>> + edge_ids{std::nullopt}; + std::optional, + int32_t>> + edge_types{std::nullopt}; + + edgelist.finalize_buffer(thread_handle); + edgelist.consolidate_and_shuffle(thread_handle, true); + + cugraph::mtmg:: + create_graph_from_edgelist( + thread_handle, + edgelist, + cugraph::graph_properties_t{is_symmetric, true}, + renumber, + graph, + edge_weights, + edge_ids, + edge_types, + renumber_map, + do_expensive_check); + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + graph_view = graph.view(); + + for (int i = 0; i < num_threads; ++i) { + running_threads.emplace_back( + [&instance_manager, &graph_view, &edge_weights, &pageranks, alpha, epsilon]() { + auto thread_handle = instance_manager->get_handle(); + + if (thread_handle.get_thread_rank() > 0) return; + + auto [local_pageranks, metadata] = + cugraph::pagerank( + thread_handle.raft_handle(), + graph_view.get(thread_handle), + edge_weights ? std::make_optional(edge_weights->get(thread_handle).view()) + : std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + alpha, + epsilon, + 500, + true); + + pageranks.set(thread_handle, std::move(local_pageranks)); + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + std::vector, std::vector>> computed_pageranks_v; + std::mutex computed_pageranks_lock{}; + + auto pageranks_view = pageranks.view(); + auto renumber_map_view = renumber_map ? std::make_optional(renumber_map->view()) : std::nullopt; + + // Load computed_pageranks from different threads. 
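+    // Each spawned thread takes a strided subset of the vertices, gathers their PageRank values
+    // through the MTMG vertex_result_t view (resolving external vertex IDs via the renumber map
+    // view), and appends its (vertex, pagerank) pairs to the shared result vector under
+    // computed_pageranks_lock.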
+ for (int i = 0; i < num_gpus; ++i) { + running_threads.emplace_back([&instance_manager, + &graph_view, + &renumber_map_view, + &pageranks_view, + &computed_pageranks_lock, + &computed_pageranks_v, + &h_src_v, + &h_dst_v, + &h_weights_v, + &unique_vertices, + i, + num_threads]() { + auto thread_handle = instance_manager->get_handle(); + + auto number_of_vertices = unique_vertices->size(); + + std::vector my_vertex_list; + my_vertex_list.reserve((number_of_vertices + num_threads - 1) / num_threads); + + for (size_t j = i; j < number_of_vertices; j += num_threads) { + my_vertex_list.push_back((*unique_vertices)[j]); + } + + rmm::device_uvector d_my_vertex_list(my_vertex_list.size(), + thread_handle.raft_handle().get_stream()); + raft::update_device(d_my_vertex_list.data(), + my_vertex_list.data(), + my_vertex_list.size(), + thread_handle.raft_handle().get_stream()); + + auto d_my_pageranks = pageranks_view.gather( + thread_handle, + raft::device_span{d_my_vertex_list.data(), d_my_vertex_list.size()}, + graph_view, + renumber_map_view); + + std::vector my_pageranks(d_my_pageranks.size()); + raft::update_host(my_pageranks.data(), + d_my_pageranks.data(), + d_my_pageranks.size(), + thread_handle.raft_handle().get_stream()); + + { + std::lock_guard lock(computed_pageranks_lock); + computed_pageranks_v.push_back( + std::make_tuple(std::move(my_vertex_list), std::move(my_pageranks))); + } + }); + } + + // Wait for CPU threads to complete + std::for_each(running_threads.begin(), running_threads.end(), [](auto& t) { t.join(); }); + running_threads.resize(0); + instance_manager->reset_threads(); + + if (multithreaded_usecase.check_correctness) { + // Want to compare the results in computed_pageranks_v with SG results + cugraph::graph_t sg_graph(handle); + std::optional< + cugraph::edge_property_t, weight_t>> + sg_edge_weights{std::nullopt}; + std::optional> sg_renumber_map{std::nullopt}; + + std::tie(sg_graph, sg_edge_weights, std::ignore, std::ignore, sg_renumber_map) = cugraph:: + create_graph_from_edgelist( + handle, + std::nullopt, + std::move(d_src_v), + std::move(d_dst_v), + std::move(d_weights_v), + std::nullopt, + std::nullopt, + cugraph::graph_properties_t{is_symmetric, true}, + true); + + auto [sg_pageranks, meta] = cugraph::pagerank( + handle, + sg_graph.view(), + sg_edge_weights ? 
std::make_optional(sg_edge_weights->view()) : std::nullopt, + std::nullopt, + std::nullopt, + std::nullopt, + alpha, + epsilon); + + auto h_sg_pageranks = cugraph::test::to_host(handle, sg_pageranks); + auto h_sg_renumber_map = cugraph::test::to_host(handle, sg_renumber_map); + auto compare_functor = cugraph::test::nearly_equal{ + weight_t{1e-3}, + weight_t{(weight_t{1} / static_cast(h_sg_pageranks.size())) * weight_t{1e-3}}}; + + std::for_each( + computed_pageranks_v.begin(), + computed_pageranks_v.end(), + [h_sg_pageranks, compare_functor, h_sg_renumber_map](auto t1) { + std::for_each( + thrust::make_zip_iterator(std::get<0>(t1).begin(), std::get<1>(t1).begin()), + thrust::make_zip_iterator(std::get<0>(t1).end(), std::get<1>(t1).end()), + [h_sg_pageranks, compare_functor, h_sg_renumber_map](auto t2) { + vertex_t v = thrust::get<0>(t2); + weight_t pr = thrust::get<1>(t2); + + auto pos = std::find(h_sg_renumber_map->begin(), h_sg_renumber_map->end(), v); + auto offset = std::distance(h_sg_renumber_map->begin(), pos); + + ASSERT_TRUE(compare_functor(pr, h_sg_pageranks[offset])) + << "vertex " << v << ", SG result = " << h_sg_pageranks[offset] + << ", mtmg result = " << pr << ", renumber map = " << (*h_sg_renumber_map)[offset]; + }); + }); + } + } +}; + +using Tests_Multithreaded_File = Tests_Multithreaded; +using Tests_Multithreaded_Rmat = Tests_Multithreaded; + +// FIXME: add tests for type combinations +TEST_P(Tests_Multithreaded_File, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_File_Usecase_with_cmd_line_arguments(GetParam()), std::vector{{0, 1}}); +} + +TEST_P(Tests_Multithreaded_Rmat, CheckInt32Int32FloatFloat) +{ + run_current_test( + override_Rmat_Usecase_with_cmd_line_arguments(GetParam()), std::vector{{0, 1}}); +} + +INSTANTIATE_TEST_SUITE_P(file_test, + Tests_Multithreaded_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values(Multithreaded_Usecase{false, true}, + Multithreaded_Usecase{true, true}), + ::testing::Values(cugraph::test::File_Usecase("karate.csv"), + cugraph::test::File_Usecase("dolphins.csv")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_Multithreaded_Rmat, + ::testing::Combine( + // enable correctness checks + ::testing::Values(Multithreaded_Usecase{false, true}, Multithreaded_Usecase{true, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with + --gtest_filter to select only the file_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one File_Usecase that differ only in filename + (to avoid running same benchmarks more than once) */ + Tests_Multithreaded_File, + ::testing::Combine( + // disable correctness checks + ::testing::Values(Multithreaded_Usecase{false, false}, Multithreaded_Usecase{true, false}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with + --gtest_filter to select only the rmat_benchmark_test with a specific + vertex & edge type combination) by command line arguments and do not + include more than one Rmat_Usecase that differ only in scale or edge + factor (to avoid running same benchmarks more than once) */ + Tests_Multithreaded_Rmat, + ::testing::Combine( + // disable correctness checks for large graphs 
+ ::testing::Values(Multithreaded_Usecase{false, false}, Multithreaded_Usecase{true, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_extract_transform_e.cu b/cpp/tests/prims/mg_extract_transform_e.cu index b71fe5ddb5e..bca6471a5bb 100644 --- a/cpp/tests/prims/mg_extract_transform_e.cu +++ b/cpp/tests/prims/mg_extract_transform_e.cu @@ -157,8 +157,8 @@ class Tests_MGExtractTransformE // 1. create MG graph constexpr bool is_multi_gpu = true; - constexpr bool renumber = true; // needs to be true for multi gpu case - constexpr bool store_transposed = false; // needs to be false for using extract_transform_e + constexpr bool renumber = true; // needs to be true for multi gpu case + constexpr bool store_transposed = false; // needs to be false for using extract_transform_e if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement handle_->get_comms().barrier(); diff --git a/cpp/tests/sampling/renumber_sampled_edgelist_test.cu b/cpp/tests/sampling/renumber_sampled_edgelist_test.cu deleted file mode 100644 index 96c8d6173e7..00000000000 --- a/cpp/tests/sampling/renumber_sampled_edgelist_test.cu +++ /dev/null @@ -1,512 +0,0 @@ -/* - * Copyright (c) 2022-2023, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -#include - -#include -#include -#include -#include - -#include -#include - -#include - -#include -#include -#include -#include -#include -#include -#include -#include - -struct RenumberSampledEdgelist_Usecase { - size_t num_vertices{}; - size_t num_sampled_edges{}; - size_t num_hops{1}; // enabled if larger than 1 - size_t num_labels{1}; // enabled if larger than 1 - bool check_correctness{true}; -}; - -class Tests_RenumberSampledEdgelist - : public ::testing::TestWithParam { - public: - Tests_RenumberSampledEdgelist() {} - - static void SetUpTestCase() {} - static void TearDownTestCase() {} - - virtual void SetUp() {} - virtual void TearDown() {} - - template - void run_current_test(RenumberSampledEdgelist_Usecase const& usecase) - { - using label_t = int32_t; - - raft::handle_t handle{}; - HighResTimer hr_timer{}; - - raft::random::RngState rng_state(0); - - rmm::device_uvector org_edgelist_srcs(usecase.num_sampled_edges, handle.get_stream()); - rmm::device_uvector org_edgelist_dsts(usecase.num_sampled_edges, handle.get_stream()); - cugraph::detail::uniform_random_fill(handle.get_stream(), - org_edgelist_srcs.data(), - org_edgelist_srcs.size(), - vertex_t{0}, - static_cast(usecase.num_vertices), - rng_state); - cugraph::detail::uniform_random_fill(handle.get_stream(), - org_edgelist_dsts.data(), - org_edgelist_dsts.size(), - vertex_t{0}, - static_cast(usecase.num_vertices), - rng_state); - - std::optional> edgelist_hops{std::nullopt}; - if (usecase.num_hops > 1) { - edgelist_hops = rmm::device_uvector(usecase.num_sampled_edges, handle.get_stream()); - cugraph::detail::uniform_random_fill(handle.get_stream(), - (*edgelist_hops).data(), - (*edgelist_hops).size(), - int32_t{0}, - static_cast(usecase.num_hops), - rng_state); - } - - std::optional, rmm::device_uvector>> - label_offsets{std::nullopt}; - if (usecase.num_labels > 1) { - rmm::device_uvector labels(usecase.num_labels, handle.get_stream()); - thrust::sequence(handle.get_thrust_policy(), labels.begin(), labels.end(), label_t{0}); - - rmm::device_uvector edgelist_labels(usecase.num_sampled_edges, handle.get_stream()); - cugraph::detail::uniform_random_fill(handle.get_stream(), - edgelist_labels.data(), - edgelist_labels.size(), - label_t{0}, - static_cast(usecase.num_labels), - rng_state); - - rmm::device_uvector offsets(usecase.num_labels + 1, handle.get_stream()); - thrust::fill(handle.get_thrust_policy(), offsets.begin(), offsets.end(), size_t{0}); - - thrust::for_each( - handle.get_thrust_policy(), - edgelist_labels.begin(), - edgelist_labels.end(), - [offsets = - raft::device_span(offsets.data(), offsets.size())] __device__(label_t label) { - cuda::atomic_ref atomic_counter(offsets[label]); - atomic_counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed); - }); - - thrust::exclusive_scan( - handle.get_thrust_policy(), offsets.begin(), offsets.end(), offsets.begin()); - - label_offsets = std::make_tuple(std::move(labels), std::move(offsets)); - } - - rmm::device_uvector renumbered_edgelist_srcs(org_edgelist_srcs.size(), - handle.get_stream()); - rmm::device_uvector renumbered_edgelist_dsts(org_edgelist_dsts.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - org_edgelist_srcs.begin(), - org_edgelist_srcs.end(), - renumbered_edgelist_srcs.begin()); - thrust::copy(handle.get_thrust_policy(), - org_edgelist_dsts.begin(), - org_edgelist_dsts.end(), - renumbered_edgelist_dsts.begin()); - - rmm::device_uvector renumber_map(0, handle.get_stream()); - std::optional> 
renumber_map_label_offsets{std::nullopt}; - - if (cugraph::test::g_perf) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - hr_timer.start("Renumber sampled edgelist"); - } - - std::tie(renumbered_edgelist_srcs, - renumbered_edgelist_dsts, - renumber_map, - renumber_map_label_offsets) = - cugraph::renumber_sampled_edgelist( - handle, - std::move(renumbered_edgelist_srcs), - std::move(renumbered_edgelist_dsts), - edgelist_hops ? std::make_optional>( - (*edgelist_hops).data(), (*edgelist_hops).size()) - : std::nullopt, - label_offsets - ? std::make_optional< - std::tuple, raft::device_span>>( - std::make_tuple(raft::device_span(std::get<0>(*label_offsets).data(), - std::get<0>(*label_offsets).size()), - raft::device_span(std::get<1>(*label_offsets).data(), - std::get<1>(*label_offsets).size()))) - : std::nullopt); - - if (cugraph::test::g_perf) { - RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement - hr_timer.stop(); - hr_timer.display_and_clear(std::cout); - } - - if (usecase.check_correctness) { - for (size_t i = 0; i < usecase.num_labels; ++i) { - size_t edgelist_start_offset = - label_offsets ? std::get<1>(*label_offsets).element(i, handle.get_stream()) : size_t{0}; - size_t edgelist_end_offset = - label_offsets ? std::get<1>(*label_offsets).element(i + 1, handle.get_stream()) - : usecase.num_sampled_edges; - if (edgelist_start_offset == edgelist_end_offset) continue; - - auto this_label_org_edgelist_srcs = - raft::device_span(org_edgelist_srcs.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_org_edgelist_dsts = - raft::device_span(org_edgelist_dsts.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_edgelist_hops = edgelist_hops - ? std::make_optional>( - (*edgelist_hops).data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset) - : std::nullopt; - auto this_label_renumbered_edgelist_srcs = - raft::device_span(renumbered_edgelist_srcs.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - auto this_label_renumbered_edgelist_dsts = - raft::device_span(renumbered_edgelist_dsts.data() + edgelist_start_offset, - edgelist_end_offset - edgelist_start_offset); - - size_t renumber_map_start_offset = - renumber_map_label_offsets ? (*renumber_map_label_offsets).element(i, handle.get_stream()) - : size_t{0}; - size_t renumber_map_end_offset = - renumber_map_label_offsets - ? 
(*renumber_map_label_offsets).element(i + 1, handle.get_stream()) - : renumber_map.size(); - auto this_label_renumber_map = - raft::device_span(renumber_map.data() + renumber_map_start_offset, - renumber_map_end_offset - renumber_map_start_offset); - - // check un-renumbering recovers the original edge list - - auto pair_first = thrust::make_zip_iterator(this_label_org_edgelist_srcs.begin(), - this_label_renumbered_edgelist_srcs.begin()); - auto num_renumber_errors = - thrust::count_if(handle.get_thrust_policy(), - pair_first, - pair_first + this_label_org_edgelist_srcs.size(), - [this_label_renumber_map] __device__(auto pair) { - auto org = thrust::get<0>(pair); - auto renumbered = thrust::get<1>(pair); - return this_label_renumber_map[renumbered] != org; - }); - ASSERT_TRUE(num_renumber_errors == 0) << "Renumber error in edge list sources."; - - pair_first = thrust::make_zip_iterator(this_label_org_edgelist_dsts.begin(), - this_label_renumbered_edgelist_dsts.begin()); - num_renumber_errors = thrust::count_if(handle.get_thrust_policy(), - pair_first, - pair_first + this_label_org_edgelist_dsts.size(), - [this_label_renumber_map] __device__(auto pair) { - auto org = thrust::get<0>(pair); - auto renumbered = thrust::get<1>(pair); - return this_label_renumber_map[renumbered] != org; - }); - ASSERT_TRUE(num_renumber_errors == 0) << "Renumber error in edge list destinations."; - - // Check the invariants in renumber_map - // Say we found the minimum (primary key:hop, secondary key:flag) pairs for every unique - // vertices, where flag is 0 for sources and 1 for destinations. Then, vertices with smaller - // (hop, flag) pairs should be renumbered to smaller numbers than vertices with larger (hop, - // flag) pairs. - - rmm::device_uvector unique_srcs(this_label_org_edgelist_srcs.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - this_label_org_edgelist_srcs.begin(), - this_label_org_edgelist_srcs.end(), - unique_srcs.begin()); - std::optional> unique_src_hops = - this_label_edgelist_hops ? std::make_optional>( - (*this_label_edgelist_hops).size(), handle.get_stream()) - : std::nullopt; - if (this_label_edgelist_hops) { - thrust::copy(handle.get_thrust_policy(), - (*this_label_edgelist_hops).begin(), - (*this_label_edgelist_hops).end(), - (*unique_src_hops).begin()); - - auto pair_first = - thrust::make_zip_iterator(unique_srcs.begin(), (*unique_src_hops).begin()); - thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + unique_srcs.size()); - unique_srcs.resize( - thrust::distance(unique_srcs.begin(), - thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), - unique_srcs.begin(), - unique_srcs.end(), - (*unique_src_hops).begin()))), - handle.get_stream()); - (*unique_src_hops).resize(unique_srcs.size(), handle.get_stream()); - } else { - thrust::sort(handle.get_thrust_policy(), unique_srcs.begin(), unique_srcs.end()); - unique_srcs.resize( - thrust::distance( - unique_srcs.begin(), - thrust::unique(handle.get_thrust_policy(), unique_srcs.begin(), unique_srcs.end())), - handle.get_stream()); - } - - rmm::device_uvector unique_dsts(this_label_org_edgelist_dsts.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - this_label_org_edgelist_dsts.begin(), - this_label_org_edgelist_dsts.end(), - unique_dsts.begin()); - std::optional> unique_dst_hops = - this_label_edgelist_hops ? 
std::make_optional>( - (*this_label_edgelist_hops).size(), handle.get_stream()) - : std::nullopt; - if (this_label_edgelist_hops) { - thrust::copy(handle.get_thrust_policy(), - (*this_label_edgelist_hops).begin(), - (*this_label_edgelist_hops).end(), - (*unique_dst_hops).begin()); - - auto pair_first = - thrust::make_zip_iterator(unique_dsts.begin(), (*unique_dst_hops).begin()); - thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + unique_dsts.size()); - unique_dsts.resize( - thrust::distance(unique_dsts.begin(), - thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), - unique_dsts.begin(), - unique_dsts.end(), - (*unique_dst_hops).begin()))), - handle.get_stream()); - (*unique_dst_hops).resize(unique_dsts.size(), handle.get_stream()); - } else { - thrust::sort(handle.get_thrust_policy(), unique_dsts.begin(), unique_dsts.end()); - unique_dsts.resize( - thrust::distance( - unique_dsts.begin(), - thrust::unique(handle.get_thrust_policy(), unique_dsts.begin(), unique_dsts.end())), - handle.get_stream()); - } - - rmm::device_uvector sorted_org_vertices(this_label_renumber_map.size(), - handle.get_stream()); - rmm::device_uvector matching_renumbered_vertices(sorted_org_vertices.size(), - handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - this_label_renumber_map.begin(), - this_label_renumber_map.end(), - sorted_org_vertices.begin()); - thrust::sequence(handle.get_thrust_policy(), - matching_renumbered_vertices.begin(), - matching_renumbered_vertices.end(), - vertex_t{0}); - thrust::sort_by_key(handle.get_thrust_policy(), - sorted_org_vertices.begin(), - sorted_org_vertices.end(), - matching_renumbered_vertices.begin()); - - if (this_label_edgelist_hops) { - rmm::device_uvector merged_vertices(unique_srcs.size() + unique_dsts.size(), - handle.get_stream()); - rmm::device_uvector merged_hops(merged_vertices.size(), handle.get_stream()); - rmm::device_uvector merged_flags(merged_vertices.size(), handle.get_stream()); - - auto src_triplet_first = - thrust::make_zip_iterator(unique_srcs.begin(), - (*unique_src_hops).begin(), - thrust::make_constant_iterator(int8_t{0})); - auto dst_triplet_first = - thrust::make_zip_iterator(unique_dsts.begin(), - (*unique_dst_hops).begin(), - thrust::make_constant_iterator(int8_t{1})); - thrust::merge(handle.get_thrust_policy(), - src_triplet_first, - src_triplet_first + unique_srcs.size(), - dst_triplet_first, - dst_triplet_first + unique_dsts.size(), - thrust::make_zip_iterator( - merged_vertices.begin(), merged_hops.begin(), merged_flags.begin())); - merged_vertices.resize( - thrust::distance( - merged_vertices.begin(), - thrust::get<0>(thrust::unique_by_key( - handle.get_thrust_policy(), - merged_vertices.begin(), - merged_vertices.end(), - thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), - handle.get_stream()); - merged_hops.resize(merged_vertices.size(), handle.get_stream()); - merged_flags.resize(merged_vertices.size(), handle.get_stream()); - - auto sort_key_first = - thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); - thrust::sort_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_hops.size(), - merged_vertices.begin()); - - auto num_unique_keys = thrust::count_if( - handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{0}), - thrust::make_counting_iterator(merged_hops.size()), - cugraph::detail::is_first_in_run_t{sort_key_first}); - rmm::device_uvector min_vertices(num_unique_keys, handle.get_stream()); - rmm::device_uvector 
max_vertices(num_unique_keys, handle.get_stream()); - - auto renumbered_merged_vertex_first = thrust::make_transform_iterator( - merged_vertices.begin(), - [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t src) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), src); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), - it)]; - }); - - thrust::reduce_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_hops.size(), - renumbered_merged_vertex_first, - thrust::make_discard_iterator(), - min_vertices.begin(), - thrust::equal_to>{}, - thrust::minimum{}); - thrust::reduce_by_key(handle.get_thrust_policy(), - sort_key_first, - sort_key_first + merged_hops.size(), - renumbered_merged_vertex_first, - thrust::make_discard_iterator(), - max_vertices.begin(), - thrust::equal_to>{}, - thrust::maximum{}); - - auto num_violations = - thrust::count_if(handle.get_thrust_policy(), - thrust::make_counting_iterator(size_t{1}), - thrust::make_counting_iterator(min_vertices.size()), - [min_vertices = raft::device_span(min_vertices.data(), - min_vertices.size()), - max_vertices = raft::device_span( - max_vertices.data(), max_vertices.size())] __device__(size_t i) { - return min_vertices[i] <= max_vertices[i - 1]; - }); - - ASSERT_TRUE(num_violations == 0) - << "Invariant violated, a vertex with a smaller (hop,flag) pair is renumbered to a " - "larger value than a vertex with a larger (hop, flag) pair."; - } else { - unique_dsts.resize( - thrust::distance( - unique_dsts.begin(), - thrust::remove_if(handle.get_thrust_policy(), - unique_dsts.begin(), - unique_dsts.end(), - [sorted_unique_srcs = raft::device_span( - unique_srcs.data(), unique_srcs.size())] __device__(auto dst) { - return thrust::binary_search(thrust::seq, - sorted_unique_srcs.begin(), - sorted_unique_srcs.end(), - dst); - })), - handle.get_stream()); - - auto max_src_renumbered_vertex = thrust::transform_reduce( - handle.get_thrust_policy(), - unique_srcs.begin(), - unique_srcs.end(), - [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t src) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), src); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), - it)]; - }, - std::numeric_limits::lowest(), - thrust::maximum{}); - - auto min_dst_renumbered_vertex = thrust::transform_reduce( - handle.get_thrust_policy(), - unique_dsts.begin(), - unique_dsts.end(), - [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), - sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t dst) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), dst); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), - it)]; - }, - std::numeric_limits::max(), - thrust::minimum{}); - - ASSERT_TRUE(max_src_renumbered_vertex < min_dst_renumbered_vertex) - << "Invariants violated, a 
source vertex is renumbered to a non-smaller value than a " - "vertex that appear only in the edge list destinations."; - } - } - } - } -}; - -TEST_P(Tests_RenumberSampledEdgelist, CheckInt32) -{ - auto param = GetParam(); - run_current_test(param); -} - -TEST_P(Tests_RenumberSampledEdgelist, CheckInt64) -{ - auto param = GetParam(); - run_current_test(param); -} - -INSTANTIATE_TEST_SUITE_P( - small_test, - Tests_RenumberSampledEdgelist, - ::testing::Values(RenumberSampledEdgelist_Usecase{1024, 4096, 1, 1, true}, - RenumberSampledEdgelist_Usecase{1024, 4096, 3, 1, true}, - RenumberSampledEdgelist_Usecase{1024, 32768, 1, 256, true}, - RenumberSampledEdgelist_Usecase{1024, 32768, 3, 256, true})); - -INSTANTIATE_TEST_SUITE_P( - benchmark_test, - Tests_RenumberSampledEdgelist, - ::testing::Values(RenumberSampledEdgelist_Usecase{1 << 20, 1 << 20, 1, 1, false}, - RenumberSampledEdgelist_Usecase{1 << 20, 1 << 20, 5, 1, false}, - RenumberSampledEdgelist_Usecase{1 << 20, 1 << 24, 1, 1 << 20, false}, - RenumberSampledEdgelist_Usecase{1 << 20, 1 << 24, 5, 1 << 20, false})); - -CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu new file mode 100644 index 00000000000..422fe953b20 --- /dev/null +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -0,0 +1,1457 @@ +/* + * Copyright (c) 2022-2023, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct SamplingPostProcessing_Usecase { + size_t num_labels{}; + size_t num_seeds_per_label{}; + std::vector fanouts{{-1}}; + bool sample_with_replacement{false}; + + bool src_is_major{true}; + bool compress_per_hop{false}; + bool doubly_compress{false}; + bool check_correctness{true}; +}; + +template +bool compare_edgelist(raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_weights, + raft::device_span renumbered_edgelist_srcs, + raft::device_span renumbered_edgelist_dsts, + std::optional> renumbered_edgelist_weights, + std::optional> renumber_map) +{ + if (org_edgelist_srcs.size() != renumbered_edgelist_srcs.size()) { return false; } + + rmm::device_uvector sorted_org_edgelist_srcs(org_edgelist_srcs.size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + org_edgelist_srcs.begin(), + org_edgelist_srcs.end(), + sorted_org_edgelist_srcs.begin()); + rmm::device_uvector sorted_org_edgelist_dsts(org_edgelist_dsts.size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + org_edgelist_dsts.begin(), + org_edgelist_dsts.end(), + sorted_org_edgelist_dsts.begin()); + auto sorted_org_edgelist_weights = org_edgelist_weights + ? 
std::make_optional>( + (*org_edgelist_weights).size(), handle.get_stream()) + : std::nullopt; + if (sorted_org_edgelist_weights) { + thrust::copy(handle.get_thrust_policy(), + (*org_edgelist_weights).begin(), + (*org_edgelist_weights).end(), + (*sorted_org_edgelist_weights).begin()); + } + + if (sorted_org_edgelist_weights) { + auto sorted_org_edge_first = thrust::make_zip_iterator(sorted_org_edgelist_srcs.begin(), + sorted_org_edgelist_dsts.begin(), + (*sorted_org_edgelist_weights).begin()); + thrust::sort(handle.get_thrust_policy(), + sorted_org_edge_first, + sorted_org_edge_first + sorted_org_edgelist_srcs.size()); + } else { + auto sorted_org_edge_first = + thrust::make_zip_iterator(sorted_org_edgelist_srcs.begin(), sorted_org_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + sorted_org_edge_first, + sorted_org_edge_first + sorted_org_edgelist_srcs.size()); + } + + rmm::device_uvector sorted_unrenumbered_edgelist_srcs(renumbered_edgelist_srcs.size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + renumbered_edgelist_srcs.begin(), + renumbered_edgelist_srcs.end(), + sorted_unrenumbered_edgelist_srcs.begin()); + rmm::device_uvector sorted_unrenumbered_edgelist_dsts(renumbered_edgelist_dsts.size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + renumbered_edgelist_dsts.begin(), + renumbered_edgelist_dsts.end(), + sorted_unrenumbered_edgelist_dsts.begin()); + auto sorted_unrenumbered_edgelist_weights = + renumbered_edgelist_weights ? std::make_optional>( + (*renumbered_edgelist_weights).size(), handle.get_stream()) + : std::nullopt; + if (sorted_unrenumbered_edgelist_weights) { + thrust::copy(handle.get_thrust_policy(), + (*renumbered_edgelist_weights).begin(), + (*renumbered_edgelist_weights).end(), + (*sorted_unrenumbered_edgelist_weights).begin()); + } + + if (renumber_map) { + cugraph::unrenumber_int_vertices( + handle, + sorted_unrenumbered_edgelist_srcs.data(), + sorted_unrenumbered_edgelist_srcs.size(), + (*renumber_map).data(), + std::vector{static_cast((*renumber_map).size())}); + cugraph::unrenumber_int_vertices( + handle, + sorted_unrenumbered_edgelist_dsts.data(), + sorted_unrenumbered_edgelist_dsts.size(), + (*renumber_map).data(), + std::vector{static_cast((*renumber_map).size())}); + } + + if (sorted_unrenumbered_edgelist_weights) { + auto sorted_unrenumbered_edge_first = + thrust::make_zip_iterator(sorted_unrenumbered_edgelist_srcs.begin(), + sorted_unrenumbered_edgelist_dsts.begin(), + (*sorted_unrenumbered_edgelist_weights).begin()); + thrust::sort(handle.get_thrust_policy(), + sorted_unrenumbered_edge_first, + sorted_unrenumbered_edge_first + sorted_unrenumbered_edgelist_srcs.size()); + + auto sorted_org_edge_first = thrust::make_zip_iterator(sorted_org_edgelist_srcs.begin(), + sorted_org_edgelist_dsts.begin(), + (*sorted_org_edgelist_weights).begin()); + return thrust::equal(handle.get_thrust_policy(), + sorted_org_edge_first, + sorted_org_edge_first + sorted_org_edgelist_srcs.size(), + sorted_unrenumbered_edge_first); + } else { + auto sorted_unrenumbered_edge_first = thrust::make_zip_iterator( + sorted_unrenumbered_edgelist_srcs.begin(), sorted_unrenumbered_edgelist_dsts.begin()); + thrust::sort(handle.get_thrust_policy(), + sorted_unrenumbered_edge_first, + sorted_unrenumbered_edge_first + sorted_unrenumbered_edgelist_srcs.size()); + + auto sorted_org_edge_first = + thrust::make_zip_iterator(sorted_org_edgelist_srcs.begin(), sorted_org_edgelist_dsts.begin()); + return 
thrust::equal(handle.get_thrust_policy(), + sorted_org_edge_first, + sorted_org_edge_first + sorted_org_edgelist_srcs.size(), + sorted_unrenumbered_edge_first); + } +} + +template +bool check_renumber_map_invariants( + raft::handle_t const& handle, + raft::device_span org_edgelist_srcs, + raft::device_span org_edgelist_dsts, + std::optional> org_edgelist_hops, + raft::device_span renumber_map, + bool src_is_major) +{ + // Check the invariants in renumber_map + // Say we found the minimum (primary key:hop, secondary key:flag) pairs for every unique vertices, + // where flag is 0 for sources and 1 for destinations. Then, vertices with smaller (hop, flag) + // pairs should be renumbered to smaller numbers than vertices with larger (hop, flag) pairs. + auto org_edgelist_majors = src_is_major ? org_edgelist_srcs : org_edgelist_dsts; + auto org_edgelist_minors = src_is_major ? org_edgelist_dsts : org_edgelist_srcs; + + rmm::device_uvector unique_majors(org_edgelist_majors.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + org_edgelist_majors.begin(), + org_edgelist_majors.end(), + unique_majors.begin()); + std::optional> unique_major_hops = + org_edgelist_hops ? std::make_optional>( + (*org_edgelist_hops).size(), handle.get_stream()) + : std::nullopt; + if (org_edgelist_hops) { + thrust::copy(handle.get_thrust_policy(), + (*org_edgelist_hops).begin(), + (*org_edgelist_hops).end(), + (*unique_major_hops).begin()); + + auto pair_first = + thrust::make_zip_iterator(unique_majors.begin(), (*unique_major_hops).begin()); + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + unique_majors.size()); + unique_majors.resize( + thrust::distance(unique_majors.begin(), + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + unique_majors.begin(), + unique_majors.end(), + (*unique_major_hops).begin()))), + handle.get_stream()); + (*unique_major_hops).resize(unique_majors.size(), handle.get_stream()); + } else { + thrust::sort(handle.get_thrust_policy(), unique_majors.begin(), unique_majors.end()); + unique_majors.resize( + thrust::distance( + unique_majors.begin(), + thrust::unique(handle.get_thrust_policy(), unique_majors.begin(), unique_majors.end())), + handle.get_stream()); + } + + rmm::device_uvector unique_minors(org_edgelist_minors.size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + org_edgelist_minors.begin(), + org_edgelist_minors.end(), + unique_minors.begin()); + std::optional> unique_minor_hops = + org_edgelist_hops ? 
std::make_optional>( + (*org_edgelist_hops).size(), handle.get_stream()) + : std::nullopt; + if (org_edgelist_hops) { + thrust::copy(handle.get_thrust_policy(), + (*org_edgelist_hops).begin(), + (*org_edgelist_hops).end(), + (*unique_minor_hops).begin()); + + auto pair_first = + thrust::make_zip_iterator(unique_minors.begin(), (*unique_minor_hops).begin()); + thrust::sort(handle.get_thrust_policy(), pair_first, pair_first + unique_minors.size()); + unique_minors.resize( + thrust::distance(unique_minors.begin(), + thrust::get<0>(thrust::unique_by_key(handle.get_thrust_policy(), + unique_minors.begin(), + unique_minors.end(), + (*unique_minor_hops).begin()))), + handle.get_stream()); + (*unique_minor_hops).resize(unique_minors.size(), handle.get_stream()); + } else { + thrust::sort(handle.get_thrust_policy(), unique_minors.begin(), unique_minors.end()); + unique_minors.resize( + thrust::distance( + unique_minors.begin(), + thrust::unique(handle.get_thrust_policy(), unique_minors.begin(), unique_minors.end())), + handle.get_stream()); + } + + rmm::device_uvector sorted_org_vertices(renumber_map.size(), handle.get_stream()); + rmm::device_uvector matching_renumbered_vertices(sorted_org_vertices.size(), + handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + renumber_map.begin(), + renumber_map.end(), + sorted_org_vertices.begin()); + thrust::sequence(handle.get_thrust_policy(), + matching_renumbered_vertices.begin(), + matching_renumbered_vertices.end(), + vertex_t{0}); + thrust::sort_by_key(handle.get_thrust_policy(), + sorted_org_vertices.begin(), + sorted_org_vertices.end(), + matching_renumbered_vertices.begin()); + + if (org_edgelist_hops) { + rmm::device_uvector merged_vertices(unique_majors.size() + unique_minors.size(), + handle.get_stream()); + rmm::device_uvector merged_hops(merged_vertices.size(), handle.get_stream()); + rmm::device_uvector merged_flags(merged_vertices.size(), handle.get_stream()); + + auto major_triplet_first = thrust::make_zip_iterator(unique_majors.begin(), + (*unique_major_hops).begin(), + thrust::make_constant_iterator(int8_t{0})); + auto minor_triplet_first = thrust::make_zip_iterator(unique_minors.begin(), + (*unique_minor_hops).begin(), + thrust::make_constant_iterator(int8_t{1})); + thrust::merge(handle.get_thrust_policy(), + major_triplet_first, + major_triplet_first + unique_majors.size(), + minor_triplet_first, + minor_triplet_first + unique_minors.size(), + thrust::make_zip_iterator( + merged_vertices.begin(), merged_hops.begin(), merged_flags.begin())); + merged_vertices.resize( + thrust::distance(merged_vertices.begin(), + thrust::get<0>(thrust::unique_by_key( + handle.get_thrust_policy(), + merged_vertices.begin(), + merged_vertices.end(), + thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin())))), + handle.get_stream()); + merged_hops.resize(merged_vertices.size(), handle.get_stream()); + merged_flags.resize(merged_vertices.size(), handle.get_stream()); + + auto sort_key_first = thrust::make_zip_iterator(merged_hops.begin(), merged_flags.begin()); + thrust::sort_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + merged_vertices.begin()); + + auto num_unique_keys = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{0}), + thrust::make_counting_iterator(merged_hops.size()), + cugraph::detail::is_first_in_run_t{sort_key_first}); + rmm::device_uvector min_vertices(num_unique_keys, handle.get_stream()); + rmm::device_uvector 
max_vertices(num_unique_keys, handle.get_stream()); + + auto renumbered_merged_vertex_first = thrust::make_transform_iterator( + merged_vertices.begin(), + [sorted_org_vertices = + raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t major) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + }); + + thrust::reduce_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + renumbered_merged_vertex_first, + thrust::make_discard_iterator(), + min_vertices.begin(), + thrust::equal_to>{}, + thrust::minimum{}); + thrust::reduce_by_key(handle.get_thrust_policy(), + sort_key_first, + sort_key_first + merged_hops.size(), + renumbered_merged_vertex_first, + thrust::make_discard_iterator(), + max_vertices.begin(), + thrust::equal_to>{}, + thrust::maximum{}); + + auto num_violations = thrust::count_if( + handle.get_thrust_policy(), + thrust::make_counting_iterator(size_t{1}), + thrust::make_counting_iterator(min_vertices.size()), + [min_vertices = raft::device_span(min_vertices.data(), min_vertices.size()), + max_vertices = raft::device_span(max_vertices.data(), + max_vertices.size())] __device__(size_t i) { + return min_vertices[i] <= max_vertices[i - 1]; + }); + + return (num_violations == 0); + } else { + unique_minors.resize( + thrust::distance( + unique_minors.begin(), + thrust::remove_if(handle.get_thrust_policy(), + unique_minors.begin(), + unique_minors.end(), + [sorted_unique_majors = raft::device_span( + unique_majors.data(), unique_majors.size())] __device__(auto minor) { + return thrust::binary_search(thrust::seq, + sorted_unique_majors.begin(), + sorted_unique_majors.end(), + minor); + })), + handle.get_stream()); + + auto max_major_renumbered_vertex = thrust::transform_reduce( + handle.get_thrust_policy(), + unique_majors.begin(), + unique_majors.end(), + [sorted_org_vertices = + raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t major) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + }, + std::numeric_limits::lowest(), + thrust::maximum{}); + + auto min_minor_renumbered_vertex = thrust::transform_reduce( + handle.get_thrust_policy(), + unique_minors.begin(), + unique_minors.end(), + [sorted_org_vertices = + raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t minor) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), minor); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + }, + std::numeric_limits::max(), + thrust::minimum{}); + + return (max_major_renumbered_vertex < min_minor_renumbered_vertex); + } +} + +template +class Tests_SamplingPostProcessing + : public ::testing::TestWithParam> { + public: + Tests_SamplingPostProcessing() {} + + 
static void SetUpTestCase() {} + static void TearDownTestCase() {} + + virtual void SetUp() {} + virtual void TearDown() {} + + template + void run_current_test( + std::tuple const& param) + { + using label_t = int32_t; + using weight_t = float; + using edge_id_t = vertex_t; + using edge_type_t = int32_t; + + bool constexpr store_transposed = false; + bool constexpr renumber = true; + bool constexpr test_weighted = true; + + auto [sampling_post_processing_usecase, input_usecase] = param; + + raft::handle_t handle{}; + HighResTimer hr_timer{}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Construct graph"); + } + + auto [graph, edge_weights, d_renumber_map_labels] = + cugraph::test::construct_graph( + handle, input_usecase, test_weighted, renumber); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + auto graph_view = graph.view(); + auto edge_weight_view = + edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt; + + raft::random::RngState rng_state(0); + + rmm::device_uvector starting_vertices( + sampling_post_processing_usecase.num_labels * + sampling_post_processing_usecase.num_seeds_per_label, + handle.get_stream()); + cugraph::detail::uniform_random_fill(handle.get_stream(), + starting_vertices.data(), + starting_vertices.size(), + vertex_t{0}, + graph_view.number_of_vertices(), + rng_state); + auto starting_vertex_labels = (sampling_post_processing_usecase.num_labels > 1) + ? std::make_optional>( + starting_vertices.size(), handle.get_stream()) + : std::nullopt; + if (starting_vertex_labels) { + thrust::tabulate( + handle.get_thrust_policy(), + (*starting_vertex_labels).begin(), + (*starting_vertex_labels).end(), + [num_seeds_per_label = sampling_post_processing_usecase.num_seeds_per_label] __device__( + size_t i) { return static_cast(i / num_seeds_per_label); }); + } + + rmm::device_uvector org_edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector org_edgelist_dsts(0, handle.get_stream()); + std::optional> org_edgelist_weights{std::nullopt}; + std::optional> org_edgelist_hops{std::nullopt}; + std::optional> org_labels{std::nullopt}; + std::optional> org_edgelist_label_offsets{std::nullopt}; + std::tie(org_edgelist_srcs, + org_edgelist_dsts, + org_edgelist_weights, + std::ignore, + std::ignore, + org_edgelist_hops, + org_labels, + org_edgelist_label_offsets) = cugraph::uniform_neighbor_sample( + handle, + graph_view, + edge_weight_view, + std::nullopt, + std::nullopt, + raft::device_span(starting_vertices.data(), starting_vertices.size()), + starting_vertex_labels ? std::make_optional>( + (*starting_vertex_labels).data(), (*starting_vertex_labels).size()) + : std::nullopt, + std::nullopt, + raft::host_span(sampling_post_processing_usecase.fanouts.data(), + sampling_post_processing_usecase.fanouts.size()), + rng_state, + sampling_post_processing_usecase.fanouts.size() > 1, + sampling_post_processing_usecase.sample_with_replacement, + (!sampling_post_processing_usecase.compress_per_hop && + (sampling_post_processing_usecase.fanouts.size() > 1)) + ? 
cugraph::prior_sources_behavior_t::EXCLUDE + : cugraph::prior_sources_behavior_t::DEFAULT, + false); + + if (!sampling_post_processing_usecase.src_is_major) { + std::swap(org_edgelist_srcs, org_edgelist_dsts); + } + + starting_vertices.resize(0, handle.get_stream()); + starting_vertices.shrink_to_fit(handle.get_stream()); + starting_vertex_labels = std::nullopt; + + { + rmm::device_uvector renumbered_and_sorted_edgelist_srcs(org_edgelist_srcs.size(), + handle.get_stream()); + rmm::device_uvector renumbered_and_sorted_edgelist_dsts(org_edgelist_dsts.size(), + handle.get_stream()); + auto renumbered_and_sorted_edgelist_weights = + org_edgelist_weights ? std::make_optional>( + (*org_edgelist_weights).size(), handle.get_stream()) + : std::nullopt; + std::optional> renumbered_and_sorted_edgelist_edge_ids{ + std::nullopt}; + std::optional> renumbered_and_sorted_edgelist_edge_types{ + std::nullopt}; + auto renumbered_and_sorted_edgelist_hops = + org_edgelist_hops + ? std::make_optional(std::make_tuple( + rmm::device_uvector((*org_edgelist_hops).size(), handle.get_stream()), + sampling_post_processing_usecase.fanouts.size())) + : std::nullopt; + + raft::copy(renumbered_and_sorted_edgelist_srcs.data(), + org_edgelist_srcs.data(), + org_edgelist_srcs.size(), + handle.get_stream()); + raft::copy(renumbered_and_sorted_edgelist_dsts.data(), + org_edgelist_dsts.data(), + org_edgelist_dsts.size(), + handle.get_stream()); + if (renumbered_and_sorted_edgelist_weights) { + raft::copy((*renumbered_and_sorted_edgelist_weights).data(), + (*org_edgelist_weights).data(), + (*org_edgelist_weights).size(), + handle.get_stream()); + } + if (renumbered_and_sorted_edgelist_hops) { + raft::copy(std::get<0>(*renumbered_and_sorted_edgelist_hops).data(), + (*org_edgelist_hops).data(), + (*org_edgelist_hops).size(), + handle.get_stream()); + } + + std::optional> renumbered_and_sorted_edgelist_label_hop_offsets{ + std::nullopt}; + rmm::device_uvector renumbered_and_sorted_renumber_map(0, handle.get_stream()); + std::optional> renumbered_and_sorted_renumber_map_label_offsets{ + std::nullopt}; + + { + size_t free_size{}; + size_t total_size{}; + RAFT_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); + std::cout << "free_size=" << free_size / (1024.0 * 1024.0 * 1024.0) + << "GB total_size=" << total_size / (1024.0 * 1024.0 * 1024.0) << "GB." + << std::endl; + } + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Renumber and sort sampled edgelist"); + } + + std::tie(renumbered_and_sorted_edgelist_srcs, + renumbered_and_sorted_edgelist_dsts, + renumbered_and_sorted_edgelist_weights, + renumbered_and_sorted_edgelist_edge_ids, + renumbered_and_sorted_edgelist_edge_types, + renumbered_and_sorted_edgelist_label_hop_offsets, + renumbered_and_sorted_renumber_map, + renumbered_and_sorted_renumber_map_label_offsets) = + cugraph::renumber_and_sort_sampled_edgelist( + handle, + std::move(renumbered_and_sorted_edgelist_srcs), + std::move(renumbered_and_sorted_edgelist_dsts), + std::move(renumbered_and_sorted_edgelist_weights), + std::move(renumbered_and_sorted_edgelist_edge_ids), + std::move(renumbered_and_sorted_edgelist_edge_types), + std::move(renumbered_and_sorted_edgelist_hops), + org_edgelist_label_offsets + ? 
std::make_optional(std::make_tuple( + raft::device_span((*org_edgelist_label_offsets).data(), + (*org_edgelist_label_offsets).size()), + sampling_post_processing_usecase.num_labels)) + : std::nullopt, + sampling_post_processing_usecase.src_is_major); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (sampling_post_processing_usecase.check_correctness) { + if (renumbered_and_sorted_edgelist_label_hop_offsets) { + ASSERT_TRUE((*renumbered_and_sorted_edgelist_label_hop_offsets).size() == + sampling_post_processing_usecase.num_labels * + sampling_post_processing_usecase.fanouts.size() + + 1) + << "Renumbered and sorted edge list (label,hop) offset array size should coincide with " + "the number of labels * the number of hops + 1."; + + ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), + (*renumbered_and_sorted_edgelist_label_hop_offsets).begin(), + (*renumbered_and_sorted_edgelist_label_hop_offsets).end())) + << "Renumbered and sorted edge list (label,hop) offset array values should be " + "non-decreasing."; + } + + if (renumbered_and_sorted_renumber_map_label_offsets) { + ASSERT_TRUE((*renumbered_and_sorted_renumber_map_label_offsets).size() == + sampling_post_processing_usecase.num_labels + 1) + << "Renumbered and sorted offset (label, hop) offset array size should coincide with " + "the number of labels + 1."; + + ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), + (*renumbered_and_sorted_renumber_map_label_offsets).begin(), + (*renumbered_and_sorted_renumber_map_label_offsets).end())) + << "Renumbered and sorted renumber map label offset array values should be " + "non-decreasing."; + + ASSERT_TRUE( + (*renumbered_and_sorted_renumber_map_label_offsets).back_element(handle.get_stream()) == + renumbered_and_sorted_renumber_map.size()) + << "Renumbered and sorted renumber map label offset array's last value should coincide " + "with the renumber map size."; + } + + for (size_t i = 0; i < sampling_post_processing_usecase.num_labels; ++i) { + size_t edgelist_start_offset = + org_edgelist_label_offsets + ? (*org_edgelist_label_offsets).element(i, handle.get_stream()) + : size_t{0}; + size_t edgelist_end_offset = + org_edgelist_label_offsets + ? (*org_edgelist_label_offsets).element(i + 1, handle.get_stream()) + : org_edgelist_srcs.size(); + if (edgelist_start_offset == edgelist_end_offset) continue; + + auto this_label_org_edgelist_srcs = + raft::device_span(org_edgelist_srcs.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_org_edgelist_dsts = + raft::device_span(org_edgelist_dsts.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_org_edgelist_hops = + org_edgelist_hops ? std::make_optional>( + (*org_edgelist_hops).data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset) + : std::nullopt; + auto this_label_org_edgelist_weights = + org_edgelist_weights ? 
std::make_optional>( + (*org_edgelist_weights).data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset) + : std::nullopt; + + auto this_label_output_edgelist_srcs = raft::device_span( + renumbered_and_sorted_edgelist_srcs.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_output_edgelist_dsts = raft::device_span( + renumbered_and_sorted_edgelist_dsts.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_output_edgelist_weights = + renumbered_and_sorted_edgelist_weights + ? std::make_optional>( + (*renumbered_and_sorted_edgelist_weights).data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset) + : std::nullopt; + + size_t renumber_map_start_offset = + renumbered_and_sorted_renumber_map_label_offsets + ? (*renumbered_and_sorted_renumber_map_label_offsets).element(i, handle.get_stream()) + : size_t{0}; + size_t renumber_map_end_offset = renumbered_and_sorted_renumber_map_label_offsets + ? (*renumbered_and_sorted_renumber_map_label_offsets) + .element(i + 1, handle.get_stream()) + : renumbered_and_sorted_renumber_map.size(); + auto this_label_output_renumber_map = raft::device_span( + renumbered_and_sorted_renumber_map.data() + renumber_map_start_offset, + renumber_map_end_offset - renumber_map_start_offset); + + // check whether the edges are properly sorted + + auto this_label_output_edgelist_majors = sampling_post_processing_usecase.src_is_major + ? this_label_output_edgelist_srcs + : this_label_output_edgelist_dsts; + auto this_label_output_edgelist_minors = sampling_post_processing_usecase.src_is_major + ? this_label_output_edgelist_dsts + : this_label_output_edgelist_srcs; + + if (this_label_org_edgelist_hops) { + auto num_hops = sampling_post_processing_usecase.fanouts.size(); + auto edge_first = thrust::make_zip_iterator(this_label_output_edgelist_majors.begin(), + this_label_output_edgelist_minors.begin()); + for (size_t j = 0; j < num_hops; ++j) { + auto hop_start_offset = (*renumbered_and_sorted_edgelist_label_hop_offsets) + .element(i * num_hops + j, handle.get_stream()) - + (*renumbered_and_sorted_edgelist_label_hop_offsets) + .element(i * num_hops, handle.get_stream()); + auto hop_end_offset = (*renumbered_and_sorted_edgelist_label_hop_offsets) + .element(i * num_hops + j + 1, handle.get_stream()) - + (*renumbered_and_sorted_edgelist_label_hop_offsets) + .element(i * num_hops, handle.get_stream()); + ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), + edge_first + hop_start_offset, + edge_first + hop_end_offset)) + << "Renumbered and sorted output edges are not properly sorted."; + } + } else { + auto edge_first = thrust::make_zip_iterator(this_label_output_edgelist_majors.begin(), + this_label_output_edgelist_minors.begin()); + ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), + edge_first, + edge_first + this_label_output_edgelist_majors.size())) + << "Renumbered and sorted output edges are not properly sorted."; + } + + // check whether renumbering recovers the original edge list + + ASSERT_TRUE(compare_edgelist(handle, + this_label_org_edgelist_srcs, + this_label_org_edgelist_dsts, + this_label_org_edgelist_weights, + this_label_output_edgelist_srcs, + this_label_output_edgelist_dsts, + this_label_output_edgelist_weights, + std::make_optional(this_label_output_renumber_map))) + << "Unrenumbering the renumbered and sorted edge list does not recover the original " + "edgelist."; + + // Check the invariants in renumber_map + + 
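check_renumber_map_invariants, invoked just below, verifies the ordering property stated earlier in this file: if a vertex's first appearance has a smaller (hop, src/dst flag) key than another vertex's, it must receive a smaller renumbered ID. A small host-side sketch of the same adjacent-group check on toy data (illustrative only; this is not the test's device-side implementation):

#include <algorithm>
#include <cassert>
#include <map>
#include <utility>
#include <vector>

int main()
{
  // first_seen_key[v] = (hop, flag) of renumbered vertex v's first appearance
  std::vector<std::pair<int, int>> first_seen_key = {{0, 0}, {0, 0}, {0, 1}, {1, 0}, {1, 1}};

  // per-key minimum and maximum renumbered ID
  std::map<std::pair<int, int>, std::pair<int, int>> min_max;
  for (int v = 0; v < static_cast<int>(first_seen_key.size()); ++v) {
    auto [it, inserted] = min_max.try_emplace(first_seen_key[v], std::make_pair(v, v));
    if (!inserted) {
      it->second.first  = std::min(it->second.first, v);
      it->second.second = std::max(it->second.second, v);
    }
  }

  // invariant: each group's smallest ID exceeds the previous group's largest ID, which
  // (transitively) means every ID in a smaller key group is smaller than every ID in a larger one
  int prev_max = -1;
  for (auto const& [key, mm] : min_max) {
    assert(mm.first > prev_max);
    prev_max = mm.second;
  }
  return 0;
}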
ASSERT_TRUE(check_renumber_map_invariants(handle, + this_label_org_edgelist_srcs, + this_label_org_edgelist_dsts, + this_label_org_edgelist_hops, + this_label_output_renumber_map, + sampling_post_processing_usecase.src_is_major)) + << "Renumbered and sorted output renumber map violates invariants."; + } + } + } + + { + rmm::device_uvector renumbered_and_compressed_edgelist_srcs( + org_edgelist_srcs.size(), handle.get_stream()); + rmm::device_uvector renumbered_and_compressed_edgelist_dsts( + org_edgelist_dsts.size(), handle.get_stream()); + auto renumbered_and_compressed_edgelist_weights = + org_edgelist_weights ? std::make_optional>( + (*org_edgelist_weights).size(), handle.get_stream()) + : std::nullopt; + std::optional> renumbered_and_compressed_edgelist_edge_ids{ + std::nullopt}; + std::optional> renumbered_and_compressed_edgelist_edge_types{ + std::nullopt}; + auto renumbered_and_compressed_edgelist_hops = + org_edgelist_hops + ? std::make_optional(std::make_tuple( + rmm::device_uvector((*org_edgelist_hops).size(), handle.get_stream()), + sampling_post_processing_usecase.fanouts.size())) + : std::nullopt; + + raft::copy(renumbered_and_compressed_edgelist_srcs.data(), + org_edgelist_srcs.data(), + org_edgelist_srcs.size(), + handle.get_stream()); + raft::copy(renumbered_and_compressed_edgelist_dsts.data(), + org_edgelist_dsts.data(), + org_edgelist_dsts.size(), + handle.get_stream()); + if (renumbered_and_compressed_edgelist_weights) { + raft::copy((*renumbered_and_compressed_edgelist_weights).data(), + (*org_edgelist_weights).data(), + (*org_edgelist_weights).size(), + handle.get_stream()); + } + if (renumbered_and_compressed_edgelist_hops) { + raft::copy(std::get<0>(*renumbered_and_compressed_edgelist_hops).data(), + (*org_edgelist_hops).data(), + (*org_edgelist_hops).size(), + handle.get_stream()); + } + + std::optional> renumbered_and_compressed_nzd_vertices{ + std::nullopt}; + rmm::device_uvector renumbered_and_compressed_offsets(0, handle.get_stream()); + rmm::device_uvector renumbered_and_compressed_edgelist_minors(0, + handle.get_stream()); + std::optional> renumbered_and_compressed_offset_label_hop_offsets{ + std::nullopt}; + rmm::device_uvector renumbered_and_compressed_renumber_map(0, handle.get_stream()); + std::optional> + renumbered_and_compressed_renumber_map_label_offsets{std::nullopt}; + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Renumber and compressed sampled edgelist"); + } + + std::tie(renumbered_and_compressed_nzd_vertices, + renumbered_and_compressed_offsets, + renumbered_and_compressed_edgelist_minors, + renumbered_and_compressed_edgelist_weights, + renumbered_and_compressed_edgelist_edge_ids, + renumbered_and_compressed_edgelist_edge_types, + renumbered_and_compressed_offset_label_hop_offsets, + renumbered_and_compressed_renumber_map, + renumbered_and_compressed_renumber_map_label_offsets) = + cugraph::renumber_and_compress_sampled_edgelist( + handle, + std::move(renumbered_and_compressed_edgelist_srcs), + std::move(renumbered_and_compressed_edgelist_dsts), + std::move(renumbered_and_compressed_edgelist_weights), + std::move(renumbered_and_compressed_edgelist_edge_ids), + std::move(renumbered_and_compressed_edgelist_edge_types), + std::move(renumbered_and_compressed_edgelist_hops), + org_edgelist_label_offsets + ? 
std::make_optional(std::make_tuple( + raft::device_span((*org_edgelist_label_offsets).data(), + (*org_edgelist_label_offsets).size()), + sampling_post_processing_usecase.num_labels)) + : std::nullopt, + sampling_post_processing_usecase.src_is_major, + sampling_post_processing_usecase.compress_per_hop, + sampling_post_processing_usecase.doubly_compress); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (sampling_post_processing_usecase.check_correctness) { + if (renumbered_and_compressed_nzd_vertices) { + ASSERT_TRUE(renumbered_and_compressed_offsets.size() == + (*renumbered_and_compressed_nzd_vertices).size() + 1) + << "Renumbered and compressed offset array size should coincide with the number of " + "non-zero-degree vertices + 1."; + } + + ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), + renumbered_and_compressed_offsets.begin(), + renumbered_and_compressed_offsets.end())) + << "Renumbered and compressed offset array values should be non-decreasing."; + + ASSERT_TRUE(renumbered_and_compressed_offsets.back_element(handle.get_stream()) == + renumbered_and_compressed_edgelist_minors.size()) + << "Renumbered and compressed offset array's last value should coincide with the number " + "of " + "edges."; + + if (renumbered_and_compressed_offset_label_hop_offsets) { + ASSERT_TRUE((*renumbered_and_compressed_offset_label_hop_offsets).size() == + sampling_post_processing_usecase.num_labels * + sampling_post_processing_usecase.fanouts.size() + + 1) + << "Renumbered and compressed offset (label,hop) offset array size should coincide " + "with " + "the number of labels * the number of hops + 1."; + + ASSERT_TRUE( + thrust::is_sorted(handle.get_thrust_policy(), + (*renumbered_and_compressed_offset_label_hop_offsets).begin(), + (*renumbered_and_compressed_offset_label_hop_offsets).end())) + << "Renumbered and compressed offset (label,hop) offset array values should be " + "non-decreasing."; + + ASSERT_TRUE((*renumbered_and_compressed_offset_label_hop_offsets) + .back_element(handle.get_stream()) == + renumbered_and_compressed_offsets.size() - 1) + << "Renumbered and compressed offset (label,hop) offset array's last value should " + "coincide with the offset array size - 1."; + } + + if (renumbered_and_compressed_renumber_map_label_offsets) { + ASSERT_TRUE((*renumbered_and_compressed_renumber_map_label_offsets).size() == + sampling_post_processing_usecase.num_labels + 1) + << "Renumbered and compressed renumber map label offset array size should coincide " + "with " + "the number of labels + 1."; + + ASSERT_TRUE( + thrust::is_sorted(handle.get_thrust_policy(), + (*renumbered_and_compressed_renumber_map_label_offsets).begin(), + (*renumbered_and_compressed_renumber_map_label_offsets).end())) + << "Renumbered and compressed renumber map label offset array values should be " + "non-decreasing."; + + ASSERT_TRUE((*renumbered_and_compressed_renumber_map_label_offsets) + .back_element(handle.get_stream()) == + renumbered_and_compressed_renumber_map.size()) + << "Renumbered and compressed renumber map label offset array's last value should " + "coincide with the renumber map size."; + } + + for (size_t i = 0; i < sampling_post_processing_usecase.num_labels; ++i) { + size_t edgelist_start_offset = + org_edgelist_label_offsets + ?
(*org_edgelist_label_offsets).element(i, handle.get_stream()) + : size_t{0}; + size_t edgelist_end_offset = + org_edgelist_label_offsets + ? (*org_edgelist_label_offsets).element(i + 1, handle.get_stream()) + : org_edgelist_srcs.size(); + if (edgelist_start_offset == edgelist_end_offset) continue; + + auto this_label_org_edgelist_srcs = + raft::device_span(org_edgelist_srcs.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_org_edgelist_dsts = + raft::device_span(org_edgelist_dsts.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_org_edgelist_hops = + org_edgelist_hops ? std::make_optional>( + (*org_edgelist_hops).data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset) + : std::nullopt; + auto this_label_org_edgelist_weights = + org_edgelist_weights ? std::make_optional>( + (*org_edgelist_weights).data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset) + : std::nullopt; + + rmm::device_uvector this_label_output_edgelist_srcs(0, handle.get_stream()); + rmm::device_uvector this_label_output_edgelist_dsts(0, handle.get_stream()); + auto this_label_output_edgelist_weights = + renumbered_and_compressed_edgelist_weights + ? std::make_optional>(0, handle.get_stream()) + : std::nullopt; + this_label_output_edgelist_srcs.reserve(edgelist_end_offset - edgelist_start_offset, + handle.get_stream()); + this_label_output_edgelist_dsts.reserve(edgelist_end_offset - edgelist_start_offset, + handle.get_stream()); + if (this_label_output_edgelist_weights) { + (*this_label_output_edgelist_weights) + .reserve(edgelist_end_offset - edgelist_start_offset, handle.get_stream()); + } + + // decompress + + auto num_hops = sampling_post_processing_usecase.fanouts.size(); + for (size_t j = 0; j < num_hops; ++j) { + auto offset_start_offset = renumbered_and_compressed_offset_label_hop_offsets + ? (*renumbered_and_compressed_offset_label_hop_offsets) + .element(i * num_hops + j, handle.get_stream()) + : size_t{0}; + auto offset_end_offset = renumbered_and_compressed_offset_label_hop_offsets + ? ((*renumbered_and_compressed_offset_label_hop_offsets) + .element(i * num_hops + j + 1, handle.get_stream()) + + 1) + : renumbered_and_compressed_offsets.size(); + + auto base_v = + (!sampling_post_processing_usecase.doubly_compress && + !sampling_post_processing_usecase.compress_per_hop && (j > 0)) + ? static_cast(offset_start_offset - + (*renumbered_and_compressed_offset_label_hop_offsets) + .element(i * num_hops, handle.get_stream())) + : vertex_t{0}; + + raft::device_span d_offsets( + renumbered_and_compressed_offsets.data() + offset_start_offset, + offset_end_offset - offset_start_offset); + std::vector h_offsets(d_offsets.size()); + raft::update_host( + h_offsets.data(), d_offsets.data(), h_offsets.size(), handle.get_stream()); + handle.sync_stream(); + + auto old_size = this_label_output_edgelist_srcs.size(); + this_label_output_edgelist_srcs.resize(old_size + (h_offsets.back() - h_offsets[0]), + handle.get_stream()); + this_label_output_edgelist_dsts.resize(this_label_output_edgelist_srcs.size(), + handle.get_stream()); + if (this_label_output_edgelist_weights) { + (*this_label_output_edgelist_weights) + .resize(this_label_output_edgelist_srcs.size(), handle.get_stream()); + } + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(h_offsets[0]), + thrust::make_counting_iterator(h_offsets.back()), + (sampling_post_processing_usecase.src_is_major + ? 
this_label_output_edgelist_srcs.begin() + : this_label_output_edgelist_dsts.begin()) + + old_size, + [offsets = raft::device_span(d_offsets.data(), d_offsets.size()), + nzd_vertices = + renumbered_and_compressed_nzd_vertices + ? thrust::make_optional>( + (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, + (offset_end_offset - offset_start_offset) - 1) + : thrust::nullopt, + base_v] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), i))); + if (nzd_vertices) { + return (*nzd_vertices)[idx]; + } else { + return base_v + static_cast(idx); + } + }); + thrust::copy(handle.get_thrust_policy(), + renumbered_and_compressed_edgelist_minors.begin() + h_offsets[0], + renumbered_and_compressed_edgelist_minors.begin() + h_offsets.back(), + (sampling_post_processing_usecase.src_is_major + ? this_label_output_edgelist_dsts.begin() + : this_label_output_edgelist_srcs.begin()) + + old_size); + if (this_label_output_edgelist_weights) { + thrust::copy(handle.get_thrust_policy(), + (*renumbered_and_compressed_edgelist_weights).begin() + h_offsets[0], + (*renumbered_and_compressed_edgelist_weights).begin() + h_offsets.back(), + (*this_label_output_edgelist_weights).begin() + old_size); + } + } + + size_t renumber_map_start_offset = + renumbered_and_compressed_renumber_map_label_offsets + ? (*renumbered_and_compressed_renumber_map_label_offsets) + .element(i, handle.get_stream()) + : size_t{0}; + size_t renumber_map_end_offset = + renumbered_and_compressed_renumber_map_label_offsets + ? (*renumbered_and_compressed_renumber_map_label_offsets) + .element(i + 1, handle.get_stream()) + : renumbered_and_compressed_renumber_map.size(); + auto this_label_output_renumber_map = raft::device_span( + renumbered_and_compressed_renumber_map.data() + renumber_map_start_offset, + renumber_map_end_offset - renumber_map_start_offset); + + // check whether renumbering recovers the original edge list + + ASSERT_TRUE(compare_edgelist( + handle, + this_label_org_edgelist_srcs, + this_label_org_edgelist_dsts, + this_label_org_edgelist_weights, + raft::device_span(this_label_output_edgelist_srcs.data(), + this_label_output_edgelist_srcs.size()), + raft::device_span(this_label_output_edgelist_dsts.data(), + this_label_output_edgelist_dsts.size()), + this_label_output_edgelist_weights + ? std::make_optional>( + (*this_label_output_edgelist_weights).data(), + (*this_label_output_edgelist_weights).size()) + : std::nullopt, + std::make_optional(this_label_output_renumber_map))) + << "Unrenumbering the renumbered and compressed edge list does not recover the original " + "edgelist."; + + // Check the invariants in renumber_map + + ASSERT_TRUE(check_renumber_map_invariants(handle, + this_label_org_edgelist_srcs, + this_label_org_edgelist_dsts, + this_label_org_edgelist_hops, + this_label_output_renumber_map, + sampling_post_processing_usecase.src_is_major)) + << "Renumbered and compressed output renumber map violates invariants."; + } + } + } + + { + rmm::device_uvector sorted_edgelist_srcs(org_edgelist_srcs.size(), + handle.get_stream()); + rmm::device_uvector sorted_edgelist_dsts(org_edgelist_dsts.size(), + handle.get_stream()); + auto sorted_edgelist_weights = org_edgelist_weights + ?
std::make_optional>( + (*org_edgelist_weights).size(), handle.get_stream()) + : std::nullopt; + std::optional> sorted_edgelist_edge_ids{std::nullopt}; + std::optional> sorted_edgelist_edge_types{std::nullopt}; + auto sorted_edgelist_hops = + org_edgelist_hops + ? std::make_optional(std::make_tuple( + rmm::device_uvector((*org_edgelist_hops).size(), handle.get_stream()), + sampling_post_processing_usecase.fanouts.size())) + : std::nullopt; + + raft::copy(sorted_edgelist_srcs.data(), + org_edgelist_srcs.data(), + org_edgelist_srcs.size(), + handle.get_stream()); + raft::copy(sorted_edgelist_dsts.data(), + org_edgelist_dsts.data(), + org_edgelist_dsts.size(), + handle.get_stream()); + if (sorted_edgelist_weights) { + raft::copy((*sorted_edgelist_weights).data(), + (*org_edgelist_weights).data(), + (*org_edgelist_weights).size(), + handle.get_stream()); + } + if (sorted_edgelist_hops) { + raft::copy(std::get<0>(*sorted_edgelist_hops).data(), + (*org_edgelist_hops).data(), + (*org_edgelist_hops).size(), + handle.get_stream()); + } + + std::optional> sorted_edgelist_label_hop_offsets{std::nullopt}; + + { + size_t free_size{}; + size_t total_size{}; + RAFT_CUDA_TRY(cudaMemGetInfo(&free_size, &total_size)); + std::cout << "free_size=" << free_size / (1024.0 * 1024.0 * 1024.0) + << "GB total_size=" << total_size / (1024.0 * 1024.0 * 1024.0) << "GB." + << std::endl; + } + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.start("Sort sampled edgelist"); + } + + std::tie(sorted_edgelist_srcs, + sorted_edgelist_dsts, + sorted_edgelist_weights, + sorted_edgelist_edge_ids, + sorted_edgelist_edge_types, + sorted_edgelist_label_hop_offsets) = + cugraph::sort_sampled_edgelist( + handle, + std::move(sorted_edgelist_srcs), + std::move(sorted_edgelist_dsts), + std::move(sorted_edgelist_weights), + std::move(sorted_edgelist_edge_ids), + std::move(sorted_edgelist_edge_types), + std::move(sorted_edgelist_hops), + org_edgelist_label_offsets + ? std::make_optional(std::make_tuple( + raft::device_span((*org_edgelist_label_offsets).data(), + (*org_edgelist_label_offsets).size()), + sampling_post_processing_usecase.num_labels)) + : std::nullopt, + sampling_post_processing_usecase.src_is_major); + + if (cugraph::test::g_perf) { + RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement + hr_timer.stop(); + hr_timer.display_and_clear(std::cout); + } + + if (sampling_post_processing_usecase.check_correctness) { + if (sorted_edgelist_label_hop_offsets) { + ASSERT_TRUE((*sorted_edgelist_label_hop_offsets).size() == + sampling_post_processing_usecase.num_labels * + sampling_post_processing_usecase.fanouts.size() + + 1) + << "Sorted edge list (label,hop) offset array size should coincide with " + "the number of labels * the number of hops + 1."; + + ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), + (*sorted_edgelist_label_hop_offsets).begin(), + (*sorted_edgelist_label_hop_offsets).end())) + << "Sorted edge list (label,hop) offset array values should be " + "non-decreasing."; + } + + for (size_t i = 0; i < sampling_post_processing_usecase.num_labels; ++i) { + size_t edgelist_start_offset = + org_edgelist_label_offsets + ? (*org_edgelist_label_offsets).element(i, handle.get_stream()) + : size_t{0}; + size_t edgelist_end_offset = + org_edgelist_label_offsets + ? 
(*org_edgelist_label_offsets).element(i + 1, handle.get_stream()) + : org_edgelist_srcs.size(); + if (edgelist_start_offset == edgelist_end_offset) continue; + + auto this_label_org_edgelist_srcs = + raft::device_span(org_edgelist_srcs.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_org_edgelist_dsts = + raft::device_span(org_edgelist_dsts.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_org_edgelist_hops = + org_edgelist_hops ? std::make_optional>( + (*org_edgelist_hops).data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset) + : std::nullopt; + auto this_label_org_edgelist_weights = + org_edgelist_weights ? std::make_optional>( + (*org_edgelist_weights).data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset) + : std::nullopt; + + auto this_label_output_edgelist_srcs = + raft::device_span(sorted_edgelist_srcs.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_output_edgelist_dsts = + raft::device_span(sorted_edgelist_dsts.data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset); + auto this_label_output_edgelist_weights = + sorted_edgelist_weights ? std::make_optional>( + (*sorted_edgelist_weights).data() + edgelist_start_offset, + edgelist_end_offset - edgelist_start_offset) + : std::nullopt; + + // check whether the edges are properly sorted + + auto this_label_output_edgelist_majors = sampling_post_processing_usecase.src_is_major + ? this_label_output_edgelist_srcs + : this_label_output_edgelist_dsts; + auto this_label_output_edgelist_minors = sampling_post_processing_usecase.src_is_major + ? this_label_output_edgelist_dsts + : this_label_output_edgelist_srcs; + + if (this_label_org_edgelist_hops) { + auto num_hops = sampling_post_processing_usecase.fanouts.size(); + auto edge_first = thrust::make_zip_iterator(this_label_output_edgelist_majors.begin(), + this_label_output_edgelist_minors.begin()); + for (size_t j = 0; j < num_hops; ++j) { + auto hop_start_offset = + (*sorted_edgelist_label_hop_offsets) + .element(i * num_hops + j, handle.get_stream()) - + (*sorted_edgelist_label_hop_offsets).element(i * num_hops, handle.get_stream()); + auto hop_end_offset = + (*sorted_edgelist_label_hop_offsets) + .element(i * num_hops + j + 1, handle.get_stream()) - + (*sorted_edgelist_label_hop_offsets).element(i * num_hops, handle.get_stream()); + ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), + edge_first + hop_start_offset, + edge_first + hop_end_offset)) + << "Sorted output edges are not properly sorted."; + } + } else { + auto edge_first = thrust::make_zip_iterator(this_label_output_edgelist_majors.begin(), + this_label_output_edgelist_minors.begin()); + ASSERT_TRUE(thrust::is_sorted(handle.get_thrust_policy(), + edge_first, + edge_first + this_label_output_edgelist_majors.size())) + << "Sorted output edges are not properly sorted."; + } + + // check whether sorting preserves the original edge list + + ASSERT_TRUE( + compare_edgelist(handle, + this_label_org_edgelist_srcs, + this_label_org_edgelist_dsts, + this_label_org_edgelist_weights, + this_label_output_edgelist_srcs, + this_label_output_edgelist_dsts, + this_label_output_edgelist_weights, + std::optional>{std::nullopt})) + << "Sorted edge list does not coincide with the original edgelist."; + } + } + } + } +}; + +using Tests_SamplingPostProcessing_File = Tests_SamplingPostProcessing;
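For readers tracing the compressed-output checks above: renumber_and_compress_sampled_edgelist returns an offsets array plus a minors array (and, when doubly compressed, a list of non-zero-degree majors), and the test expands that representation back into (major, minor) pairs on the device with thrust::upper_bound before comparing against the original samples. Below is a minimal host-side sketch of the same expansion in plain Python/NumPy; it is illustrative only, not part of this patch or the cuGraph API, and the function name and toy arrays are invented.

import numpy as np

def decompress_to_edgelist(offsets, minors, nzd_vertices=None, base_v=0):
    # offsets[k]..offsets[k+1] delimit the minors attached to the k-th major row.
    # With doubly-compressed output, row k corresponds to nzd_vertices[k];
    # otherwise it corresponds to the renumbered vertex base_v + k.
    counts = np.diff(offsets)                           # per-row degree
    row_ids = np.repeat(np.arange(len(counts)), counts)
    majors = nzd_vertices[row_ids] if nzd_vertices is not None else base_v + row_ids
    return majors, np.asarray(minors)

# toy example: three rows with degrees 2, 0, 1
offsets = np.array([0, 2, 2, 3])
minors = np.array([5, 7, 9])
majors, minors = decompress_to_edgelist(offsets, minors)
print(list(zip(majors.tolist(), minors.tolist())))      # [(0, 5), (0, 7), (2, 9)]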
+using Tests_SamplingPostProcessing_Rmat = Tests_SamplingPostProcessing; + +TEST_P(Tests_SamplingPostProcessing_File, CheckInt32Int32) +{ + run_current_test(override_File_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SamplingPostProcessing_Rmat, CheckInt32Int32) +{ + run_current_test(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SamplingPostProcessing_Rmat, CheckInt32Int64) +{ + run_current_test(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +TEST_P(Tests_SamplingPostProcessing_Rmat, CheckInt64Int64) +{ + run_current_test(override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); +} + +INSTANTIATE_TEST_SUITE_P( + file_test, + Tests_SamplingPostProcessing_File, + ::testing::Combine( + // enable correctness checks + ::testing::Values( + SamplingPostProcessing_Usecase{1, 16, {10}, false, false, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, false, false, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, false, true, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, false, true, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, true, false, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, true, false, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, true, true, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, true, true, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, false, false, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, false, false, false, true, true}, + SamplingPostProcessing_Usecase{1, 4, {5, 10, 25}, false, false, true, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, false, false, true, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, false, true, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, false, true, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, false, true, true, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, true, false, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, true, false, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, true, false, true, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, true, true, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, true, true, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 25}, true, true, true, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, false, false, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, false, false, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, false, true, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, false, true, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, true, false, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, true, false, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, true, true, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, true, true, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, false, false, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, false, false, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, false, false, true, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, false, true, false, 
false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, false, true, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, false, true, true, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, true, false, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, true, false, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, true, false, true, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, true, true, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, true, true, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 25}, true, true, true, false, true}), + ::testing::Values(cugraph::test::File_Usecase("karate.mtx"), + cugraph::test::File_Usecase("dolphins.mtx")))); + +INSTANTIATE_TEST_SUITE_P( + rmat_small_test, + Tests_SamplingPostProcessing_Rmat, + ::testing::Combine( + // enable correctness checks + ::testing::Values( + SamplingPostProcessing_Usecase{1, 16, {10}, false, false, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, false, false, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, false, true, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, false, true, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, true, false, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, true, false, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, true, true, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {10}, true, true, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, false, false, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, false, false, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, false, false, true, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, false, true, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, false, true, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, false, true, true, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, true, false, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, true, false, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, true, false, true, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, true, true, false, false, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, true, true, false, true, true}, + SamplingPostProcessing_Usecase{1, 16, {5, 10, 15}, true, true, true, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, false, false, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, false, false, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, false, true, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, false, true, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, true, false, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, true, false, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, true, true, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {10}, true, true, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, false, false, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, false, false, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, false, false, true, 
false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, false, true, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, false, true, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, false, true, true, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, true, false, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, true, false, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, true, false, true, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, true, true, false, false, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, true, true, false, true, true}, + SamplingPostProcessing_Usecase{32, 16, {5, 10, 15}, true, true, true, false, true}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); + +INSTANTIATE_TEST_SUITE_P( + rmat_benchmark_test, + Tests_SamplingPostProcessing_Rmat, + ::testing::Combine( + // enable correctness checks + ::testing::Values( + SamplingPostProcessing_Usecase{1, 64, {10}, false, false, false, false, false}, + SamplingPostProcessing_Usecase{1, 64, {10}, false, false, false, true, false}, + SamplingPostProcessing_Usecase{1, 64, {10}, false, true, false, false, false}, + SamplingPostProcessing_Usecase{1, 64, {10}, false, true, false, true, false}, + SamplingPostProcessing_Usecase{1, 64, {10}, true, false, false, false, false}, + SamplingPostProcessing_Usecase{1, 64, {10}, true, false, false, true, false}, + SamplingPostProcessing_Usecase{1, 64, {10}, true, true, false, false, false}, + SamplingPostProcessing_Usecase{1, 64, {10}, true, true, false, true, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, false, false, false, false, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, false, false, false, true, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, false, false, true, false, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, false, true, false, false, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, false, true, false, true, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, false, true, true, false, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, true, false, false, false, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, true, false, false, true, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, true, false, true, false, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, true, true, false, false, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, true, true, false, true, false}, + SamplingPostProcessing_Usecase{1, 64, {5, 10, 15}, true, true, true, false, false}, + SamplingPostProcessing_Usecase{256, 64, {10}, false, false, false, false, false}, + SamplingPostProcessing_Usecase{256, 64, {10}, false, false, false, true, false}, + SamplingPostProcessing_Usecase{256, 64, {10}, false, true, false, false, false}, + SamplingPostProcessing_Usecase{256, 64, {10}, false, true, false, true, false}, + SamplingPostProcessing_Usecase{256, 64, {10}, true, false, false, false, false}, + SamplingPostProcessing_Usecase{256, 64, {10}, true, false, false, true, false}, + SamplingPostProcessing_Usecase{256, 64, {10}, true, true, false, false, false}, + SamplingPostProcessing_Usecase{256, 64, {10}, true, true, false, true, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, false, false, false, false, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, 
false, false, false, true, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, false, false, true, false, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, false, true, false, false, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, false, true, false, true, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, false, true, true, false, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, true, false, false, false, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, true, false, false, true, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, true, false, true, false, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, true, true, false, false, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, true, true, false, true, false}, + SamplingPostProcessing_Usecase{256, 64, {5, 10, 15}, true, true, true, false, false}), + ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); + +CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/traversal/mg_sssp_test.cpp b/cpp/tests/traversal/mg_sssp_test.cpp index b3e96981f96..ea0353c3743 100644 --- a/cpp/tests/traversal/mg_sssp_test.cpp +++ b/cpp/tests/traversal/mg_sssp_test.cpp @@ -214,7 +214,7 @@ class Tests_MGSSSP : public ::testing::TestWithParam> sg_renumber_map, // std::nullopt if the SG graph is not renumbered std::optional> - mg_vertices, // std::nullopt if the entire local vertex partition range is assumed + mg_vertices, // std::nullopt if the entire local vertex partition range is assumed raft::device_span mg_values); template diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/__init__.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/__init__.py index e5acbf34478..3e7f2f076f0 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/__init__.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/__init__.py @@ -13,6 +13,7 @@ from .base import SparseGraph from .gatconv import GATConv +from .gatv2conv import GATv2Conv from .relgraphconv import RelGraphConv from .sageconv import SAGEConv from .transformerconv import TransformerConv @@ -20,6 +21,7 @@ __all__ = [ "SparseGraph", "GATConv", + "GATv2Conv", "RelGraphConv", "SAGEConv", "TransformerConv", diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py index 0eeaed29d86..307eb33078e 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/base.py @@ -17,38 +17,7 @@ torch = import_optional("torch") ops_torch = import_optional("pylibcugraphops.pytorch") - - -class BaseConv(torch.nn.Module): - r"""An abstract base class for cugraph-ops nn module.""" - - def __init__(self): - super().__init__() - self._cached_offsets_fg = None - - def reset_parameters(self): - r"""Resets all learnable parameters of the module.""" - raise NotImplementedError - - def forward(self, *args): - r"""Runs the forward pass of the module.""" - raise NotImplementedError - - def pad_offsets(self, offsets: torch.Tensor, size: int) -> torch.Tensor: - r"""Pad zero-in-degree nodes to the end of offsets to reach size. 
This - is used to augment offset tensors from DGL blocks (MFGs) to be - compatible with cugraph-ops full-graph primitives.""" - if self._cached_offsets_fg is None: - self._cached_offsets_fg = torch.empty( - size, dtype=offsets.dtype, device=offsets.device - ) - elif self._cached_offsets_fg.numel() < size: - self._cached_offsets_fg.resize_(size) - - self._cached_offsets_fg[: offsets.numel()] = offsets - self._cached_offsets_fg[offsets.numel() : size] = offsets[-1] - - return self._cached_offsets_fg[:size] +dgl = import_optional("dgl") def compress_ids(ids: torch.Tensor, size: int) -> torch.Tensor: @@ -63,8 +32,9 @@ def decompress_ids(c_ids: torch.Tensor) -> torch.Tensor: class SparseGraph(object): - r"""A god-class to store different sparse formats needed by cugraph-ops - and facilitate sparse format conversions. + r"""A class to create and store different sparse formats needed by + cugraph-ops. It always creates a CSC representation and can provide COO- or + CSR-format if needed. Parameters ---------- @@ -89,25 +59,43 @@ class SparseGraph(object): consists of the sources between `src_indices[cdst_indices[k]]` and `src_indices[cdst_indices[k+1]]`. - dst_ids_is_sorted: bool - Whether `dst_ids` has been sorted in an ascending order. When sorted, - creating CSC layout is much faster. + values: torch.Tensor, optional + Values on the edges. + + is_sorted: bool + Whether the COO inputs (src_ids, dst_ids, values) have been sorted by + `dst_ids` in an ascending order. CSC layout creation is much faster + when sorted. formats: str or tuple of str, optional - The desired sparse formats to create for the graph. + The desired sparse formats to create for the graph. The formats tuple + must include "csc". Default: "csc". reduce_memory: bool, optional When set, the tensors are not required by the desired formats will be - set to `None`. + set to `None`. Default: True. Notes ----- For MFGs (sampled graphs), the node ids must have been renumbered. """ - supported_formats = {"coo": ("src_ids", "dst_ids"), "csc": ("cdst_ids", "src_ids")} - - all_tensors = set(["src_ids", "dst_ids", "csrc_ids", "cdst_ids"]) + supported_formats = { + "coo": ("_src_ids", "_dst_ids"), + "csc": ("_cdst_ids", "_src_ids"), + "csr": ("_csrc_ids", "_dst_ids", "_perm_csc2csr"), + } + + all_tensors = set( + [ + "_src_ids", + "_dst_ids", + "_csrc_ids", + "_cdst_ids", + "_perm_coo2csc", + "_perm_csc2csr", + ] + ) def __init__( self, @@ -116,15 +104,19 @@ def __init__( dst_ids: Optional[torch.Tensor] = None, csrc_ids: Optional[torch.Tensor] = None, cdst_ids: Optional[torch.Tensor] = None, - dst_ids_is_sorted: bool = False, - formats: Optional[Union[str, Tuple[str]]] = None, + values: Optional[torch.Tensor] = None, + is_sorted: bool = False, + formats: Union[str, Tuple[str]] = "csc", reduce_memory: bool = True, ): self._num_src_nodes, self._num_dst_nodes = size - self._dst_ids_is_sorted = dst_ids_is_sorted + self._is_sorted = is_sorted if dst_ids is None and cdst_ids is None: - raise ValueError("One of 'dst_ids' and 'cdst_ids' must be given.") + raise ValueError( + "One of 'dst_ids' and 'cdst_ids' must be given " + "to create a SparseGraph." 
+ ) if src_ids is not None: src_ids = src_ids.contiguous() @@ -148,21 +140,40 @@ def __init__( ) cdst_ids = cdst_ids.contiguous() + if values is not None: + values = values.contiguous() + self._src_ids = src_ids self._dst_ids = dst_ids self._csrc_ids = csrc_ids self._cdst_ids = cdst_ids - self._perm = None + self._values = values + self._perm_coo2csc = None + self._perm_csc2csr = None if isinstance(formats, str): formats = (formats,) - - if formats is not None: - for format_ in formats: - assert format_ in SparseGraph.supported_formats - self.__getattribute__(f"_create_{format_}")() self._formats = formats + if "csc" not in formats: + raise ValueError( + f"{self.__class__.__name__}.formats must contain " + f"'csc', but got {formats}." + ) + + # always create csc first + if self._cdst_ids is None: + if not self._is_sorted: + self._dst_ids, self._perm_coo2csc = torch.sort(self._dst_ids) + self._src_ids = self._src_ids[self._perm_coo2csc] + if self._values is not None: + self._values = self._values[self._perm_coo2csc] + self._cdst_ids = compress_ids(self._dst_ids, self._num_dst_nodes) + + for format_ in formats: + assert format_ in SparseGraph.supported_formats + self.__getattribute__(f"{format_}")() + self._reduce_memory = reduce_memory if reduce_memory: self.reduce_memory() @@ -170,8 +181,6 @@ def __init__( def reduce_memory(self): """Remove the tensors that are not necessary to create the desired sparse formats to reduce memory footprint.""" - - self._perm = None if self._formats is None: return @@ -181,16 +190,22 @@ def reduce_memory(self): for t in SparseGraph.all_tensors.difference(set(tensors_needed)): self.__dict__[t] = None - def _create_coo(self): + def src_ids(self) -> torch.Tensor: + return self._src_ids + + def cdst_ids(self) -> torch.Tensor: + return self._cdst_ids + + def dst_ids(self) -> torch.Tensor: if self._dst_ids is None: self._dst_ids = decompress_ids(self._cdst_ids) + return self._dst_ids - def _create_csc(self): - if self._cdst_ids is None: - if not self._dst_ids_is_sorted: - self._dst_ids, self._perm = torch.sort(self._dst_ids) - self._src_ids = self._src_ids[self._perm] - self._cdst_ids = compress_ids(self._dst_ids, self._num_dst_nodes) + def csrc_ids(self) -> torch.Tensor: + if self._csrc_ids is None: + src_ids, self._perm_csc2csr = torch.sort(self._src_ids) + self._csrc_ids = compress_ids(src_ids, self._num_src_nodes) + return self._csrc_ids def num_src_nodes(self): return self._num_src_nodes @@ -198,21 +213,134 @@ def num_src_nodes(self): def num_dst_nodes(self): return self._num_dst_nodes + def values(self): + return self._values + def formats(self): return self._formats - def coo(self) -> Tuple[torch.Tensor, torch.Tensor]: + def coo(self) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: if "coo" not in self.formats(): raise RuntimeError( "The SparseGraph did not create a COO layout. " - "Set 'formats' to include 'coo' when creating the graph." + "Set 'formats' list to include 'coo' when creating the graph." ) - return (self._src_ids, self._dst_ids) + return self.src_ids(), self.dst_ids(), self._values - def csc(self) -> Tuple[torch.Tensor, torch.Tensor]: + def csc(self) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: if "csc" not in self.formats(): raise RuntimeError( "The SparseGraph did not create a CSC layout. " - "Set 'formats' to include 'csc' when creating the graph." + "Set 'formats' list to include 'csc' when creating the graph." 
+ ) + return self.cdst_ids(), self.src_ids(), self._values + + def csr(self) -> Tuple[torch.Tensor, torch.Tensor, Optional[torch.Tensor]]: + if "csr" not in self.formats(): + raise RuntimeError( + "The SparseGraph did not create a CSR layout. " + "Set 'formats' list to include 'csr' when creating the graph." + ) + csrc_ids = self.csrc_ids() + dst_ids = self.dst_ids()[self._perm_csc2csr] + value = self._values + if value is not None: + value = value[self._perm_csc2csr] + return csrc_ids, dst_ids, value + + +class BaseConv(torch.nn.Module): + r"""An abstract base class for cugraph-ops nn module.""" + + def __init__(self): + super().__init__() + + def reset_parameters(self): + r"""Resets all learnable parameters of the module.""" + raise NotImplementedError + + def forward(self, *args): + r"""Runs the forward pass of the module.""" + raise NotImplementedError + + def get_cugraph_ops_CSC( + self, + g: Union[SparseGraph, dgl.DGLHeteroGraph], + is_bipartite: bool = False, + max_in_degree: Optional[int] = None, + ) -> ops_torch.CSC: + """Create CSC structure needed by cugraph-ops.""" + + if not isinstance(g, (SparseGraph, dgl.DGLHeteroGraph)): + raise TypeError( + f"The graph has to be either a 'cugraph_dgl.nn.SparseGraph' or " + f"'dgl.DGLHeteroGraph', but got '{type(g)}'." ) - return (self._cdst_ids, self._src_ids) + + # TODO: max_in_degree should default to None in pylibcugraphops + if max_in_degree is None: + max_in_degree = -1 + + if isinstance(g, SparseGraph): + offsets, indices, _ = g.csc() + else: + offsets, indices, _ = g.adj_tensors("csc") + + graph = ops_torch.CSC( + offsets=offsets, + indices=indices, + num_src_nodes=g.num_src_nodes(), + dst_max_in_degree=max_in_degree, + is_bipartite=is_bipartite, + ) + + return graph + + def get_cugraph_ops_HeteroCSC( + self, + g: Union[SparseGraph, dgl.DGLHeteroGraph], + num_edge_types: int, + etypes: Optional[torch.Tensor] = None, + is_bipartite: bool = False, + max_in_degree: Optional[int] = None, + ) -> ops_torch.HeteroCSC: + """Create HeteroCSC structure needed by cugraph-ops.""" + + if not isinstance(g, (SparseGraph, dgl.DGLHeteroGraph)): + raise TypeError( + f"The graph has to be either a 'cugraph_dgl.nn.SparseGraph' or " + f"'dgl.DGLHeteroGraph', but got '{type(g)}'." + ) + + # TODO: max_in_degree should default to None in pylibcugraphops + if max_in_degree is None: + max_in_degree = -1 + + if isinstance(g, SparseGraph): + offsets, indices, etypes = g.csc() + if etypes is None: + raise ValueError( + "SparseGraph must have 'values' to create HeteroCSC. " + "Pass in edge types as 'values' when creating the SparseGraph." + ) + etypes = etypes.int() + else: + if etypes is None: + raise ValueError( + "'etypes' is required when creating HeteroCSC " + "from dgl.DGLHeteroGraph." + ) + offsets, indices, perm = g.adj_tensors("csc") + etypes = etypes[perm].int() + + graph = ops_torch.HeteroCSC( + offsets=offsets, + indices=indices, + edge_types=etypes, + num_src_nodes=g.num_src_nodes(), + num_edge_types=num_edge_types, + dst_max_in_degree=max_in_degree, + is_bipartite=is_bipartite, + ) + + return graph diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py index 239def5b677..8843e61ad89 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatconv.py @@ -10,13 +10,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# See the License for the specific language governing permissions and # limitations under the License. -"""Torch Module for graph attention network layer using the aggregation -primitives in cugraph-ops""" -# pylint: disable=no-member, arguments-differ, invalid-name, too-many-arguments -from __future__ import annotations + from typing import Optional, Tuple, Union -from cugraph_dgl.nn.conv.base import BaseConv +from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph from cugraph.utilities.utils import import_optional dgl = import_optional("dgl") @@ -32,13 +29,15 @@ class GATConv(BaseConv): Parameters ---------- - in_feats : int, pair of ints + in_feats : int or tuple Input feature size. A pair denotes feature sizes of source and destination nodes. out_feats : int Output feature size. num_heads : int - Number of heads in Multi-Head Attention. + Number of heads in multi-head attention. + feat_drop : float, optional + Dropout rate on feature. Defaults: ``0``. concat : bool, optional If False, the multi-head attentions are averaged instead of concatenated. Default: ``True``. @@ -46,6 +45,15 @@ class GATConv(BaseConv): Edge feature size. Default: ``None``. negative_slope : float, optional LeakyReLU angle of negative slope. Defaults: ``0.2``. + residual : bool, optional + If True, use residual connection. Defaults: ``False``. + allow_zero_in_degree : bool, optional + If there are 0-in-degree nodes in the graph, output for those nodes will + be invalid since no message will be passed to those nodes. This is + harmful for some applications causing silent performance regression. + This module will raise a DGLError if it detects 0-in-degree nodes in + input graph. By setting ``True``, it will suppress the check and let the + users handle it by themselves. Defaults: ``False``. bias : bool, optional If True, learns a bias term. Defaults: ``True``. 
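To make the reworked interface above concrete, here is a hedged usage sketch of GATConv driven through the new SparseGraph input path added in this patch. The toy COO graph, feature sizes, and variable names are invented for illustration; it assumes a CUDA device with PyTorch, DGL, and pylibcugraphops installed, and is a sketch rather than part of the change itself.

import torch
from cugraph_dgl.nn.conv import GATConv, SparseGraph

device = "cuda"
num_src, num_dst = 5, 3

# A tiny MFG-style bipartite graph in COO form; SparseGraph builds the CSC layout
# consumed by the cugraph-ops kernels (formats defaults to "csc").
src = torch.tensor([0, 1, 2, 3, 4, 0], device=device)
dst = torch.tensor([0, 0, 1, 1, 2, 2], device=device)
g = SparseGraph(size=(num_src, num_dst), src_ids=src, dst_ids=dst, formats="csc")

conv = GATConv(in_feats=(8, 4), out_feats=2, num_heads=3, residual=True).to(device)

# Bipartite node features: (source features, destination features).
feat_src = torch.randn(num_src, 8, device=device)
feat_dst = torch.randn(num_dst, 4, device=device)
out = conv(g, (feat_src, feat_dst))  # shape: (num_dst, num_heads, out_feats)
print(out.shape)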
@@ -81,37 +89,46 @@ class GATConv(BaseConv): [ 1.6477, -1.9986], [ 1.1138, -1.9302]]], device='cuda:0', grad_fn=) """ - MAX_IN_DEGREE_MFG = 200 def __init__( self, in_feats: Union[int, Tuple[int, int]], out_feats: int, num_heads: int, + feat_drop: float = 0.0, concat: bool = True, edge_feats: Optional[int] = None, negative_slope: float = 0.2, + residual: bool = False, + allow_zero_in_degree: bool = False, bias: bool = True, ): super().__init__() self.in_feats = in_feats self.out_feats = out_feats + self.in_feats_src, self.in_feats_dst = dgl.utils.expand_as_pair(in_feats) self.num_heads = num_heads + self.feat_drop = nn.Dropout(feat_drop) self.concat = concat self.edge_feats = edge_feats self.negative_slope = negative_slope + self.allow_zero_in_degree = allow_zero_in_degree if isinstance(in_feats, int): - self.fc = nn.Linear(in_feats, num_heads * out_feats, bias=False) + self.lin = nn.Linear(in_feats, num_heads * out_feats, bias=False) else: - self.fc_src = nn.Linear(in_feats[0], num_heads * out_feats, bias=False) - self.fc_dst = nn.Linear(in_feats[1], num_heads * out_feats, bias=False) + self.lin_src = nn.Linear( + self.in_feats_src, num_heads * out_feats, bias=False + ) + self.lin_dst = nn.Linear( + self.in_feats_dst, num_heads * out_feats, bias=False + ) if edge_feats is not None: - self.fc_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) + self.lin_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) self.attn_weights = nn.Parameter(torch.Tensor(3 * num_heads * out_feats)) else: - self.register_parameter("fc_edge", None) + self.register_parameter("lin_edge", None) self.attn_weights = nn.Parameter(torch.Tensor(2 * num_heads * out_feats)) if bias and concat: @@ -121,28 +138,40 @@ def __init__( else: self.register_buffer("bias", None) + self.residual = residual and self.in_feats_dst != out_feats * num_heads + if self.residual: + self.lin_res = nn.Linear( + self.in_feats_dst, num_heads * out_feats, bias=bias + ) + else: + self.register_buffer("lin_res", None) + self.reset_parameters() def reset_parameters(self): r"""Reinitialize learnable parameters.""" gain = nn.init.calculate_gain("relu") - if hasattr(self, "fc"): - nn.init.xavier_normal_(self.fc.weight, gain=gain) + if hasattr(self, "lin"): + nn.init.xavier_normal_(self.lin.weight, gain=gain) else: - nn.init.xavier_normal_(self.fc_src.weight, gain=gain) - nn.init.xavier_normal_(self.fc_dst.weight, gain=gain) + nn.init.xavier_normal_(self.lin_src.weight, gain=gain) + nn.init.xavier_normal_(self.lin_dst.weight, gain=gain) nn.init.xavier_normal_( self.attn_weights.view(-1, self.num_heads, self.out_feats), gain=gain ) - if self.fc_edge is not None: - self.fc_edge.reset_parameters() + if self.lin_edge is not None: + self.lin_edge.reset_parameters() + + if self.lin_res is not None: + self.lin_res.reset_parameters() + if self.bias is not None: nn.init.zeros_(self.bias) def forward( self, - g: dgl.DGLHeteroGraph, + g: Union[SparseGraph, dgl.DGLHeteroGraph], nfeat: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], efeat: Optional[torch.Tensor] = None, max_in_degree: Optional[int] = None, @@ -151,18 +180,17 @@ def forward( Parameters ---------- - graph : DGLGraph + graph : DGLGraph or SparseGraph The graph. nfeat : torch.Tensor Input features of shape :math:`(N, D_{in})`. efeat: torch.Tensor, optional Optional edge features. max_in_degree : int - Maximum in-degree of destination nodes. It is only effective when - :attr:`g` is a :class:`DGLBlock`, i.e., bipartite graph. 
When - :attr:`g` is generated from a neighbor sampler, the value should be - set to the corresponding :attr:`fanout`. If not given, - :attr:`max_in_degree` will be calculated on-the-fly. + Maximum in-degree of destination nodes. When :attr:`g` is generated + from a neighbor sampler, the value should be set to the corresponding + :attr:`fanout`. This option is used to invoke the MFG-variant of + cugraph-ops kernel. Returns ------- @@ -171,49 +199,63 @@ def forward( :math:`H` is the number of heads, and :math:`D_{out}` is size of output feature. """ - if max_in_degree is None: - max_in_degree = -1 - - bipartite = not isinstance(nfeat, torch.Tensor) - offsets, indices, _ = g.adj_tensors("csc") - - graph = ops_torch.CSC( - offsets=offsets, - indices=indices, - num_src_nodes=g.num_src_nodes(), - dst_max_in_degree=max_in_degree, - is_bipartite=bipartite, + if isinstance(g, dgl.DGLHeteroGraph): + if not self.allow_zero_in_degree: + if (g.in_degrees() == 0).any(): + raise dgl.base.DGLError( + "There are 0-in-degree nodes in the graph, " + "output for those nodes will be invalid. " + "This is harmful for some applications, " + "causing silent performance regression. " + "Adding self-loop on the input graph by " + "calling `g = dgl.add_self_loop(g)` will resolve " + "the issue. Setting ``allow_zero_in_degree`` " + "to be `True` when constructing this module will " + "suppress the check and let the code run." + ) + + bipartite = isinstance(nfeat, (list, tuple)) + + _graph = self.get_cugraph_ops_CSC( + g, is_bipartite=bipartite, max_in_degree=max_in_degree ) + if bipartite: + nfeat = (self.feat_drop(nfeat[0]), self.feat_drop(nfeat[1])) + nfeat_dst_orig = nfeat[1] + else: + nfeat = self.feat_drop(nfeat) + nfeat_dst_orig = nfeat[: g.num_dst_nodes()] + if efeat is not None: - if self.fc_edge is None: + if self.lin_edge is None: raise RuntimeError( f"{self.__class__.__name__}.edge_feats must be set to " f"accept edge features." ) - efeat = self.fc_edge(efeat) + efeat = self.lin_edge(efeat) if bipartite: - if not hasattr(self, "fc_src"): + if not hasattr(self, "lin_src"): raise RuntimeError( f"{self.__class__.__name__}.in_feats must be a pair of " f"integers to allow bipartite node features, but got " f"{self.in_feats}." ) - nfeat_src = self.fc_src(nfeat[0]) - nfeat_dst = self.fc_dst(nfeat[1]) + nfeat_src = self.lin_src(nfeat[0]) + nfeat_dst = self.lin_dst(nfeat[1]) else: - if not hasattr(self, "fc"): + if not hasattr(self, "lin"): raise RuntimeError( f"{self.__class__.__name__}.in_feats is expected to be an " f"integer, but got {self.in_feats}." ) - nfeat = self.fc(nfeat) + nfeat = self.lin(nfeat) out = ops_torch.operators.mha_gat_n2n( (nfeat_src, nfeat_dst) if bipartite else nfeat, self.attn_weights, - graph, + _graph, num_heads=self.num_heads, activation="LeakyReLU", negative_slope=self.negative_slope, @@ -224,6 +266,12 @@ def forward( if self.concat: out = out.view(-1, self.num_heads, self.out_feats) + if self.residual: + res = self.lin_res(nfeat_dst_orig).view(-1, self.num_heads, self.out_feats) + if not self.concat: + res = res.mean(dim=1) + out = out + res + if self.bias is not None: out = out + self.bias diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py new file mode 100644 index 00000000000..209a5fe1a8d --- /dev/null +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/gatv2conv.py @@ -0,0 +1,249 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. 
+# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Tuple, Union + +from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph +from cugraph.utilities.utils import import_optional + +dgl = import_optional("dgl") +torch = import_optional("torch") +nn = import_optional("torch.nn") +ops_torch = import_optional("pylibcugraphops.pytorch") + + +class GATv2Conv(BaseConv): + r"""GATv2 from `How Attentive are Graph Attention Networks? + `__, with the sparse aggregation + accelerated by cugraph-ops. + + Parameters + ---------- + in_feats : int, or pair of ints + Input feature size; i.e, the number of dimensions of :math:`h_i^{(l)}`. + If the layer is to be applied to a unidirectional bipartite graph, `in_feats` + specifies the input feature size on both the source and destination nodes. + If a scalar is given, the source and destination node feature size + would take the same value. + out_feats : int + Output feature size; i.e, the number of dimensions of :math:`h_i^{(l+1)}`. + num_heads : int + Number of heads in Multi-Head Attention. + feat_drop : float, optional + Dropout rate on feature. Defaults: ``0``. + concat : bool, optional + If False, the multi-head attentions are averaged instead of concatenated. + Default: ``True``. + edge_feats : int, optional + Edge feature size. Default: ``None``. + negative_slope : float, optional + LeakyReLU angle of negative slope. Defaults: ``0.2``. + residual : bool, optional + If True, use residual connection. Defaults: ``False``. + allow_zero_in_degree : bool, optional + If there are 0-in-degree nodes in the graph, output for those nodes will + be invalid since no message will be passed to those nodes. This is + harmful for some applications causing silent performance regression. + This module will raise a DGLError if it detects 0-in-degree nodes in + input graph. By setting ``True``, it will suppress the check and let the + users handle it by themselves. Defaults: ``False``. + bias : bool, optional + If set to :obj:`False`, the layer will not learn + an additive bias. (default: :obj:`True`) + share_weights : bool, optional + If set to :obj:`True`, the same matrix for :math:`W_{left}` and + :math:`W_{right}` in the above equations, will be applied to the source + and the target node of every edge. 
(default: :obj:`False`) + """ + + def __init__( + self, + in_feats: Union[int, Tuple[int, int]], + out_feats: int, + num_heads: int, + feat_drop: float = 0.0, + concat: bool = True, + edge_feats: Optional[int] = None, + negative_slope: float = 0.2, + residual: bool = False, + allow_zero_in_degree: bool = False, + bias: bool = True, + share_weights: bool = False, + ): + super().__init__() + self.in_feats = in_feats + self.out_feats = out_feats + self.in_feats_src, self.in_feats_dst = dgl.utils.expand_as_pair(in_feats) + self.num_heads = num_heads + self.feat_drop = nn.Dropout(feat_drop) + self.concat = concat + self.edge_feats = edge_feats + self.negative_slope = negative_slope + self.allow_zero_in_degree = allow_zero_in_degree + self.share_weights = share_weights + + self.lin_src = nn.Linear(self.in_feats_src, num_heads * out_feats, bias=bias) + if share_weights: + if self.in_feats_src != self.in_feats_dst: + raise ValueError( + f"Input feature size of source and destination " + f"nodes must be identical when share_weights is enabled, " + f"but got {self.in_feats_src} and {self.in_feats_dst}." + ) + self.lin_dst = self.lin_src + else: + self.lin_dst = nn.Linear( + self.in_feats_dst, num_heads * out_feats, bias=bias + ) + + self.attn = nn.Parameter(torch.Tensor(num_heads * out_feats)) + + if edge_feats is not None: + self.lin_edge = nn.Linear(edge_feats, num_heads * out_feats, bias=False) + else: + self.register_parameter("lin_edge", None) + + if bias and concat: + self.bias = nn.Parameter(torch.Tensor(num_heads, out_feats)) + elif bias and not concat: + self.bias = nn.Parameter(torch.Tensor(out_feats)) + else: + self.register_buffer("bias", None) + + self.residual = residual and self.in_feats_dst != out_feats * num_heads + if self.residual: + self.lin_res = nn.Linear( + self.in_feats_dst, num_heads * out_feats, bias=bias + ) + else: + self.register_buffer("lin_res", None) + + self.reset_parameters() + + def reset_parameters(self): + r"""Reinitialize learnable parameters.""" + gain = nn.init.calculate_gain("relu") + nn.init.xavier_normal_(self.lin_src.weight, gain=gain) + nn.init.xavier_normal_(self.lin_dst.weight, gain=gain) + + nn.init.xavier_normal_( + self.attn.view(-1, self.num_heads, self.out_feats), gain=gain + ) + if self.lin_edge is not None: + self.lin_edge.reset_parameters() + + if self.lin_res is not None: + self.lin_res.reset_parameters() + + if self.bias is not None: + nn.init.zeros_(self.bias) + + def forward( + self, + g: Union[SparseGraph, dgl.DGLHeteroGraph], + nfeat: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], + efeat: Optional[torch.Tensor] = None, + max_in_degree: Optional[int] = None, + ) -> torch.Tensor: + r"""Forward computation. + + Parameters + ---------- + graph : DGLGraph or SparseGraph + The graph. + nfeat : torch.Tensor + Input features of shape :math:`(N, D_{in})`. + efeat: torch.Tensor, optional + Optional edge features. + max_in_degree : int + Maximum in-degree of destination nodes. When :attr:`g` is generated + from a neighbor sampler, the value should be set to the corresponding + :attr:`fanout`. This option is used to invoke the MFG-variant of + cugraph-ops kernel. + + Returns + ------- + torch.Tensor + The output feature of shape :math:`(N, H, D_{out})` where + :math:`H` is the number of heads, and :math:`D_{out}` is size of + output feature. 
+ """ + + if isinstance(g, dgl.DGLHeteroGraph): + if not self.allow_zero_in_degree: + if (g.in_degrees() == 0).any(): + raise dgl.base.DGLError( + "There are 0-in-degree nodes in the graph, " + "output for those nodes will be invalid. " + "This is harmful for some applications, " + "causing silent performance regression. " + "Adding self-loop on the input graph by " + "calling `g = dgl.add_self_loop(g)` will resolve " + "the issue. Setting ``allow_zero_in_degree`` " + "to be `True` when constructing this module will " + "suppress the check and let the code run." + ) + + nfeat_bipartite = isinstance(nfeat, (list, tuple)) + graph_bipartite = nfeat_bipartite or self.share_weights is False + + _graph = self.get_cugraph_ops_CSC( + g, is_bipartite=graph_bipartite, max_in_degree=max_in_degree + ) + + if nfeat_bipartite: + nfeat = (self.feat_drop(nfeat[0]), self.feat_drop(nfeat[1])) + nfeat_dst_orig = nfeat[1] + else: + nfeat = self.feat_drop(nfeat) + nfeat_dst_orig = nfeat[: g.num_dst_nodes()] + + if efeat is not None: + if self.lin_edge is None: + raise RuntimeError( + f"{self.__class__.__name__}.edge_feats must be set to " + f"accept edge features." + ) + efeat = self.lin_edge(efeat) + + if nfeat_bipartite: + nfeat = (self.lin_src(nfeat[0]), self.lin_dst(nfeat[1])) + elif graph_bipartite: + nfeat = (self.lin_src(nfeat), self.lin_dst(nfeat[: g.num_dst_nodes()])) + else: + nfeat = self.lin_src(nfeat) + + out = ops_torch.operators.mha_gat_v2_n2n( + nfeat, + self.attn, + _graph, + num_heads=self.num_heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat, + edge_feat=efeat, + )[: g.num_dst_nodes()] + + if self.concat: + out = out.view(-1, self.num_heads, self.out_feats) + + if self.residual: + res = self.lin_res(nfeat_dst_orig).view(-1, self.num_heads, self.out_feats) + if not self.concat: + res = res.mean(dim=1) + out = out + res + + if self.bias is not None: + out = out + self.bias + + return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py index 89e49011cf7..54916674210 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/relgraphconv.py @@ -10,14 +10,11 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Torch Module for Relational graph convolution layer using the aggregation -primitives in cugraph-ops""" -# pylint: disable=no-member, arguments-differ, invalid-name, too-many-arguments -from __future__ import annotations + import math -from typing import Optional +from typing import Optional, Union -from cugraph_dgl.nn.conv.base import BaseConv +from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph from cugraph.utilities.utils import import_optional dgl = import_optional("dgl") @@ -29,13 +26,8 @@ class RelGraphConv(BaseConv): r"""An accelerated relational graph convolution layer from `Modeling Relational Data with Graph Convolutional Networks - `__ that leverages the highly-optimized - aggregation primitives in cugraph-ops. - - See :class:`dgl.nn.pytorch.conv.RelGraphConv` for mathematical model. - - This module depends on :code:`pylibcugraphops` package, which can be - installed via :code:`conda install -c nvidia pylibcugraphops>=23.02`. + `__, with the sparse aggregation + accelerated by cugraph-ops. 
Parameters ---------- @@ -84,7 +76,6 @@ class RelGraphConv(BaseConv): [-1.4335, -2.3758], [-1.4331, -2.3295]], device='cuda:0', grad_fn=) """ - MAX_IN_DEGREE_MFG = 500 def __init__( self, @@ -148,7 +139,7 @@ def reset_parameters(self): def forward( self, - g: dgl.DGLHeteroGraph, + g: Union[SparseGraph, dgl.DGLHeteroGraph], feat: torch.Tensor, etypes: torch.Tensor, max_in_degree: Optional[int] = None, @@ -167,49 +158,24 @@ def forward( so any input of other integer types will be casted into int32, thus introducing some overhead. Pass in int32 tensors directly for best performance. - max_in_degree : int, optional - Maximum in-degree of destination nodes. It is only effective when - :attr:`g` is a :class:`DGLBlock`, i.e., bipartite graph. When - :attr:`g` is generated from a neighbor sampler, the value should be - set to the corresponding :attr:`fanout`. If not given, - :attr:`max_in_degree` will be calculated on-the-fly. + max_in_degree : int + Maximum in-degree of destination nodes. When :attr:`g` is generated + from a neighbor sampler, the value should be set to the corresponding + :attr:`fanout`. This option is used to invoke the MFG-variant of + cugraph-ops kernel. Returns ------- torch.Tensor New node features. Shape: :math:`(|V|, D_{out})`. """ - offsets, indices, edge_ids = g.adj_tensors("csc") - edge_types_perm = etypes[edge_ids.long()].int() - - if g.is_block: - if max_in_degree is None: - max_in_degree = g.in_degrees().max().item() - - if max_in_degree < self.MAX_IN_DEGREE_MFG: - _graph = ops_torch.SampledHeteroCSC( - offsets, - indices, - edge_types_perm, - max_in_degree, - g.num_src_nodes(), - self.num_rels, - ) - else: - offsets_fg = self.pad_offsets(offsets, g.num_src_nodes() + 1) - _graph = ops_torch.StaticHeteroCSC( - offsets_fg, - indices, - edge_types_perm, - self.num_rels, - ) - else: - _graph = ops_torch.StaticHeteroCSC( - offsets, - indices, - edge_types_perm, - self.num_rels, - ) + _graph = self.get_cugraph_ops_HeteroCSC( + g, + num_edge_types=self.num_rels, + etypes=etypes, + is_bipartite=False, + max_in_degree=max_in_degree, + ) h = ops_torch.operators.agg_hg_basis_n2n_post( feat, diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py index 60f4c505e19..a3f946d7cb4 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/sageconv.py @@ -10,11 +10,8 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -"""Torch Module for GraphSAGE layer using the aggregation primitives in -cugraph-ops""" -# pylint: disable=no-member, arguments-differ, invalid-name, too-many-arguments -from __future__ import annotations -from typing import Optional, Union + +from typing import Optional, Tuple, Union from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph from cugraph.utilities.utils import import_optional @@ -27,22 +24,18 @@ class SAGEConv(BaseConv): r"""An accelerated GraphSAGE layer from `Inductive Representation Learning - on Large Graphs `__ that leverages the - highly-optimized aggregation primitives in cugraph-ops. - - See :class:`dgl.nn.pytorch.conv.SAGEConv` for mathematical model. - - This module depends on :code:`pylibcugraphops` package, which can be - installed via :code:`conda install -c nvidia pylibcugraphops>=23.02`. + on Large Graphs `, with the sparse + aggregation accelerated by cugraph-ops. 
Parameters ---------- - in_feats : int - Input feature size. + in_feats : int or tuple + Input feature size. If a scalar is given, the source and destination + nodes are required to be the same. out_feats : int Output feature size. aggregator_type : str - Aggregator type to use (``mean``, ``sum``, ``min``, ``max``). + Aggregator type to use ("mean", "sum", "min", "max", "pool", "gcn"). feat_drop : float Dropout rate on features, default: ``0``. bias : bool @@ -68,38 +61,57 @@ class SAGEConv(BaseConv): [-1.1690, 0.1952], [-1.1690, 0.1952]], device='cuda:0', grad_fn=) """ - MAX_IN_DEGREE_MFG = 500 + valid_aggr_types = {"mean", "sum", "min", "max", "pool", "gcn"} def __init__( self, - in_feats: int, + in_feats: Union[int, Tuple[int, int]], out_feats: int, aggregator_type: str = "mean", feat_drop: float = 0.0, bias: bool = True, ): super().__init__() - self.in_feats = in_feats - self.out_feats = out_feats - valid_aggr_types = {"max", "min", "mean", "sum"} - if aggregator_type not in valid_aggr_types: + + if aggregator_type not in self.valid_aggr_types: raise ValueError( - f"Invalid aggregator_type. Must be one of {valid_aggr_types}. " + f"Invalid aggregator_type. Must be one of {self.valid_aggr_types}. " f"But got '{aggregator_type}' instead." ) - self.aggr = aggregator_type + + self.aggregator_type = aggregator_type + self._aggr = aggregator_type + self.in_feats = in_feats + self.out_feats = out_feats + self.in_feats_src, self.in_feats_dst = dgl.utils.expand_as_pair(in_feats) self.feat_drop = nn.Dropout(feat_drop) - self.linear = nn.Linear(2 * in_feats, out_feats, bias=bias) + if self.aggregator_type == "gcn": + self._aggr = "mean" + self.lin = nn.Linear(self.in_feats_src, out_feats, bias=bias) + else: + self.lin = nn.Linear( + self.in_feats_src + self.in_feats_dst, out_feats, bias=bias + ) + + if self.aggregator_type == "pool": + self._aggr = "max" + self.pre_lin = nn.Linear(self.in_feats_src, self.in_feats_src) + else: + self.register_parameter("pre_lin", None) + + self.reset_parameters() def reset_parameters(self): r"""Reinitialize learnable parameters.""" - self.linear.reset_parameters() + self.lin.reset_parameters() + if self.pre_lin is not None: + self.pre_lin.reset_parameters() def forward( self, g: Union[SparseGraph, dgl.DGLHeteroGraph], - feat: torch.Tensor, + feat: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], max_in_degree: Optional[int] = None, ) -> torch.Tensor: r"""Forward computation. @@ -108,7 +120,7 @@ def forward( ---------- g : DGLGraph or SparseGraph The graph. - feat : torch.Tensor + feat : torch.Tensor or tuple Node features. Shape: :math:`(|V|, D_{in})`. max_in_degree : int Maximum in-degree of destination nodes. When :attr:`g` is generated @@ -121,36 +133,34 @@ def forward( torch.Tensor Output node features. Shape: :math:`(|V|, D_{out})`. """ - if max_in_degree is None: - max_in_degree = -1 - - if isinstance(g, SparseGraph): - assert "csc" in g.formats() - offsets, indices = g.csc() - _graph = ops_torch.CSC( - offsets=offsets, - indices=indices, - num_src_nodes=g.num_src_nodes(), - dst_max_in_degree=max_in_degree, - ) - elif isinstance(g, dgl.DGLHeteroGraph): - offsets, indices, _ = g.adj_tensors("csc") - _graph = ops_torch.CSC( - offsets=offsets, - indices=indices, - num_src_nodes=g.num_src_nodes(), - dst_max_in_degree=max_in_degree, - ) - else: - raise TypeError( - f"The graph has to be either a 'SparseGraph' or " - f"'dgl.DGLHeteroGraph', but got '{type(g)}'." 
- ) + feat_bipartite = isinstance(feat, (list, tuple)) + graph_bipartite = feat_bipartite or self.aggregator_type == "pool" + + _graph = self.get_cugraph_ops_CSC( + g, is_bipartite=graph_bipartite, max_in_degree=max_in_degree + ) - feat = self.feat_drop(feat) - h = ops_torch.operators.agg_concat_n2n(feat, _graph, self.aggr)[ + if feat_bipartite: + feat = (self.feat_drop(feat[0]), self.feat_drop(feat[1])) + else: + feat = self.feat_drop(feat) + + if self.aggregator_type == "pool": + if feat_bipartite: + feat = (self.pre_lin(feat[0]).relu(), feat[1]) + else: + feat = (self.pre_lin(feat).relu(), feat[: g.num_dst_nodes()]) + # force ctx.needs_input_grad=True in cugraph-ops autograd function + feat[0].requires_grad_() + feat[1].requires_grad_() + + out = ops_torch.operators.agg_concat_n2n(feat, _graph, self._aggr)[ : g.num_dst_nodes() ] - h = self.linear(h) - return h + if self.aggregator_type == "gcn": + out = out[:, : self.in_feats_src] + + out = self.lin(out) + + return out diff --git a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py index 5cd5fbbaebe..8481b9ee265 100644 --- a/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py +++ b/python/cugraph-dgl/cugraph_dgl/nn/conv/transformerconv.py @@ -10,9 +10,10 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + from typing import Optional, Tuple, Union -from cugraph_dgl.nn.conv.base import BaseConv +from cugraph_dgl.nn.conv.base import BaseConv, SparseGraph from cugraph.utilities.utils import import_optional dgl = import_optional("dgl") @@ -114,7 +115,7 @@ def reset_parameters(self): def forward( self, - g: dgl.DGLHeteroGraph, + g: Union[SparseGraph, dgl.DGLHeteroGraph], nfeat: Union[torch.Tensor, Tuple[torch.Tensor, torch.Tensor]], efeat: Optional[torch.Tensor] = None, ) -> torch.Tensor: @@ -130,17 +131,12 @@ def forward( efeat: torch.Tensor, optional Edge feature tensor. Default: ``None``. 
""" - offsets, indices, _ = g.adj_tensors("csc") - graph = ops_torch.CSC( - offsets=offsets, - indices=indices, - num_src_nodes=g.num_src_nodes(), - is_bipartite=True, - ) - - if isinstance(nfeat, torch.Tensor): + feat_bipartite = isinstance(nfeat, (list, tuple)) + if not feat_bipartite: nfeat = (nfeat, nfeat) + _graph = self.get_cugraph_ops_CSC(g, is_bipartite=True) + query = self.lin_query(nfeat[1][: g.num_dst_nodes()]) key = self.lin_key(nfeat[0]) value = self.lin_value(nfeat[0]) @@ -157,7 +153,7 @@ def forward( key_emb=key, query_emb=query, value_emb=value, - graph=graph, + graph=_graph, num_heads=self.num_heads, concat_heads=self.concat, edge_emb=efeat, diff --git a/python/cugraph-dgl/tests/conftest.py b/python/cugraph-dgl/tests/conftest.py index 6f8690d1140..a3863ed81fa 100644 --- a/python/cugraph-dgl/tests/conftest.py +++ b/python/cugraph-dgl/tests/conftest.py @@ -40,16 +40,19 @@ class SparseGraphData1: nnz = 6 src_ids = torch.IntTensor([0, 1, 2, 3, 2, 5]).cuda() dst_ids = torch.IntTensor([1, 2, 3, 4, 0, 3]).cuda() + values = torch.IntTensor([10, 20, 30, 40, 50, 60]).cuda() # CSR src_ids_sorted_by_src = torch.IntTensor([0, 1, 2, 2, 3, 5]).cuda() dst_ids_sorted_by_src = torch.IntTensor([1, 2, 0, 3, 4, 3]).cuda() csrc_ids = torch.IntTensor([0, 1, 2, 4, 5, 5, 6]).cuda() + values_csr = torch.IntTensor([10, 20, 50, 30, 40, 60]).cuda() # CSC src_ids_sorted_by_dst = torch.IntTensor([2, 0, 1, 5, 2, 3]).cuda() dst_ids_sorted_by_dst = torch.IntTensor([0, 1, 2, 3, 3, 4]).cuda() cdst_ids = torch.IntTensor([0, 1, 2, 3, 5, 6]).cuda() + values_csc = torch.IntTensor([50, 10, 20, 60, 30, 40]).cuda() @pytest.fixture diff --git a/python/cugraph-dgl/tests/nn/test_gatconv.py b/python/cugraph-dgl/tests/nn/test_gatconv.py index 7ed65645a28..ef3047dc2cd 100644 --- a/python/cugraph-dgl/tests/nn/test_gatconv.py +++ b/python/cugraph-dgl/tests/nn/test_gatconv.py @@ -10,69 +10,84 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
-# pylint: disable=too-many-arguments, too-many-locals import pytest -try: - import cugraph_dgl -except ModuleNotFoundError: - pytest.skip("cugraph_dgl not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional +from cugraph_dgl.nn.conv.base import SparseGraph +from cugraph_dgl.nn import GATConv as CuGraphGATConv from .common import create_graph1 -torch = import_optional("torch") -dgl = import_optional("dgl") +dgl = pytest.importorskip("dgl", reason="DGL not available") +torch = pytest.importorskip("torch", reason="PyTorch not available") + +ATOL = 1e-6 @pytest.mark.parametrize("bipartite", [False, True]) @pytest.mark.parametrize("idtype_int", [False, True]) @pytest.mark.parametrize("max_in_degree", [None, 8]) @pytest.mark.parametrize("num_heads", [1, 2, 7]) +@pytest.mark.parametrize("residual", [False, True]) @pytest.mark.parametrize("to_block", [False, True]) -def test_gatconv_equality(bipartite, idtype_int, max_in_degree, num_heads, to_block): - GATConv = dgl.nn.GATConv - CuGraphGATConv = cugraph_dgl.nn.GATConv - device = "cuda" - g = create_graph1().to(device) +@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) +def test_gatconv_equality( + bipartite, idtype_int, max_in_degree, num_heads, residual, to_block, sparse_format +): + from dgl.nn.pytorch import GATConv + + g = create_graph1().to("cuda") if idtype_int: g = g.int() - if to_block: g = dgl.to_block(g) + size = (g.num_src_nodes(), g.num_dst_nodes()) + if bipartite: in_feats = (10, 3) nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0], device=device), - torch.rand(g.num_dst_nodes(), in_feats[1], device=device), + torch.rand(g.num_src_nodes(), in_feats[0]).cuda(), + torch.rand(g.num_dst_nodes(), in_feats[1]).cuda(), ) else: in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats, device=device) + nfeat = torch.rand(g.num_src_nodes(), in_feats).cuda() out_feats = 2 + if sparse_format == "coo": + sg = SparseGraph( + size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" + ) + elif sparse_format == "csc": + offsets, indices, _ = g.adj_tensors("csc") + sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") + args = (in_feats, out_feats, num_heads) - kwargs = {"bias": False} + kwargs = {"bias": False, "allow_zero_in_degree": True} - conv1 = GATConv(*args, **kwargs, allow_zero_in_degree=True).to(device) + conv1 = GATConv(*args, **kwargs).cuda() out1 = conv1(g, nfeat) - conv2 = CuGraphGATConv(*args, **kwargs).to(device) + conv2 = CuGraphGATConv(*args, **kwargs).cuda() dim = num_heads * out_feats with torch.no_grad(): conv2.attn_weights.data[:dim] = conv1.attn_l.data.flatten() conv2.attn_weights.data[dim:] = conv1.attn_r.data.flatten() if bipartite: - conv2.fc_src.weight.data = conv1.fc_src.weight.data.detach().clone() - conv2.fc_dst.weight.data = conv1.fc_dst.weight.data.detach().clone() + conv2.lin_src.weight.data = conv1.fc_src.weight.data.detach().clone() + conv2.lin_dst.weight.data = conv1.fc_dst.weight.data.detach().clone() else: - conv2.fc.weight.data = conv1.fc.weight.data.detach().clone() - out2 = conv2(g, nfeat, max_in_degree=max_in_degree) + conv2.lin.weight.data = conv1.fc.weight.data.detach().clone() + if residual and conv2.residual: + conv2.lin_res.weight.data = conv1.fc_res.weight.data.detach().clone() - assert torch.allclose(out1, out2, atol=1e-6) + if sparse_format is not None: + out2 = conv2(sg, nfeat, max_in_degree=max_in_degree) + else: + out2 = conv2(g, nfeat, max_in_degree=max_in_degree) + + assert torch.allclose(out1, 
out2, atol=ATOL) grad_out1 = torch.rand_like(out1) grad_out2 = grad_out1.clone().detach() @@ -81,18 +96,18 @@ def test_gatconv_equality(bipartite, idtype_int, max_in_degree, num_heads, to_bl if bipartite: assert torch.allclose( - conv1.fc_src.weight.grad, conv2.fc_src.weight.grad, atol=1e-6 + conv1.fc_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL ) assert torch.allclose( - conv1.fc_dst.weight.grad, conv2.fc_dst.weight.grad, atol=1e-6 + conv1.fc_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL ) else: - assert torch.allclose(conv1.fc.weight.grad, conv2.fc.weight.grad, atol=1e-6) + assert torch.allclose(conv1.fc.weight.grad, conv2.lin.weight.grad, atol=ATOL) assert torch.allclose( torch.cat((conv1.attn_l.grad, conv1.attn_r.grad), dim=0), conv2.attn_weights.grad.view(2, num_heads, out_feats), - atol=1e-6, + atol=ATOL, ) @@ -106,10 +121,7 @@ def test_gatconv_equality(bipartite, idtype_int, max_in_degree, num_heads, to_bl def test_gatconv_edge_feats( bias, bipartite, concat, max_in_degree, num_heads, to_block, use_edge_feats ): - from cugraph_dgl.nn import GATConv - - device = "cuda" - g = create_graph1().to(device) + g = create_graph1().to("cuda") if to_block: g = dgl.to_block(g) @@ -117,24 +129,30 @@ def test_gatconv_edge_feats( if bipartite: in_feats = (10, 3) nfeat = ( - torch.rand(g.num_src_nodes(), in_feats[0], device=device), - torch.rand(g.num_dst_nodes(), in_feats[1], device=device), + torch.rand(g.num_src_nodes(), in_feats[0]).cuda(), + torch.rand(g.num_dst_nodes(), in_feats[1]).cuda(), ) else: in_feats = 10 - nfeat = torch.rand(g.num_src_nodes(), in_feats, device=device) + nfeat = torch.rand(g.num_src_nodes(), in_feats).cuda() out_feats = 2 if use_edge_feats: edge_feats = 3 - efeat = torch.rand(g.num_edges(), edge_feats, device=device) + efeat = torch.rand(g.num_edges(), edge_feats).cuda() else: edge_feats = None efeat = None - conv = GATConv( - in_feats, out_feats, num_heads, concat=concat, edge_feats=edge_feats, bias=bias - ).to(device) + conv = CuGraphGATConv( + in_feats, + out_feats, + num_heads, + concat=concat, + edge_feats=edge_feats, + bias=bias, + allow_zero_in_degree=True, + ).cuda() out = conv(g, nfeat, efeat=efeat, max_in_degree=max_in_degree) grad_out = torch.rand_like(out) diff --git a/python/cugraph-dgl/tests/nn/test_gatv2conv.py b/python/cugraph-dgl/tests/nn/test_gatv2conv.py new file mode 100644 index 00000000000..cc46a6e4b39 --- /dev/null +++ b/python/cugraph-dgl/tests/nn/test_gatv2conv.py @@ -0,0 +1,147 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+
+import pytest
+
+from cugraph_dgl.nn.conv.base import SparseGraph
+from cugraph_dgl.nn import GATv2Conv as CuGraphGATv2Conv
+from .common import create_graph1
+
+dgl = pytest.importorskip("dgl", reason="DGL not available")
+torch = pytest.importorskip("torch", reason="PyTorch not available")
+
+ATOL = 1e-6
+
+
+@pytest.mark.parametrize("bipartite", [False, True])
+@pytest.mark.parametrize("idtype_int", [False, True])
+@pytest.mark.parametrize("max_in_degree", [None, 8])
+@pytest.mark.parametrize("num_heads", [1, 2, 7])
+@pytest.mark.parametrize("residual", [False, True])
+@pytest.mark.parametrize("to_block", [False, True])
+@pytest.mark.parametrize("sparse_format", ["coo", "csc", None])
+def test_gatv2conv_equality(
+    bipartite, idtype_int, max_in_degree, num_heads, residual, to_block, sparse_format
+):
+    from dgl.nn.pytorch import GATv2Conv
+
+    g = create_graph1().to("cuda")
+
+    if idtype_int:
+        g = g.int()
+    if to_block:
+        g = dgl.to_block(g)
+
+    size = (g.num_src_nodes(), g.num_dst_nodes())
+
+    if bipartite:
+        in_feats = (10, 3)
+        nfeat = (
+            torch.rand(g.num_src_nodes(), in_feats[0]).cuda(),
+            torch.rand(g.num_dst_nodes(), in_feats[1]).cuda(),
+        )
+    else:
+        in_feats = 10
+        nfeat = torch.rand(g.num_src_nodes(), in_feats).cuda()
+    out_feats = 2
+
+    if sparse_format == "coo":
+        sg = SparseGraph(
+            size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc"
+        )
+    elif sparse_format == "csc":
+        offsets, indices, _ = g.adj_tensors("csc")
+        sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc")
+
+    args = (in_feats, out_feats, num_heads)
+    kwargs = {"bias": False, "allow_zero_in_degree": True}
+
+    conv1 = GATv2Conv(*args, **kwargs).cuda()
+    out1 = conv1(g, nfeat)
+
+    conv2 = CuGraphGATv2Conv(*args, **kwargs).cuda()
+    with torch.no_grad():
+        conv2.attn.data = conv1.attn.data.flatten()
+        conv2.lin_src.weight.data = conv1.fc_src.weight.data.detach().clone()
+        conv2.lin_dst.weight.data = conv1.fc_dst.weight.data.detach().clone()
+        if residual and conv2.residual:
+            conv2.lin_res.weight.data = conv1.fc_res.weight.data.detach().clone()
+
+    if sparse_format is not None:
+        out2 = conv2(sg, nfeat, max_in_degree=max_in_degree)
+    else:
+        out2 = conv2(g, nfeat, max_in_degree=max_in_degree)
+
+    assert torch.allclose(out1, out2, atol=ATOL)
+
+    grad_out1 = torch.rand_like(out1)
+    grad_out2 = grad_out1.clone().detach()
+    out1.backward(grad_out1)
+    out2.backward(grad_out2)
+
+    assert torch.allclose(
+        conv1.fc_src.weight.grad, conv2.lin_src.weight.grad, atol=ATOL
+    )
+    assert torch.allclose(
+        conv1.fc_dst.weight.grad, conv2.lin_dst.weight.grad, atol=ATOL
+    )
+
+    assert torch.allclose(conv1.attn.grad.flatten(), conv2.attn.grad, atol=ATOL)
+
+
+@pytest.mark.parametrize("bias", [False, True])
+@pytest.mark.parametrize("bipartite", [False, True])
+@pytest.mark.parametrize("concat", [False, True])
+@pytest.mark.parametrize("max_in_degree", [None, 8, 800])
+@pytest.mark.parametrize("num_heads", [1, 2, 7])
+@pytest.mark.parametrize("to_block", [False, True])
+@pytest.mark.parametrize("use_edge_feats", [False, True])
+def test_gatv2conv_edge_feats(
+    bias, bipartite, concat, max_in_degree, num_heads, to_block, use_edge_feats
+):
+    g = create_graph1().to("cuda")
+
+    if to_block:
+        g = dgl.to_block(g)
+
+    if bipartite:
+        in_feats = (10, 3)
+        nfeat = (
+            torch.rand(g.num_src_nodes(), in_feats[0]).cuda(),
+            torch.rand(g.num_dst_nodes(), in_feats[1]).cuda(),
+        )
+    else:
+        in_feats = 10
+        nfeat = torch.rand(g.num_src_nodes(), in_feats).cuda()
+    out_feats = 2
+
+    if use_edge_feats:
+        edge_feats = 3
+ efeat = torch.rand(g.num_edges(), edge_feats).cuda() + else: + edge_feats = None + efeat = None + + conv = CuGraphGATv2Conv( + in_feats, + out_feats, + num_heads, + concat=concat, + edge_feats=edge_feats, + bias=bias, + allow_zero_in_degree=True, + ).cuda() + out = conv(g, nfeat, efeat=efeat, max_in_degree=max_in_degree) + + grad_out = torch.rand_like(out) + out.backward(grad_out) diff --git a/python/cugraph-dgl/tests/nn/test_relgraphconv.py b/python/cugraph-dgl/tests/nn/test_relgraphconv.py index d2ae6a23978..901f9ba1433 100644 --- a/python/cugraph-dgl/tests/nn/test_relgraphconv.py +++ b/python/cugraph-dgl/tests/nn/test_relgraphconv.py @@ -10,20 +10,17 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# pylint: disable=too-many-arguments, too-many-locals import pytest -try: - import cugraph_dgl -except ModuleNotFoundError: - pytest.skip("cugraph_dgl not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional +from cugraph_dgl.nn.conv.base import SparseGraph +from cugraph_dgl.nn import RelGraphConv as CuGraphRelGraphConv from .common import create_graph1 -torch = import_optional("torch") -dgl = import_optional("dgl") +dgl = pytest.importorskip("dgl", reason="DGL not available") +torch = pytest.importorskip("torch", reason="PyTorch not available") + +ATOL = 1e-6 @pytest.mark.parametrize("idtype_int", [False, True]) @@ -32,12 +29,17 @@ @pytest.mark.parametrize("regularizer", [None, "basis"]) @pytest.mark.parametrize("self_loop", [False, True]) @pytest.mark.parametrize("to_block", [False, True]) +@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) def test_relgraphconv_equality( - idtype_int, max_in_degree, num_bases, regularizer, self_loop, to_block + idtype_int, + max_in_degree, + num_bases, + regularizer, + self_loop, + to_block, + sparse_format, ): - RelGraphConv = dgl.nn.RelGraphConv - CuGraphRelGraphConv = cugraph_dgl.nn.RelGraphConv - device = "cuda" + from dgl.nn.pytorch import RelGraphConv in_feat, out_feat, num_rels = 10, 2, 3 args = (in_feat, out_feat, num_rels) @@ -47,34 +49,57 @@ def test_relgraphconv_equality( "bias": False, "self_loop": self_loop, } - g = create_graph1().to(device) - g.edata[dgl.ETYPE] = torch.randint(num_rels, (g.num_edges(),)).to(device) + g = create_graph1().to("cuda") + g.edata[dgl.ETYPE] = torch.randint(num_rels, (g.num_edges(),)).cuda() + if idtype_int: g = g.int() if to_block: g = dgl.to_block(g) - feat = torch.rand(g.num_src_nodes(), in_feat).to(device) + + size = (g.num_src_nodes(), g.num_dst_nodes()) + feat = torch.rand(g.num_src_nodes(), in_feat).cuda() + + if sparse_format == "coo": + sg = SparseGraph( + size=size, + src_ids=g.edges()[0], + dst_ids=g.edges()[1], + values=g.edata[dgl.ETYPE], + formats="csc", + ) + elif sparse_format == "csc": + offsets, indices, perm = g.adj_tensors("csc") + etypes = g.edata[dgl.ETYPE][perm] + sg = SparseGraph( + size=size, src_ids=indices, cdst_ids=offsets, values=etypes, formats="csc" + ) torch.manual_seed(0) - conv1 = RelGraphConv(*args, **kwargs).to(device) + conv1 = RelGraphConv(*args, **kwargs).cuda() torch.manual_seed(0) kwargs["apply_norm"] = False - conv2 = CuGraphRelGraphConv(*args, **kwargs).to(device) + conv2 = CuGraphRelGraphConv(*args, **kwargs).cuda() out1 = conv1(g, feat, g.edata[dgl.ETYPE]) - out2 = conv2(g, feat, g.edata[dgl.ETYPE], max_in_degree=max_in_degree) - assert torch.allclose(out1, out2, atol=1e-06) + + if 
sparse_format is not None: + out2 = conv2(sg, feat, sg.values(), max_in_degree=max_in_degree) + else: + out2 = conv2(g, feat, g.edata[dgl.ETYPE], max_in_degree=max_in_degree) + + assert torch.allclose(out1, out2, atol=ATOL) grad_out = torch.rand_like(out1) out1.backward(grad_out) out2.backward(grad_out) end = -1 if self_loop else None - assert torch.allclose(conv1.linear_r.W.grad, conv2.W.grad[:end], atol=1e-6) + assert torch.allclose(conv1.linear_r.W.grad, conv2.W.grad[:end], atol=ATOL) if self_loop: - assert torch.allclose(conv1.loop_weight.grad, conv2.W.grad[-1], atol=1e-6) + assert torch.allclose(conv1.loop_weight.grad, conv2.W.grad[-1], atol=ATOL) if regularizer is not None: - assert torch.allclose(conv1.linear_r.coeff.grad, conv2.coeff.grad, atol=1e-6) + assert torch.allclose(conv1.linear_r.coeff.grad, conv2.coeff.grad, atol=ATOL) diff --git a/python/cugraph-dgl/tests/nn/test_sageconv.py b/python/cugraph-dgl/tests/nn/test_sageconv.py index 447bbe49460..e2acf9e6596 100644 --- a/python/cugraph-dgl/tests/nn/test_sageconv.py +++ b/python/cugraph-dgl/tests/nn/test_sageconv.py @@ -10,31 +10,33 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. -# pylint: disable=too-many-arguments, too-many-locals import pytest -from cugraph.utilities.utils import import_optional from cugraph_dgl.nn.conv.base import SparseGraph from cugraph_dgl.nn import SAGEConv as CuGraphSAGEConv from .common import create_graph1 -torch = import_optional("torch") -dgl = import_optional("dgl") +dgl = pytest.importorskip("dgl", reason="DGL not available") +torch = pytest.importorskip("torch", reason="PyTorch not available") +ATOL = 1e-6 + +@pytest.mark.parametrize("aggr", ["mean", "pool"]) @pytest.mark.parametrize("bias", [False, True]) +@pytest.mark.parametrize("bipartite", [False, True]) @pytest.mark.parametrize("idtype_int", [False, True]) @pytest.mark.parametrize("max_in_degree", [None, 8]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) -def test_SAGEConv_equality(bias, idtype_int, max_in_degree, to_block, sparse_format): - SAGEConv = dgl.nn.SAGEConv - device = "cuda" +def test_sageconv_equality( + aggr, bias, bipartite, idtype_int, max_in_degree, to_block, sparse_format +): + from dgl.nn.pytorch import SAGEConv - in_feat, out_feat = 5, 2 - kwargs = {"aggregator_type": "mean", "bias": bias} - g = create_graph1().to(device) + kwargs = {"aggregator_type": aggr, "bias": bias} + g = create_graph1().to("cuda") if idtype_int: g = g.int() @@ -42,7 +44,17 @@ def test_SAGEConv_equality(bias, idtype_int, max_in_degree, to_block, sparse_for g = dgl.to_block(g) size = (g.num_src_nodes(), g.num_dst_nodes()) - feat = torch.rand(g.num_src_nodes(), in_feat).to(device) + + if bipartite: + in_feats = (5, 3) + feat = ( + torch.rand(size[0], in_feats[0], requires_grad=True).cuda(), + torch.rand(size[1], in_feats[1], requires_grad=True).cuda(), + ) + else: + in_feats = 5 + feat = torch.rand(size[0], in_feats).cuda() + out_feats = 2 if sparse_format == "coo": sg = SparseGraph( @@ -52,39 +64,38 @@ def test_SAGEConv_equality(bias, idtype_int, max_in_degree, to_block, sparse_for offsets, indices, _ = g.adj_tensors("csc") sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") - torch.manual_seed(0) - conv1 = SAGEConv(in_feat, out_feat, **kwargs).to(device) - - torch.manual_seed(0) - conv2 = CuGraphSAGEConv(in_feat, out_feat, 
**kwargs).to(device) + conv1 = SAGEConv(in_feats, out_feats, **kwargs).cuda() + conv2 = CuGraphSAGEConv(in_feats, out_feats, **kwargs).cuda() + in_feats_src = conv2.in_feats_src with torch.no_grad(): - conv2.linear.weight.data[:, :in_feat] = conv1.fc_neigh.weight.data - conv2.linear.weight.data[:, in_feat:] = conv1.fc_self.weight.data + conv2.lin.weight.data[:, :in_feats_src] = conv1.fc_neigh.weight.data + conv2.lin.weight.data[:, in_feats_src:] = conv1.fc_self.weight.data if bias: - conv2.linear.bias.data[:] = conv1.fc_self.bias.data + conv2.lin.bias.data[:] = conv1.fc_self.bias.data + if aggr == "pool": + conv2.pre_lin.weight.data[:] = conv1.fc_pool.weight.data + conv2.pre_lin.bias.data[:] = conv1.fc_pool.bias.data out1 = conv1(g, feat) if sparse_format is not None: out2 = conv2(sg, feat, max_in_degree=max_in_degree) else: out2 = conv2(g, feat, max_in_degree=max_in_degree) - assert torch.allclose(out1, out2, atol=1e-06) + assert torch.allclose(out1, out2, atol=ATOL) grad_out = torch.rand_like(out1) out1.backward(grad_out) out2.backward(grad_out) assert torch.allclose( conv1.fc_neigh.weight.grad, - conv2.linear.weight.grad[:, :in_feat], - atol=1e-6, + conv2.lin.weight.grad[:, :in_feats_src], + atol=ATOL, ) assert torch.allclose( conv1.fc_self.weight.grad, - conv2.linear.weight.grad[:, in_feat:], - atol=1e-6, + conv2.lin.weight.grad[:, in_feats_src:], + atol=ATOL, ) if bias: - assert torch.allclose( - conv1.fc_self.bias.grad, conv2.linear.bias.grad, atol=1e-6 - ) + assert torch.allclose(conv1.fc_self.bias.grad, conv2.lin.bias.grad, atol=ATOL) diff --git a/python/cugraph-dgl/tests/nn/test_sparsegraph.py b/python/cugraph-dgl/tests/nn/test_sparsegraph.py index 3fb01575d66..09c0df202ff 100644 --- a/python/cugraph-dgl/tests/nn/test_sparsegraph.py +++ b/python/cugraph-dgl/tests/nn/test_sparsegraph.py @@ -19,32 +19,42 @@ def test_coo2csc(sparse_graph_1): data = sparse_graph_1 - values = torch.ones(data.nnz).cuda() + g = SparseGraph( - size=data.size, src_ids=data.src_ids, dst_ids=data.dst_ids, formats="csc" + size=data.size, + src_ids=data.src_ids, + dst_ids=data.dst_ids, + values=data.values, + formats=["csc"], ) - cdst_ids, src_ids = g.csc() + cdst_ids, src_ids, values = g.csc() new = torch.sparse_csc_tensor(cdst_ids, src_ids, values).cuda() old = torch.sparse_coo_tensor( - torch.vstack((data.src_ids, data.dst_ids)), values + torch.vstack((data.src_ids, data.dst_ids)), data.values ).cuda() torch.allclose(new.to_dense(), old.to_dense()) -def test_csc2coo(sparse_graph_1): +def test_csc_input(sparse_graph_1): data = sparse_graph_1 - values = torch.ones(data.nnz).cuda() + g = SparseGraph( size=data.size, src_ids=data.src_ids_sorted_by_dst, cdst_ids=data.cdst_ids, - formats="coo", + values=data.values_csc, + formats=["coo", "csc", "csr"], ) - src_ids, dst_ids = g.coo() + src_ids, dst_ids, values = g.coo() new = torch.sparse_coo_tensor(torch.vstack((src_ids, dst_ids)), values).cuda() old = torch.sparse_csc_tensor( - data.cdst_ids, data.src_ids_sorted_by_dst, values + data.cdst_ids, data.src_ids_sorted_by_dst, data.values_csc ).cuda() torch.allclose(new.to_dense(), old.to_dense()) + + csrc_ids, dst_ids, values = g.csr() + + new = torch.sparse_csr_tensor(csrc_ids, dst_ids, values).cuda() + torch.allclose(new.to_dense(), old.to_dense()) diff --git a/python/cugraph-dgl/tests/nn/test_transformerconv.py b/python/cugraph-dgl/tests/nn/test_transformerconv.py index 00476b9f0bb..b2b69cb35ab 100644 --- a/python/cugraph-dgl/tests/nn/test_transformerconv.py +++ 
b/python/cugraph-dgl/tests/nn/test_transformerconv.py @@ -13,16 +13,14 @@ import pytest -try: - from cugraph_dgl.nn import TransformerConv -except ModuleNotFoundError: - pytest.skip("cugraph_dgl not available", allow_module_level=True) - -from cugraph.utilities.utils import import_optional +from cugraph_dgl.nn.conv.base import SparseGraph +from cugraph_dgl.nn import TransformerConv from .common import create_graph1 -torch = import_optional("torch") -dgl = import_optional("dgl") +dgl = pytest.importorskip("dgl", reason="DGL not available") +torch = pytest.importorskip("torch", reason="PyTorch not available") + +ATOL = 1e-6 @pytest.mark.parametrize("beta", [False, True]) @@ -32,8 +30,16 @@ @pytest.mark.parametrize("num_heads", [1, 2, 3, 4]) @pytest.mark.parametrize("to_block", [False, True]) @pytest.mark.parametrize("use_edge_feats", [False, True]) -def test_TransformerConv( - beta, bipartite_node_feats, concat, idtype_int, num_heads, to_block, use_edge_feats +@pytest.mark.parametrize("sparse_format", ["coo", "csc", None]) +def test_transformerconv( + beta, + bipartite_node_feats, + concat, + idtype_int, + num_heads, + to_block, + use_edge_feats, + sparse_format, ): device = "cuda" g = create_graph1().to(device) @@ -44,6 +50,15 @@ def test_TransformerConv( if to_block: g = dgl.to_block(g) + size = (g.num_src_nodes(), g.num_dst_nodes()) + if sparse_format == "coo": + sg = SparseGraph( + size=size, src_ids=g.edges()[0], dst_ids=g.edges()[1], formats="csc" + ) + elif sparse_format == "csc": + offsets, indices, _ = g.adj_tensors("csc") + sg = SparseGraph(size=size, src_ids=indices, cdst_ids=offsets, formats="csc") + if bipartite_node_feats: in_node_feats = (5, 3) nfeat = ( @@ -71,6 +86,10 @@ def test_TransformerConv( edge_feats=edge_feats, ).to(device) - out = conv(g, nfeat, efeat) + if sparse_format is not None: + out = conv(sg, nfeat, efeat) + else: + out = conv(g, nfeat, efeat) + grad_out = torch.rand_like(out) out.backward(grad_out) diff --git a/python/cugraph-dgl/tests/test_dataset.py b/python/cugraph-dgl/tests/test_dataset.py index 69d50261e55..5db443dc0d8 100644 --- a/python/cugraph-dgl/tests/test_dataset.py +++ b/python/cugraph-dgl/tests/test_dataset.py @@ -123,6 +123,6 @@ def test_homogeneous_sampled_graphs_from_dataframe(return_type, seed_node): assert dgl_block.num_src_nodes() == cugraph_dgl_graph.num_src_nodes() assert dgl_block.num_dst_nodes() == cugraph_dgl_graph.num_dst_nodes() dgl_offsets, dgl_indices, _ = dgl_block.adj_tensors("csc") - cugraph_offsets, cugraph_indices = cugraph_dgl_graph.csc() + cugraph_offsets, cugraph_indices, _ = cugraph_dgl_graph.csc() assert torch.equal(dgl_offsets.to("cpu"), cugraph_offsets.to("cpu")) assert torch.equal(dgl_indices.to("cpu"), cugraph_indices.to("cpu")) diff --git a/python/cugraph-dgl/tests/test_from_dgl_hetrograph.py b/python/cugraph-dgl/tests/test_from_dgl_heterograph.py similarity index 100% rename from python/cugraph-dgl/tests/test_from_dgl_hetrograph.py rename to python/cugraph-dgl/tests/test_from_dgl_heterograph.py
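Taken together, the conv-layer changes above converge GATConv, GATv2Conv, RelGraphConv, SAGEConv and TransformerConv on one input contract: the forward pass accepts either a dgl.DGLHeteroGraph or a cugraph-dgl SparseGraph, with the shared get_cugraph_ops_CSC / get_cugraph_ops_HeteroCSC helpers building the cugraph-ops graph internally. The snippet below is a minimal usage sketch, not part of this diff; it assumes only the SparseGraph and SAGEConv interfaces exercised by the tests above, and the graph size and feature dimensions are illustrative.

# Minimal sketch (assumed usage, mirroring the tests above; node counts and
# feature sizes are made up for illustration). Requires a CUDA device and
# pylibcugraphops, like the rest of cugraph-dgl.
import torch

from cugraph_dgl.nn import SAGEConv
from cugraph_dgl.nn.conv.base import SparseGraph

num_src, num_dst = 6, 5
src_ids = torch.tensor([0, 1, 2, 3, 2, 5], dtype=torch.int32).cuda()
dst_ids = torch.tensor([1, 2, 3, 4, 0, 3], dtype=torch.int32).cuda()

# COO edge list in, CSC out: "csc" is the layout the cugraph-ops kernels consume.
sg = SparseGraph(
    size=(num_src, num_dst), src_ids=src_ids, dst_ids=dst_ids, formats="csc"
)

feat = torch.rand(num_src, 10).cuda()
conv = SAGEConv(10, 2, aggregator_type="mean").cuda()
out = conv(sg, feat)  # expected shape: (num_dst, 2)

The same SparseGraph object can be passed to the other refactored layers (e.g. GATv2Conv or TransformerConv); for bipartite inputs, a (src_feat, dst_feat) tuple is passed in place of a single feature tensor, as in the tests above.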