diff --git a/ci/build_wheel_cugraph.sh b/ci/build_wheel_cugraph.sh index 5b5061f67c2..0a722c88c3e 100755 --- a/ci/build_wheel_cugraph.sh +++ b/ci/build_wheel_cugraph.sh @@ -12,6 +12,6 @@ RAPIDS_PY_CUDA_SUFFIX="$(rapids-wheel-ctk-name-gen ${RAPIDS_CUDA_VERSION})" RAPIDS_PY_WHEEL_NAME=pylibcugraph_${RAPIDS_PY_CUDA_SUFFIX} rapids-download-wheels-from-s3 ./local-pylibcugraph export PIP_FIND_LINKS=$(pwd)/local-pylibcugraph -export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" +export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" ./ci/build_wheel.sh cugraph python/cugraph diff --git a/ci/build_wheel_pylibcugraph.sh b/ci/build_wheel_pylibcugraph.sh index 8d365bc250b..9e236c145ce 100755 --- a/ci/build_wheel_pylibcugraph.sh +++ b/ci/build_wheel_pylibcugraph.sh @@ -3,6 +3,6 @@ set -euo pipefail -export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DCUGRAPH_BUILD_WHEELS=ON -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" +export SKBUILD_CONFIGURE_OPTIONS="-DDETECT_CONDA_ENV=OFF -DFIND_CUGRAPH_CPP=OFF -DCPM_cugraph-ops_SOURCE=${GITHUB_WORKSPACE}/cugraph-ops/" ./ci/build_wheel.sh pylibcugraph python/pylibcugraph diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 67bf1c25b09..a772651625d 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -423,7 +423,7 @@ add_library(cugraph_c src/c_api/core_result.cpp src/c_api/extract_ego.cpp src/c_api/k_core.cpp - src/c_api/hierarchical_clustering_result.cpp + src/c_api/hierarchical_clustering_result.cpp src/c_api/induced_subgraph.cpp src/c_api/capi_helper.cu src/c_api/legacy_spectral.cpp diff --git a/cpp/include/cugraph/graph.hpp b/cpp/include/cugraph/graph.hpp index 60b9f1a4054..a723fde24df 100644 --- a/cpp/include/cugraph/graph.hpp +++ b/cpp/include/cugraph/graph.hpp @@ -90,24 +90,25 @@ class graph_t meta, bool do_expensive_check = false); + edge_t number_of_edges() const { return this->number_of_edges_; } + graph_view_t view() const { - std::vector offsets(edge_partition_offsets_.size(), nullptr); - std::vector indices(edge_partition_indices_.size(), nullptr); - auto dcs_nzd_vertices = edge_partition_dcs_nzd_vertices_ - ? std::make_optional>( - (*edge_partition_dcs_nzd_vertices_).size(), nullptr) - : std::nullopt; - auto dcs_nzd_vertex_counts = edge_partition_dcs_nzd_vertex_counts_ - ? std::make_optional>( - (*edge_partition_dcs_nzd_vertex_counts_).size(), vertex_t{0}) - : std::nullopt; + std::vector> offsets(edge_partition_offsets_.size()); + std::vector> indices(edge_partition_indices_.size()); + auto dcs_nzd_vertices = edge_partition_dcs_nzd_vertices_ + ? 
std::make_optional>>( + (*edge_partition_dcs_nzd_vertices_).size()) + : std::nullopt; for (size_t i = 0; i < offsets.size(); ++i) { - offsets[i] = edge_partition_offsets_[i].data(); - indices[i] = edge_partition_indices_[i].data(); + offsets[i] = raft::device_span(edge_partition_offsets_[i].data(), + edge_partition_offsets_[i].size()); + indices[i] = raft::device_span(edge_partition_indices_[i].data(), + edge_partition_indices_[i].size()); if (dcs_nzd_vertices) { - (*dcs_nzd_vertices)[i] = (*edge_partition_dcs_nzd_vertices_)[i].data(); - (*dcs_nzd_vertex_counts)[i] = (*edge_partition_dcs_nzd_vertex_counts_)[i]; + (*dcs_nzd_vertices)[i] = + raft::device_span((*edge_partition_dcs_nzd_vertices_)[i].data(), + (*edge_partition_dcs_nzd_vertices_)[i].size()); } } @@ -196,15 +197,13 @@ class graph_t( - *(this->handle_ptr()), offsets, indices, dcs_nzd_vertices, - dcs_nzd_vertex_counts, graph_view_meta_t{ this->number_of_vertices(), this->number_of_edges(), - this->graph_properties(), + this->properties_, partition_, edge_partition_segment_offsets_, local_sorted_unique_edge_srcs, @@ -224,7 +223,6 @@ class graph_t>> edge_partition_dcs_nzd_vertices_{ std::nullopt}; - std::optional> edge_partition_dcs_nzd_vertex_counts_{std::nullopt}; partition_t partition_{}; // segment offsets within the vertex partition based on vertex degree @@ -283,16 +281,15 @@ class graph_t meta, bool do_expensive_check = false); + edge_t number_of_edges() const { return this->number_of_edges_; } + graph_view_t view() const { return graph_view_t( - *(this->handle_ptr()), - offsets_.data(), - indices_.data(), - graph_view_meta_t{this->number_of_vertices(), - this->number_of_edges(), - this->graph_properties(), - segment_offsets_}); + raft::device_span(offsets_.data(), offsets_.size()), + raft::device_span(indices_.data(), indices_.size()), + graph_view_meta_t{ + this->number_of_vertices(), this->number_of_edges(), this->properties_, segment_offsets_}); } private: diff --git a/cpp/include/cugraph/graph_view.hpp b/cpp/include/cugraph/graph_view.hpp index d79d4635c54..53c66c6483e 100644 --- a/cpp/include/cugraph/graph_view.hpp +++ b/cpp/include/cugraph/graph_view.hpp @@ -258,21 +258,12 @@ class graph_base_t { public: graph_base_t() = default; - graph_base_t(raft::handle_t const& handle, - vertex_t number_of_vertices, - edge_t number_of_edges, - graph_properties_t properties) - : handle_ptr_(&handle), - number_of_vertices_(number_of_vertices), + graph_base_t(vertex_t number_of_vertices, edge_t number_of_edges, graph_properties_t properties) + : number_of_vertices_(number_of_vertices), number_of_edges_(number_of_edges), properties_(properties){}; vertex_t number_of_vertices() const { return number_of_vertices_; } - edge_t number_of_edges() const - { - CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); - return number_of_edges_; - } template std::enable_if_t::value, bool> is_valid_vertex(vertex_type v) const @@ -289,33 +280,12 @@ class graph_base_t { bool is_symmetric() const { return properties_.is_symmetric; } bool is_multigraph() const { return properties_.is_multigraph; } - void attach_edge_mask(edge_property_view_t edge_mask_view) - { - edge_mask_view_ = edge_mask_view; - } - - void clear_edge_mask() { edge_mask_view_ = std::nullopt; } - - bool has_edge_mask() const { return edge_mask_view_.has_value(); } - - std::optional> edge_mask_view() const - { - return edge_mask_view_; - } - protected: - raft::handle_t const* handle_ptr() const { return handle_ptr_; }; - graph_properties_t graph_properties() const { return 
properties_; } - - private: - raft::handle_t const* handle_ptr_{nullptr}; - - vertex_t number_of_vertices_{0}; edge_t number_of_edges_{0}; - graph_properties_t properties_{}; - std::optional> edge_mask_view_{std::nullopt}; + private: + vertex_t number_of_vertices_{0}; }; } // namespace detail @@ -405,11 +375,10 @@ class graph_view_t const& edge_partition_offsets, - std::vector const& edge_partition_indices, - std::optional> const& edge_partition_dcs_nzd_vertices, - std::optional> const& edge_partition_dcs_nzd_vertex_counts, + graph_view_t(std::vector> const& edge_partition_offsets, + std::vector> const& edge_partition_indices, + std::optional>> const& + edge_partition_dcs_nzd_vertices, graph_view_meta_t meta); std::vector vertex_partition_range_offsets() const @@ -624,25 +593,16 @@ class graph_view_tlocal_edge_partition_src_value_start_offset(partition_idx); } std::optional major_hypersparse_first{std::nullopt}; - vertex_t offset_size = (major_range_last - major_range_first) + 1; if (this->use_dcs()) { major_hypersparse_first = major_range_first + (*(this->local_edge_partition_segment_offsets( partition_idx)))[detail::num_sparse_segments_per_vertex_partition]; - offset_size = ((*major_hypersparse_first) - major_range_first) + - (*edge_partition_dcs_nzd_vertex_counts_)[partition_idx] + 1; } return edge_partition_view_t( - raft::device_span(edge_partition_offsets_[partition_idx], - edge_partition_offsets_[partition_idx] + offset_size), - raft::device_span( - edge_partition_indices_[partition_idx], - edge_partition_indices_[partition_idx] + edge_partition_number_of_edges_[partition_idx]), + edge_partition_offsets_[partition_idx], + edge_partition_indices_[partition_idx], edge_partition_dcs_nzd_vertices_ - ? std::make_optional>( - (*edge_partition_dcs_nzd_vertices_)[partition_idx], - (*edge_partition_dcs_nzd_vertices_)[partition_idx] + - (*edge_partition_dcs_nzd_vertex_counts_)[partition_idx]) + ? 
std::make_optional((*edge_partition_dcs_nzd_vertices_)[partition_idx]) : std::nullopt, major_hypersparse_first, major_range_first, @@ -652,6 +612,16 @@ class graph_view_thas_edge_mask()), "unimplemented."); + return this->number_of_edges_; + } + + edge_t compute_number_of_edges(raft::handle_t const& handle) const; + rmm::device_uvector compute_in_degrees(raft::handle_t const& handle) const; rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; @@ -751,15 +721,26 @@ class graph_view_t edge_mask_view) + { + edge_mask_view_ = edge_mask_view; + } + + void clear_edge_mask() { edge_mask_view_ = std::nullopt; } + + bool has_edge_mask() const { return edge_mask_view_.has_value(); } + + std::optional> edge_mask_view() const + { + return edge_mask_view_; + } + private: - std::vector edge_partition_offsets_{}; - std::vector edge_partition_indices_{}; + std::vector> edge_partition_offsets_{}; + std::vector> edge_partition_indices_{}; // relevant only if we use the CSR + DCSR (or CSC + DCSC) hybrid format - std::optional> edge_partition_dcs_nzd_vertices_{}; - std::optional> edge_partition_dcs_nzd_vertex_counts_{}; - - std::vector edge_partition_number_of_edges_{}; + std::optional>> edge_partition_dcs_nzd_vertices_{}; partition_t partition_{}; @@ -796,6 +777,8 @@ class graph_view_t>, std::optional /* dummy */> local_sorted_unique_edge_dst_vertex_partition_offsets_{std::nullopt}; + + std::optional> edge_mask_view_{std::nullopt}; }; // single-GPU version @@ -808,9 +791,8 @@ class graph_view_t offsets, + raft::device_span indices, graph_view_meta_t meta); std::vector vertex_partition_range_offsets() const @@ -924,11 +906,19 @@ class graph_view_t( - raft::device_span(offsets_, offsets_ + (this->number_of_vertices() + 1)), - raft::device_span(indices_, indices_ + this->number_of_edges()), - this->number_of_vertices()); + offsets_, indices_, this->number_of_vertices()); + } + + // FIXME: deprecated, replaced with copmute_number_of_edges (which works with or without edge + // masking) + edge_t number_of_edges() const + { + CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); + return this->number_of_edges_; } + edge_t compute_number_of_edges(raft::handle_t const& handle) const; + rmm::device_uvector compute_in_degrees(raft::handle_t const& handle) const; rmm::device_uvector compute_out_degrees(raft::handle_t const& handle) const; @@ -1016,12 +1006,28 @@ class graph_view_t edge_mask_view) + { + edge_mask_view_ = edge_mask_view; + } + + void clear_edge_mask() { edge_mask_view_ = std::nullopt; } + + bool has_edge_mask() const { return edge_mask_view_.has_value(); } + + std::optional> edge_mask_view() const + { + return edge_mask_view_; + } + private: - edge_t const* offsets_{nullptr}; - vertex_t const* indices_{nullptr}; + raft::device_span offsets_{}; + raft::device_span indices_{}; // segment offsets based on vertex degree, relevant only if vertex IDs are renumbered std::optional> segment_offsets_{std::nullopt}; + + std::optional> edge_mask_view_{std::nullopt}; }; } // namespace cugraph diff --git a/cpp/include/cugraph/utilities/misc_utils.cuh b/cpp/include/cugraph/utilities/misc_utils.cuh index 28e2853727f..04aeac49c9d 100644 --- a/cpp/include/cugraph/utilities/misc_utils.cuh +++ b/cpp/include/cugraph/utilities/misc_utils.cuh @@ -26,6 +26,8 @@ #include #include +#include + #include #include #include @@ -44,7 +46,8 @@ std::tuple, std::vector> compute_offset_aligned_ed { auto search_offset_first = thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{1}), - 
[approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; }); + cuda::proclaim_return_type( + [approx_edge_chunk_size] __device__(auto i) { return i * approx_edge_chunk_size; })); auto num_chunks = (num_edges + approx_edge_chunk_size - 1) / approx_edge_chunk_size; if (num_chunks > 1) { diff --git a/cpp/include/cugraph/utilities/shuffle_comm.cuh b/cpp/include/cugraph/utilities/shuffle_comm.cuh index ab6a54cc1c0..414d9b36992 100644 --- a/cpp/include/cugraph/utilities/shuffle_comm.cuh +++ b/cpp/include/cugraph/utilities/shuffle_comm.cuh @@ -40,6 +40,8 @@ #include #include +#include + #include #include #include @@ -197,12 +199,13 @@ void multi_partition(ValueIterator value_first, value_last, thrust::make_zip_iterator( thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), - [value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) { - auto group_id = value_to_group_id_op(value); - cuda::std::atomic_ref counter(counts[group_id - group_first]); - return thrust::make_tuple(group_id, - counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); - }); + cuda::proclaim_return_type>( + [value_to_group_id_op, group_first, counts = counts.data()] __device__(auto value) { + auto group_id = value_to_group_id_op(value); + cuda::std::atomic_ref counter(counts[group_id - group_first]); + return thrust::make_tuple(group_id, + counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); + })); rmm::device_uvector displacements(num_groups, stream_view); thrust::exclusive_scan( @@ -245,17 +248,19 @@ void multi_partition(KeyIterator key_first, rmm::device_uvector group_ids(num_keys, stream_view); rmm::device_uvector intra_partition_offsets(num_keys, stream_view); thrust::fill(rmm::exec_policy(stream_view), counts.begin(), counts.end(), size_t{0}); - thrust::transform(rmm::exec_policy(stream_view), - key_first, - key_last, - thrust::make_zip_iterator( - thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), - [key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) { - auto group_id = key_to_group_id_op(key); - cuda::std::atomic_ref counter(counts[group_id - group_first]); - return thrust::make_tuple( - group_id, counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); - }); + thrust::transform( + rmm::exec_policy(stream_view), + key_first, + key_last, + thrust::make_zip_iterator( + thrust::make_tuple(group_ids.begin(), intra_partition_offsets.begin())), + cuda::proclaim_return_type>( + [key_to_group_id_op, group_first, counts = counts.data()] __device__(auto key) { + auto group_id = key_to_group_id_op(key); + cuda::std::atomic_ref counter(counts[group_id - group_first]); + return thrust::make_tuple(group_id, + counter.fetch_add(size_t{1}, cuda::std::memory_order_relaxed)); + })); rmm::device_uvector displacements(num_groups, stream_view); thrust::exclusive_scan( @@ -761,8 +766,9 @@ rmm::device_uvector groupby_and_count(ValueIterator tx_value_first /* [I stream_view); auto group_id_first = thrust::make_transform_iterator( - tx_value_first, - [value_to_group_id_op] __device__(auto value) { return value_to_group_id_op(value); }); + tx_value_first, cuda::proclaim_return_type([value_to_group_id_op] __device__(auto value) { + return value_to_group_id_op(value); + })); rmm::device_uvector d_tx_dst_ranks(num_groups, stream_view); rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream_view); auto rank_count_pair_first = thrust::make_zip_iterator( @@ -795,7 +801,9 @@ rmm::device_uvector 
groupby_and_count(VertexIterator tx_key_first /* [IN stream_view); auto group_id_first = thrust::make_transform_iterator( - tx_key_first, [key_to_group_id_op] __device__(auto key) { return key_to_group_id_op(key); }); + tx_key_first, cuda::proclaim_return_type([key_to_group_id_op] __device__(auto key) { + return key_to_group_id_op(key); + })); rmm::device_uvector d_tx_dst_ranks(num_groups, stream_view); rmm::device_uvector d_tx_value_counts(d_tx_dst_ranks.size(), stream_view); auto rank_count_pair_first = thrust::make_zip_iterator( diff --git a/cpp/src/c_api/capi_helper.cu b/cpp/src/c_api/capi_helper.cu index 0ee49f87265..f08af4137db 100644 --- a/cpp/src/c_api/capi_helper.cu +++ b/cpp/src/c_api/capi_helper.cu @@ -74,6 +74,104 @@ template void sort_by_key(raft::handle_t const& handle, raft::device_span keys, raft::device_span values); +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights) +{ + rmm::device_uvector sort_indices(edge_srcs.size(), handle.get_stream()); + thrust::tabulate( + handle.get_thrust_policy(), + sort_indices.begin(), + sort_indices.end(), + [offset_lasts = raft::device_span(offsets.begin() + 1, offsets.end()), + source_indices = raft::device_span(source_indices.data(), + source_indices.size())] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + offset_lasts.begin(), + thrust::upper_bound(thrust::seq, offset_lasts.begin(), offset_lasts.end(), i))); + return source_indices[idx]; + }); + source_indices.resize(0, handle.get_stream()); + source_indices.shrink_to_fit(handle.get_stream()); + + auto triplet_first = + thrust::make_zip_iterator(sort_indices.begin(), edge_srcs.begin(), edge_dsts.begin()); + if (edge_weights) { + thrust::sort_by_key(handle.get_thrust_policy(), + triplet_first, + triplet_first + sort_indices.size(), + (*edge_weights).begin()); + } else { + thrust::sort(handle.get_thrust_policy(), triplet_first, triplet_first + sort_indices.size()); + } + + thrust::tabulate( + handle.get_thrust_policy(), + offsets.begin() + 1, + offsets.end(), + [sort_indices = raft::device_span(sort_indices.data(), + sort_indices.size())] __device__(size_t i) { + return static_cast(thrust::distance( + sort_indices.begin(), + thrust::upper_bound(thrust::seq, sort_indices.begin(), sort_indices.end(), i))); + }); + + return std::make_tuple( + std::move(offsets), std::move(edge_srcs), std::move(edge_dsts), std::move(edge_weights)); +} + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& 
edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + +template std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + } // namespace detail } // namespace c_api } // namespace cugraph diff --git a/cpp/src/c_api/capi_helper.hpp b/cpp/src/c_api/capi_helper.hpp index ce08e8d90d3..56401606477 100644 --- a/cpp/src/c_api/capi_helper.hpp +++ b/cpp/src/c_api/capi_helper.hpp @@ -36,6 +36,18 @@ void sort_by_key(raft::handle_t const& handle, raft::device_span keys, raft::device_span values); +template +std::tuple, + rmm::device_uvector, + rmm::device_uvector, + std::optional>> +reorder_extracted_egonets(raft::handle_t const& handle, + rmm::device_uvector&& source_indices, + rmm::device_uvector&& offsets, + rmm::device_uvector&& edge_srcs, + rmm::device_uvector&& edge_dsts, + std::optional>&& edge_weights); + } // namespace detail } // namespace c_api } // namespace cugraph diff --git a/cpp/src/c_api/extract_ego.cpp b/cpp/src/c_api/extract_ego.cpp index 931d58b5185..cbe07af2e77 100644 --- a/cpp/src/c_api/extract_ego.cpp +++ b/cpp/src/c_api/extract_ego.cpp @@ -17,6 +17,7 @@ #include #include +#include #include #include #include @@ -26,7 +27,10 @@ #include #include #include +#include +#include +#include #include namespace { @@ -91,9 +95,22 @@ struct extract_ego_functor : public cugraph::c_api::abstract_functor { source_vertices.size(), handle_.get_stream()); + std::optional> source_indices{std::nullopt}; + if constexpr (multi_gpu) { - source_vertices = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( - handle_, std::move(source_vertices)); + auto displacements = cugraph::host_scalar_allgather( + handle_.get_comms(), source_vertices.size(), handle_.get_stream()); + std::exclusive_scan( + displacements.begin(), displacements.end(), displacements.begin(), size_t{0}); + source_indices = rmm::device_uvector(source_vertices.size(), handle_.get_stream()); + cugraph::detail::sequence_fill(handle_.get_stream(), + (*source_indices).data(), + (*source_indices).size(), + displacements[handle_.get_comms().get_rank()]); + + std::tie(source_vertices, source_indices) = + cugraph::detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle_, std::move(source_vertices), std::move(*source_indices)); } cugraph::renumber_ext_vertices( @@ -130,6 +147,31 @@ struct extract_ego_functor : public cugraph::c_api::abstract_functor { graph_view.vertex_partition_range_lasts(), do_expensive_check_); + if constexpr (multi_gpu) { + auto recvcounts = cugraph::host_scalar_allgather( + handle_.get_comms(), (*source_indices).size(), handle_.get_stream()); + std::vector displacements(recvcounts.size()); + std::exclusive_scan(recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); + rmm::device_uvector allgathered_indices(displacements.back() + recvcounts.back(), + handle_.get_stream()); + cugraph::device_allgatherv(handle_.get_comms(), + (*source_indices).begin(), + allgathered_indices.begin(), + recvcounts, + displacements, + handle_.get_stream()); + source_indices = std::move(allgathered_indices); + + std::tie(edge_offsets, src, dst, wgt) = + cugraph::c_api::detail::reorder_extracted_egonets( + handle_, + std::move(*source_indices), + std::move(edge_offsets), + std::move(src), + 
std::move(dst), + std::move(wgt)); + } + result_ = new cugraph::c_api::cugraph_induced_subgraph_result_t{ new cugraph::c_api::cugraph_type_erased_device_array_t(src, graph_->vertex_type_), new cugraph::c_api::cugraph_type_erased_device_array_t(dst, graph_->vertex_type_), diff --git a/cpp/src/community/detail/mis_impl.cuh b/cpp/src/community/detail/mis_impl.cuh index bcd71af5a08..2659a982183 100644 --- a/cpp/src/community/detail/mis_impl.cuh +++ b/cpp/src/community/detail/mis_impl.cuh @@ -37,6 +37,8 @@ #include #include +#include + #include namespace cugraph { @@ -78,13 +80,13 @@ rmm::device_uvector maximal_independent_set( thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin()); // Set ranks of zero out-degree vetices to std::numeric_limits::lowest() - thrust::transform_if( - handle.get_thrust_policy(), - out_degrees.begin(), - out_degrees.end(), - ranks.begin(), - [] __device__(auto) { return std::numeric_limits::lowest(); }, - [] __device__(auto deg) { return deg == 0; }); + thrust::transform_if(handle.get_thrust_policy(), + out_degrees.begin(), + out_degrees.end(), + ranks.begin(), + cuda::proclaim_return_type( + [] __device__(auto) { return std::numeric_limits::lowest(); }), + [] __device__(auto deg) { return deg == 0; }); out_degrees.resize(0, handle.get_stream()); out_degrees.shrink_to_fit(handle.get_stream()); diff --git a/cpp/src/community/detail/refine_impl.cuh b/cpp/src/community/detail/refine_impl.cuh index ebaae498d04..eb874657f01 100644 --- a/cpp/src/community/detail/refine_impl.cuh +++ b/cpp/src/community/detail/refine_impl.cuh @@ -46,6 +46,8 @@ #include #include +#include + CUCO_DECLARE_BITWISE_COMPARABLE(float) CUCO_DECLARE_BITWISE_COMPARABLE(double) // FIXME: a temporary workaround for a compiler error, should be deleted once cuco gets patched. 
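// ---- editor's illustrative sketch, not part of the patch above ----
// A recurring change throughout this patch wraps device lambdas handed to Thrust (transform
// iterators, thrust::transform, thrust::transform_if, ...) in cuda::proclaim_return_type<T>(...),
// which states the lambda's return type explicitly instead of having Thrust deduce it from an
// extended __device__ lambda. A minimal stand-alone example of the pattern (generic illustration,
// not code from cugraph; compile as a .cu file with nvcc and --extended-lambda):
#include <cuda/functional>  // cuda::proclaim_return_type
#include <thrust/execution_policy.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/iterator/transform_iterator.h>
#include <thrust/reduce.h>

int main()
{
  // iterator producing 1*1, 2*2, 3*3, 4*4 on the fly on the device
  auto squared_first = thrust::make_transform_iterator(
    thrust::make_counting_iterator(1),
    cuda::proclaim_return_type<int>([] __device__(int x) { return x * x; }));
  int sum = thrust::reduce(thrust::device, squared_first, squared_first + 4, 0);
  return sum == 30 ? 0 : 1;  // 1 + 4 + 9 + 16
}
// The wrapper changes no behaviour; it only makes the return type explicit for Thrust/libcu++.
// ---- end of editor's sketch ----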
@@ -213,16 +215,17 @@ refine_clustering( : detail::edge_minor_property_view_t( louvain_assignment_of_vertices.data(), vertex_t{0}), *edge_weight_view, - [] __device__(auto src, auto dst, auto src_cluster, auto dst_cluster, auto wt) { - weight_t weighted_cut_contribution{0}; + cuda::proclaim_return_type( + [] __device__(auto src, auto dst, auto src_cluster, auto dst_cluster, auto wt) { + weight_t weighted_cut_contribution{0}; - if (src == dst) // self loop - weighted_cut_contribution = 0; - else if (src_cluster == dst_cluster) - weighted_cut_contribution = wt; + if (src == dst) // self loop + weighted_cut_contribution = 0; + else if (src_cluster == dst_cluster) + weighted_cut_contribution = wt; - return weighted_cut_contribution; - }, + return weighted_cut_contribution; + }), weight_t{0}, cugraph::reduce_op::plus{}, weighted_cut_of_vertices_to_louvain.begin()); @@ -243,13 +246,14 @@ refine_clustering( wcut_deg_and_cluster_vol_triple_begin, wcut_deg_and_cluster_vol_triple_end, singleton_and_connected_flags.begin(), - [resolution, total_edge_weight] __device__(auto wcut_wdeg_and_louvain_volume) { + cuda::proclaim_return_type([resolution, total_edge_weight] __device__( + auto wcut_wdeg_and_louvain_volume) { auto wcut = thrust::get<0>(wcut_wdeg_and_louvain_volume); auto wdeg = thrust::get<1>(wcut_wdeg_and_louvain_volume); auto louvain_volume = thrust::get<2>(wcut_wdeg_and_louvain_volume); - return wcut > - (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight); - }); + return static_cast( + wcut > (resolution * wdeg * (louvain_volume - wdeg) / total_edge_weight)); + })); edge_src_property_t src_louvain_cluster_weight_cache(handle); edge_src_property_t src_cut_to_louvain_cache(handle); @@ -718,11 +722,12 @@ refine_clustering( vertices_in_mis.begin(), vertices_in_mis.end(), dst_vertices.begin(), - [dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()), - v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { - auto dst = *(dst_first + v - v_first); - return dst; - }); + cuda::proclaim_return_type( + [dst_first = thrust::get<1>(gain_and_dst_first.get_iterator_tuple()), + v_first = graph_view.local_vertex_partition_range_first()] __device__(vertex_t v) { + auto dst = *(dst_first + v - v_first); + return dst; + })); cugraph::resize_dataframe_buffer(gain_and_dst_output_pairs, 0, handle.get_stream()); cugraph::shrink_to_fit_dataframe_buffer(gain_and_dst_output_pairs, handle.get_stream()); diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index b6e20272de9..1e2b8f2ad44 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -568,17 +568,17 @@ void flatten_leiden_dendrogram(raft::handle_t const& handle, leiden_partition_at_level( handle, dendrogram, clustering, dendrogram.num_levels()); - rmm::device_uvector unique_cluster_ids(graph_view.number_of_vertices(), + rmm::device_uvector unique_cluster_ids(graph_view.local_vertex_partition_range_size(), handle.get_stream()); thrust::copy(handle.get_thrust_policy(), clustering, - clustering + graph_view.number_of_vertices(), + clustering + graph_view.local_vertex_partition_range_size(), unique_cluster_ids.begin()); remove_duplicates(handle, unique_cluster_ids); relabel_cluster_ids( - handle, unique_cluster_ids, clustering, graph_view.number_of_vertices()); + handle, unique_cluster_ids, clustering, graph_view.local_vertex_partition_range_size()); } } // namespace detail diff --git a/cpp/src/detail/collect_local_vertex_values.cu 
b/cpp/src/detail/collect_local_vertex_values.cu index 9d5d2cb553b..795902dfd87 100644 --- a/cpp/src/detail/collect_local_vertex_values.cu +++ b/cpp/src/detail/collect_local_vertex_values.cu @@ -19,6 +19,8 @@ #include #include +#include + namespace cugraph { namespace detail { @@ -64,7 +66,8 @@ rmm::device_uvector collect_local_vertex_values_from_ext_vertex_value_p auto vertex_iterator = thrust::make_transform_iterator( d_vertices.begin(), - [local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; }); + cuda::proclaim_return_type( + [local_vertex_first] __device__(vertex_t v) { return v - local_vertex_first; })); d_local_values.resize(local_vertex_last - local_vertex_first, handle.get_stream()); thrust::fill( diff --git a/cpp/src/detail/shuffle_vertices.cu b/cpp/src/detail/shuffle_vertices.cu index bc450ce3bbf..94729a770f7 100644 --- a/cpp/src/detail/shuffle_vertices.cu +++ b/cpp/src/detail/shuffle_vertices.cu @@ -200,6 +200,12 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); +template std::tuple, rmm::device_uvector> +shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + template std::tuple, rmm::device_uvector> shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, @@ -224,6 +230,12 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); +template std::tuple, rmm::device_uvector> +shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values); + template std::tuple, rmm::device_uvector> shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, diff --git a/cpp/src/generators/erdos_renyi_generator.cu b/cpp/src/generators/erdos_renyi_generator.cu index 6d847ae0bde..8448eeaf960 100644 --- a/cpp/src/generators/erdos_renyi_generator.cu +++ b/cpp/src/generators/erdos_renyi_generator.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -28,6 +28,8 @@ #include #include +#include + namespace cugraph { template @@ -42,12 +44,13 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, "Implementation cannot support specified value"); auto random_iterator = thrust::make_transform_iterator( - thrust::make_counting_iterator(0), [seed] __device__(size_t index) { + thrust::make_counting_iterator(0), + cuda::proclaim_return_type([seed] __device__(size_t index) { thrust::default_random_engine rng(seed); thrust::uniform_real_distribution dist(0.0, 1.0); rng.discard(index); return dist(rng); - }); + })); size_t count = thrust::count_if(handle.get_thrust_policy(), random_iterator, @@ -69,13 +72,14 @@ generate_erdos_renyi_graph_edgelist_gnp(raft::handle_t const& handle, indices_v.begin(), indices_v.end(), thrust::make_zip_iterator(thrust::make_tuple(src_v.begin(), src_v.end())), - [num_vertices] __device__(size_t index) { - size_t src = index / num_vertices; - size_t dst = index % num_vertices; - - return thrust::make_tuple(static_cast(src), - static_cast(dst)); - }); + cuda::proclaim_return_type>( + [num_vertices] __device__(size_t index) { + size_t src = index / num_vertices; + size_t dst = index % num_vertices; + + return thrust::make_tuple(static_cast(src), + static_cast(dst)); + })); handle.sync_stream(); diff --git a/cpp/src/generators/simple_generators.cu b/cpp/src/generators/simple_generators.cu index 6dba63909c3..65647be5de0 100644 --- a/cpp/src/generators/simple_generators.cu +++ b/cpp/src/generators/simple_generators.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
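// ---- editor's illustrative sketch, not part of the patch above ----
// The generator lambdas above return a thrust::tuple, so cuda::proclaim_return_type has to spell
// out the full tuple type. A minimal fragment of that shape (num_vertices is a hypothetical value,
// not a variable from the patch; assumes the same includes as the earlier sketch plus
// <thrust/tuple.h>):
//
//   size_t num_vertices = 4;
//   auto src_dst_first  = thrust::make_transform_iterator(
//     thrust::make_counting_iterator(size_t{0}),
//     cuda::proclaim_return_type<thrust::tuple<int, int>>(
//       [num_vertices] __device__(size_t i) {
//         return thrust::make_tuple(static_cast<int>(i / num_vertices),
//                                   static_cast<int>(i % num_vertices));
//       }));
//   // e.g. src_dst_first[5] is (1, 1) when num_vertices == 4
// ---- end of editor's sketch ----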
@@ -27,6 +27,8 @@ #include #include +#include + #include namespace cugraph { @@ -264,23 +266,24 @@ generate_complete_graph_edgelist( auto transform_iter = thrust::make_transform_iterator( thrust::make_counting_iterator(0), - [base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) { - size_t graph_index = index / (num_vertices * num_vertices); - size_t local_index = index % (num_vertices * num_vertices); - - vertex_t src = base_vertex_id + static_cast(local_index / num_vertices); - vertex_t dst = base_vertex_id + static_cast(local_index % num_vertices); - - if (src == dst) { - src = invalid_vertex; - dst = invalid_vertex; - } else { - src += (graph_index * num_vertices); - dst += (graph_index * num_vertices); - } - - return thrust::make_tuple(src, dst); - }); + cuda::proclaim_return_type>( + [base_vertex_id, num_vertices, invalid_vertex] __device__(size_t index) { + size_t graph_index = index / (num_vertices * num_vertices); + size_t local_index = index % (num_vertices * num_vertices); + + vertex_t src = base_vertex_id + static_cast(local_index / num_vertices); + vertex_t dst = base_vertex_id + static_cast(local_index % num_vertices); + + if (src == dst) { + src = invalid_vertex; + dst = invalid_vertex; + } else { + src += (graph_index * num_vertices); + dst += (graph_index * num_vertices); + } + + return thrust::make_tuple(src, dst); + })); output_iterator = thrust::copy_if(handle.get_thrust_policy(), transform_iter, diff --git a/cpp/src/link_analysis/hits_impl.cuh b/cpp/src/link_analysis/hits_impl.cuh index 674046745b1..5cdf1b9dc6a 100644 --- a/cpp/src/link_analysis/hits_impl.cuh +++ b/cpp/src/link_analysis/hits_impl.cuh @@ -80,6 +80,7 @@ std::tuple hits(raft::handle_t const& handle, if (num_vertices == 0) { return std::make_tuple(diff_sum, final_iteration_count); } CUGRAPH_EXPECTS(epsilon >= 0.0, "Invalid input argument: epsilon should be non-negative."); + auto tolerance = static_cast(graph_view.number_of_vertices()) * epsilon; // Check validity of initial guess if supplied if (has_initial_hubs_guess && do_expensive_check) { @@ -171,7 +172,7 @@ std::tuple hits(raft::handle_t const& handle, std::swap(prev_hubs, curr_hubs); iter++; - if (diff_sum < epsilon) { + if (diff_sum < tolerance) { break; } else if (iter >= max_iterations) { CUGRAPH_FAIL("HITS failed to converge."); diff --git a/cpp/src/mtmg/vertex_result.cu b/cpp/src/mtmg/vertex_result.cu index 5b1825656ff..414f1bdfa88 100644 --- a/cpp/src/mtmg/vertex_result.cu +++ b/cpp/src/mtmg/vertex_result.cu @@ -21,6 +21,7 @@ #include +#include #include namespace cugraph { @@ -91,10 +92,11 @@ rmm::device_uvector vertex_result_view_t::gather( auto vertex_partition = vertex_partition_device_view_t(vertex_partition_view); - auto iter = - thrust::make_transform_iterator(local_vertices.begin(), [vertex_partition] __device__(auto v) { + auto iter = thrust::make_transform_iterator( + local_vertices.begin(), + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); thrust::gather(handle.get_thrust_policy(), iter, @@ -111,7 +113,7 @@ rmm::device_uvector vertex_result_view_t::gather( vertex_gpu_ids.begin(), vertex_gpu_ids.end(), thrust::make_zip_iterator(local_vertices.begin(), vertex_pos.begin(), tmp_result.begin()), - [] __device__(int gpu) { return gpu; }, + thrust::identity{}, handle.get_stream()); // diff --git a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh 
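// ---- editor's illustrative sketch, not part of the patch above ----
// The hits_impl.cuh hunk above changes the convergence test from `diff_sum < epsilon` to
// `diff_sum < tolerance`, with tolerance = number_of_vertices * epsilon, so the threshold scales
// with graph size rather than tightening as the vertex count grows. A worked check with made-up
// numbers:
bool hits_converged_sketch()
{
  double epsilon      = 1e-5;
  double num_vertices = 1'000'000;
  double tolerance    = num_vertices * epsilon;  // 10.0
  double diff_sum     = 4.2;  // hypothetical sum of per-vertex |h_new - h_old|
  return diff_sum < tolerance;  // true under the new test; the old `diff_sum < epsilon` was false
}
// ---- end of editor's sketch ----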
index 5fee97790f1..4c5c43c7d1e 100644 --- a/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh +++ b/cpp/src/prims/per_v_random_select_transform_outgoing_e.cuh @@ -44,6 +44,8 @@ #include #include +#include + #include #include @@ -596,8 +598,9 @@ rmm::device_uvector get_sampling_index_without_replacement( multiplier_t{high_partition_over_sampling_K}), thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), - [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( - size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }), + cuda::proclaim_return_type( + [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( + size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })), handle.get_stream()); if (tmp_storage_bytes > d_tmp_storage.size()) { d_tmp_storage = rmm::device_uvector(tmp_storage_bytes, handle.get_stream()); @@ -615,8 +618,9 @@ rmm::device_uvector get_sampling_index_without_replacement( multiplier_t{high_partition_over_sampling_K}), thrust::make_transform_iterator( thrust::make_counting_iterator(size_t{0}), - [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( - size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; }), + cuda::proclaim_return_type( + [high_partition_over_sampling_K, unique_counts = unique_counts.data()] __device__( + size_t i) { return i * high_partition_over_sampling_K + unique_counts[i]; })), handle.get_stream()); // copy the neighbor indices back to sample_nbr_indices diff --git a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh index 0b6c6a554bb..1a7fc0130c4 100644 --- a/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh +++ b/cpp/src/prims/per_v_transform_reduce_incoming_outgoing_e.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include #include #include @@ -591,10 +593,11 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, value_size = sizeof(T); } - auto avg_vertex_degree = graph_view.number_of_vertices() > 0 - ? (static_cast(graph_view.number_of_edges()) / - static_cast(graph_view.number_of_vertices())) - : double{0.0}; + auto avg_vertex_degree = + graph_view.number_of_vertices() > 0 + ? 
(static_cast(graph_view.compute_number_of_edges(handle)) / + static_cast(graph_view.number_of_vertices())) + : double{0.0}; num_streams = std::min(static_cast(avg_vertex_degree * (static_cast(sizeof(vertex_t)) / @@ -940,16 +943,19 @@ void per_v_transform_reduce_e(raft::handle_t const& handle, minor_init); auto value_first = thrust::make_transform_iterator( view.value_first(), - [reduce_op, minor_init] __device__(auto val) { return reduce_op(val, minor_init); }); - thrust::scatter( - handle.get_thrust_policy(), - value_first + (*minor_key_offsets)[i], - value_first + (*minor_key_offsets)[i + 1], - thrust::make_transform_iterator( - (*(view.keys())).begin() + (*minor_key_offsets)[i], - [key_first = graph_view.vertex_partition_range_first( - this_segment_vertex_partition_id)] __device__(auto key) { return key - key_first; }), - tx_buffer_first); + cuda::proclaim_return_type( + [reduce_op, minor_init] __device__(auto val) { return reduce_op(val, minor_init); })); + thrust::scatter(handle.get_thrust_policy(), + value_first + (*minor_key_offsets)[i], + value_first + (*minor_key_offsets)[i + 1], + thrust::make_transform_iterator( + (*(view.keys())).begin() + (*minor_key_offsets)[i], + cuda::proclaim_return_type( + [key_first = graph_view.vertex_partition_range_first( + this_segment_vertex_partition_id)] __device__(auto key) { + return key - key_first; + })), + tx_buffer_first); device_reduce(major_comm, tx_buffer_first, vertex_value_output_first, diff --git a/cpp/src/prims/update_edge_src_dst_property.cuh b/cpp/src/prims/update_edge_src_dst_property.cuh index b8621e122c6..0c7058cccb4 100644 --- a/cpp/src/prims/update_edge_src_dst_property.cuh +++ b/cpp/src/prims/update_edge_src_dst_property.cuh @@ -44,6 +44,8 @@ #include #include +#include + #include #include #include @@ -181,13 +183,14 @@ void update_edge_major_property(raft::handle_t const& handle, handle.get_stream()); auto bool_first = thrust::make_transform_iterator( (*edge_partition_keys)[i].begin(), - [rx_value_first, - v_first = graph_view.vertex_partition_range_first( - major_range_vertex_partition_id)] __device__(auto v) { - auto v_offset = v - v_first; - return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & - packed_bool_mask(v_offset)); - }); + cuda::proclaim_return_type( + [rx_value_first, + v_first = graph_view.vertex_partition_range_first( + major_range_vertex_partition_id)] __device__(auto v) { + auto v_offset = v - v_first; + return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & + packed_bool_mask(v_offset)); + })); pack_bools(handle, bool_first, bool_first + (*edge_partition_keys)[i].size(), @@ -202,8 +205,9 @@ void update_edge_major_property(raft::handle_t const& handle, auto v_offset_first = thrust::make_transform_iterator( (*edge_partition_keys)[i].begin(), - [v_first = graph_view.vertex_partition_range_first( - major_range_vertex_partition_id)] __device__(auto v) { return v - v_first; }); + cuda::proclaim_return_type( + [v_first = graph_view.vertex_partition_range_first( + major_range_vertex_partition_id)] __device__(auto v) { return v - v_first; })); thrust::gather(handle.get_thrust_policy(), v_offset_first, v_offset_first + (*edge_partition_keys)[i].size(), @@ -312,21 +316,24 @@ void update_edge_major_property(raft::handle_t const& handle, graph_view.local_vertex_partition_view()); if constexpr (packed_bool) { auto bool_first = thrust::make_transform_iterator( - vertex_first, [vertex_property_input_first, vertex_partition] __device__(auto v) { + vertex_first, + 
cuda::proclaim_return_type([vertex_property_input_first, + vertex_partition] __device__(auto v) { auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); return static_cast( *(vertex_property_input_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_bools(handle, bool_first, bool_first + thrust::distance(vertex_first, vertex_last), rx_value_first); } else { - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + auto map_first = thrust::make_transform_iterator( + vertex_first, + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) thrust::gather(handle.get_thrust_policy(), @@ -391,9 +398,10 @@ void update_edge_major_property(raft::handle_t const& handle, }); } else { auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [edge_partition] __device__(auto v) { + rx_vertices.begin(), + cuda::proclaim_return_type([edge_partition] __device__(auto v) { return edge_partition.major_offset_from_major_nocheck(v); - }); + })); // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), @@ -471,7 +479,8 @@ void update_edge_minor_property(raft::handle_t const& handle, bcast_size *= sizeof(typename EdgeMinorPropertyOutputWrapper::value_type); } auto num_concurrent_bcasts = - (static_cast(graph_view.number_of_edges() / comm_size) * sizeof(vertex_t)) / + (static_cast(graph_view.compute_number_of_edges(handle) / comm_size) * + sizeof(vertex_t)) / std::max(bcast_size, size_t{1}); num_concurrent_bcasts = std::max(num_concurrent_bcasts, size_t{1}); num_concurrent_bcasts = std::min(num_concurrent_bcasts, static_cast(major_comm_size)); @@ -593,13 +602,14 @@ void update_edge_minor_property(raft::handle_t const& handle, auto bool_first = thrust::make_transform_iterator( (*edge_partition_keys).begin() + key_offsets[j], - [rx_value_first, - v_first = graph_view.vertex_partition_range_first( - minor_range_vertex_partition_id)] __device__(auto v) { - auto v_offset = v - v_first; - return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & - packed_bool_mask(v_offset)); - }); + cuda::proclaim_return_type( + [rx_value_first, + v_first = graph_view.vertex_partition_range_first( + minor_range_vertex_partition_id)] __device__(auto v) { + auto v_offset = v - v_first; + return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & + packed_bool_mask(v_offset)); + })); pack_unaligned_bools( handle, bool_first, @@ -611,10 +621,10 @@ void update_edge_minor_property(raft::handle_t const& handle, std::get>(key_offsets_or_rx_displacements); auto bool_first = thrust::make_transform_iterator( thrust::make_counting_iterator(vertex_t{0}), - [rx_value_first] __device__(vertex_t v_offset) { + cuda::proclaim_return_type([rx_value_first] __device__(vertex_t v_offset) { return static_cast(*(rx_value_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_unaligned_bools( handle, bool_first, @@ -630,8 +640,9 @@ void update_edge_minor_property(raft::handle_t const& handle, auto v_offset_first = thrust::make_transform_iterator( (*edge_partition_keys).begin() + key_offsets[j], - 
[v_first = graph_view.vertex_partition_range_first( - minor_range_vertex_partition_id)] __device__(auto v) { return v - v_first; }); + cuda::proclaim_return_type( + [v_first = graph_view.vertex_partition_range_first( + minor_range_vertex_partition_id)] __device__(auto v) { return v - v_first; })); thrust::gather(handle.get_thrust_policy(), v_offset_first, v_offset_first + (key_offsets[j + 1] - key_offsets[j]), @@ -718,21 +729,24 @@ void update_edge_minor_property(raft::handle_t const& handle, graph_view.local_vertex_partition_view()); if constexpr (packed_bool) { auto bool_first = thrust::make_transform_iterator( - vertex_first, [vertex_property_input_first, vertex_partition] __device__(auto v) { + vertex_first, + cuda::proclaim_return_type([vertex_property_input_first, + vertex_partition] __device__(auto v) { auto v_offset = vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); return static_cast( *(vertex_property_input_first + packed_bool_offset(v_offset)) & packed_bool_mask(v_offset)); - }); + })); pack_bools(handle, bool_first, bool_first + thrust::distance(vertex_first, vertex_last), rx_value_first); } else { - auto map_first = - thrust::make_transform_iterator(vertex_first, [vertex_partition] __device__(auto v) { + auto map_first = thrust::make_transform_iterator( + vertex_first, + cuda::proclaim_return_type([vertex_partition] __device__(auto v) { return vertex_partition.local_vertex_partition_offset_from_vertex_nocheck(v); - }); + })); // FIXME: this gather (and temporary buffer) is unnecessary if NCCL directly takes a // permutation iterator (and directly gathers to the internal buffer) thrust::gather(handle.get_thrust_policy(), @@ -799,9 +813,10 @@ void update_edge_minor_property(raft::handle_t const& handle, }); } else { auto map_first = thrust::make_transform_iterator( - rx_vertices.begin(), [edge_partition] __device__(auto v) { + rx_vertices.begin(), + cuda::proclaim_return_type([edge_partition] __device__(auto v) { return edge_partition.minor_offset_from_minor_nocheck(v); - }); + })); // FIXME: this scatter is unnecessary if NCCL directly takes a permutation iterator (and // directly scatters from the internal buffer) thrust::scatter(handle.get_thrust_policy(), diff --git a/cpp/src/sampling/random_walks.cuh b/cpp/src/sampling/random_walks.cuh index 6a7334e9f1a..5a9ded02009 100644 --- a/cpp/src/sampling/random_walks.cuh +++ b/cpp/src/sampling/random_walks.cuh @@ -52,6 +52,8 @@ #include #include +#include + #include #include // FIXME: requirement for temporary std::getenv() #include @@ -378,7 +380,8 @@ struct random_walker_t { // scatter d_src_init_v to coalesced vertex vector: // - auto dlambda = [stride = max_depth_] __device__(auto indx) { return indx * stride; }; + auto dlambda = cuda::proclaim_return_type( + [stride = max_depth_] __device__(auto indx) { return indx * stride; }); // use the transform iterator as map: // @@ -539,10 +542,11 @@ struct random_walker_t { // delta = ptr_d_sizes[indx] - 1 // - auto dlambda = [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { - auto delta = ptr_d_sizes[indx] - 1; - return ptr_d_coalesced[indx * stride + delta]; - }; + auto dlambda = cuda::proclaim_return_type( + [stride, ptr_d_sizes, ptr_d_coalesced] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - 1; + return ptr_d_coalesced[indx * stride + delta]; + }); // use the transform iterator as map: // @@ -587,10 +591,11 @@ struct random_walker_t { { index_t const* ptr_d_sizes = original::raw_const_ptr(d_sizes); - auto dlambda = [stride, adjust, 
ptr_d_sizes] __device__(auto indx) { - auto delta = ptr_d_sizes[indx] - adjust - 1; - return indx * stride + delta; - }; + auto dlambda = + cuda::proclaim_return_type([stride, adjust, ptr_d_sizes] __device__(auto indx) { + auto delta = ptr_d_sizes[indx] - adjust - 1; + return indx * stride + delta; + }); // use the transform iterator as map: // diff --git a/cpp/src/sampling/sampling_post_processing_impl.cuh b/cpp/src/sampling/sampling_post_processing_impl.cuh index 77d4f2d865f..852d82e78ab 100644 --- a/cpp/src/sampling/sampling_post_processing_impl.cuh +++ b/cpp/src/sampling/sampling_post_processing_impl.cuh @@ -40,6 +40,8 @@ #include #include +#include + #include namespace cugraph { @@ -1229,10 +1231,12 @@ renumber_and_compress_sampled_edgelist( auto pair_first = thrust::make_zip_iterator((*compressed_label_indices).begin(), (*compressed_hops).begin()); auto value_pair_first = thrust::make_transform_iterator( - thrust::make_counting_iterator(size_t{0}), [num_hops] __device__(size_t i) { - return thrust::make_tuple(static_cast(i / num_hops), - static_cast(i % num_hops)); - }); + thrust::make_counting_iterator(size_t{0}), + cuda::proclaim_return_type>( + [num_hops] __device__(size_t i) { + return thrust::make_tuple(static_cast(i / num_hops), + static_cast(i % num_hops)); + })); thrust::upper_bound(handle.get_thrust_policy(), pair_first, pair_first + (*compressed_label_indices).size(), diff --git a/cpp/src/structure/graph_impl.cuh b/cpp/src/structure/graph_impl.cuh index 75862266789..6568b5e3b9e 100644 --- a/cpp/src/structure/graph_impl.cuh +++ b/cpp/src/structure/graph_impl.cuh @@ -133,8 +133,7 @@ update_local_sorted_unique_edge_majors_minors( graph_meta_t const& meta, std::vector> const& edge_partition_offsets, std::vector> const& edge_partition_indices, - std::optional>> const& edge_partition_dcs_nzd_vertices, - std::optional> const& edge_partition_dcs_nzd_vertex_counts) + std::optional>> const& edge_partition_dcs_nzd_vertices) { auto& comm = handle.get_comms(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); @@ -341,8 +340,7 @@ update_local_sorted_unique_edge_majors_minors( if (use_dcs) { thrust::copy(handle.get_thrust_policy(), (*edge_partition_dcs_nzd_vertices)[i].begin(), - (*edge_partition_dcs_nzd_vertices)[i].begin() + - (*edge_partition_dcs_nzd_vertex_counts)[i], + (*edge_partition_dcs_nzd_vertices)[i].end(), unique_edge_majors.begin() + cur_size); } @@ -390,7 +388,7 @@ graph_t meta, bool do_expensive_check) : detail::graph_base_t( - handle, meta.number_of_vertices, meta.number_of_edges, meta.properties), + meta.number_of_vertices, meta.number_of_edges, meta.properties), partition_(meta.partition) { CUGRAPH_EXPECTS( @@ -408,14 +406,6 @@ graph_t((*edge_partition_dcs_nzd_vertices_).size()); - for (size_t i = 0; i < (*edge_partition_dcs_nzd_vertex_counts_).size(); ++i) { - (*edge_partition_dcs_nzd_vertex_counts_)[i] = - static_cast((*edge_partition_dcs_nzd_vertices_)[i].size()); - } - } // update local sorted unique edge sources/destinations (only if key, value pair will be used) @@ -432,8 +422,7 @@ graph_t meta, bool do_expensive_check) : detail::graph_base_t( - handle, meta.number_of_vertices, static_cast(indices.size()), meta.properties), + meta.number_of_vertices, static_cast(indices.size()), meta.properties), offsets_(std::move(offsets)), indices_(std::move(indices)), segment_offsets_(meta.segment_offsets) diff --git a/cpp/src/structure/graph_view_impl.cuh b/cpp/src/structure/graph_view_impl.cuh index 37a553dcdbd..da0ecc991df 100644 --- 
a/cpp/src/structure/graph_view_impl.cuh +++ b/cpp/src/structure/graph_view_impl.cuh @@ -51,6 +51,8 @@ #include #include +#include + #include #include #include @@ -70,44 +72,15 @@ struct out_of_range_t { __device__ bool operator()(vertex_t v) const { return (v < min) || (v >= max); } }; -template -std::vector update_edge_partition_edge_counts( - std::vector const& edge_partition_offsets, - std::optional> const& edge_partition_dcs_nzd_vertex_counts, - partition_t const& partition, - std::vector const& edge_partition_segment_offsets, - cudaStream_t stream) -{ - std::vector edge_partition_edge_counts(partition.number_of_local_edge_partitions(), 0); - auto use_dcs = edge_partition_dcs_nzd_vertex_counts.has_value(); - for (size_t i = 0; i < edge_partition_offsets.size(); ++i) { - auto [major_range_first, major_range_last] = partition.local_edge_partition_major_range(i); - auto segment_offset_size_per_partition = - edge_partition_segment_offsets.size() / edge_partition_offsets.size(); - raft::update_host( - &(edge_partition_edge_counts[i]), - edge_partition_offsets[i] + - (use_dcs - ? (edge_partition_segment_offsets[segment_offset_size_per_partition * i + - detail::num_sparse_segments_per_vertex_partition] + - (*edge_partition_dcs_nzd_vertex_counts)[i]) - : (major_range_last - major_range_first)), - 1, - stream); - } - RAFT_CUDA_TRY(cudaStreamSynchronize(stream)); - return edge_partition_edge_counts; -} - // compute out-degrees (if we are internally storing edges in the sparse 2D matrix using sources as // major indices) or in-degrees (otherwise) template rmm::device_uvector compute_major_degrees( raft::handle_t const& handle, - std::vector const& edge_partition_offsets, - std::optional> const& edge_partition_dcs_nzd_vertices, - std::optional> const& edge_partition_dcs_nzd_vertex_counts, - std::optional> const& edge_partition_masks, + std::vector> const& edge_partition_offsets, + std::optional>> const& + edge_partition_dcs_nzd_vertices, + std::optional>> const& edge_partition_masks, partition_t const& partition, std::vector const& edge_partition_segment_offsets) { @@ -159,39 +132,39 @@ rmm::device_uvector compute_major_degrees( thrust::make_counting_iterator(vertex_t{0}), thrust::make_counting_iterator(major_hypersparse_first - major_range_first), local_degrees.begin(), - [offsets, masks] __device__(auto i) { + cuda::proclaim_return_type([offsets, masks] __device__(auto i) { auto local_degree = offsets[i + 1] - offsets[i]; if (masks) { local_degree = static_cast( - detail::count_set_bits(*masks, offsets[i], local_degree)); + detail::count_set_bits((*masks).begin(), offsets[i], local_degree)); } return local_degree; - }); + })); if (use_dcs) { - auto dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; - auto dcs_nzd_vertex_count = (*edge_partition_dcs_nzd_vertex_counts)[i]; + auto dcs_nzd_vertices = (*edge_partition_dcs_nzd_vertices)[i]; thrust::fill(execution_policy, local_degrees.begin() + (major_hypersparse_first - major_range_first), local_degrees.begin() + (major_range_last - major_range_first), edge_t{0}); - thrust::for_each(execution_policy, - thrust::make_counting_iterator(vertex_t{0}), - thrust::make_counting_iterator(dcs_nzd_vertex_count), - [offsets, - dcs_nzd_vertices, - masks, - major_range_first, - major_hypersparse_first, - local_degrees = local_degrees.data()] __device__(auto i) { - auto major_idx = (major_hypersparse_first - major_range_first) + i; - auto local_degree = offsets[major_idx + 1] - offsets[major_idx]; - if (masks) { - local_degree = static_cast( - 
detail::count_set_bits(*masks, offsets[major_idx], local_degree)); - } - auto v = dcs_nzd_vertices[i]; - local_degrees[v - major_range_first] = local_degree; - }); + thrust::for_each( + execution_policy, + thrust::make_counting_iterator(vertex_t{0}), + thrust::make_counting_iterator(static_cast(dcs_nzd_vertices.size())), + [offsets, + dcs_nzd_vertices, + masks, + major_range_first, + major_hypersparse_first, + local_degrees = local_degrees.data()] __device__(auto i) { + auto major_idx = (major_hypersparse_first - major_range_first) + i; + auto local_degree = offsets[major_idx + 1] - offsets[major_idx]; + if (masks) { + local_degree = static_cast( + detail::count_set_bits((*masks).begin(), offsets[major_idx], local_degree)); + } + auto v = dcs_nzd_vertices[i]; + local_degrees[v - major_range_first] = local_degree; + }); } minor_comm.reduce(local_degrees.data(), i == minor_comm_rank ? degrees.data() : static_cast(nullptr), @@ -207,10 +180,11 @@ rmm::device_uvector compute_major_degrees( // compute out-degrees (if we are internally storing edges in the sparse 2D matrix using sources as // major indices) or in-degrees (otherwise) template -rmm::device_uvector compute_major_degrees(raft::handle_t const& handle, - edge_t const* offsets, - std::optional masks, - vertex_t number_of_vertices) +rmm::device_uvector compute_major_degrees( + raft::handle_t const& handle, + raft::device_span offsets, + std::optional> masks, + vertex_t number_of_vertices) { rmm::device_uvector degrees(number_of_vertices, handle.get_stream()); thrust::tabulate( @@ -221,7 +195,7 @@ rmm::device_uvector compute_major_degrees(raft::handle_t const& handle, auto local_degree = offsets[i + 1] - offsets[i]; if (masks) { local_degree = - static_cast(detail::count_set_bits(*masks, offsets[i], local_degree)); + static_cast(detail::count_set_bits((*masks).begin(), offsets[i], local_degree)); } return local_degree; }); @@ -444,24 +418,16 @@ edge_t count_edge_partition_multi_edges( template graph_view_t>:: - graph_view_t(raft::handle_t const& handle, - std::vector const& edge_partition_offsets, - std::vector const& edge_partition_indices, - std::optional> const& edge_partition_dcs_nzd_vertices, - std::optional> const& edge_partition_dcs_nzd_vertex_counts, + graph_view_t(std::vector> const& edge_partition_offsets, + std::vector> const& edge_partition_indices, + std::optional>> const& + edge_partition_dcs_nzd_vertices, graph_view_meta_t meta) : detail::graph_base_t( - handle, meta.number_of_vertices, meta.number_of_edges, meta.properties), + meta.number_of_vertices, meta.number_of_edges, meta.properties), edge_partition_offsets_(edge_partition_offsets), edge_partition_indices_(edge_partition_indices), edge_partition_dcs_nzd_vertices_(edge_partition_dcs_nzd_vertices), - edge_partition_dcs_nzd_vertex_counts_(edge_partition_dcs_nzd_vertex_counts), - edge_partition_number_of_edges_( - update_edge_partition_edge_counts(edge_partition_offsets, - edge_partition_dcs_nzd_vertex_counts, - meta.partition, - meta.edge_partition_segment_offsets, - handle.get_stream())), partition_(meta.partition), edge_partition_segment_offsets_(meta.edge_partition_segment_offsets), local_sorted_unique_edge_srcs_(meta.local_sorted_unique_edge_srcs), @@ -479,51 +445,42 @@ graph_view_thandle_ptr()->get_subcomm(cugraph::partition_manager::minor_comm_name()).get_size(); - auto use_dcs = edge_partition_dcs_nzd_vertices.has_value(); CUGRAPH_EXPECTS(edge_partition_offsets.size() == edge_partition_indices.size(), "Internal Error: edge_partition_offsets.size() and " 
"edge_partition_indices.size() should coincide."); - CUGRAPH_EXPECTS(edge_partition_dcs_nzd_vertex_counts.has_value() == use_dcs, - "edge_partition_dcs_nzd_vertices.has_value() and " - "edge_partition_dcs_nzd_vertex_counts.has_value() should coincide"); - CUGRAPH_EXPECTS(!use_dcs || ((*edge_partition_dcs_nzd_vertices).size() == - (*edge_partition_dcs_nzd_vertex_counts).size()), - "Internal Error: edge_partition_dcs_nzd_vertices.size() and " - "edge_partition_dcs_nzd_vertex_counts.size() should coincide (if used)."); CUGRAPH_EXPECTS( !use_dcs || ((*edge_partition_dcs_nzd_vertices).size() == edge_partition_offsets.size()), "Internal Error: edge_partition_dcs_nzd_vertices.size() should coincide " "with edge_partition_offsets.size() (if used)."); - CUGRAPH_EXPECTS(edge_partition_offsets.size() == static_cast(minor_comm_size), - "Internal Error: erroneous edge_partition_offsets.size()."); - - CUGRAPH_EXPECTS( - meta.edge_partition_segment_offsets.size() == - minor_comm_size * (detail::num_sparse_segments_per_vertex_partition + (use_dcs ? 3 : 2)), - "Internal Error: invalid edge_partition_segment_offsets.size()."); + CUGRAPH_EXPECTS(meta.edge_partition_segment_offsets.size() == + edge_partition_offsets.size() * + (detail::num_sparse_segments_per_vertex_partition + (use_dcs ? 3 : 2)), + "Internal Error: invalid edge_partition_segment_offsets.size()."); // skip expensive error checks as this function is only called by graph_t } template graph_view_t>:: - graph_view_t(raft::handle_t const& handle, - edge_t const* offsets, - vertex_t const* indices, + graph_view_t(raft::device_span offsets, + raft::device_span indices, graph_view_meta_t meta) : detail::graph_base_t( - handle, meta.number_of_vertices, meta.number_of_edges, meta.properties), + meta.number_of_vertices, meta.number_of_edges, meta.properties), offsets_(offsets), indices_(indices), segment_offsets_(meta.segment_offsets) { // cheap error checks + CUGRAPH_EXPECTS(offsets.size() == static_cast(meta.number_of_vertices + 1), + "Internal Error: offsets.size() returns an invalid value."); + CUGRAPH_EXPECTS(indices.size() == static_cast(meta.number_of_edges), + "Internal Error: indices.size() returns an invalid value."); + CUGRAPH_EXPECTS( !(meta.segment_offsets).has_value() || ((*(meta.segment_offsets)).size() == (detail::num_sparse_segments_per_vertex_partition + 2)), @@ -532,19 +489,62 @@ graph_view_t +edge_t graph_view_t>:: + compute_number_of_edges(raft::handle_t const& handle) const +{ + if (this->has_edge_mask()) { + edge_t ret{}; + auto value_firsts = (*(this->edge_mask_view())).value_firsts(); + auto edge_counts = (*(this->edge_mask_view())).edge_counts(); + for (size_t i = 0; i < value_firsts.size(); ++i) { + ret += static_cast(detail::count_set_bits(handle, value_firsts[i], edge_counts[i])); + } + ret = + host_scalar_allreduce(handle.get_comms(), ret, raft::comms::op_t::SUM, handle.get_stream()); + return ret; + } else { + return this->number_of_edges_; + } +} + +template +edge_t graph_view_t>:: + compute_number_of_edges(raft::handle_t const& handle) const +{ + if (this->has_edge_mask()) { + auto value_firsts = (*(this->edge_mask_view())).value_firsts(); + auto edge_counts = (*(this->edge_mask_view())).edge_counts(); + assert(value_firsts.size() == 0); + assert(edge_counts.size() == 0); + return static_cast(detail::count_set_bits(handle, value_firsts[0], edge_counts[0])); + } else { + return this->number_of_edges_; + } +} + template rmm::device_uvector graph_view_t>:: compute_in_degrees(raft::handle_t const& handle) const { if 
(store_transposed) { + std::optional>> edge_partition_masks{ + std::nullopt}; + if (this->has_edge_mask()) { + edge_partition_masks = + std::vector>(this->edge_partition_offsets_.size()); + auto value_firsts = (*(this->edge_mask_view())).value_firsts(); + auto edge_counts = (*(this->edge_mask_view())).edge_counts(); + for (size_t i = 0; i < (*edge_partition_masks).size(); ++i) { + (*edge_partition_masks)[i] = + raft::device_span(value_firsts[i], edge_counts[i]); + } + } return compute_major_degrees(handle, this->edge_partition_offsets_, this->edge_partition_dcs_nzd_vertices_, - this->edge_partition_dcs_nzd_vertex_counts_, - this->has_edge_mask() - ? std::make_optional((*(this->edge_mask_view())).value_firsts()) - : std::nullopt, + edge_partition_masks, this->partition_, this->edge_partition_segment_offsets_); } else { @@ -559,12 +559,14 @@ graph_view_toffsets_, - this->has_edge_mask() ? std::make_optional((*(this->edge_mask_view())).value_firsts()[0]) - : std::nullopt, - this->local_vertex_partition_range_size()); + return compute_major_degrees(handle, + this->offsets_, + this->has_edge_mask() + ? std::make_optional(raft::device_span( + (*(this->edge_mask_view())).value_firsts()[0], + (*(this->edge_mask_view())).edge_counts()[0])) + : std::nullopt, + this->local_vertex_partition_range_size()); } else { CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); @@ -580,13 +582,22 @@ graph_view_thas_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); } else { + std::optional>> edge_partition_masks{ + std::nullopt}; + if (this->has_edge_mask()) { + edge_partition_masks = + std::vector>(this->edge_partition_offsets_.size()); + auto value_firsts = (*(this->edge_mask_view())).value_firsts(); + auto edge_counts = (*(this->edge_mask_view())).edge_counts(); + for (size_t i = 0; i < (*edge_partition_masks).size(); ++i) { + (*edge_partition_masks)[i] = + raft::device_span(value_firsts[i], edge_counts[i]); + } + } return compute_major_degrees(handle, this->edge_partition_offsets_, this->edge_partition_dcs_nzd_vertices_, - this->edge_partition_dcs_nzd_vertex_counts_, - this->has_edge_mask() - ? std::make_optional((*(this->edge_mask_view())).value_firsts()) - : std::nullopt, + edge_partition_masks, this->partition_, this->edge_partition_segment_offsets_); } @@ -601,12 +612,14 @@ graph_view_thas_edge_mask()), "unimplemented."); return compute_minor_degrees(handle, *this); } else { - return compute_major_degrees( - handle, - this->offsets_, - this->has_edge_mask() ? std::make_optional((*(this->edge_mask_view())).value_firsts()[0]) - : std::nullopt, - this->local_vertex_partition_range_size()); + return compute_major_degrees(handle, + this->offsets_, + this->has_edge_mask() + ? std::make_optional(raft::device_span( + (*(this->edge_mask_view())).value_firsts()[0], + (*(this->edge_mask_view())).edge_counts()[0])) + : std::nullopt, + this->local_vertex_partition_range_size()); } } diff --git a/cpp/src/traversal/od_shortest_distances_impl.cuh b/cpp/src/traversal/od_shortest_distances_impl.cuh index 6a0c5a4a675..cc69cb5f67f 100644 --- a/cpp/src/traversal/od_shortest_distances_impl.cuh +++ b/cpp/src/traversal/od_shortest_distances_impl.cuh @@ -432,7 +432,7 @@ rmm::device_uvector od_shortest_distances( // 1. 
check input arguments auto const num_vertices = graph_view.number_of_vertices(); - auto const num_edges = graph_view.number_of_edges(); + auto const num_edges = graph_view.compute_number_of_edges(handle); CUGRAPH_EXPECTS(num_vertices != 0 || (origins.size() == 0 && destinations.size() == 0), "Invalid input argument: the input graph is empty but origins.size() > 0 or " @@ -1049,7 +1049,7 @@ rmm::device_uvector od_shortest_distances( CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); auto const num_vertices = graph_view.number_of_vertices(); - auto const num_edges = graph_view.number_of_edges(); + auto const num_edges = graph_view.compute_number_of_edges(handle); weight_t average_vertex_degree = static_cast(num_edges) / static_cast(num_vertices); diff --git a/cpp/src/traversal/sssp_impl.cuh b/cpp/src/traversal/sssp_impl.cuh index c78fa3839e2..5a6d536c6f5 100644 --- a/cpp/src/traversal/sssp_impl.cuh +++ b/cpp/src/traversal/sssp_impl.cuh @@ -93,7 +93,7 @@ void sssp(raft::handle_t const& handle, "GraphViewType should support the push model."); auto const num_vertices = push_graph_view.number_of_vertices(); - auto const num_edges = push_graph_view.number_of_edges(); + auto const num_edges = push_graph_view.compute_number_of_edges(handle); if (num_vertices == 0) { return; } // implements the Near-Far Pile method in diff --git a/cpp/src/utilities/cugraph_ops_utils.hpp b/cpp/src/utilities/cugraph_ops_utils.hpp index 9aea4183866..880a2c8d104 100644 --- a/cpp/src/utilities/cugraph_ops_utils.hpp +++ b/cpp/src/utilities/cugraph_ops_utils.hpp @@ -30,7 +30,7 @@ ops::graph::csc get_graph( ops::graph::csc graph; graph.n_src_nodes = gview.number_of_vertices(); graph.n_dst_nodes = gview.number_of_vertices(); - graph.n_indices = gview.number_of_edges(); + graph.n_indices = gview.local_edge_partition_view().number_of_edges(); // FIXME this is sufficient for now, but if there is a fast (cached) way // of getting max degree, use that instead graph.dst_max_in_degree = std::numeric_limits::max(); diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d9c88bc179e..e9c6dc446af 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -257,7 +257,7 @@ ConfigureTest(BALANCED_TEST community/balanced_edge_test.cpp) ################################################################################################### # - EGO tests ------------------------------------------------------------------------------------- -ConfigureTest(EGO_TEST community/egonet_test.cpp GPUS 1 PERCENT 75) +ConfigureTest(EGONET_TEST community/egonet_test.cpp GPUS 1 PERCENT 75) ################################################################################################### # - FORCE ATLAS 2 tests -------------------------------------------------------------------------- @@ -531,7 +531,7 @@ if(BUILD_CUGRAPH_MG_TESTS) ############################################################################################### # - MG LOUVAIN tests -------------------------------------------------------------------------- - ConfigureTestMG(MG_EGO_TEST community/mg_egonet_test.cu) + ConfigureTestMG(MG_EGONET_TEST community/mg_egonet_test.cu) ############################################################################################### # - MG WEAKLY CONNECTED COMPONENTS tests ------------------------------------------------------ diff --git a/cpp/tests/c_api/hits_test.c b/cpp/tests/c_api/hits_test.c index c275d883d11..1ebd4f82a51 100644 --- a/cpp/tests/c_api/hits_test.c +++ b/cpp/tests/c_api/hits_test.c @@ -163,7 +163,7 
@@ int test_hits() weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0}; weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; // hits wants store_transposed = TRUE @@ -195,7 +195,7 @@ int test_hits_with_transpose() weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0}; weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; // Hits wants store_transposed = TRUE @@ -232,7 +232,7 @@ int test_hits_with_initial() vertex_t h_initial_vertices[] = {0, 1, 2, 3, 4}; weight_t h_initial_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; return generic_hits_test(h_src, diff --git a/cpp/tests/c_api/mg_create_graph_test.c b/cpp/tests/c_api/mg_create_graph_test.c index fec319d1881..7156647b025 100644 --- a/cpp/tests/c_api/mg_create_graph_test.c +++ b/cpp/tests/c_api/mg_create_graph_test.c @@ -175,18 +175,18 @@ int test_create_mg_graph_multiple_edge_lists(const cugraph_resource_handle_t* ha int my_rank = cugraph_resource_handle_get_rank(handle); int comm_size = cugraph_resource_handle_get_comm_size(handle); - size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size; + size_t local_num_vertices = num_vertices / comm_size; size_t local_start_vertex = my_rank * local_num_vertices; - size_t local_num_edges = (num_edges + comm_size - 1) / comm_size; + size_t local_num_edges = num_edges / comm_size; size_t local_start_edge = my_rank * local_num_edges; - local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge); - local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? local_num_vertices : (num_vertices - local_start_vertex); + local_num_edges = (my_rank != (comm_size - 1)) ? local_num_edges : (num_edges - local_start_edge); + local_num_vertices = (my_rank != (comm_size - 1)) ? local_num_vertices : (num_vertices - local_start_vertex); for (size_t i = 0 ; i < num_local_arrays ; ++i) { - size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; + size_t vertex_count = local_num_vertices / num_local_arrays; size_t vertex_start = i * vertex_count; - vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start); + vertex_count = (i != (num_local_arrays - 1)) ? vertex_count : (local_num_vertices - vertex_start); ret_code = cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error); @@ -363,18 +363,18 @@ int test_create_mg_graph_multiple_edge_lists_multi_edge(const cugraph_resource_h int my_rank = cugraph_resource_handle_get_rank(handle); int comm_size = cugraph_resource_handle_get_comm_size(handle); - size_t local_num_vertices = (num_vertices + comm_size - 1) / comm_size; + size_t local_num_vertices = num_vertices / comm_size; size_t local_start_vertex = my_rank * local_num_vertices; - size_t local_num_edges = (num_edges + comm_size - 1) / comm_size; + size_t local_num_edges = num_edges / comm_size; size_t local_start_edge = my_rank * local_num_edges; - local_num_edges = (local_num_edges < (num_edges - local_start_edge)) ? local_num_edges : (num_edges - local_start_edge); - local_num_vertices = (local_num_vertices < (num_vertices - local_start_vertex)) ? 
local_num_vertices : (num_vertices - local_start_vertex); + local_num_edges = (my_rank != (comm_size - 1)) ? local_num_edges : (num_edges - local_start_edge); + local_num_vertices = (my_rank != (comm_size - 1)) ? local_num_vertices : (num_vertices - local_start_vertex); for (size_t i = 0 ; i < num_local_arrays ; ++i) { size_t vertex_count = (local_num_vertices + num_local_arrays - 1) / num_local_arrays; size_t vertex_start = i * vertex_count; - vertex_count = (vertex_count < (local_num_vertices - vertex_start)) ? vertex_count : (local_num_vertices - vertex_start); + vertex_count = (i != (num_local_arrays - 1)) ? vertex_count : (local_num_vertices - vertex_start); ret_code = cugraph_type_erased_device_array_create(handle, vertex_count, vertex_tid, vertices + i, &ret_error); diff --git a/cpp/tests/c_api/mg_hits_test.c b/cpp/tests/c_api/mg_hits_test.c index 87371093613..3e10bfc05d6 100644 --- a/cpp/tests/c_api/mg_hits_test.c +++ b/cpp/tests/c_api/mg_hits_test.c @@ -171,7 +171,7 @@ int test_hits(const cugraph_resource_handle_t* handle) weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0}; weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; // hits wants store_transposed = TRUE @@ -203,7 +203,7 @@ int test_hits_with_transpose(const cugraph_resource_handle_t* handle) weight_t h_hubs[] = {0.347296, 0.532089, 1, 0.00003608, 0.00003608, 0}; weight_t h_authorities[] = {0.652703, 0.879385, 0, 1, 0.347296, 0.00009136}; - double epsilon = 0.0001; + double epsilon = 0.00002; size_t max_iterations = 20; // Hits wants store_transposed = TRUE diff --git a/cpp/tests/link_analysis/hits_test.cpp b/cpp/tests/link_analysis/hits_test.cpp index d0e77769034..cf35356bb76 100644 --- a/cpp/tests/link_analysis/hits_test.cpp +++ b/cpp/tests/link_analysis/hits_test.cpp @@ -52,9 +52,11 @@ std::tuple, std::vector, double, size_t> hits_re size_t max_iterations, std::optional starting_hub_values, bool normalized, - double tolerance) + double epsilon) { CUGRAPH_EXPECTS(num_vertices > 1, "number of vertices expected to be non-zero"); + auto tolerance = static_cast(num_vertices) * epsilon; + std::vector prev_hubs(num_vertices, result_t{1.0} / num_vertices); std::vector prev_authorities(num_vertices, result_t{1.0} / num_vertices); std::vector curr_hubs(num_vertices); @@ -127,8 +129,8 @@ std::tuple, std::vector, double, size_t> hits_re } struct Hits_Usecase { - bool check_correctness{true}; bool check_initial_input{false}; + bool check_correctness{true}; }; template @@ -175,8 +177,8 @@ class Tests_Hits : public ::testing::TestWithParam d_hubs(graph_view.local_vertex_partition_range_size(), handle.get_stream()); @@ -201,7 +203,7 @@ class Tests_Hits : public ::testing::TestWithParam h_cugraph_hits{}; if (renumber) { @@ -246,8 +248,7 @@ class Tests_Hits : public ::testing::TestWithParam(graph_view.number_of_vertices())) * - threshold_ratio; // skip comparison for low hits vertices (lowly ranked vertices) + 1e-6; // skip comparison for low hits vertices (lowly ranked vertices) auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { return std::abs(lhs - rhs) <= std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); @@ -294,14 +295,17 @@ INSTANTIATE_TEST_SUITE_P( Tests_Hits_File, ::testing::Combine( // enable correctness checks - ::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}), + ::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, 
true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), + cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), + cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), + cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); INSTANTIATE_TEST_SUITE_P(rmat_small_test, Tests_Hits_Rmat, // enable correctness checks - ::testing::Combine(::testing::Values(Hits_Usecase{true, false}, + ::testing::Combine(::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase( 10, 16, 0.57, 0.19, 0.19, 0, false, false)))); @@ -315,7 +319,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_Hits_File, ::testing::Combine( // disable correctness checks - ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{false, true}), + ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( @@ -327,7 +331,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_Hits_Rmat, // disable correctness checks for large graphs ::testing::Combine( - ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{false, true}), + ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/link_analysis/mg_hits_test.cpp b/cpp/tests/link_analysis/mg_hits_test.cpp index cf95d03681d..5c89bafd08e 100644 --- a/cpp/tests/link_analysis/mg_hits_test.cpp +++ b/cpp/tests/link_analysis/mg_hits_test.cpp @@ -33,8 +33,8 @@ #include struct Hits_Usecase { - bool check_correctness{true}; bool check_initial_input{false}; + bool check_correctness{true}; }; template @@ -81,7 +81,7 @@ class Tests_MGHits : public ::testing::TestWithParam d_mg_hubs(mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream()); @@ -110,7 +110,7 @@ class Tests_MGHits : public ::testing::TestWithParam(mg_graph_view.number_of_vertices())) * - threshold_ratio; // skip comparison for low Hits verties (lowly ranked - // vertices) + 1e-6; // skip comparison for low Hits verties (lowly ranked vertices) auto nearly_equal = [threshold_ratio, threshold_magnitude](auto lhs, auto rhs) { return std::abs(lhs - rhs) < std::max(std::max(lhs, rhs) * threshold_ratio, threshold_magnitude); @@ -274,7 +272,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGHits_File, ::testing::Combine( // enable correctness checks - ::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}), + ::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/web-Google.mtx"), cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"), @@ -285,7 +283,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGHits_Rmat, ::testing::Combine( // enable correctness checks - ::testing::Values(Hits_Usecase{true, false}, Hits_Usecase{true, true}), + ::testing::Values(Hits_Usecase{false, true}, Hits_Usecase{true, true}), ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false)))); INSTANTIATE_TEST_SUITE_P( @@ -297,7 +295,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGHits_Rmat, ::testing::Combine( // disable correctness checks for large graphs - ::testing::Values(Hits_Usecase{false, false}), + ::testing::Values(Hits_Usecase{false, false}, Hits_Usecase{true, false}), 
::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu index 2b9e9aafa3f..80aa34b68ae 100644 --- a/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu +++ b/cpp/tests/prims/mg_per_v_random_select_transform_outgoing_e.cu @@ -301,8 +301,8 @@ class Tests_MGPerVRandomSelectTransformOutgoingE sg_graph_view.local_edge_partition_view().offsets().begin(), sg_graph_view.local_edge_partition_view().offsets().end(), sg_offsets.begin()); - rmm::device_uvector sg_indices(sg_graph_view.number_of_edges(), - handle_->get_stream()); + rmm::device_uvector sg_indices( + sg_graph_view.local_edge_partition_view().indices().size(), handle_->get_stream()); thrust::copy(handle_->get_thrust_policy(), sg_graph_view.local_edge_partition_view().indices().begin(), sg_graph_view.local_edge_partition_view().indices().end(), diff --git a/cpp/tests/sampling/sampling_post_processing_test.cu b/cpp/tests/sampling/sampling_post_processing_test.cu index e5267d75ac2..6be735c3482 100644 --- a/cpp/tests/sampling/sampling_post_processing_test.cu +++ b/cpp/tests/sampling/sampling_post_processing_test.cu @@ -38,6 +38,8 @@ #include #include +#include + struct SamplingPostProcessing_Usecase { size_t num_labels{}; size_t num_seeds_per_label{}; @@ -318,15 +320,16 @@ bool check_renumber_map_invariants( auto renumbered_merged_vertex_first = thrust::make_transform_iterator( merged_vertices.begin(), - [sorted_org_vertices = - raft::device_span(sorted_org_vertices.data(), sorted_org_vertices.size()), - matching_renumbered_vertices = raft::device_span( - matching_renumbered_vertices.data(), - matching_renumbered_vertices.size())] __device__(vertex_t major) { - auto it = thrust::lower_bound( - thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); - return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; - }); + cuda::proclaim_return_type( + [sorted_org_vertices = raft::device_span(sorted_org_vertices.data(), + sorted_org_vertices.size()), + matching_renumbered_vertices = raft::device_span( + matching_renumbered_vertices.data(), + matching_renumbered_vertices.size())] __device__(vertex_t major) { + auto it = thrust::lower_bound( + thrust::seq, sorted_org_vertices.begin(), sorted_org_vertices.end(), major); + return matching_renumbered_vertices[thrust::distance(sorted_org_vertices.begin(), it)]; + })); thrust::reduce_by_key(handle.get_thrust_policy(), sort_key_first, @@ -1020,23 +1023,24 @@ class Tests_SamplingPostProcessing ? this_label_output_edgelist_srcs.begin() : this_label_output_edgelist_dsts.begin()) + old_size, - [offsets = raft::device_span(d_offsets.data(), d_offsets.size()), - nzd_vertices = - renumbered_and_compressed_nzd_vertices - ? thrust::make_optional>( - (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, - (offset_end_offset - offset_start_offset) - 1) - : thrust::nullopt, - base_v] __device__(size_t i) { - auto idx = static_cast(thrust::distance( - offsets.begin() + 1, - thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), i))); - if (nzd_vertices) { - return (*nzd_vertices)[idx]; - } else { - return base_v + static_cast(idx); - } - }); + cuda::proclaim_return_type( + [offsets = raft::device_span(d_offsets.data(), d_offsets.size()), + nzd_vertices = + renumbered_and_compressed_nzd_vertices + ? 
thrust::make_optional>( + (*renumbered_and_compressed_nzd_vertices).data() + offset_start_offset, + (offset_end_offset - offset_start_offset) - 1) + : thrust::nullopt, + base_v] __device__(size_t i) { + auto idx = static_cast(thrust::distance( + offsets.begin() + 1, + thrust::upper_bound(thrust::seq, offsets.begin() + 1, offsets.end(), i))); + if (nzd_vertices) { + return (*nzd_vertices)[idx]; + } else { + return base_v + static_cast(idx); + } + })); thrust::copy(handle.get_thrust_policy(), renumbered_and_compressed_edgelist_minors.begin() + h_offsets[0], renumbered_and_compressed_edgelist_minors.begin() + h_offsets.back(), diff --git a/cpp/tests/structure/mg_select_random_vertices_test.cpp b/cpp/tests/structure/mg_select_random_vertices_test.cpp index e49e1ebcb99..8392a6831ca 100644 --- a/cpp/tests/structure/mg_select_random_vertices_test.cpp +++ b/cpp/tests/structure/mg_select_random_vertices_test.cpp @@ -79,6 +79,8 @@ class Tests_MGSelectRandomVertices // std::vector with_replacement_flags = {true, false}; + std::vector sort_vertices_flags = {true, false}; + { // Generate distributed vertex set to sample from std::srand((unsigned)std::chrono::duration_cast( @@ -107,80 +109,95 @@ class Tests_MGSelectRandomVertices ? select_random_vertices_usecase.select_count : std::rand() % (num_of_elements_in_given_set + 1); - for (int idx = 0; idx < with_replacement_flags.size(); idx++) { - bool with_replacement = with_replacement_flags[idx]; - auto d_sampled_vertices = - cugraph::select_random_vertices(*handle_, - mg_graph_view, - std::make_optional(raft::device_span{ - d_given_set.data(), d_given_set.size()}), - rng_state, - select_count, - with_replacement, - true); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); - - if (select_random_vertices_usecase.check_correctness) { - if (!with_replacement) { - std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); - - auto nr_duplicates = - std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), - h_sampled_vertices.end()); - - ASSERT_EQ(nr_duplicates, 0); + for (int i = 0; i < with_replacement_flags.size(); i++) { + for (int j = 0; j < sort_vertices_flags.size(); j++) { + bool with_replacement = with_replacement_flags[i]; + bool sort_vertices = sort_vertices_flags[j]; + + auto d_sampled_vertices = + cugraph::select_random_vertices(*handle_, + mg_graph_view, + std::make_optional(raft::device_span{ + d_given_set.data(), d_given_set.size()}), + rng_state, + select_count, + with_replacement, + sort_vertices); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); + + if (select_random_vertices_usecase.check_correctness) { + if (!with_replacement) { + std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); + + auto nr_duplicates = + std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), + h_sampled_vertices.end()); + + ASSERT_EQ(nr_duplicates, 0); + } + + std::sort(h_given_set.begin(), h_given_set.end()); + if (sort_vertices) { + assert(std::is_sorted(h_sampled_vertices.begin(), h_sampled_vertices.end())); + } else { + std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); + } + std::for_each( + h_sampled_vertices.begin(), h_sampled_vertices.end(), [&h_given_set](vertex_t v) { + ASSERT_TRUE(std::binary_search(h_given_set.begin(), h_given_set.end(), v)); + }); } - - std::sort(h_given_set.begin(), h_given_set.end()); - std::for_each( - 
h_sampled_vertices.begin(), h_sampled_vertices.end(), [&h_given_set](vertex_t v) { - ASSERT_TRUE(std::binary_search(h_given_set.begin(), h_given_set.end(), v)); - }); } } - } - - // - // Test sampling from [0, V) - // - - for (int idx = 0; idx < with_replacement_flags.size(); idx++) { - bool with_replacement = false; - auto d_sampled_vertices = cugraph::select_random_vertices( - *handle_, - mg_graph_view, - std::optional>{std::nullopt}, - rng_state, - select_random_vertices_usecase.select_count, - with_replacement, - true); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); - if (select_random_vertices_usecase.check_correctness) { - if (!with_replacement) { - std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); - - auto nr_duplicates = - std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), - h_sampled_vertices.end()); - - ASSERT_EQ(nr_duplicates, 0); + // + // Test sampling from [0, V) + // + + for (int i = 0; i < with_replacement_flags.size(); i++) { + for (int j = 0; j < sort_vertices_flags.size(); j++) { + bool with_replacement = with_replacement_flags[i]; + bool sort_vertices = sort_vertices_flags[j]; + + auto d_sampled_vertices = cugraph::select_random_vertices( + *handle_, + mg_graph_view, + std::optional>{std::nullopt}, + rng_state, + select_random_vertices_usecase.select_count, + with_replacement, + sort_vertices); + + RAFT_CUDA_TRY(cudaDeviceSynchronize()); + + auto h_sampled_vertices = cugraph::test::to_host(*handle_, d_sampled_vertices); + + if (select_random_vertices_usecase.check_correctness) { + if (!with_replacement) { + std::sort(h_sampled_vertices.begin(), h_sampled_vertices.end()); + + auto nr_duplicates = + std::distance(std::unique(h_sampled_vertices.begin(), h_sampled_vertices.end()), + h_sampled_vertices.end()); + + ASSERT_EQ(nr_duplicates, 0); + } + if (sort_vertices) { + assert(std::is_sorted(h_sampled_vertices.begin(), h_sampled_vertices.end())); + } + + auto vertex_first = mg_graph_view.local_vertex_partition_range_first(); + auto vertex_last = mg_graph_view.local_vertex_partition_range_last(); + std::for_each(h_sampled_vertices.begin(), + h_sampled_vertices.end(), + [vertex_first, vertex_last](vertex_t v) { + ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)); + }); + } } - - auto vertex_first = mg_graph_view.local_vertex_partition_range_first(); - auto vertex_last = mg_graph_view.local_vertex_partition_range_last(); - - std::for_each(h_sampled_vertices.begin(), - h_sampled_vertices.end(), - [vertex_first, vertex_last](vertex_t v) { - ASSERT_TRUE((v >= vertex_first) && (v < vertex_last)); - }); } } } diff --git a/dependencies.yaml b/dependencies.yaml index baa08c37413..b5c1fb2fa2d 100644 --- a/dependencies.yaml +++ b/dependencies.yaml @@ -511,6 +511,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -536,6 +537,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -561,6 +563,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt 
file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -586,6 +589,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -611,6 +615,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -636,6 +641,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: @@ -661,6 +667,7 @@ dependencies: packages: # pip recognizes the index as a global option for the requirements.txt file - --extra-index-url=https://pypi.nvidia.com + - --extra-index-url=https://pypi.anaconda.org/rapidsai-wheels-nightly/simple specific: - output_types: [requirements, pyproject] matrices: diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py index 9c9dcdb43bb..bef3a023b93 100644 --- a/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/__init__.py @@ -13,6 +13,7 @@ from .gat_conv import GATConv from .gatv2_conv import GATv2Conv +from .hetero_gat_conv import HeteroGATConv from .rgcn_conv import RGCNConv from .sage_conv import SAGEConv from .transformer_conv import TransformerConv @@ -20,6 +21,7 @@ __all__ = [ "GATConv", "GATv2Conv", + "HeteroGATConv", "RGCNConv", "SAGEConv", "TransformerConv", diff --git a/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py new file mode 100644 index 00000000000..3b717552a96 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/nn/conv/hetero_gat_conv.py @@ -0,0 +1,265 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Optional, Union +from collections import defaultdict + +from cugraph.utilities.utils import import_optional +from pylibcugraphops.pytorch.operators import mha_gat_n2n + +from .base import BaseConv + +torch = import_optional("torch") +torch_geometric = import_optional("torch_geometric") + + +class HeteroGATConv(BaseConv): + r"""The graph attentional operator on heterogeneous graphs, where a separate + `GATConv` is applied on the homogeneous graph for each edge type. 
Compared + with directly wrapping `GATConv`s with `HeteroConv`, `HeteroGATConv` fuses + all the linear transformation associated with each node type together into 1 + GEMM call, to improve the performance on GPUs. + + Parameters + ---------- + in_channels : int or Dict[str, int]) + Size of each input sample of every node type. + + out_channels : int + Size of each output sample. + + node_types : List[str] + List of Node types. + + edge_types : List[Tuple[str, str, str]] + List of Edge types. + + heads : int, optional (default=1) + Number of multi-head-attentions. + + concat : bool, optional (default=True): + If set to :obj:`False`, the multi-head attentions are averaged instead + of concatenated. + + negative_slope : float, optional (default=0.2) + LeakyReLU angle of the negative slope. + + bias : bool, optional (default=True) + If set to :obj:`False`, the layer will not learn an additive bias. + + aggr : str, optional (default="sum") + The aggregation scheme to use for grouping node embeddings generated by + different relations. Choose from "sum", "mean", "min", "max". + """ + + def __init__( + self, + in_channels: Union[int, dict[str, int]], + out_channels: int, + node_types: list[str], + edge_types: list[tuple[str, str, str]], + heads: int = 1, + concat: bool = True, + negative_slope: float = 0.2, + bias: bool = True, + aggr: str = "sum", + ): + major, minor, patch = torch_geometric.__version__.split(".")[:3] + pyg_version = tuple(map(int, [major, minor, patch])) + if pyg_version < (2, 4, 0): + raise RuntimeError(f"{self.__class__.__name__} requires pyg >= 2.4.0.") + + super().__init__() + + if isinstance(in_channels, int): + in_channels = dict.fromkeys(node_types, in_channels) + self.in_channels = in_channels + self.out_channels = out_channels + + self.node_types = node_types + self.edge_types = edge_types + self.num_heads = heads + self.concat_heads = concat + + self.negative_slope = negative_slope + self.aggr = aggr + + self.relations_per_ntype = defaultdict(lambda: ([], [])) + + lin_weights = dict.fromkeys(self.node_types) + attn_weights = dict.fromkeys(self.edge_types) + biases = dict.fromkeys(self.edge_types) + + ParameterDict = torch_geometric.nn.parameter_dict.ParameterDict + + for edge_type in self.edge_types: + src_type, _, dst_type = edge_type + self.relations_per_ntype[src_type][0].append(edge_type) + if src_type != dst_type: + self.relations_per_ntype[dst_type][1].append(edge_type) + + attn_weights[edge_type] = torch.empty( + 2 * self.num_heads * self.out_channels + ) + + if bias and concat: + biases[edge_type] = torch.empty(self.num_heads * out_channels) + elif bias: + biases[edge_type] = torch.empty(out_channels) + else: + biases[edge_type] = None + + for ntype in self.node_types: + n_src_rel = len(self.relations_per_ntype[ntype][0]) + n_dst_rel = len(self.relations_per_ntype[ntype][1]) + n_rel = n_src_rel + n_dst_rel + + lin_weights[ntype] = torch.empty( + (n_rel * self.num_heads * self.out_channels, self.in_channels[ntype]) + ) + + self.lin_weights = ParameterDict(lin_weights) + self.attn_weights = ParameterDict(attn_weights) + + if bias: + self.bias = ParameterDict(biases) + else: + self.register_parameter("bias", None) + + self.reset_parameters() + + def split_tensors( + self, x_fused_dict: dict[str, torch.Tensor], dim: int + ) -> tuple[dict[str, torch.Tensor], dict[str, torch.Tensor]]: + """Split fused tensors into chunks based on edge types. + + Parameters + ---------- + x_fused_dict : dict[str, torch.Tensor] + A dictionary to hold node feature for each node type. 
The key is + node type; the value is a fused tensor that account for all + relations for that node type. + + dim : int + Dimension along which to split the fused tensor. + + Returns + ------- + x_src_dict : dict[str, torch.Tensor] + A dictionary to hold source node feature for each relation graph. + + x_dst_dict : dict[str, torch.Tensor] + A dictionary to hold destination node feature for each relation graph. + """ + x_src_dict = dict.fromkeys(self.edge_types) + x_dst_dict = dict.fromkeys(self.edge_types) + + for ntype, t in x_fused_dict.items(): + n_src_rel = len(self.relations_per_ntype[ntype][0]) + n_dst_rel = len(self.relations_per_ntype[ntype][1]) + n_rel = n_src_rel + n_dst_rel + t_list = torch.chunk(t, chunks=n_rel, dim=dim) + + for i, src_rel in enumerate(self.relations_per_ntype[ntype][0]): + x_src_dict[src_rel] = t_list[i] + + for i, dst_rel in enumerate(self.relations_per_ntype[ntype][1]): + x_dst_dict[dst_rel] = t_list[i + n_src_rel] + + return x_src_dict, x_dst_dict + + def reset_parameters(self, seed: Optional[int] = None): + if seed is not None: + torch.manual_seed(seed) + + w_src, w_dst = self.split_tensors(self.lin_weights, dim=0) + + for edge_type in self.edge_types: + src_type, _, dst_type = edge_type + + # lin_src + torch_geometric.nn.inits.glorot(w_src[edge_type]) + + # lin_dst + if src_type != dst_type: + torch_geometric.nn.inits.glorot(w_dst[edge_type]) + + # attn_weights + torch_geometric.nn.inits.glorot( + self.attn_weights[edge_type].view(-1, self.num_heads, self.out_channels) + ) + + # bias + if self.bias is not None: + torch_geometric.nn.inits.zeros(self.bias[edge_type]) + + def forward( + self, + x_dict: dict[str, torch.Tensor], + edge_index_dict: dict[tuple[str, str, str], torch.Tensor], + ) -> dict[str, torch.Tensor]: + feat_dict = dict.fromkeys(x_dict.keys()) + + for ntype, x in x_dict.items(): + feat_dict[ntype] = x @ self.lin_weights[ntype].T + + x_src_dict, x_dst_dict = self.split_tensors(feat_dict, dim=1) + + out_dict = defaultdict(list) + + for edge_type, edge_index in edge_index_dict.items(): + src_type, _, dst_type = edge_type + + csc = BaseConv.to_csc( + edge_index, (x_dict[src_type].size(0), x_dict[dst_type].size(0)) + ) + + if src_type == dst_type: + graph = self.get_cugraph( + csc, + bipartite=False, + ) + out = mha_gat_n2n( + x_src_dict[edge_type], + self.attn_weights[edge_type], + graph, + num_heads=self.num_heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat_heads, + ) + + else: + graph = self.get_cugraph( + csc, + bipartite=True, + ) + out = mha_gat_n2n( + (x_src_dict[edge_type], x_dst_dict[edge_type]), + self.attn_weights[edge_type], + graph, + num_heads=self.num_heads, + activation="LeakyReLU", + negative_slope=self.negative_slope, + concat_heads=self.concat_heads, + ) + + if self.bias is not None: + out = out + self.bias[edge_type] + + out_dict[dst_type].append(out) + + for key, value in out_dict.items(): + out_dict[key] = torch_geometric.nn.conv.hetero_conv.group(value, self.aggr) + + return out_dict diff --git a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py index 1512901822a..30994289f9c 100644 --- a/python/cugraph-pyg/cugraph_pyg/tests/conftest.py +++ b/python/cugraph-pyg/cugraph_pyg/tests/conftest.py @@ -284,3 +284,32 @@ def basic_pyg_graph_2(): ) size = (10, 10) return edge_index, size + + +@pytest.fixture +def sample_pyg_hetero_data(): + torch.manual_seed(12345) + raw_data_dict = { + "v0": torch.randn(6, 3), + "v1": torch.randn(7, 2), + 
"v2": torch.randn(5, 4), + ("v2", "e0", "v1"): torch.tensor([[0, 2, 2, 4, 4], [4, 3, 6, 0, 1]]), + ("v1", "e1", "v1"): torch.tensor( + [[0, 2, 2, 2, 3, 5, 5], [4, 0, 4, 5, 3, 0, 1]] + ), + ("v0", "e2", "v0"): torch.tensor([[0, 2, 2, 3, 5, 5], [1, 1, 5, 1, 1, 2]]), + ("v1", "e3", "v2"): torch.tensor( + [[0, 1, 1, 2, 4, 5, 6], [1, 2, 3, 1, 2, 2, 2]] + ), + ("v0", "e4", "v2"): torch.tensor([[1, 1, 3, 3, 4, 4], [1, 4, 1, 4, 0, 3]]), + } + + # create a nested dictionary to facilitate PyG's HeteroData construction + hetero_data_dict = {} + for key, value in raw_data_dict.items(): + if isinstance(key, tuple): + hetero_data_dict[key] = {"edge_index": value} + else: + hetero_data_dict[key] = {"x": value} + + return hetero_data_dict diff --git a/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py new file mode 100644 index 00000000000..1c841a17df7 --- /dev/null +++ b/python/cugraph-pyg/cugraph_pyg/tests/nn/test_hetero_gat_conv.py @@ -0,0 +1,132 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import pytest + +from cugraph_pyg.nn import HeteroGATConv as CuGraphHeteroGATConv +from cugraph.utilities.utils import import_optional, MissingModule + +torch = import_optional("torch") +torch_geometric = import_optional("torch_geometric") + +ATOL = 1e-6 + + +@pytest.mark.cugraph_ops +@pytest.mark.skipif(isinstance(torch, MissingModule), reason="torch not available") +@pytest.mark.skipif( + isinstance(torch_geometric, MissingModule), reason="torch_geometric not available" +) +@pytest.mark.parametrize("heads", [1, 3, 10]) +@pytest.mark.parametrize("aggr", ["sum", "mean"]) +def test_hetero_gat_conv_equality(sample_pyg_hetero_data, aggr, heads): + major, minor, patch = torch_geometric.__version__.split(".")[:3] + pyg_version = tuple(map(int, [major, minor, patch])) + if pyg_version < (2, 4, 0): + pytest.skip("Skipping HeteroGATConv test") + + from torch_geometric.data import HeteroData + from torch_geometric.nn import HeteroConv, GATConv + + device = torch.device("cuda:0") + data = HeteroData(sample_pyg_hetero_data).to(device) + + in_channels_dict = {k: v.size(1) for k, v in data.x_dict.items()} + out_channels = 2 + + convs_dict = {} + kwargs1 = dict(heads=heads, add_self_loops=False, bias=False) + for edge_type in data.edge_types: + src_t, _, dst_t = edge_type + in_channels_src, in_channels_dst = data.x_dict[src_t].size(-1), data.x_dict[ + dst_t + ].size(-1) + if src_t == dst_t: + convs_dict[edge_type] = GATConv(in_channels_src, out_channels, **kwargs1) + else: + convs_dict[edge_type] = GATConv( + (in_channels_src, in_channels_dst), out_channels, **kwargs1 + ) + + conv1 = HeteroConv(convs_dict, aggr=aggr).to(device) + kwargs2 = dict( + heads=heads, + aggr=aggr, + node_types=data.node_types, + edge_types=data.edge_types, + bias=False, + ) + conv2 = CuGraphHeteroGATConv(in_channels_dict, out_channels, **kwargs2).to(device) + + # copy over linear and attention weights + w_src, w_dst = 
conv2.split_tensors(conv2.lin_weights, dim=0) + with torch.no_grad(): + for edge_type in conv2.edge_types: + src_t, _, dst_t = edge_type + w_src[edge_type][:, :] = conv1.convs[edge_type].lin_src.weight[:, :] + if w_dst[edge_type] is not None: + w_dst[edge_type][:, :] = conv1.convs[edge_type].lin_dst.weight[:, :] + + conv2.attn_weights[edge_type][: heads * out_channels] = conv1.convs[ + edge_type + ].att_src.data.flatten() + conv2.attn_weights[edge_type][heads * out_channels :] = conv1.convs[ + edge_type + ].att_dst.data.flatten() + + out1 = conv1(data.x_dict, data.edge_index_dict) + out2 = conv2(data.x_dict, data.edge_index_dict) + + for node_type in data.node_types: + assert torch.allclose(out1[node_type], out2[node_type], atol=ATOL) + + loss1 = 0 + loss2 = 0 + for node_type in data.node_types: + loss1 += out1[node_type].mean() + loss2 += out2[node_type].mean() + + loss1.backward() + loss2.backward() + + # check gradient w.r.t attention weights + out_dim = heads * out_channels + for edge_type in conv2.edge_types: + assert torch.allclose( + conv1.convs[edge_type].att_src.grad.flatten(), + conv2.attn_weights[edge_type].grad[:out_dim], + atol=ATOL, + ) + assert torch.allclose( + conv1.convs[edge_type].att_dst.grad.flatten(), + conv2.attn_weights[edge_type].grad[out_dim:], + atol=ATOL, + ) + + # check gradient w.r.t linear weights + grad_lin_weights_ref = dict.fromkeys(out1.keys()) + for node_t, (rels_as_src, rels_as_dst) in conv2.relations_per_ntype.items(): + grad_list = [] + for rel_t in rels_as_src: + grad_list.append(conv1.convs[rel_t].lin_src.weight.grad.clone()) + for rel_t in rels_as_dst: + grad_list.append(conv1.convs[rel_t].lin_dst.weight.grad.clone()) + assert len(grad_list) > 0 + grad_lin_weights_ref[node_t] = torch.vstack(grad_list) + + for node_type in conv2.lin_weights: + assert torch.allclose( + grad_lin_weights_ref[node_type], + conv2.lin_weights[node_type].grad, + atol=ATOL, + ) diff --git a/python/cugraph/CMakeLists.txt b/python/cugraph/CMakeLists.txt index a1ec12c6e07..99936b23a8c 100644 --- a/python/cugraph/CMakeLists.txt +++ b/python/cugraph/CMakeLists.txt @@ -38,7 +38,6 @@ project( option(FIND_CUGRAPH_CPP "Search for existing CUGRAPH C++ installations before defaulting to local files" OFF ) -option(CUGRAPH_BUILD_WHEELS "Whether this build is generating a Python wheel." 
OFF) option(USE_CUGRAPH_OPS "Enable all functions that call cugraph-ops" ON) if(NOT USE_CUGRAPH_OPS) @@ -59,22 +58,14 @@ if(NOT cugraph_FOUND) set(BUILD_TESTS OFF) set(BUILD_CUGRAPH_MG_TESTS OFF) set(BUILD_CUGRAPH_OPS_CPP_TESTS OFF) - - set(_exclude_from_all "") - if(CUGRAPH_BUILD_WHEELS) - # Statically link dependencies if building wheels - set(CUDA_STATIC_RUNTIME ON) - set(USE_RAFT_STATIC ON) - set(CUGRAPH_COMPILE_RAFT_LIB ON) - set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) - set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) - set(ALLOW_CLONE_CUGRAPH_OPS ON) - - # Don't install the cuML C++ targets into wheels - set(_exclude_from_all EXCLUDE_FROM_ALL) - endif() - - add_subdirectory(../../cpp cugraph-cpp ${_exclude_from_all}) + set(CUDA_STATIC_RUNTIME ON) + set(USE_RAFT_STATIC ON) + set(CUGRAPH_COMPILE_RAFT_LIB ON) + set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) + set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) + set(ALLOW_CLONE_CUGRAPH_OPS ON) + + add_subdirectory(../../cpp cugraph-cpp EXCLUDE_FROM_ALL) set(cython_lib_dir cugraph) install(TARGETS cugraph DESTINATION ${cython_lib_dir}) diff --git a/python/nx-cugraph/lint.yaml b/python/nx-cugraph/lint.yaml index a94aa9f0448..de6f20bc439 100644 --- a/python/nx-cugraph/lint.yaml +++ b/python/nx-cugraph/lint.yaml @@ -45,12 +45,12 @@ repos: - id: pyupgrade args: [--py39-plus] - repo: https://github.com/psf/black - rev: 23.10.1 + rev: 23.11.0 hooks: - id: black # - id: black-jupyter - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 + rev: v0.1.7 hooks: - id: ruff args: [--fix-only, --show-fixes] # --unsafe-fixes] @@ -62,7 +62,7 @@ repos: additional_dependencies: &flake8_dependencies # These versions need updated manually - flake8==6.1.0 - - flake8-bugbear==23.9.16 + - flake8-bugbear==23.12.2 - flake8-simplify==0.21.0 - repo: https://github.com/asottile/yesqa rev: v1.5.0 @@ -77,7 +77,7 @@ repos: additional_dependencies: [tomli] files: ^(nx_cugraph|docs)/ - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.1.3 + rev: v0.1.7 hooks: - id: ruff - repo: https://github.com/pre-commit/pre-commit-hooks diff --git a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py index 63841b15bd5..e4947491555 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/__init__.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/__init__.py @@ -15,13 +15,13 @@ centrality, community, components, - shortest_paths, link_analysis, + shortest_paths, ) from .bipartite import complete_bipartite_graph from .centrality import * from .components import * from .core import * from .isolate import * -from .shortest_paths import * from .link_analysis import * +from .shortest_paths import * diff --git a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py index 1d3e762b4fd..25b9b39554b 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/bipartite/generators.py @@ -17,15 +17,14 @@ import numpy as np from nx_cugraph.generators._utils import _create_using_class, _number_and_nodes -from nx_cugraph.utils import index_dtype, networkx_algorithm, nodes_or_number +from nx_cugraph.utils import index_dtype, networkx_algorithm __all__ = [ "complete_bipartite_graph", ] -@nodes_or_number([0, 1]) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=[0, 1]) def complete_bipartite_graph(n1, n2, create_using=None): graph_class, inplace = _create_using_class(create_using) if graph_class.is_directed(): 
diff --git a/python/nx-cugraph/nx_cugraph/algorithms/core.py b/python/nx-cugraph/nx_cugraph/algorithms/core.py index 2219388bc58..390598d070e 100644 --- a/python/nx-cugraph/nx_cugraph/algorithms/core.py +++ b/python/nx-cugraph/nx_cugraph/algorithms/core.py @@ -31,7 +31,11 @@ def k_truss(G, k): if is_nx := isinstance(G, nx.Graph): G = nxcg.from_networkx(G, preserve_all_attrs=True) if nxcg.number_of_selfloops(G) > 0: - raise nx.NetworkXError( + if nx.__version__[:3] <= "3.2": + exc_class = nx.NetworkXError + else: + exc_class = nx.NetworkXNotImplemented + raise exc_class( "Input graph has self loops which is not permitted; " "Consider using G.remove_edges_from(nx.selfloop_edges(G))." ) diff --git a/python/nx-cugraph/nx_cugraph/generators/classic.py b/python/nx-cugraph/nx_cugraph/generators/classic.py index b196c232320..4213e6dd2a0 100644 --- a/python/nx-cugraph/nx_cugraph/generators/classic.py +++ b/python/nx-cugraph/nx_cugraph/generators/classic.py @@ -19,7 +19,7 @@ import nx_cugraph as nxcg -from ..utils import _get_int_dtype, index_dtype, networkx_algorithm, nodes_or_number +from ..utils import _get_int_dtype, index_dtype, networkx_algorithm from ._utils import ( _IS_NX32_OR_LESS, _common_small_graph, @@ -86,8 +86,7 @@ def circular_ladder_graph(n, create_using=None): return _ladder_graph(n, create_using, is_circular=True) -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def complete_graph(n, create_using=None): n, nodes = _number_and_nodes(n) if n < 3: @@ -143,8 +142,7 @@ def complete_multipartite_graph(*subset_sizes): ) -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def cycle_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) @@ -174,8 +172,7 @@ def cycle_graph(n, create_using=None): return G -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def empty_graph(n=0, create_using=None, default=nx.Graph): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using, default=default) @@ -242,8 +239,7 @@ def ladder_graph(n, create_using=None): return _ladder_graph(n, create_using) -@nodes_or_number([0, 1]) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=[0, 1]) def lollipop_graph(m, n, create_using=None): # Like complete_graph then path_graph orig_m, unused_nodes_m = m @@ -283,8 +279,7 @@ def null_graph(create_using=None): return _common_small_graph(0, None, create_using) -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def path_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) @@ -304,8 +299,7 @@ def path_graph(n, create_using=None): return G -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def star_graph(n, create_using=None): orig_n, orig_nodes = n n, nodes = _number_and_nodes(n) @@ -329,8 +323,7 @@ def star_graph(n, create_using=None): return G -@nodes_or_number([0, 1]) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=[0, 1]) def tadpole_graph(m, n, create_using=None): orig_m, unused_nodes_m = m orig_n, unused_nodes_n = n @@ -382,8 +375,7 @@ def turan_graph(n, r): return complete_multipartite_graph(*partitions) -@nodes_or_number(0) -@networkx_algorithm +@networkx_algorithm(nodes_or_number=0) def wheel_graph(n, create_using=None): n, nodes = _number_and_nodes(n) graph_class, inplace = _create_using_class(create_using) diff --git 
a/python/nx-cugraph/nx_cugraph/utils/decorators.py b/python/nx-cugraph/nx_cugraph/utils/decorators.py index 0048aee51bb..a0dbfcec890 100644 --- a/python/nx-cugraph/nx_cugraph/utils/decorators.py +++ b/python/nx-cugraph/nx_cugraph/utils/decorators.py @@ -15,6 +15,7 @@ from functools import partial, update_wrapper from textwrap import dedent +import networkx as nx from networkx.utils.decorators import nodes_or_number, not_implemented_for from nx_cugraph.interface import BackendInterface @@ -47,10 +48,18 @@ def __new__( *, name: str | None = None, extra_params: dict[str, str] | str | None = None, + nodes_or_number: list[int] | int | None = None, ): if func is None: - return partial(networkx_algorithm, name=name, extra_params=extra_params) + return partial( + networkx_algorithm, + name=name, + extra_params=extra_params, + nodes_or_number=nodes_or_number, + ) instance = object.__new__(cls) + if nodes_or_number is not None and nx.__version__[:3] > "3.2": + func = nx.utils.decorators.nodes_or_number(nodes_or_number)(func) # update_wrapper sets __wrapped__, which will be used for the signature update_wrapper(instance, func) instance.__defaults__ = func.__defaults__ @@ -76,6 +85,8 @@ def __new__( setattr(BackendInterface, instance.name, instance) # Set methods so they are in __dict__ instance._can_run = instance._can_run + if nodes_or_number is not None and nx.__version__[:3] <= "3.2": + instance = nx.utils.decorators.nodes_or_number(nodes_or_number)(instance) return instance def _can_run(self, func): diff --git a/python/pylibcugraph/CMakeLists.txt b/python/pylibcugraph/CMakeLists.txt index 7d5dc790ad0..e1250cb2edb 100644 --- a/python/pylibcugraph/CMakeLists.txt +++ b/python/pylibcugraph/CMakeLists.txt @@ -38,7 +38,6 @@ project( option(FIND_CUGRAPH_CPP "Search for existing CUGRAPH C++ installations before defaulting to local files" OFF ) -option(CUGRAPH_BUILD_WHEELS "Whether we're building a wheel for pypi" OFF) option(USE_CUGRAPH_OPS "Enable all functions that call cugraph-ops" ON) if(NOT USE_CUGRAPH_OPS) @@ -59,22 +58,14 @@ if (NOT cugraph_FOUND) set(BUILD_TESTS OFF) set(BUILD_CUGRAPH_MG_TESTS OFF) set(BUILD_CUGRAPH_OPS_CPP_TESTS OFF) - - set(_exclude_from_all "") - if(CUGRAPH_BUILD_WHEELS) - # Statically link dependencies if building wheels - set(CUDA_STATIC_RUNTIME ON) - set(USE_RAFT_STATIC ON) - set(CUGRAPH_COMPILE_RAFT_LIB ON) - set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) - set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) - set(ALLOW_CLONE_CUGRAPH_OPS ON) - - # Don't install the cuML C++ targets into wheels - set(_exclude_from_all EXCLUDE_FROM_ALL) - endif() - - add_subdirectory(../../cpp cugraph-cpp ${_exclude_from_all}) + set(CUDA_STATIC_RUNTIME ON) + set(USE_RAFT_STATIC ON) + set(CUGRAPH_COMPILE_RAFT_LIB ON) + set(CUGRAPH_USE_CUGRAPH_OPS_STATIC ON) + set(CUGRAPH_EXCLUDE_CUGRAPH_OPS_FROM_ALL ON) + set(ALLOW_CLONE_CUGRAPH_OPS ON) + + add_subdirectory(../../cpp cugraph-cpp EXCLUDE_FROM_ALL) set(cython_lib_dir pylibcugraph) install(TARGETS cugraph DESTINATION ${cython_lib_dir})
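
The graph_view changes earlier in this diff drop the cached per-partition edge counts and the base-class number_of_edges() accessor in favor of compute_number_of_edges(handle), which, when an edge mask is attached, derives the edge count by counting the set bits of the mask rather than returning the stored total. A minimal NumPy sketch of that idea, assuming a little-endian host bitmap packed into 32-bit words with one bit per edge; count_masked_edges is a hypothetical helper for illustration, not the cuGraph device implementation (which uses detail::count_set_bits on the GPU):

import numpy as np

def count_masked_edges(mask_words: np.ndarray, num_edges: int) -> int:
    """Count edges whose bit is set in a packed edge-mask bitmap."""
    # Expand the 32-bit words into individual bits and keep only the first
    # num_edges bits; anything beyond that is padding in the last word.
    bits = np.unpackbits(mask_words.view(np.uint8), bitorder="little")[:num_edges]
    return int(bits.sum())

# 40 edges stored in two 32-bit words, all kept except edges 3 and 17.
mask = np.full(2, 0xFFFFFFFF, dtype=np.uint32)
mask[0] &= ~np.uint32(1 << 3)
mask[0] &= ~np.uint32(1 << 17)
print(count_masked_edges(mask, 40))  # 38

The new HeteroGATConv layer added under python/cugraph-pyg stacks the per-relation linear projections of each node type into one fused weight matrix, so a single GEMM per node type produces every relation's projected features before one mha_gat_n2n call is dispatched per edge type. A minimal usage sketch, assuming a CUDA device and a cugraph-pyg installation with pylibcugraphops available; the node types, feature sizes, and edge indices below are made up for illustration and only mirror the shape of the sample_pyg_hetero_data fixture:

import torch
from cugraph_pyg.nn import HeteroGATConv

device = torch.device("cuda:0")

node_types = ["v0", "v1"]
edge_types = [("v0", "e0", "v1"), ("v1", "e1", "v0")]

# Toy features: four "v0" nodes with 3 features, five "v1" nodes with 2 features.
x_dict = {
    "v0": torch.randn(4, 3, device=device),
    "v1": torch.randn(5, 2, device=device),
}
# COO edge indices per relation (row 0: source nodes, row 1: destination nodes).
edge_index_dict = {
    ("v0", "e0", "v1"): torch.tensor([[0, 1, 3], [2, 0, 4]], device=device),
    ("v1", "e1", "v0"): torch.tensor([[0, 2, 4], [1, 3, 0]], device=device),
}

conv = HeteroGATConv(
    in_channels={"v0": 3, "v1": 2},
    out_channels=2,
    node_types=node_types,
    edge_types=edge_types,
    heads=3,
    aggr="sum",
).to(device)

out_dict = conv(x_dict, edge_index_dict)
# With concat=True (the default), each destination node type gets
# heads * out_channels = 6 output features.
print({k: v.shape for k, v in out_dict.items()})

Because the fused weight matrix of a node type concatenates the slices of all relations that touch it, the projection is computed once and then carved back out per edge type by split_tensors; this is what avoids the many small GEMMs a plain HeteroConv-of-GATConv stack would launch.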