From 11f9c4020547431c4d985cf4684354edf85252d7 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 22 Oct 2024 09:44:06 -0700 Subject: [PATCH] fix style --- .../cugraph/detail/shuffle_wrappers.hpp | 13 +- .../cugraph/detail/utility_wrappers.hpp | 1 - cpp/include/cugraph/sampling_functions.hpp | 6 +- cpp/include/cugraph_c/sampling_algorithms.h | 59 ++- cpp/src/c_api/array.hpp | 2 +- cpp/src/c_api/neighbor_sampling.cpp | 432 +++++++++--------- cpp/src/detail/utility_wrappers_impl.cuh | 8 +- .../detail/conversion_utilities_impl.cuh | 8 +- cpp/src/sampling/neighbor_sampling_impl.hpp | 232 +++++----- .../sampling/neighbor_sampling_mg_v32_e32.cu | 51 +-- .../sampling/neighbor_sampling_sg_v32_e32.cu | 48 +- cpp/src/utilities/shuffle_vertices.cuh | 75 +-- .../utilities/shuffle_vertices_mg_v32_fp.cu | 26 +- .../shuffle_vertices_mg_v32_integral.cu | 29 +- .../utilities/shuffle_vertices_mg_v64_fp.cu | 26 +- .../shuffle_vertices_mg_v64_integral.cu | 28 +- cpp/tests/CMakeLists.txt | 6 +- ...heterogeneous_biased_neighbor_sampling.cpp | 99 ++-- ...eterogeneous_uniform_neighbor_sampling.cpp | 98 ++-- .../homogeneous_biased_neighbor_sampling.cpp | 60 ++- .../homogeneous_uniform_neighbor_sampling.cpp | 60 ++- ...heterogeneous_biased_neighbor_sampling.cpp | 106 ++--- ...eterogeneous_uniform_neighbor_sampling.cpp | 107 ++--- ...g_homogeneous_biased_neighbor_sampling.cpp | 83 ++-- ..._homogeneous_uniform_neighbor_sampling.cpp | 88 ++-- cpp/tests/utilities/thrust_wrapper.cu | 16 +- .../dask/sampling/uniform_neighbor_sample.py | 2 +- .../heterogeneous_biased_neighbor_sample.py | 37 +- .../heterogeneous_uniform_neighbor_sample.py | 37 +- ...ogeneous_biased_uniform_neighbor_sample.py | 35 +- .../homogeneous_uniform_neighbor_sample.py | 35 +- python/pylibcugraph/pylibcugraph/__init__.py | 22 +- .../heterogeneous_biased_neighbor_sample.pyx | 8 +- .../heterogeneous_uniform_neighbor_sample.pyx | 6 +- .../homogeneous_biased_neighbor_sample.pyx | 6 +- .../homogeneous_uniform_neighbor_sample.pyx | 4 +- 36 files changed, 949 insertions(+), 1010 deletions(-) diff --git a/cpp/include/cugraph/detail/shuffle_wrappers.hpp b/cpp/include/cugraph/detail/shuffle_wrappers.hpp index 68ebdf01296..6592150e748 100644 --- a/cpp/include/cugraph/detail/shuffle_wrappers.hpp +++ b/cpp/include/cugraph/detail/shuffle_wrappers.hpp @@ -158,12 +158,13 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( * @return Tuple of vectors storing shuffled vertex & value pairs. */ template -std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1); +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1); /** * @brief Permute a range. diff --git a/cpp/include/cugraph/detail/utility_wrappers.hpp b/cpp/include/cugraph/detail/utility_wrappers.hpp index 76b362e58bf..a7400a43039 100644 --- a/cpp/include/cugraph/detail/utility_wrappers.hpp +++ b/cpp/include/cugraph/detail/utility_wrappers.hpp @@ -94,7 +94,6 @@ void sort(raft::handle_t const& handle, value_t* d_value, size_t size); template size_t unique(raft::handle_t const& handle, value_t* d_value, size_t size); - /** * @brief Increment the values of a buffer by a constant value * diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index de636f84ff1..f3f9b126513 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -317,7 +317,8 @@ struct sampling_flags_t { * @param label_to_output_comm_rank Optional device span identifying which rank should get each * vertex label. This should be the same on each rank. * @param fan_out Host span defining branching out (fan-out) degree per source vertex for each - * level. The fanout value at hop x is given by the expression 'fanout[x*num_edge_types + edge_type_id]' + * level. The fanout value at hop x is given by the expression 'fanout[x*num_edge_types + + * edge_type_id]' * @param num_edge_types Number of edge types where a value of 1 translates to homogeneous neighbor * sample whereas a value greater than 1 translates to heterogeneous neighbor sample. * @param flags A set of flags indicating which sampling features should be used. @@ -405,7 +406,8 @@ heterogeneous_uniform_neighbor_sample( * @param label_to_output_comm_rank Optional device span identifying which rank should get each * vertex label. This should be the same on each rank. * @param fan_out Host span defining branching out (fan-out) degree per source vertex for each - * level. The fanout value at hop x is given by the expression 'fanout[x*num_edge_types + edge_type_id]' + * level. The fanout value at hop x is given by the expression 'fanout[x*num_edge_types + + * edge_type_id]' * @param num_edge_types Number of edge types where a value of 1 translates to homogeneous neighbor * sample whereas a value greater than 1 translates to heterogeneous neighbor sample. * @param flags A set of flags indicating which sampling features should be used. diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index abc8f1c4082..fb0b93a0bed 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -331,10 +331,9 @@ void cugraph_sampling_options_free(cugraph_sampling_options_t* options); * @brief Opaque neighborhood sampling heterogeneous fan_out type */ - /** * @brief Uniform Neighborhood Sampling - * + * * @deprecated This API will be deleted, use cugraph_homogeneous_uniform_neighbor_sample * * Returns a sample of the neighborhood around specified start vertices. Optionally, each @@ -362,8 +361,8 @@ void cugraph_sampling_options_free(cugraph_sampling_options_t* options); * label_to_comm_rank[i]. If not specified then the output data will not be shuffled between ranks. * @param [in] label_offsets Device array of the offsets for each label in the seed list. This * parameter is only used with the retain_seeds option. - * @param [in] fan_out Host array defining the fan out at each step in the sampling algorithm. - * We only support fan_out values of type INT32 + * @param [in] fan_out Host array defining the fan out at each step in the sampling + * algorithm. We only support fan_out values of type INT32 * @param [in,out] rng_state State of the random number generator, updated with each call * @param [in] sampling_options * Opaque pointer defining the sampling options. @@ -391,9 +390,9 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( /** * @brief Biased Neighborhood Sampling - * + * * @deprecated This API will be deleted, use cugraph_homogeneous_biased_neighbor_sample. - * + * * Returns a sample of the neighborhood around specified start vertices. Optionally, each * start vertex can be associated with a label, allowing the caller to specify multiple batches * of sampling requests in the same function call - which should improve GPU utilization. @@ -422,8 +421,8 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( * label_to_comm_rank[i]. If not specified then the output data will not be shuffled between ranks. * @param [in] label_offsets Device array of the offsets for each label in the seed list. This * parameter is only used with the retain_seeds option. - * @param [in] fan_out Host array defining the fan out at each step in the sampling algorithm. - * We only support fan_out values of type INT32 + * @param [in] fan_out Host array defining the fan out at each step in the sampling + * algorithm. We only support fan_out values of type INT32 * @param [in,out] rng_state State of the random number generator, updated with each call * @param [in] sampling_options * Opaque pointer defining the sampling options. @@ -467,13 +466,12 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage * needs to be transposed * @param [in] start_vertices Device array of start vertices for the sampling - * @param [in] start_vertex_offsets Device array of the offsets for each label in the seed list. This - * parameter is only used with the retain_seeds option. - * @param [in] fan_out Host array defining the fan out at each step in the sampling algorithm. - * We only support fan_out values of type INT32 - * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous neighbor - * sample whereas a value greater than 1 translates to heterogeneous neighbor - * sample. + * @param [in] start_vertex_offsets Device array of the offsets for each label in the seed list. + * This parameter is only used with the retain_seeds option. + * @param [in] fan_out Host array defining the fan out at each step in the sampling + * algorithm. We only support fan_out values of type INT32 + * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous + * neighbor sample whereas a value greater than 1 translates to heterogeneous neighbor sample. * @param [in] sampling_options * Opaque pointer defining the sampling options. * @param [in] do_expensive_check @@ -496,7 +494,6 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_sample_result_t** result, cugraph_error_t** error); - /** * @brief Heterogeneous Biased Neighborhood Sampling * @@ -516,13 +513,12 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( * @param [in] edge_biases Device array of edge biases to use for sampling. If NULL * use the edge weight as the bias. If set to NULL, edges will be sampled uniformly. * @param [in] start_vertices Device array of start vertices for the sampling - * @param [in] start_vertex_offsets Device array of the offsets for each label in the seed list. This - * parameter is only used with the retain_seeds option. - * @param [in] fan_out Host array defining the fan out at each step in the sampling algorithm. - * We only support fan_out values of type INT32 - * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous neighbor - * sample whereas a value greater than 1 translates to heterogeneous neighbor - * sample. + * @param [in] start_vertex_offsets Device array of the offsets for each label in the seed list. + * This parameter is only used with the retain_seeds option. + * @param [in] fan_out Host array defining the fan out at each step in the sampling + * algorithm. We only support fan_out values of type INT32 + * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous + * neighbor sample whereas a value greater than 1 translates to heterogeneous neighbor sample. * @param [in] sampling_options * Opaque pointer defining the sampling options. * @param [in] do_expensive_check @@ -563,10 +559,10 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage * needs to be transposed * @param [in] start_vertices Device array of start vertices for the sampling - * @param [in] start_vertex_offsets Device array of the offsets for each label in the seed list. This - * parameter is only used with the retain_seeds option. - * @param [in] fan_out Host array defining the fan out at each step in the sampling algorithm. - * We only support fan_out values of type INT32 + * @param [in] start_vertex_offsets Device array of the offsets for each label in the seed list. + * This parameter is only used with the retain_seeds option. + * @param [in] fan_out Host array defining the fan out at each step in the sampling + * algorithm. We only support fan_out values of type INT32 * @param [in] sampling_options * Opaque pointer defining the sampling options. * @param [in] do_expensive_check @@ -607,10 +603,10 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( * @param [in] edge_biases Device array of edge biases to use for sampling. If NULL * use the edge weight as the bias. If set to NULL, edges will be sampled uniformly. * @param [in] start_vertices Device array of start vertices for the sampling - * @param [in] start_vertex_offsets Device array of the offsets for each label in the seed list. This - * parameter is only used with the retain_seeds option. - * @param [in] fan_out Host array defining the fan out at each step in the sampling algorithm. - * We only support fan_out values of type INT32 + * @param [in] start_vertex_offsets Device array of the offsets for each label in the seed list. + * This parameter is only used with the retain_seeds option. + * @param [in] fan_out Host array defining the fan out at each step in the sampling + * algorithm. We only support fan_out values of type INT32 * @param [in] sampling_options * Opaque pointer defining the sampling options. * @param [in] do_expensive_check @@ -633,7 +629,6 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( cugraph_sample_result_t** result, cugraph_error_t** error); - /** * @deprecated This call should be replaced with cugraph_sample_result_get_majors * @brief Get the source vertices from the sampling algorithm result diff --git a/cpp/src/c_api/array.hpp b/cpp/src/c_api/array.hpp index 2fa58d917ae..0ab30a1cb72 100644 --- a/cpp/src/c_api/array.hpp +++ b/cpp/src/c_api/array.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2023, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 7d535f45058..5b05b542015 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -14,14 +14,14 @@ * limitations under the License. */ - #include "c_api/abstract_functor.hpp" #include "c_api/graph.hpp" +#include "c_api/graph_helper.hpp" #include "c_api/properties.hpp" #include "c_api/random.hpp" #include "c_api/resource_handle.hpp" #include "c_api/utils.hpp" -#include "c_api/graph_helper.hpp" +#include "sampling/detail/sampling_utils.hpp" #include #include @@ -30,7 +30,6 @@ #include #include #include -#include "sampling/detail/sampling_utils.hpp" #include @@ -780,18 +779,17 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { bool do_expensive_check_{false}; cugraph::c_api::cugraph_sample_result_t* result_{nullptr}; - neighbor_sampling_functor( - cugraph_resource_handle_t const* handle, - cugraph_rng_state_t* rng_state, - cugraph_graph_t* graph, - cugraph_edge_property_view_t const* edge_biases, - cugraph_type_erased_device_array_view_t const* start_vertices, - cugraph_type_erased_device_array_view_t const* start_vertex_offsets, - cugraph_type_erased_host_array_view_t const* fan_out, - int num_edge_types, - cugraph::c_api::cugraph_sampling_options_t options, - bool is_biased, - bool do_expensive_check) + neighbor_sampling_functor(cugraph_resource_handle_t const* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + cugraph_edge_property_view_t const* edge_biases, + cugraph_type_erased_device_array_view_t const* start_vertices, + cugraph_type_erased_device_array_view_t const* start_vertex_offsets, + cugraph_type_erased_host_array_view_t const* fan_out, + int num_edge_types, + cugraph::c_api::cugraph_sampling_options_t options, + bool is_biased, + bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), rng_state_(reinterpret_cast(rng_state)), @@ -867,17 +865,18 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> start_vertex_labels{std::nullopt}; std::optional> local_label_to_comm_rank{std::nullopt}; - std::optional> label_to_comm_rank{std::nullopt}; // global after allgatherv + std::optional> label_to_comm_rank{ + std::nullopt}; // global after allgatherv if (start_vertex_offsets_ != nullptr) { // Get the number of labels on each GPU auto num_local_labels = start_vertex_offsets_->size_ - 1; - auto global_labels = cugraph::host_scalar_allgather( + auto global_labels = cugraph::host_scalar_allgather( handle_.get_comms(), num_local_labels, handle_.get_stream()); - + std::exclusive_scan( - global_labels.begin(), global_labels.end(), global_labels.begin(), size_t{0}); - + global_labels.begin(), global_labels.end(), global_labels.begin(), size_t{0}); + // Compute the global start_vertex_label_offsets cugraph::detail::transform_increment(handle_.get_stream(), start_vertex_offsets_->as_type(), @@ -885,16 +884,16 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { global_labels[handle_.get_comms().get_rank()]); // Retrieve the start_vertex_labels - start_vertex_labels = - cugraph::detail::convert_starting_vertex_offsets_to_labels( - handle_, - raft::device_span{start_vertex_offsets_->as_type(), - start_vertex_offsets_->size_}); + start_vertex_labels = cugraph::detail::convert_starting_vertex_offsets_to_labels( + handle_, + raft::device_span{start_vertex_offsets_->as_type(), + start_vertex_offsets_->size_}); } if constexpr (multi_gpu) { if (start_vertex_labels) { - rmm::device_uvector unique_labels((*start_vertex_labels).size(), handle_.get_stream()); + rmm::device_uvector unique_labels((*start_vertex_labels).size(), + handle_.get_stream()); raft::copy(unique_labels.data(), (*start_vertex_labels).data(), unique_labels.size(), @@ -905,34 +904,37 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { cugraph::detail::sort(handle_.get_stream(), unique_labels.begin(), unique_labels.size()); auto num_unique_labels = cugraph::detail::unique( handle_.get_stream(), unique_labels.begin(), unique_labels.size()); - + (*local_label_to_comm_rank).resize(num_unique_labels, handle_.get_stream()); - cugraph::detail::scalar_fill(handle_.get_stream(), - (*local_label_to_comm_rank).begin(), // This should be rename to rank - (*local_label_to_comm_rank).size(), - label_t{handle_.get_comms().get_rank()}); - + cugraph::detail::scalar_fill( + handle_.get_stream(), + (*local_label_to_comm_rank).begin(), // This should be rename to rank + (*local_label_to_comm_rank).size(), + label_t{handle_.get_comms().get_rank()}); + // Perform allgather to get global_label_to_comm_rank_d_vector auto recvcounts = cugraph::host_scalar_allgather( handle_.get_comms(), num_unique_labels, handle_.get_stream()); - + std::vector displacements(recvcounts.size()); - std::exclusive_scan(recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); - - (*label_to_comm_rank).resize(displacements.back() + recvcounts.back(), handle_.get_stream()); + std::exclusive_scan( + recvcounts.begin(), recvcounts.end(), displacements.begin(), size_t{0}); + + (*label_to_comm_rank) + .resize(displacements.back() + recvcounts.back(), handle_.get_stream()); cugraph::device_allgatherv(handle_.get_comms(), - (*local_label_to_comm_rank).begin(), - (*label_to_comm_rank).begin(), - recvcounts, - displacements, - handle_.get_stream()); - + (*local_label_to_comm_rank).begin(), + (*label_to_comm_rank).begin(), + recvcounts, + displacements, + handle_.get_stream()); + std::tie(start_vertices, *start_vertex_labels) = cugraph::detail::shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( handle_, std::move(start_vertices), std::move(*start_vertex_labels)); - + } else { start_vertices = cugraph::detail::shuffle_ext_vertices_to_local_gpu_by_vertex_partitioning( @@ -951,7 +953,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { graph_view.local_vertex_partition_range_first(), graph_view.local_vertex_partition_range_last(), do_expensive_check_); - + rmm::device_uvector src(0, handle_.get_stream()); rmm::device_uvector dst(0, handle_.get_stream()); std::optional> wgt{std::nullopt}; @@ -960,7 +962,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> hop{std::nullopt}; std::optional> edge_label{std::nullopt}; std::optional> offsets{std::nullopt}; - + // FIXME: For biased sampling, the user should pass either biases or edge weights, // otherwised throw an error and suggest the user to call uniform neighbor sample instead @@ -979,21 +981,17 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { raft::device_span{start_vertices.data(), start_vertices.size()}, (start_vertex_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), - (*start_vertex_labels).size()) - : std::nullopt, - label_to_comm_rank - ? std::make_optional(raft::device_span{(*label_to_comm_rank).data(), - (*label_to_comm_rank).size()}) + (*start_vertex_labels).size()) : std::nullopt, - raft::host_span( - fan_out_->as_type(), fan_out_->size_), + label_to_comm_rank ? std::make_optional(raft::device_span{ + (*label_to_comm_rank).data(), (*label_to_comm_rank).size()}) + : std::nullopt, + raft::host_span(fan_out_->as_type(), fan_out_->size_), num_edge_types_, - cugraph::sampling_flags_t{ - options_.prior_sources_behavior_, - options_.return_hops_, - options_.dedupe_sources_, - options_.with_replacement_ - }, + cugraph::sampling_flags_t{options_.prior_sources_behavior_, + options_.return_hops_, + options_.dedupe_sources_, + options_.with_replacement_}, do_expensive_check_); } else { auto&& [src, dst, wgt, edge_id, edge_type, hop, offsets] = @@ -1007,23 +1005,18 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { raft::device_span{start_vertices.data(), start_vertices.size()}, (start_vertex_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), - (*start_vertex_labels).size()) - : std::nullopt, - label_to_comm_rank - ? std::make_optional(raft::device_span{(*label_to_comm_rank).data(), - (*label_to_comm_rank).size()}) + (*start_vertex_labels).size()) : std::nullopt, - raft::host_span( - fan_out_->as_type(), fan_out_->size_), + label_to_comm_rank ? std::make_optional(raft::device_span{ + (*label_to_comm_rank).data(), (*label_to_comm_rank).size()}) + : std::nullopt, + raft::host_span(fan_out_->as_type(), fan_out_->size_), num_edge_types_, - cugraph::sampling_flags_t{ - options_.prior_sources_behavior_, - options_.return_hops_, - options_.dedupe_sources_, - options_.with_replacement_ - }, + cugraph::sampling_flags_t{options_.prior_sources_behavior_, + options_.return_hops_, + options_.dedupe_sources_, + options_.with_replacement_}, do_expensive_check_); - } } else { // Call homogeneous neighbor sample @@ -1042,47 +1035,39 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, - label_to_comm_rank - ? std::make_optional(raft::device_span{(*label_to_comm_rank).data(), - (*label_to_comm_rank).size()}) - : std::nullopt, - raft::host_span( - fan_out_->as_type(), fan_out_->size_), - cugraph::sampling_flags_t{ - options_.prior_sources_behavior_, - options_.return_hops_, - options_.dedupe_sources_, - options_.with_replacement_ - }, + label_to_comm_rank ? std::make_optional(raft::device_span{ + (*label_to_comm_rank).data(), (*label_to_comm_rank).size()}) + : std::nullopt, + raft::host_span(fan_out_->as_type(), fan_out_->size_), + cugraph::sampling_flags_t{options_.prior_sources_behavior_, + options_.return_hops_, + options_.dedupe_sources_, + options_.with_replacement_}, do_expensive_check_); } else { if (is_biased_) { - auto&& [src, dst, wgt, edge_id, edge_type, hop, offsets] = - cugraph::homogeneous_uniform_neighbor_sample( - handle_, - rng_state_->rng_state_, - graph_view, - (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, - (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, - (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, - raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) - ? std::make_optional>((*start_vertex_labels).data(), - (*start_vertex_labels).size()) - : std::nullopt, - label_to_comm_rank - ? std::make_optional(raft::device_span{(*label_to_comm_rank).data(), - (*label_to_comm_rank).size()}) - : std::nullopt, - raft::host_span( - fan_out_->as_type(), fan_out_->size_), - cugraph::sampling_flags_t{ - options_.prior_sources_behavior_, - options_.return_hops_, - options_.dedupe_sources_, - options_.with_replacement_ - }, - do_expensive_check_); + auto&& [src, dst, wgt, edge_id, edge_type, hop, offsets] = + cugraph::homogeneous_uniform_neighbor_sample( + handle_, + rng_state_->rng_state_, + graph_view, + (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, + (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, + (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, + raft::device_span{start_vertices.data(), start_vertices.size()}, + (start_vertex_offsets_ != nullptr) + ? std::make_optional>((*start_vertex_labels).data(), + (*start_vertex_labels).size()) + : std::nullopt, + label_to_comm_rank ? std::make_optional(raft::device_span{ + (*label_to_comm_rank).data(), (*label_to_comm_rank).size()}) + : std::nullopt, + raft::host_span(fan_out_->as_type(), fan_out_->size_), + cugraph::sampling_flags_t{options_.prior_sources_behavior_, + options_.return_hops_, + options_.dedupe_sources_, + options_.with_replacement_}, + do_expensive_check_); } } } @@ -1125,20 +1110,20 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { } if (options_.renumber_results_) { - if (num_edge_types_ == 1) { // homogeneous renumbering + if (num_edge_types_ == 1) { // homogeneous renumbering if (options_.compression_type_ == cugraph_compression_type_t::COO) { // COO rmm::device_uvector output_majors(0, handle_.get_stream()); rmm::device_uvector output_renumber_map(0, handle_.get_stream()); std::tie(output_majors, - minors, - wgt, - edge_id, - edge_type, - label_hop_offsets, - output_renumber_map, - renumber_map_offsets) = + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = cugraph::renumber_and_sort_sampled_edgelist( handle_, std::move(src), @@ -1151,13 +1136,14 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, - offsets ? edge_label->size() : size_t{1}, // FIXME: update edge_label ? + offsets ? edge_label->size() : size_t{1}, // FIXME: update edge_label ? hop ? fan_out_->size_ : size_t{1}, src_is_major, do_expensive_check_); @@ -1167,21 +1153,22 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { } else { // (D)CSC, (D)CSR - bool doubly_compress = (options_.compression_type_ == cugraph_compression_type_t::DCSR) || - (options_.compression_type_ == cugraph_compression_type_t::DCSC); + bool doubly_compress = + (options_.compression_type_ == cugraph_compression_type_t::DCSR) || + (options_.compression_type_ == cugraph_compression_type_t::DCSC); rmm::device_uvector output_major_offsets(0, handle_.get_stream()); rmm::device_uvector output_renumber_map(0, handle_.get_stream()); std::tie(majors, - output_major_offsets, - minors, - wgt, - edge_id, - edge_type, - label_hop_offsets, - output_renumber_map, - renumber_map_offsets) = + output_major_offsets, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = cugraph::renumber_and_compress_sampled_edgelist( handle_, std::move(src), @@ -1194,9 +1181,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -1214,35 +1202,37 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { // These are now represented by label_hop_offsets hop.reset(); offsets.reset(); - - } else { // heterogeneous renumbering - rmm::device_uvector vertex_type_offsets(graph_view.local_vertex_partition_range_size(), handle_.get_stream()); + } else { // heterogeneous renumbering + + rmm::device_uvector vertex_type_offsets( + graph_view.local_vertex_partition_range_size(), handle_.get_stream()); cugraph::detail::sequence_fill(handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0}); + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}); rmm::device_uvector output_majors(0, handle_.get_stream()); rmm::device_uvector output_renumber_map(0, handle_.get_stream()); - - std::optional> renumbered_and_sorted_edge_id_renumber_map(std::nullopt); - std::optional> renumbered_and_sorted_edge_id_renumber_map_label_type_offsets(std::nullopt); + + std::optional> renumbered_and_sorted_edge_id_renumber_map( + std::nullopt); + std::optional> + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets(std::nullopt); // extract the edge_type from label_type_hop_offsets std::optional> label_type_hop_offsets{std::nullopt}; std::tie(output_majors, - minors, - wgt, - edge_id, - label_type_hop_offsets, // Contains information about the type and hop offsets - output_renumber_map, - (*renumber_map_offsets), - renumbered_and_sorted_edge_id_renumber_map, - renumbered_and_sorted_edge_id_renumber_map_label_type_offsets - ) = + minors, + wgt, + edge_id, + label_type_hop_offsets, // Contains information about the type and hop offsets + output_renumber_map, + (*renumber_map_offsets), + renumbered_and_sorted_edge_id_renumber_map, + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( handle_, std::move(src), @@ -1255,34 +1245,35 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, - raft::device_span{ - vertex_type_offsets.data(), vertex_type_offsets.size()}, - + raft::device_span{vertex_type_offsets.data(), + vertex_type_offsets.size()}, + start_vertex_offsets_ ? start_vertex_offsets_->size_ : size_t{1}, hop ? fan_out_->size_ : size_t{1}, size_t{1}, size_t{1}, src_is_major, do_expensive_check_); - if (edge_type) { - (*edge_type).resize(raft::device_span{ - (*label_type_hop_offsets).data(), (*label_type_hop_offsets).size()}.back() - 1, handle_.get_stream()); - cugraph::detail::sequence_fill(handle_.get_stream(), - (*edge_type).begin(), - (*edge_type).size(), - edge_type_t{0}); - } - + if (edge_type) { + (*edge_type) + .resize(raft::device_span{(*label_type_hop_offsets).data(), + (*label_type_hop_offsets).size()} + .back() - + 1, + handle_.get_stream()); + cugraph::detail::sequence_fill( + handle_.get_stream(), (*edge_type).begin(), (*edge_type).size(), edge_type_t{0}); + } - majors.emplace(std::move(output_majors)); - renumber_map.emplace(std::move(output_renumber_map)); - + majors.emplace(std::move(output_majors)); + renumber_map.emplace(std::move(output_renumber_map)); } } else { @@ -1290,23 +1281,22 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { CUGRAPH_FAIL("Can only use COO format if not renumbering"); } - std::tie(src, dst, wgt, edge_id, edge_type, label_hop_offsets) = - cugraph::sort_sampled_edgelist(handle_, - std::move(src), - std::move(dst), - std::move(wgt), - std::move(edge_id), - std::move(edge_type), - std::move(hop), - offsets - ? std::make_optional(raft::device_span{ - offsets->data(), offsets->size()}) - : std::nullopt, - edge_label ? edge_label->size() : size_t{1}, // FIXME: update edge_label - hop ? fan_out_->size_ : size_t{1}, - src_is_major, - do_expensive_check_); + cugraph::sort_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + edge_label ? edge_label->size() : size_t{1}, // FIXME: update edge_label + hop ? fan_out_->size_ : size_t{1}, + src_is_major, + do_expensive_check_); majors.emplace(std::move(src)); minors = std::move(dst); @@ -1336,9 +1326,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, - (edge_label) - ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) // FIXME: update edge_label - : nullptr, + (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t( + edge_label.value(), INT32) // FIXME: update edge_label + : nullptr, (renumber_map) ? new cugraph::c_api::cugraph_type_erased_device_array_t( renumber_map.value(), graph_->vertex_type_) : nullptr, @@ -1918,7 +1908,6 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( cugraph_sample_result_t** result, cugraph_error_t** error) { - auto options_cpp = *reinterpret_cast(options); CAPI_EXPECTS( @@ -1977,7 +1966,7 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( fan_out, rng_state, std::move(options_cpp), - do_expensive_check}; + do_expensive_check}; return cugraph::c_api::run_algorithm(graph, functor, result, error); } @@ -1995,13 +1984,13 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_error_t** error) { auto options_cpp = *reinterpret_cast(options); - + // FIXME: Should we maintain this contition? CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), CUGRAPH_INVALID_INPUT, "must specify start_vertex_offsets if retain_seeds is true", *error); - + CAPI_EXPECTS((start_vertex_offsets == nullptr) || (reinterpret_cast( start_vertex_offsets) @@ -2009,7 +1998,7 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( CUGRAPH_INVALID_INPUT, "start_vertex_offsets should be of type size_t", *error); - + CAPI_EXPECTS( reinterpret_cast(fan_out) ->type_ == INT32, @@ -2061,13 +2050,13 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( CUGRAPH_INVALID_INPUT, "edge_biases is required if the graph is not weighted", *error); - + // FIXME: Should we maintain this contition? CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), CUGRAPH_INVALID_INPUT, "must specify start_vertex_offsets if retain_seeds is true", *error); - + CAPI_EXPECTS((start_vertex_offsets == nullptr) || (reinterpret_cast( start_vertex_offsets) @@ -2075,7 +2064,7 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( CUGRAPH_INVALID_INPUT, "start_vertex_offsets should be of type size_t", *error); - + CAPI_EXPECTS( reinterpret_cast(fan_out) ->type_ == INT32, @@ -2110,7 +2099,7 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, // RENAME? + const cugraph_type_erased_device_array_view_t* start_vertex_offsets, // RENAME? const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -2118,13 +2107,13 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( cugraph_error_t** error) { auto options_cpp = *reinterpret_cast(options); - + // FIXME: Should we maintain this contition? CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), CUGRAPH_INVALID_INPUT, "must specify start_vertex_offsets if retain_seeds is true", *error); - + CAPI_EXPECTS((start_vertex_offsets == nullptr) || (reinterpret_cast( start_vertex_offsets) @@ -2132,14 +2121,14 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( CUGRAPH_INVALID_INPUT, "start_vertex_offsets should be of type size_t", *error); - - CAPI_EXPECTS(reinterpret_cast( - fan_out) - ->type_ == INT32, - CUGRAPH_INVALID_INPUT, - "fan_out type must be INT32", - *error); - + + CAPI_EXPECTS( + reinterpret_cast(fan_out) + ->type_ == INT32, + CUGRAPH_INVALID_INPUT, + "fan_out type must be INT32", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == reinterpret_cast( start_vertices) @@ -2155,7 +2144,7 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( start_vertices, start_vertex_offsets, fan_out, - 1, // num_edge_types + 1, // num_edge_types std::move(options_cpp), FALSE, do_expensive_check}; @@ -2183,13 +2172,13 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( CUGRAPH_INVALID_INPUT, "edge_biases is required if the graph is not weighted", *error); - + // FIXME: Should we maintain this contition? CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), CUGRAPH_INVALID_INPUT, "must specify start_vertex_offsets if retain_seeds is true", *error); - + CAPI_EXPECTS((start_vertex_offsets == nullptr) || (reinterpret_cast( start_vertex_offsets) @@ -2197,14 +2186,14 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( CUGRAPH_INVALID_INPUT, "start_vertex_offsets should be of type size_t", *error); - - CAPI_EXPECTS(reinterpret_cast( - fan_out) - ->type_ == INT32, - CUGRAPH_INVALID_INPUT, - "fan_out type must be INT32", - *error); - + + CAPI_EXPECTS( + reinterpret_cast(fan_out) + ->type_ == INT32, + CUGRAPH_INVALID_INPUT, + "fan_out type must be INT32", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == reinterpret_cast( start_vertices) @@ -2213,7 +2202,6 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( "vertex type of graph and start_vertices must match", *error); - neighbor_sampling_functor functor{handle, rng_state, graph, @@ -2221,7 +2209,7 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( start_vertices, start_vertex_offsets, fan_out, - 1, // num_edge_types + 1, // num_edge_types std::move(options_cpp), TRUE, do_expensive_check}; diff --git a/cpp/src/detail/utility_wrappers_impl.cuh b/cpp/src/detail/utility_wrappers_impl.cuh index e28039a3c02..eaca2516dbf 100644 --- a/cpp/src/detail/utility_wrappers_impl.cuh +++ b/cpp/src/detail/utility_wrappers_impl.cuh @@ -73,14 +73,13 @@ void sort(raft::handle_t const& handle, value_t* d_value, size_t size) template size_t unique(raft::handle_t const& handle, value_t* d_value, size_t size) { - //auto unique_element_last = thrust::unique(handle.get_thrust_policy(), d_value, d_value + size); + // auto unique_element_last = thrust::unique(handle.get_thrust_policy(), d_value, d_value + size); auto unique_element_last = thrust::unique(handle.get_thrust_policy(), d_value, d_value + size); - //auto num_unique_element = + // auto num_unique_element = return thrust::distance(d_value, unique_element_last); - //masked_edgelist_srcs.resize(2* masked_edgelist_srcs.size(), handle.get_stream()); + // masked_edgelist_srcs.resize(2* masked_edgelist_srcs.size(), handle.get_stream()); } - template void sequence_fill(rmm::cuda_stream_view const& stream_view, value_t* d_value, @@ -105,7 +104,6 @@ void transform_increment(rmm::cuda_stream_view const& stream_view, })); } - template void stride_fill(rmm::cuda_stream_view const& stream_view, value_t* d_value, diff --git a/cpp/src/sampling/detail/conversion_utilities_impl.cuh b/cpp/src/sampling/detail/conversion_utilities_impl.cuh index 0c8d8ac95ea..6eff0154581 100644 --- a/cpp/src/sampling/detail/conversion_utilities_impl.cuh +++ b/cpp/src/sampling/detail/conversion_utilities_impl.cuh @@ -46,10 +46,10 @@ rmm::device_uvector flatten_label_map( thrust::fill(handle.get_thrust_policy(), label_map.begin(), label_map.end(), int32_t{0}); thrust::scatter(handle.get_thrust_policy(), - std::get<0>(label_to_output_comm_rank).begin(), - std::get<0>(label_to_output_comm_rank).end(), - std::get<1>(label_to_output_comm_rank).begin(), - label_map.begin()); + std::get<0>(label_to_output_comm_rank).begin(), + std::get<0>(label_to_output_comm_rank).end(), + std::get<1>(label_to_output_comm_rank).begin(), + label_map.begin()); return label_map; } diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 5f72a4d99a8..e3a06f323d1 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -18,12 +18,10 @@ #include "prims/fill_edge_property.cuh" #include "prims/transform_e.cuh" - #include "sampling/detail/sampling_utils.hpp" #include #include - #include #include #include @@ -101,16 +99,19 @@ neighbor_sample_impl(raft::handle_t const& handle, "Invalid input argument: number of levels should not overflow int32_t"); // as we use int32_t // to store hops - std::vector, bool>> edge_masks_vector{}; + std::vector< + cugraph::edge_property_t, bool>> + edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); - + if (num_edge_types > 1) { for (int i = 0; i < num_edge_types; i++) { + cugraph::edge_property_t, bool> + edge_mask(handle, graph_view); - cugraph::edge_property_t, bool> edge_mask(handle, graph_view); - - cugraph::fill_edge_property(handle, modified_graph_view, edge_mask.mutable_view(), bool{true}); + cugraph::fill_edge_property( + handle, modified_graph_view, edge_mask.mutable_view(), bool{true}); cugraph::transform_e( handle, @@ -118,12 +119,16 @@ neighbor_sample_impl(raft::handle_t const& handle, cugraph::edge_src_dummy_property_t{}.view(), cugraph::edge_dst_dummy_property_t{}.view(), *edge_type_view, - [valid_edge_type = i] __device__(auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, /*thrust::nullopt_t*/auto edge_type) { + [valid_edge_type = i] __device__(auto src, + auto dst, + thrust::nullopt_t, + thrust::nullopt_t, + /*thrust::nullopt_t*/ auto edge_type) { return edge_type == valid_edge_type; }, edge_mask.mutable_view(), false); - + edge_masks_vector.push_back(std::move(edge_mask)); } } @@ -172,11 +177,13 @@ neighbor_sample_impl(raft::handle_t const& handle, std::vector level_sizes{}; // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1 - auto num_hops = ((fan_out.size() % num_edge_types) == 0) ? (fan_out.size() / num_edge_types) : ((fan_out.size() / num_edge_types) + 1); + auto num_hops = ((fan_out.size() % num_edge_types) == 0) + ? (fan_out.size() / num_edge_types) + : ((fan_out.size() / num_edge_types) + 1); - for (auto hop = 0; hop < num_hops; hop++){ + for (auto hop = 0; hop < num_hops; hop++) { for (auto edge_type_id = 0; edge_type_id < num_edge_types; edge_type_id++) { - auto k_level = fan_out[(hop*num_edge_types) + edge_type_id]; + auto k_level = fan_out[(hop * num_edge_types) + edge_type_id]; rmm::device_uvector srcs(0, handle.get_stream()); rmm::device_uvector dsts(0, handle.get_stream()); std::optional> weights{std::nullopt}; @@ -191,16 +198,16 @@ neighbor_sample_impl(raft::handle_t const& handle, if (k_level > 0) { std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = sample_edges(handle, - modified_graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - edge_bias_view, - rng_state, - starting_vertices, - starting_vertex_labels, - static_cast(k_level), - with_replacement); + modified_graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + edge_bias_view, + rng_state, + starting_vertices, + starting_vertex_labels, + static_cast(k_level), + with_replacement); } else { std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = gather_one_hop_edgelist(handle, @@ -232,10 +239,10 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, - frontier_vertex_labels ? std::make_optional(raft::device_span( - level_result_label_vectors->back().data(), - level_result_label_vectors->back().size())) - : std::nullopt, + frontier_vertex_labels + ? std::make_optional(raft::device_span( + level_result_label_vectors->back().data(), level_result_label_vectors->back().size())) + : std::nullopt, std::move(vertex_used_as_source), modified_graph_view.local_vertex_partition_view(), vertex_partition_range_lasts, @@ -521,29 +528,30 @@ heterogeneous_uniform_neighbor_sample( edge_type_t num_edge_types, sampling_flags_t sampling_flags, bool do_expensive_check) -{ - using bias_t = weight_t; // dummy - - auto [majors, minors, weights, edge_ids, edge_types, hops, labels, offsets] - = detail::neighbor_sample_impl( - handle, - rng_state, - graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - std::optional>{std::nullopt}, // Optional edge_bias_view - starting_vertices, - starting_vertex_labels, - label_to_output_comm_rank, - fan_out, - num_edge_types, - sampling_flags.return_hops, - sampling_flags.with_replacement, - sampling_flags.prior_sources_behavior, - sampling_flags.dedupe_sources, - do_expensive_check); - +{ + using bias_t = weight_t; // dummy + + auto [majors, minors, weights, edge_ids, edge_types, hops, labels, offsets] = + detail::neighbor_sample_impl( + handle, + rng_state, + graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + std::optional>{ + std::nullopt}, // Optional edge_bias_view + starting_vertices, + starting_vertex_labels, + label_to_output_comm_rank, + fan_out, + num_edge_types, + sampling_flags.return_hops, + sampling_flags.with_replacement, + sampling_flags.prior_sources_behavior, + sampling_flags.dedupe_sources, + do_expensive_check); + return std::make_tuple(std::move(majors), std::move(minors), std::move(weights), @@ -551,7 +559,6 @@ heterogeneous_uniform_neighbor_sample( std::move(edge_types), std::move(hops), std::move(offsets)); - } template ( - handle, - rng_state, - graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - std::make_optional(edge_bias_view), - starting_vertices, - starting_vertex_labels, - label_to_output_comm_rank, - fan_out, - num_edge_types, - sampling_flags.return_hops, - sampling_flags.with_replacement, - sampling_flags.prior_sources_behavior, - sampling_flags.dedupe_sources, - do_expensive_check); - +{ + auto [majors, minors, weights, edge_ids, edge_types, hops, labels, offsets] = + detail::neighbor_sample_impl( + handle, + rng_state, + graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + std::make_optional(edge_bias_view), + starting_vertices, + starting_vertex_labels, + label_to_output_comm_rank, + fan_out, + num_edge_types, + sampling_flags.return_hops, + sampling_flags.with_replacement, + sampling_flags.prior_sources_behavior, + sampling_flags.dedupe_sources, + do_expensive_check); + return std::make_tuple(std::move(majors), std::move(minors), std::move(weights), @@ -641,28 +647,29 @@ homogeneous_uniform_neighbor_sample( sampling_flags_t sampling_flags, bool do_expensive_check) { - using bias_t = weight_t; // dummy + using bias_t = weight_t; // dummy + + auto [majors, minors, weights, edge_ids, edge_types, hops, labels, offsets] = + detail::neighbor_sample_impl( + handle, + rng_state, + graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + std::optional>{ + std::nullopt}, // Optional edge_bias_view + starting_vertices, + starting_vertex_labels, + label_to_output_comm_rank, + fan_out, + edge_type_t{1}, + sampling_flags.return_hops, + sampling_flags.with_replacement, + sampling_flags.prior_sources_behavior, + sampling_flags.dedupe_sources, + do_expensive_check); - auto [majors, minors, weights, edge_ids, edge_types, hops, labels, offsets] - = detail::neighbor_sample_impl( - handle, - rng_state, - graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - std::optional>{std::nullopt}, // Optional edge_bias_view - starting_vertices, - starting_vertex_labels, - label_to_output_comm_rank, - fan_out, - edge_type_t{1}, - sampling_flags.return_hops, - sampling_flags.with_replacement, - sampling_flags.prior_sources_behavior, - sampling_flags.dedupe_sources, - do_expensive_check); - return std::make_tuple(std::move(majors), std::move(minors), std::move(weights), @@ -701,27 +708,26 @@ homogeneous_biased_neighbor_sample( sampling_flags_t sampling_flags, bool do_expensive_check) { + auto [majors, minors, weights, edge_ids, edge_types, hops, labels, offsets] = + detail::neighbor_sample_impl( + handle, + rng_state, + graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + std::make_optional(edge_bias_view), + starting_vertices, + starting_vertex_labels, + label_to_output_comm_rank, + fan_out, + edge_type_t{1}, + sampling_flags.return_hops, + sampling_flags.with_replacement, + sampling_flags.prior_sources_behavior, + sampling_flags.dedupe_sources, + do_expensive_check); - auto [majors, minors, weights, edge_ids, edge_types, hops, labels, offsets] - = detail::neighbor_sample_impl( - handle, - rng_state, - graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - std::make_optional(edge_bias_view), - starting_vertices, - starting_vertex_labels, - label_to_output_comm_rank, - fan_out, - edge_type_t{1}, - sampling_flags.return_hops, - sampling_flags.with_replacement, - sampling_flags.prior_sources_behavior, - sampling_flags.dedupe_sources, - do_expensive_check); - return std::make_tuple(std::move(majors), std::move(minors), std::move(weights), diff --git a/cpp/src/sampling/neighbor_sampling_mg_v32_e32.cu b/cpp/src/sampling/neighbor_sampling_mg_v32_e32.cu index b96a73b722d..d848935cc7e 100644 --- a/cpp/src/sampling/neighbor_sampling_mg_v32_e32.cu +++ b/cpp/src/sampling/neighbor_sampling_mg_v32_e32.cu @@ -128,12 +128,12 @@ biased_neighbor_sample( bool do_expensive_check); template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>, - std::optional>, - std::optional>> + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> heterogeneous_uniform_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, @@ -150,12 +150,12 @@ heterogeneous_uniform_neighbor_sample( bool do_expensive_check); template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>, - std::optional>, - std::optional>> + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> heterogeneous_uniform_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, @@ -218,12 +218,12 @@ heterogeneous_biased_neighbor_sample( bool do_expensive_check); template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>, - std::optional>, - std::optional>> + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> homogeneous_uniform_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, @@ -239,12 +239,12 @@ homogeneous_uniform_neighbor_sample( bool do_expensive_check); template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>, - std::optional>, - std::optional>> + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> homogeneous_uniform_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, @@ -303,7 +303,4 @@ homogeneous_biased_neighbor_sample( sampling_flags_t sampling_flags, bool do_expensive_check); - - - } // namespace cugraph diff --git a/cpp/src/sampling/neighbor_sampling_sg_v32_e32.cu b/cpp/src/sampling/neighbor_sampling_sg_v32_e32.cu index 5cb3fd75ce6..72bbb4e27a8 100644 --- a/cpp/src/sampling/neighbor_sampling_sg_v32_e32.cu +++ b/cpp/src/sampling/neighbor_sampling_sg_v32_e32.cu @@ -128,12 +128,12 @@ biased_neighbor_sample( bool do_expensive_check); template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>, - std::optional>, - std::optional>> + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> heterogeneous_uniform_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, @@ -150,12 +150,12 @@ heterogeneous_uniform_neighbor_sample( bool do_expensive_check); template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>, - std::optional>, - std::optional>> + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> heterogeneous_uniform_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, @@ -218,12 +218,12 @@ heterogeneous_biased_neighbor_sample( bool do_expensive_check); template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>, - std::optional>, - std::optional>> + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> homogeneous_uniform_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, @@ -239,12 +239,12 @@ homogeneous_uniform_neighbor_sample( bool do_expensive_check); template std::tuple, - rmm::device_uvector, - std::optional>, - std::optional>, - std::optional>, - std::optional>, - std::optional>> + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> homogeneous_uniform_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, diff --git a/cpp/src/utilities/shuffle_vertices.cuh b/cpp/src/utilities/shuffle_vertices.cuh index 8707068bb90..59b5ecad059 100644 --- a/cpp/src/utilities/shuffle_vertices.cuh +++ b/cpp/src/utilities/shuffle_vertices.cuh @@ -45,40 +45,42 @@ rmm::device_uvector shuffle_vertices_by_gpu_id_impl( } template -std::tuple, rmm::device_uvector, std::optional>> -shuffle_vertices_and_values_by_gpu_id_impl(raft::handle_t const& handle, - rmm::device_uvector&& d_vertices, - rmm::device_uvector&& d_values_0, - std::optional>&& d_values_1, - func_t func) +std::tuple, + rmm::device_uvector, + std::optional>> +shuffle_vertices_and_values_by_gpu_id_impl( + raft::handle_t const& handle, + rmm::device_uvector&& d_vertices, + rmm::device_uvector&& d_values_0, + std::optional>&& d_values_1, + func_t func) { - - if (d_values_1) { - auto [d_shuffled_vertices, d_values, counts] = cugraph::groupby_gpu_id_and_shuffle_kv_pairs( - handle.get_comms(), - d_vertices.begin(), - d_vertices.end(), - thrust::make_zip_iterator(d_values_0.begin(), (*d_values_1).begin()), - [key_func = func] __device__(auto val) { return key_func(val); }, - handle.get_stream()); - - return std::make_tuple( - std::move(d_shuffled_vertices), std::move(std::get<0>(d_values)), std::make_optional(std::move(std::get<1>(d_values)))); - } else { - auto [d_shuffled_vertices, d_values, counts] = cugraph::groupby_gpu_id_and_shuffle_kv_pairs( - handle.get_comms(), - d_vertices.begin(), - d_vertices.end(), - d_values_0.begin(), - [key_func = func] __device__(auto val) { return key_func(val); }, - handle.get_stream()); + if (d_values_1) { + auto [d_shuffled_vertices, d_values, counts] = cugraph::groupby_gpu_id_and_shuffle_kv_pairs( + handle.get_comms(), + d_vertices.begin(), + d_vertices.end(), + thrust::make_zip_iterator(d_values_0.begin(), (*d_values_1).begin()), + [key_func = func] __device__(auto val) { return key_func(val); }, + handle.get_stream()); + + return std::make_tuple(std::move(d_shuffled_vertices), + std::move(std::get<0>(d_values)), + std::make_optional(std::move(std::get<1>(d_values)))); + } else { + auto [d_shuffled_vertices, d_values, counts] = cugraph::groupby_gpu_id_and_shuffle_kv_pairs( + handle.get_comms(), + d_vertices.begin(), + d_vertices.end(), + d_values_0.begin(), + [key_func = func] __device__(auto val) { return key_func(val); }, + handle.get_stream()); auto d_values_1 = std::optional>{std::nullopt}; - return std::make_tuple( - std::move(d_shuffled_vertices), std::move(d_values), std::move(d_values_1)); - } - + return std::make_tuple( + std::move(d_shuffled_vertices), std::move(d_values), std::move(d_values_1)); + } } } // namespace @@ -130,12 +132,13 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( } template -std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1) +std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1) { auto const comm_size = handle.get_comms().get_size(); auto& major_comm = handle.get_subcomm(cugraph::partition_manager::major_comm_name()); diff --git a/cpp/src/utilities/shuffle_vertices_mg_v32_fp.cu b/cpp/src/utilities/shuffle_vertices_mg_v32_fp.cu index f148f71f166..c66a0868b6f 100644 --- a/cpp/src/utilities/shuffle_vertices_mg_v32_fp.cu +++ b/cpp/src/utilities/shuffle_vertices_mg_v32_fp.cu @@ -40,19 +40,21 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); -template std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1); -template std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1); } // namespace detail diff --git a/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu b/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu index c8f158bde6a..984f2afc002 100644 --- a/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu +++ b/cpp/src/utilities/shuffle_vertices_mg_v32_integral.cu @@ -62,20 +62,21 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); -template std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1); - -template std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1); - +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1); } // namespace detail diff --git a/cpp/src/utilities/shuffle_vertices_mg_v64_fp.cu b/cpp/src/utilities/shuffle_vertices_mg_v64_fp.cu index a61c91b8ac3..ff200fac563 100644 --- a/cpp/src/utilities/shuffle_vertices_mg_v64_fp.cu +++ b/cpp/src/utilities/shuffle_vertices_mg_v64_fp.cu @@ -40,19 +40,21 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); -template std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1); -template std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1); } // namespace detail diff --git a/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu b/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu index 1539244307a..fb4c0d469f7 100644 --- a/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu +++ b/cpp/src/utilities/shuffle_vertices_mg_v64_integral.cu @@ -54,19 +54,21 @@ shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( rmm::device_uvector&& vertices, rmm::device_uvector&& values); -template std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values_0, - rmm::device_uvector&& values_1); - -template std::tuple, rmm::device_uvector, rmm::device_uvector> -shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( - raft::handle_t const& handle, - rmm::device_uvector&& vertices, - rmm::device_uvector&& values, - rmm::device_uvector&& values_1); +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values_0, + rmm::device_uvector&& values_1); + +template std:: + tuple, rmm::device_uvector, rmm::device_uvector> + shuffle_ext_vertex_values_pairs_to_local_gpu_by_vertex_partitioning( + raft::handle_t const& handle, + rmm::device_uvector&& vertices, + rmm::device_uvector&& values, + rmm::device_uvector&& values_1); template rmm::device_uvector shuffle_int_vertices_to_local_gpu_by_vertex_partitioning( raft::handle_t const& handle, diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 04bd3552190..f392c7fb461 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -773,17 +773,17 @@ if(BUILD_CUGRAPH_MG_TESTS) # - MG HOMOGENEOUS UNIFORM NBR SAMPLING tests ------------------------------------------------- ConfigureTestMG( MG_HOMOGENEOUS_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_homogeneous_uniform_neighbor_sampling.cpp) - + ############################################################################################### # - MG HETEROGENEOUS UNIFORM NBR SAMPLING tests ------------------------------------------------- ConfigureTestMG( MG_HETEROGENEOUS_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/mg_heterogeneous_uniform_neighbor_sampling.cpp) - + ############################################################################################### # - MG HOMOGENEOUS BIASED NBR SAMPLING tests -------------------------------------------------- ConfigureTestMG( MG_HOMOGENEOUS_BIASED_NEIGHBOR_SAMPLING_TEST sampling/mg_homogeneous_biased_neighbor_sampling.cpp) - + ############################################################################################### # - MG HETEROGENEOUS BIASED NBR SAMPLING tests -------------------------------------------------- ConfigureTestMG( diff --git a/cpp/tests/sampling/heterogeneous_biased_neighbor_sampling.cpp b/cpp/tests/sampling/heterogeneous_biased_neighbor_sampling.cpp index 745a9049c93..c8bfb654598 100644 --- a/cpp/tests/sampling/heterogeneous_biased_neighbor_sampling.cpp +++ b/cpp/tests/sampling/heterogeneous_biased_neighbor_sampling.cpp @@ -46,8 +46,8 @@ class Tests_Heterogeneous_Biased_Neighbor_Sampling virtual void TearDown() {} template - void run_current_test( - std::tuple const& param) + void run_current_test(std::tuple const& param) { using edge_type_t = int32_t; @@ -101,8 +101,11 @@ class Tests_Heterogeneous_Biased_Neighbor_Sampling auto batch_number = std::make_optional>(0, handle.get_stream()); - batch_number = cugraph::test::sequence( - handle, random_sources.size(), heterogeneous_biased_neighbor_sampling_usecase.batch_size, int32_t{0}); + batch_number = + cugraph::test::sequence(handle, + random_sources.size(), + heterogeneous_biased_neighbor_sampling_usecase.batch_size, + int32_t{0}); rmm::device_uvector random_sources_copy(random_sources.size(), handle.get_stream()); @@ -111,9 +114,8 @@ class Tests_Heterogeneous_Biased_Neighbor_Sampling random_sources.size(), handle.get_stream()); - std::optional> - label_to_output_comm_rank_mapping{std::nullopt}; - + std::optional> label_to_output_comm_rank_mapping{std::nullopt}; + // Generate the edge types std::optional> edge_types{ @@ -121,9 +123,7 @@ class Tests_Heterogeneous_Biased_Neighbor_Sampling if (heterogeneous_biased_neighbor_sampling_usecase.num_edge_types > 1) { edge_types = cugraph::test::generate::edge_property( - handle, - graph_view, - heterogeneous_biased_neighbor_sampling_usecase.num_edge_types); + handle, graph_view, heterogeneous_biased_neighbor_sampling_usecase.num_edge_types); } #ifdef NO_CUGRAPH_OPS @@ -135,25 +135,23 @@ class Tests_Heterogeneous_Biased_Neighbor_Sampling edge_weight_view, std::optional>{std::nullopt}, edge_types - ? std::optional>{(*edge_types) - .view()} - : std::nullopt, + ? std::optional>{(*edge_types) + .view()} + : std::nullopt, *edge_weight_view, raft::device_span{random_sources_copy.data(), random_sources.size()}, batch_number ? std::make_optional(raft::device_span{batch_number->data(), batch_number->size()}) : std::nullopt, label_to_output_comm_rank_mapping, - raft::host_span(heterogeneous_biased_neighbor_sampling_usecase.fanout.data(), - heterogeneous_biased_neighbor_sampling_usecase.fanout.size()), + raft::host_span( + heterogeneous_biased_neighbor_sampling_usecase.fanout.data(), + heterogeneous_biased_neighbor_sampling_usecase.fanout.size()), heterogeneous_biased_neighbor_sampling_usecase.num_edge_types, - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - heterogeneous_biased_neighbor_sampling_usecase.flag_replacement - } - ), + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + heterogeneous_biased_neighbor_sampling_usecase.flag_replacement}), std::exception); #else if (cugraph::test::g_perf) { @@ -169,26 +167,24 @@ class Tests_Heterogeneous_Biased_Neighbor_Sampling edge_weight_view, std::optional>{std::nullopt}, edge_types - ? std::optional>{(*edge_types) - .view()} - : std::nullopt, + ? std::optional>{(*edge_types) + .view()} + : std::nullopt, *edge_weight_view, raft::device_span{random_sources_copy.data(), random_sources.size()}, batch_number ? std::make_optional(raft::device_span{batch_number->data(), batch_number->size()}) : std::nullopt, label_to_output_comm_rank_mapping, - raft::host_span(heterogeneous_biased_neighbor_sampling_usecase.fanout.data(), - heterogeneous_biased_neighbor_sampling_usecase.fanout.size()), + raft::host_span( + heterogeneous_biased_neighbor_sampling_usecase.fanout.data(), + heterogeneous_biased_neighbor_sampling_usecase.fanout.size()), heterogeneous_biased_neighbor_sampling_usecase.num_edge_types, - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - heterogeneous_biased_neighbor_sampling_usecase.flag_replacement - } - ); - + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + heterogeneous_biased_neighbor_sampling_usecase.flag_replacement}); + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_timer.stop(); @@ -224,22 +220,19 @@ class Tests_Heterogeneous_Biased_Neighbor_Sampling raft::device_span(d_subgraph_offsets.data(), 2), raft::device_span(vertices.data(), vertices.size()), true); - - - ASSERT_TRUE(cugraph::test::validate_extracted_graph_is_subgraph( handle, src_compare, dst_compare, wgt_compare, src_out, dst_out, wgt_out)); if (random_sources.size() < 100) { // This validation is too expensive for large number of vertices - ASSERT_TRUE( - cugraph::test::validate_sampling_depth(handle, - std::move(src_out), - std::move(dst_out), - std::move(wgt_out), - std::move(random_sources), - heterogeneous_biased_neighbor_sampling_usecase.fanout.size())); + ASSERT_TRUE(cugraph::test::validate_sampling_depth( + handle, + std::move(src_out), + std::move(dst_out), + std::move(wgt_out), + std::move(random_sources), + heterogeneous_biased_neighbor_sampling_usecase.fanout.size())); } } #endif @@ -288,7 +281,6 @@ TEST_P(Tests_Heterogeneous_Biased_Neighbor_Sampling_Rmat, CheckInt64Int64Float) override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } - INSTANTIATE_TEST_SUITE_P( file_test, Tests_Heterogeneous_Biased_Neighbor_Sampling_File, @@ -329,11 +321,16 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_Heterogeneous_Biased_Neighbor_Sampling_Rmat, ::testing::Combine( - ::testing::Values(Heterogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, false, false}, - Heterogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, false, false}, - Heterogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, true, false}, - Heterogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, true, false}), + ::testing::Values( + Heterogeneous_Biased_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, false, false}, + Heterogeneous_Biased_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, false, false}, + Heterogeneous_Biased_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, true, false}, + Heterogeneous_Biased_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0)))); -//#endif +// #endif CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/heterogeneous_uniform_neighbor_sampling.cpp b/cpp/tests/sampling/heterogeneous_uniform_neighbor_sampling.cpp index d94d7ad7e5a..15be79a623e 100644 --- a/cpp/tests/sampling/heterogeneous_uniform_neighbor_sampling.cpp +++ b/cpp/tests/sampling/heterogeneous_uniform_neighbor_sampling.cpp @@ -46,8 +46,8 @@ class Tests_Heterogeneous_Uniform_Neighbor_Sampling virtual void TearDown() {} template - void run_current_test( - std::tuple const& param) + void run_current_test(std::tuple const& param) { using edge_type_t = int32_t; @@ -99,8 +99,11 @@ class Tests_Heterogeneous_Uniform_Neighbor_Sampling auto batch_number = std::make_optional>(0, handle.get_stream()); - batch_number = cugraph::test::sequence( - handle, random_sources.size(), heterogeneous_uniform_neighbor_sampling_usecase.batch_size, int32_t{0}); + batch_number = + cugraph::test::sequence(handle, + random_sources.size(), + heterogeneous_uniform_neighbor_sampling_usecase.batch_size, + int32_t{0}); rmm::device_uvector random_sources_copy(random_sources.size(), handle.get_stream()); @@ -109,9 +112,8 @@ class Tests_Heterogeneous_Uniform_Neighbor_Sampling random_sources.size(), handle.get_stream()); - std::optional> - label_to_output_comm_rank_mapping{std::nullopt}; - + std::optional> label_to_output_comm_rank_mapping{std::nullopt}; + // Generate the edge types std::optional> edge_types{ @@ -119,9 +121,7 @@ class Tests_Heterogeneous_Uniform_Neighbor_Sampling if (heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types > 1) { edge_types = cugraph::test::generate::edge_property( - handle, - graph_view, - heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types); + handle, graph_view, heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types); } #ifdef NO_CUGRAPH_OPS @@ -133,24 +133,23 @@ class Tests_Heterogeneous_Uniform_Neighbor_Sampling edge_weight_view, std::optional>{std::nullopt}, edge_types - ? std::optional>{(*edge_types) - .view()} - : std::nullopt, + ? std::optional>{(*edge_types) + .view()} + : std::nullopt, raft::device_span{random_sources_copy.data(), random_sources.size()}, batch_number ? std::make_optional(raft::device_span{batch_number->data(), batch_number->size()}) : std::nullopt, label_to_output_comm_rank_mapping, - raft::host_span(heterogeneous_uniform_neighbor_sampling_usecase.fanout.data(), - heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()), + raft::host_span( + heterogeneous_uniform_neighbor_sampling_usecase.fanout.data(), + heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()), heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types, cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - heterogeneous_uniform_neighbor_sampling_usecase.flag_replacement - } - ), + cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + heterogeneous_uniform_neighbor_sampling_usecase.flag_replacement}), std::exception); #else if (cugraph::test::g_perf) { @@ -166,25 +165,24 @@ class Tests_Heterogeneous_Uniform_Neighbor_Sampling edge_weight_view, std::optional>{std::nullopt}, edge_types - ? std::optional>{(*edge_types) - .view()} - : std::nullopt, + ? std::optional>{(*edge_types) + .view()} + : std::nullopt, raft::device_span{random_sources_copy.data(), random_sources.size()}, batch_number ? std::make_optional(raft::device_span{batch_number->data(), batch_number->size()}) : std::nullopt, label_to_output_comm_rank_mapping, - raft::host_span(heterogeneous_uniform_neighbor_sampling_usecase.fanout.data(), - heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()), + raft::host_span( + heterogeneous_uniform_neighbor_sampling_usecase.fanout.data(), + heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()), heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types, cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - heterogeneous_uniform_neighbor_sampling_usecase.flag_replacement - } - ); - + cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + heterogeneous_uniform_neighbor_sampling_usecase.flag_replacement}); + if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement hr_timer.stop(); @@ -220,22 +218,19 @@ class Tests_Heterogeneous_Uniform_Neighbor_Sampling raft::device_span(d_subgraph_offsets.data(), 2), raft::device_span(vertices.data(), vertices.size()), true); - - - ASSERT_TRUE(cugraph::test::validate_extracted_graph_is_subgraph( handle, src_compare, dst_compare, wgt_compare, src_out, dst_out, wgt_out)); if (random_sources.size() < 100) { // This validation is too expensive for large number of vertices - ASSERT_TRUE( - cugraph::test::validate_sampling_depth(handle, - std::move(src_out), - std::move(dst_out), - std::move(wgt_out), - std::move(random_sources), - heterogeneous_uniform_neighbor_sampling_usecase.fanout.size())); + ASSERT_TRUE(cugraph::test::validate_sampling_depth( + handle, + std::move(src_out), + std::move(dst_out), + std::move(wgt_out), + std::move(random_sources), + heterogeneous_uniform_neighbor_sampling_usecase.fanout.size())); } } #endif @@ -248,7 +243,6 @@ using Tests_Heterogeneous_Uniform_Neighbor_Sampling_File = using Tests_Heterogeneous_Uniform_Neighbor_Sampling_Rmat = Tests_Heterogeneous_Uniform_Neighbor_Sampling; - TEST_P(Tests_Heterogeneous_Uniform_Neighbor_Sampling_File, CheckInt32Int32Float) { run_current_test( @@ -285,7 +279,6 @@ TEST_P(Tests_Heterogeneous_Uniform_Neighbor_Sampling_Rmat, CheckInt64Int64Float) override_Rmat_Usecase_with_cmd_line_arguments(GetParam())); } - INSTANTIATE_TEST_SUITE_P( file_test, Tests_Heterogeneous_Uniform_Neighbor_Sampling_File, @@ -326,11 +319,16 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_Heterogeneous_Uniform_Neighbor_Sampling_Rmat, ::testing::Combine( - ::testing::Values(Heterogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, false, false}, - Heterogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, false, false}, - Heterogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, true, false}, - Heterogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, true, false}), + ::testing::Values( + Heterogeneous_Uniform_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, false, false}, + Heterogeneous_Uniform_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, false, false}, + Heterogeneous_Uniform_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, true, false}, + Heterogeneous_Uniform_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8, 1, 9, 5, 12}, 1024, 4, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0)))); -//#endif +// #endif CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/homogeneous_biased_neighbor_sampling.cpp b/cpp/tests/sampling/homogeneous_biased_neighbor_sampling.cpp index a3f6b612337..d9f3e90f127 100644 --- a/cpp/tests/sampling/homogeneous_biased_neighbor_sampling.cpp +++ b/cpp/tests/sampling/homogeneous_biased_neighbor_sampling.cpp @@ -46,8 +46,8 @@ class Tests_Homogeneous_Biased_Neighbor_Sampling virtual void TearDown() {} template - void run_current_test( - std::tuple const& param) + void run_current_test(std::tuple const& param) { auto [homogeneous_biased_neighbor_sampling_usecase, input_usecase] = param; @@ -104,8 +104,10 @@ class Tests_Homogeneous_Biased_Neighbor_Sampling auto batch_number = std::make_optional>(0, handle.get_stream()); - batch_number = cugraph::test::sequence( - handle, random_sources.size(), homogeneous_biased_neighbor_sampling_usecase.batch_size, int32_t{0}); + batch_number = cugraph::test::sequence(handle, + random_sources.size(), + homogeneous_biased_neighbor_sampling_usecase.batch_size, + int32_t{0}); rmm::device_uvector random_sources_copy(random_sources.size(), handle.get_stream()); @@ -114,8 +116,7 @@ class Tests_Homogeneous_Biased_Neighbor_Sampling random_sources.size(), handle.get_stream()); - std::optional> - label_to_output_comm_rank_mapping{std::nullopt}; + std::optional> label_to_output_comm_rank_mapping{std::nullopt}; #ifdef NO_CUGRAPH_OPS EXPECT_THROW( @@ -134,13 +135,10 @@ class Tests_Homogeneous_Biased_Neighbor_Sampling label_to_output_comm_rank_mapping, raft::host_span(homogeneous_biased_neighbor_sampling_usecase.fanout.data(), homogeneous_biased_neighbor_sampling_usecase.fanout.size()), - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - homogeneous_biased_neighbor_sampling_usecase.flag_replacement - } - ), + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + homogeneous_biased_neighbor_sampling_usecase.flag_replacement}), std::exception); #else if (cugraph::test::g_perf) { @@ -164,14 +162,10 @@ class Tests_Homogeneous_Biased_Neighbor_Sampling label_to_output_comm_rank_mapping, raft::host_span(homogeneous_biased_neighbor_sampling_usecase.fanout.data(), homogeneous_biased_neighbor_sampling_usecase.fanout.size()), - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - homogeneous_biased_neighbor_sampling_usecase.flag_replacement - } - ); - + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + homogeneous_biased_neighbor_sampling_usecase.flag_replacement}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -214,13 +208,13 @@ class Tests_Homogeneous_Biased_Neighbor_Sampling if (random_sources.size() < 100) { // This validation is too expensive for large number of vertices - ASSERT_TRUE( - cugraph::test::validate_sampling_depth(handle, - std::move(src_out), - std::move(dst_out), - std::move(wgt_out), - std::move(random_sources), - homogeneous_biased_neighbor_sampling_usecase.fanout.size())); + ASSERT_TRUE(cugraph::test::validate_sampling_depth( + handle, + std::move(src_out), + std::move(dst_out), + std::move(wgt_out), + std::move(random_sources), + homogeneous_biased_neighbor_sampling_usecase.fanout.size())); } } #endif @@ -233,7 +227,6 @@ using Tests_Homogeneous_Biased_Neighbor_Sampling_File = using Tests_Homogeneous_Biased_Neighbor_Sampling_Rmat = Tests_Homogeneous_Biased_Neighbor_Sampling; - TEST_P(Tests_Homogeneous_Biased_Neighbor_Sampling_File, CheckInt32Int32Float) { run_current_test( @@ -310,10 +303,11 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_Homogeneous_Biased_Neighbor_Sampling_Rmat, ::testing::Combine( - ::testing::Values(Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, false, false}, - Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, true, false}, - Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, false, false}, - Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, true, false}), + ::testing::Values( + Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, false, false}, + Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, true, false}, + Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, false, false}, + Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/homogeneous_uniform_neighbor_sampling.cpp b/cpp/tests/sampling/homogeneous_uniform_neighbor_sampling.cpp index 5b18ecf6a50..8c50ab56917 100644 --- a/cpp/tests/sampling/homogeneous_uniform_neighbor_sampling.cpp +++ b/cpp/tests/sampling/homogeneous_uniform_neighbor_sampling.cpp @@ -46,8 +46,8 @@ class Tests_Homogeneous_Uniform_Neighbor_Sampling virtual void TearDown() {} template - void run_current_test( - std::tuple const& param) + void run_current_test(std::tuple const& param) { auto [homogeneous_uniform_neighbor_sampling_usecase, input_usecase] = param; @@ -104,8 +104,10 @@ class Tests_Homogeneous_Uniform_Neighbor_Sampling auto batch_number = std::make_optional>(0, handle.get_stream()); - batch_number = cugraph::test::sequence( - handle, random_sources.size(), homogeneous_uniform_neighbor_sampling_usecase.batch_size, int32_t{0}); + batch_number = cugraph::test::sequence(handle, + random_sources.size(), + homogeneous_uniform_neighbor_sampling_usecase.batch_size, + int32_t{0}); rmm::device_uvector random_sources_copy(random_sources.size(), handle.get_stream()); @@ -114,8 +116,7 @@ class Tests_Homogeneous_Uniform_Neighbor_Sampling random_sources.size(), handle.get_stream()); - std::optional> - label_to_output_comm_rank_mapping{std::nullopt}; + std::optional> label_to_output_comm_rank_mapping{std::nullopt}; #ifdef NO_CUGRAPH_OPS EXPECT_THROW( @@ -133,13 +134,10 @@ class Tests_Homogeneous_Uniform_Neighbor_Sampling label_to_output_comm_rank_mapping, raft::host_span(homogeneous_uniform_neighbor_sampling_usecase.fanout.data(), homogeneous_uniform_neighbor_sampling_usecase.fanout.size()), - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - homogeneous_uniform_neighbor_sampling_usecase.flag_replacement - } - ), + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + homogeneous_uniform_neighbor_sampling_usecase.flag_replacement}), std::exception); #else if (cugraph::test::g_perf) { @@ -162,14 +160,10 @@ class Tests_Homogeneous_Uniform_Neighbor_Sampling label_to_output_comm_rank_mapping, raft::host_span(homogeneous_uniform_neighbor_sampling_usecase.fanout.data(), homogeneous_uniform_neighbor_sampling_usecase.fanout.size()), - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - homogeneous_uniform_neighbor_sampling_usecase.flag_replacement - } - ); - + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + homogeneous_uniform_neighbor_sampling_usecase.flag_replacement}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -212,13 +206,13 @@ class Tests_Homogeneous_Uniform_Neighbor_Sampling if (random_sources.size() < 100) { // This validation is too expensive for large number of vertices - ASSERT_TRUE( - cugraph::test::validate_sampling_depth(handle, - std::move(src_out), - std::move(dst_out), - std::move(wgt_out), - std::move(random_sources), - homogeneous_uniform_neighbor_sampling_usecase.fanout.size())); + ASSERT_TRUE(cugraph::test::validate_sampling_depth( + handle, + std::move(src_out), + std::move(dst_out), + std::move(wgt_out), + std::move(random_sources), + homogeneous_uniform_neighbor_sampling_usecase.fanout.size())); } } #endif @@ -231,7 +225,6 @@ using Tests_Homogeneous_Uniform_Neighbor_Sampling_File = using Tests_Homogeneous_Uniform_Neighbor_Sampling_Rmat = Tests_Homogeneous_Uniform_Neighbor_Sampling; - TEST_P(Tests_Homogeneous_Uniform_Neighbor_Sampling_File, CheckInt32Int32Float) { run_current_test( @@ -308,10 +301,11 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_Homogeneous_Uniform_Neighbor_Sampling_Rmat, ::testing::Combine( - ::testing::Values(Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, false, false}, - Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, true, false}, - Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, false, false}, - Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, true, false}), + ::testing::Values( + Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, false, false}, + Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, true, false}, + Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, false, false}, + Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0)))); CUGRAPH_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/mg_heterogeneous_biased_neighbor_sampling.cpp b/cpp/tests/sampling/mg_heterogeneous_biased_neighbor_sampling.cpp index d9cdee3479c..c2e34355062 100644 --- a/cpp/tests/sampling/mg_heterogeneous_biased_neighbor_sampling.cpp +++ b/cpp/tests/sampling/mg_heterogeneous_biased_neighbor_sampling.cpp @@ -50,7 +50,8 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling virtual void TearDown() {} template - void run_current_test(std::tuple const& param) + void run_current_test( + std::tuple const& param) { using edge_type_t = int32_t; @@ -86,7 +87,6 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling auto mg_edge_weight_view = mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt; - // // Test is designed like GNN sampling. We'll select 5% of vertices to be included in sampling // batches @@ -101,11 +101,11 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling mg_graph_view, std::optional>{std::nullopt}, rng_state, - //20, - + // 20, + std::max(static_cast(mg_graph_view.number_of_vertices() * select_probability), std::min(static_cast(mg_graph_view.number_of_vertices()), size_t{1})), - + false, false); @@ -115,24 +115,22 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling auto seed_sizes = cugraph::host_scalar_allgather( handle_->get_comms(), random_sources.size(), handle_->get_stream()); - size_t num_seeds = std::reduce(seed_sizes.begin(), seed_sizes.end()); - size_t num_batches = (num_seeds + heterogeneous_biased_neighbor_sampling_usecase.batch_size - 1) / - heterogeneous_biased_neighbor_sampling_usecase.batch_size; + size_t num_seeds = std::reduce(seed_sizes.begin(), seed_sizes.end()); + size_t num_batches = + (num_seeds + heterogeneous_biased_neighbor_sampling_usecase.batch_size - 1) / + heterogeneous_biased_neighbor_sampling_usecase.batch_size; std::vector seed_offsets(seed_sizes.size()); std::exclusive_scan(seed_sizes.begin(), seed_sizes.end(), seed_offsets.begin(), size_t{0}); auto batch_number = cugraph::test::modulo_sequence( *handle_, random_sources.size(), num_batches, seed_offsets[handle_->get_comms().get_rank()]); - + // Get unique batch_number -> label_list rmm::device_uvector label_list(batch_number.size(), handle_->get_stream()); - raft::copy(label_list.data(), - batch_number.data(), - batch_number.size(), - handle_->get_stream()); - + raft::copy(label_list.data(), batch_number.data(), batch_number.size(), handle_->get_stream()); + label_list = cugraph::test::sort(*handle_, std::move(label_list)); label_list = cugraph::test::unique(*handle_, std::move(label_list)); @@ -142,8 +140,7 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling *handle_, num_unique_labels, int32_t{handle_->get_comms().get_rank()}); // perform allgatherv - comm_ranks = - cugraph::test::device_allgatherv(*handle_, comm_ranks.data(), comm_ranks.size()); + comm_ranks = cugraph::test::device_allgatherv(*handle_, comm_ranks.data(), comm_ranks.size()); rmm::device_uvector random_sources_copy(random_sources.size(), handle_->get_stream()); @@ -151,7 +148,7 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling random_sources.data(), random_sources.size(), handle_->get_stream()); - + // Generate the edge types std::optional> edge_types{ @@ -159,9 +156,7 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling if (heterogeneous_biased_neighbor_sampling_usecase.num_edge_types > 1) { edge_types = cugraph::test::generate::edge_property( - *handle_, - mg_graph_view, - heterogeneous_biased_neighbor_sampling_usecase.num_edge_types); + *handle_, mg_graph_view, heterogeneous_biased_neighbor_sampling_usecase.num_edge_types); } #ifdef NO_CUGRAPH_OPS @@ -173,25 +168,22 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling mg_edge_weight_view, std::optional>{std::nullopt}, edge_types - ? std::optional>{(*edge_types) - .view()} - : std::nullopt, - + ? std::optional>{(*edge_types) + .view()} + : std::nullopt, + raft::device_span{random_sources.data(), random_sources.size()}, std::make_optional( raft::device_span{batch_number.data(), batch_number.size()}), - std::make_optional( - raft::device_span{comm_ranks.data(), comm_ranks.size()}), - raft::host_span(heterogeneous_biased_neighbor_sampling_usecase.fanout.data(), - heterogeneous_biased_neighbor_sampling_usecase.fanout.size()), + std::make_optional(raft::device_span{comm_ranks.data(), comm_ranks.size()}), + raft::host_span( + heterogeneous_biased_neighbor_sampling_usecase.fanout.data(), + heterogeneous_biased_neighbor_sampling_usecase.fanout.size()), heterogeneous_biased_neighbor_sampling_usecase.num_edge_types, - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - heterogeneous_biased_neighbor_sampling_usecase.with_replacement - } - ), + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + heterogeneous_biased_neighbor_sampling_usecase.with_replacement}), std::exception); #else if (cugraph::test::g_perf) { @@ -200,7 +192,7 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling hr_timer.start("MG biased_neighbor_sample"); } RAFT_CUDA_TRY(cudaDeviceSynchronize()); - + auto&& [src_out, dst_out, wgt_out, edge_id, edge_type, hop, offsets] = cugraph::heterogeneous_biased_neighbor_sample( *handle_, @@ -209,25 +201,22 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling mg_edge_weight_view, std::optional>{std::nullopt}, edge_types - ? std::optional>{(*edge_types) - .view()} - : std::nullopt, + ? std::optional>{(*edge_types) + .view()} + : std::nullopt, *mg_edge_weight_view, raft::device_span{random_sources.data(), random_sources.size()}, std::make_optional( raft::device_span{batch_number.data(), batch_number.size()}), - std::make_optional( - raft::device_span{comm_ranks.data(), comm_ranks.size()}), - raft::host_span(heterogeneous_biased_neighbor_sampling_usecase.fanout.data(), - heterogeneous_biased_neighbor_sampling_usecase.fanout.size()), + std::make_optional(raft::device_span{comm_ranks.data(), comm_ranks.size()}), + raft::host_span( + heterogeneous_biased_neighbor_sampling_usecase.fanout.data(), + heterogeneous_biased_neighbor_sampling_usecase.fanout.size()), heterogeneous_biased_neighbor_sampling_usecase.num_edge_types, - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - heterogeneous_biased_neighbor_sampling_usecase.with_replacement - } - ); + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + heterogeneous_biased_neighbor_sampling_usecase.with_replacement}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -309,12 +298,13 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling if (random_sources.size() < 100) { // This validation is too expensive for large number of vertices if (mg_aggregate_src.size() > 0) { - cugraph::test::validate_sampling_depth(*handle_, - std::move(mg_aggregate_src), - std::move(mg_aggregate_dst), - std::move(mg_aggregate_wgt), - std::move(mg_start_src), - heterogeneous_biased_neighbor_sampling_usecase.fanout.size()); + cugraph::test::validate_sampling_depth( + *handle_, + std::move(mg_aggregate_src), + std::move(mg_aggregate_dst), + std::move(mg_aggregate_wgt), + std::move(mg_start_src), + heterogeneous_biased_neighbor_sampling_usecase.fanout.size()); } } } @@ -327,8 +317,8 @@ class Tests_MGHeterogeneous_Biased_Neighbor_Sampling }; template -std::unique_ptr Tests_MGHeterogeneous_Biased_Neighbor_Sampling::handle_ = - nullptr; +std::unique_ptr + Tests_MGHeterogeneous_Biased_Neighbor_Sampling::handle_ = nullptr; using Tests_MGHeterogeneous_Biased_Neighbor_Sampling_File = Tests_MGHeterogeneous_Biased_Neighbor_Sampling; diff --git a/cpp/tests/sampling/mg_heterogeneous_uniform_neighbor_sampling.cpp b/cpp/tests/sampling/mg_heterogeneous_uniform_neighbor_sampling.cpp index 87dab4801fb..6282f57f1be 100644 --- a/cpp/tests/sampling/mg_heterogeneous_uniform_neighbor_sampling.cpp +++ b/cpp/tests/sampling/mg_heterogeneous_uniform_neighbor_sampling.cpp @@ -50,7 +50,8 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling virtual void TearDown() {} template - void run_current_test(std::tuple const& param) + void run_current_test( + std::tuple const& param) { using edge_type_t = int32_t; @@ -86,7 +87,6 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling auto mg_edge_weight_view = mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt; - // // Test is designed like GNN sampling. We'll select 5% of vertices to be included in sampling // batches @@ -101,11 +101,11 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling mg_graph_view, std::optional>{std::nullopt}, rng_state, - //20, - + // 20, + std::max(static_cast(mg_graph_view.number_of_vertices() * select_probability), std::min(static_cast(mg_graph_view.number_of_vertices()), size_t{1})), - + false, false); @@ -115,24 +115,22 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling auto seed_sizes = cugraph::host_scalar_allgather( handle_->get_comms(), random_sources.size(), handle_->get_stream()); - size_t num_seeds = std::reduce(seed_sizes.begin(), seed_sizes.end()); - size_t num_batches = (num_seeds + heterogeneous_uniform_neighbor_sampling_usecase.batch_size - 1) / - heterogeneous_uniform_neighbor_sampling_usecase.batch_size; + size_t num_seeds = std::reduce(seed_sizes.begin(), seed_sizes.end()); + size_t num_batches = + (num_seeds + heterogeneous_uniform_neighbor_sampling_usecase.batch_size - 1) / + heterogeneous_uniform_neighbor_sampling_usecase.batch_size; std::vector seed_offsets(seed_sizes.size()); std::exclusive_scan(seed_sizes.begin(), seed_sizes.end(), seed_offsets.begin(), size_t{0}); auto batch_number = cugraph::test::modulo_sequence( *handle_, random_sources.size(), num_batches, seed_offsets[handle_->get_comms().get_rank()]); - + // Get unique batch_number -> label_list rmm::device_uvector label_list(batch_number.size(), handle_->get_stream()); - raft::copy(label_list.data(), - batch_number.data(), - batch_number.size(), - handle_->get_stream()); - + raft::copy(label_list.data(), batch_number.data(), batch_number.size(), handle_->get_stream()); + label_list = cugraph::test::sort(*handle_, std::move(label_list)); label_list = cugraph::test::unique(*handle_, std::move(label_list)); @@ -142,8 +140,7 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling *handle_, num_unique_labels, int32_t{handle_->get_comms().get_rank()}); // perform allgatherv - comm_ranks = - cugraph::test::device_allgatherv(*handle_, comm_ranks.data(), comm_ranks.size()); + comm_ranks = cugraph::test::device_allgatherv(*handle_, comm_ranks.data(), comm_ranks.size()); rmm::device_uvector random_sources_copy(random_sources.size(), handle_->get_stream()); @@ -151,7 +148,7 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling random_sources.data(), random_sources.size(), handle_->get_stream()); - + // Generate the edge types std::optional> edge_types{ @@ -159,9 +156,7 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling if (heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types > 1) { edge_types = cugraph::test::generate::edge_property( - *handle_, - mg_graph_view, - heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types); + *handle_, mg_graph_view, heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types); } #ifdef NO_CUGRAPH_OPS @@ -173,25 +168,23 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling mg_edge_weight_view, std::optional>{std::nullopt}, edge_types - ? std::optional>{(*edge_types) - .view()} - : std::nullopt, - + ? std::optional>{(*edge_types) + .view()} + : std::nullopt, + raft::device_span{random_sources.data(), random_sources.size()}, std::make_optional( raft::device_span{batch_number.data(), batch_number.size()}), - std::make_optional( - raft::device_span{comm_ranks.data(), comm_ranks.size()}), - raft::host_span(heterogeneous_uniform_neighbor_sampling_usecase.fanout.data(), - heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()), + std::make_optional(raft::device_span{comm_ranks.data(), comm_ranks.size()}), + raft::host_span( + heterogeneous_uniform_neighbor_sampling_usecase.fanout.data(), + heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()), heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types, cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - heterogeneous_uniform_neighbor_sampling_usecase.with_replacement - } - ), + cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + heterogeneous_uniform_neighbor_sampling_usecase.with_replacement}), std::exception); #else if (cugraph::test::g_perf) { @@ -200,7 +193,7 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling hr_timer.start("MG uniform_neighbor_sample"); } RAFT_CUDA_TRY(cudaDeviceSynchronize()); - + auto&& [src_out, dst_out, wgt_out, edge_id, edge_type, hop, offsets] = cugraph::heterogeneous_uniform_neighbor_sample( *handle_, @@ -209,24 +202,22 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling mg_edge_weight_view, std::optional>{std::nullopt}, edge_types - ? std::optional>{(*edge_types) - .view()} - : std::nullopt, + ? std::optional>{(*edge_types) + .view()} + : std::nullopt, raft::device_span{random_sources.data(), random_sources.size()}, std::make_optional( raft::device_span{batch_number.data(), batch_number.size()}), - std::make_optional( - raft::device_span{comm_ranks.data(), comm_ranks.size()}), - raft::host_span(heterogeneous_uniform_neighbor_sampling_usecase.fanout.data(), - heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()), + std::make_optional(raft::device_span{comm_ranks.data(), comm_ranks.size()}), + raft::host_span( + heterogeneous_uniform_neighbor_sampling_usecase.fanout.data(), + heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()), heterogeneous_uniform_neighbor_sampling_usecase.num_edge_types, cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - heterogeneous_uniform_neighbor_sampling_usecase.with_replacement - } - ); + cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + heterogeneous_uniform_neighbor_sampling_usecase.with_replacement}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -308,12 +299,13 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling if (random_sources.size() < 100) { // This validation is too expensive for large number of vertices if (mg_aggregate_src.size() > 0) { - cugraph::test::validate_sampling_depth(*handle_, - std::move(mg_aggregate_src), - std::move(mg_aggregate_dst), - std::move(mg_aggregate_wgt), - std::move(mg_start_src), - heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()); + cugraph::test::validate_sampling_depth( + *handle_, + std::move(mg_aggregate_src), + std::move(mg_aggregate_dst), + std::move(mg_aggregate_wgt), + std::move(mg_start_src), + heterogeneous_uniform_neighbor_sampling_usecase.fanout.size()); } } } @@ -326,8 +318,8 @@ class Tests_MGHeterogeneous_Uniform_Neighbor_Sampling }; template -std::unique_ptr Tests_MGHeterogeneous_Uniform_Neighbor_Sampling::handle_ = - nullptr; +std::unique_ptr + Tests_MGHeterogeneous_Uniform_Neighbor_Sampling::handle_ = nullptr; using Tests_MGHeterogeneous_Uniform_Neighbor_Sampling_File = Tests_MGHeterogeneous_Uniform_Neighbor_Sampling; @@ -404,7 +396,8 @@ INSTANTIATE_TEST_SUITE_P( ::testing::Values(Heterogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10, 7, 8}, 128, 2, false}, Heterogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10, 7, 8}, 128, 2, false}, Heterogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10, 7, 8}, 128, 2, false}, - Heterogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10, 7, 8}, 128, 2, false}), + Heterogeneous_Uniform_Neighbor_Sampling_Usecase{ + {4, 10, 7, 8}, 128, 2, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/mg_homogeneous_biased_neighbor_sampling.cpp b/cpp/tests/sampling/mg_homogeneous_biased_neighbor_sampling.cpp index 1ac6c4244ce..4ad5f0a2053 100644 --- a/cpp/tests/sampling/mg_homogeneous_biased_neighbor_sampling.cpp +++ b/cpp/tests/sampling/mg_homogeneous_biased_neighbor_sampling.cpp @@ -50,7 +50,8 @@ class Tests_MGHomogeneous_Biased_Neighbor_Sampling virtual void TearDown() {} template - void run_current_test(std::tuple const& param) + void run_current_test( + std::tuple const& param) { auto [homogeneous_biased_neighbor_sampling_usecase, input_usecase] = param; @@ -105,11 +106,11 @@ class Tests_MGHomogeneous_Biased_Neighbor_Sampling mg_graph_view, std::optional>{std::nullopt}, rng_state, - //20, - + // 20, + std::max(static_cast(mg_graph_view.number_of_vertices() * select_probability), std::min(static_cast(mg_graph_view.number_of_vertices()), size_t{1})), - + false, false); @@ -128,15 +129,12 @@ class Tests_MGHomogeneous_Biased_Neighbor_Sampling auto batch_number = cugraph::test::modulo_sequence( *handle_, random_sources.size(), num_batches, seed_offsets[handle_->get_comms().get_rank()]); - + // Get unique batch_number -> label_list rmm::device_uvector label_list(batch_number.size(), handle_->get_stream()); - raft::copy(label_list.data(), - batch_number.data(), - batch_number.size(), - handle_->get_stream()); - + raft::copy(label_list.data(), batch_number.data(), batch_number.size(), handle_->get_stream()); + label_list = cugraph::test::sort(*handle_, std::move(label_list)); label_list = cugraph::test::unique(*handle_, std::move(label_list)); @@ -146,8 +144,7 @@ class Tests_MGHomogeneous_Biased_Neighbor_Sampling *handle_, num_unique_labels, int32_t{handle_->get_comms().get_rank()}); // perform allgatherv - comm_ranks = - cugraph::test::device_allgatherv(*handle_, comm_ranks.data(), comm_ranks.size()); + comm_ranks = cugraph::test::device_allgatherv(*handle_, comm_ranks.data(), comm_ranks.size()); rmm::device_uvector random_sources_copy(random_sources.size(), handle_->get_stream()); @@ -165,22 +162,18 @@ class Tests_MGHomogeneous_Biased_Neighbor_Sampling mg_edge_weight_view, std::optional>{std::nullopt}, std::optional>{std::nullopt}, - mg_edge_weight_view - raft::device_span{random_sources.data(), random_sources.size()}, + mg_edge_weight_view raft::device_span{random_sources.data(), + random_sources.size()}, std::make_optional( raft::device_span{batch_number.data(), batch_number.size()}), - std::make_optional( - raft::device_span{comm_ranks.data(), comm_ranks.size()}), + std::make_optional(raft::device_span{comm_ranks.data(), comm_ranks.size()}), raft::host_span(homogeneous_biased_neighbor_sampling_usecase.fanout.data(), homogeneous_biased_neighbor_sampling_usecase.fanout.size()), - - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - homogeneous_biased_neighbor_sampling_usecase.with_replacement - } - ), + + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + homogeneous_biased_neighbor_sampling_usecase.with_replacement}), std::exception); #else if (cugraph::test::g_perf) { @@ -201,18 +194,14 @@ class Tests_MGHomogeneous_Biased_Neighbor_Sampling raft::device_span{random_sources.data(), random_sources.size()}, std::make_optional( raft::device_span{batch_number.data(), batch_number.size()}), - std::make_optional( - raft::device_span{comm_ranks.data(), comm_ranks.size()}), + std::make_optional(raft::device_span{comm_ranks.data(), comm_ranks.size()}), raft::host_span(homogeneous_biased_neighbor_sampling_usecase.fanout.data(), homogeneous_biased_neighbor_sampling_usecase.fanout.size()), - - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - homogeneous_biased_neighbor_sampling_usecase.with_replacement - } - ); + + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + homogeneous_biased_neighbor_sampling_usecase.with_replacement}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -294,12 +283,13 @@ class Tests_MGHomogeneous_Biased_Neighbor_Sampling if (random_sources.size() < 100) { // This validation is too expensive for large number of vertices if (mg_aggregate_src.size() > 0) { - cugraph::test::validate_sampling_depth(*handle_, - std::move(mg_aggregate_src), - std::move(mg_aggregate_dst), - std::move(mg_aggregate_wgt), - std::move(mg_start_src), - homogeneous_biased_neighbor_sampling_usecase.fanout.size()); + cugraph::test::validate_sampling_depth( + *handle_, + std::move(mg_aggregate_src), + std::move(mg_aggregate_dst), + std::move(mg_aggregate_wgt), + std::move(mg_start_src), + homogeneous_biased_neighbor_sampling_usecase.fanout.size()); } } } @@ -312,8 +302,8 @@ class Tests_MGHomogeneous_Biased_Neighbor_Sampling }; template -std::unique_ptr Tests_MGHomogeneous_Biased_Neighbor_Sampling::handle_ = - nullptr; +std::unique_ptr + Tests_MGHomogeneous_Biased_Neighbor_Sampling::handle_ = nullptr; using Tests_MGHomogeneous_Biased_Neighbor_Sampling_File = Tests_MGHomogeneous_Biased_Neighbor_Sampling; @@ -387,10 +377,11 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGHomogeneous_Biased_Neighbor_Sampling_Rmat, ::testing::Combine( - ::testing::Values(Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 128, false, false, false}, - Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 128, false, true, false}, - Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 128, true, false, false}, - Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 128, true, true, false}), + ::testing::Values( + Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 128, false, false, false}, + Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 128, false, true, false}, + Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 128, true, false, false}, + Homogeneous_Biased_Neighbor_Sampling_Usecase{{4, 10}, 128, true, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/sampling/mg_homogeneous_uniform_neighbor_sampling.cpp b/cpp/tests/sampling/mg_homogeneous_uniform_neighbor_sampling.cpp index 7132231cf15..68ca7d4dd8e 100644 --- a/cpp/tests/sampling/mg_homogeneous_uniform_neighbor_sampling.cpp +++ b/cpp/tests/sampling/mg_homogeneous_uniform_neighbor_sampling.cpp @@ -50,7 +50,8 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling virtual void TearDown() {} template - void run_current_test(std::tuple const& param) + void run_current_test( + std::tuple const& param) { auto [homogeneous_uniform_neighbor_sampling_usecase, input_usecase] = param; @@ -105,11 +106,11 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling mg_graph_view, std::optional>{std::nullopt}, rng_state, - //20, - + // 20, + std::max(static_cast(mg_graph_view.number_of_vertices() * select_probability), std::min(static_cast(mg_graph_view.number_of_vertices()), size_t{1})), - + false, false); @@ -119,24 +120,22 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling auto seed_sizes = cugraph::host_scalar_allgather( handle_->get_comms(), random_sources.size(), handle_->get_stream()); - size_t num_seeds = std::reduce(seed_sizes.begin(), seed_sizes.end()); - size_t num_batches = (num_seeds + homogeneous_uniform_neighbor_sampling_usecase.batch_size - 1) / - homogeneous_uniform_neighbor_sampling_usecase.batch_size; + size_t num_seeds = std::reduce(seed_sizes.begin(), seed_sizes.end()); + size_t num_batches = + (num_seeds + homogeneous_uniform_neighbor_sampling_usecase.batch_size - 1) / + homogeneous_uniform_neighbor_sampling_usecase.batch_size; std::vector seed_offsets(seed_sizes.size()); std::exclusive_scan(seed_sizes.begin(), seed_sizes.end(), seed_offsets.begin(), size_t{0}); auto batch_number = cugraph::test::modulo_sequence( *handle_, random_sources.size(), num_batches, seed_offsets[handle_->get_comms().get_rank()]); - + // Get unique batch_number -> label_list rmm::device_uvector label_list(batch_number.size(), handle_->get_stream()); - raft::copy(label_list.data(), - batch_number.data(), - batch_number.size(), - handle_->get_stream()); - + raft::copy(label_list.data(), batch_number.data(), batch_number.size(), handle_->get_stream()); + label_list = cugraph::test::sort(*handle_, std::move(label_list)); label_list = cugraph::test::unique(*handle_, std::move(label_list)); @@ -146,8 +145,7 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling *handle_, num_unique_labels, int32_t{handle_->get_comms().get_rank()}); // perform allgatherv - comm_ranks = - cugraph::test::device_allgatherv(*handle_, comm_ranks.data(), comm_ranks.size()); + comm_ranks = cugraph::test::device_allgatherv(*handle_, comm_ranks.data(), comm_ranks.size()); rmm::device_uvector random_sources_copy(random_sources.size(), handle_->get_stream()); @@ -168,18 +166,14 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling raft::device_span{random_sources.data(), random_sources.size()}, std::make_optional( raft::device_span{batch_number.data(), batch_number.size()}), - std::make_optional( - raft::device_span{comm_ranks.data(), comm_ranks.size()}), + std::make_optional(raft::device_span{comm_ranks.data(), comm_ranks.size()}), raft::host_span(homogeneous_uniform_neighbor_sampling_usecase.fanout.data(), homogeneous_uniform_neighbor_sampling_usecase.fanout.size()), - - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - homogeneous_uniform_neighbor_sampling_usecase.with_replacement - } - ), + + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + homogeneous_uniform_neighbor_sampling_usecase.with_replacement}), std::exception); #else if (cugraph::test::g_perf) { @@ -188,7 +182,7 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling hr_timer.start("MG uniform_neighbor_sample"); } RAFT_CUDA_TRY(cudaDeviceSynchronize()); - + auto&& [src_out, dst_out, wgt_out, edge_id, edge_type, hop, offsets] = cugraph::homogeneous_uniform_neighbor_sample( *handle_, @@ -200,18 +194,14 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling raft::device_span{random_sources.data(), random_sources.size()}, std::make_optional( raft::device_span{batch_number.data(), batch_number.size()}), - std::make_optional( - raft::device_span{comm_ranks.data(), comm_ranks.size()}), + std::make_optional(raft::device_span{comm_ranks.data(), comm_ranks.size()}), raft::host_span(homogeneous_uniform_neighbor_sampling_usecase.fanout.data(), homogeneous_uniform_neighbor_sampling_usecase.fanout.size()), - - cugraph::sampling_flags_t{ - cugraph::prior_sources_behavior_t{0}, - true, // return_hops - false, // dedupe_sources - homogeneous_uniform_neighbor_sampling_usecase.with_replacement - } - ); + + cugraph::sampling_flags_t{cugraph::prior_sources_behavior_t{0}, + true, // return_hops + false, // dedupe_sources + homogeneous_uniform_neighbor_sampling_usecase.with_replacement}); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -293,12 +283,13 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling if (random_sources.size() < 100) { // This validation is too expensive for large number of vertices if (mg_aggregate_src.size() > 0) { - cugraph::test::validate_sampling_depth(*handle_, - std::move(mg_aggregate_src), - std::move(mg_aggregate_dst), - std::move(mg_aggregate_wgt), - std::move(mg_start_src), - homogeneous_uniform_neighbor_sampling_usecase.fanout.size()); + cugraph::test::validate_sampling_depth( + *handle_, + std::move(mg_aggregate_src), + std::move(mg_aggregate_dst), + std::move(mg_aggregate_wgt), + std::move(mg_start_src), + homogeneous_uniform_neighbor_sampling_usecase.fanout.size()); } } } @@ -311,8 +302,8 @@ class Tests_MGHomogeneous_Uniform_Neighbor_Sampling }; template -std::unique_ptr Tests_MGHomogeneous_Uniform_Neighbor_Sampling::handle_ = - nullptr; +std::unique_ptr + Tests_MGHomogeneous_Uniform_Neighbor_Sampling::handle_ = nullptr; using Tests_MGHomogeneous_Uniform_Neighbor_Sampling_File = Tests_MGHomogeneous_Uniform_Neighbor_Sampling; @@ -386,10 +377,11 @@ INSTANTIATE_TEST_SUITE_P( factor (to avoid running same benchmarks more than once) */ Tests_MGHomogeneous_Uniform_Neighbor_Sampling_Rmat, ::testing::Combine( - ::testing::Values(Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, false, false}, - Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, true, false}, - Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, false, false}, - Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, true, false}), + ::testing::Values( + Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, false, false}, + Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, true, false}, + Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, false, false}, + Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, true, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu index 2d759a22b1f..539de42845f 100644 --- a/cpp/tests/utilities/thrust_wrapper.cu +++ b/cpp/tests/utilities/thrust_wrapper.cu @@ -41,7 +41,7 @@ namespace test { template cugraph::dataframe_buffer_type_t sort( - + raft::handle_t const& handle, cugraph::dataframe_buffer_type_t const& values) { auto sorted_values = cugraph::allocate_dataframe_buffer( @@ -410,14 +410,12 @@ cugraph::dataframe_buffer_type_t scalar_fill(raft::handle_t const& hand value_t value) { auto values = cugraph::allocate_dataframe_buffer(length, handle.get_stream()); - - thrust::tabulate(handle.get_thrust_policy(), - values.begin(), - values.end(), - [value] __device__(size_t i) { - return value; - }); - + + thrust::tabulate( + handle.get_thrust_policy(), values.begin(), values.end(), [value] __device__(size_t i) { + return value; + }); + return values; } diff --git a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py index 45805b88581..36a86635254 100644 --- a/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/dask/sampling/uniform_neighbor_sample.py @@ -620,7 +620,7 @@ def uniform_neighbor_sample( # FIXME: Add expensive check to ensure all dict values are lists # Convert to a tuple of sequence (edge type size and fanout values) - fanout_vals = {2:[1], 3:[4, 5], 1:[6, 7], 0:[8, 9, 10]} + fanout_vals = {2: [1], 3: [4, 5], 1: [6, 7], 0: [8, 9, 10]} fanout_vals = collections.OrderedDict(sorted(fanout_vals.items())) edge_type_offsets = [] diff --git a/python/cugraph/cugraph/sampling/heterogeneous_biased_neighbor_sample.py b/python/cugraph/cugraph/sampling/heterogeneous_biased_neighbor_sample.py index b953a5d14f4..0266bb6ddb6 100644 --- a/python/cugraph/cugraph/sampling/heterogeneous_biased_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/heterogeneous_biased_neighbor_sample.py @@ -14,8 +14,9 @@ from __future__ import annotations from pylibcugraph import ResourceHandle -from pylibcugraph import heterogeneous_biased_neighbor_sample as \ - pylibcugraph_heterogeneous_biased_neighbor_sample +from pylibcugraph import ( + heterogeneous_biased_neighbor_sample as pylibcugraph_heterogeneous_biased_neighbor_sample, +) from cugraph.sampling.sampling_utilities import sampling_results_from_cupy_array_dict @@ -87,14 +88,14 @@ def heterogeneous_biased_neighbor_sample( start_vertex_list : list or cudf.Series (int32) a list of starting vertices for sampling - + start_vertex_offsets: list[int] (Optional) Offsets of each label within the start vertex list. fanout_vals : list (int32) List of branching out (fan-out) degrees per starting vertex for each hop level. - + num_edge_types: int32 Number of edge types where a value of 1 translates to homogeneous neighbor sample whereas a value greater than 1 translates to heterogeneous neighbor @@ -107,7 +108,7 @@ def heterogeneous_biased_neighbor_sample( Deprecated. Flag to specify whether to return edge properties (weight, edge id, edge type, batch id, hop id) with the sampled edges. - + prior_sources_behavior: str, optional (default=None) Options are "carryover", and "exclude". Default will leave the source list as-is. @@ -115,37 +116,37 @@ def heterogeneous_biased_neighbor_sample( current hop. Exclude will exclude sources from previous hops from reappearing as sources in future hops. - + deduplicate_sources: bool, optional (default=False) Whether to first deduplicate the list of possible sources from the previous destinations before performing next hop. - + return_hops: bool, optional (default=True) Whether to return the sampling results with hop ids corresponding to the hop where the edge appeared. Defaults to True. - + renumber: bool, optional (default=False) Whether to renumber on a per-batch basis. If True, will return the renumber map and renumber map offsets as an additional dataframe. - + retain_seeds: bool, optional (default=False) If True, will retain the original seeds (original source vertices) in the output even if they do not have outgoing neighbors. - + compression: str, optional (default=COO) Sets the compression type for the output minibatches. Valid options are COO (default), CSR, CSC, DCSR, and DCSC. - + compress_per_hop: bool, optional (default=False) Whether to compress globally (default), or to produce a separate compressed edgelist per hop. random_state: int, optional Random seed to use when making sampling calls. - + return_offsets: bool, optional (default=False) Whether to return the sampling results with batch ids included as one dataframe, or to instead return two @@ -216,8 +217,8 @@ def heterogeneous_biased_neighbor_sample( Contains the batch offsets for the renumber maps """ - use_legacy_names = False # Deprecated parameter - include_hop_column=not return_offsets # Deprecated parameter + use_legacy_names = False # Deprecated parameter + include_hop_column = not return_offsets # Deprecated parameter major_col_name = "majors" minor_col_name = "minors" @@ -243,7 +244,6 @@ def heterogeneous_biased_neighbor_sample( " of the libcugraph C++ API" ) - if with_edge_properties: warning_msg = ( "The with_edge_properties flag is deprecated" @@ -260,8 +260,7 @@ def heterogeneous_biased_neighbor_sample( start_vertex_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype ) - - """ + """ # No batch_ids, the rank owning the vertices will wom the final # result. if with_edge_properties and not with_batch_ids: @@ -292,11 +291,9 @@ def heterogeneous_biased_neighbor_sample( start_vertex_list = ensure_valid_dtype(G, start_vertex_list) - if G.renumbered: start_vertex_list = G.lookup_internal_vertex_id(start_vertex_list) - sampling_result_array_dict = pylibcugraph_heterogeneous_biased_neighbor_sample( resource_handle=ResourceHandle(), input_graph=G._plc_graph, @@ -326,7 +323,7 @@ def heterogeneous_biased_neighbor_sample( return_offsets=return_offsets, renumber=renumber, use_legacy_names=use_legacy_names, - include_hop_column=include_hop_column, # Deprecated flag + include_hop_column=include_hop_column, # Deprecated flag ) if G.renumbered and not renumber: diff --git a/python/cugraph/cugraph/sampling/heterogeneous_uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/heterogeneous_uniform_neighbor_sample.py index 7eadebbc912..3c81877abc6 100644 --- a/python/cugraph/cugraph/sampling/heterogeneous_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/heterogeneous_uniform_neighbor_sample.py @@ -14,8 +14,9 @@ from __future__ import annotations from pylibcugraph import ResourceHandle -from pylibcugraph import heterogeneous_uniform_neighbor_sample as \ - pylibcugraph_heterogeneous_uniform_neighbor_sample +from pylibcugraph import ( + heterogeneous_uniform_neighbor_sample as pylibcugraph_heterogeneous_uniform_neighbor_sample, +) from cugraph.sampling.sampling_utilities import sampling_results_from_cupy_array_dict @@ -87,14 +88,14 @@ def heterogeneous_uniform_neighbor_sample( start_vertex_list : list or cudf.Series (int32) a list of starting vertices for sampling - + start_vertex_offsets: list[int] (Optional) Offsets of each label within the start vertex list. fanout_vals : list (int32) List of branching out (fan-out) degrees per starting vertex for each hop level. - + num_edge_types: int32 Number of edge types where a value of 1 translates to homogeneous neighbor sample whereas a value greater than 1 translates to heterogeneous neighbor @@ -107,7 +108,7 @@ def heterogeneous_uniform_neighbor_sample( Deprecated. Flag to specify whether to return edge properties (weight, edge id, edge type, batch id, hop id) with the sampled edges. - + prior_sources_behavior: str, optional (default=None) Options are "carryover", and "exclude". Default will leave the source list as-is. @@ -115,37 +116,37 @@ def heterogeneous_uniform_neighbor_sample( current hop. Exclude will exclude sources from previous hops from reappearing as sources in future hops. - + deduplicate_sources: bool, optional (default=False) Whether to first deduplicate the list of possible sources from the previous destinations before performing next hop. - + return_hops: bool, optional (default=True) Whether to return the sampling results with hop ids corresponding to the hop where the edge appeared. Defaults to True. - + renumber: bool, optional (default=False) Whether to renumber on a per-batch basis. If True, will return the renumber map and renumber map offsets as an additional dataframe. - + retain_seeds: bool, optional (default=False) If True, will retain the original seeds (original source vertices) in the output even if they do not have outgoing neighbors. - + compression: str, optional (default=COO) Sets the compression type for the output minibatches. Valid options are COO (default), CSR, CSC, DCSR, and DCSC. - + compress_per_hop: bool, optional (default=False) Whether to compress globally (default), or to produce a separate compressed edgelist per hop. random_state: int, optional Random seed to use when making sampling calls. - + return_offsets: bool, optional (default=False) Whether to return the sampling results with batch ids included as one dataframe, or to instead return two @@ -216,8 +217,8 @@ def heterogeneous_uniform_neighbor_sample( Contains the batch offsets for the renumber maps """ - use_legacy_names = False # Deprecated parameter - include_hop_column=not return_offsets # Deprecated parameter + use_legacy_names = False # Deprecated parameter + include_hop_column = not return_offsets # Deprecated parameter major_col_name = "majors" minor_col_name = "minors" @@ -243,7 +244,6 @@ def heterogeneous_uniform_neighbor_sample( " of the libcugraph C++ API" ) - if with_edge_properties: warning_msg = ( "The with_edge_properties flag is deprecated" @@ -260,8 +260,7 @@ def heterogeneous_uniform_neighbor_sample( start_vertex_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype ) - - """ + """ # No batch_ids, the rank owning the vertices will wom the final # result. if with_edge_properties and not with_batch_ids: @@ -292,11 +291,9 @@ def heterogeneous_uniform_neighbor_sample( start_vertex_list = ensure_valid_dtype(G, start_vertex_list) - if G.renumbered: start_vertex_list = G.lookup_internal_vertex_id(start_vertex_list) - sampling_result_array_dict = pylibcugraph_heterogeneous_uniform_neighbor_sample( resource_handle=ResourceHandle(), input_graph=G._plc_graph, @@ -326,7 +323,7 @@ def heterogeneous_uniform_neighbor_sample( return_offsets=return_offsets, renumber=renumber, use_legacy_names=use_legacy_names, - include_hop_column=include_hop_column, # Deprecated flag + include_hop_column=include_hop_column, # Deprecated flag ) if G.renumbered and not renumber: diff --git a/python/cugraph/cugraph/sampling/homogeneous_biased_uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/homogeneous_biased_uniform_neighbor_sample.py index 62ea1a93686..804bea9336d 100644 --- a/python/cugraph/cugraph/sampling/homogeneous_biased_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/homogeneous_biased_uniform_neighbor_sample.py @@ -14,8 +14,9 @@ from __future__ import annotations from pylibcugraph import ResourceHandle -from pylibcugraph import homogeneous_biased_neighbor_sample as \ - pylibcugraph_homogeneous_biased_neighbor_sample +from pylibcugraph import ( + homogeneous_biased_neighbor_sample as pylibcugraph_homogeneous_biased_neighbor_sample, +) from cugraph.sampling.sampling_utilities import sampling_results_from_cupy_array_dict @@ -87,7 +88,7 @@ def homogeneous_biased_neighbor_sample( start_vertex_list : list or cudf.Series (int32) a list of starting vertices for sampling - + start_vertex_offsets: list[int] (Optional) Offsets of each label within the start vertex list. @@ -102,7 +103,7 @@ def homogeneous_biased_neighbor_sample( Deprecated. Flag to specify whether to return edge properties (weight, edge id, edge type, batch id, hop id) with the sampled edges. - + prior_sources_behavior: str, optional (default=None) Options are "carryover", and "exclude". Default will leave the source list as-is. @@ -110,37 +111,37 @@ def homogeneous_biased_neighbor_sample( current hop. Exclude will exclude sources from previous hops from reappearing as sources in future hops. - + deduplicate_sources: bool, optional (default=False) Whether to first deduplicate the list of possible sources from the previous destinations before performing next hop. - + return_hops: bool, optional (default=True) Whether to return the sampling results with hop ids corresponding to the hop where the edge appeared. Defaults to True. - + renumber: bool, optional (default=False) Whether to renumber on a per-batch basis. If True, will return the renumber map and renumber map offsets as an additional dataframe. - + retain_seeds: bool, optional (default=False) If True, will retain the original seeds (original source vertices) in the output even if they do not have outgoing neighbors. - + compression: str, optional (default=COO) Sets the compression type for the output minibatches. Valid options are COO (default), CSR, CSC, DCSR, and DCSC. - + compress_per_hop: bool, optional (default=False) Whether to compress globally (default), or to produce a separate compressed edgelist per hop. random_state: int, optional Random seed to use when making sampling calls. - + return_offsets: bool, optional (default=False) Whether to return the sampling results with batch ids included as one dataframe, or to instead return two @@ -211,8 +212,8 @@ def homogeneous_biased_neighbor_sample( Contains the batch offsets for the renumber maps """ - use_legacy_names = False # Deprecated parameter - include_hop_column=not return_offsets # Deprecated parameter + use_legacy_names = False # Deprecated parameter + include_hop_column = not return_offsets # Deprecated parameter major_col_name = "majors" minor_col_name = "minors" @@ -238,7 +239,6 @@ def homogeneous_biased_neighbor_sample( " of the libcugraph C++ API" ) - if with_edge_properties: warning_msg = ( "The with_edge_properties flag is deprecated" @@ -255,8 +255,7 @@ def homogeneous_biased_neighbor_sample( start_vertex_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype ) - - """ + """ # No batch_ids, the rank owning the vertices will wom the final # result. if with_edge_properties and not with_batch_ids: @@ -287,11 +286,9 @@ def homogeneous_biased_neighbor_sample( start_vertex_list = ensure_valid_dtype(G, start_vertex_list) - if G.renumbered: start_vertex_list = G.lookup_internal_vertex_id(start_vertex_list) - sampling_result_array_dict = pylibcugraph_homogeneous_biased_neighbor_sample( resource_handle=ResourceHandle(), input_graph=G._plc_graph, @@ -320,7 +317,7 @@ def homogeneous_biased_neighbor_sample( return_offsets=return_offsets, renumber=renumber, use_legacy_names=use_legacy_names, - include_hop_column=include_hop_column, # Deprecated flag + include_hop_column=include_hop_column, # Deprecated flag ) if G.renumbered and not renumber: diff --git a/python/cugraph/cugraph/sampling/homogeneous_uniform_neighbor_sample.py b/python/cugraph/cugraph/sampling/homogeneous_uniform_neighbor_sample.py index 4c015251af7..6ff955215ca 100644 --- a/python/cugraph/cugraph/sampling/homogeneous_uniform_neighbor_sample.py +++ b/python/cugraph/cugraph/sampling/homogeneous_uniform_neighbor_sample.py @@ -14,8 +14,9 @@ from __future__ import annotations from pylibcugraph import ResourceHandle -from pylibcugraph import homogeneous_uniform_neighbor_sample as \ - pylibcugraph_homogeneous_uniform_neighbor_sample +from pylibcugraph import ( + homogeneous_uniform_neighbor_sample as pylibcugraph_homogeneous_uniform_neighbor_sample, +) from cugraph.sampling.sampling_utilities import sampling_results_from_cupy_array_dict @@ -86,7 +87,7 @@ def homogeneous_uniform_neighbor_sample( start_vertex_list : list or cudf.Series (int32) a list of starting vertices for sampling - + start_vertex_offsets: list[int] (Optional) Offsets of each label within the start vertex list. @@ -101,7 +102,7 @@ def homogeneous_uniform_neighbor_sample( Deprecated. Flag to specify whether to return edge properties (weight, edge id, edge type, batch id, hop id) with the sampled edges. - + prior_sources_behavior: str, optional (default=None) Options are "carryover", and "exclude". Default will leave the source list as-is. @@ -109,37 +110,37 @@ def homogeneous_uniform_neighbor_sample( current hop. Exclude will exclude sources from previous hops from reappearing as sources in future hops. - + deduplicate_sources: bool, optional (default=False) Whether to first deduplicate the list of possible sources from the previous destinations before performing next hop. - + return_hops: bool, optional (default=True) Whether to return the sampling results with hop ids corresponding to the hop where the edge appeared. Defaults to True. - + renumber: bool, optional (default=False) Whether to renumber on a per-batch basis. If True, will return the renumber map and renumber map offsets as an additional dataframe. - + retain_seeds: bool, optional (default=False) If True, will retain the original seeds (original source vertices) in the output even if they do not have outgoing neighbors. - + compression: str, optional (default=COO) Sets the compression type for the output minibatches. Valid options are COO (default), CSR, CSC, DCSR, and DCSC. - + compress_per_hop: bool, optional (default=False) Whether to compress globally (default), or to produce a separate compressed edgelist per hop. random_state: int, optional Random seed to use when making sampling calls. - + return_offsets: bool, optional (default=False) Whether to return the sampling results with batch ids included as one dataframe, or to instead return two @@ -210,8 +211,8 @@ def homogeneous_uniform_neighbor_sample( Contains the batch offsets for the renumber maps """ - use_legacy_names = False # Deprecated parameter - include_hop_column=not return_offsets # Deprecated parameter + use_legacy_names = False # Deprecated parameter + include_hop_column = not return_offsets # Deprecated parameter major_col_name = "majors" minor_col_name = "minors" @@ -237,7 +238,6 @@ def homogeneous_uniform_neighbor_sample( " of the libcugraph C++ API" ) - if with_edge_properties: warning_msg = ( "The with_edge_properties flag is deprecated" @@ -254,8 +254,7 @@ def homogeneous_uniform_neighbor_sample( start_vertex_list, dtype=G.edgelist.edgelist_df[G.srcCol].dtype ) - - """ + """ # No batch_ids, the rank owning the vertices will wom the final # result. if with_edge_properties and not with_batch_ids: @@ -286,11 +285,9 @@ def homogeneous_uniform_neighbor_sample( start_vertex_list = ensure_valid_dtype(G, start_vertex_list) - if G.renumbered: start_vertex_list = G.lookup_internal_vertex_id(start_vertex_list) - sampling_result_array_dict = pylibcugraph_homogeneous_uniform_neighbor_sample( resource_handle=ResourceHandle(), input_graph=G._plc_graph, @@ -319,7 +316,7 @@ def homogeneous_uniform_neighbor_sample( return_offsets=return_offsets, renumber=renumber, use_legacy_names=use_legacy_names, - include_hop_column=include_hop_column, # Deprecated flag + include_hop_column=include_hop_column, # Deprecated flag ) if G.renumbered and not renumber: diff --git a/python/pylibcugraph/pylibcugraph/__init__.py b/python/pylibcugraph/pylibcugraph/__init__.py index ea584569d77..97c8ab342e6 100644 --- a/python/pylibcugraph/pylibcugraph/__init__.py +++ b/python/pylibcugraph/pylibcugraph/__init__.py @@ -40,15 +40,23 @@ from pylibcugraph.bfs import bfs from pylibcugraph.uniform_neighbor_sample import uniform_neighbor_sample -from pylibcugraph.homogeneous_uniform_neighbor_sample import homogeneous_uniform_neighbor_sample -from pylibcugraph.homogeneous_biased_neighbor_sample import homogeneous_biased_neighbor_sample -from pylibcugraph.heterogeneous_uniform_neighbor_sample import heterogeneous_uniform_neighbor_sample -from pylibcugraph.heterogeneous_biased_neighbor_sample import heterogeneous_biased_neighbor_sample +from pylibcugraph.homogeneous_uniform_neighbor_sample import ( + homogeneous_uniform_neighbor_sample, +) +from pylibcugraph.homogeneous_biased_neighbor_sample import ( + homogeneous_biased_neighbor_sample, +) +from pylibcugraph.heterogeneous_uniform_neighbor_sample import ( + heterogeneous_uniform_neighbor_sample, +) +from pylibcugraph.heterogeneous_biased_neighbor_sample import ( + heterogeneous_biased_neighbor_sample, +) -#FIXME: break down the API into homogeneous and heterogeneous neighbor sample -#from pylibcugraph.uniform_neighbor_sample import neighbor_sample -#from pylibcugraph.biased_neighbor_sample import biased_neighbor_sample +# FIXME: break down the API into homogeneous and heterogeneous neighbor sample +# from pylibcugraph.uniform_neighbor_sample import neighbor_sample +# from pylibcugraph.biased_neighbor_sample import biased_neighbor_sample from pylibcugraph.negative_sampling import negative_sampling diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx index 401b2ee6258..ea71caaa7db 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx @@ -113,23 +113,23 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, input_graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + edge_biases: FIXE: update this - Create edge_biases of type 'cugraph_edge_property_view_t' - edge biases not yet supported. start_vertex_list: device array type Device array containing the list of starting vertices for sampling. - + start_vertex_offsets: list[int] (Optional) Offsets of each label within the start vertex list. h_fan_out: tuple of numpy array type Device array containing the branching out (fan-out) degrees per starting vertex for each hop level - + The sampling method can use different fan_out values for each edge type which is not the case for homogeneous neighborhood sampling. - + num_edge_types: Number of edge types where a value of 1 translates to homogeneous neighbor sample whereas a value greater than 1 translates to heterogeneous neighbor sample. diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx index 477a010037b..2580c7369be 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx @@ -113,17 +113,17 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, start_vertex_list: device array type Device array containing the list of starting vertices for sampling. - + start_vertex_offsets: list[int] (Optional) Offsets of each label within the start vertex list. h_fan_out: tuple of numpy array type Device array containing the branching out (fan-out) degrees per starting vertex for each hop level - + The sampling method can use different fan_out values for each edge type which is not the case for homogeneous neighborhood sampling. - + num_edge_types: Number of edge types where a value of 1 translates to homogeneous neighbor sample whereas a value greater than 1 translates to heterogeneous neighbor sample. diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx index d95d8a80a2e..51b770f0fab 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx @@ -112,20 +112,20 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, input_graph : SGGraph or MGGraph The input graph, for either Single or Multi-GPU operations. - + edge_biases: FIXE: update this - Create edge_biases of type 'cugraph_edge_property_view_t' - edge biases not yet supported. start_vertex_list: device array type Device array containing the list of starting vertices for sampling. - + start_vertex_offsets: list[int] (Optional) Offsets of each label within the start vertex list. h_fan_out: tuple of numpy array type Device array containing the branching out (fan-out) degrees per starting vertex for each hop level - + The sampling method can use different fan_out values for each edge type which is not the case for homogeneous neighborhood sampling. diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx index 1f5ec94705a..bddcc0b29ff 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx @@ -112,14 +112,14 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, start_vertex_list: device array type Device array containing the list of starting vertices for sampling. - + start_vertex_offsets: list[int] (Optional) Offsets of each label within the start vertex list. h_fan_out: tuple of numpy array type Device array containing the branching out (fan-out) degrees per starting vertex for each hop level - + The sampling method can use different fan_out values for each edge type which is not the case for homogeneous neighborhood sampling.