From 7b95c5e516b657692333cd1defbc758615d5f7bc Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 29 Aug 2024 20:28:22 -0700 Subject: [PATCH] update neighbor sample API --- cpp/include/cugraph/sampling_functions.hpp | 131 ++++- cpp/include/cugraph_c/sampling_algorithms.h | 86 ++- cpp/src/c_api/neighbor_sampling.cpp | 279 +++++++--- cpp/src/sampling/neighbor_sampling_impl.hpp | 65 ++- .../sampling/neighbor_sampling_mg_v32_e32.cpp | 73 ++- .../sampling/neighbor_sampling_mg_v32_e64.cpp | 64 ++- .../sampling/neighbor_sampling_mg_v64_e64.cpp | 64 ++- .../sampling/neighbor_sampling_sg_v32_e32.cpp | 70 ++- .../sampling/neighbor_sampling_sg_v32_e64.cpp | 64 ++- .../sampling/neighbor_sampling_sg_v64_e64.cpp | 64 ++- .../_cugraph_c/sampling_algorithms.pxd | 23 +- .../heterogeneous_neighbor_sample.pyx | 511 ++++++++++++++++++ ...le.pyx => homogeneous_neighbor_sample.pyx} | 76 +-- 13 files changed, 1355 insertions(+), 215 deletions(-) create mode 100644 python/pylibcugraph/pylibcugraph/heterogeneous_neighbor_sample.pyx rename python/pylibcugraph/pylibcugraph/{neighbor_sample.pyx => homogeneous_neighbor_sample.pyx} (90%) diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index 3140e9073a0..45114e06480 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -43,7 +43,8 @@ enum class prior_sources_behavior_t { DEFAULT = 0, CARRY_OVER, EXCLUDE }; /** * @brief Uniform Neighborhood Sampling. * - * @deprecated This API will be deleted, use neighbor_sample instead + * @deprecated This API will be deleted, use cugraph_homogeneous_neighbor_sample with + * 'is_biased' set to false instead * * This function traverses from a set of starting vertices, traversing outgoing edges and * randomly selects from these outgoing neighbors to extract a subgraph. @@ -142,7 +143,8 @@ uniform_neighbor_sample( /** * @brief Biased Neighborhood Sampling. 
* - * @deprecated This API will be deleted, use neighbor_sample instead + * @deprecated This API will be deleted, use cugraph_homogeneous_neighbor_sample with + * 'is_biased' set to true instead * * This function traverses from a set of starting vertices, traversing outgoing edges and * randomly selects (with edge biases) from these outgoing neighbors to extract a subgraph. @@ -244,14 +246,12 @@ biased_neighbor_sample( bool dedupe_sources = false, bool do_expensive_check = false); - /** - * @brief Neighborhood Sampling. + * @brief Heterogeneous Neighborhood Sampling. * * This function traverses from a set of starting vertices, traversing outgoing edges and * randomly selects (with edge biases or not) from these outgoing neighbors to extract a subgraph. - * When branching out to select outgoing neighbors, either fan_out or heterogeneous_fan_out must - * be provided but not both. + * The branching out to select outgoing neighbors is performed with heterogeneous fanouts. * * Output from this function is a tuple of vectors (src, dst, weight, edge_id, edge_type, hop, * label, offsets), identifying the randomly selected edges. src is the source vertex, dst is the @@ -301,12 +301,10 @@ biased_neighbor_sample( * @param label_to_output_comm_rank Optional tuple of device spans mapping label to a particular * output rank. Element 0 of the tuple identifes the label, Element 1 of the tuple identifies the * output rank. The label span must be sorted in ascending order. - * @param fan_out Host span defining branching out (fan-out) degree per source vertex for each - * level. When fan_out is provided, the sampling method uses the same fanout value for each type. * @param heterogeneous_fan_out Tuple of host spans defining branching out (fan-out) degree per * source vertex for each level in CSR style format. The first element of the tuple is the offset * array per edge type id and the second element correspond to the fanout values. 
- * When heterogeneous_fan_out is provided, different fan_out values can be used for each edge type. + * array per edge type id and the second element corresponds to the fanout values. + * The sampling method can use different fan_out values for each edge type. * The fan-out offsets size must be proportional to the number of edge types and fan_out values. * @param return_hops boolean flag specifying if the hop information should be returned. * @param prior_sources_behavior Enum type defining how to handle prior sources, (defaults to @@ -321,7 +319,7 @@ biased_neighbor_sample( * optional weight_t weight, optional edge_t edge id, optional edge_type_t edge type, * optional int32_t hop, optional label_t label, optional size_t offsets) */ -// FIXME: Add flag for bias=True/False + template , std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -350,8 +348,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, bool with_replacement = true, @@ -359,6 +356,112 @@ neighbor_sample( bool dedupe_sources = false, bool do_expensive_check = false); +/** + * @brief Homogeneous Neighborhood Sampling. + * + * This function traverses from a set of starting vertices, traversing outgoing edges and + * randomly selects (with edge biases or not) from these outgoing neighbors to extract a subgraph. + * The branching out to select outgoing neighbors is performed with homogeneous fanouts + * + * Output from this function is a tuple of vectors (src, dst, weight, edge_id, edge_type, hop, + * label, offsets), identifying the randomly selected edges. 
src is the source vertex, dst is the + * destination vertex, weight (optional) is the edge weight, edge_id (optional) identifies the edge + * id, edge_type (optional) identifies the edge type, hop identifies which hop the edge was + * encountered in. The label output (optional) identifes the vertex label. The offsets array + * (optional) will be described below and is dependent upon the input parameters. + * + * If @p starting_vertex_labels is not specified then no organization is applied to the output, the + * label and offsets values in the return set will be std::nullopt. + * + * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is not specified then + * the label output has values. This will also result in the output being sorted by vertex label. + * The offsets array in the return will be a CSR-style offsets array to identify the beginning of + * each label range in the data. `labels.size() == (offsets.size() - 1)`. + * + * If @p starting_vertex_labels is specified and @p label_to_output_comm_rank is specified then the + * label output has values. This will also result in the output being sorted by vertex label. The + * offsets array in the return will be a CSR-style offsets array to identify the beginning of each + * label range in the data. `labels.size() == (offsets.size() - 1)`. Additionally, the data will + * be shuffled so that all data with a particular label will be on the specified rank. + * + * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type. + * @tparam edge_t Type of edge identifiers. Needs to be an integral type. + * @tparam weight_t Type of edge weights. Needs to be a floating point type. + * @tparam edge_type_t Type of edge type. Needs to be an integral type. + * @tparam label_t Type of label. Needs to be an integral type. 
+ * @tparam store_transposed Flag indicating whether sources (if false) or destinations (if + * true) are major indices + * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false) or multi-GPU (true) + * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and + * handles to various CUDA libraries) to run graph algorithms. + * @param rng_state A pre-initialized raft::RngState object for generating random numbers + * @param graph_view Graph View object to generate NBR Sampling on. + * @param edge_weight_view Optional view object holding edge weights for @p graph_view. + * @param edge_id_view Optional view object holding edge ids for @p graph_view. + * @param edge_type_view Optional view object holding edge types for @p graph_view. + * @param edge_bias_view Optional view object holding edge biases (to be used in biased sampling) for @p + * graph_view. Bias values should be non-negative and the sum of edge bias values from any vertex + * should not exceed std::numeric_limits::max(). 0 bias value indicates that the + * corresponding edge can never be selected. Passing std::nullopt as the edge biases will result in + * uniform sampling. + * @param starting_vertices Device span of starting vertex IDs for the sampling. + * In a multi-gpu context the starting vertices should be local to this GPU. + * @param starting_vertex_labels Optional device span of labels associated with each starting vertex + * for the sampling. + * @param label_to_output_comm_rank Optional tuple of device spans mapping label to a particular + * output rank. Element 0 of the tuple identifies the label, Element 1 of the tuple identifies the + * output rank. The label span must be sorted in ascending order. + * @param fan_out Host span defining branching out (fan-out) degree per source vertex for each + * level. The sampling method uses the same fanout value for each type. 
+ * @param return_hops boolean flag specifying if the hop information should be returned. + * @param prior_sources_behavior Enum type defining how to handle prior sources, (defaults to + * DEFAULT) + * @param dedupe_sources boolean flag, if true then if a vertex v appears as a destination in hop X + * multiple times with the same label, it will only be passed once (for each label) as a source + * for the next hop. Default is false. + * @param with_replacement boolean flag specifying if random sampling is done with replacement + * (true); or, without replacement (false); default = true; + * @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`). + * @return tuple device vectors (vertex_t source_vertex, vertex_t destination_vertex, + * optional weight_t weight, optional edge_t edge id, optional edge_type_t edge type, + * optional int32_t hop, optional label_t label, optional size_t offsets) + */ + +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool with_replacement = true, + prior_sources_behavior_t prior_sources_behavior = prior_sources_behavior_t::DEFAULT, + bool dedupe_sources = false, + bool do_expensive_check = false); + /* * @brief renumber sampled edge list and compress to the (D)CSR|(D)CSC format. 
* diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 17dd53436f9..32f0b09ee6d 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -366,12 +366,13 @@ cugraph_error_code_t cugraph_create_heterogeneous_fan_out( * * @param [in] heterogeneous_fanout The edge type size and fanout values */ -void cugraph_heterogeneous_fanout_free(cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fanout); +void cugraph_heterogeneous_fan_out_free(cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fanout); /** * @brief Uniform Neighborhood Sampling * - * @deprecated This API will be deleted, use cugraph_neighbor_sample instead + * @deprecated This API will be deleted, use cugraph_homogeneous_neighbor_sample with + * 'is_biased' set to false instead * * Returns a sample of the neighborhood around specified start vertices. Optionally, each * start vertex can be associated with a label, allowing the caller to specify multiple batches @@ -428,8 +429,9 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( /** * @brief Biased Neighborhood Sampling * - * @deprecated This API will be deleted, use cugraph_neighbor_sample instead - * + * @deprecated This API will be deleted, use cugraph_homogeneous_neighbor_sample with + * 'is_biased' set to true instead + * * Returns a sample of the neighborhood around specified start vertices. Optionally, each * start vertex can be associated with a label, allowing the caller to specify multiple batches * of sampling requests in the same function call - which should improve GPU utilization. @@ -487,9 +489,10 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( cugraph_error_t** error); /** - * @brief Neighborhood Sampling + * @brief Heterogeneous Neighborhood Sampling * - * Returns a sample of the neighborhood around specified start vertices with edge biases or not. 
+ * Returns a sample of the neighborhood around specified start vertices with edge biases or not + * and heterogeneous fanout types. * Optionally, each start vertex can be associated with a label, allowing the caller to specify * multiple batches of sampling requests in the same function call - which should improve GPU * utilization. @@ -518,11 +521,9 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( * label_to_comm_rank[i]. If not specified then the output data will not be shuffled between ranks. * @param [in] label_offsets Device array of the offsets for each label in the seed list. This * parameter is only used with the retain_seeds option. - * @param [in] fanout Host array defining the fan out at each step in the sampling algorithm. - * We only support fanout values of type INT32 * @param [in] heterogeneous_fanout Tuple of host arrays defining the fan out at each step in the * sampling algorithm. in CSR style format. The first element of the tuple is the offset array per - * edge type id and the second element correspond to the fanout values. + * edge type id and the second element corresponds to the fanout values. * We only support type INT32 for both the offsets and the fanout values array. * @param [in] sampling_options * Opaque pointer defining the sampling options. 
@@ -536,7 +537,7 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( * be populated if error code is not CUGRAPH_SUCCESS * @return error code */ -cugraph_error_code_t cugraph_neighbor_sample( +cugraph_error_code_t cugraph_heterogeneous_neighbor_sample( const cugraph_resource_handle_t* handle, cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, @@ -546,7 +547,6 @@ cugraph_error_code_t cugraph_neighbor_sample( const cugraph_type_erased_device_array_view_t* label_list, const cugraph_type_erased_device_array_view_t* label_to_comm_rank, const cugraph_type_erased_device_array_view_t* label_offsets, - const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fanout, const cugraph_sampling_options_t* options, bool_t is_biased, @@ -554,6 +554,70 @@ cugraph_error_code_t cugraph_neighbor_sample( cugraph_sample_result_t** result, cugraph_error_t** error); +/** + * @brief Homogeneous Neighborhood Sampling + * + * Returns a sample of the neighborhood around specified start vertices with edge biases or not + * and homogeneous fanout types. + * Optionally, each start vertex can be associated with a label, allowing the caller to specify + * multiple batches of sampling requests in the same function call - which should improve GPU + * utilization. + * + * If label is NULL then all start vertices will be considered part of the same batch and the + * return value will not have a label column. + * + * @param [in] handle Handle for accessing resources + * @param [in,out] rng_state State of the random number generator, updated with each call + * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage + * needs to be transposed + * @param [in] edge_biases Device array of edge biases to use for sampling. If NULL and + * @p is_biased is true, the edge weights are used as the biases; ignored when @p is_biased is false. 
+ * @param [in] start_vertices Device array of start vertices for the sampling + * @param [in] start_vertex_labels Device array of start vertex labels for the sampling. The + * labels associated with each start vertex will be included in the output associated with results + * that were derived from that start vertex. We only support label of type INT32. If label is + * NULL, the return data will not be labeled. + * @param [in] label_list Device array of the labels included in @p start_vertex_labels. If + * @p label_to_comm_rank is not specified this parameter is ignored. If specified, label_list + * must be sorted in ascending order. + * @param [in] label_to_comm_rank Device array identifying which comm rank the output for a + * particular label should be shuffled in the output. If not specified the data is not organized in + * output. If specified then all data from @p label_list[i] will be shuffled to rank + * @p label_to_comm_rank[i]. This cannot be specified unless @p start_vertex_labels is also + * specified. If not specified then the output data will not be shuffled between ranks. + * @param [in] label_offsets Device array of the offsets for each label in the seed list. This + * parameter is only used with the retain_seeds option. + * @param [in] fan_out Host array defining the fan out at each step in the sampling algorithm. + * We only support fanout values of type INT32 + * @param [in] options + * Opaque pointer defining the sampling options. + * @param [in] is_biased + * A flag specifying whether to run biased neighborhood sampling + * (if set to true) or uniform neighbor sampling. + * @param [in] do_expensive_check + * A flag to run expensive checks for input arguments (if set to true) + * @param [out] result Output from the homogeneous neighbor sampling call + * @param [out] error Pointer to an error object storing details of any error. 
Will + * be populated if error code is not CUGRAPH_SUCCESS + * @return error code + */ +cugraph_error_code_t cugraph_homogeneous_neighbor_sample( + const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + const cugraph_edge_property_view_t* edge_biases, + const cugraph_type_erased_device_array_view_t* start_vertices, + const cugraph_type_erased_device_array_view_t* start_vertex_labels, + const cugraph_type_erased_device_array_view_t* label_list, + const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, + const cugraph_type_erased_host_array_view_t* fan_out, + const cugraph_sampling_options_t* options, + bool_t is_biased, + bool_t do_expensive_check, + cugraph_sample_result_t** result, + cugraph_error_t** error); + /** * @deprecated This call should be replaced with cugraph_sample_result_get_majors * @brief Get the source vertices from the sampling algorithm result diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 46e6eacf938..def04c021e7 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -218,45 +218,82 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { graph_view.local_vertex_partition_range_last(), do_expensive_check_); + rmm::device_uvector src(0, handle_.get_stream()); + rmm::device_uvector dst(0, handle_.get_stream()); + std::optional> wgt{std::nullopt}; + std::optional> edge_id{std::nullopt}; + std::optional> edge_type{std::nullopt}; + std::optional> hop{std::nullopt}; + std::optional> edge_label{std::nullopt}; + std::optional> offsets{std::nullopt}; // FIXME: Consolidate 'fan_out_' and 'heterogeneous_fan_out_' into one // argument with std::variant - auto&& [src, dst, wgt, edge_id, edge_type, hop, edge_label, offsets] = - cugraph::neighbor_sample( - handle_, - rng_state_->rng_state_, - graph_view, - (edge_weights != nullptr) ? 
std::make_optional(edge_weights->view()) : std::nullopt, - (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, - (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, - is_biased_ ? ((edge_biases != nullptr) ? std::make_optional(*edge_biases) : std::make_optional(edge_weights->view())) : std::nullopt, - raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_labels_ != nullptr) - ? std::make_optional>(start_vertex_labels->data(), - start_vertex_labels->size()) - : std::nullopt, - (label_list_ != nullptr) - ? std::make_optional(std::make_tuple( - raft::device_span{label_list_->as_type(), - label_list_->size_}, - raft::device_span{label_to_comm_rank_->as_type(), - label_to_comm_rank_->size_})) - : std::nullopt, - (fan_out_ != nullptr) ? std::make_optional>( - fan_out_->as_type(), fan_out_->size_) - : std::nullopt, - - (heterogeneous_fan_out_ != nullptr) - ? std::make_optional(std::make_tuple( - raft::host_span{std::get<0>(*heterogeneous_fan_out_)->as_type(), - std::get<0>(*heterogeneous_fan_out_)->size_}, - raft::host_span{std::get<1>(*heterogeneous_fan_out_)->as_type(), - std::get<1>(*heterogeneous_fan_out_)->size_})) - : std::nullopt, - options_.return_hops_, - options_.with_replacement_, - options_.prior_sources_behavior_, - options_.dedupe_sources_, - do_expensive_check_); + //auto&& [src, dst, wgt, edge_id, edge_type, hop, edge_label, offsets] = + if (heterogeneous_fan_out_ != nullptr) { + std::tie(src, dst, wgt, edge_id, edge_type, hop, edge_label, offsets) = + cugraph::heterogeneous_neighbor_sample( + handle_, + rng_state_->rng_state_, + graph_view, + (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, + (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, + (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, + is_biased_ ? ((edge_biases != nullptr) ? 
std::make_optional(*edge_biases) : std::make_optional(edge_weights->view())) : std::nullopt, + raft::device_span{start_vertices.data(), start_vertices.size()}, + (start_vertex_labels_ != nullptr) + ? std::make_optional>(start_vertex_labels->data(), + start_vertex_labels->size()) + : std::nullopt, + (label_list_ != nullptr) + ? std::make_optional(std::make_tuple( + raft::device_span{label_list_->as_type(), + label_list_->size_}, + raft::device_span{label_to_comm_rank_->as_type(), + label_to_comm_rank_->size_})) + : std::nullopt, + + std::make_tuple( + raft::host_span{std::get<0>(*heterogeneous_fan_out_)->as_type(), + std::get<0>(*heterogeneous_fan_out_)->size_}, + raft::host_span{std::get<1>(*heterogeneous_fan_out_)->as_type(), + std::get<1>(*heterogeneous_fan_out_)->size_}), + options_.return_hops_, + options_.with_replacement_, + options_.prior_sources_behavior_, + options_.dedupe_sources_, + do_expensive_check_); + } else { + + std::tie(src, dst, wgt, edge_id, edge_type, hop, edge_label, offsets) = + cugraph::homogeneous_neighbor_sample( + handle_, + rng_state_->rng_state_, + graph_view, + (edge_weights != nullptr) ? std::make_optional(edge_weights->view()) : std::nullopt, + (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, + (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, + is_biased_ ? ((edge_biases != nullptr) ? std::make_optional(*edge_biases) : std::make_optional(edge_weights->view())) : std::nullopt, + raft::device_span{start_vertices.data(), start_vertices.size()}, + (start_vertex_labels_ != nullptr) + ? std::make_optional>(start_vertex_labels->data(), + start_vertex_labels->size()) + : std::nullopt, + (label_list_ != nullptr) + ? 
std::make_optional(std::make_tuple( + raft::device_span{label_list_->as_type(), + label_list_->size_}, + raft::device_span{label_to_comm_rank_->as_type(), + label_to_comm_rank_->size_})) + : std::nullopt, + raft::host_span( + fan_out_->as_type(), fan_out_->size_), + options_.return_hops_, + options_.with_replacement_, + options_.prior_sources_behavior_, + options_.dedupe_sources_, + do_expensive_check_); + + } std::vector vertex_partition_lasts = graph_view.vertex_partition_range_lasts(); @@ -996,9 +1033,8 @@ cugraph_error_code_t cugraph_uniform_neighbor_sample( return cugraph::c_api::run_algorithm(graph, functor, result, error); } -cugraph_error_code_t cugraph_neighbor_sample( +cugraph_error_code_t cugraph_biased_neighbor_sample( const cugraph_resource_handle_t* handle, - cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, @@ -1007,6 +1043,88 @@ cugraph_error_code_t cugraph_neighbor_sample( const cugraph_type_erased_device_array_view_t* label_to_comm_rank, const cugraph_type_erased_device_array_view_t* label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, + cugraph_rng_state_t* rng_state, + const cugraph_sampling_options_t* options, + bool_t do_expensive_check, + cugraph_sample_result_t** result, + cugraph_error_t** error) +{ + auto options_cpp = *reinterpret_cast(options); + + CAPI_EXPECTS( + (edge_biases != nullptr) || + (reinterpret_cast(graph)->edge_weights_ != nullptr), + CUGRAPH_INVALID_INPUT, + "edge_biases is required if the graph is not weighted", + *error); + + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (label_offsets != nullptr), + CUGRAPH_INVALID_INPUT, + "must specify label_offsets if retain_seeds is true", + *error); + + CAPI_EXPECTS((start_vertex_labels == nullptr) || + (reinterpret_cast( + start_vertex_labels) + ->type_ == INT32), + CUGRAPH_INVALID_INPUT, + "start_vertex_labels should be of type int", + *error); + + 
CAPI_EXPECTS((label_to_comm_rank == nullptr) || (start_vertex_labels != nullptr), + CUGRAPH_INVALID_INPUT, + "cannot specify label_to_comm_rank unless start_vertex_labels is also specified", + *error); + + CAPI_EXPECTS((label_to_comm_rank == nullptr) || (label_list != nullptr), + CUGRAPH_INVALID_INPUT, + "cannot specify label_to_comm_rank unless label_list is also specified", + *error); + + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == + reinterpret_cast( + start_vertices) + ->type_, + CUGRAPH_INVALID_INPUT, + "vertex type of graph and start_vertices must match", + *error); + + CAPI_EXPECTS( + reinterpret_cast(fan_out) + ->type_ == INT32, + CUGRAPH_INVALID_INPUT, + "fan_out should be of type int", + *error); + + bool is_biased = true; + + neighbor_sampling_functor functor{handle, + rng_state, + graph, + edge_biases, + start_vertices, + start_vertex_labels, + label_list, + label_to_comm_rank, + label_offsets, + fan_out, + nullptr, + std::move(options_cpp), + is_biased, + do_expensive_check}; + return cugraph::c_api::run_algorithm(graph, functor, result, error); +} + +cugraph_error_code_t cugraph_heterogeneous_neighbor_sample( + const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + const cugraph_edge_property_view_t* edge_biases, + const cugraph_type_erased_device_array_view_t* start_vertices, + const cugraph_type_erased_device_array_view_t* start_vertex_labels, + const cugraph_type_erased_device_array_view_t* label_list, + const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, const cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fan_out, const cugraph_sampling_options_t* options, bool_t is_biased, @@ -1047,36 +1165,20 @@ cugraph_error_code_t cugraph_neighbor_sample( CUGRAPH_INVALID_INPUT, "cannot specify label_to_comm_rank unless label_list is also specified", *error); - - CAPI_EXPECTS(!((fan_out != nullptr) && 
(heterogeneous_fan_out != nullptr)), - CUGRAPH_INVALID_INPUT, - "cannot specify both fan_out and heterogeneous_fan_out", - *error); - - if (fan_out != nullptr) { - CAPI_EXPECTS(reinterpret_cast( - fan_out) - ->type_ == INT32, - CUGRAPH_INVALID_INPUT, - "fan_out type must be INT32", - *error); - - } else { - CAPI_EXPECTS(reinterpret_cast( - std::get<0>(*reinterpret_cast(heterogeneous_fan_out))) - ->type_ == INT32, - CUGRAPH_INVALID_INPUT, - "edge type offsets type must be INT32", - *error); - - CAPI_EXPECTS(reinterpret_cast( - std::get<0>(*reinterpret_cast(heterogeneous_fan_out))) - ->type_ == INT32, - CUGRAPH_INVALID_INPUT, - "fan_out values type must be INT32", - *error); - } + CAPI_EXPECTS(reinterpret_cast( + std::get<0>(*reinterpret_cast(heterogeneous_fan_out))) + ->type_ == INT32, + CUGRAPH_INVALID_INPUT, + "edge type offsets type must be INT32", + *error); + + CAPI_EXPECTS(reinterpret_cast( + std::get<1>(*reinterpret_cast(heterogeneous_fan_out))) + ->type_ == INT32, + CUGRAPH_INVALID_INPUT, + "fan_out values type must be INT32", + *error); CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == reinterpret_cast( @@ -1086,7 +1188,6 @@ cugraph_error_code_t cugraph_neighbor_sample( "vertex type of graph and start_vertices must match", *error); - neighbor_sampling_functor functor{handle, rng_state, graph, @@ -1096,7 +1197,7 @@ cugraph_error_code_t cugraph_neighbor_sample( label_list, label_to_comm_rank, label_offsets, - fan_out, + nullptr, heterogeneous_fan_out, std::move(options_cpp), is_biased, @@ -1104,8 +1205,9 @@ cugraph_error_code_t cugraph_neighbor_sample( return cugraph::c_api::run_algorithm(graph, functor, result, error); } -cugraph_error_code_t cugraph_biased_neighbor_sample( +cugraph_error_code_t cugraph_homogeneous_neighbor_sample( const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, @@ -1114,21 
+1216,23 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( const cugraph_type_erased_device_array_view_t* label_to_comm_rank, const cugraph_type_erased_device_array_view_t* label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, - cugraph_rng_state_t* rng_state, const cugraph_sampling_options_t* options, + bool_t is_biased, bool_t do_expensive_check, cugraph_sample_result_t** result, cugraph_error_t** error) { auto options_cpp = *reinterpret_cast(options); - CAPI_EXPECTS( - (edge_biases != nullptr) || - (reinterpret_cast(graph)->edge_weights_ != nullptr), - CUGRAPH_INVALID_INPUT, - "edge_biases is required if the graph is not weighted", - *error); - + if (is_biased) { + CAPI_EXPECTS( + (edge_biases != nullptr) || + (reinterpret_cast(graph)->edge_weights_ != nullptr), + CUGRAPH_INVALID_INPUT, + "edge_biases is required if the graph is not weighted", + *error); + } + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (label_offsets != nullptr), CUGRAPH_INVALID_INPUT, "must specify label_offsets if retain_seeds is true", @@ -1151,7 +1255,14 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( CUGRAPH_INVALID_INPUT, "cannot specify label_to_comm_rank unless label_list is also specified", *error); - + + CAPI_EXPECTS(reinterpret_cast( + fan_out) + ->type_ == INT32, + CUGRAPH_INVALID_INPUT, + "fan_out type must be INT32", + *error); + CAPI_EXPECTS(reinterpret_cast(graph)->vertex_type_ == reinterpret_cast( start_vertices) @@ -1160,14 +1271,6 @@ cugraph_error_code_t cugraph_biased_neighbor_sample( "vertex type of graph and start_vertices must match", *error); - CAPI_EXPECTS( - reinterpret_cast(fan_out) - ->type_ == INT32, - CUGRAPH_INVALID_INPUT, - "fan_out should be of type int", - *error); - - bool is_biased = true; neighbor_sampling_functor functor{handle, rng_state, diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index fe9cceb1fd1..a49102ea7ad 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp 
+++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -472,6 +472,63 @@ biased_neighbor_sample( do_expensive_check); } +template +std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +heterogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + std::tuple, raft::host_span> + heterogeneous_fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check) +{ + return detail::neighbor_sample_impl( + handle, + graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + edge_bias_view, + starting_vertices, + starting_vertex_labels, + label_to_output_comm_rank, + std::nullopt, + heterogeneous_fan_out, + return_hops, + with_replacement, + prior_sources_behavior, + dedupe_sources, + rng_state, + do_expensive_check); +} + + @@ -492,7 +549,7 @@ std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +homogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -504,9 +561,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> - heterogeneous_fan_out, + raft::host_span fan_out, bool return_hops, bool with_replacement, prior_sources_behavior_t prior_sources_behavior, @@ -524,7 +579,7 @@ neighbor_sample( starting_vertex_labels, label_to_output_comm_rank, fan_out, - heterogeneous_fan_out, + std::nullopt, return_hops, with_replacement, 
prior_sources_behavior, diff --git a/cpp/src/sampling/neighbor_sampling_mg_v32_e32.cpp b/cpp/src/sampling/neighbor_sampling_mg_v32_e32.cpp index 5aa325c8578..917e8fcc6e1 100644 --- a/cpp/src/sampling/neighbor_sampling_mg_v32_e32.cpp +++ b/cpp/src/sampling/neighbor_sampling_mg_v32_e32.cpp @@ -127,15 +127,71 @@ biased_neighbor_sample( bool dedupe_sources, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +heterogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + std::tuple, raft::host_span> + heterogeneous_fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + template std::tuple, rmm::device_uvector, - std::optional>, + std::optional>, std::optional>, std::optional>, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + std::tuple, raft::host_span> + heterogeneous_fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + 
std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -147,9 +203,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> - heterogeneous_fan_out, + raft::host_span fan_out, bool return_hops, bool with_replacement, prior_sources_behavior_t prior_sources_behavior, @@ -164,7 +218,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +homogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -176,13 +230,14 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> - heterogeneous_fan_out, + raft::host_span fan_out, bool return_hops, bool with_replacement, prior_sources_behavior_t prior_sources_behavior, bool dedupe_sources, bool do_expensive_check); + + + } // namespace cugraph diff --git a/cpp/src/sampling/neighbor_sampling_mg_v32_e64.cpp b/cpp/src/sampling/neighbor_sampling_mg_v32_e64.cpp index 913e7d8a62e..71661090023 100644 --- a/cpp/src/sampling/neighbor_sampling_mg_v32_e64.cpp +++ b/cpp/src/sampling/neighbor_sampling_mg_v32_e64.cpp @@ -136,7 +136,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -148,8 +148,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, 
bool with_replacement, @@ -165,7 +164,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -177,8 +176,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, bool with_replacement, @@ -186,4 +184,58 @@ neighbor_sample( bool dedupe_sources, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool 
with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/sampling/neighbor_sampling_mg_v64_e64.cpp b/cpp/src/sampling/neighbor_sampling_mg_v64_e64.cpp index 77ab2e476ac..2a1c0d43ab3 100644 --- a/cpp/src/sampling/neighbor_sampling_mg_v64_e64.cpp +++ b/cpp/src/sampling/neighbor_sampling_mg_v64_e64.cpp @@ -135,7 +135,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -147,8 +147,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, bool with_replacement, @@ -164,7 +163,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -176,8 +175,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, bool with_replacement, @@ -185,4 +183,58 @@ neighbor_sample( bool dedupe_sources, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span 
starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/sampling/neighbor_sampling_sg_v32_e32.cpp b/cpp/src/sampling/neighbor_sampling_sg_v32_e32.cpp index 05ccdfca1f7..c58e1659d7c 100644 --- a/cpp/src/sampling/neighbor_sampling_sg_v32_e32.cpp +++ b/cpp/src/sampling/neighbor_sampling_sg_v32_e32.cpp @@ -127,15 +127,71 @@ biased_neighbor_sample( bool dedupe_sources, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +heterogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + 
std::tuple, raft::host_span> + heterogeneous_fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + template std::tuple, rmm::device_uvector, - std::optional>, + std::optional>, std::optional>, std::optional>, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + std::tuple, raft::host_span> + heterogeneous_fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -147,9 +203,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> - heterogeneous_fan_out, + raft::host_span fan_out, bool return_hops, bool with_replacement, prior_sources_behavior_t prior_sources_behavior, @@ -164,7 +218,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +homogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -176,9 +230,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - 
std::optional> fan_out, - std::optional, raft::host_span>> - heterogeneous_fan_out, + raft::host_span fan_out, bool return_hops, bool with_replacement, prior_sources_behavior_t prior_sources_behavior, diff --git a/cpp/src/sampling/neighbor_sampling_sg_v32_e64.cpp b/cpp/src/sampling/neighbor_sampling_sg_v32_e64.cpp index 178b1322d89..2a70085d490 100644 --- a/cpp/src/sampling/neighbor_sampling_sg_v32_e64.cpp +++ b/cpp/src/sampling/neighbor_sampling_sg_v32_e64.cpp @@ -135,7 +135,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -147,8 +147,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, bool with_replacement, @@ -164,7 +163,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -176,8 +175,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, bool with_replacement, @@ -185,4 +183,58 @@ neighbor_sample( bool dedupe_sources, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + 
std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + } // namespace cugraph diff --git a/cpp/src/sampling/neighbor_sampling_sg_v64_e64.cpp b/cpp/src/sampling/neighbor_sampling_sg_v64_e64.cpp index ba1edd3f630..6d045ad092d 100644 --- a/cpp/src/sampling/neighbor_sampling_sg_v64_e64.cpp +++ b/cpp/src/sampling/neighbor_sampling_sg_v64_e64.cpp @@ -135,7 +135,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> -neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -147,8 +147,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, bool with_replacement, @@ -164,7 +163,7 @@ template std::tuple, std::optional>, std::optional>, std::optional>> 
-neighbor_sample( +heterogeneous_neighbor_sample( raft::handle_t const& handle, raft::random::RngState& rng_state, graph_view_t const& graph_view, @@ -176,8 +175,7 @@ neighbor_sample( std::optional> starting_vertex_labels, std::optional, raft::device_span>> label_to_output_comm_rank, - std::optional> fan_out, - std::optional, raft::host_span>> + std::tuple, raft::host_span> heterogeneous_fan_out, bool return_hops, bool with_replacement, @@ -185,4 +183,58 @@ neighbor_sample( bool dedupe_sources, bool do_expensive_check); +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool do_expensive_check); + +template std::tuple, + rmm::device_uvector, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>, + std::optional>> +homogeneous_neighbor_sample( + raft::handle_t const& handle, + raft::random::RngState& rng_state, + graph_view_t const& graph_view, + std::optional> edge_weight_view, + std::optional> edge_id_view, + std::optional> edge_type_view, + std::optional> edge_bias_view, + raft::device_span starting_vertices, + std::optional> starting_vertex_labels, + std::optional, raft::device_span>> + label_to_output_comm_rank, + raft::host_span fan_out, + bool return_hops, + bool with_replacement, + prior_sources_behavior_t prior_sources_behavior, + bool dedupe_sources, + bool 
do_expensive_check); + } // namespace cugraph diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd index 0934507d56c..6862ae6054c 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd @@ -63,7 +63,7 @@ cdef extern from "cugraph_c/sampling_algorithms.h": cugraph_error_t** error ) - cdef cugraph_error_code_t cugraph_neighbor_sample( + cdef cugraph_error_code_t cugraph_heterogeneous_neighbor_sample( const cugraph_resource_handle_t* handle, cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, @@ -73,7 +73,6 @@ cdef extern from "cugraph_c/sampling_algorithms.h": const cugraph_type_erased_device_array_view_t* label_list, const cugraph_type_erased_device_array_view_t* label_to_comm_rank, const cugraph_type_erased_device_array_view_t* label_offsets, - const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fanout, const cugraph_sampling_options_t* options, bool_t is_biased, @@ -82,6 +81,24 @@ cdef extern from "cugraph_c/sampling_algorithms.h": cugraph_error_t** error ) + cdef cugraph_error_code_t cugraph_homogeneous_neighbor_sample( + const cugraph_resource_handle_t* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + const cugraph_edge_property_view_t* edge_biases, + const cugraph_type_erased_device_array_view_t* start_vertices, + const cugraph_type_erased_device_array_view_t* start_vertex_labels, + const cugraph_type_erased_device_array_view_t* label_list, + const cugraph_type_erased_device_array_view_t* label_to_comm_rank, + const cugraph_type_erased_device_array_view_t* label_offsets, + const cugraph_type_erased_host_array_view_t* fan_out, + const cugraph_sampling_options_t* options, + bool_t is_biased, + bool_t do_expensive_check, + cugraph_sample_result_t** result, + cugraph_error_t** error + ) 
+ cdef cugraph_error_code_t cugraph_biased_neighbor_sample( const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, @@ -137,5 +154,5 @@ cdef extern from "cugraph_c/sampling_algorithms.h": ) cdef void \ - cugraph_heterogeneous_fanout_free( + cugraph_heterogeneous_fan_out_free( cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fanout); diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_neighbor_sample.pyx new file mode 100644 index 00000000000..0f999adf93d --- /dev/null +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_neighbor_sample.pyx @@ -0,0 +1,511 @@ +# Copyright (c) 2022-2024, NVIDIA CORPORATION. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +# Have cython use python 3 syntax +# cython: language_level = 3 + +from libc.stdint cimport uintptr_t + +from pylibcugraph._cugraph_c.resource_handle cimport ( + bool_t, + cugraph_resource_handle_t, +) +from pylibcugraph._cugraph_c.properties cimport ( + cugraph_edge_property_view_t, +) +from pylibcugraph._cugraph_c.error cimport ( + cugraph_error_code_t, + cugraph_error_t, +) +from pylibcugraph._cugraph_c.array cimport ( + cugraph_type_erased_device_array_view_t, + cugraph_type_erased_device_array_view_create, + cugraph_type_erased_device_array_view_free, + cugraph_type_erased_host_array_view_t, + cugraph_type_erased_host_array_view_create, + cugraph_type_erased_host_array_view_free, +) +from pylibcugraph._cugraph_c.graph cimport ( + cugraph_graph_t, +) +from pylibcugraph._cugraph_c.algorithms cimport ( + cugraph_sample_result_t, + cugraph_prior_sources_behavior_t, + cugraph_compression_type_t, + cugraph_sampling_options_t, + cugraph_sampling_options_create, + cugraph_sampling_options_free, + cugraph_sampling_set_with_replacement, + cugraph_sampling_set_return_hops, + cugraph_sampling_set_prior_sources_behavior, + cugraph_sampling_set_dedupe_sources, + cugraph_sampling_set_renumber_results, + cugraph_sampling_set_compress_per_hop, + cugraph_sampling_set_compression_type, + cugraph_sampling_set_retain_seeds, +) +from pylibcugraph._cugraph_c.sampling_algorithms cimport ( + cugraph_heterogeneous_neighbor_sample, + cugraph_sample_heterogeneous_fan_out_t, + cugraph_create_heterogeneous_fan_out, + cugraph_heterogeneous_fan_out_free, +) +from pylibcugraph.resource_handle cimport ( + ResourceHandle, +) +from pylibcugraph.graphs cimport ( + _GPUGraph, +) +from pylibcugraph.utils cimport ( + assert_success, + assert_CAI_type, + assert_AI_type, + get_c_type_from_numpy_type, +) +from pylibcugraph.internal_types.sampling_result cimport ( + SamplingResult, +) +from pylibcugraph._cugraph_c.random cimport ( + cugraph_rng_state_t +) +from pylibcugraph.random cimport ( + 
CuGraphRandomState +) +import warnings + +# TODO accept cupy/numpy random state in addition to raw seed. +def heterogeneous_neighbor_sample(ResourceHandle resource_handle, + _GPUGraph input_graph, + start_list, + h_fan_out, + bool_t is_biased, + bool_t with_replacement, + bool_t do_expensive_check, + with_edge_properties=False, + batch_id_list=None, + label_list=None, + label_to_output_comm_rank=None, + label_offsets=None, + prior_sources_behavior=None, + deduplicate_sources=False, + return_hops=False, + renumber=False, + retain_seeds=False, + compression='COO', + compress_per_hop=False, + random_state=None, + return_dict=False,): + """ + # FIXME: Deprecate uniform_neighbor_sample + Does both uniform and biased neighborhood sampling, which samples nodes from + a graph based on the current node's neighbors, with a corresponding fan_out + value at each hop. + + # FIXME: biased neighbor sampling is not yet exposed to PLC. + + Parameters + ---------- + resource_handle: ResourceHandle + Handle to the underlying device and host resources needed for + referencing data and running algorithms. + + input_graph : SGGraph or MGGraph + The input graph, for either Single or Multi-GPU operations. + + start_list: device array type + Device array containing the list of starting vertices for sampling. + + edge_biases: FIXME: update this - Create edge_biases of type + 'cugraph_edge_property_view_t' - edge biases not yet supported. + + h_fan_out: tuple of numpy array type + Host arrays containing the branching out (fan-out) degrees per + starting vertex for each hop level in CSR style format. The first + element of the tuple is the offset array per edge type id and the second + element corresponds to the fan_out values. + The sampling method can use different fan_out values for each edge type. + + is_biased: bool + If False, sampling is uniform; otherwise it is done with biases.
+ + with_replacement: bool + If true, sampling procedure is done with replacement (the same vertex + can be selected multiple times in the same step). + + do_expensive_check: bool + If True, performs more extensive tests on the inputs to ensure + validity, at the expense of increased run time. + + with_edge_properties: bool + If True, returns the edge properties of each edge along with the + edges themselves. Will result in an error if the provided graph + does not have edge properties. + + batch_id_list: list[int32] (Optional) + List of int32 batch ids that is returned with each edge. Optional + argument, defaults to NULL, returning nothing. + + label_list: list[int32] (Optional) + List of unique int32 batch ids. Required if also passing the + label_to_output_comm_rank flag. Defaults to NULL (does nothing) + + label_to_output_comm_rank: list[int32] (Optional) + Maps the unique batch ids in label_list to the rank of the + worker that should hold results for that batch id. + Defaults to NULL (does nothing) + + label_offsets: list[int] (Optional) + Offsets of each label within the start vertex list. + + prior_sources_behavior: str (Optional) + Options are "carryover" and "exclude". + Default will leave the source list as-is. + Carryover will carry over sources from previous hops to the + current hop. + Exclude will exclude sources from previous hops from reappearing + as sources in future hops. + + deduplicate_sources: bool (Optional) + If True, will deduplicate the source list before sampling. + Defaults to False. + + renumber: bool (Optional) + If True, will renumber the sources and destinations on a + per-batch basis and return the renumber map and batch offsets + in addition to the standard returns. + + retain_seeds: bool (Optional) + If True, will retain the original seeds (original source vertices) + in the output even if they do not have outgoing neighbors. + Defaults to False.
+ + compression: str (Optional) + Options: COO (default), CSR, CSC, DCSR, DCSC + Sets the compression format for the returned samples. + + compress_per_hop: bool (Optional) + If False (default), will create a compressed edgelist for the + entire batch. + If True, will create a separate compressed edgelist per hop within + a batch. + + random_state: int (Optional) + Random state to use when generating samples. Optional argument, + defaults to a hash of process id, time, and hostname. + (See pylibcugraph.random.CuGraphRandomState) + + return_dict: bool (Optional) + Whether to return a dictionary instead of a tuple. + Optional argument, defaults to False, returning a tuple. + This argument will eventually be deprecated in favor + of always returning a dictionary. + + Returns + ------- + A tuple of device arrays, where the first and second items in the tuple + are device arrays containing the starting and ending vertices of each + walk respectively, the third item in the tuple is a device array + containing the start labels, and the fourth item in the tuple is a device + array containing the indices for reconstructing paths. + + If renumber was set to True, then the fifth item in the tuple is a device + array containing the renumber map, and the sixth item in the tuple is a + device array containing the renumber map offsets (which delineate where + the renumber map for each batch starts).
+ + """ + cdef cugraph_resource_handle_t* c_resource_handle_ptr = ( + resource_handle.c_resource_handle_ptr + ) + + cdef cugraph_graph_t* c_graph_ptr = input_graph.c_graph_ptr + cdef cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fan_out_ptr = NULL + cdef cugraph_type_erased_host_array_view_t* fan_out_ptr = NULL + cdef cugraph_type_erased_host_array_view_t* fan_out_offsets_ptr = NULL + cdef cugraph_type_erased_host_array_view_t* fan_out_values_ptr = NULL + + cdef bool_t c_deduplicate_sources = deduplicate_sources + cdef bool_t c_return_hops = return_hops + cdef bool_t c_renumber = renumber + cdef bool_t c_compress_per_hop = compress_per_hop + + cdef cugraph_error_code_t error_code + cdef cugraph_error_t* error_ptr + cdef uintptr_t ai_fan_out_ptr + cdef uintptr_t ai_fan_out_offsets_ptr + cdef uintptr_t ai_fan_out_values_ptr + + + assert_CAI_type(start_list, "start_list") + assert_CAI_type(batch_id_list, "batch_id_list", True) + assert_CAI_type(label_list, "label_list", True) + assert_CAI_type(label_to_output_comm_rank, "label_to_output_comm_rank", True) + assert_CAI_type(label_offsets, "label_offsets", True) + + assert_AI_type(h_fan_out[0], "h_fan_out_size") + assert_AI_type(h_fan_out[1], "h_fan_out_values") + ai_fan_out_offsets_ptr = \ + h_fan_out[0].__array_interface__["data"][0] + ai_fan_out_values_ptr = \ + h_fan_out[1].__array_interface__["data"][0] + + fan_out_offsets_ptr = \ + cugraph_type_erased_host_array_view_create( + ai_fan_out_offsets_ptr, + len(h_fan_out[0]), + get_c_type_from_numpy_type(h_fan_out[0].dtype)) + + fan_out_values_ptr = \ + cugraph_type_erased_host_array_view_create( + ai_fan_out_values_ptr, + len(h_fan_out[1]), + get_c_type_from_numpy_type(h_fan_out[1].dtype)) + + error_code = cugraph_create_heterogeneous_fan_out( + c_resource_handle_ptr, + c_graph_ptr, + fan_out_offsets_ptr, + fan_out_values_ptr, + &heterogeneous_fan_out_ptr, + &error_ptr + ) + + assert_success(error_code, error_ptr, "cugraph_create_heterogeneous_fan_out") + + 
cdef cugraph_sample_result_t* result_ptr + + cdef uintptr_t cai_start_ptr = \ + start_list.__cuda_array_interface__["data"][0] + + cdef uintptr_t cai_batch_id_ptr + if batch_id_list is not None: + cai_batch_id_ptr = \ + batch_id_list.__cuda_array_interface__['data'][0] + + cdef uintptr_t cai_label_list_ptr + if label_list is not None: + cai_label_list_ptr = \ + label_list.__cuda_array_interface__['data'][0] + + cdef uintptr_t cai_label_to_output_comm_rank_ptr + if label_to_output_comm_rank is not None: + cai_label_to_output_comm_rank_ptr = \ + label_to_output_comm_rank.__cuda_array_interface__['data'][0] + + cdef uintptr_t cai_label_offsets_ptr + if label_offsets is not None: + cai_label_offsets_ptr = \ + label_offsets.__cuda_array_interface__['data'][0] + + cdef cugraph_type_erased_device_array_view_t* start_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_start_ptr, + len(start_list), + get_c_type_from_numpy_type(start_list.dtype)) + + cdef cugraph_type_erased_device_array_view_t* batch_id_ptr = NULL + if batch_id_list is not None: + batch_id_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_batch_id_ptr, + len(batch_id_list), + get_c_type_from_numpy_type(batch_id_list.dtype) + ) + + cdef cugraph_type_erased_device_array_view_t* label_list_ptr = NULL + if label_list is not None: + label_list_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_label_list_ptr, + len(label_list), + get_c_type_from_numpy_type(label_list.dtype) + ) + + cdef cugraph_type_erased_device_array_view_t* label_to_output_comm_rank_ptr = NULL + if label_to_output_comm_rank is not None: + label_to_output_comm_rank_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_label_to_output_comm_rank_ptr, + len(label_to_output_comm_rank), + get_c_type_from_numpy_type(label_to_output_comm_rank.dtype) + ) + + cdef cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL + if retain_seeds: + if label_offsets is None: + raise ValueError("Must provide 
label offsets if retain_seeds is True") + label_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_label_offsets_ptr, + len(label_offsets), + get_c_type_from_numpy_type(label_offsets.dtype) + ) + + cg_rng_state = CuGraphRandomState(resource_handle, random_state) + + cdef cugraph_rng_state_t* rng_state_ptr = \ + cg_rng_state.rng_state_ptr + + cdef cugraph_prior_sources_behavior_t prior_sources_behavior_e + if prior_sources_behavior is None: + prior_sources_behavior_e = cugraph_prior_sources_behavior_t.DEFAULT + elif prior_sources_behavior == 'carryover': + prior_sources_behavior_e = cugraph_prior_sources_behavior_t.CARRY_OVER + elif prior_sources_behavior == 'exclude': + prior_sources_behavior_e = cugraph_prior_sources_behavior_t.EXCLUDE + else: + raise ValueError( + f'Invalid option {prior_sources_behavior}' + ' for prior sources behavior' + ) + + cdef cugraph_compression_type_t compression_behavior_e + if compression is None or compression == 'COO': + compression_behavior_e = cugraph_compression_type_t.COO + elif compression == 'CSR': + compression_behavior_e = cugraph_compression_type_t.CSR + elif compression == 'CSC': + compression_behavior_e = cugraph_compression_type_t.CSC + elif compression == 'DCSR': + compression_behavior_e = cugraph_compression_type_t.DCSR + elif compression == 'DCSC': + compression_behavior_e = cugraph_compression_type_t.DCSC + else: + raise ValueError( + f'Invalid option {compression}' + ' for compression type' + ) + + cdef cugraph_sampling_options_t* sampling_options + error_code = cugraph_sampling_options_create(&sampling_options, &error_ptr) + assert_success(error_code, error_ptr, "cugraph_sampling_options_create") + + cugraph_sampling_set_with_replacement(sampling_options, with_replacement) + cugraph_sampling_set_return_hops(sampling_options, c_return_hops) + cugraph_sampling_set_dedupe_sources(sampling_options, c_deduplicate_sources) + cugraph_sampling_set_prior_sources_behavior(sampling_options, 
prior_sources_behavior_e) + cugraph_sampling_set_renumber_results(sampling_options, c_renumber) + cugraph_sampling_set_compression_type(sampling_options, compression_behavior_e) + cugraph_sampling_set_compress_per_hop(sampling_options, c_compress_per_hop) + cugraph_sampling_set_retain_seeds(sampling_options, retain_seeds) + + error_code = cugraph_heterogeneous_neighbor_sample( + c_resource_handle_ptr, + rng_state_ptr, + c_graph_ptr, + NULL, # FIXME: Add support for biased neighbor sampling + start_ptr, + batch_id_ptr, + label_list_ptr, + label_to_output_comm_rank_ptr, + label_offsets_ptr, + heterogeneous_fan_out_ptr, + sampling_options, + is_biased, + do_expensive_check, + &result_ptr, + &error_ptr) + assert_success(error_code, error_ptr, "cugraph_heterogeneous_neighbor_sample") + + # Free the sampling options + cugraph_sampling_options_free(sampling_options) + + if isinstance(h_fan_out, tuple): + cugraph_heterogeneous_fan_out_free(heterogeneous_fan_out_ptr) + + # Free the two input arrays that are no longer needed. + cugraph_type_erased_device_array_view_free(start_ptr) + cugraph_type_erased_host_array_view_free(fan_out_ptr) + if batch_id_list is not None: + cugraph_type_erased_device_array_view_free(batch_id_ptr) + if label_offsets is not None: + cugraph_type_erased_device_array_view_free(label_offsets_ptr) + + # Have the SamplingResult instance assume ownership of the result data. + result = SamplingResult() + result.set_ptr(result_ptr) + + # Get cupy "views" of the individual arrays to return. These each increment + # the refcount on the SamplingResult instance which will keep the data alive + # until all references are removed and the GC runs. 
+ # TODO Return everything that isn't null in release 23.12 + if with_edge_properties: + cupy_majors = result.get_majors() + cupy_major_offsets = result.get_major_offsets() + cupy_minors = result.get_minors() + cupy_edge_weights = result.get_edge_weights() + cupy_edge_ids = result.get_edge_ids() + cupy_edge_types = result.get_edge_types() + cupy_batch_ids = result.get_batch_ids() + cupy_label_hop_offsets = result.get_label_hop_offsets() + + if renumber: + cupy_renumber_map = result.get_renumber_map() + cupy_renumber_map_offsets = result.get_renumber_map_offsets() + # TODO drop the placeholder for hop ids in release 23.12 + if return_dict: + return { + 'major_offsets': cupy_major_offsets, + 'majors': cupy_majors, + 'minors': cupy_minors, + 'weight': cupy_edge_weights, + 'edge_id': cupy_edge_ids, + 'edge_type': cupy_edge_types, + 'batch_id': cupy_batch_ids, + 'label_hop_offsets': cupy_label_hop_offsets, + 'hop_id': None, + 'renumber_map': cupy_renumber_map, + 'renumber_map_offsets': cupy_renumber_map_offsets + } + else: + cupy_majors = cupy_major_offsets if cupy_majors is None else cupy_majors + return (cupy_majors, cupy_minors, cupy_edge_weights, cupy_edge_ids, cupy_edge_types, cupy_batch_ids, cupy_label_hop_offsets, None, cupy_renumber_map, cupy_renumber_map_offsets) + else: + cupy_hop_ids = result.get_hop_ids() # FIXME remove this + if return_dict: + return { + 'major_offsets': cupy_major_offsets, + 'majors': cupy_majors, + 'minors': cupy_minors, + 'weight': cupy_edge_weights, + 'edge_id': cupy_edge_ids, + 'edge_type': cupy_edge_types, + 'batch_id': cupy_batch_ids, + 'label_hop_offsets': cupy_label_hop_offsets, + 'hop_id': cupy_hop_ids, + } + else: + cupy_majors = cupy_major_offsets if cupy_majors is None else cupy_majors + return (cupy_majors, cupy_minors, cupy_edge_weights, cupy_edge_ids, cupy_edge_types, cupy_batch_ids, cupy_label_hop_offsets, cupy_hop_ids) + + else: + # TODO this is deprecated, remove it in release 23.12 + warnings.warn( + "Calling 
uniform_neighbor_sample with the 'with_edge_properties' argument is deprecated." + " Starting in release 23.12, this argument will be removed in favor of behaving like the " + "with_edge_properties=True option, returning whatever properties are in the graph.", + FutureWarning, + ) + + cupy_sources = result.get_sources() + cupy_destinations = result.get_destinations() + cupy_indices = result.get_indices() + + if return_dict: + return { + 'sources': cupy_sources, + 'destinations': cupy_destinations, + 'indices': cupy_indices + } + else: + return (cupy_sources, cupy_destinations, cupy_indices) diff --git a/python/pylibcugraph/pylibcugraph/neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_neighbor_sample.pyx similarity index 90% rename from python/pylibcugraph/pylibcugraph/neighbor_sample.pyx rename to python/pylibcugraph/pylibcugraph/homogeneous_neighbor_sample.pyx index 8f40df523c7..6b89980f171 100644 --- a/python/pylibcugraph/pylibcugraph/neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_neighbor_sample.pyx @@ -55,10 +55,9 @@ from pylibcugraph._cugraph_c.algorithms cimport ( cugraph_sampling_set_retain_seeds, ) from pylibcugraph._cugraph_c.sampling_algorithms cimport ( - cugraph_neighbor_sample, + cugraph_homogeneous_neighbor_sample, cugraph_sample_heterogeneous_fan_out_t, - cugraph_create_heterogeneous_fan_out, - cugraph_heterogeneous_fanout_free, + cugraph_heterogeneous_fan_out_free, ) from pylibcugraph.resource_handle cimport ( ResourceHandle, @@ -84,7 +83,7 @@ from pylibcugraph.random cimport ( import warnings # TODO accept cupy/numpy random state in addition to raw seed. 
-def neighbor_sample(ResourceHandle resource_handle, +def heterogeneous_neighbor_sample(ResourceHandle resource_handle, _GPUGraph input_graph, start_list, h_fan_out, @@ -108,7 +107,7 @@ def neighbor_sample(ResourceHandle resource_handle, """ # FIXME: Deprecate uniform_neighbor_sample Does both uniform and biased neighborhood sampling, which samples nodes from - a graph based on the current node's neighbors, with a corresponding fanout + a graph based on the current node's neighbors, with a corresponding fan_out value at each hop. # FIXME: biased neighbor sampling is not yet exposed to PLC. @@ -128,11 +127,13 @@ def neighbor_sample(ResourceHandle resource_handle, edge_biases: FIXE: update this - Create edge_biases of type 'cugraph_edge_property_view_t' - edge biases not yet supported. - h_fan_out: numpy array type or tuple of numpy array type - Device array containing the brancing out (fan-out) degrees per - starting vertex for each hop level. + h_fan_out: numpy array type + Host array containing the branching out (fan-out) degrees per + starting vertex for each hop level. The sampling method uses the same + fan_out value for each type. - is_biased: bool # FIXME: Update docstrings + is_biased: bool + If false, sampling is done uniformly; otherwise it is done with biases.
with_replacement: bool If true, sampling procedure is done with replacement (the same vertex @@ -225,7 +226,7 @@ def neighbor_sample(ResourceHandle resource_handle, ) cdef cugraph_graph_t* c_graph_ptr = input_graph.c_graph_ptr - cdef cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fanout_ptr = NULL + cdef cugraph_sample_heterogeneous_fan_out_t* heterogeneous_fan_out_ptr = NULL cdef cugraph_type_erased_host_array_view_t* fan_out_ptr = NULL cdef cugraph_type_erased_host_array_view_t* fan_out_size_ptr = NULL cdef cugraph_type_erased_host_array_view_t* fan_out_values_ptr = NULL @@ -247,44 +248,16 @@ def neighbor_sample(ResourceHandle resource_handle, assert_CAI_type(label_list, "label_list", True) assert_CAI_type(label_to_output_comm_rank, "label_to_output_comm_rank", True) assert_CAI_type(label_offsets, "label_offsets", True) - if not isinstance(h_fan_out, tuple): - assert_AI_type(h_fan_out, "h_fan_out") - ai_fan_out_ptr = \ - h_fan_out.__array_interface__["data"][0] - - fan_out_ptr = \ - cugraph_type_erased_host_array_view_create( - ai_fan_out_ptr, - len(h_fan_out), - get_c_type_from_numpy_type(h_fan_out.dtype)) - else: - assert_AI_type(h_fan_out[0], "h_fan_out_size") - assert_AI_type(h_fan_out[1], "h_fan_out_values") - ai_fan_out_size_ptr = \ - h_fan_out[0].__array_interface__["data"][0] - ai_fan_out_values_ptr = \ - h_fan_out[1].__array_interface__["data"][0] - - fan_out_size_ptr = \ - cugraph_type_erased_host_array_view_create( - ai_fan_out_size_ptr, - len(h_fan_out[0]), - get_c_type_from_numpy_type(h_fan_out[0].dtype)) - - fan_out_values_ptr = \ - cugraph_type_erased_host_array_view_create( - ai_fan_out_values_ptr, - len(h_fan_out[1]), - get_c_type_from_numpy_type(h_fan_out[1].dtype)) - - error_code = cugraph_create_heterogeneous_fan_out( - c_resource_handle_ptr, - c_graph_ptr, - fan_out_size_ptr, - fan_out_values_ptr, - &heterogeneous_fanout_ptr, - &error_ptr - ) + + assert_AI_type(h_fan_out, "h_fan_out") + ai_fan_out_ptr = \ + 
h_fan_out.__array_interface__["data"][0] + + fan_out_ptr = \ + cugraph_type_erased_host_array_view_create( + ai_fan_out_ptr, + len(h_fan_out), + get_c_type_from_numpy_type(h_fan_out.dtype)) assert_success(error_code, error_ptr, "cugraph_create_heterogeneous_fan_out") @@ -405,7 +378,7 @@ def neighbor_sample(ResourceHandle resource_handle, cugraph_sampling_set_compress_per_hop(sampling_options, c_compress_per_hop) cugraph_sampling_set_retain_seeds(sampling_options, retain_seeds) - error_code = cugraph_neighbor_sample( + error_code = cugraph_homogeneous_neighbor_sample( c_resource_handle_ptr, rng_state_ptr, c_graph_ptr, @@ -416,19 +389,18 @@ def neighbor_sample(ResourceHandle resource_handle, label_to_output_comm_rank_ptr, label_offsets_ptr, fan_out_ptr, - heterogeneous_fanout_ptr, sampling_options, is_biased, do_expensive_check, &result_ptr, &error_ptr) - assert_success(error_code, error_ptr, "cugraph_neighbor_sample") + assert_success(error_code, error_ptr, "cugraph_homogeneous_neighbor_sample") # Free the sampling options cugraph_sampling_options_free(sampling_options) if isinstance(h_fan_out, tuple): - cugraph_heterogeneous_fanout_free(heterogeneous_fanout_ptr) + cugraph_heterogeneous_fan_out_free(fan_out_ptr) # Free the two input arrays that are no longer needed. cugraph_type_erased_device_array_view_free(start_ptr)