From 50a11b1449dfb2945240ab5c170fd17f48cea801 Mon Sep 17 00:00:00 2001 From: Joseph Nke <76006812+jnke2016@users.noreply.github.com> Date: Wed, 4 Dec 2024 10:45:23 +0100 Subject: [PATCH] Remove edge renumber map from the homogeneous sampling API (#4775) Edge renumbering is only supported in heterogeneous neighborhood sampling, hence trying to extract it from the PLC API leads to a segmentation fault. This PR: 1. fixes a bug in homogeneous neighborhood sampling (both uniform and biased). 2. properly handles vertex_type_offsets when performing heterogeneous renumbering. 3. fixes a typo in the homogeneous neighborhood sampling docstrings. Authors: - Joseph Nke (https://github.com/jnke2016) Approvers: - Chuck Hastings (https://github.com/ChuckHastings) - Rick Ratzel (https://github.com/rlratzel) - Alex Barghi (https://github.com/alexbarghi-nv) - Seunghwa Kang (https://github.com/seunghwak) URL: https://github.com/rapidsai/cugraph/pull/4775 --- cpp/include/cugraph/sampling_functions.hpp | 16 +- cpp/include/cugraph_c/sampling_algorithms.h | 16 + cpp/src/c_api/neighbor_sampling.cpp | 411 ++++++++++-------- cpp/src/sampling/neighbor_sampling_impl.hpp | 184 +++++--- .../pylibcugraph/_cugraph_c/algorithms.pxd | 5 + .../_cugraph_c/sampling_algorithms.pxd | 2 + .../heterogeneous_biased_neighbor_sample.pyx | 25 +- .../heterogeneous_uniform_neighbor_sample.pyx | 24 + .../homogeneous_biased_neighbor_sample.pyx | 8 +- .../homogeneous_uniform_neighbor_sample.pyx | 8 +- .../internal_types/sampling_result.pyx | 14 + 11 files changed, 459 insertions(+), 254 deletions(-) diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index 3d41e95441..981c42135f 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -306,7 +306,7 @@ struct sampling_flags_t { * @param edge_type_view Optional view object holding edge types for @p graph_view. 
* @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. @@ -340,7 +340,7 @@ homogeneous_uniform_neighbor_sample( std::optional> edge_id_view, std::optional> edge_type_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, sampling_flags_t sampling_flags, @@ -385,7 +385,7 @@ homogeneous_uniform_neighbor_sample( * corresponding edge can never be selected. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. 
@@ -421,7 +421,7 @@ homogeneous_biased_neighbor_sample( std::optional> edge_type_view, edge_property_view_t edge_bias_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, sampling_flags_t sampling_flags, @@ -462,7 +462,7 @@ homogeneous_biased_neighbor_sample( * @param edge_type_view Optional view object holding edge types for @p graph_view. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. @@ -498,7 +498,7 @@ heterogeneous_uniform_neighbor_sample( std::optional> edge_id_view, std::optional> edge_type_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, edge_type_t num_edge_types, @@ -545,7 +545,7 @@ heterogeneous_uniform_neighbor_sample( * corresponding edge can never be selected. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. 
This should be the same on each rank. @@ -583,7 +583,7 @@ heterogeneous_biased_neighbor_sample( std::optional> edge_type_view, edge_property_view_t edge_bias_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, edge_type_t num_edge_types, diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index ef75e726d8..f048d338b9 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -551,6 +551,8 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( * @param [in] start_vertices Device array of start vertices for the sampling * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in * the seed list. This parameter is only used with the retain_seeds option. + * @param [in] vertex_type_offsets Device array of the offsets for each vertex type in the + * graph. * @param [in] fan_out Host array defining the fan out at each step in the sampling * algorithm. 
We only support fan_out values of type INT32 * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous @@ -570,6 +572,7 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -598,6 +601,8 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( * @param [in] start_vertices Device array of start vertices for the sampling * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in * the seed list. This parameter is only used with the retain_seeds option. + * @param [in] vertex_type_offsets Device array of the offsets for each vertex type in the + * graph. * @param [in] fan_out Host array defining the fan out at each step in the sampling * algorithm. 
We only support fan_out values of type INT32 * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous @@ -618,6 +623,7 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -735,6 +741,16 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_hop( cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_hop_offsets( const cugraph_sample_result_t* result); +/** + * @ingroup samplingC + * @brief Get the label-type-hop offsets from the sampling algorithm result + * + * @param [in] result The result from a sampling algorithm + * @return type erased array pointing to the label-type-hop offsets + */ +cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_type_hop_offsets( + const cugraph_sample_result_t* result); + /** * @ingroup samplingC * @brief Get the index from the sampling algorithm result diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index be3a44d813..37982eab82 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -63,6 +63,7 @@ struct cugraph_sample_result_t { cugraph_type_erased_device_array_t* wgt_{nullptr}; cugraph_type_erased_device_array_t* hop_{nullptr}; cugraph_type_erased_device_array_t* label_hop_offsets_{nullptr}; + cugraph_type_erased_device_array_t* label_type_hop_offsets_{nullptr}; cugraph_type_erased_device_array_t* label_{nullptr}; cugraph_type_erased_device_array_t* renumber_map_{nullptr}; cugraph_type_erased_device_array_t* renumber_map_offsets_{nullptr}; @@ -403,6 +404,7 @@ struct 
uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + nullptr, (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, @@ -756,6 +758,7 @@ struct biased_neighbor_sampling_functor : public cugraph::c_api::abstract_functo (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + nullptr, (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, @@ -777,7 +780,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_edge_property_view_t const* edge_biases_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertex_offsets_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{ + nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertex_type_offsets_{nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; int num_edge_types_{}; cugraph::c_api::cugraph_sampling_options_t options_{}; @@ -785,17 +790,19 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { bool do_expensive_check_{false}; cugraph::c_api::cugraph_sample_result_t* result_{nullptr}; - neighbor_sampling_functor(cugraph_resource_handle_t const* handle, - cugraph_rng_state_t* rng_state, - cugraph_graph_t* graph, - cugraph_edge_property_view_t const* edge_biases, - cugraph_type_erased_device_array_view_t const* start_vertices, - cugraph_type_erased_device_array_view_t const* start_vertex_offsets, - cugraph_type_erased_host_array_view_t const* fan_out, - int 
num_edge_types, - cugraph::c_api::cugraph_sampling_options_t options, - bool is_biased, - bool do_expensive_check) + neighbor_sampling_functor( + cugraph_resource_handle_t const* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + cugraph_edge_property_view_t const* edge_biases, + cugraph_type_erased_device_array_view_t const* start_vertices, + cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, + cugraph_type_erased_device_array_view_t const* vertex_type_offsets, + cugraph_type_erased_host_array_view_t const* fan_out, + int num_edge_types, + cugraph::c_api::cugraph_sampling_options_t options, + bool is_biased, + bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), rng_state_(reinterpret_cast(rng_state)), @@ -805,9 +812,12 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { start_vertices_( reinterpret_cast( start_vertices)), - start_vertex_offsets_( + starting_vertex_label_offsets_( + reinterpret_cast( + starting_vertex_label_offsets)), + vertex_type_offsets_( reinterpret_cast( - start_vertex_offsets)), + vertex_type_offsets)), fan_out_( reinterpret_cast(fan_out)), num_edge_types_(num_edge_types), @@ -879,17 +889,17 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> renumbered_and_sorted_edge_id_renumber_map_label_type_offsets(std::nullopt); - if (start_vertex_offsets_ != nullptr) { + if (starting_vertex_label_offsets_ != nullptr) { // Retrieve the start_vertex_labels start_vertex_labels = cugraph::detail::convert_starting_vertex_label_offsets_to_labels( handle_, - raft::device_span{start_vertex_offsets_->as_type(), - start_vertex_offsets_->size_}); + raft::device_span{starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}); // Get the number of labels on each GPU if constexpr (multi_gpu) { - auto num_local_labels = start_vertex_offsets_->size_ - 1; + auto num_local_labels = 
starting_vertex_label_offsets_->size_ - 1; auto global_labels = cugraph::host_scalar_allgather( handle_.get_comms(), num_local_labels, handle_.get_stream()); @@ -897,7 +907,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::exclusive_scan( global_labels.begin(), global_labels.end(), global_labels.begin(), label_t{0}); - // Compute the global start_vertex_label_offsets + // Compute the global starting_vertex_label_offsets cugraph::detail::transform_increment_ints( raft::device_span{(*start_vertex_labels).data(), @@ -996,7 +1006,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, (edge_biases != nullptr) ? *edge_biases : edge_weights->view(), raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1020,7 +1030,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1048,7 +1058,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, (edge_biases != nullptr) ? *edge_biases : edge_weights->view(), raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? 
std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1071,7 +1081,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1108,6 +1118,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> major_offsets{std::nullopt}; std::optional> label_hop_offsets{std::nullopt}; + std::optional> label_type_hop_offsets{std::nullopt}; std::optional> renumber_map{std::nullopt}; std::optional> renumber_map_offsets{std::nullopt}; @@ -1125,21 +1136,129 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { } if (options_.renumber_results_) { - if (num_edge_types_ == 1) { // homogeneous renumbering - if (options_.compression_type_ == cugraph_compression_type_t::COO) { - // COO + if (src.size() > 0) { // Only renumber if there are edgelist to renumber + if (num_edge_types_ == 1) { // homogeneous renumbering + if (options_.compression_type_ == cugraph_compression_type_t::COO) { + // COO + + rmm::device_uvector output_majors(0, handle_.get_stream()); + rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + std::tie(output_majors, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = + cugraph::renumber_and_sort_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + options_.retain_seeds_ + ? 
std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + offsets ? (*offsets).size() - 1 : size_t{1}, + hop ? fan_out_->size_ : size_t{1}, + src_is_major, + do_expensive_check_); + + majors.emplace(std::move(output_majors)); + renumber_map.emplace(std::move(output_renumber_map)); + } else { + // (D)CSC, (D)CSR + + bool doubly_compress = + (options_.compression_type_ == cugraph_compression_type_t::DCSR) || + (options_.compression_type_ == cugraph_compression_type_t::DCSC); + + rmm::device_uvector output_major_offsets(0, handle_.get_stream()); + rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + + std::tie(majors, + output_major_offsets, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = + cugraph::renumber_and_compress_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + edge_label ? (*offsets).size() - 1 : size_t{1}, // FIXME: update edge_label + hop ? 
fan_out_->size_ : size_t{1}, + src_is_major, + options_.compress_per_hop_, + doubly_compress, + do_expensive_check_); + + renumber_map.emplace(std::move(output_renumber_map)); + major_offsets.emplace(std::move(output_major_offsets)); + } + + // These are now represented by label_hop_offsets + hop.reset(); + offsets.reset(); + + } else { // heterogeneous renumbering + + rmm::device_uvector vertex_type_offsets(2, handle_.get_stream()); + + if (vertex_type_offsets_ == nullptr) { + // If no 'vertex_type_offsets' is provided, all vertices are assumed to have + // a vertex type of value 1. + cugraph::detail::stride_fill(handle_.get_stream(), + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}, + vertex_t{graph_view.local_vertex_partition_range_size()} + + ); + } rmm::device_uvector output_majors(0, handle_.get_stream()); rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + std::tie(output_majors, minors, wgt, edge_id, - edge_type, - label_hop_offsets, + label_type_hop_offsets, // Contains information about the type and hop offsets output_renumber_map, - renumber_map_offsets) = - cugraph::renumber_and_sort_sampled_edgelist( + renumber_map_offsets, + renumbered_and_sorted_edge_id_renumber_map, + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = + cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( handle_, std::move(src), std::move(dst), @@ -1151,140 +1270,47 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? 
std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, - offsets ? (*offsets).size() - 1 : size_t{1}, - hop ? fan_out_->size_ : size_t{1}, - src_is_major, - do_expensive_check_); - majors.emplace(std::move(output_majors)); - renumber_map.emplace(std::move(output_renumber_map)); - } else { - // (D)CSC, (D)CSR - - bool doubly_compress = - (options_.compression_type_ == cugraph_compression_type_t::DCSR) || - (options_.compression_type_ == cugraph_compression_type_t::DCSC); - - rmm::device_uvector output_major_offsets(0, handle_.get_stream()); - rmm::device_uvector output_renumber_map(0, handle_.get_stream()); - - std::tie(majors, - output_major_offsets, - minors, - wgt, - edge_id, - edge_type, - label_hop_offsets, - output_renumber_map, - renumber_map_offsets) = - cugraph::renumber_and_compress_sampled_edgelist( - handle_, - std::move(src), - std::move(dst), - std::move(wgt), - std::move(edge_id), - std::move(edge_type), - std::move(hop), - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertices_->as_type(), start_vertices_->size_}) - : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, - offsets ? std::make_optional( - raft::device_span{offsets->data(), offsets->size()}) - : std::nullopt, - edge_label ? (*offsets).size() - 1 : size_t{1}, // FIXME: update edge_label - hop ? fan_out_->size_ : size_t{1}, + (vertex_type_offsets_ != nullptr) + ? raft::device_span{vertex_type_offsets_->as_type(), + vertex_type_offsets_->size_} + : raft::device_span{vertex_type_offsets.data(), + vertex_type_offsets.size()}, + + edge_label ? (*offsets).size() - 1 : size_t{1}, + hop ? (((fan_out_->size_ % num_edge_types_) == 0) + ? (fan_out_->size_ / num_edge_types_) + : ((fan_out_->size_ / num_edge_types_) + 1)) + : size_t{1}, + (vertex_type_offsets_ != nullptr) ? 
vertex_type_offsets_->size_ - 1 + : vertex_type_offsets.size() - 1, + + // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided. + num_edge_types_, src_is_major, - options_.compress_per_hop_, - doubly_compress, do_expensive_check_); + if (edge_type) { + (*edge_type) + .resize(raft::device_span{(*label_type_hop_offsets).data(), + (*label_type_hop_offsets).size()} + .back() - + 1, + handle_.get_stream()); + cugraph::detail::sequence_fill( + handle_.get_stream(), (*edge_type).begin(), (*edge_type).size(), edge_type_t{0}); + } + majors.emplace(std::move(output_majors)); + // FIXME: Need to update renumber_map because default values are being passed renumber_map.emplace(std::move(output_renumber_map)); - major_offsets.emplace(std::move(output_major_offsets)); } - - // These are now represented by label_hop_offsets - hop.reset(); - offsets.reset(); - - } else { // heterogeneous renumbering - - rmm::device_uvector vertex_type_offsets( - graph_view.local_vertex_partition_range_size(), handle_.get_stream()); - - cugraph::detail::sequence_fill(handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0} // FIXME: Update array - ); - - rmm::device_uvector output_majors(0, handle_.get_stream()); - rmm::device_uvector output_renumber_map(0, handle_.get_stream()); - - // extract the edge_type from label_type_hop_offsets - std::optional> label_type_hop_offsets{std::nullopt}; - std::tie(output_majors, - minors, - wgt, - edge_id, - label_type_hop_offsets, // Contains information about the type and hop offsets - output_renumber_map, - (*renumber_map_offsets), - renumbered_and_sorted_edge_id_renumber_map, - renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = - cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( - handle_, - std::move(src), - std::move(dst), - std::move(wgt), - std::move(edge_id), - std::move(edge_type), - std::move(hop), - options_.retain_seeds_ - ? 
std::make_optional(raft::device_span{ - start_vertices_->as_type(), start_vertices_->size_}) - : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, - offsets ? std::make_optional( - raft::device_span{offsets->data(), offsets->size()}) - : std::nullopt, - raft::device_span{vertex_type_offsets.data(), - vertex_type_offsets.size()}, - - edge_label ? (*offsets).size() - 1 : size_t{1}, - hop ? fan_out_->size_ : size_t{1}, - size_t{1}, - num_edge_types_, - src_is_major, - do_expensive_check_); - if (edge_type) { - (*edge_type) - .resize(raft::device_span{(*label_type_hop_offsets).data(), - (*label_type_hop_offsets).size()} - .back() - - 1, - handle_.get_stream()); - cugraph::detail::sequence_fill( - handle_.get_stream(), (*edge_type).begin(), (*edge_type).size(), edge_type_t{0}); - } - - majors.emplace(std::move(output_majors)); - // FIXME: Need to update renumber_map because default values are being passed - renumber_map.emplace(std::move(output_renumber_map)); } } else { @@ -1339,6 +1365,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + (label_type_hop_offsets) + ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_type_hop_offsets, SIZE_T) + : nullptr, (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, @@ -1557,6 +1586,16 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_la : NULL; } +extern "C" cugraph_type_erased_device_array_view_t* +cugraph_sample_result_get_label_type_hop_offsets(const cugraph_sample_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->label_type_hop_offsets_ != nullptr + ? 
reinterpret_cast( + internal_pointer->label_type_hop_offsets_->view()) + : NULL; +} + extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_index( const cugraph_sample_result_t* result) { @@ -2018,7 +2057,8 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -2029,17 +2069,17 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( auto options_cpp = *reinterpret_cast(options); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2062,7 +2102,8 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( graph, nullptr, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, + vertex_type_offsets, fan_out, num_edge_types, std::move(options_cpp), @@ -2077,7 +2118,8 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( 
cugraph_graph_t* graph, const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -2095,17 +2137,17 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( *error); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2128,7 +2170,8 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( graph, edge_biases, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, + vertex_type_offsets, fan_out, num_edge_types, std::move(options_cpp), @@ -2142,7 +2185,7 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, // RENAME? 
+ const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -2152,17 +2195,17 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( auto options_cpp = *reinterpret_cast(options); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2185,7 +2228,8 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( graph, nullptr, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, + nullptr, fan_out, 1, // num_edge_types std::move(options_cpp), @@ -2200,7 +2244,7 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( cugraph_graph_t* graph, const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -2217,17 +2261,17 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( *error); // FIXME: Should we maintain this contition? 
- CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2250,7 +2294,8 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( graph, edge_biases, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, + nullptr, fan_out, 1, // num_edge_types std::move(options_cpp), diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index ce580ea9b4..ed77b33043 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -31,6 +31,8 @@ #include +#include + namespace cugraph { namespace detail { @@ -133,8 +135,14 @@ neighbor_sample_impl(raft::handle_t const& handle, } } + // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1. + auto num_hops = ((fan_out.size() % num_edge_types) == 0) + ? (fan_out.size() / num_edge_types) + : ((fan_out.size() / num_edge_types) + 1); + std::vector> level_result_src_vectors{}; std::vector> level_result_dst_vectors{}; + auto level_result_weight_vectors = edge_weight_view ? std::make_optional(std::vector>{}) : std::nullopt; @@ -147,14 +155,16 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels ? 
std::make_optional(std::vector>{}) : std::nullopt; - level_result_src_vectors.reserve(fan_out.size()); - level_result_dst_vectors.reserve(fan_out.size()); - if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(fan_out.size()); } - if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(fan_out.size()); } - if (level_result_edge_type_vectors) { (*level_result_edge_type_vectors).reserve(fan_out.size()); } - if (level_result_label_vectors) { (*level_result_label_vectors).reserve(fan_out.size()); } + level_result_src_vectors.reserve(num_hops); + level_result_dst_vectors.reserve(num_hops); + + if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(num_hops); } + if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(num_hops); } + if (level_result_edge_type_vectors) { (*level_result_edge_type_vectors).reserve(num_hops); } + if (level_result_label_vectors) { (*level_result_label_vectors).reserve(num_hops); } rmm::device_uvector frontier_vertices(0, handle.get_stream()); + auto frontier_vertex_labels = starting_vertex_labels ? std::make_optional(rmm::device_uvector{0, handle.get_stream()}) @@ -174,12 +184,24 @@ neighbor_sample_impl(raft::handle_t const& handle, std::vector level_sizes{}; - // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1 - auto num_hops = ((fan_out.size() % num_edge_types) == 0) - ? (fan_out.size() / num_edge_types) - : ((fan_out.size() / num_edge_types) + 1); + for (auto hop = 0; hop < num_hops; hop++) { + rmm::device_uvector level_result_src(0, handle.get_stream()); + rmm::device_uvector level_result_dst(0, handle.get_stream()); + + auto level_result_weight = + edge_weight_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_edge_id = + edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_edge_type = + edge_type_view ? 
std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_label = + starting_vertex_labels + ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; - for (size_t hop = 0; hop < num_hops; ++hop) { for (auto edge_type_id = 0; edge_type_id < num_edge_types; edge_type_id++) { auto k_level = fan_out[(hop * num_edge_types) + edge_type_id]; rmm::device_uvector srcs(0, handle.get_stream()); @@ -194,49 +216,119 @@ neighbor_sample_impl(raft::handle_t const& handle, } if (k_level > 0) { - std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = - sample_edges(handle, - modified_graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - edge_bias_view, - rng_state, - starting_vertices, - starting_vertex_labels, - static_cast(k_level), - with_replacement); - } else { - std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = - gather_one_hop_edgelist(handle, - modified_graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - starting_vertices, - starting_vertex_labels); + std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = sample_edges( + handle, + modified_graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + edge_bias_view, + rng_state, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt, + static_cast(k_level), + with_replacement); + } else if (k_level < 0) { + std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = gather_one_hop_edgelist( + handle, + modified_graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? 
std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt); } - level_sizes.push_back(srcs.size()); - level_result_src_vectors.push_back(std::move(srcs)); - level_result_dst_vectors.push_back(std::move(dsts)); + auto old_size = level_result_src.size(); + level_result_src.resize(old_size + srcs.size(), handle.get_stream()); + level_result_dst.resize(old_size + srcs.size(), handle.get_stream()); - if (weights) { (*level_result_weight_vectors).push_back(std::move(*weights)); } - if (edge_ids) { (*level_result_edge_id_vectors).push_back(std::move(*edge_ids)); } - if (edge_types) { (*level_result_edge_type_vectors).push_back(std::move(*edge_types)); } - if (labels) { (*level_result_label_vectors).push_back(std::move(*labels)); } + raft::copy( + level_result_src.begin() + old_size, srcs.begin(), srcs.size(), handle.get_stream()); + + raft::copy( + level_result_dst.begin() + old_size, dsts.begin(), srcs.size(), handle.get_stream()); + + if (weights) { + (*level_result_weight).resize(old_size + srcs.size(), handle.get_stream()); + + raft::copy(level_result_weight->begin() + old_size, + weights->begin(), + srcs.size(), + handle.get_stream()); + } + + if (edge_ids) { + (*level_result_edge_id).resize(old_size + srcs.size(), handle.get_stream()); + raft::copy(level_result_edge_id->begin() + old_size, + edge_ids->begin(), + srcs.size(), + handle.get_stream()); + } + if (edge_types) { + (*level_result_edge_type).resize(old_size + srcs.size(), handle.get_stream()); + + raft::copy(level_result_edge_type->begin() + old_size, + edge_types->begin(), + srcs.size(), + handle.get_stream()); + } + + if (labels) { + (*level_result_label).resize(old_size + srcs.size(), handle.get_stream()); + + raft::copy(level_result_label->begin() + old_size, + labels->begin(), + srcs.size(), + handle.get_stream()); + } if (num_edge_types > 1) { modified_graph_view.clear_edge_mask(); } } + level_sizes.push_back(level_result_src.size()); + 
level_result_src_vectors.push_back(std::move(level_result_src)); + level_result_dst_vectors.push_back(std::move(level_result_dst)); + + if (level_result_weight) { + (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); + } + if (level_result_edge_id) { + (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); + } + if (level_result_edge_type) { + (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); + } + if (level_result_label) { + (*level_result_label_vectors).push_back(std::move(*level_result_label)); + } + // FIXME: We should modify vertex_partition_range_lasts to return a raft::host_span // rather than making a copy. auto vertex_partition_range_lasts = modified_graph_view.vertex_partition_range_lasts(); std::tie(frontier_vertices, frontier_vertex_labels, vertex_used_as_source) = prepare_next_frontier( handle, - starting_vertices, - starting_vertex_labels, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? 
std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, frontier_vertex_labels @@ -249,14 +341,6 @@ neighbor_sample_impl(raft::handle_t const& handle, prior_sources_behavior, dedupe_sources, do_expensive_check); - - starting_vertices = - raft::device_span(frontier_vertices.data(), frontier_vertices.size()); - - if (frontier_vertex_labels) { - starting_vertex_labels = raft::device_span(frontier_vertex_labels->data(), - frontier_vertex_labels->size()); - } } auto result_size = std::reduce(level_sizes.begin(), level_sizes.end()); @@ -339,7 +423,7 @@ neighbor_sample_impl(raft::handle_t const& handle, if (return_hops) { result_hops = rmm::device_uvector(result_size, handle.get_stream()); output_offset = 0; - for (size_t i = 0; i < fan_out.size(); ++i) { + for (size_t i = 0; i < num_hops; ++i) { scalar_fill( handle, result_hops->data() + output_offset, level_sizes[i], static_cast(i)); output_offset += level_sizes[i]; diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index 21f5190ad5..38781614b2 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -245,6 +245,11 @@ cdef extern from "cugraph_c/algorithms.h": const cugraph_sample_result_t* result ) + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_sample_result_get_label_type_hop_offsets( + const cugraph_sample_result_t* result + ) + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_start_labels( const cugraph_sample_result_t* result diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd index 762fd37a35..f496cc7d88 100644 --- 
a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd @@ -73,6 +73,7 @@ cdef extern from "cugraph_c/sampling_algorithms.h": cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_tyoe_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -88,6 +89,7 @@ cdef extern from "cugraph_c/sampling_algorithms.h": const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_tyoe_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx index ecdfba3afc..ee0e85fa3b 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx @@ -87,6 +87,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, _GPUGraph input_graph, start_vertex_list, starting_vertex_label_offsets, + vertex_type_offsets, h_fan_out, num_edge_types, bool_t with_replacement, @@ -124,6 +125,9 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'starting_vertex_label_offsets' must lead to an array of len(start_vertex_list) + vertex_type_offsets: device array type (Optional) + Offsets for each vertex type in the graph. 
+ h_fan_out: numpy array type Device array containing the branching out (fan-out) degrees per starting vertex for each hop level. The fanout value at each hop for each @@ -247,6 +251,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, assert_CAI_type(start_vertex_list, "start_vertex_list") assert_CAI_type(starting_vertex_label_offsets, "starting_vertex_label_offsets", True) + assert_CAI_type(vertex_type_offsets, "vertex_type_offsets", True) assert_AI_type(h_fan_out, "h_fan_out") @@ -277,6 +282,11 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, cai_starting_vertex_label_offsets_ptr = \ starting_vertex_label_offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t cai_vertex_type_offsets_ptr + if vertex_type_offsets is not None: + cai_vertex_type_offsets_ptr = \ + vertex_type_offsets.__cuda_array_interface__['data'][0] + cdef cugraph_type_erased_device_array_view_t* start_vertex_list_ptr = \ cugraph_type_erased_device_array_view_create( @@ -294,6 +304,16 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, SIZE_T ) + cdef cugraph_type_erased_device_array_view_t* vertex_type_offsets_ptr = NULL + if vertex_type_offsets is not None: + vertex_type_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_vertex_type_offsets_ptr, + len(vertex_type_offsets), + SIZE_T + ) + + cdef cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL if retain_seeds: if starting_vertex_label_offsets is None: @@ -354,6 +374,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, NULL, # FIXME: Add support for biased neighbor sampling start_vertex_list_ptr, starting_vertex_label_offsets_ptr, + vertex_type_offsets_ptr, fan_out_ptr, num_edge_types, sampling_options, @@ -388,7 +409,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, cupy_edge_types = result.get_edge_types() cupy_batch_ids = result.get_batch_ids() cupy_label_hop_offsets = 
result.get_label_hop_offsets() - + cupy_label_type_hop_offsets = result.get_label_type_hop_offsets() if renumber: cupy_renumber_map = result.get_renumber_map() @@ -405,6 +426,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, 'renumber_map_offsets': cupy_renumber_map_offsets, @@ -422,6 +444,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, } # Return everything that isn't null diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx index 3fa3575e27..dbee65323d 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx @@ -84,6 +84,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, _GPUGraph input_graph, start_vertex_list, starting_vertex_label_offsets, + vertex_type_offsets, h_fan_out, num_edge_types, bool_t with_replacement, @@ -119,6 +120,9 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'starting_vertex_label_offsets' must lead to an array of len(start_vertex_list) + vertex_type_offsets: device array type (Optional) + Offsets for each vertex type in the graph. + h_fan_out: numpy array type Device array containing the branching out (fan-out) degrees per starting vertex for each hop level. 
The fanout value at each hop for each @@ -242,6 +246,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, assert_CAI_type(start_vertex_list, "start_vertex_list") assert_CAI_type(starting_vertex_label_offsets, "starting_vertex_label_offsets", True) + assert_CAI_type(vertex_type_offsets, "vertex_type_offsets", True) assert_AI_type(h_fan_out, "h_fan_out") @@ -271,6 +276,11 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, cai_starting_vertex_label_offsets_ptr = \ starting_vertex_label_offsets.__cuda_array_interface__['data'][0] + cdef uintptr_t cai_vertex_type_offsets_ptr + if vertex_type_offsets is not None: + cai_vertex_type_offsets_ptr = \ + vertex_type_offsets.__cuda_array_interface__['data'][0] + cdef cugraph_type_erased_device_array_view_t* start_vertex_list_ptr = \ cugraph_type_erased_device_array_view_create( @@ -288,6 +298,15 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, SIZE_T ) + cdef cugraph_type_erased_device_array_view_t* vertex_type_offsets_ptr = NULL + if vertex_type_offsets is not None: + vertex_type_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_vertex_type_offsets_ptr, + len(vertex_type_offsets), + SIZE_T + ) + cdef cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL if retain_seeds: if starting_vertex_label_offsets is None: @@ -347,6 +366,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, c_graph_ptr, start_vertex_list_ptr, starting_vertex_label_offsets_ptr, + vertex_type_offsets_ptr, fan_out_ptr, num_edge_types, sampling_options, @@ -372,6 +392,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, # Get cupy "views" of the individual arrays to return. These each increment # the refcount on the SamplingResult instance which will keep the data alive # until all references are removed and the GC runs. 
+ cupy_majors = result.get_majors() cupy_major_offsets = result.get_major_offsets() cupy_minors = result.get_minors() @@ -380,6 +401,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, cupy_edge_types = result.get_edge_types() cupy_batch_ids = result.get_batch_ids() cupy_label_hop_offsets = result.get_label_hop_offsets() + cupy_label_type_hop_offsets = result.get_label_type_hop_offsets() if renumber: cupy_renumber_map = result.get_renumber_map() @@ -396,6 +418,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, 'renumber_map_offsets': cupy_renumber_map_offsets, @@ -413,6 +436,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, } # Return everything that isn't null diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx index e2476de160..cbd7a5dcff 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx @@ -216,7 +216,7 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, >>> sampling_results = pylibcugraph.homogeneous_biased_neighbor_sample( ... resource_handle, G, start_vertices, starting_vertex_label_offsets, ... 
h_fan_out, False, True) - >>> >>> sampling_results + >>> sampling_results {'majors': array([2, 2, 5, 5, 1, 1], dtype=int32), 'minors': array([1, 3, 3, 4, 3, 4], dtype=int32), 'weight': array([3.1, 4.1, 7.2, 3.2, 2.1, 1.1], dtype=float32)} @@ -383,8 +383,6 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, if renumber: cupy_renumber_map = result.get_renumber_map() cupy_renumber_map_offsets = result.get_renumber_map_offsets() - cupy_edge_renumber_map = result.get_edge_renumber_map() - cupy_edge_renumber_map_offsets = result.get_edge_renumber_map_offsets() sampling_results = { 'major_offsets': cupy_major_offsets, @@ -397,9 +395,7 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'label_hop_offsets': cupy_label_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, - 'renumber_map_offsets': cupy_renumber_map_offsets, - 'edge_renumber_map' : cupy_edge_renumber_map, - 'edge_renumber_map_offsets' : cupy_edge_renumber_map_offsets + 'renumber_map_offsets': cupy_renumber_map_offsets } else: diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx index 3c6cdf7742..bb88ffcf6a 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx @@ -211,7 +211,7 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, >>> sampling_results = pylibcugraph.homogeneous_uniform_neighbor_sample( ... resource_handle, G, start_vertices, starting_vertex_label_offsets, ... 
h_fan_out, False, True) - >>> >>> sampling_results + >>> sampling_results {'majors': array([2, 2, 5, 5, 1, 1], dtype=int32), 'minors': array([1, 3, 3, 4, 3, 4], dtype=int32), 'weight': array([3.1, 4.1, 7.2, 3.2, 2.1, 1.1], dtype=float32)} @@ -378,8 +378,6 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, if renumber: cupy_renumber_map = result.get_renumber_map() cupy_renumber_map_offsets = result.get_renumber_map_offsets() - cupy_edge_renumber_map = result.get_edge_renumber_map() - cupy_edge_renumber_map_offsets = result.get_edge_renumber_map_offsets() sampling_results = { 'major_offsets': cupy_major_offsets, @@ -392,9 +390,7 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'label_hop_offsets': cupy_label_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, - 'renumber_map_offsets': cupy_renumber_map_offsets, - 'edge_renumber_map' : cupy_edge_renumber_map, - 'edge_renumber_map_offsets' : cupy_edge_renumber_map_offsets + 'renumber_map_offsets': cupy_renumber_map_offsets } else: diff --git a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx index b93618d73c..a2ea7cb971 100644 --- a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx +++ b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx @@ -24,6 +24,7 @@ from pylibcugraph._cugraph_c.algorithms cimport ( cugraph_sample_result_get_majors, cugraph_sample_result_get_minors, cugraph_sample_result_get_label_hop_offsets, + cugraph_sample_result_get_label_type_hop_offsets, cugraph_sample_result_get_sources, # deprecated cugraph_sample_result_get_destinations, # deprecated cugraph_sample_result_get_edge_weight, @@ -206,6 +207,19 @@ cdef class SamplingResult: return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) + def get_label_type_hop_offsets(self): + if self.c_sample_result_ptr is NULL: + raise ValueError("pointer not set, 
must call set_ptr() with a " + "non-NULL value first.") + cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = ( + cugraph_sample_result_get_label_type_hop_offsets(self.c_sample_result_ptr) + ) + if device_array_view_ptr is NULL: + return None + + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, + self) + # Deprecated def get_offsets(self): if self.c_sample_result_ptr is NULL: