From 95c5ba23e5c087ee956f666d0a188e2c8c6fcb50 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 19 Nov 2024 14:57:08 -0800 Subject: [PATCH 01/42] homogeneous neighborhood sampling doesn't support edge renumber_map along with offsets --- .../pylibcugraph/homogeneous_biased_neighbor_sample.pyx | 6 +----- .../pylibcugraph/homogeneous_uniform_neighbor_sample.pyx | 6 +----- 2 files changed, 2 insertions(+), 10 deletions(-) diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx index e2476de160..3c278bc06c 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx @@ -383,8 +383,6 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, if renumber: cupy_renumber_map = result.get_renumber_map() cupy_renumber_map_offsets = result.get_renumber_map_offsets() - cupy_edge_renumber_map = result.get_edge_renumber_map() - cupy_edge_renumber_map_offsets = result.get_edge_renumber_map_offsets() sampling_results = { 'major_offsets': cupy_major_offsets, @@ -397,9 +395,7 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'label_hop_offsets': cupy_label_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, - 'renumber_map_offsets': cupy_renumber_map_offsets, - 'edge_renumber_map' : cupy_edge_renumber_map, - 'edge_renumber_map_offsets' : cupy_edge_renumber_map_offsets + 'renumber_map_offsets': cupy_renumber_map_offsets } else: diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx index 3c6cdf7742..32ca1f10c1 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx @@ -378,8 +378,6 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, if renumber: cupy_renumber_map = result.get_renumber_map() cupy_renumber_map_offsets = result.get_renumber_map_offsets() - cupy_edge_renumber_map = result.get_edge_renumber_map() - cupy_edge_renumber_map_offsets = result.get_edge_renumber_map_offsets() sampling_results = { 'major_offsets': cupy_major_offsets, @@ -392,9 +390,7 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'label_hop_offsets': cupy_label_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, - 'renumber_map_offsets': cupy_renumber_map_offsets, - 'edge_renumber_map' : cupy_edge_renumber_map, - 'edge_renumber_map_offsets' : cupy_edge_renumber_map_offsets + 'renumber_map_offsets': cupy_renumber_map_offsets } else: From af465f9976f3dac7bf4bfb91eb2e0b7a5f7d4948 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 19 Nov 2024 15:01:20 -0800 Subject: [PATCH 02/42] update docstrings --- .../pylibcugraph/homogeneous_biased_neighbor_sample.pyx | 2 +- .../pylibcugraph/homogeneous_uniform_neighbor_sample.pyx | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx index 3c278bc06c..cbd7a5dcff 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_biased_neighbor_sample.pyx @@ -216,7 +216,7 @@ def homogeneous_biased_neighbor_sample(ResourceHandle resource_handle, >>> sampling_results = pylibcugraph.homogeneous_biased_neighbor_sample( ... resource_handle, G, start_vertices, starting_vertex_label_offsets, ... h_fan_out, False, True) - >>> >>> sampling_results + >>> sampling_results {'majors': array([2, 2, 5, 5, 1, 1], dtype=int32), 'minors': array([1, 3, 3, 4, 3, 4], dtype=int32), 'weight': array([3.1, 4.1, 7.2, 3.2, 2.1, 1.1], dtype=float32)} diff --git a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx index 32ca1f10c1..bb88ffcf6a 100644 --- a/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/homogeneous_uniform_neighbor_sample.pyx @@ -211,7 +211,7 @@ def homogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, >>> sampling_results = pylibcugraph.homogeneous_uniform_neighbor_sample( ... resource_handle, G, start_vertices, starting_vertex_label_offsets, ... h_fan_out, False, True) - >>> >>> sampling_results + >>> sampling_results {'majors': array([2, 2, 5, 5, 1, 1], dtype=int32), 'minors': array([1, 3, 3, 4, 3, 4], dtype=int32), 'weight': array([3.1, 4.1, 7.2, 3.2, 2.1, 1.1], dtype=float32)} From 61a0926fd61693165228a261ddf096bb507e9721 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 20 Nov 2024 13:02:10 -0800 Subject: [PATCH 03/42] fix bug in heterogeneous renumbering --- cpp/src/c_api/neighbor_sampling.cpp | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index be3a44d813..e89630dfb8 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -1220,13 +1220,18 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { } else { // heterogeneous renumbering + // FIXME: If no 'vertex_type_offsets' is provided, all vertices are assumed to have + // a vertex type of value 1. Update the API once 'vertex_type_offsets' is supported rmm::device_uvector vertex_type_offsets( - graph_view.local_vertex_partition_range_size(), handle_.get_stream()); + 2, handle_.get_stream()); + + cugraph::detail::stride_fill( + handle_.get_stream(), + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}, + vertex_t{graph_view.local_vertex_partition_range_size()} - cugraph::detail::sequence_fill(handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0} // FIXME: Update array ); rmm::device_uvector output_majors(0, handle_.get_stream()); @@ -1240,7 +1245,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { edge_id, label_type_hop_offsets, // Contains information about the type and hop offsets output_renumber_map, - (*renumber_map_offsets), + renumber_map_offsets, renumbered_and_sorted_edge_id_renumber_map, renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( @@ -1267,7 +1272,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { edge_label ? (*offsets).size() - 1 : size_t{1}, hop ? fan_out_->size_ : size_t{1}, - size_t{1}, + vertex_type_offsets.size() - 1, // num_vertex_type is by default 1 if not provided num_edge_types_, src_is_major, do_expensive_check_); From 707cf3de5e4c17bac579824e928220360806826b Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 20 Nov 2024 13:11:20 -0800 Subject: [PATCH 04/42] fix style --- cpp/src/c_api/neighbor_sampling.cpp | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index e89630dfb8..d3ee4e93c6 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -1222,15 +1222,13 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { // FIXME: If no 'vertex_type_offsets' is provided, all vertices are assumed to have // a vertex type of value 1. Update the API once 'vertex_type_offsets' is supported - rmm::device_uvector vertex_type_offsets( - 2, handle_.get_stream()); + rmm::device_uvector vertex_type_offsets(2, handle_.get_stream()); - cugraph::detail::stride_fill( - handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0}, - vertex_t{graph_view.local_vertex_partition_range_size()} + cugraph::detail::stride_fill(handle_.get_stream(), + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}, + vertex_t{graph_view.local_vertex_partition_range_size()} ); @@ -1272,7 +1270,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { edge_label ? (*offsets).size() - 1 : size_t{1}, hop ? fan_out_->size_ : size_t{1}, - vertex_type_offsets.size() - 1, // num_vertex_type is by default 1 if not provided + + vertex_type_offsets.size() - + 1, // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided num_edge_types_, src_is_major, do_expensive_check_); From ef848e9d8095ff51a6f4167c03997326ea596a69 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 16:48:00 -0800 Subject: [PATCH 05/42] handle case where there is no sampling result prior to renumbering --- cpp/src/c_api/neighbor_sampling.cpp | 289 +++++++++++--------- cpp/src/sampling/neighbor_sampling_impl.hpp | 53 +++- 2 files changed, 197 insertions(+), 145 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index d3ee4e93c6..cd6cdaf2bb 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -63,6 +63,7 @@ struct cugraph_sample_result_t { cugraph_type_erased_device_array_t* wgt_{nullptr}; cugraph_type_erased_device_array_t* hop_{nullptr}; cugraph_type_erased_device_array_t* label_hop_offsets_{nullptr}; + cugraph_type_erased_device_array_t* label_type_hop_offsets_{nullptr}; cugraph_type_erased_device_array_t* label_{nullptr}; cugraph_type_erased_device_array_t* renumber_map_{nullptr}; cugraph_type_erased_device_array_t* renumber_map_offsets_{nullptr}; @@ -1011,6 +1012,8 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { options_.with_replacement_}, do_expensive_check_); } else { + + raft::print_device_vector("labels", (*start_vertex_labels).data(), (*start_vertex_labels).size(), std::cout); std::tie(src, dst, wgt, edge_id, edge_type, hop, offsets) = cugraph::heterogeneous_uniform_neighbor_sample( handle_, @@ -1108,6 +1111,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> major_offsets{std::nullopt}; std::optional> label_hop_offsets{std::nullopt}; + std::optional> label_type_hop_offsets{std::nullopt}; std::optional> renumber_map{std::nullopt}; std::optional> renumber_map_offsets{std::nullopt}; @@ -1125,21 +1129,127 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { } if (options_.renumber_results_) { - if (num_edge_types_ == 1) { // homogeneous renumbering - if (options_.compression_type_ == cugraph_compression_type_t::COO) { - // COO + if (src.size() > 0) { // Only renumber if there are edgelist to renumber + if (num_edge_types_ == 1) { // homogeneous renumbering + if (options_.compression_type_ == cugraph_compression_type_t::COO) { + // COO + + rmm::device_uvector output_majors(0, handle_.get_stream()); + rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + std::tie(output_majors, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = + cugraph::renumber_and_sort_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) + : std::nullopt, + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + offsets ? (*offsets).size() - 1 : size_t{1}, + hop ? fan_out_->size_ : size_t{1}, + src_is_major, + do_expensive_check_); + + majors.emplace(std::move(output_majors)); + renumber_map.emplace(std::move(output_renumber_map)); + } else { + // (D)CSC, (D)CSR + + bool doubly_compress = + (options_.compression_type_ == cugraph_compression_type_t::DCSR) || + (options_.compression_type_ == cugraph_compression_type_t::DCSC); + + rmm::device_uvector output_major_offsets(0, handle_.get_stream()); + rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + + std::tie(majors, + output_major_offsets, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = + cugraph::renumber_and_compress_sampled_edgelist( + handle_, + std::move(src), + std::move(dst), + std::move(wgt), + std::move(edge_id), + std::move(edge_type), + std::move(hop), + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertices_->as_type(), start_vertices_->size_}) + : std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) + : std::nullopt, + offsets ? std::make_optional( + raft::device_span{offsets->data(), offsets->size()}) + : std::nullopt, + edge_label ? (*offsets).size() - 1 : size_t{1}, // FIXME: update edge_label + hop ? fan_out_->size_ : size_t{1}, + src_is_major, + options_.compress_per_hop_, + doubly_compress, + do_expensive_check_); + + renumber_map.emplace(std::move(output_renumber_map)); + major_offsets.emplace(std::move(output_major_offsets)); + } + + // These are now represented by label_hop_offsets + hop.reset(); + offsets.reset(); + + } else { // heterogeneous renumbering + + // FIXME: If no 'vertex_type_offsets' is provided, all vertices are assumed to have + // a vertex type of value 1. Update the API once 'vertex_type_offsets' is supported + rmm::device_uvector vertex_type_offsets(2, handle_.get_stream()); + + cugraph::detail::stride_fill(handle_.get_stream(), + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}, + vertex_t{graph_view.local_vertex_partition_range_size()} + + ); rmm::device_uvector output_majors(0, handle_.get_stream()); rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + std::tie(output_majors, - minors, - wgt, - edge_id, - edge_type, - label_hop_offsets, - output_renumber_map, - renumber_map_offsets) = - cugraph::renumber_and_sort_sampled_edgelist( + minors, + wgt, + edge_id, + label_type_hop_offsets, // Contains information about the type and hop offsets + output_renumber_map, + renumber_map_offsets, + renumbered_and_sorted_edge_id_renumber_map, + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = + cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( handle_, std::move(src), std::move(dst), @@ -1158,138 +1268,32 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, - offsets ? (*offsets).size() - 1 : size_t{1}, - hop ? fan_out_->size_ : size_t{1}, - src_is_major, - do_expensive_check_); - - majors.emplace(std::move(output_majors)); - renumber_map.emplace(std::move(output_renumber_map)); - } else { - // (D)CSC, (D)CSR - - bool doubly_compress = - (options_.compression_type_ == cugraph_compression_type_t::DCSR) || - (options_.compression_type_ == cugraph_compression_type_t::DCSC); - - rmm::device_uvector output_major_offsets(0, handle_.get_stream()); - rmm::device_uvector output_renumber_map(0, handle_.get_stream()); + raft::device_span{vertex_type_offsets.data(), + vertex_type_offsets.size()}, - std::tie(majors, - output_major_offsets, - minors, - wgt, - edge_id, - edge_type, - label_hop_offsets, - output_renumber_map, - renumber_map_offsets) = - cugraph::renumber_and_compress_sampled_edgelist( - handle_, - std::move(src), - std::move(dst), - std::move(wgt), - std::move(edge_id), - std::move(edge_type), - std::move(hop), - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertices_->as_type(), start_vertices_->size_}) - : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, - offsets ? std::make_optional( - raft::device_span{offsets->data(), offsets->size()}) - : std::nullopt, - edge_label ? (*offsets).size() - 1 : size_t{1}, // FIXME: update edge_label + edge_label ? (*offsets).size() - 1 : size_t{1}, hop ? fan_out_->size_ : size_t{1}, + + vertex_type_offsets.size() - + 1, // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided + num_edge_types_, src_is_major, - options_.compress_per_hop_, - doubly_compress, do_expensive_check_); + if (edge_type) { + (*edge_type) + .resize(raft::device_span{(*label_type_hop_offsets).data(), + (*label_type_hop_offsets).size()} + .back() - + 1, + handle_.get_stream()); + cugraph::detail::sequence_fill( + handle_.get_stream(), (*edge_type).begin(), (*edge_type).size(), edge_type_t{0}); + } + majors.emplace(std::move(output_majors)); + // FIXME: Need to update renumber_map because default values are being passed renumber_map.emplace(std::move(output_renumber_map)); - major_offsets.emplace(std::move(output_major_offsets)); } - - // These are now represented by label_hop_offsets - hop.reset(); - offsets.reset(); - - } else { // heterogeneous renumbering - - // FIXME: If no 'vertex_type_offsets' is provided, all vertices are assumed to have - // a vertex type of value 1. Update the API once 'vertex_type_offsets' is supported - rmm::device_uvector vertex_type_offsets(2, handle_.get_stream()); - - cugraph::detail::stride_fill(handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0}, - vertex_t{graph_view.local_vertex_partition_range_size()} - - ); - - rmm::device_uvector output_majors(0, handle_.get_stream()); - rmm::device_uvector output_renumber_map(0, handle_.get_stream()); - - // extract the edge_type from label_type_hop_offsets - std::optional> label_type_hop_offsets{std::nullopt}; - std::tie(output_majors, - minors, - wgt, - edge_id, - label_type_hop_offsets, // Contains information about the type and hop offsets - output_renumber_map, - renumber_map_offsets, - renumbered_and_sorted_edge_id_renumber_map, - renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = - cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( - handle_, - std::move(src), - std::move(dst), - std::move(wgt), - std::move(edge_id), - std::move(edge_type), - std::move(hop), - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertices_->as_type(), start_vertices_->size_}) - : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) - : std::nullopt, - offsets ? std::make_optional( - raft::device_span{offsets->data(), offsets->size()}) - : std::nullopt, - raft::device_span{vertex_type_offsets.data(), - vertex_type_offsets.size()}, - - edge_label ? (*offsets).size() - 1 : size_t{1}, - hop ? fan_out_->size_ : size_t{1}, - - vertex_type_offsets.size() - - 1, // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided - num_edge_types_, - src_is_major, - do_expensive_check_); - if (edge_type) { - (*edge_type) - .resize(raft::device_span{(*label_type_hop_offsets).data(), - (*label_type_hop_offsets).size()} - .back() - - 1, - handle_.get_stream()); - cugraph::detail::sequence_fill( - handle_.get_stream(), (*edge_type).begin(), (*edge_type).size(), edge_type_t{0}); - } - - majors.emplace(std::move(output_majors)); - // FIXME: Need to update renumber_map because default values are being passed - renumber_map.emplace(std::move(output_renumber_map)); } } else { @@ -1344,6 +1348,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + (label_type_hop_offsets) + ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_type_hop_offsets, SIZE_T) + : nullptr, (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, @@ -1562,6 +1569,16 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_la : NULL; } +extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_type_hop_offsets( + const cugraph_sample_result_t* result) +{ + auto internal_pointer = reinterpret_cast(result); + return internal_pointer->label_type_hop_offsets_ != nullptr + ? reinterpret_cast( + internal_pointer->label_type_hop_offsets_->view()) + : NULL; +} + extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_index( const cugraph_sample_result_t* result) { diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index ccca71cdf2..79451a38d4 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -104,6 +104,13 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); + + label_t num_labels = 0; + + if (starting_vertex_labels) { + // Initial number of labels. Will be leveraged if there is no sampling result + num_labels = starting_vertex_labels->size(); + } if (num_edge_types > 1) { for (int i = 0; i < num_edge_types; i++) { @@ -362,15 +369,43 @@ neighbor_sample_impl(raft::handle_t const& handle, level_result_label_vectors = std::nullopt; } - return detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); + std::optional> result_offsets{std::nullopt}; + + std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, + result_edge_types, result_hops, result_labels, result_offsets) + = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + if (result_srcs.size() == 0){ + // Update the 'edgelist_label_offsets' array to be proportional to the + // number of labels + result_offsets->resize(num_labels + 1 ,handle.get_stream()); + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size()), + result_offsets->begin(), + [] __device__(auto idx) { + return 0; + }); + } + + return std::make_tuple(std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + std::move(result_offsets)); } } // namespace detail From 535df5d75de49f45ac2d5fdb363f5b92fbb9e985 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 16:54:49 -0800 Subject: [PATCH 06/42] expose 'label_type_hop_offsets' --- cpp/include/cugraph_c/sampling_algorithms.h | 10 ++++++++++ .../pylibcugraph/_cugraph_c/algorithms.pxd | 5 +++++ .../heterogeneous_biased_neighbor_sample.pyx | 4 +++- .../heterogeneous_uniform_neighbor_sample.pyx | 4 ++++ .../internal_types/sampling_result.pyx | 14 ++++++++++++++ 5 files changed, 36 insertions(+), 1 deletion(-) diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index ef75e726d8..499b313155 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -735,6 +735,16 @@ cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_hop( cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_hop_offsets( const cugraph_sample_result_t* result); +/** + * @ingroup samplingC + * @brief Get the label-type-hop offsets from the sampling algorithm result + * + * @param [in] result The result from a sampling algorithm + * @return type erased array pointing to the label-type-hop offsets + */ +cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_type_hop_offsets( + const cugraph_sample_result_t* result); + /** * @ingroup samplingC * @brief Get the index from the sampling algorithm result diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index 21f5190ad5..149d34473b 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -244,6 +244,11 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sample_result_get_label_hop_offsets( const cugraph_sample_result_t* result ) + + cdef cugraph_type_erased_device_array_view_t* \ + cugraph_sample_result_get_label_type_hop_offsets( + const cugraph_sample_result_t* result + ) cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_start_labels( diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx index ecdfba3afc..0f49dec712 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx @@ -388,7 +388,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, cupy_edge_types = result.get_edge_types() cupy_batch_ids = result.get_batch_ids() cupy_label_hop_offsets = result.get_label_hop_offsets() - + cupy_label_type_hop_offsets = result.get_label_type_hop_offsets() if renumber: cupy_renumber_map = result.get_renumber_map() @@ -405,6 +405,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, 'renumber_map_offsets': cupy_renumber_map_offsets, @@ -422,6 +423,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, } # Return everything that isn't null diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx index 3fa3575e27..c586fcdff8 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx @@ -372,6 +372,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, # Get cupy "views" of the individual arrays to return. These each increment # the refcount on the SamplingResult instance which will keep the data alive # until all references are removed and the GC runs. + cupy_majors = result.get_majors() cupy_major_offsets = result.get_major_offsets() cupy_minors = result.get_minors() @@ -380,6 +381,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, cupy_edge_types = result.get_edge_types() cupy_batch_ids = result.get_batch_ids() cupy_label_hop_offsets = result.get_label_hop_offsets() + cupy_label_type_hop_offsets = result.get_label_type_hop_offsets() if renumber: cupy_renumber_map = result.get_renumber_map() @@ -396,6 +398,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, 'hop_id': None, 'renumber_map': cupy_renumber_map, 'renumber_map_offsets': cupy_renumber_map_offsets, @@ -413,6 +416,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, 'edge_type': cupy_edge_types, 'batch_id': cupy_batch_ids, 'label_hop_offsets': cupy_label_hop_offsets, + 'label_type_hop_offsets': cupy_label_type_hop_offsets, } # Return everything that isn't null diff --git a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx index b93618d73c..9e53d0aed9 100644 --- a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx +++ b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx @@ -24,6 +24,7 @@ from pylibcugraph._cugraph_c.algorithms cimport ( cugraph_sample_result_get_majors, cugraph_sample_result_get_minors, cugraph_sample_result_get_label_hop_offsets, + cugraph_sample_result_get_label_type_hop_offsets, cugraph_sample_result_get_sources, # deprecated cugraph_sample_result_get_destinations, # deprecated cugraph_sample_result_get_edge_weight, @@ -205,6 +206,19 @@ cdef class SamplingResult: return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) + + def get_label_type_hop_offsets(self): + if self.c_sample_result_ptr is NULL: + raise ValueError("pointer not set, must call set_ptr() with a " + "non-NULL value first.") + cdef cugraph_type_erased_device_array_view_t* device_array_view_ptr = ( + cugraph_sample_result_get_label_type_hop_offsets(self.c_sample_result_ptr) + ) + if device_array_view_ptr is NULL: + return None + + return create_cupy_array_view_for_device_ptr(device_array_view_ptr, + self) # Deprecated def get_offsets(self): From 3501a453c7ba1f246077abba672f8f5f602fc651 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 16:56:15 -0800 Subject: [PATCH 07/42] fix style --- cpp/src/c_api/neighbor_sampling.cpp | 64 +++++++++---------- cpp/src/sampling/neighbor_sampling_impl.hpp | 56 ++++++++-------- .../pylibcugraph/_cugraph_c/algorithms.pxd | 2 +- .../internal_types/sampling_result.pyx | 2 +- 4 files changed, 63 insertions(+), 61 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index cd6cdaf2bb..c343adc228 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -1012,8 +1012,8 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { options_.with_replacement_}, do_expensive_check_); } else { - - raft::print_device_vector("labels", (*start_vertex_labels).data(), (*start_vertex_labels).size(), std::cout); + raft::print_device_vector( + "labels", (*start_vertex_labels).data(), (*start_vertex_labels).size(), std::cout); std::tie(src, dst, wgt, edge_id, edge_type, hop, offsets) = cugraph::heterogeneous_uniform_neighbor_sample( handle_, @@ -1129,7 +1129,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { } if (options_.renumber_results_) { - if (src.size() > 0) { // Only renumber if there are edgelist to renumber + if (src.size() > 0) { // Only renumber if there are edgelist to renumber if (num_edge_types_ == 1) { // homogeneous renumbering if (options_.compression_type_ == cugraph_compression_type_t::COO) { // COO @@ -1137,13 +1137,13 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { rmm::device_uvector output_majors(0, handle_.get_stream()); rmm::device_uvector output_renumber_map(0, handle_.get_stream()); std::tie(output_majors, - minors, - wgt, - edge_id, - edge_type, - label_hop_offsets, - output_renumber_map, - renumber_map_offsets) = + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = cugraph::renumber_and_sort_sampled_edgelist( handle_, std::move(src), @@ -1181,14 +1181,14 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { rmm::device_uvector output_renumber_map(0, handle_.get_stream()); std::tie(majors, - output_major_offsets, - minors, - wgt, - edge_id, - edge_type, - label_hop_offsets, - output_renumber_map, - renumber_map_offsets) = + output_major_offsets, + minors, + wgt, + edge_id, + edge_type, + label_hop_offsets, + output_renumber_map, + renumber_map_offsets) = cugraph::renumber_and_compress_sampled_edgelist( handle_, std::move(src), @@ -1230,10 +1230,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { rmm::device_uvector vertex_type_offsets(2, handle_.get_stream()); cugraph::detail::stride_fill(handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0}, - vertex_t{graph_view.local_vertex_partition_range_size()} + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}, + vertex_t{graph_view.local_vertex_partition_range_size()} ); @@ -1241,14 +1241,14 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { rmm::device_uvector output_renumber_map(0, handle_.get_stream()); std::tie(output_majors, - minors, - wgt, - edge_id, - label_type_hop_offsets, // Contains information about the type and hop offsets - output_renumber_map, - renumber_map_offsets, - renumbered_and_sorted_edge_id_renumber_map, - renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = + minors, + wgt, + edge_id, + label_type_hop_offsets, // Contains information about the type and hop offsets + output_renumber_map, + renumber_map_offsets, + renumbered_and_sorted_edge_id_renumber_map, + renumbered_and_sorted_edge_id_renumber_map_label_type_offsets) = cugraph::heterogeneous_renumber_and_sort_sampled_edgelist( handle_, std::move(src), @@ -1569,8 +1569,8 @@ extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_la : NULL; } -extern "C" cugraph_type_erased_device_array_view_t* cugraph_sample_result_get_label_type_hop_offsets( - const cugraph_sample_result_t* result) +extern "C" cugraph_type_erased_device_array_view_t* +cugraph_sample_result_get_label_type_hop_offsets(const cugraph_sample_result_t* result) { auto internal_pointer = reinterpret_cast(result); return internal_pointer->label_type_hop_offsets_ != nullptr diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 79451a38d4..0a5d65ea1e 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -104,7 +104,7 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); - + label_t num_labels = 0; if (starting_vertex_labels) { @@ -370,34 +370,36 @@ neighbor_sample_impl(raft::handle_t const& handle, } std::optional> result_offsets{std::nullopt}; - - std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, - result_edge_types, result_hops, result_labels, result_offsets) - = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - if (result_srcs.size() == 0){ + + std::tie(result_srcs, + result_dsts, + result_weights, + result_edge_ids, + result_edge_types, + result_hops, + result_labels, + result_offsets) = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + if (result_srcs.size() == 0) { // Update the 'edgelist_label_offsets' array to be proportional to the // number of labels - result_offsets->resize(num_labels + 1 ,handle.get_stream()); - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size()), - result_offsets->begin(), - [] __device__(auto idx) { - return 0; - }); - } - + result_offsets->resize(num_labels + 1, handle.get_stream()); + + thrust::transform(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size()), + result_offsets->begin(), + [] __device__(auto idx) { return 0; }); + } + return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd index 149d34473b..38781614b2 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/algorithms.pxd @@ -244,7 +244,7 @@ cdef extern from "cugraph_c/algorithms.h": cugraph_sample_result_get_label_hop_offsets( const cugraph_sample_result_t* result ) - + cdef cugraph_type_erased_device_array_view_t* \ cugraph_sample_result_get_label_type_hop_offsets( const cugraph_sample_result_t* result diff --git a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx index 9e53d0aed9..a2ea7cb971 100644 --- a/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx +++ b/python/pylibcugraph/pylibcugraph/internal_types/sampling_result.pyx @@ -206,7 +206,7 @@ cdef class SamplingResult: return create_cupy_array_view_for_device_ptr(device_array_view_ptr, self) - + def get_label_type_hop_offsets(self): if self.c_sample_result_ptr is NULL: raise ValueError("pointer not set, must call set_ptr() with a " From 288b8fc5450b168b618475e0809b21420d703ccb Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 20:36:35 -0800 Subject: [PATCH 08/42] remove debug print and properly compute the 'result_offsets' --- cpp/src/c_api/neighbor_sampling.cpp | 2 - cpp/src/sampling/neighbor_sampling_impl.hpp | 88 ++++++++++++++------- 2 files changed, 61 insertions(+), 29 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index c343adc228..4fd4f346b3 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -1012,8 +1012,6 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { options_.with_replacement_}, do_expensive_check_); } else { - raft::print_device_vector( - "labels", (*start_vertex_labels).data(), (*start_vertex_labels).size(), std::cout); std::tie(src, dst, wgt, edge_id, edge_type, hop, offsets) = cugraph::heterogeneous_uniform_neighbor_sample( handle_, diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 0a5d65ea1e..3bc2c90c57 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -370,36 +370,70 @@ neighbor_sample_impl(raft::handle_t const& handle, } std::optional> result_offsets{std::nullopt}; + std::optional> cp_result_labels{std::nullopt}; + if (result_labels) { + cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - std::tie(result_srcs, - result_dsts, - result_weights, - result_edge_ids, - result_edge_types, - result_hops, - result_labels, - result_offsets) = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - if (result_srcs.size() == 0) { - // Update the 'edgelist_label_offsets' array to be proportional to the - // number of labels - result_offsets->resize(num_labels + 1, handle.get_stream()); - - thrust::transform(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size()), - result_offsets->begin(), - [] __device__(auto idx) { return 0; }); + thrust::copy( + handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end(), + cp_result_labels->begin()); } + // FIXME: remove the offsets computation in 'shuffle_and_organize_output' as it doesn't + // account account for missing labels + std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, + result_edge_types, result_hops, result_labels, result_offsets) + = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + + if (result_labels) { + // Re-compute the result_offsets and account for missing labels + result_offsets = rmm::device_uvector(num_labels + 1, handle.get_stream()); + + // Sort labels + thrust::sort( + handle.get_thrust_policy(), + cp_result_labels->begin(), + cp_result_labels->end()); + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size()), + result_offsets->begin() + 1, + [ + result_labels = raft::device_span( + cp_result_labels->data(), + cp_result_labels->size()) + ] __device__(auto idx) { + auto itr_lower = thrust::lower_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto itr_upper = thrust::upper_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto sampled_label_size = thrust::distance(itr_lower, itr_upper); + + return sampled_label_size; + }); + + // Run inclusive scan + thrust::inclusive_scan(handle.get_thrust_policy(), + result_offsets->begin() + 1, + result_offsets->end(), + result_offsets->begin() + 1); + } + return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), From 9f459dedc5707f92786b7c3140461fd035b60629 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 20:41:25 -0800 Subject: [PATCH 09/42] update docstrings --- cpp/src/sampling/neighbor_sampling_impl.hpp | 92 ++++++++++----------- 1 file changed, 44 insertions(+), 48 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 3bc2c90c57..0e6bf50daa 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -374,66 +374,62 @@ neighbor_sample_impl(raft::handle_t const& handle, if (result_labels) { cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end(), - cp_result_labels->begin()); + thrust::copy(handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end(), + cp_result_labels->begin()); } // FIXME: remove the offsets computation in 'shuffle_and_organize_output' as it doesn't - // account account for missing labels - std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, - result_edge_types, result_hops, result_labels, result_offsets) - = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - + // account for missing labels that are not sampled. + std::tie(result_srcs, + result_dsts, + result_weights, + result_edge_ids, + result_edge_types, + result_hops, + result_labels, + result_offsets) = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); if (result_labels) { // Re-compute the result_offsets and account for missing labels result_offsets = rmm::device_uvector(num_labels + 1, handle.get_stream()); // Sort labels - thrust::sort( - handle.get_thrust_policy(), - cp_result_labels->begin(), - cp_result_labels->end()); - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size()), - result_offsets->begin() + 1, - [ - result_labels = raft::device_span( - cp_result_labels->data(), - cp_result_labels->size()) - ] __device__(auto idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto itr_upper = thrust::upper_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto sampled_label_size = thrust::distance(itr_lower, itr_upper); - - return sampled_label_size; - }); - + thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); + + thrust::transform(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size()), + result_offsets->begin() + 1, + [result_labels = raft::device_span( + cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { + auto itr_lower = thrust::lower_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto itr_upper = thrust::upper_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto sampled_label_size = thrust::distance(itr_lower, itr_upper); + + return sampled_label_size; + }); + // Run inclusive scan thrust::inclusive_scan(handle.get_thrust_policy(), - result_offsets->begin() + 1, - result_offsets->end(), - result_offsets->begin() + 1); + result_offsets->begin() + 1, + result_offsets->end(), + result_offsets->begin() + 1); } - + return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), From fc0426d79bf6b307c2c24c5d243b5f41b0b33485 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 20:43:47 -0800 Subject: [PATCH 10/42] directly return the result --- cpp/src/sampling/neighbor_sampling_impl.hpp | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 0e6bf50daa..0b20b57d2b 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -418,9 +418,7 @@ neighbor_sample_impl(raft::handle_t const& handle, auto itr_upper = thrust::upper_bound( thrust::seq, result_labels.begin(), result_labels.end(), idx); - auto sampled_label_size = thrust::distance(itr_lower, itr_upper); - - return sampled_label_size; + return thrust::distance(itr_lower, itr_upper); }); // Run inclusive scan From d997656f4e4c5e66fab4e96d493cc507bbb2646b Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 20:47:35 -0800 Subject: [PATCH 11/42] fix illegal memory access --- cpp/src/sampling/neighbor_sampling_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 0b20b57d2b..2ddc85a41b 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -408,7 +408,7 @@ neighbor_sample_impl(raft::handle_t const& handle, thrust::transform(handle.get_thrust_policy(), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size()), + thrust::make_counting_iterator(result_offsets->size() - 1), result_offsets->begin() + 1, [result_labels = raft::device_span( cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { From fc70811d69780ce9e838afbb5a591d09d4fdb4b5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 21:45:06 -0800 Subject: [PATCH 12/42] rename variable for consistency --- cpp/src/c_api/neighbor_sampling.cpp | 88 ++++++++++++++--------------- 1 file changed, 44 insertions(+), 44 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 4fd4f346b3..5171248103 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -778,7 +778,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_edge_property_view_t const* edge_biases_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertex_offsets_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; int num_edge_types_{}; cugraph::c_api::cugraph_sampling_options_t options_{}; @@ -791,7 +791,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { cugraph_graph_t* graph, cugraph_edge_property_view_t const* edge_biases, cugraph_type_erased_device_array_view_t const* start_vertices, - cugraph_type_erased_device_array_view_t const* start_vertex_offsets, + cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, cugraph_type_erased_host_array_view_t const* fan_out, int num_edge_types, cugraph::c_api::cugraph_sampling_options_t options, @@ -806,9 +806,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { start_vertices_( reinterpret_cast( start_vertices)), - start_vertex_offsets_( + starting_vertex_label_offsets_( reinterpret_cast( - start_vertex_offsets)), + starting_vertex_label_offsets)), fan_out_( reinterpret_cast(fan_out)), num_edge_types_(num_edge_types), @@ -880,17 +880,17 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> renumbered_and_sorted_edge_id_renumber_map_label_type_offsets(std::nullopt); - if (start_vertex_offsets_ != nullptr) { + if (starting_vertex_label_offsets_ != nullptr) { // Retrieve the start_vertex_labels start_vertex_labels = cugraph::detail::convert_starting_vertex_label_offsets_to_labels( handle_, - raft::device_span{start_vertex_offsets_->as_type(), - start_vertex_offsets_->size_}); + raft::device_span{starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}); // Get the number of labels on each GPU if constexpr (multi_gpu) { - auto num_local_labels = start_vertex_offsets_->size_ - 1; + auto num_local_labels = starting_vertex_label_offsets_->size_ - 1; auto global_labels = cugraph::host_scalar_allgather( handle_.get_comms(), num_local_labels, handle_.get_stream()); @@ -898,7 +898,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::exclusive_scan( global_labels.begin(), global_labels.end(), global_labels.begin(), label_t{0}); - // Compute the global start_vertex_label_offsets + // Compute the global starting_vertex_label_offsets cugraph::detail::transform_increment_ints( raft::device_span{(*start_vertex_labels).data(), @@ -997,7 +997,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, (edge_biases != nullptr) ? *edge_biases : edge_weights->view(), raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1021,7 +1021,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1049,7 +1049,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, (edge_biases != nullptr) ? *edge_biases : edge_weights->view(), raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1072,7 +1072,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (edge_ids != nullptr) ? std::make_optional(edge_ids->view()) : std::nullopt, (edge_types != nullptr) ? std::make_optional(edge_types->view()) : std::nullopt, raft::device_span{start_vertices.data(), start_vertices.size()}, - (start_vertex_offsets_ != nullptr) + (starting_vertex_label_offsets_ != nullptr) ? std::make_optional>((*start_vertex_labels).data(), (*start_vertex_labels).size()) : std::nullopt, @@ -1156,7 +1156,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { : std::nullopt, options_.retain_seeds_ ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) + starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) @@ -1201,7 +1201,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { : std::nullopt, options_.retain_seeds_ ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) + starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) @@ -1261,7 +1261,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { : std::nullopt, options_.retain_seeds_ ? std::make_optional(raft::device_span{ - start_vertex_offsets_->as_type(), start_vertex_offsets_->size_}) + starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) @@ -2038,7 +2038,7 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -2049,17 +2049,17 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( auto options_cpp = *reinterpret_cast(options); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2082,7 +2082,7 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( graph, nullptr, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, fan_out, num_edge_types, std::move(options_cpp), @@ -2097,7 +2097,7 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( cugraph_graph_t* graph, const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -2115,17 +2115,17 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( *error); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2148,7 +2148,7 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( graph, edge_biases, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, fan_out, num_edge_types, std::move(options_cpp), @@ -2162,7 +2162,7 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, // RENAME? + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, // RENAME? const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -2172,17 +2172,17 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( auto options_cpp = *reinterpret_cast(options); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2205,7 +2205,7 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( graph, nullptr, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, fan_out, 1, // num_edge_types std::move(options_cpp), @@ -2220,7 +2220,7 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( cugraph_graph_t* graph, const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* start_vertex_offsets, + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -2237,17 +2237,17 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( *error); // FIXME: Should we maintain this contition? - CAPI_EXPECTS((!options_cpp.retain_seeds_) || (start_vertex_offsets != nullptr), + CAPI_EXPECTS((!options_cpp.retain_seeds_) || (starting_vertex_label_offsets != nullptr), CUGRAPH_INVALID_INPUT, - "must specify start_vertex_offsets if retain_seeds is true", + "must specify starting_vertex_label_offsets if retain_seeds is true", *error); - CAPI_EXPECTS((start_vertex_offsets == nullptr) || + CAPI_EXPECTS((starting_vertex_label_offsets == nullptr) || (reinterpret_cast( - start_vertex_offsets) + starting_vertex_label_offsets) ->type_ == SIZE_T), CUGRAPH_INVALID_INPUT, - "start_vertex_offsets should be of type size_t", + "starting_vertex_label_offsets should be of type size_t", *error); CAPI_EXPECTS( @@ -2270,7 +2270,7 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( graph, edge_biases, start_vertices, - start_vertex_offsets, + starting_vertex_label_offsets, fan_out, 1, // num_edge_types std::move(options_cpp), From 463f12b8cb5d908fcf9595317e0c529d34f51a75 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 21:45:42 -0800 Subject: [PATCH 13/42] fix style --- cpp/src/c_api/neighbor_sampling.cpp | 50 +++++++++++++++-------------- 1 file changed, 26 insertions(+), 24 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 5171248103..1cc212da5d 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -778,7 +778,8 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_edge_property_view_t const* edge_biases_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{ + nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; int num_edge_types_{}; cugraph::c_api::cugraph_sampling_options_t options_{}; @@ -786,17 +787,18 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { bool do_expensive_check_{false}; cugraph::c_api::cugraph_sample_result_t* result_{nullptr}; - neighbor_sampling_functor(cugraph_resource_handle_t const* handle, - cugraph_rng_state_t* rng_state, - cugraph_graph_t* graph, - cugraph_edge_property_view_t const* edge_biases, - cugraph_type_erased_device_array_view_t const* start_vertices, - cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, - cugraph_type_erased_host_array_view_t const* fan_out, - int num_edge_types, - cugraph::c_api::cugraph_sampling_options_t options, - bool is_biased, - bool do_expensive_check) + neighbor_sampling_functor( + cugraph_resource_handle_t const* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + cugraph_edge_property_view_t const* edge_biases, + cugraph_type_erased_device_array_view_t const* start_vertices, + cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, + cugraph_type_erased_host_array_view_t const* fan_out, + int num_edge_types, + cugraph::c_api::cugraph_sampling_options_t options, + bool is_biased, + bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), rng_state_(reinterpret_cast(rng_state)), @@ -1154,10 +1156,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -1199,10 +1201,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -1259,10 +1261,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, From 3767b94ecfc4de95a0b19ac746ee7c30a79a6fd3 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Thu, 21 Nov 2024 21:50:15 -0800 Subject: [PATCH 14/42] rename variable for consistency --- cpp/include/cugraph/sampling_functions.hpp | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpp/include/cugraph/sampling_functions.hpp b/cpp/include/cugraph/sampling_functions.hpp index 3d41e95441..981c42135f 100644 --- a/cpp/include/cugraph/sampling_functions.hpp +++ b/cpp/include/cugraph/sampling_functions.hpp @@ -306,7 +306,7 @@ struct sampling_flags_t { * @param edge_type_view Optional view object holding edge types for @p graph_view. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. @@ -340,7 +340,7 @@ homogeneous_uniform_neighbor_sample( std::optional> edge_id_view, std::optional> edge_type_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, sampling_flags_t sampling_flags, @@ -385,7 +385,7 @@ homogeneous_uniform_neighbor_sample( * corresponding edge can never be selected. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. @@ -421,7 +421,7 @@ homogeneous_biased_neighbor_sample( std::optional> edge_type_view, edge_property_view_t edge_bias_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, sampling_flags_t sampling_flags, @@ -462,7 +462,7 @@ homogeneous_biased_neighbor_sample( * @param edge_type_view Optional view object holding edge types for @p graph_view. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. @@ -498,7 +498,7 @@ heterogeneous_uniform_neighbor_sample( std::optional> edge_id_view, std::optional> edge_type_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, edge_type_t num_edge_types, @@ -545,7 +545,7 @@ heterogeneous_uniform_neighbor_sample( * corresponding edge can never be selected. * @param starting_vertices Device span of starting vertex IDs for the sampling. * In a multi-gpu context the starting vertices should be local to this GPU. - * @param starting_vertex_label_offsets Optional device span of labels associated with each starting + * @param starting_vertex_labels Optional device span of labels associated with each starting * vertex for the sampling. * @param label_to_output_comm_rank Optional device span identifying which rank should get sampling * outputs of each vertex label. This should be the same on each rank. @@ -583,7 +583,7 @@ heterogeneous_biased_neighbor_sample( std::optional> edge_type_view, edge_property_view_t edge_bias_view, raft::device_span starting_vertices, - std::optional> starting_vertex_label_offsets, + std::optional> starting_vertex_labels, std::optional> label_to_output_comm_rank, raft::host_span fan_out, edge_type_t num_edge_types, From 6aeee2830bd535885571998fe5d55016c239a826 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 22 Nov 2024 12:05:28 -0800 Subject: [PATCH 15/42] add support for vertex type --- cpp/include/cugraph_c/sampling_algorithms.h | 6 ++ cpp/src/c_api/neighbor_sampling.cpp | 97 ++++++++++++--------- 2 files changed, 63 insertions(+), 40 deletions(-) diff --git a/cpp/include/cugraph_c/sampling_algorithms.h b/cpp/include/cugraph_c/sampling_algorithms.h index 499b313155..f048d338b9 100644 --- a/cpp/include/cugraph_c/sampling_algorithms.h +++ b/cpp/include/cugraph_c/sampling_algorithms.h @@ -551,6 +551,8 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( * @param [in] start_vertices Device array of start vertices for the sampling * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in * the seed list. This parameter is only used with the retain_seeds option. + * @param [in] vertex_type_offsets Device array of the offsets for each vertex type in the + * graph. * @param [in] fan_out Host array defining the fan out at each step in the sampling * algorithm. We only support fan_out values of type INT32 * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous @@ -570,6 +572,7 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -598,6 +601,8 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( * @param [in] start_vertices Device array of start vertices for the sampling * @param [in] starting_vertex_label_offsets Device array of the offsets for each label in * the seed list. This parameter is only used with the retain_seeds option. + * @param [in] vertex_type_offsets Device array of the offsets for each vertex type in the + * graph. * @param [in] fan_out Host array defining the fan out at each step in the sampling * algorithm. We only support fan_out values of type INT32 * @param [in] num_edge_types Number of edge types where a value of 1 translates to homogeneous @@ -618,6 +623,7 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 1cc212da5d..54fec00232 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -778,8 +778,8 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_edge_property_view_t const* edge_biases_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{ - nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertex_type_offsets_{nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; int num_edge_types_{}; cugraph::c_api::cugraph_sampling_options_t options_{}; @@ -787,18 +787,18 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { bool do_expensive_check_{false}; cugraph::c_api::cugraph_sample_result_t* result_{nullptr}; - neighbor_sampling_functor( - cugraph_resource_handle_t const* handle, - cugraph_rng_state_t* rng_state, - cugraph_graph_t* graph, - cugraph_edge_property_view_t const* edge_biases, - cugraph_type_erased_device_array_view_t const* start_vertices, - cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, - cugraph_type_erased_host_array_view_t const* fan_out, - int num_edge_types, - cugraph::c_api::cugraph_sampling_options_t options, - bool is_biased, - bool do_expensive_check) + neighbor_sampling_functor(cugraph_resource_handle_t const* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + cugraph_edge_property_view_t const* edge_biases, + cugraph_type_erased_device_array_view_t const* start_vertices, + cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, + cugraph_type_erased_device_array_view_t const* vertex_type_offsets, + cugraph_type_erased_host_array_view_t const* fan_out, + int num_edge_types, + cugraph::c_api::cugraph_sampling_options_t options, + bool is_biased, + bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), rng_state_(reinterpret_cast(rng_state)), @@ -811,6 +811,9 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { starting_vertex_label_offsets_( reinterpret_cast( starting_vertex_label_offsets)), + vertex_type_offsets_( + reinterpret_cast( + vertex_type_offsets)), fan_out_( reinterpret_cast(fan_out)), num_edge_types_(num_edge_types), @@ -875,7 +878,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> start_vertex_labels{std::nullopt}; std::optional> local_label_to_comm_rank{std::nullopt}; std::optional> label_to_comm_rank{ - std::nullopt}; // global after allgatherv + std::nullopt}; // global after allgatherv std::optional> renumbered_and_sorted_edge_id_renumber_map( std::nullopt); @@ -1156,10 +1159,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), - starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -1201,10 +1204,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), - starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -1225,17 +1228,19 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { } else { // heterogeneous renumbering - // FIXME: If no 'vertex_type_offsets' is provided, all vertices are assumed to have - // a vertex type of value 1. Update the API once 'vertex_type_offsets' is supported rmm::device_uvector vertex_type_offsets(2, handle_.get_stream()); - cugraph::detail::stride_fill(handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0}, - vertex_t{graph_view.local_vertex_partition_range_size()} + if (vertex_type_offsets_ != nullptr) { + // If no 'vertex_type_offsets' is provided, all vertices are assumed to have + // a vertex type of value 1. + cugraph::detail::stride_fill(handle_.get_stream(), + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}, + vertex_t{graph_view.local_vertex_partition_range_size()} - ); + ); + } rmm::device_uvector output_majors(0, handle_.get_stream()); rmm::device_uvector output_renumber_map(0, handle_.get_stream()); @@ -1261,21 +1266,27 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), - starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ + ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, - raft::device_span{vertex_type_offsets.data(), + + (vertex_type_offsets_ != nullptr) + ? raft::device_span{vertex_type_offsets_->as_type(), vertex_type_offsets_->size_} + : raft::device_span{vertex_type_offsets.data(), vertex_type_offsets.size()}, - + edge_label ? (*offsets).size() - 1 : size_t{1}, hop ? fan_out_->size_ : size_t{1}, - vertex_type_offsets.size() - - 1, // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided + (vertex_type_offsets_ != nullptr) + ? vertex_type_offsets_->size_ - 1 + : vertex_type_offsets.size() - 1, + + // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided num_edge_types_, src_is_major, do_expensive_check_); @@ -2041,6 +2052,7 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -2085,6 +2097,7 @@ cugraph_error_code_t cugraph_heterogeneous_uniform_neighbor_sample( nullptr, start_vertices, starting_vertex_label_offsets, + vertex_type_offsets, fan_out, num_edge_types, std::move(options_cpp), @@ -2100,6 +2113,7 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_type_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -2151,6 +2165,7 @@ cugraph_error_code_t cugraph_heterogeneous_biased_neighbor_sample( edge_biases, start_vertices, starting_vertex_label_offsets, + vertex_type_offsets, fan_out, num_edge_types, std::move(options_cpp), @@ -2164,7 +2179,7 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( cugraph_rng_state_t* rng_state, cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, - const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, // RENAME? + const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, const cugraph_type_erased_host_array_view_t* fan_out, const cugraph_sampling_options_t* options, bool_t do_expensive_check, @@ -2208,6 +2223,7 @@ cugraph_error_code_t cugraph_homogeneous_uniform_neighbor_sample( nullptr, start_vertices, starting_vertex_label_offsets, + nullptr, fan_out, 1, // num_edge_types std::move(options_cpp), @@ -2273,6 +2289,7 @@ cugraph_error_code_t cugraph_homogeneous_biased_neighbor_sample( edge_biases, start_vertices, starting_vertex_label_offsets, + nullptr, fan_out, 1, // num_edge_types std::move(options_cpp), From 223a73b1383bc0aa69a0626ecd013769917bf176 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 22 Nov 2024 12:34:14 -0800 Subject: [PATCH 16/42] add support for vertex type at the plc layer --- cpp/src/c_api/neighbor_sampling.cpp | 2 +- .../heterogeneous_biased_neighbor_sample.pyx | 21 +++++++++++++++++++ .../heterogeneous_uniform_neighbor_sample.pyx | 20 ++++++++++++++++++ 3 files changed, 42 insertions(+), 1 deletion(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 54fec00232..a87fd0cfd8 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -1230,7 +1230,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { rmm::device_uvector vertex_type_offsets(2, handle_.get_stream()); - if (vertex_type_offsets_ != nullptr) { + if (vertex_type_offsets_ == nullptr) { // If no 'vertex_type_offsets' is provided, all vertices are assumed to have // a vertex type of value 1. cugraph::detail::stride_fill(handle_.get_stream(), diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx index 0f49dec712..e506a413ab 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx @@ -87,6 +87,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, _GPUGraph input_graph, start_vertex_list, starting_vertex_label_offsets, + vertex_type_offsets, h_fan_out, num_edge_types, bool_t with_replacement, @@ -123,6 +124,9 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, Offsets of each label within the start vertex list. Expanding 'starting_vertex_label_offsets' must lead to an array of len(start_vertex_list) + + vertex_type_offsets: device array type (Optional) + Offsets for each vertex type in the graph. h_fan_out: numpy array type Device array containing the branching out (fan-out) degrees per @@ -247,6 +251,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, assert_CAI_type(start_vertex_list, "start_vertex_list") assert_CAI_type(starting_vertex_label_offsets, "starting_vertex_label_offsets", True) + assert_CAI_type(vertex_type_offsets, "vertex_type_offsets", True) assert_AI_type(h_fan_out, "h_fan_out") @@ -276,6 +281,11 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, if starting_vertex_label_offsets is not None: cai_starting_vertex_label_offsets_ptr = \ starting_vertex_label_offsets.__cuda_array_interface__['data'][0] + + cdef uintptr_t cai_vertex_type_offsets_ptr + if vertex_type_offsets is not None: + cai_vertex_type_offsets_ptr = \ + vertex_type_offsets.__cuda_array_interface__['data'][0] cdef cugraph_type_erased_device_array_view_t* start_vertex_list_ptr = \ @@ -293,6 +303,16 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, len(starting_vertex_label_offsets), SIZE_T ) + + cdef cugraph_type_erased_device_array_view_t* vertex_type_offsets_ptr = NULL + if vertex_type_offsets is not None: + vertex_type_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_vertex_type_offsets_ptr, + len(vertex_type_offsets), + SIZE_T + ) + cdef cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL if retain_seeds: @@ -354,6 +374,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, NULL, # FIXME: Add support for biased neighbor sampling start_vertex_list_ptr, starting_vertex_label_offsets_ptr, + vertex_type_offsets_ptr, fan_out_ptr, num_edge_types, sampling_options, diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx index c586fcdff8..6e8ab93de6 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx @@ -84,6 +84,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, _GPUGraph input_graph, start_vertex_list, starting_vertex_label_offsets, + vertex_type_offsets, h_fan_out, num_edge_types, bool_t with_replacement, @@ -118,6 +119,9 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, Offsets of each label within the start vertex list. Expanding 'starting_vertex_label_offsets' must lead to an array of len(start_vertex_list) + + vertex_type_offsets: device array type (Optional) + Offsets for each vertex type in the graph. h_fan_out: numpy array type Device array containing the branching out (fan-out) degrees per @@ -242,6 +246,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, assert_CAI_type(start_vertex_list, "start_vertex_list") assert_CAI_type(starting_vertex_label_offsets, "starting_vertex_label_offsets", True) + assert_CAI_type(vertex_type_offsets, "vertex_type_offsets", True) assert_AI_type(h_fan_out, "h_fan_out") @@ -270,6 +275,11 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, if starting_vertex_label_offsets is not None: cai_starting_vertex_label_offsets_ptr = \ starting_vertex_label_offsets.__cuda_array_interface__['data'][0] + + cdef uintptr_t cai_vertex_type_offsets_ptr + if vertex_type_offsets is not None: + cai_vertex_type_offsets_ptr = \ + vertex_type_offsets.__cuda_array_interface__['data'][0] cdef cugraph_type_erased_device_array_view_t* start_vertex_list_ptr = \ @@ -288,6 +298,15 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, SIZE_T ) + cdef cugraph_type_erased_device_array_view_t* vertex_type_offsets_ptr = NULL + if vertex_type_offsets is not None: + vertex_type_offsets_ptr = \ + cugraph_type_erased_device_array_view_create( + cai_vertex_type_offsets_ptr, + len(vertex_type_offsets), + SIZE_T + ) + cdef cugraph_type_erased_device_array_view_t* label_offsets_ptr = NULL if retain_seeds: if starting_vertex_label_offsets is None: @@ -347,6 +366,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, c_graph_ptr, start_vertex_list_ptr, starting_vertex_label_offsets_ptr, + vertex_type_offsets_ptr, fan_out_ptr, num_edge_types, sampling_options, From 061c8ccbd0637a8a1df28c204231b840342e2fac Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 22 Nov 2024 12:36:29 -0800 Subject: [PATCH 17/42] properly handle missing edge types --- cpp/src/sampling/neighbor_sampling_impl.hpp | 137 ++++++++++++-------- 1 file changed, 81 insertions(+), 56 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 2ddc85a41b..142d8054ab 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -30,6 +30,7 @@ #include #include +#include namespace cugraph { namespace detail { @@ -104,14 +105,38 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); + + label_t num_unique_labels = 0; + + std::optional> cp_starting_vertex_labels{std::nullopt}; - label_t num_labels = 0; if (starting_vertex_labels) { - // Initial number of labels. Will be leveraged if there is no sampling result - num_labels = starting_vertex_labels->size(); + // Find the number of unique lables + cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + starting_vertex_labels->begin(), + starting_vertex_labels->end(), + cp_starting_vertex_labels->begin()); + + thrust::sort( + handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + + num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + + } + + + + if (num_edge_types > 1) { for (int i = 0; i < num_edge_types; i++) { cugraph::edge_property_t, bool> @@ -374,60 +399,60 @@ neighbor_sample_impl(raft::handle_t const& handle, if (result_labels) { cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end(), - cp_result_labels->begin()); + thrust::copy( + handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end(), + cp_result_labels->begin()); } - - // FIXME: remove the offsets computation in 'shuffle_and_organize_output' as it doesn't - // account for missing labels that are not sampled. - std::tie(result_srcs, - result_dsts, - result_weights, - result_edge_ids, - result_edge_types, - result_hops, - result_labels, - result_offsets) = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - if (result_labels) { - // Re-compute the result_offsets and account for missing labels - result_offsets = rmm::device_uvector(num_labels + 1, handle.get_stream()); - - // Sort labels - thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); - - thrust::transform(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size() - 1), - result_offsets->begin() + 1, - [result_labels = raft::device_span( - cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto itr_upper = thrust::upper_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - return thrust::distance(itr_lower, itr_upper); - }); - - // Run inclusive scan - thrust::inclusive_scan(handle.get_thrust_policy(), - result_offsets->begin() + 1, - result_offsets->end(), - result_offsets->begin() + 1); - } - + std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, + result_edge_types, result_hops, result_labels, result_offsets) + = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { + // There are missing labels not sampled. + result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); + + // Sort labels + thrust::sort( + handle.get_thrust_policy(), + cp_result_labels->begin(), + cp_result_labels->end()); + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size() - 1), + result_offsets->begin() + 1, + [ + result_labels = raft::device_span( + cp_result_labels->data(), + cp_result_labels->size()) + ] __device__(auto idx) { + auto itr_lower = thrust::lower_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto itr_upper = thrust::upper_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto sampled_label_size = thrust::distance(itr_lower, itr_upper); + return sampled_label_size; + }); + + // Run inclusive scan + thrust::inclusive_scan(handle.get_thrust_policy(), + result_offsets->begin() + 1, + result_offsets->end(), + result_offsets->begin() + 1); + } return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), From ed4c06953ab2decf637551aa18aa360e5f3cc43d Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Fri, 22 Nov 2024 12:37:57 -0800 Subject: [PATCH 18/42] fix style --- cpp/src/c_api/neighbor_sampling.cpp | 78 +++++----- cpp/src/sampling/neighbor_sampling_impl.hpp | 138 ++++++++---------- .../heterogeneous_biased_neighbor_sample.pyx | 6 +- .../heterogeneous_uniform_neighbor_sample.pyx | 4 +- 4 files changed, 109 insertions(+), 117 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index a87fd0cfd8..600c2fe1a7 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -778,7 +778,8 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { cugraph::c_api::cugraph_graph_t* graph_{nullptr}; cugraph::c_api::cugraph_edge_property_view_t const* edge_biases_{nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* start_vertices_{nullptr}; - cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{nullptr}; + cugraph::c_api::cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets_{ + nullptr}; cugraph::c_api::cugraph_type_erased_device_array_view_t const* vertex_type_offsets_{nullptr}; cugraph::c_api::cugraph_type_erased_host_array_view_t const* fan_out_{nullptr}; int num_edge_types_{}; @@ -787,18 +788,19 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { bool do_expensive_check_{false}; cugraph::c_api::cugraph_sample_result_t* result_{nullptr}; - neighbor_sampling_functor(cugraph_resource_handle_t const* handle, - cugraph_rng_state_t* rng_state, - cugraph_graph_t* graph, - cugraph_edge_property_view_t const* edge_biases, - cugraph_type_erased_device_array_view_t const* start_vertices, - cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, - cugraph_type_erased_device_array_view_t const* vertex_type_offsets, - cugraph_type_erased_host_array_view_t const* fan_out, - int num_edge_types, - cugraph::c_api::cugraph_sampling_options_t options, - bool is_biased, - bool do_expensive_check) + neighbor_sampling_functor( + cugraph_resource_handle_t const* handle, + cugraph_rng_state_t* rng_state, + cugraph_graph_t* graph, + cugraph_edge_property_view_t const* edge_biases, + cugraph_type_erased_device_array_view_t const* start_vertices, + cugraph_type_erased_device_array_view_t const* starting_vertex_label_offsets, + cugraph_type_erased_device_array_view_t const* vertex_type_offsets, + cugraph_type_erased_host_array_view_t const* fan_out, + int num_edge_types, + cugraph::c_api::cugraph_sampling_options_t options, + bool is_biased, + bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), rng_state_(reinterpret_cast(rng_state)), @@ -878,7 +880,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { std::optional> start_vertex_labels{std::nullopt}; std::optional> local_label_to_comm_rank{std::nullopt}; std::optional> label_to_comm_rank{ - std::nullopt}; // global after allgatherv + std::nullopt}; // global after allgatherv std::optional> renumbered_and_sorted_edge_id_renumber_map( std::nullopt); @@ -1159,10 +1161,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -1204,10 +1206,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, @@ -1234,10 +1236,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { // If no 'vertex_type_offsets' is provided, all vertices are assumed to have // a vertex type of value 1. cugraph::detail::stride_fill(handle_.get_stream(), - vertex_type_offsets.begin(), - vertex_type_offsets.size(), - vertex_t{0}, - vertex_t{graph_view.local_vertex_partition_range_size()} + vertex_type_offsets.begin(), + vertex_type_offsets.size(), + vertex_t{0}, + vertex_t{graph_view.local_vertex_partition_range_size()} ); } @@ -1266,27 +1268,27 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { ? std::make_optional(raft::device_span{ start_vertices_->as_type(), start_vertices_->size_}) : std::nullopt, - options_.retain_seeds_ - ? std::make_optional(raft::device_span{ - starting_vertex_label_offsets_->as_type(), starting_vertex_label_offsets_->size_}) - : std::nullopt, + options_.retain_seeds_ ? std::make_optional(raft::device_span{ + starting_vertex_label_offsets_->as_type(), + starting_vertex_label_offsets_->size_}) + : std::nullopt, offsets ? std::make_optional( raft::device_span{offsets->data(), offsets->size()}) : std::nullopt, - + (vertex_type_offsets_ != nullptr) - ? raft::device_span{vertex_type_offsets_->as_type(), vertex_type_offsets_->size_} + ? raft::device_span{vertex_type_offsets_->as_type(), + vertex_type_offsets_->size_} : raft::device_span{vertex_type_offsets.data(), - vertex_type_offsets.size()}, - + vertex_type_offsets.size()}, + edge_label ? (*offsets).size() - 1 : size_t{1}, hop ? fan_out_->size_ : size_t{1}, - (vertex_type_offsets_ != nullptr) - ? vertex_type_offsets_->size_ - 1 - : vertex_type_offsets.size() - 1, + (vertex_type_offsets_ != nullptr) ? vertex_type_offsets_->size_ - 1 + : vertex_type_offsets.size() - 1, - // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided + // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided num_edge_types_, src_is_major, do_expensive_check_); diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 142d8054ab..a09cf757a5 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -30,6 +30,7 @@ #include #include + #include namespace cugraph { @@ -105,38 +106,30 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); - + label_t num_unique_labels = 0; std::optional> cp_starting_vertex_labels{std::nullopt}; - if (starting_vertex_labels) { // Find the number of unique lables - cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - starting_vertex_labels->begin(), - starting_vertex_labels->end(), - cp_starting_vertex_labels->begin()); - - thrust::sort( - handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - + cp_starting_vertex_labels = + rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), + starting_vertex_labels->begin(), + starting_vertex_labels->end(), + cp_starting_vertex_labels->begin()); + + thrust::sort(handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), cp_starting_vertex_labels->begin(), cp_starting_vertex_labels->end()); - - } - - - - if (num_edge_types > 1) { for (int i = 0; i < num_edge_types; i++) { cugraph::edge_property_t, bool> @@ -399,60 +392,57 @@ neighbor_sample_impl(raft::handle_t const& handle, if (result_labels) { cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end(), - cp_result_labels->begin()); + thrust::copy(handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end(), + cp_result_labels->begin()); + } + std::tie(result_srcs, + result_dsts, + result_weights, + result_edge_ids, + result_edge_types, + result_hops, + result_labels, + result_offsets) = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { + // There are missing labels not sampled. + result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); + + // Sort labels + thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); + + thrust::transform(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size() - 1), + result_offsets->begin() + 1, + [result_labels = raft::device_span( + cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { + auto itr_lower = thrust::lower_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto itr_upper = thrust::upper_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto sampled_label_size = thrust::distance(itr_lower, itr_upper); + return sampled_label_size; + }); + + // Run inclusive scan + thrust::inclusive_scan(handle.get_thrust_policy(), + result_offsets->begin() + 1, + result_offsets->end(), + result_offsets->begin() + 1); } - std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, - result_edge_types, result_hops, result_labels, result_offsets) - = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { - // There are missing labels not sampled. - result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); - - // Sort labels - thrust::sort( - handle.get_thrust_policy(), - cp_result_labels->begin(), - cp_result_labels->end()); - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size() - 1), - result_offsets->begin() + 1, - [ - result_labels = raft::device_span( - cp_result_labels->data(), - cp_result_labels->size()) - ] __device__(auto idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto itr_upper = thrust::upper_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto sampled_label_size = thrust::distance(itr_lower, itr_upper); - return sampled_label_size; - }); - - // Run inclusive scan - thrust::inclusive_scan(handle.get_thrust_policy(), - result_offsets->begin() + 1, - result_offsets->end(), - result_offsets->begin() + 1); - } return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx index e506a413ab..ee0e85fa3b 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_biased_neighbor_sample.pyx @@ -124,7 +124,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, Offsets of each label within the start vertex list. Expanding 'starting_vertex_label_offsets' must lead to an array of len(start_vertex_list) - + vertex_type_offsets: device array type (Optional) Offsets for each vertex type in the graph. @@ -281,7 +281,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, if starting_vertex_label_offsets is not None: cai_starting_vertex_label_offsets_ptr = \ starting_vertex_label_offsets.__cuda_array_interface__['data'][0] - + cdef uintptr_t cai_vertex_type_offsets_ptr if vertex_type_offsets is not None: cai_vertex_type_offsets_ptr = \ @@ -303,7 +303,7 @@ def heterogeneous_biased_neighbor_sample(ResourceHandle resource_handle, len(starting_vertex_label_offsets), SIZE_T ) - + cdef cugraph_type_erased_device_array_view_t* vertex_type_offsets_ptr = NULL if vertex_type_offsets is not None: vertex_type_offsets_ptr = \ diff --git a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx index 6e8ab93de6..dbee65323d 100644 --- a/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx +++ b/python/pylibcugraph/pylibcugraph/heterogeneous_uniform_neighbor_sample.pyx @@ -119,7 +119,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, Offsets of each label within the start vertex list. Expanding 'starting_vertex_label_offsets' must lead to an array of len(start_vertex_list) - + vertex_type_offsets: device array type (Optional) Offsets for each vertex type in the graph. @@ -275,7 +275,7 @@ def heterogeneous_uniform_neighbor_sample(ResourceHandle resource_handle, if starting_vertex_label_offsets is not None: cai_starting_vertex_label_offsets_ptr = \ starting_vertex_label_offsets.__cuda_array_interface__['data'][0] - + cdef uintptr_t cai_vertex_type_offsets_ptr if vertex_type_offsets is not None: cai_vertex_type_offsets_ptr = \ From 4d3f8c1f348c7646a98b6660a784ca84ccefbae3 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 15:56:22 -0800 Subject: [PATCH 19/42] properly handle sampling with multiple edge types --- cpp/src/sampling/neighbor_sampling_impl.hpp | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index a09cf757a5..8b0b6fe5c8 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -252,8 +252,12 @@ neighbor_sample_impl(raft::handle_t const& handle, if (labels) { (*level_result_label_vectors).push_back(std::move(*labels)); } if (num_edge_types > 1) { modified_graph_view.clear_edge_mask(); } + + //vector.push_back (raft::device_span(vertex_t const)) //<- // level_result_dst_vectors } + + // Call prepare frontier for each hop. FIXME // FIXME: We should modify vertex_partition_range_lasts to return a raft::host_span // rather than making a copy. auto vertex_partition_range_lasts = modified_graph_view.vertex_partition_range_lasts(); @@ -262,7 +266,7 @@ neighbor_sample_impl(raft::handle_t const& handle, handle, starting_vertices, starting_vertex_labels, - raft::device_span{level_result_dst_vectors.back().data(), + raft::device_span{level_result_dst_vectors.back().data(), // define a vector . level_result_dst_vectors.back().size()}, frontier_vertex_labels ? std::make_optional(raft::device_span( From d88eebd8ee7bd2a81d6903e90b76e4942b4f91f7 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 16:01:35 -0800 Subject: [PATCH 20/42] remove debug print --- cpp/src/sampling/detail/prepare_next_frontier_impl.cuh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh index 5c04d628f0..bf401642e6 100644 --- a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh +++ b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh @@ -58,11 +58,12 @@ prepare_next_frontier( bool dedupe_sources, bool do_expensive_check) { + // Create a single new frontier. vertex_partition_device_view_t d_vertex_partition(vertex_partition); - size_t frontier_size = sampled_dst_vertices.size(); + size_t frontier_size = sampled_dst_vertices.size(); // std:reduce for all edge typw within hop / Reduce the size of each of the span. std::reduce for each of the size - example code that concatenate device vectors (look at that) if (prior_sources_behavior == prior_sources_behavior_t::CARRY_OVER) { - frontier_size += sampled_src_vertices.size(); + frontier_size += sampled_src_vertices.size(); // make room for all of the previous vertices. } rmm::device_uvector frontier_vertices(frontier_size, handle.get_stream()); From 77a28c24bdcbf58d19005cac57a320047958030f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 16:45:54 -0800 Subject: [PATCH 21/42] undo changes to 'prepare_next_frontier' --- cpp/src/sampling/detail/prepare_next_frontier_impl.cuh | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh index bf401642e6..5c04d628f0 100644 --- a/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh +++ b/cpp/src/sampling/detail/prepare_next_frontier_impl.cuh @@ -58,12 +58,11 @@ prepare_next_frontier( bool dedupe_sources, bool do_expensive_check) { - // Create a single new frontier. vertex_partition_device_view_t d_vertex_partition(vertex_partition); - size_t frontier_size = sampled_dst_vertices.size(); // std:reduce for all edge typw within hop / Reduce the size of each of the span. std::reduce for each of the size - example code that concatenate device vectors (look at that) + size_t frontier_size = sampled_dst_vertices.size(); if (prior_sources_behavior == prior_sources_behavior_t::CARRY_OVER) { - frontier_size += sampled_src_vertices.size(); // make room for all of the previous vertices. + frontier_size += sampled_src_vertices.size(); } rmm::device_uvector frontier_vertices(frontier_size, handle.get_stream()); From b760a61aa1a0a9e05f63022b6439a6f61f51be02 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 17:14:55 -0800 Subject: [PATCH 22/42] properly handle sampling with multiple edge types --- cpp/src/sampling/neighbor_sampling_impl.hpp | 254 +++++++++++++------- 1 file changed, 166 insertions(+), 88 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 8b0b6fe5c8..0cd7dbd35a 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -30,7 +30,6 @@ #include #include - #include namespace cugraph { @@ -106,25 +105,26 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); - + label_t num_unique_labels = 0; std::optional> cp_starting_vertex_labels{std::nullopt}; if (starting_vertex_labels) { // Find the number of unique lables - cp_starting_vertex_labels = - rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), - starting_vertex_labels->begin(), - starting_vertex_labels->end(), - cp_starting_vertex_labels->begin()); - - thrust::sort(handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - + cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + starting_vertex_labels->begin(), + starting_vertex_labels->end(), + cp_starting_vertex_labels->begin()); + + thrust::sort( + handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), cp_starting_vertex_labels->begin(), cp_starting_vertex_labels->end()); @@ -160,6 +160,16 @@ neighbor_sample_impl(raft::handle_t const& handle, std::vector> level_result_src_vectors{}; std::vector> level_result_dst_vectors{}; + + rmm::device_uvector level_result_src(0, handle.get_stream()); + rmm::device_uvector level_result_dst(0, handle.get_stream()); + + // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1 + auto num_hops = ((fan_out.size() % num_edge_types) == 0) + ? (fan_out.size() / num_edge_types) + : ((fan_out.size() / num_edge_types) + 1); + + auto level_result_weight_vectors = edge_weight_view ? std::make_optional(std::vector>{}) : std::nullopt; @@ -172,12 +182,26 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels ? std::make_optional(std::vector>{}) : std::nullopt; - level_result_src_vectors.reserve(fan_out.size()); - level_result_dst_vectors.reserve(fan_out.size()); - if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(fan_out.size()); } - if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(fan_out.size()); } - if (level_result_edge_type_vectors) { (*level_result_edge_type_vectors).reserve(fan_out.size()); } - if (level_result_label_vectors) { (*level_result_label_vectors).reserve(fan_out.size()); } + level_result_src_vectors.reserve(num_hops); + level_result_dst_vectors.reserve(num_hops); + + auto level_result_weight = + edge_weight_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_edge_id = + edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_edge_type = + edge_type_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_label = + starting_vertex_labels ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + + if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(num_hops); } + if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(num_hops); } + if (level_result_edge_type_vectors) { (*level_result_edge_type_vectors).reserve(num_hops); } + if (level_result_label_vectors) { (*level_result_label_vectors).reserve(num_hops); } rmm::device_uvector frontier_vertices(0, handle.get_stream()); auto frontier_vertex_labels = @@ -198,11 +222,8 @@ neighbor_sample_impl(raft::handle_t const& handle, } std::vector level_sizes{}; + std::vector level_sizes_edge_types{}; - // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1 - auto num_hops = ((fan_out.size() % num_edge_types) == 0) - ? (fan_out.size() / num_edge_types) - : ((fan_out.size() / num_edge_types) + 1); for (auto hop = 0; hop < num_hops; hop++) { for (auto edge_type_id = 0; edge_type_id < num_edge_types; edge_type_id++) { @@ -242,22 +263,70 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels); } - level_sizes.push_back(srcs.size()); - level_result_src_vectors.push_back(std::move(srcs)); - level_result_dst_vectors.push_back(std::move(dsts)); + level_sizes_edge_types.push_back(srcs.size()); - if (weights) { (*level_result_weight_vectors).push_back(std::move(*weights)); } - if (edge_ids) { (*level_result_edge_id_vectors).push_back(std::move(*edge_ids)); } - if (edge_types) { (*level_result_edge_type_vectors).push_back(std::move(*edge_types)); } - if (labels) { (*level_result_label_vectors).push_back(std::move(*labels)); } + level_result_src.resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); + level_result_dst.resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); - if (num_edge_types > 1) { modified_graph_view.clear_edge_mask(); } + raft::copy(level_result_src.begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + srcs.begin(), + srcs.size(), + handle.get_stream()); + + raft::copy(level_result_dst.begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + dsts.begin(), + srcs.size(), + handle.get_stream()); + + if (weights) { + (*level_result_weight).resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); + + raft::copy(level_result_weight->begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + weights->begin(), + srcs.size(), + handle.get_stream()); + } + + if (edge_ids) { + (*level_result_edge_id).resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); + raft::copy(level_result_edge_id->begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + edge_ids->begin(), + srcs.size(), + handle.get_stream()); + } + if (edge_types) { + (*level_result_edge_type).resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); + + + raft::copy(level_result_edge_type->begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + edge_types->begin(), + srcs.size(), + handle.get_stream()); + } + + if (labels) { + (*level_result_label).resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); + + raft::copy(level_result_label->begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + labels->begin(), + srcs.size(), + handle.get_stream()); + + } - //vector.push_back (raft::device_span(vertex_t const)) //<- // level_result_dst_vectors + if (num_edge_types > 1) { modified_graph_view.clear_edge_mask(); } } + level_sizes.push_back(level_result_src.size()); + level_result_src_vectors.push_back(std::move(level_result_src)); + level_result_dst_vectors.push_back(std::move(level_result_dst)); + + if (level_result_weight) { (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); } + if (level_result_edge_id) { (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); } + if (level_result_edge_type) { (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); } + if (level_result_label) { (*level_result_label_vectors).push_back(std::move(*level_result_label)); } + - // Call prepare frontier for each hop. FIXME // FIXME: We should modify vertex_partition_range_lasts to return a raft::host_span // rather than making a copy. auto vertex_partition_range_lasts = modified_graph_view.vertex_partition_range_lasts(); @@ -266,11 +335,11 @@ neighbor_sample_impl(raft::handle_t const& handle, handle, starting_vertices, starting_vertex_labels, - raft::device_span{level_result_dst_vectors.back().data(), // define a vector . - level_result_dst_vectors.back().size()}, + raft::device_span{level_result_dst.data(), + level_result_dst.size()}, frontier_vertex_labels ? std::make_optional(raft::device_span( - level_result_label_vectors->back().data(), level_result_label_vectors->back().size())) + level_result_label->data(), level_result_label->size())) : std::nullopt, std::move(vertex_used_as_source), modified_graph_view.local_vertex_partition_view(), @@ -368,13 +437,14 @@ neighbor_sample_impl(raft::handle_t const& handle, if (return_hops) { result_hops = rmm::device_uvector(result_size, handle.get_stream()); output_offset = 0; - for (size_t i = 0; i < fan_out.size(); ++i) { + for (size_t i = 0; i < num_hops; ++i) { // FIXME: replace this by the number of hops scalar_fill( handle, result_hops->data() + output_offset, level_sizes[i], static_cast(i)); output_offset += level_sizes[i]; } } + auto result_labels = level_result_label_vectors ? std::make_optional(rmm::device_uvector(result_size, handle.get_stream())) @@ -396,57 +466,65 @@ neighbor_sample_impl(raft::handle_t const& handle, if (result_labels) { cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end(), - cp_result_labels->begin()); - } - std::tie(result_srcs, - result_dsts, - result_weights, - result_edge_ids, - result_edge_types, - result_hops, - result_labels, - result_offsets) = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { - // There are missing labels not sampled. - result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); - - // Sort labels - thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); - - thrust::transform(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size() - 1), - result_offsets->begin() + 1, - [result_labels = raft::device_span( - cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto itr_upper = thrust::upper_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto sampled_label_size = thrust::distance(itr_lower, itr_upper); - return sampled_label_size; - }); - - // Run inclusive scan - thrust::inclusive_scan(handle.get_thrust_policy(), - result_offsets->begin() + 1, - result_offsets->end(), - result_offsets->begin() + 1); + thrust::copy( + handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end(), + cp_result_labels->begin()); } + + std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, + result_edge_types, result_hops, result_labels, result_offsets) + = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + + if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { + result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); + + // Sort labels + thrust::sort( + handle.get_thrust_policy(), + cp_result_labels->begin(), + cp_result_labels->end()); + + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size() - 1), + result_offsets->begin() + 1, + [ + result_labels = raft::device_span( + cp_result_labels->data(), + cp_result_labels->size()) + ] __device__(auto idx) { + auto itr_lower = thrust::lower_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto itr_upper = thrust::upper_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto sampled_label_size = thrust::distance(itr_lower, itr_upper); + + //return thrust::distance(itr_lower, itr_upper); + return sampled_label_size; + }); + + // Run inclusive scan + thrust::inclusive_scan(handle.get_thrust_policy(), + result_offsets->begin() + 1, + result_offsets->end(), + result_offsets->begin() + 1); + } + return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), From 580d3e9dd7803df29047e2340d2d576b6412b2d7 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 17:20:30 -0800 Subject: [PATCH 23/42] properly compute the number of hops for heterogeneous renumbering --- cpp/src/c_api/neighbor_sampling.cpp | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 600c2fe1a7..47b1fa4c2a 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -1283,8 +1283,10 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { vertex_type_offsets.size()}, edge_label ? (*offsets).size() - 1 : size_t{1}, - hop ? fan_out_->size_ : size_t{1}, - + hop ? (((fan_out_->size_ % num_edge_types_) == 0) + ? (fan_out_->size_ / num_edge_types_) + : ((fan_out_->size_ / num_edge_types_) + 1)) + : size_t{1}, (vertex_type_offsets_ != nullptr) ? vertex_type_offsets_->size_ - 1 : vertex_type_offsets.size() - 1, From 529955a17ba529da6a6469c47119d864bfa44888 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 17:21:45 -0800 Subject: [PATCH 24/42] fix style --- cpp/src/c_api/neighbor_sampling.cpp | 4 +- cpp/src/sampling/neighbor_sampling_impl.hpp | 258 +++++++++++--------- 2 files changed, 138 insertions(+), 124 deletions(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 47b1fa4c2a..1aa39fdedd 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -1284,8 +1284,8 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { edge_label ? (*offsets).size() - 1 : size_t{1}, hop ? (((fan_out_->size_ % num_edge_types_) == 0) - ? (fan_out_->size_ / num_edge_types_) - : ((fan_out_->size_ / num_edge_types_) + 1)) + ? (fan_out_->size_ / num_edge_types_) + : ((fan_out_->size_ / num_edge_types_) + 1)) : size_t{1}, (vertex_type_offsets_ != nullptr) ? vertex_type_offsets_->size_ - 1 : vertex_type_offsets.size() - 1, diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 0cd7dbd35a..7356635869 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -30,6 +30,7 @@ #include #include + #include namespace cugraph { @@ -105,26 +106,25 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); - + label_t num_unique_labels = 0; std::optional> cp_starting_vertex_labels{std::nullopt}; if (starting_vertex_labels) { // Find the number of unique lables - cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - starting_vertex_labels->begin(), - starting_vertex_labels->end(), - cp_starting_vertex_labels->begin()); - - thrust::sort( - handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - + cp_starting_vertex_labels = + rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), + starting_vertex_labels->begin(), + starting_vertex_labels->end(), + cp_starting_vertex_labels->begin()); + + thrust::sort(handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), cp_starting_vertex_labels->begin(), cp_starting_vertex_labels->end()); @@ -169,7 +169,6 @@ neighbor_sample_impl(raft::handle_t const& handle, ? (fan_out.size() / num_edge_types) : ((fan_out.size() / num_edge_types) + 1); - auto level_result_weight_vectors = edge_weight_view ? std::make_optional(std::vector>{}) : std::nullopt; @@ -190,13 +189,14 @@ neighbor_sample_impl(raft::handle_t const& handle, : std::nullopt; auto level_result_edge_id = edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + : std::nullopt; auto level_result_edge_type = edge_type_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + : std::nullopt; auto level_result_label = - starting_vertex_labels ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + starting_vertex_labels + ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(num_hops); } if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(num_hops); } @@ -224,7 +224,6 @@ neighbor_sample_impl(raft::handle_t const& handle, std::vector level_sizes{}; std::vector level_sizes_edge_types{}; - for (auto hop = 0; hop < num_hops; hop++) { for (auto edge_type_id = 0; edge_type_id < num_edge_types; edge_type_id++) { auto k_level = fan_out[(hop * num_edge_types) + edge_type_id]; @@ -265,53 +264,69 @@ neighbor_sample_impl(raft::handle_t const& handle, level_sizes_edge_types.push_back(srcs.size()); - level_result_src.resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); - level_result_dst.resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); + level_result_src.resize( + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), + handle.get_stream()); + level_result_dst.resize( + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), + handle.get_stream()); - raft::copy(level_result_src.begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + raft::copy(level_result_src.begin() + + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), srcs.begin(), srcs.size(), handle.get_stream()); - - raft::copy(level_result_dst.begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + + raft::copy(level_result_dst.begin() + + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), dsts.begin(), srcs.size(), handle.get_stream()); if (weights) { - (*level_result_weight).resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); - - raft::copy(level_result_weight->begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), - weights->begin(), - srcs.size(), - handle.get_stream()); + (*level_result_weight) + .resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), + handle.get_stream()); + + raft::copy(level_result_weight->begin() + + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + weights->begin(), + srcs.size(), + handle.get_stream()); } - + if (edge_ids) { - (*level_result_edge_id).resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); - raft::copy(level_result_edge_id->begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), - edge_ids->begin(), - srcs.size(), - handle.get_stream()); + (*level_result_edge_id) + .resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), + handle.get_stream()); + raft::copy(level_result_edge_id->begin() + + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + edge_ids->begin(), + srcs.size(), + handle.get_stream()); } if (edge_types) { - (*level_result_edge_type).resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); - - - raft::copy(level_result_edge_type->begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), - edge_types->begin(), - srcs.size(), - handle.get_stream()); + (*level_result_edge_type) + .resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), + handle.get_stream()); + + raft::copy(level_result_edge_type->begin() + + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + edge_types->begin(), + srcs.size(), + handle.get_stream()); } - - if (labels) { - (*level_result_label).resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), handle.get_stream()); - raft::copy(level_result_label->begin() + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), - labels->begin(), - srcs.size(), - handle.get_stream()); - + if (labels) { + (*level_result_label) + .resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), + handle.get_stream()); + + raft::copy(level_result_label->begin() + + std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + labels->begin(), + srcs.size(), + handle.get_stream()); } if (num_edge_types > 1) { modified_graph_view.clear_edge_mask(); } @@ -321,11 +336,18 @@ neighbor_sample_impl(raft::handle_t const& handle, level_result_src_vectors.push_back(std::move(level_result_src)); level_result_dst_vectors.push_back(std::move(level_result_dst)); - if (level_result_weight) { (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); } - if (level_result_edge_id) { (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); } - if (level_result_edge_type) { (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); } - if (level_result_label) { (*level_result_label_vectors).push_back(std::move(*level_result_label)); } - + if (level_result_weight) { + (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); + } + if (level_result_edge_id) { + (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); + } + if (level_result_edge_type) { + (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); + } + if (level_result_label) { + (*level_result_label_vectors).push_back(std::move(*level_result_label)); + } // FIXME: We should modify vertex_partition_range_lasts to return a raft::host_span // rather than making a copy. @@ -335,12 +357,10 @@ neighbor_sample_impl(raft::handle_t const& handle, handle, starting_vertices, starting_vertex_labels, - raft::device_span{level_result_dst.data(), - level_result_dst.size()}, - frontier_vertex_labels - ? std::make_optional(raft::device_span( - level_result_label->data(), level_result_label->size())) - : std::nullopt, + raft::device_span{level_result_dst.data(), level_result_dst.size()}, + frontier_vertex_labels ? std::make_optional(raft::device_span( + level_result_label->data(), level_result_label->size())) + : std::nullopt, std::move(vertex_used_as_source), modified_graph_view.local_vertex_partition_view(), vertex_partition_range_lasts, @@ -437,14 +457,13 @@ neighbor_sample_impl(raft::handle_t const& handle, if (return_hops) { result_hops = rmm::device_uvector(result_size, handle.get_stream()); output_offset = 0; - for (size_t i = 0; i < num_hops; ++i) { // FIXME: replace this by the number of hops + for (size_t i = 0; i < num_hops; ++i) { // FIXME: replace this by the number of hops scalar_fill( handle, result_hops->data() + output_offset, level_sizes[i], static_cast(i)); output_offset += level_sizes[i]; } } - auto result_labels = level_result_label_vectors ? std::make_optional(rmm::device_uvector(result_size, handle.get_stream())) @@ -466,65 +485,60 @@ neighbor_sample_impl(raft::handle_t const& handle, if (result_labels) { cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end(), - cp_result_labels->begin()); + thrust::copy(handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end(), + cp_result_labels->begin()); + } + + std::tie(result_srcs, + result_dsts, + result_weights, + result_edge_ids, + result_edge_types, + result_hops, + result_labels, + result_offsets) = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { + result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); + + // Sort labels + thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); + + thrust::transform(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size() - 1), + result_offsets->begin() + 1, + [result_labels = raft::device_span( + cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { + auto itr_lower = thrust::lower_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto itr_upper = thrust::upper_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto sampled_label_size = thrust::distance(itr_lower, itr_upper); + + // return thrust::distance(itr_lower, itr_upper); + return sampled_label_size; + }); + + // Run inclusive scan + thrust::inclusive_scan(handle.get_thrust_policy(), + result_offsets->begin() + 1, + result_offsets->end(), + result_offsets->begin() + 1); } - std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, - result_edge_types, result_hops, result_labels, result_offsets) - = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - - if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { - result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); - - // Sort labels - thrust::sort( - handle.get_thrust_policy(), - cp_result_labels->begin(), - cp_result_labels->end()); - - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size() - 1), - result_offsets->begin() + 1, - [ - result_labels = raft::device_span( - cp_result_labels->data(), - cp_result_labels->size()) - ] __device__(auto idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto itr_upper = thrust::upper_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto sampled_label_size = thrust::distance(itr_lower, itr_upper); - - //return thrust::distance(itr_lower, itr_upper); - return sampled_label_size; - }); - - // Run inclusive scan - thrust::inclusive_scan(handle.get_thrust_policy(), - result_offsets->begin() + 1, - result_offsets->end(), - result_offsets->begin() + 1); - } - return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), From bf172518b8edf230ea3ab251db2fca166ab9026e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 19:21:06 -0800 Subject: [PATCH 25/42] update docstrings --- cpp/src/c_api/neighbor_sampling.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 1aa39fdedd..64f997a972 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -1290,7 +1290,7 @@ struct neighbor_sampling_functor : public cugraph::c_api::abstract_functor { (vertex_type_offsets_ != nullptr) ? vertex_type_offsets_->size_ - 1 : vertex_type_offsets.size() - 1, - // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided + // num_vertex_type is by default 1 if 'vertex_type_offsets' is not provided. num_edge_types_, src_is_major, do_expensive_check_); From 88b35baa276429e38a6c211ee531f387117f97f6 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 20:48:43 -0800 Subject: [PATCH 26/42] simplify code and re-order statements --- cpp/src/sampling/neighbor_sampling_impl.hpp | 262 +++++++++----------- 1 file changed, 122 insertions(+), 140 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 882cff4b45..c097bc68d5 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -106,28 +106,30 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); - + label_t num_unique_labels = 0; - std::optional> cp_starting_vertex_labels{std::nullopt}; - if (starting_vertex_labels) { // Find the number of unique lables - cp_starting_vertex_labels = - rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), - starting_vertex_labels->begin(), - starting_vertex_labels->end(), - cp_starting_vertex_labels->begin()); - - thrust::sort(handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - + std::optional> cp_starting_vertex_labels{std::nullopt}; + cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); + + thrust::copy( + handle.get_thrust_policy(), + starting_vertex_labels->begin(), + starting_vertex_labels->end(), + cp_starting_vertex_labels->begin()); + + thrust::sort( + handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), cp_starting_vertex_labels->begin(), cp_starting_vertex_labels->end()); + + } if (num_edge_types > 1) { @@ -169,6 +171,7 @@ neighbor_sample_impl(raft::handle_t const& handle, ? (fan_out.size() / num_edge_types) : ((fan_out.size() / num_edge_types) + 1); + auto level_result_weight_vectors = edge_weight_view ? std::make_optional(std::vector>{}) : std::nullopt; @@ -189,14 +192,13 @@ neighbor_sample_impl(raft::handle_t const& handle, : std::nullopt; auto level_result_edge_id = edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + : std::nullopt; auto level_result_edge_type = edge_type_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + : std::nullopt; auto level_result_label = - starting_vertex_labels - ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + starting_vertex_labels ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(num_hops); } if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(num_hops); } @@ -222,9 +224,8 @@ neighbor_sample_impl(raft::handle_t const& handle, } std::vector level_sizes{}; - std::vector level_sizes_edge_types{}; - for (size_t hop = 0; hop < num_hops; ++hop) { + for (auto hop = 0; hop < num_hops; hop++) { for (auto edge_type_id = 0; edge_type_id < num_edge_types; edge_type_id++) { auto k_level = fan_out[(hop * num_edge_types) + edge_type_id]; rmm::device_uvector srcs(0, handle.get_stream()); @@ -262,71 +263,56 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels); } - level_sizes_edge_types.push_back(srcs.size()); + auto old_size = level_result_src.size(); + level_result_src.resize(old_size + srcs.size(), handle.get_stream()); + level_result_dst.resize(old_size + srcs.size(), handle.get_stream()); - level_result_src.resize( - std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), - handle.get_stream()); - level_result_dst.resize( - std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), - handle.get_stream()); - raft::copy(level_result_src.begin() + - std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + raft::copy(level_result_src.begin() + old_size, srcs.begin(), srcs.size(), handle.get_stream()); - - raft::copy(level_result_dst.begin() + - std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), + + raft::copy(level_result_dst.begin() + old_size, dsts.begin(), srcs.size(), handle.get_stream()); if (weights) { - (*level_result_weight) - .resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), - handle.get_stream()); - - raft::copy(level_result_weight->begin() + - std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), - weights->begin(), - srcs.size(), - handle.get_stream()); - } + (*level_result_weight).resize(old_size + srcs.size(), handle.get_stream()); + raft::copy(level_result_weight->begin() + old_size, + weights->begin(), + srcs.size(), + handle.get_stream()); + } + + + if (edge_ids) { - (*level_result_edge_id) - .resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), - handle.get_stream()); - raft::copy(level_result_edge_id->begin() + - std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), - edge_ids->begin(), - srcs.size(), - handle.get_stream()); + (*level_result_edge_id).resize(old_size + srcs.size(), handle.get_stream()); + raft::copy(level_result_edge_id->begin() + old_size, + edge_ids->begin(), + srcs.size(), + handle.get_stream()); } if (edge_types) { - (*level_result_edge_type) - .resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), - handle.get_stream()); - - raft::copy(level_result_edge_type->begin() + - std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), - edge_types->begin(), - srcs.size(), - handle.get_stream()); - } + (*level_result_edge_type).resize(old_size + srcs.size(), handle.get_stream()); + + raft::copy(level_result_edge_type->begin() + old_size, + edge_types->begin(), + srcs.size(), + handle.get_stream()); + } + if (labels) { - (*level_result_label) - .resize(std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end()), - handle.get_stream()); - - raft::copy(level_result_label->begin() + - std::reduce(level_sizes_edge_types.begin(), level_sizes_edge_types.end() - 1), - labels->begin(), - srcs.size(), - handle.get_stream()); + (*level_result_label).resize(old_size + srcs.size(), handle.get_stream()); + + raft::copy(level_result_label->begin() + old_size, + labels->begin(), + srcs.size(), + handle.get_stream()); } if (num_edge_types > 1) { modified_graph_view.clear_edge_mask(); } @@ -336,18 +322,10 @@ neighbor_sample_impl(raft::handle_t const& handle, level_result_src_vectors.push_back(std::move(level_result_src)); level_result_dst_vectors.push_back(std::move(level_result_dst)); - if (level_result_weight) { - (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); - } - if (level_result_edge_id) { - (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); - } - if (level_result_edge_type) { - (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); - } - if (level_result_label) { - (*level_result_label_vectors).push_back(std::move(*level_result_label)); - } + if (level_result_weight) { (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); } + if (level_result_edge_id) { (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); } + if (level_result_edge_type) { (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); } + if (level_result_label) { (*level_result_label_vectors).push_back(std::move(*level_result_label)); } // FIXME: We should modify vertex_partition_range_lasts to return a raft::host_span // rather than making a copy. @@ -357,10 +335,12 @@ neighbor_sample_impl(raft::handle_t const& handle, handle, starting_vertices, starting_vertex_labels, - raft::device_span{level_result_dst.data(), level_result_dst.size()}, - frontier_vertex_labels ? std::make_optional(raft::device_span( - level_result_label->data(), level_result_label->size())) - : std::nullopt, + raft::device_span{level_result_dst_vectors.back().data(), + level_result_dst_vectors.back().size()}, + frontier_vertex_labels + ? std::make_optional(raft::device_span( + level_result_label->data(), level_result_label->size())) + : std::nullopt, std::move(vertex_used_as_source), modified_graph_view.local_vertex_partition_view(), vertex_partition_range_lasts, @@ -457,7 +437,7 @@ neighbor_sample_impl(raft::handle_t const& handle, if (return_hops) { result_hops = rmm::device_uvector(result_size, handle.get_stream()); output_offset = 0; - for (size_t i = 0; i < num_hops; ++i) { // FIXME: replace this by the number of hops + for (size_t i = 0; i < num_hops; ++i) { scalar_fill( handle, result_hops->data() + output_offset, level_sizes[i], static_cast(i)); output_offset += level_sizes[i]; @@ -485,60 +465,62 @@ neighbor_sample_impl(raft::handle_t const& handle, if (result_labels) { cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end(), - cp_result_labels->begin()); - } - - std::tie(result_srcs, - result_dsts, - result_weights, - result_edge_ids, - result_edge_types, - result_hops, - result_labels, - result_offsets) = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { - result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); - - // Sort labels - thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); - - thrust::transform(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size() - 1), - result_offsets->begin() + 1, - [result_labels = raft::device_span( - cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto itr_upper = thrust::upper_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto sampled_label_size = thrust::distance(itr_lower, itr_upper); - - // return thrust::distance(itr_lower, itr_upper); - return sampled_label_size; - }); - - // Run inclusive scan - thrust::inclusive_scan(handle.get_thrust_policy(), - result_offsets->begin() + 1, - result_offsets->end(), - result_offsets->begin() + 1); + thrust::copy( + handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end(), + cp_result_labels->begin()); } + std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, + result_edge_types, result_hops, result_labels, result_offsets) + = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { + result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); + + // Sort labels + thrust::sort( + handle.get_thrust_policy(), + cp_result_labels->begin(), + cp_result_labels->end()); + + thrust::transform( + handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size() - 1), + result_offsets->begin() + 1, + [ + result_labels = raft::device_span( + cp_result_labels->data(), + cp_result_labels->size()) + ] __device__(auto idx) { + auto itr_lower = thrust::lower_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto itr_upper = thrust::upper_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto sampled_label_size = thrust::distance(itr_lower, itr_upper); + + return sampled_label_size; + }); + + // Run inclusive scan + thrust::inclusive_scan(handle.get_thrust_policy(), + result_offsets->begin() + 1, + result_offsets->end(), + result_offsets->begin() + 1); + } + return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), From f8c576a38db64657e95eaf178479a8937c1f8b8f Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 26 Nov 2024 20:49:46 -0800 Subject: [PATCH 27/42] fix style --- cpp/src/sampling/neighbor_sampling_impl.hpp | 211 ++++++++++---------- 1 file changed, 102 insertions(+), 109 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index c097bc68d5..cf3cb3d0bb 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -106,30 +106,27 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_masks_vector{}; graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); - + label_t num_unique_labels = 0; if (starting_vertex_labels) { // Find the number of unique lables std::optional> cp_starting_vertex_labels{std::nullopt}; - cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); - - thrust::copy( - handle.get_thrust_policy(), - starting_vertex_labels->begin(), - starting_vertex_labels->end(), - cp_starting_vertex_labels->begin()); - - thrust::sort( - handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - + cp_starting_vertex_labels = + rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), + starting_vertex_labels->begin(), + starting_vertex_labels->end(), + cp_starting_vertex_labels->begin()); + + thrust::sort(handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), cp_starting_vertex_labels->begin(), cp_starting_vertex_labels->end()); - - } if (num_edge_types > 1) { @@ -171,7 +168,6 @@ neighbor_sample_impl(raft::handle_t const& handle, ? (fan_out.size() / num_edge_types) : ((fan_out.size() / num_edge_types) + 1); - auto level_result_weight_vectors = edge_weight_view ? std::make_optional(std::vector>{}) : std::nullopt; @@ -192,13 +188,14 @@ neighbor_sample_impl(raft::handle_t const& handle, : std::nullopt; auto level_result_edge_id = edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + : std::nullopt; auto level_result_edge_type = edge_type_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + : std::nullopt; auto level_result_label = - starting_vertex_labels ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + starting_vertex_labels + ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(num_hops); } if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(num_hops); } @@ -267,52 +264,44 @@ neighbor_sample_impl(raft::handle_t const& handle, level_result_src.resize(old_size + srcs.size(), handle.get_stream()); level_result_dst.resize(old_size + srcs.size(), handle.get_stream()); + raft::copy( + level_result_src.begin() + old_size, srcs.begin(), srcs.size(), handle.get_stream()); - raft::copy(level_result_src.begin() + old_size, - srcs.begin(), - srcs.size(), - handle.get_stream()); - - raft::copy(level_result_dst.begin() + old_size, - dsts.begin(), - srcs.size(), - handle.get_stream()); + raft::copy( + level_result_dst.begin() + old_size, dsts.begin(), srcs.size(), handle.get_stream()); if (weights) { (*level_result_weight).resize(old_size + srcs.size(), handle.get_stream()); raft::copy(level_result_weight->begin() + old_size, - weights->begin(), - srcs.size(), - handle.get_stream()); + weights->begin(), + srcs.size(), + handle.get_stream()); } - - - + if (edge_ids) { (*level_result_edge_id).resize(old_size + srcs.size(), handle.get_stream()); raft::copy(level_result_edge_id->begin() + old_size, - edge_ids->begin(), - srcs.size(), - handle.get_stream()); + edge_ids->begin(), + srcs.size(), + handle.get_stream()); } if (edge_types) { (*level_result_edge_type).resize(old_size + srcs.size(), handle.get_stream()); - raft::copy(level_result_edge_type->begin() + old_size, - edge_types->begin(), - srcs.size(), - handle.get_stream()); + edge_types->begin(), + srcs.size(), + handle.get_stream()); } - + if (labels) { (*level_result_label).resize(old_size + srcs.size(), handle.get_stream()); raft::copy(level_result_label->begin() + old_size, - labels->begin(), - srcs.size(), - handle.get_stream()); + labels->begin(), + srcs.size(), + handle.get_stream()); } if (num_edge_types > 1) { modified_graph_view.clear_edge_mask(); } @@ -322,10 +311,18 @@ neighbor_sample_impl(raft::handle_t const& handle, level_result_src_vectors.push_back(std::move(level_result_src)); level_result_dst_vectors.push_back(std::move(level_result_dst)); - if (level_result_weight) { (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); } - if (level_result_edge_id) { (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); } - if (level_result_edge_type) { (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); } - if (level_result_label) { (*level_result_label_vectors).push_back(std::move(*level_result_label)); } + if (level_result_weight) { + (*level_result_weight_vectors).push_back(std::move(*level_result_weight)); + } + if (level_result_edge_id) { + (*level_result_edge_id_vectors).push_back(std::move(*level_result_edge_id)); + } + if (level_result_edge_type) { + (*level_result_edge_type_vectors).push_back(std::move(*level_result_edge_type)); + } + if (level_result_label) { + (*level_result_label_vectors).push_back(std::move(*level_result_label)); + } // FIXME: We should modify vertex_partition_range_lasts to return a raft::host_span // rather than making a copy. @@ -337,10 +334,9 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, - frontier_vertex_labels - ? std::make_optional(raft::device_span( - level_result_label->data(), level_result_label->size())) - : std::nullopt, + frontier_vertex_labels ? std::make_optional(raft::device_span( + level_result_label->data(), level_result_label->size())) + : std::nullopt, std::move(vertex_used_as_source), modified_graph_view.local_vertex_partition_view(), vertex_partition_range_lasts, @@ -465,62 +461,59 @@ neighbor_sample_impl(raft::handle_t const& handle, if (result_labels) { cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - thrust::copy( - handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end(), - cp_result_labels->begin()); + thrust::copy(handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end(), + cp_result_labels->begin()); + } + + std::tie(result_srcs, + result_dsts, + result_weights, + result_edge_ids, + result_edge_types, + result_hops, + result_labels, + result_offsets) = detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); + + if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { + result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); + + // Sort labels + thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); + + thrust::transform(handle.get_thrust_policy(), + thrust::make_counting_iterator(0), + thrust::make_counting_iterator(result_offsets->size() - 1), + result_offsets->begin() + 1, + [result_labels = raft::device_span( + cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { + auto itr_lower = thrust::lower_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto itr_upper = thrust::upper_bound( + thrust::seq, result_labels.begin(), result_labels.end(), idx); + + auto sampled_label_size = thrust::distance(itr_lower, itr_upper); + + return sampled_label_size; + }); + + // Run inclusive scan + thrust::inclusive_scan(handle.get_thrust_policy(), + result_offsets->begin() + 1, + result_offsets->end(), + result_offsets->begin() + 1); } - std::tie(result_srcs, result_dsts, result_weights, result_edge_ids, - result_edge_types, result_hops, result_labels, result_offsets) - = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { - result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); - - // Sort labels - thrust::sort( - handle.get_thrust_policy(), - cp_result_labels->begin(), - cp_result_labels->end()); - - thrust::transform( - handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size() - 1), - result_offsets->begin() + 1, - [ - result_labels = raft::device_span( - cp_result_labels->data(), - cp_result_labels->size()) - ] __device__(auto idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto itr_upper = thrust::upper_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto sampled_label_size = thrust::distance(itr_lower, itr_upper); - - return sampled_label_size; - }); - - // Run inclusive scan - thrust::inclusive_scan(handle.get_thrust_policy(), - result_offsets->begin() + 1, - result_offsets->end(), - result_offsets->begin() + 1); - } - return std::make_tuple(std::move(result_srcs), std::move(result_dsts), std::move(result_weights), From 9648d6992156b8f5fd5ec769a157a385c806ea3e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Wed, 27 Nov 2024 09:02:16 -0800 Subject: [PATCH 28/42] update docstrings --- cpp/src/sampling/neighbor_sampling_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index cf3cb3d0bb..dd3ae5f1d3 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -163,7 +163,7 @@ neighbor_sample_impl(raft::handle_t const& handle, rmm::device_uvector level_result_src(0, handle.get_stream()); rmm::device_uvector level_result_dst(0, handle.get_stream()); - // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1 + // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1. auto num_hops = ((fan_out.size() % num_edge_types) == 0) ? (fan_out.size() / num_edge_types) : ((fan_out.size() / num_edge_types) + 1); From 9d84558ed393e74c430e0379ab064b00dd188c16 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 30 Nov 2024 09:00:03 -0800 Subject: [PATCH 29/42] fix bug when creating struct --- cpp/src/c_api/neighbor_sampling.cpp | 2 ++ cpp/src/sampling/neighbor_sampling_impl.hpp | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/c_api/neighbor_sampling.cpp b/cpp/src/c_api/neighbor_sampling.cpp index 64f997a972..37982eab82 100644 --- a/cpp/src/c_api/neighbor_sampling.cpp +++ b/cpp/src/c_api/neighbor_sampling.cpp @@ -404,6 +404,7 @@ struct uniform_neighbor_sampling_functor : public cugraph::c_api::abstract_funct (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + nullptr, (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, @@ -757,6 +758,7 @@ struct biased_neighbor_sampling_functor : public cugraph::c_api::abstract_functo (label_hop_offsets) ? new cugraph::c_api::cugraph_type_erased_device_array_t(*label_hop_offsets, SIZE_T) : nullptr, + nullptr, (edge_label) ? new cugraph::c_api::cugraph_type_erased_device_array_t(edge_label.value(), INT32) : nullptr, diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index dd3ae5f1d3..b0e78ec7db 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -335,7 +335,7 @@ neighbor_sample_impl(raft::handle_t const& handle, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, frontier_vertex_labels ? std::make_optional(raft::device_span( - level_result_label->data(), level_result_label->size())) + level_result_label_vectors->back().data(), level_result_label_vectors->back().size())) : std::nullopt, std::move(vertex_used_as_source), modified_graph_view.local_vertex_partition_view(), From a7a224cb2a59272014b938517a37bf75a090924a Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 30 Nov 2024 09:14:08 -0800 Subject: [PATCH 30/42] fix style --- cpp/src/sampling/neighbor_sampling_impl.hpp | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index b0e78ec7db..2df150fb76 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -334,9 +334,10 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, - frontier_vertex_labels ? std::make_optional(raft::device_span( - level_result_label_vectors->back().data(), level_result_label_vectors->back().size())) - : std::nullopt, + frontier_vertex_labels + ? std::make_optional(raft::device_span( + level_result_label_vectors->back().data(), level_result_label_vectors->back().size())) + : std::nullopt, std::move(vertex_used_as_source), modified_graph_view.local_vertex_partition_view(), vertex_partition_range_lasts, From d64dc665ef1bd1a09d08e031860fa4cbfa8b9da5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sat, 30 Nov 2024 16:25:22 -0800 Subject: [PATCH 31/42] add missing arguments --- .../pylibcugraph/_cugraph_c/sampling_algorithms.pxd | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd index 762fd37a35..f496cc7d88 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/sampling_algorithms.pxd @@ -73,6 +73,7 @@ cdef extern from "cugraph_c/sampling_algorithms.h": cugraph_graph_t* graph, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_tyoe_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, @@ -88,6 +89,7 @@ cdef extern from "cugraph_c/sampling_algorithms.h": const cugraph_edge_property_view_t* edge_biases, const cugraph_type_erased_device_array_view_t* start_vertices, const cugraph_type_erased_device_array_view_t* starting_vertex_label_offsets, + const cugraph_type_erased_device_array_view_t* vertex_tyoe_offsets, const cugraph_type_erased_host_array_view_t* fan_out, int num_edge_types, const cugraph_sampling_options_t* options, From bcfc99cdbf6769662f0633ec9ce71c7a005cb2c3 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 1 Dec 2024 14:00:50 -0800 Subject: [PATCH 32/42] update label list if some are missing from the result --- cpp/src/sampling/neighbor_sampling_impl.hpp | 19 ++++++++++++++++--- .../tests/sampling/test_bulk_sampler.py | 10 +++++++--- .../tests/sampling/test_bulk_sampler_mg.py | 10 +++++++--- 3 files changed, 30 insertions(+), 9 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 2df150fb76..11a792fe03 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -109,9 +109,10 @@ neighbor_sample_impl(raft::handle_t const& handle, label_t num_unique_labels = 0; + std::optional> cp_starting_vertex_labels{std::nullopt}; + if (starting_vertex_labels) { // Find the number of unique lables - std::optional> cp_starting_vertex_labels{std::nullopt}; cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); @@ -486,8 +487,19 @@ neighbor_sample_impl(raft::handle_t const& handle, label_to_output_comm_rank); if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { - result_offsets = rmm::device_uvector(num_unique_labels + 1, handle.get_stream()); - + // If there are missing labels, still inlude it in the result_labels + result_labels = std::move(*cp_starting_vertex_labels); + auto unique_labels_end = + thrust::unique(handle.get_thrust_policy(), + result_labels->begin(), + result_labels->end()); + + auto num_unique_labels = thrust::distance( + result_labels->begin(), unique_labels_end); + + result_labels->resize(num_unique_labels, handle.get_stream()); + + result_offsets->resize(num_unique_labels + 1, handle.get_stream()); // Sort labels thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); @@ -507,6 +519,7 @@ neighbor_sample_impl(raft::handle_t const& handle, return sampled_label_size; }); + // Run inclusive scan thrust::inclusive_scan(handle.get_thrust_policy(), diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py index 3c5d642800..765c6ef893 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py @@ -278,7 +278,11 @@ def test_bulk_sampler_empty_batches(scratch_dir): assert len(os.listdir(samples_path)) == 1 - df = cudf.read_parquet(os.path.join(samples_path, "batch=0-1.parquet")) + # There are 3 batches [0, 1, 2] where batch 1 has no results. In fact, seeds + # [7, 8, 9] have no outgoing edges. The previous implementation returned and + # offsets array omitting seeds with no outgoing edges from the + # edge_label_offsets which is no longer the case + df = cudf.read_parquet(os.path.join(samples_path, "batch=0-2.parquet")) assert df[ (df.batch_id == 0) & (df.hop_id == 0) @@ -289,12 +293,12 @@ def test_bulk_sampler_empty_batches(scratch_dir): ].destinations.sort_values().values_host.tolist() == [2, 3, 7, 8] assert df[ - (df.batch_id == 1) & (df.hop_id == 0) + (df.batch_id == 2) & (df.hop_id == 0) ].destinations.sort_values().values_host.tolist() == [7, 8] assert len(df[(df.batch_id == 1) & (df.hop_id == 1)]) == 0 - assert df.batch_id.max() == 1 + assert df.batch_id.max() == 2 shutil.rmtree(samples_path) diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py index 3fddb8f405..77db37d4b9 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py @@ -228,7 +228,11 @@ def test_bulk_sampler_empty_batches(dask_client, scratch_dir): assert len(os.listdir(samples_path)) == 1 - df = cudf.read_parquet(os.path.join(samples_path, "batch=0-1.parquet")) + # There are 3 batches [0, 1, 2] where batch 1 has no results. In fact, seeds + # [7, 8, 9] have no outgoing edges. The previous implementation returned and + # offsets array omitting seeds with no outgoing edges from the + # edge_label_offsets which is no longer the case + df = cudf.read_parquet(os.path.join(samples_path, "batch=0-2.parquet")) assert df[ (df.batch_id == 0) & (df.hop_id == 0) @@ -239,12 +243,12 @@ def test_bulk_sampler_empty_batches(dask_client, scratch_dir): ].destinations.sort_values().values_host.tolist() == [2, 3, 7, 8] assert df[ - (df.batch_id == 1) & (df.hop_id == 0) + (df.batch_id == 2) & (df.hop_id == 0) ].destinations.sort_values().values_host.tolist() == [7, 8] assert len(df[(df.batch_id == 1) & (df.hop_id == 1)]) == 0 - assert df.batch_id.max() == 1 + assert df.batch_id.max() == 2 shutil.rmtree(samples_path) From 19c37a810023f139567b819862530d6d0b35102e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Sun, 1 Dec 2024 14:01:49 -0800 Subject: [PATCH 33/42] fix style --- cpp/src/sampling/neighbor_sampling_impl.hpp | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 11a792fe03..d666d2dd28 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -490,13 +490,10 @@ neighbor_sample_impl(raft::handle_t const& handle, // If there are missing labels, still inlude it in the result_labels result_labels = std::move(*cp_starting_vertex_labels); auto unique_labels_end = - thrust::unique(handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end()); - - auto num_unique_labels = thrust::distance( - result_labels->begin(), unique_labels_end); - + thrust::unique(handle.get_thrust_policy(), result_labels->begin(), result_labels->end()); + + auto num_unique_labels = thrust::distance(result_labels->begin(), unique_labels_end); + result_labels->resize(num_unique_labels, handle.get_stream()); result_offsets->resize(num_unique_labels + 1, handle.get_stream()); @@ -519,7 +516,6 @@ neighbor_sample_impl(raft::handle_t const& handle, return sampled_label_size; }); - // Run inclusive scan thrust::inclusive_scan(handle.get_thrust_policy(), From 3a0bf574bfdee1f325b3528e31085f3de600a741 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Mon, 2 Dec 2024 14:29:00 -0800 Subject: [PATCH 34/42] properly handle the case where hop is 0 --- cpp/src/sampling/neighbor_sampling_impl.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index d666d2dd28..50732497c3 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -250,7 +250,7 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertex_labels, static_cast(k_level), with_replacement); - } else { + } else if (k_level < 0) { std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = gather_one_hop_edgelist(handle, modified_graph_view, From 08a37cfbfafb1783695703bbd47641912276d472 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 08:50:31 -0800 Subject: [PATCH 35/42] reduce variable score and reorder instructions --- cpp/src/sampling/neighbor_sampling_impl.hpp | 119 +++++++++++--------- 1 file changed, 63 insertions(+), 56 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 50732497c3..01f1517142 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -107,29 +107,6 @@ neighbor_sample_impl(raft::handle_t const& handle, graph_view_t modified_graph_view = graph_view; edge_masks_vector.reserve(num_edge_types); - label_t num_unique_labels = 0; - - std::optional> cp_starting_vertex_labels{std::nullopt}; - - if (starting_vertex_labels) { - // Find the number of unique lables - cp_starting_vertex_labels = - rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), - starting_vertex_labels->begin(), - starting_vertex_labels->end(), - cp_starting_vertex_labels->begin()); - - thrust::sort(handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - - num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - } - if (num_edge_types > 1) { for (int i = 0; i < num_edge_types; i++) { cugraph::edge_property_t, bool> @@ -161,9 +138,6 @@ neighbor_sample_impl(raft::handle_t const& handle, std::vector> level_result_src_vectors{}; std::vector> level_result_dst_vectors{}; - rmm::device_uvector level_result_src(0, handle.get_stream()); - rmm::device_uvector level_result_dst(0, handle.get_stream()); - // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1. auto num_hops = ((fan_out.size() % num_edge_types) == 0) ? (fan_out.size() / num_edge_types) @@ -184,30 +158,26 @@ neighbor_sample_impl(raft::handle_t const& handle, level_result_src_vectors.reserve(num_hops); level_result_dst_vectors.reserve(num_hops); - auto level_result_weight = - edge_weight_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; - auto level_result_edge_id = - edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; - auto level_result_edge_type = - edge_type_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; - auto level_result_label = - starting_vertex_labels - ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; - if (level_result_weight_vectors) { (*level_result_weight_vectors).reserve(num_hops); } if (level_result_edge_id_vectors) { (*level_result_edge_id_vectors).reserve(num_hops); } if (level_result_edge_type_vectors) { (*level_result_edge_type_vectors).reserve(num_hops); } if (level_result_label_vectors) { (*level_result_label_vectors).reserve(num_hops); } rmm::device_uvector frontier_vertices(0, handle.get_stream()); + auto frontier_vertex_labels = starting_vertex_labels ? std::make_optional(rmm::device_uvector{0, handle.get_stream()}) : std::nullopt; + + if (starting_vertex_labels) { + frontier_vertex_labels->resize(starting_vertex_labels->size(), handle.get_stream()); + + thrust::copy(handle.get_thrust_policy(), + starting_vertex_labels->begin(), + starting_vertex_labels->end(), + frontier_vertex_labels->begin()); + } std::optional< std::tuple, std::optional>>> @@ -224,6 +194,24 @@ neighbor_sample_impl(raft::handle_t const& handle, std::vector level_sizes{}; for (auto hop = 0; hop < num_hops; hop++) { + + rmm::device_uvector level_result_src(0, handle.get_stream()); + rmm::device_uvector level_result_dst(0, handle.get_stream()); + + auto level_result_weight = + edge_weight_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_edge_id = + edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_edge_type = + edge_type_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + auto level_result_label = + starting_vertex_labels + ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; + for (auto edge_type_id = 0; edge_type_id < num_edge_types; edge_type_id++) { auto k_level = fan_out[(hop * num_edge_types) + edge_type_id]; rmm::device_uvector srcs(0, handle.get_stream()); @@ -332,7 +320,10 @@ neighbor_sample_impl(raft::handle_t const& handle, prepare_next_frontier( handle, starting_vertices, - starting_vertex_labels, + frontier_vertex_labels + ? std::make_optional(raft::device_span( + frontier_vertex_labels->data(), frontier_vertex_labels->size())) + : std::nullopt, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, frontier_vertex_labels @@ -348,11 +339,6 @@ neighbor_sample_impl(raft::handle_t const& handle, starting_vertices = raft::device_span(frontier_vertices.data(), frontier_vertices.size()); - - if (frontier_vertex_labels) { - starting_vertex_labels = raft::device_span(frontier_vertex_labels->data(), - frontier_vertex_labels->size()); - } } auto result_size = std::reduce(level_sizes.begin(), level_sizes.end()); @@ -458,7 +444,7 @@ neighbor_sample_impl(raft::handle_t const& handle, level_result_label_vectors = std::nullopt; } - std::optional> result_offsets{std::nullopt}; + std::optional> result_label_offsets{std::nullopt}; std::optional> cp_result_labels{std::nullopt}; if (result_labels) { cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); @@ -476,7 +462,7 @@ neighbor_sample_impl(raft::handle_t const& handle, result_edge_types, result_hops, result_labels, - result_offsets) = detail::shuffle_and_organize_output(handle, + result_label_offsets) = detail::shuffle_and_organize_output(handle, std::move(result_srcs), std::move(result_dsts), std::move(result_weights), @@ -486,7 +472,28 @@ neighbor_sample_impl(raft::handle_t const& handle, std::move(result_labels), label_to_output_comm_rank); - if (result_labels && (result_offsets->size() != num_unique_labels + 1)) { + label_t num_unique_labels = 0; + + std::optional> cp_starting_vertex_labels{std::nullopt}; + + if (starting_vertex_labels) { + // Find the number of unique labels + cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); + thrust::copy(handle.get_thrust_policy(), + starting_vertex_labels->begin(), + starting_vertex_labels->end(), + cp_starting_vertex_labels->begin()); + + thrust::sort(handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + + num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), + cp_starting_vertex_labels->begin(), + cp_starting_vertex_labels->end()); + } + + if (result_labels && (result_label_offsets->size() != num_unique_labels + 1)) { // If there are missing labels, still inlude it in the result_labels result_labels = std::move(*cp_starting_vertex_labels); auto unique_labels_end = @@ -496,14 +503,14 @@ neighbor_sample_impl(raft::handle_t const& handle, result_labels->resize(num_unique_labels, handle.get_stream()); - result_offsets->resize(num_unique_labels + 1, handle.get_stream()); + result_label_offsets->resize(num_unique_labels + 1, handle.get_stream()); // Sort labels thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); thrust::transform(handle.get_thrust_policy(), thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_offsets->size() - 1), - result_offsets->begin() + 1, + thrust::make_counting_iterator(result_label_offsets->size() - 1), + result_label_offsets->begin() + 1, [result_labels = raft::device_span( cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { auto itr_lower = thrust::lower_bound( @@ -519,9 +526,9 @@ neighbor_sample_impl(raft::handle_t const& handle, // Run inclusive scan thrust::inclusive_scan(handle.get_thrust_policy(), - result_offsets->begin() + 1, - result_offsets->end(), - result_offsets->begin() + 1); + result_label_offsets->begin() + 1, + result_label_offsets->end(), + result_label_offsets->begin() + 1); } return std::make_tuple(std::move(result_srcs), @@ -531,7 +538,7 @@ neighbor_sample_impl(raft::handle_t const& handle, std::move(result_edge_types), std::move(result_hops), std::move(result_labels), - std::move(result_offsets)); + std::move(result_label_offsets)); } } // namespace detail From f23df9a8949ff52529bb0020cd4d80a461a16759 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 12:29:20 -0800 Subject: [PATCH 36/42] remove unnecessary code --- cpp/src/sampling/neighbor_sampling_impl.hpp | 93 ++------------------- 1 file changed, 9 insertions(+), 84 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 01f1517142..601d1bbcd3 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -455,90 +455,15 @@ neighbor_sample_impl(raft::handle_t const& handle, cp_result_labels->begin()); } - std::tie(result_srcs, - result_dsts, - result_weights, - result_edge_ids, - result_edge_types, - result_hops, - result_labels, - result_label_offsets) = detail::shuffle_and_organize_output(handle, - std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - label_to_output_comm_rank); - - label_t num_unique_labels = 0; - - std::optional> cp_starting_vertex_labels{std::nullopt}; - - if (starting_vertex_labels) { - // Find the number of unique labels - cp_starting_vertex_labels = rmm::device_uvector(starting_vertex_labels->size(), handle.get_stream()); - thrust::copy(handle.get_thrust_policy(), - starting_vertex_labels->begin(), - starting_vertex_labels->end(), - cp_starting_vertex_labels->begin()); - - thrust::sort(handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - - num_unique_labels = thrust::unique_count(handle.get_thrust_policy(), - cp_starting_vertex_labels->begin(), - cp_starting_vertex_labels->end()); - } - - if (result_labels && (result_label_offsets->size() != num_unique_labels + 1)) { - // If there are missing labels, still inlude it in the result_labels - result_labels = std::move(*cp_starting_vertex_labels); - auto unique_labels_end = - thrust::unique(handle.get_thrust_policy(), result_labels->begin(), result_labels->end()); - - auto num_unique_labels = thrust::distance(result_labels->begin(), unique_labels_end); - - result_labels->resize(num_unique_labels, handle.get_stream()); - - result_label_offsets->resize(num_unique_labels + 1, handle.get_stream()); - // Sort labels - thrust::sort(handle.get_thrust_policy(), cp_result_labels->begin(), cp_result_labels->end()); - - thrust::transform(handle.get_thrust_policy(), - thrust::make_counting_iterator(0), - thrust::make_counting_iterator(result_label_offsets->size() - 1), - result_label_offsets->begin() + 1, - [result_labels = raft::device_span( - cp_result_labels->data(), cp_result_labels->size())] __device__(auto idx) { - auto itr_lower = thrust::lower_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto itr_upper = thrust::upper_bound( - thrust::seq, result_labels.begin(), result_labels.end(), idx); - - auto sampled_label_size = thrust::distance(itr_lower, itr_upper); - - return sampled_label_size; - }); - - // Run inclusive scan - thrust::inclusive_scan(handle.get_thrust_policy(), - result_label_offsets->begin() + 1, - result_label_offsets->end(), - result_label_offsets->begin() + 1); - } - - return std::make_tuple(std::move(result_srcs), - std::move(result_dsts), - std::move(result_weights), - std::move(result_edge_ids), - std::move(result_edge_types), - std::move(result_hops), - std::move(result_labels), - std::move(result_label_offsets)); + return detail::shuffle_and_organize_output(handle, + std::move(result_srcs), + std::move(result_dsts), + std::move(result_weights), + std::move(result_edge_ids), + std::move(result_edge_types), + std::move(result_hops), + std::move(result_labels), + label_to_output_comm_rank); } } // namespace detail From 279755965a0e7177352fae7add39780625237069 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 12:30:09 -0800 Subject: [PATCH 37/42] fix style --- cpp/src/sampling/neighbor_sampling_impl.hpp | 22 ++++++++++----------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 601d1bbcd3..f4df1b4b9a 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -164,12 +164,12 @@ neighbor_sample_impl(raft::handle_t const& handle, if (level_result_label_vectors) { (*level_result_label_vectors).reserve(num_hops); } rmm::device_uvector frontier_vertices(0, handle.get_stream()); - + auto frontier_vertex_labels = starting_vertex_labels ? std::make_optional(rmm::device_uvector{0, handle.get_stream()}) : std::nullopt; - + if (starting_vertex_labels) { frontier_vertex_labels->resize(starting_vertex_labels->size(), handle.get_stream()); @@ -194,24 +194,23 @@ neighbor_sample_impl(raft::handle_t const& handle, std::vector level_sizes{}; for (auto hop = 0; hop < num_hops; hop++) { - rmm::device_uvector level_result_src(0, handle.get_stream()); rmm::device_uvector level_result_dst(0, handle.get_stream()); auto level_result_weight = - edge_weight_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + edge_weight_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) + : std::nullopt; auto level_result_edge_id = edge_id_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + : std::nullopt; auto level_result_edge_type = edge_type_view ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) - : std::nullopt; + : std::nullopt; auto level_result_label = starting_vertex_labels ? std::make_optional(rmm::device_uvector(0, handle.get_stream())) : std::nullopt; - + for (auto edge_type_id = 0; edge_type_id < num_edge_types; edge_type_id++) { auto k_level = fan_out[(hop * num_edge_types) + edge_type_id]; rmm::device_uvector srcs(0, handle.get_stream()); @@ -320,10 +319,9 @@ neighbor_sample_impl(raft::handle_t const& handle, prepare_next_frontier( handle, starting_vertices, - frontier_vertex_labels - ? std::make_optional(raft::device_span( - frontier_vertex_labels->data(), frontier_vertex_labels->size())) - : std::nullopt, + frontier_vertex_labels ? std::make_optional(raft::device_span( + frontier_vertex_labels->data(), frontier_vertex_labels->size())) + : std::nullopt, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, frontier_vertex_labels From 1c70abc360d8b1930ba6ab869175fcdbb45c4d3e Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 12:46:37 -0800 Subject: [PATCH 38/42] undo changes --- .../cugraph/tests/sampling/test_bulk_sampler.py | 12 ++++-------- .../cugraph/tests/sampling/test_bulk_sampler_mg.py | 10 +++------- 2 files changed, 7 insertions(+), 15 deletions(-) diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py index 765c6ef893..65bcce7877 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py @@ -119,7 +119,7 @@ def test_bulk_sampler_remainder(scratch_dir): assert b in recovered_samples["batch_id"].values_host.tolist() for x in range(0, 6, 2): - subdir = f"{x}-{x + 1}" + subdir = f"{x}-{x+1}" df = cudf.read_parquet(os.path.join(samples_path, f"batch={subdir}.parquet")) assert ((df.batch_id == x) | (df.batch_id == (x + 1))).all() @@ -278,11 +278,7 @@ def test_bulk_sampler_empty_batches(scratch_dir): assert len(os.listdir(samples_path)) == 1 - # There are 3 batches [0, 1, 2] where batch 1 has no results. In fact, seeds - # [7, 8, 9] have no outgoing edges. The previous implementation returned and - # offsets array omitting seeds with no outgoing edges from the - # edge_label_offsets which is no longer the case - df = cudf.read_parquet(os.path.join(samples_path, "batch=0-2.parquet")) + df = cudf.read_parquet(os.path.join(samples_path, "batch=0-1.parquet")) assert df[ (df.batch_id == 0) & (df.hop_id == 0) @@ -293,12 +289,12 @@ def test_bulk_sampler_empty_batches(scratch_dir): ].destinations.sort_values().values_host.tolist() == [2, 3, 7, 8] assert df[ - (df.batch_id == 2) & (df.hop_id == 0) + (df.batch_id == 1) & (df.hop_id == 0) ].destinations.sort_values().values_host.tolist() == [7, 8] assert len(df[(df.batch_id == 1) & (df.hop_id == 1)]) == 0 - assert df.batch_id.max() == 2 + assert df.batch_id.max() == 1 shutil.rmtree(samples_path) diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py index 77db37d4b9..3fddb8f405 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler_mg.py @@ -228,11 +228,7 @@ def test_bulk_sampler_empty_batches(dask_client, scratch_dir): assert len(os.listdir(samples_path)) == 1 - # There are 3 batches [0, 1, 2] where batch 1 has no results. In fact, seeds - # [7, 8, 9] have no outgoing edges. The previous implementation returned and - # offsets array omitting seeds with no outgoing edges from the - # edge_label_offsets which is no longer the case - df = cudf.read_parquet(os.path.join(samples_path, "batch=0-2.parquet")) + df = cudf.read_parquet(os.path.join(samples_path, "batch=0-1.parquet")) assert df[ (df.batch_id == 0) & (df.hop_id == 0) @@ -243,12 +239,12 @@ def test_bulk_sampler_empty_batches(dask_client, scratch_dir): ].destinations.sort_values().values_host.tolist() == [2, 3, 7, 8] assert df[ - (df.batch_id == 2) & (df.hop_id == 0) + (df.batch_id == 1) & (df.hop_id == 0) ].destinations.sort_values().values_host.tolist() == [7, 8] assert len(df[(df.batch_id == 1) & (df.hop_id == 1)]) == 0 - assert df.batch_id.max() == 2 + assert df.batch_id.max() == 1 shutil.rmtree(samples_path) From 6ca87be67001c4efbd09af45cc7ae00d1ea4e655 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 12:53:02 -0800 Subject: [PATCH 39/42] remove unsued variables --- cpp/src/sampling/neighbor_sampling_impl.hpp | 11 ----------- 1 file changed, 11 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index f4df1b4b9a..65657596c3 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -442,17 +442,6 @@ neighbor_sample_impl(raft::handle_t const& handle, level_result_label_vectors = std::nullopt; } - std::optional> result_label_offsets{std::nullopt}; - std::optional> cp_result_labels{std::nullopt}; - if (result_labels) { - cp_result_labels = rmm::device_uvector(result_labels->size(), handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), - result_labels->begin(), - result_labels->end(), - cp_result_labels->begin()); - } - return detail::shuffle_and_organize_output(handle, std::move(result_srcs), std::move(result_dsts), From 62b3b69b0ae2387dd1d27d8aa79ff5376880e899 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 14:35:04 -0800 Subject: [PATCH 40/42] undo changes --- python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py index 65bcce7877..3c5d642800 100644 --- a/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py +++ b/python/cugraph/cugraph/tests/sampling/test_bulk_sampler.py @@ -119,7 +119,7 @@ def test_bulk_sampler_remainder(scratch_dir): assert b in recovered_samples["batch_id"].values_host.tolist() for x in range(0, 6, 2): - subdir = f"{x}-{x+1}" + subdir = f"{x}-{x + 1}" df = cudf.read_parquet(os.path.join(samples_path, f"batch={subdir}.parquet")) assert ((df.batch_id == x) | (df.batch_id == (x + 1))).all() From a737fbc91744d95e78c3f934b5b7919f25bc47d5 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 14:35:23 -0800 Subject: [PATCH 41/42] remove unsed variables --- cpp/src/sampling/neighbor_sampling_impl.hpp | 52 ++++++++++++--------- 1 file changed, 30 insertions(+), 22 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 65657596c3..916ea844f8 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -135,14 +135,14 @@ neighbor_sample_impl(raft::handle_t const& handle, } } - std::vector> level_result_src_vectors{}; - std::vector> level_result_dst_vectors{}; - // Get the number of hop. If homogeneous neighbor sample, num_edge_types = 1. auto num_hops = ((fan_out.size() % num_edge_types) == 0) ? (fan_out.size() / num_edge_types) : ((fan_out.size() / num_edge_types) + 1); + std::vector> level_result_src_vectors{}; + std::vector> level_result_dst_vectors{}; + auto level_result_weight_vectors = edge_weight_view ? std::make_optional(std::vector>{}) : std::nullopt; @@ -170,15 +170,6 @@ neighbor_sample_impl(raft::handle_t const& handle, ? std::make_optional(rmm::device_uvector{0, handle.get_stream()}) : std::nullopt; - if (starting_vertex_labels) { - frontier_vertex_labels->resize(starting_vertex_labels->size(), handle.get_stream()); - - thrust::copy(handle.get_thrust_policy(), - starting_vertex_labels->begin(), - starting_vertex_labels->end(), - frontier_vertex_labels->begin()); - } - std::optional< std::tuple, std::optional>>> vertex_used_as_source{std::nullopt}; @@ -233,8 +224,15 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_type_view, edge_bias_view, rng_state, - starting_vertices, - starting_vertex_labels, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 + ? starting_vertex_labels + : starting_vertex_labels + ? std::make_optional(raft::device_span( + frontier_vertex_labels->data(), frontier_vertex_labels->size())) + : std::nullopt, static_cast(k_level), with_replacement); } else if (k_level < 0) { @@ -244,8 +242,15 @@ neighbor_sample_impl(raft::handle_t const& handle, edge_weight_view, edge_id_view, edge_type_view, - starting_vertices, - starting_vertex_labels); + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 + ? starting_vertex_labels + : starting_vertex_labels + ? std::make_optional(raft::device_span( + frontier_vertex_labels->data(), frontier_vertex_labels->size())) + : std::nullopt); } auto old_size = level_result_src.size(); @@ -318,10 +323,15 @@ neighbor_sample_impl(raft::handle_t const& handle, std::tie(frontier_vertices, frontier_vertex_labels, vertex_used_as_source) = prepare_next_frontier( handle, - starting_vertices, - frontier_vertex_labels ? std::make_optional(raft::device_span( - frontier_vertex_labels->data(), frontier_vertex_labels->size())) - : std::nullopt, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 + ? starting_vertex_labels + : starting_vertex_labels + ? std::make_optional(raft::device_span( + frontier_vertex_labels->data(), frontier_vertex_labels->size())) + : std::nullopt, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, frontier_vertex_labels @@ -335,8 +345,6 @@ neighbor_sample_impl(raft::handle_t const& handle, dedupe_sources, do_expensive_check); - starting_vertices = - raft::device_span(frontier_vertices.data(), frontier_vertices.size()); } auto result_size = std::reduce(level_sizes.begin(), level_sizes.end()); From c434eae1f34017f3cce9044f61439e2ef80a9f35 Mon Sep 17 00:00:00 2001 From: jnke2016 Date: Tue, 3 Dec 2024 14:35:55 -0800 Subject: [PATCH 42/42] fix style --- cpp/src/sampling/neighbor_sampling_impl.hpp | 78 ++++++++++----------- 1 file changed, 37 insertions(+), 41 deletions(-) diff --git a/cpp/src/sampling/neighbor_sampling_impl.hpp b/cpp/src/sampling/neighbor_sampling_impl.hpp index 916ea844f8..ed77b33043 100644 --- a/cpp/src/sampling/neighbor_sampling_impl.hpp +++ b/cpp/src/sampling/neighbor_sampling_impl.hpp @@ -216,41 +216,39 @@ neighbor_sample_impl(raft::handle_t const& handle, } if (k_level > 0) { - std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = - sample_edges(handle, - modified_graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - edge_bias_view, - rng_state, - hop == 0 - ? starting_vertices - : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), - hop == 0 - ? starting_vertex_labels - : starting_vertex_labels - ? std::make_optional(raft::device_span( - frontier_vertex_labels->data(), frontier_vertex_labels->size())) - : std::nullopt, - static_cast(k_level), - with_replacement); + std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = sample_edges( + handle, + modified_graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + edge_bias_view, + rng_state, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt, + static_cast(k_level), + with_replacement); } else if (k_level < 0) { - std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = - gather_one_hop_edgelist(handle, - modified_graph_view, - edge_weight_view, - edge_id_view, - edge_type_view, - hop == 0 - ? starting_vertices - : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), - hop == 0 - ? starting_vertex_labels - : starting_vertex_labels - ? std::make_optional(raft::device_span( - frontier_vertex_labels->data(), frontier_vertex_labels->size())) - : std::nullopt); + std::tie(srcs, dsts, weights, edge_ids, edge_types, labels) = gather_one_hop_edgelist( + handle, + modified_graph_view, + edge_weight_view, + edge_id_view, + edge_type_view, + hop == 0 + ? starting_vertices + : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt); } auto old_size = level_result_src.size(); @@ -326,12 +324,11 @@ neighbor_sample_impl(raft::handle_t const& handle, hop == 0 ? starting_vertices : raft::device_span(frontier_vertices.data(), frontier_vertices.size()), - hop == 0 - ? starting_vertex_labels - : starting_vertex_labels - ? std::make_optional(raft::device_span( - frontier_vertex_labels->data(), frontier_vertex_labels->size())) - : std::nullopt, + hop == 0 ? starting_vertex_labels + : starting_vertex_labels + ? std::make_optional(raft::device_span(frontier_vertex_labels->data(), + frontier_vertex_labels->size())) + : std::nullopt, raft::device_span{level_result_dst_vectors.back().data(), level_result_dst_vectors.back().size()}, frontier_vertex_labels @@ -344,7 +341,6 @@ neighbor_sample_impl(raft::handle_t const& handle, prior_sources_behavior, dedupe_sources, do_expensive_check); - } auto result_size = std::reduce(level_sizes.begin(), level_sizes.end());