Skip to content

Commit

Permalink
Fix MG similarity issues (rapidsai#4741)
Browse files Browse the repository at this point in the history
This PR adds C++ tests for the all-pairs variation of similarity algorithms.  Previously the all-pairs variation was only tested in SG mode.

This also addresses an issue where the all-pairs implementation would crash when there was a load imbalance across the GPUs and one of the GPUs ran out of work before the others.

Closes rapidsai#4704

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Joseph Nke (https://github.com/jnke2016)
  - Rick Ratzel (https://github.com/rlratzel)

URL: rapidsai#4741
  • Loading branch information
ChuckHastings authored and BradReesWork committed Nov 18, 2024
1 parent 671186a commit 12a3102
Show file tree
Hide file tree
Showing 7 changed files with 448 additions and 251 deletions.
354 changes: 181 additions & 173 deletions cpp/src/link_prediction/similarity_impl.cuh

Large diffs are not rendered by default.

173 changes: 115 additions & 58 deletions cpp/tests/link_prediction/mg_similarity_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
struct Similarity_Usecase {
bool use_weights{false};
bool check_correctness{true};
size_t max_seeds{std::numeric_limits<size_t>::max()};
bool all_pairs{false};
std::optional<size_t> max_seeds{std::nullopt};
std::optional<size_t> max_vertex_pairs_to_check{std::nullopt};
std::optional<size_t> topk{std::nullopt};
};

template <typename input_usecase_t>
Expand Down Expand Up @@ -80,56 +83,96 @@ class Tests_MGSimilarity
auto mg_edge_weight_view =
mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt;

rmm::device_uvector<vertex_t> d_start_vertices(
std::min(
static_cast<size_t>(mg_graph_view.local_vertex_partition_range_size()),
similarity_usecase.max_seeds / comm_size +
(static_cast<size_t>(comm_rank) < similarity_usecase.max_seeds % comm_size ? 1 : 0)),
handle_->get_stream());
cugraph::test::populate_vertex_ids(
*handle_, d_start_vertices, mg_graph_view.local_vertex_partition_range_first());

auto [d_offsets, two_hop_nbrs] = cugraph::k_hop_nbrs(
*handle_,
mg_graph_view,
raft::device_span<vertex_t const>(d_start_vertices.data(), d_start_vertices.size()),
2);

auto h_start_vertices = cugraph::test::to_host(*handle_, d_start_vertices);
auto h_offsets = cugraph::test::to_host(*handle_, d_offsets);

std::vector<vertex_t> h_v1(h_offsets.back());
for (size_t i = 0; i < h_start_vertices.size(); ++i) {
std::fill(h_v1.begin() + h_offsets[i], h_v1.begin() + h_offsets[i + 1], h_start_vertices[i]);
}
rmm::device_uvector<vertex_t> v1(0, handle_->get_stream());
rmm::device_uvector<vertex_t> v2(0, handle_->get_stream());
rmm::device_uvector<weight_t> result_score(0, handle_->get_stream());

auto d_v1 = cugraph::test::to_device(*handle_, h_v1);
auto d_v2 = std::move(two_hop_nbrs);

std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore, std::ignore) =
cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
vertex_t,
edge_t,
weight_t,
int32_t>(*handle_,
std::move(d_v1),
std::move(d_v2),
std::nullopt,
std::nullopt,
std::nullopt,
mg_graph_view.vertex_partition_range_lasts());

std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
{d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}};
raft::random::RngState rng_state{0};

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
handle_->get_comms().barrier();
hr_timer.start("MG similarity test");
rmm::device_uvector<vertex_t> sources(0, handle_->get_stream());
std::optional<raft::device_span<vertex_t const>> sources_span{std::nullopt};

if (similarity_usecase.max_seeds) {
sources = cugraph::select_random_vertices(
*handle_,
mg_graph_view,
std::optional<raft::device_span<vertex_t const>>{std::nullopt},
rng_state,
std::min(*similarity_usecase.max_seeds,
static_cast<size_t>(mg_graph_view.number_of_vertices())),
false,
false);
sources_span = raft::device_span<vertex_t const>{sources.data(), sources.size()};
}

auto result_score = test_functor.run(
*handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
if (similarity_usecase.all_pairs) {
if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
handle_->get_comms().barrier();
hr_timer.start("MG similarity test");
}

std::tie(v1, v2, result_score) = test_functor.run(*handle_,
mg_graph_view,
mg_edge_weight_view,
sources_span,
similarity_usecase.use_weights,
similarity_usecase.topk);
} else {
if (!sources_span) {
sources.resize(mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream());
cugraph::test::populate_vertex_ids(
*handle_, sources, mg_graph_view.local_vertex_partition_range_first());
sources_span = raft::device_span<vertex_t const>{sources.data(), sources.size()};
}

rmm::device_uvector<size_t> offsets(0, handle_->get_stream());

std::tie(offsets, v2) = cugraph::k_hop_nbrs(*handle_, mg_graph_view, *sources_span, 2);

v1.resize(v2.size(), handle_->get_stream());
cugraph::test::expand_sparse_offsets(
*handle_,
raft::device_span<size_t const>{offsets.data(), offsets.size()},
raft::device_span<vertex_t>{v1.data(), v1.size()},
size_t{0},
vertex_t{0});

cugraph::unrenumber_local_int_vertices(*handle_,
v1.data(),
v1.size(),
sources.data(),
vertex_t{0},
static_cast<vertex_t>(sources.size()),
true);

std::tie(v1, v2) = cugraph::test::remove_self_loops(*handle_, std::move(v1), std::move(v2));

std::tie(v1, v2, std::ignore, std::ignore, std::ignore, std::ignore) =
cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
vertex_t,
edge_t,
weight_t,
int32_t>(*handle_,
std::move(v1),
std::move(v2),
std::nullopt,
std::nullopt,
std::nullopt,
mg_graph_view.vertex_partition_range_lasts());

std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
{v1.data(), v1.size()}, {v2.data(), v2.size()}};

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
handle_->get_comms().barrier();
hr_timer.start("MG similarity test");
}

result_score = test_functor.run(
*handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
}

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
Expand All @@ -147,14 +190,14 @@ class Tests_MGSimilarity
mg_edge_weight_view,
std::optional<raft::device_span<vertex_t const>>(std::nullopt));

d_v1 = cugraph::test::device_gatherv(*handle_, d_v1.data(), d_v1.size());
d_v2 = cugraph::test::device_gatherv(*handle_, d_v2.data(), d_v2.size());
v1 = cugraph::test::device_gatherv(*handle_, v1.data(), v1.size());
v2 = cugraph::test::device_gatherv(*handle_, v2.data(), v2.size());
result_score =
cugraph::test::device_gatherv(*handle_, result_score.data(), result_score.size());

if (d_v1.size() > 0) {
auto h_vertex_pair1 = cugraph::test::to_host(*handle_, d_v1);
auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2);
if (v1.size() > 0) {
auto h_vertex_pair1 = cugraph::test::to_host(*handle_, v1);
auto h_vertex_pair2 = cugraph::test::to_host(*handle_, v2);
auto h_result_score = cugraph::test::to_host(*handle_, result_score);

similarity_compare(mg_graph_view.number_of_vertices(),
Expand Down Expand Up @@ -258,10 +301,13 @@ INSTANTIATE_TEST_SUITE_P(
file_test,
Tests_MGSimilarity_File,
::testing::Combine(
// enable correctness checks
// Disable weighted computation testing in 22.10
//::testing::Values(Similarity_Usecase{true, true, 20}, Similarity_Usecase{false, true, 20}),
::testing::Values(Similarity_Usecase{false, true, 20}),
::testing::Values(Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100, 10},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, std::nullopt, 100, 10},
Similarity_Usecase{false, true, true, 20, 100, 10}),
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
cugraph::test::File_Usecase("test/datasets/netscience.mtx"))));

Expand All @@ -273,7 +319,13 @@ INSTANTIATE_TEST_SUITE_P(
// Disable weighted computation testing in 22.10
//::testing::Values(Similarity_Usecase{true, true, 20},
// Similarity_Usecase{false, true, 20}),
::testing::Values(Similarity_Usecase{false, true, 20}),
::testing::Values(Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100, 10},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, std::nullopt, 100, 10},
Similarity_Usecase{false, true, true, 20, 100, 10}),
::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));

INSTANTIATE_TEST_SUITE_P(
Expand All @@ -285,7 +337,12 @@ INSTANTIATE_TEST_SUITE_P(
Tests_MGSimilarity_Rmat,
::testing::Combine(
// disable correctness checks for large graphs
::testing::Values(Similarity_Usecase{false, false, 20}),
::testing::Values(Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100, 10},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, 20, 100, 10}),
::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false))));

CUGRAPH_MG_TEST_PROGRAM_MAIN()
26 changes: 11 additions & 15 deletions cpp/tests/link_prediction/similarity_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,6 @@ class Tests_Similarity
auto edge_weight_view =
edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt;

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
hr_timer.start("Similarity test");
}

rmm::device_uvector<vertex_t> v1(0, handle.get_stream());
rmm::device_uvector<vertex_t> v2(0, handle.get_stream());
rmm::device_uvector<weight_t> result_score(0, handle.get_stream());
Expand All @@ -111,6 +106,11 @@ class Tests_Similarity
}

if (similarity_usecase.all_pairs) {
if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
hr_timer.start("Similarity test");
}

std::tie(v1, v2, result_score) = test_functor.run(handle,
graph_view,
edge_weight_view,
Expand Down Expand Up @@ -141,21 +141,17 @@ class Tests_Similarity
static_cast<vertex_t>(sources.size()),
true);

auto new_size = thrust::distance(
thrust::make_zip_iterator(v1.begin(), v2.begin()),
thrust::remove_if(
handle.get_thrust_policy(),
thrust::make_zip_iterator(v1.begin(), v2.begin()),
thrust::make_zip_iterator(v1.end(), v2.end()),
[] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); }));

v1.resize(new_size, handle.get_stream());
v2.resize(new_size, handle.get_stream());
std::tie(v1, v2) = cugraph::test::remove_self_loops(handle, std::move(v1), std::move(v2));

// FIXME: Need to add some tests that specify actual vertex pairs
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
{v1.data(), v1.size()}, {v2.data(), v2.size()}};

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
hr_timer.start("Similarity test");
}

result_score = test_functor.run(
handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
}
Expand Down
30 changes: 30 additions & 0 deletions cpp/tests/utilities/thrust_wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -566,5 +566,35 @@ template void expand_hypersparse_offsets(raft::handle_t const& handle,
raft::device_span<int64_t> indices,
size_t base_offset);

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> remove_self_loops(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& v1,
rmm::device_uvector<vertex_t>&& v2)
{
auto new_size = thrust::distance(
thrust::make_zip_iterator(v1.begin(), v2.begin()),
thrust::remove_if(
handle.get_thrust_policy(),
thrust::make_zip_iterator(v1.begin(), v2.begin()),
thrust::make_zip_iterator(v1.end(), v2.end()),
[] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); }));

v1.resize(new_size, handle.get_stream());
v2.resize(new_size, handle.get_stream());

return std::make_tuple(std::move(v1), std::move(v2));
}

template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>> remove_self_loops(
raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& v1,
rmm::device_uvector<int32_t>&& v2);

template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>> remove_self_loops(
raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& v1,
rmm::device_uvector<int64_t>&& v2);

} // namespace test
} // namespace cugraph
6 changes: 6 additions & 0 deletions cpp/tests/utilities/thrust_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,5 +112,11 @@ void expand_hypersparse_offsets(raft::handle_t const& handle,
raft::device_span<idx_t> indices,
offset_t base_offset);

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> remove_self_loops(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& v1,
rmm::device_uvector<vertex_t>&& v2);

} // namespace test
} // namespace cugraph
Loading

0 comments on commit 12a3102

Please sign in to comment.