Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix MG similarity issues #4741

Merged
merged 8 commits into from
Nov 18, 2024
354 changes: 181 additions & 173 deletions cpp/src/link_prediction/similarity_impl.cuh

Large diffs are not rendered by default.

173 changes: 115 additions & 58 deletions cpp/tests/link_prediction/mg_similarity_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,10 @@
struct Similarity_Usecase {
bool use_weights{false};
bool check_correctness{true};
size_t max_seeds{std::numeric_limits<size_t>::max()};
bool all_pairs{false};
std::optional<size_t> max_seeds{std::nullopt};
std::optional<size_t> max_vertex_pairs_to_check{std::nullopt};
std::optional<size_t> topk{std::nullopt};
};

template <typename input_usecase_t>
Expand Down Expand Up @@ -80,56 +83,96 @@ class Tests_MGSimilarity
auto mg_edge_weight_view =
mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt;

rmm::device_uvector<vertex_t> d_start_vertices(
std::min(
static_cast<size_t>(mg_graph_view.local_vertex_partition_range_size()),
similarity_usecase.max_seeds / comm_size +
(static_cast<size_t>(comm_rank) < similarity_usecase.max_seeds % comm_size ? 1 : 0)),
handle_->get_stream());
cugraph::test::populate_vertex_ids(
*handle_, d_start_vertices, mg_graph_view.local_vertex_partition_range_first());

auto [d_offsets, two_hop_nbrs] = cugraph::k_hop_nbrs(
*handle_,
mg_graph_view,
raft::device_span<vertex_t const>(d_start_vertices.data(), d_start_vertices.size()),
2);

auto h_start_vertices = cugraph::test::to_host(*handle_, d_start_vertices);
auto h_offsets = cugraph::test::to_host(*handle_, d_offsets);

std::vector<vertex_t> h_v1(h_offsets.back());
for (size_t i = 0; i < h_start_vertices.size(); ++i) {
std::fill(h_v1.begin() + h_offsets[i], h_v1.begin() + h_offsets[i + 1], h_start_vertices[i]);
}
rmm::device_uvector<vertex_t> v1(0, handle_->get_stream());
rmm::device_uvector<vertex_t> v2(0, handle_->get_stream());
rmm::device_uvector<weight_t> result_score(0, handle_->get_stream());

auto d_v1 = cugraph::test::to_device(*handle_, h_v1);
auto d_v2 = std::move(two_hop_nbrs);

std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore, std::ignore) =
cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
vertex_t,
edge_t,
weight_t,
int32_t>(*handle_,
std::move(d_v1),
std::move(d_v2),
std::nullopt,
std::nullopt,
std::nullopt,
mg_graph_view.vertex_partition_range_lasts());

std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
{d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}};
raft::random::RngState rng_state{0};

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
handle_->get_comms().barrier();
hr_timer.start("MG similarity test");
rmm::device_uvector<vertex_t> sources(0, handle_->get_stream());
std::optional<raft::device_span<vertex_t const>> sources_span{std::nullopt};

if (similarity_usecase.max_seeds) {
sources = cugraph::select_random_vertices(
*handle_,
mg_graph_view,
std::optional<raft::device_span<vertex_t const>>{std::nullopt},
rng_state,
std::min(*similarity_usecase.max_seeds,
static_cast<size_t>(mg_graph_view.number_of_vertices())),
false,
false);
sources_span = raft::device_span<vertex_t const>{sources.data(), sources.size()};
}

auto result_score = test_functor.run(
*handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
if (similarity_usecase.all_pairs) {
if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
handle_->get_comms().barrier();
hr_timer.start("MG similarity test");
}

std::tie(v1, v2, result_score) = test_functor.run(*handle_,
mg_graph_view,
mg_edge_weight_view,
sources_span,
similarity_usecase.use_weights,
similarity_usecase.topk);
} else {
if (!sources_span) {
sources.resize(mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream());
cugraph::test::populate_vertex_ids(
*handle_, sources, mg_graph_view.local_vertex_partition_range_first());
sources_span = raft::device_span<vertex_t const>{sources.data(), sources.size()};
}

rmm::device_uvector<size_t> offsets(0, handle_->get_stream());

std::tie(offsets, v2) = cugraph::k_hop_nbrs(*handle_, mg_graph_view, *sources_span, 2);

v1.resize(v2.size(), handle_->get_stream());
cugraph::test::expand_sparse_offsets(
*handle_,
raft::device_span<size_t const>{offsets.data(), offsets.size()},
raft::device_span<vertex_t>{v1.data(), v1.size()},
size_t{0},
vertex_t{0});

cugraph::unrenumber_local_int_vertices(*handle_,
v1.data(),
v1.size(),
sources.data(),
vertex_t{0},
static_cast<vertex_t>(sources.size()),
true);

std::tie(v1, v2) = cugraph::test::remove_self_loops(*handle_, std::move(v1), std::move(v2));

std::tie(v1, v2, std::ignore, std::ignore, std::ignore, std::ignore) =
cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
vertex_t,
edge_t,
weight_t,
int32_t>(*handle_,
std::move(v1),
std::move(v2),
std::nullopt,
std::nullopt,
std::nullopt,
mg_graph_view.vertex_partition_range_lasts());

std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
{v1.data(), v1.size()}, {v2.data(), v2.size()}};

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
handle_->get_comms().barrier();
hr_timer.start("MG similarity test");
}

result_score = test_functor.run(
*handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
}

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
Expand All @@ -147,14 +190,14 @@ class Tests_MGSimilarity
mg_edge_weight_view,
std::optional<raft::device_span<vertex_t const>>(std::nullopt));

d_v1 = cugraph::test::device_gatherv(*handle_, d_v1.data(), d_v1.size());
d_v2 = cugraph::test::device_gatherv(*handle_, d_v2.data(), d_v2.size());
v1 = cugraph::test::device_gatherv(*handle_, v1.data(), v1.size());
v2 = cugraph::test::device_gatherv(*handle_, v2.data(), v2.size());
result_score =
cugraph::test::device_gatherv(*handle_, result_score.data(), result_score.size());

if (d_v1.size() > 0) {
auto h_vertex_pair1 = cugraph::test::to_host(*handle_, d_v1);
auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2);
if (v1.size() > 0) {
auto h_vertex_pair1 = cugraph::test::to_host(*handle_, v1);
auto h_vertex_pair2 = cugraph::test::to_host(*handle_, v2);
auto h_result_score = cugraph::test::to_host(*handle_, result_score);

similarity_compare(mg_graph_view.number_of_vertices(),
Expand Down Expand Up @@ -258,10 +301,13 @@ INSTANTIATE_TEST_SUITE_P(
file_test,
Tests_MGSimilarity_File,
::testing::Combine(
// enable correctness checks
// Disable weighted computation testing in 22.10
//::testing::Values(Similarity_Usecase{true, true, 20}, Similarity_Usecase{false, true, 20}),
::testing::Values(Similarity_Usecase{false, true, 20}),
::testing::Values(Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100, 10},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, std::nullopt, 100, 10},
Similarity_Usecase{false, true, true, 20, 100, 10}),
::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
cugraph::test::File_Usecase("test/datasets/netscience.mtx"))));

Expand All @@ -273,7 +319,13 @@ INSTANTIATE_TEST_SUITE_P(
// Disable weighted computation testing in 22.10
//::testing::Values(Similarity_Usecase{true, true, 20},
// Similarity_Usecase{false, true, 20}),
::testing::Values(Similarity_Usecase{false, true, 20}),
::testing::Values(Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100, 10},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, std::nullopt, 100, 10},
Similarity_Usecase{false, true, true, 20, 100, 10}),
::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));

INSTANTIATE_TEST_SUITE_P(
Expand All @@ -285,7 +337,12 @@ INSTANTIATE_TEST_SUITE_P(
Tests_MGSimilarity_Rmat,
::testing::Combine(
// disable correctness checks for large graphs
::testing::Values(Similarity_Usecase{false, false, 20}),
::testing::Values(Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100},
Similarity_Usecase{false, true, false, 20, 100, 10},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, 20, 100},
Similarity_Usecase{false, true, true, 20, 100, 10}),
::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false))));

CUGRAPH_MG_TEST_PROGRAM_MAIN()
26 changes: 11 additions & 15 deletions cpp/tests/link_prediction/similarity_test.cu
Original file line number Diff line number Diff line change
Expand Up @@ -83,11 +83,6 @@ class Tests_Similarity
auto edge_weight_view =
edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt;

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
hr_timer.start("Similarity test");
}

rmm::device_uvector<vertex_t> v1(0, handle.get_stream());
rmm::device_uvector<vertex_t> v2(0, handle.get_stream());
rmm::device_uvector<weight_t> result_score(0, handle.get_stream());
Expand All @@ -111,6 +106,11 @@ class Tests_Similarity
}

if (similarity_usecase.all_pairs) {
if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
hr_timer.start("Similarity test");
}

std::tie(v1, v2, result_score) = test_functor.run(handle,
graph_view,
edge_weight_view,
Expand Down Expand Up @@ -141,21 +141,17 @@ class Tests_Similarity
static_cast<vertex_t>(sources.size()),
true);

auto new_size = thrust::distance(
thrust::make_zip_iterator(v1.begin(), v2.begin()),
thrust::remove_if(
handle.get_thrust_policy(),
thrust::make_zip_iterator(v1.begin(), v2.begin()),
thrust::make_zip_iterator(v1.end(), v2.end()),
[] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); }));

v1.resize(new_size, handle.get_stream());
v2.resize(new_size, handle.get_stream());
std::tie(v1, v2) = cugraph::test::remove_self_loops(handle, std::move(v1), std::move(v2));

// FIXME: Need to add some tests that specify actual vertex pairs
std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
{v1.data(), v1.size()}, {v2.data(), v2.size()}};

if (cugraph::test::g_perf) {
RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement
hr_timer.start("Similarity test");
}

result_score = test_functor.run(
handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
}
Expand Down
30 changes: 30 additions & 0 deletions cpp/tests/utilities/thrust_wrapper.cu
Original file line number Diff line number Diff line change
Expand Up @@ -546,5 +546,35 @@ template void expand_hypersparse_offsets(raft::handle_t const& handle,
raft::device_span<int64_t> indices,
size_t base_offset);

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> remove_self_loops(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& v1,
rmm::device_uvector<vertex_t>&& v2)
{
auto new_size = thrust::distance(
thrust::make_zip_iterator(v1.begin(), v2.begin()),
thrust::remove_if(
handle.get_thrust_policy(),
thrust::make_zip_iterator(v1.begin(), v2.begin()),
thrust::make_zip_iterator(v1.end(), v2.end()),
[] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); }));

v1.resize(new_size, handle.get_stream());
v2.resize(new_size, handle.get_stream());

return std::make_tuple(std::move(v1), std::move(v2));
}

template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>> remove_self_loops(
raft::handle_t const& handle,
rmm::device_uvector<int32_t>&& v1,
rmm::device_uvector<int32_t>&& v2);

template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>> remove_self_loops(
raft::handle_t const& handle,
rmm::device_uvector<int64_t>&& v1,
rmm::device_uvector<int64_t>&& v2);

} // namespace test
} // namespace cugraph
6 changes: 6 additions & 0 deletions cpp/tests/utilities/thrust_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -107,5 +107,11 @@ void expand_hypersparse_offsets(raft::handle_t const& handle,
raft::device_span<idx_t> indices,
offset_t base_offset);

template <typename vertex_t>
std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> remove_self_loops(
raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& v1,
rmm::device_uvector<vertex_t>&& v2);

} // namespace test
} // namespace cugraph
Loading
Loading