diff --git a/cpp/src/link_prediction/similarity_impl.cuh b/cpp/src/link_prediction/similarity_impl.cuh
index b39895129dc..00f73b5c263 100644
--- a/cpp/src/link_prediction/similarity_impl.cuh
+++ b/cpp/src/link_prediction/similarity_impl.cuh
@@ -287,10 +287,8 @@ all_pairs_similarity(raft::handle_t const& handle,
     //  computing/updating topk with each batch
 
     //   FIXME: Experiment with this and adjust as necessary
-    // size_t const
-    // MAX_PAIRS_PER_BATCH{static_cast<size_t>(handle.get_device_properties().multiProcessorCount) *
-    // (1 << 15)};
-    size_t const MAX_PAIRS_PER_BATCH{100};
+    size_t const MAX_PAIRS_PER_BATCH{
+      static_cast<size_t>(handle.get_device_properties().multiProcessorCount) * (1 << 15)};
 
     rmm::device_uvector<edge_t> degrees = graph_view.compute_out_degrees(handle);
     rmm::device_uvector<size_t> two_hop_degrees(degrees.size() + 1, handle.get_stream());
@@ -362,195 +360,205 @@ all_pairs_similarity(raft::handle_t const& handle,
                       1,
                       handle.get_stream());
 
+    handle.sync_stream();
+
     std::tie(batch_offsets, std::ignore) = compute_offset_aligned_element_chunks(
       handle,
       raft::device_span<size_t const>{two_hop_degree_offsets.data(), two_hop_degree_offsets.size()},
       sum_two_hop_degrees,
       MAX_PAIRS_PER_BATCH);
 
-    for (size_t batch_number = 0; batch_number < (batch_offsets.size() - 1); ++batch_number) {
-      if (batch_offsets[batch_number + 1] > batch_offsets[batch_number]) {
-        auto [offsets, v2] =
-          k_hop_nbrs(handle,
-                     graph_view,
-                     raft::device_span<vertex_t const>{
-                       tmp_vertices.data() + batch_offsets[batch_number],
-                       batch_offsets[batch_number + 1] - batch_offsets[batch_number]},
-                     2,
-                     do_expensive_check);
-
-        auto v1 = cugraph::detail::expand_sparse_offsets(
-          raft::device_span<size_t const>{offsets.data(), offsets.size()},
-          vertex_t{0},
-          handle.get_stream());
+    // FIXME: compute_offset_aligned_element_chunks can return duplicate offsets.  Should it?
+    //  Explore whether this de-duplication should be pushed into that function; until then,
+    //  adjacent duplicate offsets are dropped here.
+    batch_offsets.resize(std::distance(batch_offsets.begin(),
+                                       std::unique(batch_offsets.begin(), batch_offsets.end())));
 
-        cugraph::unrenumber_local_int_vertices(
-          handle,
-          v1.data(),
-          v1.size(),
+    size_t num_batches = batch_offsets.size() - 1;
+    if constexpr (multi_gpu) {
+      num_batches = cugraph::host_scalar_allreduce(
+        handle.get_comms(), num_batches, raft::comms::op_t::MAX, handle.get_stream());
+    }
+
+    for (size_t batch_number = 0; batch_number < num_batches; ++batch_number) {
+      raft::device_span<vertex_t const> batch_seeds{tmp_vertices.data(), size_t{0}};
+
+      if (((batch_number + 1) < batch_offsets.size()) &&
+          (batch_offsets[batch_number + 1] > batch_offsets[batch_number])) {
+        batch_seeds = raft::device_span<vertex_t const>{
           tmp_vertices.data() + batch_offsets[batch_number],
-          vertex_t{0},
-          static_cast<vertex_t>(batch_offsets[batch_number + 1] - batch_offsets[batch_number]),
-          do_expensive_check);
+          batch_offsets[batch_number + 1] - batch_offsets[batch_number]};
+      }
+
+      auto [offsets, v2] = k_hop_nbrs(handle, graph_view, batch_seeds, 2, do_expensive_check);
 
-        auto new_size = thrust::distance(
+      auto v1 = cugraph::detail::expand_sparse_offsets(
+        raft::device_span<size_t const>{offsets.data(), offsets.size()},
+        vertex_t{0},
+        handle.get_stream());
+
+      // Use batch_seeds: a rank padded past its own batches must not index batch_offsets.
+      cugraph::unrenumber_local_int_vertices(handle,
+                                             v1.data(),
+                                             v1.size(),
+                                             batch_seeds.data(),
+                                             vertex_t{0},
+                                             static_cast<vertex_t>(batch_seeds.size()),
+                                             do_expensive_check);
+
+      auto new_size = thrust::distance(
+        thrust::make_zip_iterator(v1.begin(), v2.begin()),
+        thrust::remove_if(
+          handle.get_thrust_policy(),
           thrust::make_zip_iterator(v1.begin(), v2.begin()),
-          thrust::remove_if(
-            handle.get_thrust_policy(),
-            thrust::make_zip_iterator(v1.begin(), v2.begin()),
-            thrust::make_zip_iterator(v1.end(), v2.end()),
-            [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); }));
-
-        v1.resize(new_size, handle.get_stream());
-        v2.resize(new_size, handle.get_stream());
-
-        if constexpr (multi_gpu) {
-          // shuffle vertex pairs
-          auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts();
-
-          std::tie(v1, v2, std::ignore, std::ignore, std::ignore, std::ignore) =
-            detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<vertex_t,
-                                                                                           edge_t,
-                                                                                           weight_t,
-                                                                                           int>(
-              handle,
-              std::move(v1),
-              std::move(v2),
-              std::nullopt,
-              std::nullopt,
-              std::nullopt,
-              vertex_partition_range_lasts);
-        }
+          thrust::make_zip_iterator(v1.end(), v2.end()),
+          [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); }));
 
-        auto score =
-          similarity(handle,
-                     graph_view,
-                     edge_weight_view,
-                     std::make_tuple(raft::device_span<vertex_t const>{v1.data(), v1.size()},
-                                     raft::device_span<vertex_t const>{v2.data(), v2.size()}),
-                     functor,
-                     coeff,
-                     do_expensive_check);
-
-        // Add a remove_if to remove items that are less than the last topk element
-        new_size = thrust::distance(
-          thrust::make_zip_iterator(score.begin(), v1.begin(), v2.begin()),
-          thrust::remove_if(handle.get_thrust_policy(),
-                            thrust::make_zip_iterator(score.begin(), v1.begin(), v2.begin()),
-                            thrust::make_zip_iterator(score.end(), v1.end(), v2.end()),
-                            [similarity_threshold] __device__(auto tuple) {
-                              return thrust::get<0>(tuple) < similarity_threshold;
-                            }));
-
-        score.resize(new_size, handle.get_stream());
-        v1.resize(new_size, handle.get_stream());
-        v2.resize(new_size, handle.get_stream());
-
-        thrust::sort_by_key(handle.get_thrust_policy(),
-                            score.begin(),
-                            score.end(),
-                            thrust::make_zip_iterator(v1.begin(), v2.begin()),
-                            thrust::greater<weight_t>{});
-
-        size_t v1_keep = std::min(*topk, v1.size());
-
-        if (score.size() < (top_v1.size() + v1_keep)) {
-          score.resize(top_v1.size() + v1_keep, handle.get_stream());
-          v1.resize(score.size(), handle.get_stream());
-          v2.resize(score.size(), handle.get_stream());
-        }
+      v1.resize(new_size, handle.get_stream());
+      v2.resize(new_size, handle.get_stream());
 
-        thrust::copy(
-          handle.get_thrust_policy(), top_v1.begin(), top_v1.end(), v1.begin() + v1_keep);
-        thrust::copy(
-          handle.get_thrust_policy(), top_v2.begin(), top_v2.end(), v2.begin() + v1_keep);
-        thrust::copy(
-          handle.get_thrust_policy(), top_score.begin(), top_score.end(), score.begin() + v1_keep);
-
-        thrust::sort_by_key(handle.get_thrust_policy(),
-                            score.begin(),
-                            score.end(),
-                            thrust::make_zip_iterator(v1.begin(), v2.begin()),
-                            thrust::greater<weight_t>{});
-
-        if (top_v1.size() < std::min(*topk, v1.size())) {
-          top_v1.resize(std::min(*topk, v1.size()), handle.get_stream());
-          top_v2.resize(top_v1.size(), handle.get_stream());
-          top_score.resize(top_v1.size(), handle.get_stream());
-        }
+      if constexpr (multi_gpu) {
+        // shuffle vertex pairs
+        auto vertex_partition_range_lasts = graph_view.vertex_partition_range_lasts();
+
+        std::tie(v1, v2, std::ignore, std::ignore, std::ignore, std::ignore) =
+          detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<vertex_t,
+                                                                                         edge_t,
+                                                                                         weight_t,
+                                                                                         int>(
+            handle,
+            std::move(v1),
+            std::move(v2),
+            std::nullopt,
+            std::nullopt,
+            std::nullopt,
+            vertex_partition_range_lasts);
+      }
 
-        thrust::copy(
-          handle.get_thrust_policy(), v1.begin(), v1.begin() + top_v1.size(), top_v1.begin());
-        thrust::copy(
-          handle.get_thrust_policy(), v2.begin(), v2.begin() + top_v1.size(), top_v2.begin());
-        thrust::copy(handle.get_thrust_policy(),
-                     score.begin(),
-                     score.begin() + top_v1.size(),
-                     top_score.begin());
-
-        if constexpr (multi_gpu) {
-          bool is_root  = handle.get_comms().get_rank() == int{0};
-          auto rx_sizes = cugraph::host_scalar_gather(
-            handle.get_comms(), top_v1.size(), int{0}, handle.get_stream());
-          std::vector<size_t> rx_displs;
-          size_t gathered_size{0};
-
-          if (is_root) {
-            rx_displs.resize(handle.get_comms().get_size());
-            rx_displs[0] = 0;
-            std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1);
-            gathered_size = std::reduce(rx_sizes.begin(), rx_sizes.end());
-          }
+      auto score =
+        similarity(handle,
+                   graph_view,
+                   edge_weight_view,
+                   std::make_tuple(raft::device_span<vertex_t const>{v1.data(), v1.size()},
+                                   raft::device_span<vertex_t const>{v2.data(), v2.size()}),
+                   functor,
+                   coeff,
+                   do_expensive_check);
+
+      // Add a remove_if to remove items that are less than the last topk element
+      new_size = thrust::distance(
+        thrust::make_zip_iterator(score.begin(), v1.begin(), v2.begin()),
+        thrust::remove_if(handle.get_thrust_policy(),
+                          thrust::make_zip_iterator(score.begin(), v1.begin(), v2.begin()),
+                          thrust::make_zip_iterator(score.end(), v1.end(), v2.end()),
+                          [similarity_threshold] __device__(auto tuple) {
+                            return thrust::get<0>(tuple) < similarity_threshold;
+                          }));
+
+      score.resize(new_size, handle.get_stream());
+      v1.resize(new_size, handle.get_stream());
+      v2.resize(new_size, handle.get_stream());
+
+      thrust::sort_by_key(handle.get_thrust_policy(),
+                          score.begin(),
+                          score.end(),
+                          thrust::make_zip_iterator(v1.begin(), v2.begin()),
+                          thrust::greater<weight_t>{});
+
+      size_t v1_keep = std::min(*topk, v1.size());
+
+      if (score.size() < (top_v1.size() + v1_keep)) {
+        score.resize(top_v1.size() + v1_keep, handle.get_stream());
+        v1.resize(score.size(), handle.get_stream());
+        v2.resize(score.size(), handle.get_stream());
+      }
 
-          rmm::device_uvector<vertex_t> gathered_v1(gathered_size, handle.get_stream());
-          rmm::device_uvector<vertex_t> gathered_v2(gathered_size, handle.get_stream());
-          rmm::device_uvector<weight_t> gathered_score(gathered_size, handle.get_stream());
-
-          cugraph::device_gatherv(
-            handle.get_comms(),
-            thrust::make_zip_iterator(top_v1.begin(), top_v2.begin(), top_score.begin()),
-            thrust::make_zip_iterator(
-              gathered_v1.begin(), gathered_v2.begin(), gathered_score.begin()),
-
-            top_v1.size(),
-            rx_sizes,
-            rx_displs,
-            int{0},
-            handle.get_stream());
-
-          if (is_root) {
-            thrust::sort_by_key(handle.get_thrust_policy(),
-                                gathered_score.begin(),
-                                gathered_score.end(),
-                                thrust::make_zip_iterator(gathered_v1.begin(), gathered_v2.begin()),
-                                thrust::greater<weight_t>{});
-
-            if (gathered_v1.size() > *topk) {
-              gathered_v1.resize(*topk, handle.get_stream());
-              gathered_v2.resize(*topk, handle.get_stream());
-              gathered_score.resize(*topk, handle.get_stream());
-            }
-
-            top_v1    = std::move(gathered_v1);
-            top_v2    = std::move(gathered_v2);
-            top_score = std::move(gathered_score);
-          } else {
-            top_v1.resize(0, handle.get_stream());
-            top_v2.resize(0, handle.get_stream());
-            top_score.resize(0, handle.get_stream());
-          }
+      thrust::copy(handle.get_thrust_policy(), top_v1.begin(), top_v1.end(), v1.begin() + v1_keep);
+      thrust::copy(handle.get_thrust_policy(), top_v2.begin(), top_v2.end(), v2.begin() + v1_keep);
+      thrust::copy(
+        handle.get_thrust_policy(), top_score.begin(), top_score.end(), score.begin() + v1_keep);
+
+      thrust::sort_by_key(handle.get_thrust_policy(),
+                          score.begin(),
+                          score.end(),
+                          thrust::make_zip_iterator(v1.begin(), v2.begin()),
+                          thrust::greater<weight_t>{});
+
+      if (top_v1.size() < std::min(*topk, v1.size())) {
+        top_v1.resize(std::min(*topk, v1.size()), handle.get_stream());
+        top_v2.resize(top_v1.size(), handle.get_stream());
+        top_score.resize(top_v1.size(), handle.get_stream());
+      }
+
+      thrust::copy(
+        handle.get_thrust_policy(), v1.begin(), v1.begin() + top_v1.size(), top_v1.begin());
+      thrust::copy(
+        handle.get_thrust_policy(), v2.begin(), v2.begin() + top_v1.size(), top_v2.begin());
+      thrust::copy(handle.get_thrust_policy(),
+                   score.begin(),
+                   score.begin() + top_v1.size(),
+                   top_score.begin());
+
+      if constexpr (multi_gpu) {
+        bool is_root  = handle.get_comms().get_rank() == int{0};
+        auto rx_sizes = cugraph::host_scalar_gather(
+          handle.get_comms(), top_v1.size(), int{0}, handle.get_stream());
+        std::vector<size_t> rx_displs;
+        size_t gathered_size{0};
+
+        if (is_root) {
+          rx_displs.resize(handle.get_comms().get_size());
+          rx_displs[0] = 0;
+          std::partial_sum(rx_sizes.begin(), rx_sizes.end() - 1, rx_displs.begin() + 1);
+          gathered_size = std::reduce(rx_sizes.begin(), rx_sizes.end());
         }
 
-        if (top_score.size() == *topk) {
-          raft::update_host(
-            &similarity_threshold, top_score.data() + *topk - 1, 1, handle.get_stream());
+        rmm::device_uvector<vertex_t> gathered_v1(gathered_size, handle.get_stream());
+        rmm::device_uvector<vertex_t> gathered_v2(gathered_size, handle.get_stream());
+        rmm::device_uvector<weight_t> gathered_score(gathered_size, handle.get_stream());
+
+        cugraph::device_gatherv(
+          handle.get_comms(),
+          thrust::make_zip_iterator(top_v1.begin(), top_v2.begin(), top_score.begin()),
+          thrust::make_zip_iterator(
+            gathered_v1.begin(), gathered_v2.begin(), gathered_score.begin()),
+          top_v1.size(),
+          rx_sizes,
+          rx_displs,
+          int{0},
+          handle.get_stream());
 
-          if constexpr (multi_gpu) {
-            similarity_threshold = host_scalar_bcast(
-              handle.get_comms(), similarity_threshold, int{0}, handle.get_stream());
+        if (is_root) {
+          thrust::sort_by_key(handle.get_thrust_policy(),
+                              gathered_score.begin(),
+                              gathered_score.end(),
+                              thrust::make_zip_iterator(gathered_v1.begin(), gathered_v2.begin()),
+                              thrust::greater<weight_t>{});
+
+          if (gathered_v1.size() > *topk) {
+            gathered_v1.resize(*topk, handle.get_stream());
+            gathered_v2.resize(*topk, handle.get_stream());
+            gathered_score.resize(*topk, handle.get_stream());
           }
+
+          top_v1    = std::move(gathered_v1);
+          top_v2    = std::move(gathered_v2);
+          top_score = std::move(gathered_score);
+        } else {
+          top_v1.resize(0, handle.get_stream());
+          top_v2.resize(0, handle.get_stream());
+          top_score.resize(0, handle.get_stream());
         }
       }
+
+      if (top_score.size() == *topk) {
+        raft::update_host(
+          &similarity_threshold, top_score.data() + *topk - 1, 1, handle.get_stream());
+      }
+      if constexpr (multi_gpu) {
+        similarity_threshold =
+          host_scalar_bcast(handle.get_comms(), similarity_threshold, int{0}, handle.get_stream());
+      }
     }
 
     return std::make_tuple(std::move(top_v1), std::move(top_v2), std::move(top_score));
diff --git a/cpp/tests/link_prediction/mg_similarity_test.cpp b/cpp/tests/link_prediction/mg_similarity_test.cpp
index 302248fe516..87214c808da 100644
--- a/cpp/tests/link_prediction/mg_similarity_test.cpp
+++ b/cpp/tests/link_prediction/mg_similarity_test.cpp
@@ -29,7 +29,10 @@
 struct Similarity_Usecase {
   bool use_weights{false};
   bool check_correctness{true};
-  size_t max_seeds{std::numeric_limits<size_t>::max()};
+  bool all_pairs{false};  // true: exercise the all-pairs path instead of explicit pairs
+  std::optional<size_t> max_seeds{std::nullopt};  // if set, random sources are drawn (<= |V|)
+  std::optional<size_t> max_vertex_pairs_to_check{std::nullopt};  // NOTE(review): not read here
+  std::optional<size_t> topk{std::nullopt};  // forwarded to the functor on the all-pairs path
 };
 
 template <typename input_usecase_t>
@@ -80,56 +83,96 @@ class Tests_MGSimilarity
     auto mg_edge_weight_view =
       mg_edge_weights ? std::make_optional((*mg_edge_weights).view()) : std::nullopt;
 
-    rmm::device_uvector<vertex_t> d_start_vertices(
-      std::min(
-        static_cast<size_t>(mg_graph_view.local_vertex_partition_range_size()),
-        similarity_usecase.max_seeds / comm_size +
-          (static_cast<size_t>(comm_rank) < similarity_usecase.max_seeds % comm_size ? 1 : 0)),
-      handle_->get_stream());
-    cugraph::test::populate_vertex_ids(
-      *handle_, d_start_vertices, mg_graph_view.local_vertex_partition_range_first());
-
-    auto [d_offsets, two_hop_nbrs] = cugraph::k_hop_nbrs(
-      *handle_,
-      mg_graph_view,
-      raft::device_span<vertex_t const>(d_start_vertices.data(), d_start_vertices.size()),
-      2);
-
-    auto h_start_vertices = cugraph::test::to_host(*handle_, d_start_vertices);
-    auto h_offsets        = cugraph::test::to_host(*handle_, d_offsets);
-
-    std::vector<vertex_t> h_v1(h_offsets.back());
-    for (size_t i = 0; i < h_start_vertices.size(); ++i) {
-      std::fill(h_v1.begin() + h_offsets[i], h_v1.begin() + h_offsets[i + 1], h_start_vertices[i]);
-    }
+    rmm::device_uvector<vertex_t> v1(0, handle_->get_stream());
+    rmm::device_uvector<vertex_t> v2(0, handle_->get_stream());
+    rmm::device_uvector<weight_t> result_score(0, handle_->get_stream());
 
-    auto d_v1 = cugraph::test::to_device(*handle_, h_v1);
-    auto d_v2 = std::move(two_hop_nbrs);
-
-    std::tie(d_v1, d_v2, std::ignore, std::ignore, std::ignore, std::ignore) =
-      cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
-        vertex_t,
-        edge_t,
-        weight_t,
-        int32_t>(*handle_,
-                 std::move(d_v1),
-                 std::move(d_v2),
-                 std::nullopt,
-                 std::nullopt,
-                 std::nullopt,
-                 mg_graph_view.vertex_partition_range_lasts());
-
-    std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
-      {d_v1.data(), d_v1.size()}, {d_v2.data(), d_v2.size()}};
+    raft::random::RngState rng_state{0};
 
-    if (cugraph::test::g_perf) {
-      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
-      handle_->get_comms().barrier();
-      hr_timer.start("MG similarity test");
+    rmm::device_uvector<vertex_t> sources(0, handle_->get_stream());
+    std::optional<raft::device_span<vertex_t const>> sources_span{std::nullopt};
+
+    if (similarity_usecase.max_seeds) {
+      sources = cugraph::select_random_vertices(
+        *handle_,
+        mg_graph_view,
+        std::optional<raft::device_span<vertex_t const>>{std::nullopt},
+        rng_state,
+        std::min(*similarity_usecase.max_seeds,
+                 static_cast<size_t>(mg_graph_view.number_of_vertices())),
+        false,
+        false);
+      sources_span = raft::device_span<vertex_t const>{sources.data(), sources.size()};
     }
 
-    auto result_score = test_functor.run(
-      *handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
+    if (similarity_usecase.all_pairs) {
+      if (cugraph::test::g_perf) {
+        RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+        handle_->get_comms().barrier();
+        hr_timer.start("MG similarity test");
+      }
+
+      std::tie(v1, v2, result_score) = test_functor.run(*handle_,
+                                                        mg_graph_view,
+                                                        mg_edge_weight_view,
+                                                        sources_span,
+                                                        similarity_usecase.use_weights,
+                                                        similarity_usecase.topk);
+    } else {
+      if (!sources_span) {
+        sources.resize(mg_graph_view.local_vertex_partition_range_size(), handle_->get_stream());
+        cugraph::test::populate_vertex_ids(
+          *handle_, sources, mg_graph_view.local_vertex_partition_range_first());
+        sources_span = raft::device_span<vertex_t const>{sources.data(), sources.size()};
+      }
+
+      rmm::device_uvector<size_t> offsets(0, handle_->get_stream());
+
+      std::tie(offsets, v2) = cugraph::k_hop_nbrs(*handle_, mg_graph_view, *sources_span, 2);
+
+      v1.resize(v2.size(), handle_->get_stream());
+      cugraph::test::expand_sparse_offsets(
+        *handle_,
+        raft::device_span<size_t const>{offsets.data(), offsets.size()},
+        raft::device_span<vertex_t>{v1.data(), v1.size()},
+        size_t{0},
+        vertex_t{0});
+
+      cugraph::unrenumber_local_int_vertices(*handle_,
+                                             v1.data(),
+                                             v1.size(),
+                                             sources.data(),
+                                             vertex_t{0},
+                                             static_cast<vertex_t>(sources.size()),
+                                             true);
+
+      std::tie(v1, v2) = cugraph::test::remove_self_loops(*handle_, std::move(v1), std::move(v2));
+
+      std::tie(v1, v2, std::ignore, std::ignore, std::ignore, std::ignore) =
+        cugraph::detail::shuffle_int_vertex_pairs_with_values_to_local_gpu_by_edge_partitioning<
+          vertex_t,
+          edge_t,
+          weight_t,
+          int32_t>(*handle_,
+                   std::move(v1),
+                   std::move(v2),
+                   std::nullopt,
+                   std::nullopt,
+                   std::nullopt,
+                   mg_graph_view.vertex_partition_range_lasts());
+
+      std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
+        {v1.data(), v1.size()}, {v2.data(), v2.size()}};
+
+      if (cugraph::test::g_perf) {
+        RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+        handle_->get_comms().barrier();
+        hr_timer.start("MG similarity test");
+      }
+
+      result_score = test_functor.run(
+        *handle_, mg_graph_view, mg_edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
+    }
 
     if (cugraph::test::g_perf) {
       RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
@@ -147,14 +190,14 @@ class Tests_MGSimilarity
         mg_edge_weight_view,
         std::optional<raft::device_span<vertex_t const>>(std::nullopt));
 
-      d_v1 = cugraph::test::device_gatherv(*handle_, d_v1.data(), d_v1.size());
-      d_v2 = cugraph::test::device_gatherv(*handle_, d_v2.data(), d_v2.size());
+      v1 = cugraph::test::device_gatherv(*handle_, v1.data(), v1.size());
+      v2 = cugraph::test::device_gatherv(*handle_, v2.data(), v2.size());
       result_score =
         cugraph::test::device_gatherv(*handle_, result_score.data(), result_score.size());
 
-      if (d_v1.size() > 0) {
-        auto h_vertex_pair1 = cugraph::test::to_host(*handle_, d_v1);
-        auto h_vertex_pair2 = cugraph::test::to_host(*handle_, d_v2);
+      if (v1.size() > 0) {
+        auto h_vertex_pair1 = cugraph::test::to_host(*handle_, v1);
+        auto h_vertex_pair2 = cugraph::test::to_host(*handle_, v2);
         auto h_result_score = cugraph::test::to_host(*handle_, result_score);
 
         similarity_compare(mg_graph_view.number_of_vertices(),
@@ -258,10 +301,13 @@ INSTANTIATE_TEST_SUITE_P(
   file_test,
   Tests_MGSimilarity_File,
   ::testing::Combine(
-    // enable correctness checks
-    // Disable weighted computation testing in 22.10
-    //::testing::Values(Similarity_Usecase{true, true, 20}, Similarity_Usecase{false, true, 20}),
-    ::testing::Values(Similarity_Usecase{false, true, 20}),
+    ::testing::Values(Similarity_Usecase{false, true, false, 20, 100},
+                      Similarity_Usecase{false, true, false, 20, 100},
+                      Similarity_Usecase{false, true, false, 20, 100, 10},
+                      Similarity_Usecase{false, true, true, 20, 100},
+                      Similarity_Usecase{false, true, true, 20, 100},
+                      Similarity_Usecase{false, true, true, std::nullopt, 100, 10},
+                      Similarity_Usecase{false, true, true, 20, 100, 10}),
     ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"),
                       cugraph::test::File_Usecase("test/datasets/netscience.mtx"))));
 
@@ -273,7 +319,13 @@ INSTANTIATE_TEST_SUITE_P(
     // Disable weighted computation testing in 22.10
     //::testing::Values(Similarity_Usecase{true, true, 20},
     // Similarity_Usecase{false, true, 20}),
-    ::testing::Values(Similarity_Usecase{false, true, 20}),
+    ::testing::Values(Similarity_Usecase{false, true, false, 20, 100},
+                      Similarity_Usecase{false, true, false, 20, 100},
+                      Similarity_Usecase{false, true, false, 20, 100, 10},
+                      Similarity_Usecase{false, true, true, 20, 100},
+                      Similarity_Usecase{false, true, true, 20, 100},
+                      Similarity_Usecase{false, true, true, std::nullopt, 100, 10},
+                      Similarity_Usecase{false, true, true, 20, 100, 10}),
     ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false))));
 
 INSTANTIATE_TEST_SUITE_P(
@@ -285,7 +337,12 @@ INSTANTIATE_TEST_SUITE_P(
   Tests_MGSimilarity_Rmat,
   ::testing::Combine(
     // disable correctness checks for large graphs
-    ::testing::Values(Similarity_Usecase{false, false, 20}),
+    ::testing::Values(Similarity_Usecase{false, false, false, 20, 100},
+                      Similarity_Usecase{false, false, false, 20, 100},
+                      Similarity_Usecase{false, false, false, 20, 100, 10},
+                      Similarity_Usecase{false, false, true, 20, 100},
+                      Similarity_Usecase{false, false, true, 20, 100},
+                      Similarity_Usecase{false, false, true, 20, 100, 10}),
     ::testing::Values(cugraph::test::Rmat_Usecase(20, 16, 0.57, 0.19, 0.19, 0, true, false))));
 
 CUGRAPH_MG_TEST_PROGRAM_MAIN()
diff --git a/cpp/tests/link_prediction/similarity_test.cu b/cpp/tests/link_prediction/similarity_test.cu
index ec6db102830..0c4f526264f 100644
--- a/cpp/tests/link_prediction/similarity_test.cu
+++ b/cpp/tests/link_prediction/similarity_test.cu
@@ -83,11 +83,6 @@ class Tests_Similarity
     auto edge_weight_view =
       edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt;
 
-    if (cugraph::test::g_perf) {
-      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
-      hr_timer.start("Similarity test");
-    }
-
     rmm::device_uvector<vertex_t> v1(0, handle.get_stream());
     rmm::device_uvector<vertex_t> v2(0, handle.get_stream());
     rmm::device_uvector<weight_t> result_score(0, handle.get_stream());
@@ -111,6 +106,11 @@ class Tests_Similarity
     }
 
     if (similarity_usecase.all_pairs) {
+      if (cugraph::test::g_perf) {
+        RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+        hr_timer.start("Similarity test");
+      }
+
       std::tie(v1, v2, result_score) = test_functor.run(handle,
                                                         graph_view,
                                                         edge_weight_view,
@@ -141,21 +141,17 @@ class Tests_Similarity
                                              static_cast<vertex_t>(sources.size()),
                                              true);
 
-      auto new_size = thrust::distance(
-        thrust::make_zip_iterator(v1.begin(), v2.begin()),
-        thrust::remove_if(
-          handle.get_thrust_policy(),
-          thrust::make_zip_iterator(v1.begin(), v2.begin()),
-          thrust::make_zip_iterator(v1.end(), v2.end()),
-          [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); }));
-
-      v1.resize(new_size, handle.get_stream());
-      v2.resize(new_size, handle.get_stream());
+      std::tie(v1, v2) = cugraph::test::remove_self_loops(handle, std::move(v1), std::move(v2));
 
       // FIXME:  Need to add some tests that specify actual vertex pairs
       std::tuple<raft::device_span<vertex_t const>, raft::device_span<vertex_t const>> vertex_pairs{
         {v1.data(), v1.size()}, {v2.data(), v2.size()}};
 
+      if (cugraph::test::g_perf) {
+        RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+        hr_timer.start("Similarity test");
+      }
+
       result_score = test_functor.run(
         handle, graph_view, edge_weight_view, vertex_pairs, similarity_usecase.use_weights);
     }
diff --git a/cpp/tests/utilities/thrust_wrapper.cu b/cpp/tests/utilities/thrust_wrapper.cu
index ef1c4f831eb..c48244fd3d8 100644
--- a/cpp/tests/utilities/thrust_wrapper.cu
+++ b/cpp/tests/utilities/thrust_wrapper.cu
@@ -546,5 +546,35 @@ template void expand_hypersparse_offsets(raft::handle_t const& handle,
                                          raft::device_span<int64_t> indices,
                                          size_t base_offset);
 
+template <typename vertex_t>
+std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> remove_self_loops(
+  raft::handle_t const& handle,
+  rmm::device_uvector<vertex_t>&& v1,
+  rmm::device_uvector<vertex_t>&& v2)
+{
+  auto new_size = thrust::distance(
+    thrust::make_zip_iterator(v1.begin(), v2.begin()),
+    thrust::remove_if(
+      handle.get_thrust_policy(),
+      thrust::make_zip_iterator(v1.begin(), v2.begin()),
+      thrust::make_zip_iterator(v1.end(), v2.end()),
+      [] __device__(auto tuple) { return thrust::get<0>(tuple) == thrust::get<1>(tuple); }));
+
+  v1.resize(new_size, handle.get_stream());
+  v2.resize(new_size, handle.get_stream());
+
+  return std::make_tuple(std::move(v1), std::move(v2));
+}
+
+template std::tuple<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>> remove_self_loops(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int32_t>&& v1,
+  rmm::device_uvector<int32_t>&& v2);
+
+template std::tuple<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>> remove_self_loops(
+  raft::handle_t const& handle,
+  rmm::device_uvector<int64_t>&& v1,
+  rmm::device_uvector<int64_t>&& v2);
+
 }  // namespace test
 }  // namespace cugraph
diff --git a/cpp/tests/utilities/thrust_wrapper.hpp b/cpp/tests/utilities/thrust_wrapper.hpp
index afdff33d80a..2131d700b7d 100644
--- a/cpp/tests/utilities/thrust_wrapper.hpp
+++ b/cpp/tests/utilities/thrust_wrapper.hpp
@@ -107,5 +107,11 @@ void expand_hypersparse_offsets(raft::handle_t const& handle,
                                 raft::device_span<idx_t> indices,
                                 offset_t base_offset);
 
+template <typename vertex_t>
+std::tuple<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> remove_self_loops(
+  raft::handle_t const& handle,
+  rmm::device_uvector<vertex_t>&& v1,
+  rmm::device_uvector<vertex_t>&& v2);
+
 }  // namespace test
 }  // namespace cugraph
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
index 34ee72e799b..c9fb73babb8 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_jaccard.py
@@ -17,6 +17,7 @@
 
 import pytest
 import networkx as nx
+import pandas as pd
 
 import cudf
 import cugraph
@@ -153,6 +154,54 @@ def networkx_call(M, benchmark_callable=None):
     return src, dst, coeff
 
 
+# FIXME: This compare is shared across several tests... it should be
+#        a general utility
+def compare(src1, dst1, val1, src2, dst2, val2):
+    #
+    #  We will do comparison computations by using dataframe
+    #  merge functions (essentially doing fast joins).  We
+    #  start by making two data frames
+    #
+    df1 = cudf.DataFrame()
+    df1["src1"] = src1
+    df1["dst1"] = dst1
+    if val1 is not None:
+        df1["val1"] = val1
+
+    df2 = cudf.DataFrame()
+    df2["src2"] = src2
+    df2["dst2"] = dst2
+    if val2 is not None:
+        df2["val2"] = val2
+
+    #
+    #  Check to see if all pairs in the original data frame
+    #  still exist in the new data frame.  If we join (merge)
+    #  the data frames where (src1[i]=src2[i]) and (dst1[i]=dst2[i])
+    #  then we should get exactly the same number of entries in
+    #  the data frame if we did not lose any data.
+    #
+    join = df1.merge(df2, left_on=["src1", "dst1"], right_on=["src2", "dst2"])
+
+    if len(df1) != len(join):
+        join2 = df1.merge(
+            df2, how="left", left_on=["src1", "dst1"], right_on=["src2", "dst2"]
+        )
+        pd.set_option("display.max_rows", 500)
+        print("df1 = \n", df1.sort_values(["src1", "dst1"]))
+        print("df2 = \n", df2.sort_values(["src2", "dst2"]))
+        print(
+            "join2 = \n",
+            join2.sort_values(["src1", "dst1"])
+            .to_pandas()
+            .query("src2.isnull()", engine="python"),
+        )
+
+    assert len(df1) == len(join)
+
+    assert_series_equal(join["val1"], join["val2"], check_names=False)
+
+
 # =============================================================================
 # Pytest Fixtures
 # =============================================================================
@@ -415,7 +464,7 @@ def test_all_pairs_jaccard_with_topk():
     jaccard_results = (
         jaccard_results[jaccard_results["first"] != jaccard_results["second"]]
         .sort_values(["jaccard_coeff", "first", "second"], ascending=False)
-        .reset_index(drop=True)[:topk]
+        .reset_index(drop=True)
     )
 
     # Call all-pairs Jaccard
@@ -425,6 +474,37 @@ def test_all_pairs_jaccard_with_topk():
         .reset_index(drop=True)
     )
 
-    assert_frame_equal(
-        jaccard_results, all_pairs_jaccard_results, check_dtype=False, check_like=True
+    # 1. All-pairs similarity might return a different set of top-k pairs
+    # that is still valid; hence, ensure the pairs returned by all-pairs
+    # exist, and that any results better than the k-th result are included
+    # in the result
+
+    # FIXME: This problem could exist in overlap, cosine and sorensen,
+    #        consider replicating this code or making a shared comparison
+    #        function
+    worst_coeff = all_pairs_jaccard_results["jaccard_coeff"].min()
+    better_than_k = jaccard_results[jaccard_results["jaccard_coeff"] > worst_coeff]
+
+    compare(
+        all_pairs_jaccard_results["first"],
+        all_pairs_jaccard_results["second"],
+        all_pairs_jaccard_results["jaccard_coeff"],
+        jaccard_results["first"],
+        jaccard_results["second"],
+        jaccard_results["jaccard_coeff"],
+    )
+
+    compare(
+        better_than_k["first"],
+        better_than_k["second"],
+        better_than_k["jaccard_coeff"],
+        all_pairs_jaccard_results["first"],
+        all_pairs_jaccard_results["second"],
+        all_pairs_jaccard_results["jaccard_coeff"],
+    )
+
+    # 2. Ensure the coefficient scores are still the highest
+    assert_series_equal(
+        all_pairs_jaccard_results["jaccard_coeff"],
+        jaccard_results["jaccard_coeff"][:topk],
     )
diff --git a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
index 4c30f149ea5..5369398fa16 100644
--- a/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
+++ b/python/cugraph/cugraph/tests/link_prediction/test_sorensen.py
@@ -157,6 +157,8 @@ def networkx_call(M, benchmark_callable=None):
     return src, dst, coeff
 
 
+# FIXME: This compare is shared across several tests... it should be
+#        a general utility
 def compare(src1, dst1, val1, src2, dst2, val2):
     #
     #  We will do comparison computations by using dataframe
@@ -200,6 +202,8 @@ def compare(src1, dst1, val1, src2, dst2, val2):
 
     assert len(df1) == len(join)
 
+    assert_series_equal(join["val1"], join["val2"], check_names=False)
+
 
 # =============================================================================
 # Pytest Fixtures
@@ -456,7 +460,7 @@ def test_all_pairs_sorensen_with_topk():
     sorensen_results = (
         sorensen_results[sorensen_results["first"] != sorensen_results["second"]]
         .sort_values(["sorensen_coeff", "first", "second"], ascending=False)
-        .reset_index(drop=True)[:topk]
+        .reset_index(drop=True)
     )
 
     # Call all-pairs sorensen
@@ -468,7 +472,14 @@ def test_all_pairs_sorensen_with_topk():
 
     # 1. All pair similarity might return different top pairs k pairs
     # which are still valid hence, ensure the pairs returned by all-pairs
-    # exists.
+    # exist, and that any results better than the k-th result are included
+    # in the result
+
+    # FIXME: This problem could exist in overlap, cosine and jaccard,
+    #        consider replicating this code or making a shared comparison
+    #        function
+    worst_coeff = all_pairs_sorensen_results["sorensen_coeff"].min()
+    better_than_k = sorensen_results[sorensen_results["sorensen_coeff"] > worst_coeff]
 
     compare(
         all_pairs_sorensen_results["first"],
@@ -479,6 +490,15 @@ def test_all_pairs_sorensen_with_topk():
         sorensen_results["sorensen_coeff"],
     )
 
+    compare(
+        better_than_k["first"],
+        better_than_k["second"],
+        better_than_k["sorensen_coeff"],
+        all_pairs_sorensen_results["first"],
+        all_pairs_sorensen_results["second"],
+        all_pairs_sorensen_results["sorensen_coeff"],
+    )
+
     # 2. Ensure the coefficient scores are still the highest
     assert_series_equal(
         all_pairs_sorensen_results["sorensen_coeff"],