diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index 09b1431e33b..f67fe54615b 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -481,6 +481,10 @@ ConfigureTest(RANDOM_WALKS_TEST sampling/sg_random_walks_test.cpp) # - UNIFORM NBR SAMPLING tests -------------------------------------------------------------------- ConfigureTest(UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/uniform_neighbor_sampling.cpp) +# - HOMOGENEOUS UNIFORM NBR SAMPLING tests -------------------------------------------------------- +ConfigureTest( + HOMOGENEOUS_UNIFORM_NEIGHBOR_SAMPLING_TEST sampling/homogeneous_uniform_neighbor_sampling.cpp) + ################################################################################################### # - BIASED NBR SAMPLING tests --------------------------------------------------------------------- ConfigureTest(BIASED_NEIGHBOR_SAMPLING_TEST sampling/biased_neighbor_sampling.cpp) diff --git a/cpp/tests/sampling/homogeneous_uniform_neighbor_sampling.cpp b/cpp/tests/sampling/homogeneous_uniform_neighbor_sampling.cpp new file mode 100644 index 00000000000..789ac48111a --- /dev/null +++ b/cpp/tests/sampling/homogeneous_uniform_neighbor_sampling.cpp @@ -0,0 +1,312 @@ +/* + * Copyright (c) 2022-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+#include "detail/nbr_sampling_validate.hpp"
+#include "utilities/base_fixture.hpp"
+#include "utilities/property_generator_utilities.hpp"
+
+// NOTE(review): the three `#include` directives below carry no header name, and several
+// spellings further down (`template`, `std::tuple>`, `std::optional>`, `static_cast(...)`)
+// are missing their angle-bracket argument lists. This looks like an extraction artifact of
+// this rendering of the patch rather than the real source -- confirm against the commit.
+#include
+#include
+
+#include
+
+// Parameters describing one homogeneous uniform neighbor sampling test case.
+// The INSTANTIATE_TEST_SUITE_P blocks at the bottom of the file brace-initialize this struct
+// positionally, so the member order below is part of the contract.
+struct Homogeneous_Uniform_Neighbor_Sampling_Usecase {
+  // Passed to the sampler as a host_span; its size is also handed to validate_sampling_depth.
+  std::vector fanout{{-1}};
+  // Forwarded to cugraph::test::sequence when the per-source batch labels are built.
+  int32_t batch_size{10};
+  // Forwarded as the last sampling flag (presumably "sample with replacement" -- confirm).
+  bool flag_replacement{true};
+
+  // When true, an edge mask is generated and attached to the graph view before sampling.
+  bool edge_masking{false};
+  // When false, the validation that follows the sampling call is skipped.
+  bool check_correctness{true};
+};
+
+// Value-parameterized gtest fixture. run_current_test() unpacks the test parameter into a
+// (sampling usecase, input usecase) pair.
+template
+class Tests_Homogeneous_Uniform_Neighbor_Sampling
+  : public ::testing::TestWithParam<
+      std::tuple> {
+ public:
+  Tests_Homogeneous_Uniform_Neighbor_Sampling() {}
+
+  // No suite-level or per-test setup/teardown is needed; all state is local to
+  // run_current_test().
+  static void SetUpTestCase() {}
+  static void TearDownTestCase() {}
+
+  virtual void SetUp() {}
+  virtual void TearDown() {}
+
+  // Runs one sampling test:
+  //   1. construct the graph from the input usecase (optionally attaching an edge mask),
+  //   2. select random source vertices and build batch labels for them,
+  //   3. call cugraph::homogeneous_uniform_neighbor_sample (or, under NO_CUGRAPH_OPS, expect
+  //      the call to throw),
+  //   4. if check_correctness is set, validate the sampled edges.
+  // Timings are printed only when cugraph::test::g_perf is set.
+  //
+  // param: tuple of (Homogeneous_Uniform_Neighbor_Sampling_Usecase, input usecase).
+  template
+  void run_current_test(
+    std::tuple const& param)
+  {
+    auto [homogeneous_uniform_neighbor_sampling_usecase, input_usecase] = param;
+
+    raft::handle_t handle{};
+    HighResTimer hr_timer{};
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      hr_timer.start("Construct graph");
+    }
+
+    // NOTE(review): the meaning of the two `true` arguments is not visible here (presumably
+    // "weighted" and "renumber", given the edge_weights / renumber_map_labels results) --
+    // confirm against construct_graph.
+    auto [graph, edge_weights, renumber_map_labels] =
+      cugraph::test::construct_graph(
+        handle, input_usecase, true, true);
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      hr_timer.stop();
+      hr_timer.display_and_clear(std::cout);
+    }
+
+    auto graph_view = graph.view();
+    // Empty optional when the constructed graph carries no edge weights.
+    auto edge_weight_view =
+      edge_weights ? std::make_optional((*edge_weights).view()) : std::nullopt;
+
+    // edge_mask is declared outside the `if` so the mask storage stays alive for as long as
+    // graph_view (which only holds a view of it) is used below.
+    std::optional> edge_mask{std::nullopt};
+    if (homogeneous_uniform_neighbor_sampling_usecase.edge_masking) {
+      // NOTE(review): the meaning of the literal 2 is not visible here -- confirm against
+      // cugraph::test::generate::edge_property.
+      edge_mask =
+        cugraph::test::generate::edge_property(handle, graph_view, 2);
+      graph_view.attach_edge_mask((*edge_mask).view());
+    }
+
+    // Fraction of the vertices used as sampling sources (see the std::max expression below).
+    constexpr float select_probability{0.05};
+
+    // FIXME: Update the tests to initialize RngState and use it instead
+    // of seed...
+    // NOTE(review): an RngState is already constructed from `seed` just below and passed to
+    // both select_random_vertices and the sampler, so the FIXME above may be stale -- confirm.
+    constexpr uint64_t seed{0};
+
+    raft::random::RngState rng_state(seed);
+
+    // Requested source count: number_of_vertices * select_probability (after the cast), but
+    // never less than min(number_of_vertices, 1), i.e. at least one source for any non-empty
+    // graph.
+    auto random_sources = cugraph::select_random_vertices(
+      handle,
+      graph_view,
+      std::optional>{std::nullopt},
+      rng_state,
+      std::max(static_cast(graph_view.number_of_vertices() * select_probability),
+               std::min(static_cast(graph_view.number_of_vertices()), size_t{1})),
+      false,
+      false);
+
+    //
+    // Now we'll assign the vertices to batches
+    //
+
+    // NOTE(review): this empty buffer is overwritten by the assignment right below, so the
+    // initial allocation looks redundant; the optional wrapper is what the
+    // `batch_number ? ... : std::nullopt` arguments further down rely on.
+    auto batch_number = std::make_optional>(0, handle.get_stream());
+
+    // One label per selected source. Presumably batch_size consecutive sources share a label,
+    // starting from 0 -- confirm against cugraph::test::sequence.
+    batch_number = cugraph::test::sequence(
+      handle, random_sources.size(), homogeneous_uniform_neighbor_sampling_usecase.batch_size, int32_t{0});
+
+    // The sampler is given this copy; random_sources itself is kept and later moved into
+    // validate_sampling_depth.
+    rmm::device_uvector random_sources_copy(random_sources.size(), handle.get_stream());
+
+    raft::copy(random_sources_copy.data(),
+               random_sources.data(),
+               random_sources.size(),
+               handle.get_stream());
+
+    // Always passed as an empty optional in this test.
+    std::optional>
+      label_to_output_comm_rank_mapping{std::nullopt};
+
+#ifdef NO_CUGRAPH_OPS
+    // NOTE(review): this call does not pass rng_state right after `handle` and ends with
+    // (rng_state, true, flag_replacement) instead of a cugraph::sampling_flags_t, unlike the
+    // call in the #else branch. Confirm it still matches an existing overload.
+    EXPECT_THROW(
+      cugraph::homogeneous_uniform_neighbor_sample(
+        handle,
+        graph_view,
+        edge_weight_view,
+        std::optional>{std::nullopt},
+        std::optional>{std::nullopt},
+        raft::device_span{random_sources_copy.data(), random_sources.size()},
+        batch_number ? std::make_optional(raft::device_span{batch_number->data(),
+                                                            batch_number->size()})
+                     : std::nullopt,
+        label_to_output_comm_rank_mapping,
+        raft::host_span(homogeneous_uniform_neighbor_sampling_usecase.fanout.data(),
+                        homogeneous_uniform_neighbor_sampling_usecase.fanout.size()),
+        rng_state,
+        true,
+        homogeneous_uniform_neighbor_sampling_usecase.flag_replacement),
+      std::exception);
+#else
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      hr_timer.start("Uniform neighbor sampling");
+    }
+
+    // Only src_out, dst_out and wgt_out are inspected below; edge_id, edge_type, hop and
+    // offsets are bound but not used by this test.
+    auto&& [src_out, dst_out, wgt_out, edge_id, edge_type, hop, offsets] =
+      cugraph::homogeneous_uniform_neighbor_sample(
+        handle,
+        rng_state,
+        graph_view,
+        edge_weight_view,
+        std::optional>{std::nullopt},
+        std::optional>{std::nullopt},
+        raft::device_span{random_sources_copy.data(), random_sources.size()},
+        batch_number ? std::make_optional(raft::device_span{batch_number->data(),
+                                                            batch_number->size()})
+                     : std::nullopt,
+        label_to_output_comm_rank_mapping,
+        raft::host_span(homogeneous_uniform_neighbor_sampling_usecase.fanout.data(),
+                        homogeneous_uniform_neighbor_sampling_usecase.fanout.size()),
+        // NOTE(review): which behavior prior_sources_behavior_t{0} selects is not visible
+        // here -- confirm against the enum definition.
+        cugraph::sampling_flags_t{
+          cugraph::prior_sources_behavior_t{0},
+          true, // return_hops
+          false, // dedupe_sources
+          homogeneous_uniform_neighbor_sampling_usecase.flag_replacement
+        }
+        );
+
+
+    if (cugraph::test::g_perf) {
+      RAFT_CUDA_TRY(cudaDeviceSynchronize());  // for consistent performance measurement
+      hr_timer.stop();
+      hr_timer.display_and_clear(std::cout);
+    }
+
+    if (homogeneous_uniform_neighbor_sampling_usecase.check_correctness) {
+      // First validate that the extracted edges are actually a subset of the
+      // edges in the input graph
+      // Collect every sampled source followed by every sampled destination into one buffer,
+      // then pass it through cugraph::test::sort and cugraph::test::unique.
+      rmm::device_uvector vertices(2 * src_out.size(), handle.get_stream());
+      raft::copy(vertices.data(), src_out.data(), src_out.size(), handle.get_stream());
+      raft::copy(
+        vertices.data() + src_out.size(), dst_out.data(), dst_out.size(), handle.get_stream());
+      vertices = cugraph::test::sort(handle, std::move(vertices));
+      vertices = cugraph::test::unique(handle, std::move(vertices));
+
+      // Two offsets, {0, vertices.size()}: a single range spanning all collected vertices.
+      rmm::device_uvector d_subgraph_offsets(2, handle.get_stream());
+      std::vector h_subgraph_offsets({0, vertices.size()});
+
+      raft::update_device(d_subgraph_offsets.data(),
+                          h_subgraph_offsets.data(),
+                          h_subgraph_offsets.size(),
+                          handle.get_stream());
+
+      // Reference edge list: filled from extract_induced_subgraphs over the collected
+      // vertices, then compared against the sampled edges.
+      rmm::device_uvector src_compare(0, handle.get_stream());
+      rmm::device_uvector dst_compare(0, handle.get_stream());
+      std::optional> wgt_compare{std::nullopt};
+
+      std::tie(src_compare, dst_compare, wgt_compare, std::ignore) = extract_induced_subgraphs(
+        handle,
+        graph_view,
+        edge_weight_view,
+        raft::device_span(d_subgraph_offsets.data(), 2),
+        raft::device_span(vertices.data(), vertices.size()),
+        true);
+
+      ASSERT_TRUE(cugraph::test::validate_extracted_graph_is_subgraph(
+        handle, src_compare, dst_compare, wgt_compare, src_out, dst_out, wgt_out));
+
+      if (random_sources.size() < 100) {
+        // This validation is too expensive for large number of vertices
+        // The sampled outputs and random_sources are moved into the validator, so they must
+        // not be used after this call.
+        ASSERT_TRUE(
+          cugraph::test::validate_sampling_depth(handle,
+                                                 std::move(src_out),
+                                                 std::move(dst_out),
+                                                 std::move(wgt_out),
+                                                 std::move(random_sources),
+                                                 homogeneous_uniform_neighbor_sampling_usecase.fanout.size()));
+      }
+    }
+#endif
+  }
+};
+
+// Fixture aliases for file-based and RMAT-based inputs.
+// NOTE(review): both aliases read identically here because the template argument was
+// stripped from this rendering -- confirm the input usecase type of each against the commit.
+using Tests_Homogeneous_Uniform_Neighbor_Sampling_File =
+  Tests_Homogeneous_Uniform_Neighbor_Sampling;
+
+using Tests_Homogeneous_Uniform_Neighbor_Sampling_Rmat =
+  Tests_Homogeneous_Uniform_Neighbor_Sampling;
+
+
+// Each TEST_P below forwards GetParam() through the matching
+// override_*_Usecase_with_cmd_line_arguments helper into run_current_test. The test names
+// suggest one vertex/edge/weight type combination per test; the explicit template arguments
+// of run_current_test are not visible in this rendering.
+TEST_P(Tests_Homogeneous_Uniform_Neighbor_Sampling_File, CheckInt32Int32Float)
+{
+  run_current_test(
+    override_File_Usecase_with_cmd_line_arguments(GetParam()));
+}
+
+TEST_P(Tests_Homogeneous_Uniform_Neighbor_Sampling_File, CheckInt32Int64Float)
+{
+  run_current_test(
+    override_File_Usecase_with_cmd_line_arguments(GetParam()));
+}
+
+TEST_P(Tests_Homogeneous_Uniform_Neighbor_Sampling_File, CheckInt64Int64Float)
+{
+  run_current_test(
+    override_File_Usecase_with_cmd_line_arguments(GetParam()));
+}
+
+TEST_P(Tests_Homogeneous_Uniform_Neighbor_Sampling_Rmat, CheckInt32Int32Float)
+{
+  run_current_test(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()));
+}
+
+TEST_P(Tests_Homogeneous_Uniform_Neighbor_Sampling_Rmat, CheckInt32Int64Float)
+{
+  run_current_test(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()));
+}
+
+TEST_P(Tests_Homogeneous_Uniform_Neighbor_Sampling_Rmat, CheckInt64Int64Float)
+{
+  run_current_test(
+    override_Rmat_Usecase_with_cmd_line_arguments(GetParam()));
+}
+
+// Usecase initializers are positional: {fanout, batch_size, flag_replacement, edge_masking
+// [, check_correctness]}. The four entries in each suite cover every combination of
+// flag_replacement and edge_masking with fanout {4, 10}.
+INSTANTIATE_TEST_SUITE_P(
+  file_test,
+  Tests_Homogeneous_Uniform_Neighbor_Sampling_File,
+  ::testing::Combine(
+    ::testing::Values(Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, true},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, true}),
+    ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"))));
+
+// Same four usecases, run against three further dataset files.
+INSTANTIATE_TEST_SUITE_P(
+  file_large_test,
+  Tests_Homogeneous_Uniform_Neighbor_Sampling_File,
+  ::testing::Combine(
+    ::testing::Values(Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, true},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, true}),
+    ::testing::Values(cugraph::test::File_Usecase("test/datasets/web-Google.mtx"),
+                      cugraph::test::File_Usecase("test/datasets/ljournal-2008.mtx"),
+                      cugraph::test::File_Usecase("test/datasets/webbase-1M.mtx"))));
+
+// Same four usecases, run against a single Rmat_Usecase.
+INSTANTIATE_TEST_SUITE_P(
+  rmat_small_test,
+  Tests_Homogeneous_Uniform_Neighbor_Sampling_Rmat,
+  ::testing::Combine(
+    ::testing::Values(Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, false, true},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 128, true, true}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, false, false, 0))));
+
+// Benchmark suite: batch size 1024 and check_correctness explicitly set to false (fifth
+// initializer), so run_current_test skips the validation step for these cases.
+INSTANTIATE_TEST_SUITE_P(
+  rmat_benchmark_test, /* note that scale & edge factor can be overridden in benchmarking (with
+                          --gtest_filter to select only the rmat_benchmark_test with a specific
+                          vertex & edge type combination) by command line arguments and do not
+                          include more than one Rmat_Usecase that differ only in scale or edge
+                          factor (to avoid running same benchmarks more than once) */
+  Tests_Homogeneous_Uniform_Neighbor_Sampling_Rmat,
+  ::testing::Combine(
+    ::testing::Values(Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, false, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, false, true, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, false, false},
+                      Homogeneous_Uniform_Neighbor_Sampling_Usecase{{4, 10}, 1024, true, true, false}),
+    ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, false, false, 0))));
+
+CUGRAPH_TEST_PROGRAM_MAIN()