diff --git a/ci/build_cpp.sh b/ci/build_cpp.sh index 3fd57f24c40..3fb72cac08b 100755 --- a/ci/build_cpp.sh +++ b/ci/build_cpp.sh @@ -11,6 +11,6 @@ rapids-print-env rapids-logger "Begin cpp build" -rapids-mamba-retry mambabuild conda/recipes/libcugraph +rapids-conda-retry mambabuild conda/recipes/libcugraph rapids-upload-conda-to-s3 cpp diff --git a/ci/build_python.sh b/ci/build_python.sh index 429ba649d1d..62eb6c2ccec 100755 --- a/ci/build_python.sh +++ b/ci/build_python.sh @@ -15,12 +15,12 @@ rapids-logger "Begin py build" # TODO: Remove `--no-test` flags once importing on a CPU # node works correctly -rapids-mamba-retry mambabuild \ +rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ conda/recipes/pylibcugraph -rapids-mamba-retry mambabuild \ +rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ @@ -30,7 +30,7 @@ rapids-mamba-retry mambabuild \ # platform to ensure it is included in each set of artifacts, since test # scripts only install from one set of artifacts based on the CUDA version used # for the test run. -rapids-mamba-retry mambabuild \ +rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ @@ -40,7 +40,7 @@ rapids-mamba-retry mambabuild \ # built on each CUDA platform to ensure they are included in each set of # artifacts, since test scripts only install from one set of artifacts based on # the CUDA version used for the test run. -rapids-mamba-retry mambabuild \ +rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ @@ -50,7 +50,7 @@ RAPIDS_CUDA_MAJOR="${RAPIDS_CUDA_VERSION%%.*}" if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then # Only CUDA 11 is supported right now due to PyTorch requirement. - rapids-mamba-retry mambabuild \ + rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ @@ -60,7 +60,7 @@ if [[ ${RAPIDS_CUDA_MAJOR} == "11" ]]; then conda/recipes/cugraph-pyg # Only CUDA 11 is supported right now due to PyTorch requirement. - rapids-mamba-retry mambabuild \ + rapids-conda-retry mambabuild \ --no-test \ --channel "${CPP_CHANNEL}" \ --channel "${RAPIDS_CONDA_BLD_OUTPUT_DIR}" \ diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index cb27260baa0..78846bc5766 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -594,6 +594,8 @@ weight_t hungarian(raft::handle_t const& handle, * @param[in] graph input graph object * @param[out] clustering Pointer to device array where the clustering should be stored * @param[in] max_level (optional) maximum number of levels to run (default 100) + * @param[in] threshold (optional) threshold for convergence at each level (default + * 1e-7) * @param[in] resolution (optional) The value of the resolution parameter to use. * Called gamma in the modularity formula, this changes the size * of the communities. Higher resolutions lead to more smaller @@ -612,6 +614,7 @@ std::pair louvain( std::optional> edge_weight_view, vertex_t* clustering, size_t max_level = 100, + weight_t threshold = weight_t{1e-7}, weight_t resolution = weight_t{1}); template @@ -657,6 +660,7 @@ std::pair>, weight_t> louvain( graph_view_t const& graph_view, std::optional> edge_weight_view, size_t max_level = 100, + weight_t threshold = weight_t{1e-7}, weight_t resolution = weight_t{1}); /** diff --git a/cpp/include/cugraph_c/community_algorithms.h b/cpp/include/cugraph_c/community_algorithms.h index 79c945ffb8b..8f1015f8632 100644 --- a/cpp/include/cugraph_c/community_algorithms.h +++ b/cpp/include/cugraph_c/community_algorithms.h @@ -93,6 +93,7 @@ typedef struct { * @param [in] graph Pointer to graph. NOTE: Graph might be modified if the storage * needs to be transposed * @param [in] max_level Maximum level in hierarchy + * @param [in] threshold Threshold parameter, defines convergence at each level of hierarchy * @param [in] resolution Resolution parameter (gamma) in modularity formula. * This changes the size of the communities. Higher resolutions * lead to more smaller communities, lower resolutions lead to @@ -107,6 +108,7 @@ typedef struct { cugraph_error_code_t cugraph_louvain(const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, size_t max_level, + double threshold, double resolution, bool_t do_expensive_check, cugraph_hierarchical_clustering_result_t** result, diff --git a/cpp/src/c_api/louvain.cpp b/cpp/src/c_api/louvain.cpp index ff75cafa031..0e48b29388a 100644 --- a/cpp/src/c_api/louvain.cpp +++ b/cpp/src/c_api/louvain.cpp @@ -36,6 +36,7 @@ struct louvain_functor : public cugraph::c_api::abstract_functor { raft::handle_t const& handle_; cugraph::c_api::cugraph_graph_t* graph_; size_t max_level_; + double threshold_; double resolution_; bool do_expensive_check_; cugraph::c_api::cugraph_hierarchical_clustering_result_t* result_{}; @@ -43,12 +44,14 @@ struct louvain_functor : public cugraph::c_api::abstract_functor { louvain_functor(::cugraph_resource_handle_t const* handle, ::cugraph_graph_t* graph, size_t max_level, + double threshold, double resolution, bool do_expensive_check) : abstract_functor(), handle_(*reinterpret_cast(handle)->handle_), graph_(reinterpret_cast(graph)), max_level_(max_level), + threshold_(threshold), resolution_(resolution), do_expensive_check_(do_expensive_check) { @@ -102,6 +105,7 @@ struct louvain_functor : public cugraph::c_api::abstract_functor { .view()), clusters.data(), max_level_, + static_cast(threshold_), static_cast(resolution_)); rmm::device_uvector vertices(graph_view.local_vertex_partition_range_size(), @@ -121,12 +125,13 @@ struct louvain_functor : public cugraph::c_api::abstract_functor { extern "C" cugraph_error_code_t cugraph_louvain(const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, size_t max_level, + double threshold, double resolution, bool_t do_expensive_check, cugraph_hierarchical_clustering_result_t** result, cugraph_error_t** error) { - louvain_functor functor(handle, graph, max_level, resolution, do_expensive_check); + louvain_functor functor(handle, graph, max_level, threshold, resolution, do_expensive_check); return cugraph::c_api::run_algorithm(graph, functor, result, error); } diff --git a/cpp/src/community/louvain_impl.cuh b/cpp/src/community/louvain_impl.cuh index 167de36dd13..7777921a091 100644 --- a/cpp/src/community/louvain_impl.cuh +++ b/cpp/src/community/louvain_impl.cuh @@ -47,6 +47,7 @@ std::pair>, weight_t> louvain( graph_view_t const& graph_view, std::optional> edge_weight_view, size_t max_level, + weight_t threshold, weight_t resolution) { using graph_t = cugraph::graph_t; @@ -169,7 +170,7 @@ std::pair>, weight_t> louvain( // during each iteration of the loop bool up_down = true; - while (new_Q > (cur_Q + 0.0001)) { + while (new_Q > (cur_Q + threshold)) { cur_Q = new_Q; next_clusters_v = detail::update_clustering_by_delta_modularity(handle, @@ -291,12 +292,13 @@ std::pair>, weight_t> louvain( graph_view_t const& graph_view, std::optional> edge_weight_view, size_t max_level, + weight_t threshold, weight_t resolution) { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); CUGRAPH_EXPECTS(edge_weight_view.has_value(), "Graph must be weighted"); - return detail::louvain(handle, graph_view, edge_weight_view, max_level, resolution); + return detail::louvain(handle, graph_view, edge_weight_view, max_level, threshold, resolution); } template @@ -317,6 +319,7 @@ std::pair louvain( std::optional> edge_weight_view, vertex_t* clustering, size_t max_level, + weight_t threshold, weight_t resolution) { CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented."); @@ -328,7 +331,7 @@ std::pair louvain( weight_t modularity; std::tie(dendrogram, modularity) = - detail::louvain(handle, graph_view, edge_weight_view, max_level, resolution); + detail::louvain(handle, graph_view, edge_weight_view, max_level, threshold, resolution); detail::flatten_dendrogram(handle, graph_view, *dendrogram, clustering); diff --git a/cpp/src/community/louvain_mg.cu b/cpp/src/community/louvain_mg.cu index d6d266df273..0be32ed049f 100644 --- a/cpp/src/community/louvain_mg.cu +++ b/cpp/src/community/louvain_mg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,36 +25,42 @@ template std::pair>, float> louvain( graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, float> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, float> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair louvain( @@ -63,6 +69,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -70,6 +77,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + double, double); template std::pair louvain( raft::handle_t const&, @@ -77,6 +85,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -84,6 +93,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + double, double); template std::pair louvain( raft::handle_t const&, @@ -91,6 +101,7 @@ template std::pair louvain( std::optional>, int64_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -98,6 +109,7 @@ template std::pair louvain( std::optional>, int64_t*, size_t, + double, double); } // namespace cugraph diff --git a/cpp/src/community/louvain_sg.cu b/cpp/src/community/louvain_sg.cu index 4e26aa1cf18..3fc0ffab928 100644 --- a/cpp/src/community/louvain_sg.cu +++ b/cpp/src/community/louvain_sg.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2023, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -25,36 +25,42 @@ template std::pair>, float> louvain( graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, float> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, float> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + float, float); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair>, double> louvain( raft::handle_t const&, graph_view_t const&, std::optional>, size_t, + double, double); template std::pair louvain( @@ -63,6 +69,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -70,6 +77,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + double, double); template std::pair louvain( raft::handle_t const&, @@ -77,6 +85,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -84,6 +93,7 @@ template std::pair louvain( std::optional>, int32_t*, size_t, + double, double); template std::pair louvain( raft::handle_t const&, @@ -91,6 +101,7 @@ template std::pair louvain( std::optional>, int64_t*, size_t, + float, float); template std::pair louvain( raft::handle_t const&, @@ -98,6 +109,7 @@ template std::pair louvain( std::optional>, int64_t*, size_t, + double, double); } // namespace cugraph diff --git a/cpp/src/prims/detail/nbr_intersection.cuh b/cpp/src/prims/detail/nbr_intersection.cuh index f4c4745b14c..2f30faebb3e 100644 --- a/cpp/src/prims/detail/nbr_intersection.cuh +++ b/cpp/src/prims/detail/nbr_intersection.cuh @@ -1023,7 +1023,7 @@ nbr_intersection(raft::handle_t const& handle, (*major_nbr_offsets).begin() + 1); } - std::tie(*major_nbr_indices, std::ignore) = shuffle_values( + std::tie(major_nbr_indices, std::ignore) = shuffle_values( major_comm, local_nbrs_for_rx_majors.begin(), local_nbr_counts, handle.get_stream()); if constexpr (!std::is_same_v) { diff --git a/cpp/tests/c_api/louvain_test.c b/cpp/tests/c_api/louvain_test.c index f3813b5a1ac..e9ac5c9ff06 100644 --- a/cpp/tests/c_api/louvain_test.c +++ b/cpp/tests/c_api/louvain_test.c @@ -33,6 +33,7 @@ int generic_louvain_test(vertex_t* h_src, size_t num_vertices, size_t num_edges, size_t max_level, + double threshold, double resolution, bool_t store_transposed) { @@ -60,7 +61,7 @@ int generic_louvain_test(vertex_t* h_src, TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); ret_code = - cugraph_louvain(p_handle, p_graph, max_level, resolution, FALSE, &p_result, &ret_error); + cugraph_louvain(p_handle, p_graph, max_level, threshold, resolution, FALSE, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_louvain failed."); @@ -108,6 +109,7 @@ int test_louvain() size_t num_edges = 16; size_t num_vertices = 6; size_t max_level = 10; + weight_t threshold = 1e-7; weight_t resolution = 1.0; vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; @@ -126,6 +128,7 @@ int test_louvain() num_vertices, num_edges, max_level, + threshold, resolution, FALSE); } @@ -135,6 +138,7 @@ int test_louvain_no_weight() size_t num_edges = 16; size_t num_vertices = 6; size_t max_level = 10; + weight_t threshold = 1e-7; weight_t resolution = 1.0; vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; @@ -151,6 +155,7 @@ int test_louvain_no_weight() num_vertices, num_edges, max_level, + threshold, resolution, FALSE); } diff --git a/cpp/tests/c_api/mg_louvain_test.c b/cpp/tests/c_api/mg_louvain_test.c index d4c10d49891..2465709c03c 100644 --- a/cpp/tests/c_api/mg_louvain_test.c +++ b/cpp/tests/c_api/mg_louvain_test.c @@ -33,6 +33,7 @@ int generic_louvain_test(const cugraph_resource_handle_t* p_handle, size_t num_vertices, size_t num_edges, size_t max_level, + double threshold, double resolution, bool_t store_transposed) { @@ -51,7 +52,7 @@ int generic_louvain_test(const cugraph_resource_handle_t* p_handle, TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); ret_code = - cugraph_louvain(p_handle, p_graph, max_level, resolution, FALSE, &p_result, &ret_error); + cugraph_louvain(p_handle, p_graph, max_level, threshold, resolution, FALSE, &p_result, &ret_error); TEST_ASSERT(test_ret_value, ret_code == CUGRAPH_SUCCESS, cugraph_error_message(ret_error)); TEST_ALWAYS_ASSERT(ret_code == CUGRAPH_SUCCESS, "cugraph_louvain failed."); @@ -106,6 +107,7 @@ int test_louvain(const cugraph_resource_handle_t* handle) size_t num_edges = 8; size_t num_vertices = 6; size_t max_level = 10; + weight_t threshold = 1e-7; weight_t resolution = 1.0; vertex_t h_src[] = {0, 1, 1, 2, 2, 2, 3, 4, 1, 3, 4, 0, 1, 3, 5, 5}; @@ -116,7 +118,7 @@ int test_louvain(const cugraph_resource_handle_t* handle) // Louvain wants store_transposed = FALSE return generic_louvain_test( - handle, h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, max_level, resolution, FALSE); + handle, h_src, h_dst, h_wgt, h_result, num_vertices, num_edges, max_level, threshold, resolution, FALSE); } /******************************************************************************/ diff --git a/cpp/tests/community/louvain_test.cpp b/cpp/tests/community/louvain_test.cpp index 4792042365b..1e1fb6d4c33 100644 --- a/cpp/tests/community/louvain_test.cpp +++ b/cpp/tests/community/louvain_test.cpp @@ -30,8 +30,9 @@ #include struct Louvain_Usecase { - size_t max_level_{100}; - double resolution_{1}; + std::optional max_level_{std::nullopt}; + std::optional threshold_{std::nullopt}; + std::optional resolution_{std::nullopt}; bool check_correctness_{false}; int expected_level_{0}; float expected_modularity_{0}; @@ -54,6 +55,12 @@ class Tests_Louvain { auto [louvain_usecase, input_usecase] = param; + // Legacy implementation does not support resolution parameter, + // defaulting it to 1. If the test case is not resolution + // 1 then skip it. + if (louvain_usecase.resolution_) + if (louvain_usecase.resolution_ != double{1}) return; + raft::handle_t handle{}; bool directed{false}; @@ -134,6 +141,9 @@ class Tests_Louvain EXPECT_THROW(louvain(graph_view, edge_weight_view, graph_view.local_vertex_partition_range_size(), + louvain_usecase.max_level_, + louvain_usecase.threshold_, + louvain_usecase.resolution_, louvain_usecase.check_correctness_, louvain_usecase.expected_level_, louvain_usecase.expected_modularity_), @@ -142,6 +152,9 @@ class Tests_Louvain louvain(graph_view, edge_weight_view, graph_view.local_vertex_partition_range_size(), + louvain_usecase.max_level_, + louvain_usecase.threshold_, + louvain_usecase.resolution_, louvain_usecase.check_correctness_, louvain_usecase.expected_level_, louvain_usecase.expected_modularity_); @@ -185,6 +198,9 @@ class Tests_Louvain cugraph::graph_view_t const& graph_view, std::optional> edge_weight_view, vertex_t num_vertices, + std::optional max_level, + std::optional threshold, + std::optional resolution, bool check_correctness, int expected_level, float expected_modularity) @@ -195,8 +211,29 @@ class Tests_Louvain size_t level; weight_t modularity; - std::tie(level, modularity) = cugraph::louvain( - handle, graph_view, edge_weight_view, clustering_v.data(), size_t{100}, weight_t{1}); + if (resolution) { + std::tie(level, modularity) = + cugraph::louvain(handle, + graph_view, + edge_weight_view, + clustering_v.data(), + max_level ? *max_level : size_t{100}, + threshold ? static_cast(*threshold) : weight_t{1e-7}, + static_cast(*resolution)); + } else if (threshold) { + std::tie(level, modularity) = cugraph::louvain(handle, + graph_view, + edge_weight_view, + clustering_v.data(), + max_level ? *max_level : size_t{100}, + static_cast(*threshold)); + } else if (max_level) { + std::tie(level, modularity) = + cugraph::louvain(handle, graph_view, edge_weight_view, clustering_v.data(), *max_level); + } else { + std::tie(level, modularity) = + cugraph::louvain(handle, graph_view, edge_weight_view, clustering_v.data()); + } RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -421,8 +458,11 @@ TEST_P(Tests_Louvain_Rmat64, CheckInt64Int64FloatFloat) INSTANTIATE_TEST_SUITE_P( simple_test, Tests_Louvain_File, - ::testing::Combine(::testing::Values(Louvain_Usecase{100, 1, true, 3, 0.408695}), - ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); + ::testing::Combine( + ::testing::Values(Louvain_Usecase{std::nullopt, std::nullopt, std::nullopt, true, 3, 0.408695}, + Louvain_Usecase{20, double{1e-4}, std::nullopt, true, 3, 0.408695}, + Louvain_Usecase{100, double{1e-4}, double{0.8}, true, 3, 0.48336622}), + ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with diff --git a/cpp/tests/community/mg_louvain_test.cpp b/cpp/tests/community/mg_louvain_test.cpp index f89301c485b..41339e32d77 100644 --- a/cpp/tests/community/mg_louvain_test.cpp +++ b/cpp/tests/community/mg_louvain_test.cpp @@ -41,6 +41,7 @@ // struct Louvain_Usecase { size_t max_level_{100}; + double threshold_{1e-7}; double resolution_{1}; bool check_correctness_{true}; }; @@ -72,6 +73,7 @@ class Tests_MGLouvain cugraph::graph_view_t const& mg_graph_view, std::optional> mg_edge_weight_view, cugraph::Dendrogram const& mg_dendrogram, + weight_t threshold, weight_t resolution, weight_t mg_modularity) { @@ -100,6 +102,7 @@ class Tests_MGLouvain &sg_edge_weights, &sg_modularity, &handle, + threshold, resolution, comm_rank](size_t i) { rmm::device_uvector d_mg_aggregate_cluster_v(0, handle.get_stream()); @@ -128,6 +131,7 @@ class Tests_MGLouvain sg_edge_weight_view, d_sg_cluster_v.data(), size_t{1}, + threshold, resolution); EXPECT_TRUE(cugraph::test::check_invertible( @@ -185,12 +189,13 @@ class Tests_MGLouvain hr_timer.start("MG Louvain"); } - auto [dendrogram, mg_modularity] = - cugraph::louvain(*handle_, - mg_graph_view, - mg_edge_weight_view, - louvain_usecase.max_level_, - louvain_usecase.resolution_); + auto [dendrogram, mg_modularity] = cugraph::louvain( + *handle_, + mg_graph_view, + mg_edge_weight_view, + louvain_usecase.max_level_, + static_cast(louvain_usecase.threshold_), + static_cast(louvain_usecase.resolution_)); if (cugraph::test::g_perf) { RAFT_CUDA_TRY(cudaDeviceSynchronize()); // for consistent performance measurement @@ -206,6 +211,7 @@ class Tests_MGLouvain mg_graph_view, mg_edge_weight_view, *dendrogram, + louvain_usecase.threshold_, louvain_usecase.resolution_, mg_modularity); } @@ -257,15 +263,16 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGLouvain_File, ::testing::Combine( // enable correctness checks for small graphs - ::testing::Values(Louvain_Usecase{100, 1}), + ::testing::Values(Louvain_Usecase{100, double{1e-7}, 1}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx"), cugraph::test::File_Usecase("test/datasets/dolphins.mtx")))); -INSTANTIATE_TEST_SUITE_P(rmat_small_tests, - Tests_MGLouvain_Rmat, - ::testing::Combine(::testing::Values(Louvain_Usecase{100, 1}), - ::testing::Values(cugraph::test::Rmat_Usecase( - 10, 16, 0.57, 0.19, 0.19, 0, true, false)))); +INSTANTIATE_TEST_SUITE_P( + rmat_small_tests, + Tests_MGLouvain_Rmat, + ::testing::Combine( + ::testing::Values(Louvain_Usecase{100, double{1e-7}, 1}), + ::testing::Values(cugraph::test::Rmat_Usecase(10, 16, 0.57, 0.19, 0.19, 0, true, false)))); INSTANTIATE_TEST_SUITE_P( file_benchmark_test, /* note that the test filename can be overridden in benchmarking (with @@ -276,7 +283,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGLouvain_File, ::testing::Combine( // disable correctness checks for large graphs - ::testing::Values(Louvain_Usecase{100, 1, false}), + ::testing::Values(Louvain_Usecase{100, double{1e-7}, 1, false}), ::testing::Values(cugraph::test::File_Usecase("test/datasets/karate.mtx")))); INSTANTIATE_TEST_SUITE_P( @@ -288,7 +295,7 @@ INSTANTIATE_TEST_SUITE_P( Tests_MGLouvain_Rmat, ::testing::Combine( // disable correctness checks for large graphs - ::testing::Values(Louvain_Usecase{100, 1, false}), + ::testing::Values(Louvain_Usecase{100, double{1e-7}, 1, false}), ::testing::Values(cugraph::test::Rmat_Usecase(20, 32, 0.57, 0.19, 0.19, 0, true, false)))); CUGRAPH_MG_TEST_PROGRAM_MAIN() diff --git a/python/cugraph/cugraph/community/louvain.py b/python/cugraph/cugraph/community/louvain.py index 35ca864824f..7f9742c8f09 100644 --- a/python/cugraph/cugraph/community/louvain.py +++ b/python/cugraph/cugraph/community/louvain.py @@ -17,11 +17,13 @@ ) import cudf +import warnings from pylibcugraph import louvain as pylibcugraph_louvain from pylibcugraph import ResourceHandle -def louvain(G, max_iter=100, resolution=1.0): +# FIXME: max_level should default to 100 once max_iter is removed +def louvain(G, max_level=None, max_iter=None, resolution=1.0, threshold=1e-7): """ Compute the modularity optimizing partition of the input graph using the Louvain method @@ -40,18 +42,30 @@ def louvain(G, max_iter=100, resolution=1.0): present. The current implementation only supports undirected graphs. - max_iter : integer, optional (default=100) - This controls the maximum number of levels/iterations of the Louvain + max_level : integer, optional (default=100) + This controls the maximum number of levels of the Louvain algorithm. When specified the algorithm will terminate after no more - than the specified number of iterations. No error occurs when the + than the specified number of levels. No error occurs when the algorithm terminates early in this manner. - resolution: float/double, optional (default=1.0) + max_iter : integer, optional (default=None) + This parameter is deprecated in favor of max_level. Previously + it was used to control the maximum number of levels of the Louvain + algorithm. + + resolution: float, optional (default=1.0) Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. Defaults to 1. + threshold: float + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. + Defaults to 1e-7. + Returns ------- parts : cudf.DataFrame @@ -80,10 +94,29 @@ def louvain(G, max_iter=100, resolution=1.0): if G.is_directed(): raise ValueError("input graph must be undirected") + # FIXME: This max_iter logic and the max_level defaulting can be deleted + # in favor of defaulting max_level in call once max_iter is deleted + if max_iter: + if max_level: + raise ValueError( + "max_iter is deprecated. Cannot specify both max_iter and max_level" + ) + + warning_msg = ( + "max_iter has been renamed max_level. Use of max_iter is " + "deprecated and will no longer be supported in the next releases." + ) + warnings.warn(warning_msg, FutureWarning) + max_level = max_iter + + if max_level is None: + max_level = 100 + vertex, partition, mod_score = pylibcugraph_louvain( resource_handle=ResourceHandle(), graph=G._plc_graph, - max_level=max_iter, + max_level=max_level, + threshold=threshold, resolution=resolution, do_expensive_check=False, ) diff --git a/python/cugraph/cugraph/dask/common/part_utils.py b/python/cugraph/cugraph/dask/common/part_utils.py index fda7e257367..7c0aad6c3ee 100644 --- a/python/cugraph/cugraph/dask/common/part_utils.py +++ b/python/cugraph/cugraph/dask/common/part_utils.py @@ -73,7 +73,7 @@ def persist_distributed_data(dask_df, client): _keys = dask_df.__dask_keys__() worker_dict = {} for i, key in enumerate(_keys): - worker_dict[str(key)] = tuple([worker_addresses[i]]) + worker_dict[key] = tuple([worker_addresses[i]]) persisted = client.persist(dask_df, workers=worker_dict) parts = futures_of(persisted) return parts @@ -89,7 +89,7 @@ def get_persisted_df_worker_map(dask_df, client): ddf_keys = futures_of(dask_df) output_map = {} for w, w_keys in client.has_what().items(): - output_map[w] = [ddf_k for ddf_k in ddf_keys if str(ddf_k.key) in w_keys] + output_map[w] = [ddf_k for ddf_k in ddf_keys if ddf_k.key in w_keys] if len(output_map[w]) == 0: output_map[w] = _create_empty_dask_df_future(dask_df._meta, client, w) return output_map @@ -157,7 +157,7 @@ async def _extract_partitions( # NOTE: We colocate (X, y) here by zipping delayed # n partitions of them as (X1, y1), (X2, y2)... # and asking client to compute a single future for - # each tuple in the list + # each tuple in the list. dela = [np.asarray(d.to_delayed()) for d in dask_obj] # TODO: ravel() is causing strange behavior w/ delayed Arrays which are @@ -167,7 +167,7 @@ async def _extract_partitions( parts = client.compute([p for p in zip(*raveled)]) await wait(parts) - key_to_part = [(str(part.key), part) for part in parts] + key_to_part = [(part.key, part) for part in parts] who_has = await client.who_has(parts) return [(first(who_has[key]), part) for key, part in key_to_part] @@ -229,7 +229,7 @@ def load_balance_func(ddf_, by, client=None): wait(parts) who_has = client.who_has(parts) - key_to_part = [(str(part.key), part) for part in parts] + key_to_part = [(part.key, part) for part in parts] gpu_fututres = [ (first(who_has[key]), part.key[1], part) for key, part in key_to_part ] @@ -245,7 +245,7 @@ def load_balance_func(ddf_, by, client=None): for cumsum in cumsum_parts: num_rows.append(cumsum.iloc[-1]) - # Calculate current partition divisions + # Calculate current partition divisions. divisions = [sum(num_rows[0:x:1]) for x in range(0, len(num_rows) + 1)] divisions[-1] = divisions[-1] - 1 divisions = tuple(divisions) @@ -271,7 +271,7 @@ def load_balance_func(ddf_, by, client=None): def concat_dfs(df_list): """ - Concat a list of cudf dataframes + Concat a list of cudf dataframes. """ return cudf.concat(df_list) @@ -279,17 +279,17 @@ def concat_dfs(df_list): def get_delayed_dict(ddf): """ Returns a dicitionary with the dataframe tasks as keys and - the dataframe delayed objects as values + the dataframe delayed objects as values. """ df_delayed = {} for delayed_obj in ddf.to_delayed(): - df_delayed[str(delayed_obj.key)] = delayed_obj + df_delayed[delayed_obj.key] = delayed_obj return df_delayed def concat_within_workers(client, ddf): """ - Concats all partitions within workers without transfers + Concats all partitions within workers without transfers. """ df_delayed = get_delayed_dict(ddf) diff --git a/python/cugraph/cugraph/dask/community/louvain.py b/python/cugraph/cugraph/dask/community/louvain.py index c003939f5eb..8efbbafaf7b 100644 --- a/python/cugraph/cugraph/dask/community/louvain.py +++ b/python/cugraph/cugraph/dask/community/louvain.py @@ -28,6 +28,8 @@ from pylibcugraph import louvain as pylibcugraph_louvain from typing import Tuple, TYPE_CHECKING +import warnings + if TYPE_CHECKING: from cugraph import Graph @@ -45,19 +47,30 @@ def convert_to_cudf(result: cp.ndarray) -> Tuple[cudf.DataFrame, float]: def _call_plc_louvain( - sID: bytes, mg_graph_x, max_iter: int, resolution: int, do_expensive_check: bool + sID: bytes, + mg_graph_x, + max_level: int, + threshold: float, + resolution: float, + do_expensive_check: bool, ) -> Tuple[cp.ndarray, cp.ndarray, float]: return pylibcugraph_louvain( resource_handle=ResourceHandle(Comms.get_handle(sID).getHandle()), graph=mg_graph_x, - max_level=max_iter, + max_level=max_level, + threshold=threshold, resolution=resolution, do_expensive_check=do_expensive_check, ) +# FIXME: max_level should default to 100 once max_iter is removed def louvain( - input_graph: Graph, max_iter: int = 100, resolution: int = 1.0 + input_graph: Graph, + max_level: int = None, + max_iter: int = None, + resolution: float = 1.0, + threshold: float = 1e-7, ) -> Tuple[dask_cudf.DataFrame, float]: """ Compute the modularity optimizing partition of the input graph using the @@ -77,17 +90,27 @@ def louvain( present. The current implementation only supports undirected graphs. - max_iter : integer, optional (default=100) - This controls the maximum number of levels/iterations of the Louvain + max_level : integer, optional (default=100) + This controls the maximum number of levels of the Louvain algorithm. When specified the algorithm will terminate after no more - than the specified number of iterations. No error occurs when the + than the specified number of levels. No error occurs when the algorithm terminates early in this manner. - resolution: float/double, optional (default=1.0) + max_iter : integer, optional (default=None) + This parameter is deprecated in favor of max_level. Previously + it was used to control the maximum number of levels of the Louvain + algorithm. + + resolution: float, optional (default=1.0) Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. - Defaults to 1. + + threshold: float, optional (default=1e-7) + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. Returns ------- @@ -115,6 +138,24 @@ def louvain( if input_graph.is_directed(): raise ValueError("input graph must be undirected") + # FIXME: This max_iter logic and the max_level defaulting can be deleted + # in favor of defaulting max_level in call once max_iter is deleted + if max_iter: + if max_level: + raise ValueError( + "max_iter is deprecated. Cannot specify both max_iter and max_level" + ) + + warning_msg = ( + "max_iter has been renamed max_level. Use of max_iter is " + "deprecated and will no longer be supported in the next releases. " + ) + warnings.warn(warning_msg, FutureWarning) + max_level = max_iter + + if max_level is None: + max_level = 100 + # Initialize dask client client = default_client() @@ -125,7 +166,8 @@ def louvain( _call_plc_louvain, Comms.get_session_id(), input_graph._plc_graph[w], - max_iter, + max_level, + threshold, resolution, do_expensive_check, workers=[w], diff --git a/python/cugraph/cugraph/datasets/dataset.py b/python/cugraph/cugraph/datasets/dataset.py index b276a87b88e..877eade7708 100644 --- a/python/cugraph/cugraph/datasets/dataset.py +++ b/python/cugraph/cugraph/datasets/dataset.py @@ -266,6 +266,42 @@ def get_path(self): return self._path.absolute() + def is_directed(self): + """ + Returns True if the graph is a directed graph. + """ + return self.metadata["is_directed"] + + def is_multigraph(self): + """ + Returns True if the graph is a multigraph. + """ + return self.metadata["is_multigraph"] + + def is_symmetric(self): + """ + Returns True if the graph is symmetric. + """ + return self.metadata["is_symmetric"] + + def number_of_nodes(self): + """ + An alias of number_of_vertices() + """ + return self.number_of_vertices() + + def number_of_vertices(self): + """ + Get the number of vertices in the graph. + """ + return self.metadata["number_of_nodes"] + + def number_of_edges(self): + """ + Get the number of edges in the graph. + """ + return self.metadata["number_of_edges"] + def download_all(force=False): """ diff --git a/python/cugraph/cugraph/tests/utils/test_dataset.py b/python/cugraph/cugraph/tests/utils/test_dataset.py index 643d0468d46..c2a4f7c6072 100644 --- a/python/cugraph/cugraph/tests/utils/test_dataset.py +++ b/python/cugraph/cugraph/tests/utils/test_dataset.py @@ -328,6 +328,16 @@ def test_is_multigraph(dataset): assert G.is_multigraph() == dataset.metadata["is_multigraph"] +@pytest.mark.parametrize("dataset", ALL_DATASETS) +def test_object_getters(dataset): + assert dataset.is_directed() == dataset.metadata["is_directed"] + assert dataset.is_multigraph() == dataset.metadata["is_multigraph"] + assert dataset.is_symmetric() == dataset.metadata["is_symmetric"] + assert dataset.number_of_nodes() == dataset.metadata["number_of_nodes"] + assert dataset.number_of_vertices() == dataset.metadata["number_of_nodes"] + assert dataset.number_of_edges() == dataset.metadata["number_of_edges"] + + # # Test experimental for DeprecationWarnings # diff --git a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd index 736241b827c..3c273b7d3fa 100644 --- a/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd +++ b/python/pylibcugraph/pylibcugraph/_cugraph_c/community_algorithms.pxd @@ -98,6 +98,7 @@ cdef extern from "cugraph_c/community_algorithms.h": const cugraph_resource_handle_t* handle, cugraph_graph_t* graph, size_t max_level, + double threshold, double resolution, bool_t do_expensive_check, cugraph_hierarchical_clustering_result_t** result, diff --git a/python/pylibcugraph/pylibcugraph/louvain.pyx b/python/pylibcugraph/pylibcugraph/louvain.pyx index ecae7e700b4..eca569d7da1 100644 --- a/python/pylibcugraph/pylibcugraph/louvain.pyx +++ b/python/pylibcugraph/pylibcugraph/louvain.pyx @@ -51,7 +51,8 @@ from pylibcugraph.utils cimport ( def louvain(ResourceHandle resource_handle, _GPUGraph graph, size_t max_level, - double resolution, + float threshold, + float resolution, bool_t do_expensive_check): """ Compute the modularity optimizing partition of the input graph using the @@ -72,11 +73,16 @@ def louvain(ResourceHandle resource_handle, than the specified number of iterations. No error occurs when the algorithm terminates early in this manner. - resolution: double + threshold: float + Modularity gain threshold for each level. If the gain of + modularity between 2 levels of the algorithm is less than the + given threshold then the algorithm stops and returns the + resulting communities. + + resolution: float Called gamma in the modularity formula, this changes the size of the communities. Higher resolutions lead to more smaller communities, lower resolutions lead to fewer larger communities. - Defaults to 1. do_expensive_check : bool_t If True, performs more extensive tests on the inputs to ensure @@ -100,7 +106,7 @@ def louvain(ResourceHandle resource_handle, ... resource_handle, graph_props, srcs, dsts, weights, ... store_transposed=True, renumber=False, do_expensive_check=False) >>> (vertices, clusters, modularity) = pylibcugraph.louvain( - resource_handle, G, 100, 1., False) + resource_handle, G, 100, 1e-7, 1., False) >>> vertices [0, 1, 2] >>> clusters @@ -119,6 +125,7 @@ def louvain(ResourceHandle resource_handle, error_code = cugraph_louvain(c_resource_handle_ptr, c_graph_ptr, max_level, + threshold, resolution, do_expensive_check, &result_ptr, diff --git a/python/pylibcugraph/pylibcugraph/tests/test_louvain.py b/python/pylibcugraph/pylibcugraph/tests/test_louvain.py index d2027a46d9a..adea5e01f15 100644 --- a/python/pylibcugraph/pylibcugraph/tests/test_louvain.py +++ b/python/pylibcugraph/pylibcugraph/tests/test_louvain.py @@ -1,4 +1,4 @@ -# Copyright (c) 2022, NVIDIA CORPORATION. +# Copyright (c) 2022-2023, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -77,6 +77,7 @@ def test_sg_louvain_cupy(): ) max_level = 100 + threshold = 0.0001 resolution = 1.0 sg = SGGraph( @@ -91,7 +92,7 @@ def test_sg_louvain_cupy(): ) vertices, clusters, modularity = louvain( - resource_handle, sg, max_level, resolution, do_expensive_check=False + resource_handle, sg, max_level, threshold, resolution, do_expensive_check=False ) check_results(vertices, clusters, modularity) @@ -130,6 +131,7 @@ def test_sg_louvain_cudf(): ) max_level = 100 + threshold = 0.0001 resolution = 1.0 sg = SGGraph( @@ -144,7 +146,7 @@ def test_sg_louvain_cudf(): ) vertices, clusters, modularity = louvain( - resource_handle, sg, max_level, resolution, do_expensive_check=False + resource_handle, sg, max_level, threshold, resolution, do_expensive_check=False ) check_results(vertices, clusters, modularity)