diff --git a/cpp/include/cugraph/algorithms.hpp b/cpp/include/cugraph/algorithms.hpp index 99a4d68dacc..a0c4ad4fe3b 100644 --- a/cpp/include/cugraph/algorithms.hpp +++ b/cpp/include/cugraph/algorithms.hpp @@ -792,12 +792,13 @@ void ecg(raft::handle_t const& handle, * * @throws cugraph::logic_error when an error occurs. * - * @tparam graph_view_t Type of graph - * * @param[in] handle Library handle (RAFT). If a communicator is set in the handle, - * @param[in] graph Input graph object - * @param[in] edge_weight_view View object holding edge weights for @p graph_view. * @param[in] rng_state The RngState instance holding pseudo-random number generator state. + * @param[in] graph_view Input graph view object + * @param[in] edge_weight_view View object holding edge weights for @p graph_view. + * @param[in] min_weight Minimum edge weight to use in the final call of the clustering + * algorithm if an edge does not appear in any of the ensemble runs. + * @param[in] ensemble_size The ensemble size parameter * @param[in] max_level (optional) maximum number of levels to run (default 100) * @param[in] threshold (optional) threshold for convergence at each level (default * 1e-7) diff --git a/cpp/src/community/ecg_impl.cuh b/cpp/src/community/ecg_impl.cuh index dc8637e07d5..9481083258e 100644 --- a/cpp/src/community/ecg_impl.cuh +++ b/cpp/src/community/ecg_impl.cuh @@ -48,6 +48,17 @@ std::tuple, size_t, weight_t> ecg( { using graph_view_t = cugraph::graph_view_t; + CUGRAPH_EXPECTS(min_weight >= weight_t{0.0}, + "Invalid input arguments: min_weight must be positive"); + CUGRAPH_EXPECTS(ensemble_size >= 1, + "Invalid input arguments: ensemble_size must be a non-zero integer"); + CUGRAPH_EXPECTS( + threshold > 0.0 && threshold <= 1.0, + "Invalid input arguments: threshold must be a positive number in range [0.0, 1.0]"); + CUGRAPH_EXPECTS( + resolution > 0.0 && resolution <= 1.0, + "Invalid input arguments: resolution must be a positive number in range [0.0, 1.0]"); + edge_src_property_t src_cluster_assignments(handle, graph_view); edge_dst_property_t dst_cluster_assignments(handle, graph_view); edge_property_t modified_edge_weights(handle, graph_view); @@ -80,7 +91,7 @@ std::tuple, size_t, weight_t> ecg( src_cluster_assignments.view(), dst_cluster_assignments.view(), modified_edge_weights.view(), - [] __device__(auto src, auto dst, auto src_property, auto dst_property, auto edge_property) { + [] __device__(auto, auto, auto src_property, auto dst_property, auto edge_property) { return edge_property + (src_property == dst_property); }, modified_edge_weights.mutable_view()); @@ -92,8 +103,8 @@ std::tuple, size_t, weight_t> ecg( edge_src_dummy_property_t{}.view(), edge_dst_dummy_property_t{}.view(), view_concat(*edge_weight_view, modified_edge_weights.view()), - [min_weight, ensemble_size] __device__( - auto src, auto dst, thrust::nullopt_t, thrust::nullopt_t, auto edge_properties) { + [min_weight, ensemble_size = static_cast(ensemble_size)] __device__( + auto, auto, thrust::nullopt_t, thrust::nullopt_t, auto edge_properties) { auto e_weight = thrust::get<0>(edge_properties); auto e_frequency = thrust::get<1>(edge_properties); return min_weight + (e_weight - min_weight) * e_frequency / ensemble_size; @@ -101,7 +112,6 @@ std::tuple, size_t, weight_t> ecg( modified_edge_weights.mutable_view()); std::tie(max_level, modularity) = - cugraph::louvain(handle, std::make_optional(std::reference_wrapper(rng_state)), graph_view, diff --git a/cpp/src/community/louvain_impl.cuh b/cpp/src/community/louvain_impl.cuh index d767d8e3d85..0824f0c3639 100644 --- a/cpp/src/community/louvain_impl.cuh +++ b/cpp/src/community/louvain_impl.cuh @@ -20,13 +20,12 @@ #include #include -#include -// FIXME: Only outstanding items preventing this becoming a .hpp file #include #include #include #include #include +#include #include #include @@ -116,17 +115,12 @@ std::pair>, weight_t> louvain( auto const comm_size = comm.get_size(); auto const comm_rank = comm.get_rank(); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - std::cout << "A. comm_rank: " << comm_rank << " size: " << random_cluster_assignments.size() - << std::endl; - std::vector tx_value_counts(comm_size); std::fill(tx_value_counts.begin(), tx_value_counts.end(), random_cluster_assignments.size() / comm_size); - std::vector h_random_numbers; + std::vector h_random_gpu_ranks; { rmm::device_uvector d_random_numbers( random_cluster_assignments.size() % comm_size, handle.get_stream()); @@ -137,16 +131,16 @@ std::pair>, weight_t> louvain( vertex_t{comm_size}, *rng_state); - h_random_numbers.resize(d_random_numbers.size()); + h_random_gpu_ranks.resize(d_random_numbers.size()); - raft::update_host(h_random_numbers.data(), + raft::update_host(h_random_gpu_ranks.data(), d_random_numbers.data(), d_random_numbers.size(), handle.get_stream()); } for (int i = 0; i < static_cast(random_cluster_assignments.size() % comm_size); i++) { - tx_value_counts[h_random_numbers[i]]++; + tx_value_counts[h_random_gpu_ranks[i]]++; } std::tie(random_cluster_assignments, std::ignore) = @@ -155,11 +149,6 @@ std::pair>, weight_t> louvain( tx_value_counts, handle.get_stream()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - std::cout << "B. comm_rank: " << comm_rank << " size: " << random_cluster_assignments.size() - << std::endl; - // shuffle/permute locally again random_numbers.resize(random_cluster_assignments.size(), handle.get_stream()); @@ -174,16 +163,12 @@ std::pair>, weight_t> louvain( random_numbers.end(), random_cluster_assignments.begin()); - // find out how many elements current GPU needs to send to other GPUs + // take care of deficits and extras numbers vertex_t nr_extras = static_cast(random_cluster_assignments.size()) - current_graph_view.local_vertex_partition_range_size(); vertex_t nr_deficits = nr_extras >= 0 ? 0 : -nr_extras; - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "rank: " << comm_rank << " extras: " << nr_extras - << " nr_deficits: " << nr_deficits << std::endl; - auto extra_cluster_ids = cugraph::detail::device_allgatherv( handle, comm, @@ -192,56 +177,18 @@ std::pair>, weight_t> louvain( current_graph_view.local_vertex_partition_range_size(), nr_extras > 0 ? nr_extras : 0)); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - if (current_graph_view.local_vertex_partition_range_size() < 50) - raft::print_device_vector( - "extra_cluster_ids:", extra_cluster_ids.data(), extra_cluster_ids.size(), std::cout); - random_cluster_assignments.resize(current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); - auto deficits = cugraph::host_scalar_allgather(handle.get_comms(), nr_deficits, handle.get_stream()); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << "rank: " << comm_rank << " #deficits_global = " << deficits.size() - << std::endl; - std::exclusive_scan(deficits.begin(), deficits.end(), deficits.begin(), vertex_t{0}); - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << std::endl; - if (comm_rank == 0) - std::for_each( - deficits.begin(), deficits.end(), [](const int& n) { std::cout << n << " "; }); - std::cout << std::endl; - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - std::cout << std::endl; - if (comm_rank == 1) - std::for_each( - deficits.begin(), deficits.end(), [](const int& n) { std::cout << n << "--"; }); - std::cout << std::endl; - raft::copy(random_cluster_assignments.data() + current_graph_view.local_vertex_partition_range_size() - nr_deficits, extra_cluster_ids.begin() + deficits[comm_rank], nr_deficits, handle.get_stream()); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - if (current_graph_view.local_vertex_partition_range_size() < 50) - raft::print_device_vector("copied_extras:", - random_cluster_assignments.data() + - current_graph_view.local_vertex_partition_range_size() - - nr_deficits, - nr_deficits, - std::cout); - - RAFT_CUDA_TRY(cudaDeviceSynchronize()); - - std::cout << "C. comm_rank: " << comm_rank << " size: " << random_cluster_assignments.size() - << std::endl; } assert(random_cluster_assignments.size() == @@ -474,7 +421,6 @@ std::pair>, weight_t> louvain( CUGRAPH_EXPECTS(edge_weight_view.has_value(), "Graph must be weighted"); - std::cout << "returning dendrogram " << std::endl; return detail::louvain( handle, rng_state, graph_view, edge_weight_view, max_level, threshold, resolution); } @@ -514,8 +460,6 @@ std::pair louvain( detail::flatten_dendrogram(handle, graph_view, *dendrogram, clustering); - std::cout << "returning mod: " << modularity << std::endl; - return std::make_pair(dendrogram->num_levels(), modularity); } diff --git a/cpp/tests/community/mg_ecg_test.cpp b/cpp/tests/community/mg_ecg_test.cpp index 9fe969063cf..a493d5cb193 100644 --- a/cpp/tests/community/mg_ecg_test.cpp +++ b/cpp/tests/community/mg_ecg_test.cpp @@ -46,7 +46,7 @@ struct Ecg_Usecase { size_t max_level_{100}; double threshold_{1e-7}; double resolution_{1.0}; - bool check_correctness_{false}; + bool check_correctness_{true}; }; //////////////////////////////////////////////////////////////////////////////// @@ -112,7 +112,8 @@ class Tests_MGEcg : public ::testing::TestWithParam