From 2e060b1fabdd754a699fbe4fbe17e56186613ad8 Mon Sep 17 00:00:00 2001 From: Charles Hastings Date: Thu, 18 Apr 2024 15:48:51 -0700 Subject: [PATCH] relabel with the correct size, update pytest golden result, deterministic labeling of partitions --- cpp/src/community/leiden_impl.cuh | 71 ++++++++++++++++--- .../cugraph/tests/community/test_leiden.py | 6 +- 2 files changed, 64 insertions(+), 13 deletions(-) diff --git a/cpp/src/community/leiden_impl.cuh b/cpp/src/community/leiden_impl.cuh index 73382ae51eb..c07f9f6ffba 100644 --- a/cpp/src/community/leiden_impl.cuh +++ b/cpp/src/community/leiden_impl.cuh @@ -108,7 +108,7 @@ std::pair>, weight_t> leiden( rmm::device_uvector louvain_of_refined_graph(0, handle.get_stream()); // #V - while (dendrogram->num_levels() < 2 * max_level + 1) { + while (dendrogram->num_levels() < max_level) { // // Initialize every cluster to reference each vertex to itself // @@ -249,8 +249,8 @@ std::pair>, weight_t> leiden( detail::timer_start(handle, hr_timer, "update_clustering"); #endif - rmm::device_uvector louvain_assignment_for_vertices = - rmm::device_uvector(dendrogram->current_level_size(), handle.get_stream()); + rmm::device_uvector louvain_assignment_for_vertices(dendrogram->current_level_size(), + handle.get_stream()); raft::copy(louvain_assignment_for_vertices.begin(), dendrogram->current_level_begin(), @@ -479,19 +479,19 @@ std::pair>, weight_t> leiden( louvain_of_refined_graph.resize(current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); + rmm::device_uvector numeric_sequence( + current_graph_view.local_vertex_partition_range_size(), handle.get_stream()); - // Temporarily use louvain_of_refined_graph to be a numeric sequence to renumber the - // dendrogram. detail::sequence_fill(handle.get_stream(), - louvain_of_refined_graph.data(), - louvain_of_refined_graph.size(), + numeric_sequence.data(), + numeric_sequence.size(), current_graph_view.local_vertex_partition_range_first()); relabel( handle, std::make_tuple(static_cast((*numbering_map).begin()), - static_cast(louvain_of_refined_graph.begin())), - current_graph_view.local_vertex_partition_range_size(), + static_cast(numeric_sequence.begin())), + (*numbering_map).size(), dendrogram->current_level_begin(), dendrogram->current_level_size(), false); @@ -505,7 +505,58 @@ std::pair>, weight_t> leiden( handle, std::make_tuple(static_cast(leiden_to_louvain_map.first.begin()), static_cast(leiden_to_louvain_map.second.begin())), - current_graph_view.local_vertex_partition_range_size(), + leiden_to_louvain_map.first.size(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), + false); + + // Relabel clusters so that each cluster is identified by the lowest vertex id + // that is assigned to it. Note that numbering_map and numeric_sequence go out + // of scope at the end of this block, we will reuse their memory + raft::copy(numbering_map->begin(), + louvain_of_refined_graph.data(), + louvain_of_refined_graph.size(), + handle.get_stream()); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()), + thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end())); + + size_t new_size = thrust::distance(numbering_map->begin(), + thrust::unique_by_key(handle.get_thrust_policy(), + numbering_map->begin(), + numbering_map->end(), + numeric_sequence.begin()) + .first); + + numbering_map->resize(new_size, handle.get_stream()); + numeric_sequence.resize(new_size, handle.get_stream()); + + if constexpr (multi_gpu) { + std::tie(*numbering_map, numeric_sequence) = + shuffle_ext_vertex_value_pairs_to_local_gpu_by_vertex_partitioning( + handle, std::move(*numbering_map), std::move(numeric_sequence)); + + thrust::sort(handle.get_thrust_policy(), + thrust::make_zip_iterator(numbering_map->begin(), numeric_sequence.begin()), + thrust::make_zip_iterator(numbering_map->end(), numeric_sequence.end())); + + size_t new_size = thrust::distance(numbering_map->begin(), + thrust::unique_by_key(handle.get_thrust_policy(), + numbering_map->begin(), + numbering_map->end(), + numeric_sequence.begin()) + .first); + + numbering_map->resize(new_size, handle.get_stream()); + numeric_sequence.resize(new_size, handle.get_stream()); + } + + relabel( + handle, + std::make_tuple(static_cast((*numbering_map).begin()), + static_cast(numeric_sequence.begin())), + (*numbering_map).size(), louvain_of_refined_graph.data(), louvain_of_refined_graph.size(), false); diff --git a/python/cugraph/cugraph/tests/community/test_leiden.py b/python/cugraph/cugraph/tests/community/test_leiden.py index 71117c4210f..48300b2201c 100644 --- a/python/cugraph/cugraph/tests/community/test_leiden.py +++ b/python/cugraph/cugraph/tests/community/test_leiden.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019-2023, NVIDIA CORPORATION. +# Copyright (c) 2019-2024, NVIDIA CORPORATION. # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at @@ -83,8 +83,8 @@ "input_type": "CSR", "expected_output": { # fmt: off - "partition": [3, 3, 3, 3, 2, 2, 2, 3, 1, 3, 2, 3, 3, 3, 1, 1, 2, 3, 1, 3, - 1, 3, 1, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 1], + "partition": [0, 0, 0, 0, 3, 3, 3, 0, 1, 0, 3, 0, 0, 0, 1, 1, 3, 0, 1, 0, + 1, 0, 1, 2, 2, 2, 1, 2, 2, 1, 1, 2, 1, 1], # fmt: on "modularity_score": 0.41880345, },