Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Address Leiden clustering generating too many clusters #4730

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions cpp/src/community/detail/refine.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,7 @@ refine_clustering(
rmm::device_uvector<typename graph_view_t::vertex_type>&& next_clusters_v,
edge_src_property_t<graph_view_t, weight_t> const& src_vertex_weights_cache,
edge_src_property_t<graph_view_t, typename graph_view_t::vertex_type> const& src_clusters_cache,
edge_dst_property_t<graph_view_t, typename graph_view_t::vertex_type> const& dst_clusters_cache,
bool up_down);
edge_dst_property_t<graph_view_t, typename graph_view_t::vertex_type> const& dst_clusters_cache);

}
} // namespace cugraph
9 changes: 7 additions & 2 deletions cpp/src/community/detail/refine_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -150,8 +150,7 @@ refine_clustering(
edge_src_property_t<GraphViewType, typename GraphViewType::vertex_type> const&
src_louvain_assignment_cache,
edge_dst_property_t<GraphViewType, typename GraphViewType::vertex_type> const&
dst_louvain_assignment_cache,
bool up_down)
dst_louvain_assignment_cache)
{
const weight_t POSITIVE_GAIN = 1e-6;
using vertex_t = typename GraphViewType::vertex_type;
Expand Down Expand Up @@ -230,6 +229,7 @@ refine_clustering(
cugraph::reduce_op::plus<weight_t>{},
weighted_cut_of_vertices_to_louvain.begin());

// FIXME: Consider using bit mask logic here. Would reduce memory by 8x
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, and also a higher chance to fit in L2 cache.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

rmm::device_uvector<uint8_t> singleton_and_connected_flags(
graph_view.local_vertex_partition_range_size(), handle.get_stream());

Expand Down Expand Up @@ -297,6 +297,11 @@ refine_clustering(
edge_dst_property_t<GraphViewType, vertex_t> dst_leiden_assignment_cache(handle);
edge_src_property_t<GraphViewType, uint8_t> src_singleton_and_connected_flag_cache(handle);

// FIXME: Why is kvstore used here? Can't this be accomplished by
// a direct lookup in louvain_assignment_of_vertices using
// leiden - graph_view.local_vertex_partition_range_first() as the
// index?
// Changing this would save memory and time
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Any reason to defer the update?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Trying to get a fix out to a user question. General Leiden improvements isn't a priority right now.

kv_store_t<vertex_t, vertex_t, false> leiden_to_louvain_map(
leiden_assignment.begin(),
leiden_assignment.end(),
Expand Down
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_mg_v32_e32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int32_t>,
std::pair<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_mg_v64_e64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int64_t>,
std::pair<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, true>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_sg_v32_e32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int32_t>,
std::pair<rmm::device_uvector<int32_t>, rmm::device_uvector<int32_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int32_t, int32_t, false, false>, int32_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
6 changes: 2 additions & 4 deletions cpp/src/community/detail/refine_sg_v64_e64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -37,8 +37,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

template std::tuple<rmm::device_uvector<int64_t>,
std::pair<rmm::device_uvector<int64_t>, rmm::device_uvector<int64_t>>>
Expand All @@ -59,8 +58,7 @@ refine_clustering(
edge_src_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
src_clusters_cache,
edge_dst_property_t<cugraph::graph_view_t<int64_t, int64_t, false, false>, int64_t> const&
dst_clusters_cache,
bool up_down);
dst_clusters_cache);

} // namespace detail
} // namespace cugraph
26 changes: 17 additions & 9 deletions cpp/src/community/leiden_impl.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
HighResTimer hr_timer{};
#endif

weight_t best_modularity = weight_t{-1.0};
weight_t final_Q{-1};

weight_t total_edge_weight =
compute_total_edge_weight(handle, current_graph_view, *current_edge_weight_view);

Expand Down Expand Up @@ -368,9 +369,6 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
detail::timer_stop<graph_view_t::is_multi_gpu>(handle, hr_timer);
#endif

bool terminate = (cur_Q <= best_modularity);
if (!terminate) { best_modularity = cur_Q; }

#ifdef TIMING
detail::timer_start<graph_view_t::is_multi_gpu>(handle, hr_timer, "contract graph");
#endif
Expand All @@ -386,8 +384,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
auto nr_unique_louvain_clusters =
remove_duplicates<vertex_t, multi_gpu>(handle, copied_louvain_partition);

terminate =
terminate || (nr_unique_louvain_clusters == current_graph_view.number_of_vertices());
bool terminate = (nr_unique_louvain_clusters == current_graph_view.number_of_vertices());

rmm::device_uvector<vertex_t> refined_leiden_partition(0, handle.get_stream());
std::pair<rmm::device_uvector<vertex_t>, rmm::device_uvector<vertex_t>> leiden_to_louvain_map{
Expand Down Expand Up @@ -426,11 +423,19 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
std::move(louvain_assignment_for_vertices),
src_vertex_weights_cache,
src_louvain_assignment_cache,
dst_louvain_assignment_cache,
up_down);
dst_louvain_assignment_cache);
}

// Clear buffer and contract the graph
final_Q = detail::compute_modularity(handle,
current_graph_view,
current_edge_weight_view,
src_louvain_assignment_cache,
dst_louvain_assignment_cache,
louvain_assignment_for_vertices,
cluster_weights,
total_edge_weight,
resolution);

cluster_keys.resize(0, handle.get_stream());
cluster_weights.resize(0, handle.get_stream());
Expand All @@ -445,6 +450,9 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
dst_louvain_assignment_cache.clear(handle);

if (!terminate) {
src_louvain_assignment_cache.clear(handle);
dst_louvain_assignment_cache.clear(handle);

auto nr_unique_leiden = static_cast<vertex_t>(leiden_to_louvain_map.first.size());
if (graph_view_t::is_multi_gpu) {
nr_unique_leiden = host_scalar_allreduce(
Expand Down Expand Up @@ -586,7 +594,7 @@ std::pair<std::unique_ptr<Dendrogram<vertex_t>>, weight_t> leiden(
detail::timer_display<graph_view_t::is_multi_gpu>(handle, hr_timer, std::cout);
#endif

return std::make_pair(std::move(dendrogram), best_modularity);
return std::make_pair(std::move(dendrogram), final_Q);
}

template <typename vertex_t, bool multi_gpu>
Expand Down
Loading