Skip to content

Commit

Permalink
Graph coloring and MIS (#4211)
Browse files Browse the repository at this point in the history
This PR implements 
- MNMG Graph Coloring
- MNMG MIS for general graphs

Closes #4230 
Closes #4231

Authors:
  - Naim (https://github.com/naimnv)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: #4211
  • Loading branch information
naimnv authored Mar 12, 2024
1 parent e4b58ec commit a4eab99
Show file tree
Hide file tree
Showing 18 changed files with 1,468 additions and 40 deletions.
8 changes: 6 additions & 2 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -197,8 +197,8 @@ set(CUGRAPH_SOURCES
src/community/detail/common_methods_sg.cu
src/community/detail/refine_sg.cu
src/community/detail/refine_mg.cu
src/community/detail/mis_sg.cu
src/community/detail/mis_mg.cu
src/community/detail/maximal_independent_moves_sg.cu
src/community/detail/maximal_independent_moves_mg.cu
src/detail/utility_wrappers.cu
src/structure/graph_view_mg.cu
src/structure/remove_self_loops.cu
Expand Down Expand Up @@ -295,6 +295,10 @@ set(CUGRAPH_SOURCES
src/tree/legacy/mst.cu
src/components/weakly_connected_components_sg.cu
src/components/weakly_connected_components_mg.cu
src/components/mis_sg.cu
src/components/mis_mg.cu
src/components/vertex_coloring_sg.cu
src/components/vertex_coloring_mg.cu
src/structure/create_graph_from_edgelist_sg.cu
src/structure/create_graph_from_edgelist_mg.cu
src/structure/symmetrize_edgelist_sg.cu
Expand Down
30 changes: 28 additions & 2 deletions cpp/include/cugraph/algorithms.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -2340,15 +2340,41 @@ std::tuple<rmm::device_uvector<size_t>, rmm::device_uvector<vertex_t>> k_hop_nbr
* handles to various CUDA libraries) to run graph algorithms.
* @param graph_view Graph view object.
* @param rng_state The RngState instance holding pseudo-random number generator state.
* @return A device vector containing vertices found in the maximal independent set
* @return A device vector containing vertices in the maximal independent set.
*/

template <typename vertex_t, typename edge_t, bool multi_gpu>
rmm::device_uvector<vertex_t> maximal_independent_set(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
raft::random::RngState& rng_state);

/**
 * @brief Find a Greedy Vertex Coloring
 *
 * A vertex coloring is an assignment of colors or labels to each vertex of a graph so that
 * no two adjacent vertices have the same color or label. Finding the minimum number of colors
 * needed to color the vertices of a graph is an NP-hard problem and therefore for practical
 * use cases greedy coloring is used. Here we provide an implementation of greedy vertex
 * coloring based on maximal independent set.
 * See
 * https://research.nvidia.com/sites/default/files/pubs/2015-05_Parallel-Graph-Coloring/nvr-2015-001.pdf
 * for further information.
 *
 * @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
 * @tparam edge_t Type of edge identifiers. Needs to be an integral type.
 * @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
 * or multi-GPU (true).
 * @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
 * handles to various CUDA libraries) to run graph algorithms.
 * @param graph_view Graph view object.
 * @param rng_state The RngState instance holding pseudo-random number generator state.
 * @return A device vector containing color for each vertex.
 */
template <typename vertex_t, typename edge_t, bool multi_gpu>
rmm::device_uvector<vertex_t> vertex_coloring(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
raft::random::RngState& rng_state);

} // namespace cugraph

/**
Expand Down
313 changes: 313 additions & 0 deletions cpp/src/community/detail/maximal_independent_moves.cuh
Original file line number Diff line number Diff line change
@@ -0,0 +1,313 @@

/*
* Copyright (c) 2023-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include "maximal_independent_moves.hpp"
#include "prims/fill_edge_src_dst_property.cuh"
#include "prims/per_v_transform_reduce_incoming_outgoing_e.cuh"
#include "prims/update_edge_src_dst_property.cuh"

#include <cugraph/edge_property.hpp>
#include <cugraph/edge_src_dst_property.hpp>
#include <cugraph/graph_functions.hpp>
#include <cugraph/graph_view.hpp>
#include <cugraph/utilities/host_scalar_comm.hpp>

#include <cuda/functional>
#include <thrust/count.h>
#include <thrust/distance.h>
#include <thrust/iterator/counting_iterator.h>
#include <thrust/merge.h>
#include <thrust/optional.h>
#include <thrust/remove.h>
#include <thrust/set_operations.h>
#include <thrust/transform.h>
#include <thrust/transform_reduce.h>

#include <cmath>

namespace cugraph {

namespace detail {

/**
 * @brief Select a set of vertices in which no selected vertex has a selected neighbor.
 *
 * Every vertex with non-zero out-degree starts as a "remaining" vertex whose rank is its own
 * vertex ID; vertices with zero out-degree get rank std::numeric_limits<vertex_t>::lowest() and are
 * never considered. Each iteration
 *   1. samples candidate vertices from the remaining ones,
 *   2. computes, for every vertex, the maximum rank among its outgoing and incoming neighbors
 *      (non-candidates are temporarily ranked lowest() so they cannot win),
 *   3. discards a candidate if a neighbor has already been selected, or selects the candidate if
 *      its rank is >= the maximum rank of its neighbors.
 * Selected vertices are marked with rank std::numeric_limits<vertex_t>::max(), discarded ones with
 * lowest(). The loop ends when no remaining vertex is left (summed over all ranks if multi_gpu).
 *
 * @tparam vertex_t Type of vertex identifiers.
 * @tparam edge_t Type of edge identifiers.
 * @tparam multi_gpu If true, counts are all-reduced over handle.get_comms() and ranks are
 * propagated through edge source/destination property caches.
 * @param handle RAFT handle supplying the stream, the thrust policy and (if multi_gpu) the
 * communicator.
 * @param graph_view Graph view object.
 * @param rng_state Random number generator state forwarded to select_random_vertices.
 * @return Device vector holding the selected vertices of the local vertex partition.
 */
template <typename vertex_t, typename edge_t, bool multi_gpu>
rmm::device_uvector<vertex_t> maximal_independent_moves(
  raft::handle_t const& handle,
  cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
  raft::random::RngState& rng_state)
{
  using GraphViewType = cugraph::graph_view_t<vertex_t, edge_t, false, multi_gpu>;

  vertex_t local_vtx_partition_size = graph_view.local_vertex_partition_range_size();

  rmm::device_uvector<vertex_t> remaining_vertices(local_vtx_partition_size, handle.get_stream());

  auto vertex_begin =
    thrust::make_counting_iterator(graph_view.local_vertex_partition_range_first());
  auto vertex_end = thrust::make_counting_iterator(graph_view.local_vertex_partition_range_last());

  // Compute out-degree
  auto out_degrees = graph_view.compute_out_degrees(handle);

  // Only vertices with non-zero out-degree can possibly move
  remaining_vertices.resize(
    thrust::distance(remaining_vertices.begin(),
                     thrust::copy_if(handle.get_thrust_policy(),
                                     vertex_begin,
                                     vertex_end,
                                     out_degrees.begin(),
                                     remaining_vertices.begin(),
                                     [] __device__(auto deg) { return deg > 0; })),
    handle.get_stream());

  // Set ID of each vertex as its rank
  rmm::device_uvector<vertex_t> ranks(local_vtx_partition_size, handle.get_stream());
  thrust::copy(handle.get_thrust_policy(), vertex_begin, vertex_end, ranks.begin());

  // Set ranks of zero out-degree vertices to std::numeric_limits<vertex_t>::lowest()
  thrust::transform_if(handle.get_thrust_policy(),
                       out_degrees.begin(),
                       out_degrees.end(),
                       ranks.begin(),
                       cuda::proclaim_return_type<vertex_t>(
                         [] __device__(auto) { return std::numeric_limits<vertex_t>::lowest(); }),
                       [] __device__(auto deg) { return deg == 0; });

  // The degrees are not needed any more; release the memory before entering the loop
  out_degrees.resize(0, handle.get_stream());
  out_degrees.shrink_to_fit(handle.get_stream());

  // Number of remaining vertices (summed over all ranks if multi_gpu). It is refreshed at the
  // bottom of every iteration, so the value is still valid at the top of the next one and no
  // second all-reduce per iteration is required.
  vertex_t nr_remaining_vertices_to_check = remaining_vertices.size();
  if constexpr (multi_gpu) {
    nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(),
                                                           nr_remaining_vertices_to_check,
                                                           raft::comms::op_t::SUM,
                                                           handle.get_stream());
  }

  size_t loop_counter = 0;
  while (true) {
    loop_counter++;

    // Copy ranks into temporary vector to begin with

    rmm::device_uvector<vertex_t> temporary_ranks(local_vtx_partition_size, handle.get_stream());
    thrust::copy(handle.get_thrust_policy(), ranks.begin(), ranks.end(), temporary_ranks.begin());

    // Select a random set of candidate vertices:
    //  - fewer than 1024 remaining vertices: take all of them,
    //  - otherwise take the fraction (0.50 + 0.25 * loop_counter), i.e. 75% in the first
    //    iteration and everything from the second iteration on.
    // The fraction is only multiplied in while it is below 1.0. Scaling first and clamping
    // afterwards would convert a double larger than the maximum of vertex_t to vertex_t, which is
    // undefined behavior (reachable with a 32-bit vertex_t and ~2 billion remaining vertices).
    double const candidate_fraction = 0.50 + 0.25 * static_cast<double>(loop_counter);

    vertex_t nr_candidates =
      (nr_remaining_vertices_to_check < 1024 || candidate_fraction >= 1.0)
        ? nr_remaining_vertices_to_check
        : static_cast<vertex_t>(candidate_fraction * nr_remaining_vertices_to_check);

    // FIXME: Can we improve performance here?
    // FIXME: if(nr_remaining_vertices_to_check < 1024), may avoid calling select_random_vertices
    // NOTE(review): the trailing (false, true) arguments are presumably
    // (with_replacement, sort_vertices); the set_difference below needs sorted input -- confirm
    // against the declaration of select_random_vertices.
    auto d_sampled_vertices =
      cugraph::select_random_vertices(handle,
                                      graph_view,
                                      std::make_optional(raft::device_span<vertex_t const>{
                                        remaining_vertices.data(), remaining_vertices.size()}),
                                      rng_state,
                                      nr_candidates,
                                      false,
                                      true);

    // Remaining vertices that were not sampled in this iteration
    rmm::device_uvector<vertex_t> non_candidate_vertices(
      remaining_vertices.size() - d_sampled_vertices.size(), handle.get_stream());

    thrust::set_difference(handle.get_thrust_policy(),
                           remaining_vertices.begin(),
                           remaining_vertices.end(),
                           d_sampled_vertices.begin(),
                           d_sampled_vertices.end(),
                           non_candidate_vertices.begin());

    // Set temporary ranks of non-candidate vertices to std::numeric_limits<vertex_t>::lowest()
    thrust::for_each(
      handle.get_thrust_policy(),
      non_candidate_vertices.begin(),
      non_candidate_vertices.end(),
      [temporary_ranks =
         raft::device_span<vertex_t>(temporary_ranks.data(), temporary_ranks.size()),
       v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
        //
        // if rank of a non-candidate vertex is not std::numeric_limits<vertex_t>::max() (i.e. the
        // vertex is not already in MIS), set it to std::numeric_limits<vertex_t>::lowest()
        //
        auto v_offset = v - v_first;
        if (temporary_ranks[v_offset] < std::numeric_limits<vertex_t>::max()) {
          temporary_ranks[v_offset] = std::numeric_limits<vertex_t>::lowest();
        }
      });

    // Caches for ranks
    edge_src_property_t<GraphViewType, vertex_t> src_rank_cache(handle);
    edge_dst_property_t<GraphViewType, vertex_t> dst_rank_cache(handle);

    // Update rank caches with temporary ranks
    if constexpr (multi_gpu) {
      src_rank_cache = edge_src_property_t<GraphViewType, vertex_t>(handle, graph_view);
      dst_rank_cache = edge_dst_property_t<GraphViewType, vertex_t>(handle, graph_view);
      update_edge_src_property(handle, graph_view, temporary_ranks.begin(), src_rank_cache);
      update_edge_dst_property(handle, graph_view, temporary_ranks.begin(), dst_rank_cache);
    }

    //
    // Find maximum rank outgoing neighbor for each vertex
    //

    rmm::device_uvector<vertex_t> max_outgoing_ranks(local_vtx_partition_size, handle.get_stream());

    per_v_transform_reduce_outgoing_e(
      handle,
      graph_view,
      multi_gpu
        ? src_rank_cache.view()
        : detail::edge_major_property_view_t<vertex_t, vertex_t const*>(temporary_ranks.data()),
      multi_gpu ? dst_rank_cache.view()
                : detail::edge_minor_property_view_t<vertex_t, vertex_t const*>(
                    temporary_ranks.data(), vertex_t{0}),
      edge_dummy_property_t{}.view(),
      [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return dst_rank; },
      std::numeric_limits<vertex_t>::lowest(),
      cugraph::reduce_op::maximum<vertex_t>{},
      max_outgoing_ranks.begin());

    //
    // Find maximum rank incoming neighbor for each vertex
    //

    rmm::device_uvector<vertex_t> max_incoming_ranks(local_vtx_partition_size, handle.get_stream());

    per_v_transform_reduce_incoming_e(
      handle,
      graph_view,
      multi_gpu
        ? src_rank_cache.view()
        : detail::edge_major_property_view_t<vertex_t, vertex_t const*>(temporary_ranks.data()),
      multi_gpu ? dst_rank_cache.view()
                : detail::edge_minor_property_view_t<vertex_t, vertex_t const*>(
                    temporary_ranks.data(), vertex_t{0}),
      edge_dummy_property_t{}.view(),
      [] __device__(auto src, auto dst, auto src_rank, auto dst_rank, auto wt) { return src_rank; },
      std::numeric_limits<vertex_t>::lowest(),
      cugraph::reduce_op::maximum<vertex_t>{},
      max_incoming_ranks.begin());

    temporary_ranks.resize(0, handle.get_stream());
    temporary_ranks.shrink_to_fit(handle.get_stream());

    //
    // Compute max of outgoing and incoming neighbors
    // (the result overwrites max_outgoing_ranks)
    //
    thrust::transform(handle.get_thrust_policy(),
                      max_incoming_ranks.begin(),
                      max_incoming_ranks.end(),
                      max_outgoing_ranks.begin(),
                      max_outgoing_ranks.begin(),
                      thrust::maximum<vertex_t>());

    max_incoming_ranks.resize(0, handle.get_stream());
    max_incoming_ranks.shrink_to_fit(handle.get_stream());

    //
    // If the max neighbor of a vertex is already in MIS (i.e. has rank
    // std::numeric_limits<vertex_t>::max()), discard it, otherwise,
    // include the vertex if it has larger rank than its maximum rank neighbor
    //
    // The predicate writes only ranks[v_offset] of the vertex it is invoked for and reads neighbor
    // information exclusively from max_outgoing_ranks, which is not modified here.
    //
    auto last = thrust::remove_if(
      handle.get_thrust_policy(),
      d_sampled_vertices.begin(),
      d_sampled_vertices.end(),
      [max_rank_neighbor_first = max_outgoing_ranks.begin(),
       ranks                   = raft::device_span<vertex_t>(ranks.data(), ranks.size()),
       v_first = graph_view.local_vertex_partition_range_first()] __device__(auto v) {
        auto v_offset          = v - v_first;
        auto max_neighbor_rank = *(max_rank_neighbor_first + v_offset);
        auto rank_of_v         = ranks[v_offset];

        if (max_neighbor_rank >= std::numeric_limits<vertex_t>::max()) {
          // Maximum rank neighbor is already in MIS
          // Discard current vertex by setting its rank to
          // std::numeric_limits<vertex_t>::lowest()
          ranks[v_offset] = std::numeric_limits<vertex_t>::lowest();
          return true;
        }

        if (rank_of_v >= max_neighbor_rank) {
          // Include v and set its rank to std::numeric_limits<vertex_t>::max()
          ranks[v_offset] = std::numeric_limits<vertex_t>::max();
          return true;
        }
        return false;
      });

    max_outgoing_ranks.resize(0, handle.get_stream());
    max_outgoing_ranks.shrink_to_fit(handle.get_stream());

    // Keep only the candidates that were neither selected nor discarded
    d_sampled_vertices.resize(thrust::distance(d_sampled_vertices.begin(), last),
                              handle.get_stream());
    d_sampled_vertices.shrink_to_fit(handle.get_stream());

    remaining_vertices.resize(non_candidate_vertices.size() + d_sampled_vertices.size(),
                              handle.get_stream());
    remaining_vertices.shrink_to_fit(handle.get_stream());

    // merge non-candidate and remaining candidate vertices
    thrust::merge(handle.get_thrust_policy(),
                  non_candidate_vertices.begin(),
                  non_candidate_vertices.end(),
                  d_sampled_vertices.begin(),
                  d_sampled_vertices.end(),
                  remaining_vertices.begin());

    // Refresh the count; it serves both as the termination test and as the input of the next
    // iteration's candidate selection.
    nr_remaining_vertices_to_check = remaining_vertices.size();
    if constexpr (multi_gpu) {
      nr_remaining_vertices_to_check = host_scalar_allreduce(handle.get_comms(),
                                                             nr_remaining_vertices_to_check,
                                                             raft::comms::op_t::SUM,
                                                             handle.get_stream());
    }

    if (nr_remaining_vertices_to_check == 0) { break; }
  }

  // Count number of vertices included in MIS

  vertex_t nr_vertices_included_in_mis = thrust::count_if(
    handle.get_thrust_policy(), ranks.begin(), ranks.end(), [] __device__(auto v_rank) {
      return v_rank >= std::numeric_limits<vertex_t>::max();
    });

  // Build MIS and return
  rmm::device_uvector<vertex_t> mis(nr_vertices_included_in_mis, handle.get_stream());
  thrust::copy_if(
    handle.get_thrust_policy(),
    vertex_begin,
    vertex_end,
    ranks.begin(),
    mis.begin(),
    [] __device__(auto v_rank) { return v_rank >= std::numeric_limits<vertex_t>::max(); });

  ranks.resize(0, handle.get_stream());
  ranks.shrink_to_fit(handle.get_stream());
  return mis;
}
} // namespace detail

} // namespace cugraph
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,13 @@
#include <rmm/device_uvector.hpp>

namespace cugraph {
namespace detail {

template <typename vertex_t, typename edge_t, bool multi_gpu>
rmm::device_uvector<vertex_t> maximal_independent_set(
rmm::device_uvector<vertex_t> maximal_independent_moves(
raft::handle_t const& handle,
graph_view_t<vertex_t, edge_t, false, multi_gpu> const& graph_view,
raft::random::RngState& rng_state);

} // namespace detail
} // namespace cugraph
Loading

0 comments on commit a4eab99

Please sign in to comment.