Skip to content

Commit

Permalink
add support for dropping self loops and removing multi edges to C API…
Browse files Browse the repository at this point in the history
… for graph creation (SG and MG)
  • Loading branch information
ChuckHastings committed Nov 16, 2023
1 parent 0dbea1a commit 52b3162
Show file tree
Hide file tree
Showing 11 changed files with 1,149 additions and 33 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,8 @@ set(CUGRAPH_SOURCES
src/community/detail/mis_mg.cu
src/detail/utility_wrappers.cu
src/structure/graph_view_mg.cu
src/structure/remove_self_loops.cu
src/structure/sort_and_remove_multi_edges.cu
src/utilities/path_retrieval.cu
src/structure/legacy/graph.cu
src/linear_assignment/legacy/hungarian.cu
Expand Down
65 changes: 65 additions & 0 deletions cpp/include/cugraph/graph_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -973,4 +973,69 @@ renumber_sampled_edgelist(
label_offsets,
bool do_expensive_check = false);

/**
* @brief Remove self loops from an edge list
*
* A self loop is an edge whose source and destination are the same vertex.
*
* Every edge list argument is taken by rvalue reference, so callers must pass the buffers
* with std::move (or as temporaries) and should use the vectors in the returned tuple
* afterwards rather than the arguments they passed in.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weight. Currently float and double are supported.
* @tparam edge_type_t Type of edge type. Needs to be an integral type.
*
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param edgelist_srcs List of source vertex ids
* @param edgelist_dsts List of destination vertex ids
* @param edgelist_weights Optional list of edge weights
* @param edgelist_edge_ids Optional list of edge ids
* @param edgelist_edge_types Optional list of edge types
* @return Tuple of vectors storing, in the same order as the parameters, edge sources,
* destinations, optional weights, optional edge ids, optional edge types.
* NOTE(review): presumably each optional output is populated only when the matching
* optional input was provided -- confirm against the implementation.
*/
template <typename vertex_t, typename edge_t, typename weight_t, typename edge_type_t>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
std::optional<rmm::device_uvector<weight_t>>,
std::optional<rmm::device_uvector<edge_t>>,
std::optional<rmm::device_uvector<edge_type_t>>>
remove_self_loops(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& edgelist_srcs,
rmm::device_uvector<vertex_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<weight_t>>&& edgelist_weights,
std::optional<rmm::device_uvector<edge_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types);

/**
* @brief Sort the edges and remove all but one edge when a multi-edge exists
*
* In an MG context it is assumed that edges have been shuffled to the proper GPU,
* in which case any multi-edges will be on the same GPU.
*
* This declaration does not specify which instance of a multi-edge is kept, so callers
* that need a particular combination of edge properties (for example a summed or maximum
* weight) should combine the duplicates themselves before calling this function.
*
* Every edge list argument is taken by rvalue reference, so callers must pass the buffers
* with std::move (or as temporaries) and should use the vectors in the returned tuple
* afterwards rather than the arguments they passed in.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weight. Currently float and double are supported.
* @tparam edge_type_t Type of edge type. Needs to be an integral type.
*
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param edgelist_srcs List of source vertex ids
* @param edgelist_dsts List of destination vertex ids
* @param edgelist_weights Optional list of edge weights
* @param edgelist_edge_ids Optional list of edge ids
* @param edgelist_edge_types Optional list of edge types
* @return Tuple of vectors storing, in the same order as the parameters, edge sources,
* destinations, optional weights, optional edge ids, optional edge types.
* NOTE(review): presumably each optional output is populated only when the matching
* optional input was provided -- confirm against the implementation.
*/
template <typename vertex_t, typename edge_t, typename weight_t, typename edge_type_t>
std::tuple<rmm::device_uvector<vertex_t>,
rmm::device_uvector<vertex_t>,
std::optional<rmm::device_uvector<weight_t>>,
std::optional<rmm::device_uvector<edge_t>>,
std::optional<rmm::device_uvector<edge_type_t>>>
sort_and_remove_multi_edges(raft::handle_t const& handle,
rmm::device_uvector<vertex_t>&& edgelist_srcs,
rmm::device_uvector<vertex_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<weight_t>>&& edgelist_weights,
std::optional<rmm::device_uvector<edge_t>>&& edgelist_edge_ids,
std::optional<rmm::device_uvector<edge_type_t>>&& edgelist_edge_types);

} // namespace cugraph
67 changes: 43 additions & 24 deletions cpp/include/cugraph_c/graph.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,9 @@ typedef struct {
argument that can be NULL if edge types are not used.
* @param [in] store_transposed If true create the graph initially in transposed format
* @param [in] renumber If true, renumber vertices to make an efficient data structure.
* If false, do not renumber. Renumbering is required if the vertices are not sequential
* integer values from 0 to num_vertices.
* If false, do not renumber. Renumbering enables some significant optimizations within
* the graph primitives library, so it is strongly encouraged. Renumbering is required if
* the vertices are not sequential integer values from 0 to num_vertices.
* @param [in] do_expensive_check If true, do expensive checks to validate the input data
* is consistent with software assumptions. If false bypass these checks.
* @param [out] graph A pointer to the graph object
Expand Down Expand Up @@ -94,8 +95,15 @@ cugraph_error_code_t cugraph_sg_graph_create(
argument that can be NULL if edge types are not used.
* @param [in] store_transposed If true create the graph initially in transposed format
* @param [in] renumber If true, renumber vertices to make an efficient data structure.
* If false, do not renumber. Renumbering is required if the vertices are not sequential
* integer values from 0 to num_vertices.
* If false, do not renumber. Renumbering enables some significant optimizations within
* the graph primitives library, so it is strongly encouraged. Renumbering is required if
* the vertices are not sequential integer values from 0 to num_vertices.
* @param [in] drop_self_loops If true, drop any self loops that exist in the provided edge list.
* @param [in] drop_multi_edges If true, drop any multi edges that exist in the provided edge list.
* Note that setting this flag will arbitrarily select one instance of a multi edge to be the
* edge that survives. If the edges have properties that should be honored (e.g. sum the
* weights, or take the maximum weight), the caller should do that and not rely on this flag.
* @param [in] do_expensive_check If true, do expensive checks to validate the input data
* is consistent with software assumptions. If false bypass these checks.
* @param [out] graph A pointer to the graph object
Expand All @@ -115,6 +123,8 @@ cugraph_error_code_t cugraph_graph_create_sg(
const cugraph_type_erased_device_array_view_t* edge_type_ids,
bool_t store_transposed,
bool_t renumber,
bool_t drop_self_loops,
bool_t drop_multi_edges,
bool_t do_expensive_check,
cugraph_graph_t** graph,
cugraph_error_t** error);
Expand All @@ -136,8 +146,9 @@ cugraph_error_code_t cugraph_graph_create_sg(
argument that can be NULL if edge types are not used.
* @param [in] store_transposed If true create the graph initially in transposed format
* @param [in] renumber If true, renumber vertices to make an efficient data structure.
* If false, do not renumber. Renumbering is required if the vertices are not sequential
* integer values from 0 to num_vertices.
* If false, do not renumber. Renumbering enables some significant optimizations within
* the graph primitives library, so it is strongly encouraged. Renumbering is required if
* the vertices are not sequential integer values from 0 to num_vertices.
* @param [in] do_expensive_check If true, do expensive checks to validate the input data
* is consistent with software assumptions. If false bypass these checks.
* @param [out] graph A pointer to the graph object
Expand Down Expand Up @@ -175,8 +186,9 @@ cugraph_error_code_t cugraph_sg_graph_create_from_csr(
argument that can be NULL if edge types are not used.
* @param [in] store_transposed If true create the graph initially in transposed format
* @param [in] renumber If true, renumber vertices to make an efficient data structure.
* If false, do not renumber. Renumbering is required if the vertices are not sequential
* integer values from 0 to num_vertices.
* If false, do not renumber. Renumbering enables some significant optimizations within
* the graph primitives library, so it is strongly encouraged. Renumbering is required if
* the vertices are not sequential integer values from 0 to num_vertices.
* @param [in] do_expensive_check If true, do expensive checks to validate the input data
* is consistent with software assumptions. If false bypass these checks.
* @param [out] graph A pointer to the graph object
Expand All @@ -199,22 +211,6 @@ cugraph_error_code_t cugraph_graph_create_sg_from_csr(
cugraph_graph_t** graph,
cugraph_error_t** error);

/**
* @brief Destroy a graph
*
* @param [in] graph A pointer to the graph object to destroy
*/
void cugraph_graph_free(cugraph_graph_t* graph);

/**
* @brief Destroy an SG graph
*
* @deprecated This API will be deleted, use cugraph_graph_free instead
*
* @param [in] graph A pointer to the graph object to destroy
*/
void cugraph_sg_graph_free(cugraph_graph_t* graph);

/**
* @brief Construct an MG graph
*
Expand Down Expand Up @@ -287,6 +283,11 @@ cugraph_error_code_t cugraph_mg_graph_create(
* @param [in] store_transposed If true create the graph initially in transposed format
* @param [in] num_arrays The number of arrays specified in @p vertices, @p src, @p dst, @p
* weights, @p edge_ids and @p edge_type_ids
* @param [in] drop_self_loops If true, drop any self loops that exist in the provided edge list.
* @param [in] drop_multi_edges If true, drop any multi edges that exist in the provided edge list.
* Note that setting this flag will arbitrarily select one instance of a multi edge to be the
* edge that survives. If the edges have properties that should be honored (e.g. sum the
* weights, or take the maximum weight), the caller should do that and not rely on this flag.
* @param [in] do_expensive_check If true, do expensive checks to validate the input data
* is consistent with software assumptions. If false bypass these checks.
* @param [out] graph A pointer to the graph object
Expand All @@ -305,10 +306,28 @@ cugraph_error_code_t cugraph_graph_create_mg(
cugraph_type_erased_device_array_view_t const* const* edge_type_ids,
bool_t store_transposed,
size_t num_arrays,
bool_t drop_self_loops,
bool_t drop_multi_edges,
bool_t do_expensive_check,
cugraph_graph_t** graph,
cugraph_error_t** error);

/**
* @brief Destroy a graph
*
* @param [in] graph A pointer to the graph object to destroy
*/
void cugraph_graph_free(cugraph_graph_t* graph);

/**
* @brief Destroy an SG graph
*
* @deprecated This API will be deleted, use cugraph_graph_free instead
*
* @param [in] graph A pointer to the graph object to destroy
*/
void cugraph_sg_graph_free(cugraph_graph_t* graph);

/**
* @brief Destroy an MG graph
*
Expand Down
50 changes: 42 additions & 8 deletions cpp/src/c_api/graph_mg.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,9 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_type_ids_;
size_t num_arrays_;
bool_t renumber_;
bool_t check_;
bool_t drop_self_loops_;
bool_t drop_multi_edges_;
bool_t do_expensive_check_;
cugraph::c_api::cugraph_graph_t* result_{};

create_graph_functor(
Expand All @@ -87,7 +89,9 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
cugraph::c_api::cugraph_type_erased_device_array_view_t const* const* edge_type_ids,
size_t num_arrays,
bool_t renumber,
bool_t check)
bool_t drop_self_loops,
bool_t drop_multi_edges,
bool_t do_expensive_check)
: abstract_functor(),
properties_(properties),
vertex_type_(vertex_type),
Expand All @@ -103,7 +107,9 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
edge_type_ids_(edge_type_ids),
num_arrays_(num_arrays),
renumber_(renumber),
check_(check)
drop_self_loops_(drop_self_loops),
drop_multi_edges_(drop_multi_edges),
do_expensive_check_(do_expensive_check)
{
}

Expand Down Expand Up @@ -192,6 +198,28 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
cugraph::graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>,
edge_type_id_t>(handle_);

if (drop_multi_edges_) {
std::tie(
edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) =
cugraph::sort_and_remove_multi_edges(handle_,
std::move(edgelist_srcs),
std::move(edgelist_dsts),
std::move(edgelist_weights),
std::move(edgelist_edge_ids),
std::move(edgelist_edge_types));
}

if (drop_self_loops_) {
std::tie(
edgelist_srcs, edgelist_dsts, edgelist_weights, edgelist_edge_ids, edgelist_edge_types) =
cugraph::remove_self_loops(handle_,
std::move(edgelist_srcs),
std::move(edgelist_dsts),
std::move(edgelist_weights),
std::move(edgelist_edge_ids),
std::move(edgelist_edge_types));
}

std::tie(*graph, new_edge_weights, new_edge_ids, new_edge_types, new_number_map) =
cugraph::create_graph_from_edgelist<vertex_t,
edge_t,
Expand All @@ -209,7 +237,7 @@ struct create_graph_functor : public cugraph::c_api::abstract_functor {
std::move(edgelist_edge_types),
cugraph::graph_properties_t{properties_->is_symmetric, properties_->is_multigraph},
renumber_,
check_);
do_expensive_check_);

if (renumber_) {
*number_map = std::move(new_number_map.value());
Expand Down Expand Up @@ -256,7 +284,9 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg(
cugraph_type_erased_device_array_view_t const* const* edge_type_ids,
bool_t store_transposed,
size_t num_arrays,
bool_t check,
bool_t drop_self_loops,
bool_t drop_multi_edges,
bool_t do_expensive_check,
cugraph_graph_t** graph,
cugraph_error_t** error)
{
Expand Down Expand Up @@ -432,7 +462,9 @@ extern "C" cugraph_error_code_t cugraph_graph_create_mg(
p_edge_type_ids,
num_arrays,
bool_t::TRUE,
check);
drop_self_loops,
drop_multi_edges,
do_expensive_check);

try {
cugraph::c_api::vertex_dispatcher(
Expand Down Expand Up @@ -462,7 +494,7 @@ extern "C" cugraph_error_code_t cugraph_mg_graph_create(
cugraph_type_erased_device_array_view_t const* edge_type_ids,
bool_t store_transposed,
size_t num_edges,
bool_t check,
bool_t do_expensive_check,
cugraph_graph_t** graph,
cugraph_error_t** error)
{
Expand All @@ -476,7 +508,9 @@ extern "C" cugraph_error_code_t cugraph_mg_graph_create(
&edge_type_ids,
store_transposed,
1,
check,
FALSE,
FALSE,
do_expensive_check,
graph,
error);
}
Expand Down
Loading

0 comments on commit 52b3162

Please sign in to comment.