Skip to content

Commit

Permalink
Create a graph from the edge list in multiple chunks (#4539)
Browse files Browse the repository at this point in the history
Added a graph creation function that takes the edge list in multiple chunks. This helps to cut the peak memory footprint.

Authors:
  - Seunghwa Kang (https://github.com/seunghwak)

Approvers:
  - Chuck Hastings (https://github.com/ChuckHastings)

URL: #4539
  • Loading branch information
seunghwak authored Jul 18, 2024
1 parent 8829aa9 commit 55cb992
Show file tree
Hide file tree
Showing 14 changed files with 1,843 additions and 428 deletions.
66 changes: 66 additions & 0 deletions cpp/include/cugraph/graph_functions.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,72 @@ create_graph_from_edgelist(raft::handle_t const& handle,
bool renumber,
bool do_expensive_check = false);

/**
* @brief create a graph from (the optional vertex list and) the given edge list (with optional edge
* IDs and types).
*
* This version takes the edge list in multiple chunks (e.g. edge data from multiple files); each
* edge list argument is a std::vector holding one device vector per chunk. Accepting the input in
* chunks is intended to cut the peak memory footprint. All vertex and edge inputs are taken by
* rvalue reference.
*
* @tparam vertex_t Type of vertex identifiers. Needs to be an integral type.
* @tparam edge_t Type of edge identifiers. Needs to be an integral type.
* @tparam weight_t Type of edge weight. Needs to be a floating point type.
* @tparam edge_id_t Type of edge id. Needs to be an integral type.
* @tparam edge_type_t Type of edge type. Needs to be an integral type, currently only int32_t is
* supported.
* @tparam store_transposed Flag indicating whether to use sources (if false) or destinations (if
* true) as major indices in storing edges using a 2D sparse matrix.
* @tparam multi_gpu Flag indicating whether template instantiation should target single-GPU (false)
* or multi-GPU (true).
* @param handle RAFT handle object to encapsulate resources (e.g. CUDA stream, communicator, and
* handles to various CUDA libraries) to run graph algorithms.
* @param vertices If valid, part of the entire set of vertices in the graph to be renumbered.
* This parameter can be used to include isolated vertices. If @p renumber is false and @p vertices
* is valid, @p vertices elements should be consecutive integers starting from 0. If multi-GPU,
* applying the compute_gpu_id_from_vertex_t to every vertex should return the local GPU ID for this
* function to work (vertices should be pre-shuffled).
* @param edgelist_srcs Vectors of edge source vertex IDs (one vector per chunk). If multi-GPU,
* applying the compute_gpu_id_from_ext_edge_endpoints_t to every edge should return the local GPU ID
* for this function to work (edges should be pre-shuffled).
* @param edgelist_dsts Vectors of edge destination vertex IDs (one vector per chunk; presumably
* chunk i must have the same size as chunk i of @p edgelist_srcs -- confirm against the
* implementation).
* @param edgelist_weights If valid, vectors of weight values for edges (one vector per chunk).
* @param edgelist_edge_ids If valid, vectors of edge_id values for edges (one vector per chunk).
* @param edgelist_edge_types If valid, vectors of edge_type values for edges (one vector per
* chunk).
* @param graph_properties Properties of the graph represented by the input (optional vertex list
* and) edge list.
* @param renumber Flag indicating whether to renumber vertices or not (must be true if @p multi_gpu
* is true).
* @param do_expensive_check A flag to run expensive checks for input arguments (if set to `true`).
* @return Tuple of, in order: the generated graph, the optional edge_property_t objects storing the
* provided edge weights, edge IDs, and edge types, and an optional renumber map (if @p renumber is
* true).
*/
template <typename vertex_t,
typename edge_t,
typename weight_t,
typename edge_id_t,
typename edge_type_t,
bool store_transposed,
bool multi_gpu>
std::tuple<
graph_t<vertex_t, edge_t, store_transposed, multi_gpu>,
std::optional<
edge_property_t<graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>, weight_t>>,
std::optional<
edge_property_t<graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>, edge_id_t>>,
std::optional<
edge_property_t<graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu>, edge_type_t>>,
std::optional<rmm::device_uvector<vertex_t>>>
create_graph_from_edgelist(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<vertex_t>>&& vertices,
std::vector<rmm::device_uvector<vertex_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<vertex_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<weight_t>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<edge_id_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<edge_type_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check = false);

/**
* @brief Find all 2-hop neighbors in the graph
*
Expand Down
1,196 changes: 946 additions & 250 deletions cpp/src/structure/create_graph_from_edgelist_impl.cuh

Large diffs are not rendered by default.

92 changes: 88 additions & 4 deletions cpp/src/structure/create_graph_from_edgelist_mg_v32_e32.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ template std::tuple<
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int32_t, float, int32_t, int32_t, false, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertex_span,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
rmm::device_uvector<int32_t>&& edgelist_srcs,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<float>>&& edgelist_weights,
Expand All @@ -51,7 +51,7 @@ template std::tuple<
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int32_t, float, int32_t, int32_t, true, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertex_span,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
rmm::device_uvector<int32_t>&& edgelist_srcs,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<float>>&& edgelist_weights,
Expand All @@ -72,7 +72,7 @@ template std::tuple<
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int32_t, double, int32_t, int32_t, false, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertex_span,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
rmm::device_uvector<int32_t>&& edgelist_srcs,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<double>>&& edgelist_weights,
Expand All @@ -93,7 +93,7 @@ template std::tuple<
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int32_t, double, int32_t, int32_t, true, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertex_span,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
rmm::device_uvector<int32_t>&& edgelist_srcs,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<double>>&& edgelist_weights,
Expand All @@ -103,4 +103,88 @@ create_graph_from_edgelist<int32_t, int32_t, double, int32_t, int32_t, true, tru
bool renumber,
bool do_expensive_check);

// Explicit instantiation of the chunked overload (edge list passed as std::vector of chunks):
// vertex_t = int32_t, edge_t = int32_t, weight_t = float, edge_id_t = int32_t,
// edge_type_t = int32_t, store_transposed = false, multi_gpu = true.
template std::tuple<
cugraph::graph_t<int32_t, int32_t, false, true>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, float>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t>>,
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int32_t, float, int32_t, int32_t, false, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<float>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check);

// Explicit instantiation of the chunked overload (edge list passed as std::vector of chunks):
// vertex_t = int32_t, edge_t = int32_t, weight_t = float, edge_id_t = int32_t,
// edge_type_t = int32_t, store_transposed = true, multi_gpu = true.
template std::tuple<
cugraph::graph_t<int32_t, int32_t, true, true>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, true, true>, float>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, true, true>, int32_t>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, true, true>, int32_t>>,
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int32_t, float, int32_t, int32_t, true, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<float>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check);

// Explicit instantiation of the chunked overload (edge list passed as std::vector of chunks):
// vertex_t = int32_t, edge_t = int32_t, weight_t = double, edge_id_t = int32_t,
// edge_type_t = int32_t, store_transposed = false, multi_gpu = true.
template std::tuple<
cugraph::graph_t<int32_t, int32_t, false, true>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, double>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, false, true>, int32_t>>,
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int32_t, double, int32_t, int32_t, false, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<double>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check);

// Explicit instantiation of the chunked overload (edge list passed as std::vector of chunks):
// vertex_t = int32_t, edge_t = int32_t, weight_t = double, edge_id_t = int32_t,
// edge_type_t = int32_t, store_transposed = true, multi_gpu = true.
template std::tuple<
cugraph::graph_t<int32_t, int32_t, true, true>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, true, true>, double>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, true, true>, int32_t>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int32_t, true, true>, int32_t>>,
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int32_t, double, int32_t, int32_t, true, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<double>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check);

} // namespace cugraph
91 changes: 87 additions & 4 deletions cpp/src/structure/create_graph_from_edgelist_mg_v32_e64.cu
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ template std::tuple<
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int64_t, float, int64_t, int32_t, false, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertex_span,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
rmm::device_uvector<int32_t>&& edgelist_srcs,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<float>>&& edgelist_weights,
Expand All @@ -51,7 +51,7 @@ template std::tuple<
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int64_t, float, int64_t, int32_t, true, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertex_span,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
rmm::device_uvector<int32_t>&& edgelist_srcs,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<float>>&& edgelist_weights,
Expand All @@ -72,7 +72,7 @@ template std::tuple<
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int64_t, double, int64_t, int32_t, false, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertex_span,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
rmm::device_uvector<int32_t>&& edgelist_srcs,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<double>>&& edgelist_weights,
Expand All @@ -93,7 +93,7 @@ template std::tuple<
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int64_t, double, int64_t, int32_t, true, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertex_span,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
rmm::device_uvector<int32_t>&& edgelist_srcs,
rmm::device_uvector<int32_t>&& edgelist_dsts,
std::optional<rmm::device_uvector<double>>&& edgelist_weights,
Expand All @@ -103,4 +103,87 @@ create_graph_from_edgelist<int32_t, int64_t, double, int64_t, int32_t, true, tru
bool renumber,
bool do_expensive_check);

// Explicit instantiation of the chunked overload (edge list passed as std::vector of chunks):
// vertex_t = int32_t, edge_t = int64_t, weight_t = float, edge_id_t = int64_t,
// edge_type_t = int32_t, store_transposed = false, multi_gpu = true.
template std::tuple<
cugraph::graph_t<int32_t, int64_t, false, true>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, false, true>, float>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, false, true>, int64_t>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, false, true>, int32_t>>,
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int64_t, float, int64_t, int32_t, false, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<float>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<int64_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check);

// Explicit instantiation of the chunked overload (edge list passed as std::vector of chunks):
// vertex_t = int32_t, edge_t = int64_t, weight_t = float, edge_id_t = int64_t,
// edge_type_t = int32_t, store_transposed = true, multi_gpu = true.
template std::tuple<
cugraph::graph_t<int32_t, int64_t, true, true>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, true, true>, float>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, true, true>, int64_t>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, true, true>, int32_t>>,
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int64_t, float, int64_t, int32_t, true, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<float>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<int64_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check);

// Explicit instantiation of the chunked overload (edge list passed as std::vector of chunks):
// vertex_t = int32_t, edge_t = int64_t, weight_t = double, edge_id_t = int64_t,
// edge_type_t = int32_t, store_transposed = false, multi_gpu = true.
template std::tuple<
cugraph::graph_t<int32_t, int64_t, false, true>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, false, true>, double>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, false, true>, int64_t>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, false, true>, int32_t>>,
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int64_t, double, int64_t, int32_t, false, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<double>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<int64_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check);

// Explicit instantiation of the chunked overload (edge list passed as std::vector of chunks):
// vertex_t = int32_t, edge_t = int64_t, weight_t = double, edge_id_t = int64_t,
// edge_type_t = int32_t, store_transposed = true, multi_gpu = true.
template std::tuple<
cugraph::graph_t<int32_t, int64_t, true, true>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, true, true>, double>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, true, true>, int64_t>>,
std::optional<
cugraph::edge_property_t<cugraph::graph_view_t<int32_t, int64_t, true, true>, int32_t>>,
std::optional<rmm::device_uvector<int32_t>>>
create_graph_from_edgelist<int32_t, int64_t, double, int64_t, int32_t, true, true>(
raft::handle_t const& handle,
std::optional<rmm::device_uvector<int32_t>>&& vertices,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_srcs,
std::vector<rmm::device_uvector<int32_t>>&& edgelist_dsts,
std::optional<std::vector<rmm::device_uvector<double>>>&& edgelist_weights,
std::optional<std::vector<rmm::device_uvector<int64_t>>>&& edgelist_edge_ids,
std::optional<std::vector<rmm::device_uvector<int32_t>>>&& edgelist_edge_types,
graph_properties_t graph_properties,
bool renumber,
bool do_expensive_check);
} // namespace cugraph
Loading

0 comments on commit 55cb992

Please sign in to comment.