Skip to content

Commit

Permalink
Merge branch 'branch-24.02' into mtmg_louvain
Browse files Browse the repository at this point in the history
  • Loading branch information
ChuckHastings committed Dec 1, 2023
2 parents 4dcb9f4 + c6aa981 commit 575f6a8
Show file tree
Hide file tree
Showing 20 changed files with 439 additions and 296 deletions.
54 changes: 21 additions & 33 deletions cpp/include/cugraph/graph_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,11 @@ class graph_base_t {
properties_(properties){};

vertex_t number_of_vertices() const { return number_of_vertices_; }
edge_t number_of_edges() const { return number_of_edges_; }
edge_t number_of_edges() const
{
CUGRAPH_EXPECTS(!(this->has_edge_mask()), "unimplemented.");
return number_of_edges_;
}

template <typename vertex_type = vertex_t>
std::enable_if_t<std::is_signed<vertex_type>::value, bool> is_valid_vertex(vertex_type v) const
Expand All @@ -285,6 +289,20 @@ class graph_base_t {
bool is_symmetric() const { return properties_.is_symmetric; }
bool is_multigraph() const { return properties_.is_multigraph; }

void attach_edge_mask(edge_property_view_t<edge_t, uint32_t const*, bool> edge_mask_view)
{
edge_mask_view_ = edge_mask_view;
}

void clear_edge_mask() { edge_mask_view_ = std::nullopt; }

bool has_edge_mask() const { return edge_mask_view_.has_value(); }

std::optional<edge_property_view_t<edge_t, uint32_t const*, bool>> edge_mask_view() const
{
return edge_mask_view_;
}

protected:
raft::handle_t const* handle_ptr() const { return handle_ptr_; };
graph_properties_t graph_properties() const { return properties_; }
Expand All @@ -296,6 +314,8 @@ class graph_base_t {
edge_t number_of_edges_{0};

graph_properties_t properties_{};

std::optional<edge_property_view_t<edge_t, uint32_t const*, bool>> edge_mask_view_{std::nullopt};
};

} // namespace detail
Expand Down Expand Up @@ -731,20 +751,6 @@ class graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu, std::enable_if
return local_sorted_unique_edge_dst_vertex_partition_offsets_;
}

void attach_edge_mask(edge_property_view_t<edge_t, uint32_t const*, bool> edge_mask_view)
{
edge_mask_view_ = edge_mask_view;
}

void clear_edge_mask() { edge_mask_view_ = std::nullopt; }

bool has_edge_mask() const { return edge_mask_view_.has_value(); }

std::optional<edge_property_view_t<edge_t, uint32_t const*, bool>> edge_mask_view() const
{
return edge_mask_view_;
}

private:
std::vector<edge_t const*> edge_partition_offsets_{};
std::vector<vertex_t const*> edge_partition_indices_{};
Expand Down Expand Up @@ -790,8 +796,6 @@ class graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu, std::enable_if
std::optional<raft::host_span<vertex_t const>>,
std::optional<std::byte> /* dummy */>
local_sorted_unique_edge_dst_vertex_partition_offsets_{std::nullopt};

std::optional<edge_property_view_t<edge_t, uint32_t const*, bool>> edge_mask_view_{std::nullopt};
};

// single-GPU version
Expand Down Expand Up @@ -1012,28 +1016,12 @@ class graph_view_t<vertex_t, edge_t, store_transposed, multi_gpu, std::enable_if
return std::nullopt;
}

void attach_edge_mask(edge_property_view_t<edge_t, uint32_t const*, bool> edge_mask_view)
{
edge_mask_view_ = edge_mask_view;
}

void clear_edge_mask() { edge_mask_view_ = std::nullopt; }

bool has_edge_mask() const { return edge_mask_view_.has_value(); }

std::optional<edge_property_view_t<edge_t, uint32_t const*, bool>> edge_mask_view() const
{
return edge_mask_view_;
}

private:
edge_t const* offsets_{nullptr};
vertex_t const* indices_{nullptr};

// segment offsets based on vertex degree, relevant only if vertex IDs are renumbered
std::optional<std::vector<vertex_t>> segment_offsets_{std::nullopt};

std::optional<edge_property_view_t<edge_t, uint32_t const*, bool>> edge_mask_view_{std::nullopt};
};

} // namespace cugraph
8 changes: 4 additions & 4 deletions cpp/include/cugraph/mtmg/detail/device_shared_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -57,10 +57,10 @@ class device_shared_wrapper_t {
{
std::lock_guard<std::mutex> lock(lock_);

auto pos = objects_.find(handle.get_local_rank());
auto pos = objects_.find(handle.get_rank());
CUGRAPH_EXPECTS(pos == objects_.end(), "Cannot overwrite wrapped object");

objects_.insert(std::make_pair(handle.get_local_rank(), std::move(obj)));
objects_.insert(std::make_pair(handle.get_rank(), std::move(obj)));
}

/**
Expand Down Expand Up @@ -90,7 +90,7 @@ class device_shared_wrapper_t {
{
std::lock_guard<std::mutex> lock(lock_);

auto pos = objects_.find(handle.get_local_rank());
auto pos = objects_.find(handle.get_rank());
CUGRAPH_EXPECTS(pos != objects_.end(), "Uninitialized wrapped object");

return pos->second;
Expand All @@ -106,7 +106,7 @@ class device_shared_wrapper_t {
{
std::lock_guard<std::mutex> lock(lock_);

auto pos = objects_.find(handle.get_local_rank());
auto pos = objects_.find(handle.get_rank());

CUGRAPH_EXPECTS(pos != objects_.end(), "Uninitialized wrapped object");

Expand Down
21 changes: 7 additions & 14 deletions cpp/include/cugraph/mtmg/handle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,19 @@ namespace mtmg {
*
*/
class handle_t {
handle_t(handle_t const&) = delete;
handle_t operator=(handle_t const&) = delete;

public:
/**
* @brief Constructor
*
* @param raft_handle Raft handle for the resources
* @param thread_rank Rank for this thread
* @param device_id Device id for the device this handle operates on
*/
handle_t(raft::handle_t const& raft_handle, int thread_rank, size_t device_id)
: raft_handle_(raft_handle),
thread_rank_(thread_rank),
local_rank_(raft_handle.get_comms().get_rank()), // FIXME: update for multi-node
device_id_(device_id)
handle_t(raft::handle_t const& raft_handle, int thread_rank, rmm::cuda_device_id device_id)
: raft_handle_(raft_handle), thread_rank_(thread_rank), device_id_raii_(device_id)
{
}

Expand Down Expand Up @@ -118,18 +119,10 @@ class handle_t {
*/
int get_rank() const { return raft_handle_.get_comms().get_rank(); }

/**
* @brief Get local gpu rank
*
* @return local gpu rank
*/
int get_local_rank() const { return local_rank_; }

private:
raft::handle_t const& raft_handle_;
int thread_rank_;
int local_rank_;
size_t device_id_;
rmm::cuda_set_device_raii device_id_raii_;
};

} // namespace mtmg
Expand Down
10 changes: 2 additions & 8 deletions cpp/include/cugraph/mtmg/instance_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -47,15 +47,10 @@ class instance_manager_t {

~instance_manager_t()
{
int current_device{};
RAFT_CUDA_TRY(cudaGetDevice(&current_device));

for (size_t i = 0; i < nccl_comms_.size(); ++i) {
RAFT_CUDA_TRY(cudaSetDevice(device_ids_[i].value()));
rmm::cuda_set_device_raii local_set_device(device_ids_[i]);
RAFT_NCCL_TRY(ncclCommDestroy(*nccl_comms_[i]));
}

RAFT_CUDA_TRY(cudaSetDevice(current_device));
}

/**
Expand All @@ -75,8 +70,7 @@ class instance_manager_t {
int gpu_id = local_id % raft_handle_.size();
int thread_id = local_id / raft_handle_.size();

RAFT_CUDA_TRY(cudaSetDevice(device_ids_[gpu_id].value()));
return handle_t(*raft_handle_[gpu_id], thread_id, static_cast<size_t>(gpu_id));
return handle_t(*raft_handle_[gpu_id], thread_id, device_ids_[gpu_id]);
}

/**
Expand Down
11 changes: 3 additions & 8 deletions cpp/include/cugraph/mtmg/resource_manager.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,7 @@ class resource_manager_t {

local_rank_map_.insert(std::pair(global_rank, local_device_id));

RAFT_CUDA_TRY(cudaSetDevice(local_device_id.value()));
rmm::cuda_set_device_raii local_set_device(local_device_id);

// FIXME: There is a bug in the cuda_memory_resource that results in a Hang.
// using the pool resource as a work-around.
Expand Down Expand Up @@ -182,14 +182,12 @@ class resource_manager_t {
--gpu_row_comm_size;
}

int current_device{};
RAFT_CUDA_TRY(cudaGetDevice(&current_device));
RAFT_NCCL_TRY(ncclGroupStart());

for (size_t i = 0; i < local_ranks_to_include.size(); ++i) {
int rank = local_ranks_to_include[i];
auto pos = local_rank_map_.find(rank);
RAFT_CUDA_TRY(cudaSetDevice(pos->second.value()));
rmm::cuda_set_device_raii local_set_device(pos->second);

nccl_comms.push_back(std::make_unique<ncclComm_t>());
handles.push_back(
Expand All @@ -204,7 +202,6 @@ class resource_manager_t {
handles[i].get(), *nccl_comms[i], ranks_to_include.size(), rank);
}
RAFT_NCCL_TRY(ncclGroupEnd());
RAFT_CUDA_TRY(cudaSetDevice(current_device));

std::vector<std::thread> running_threads;

Expand All @@ -217,9 +214,7 @@ class resource_manager_t {
&device_ids,
&nccl_comms,
&handles]() {
int rank = local_ranks_to_include[idx];
RAFT_CUDA_TRY(cudaSetDevice(device_ids[idx].value()));

rmm::cuda_set_device_raii local_set_device(device_ids[idx]);
cugraph::partition_manager::init_subcomm(*handles[idx], gpu_row_comm_size);
});
}
Expand Down
1 change: 1 addition & 0 deletions cpp/include/cugraph/utilities/misc_utils.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#include <raft/util/cudart_utils.hpp>
#include <rmm/device_uvector.hpp>

#include <cuda/atomic>
#include <thrust/binary_search.h>
#include <thrust/gather.h>
#include <thrust/iterator/counting_iterator.h>
Expand Down
2 changes: 0 additions & 2 deletions cpp/src/prims/count_if_e.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -74,8 +74,6 @@ typename GraphViewType::edge_type count_if_e(raft::handle_t const& handle,
using vertex_t = typename GraphViewType::vertex_type;
using edge_t = typename GraphViewType::edge_type;

CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

if (do_expensive_check) {
// currently, nothing to do
}
Expand Down
73 changes: 65 additions & 8 deletions cpp/src/prims/fill_edge_property.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once

#include <cugraph/edge_partition_edge_property_device_view.cuh>
#include <cugraph/edge_property.hpp>
#include <cugraph/graph_view.hpp>
#include <cugraph/utilities/error.hpp>
Expand All @@ -23,6 +24,7 @@
#include <rmm/exec_policy.hpp>

#include <thrust/fill.h>
#include <thrust/iterator/constant_iterator.h>

#include <cstddef>

Expand All @@ -38,21 +40,78 @@ void fill_edge_property(raft::handle_t const& handle,
{
static_assert(std::is_same_v<T, typename EdgePropertyOutputWrapper::value_type>);

using edge_t = typename GraphViewType::edge_type;

auto edge_mask_view = graph_view.edge_mask_view();

auto value_firsts = edge_property_output.value_firsts();
auto edge_counts = edge_property_output.edge_counts();
for (size_t i = 0; i < graph_view.number_of_local_edge_partitions(); ++i) {
auto edge_partition_e_mask =
edge_mask_view
? thrust::make_optional<
detail::edge_partition_edge_property_device_view_t<edge_t, uint32_t const*, bool>>(
*edge_mask_view, i)
: thrust::nullopt;

if constexpr (cugraph::has_packed_bool_element<
std::remove_reference_t<decltype(value_firsts[i])>,
T>()) {
static_assert(std::is_arithmetic_v<T>, "unimplemented for thrust::tuple types.");
auto packed_input = input ? packed_bool_full_mask() : packed_bool_empty_mask();
thrust::fill_n(handle.get_thrust_policy(),
value_firsts[i],
packed_bool_size(static_cast<size_t>(edge_counts[i])),
packed_input);
auto rem = edge_counts[i] % packed_bools_per_word();
if (edge_partition_e_mask) {
auto input_first =
thrust::make_zip_iterator(value_firsts[i], (*edge_partition_e_mask).value_first());
thrust::transform(handle.get_thrust_policy(),
input_first,
input_first + packed_bool_size(static_cast<size_t>(edge_counts[i] - rem)),
value_firsts[i],
[packed_input] __device__(thrust::tuple<T, uint32_t> pair) {
auto old_value = thrust::get<0>(pair);
auto mask = thrust::get<1>(pair);
return (old_value & ~mask) | (packed_input & mask);
});
if (rem > 0) {
thrust::transform(
handle.get_thrust_policy(),
input_first + packed_bool_size(static_cast<size_t>(edge_counts[i] - rem)),
input_first + packed_bool_size(static_cast<size_t>(edge_counts[i])),
value_firsts[i] + packed_bool_size(static_cast<size_t>(edge_counts[i] - rem)),
[packed_input, rem] __device__(thrust::tuple<T, uint32_t> pair) {
auto old_value = thrust::get<0>(pair);
auto mask = thrust::get<1>(pair);
return ((old_value & ~mask) | (packed_input & mask)) & packed_bool_partial_mask(rem);
});
}
} else {
thrust::fill_n(handle.get_thrust_policy(),
value_firsts[i],
packed_bool_size(static_cast<size_t>(edge_counts[i] - rem)),
packed_input);
if (rem > 0) {
thrust::fill_n(
handle.get_thrust_policy(),
value_firsts[i] + packed_bool_size(static_cast<size_t>(edge_counts[i] - rem)),
1,
packed_input & packed_bool_partial_mask(rem));
}
}
} else {
thrust::fill_n(
handle.get_thrust_policy(), value_firsts[i], static_cast<size_t>(edge_counts[i]), input);
if (edge_partition_e_mask) {
thrust::transform_if(handle.get_thrust_policy(),
thrust::make_constant_iterator(input),
thrust::make_constant_iterator(input) + edge_counts[i],
thrust::make_counting_iterator(edge_t{0}),
value_firsts[i],
thrust::identity<T>{},
[edge_partition_e_mask = *edge_partition_e_mask] __device__(edge_t i) {
return edge_partition_e_mask.get(i);
});
} else {
thrust::fill_n(
handle.get_thrust_policy(), value_firsts[i], static_cast<size_t>(edge_counts[i]), input);
}
}
}
}
Expand All @@ -79,8 +138,6 @@ void fill_edge_property(raft::handle_t const& handle,
edge_property_t<GraphViewType, T>& edge_property_output,
bool do_expensive_check = false)
{
CUGRAPH_EXPECTS(!graph_view.has_edge_mask(), "unimplemented.");

if (do_expensive_check) {
// currently, nothing to do
}
Expand Down
Loading

0 comments on commit 575f6a8

Please sign in to comment.