New mtmg API for integration (#3521)
Creating a new API for integrating multi-threaded multi-GPU programs into the cugraph library.

This API will extend our OPG (one [process] per GPU) model to support a single process handling multiple GPUs, and will also ultimately support a multi-node configuration where some compute nodes might not have GPUs.
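As a rough sketch of the execution model this targets (illustration only, not the new API itself — it uses just `std::thread` and plain CUDA runtime calls): a single process starts one host thread per local GPU, and each thread binds to its device before doing any per-GPU work.

```cpp
// Illustration of the single-process, one-thread-per-GPU model (not the
// cugraph::mtmg API itself): each host thread binds to one local GPU.
#include <cuda_runtime_api.h>

#include <thread>
#include <vector>

int main()
{
  int num_gpus{};
  cudaGetDeviceCount(&num_gpus);

  std::vector<std::thread> threads;
  for (int rank = 0; rank < num_gpus; ++rank) {
    threads.emplace_back([rank]() {
      cudaSetDevice(rank);  // this thread now drives local GPU `rank`
      // ... per-GPU work (e.g. building and querying a graph) goes here ...
    });
  }
  for (auto& t : threads) { t.join(); }
  return 0;
}
```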

closes rapidsai/graph_dl#241

Authors:
  - Chuck Hastings (https://github.com/ChuckHastings)

Approvers:
  - Seunghwa Kang (https://github.com/seunghwak)

URL: #3521
ChuckHastings authored Sep 19, 2023
1 parent b2e85bf commit ed7b1a4
Showing 23 changed files with 2,268 additions and 3 deletions.
2 changes: 2 additions & 0 deletions cpp/CMakeLists.txt
@@ -166,6 +166,7 @@ endif()

include(cmake/thirdparty/get_nccl.cmake)
include(cmake/thirdparty/get_cuhornet.cmake)
include(cmake/thirdparty/get_ucp.cmake)

if(BUILD_TESTS)
include(cmake/thirdparty/get_gtest.cmake)
@@ -292,6 +293,7 @@ set(CUGRAPH_SOURCES
src/community/triangle_count_mg.cu
src/traversal/k_hop_nbrs_sg.cu
src/traversal/k_hop_nbrs_mg.cu
src/mtmg/vertex_result.cu
)

if(USE_CUGRAPH_OPS)
35 changes: 35 additions & 0 deletions cpp/cmake/thirdparty/get_ucp.cmake
@@ -0,0 +1,35 @@
#=============================================================================
# Copyright (c) 2023, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================

function(find_and_configure_ucp)

if(TARGET UCP::UCP)
return()
endif()

rapids_find_generate_module(UCP
HEADER_NAMES ucp.h
LIBRARY_NAMES ucp
INCLUDE_SUFFIXES ucp/api
)

# Currently UCP has no CMake build system, so we require
# it to be built and installed on the machine already
rapids_find_package(UCP REQUIRED)

endfunction()

find_and_configure_ucp()
39 changes: 39 additions & 0 deletions cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp
@@ -0,0 +1,39 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cugraph/mtmg/detail/device_shared_wrapper.hpp>
#include <raft/core/device_span.hpp>

namespace cugraph {
namespace mtmg {
namespace detail {

/**
* @brief Wrap an object to be available for each GPU
*
* In the MTMG environment we need the ability to manage a collection of objects
* that are associated with a particular GPU, and fetch the objects from an
* arbitrary GPU thread. This object will wrap any object and allow it to be
* accessed from different threads.
*/
template <typename T>
using device_shared_device_span_t = device_shared_wrapper_t<raft::device_span<T>>;

} // namespace detail
} // namespace mtmg
} // namespace cugraph
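As a hedged usage sketch (the free function and its parameters below are hypothetical; only the alias and the wrapper's `get()` come from this change), a GPU thread holding a `cugraph::mtmg::handle_t` can fetch the span registered for its own GPU:

```cpp
// Hypothetical helper: look up the device_span registered for the calling
// thread's GPU (keyed internally by handle.get_local_rank()) and report its size.
#include <cugraph/mtmg/detail/device_shared_device_span.hpp>

#include <cstddef>

std::size_t local_result_size(
  cugraph::mtmg::handle_t const& handle,
  cugraph::mtmg::detail::device_shared_device_span_t<float const>& results)
{
  auto span = results.get(handle);  // raft::device_span<float const> for this GPU
  return span.size();
}
```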
58 changes: 58 additions & 0 deletions cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp
@@ -0,0 +1,58 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cugraph/mtmg/detail/device_shared_device_span.hpp>
#include <rmm/device_uvector.hpp>

namespace cugraph {
namespace mtmg {
namespace detail {

/**
* @brief Wrap an object to be available for each GPU
*
* In the MTMG environment we need the ability to manage a collection of objects
* that are associated with a particular GPU, and fetch the objects from an
* arbitrary GPU thread. This object will wrap any object and allow it to be
* accessed from different threads.
*/
template <typename T>
class device_shared_device_vector_t : public device_shared_wrapper_t<rmm::device_uvector<T>> {
using parent_t = detail::device_shared_wrapper_t<rmm::device_uvector<T>>;

public:
/**
* @brief Create a device_shared_device_span (read-only view)
*/
auto view()
{
std::lock_guard<std::mutex> lock(parent_t::lock_);

device_shared_device_span_t<T const> result;

std::for_each(parent_t::objects_.begin(), parent_t::objects_.end(), [&result](auto& p) {
result.set(p.first, raft::device_span<T const>{p.second.data(), p.second.size()});
});

return result;
}
};

} // namespace detail
} // namespace mtmg
} // namespace cugraph
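A hedged sketch of how `view()` might be used (the function, sizes, and stream handling below are assumptions; `set()` and `view()` are from this change): a thread bound to one GPU registers its `rmm::device_uvector` under its local rank, and any thread can later take a read-only, per-GPU span view of the collection.

```cpp
// Hypothetical usage: register a per-GPU vector under its local rank, then
// take a read-only span view across all registered GPUs.
#include <cugraph/mtmg/detail/device_shared_device_vector.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

void register_and_view(cugraph::mtmg::detail::device_shared_device_vector_t<int>& vectors,
                       int local_rank,
                       rmm::cuda_stream_view stream)
{
  // Called from the thread bound to local_rank's GPU.
  rmm::device_uvector<int> local_data(1024, stream);
  vectors.set(local_rank, std::move(local_data));

  // view() returns a device_shared_device_span_t<int const>: one read-only
  // raft::device_span per registered GPU.
  auto spans = vectors.view();
  (void)spans;
}
```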
123 changes: 123 additions & 0 deletions cpp/include/cugraph/mtmg/detail/device_shared_wrapper.hpp
@@ -0,0 +1,123 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <cugraph/mtmg/handle.hpp>
#include <cugraph/utilities/error.hpp>

#include <map>
#include <mutex>

namespace cugraph {
namespace mtmg {
namespace detail {

/**
* @brief Wrap an object to be available for each GPU
*
* In the MTMG environment we need the ability to manage a collection of objects
* that are associated with a particular GPU, and fetch the objects from an
* arbitrary GPU thread. This object will wrap any object and allow it to be
* accessed from different threads.
*/
template <typename T>
class device_shared_wrapper_t {
public:
using wrapped_t = T;

device_shared_wrapper_t() = default;
device_shared_wrapper_t(device_shared_wrapper_t&& other) : objects_{std::move(other.objects_)} {}
device_shared_wrapper_t& operator=(device_shared_wrapper_t&& other)
{
objects_ = std::move(other.objects_);
return *this;
}

/**
* @brief Move a wrapped object into the wrapper for this thread
*
* @param handle Handle used to identify the GPU to associate this object with
* @param obj Wrapped object
*/
void set(cugraph::mtmg::handle_t const& handle, wrapped_t&& obj)
{
std::lock_guard<std::mutex> lock(lock_);

auto pos = objects_.find(handle.get_local_rank());
CUGRAPH_EXPECTS(pos == objects_.end(), "Cannot overwrite wrapped object");

objects_.insert(std::make_pair(handle.get_local_rank(), std::move(obj)));
}

/**
* @brief Move a wrapped object into the wrapper for the specified GPU
*
* @param local_rank Identifies which GPU to associate this object with
* @param obj Wrapped object
*/
void set(int local_rank, wrapped_t&& obj)
{
std::lock_guard<std::mutex> lock(lock_);

auto pos = objects_.find(local_rank);
CUGRAPH_EXPECTS(pos == objects_.end(), "Cannot overwrite wrapped object");

objects_.insert(std::make_pair(local_rank, std::move(obj)));
}

public:
/**
* @brief Get reference to an object for a particular thread
*
* @param handle Handle used to identify the GPU this object is associated with
* @return Reference to the wrapped object
*/
wrapped_t& get(cugraph::mtmg::handle_t const& handle)
{
std::lock_guard<std::mutex> lock(lock_);

auto pos = objects_.find(handle.get_local_rank());
CUGRAPH_EXPECTS(pos != objects_.end(), "Uninitialized wrapped object");

return pos->second;
}

/**
* @brief Get a const reference to an object for a particular thread
*
* @param handle Handle used to identify the GPU this object is associated with
* @return Const reference to the wrapped object
*/
wrapped_t const& get(cugraph::mtmg::handle_t const& handle) const
{
std::lock_guard<std::mutex> lock(lock_);

auto pos = objects_.find(handle.get_local_rank());

CUGRAPH_EXPECTS(pos != objects_.end(), "Uninitialized wrapped object");

return pos->second;
}

protected:
mutable std::mutex lock_{};
std::map<int, wrapped_t> objects_{};
};

} // namespace detail
} // namespace mtmg
} // namespace cugraph
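To make the set/get contract concrete, here is a hedged sketch using a plain `std::string` payload (the function and payload are hypothetical; `set()`, `get()`, and the overwrite check are from this change): each local rank may register exactly one object, a second `set()` for the same rank fails the `CUGRAPH_EXPECTS` check, and retrieval goes through `get(handle)`, keyed on `handle.get_local_rank()`.

```cpp
// Hypothetical usage of device_shared_wrapper_t with a host-side payload.
#include <cugraph/mtmg/detail/device_shared_wrapper.hpp>

#include <string>

void register_names(int num_local_gpus)
{
  cugraph::mtmg::detail::device_shared_wrapper_t<std::string> names;

  for (int rank = 0; rank < num_local_gpus; ++rank) {
    names.set(rank, "gpu_" + std::to_string(rank));  // one object per local rank
  }

  // names.set(0, std::string{"again"});  // would throw: "Cannot overwrite wrapped object"
  // A thread holding a handle_t for rank r reads it back with names.get(handle).
}
```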