-
Notifications
You must be signed in to change notification settings - Fork 309
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New mtmg API for integration (#3521)
Creating a new API for integrating multi-threaded multi-GPU programs into the cugraph library. This API will extend our OPG (one [process] per GPU) model to support a single process handling multiple GPUs, and will also ultimately support a multi-node configuration where some compute nodes might not have GPUs. closes rapidsai/graph_dl#241 Authors: - Chuck Hastings (https://github.com/ChuckHastings) Approvers: - Seunghwa Kang (https://github.com/seunghwak) URL: #3521
- Loading branch information
1 parent
b2e85bf
commit ed7b1a4
Showing
23 changed files
with
2,268 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
#============================================================================= | ||
# Copyright (c) 2023, NVIDIA CORPORATION. | ||
# | ||
# Licensed under the Apache License, Version 2.0 (the "License"); | ||
# you may not use this file except in compliance with the License. | ||
# You may obtain a copy of the License at | ||
# | ||
# http://www.apache.org/licenses/LICENSE-2.0 | ||
# | ||
# Unless required by applicable law or agreed to in writing, software | ||
# distributed under the License is distributed on an "AS IS" BASIS, | ||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
# See the License for the specific language governing permissions and | ||
# limitations under the License. | ||
#============================================================================= | ||
|
||
# Locate an already-installed UCP through a generated find module.
# Nothing is done when a UCP::UCP target is already defined.
function(find_and_configure_ucp)

    if(NOT TARGET UCP::UCP)
        # Describe where the UCP header and library live so that a
        # find module can be generated for the package.
        rapids_find_generate_module(UCP
            HEADER_NAMES     ucp.h
            LIBRARY_NAMES    ucp
            INCLUDE_SUFFIXES ucp/api
        )

        # UCP currently has no CMake build-system of its own, so it must
        # already be built and installed on the machine.
        rapids_find_package(UCP REQUIRED)
    endif()

endfunction()

find_and_configure_ucp()
39 changes: 39 additions & 0 deletions
39
cpp/include/cugraph/mtmg/detail/device_shared_device_span.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,39 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <cugraph/mtmg/detail/device_shared_wrapper.hpp> | ||
#include <raft/core/device_span.hpp> | ||
|
||
namespace cugraph { | ||
namespace mtmg { | ||
namespace detail { | ||
|
||
/** | ||
* @brief Wrap an object to be available for each GPU | ||
* | ||
* In the MTMG environment we need the ability to manage a collection of objects | ||
* that are associated with a particular GPU, and fetch the objects from an | ||
* arbitrary GPU thread. This object will wrap any object and allow it to be | ||
* accessed from different threads. | ||
*/ | ||
template <typename T> | ||
using device_shared_device_span_t = device_shared_wrapper_t<raft::device_span<T>>; | ||
|
||
} // namespace detail | ||
} // namespace mtmg | ||
} // namespace cugraph |
58 changes: 58 additions & 0 deletions
58
cpp/include/cugraph/mtmg/detail/device_shared_device_vector.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,58 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <cugraph/mtmg/detail/device_shared_device_span.hpp> | ||
#include <rmm/device_uvector.hpp> | ||
|
||
namespace cugraph { | ||
namespace mtmg { | ||
namespace detail { | ||
|
||
/** | ||
* @brief Wrap an object to be available for each GPU | ||
* | ||
* In the MTMG environment we need the ability to manage a collection of objects | ||
* that are associated with a particular GPU, and fetch the objects from an | ||
* arbitrary GPU thread. This object will wrap any object and allow it to be | ||
* accessed from different threads. | ||
*/ | ||
template <typename T> | ||
class device_shared_device_vector_t : public device_shared_wrapper_t<rmm::device_uvector<T>> { | ||
using parent_t = detail::device_shared_wrapper_t<rmm::device_uvector<T>>; | ||
|
||
public: | ||
/** | ||
* @brief Create a device_shared_device_span (read only view) | ||
*/ | ||
auto view() | ||
{ | ||
std::lock_guard<std::mutex> lock(parent_t::lock_); | ||
|
||
device_shared_device_span_t<T const> result; | ||
|
||
std::for_each(parent_t::objects_.begin(), parent_t::objects_.end(), [&result](auto& p) { | ||
result.set(p.first, raft::device_span<T const>{p.second.data(), p.second.size()}); | ||
}); | ||
|
||
return result; | ||
} | ||
}; | ||
|
||
} // namespace detail | ||
} // namespace mtmg | ||
} // namespace cugraph |
123 changes: 123 additions & 0 deletions
123
cpp/include/cugraph/mtmg/detail/device_shared_wrapper.hpp
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,123 @@ | ||
/* | ||
* Copyright (c) 2023, NVIDIA CORPORATION. | ||
* | ||
* Licensed under the Apache License, Version 2.0 (the "License"); | ||
* you may not use this file except in compliance with the License. | ||
* You may obtain a copy of the License at | ||
* | ||
* http://www.apache.org/licenses/LICENSE-2.0 | ||
* | ||
* Unless required by applicable law or agreed to in writing, software | ||
* distributed under the License is distributed on an "AS IS" BASIS, | ||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | ||
* See the License for the specific language governing permissions and | ||
* limitations under the License. | ||
*/ | ||
|
||
#pragma once | ||
|
||
#include <cugraph/mtmg/handle.hpp> | ||
#include <cugraph/utilities/error.hpp> | ||
|
||
#include <map> | ||
#include <mutex> | ||
|
||
namespace cugraph { | ||
namespace mtmg { | ||
namespace detail { | ||
|
||
/** | ||
* @brief Wrap an object to be available for each GPU | ||
* | ||
* In the MTMG environment we need the ability to manage a collection of objects | ||
* that are associated with a particular GPU, and fetch the objects from an | ||
* arbitrary GPU thread. This object will wrap any object and allow it to be | ||
* accessed from different threads. | ||
*/ | ||
template <typename T> | ||
class device_shared_wrapper_t { | ||
public: | ||
using wrapped_t = T; | ||
|
||
device_shared_wrapper_t() = default; | ||
device_shared_wrapper_t(device_shared_wrapper_t&& other) : objects_{std::move(other.objects_)} {} | ||
device_shared_wrapper_t& operator=(device_shared_wrapper_t&& other) | ||
{ | ||
objects_ = std::move(other.objects_); | ||
return *this; | ||
} | ||
|
||
/** | ||
* @brief Move a wrapped object into the wrapper for this thread | ||
* | ||
* @param handle Handle is used to identify the GPU we associated this object with | ||
* @param obj Wrapped object | ||
*/ | ||
void set(cugraph::mtmg::handle_t const& handle, wrapped_t&& obj) | ||
{ | ||
std::lock_guard<std::mutex> lock(lock_); | ||
|
||
auto pos = objects_.find(handle.get_local_rank()); | ||
CUGRAPH_EXPECTS(pos == objects_.end(), "Cannot overwrite wrapped object"); | ||
|
||
objects_.insert(std::make_pair(handle.get_local_rank(), std::move(obj))); | ||
} | ||
|
||
/** | ||
* @brief Move a wrapped object into the wrapper for this thread | ||
* | ||
* @param local_rank Identify which GPU to associated this object with | ||
* @param obj Wrapped object | ||
*/ | ||
void set(int local_rank, wrapped_t&& obj) | ||
{ | ||
std::lock_guard<std::mutex> lock(lock_); | ||
|
||
auto pos = objects_.find(local_rank); | ||
CUGRAPH_EXPECTS(pos == objects_.end(), "Cannot overwrite wrapped object"); | ||
|
||
objects_.insert(std::make_pair(local_rank, std::move(obj))); | ||
} | ||
|
||
public: | ||
/** | ||
* @brief Get reference to an object for a particular thread | ||
* | ||
* @param handle Handle is used to identify the GPU we associated this object with | ||
* @return Reference to the wrapped object | ||
*/ | ||
wrapped_t& get(cugraph::mtmg::handle_t const& handle) | ||
{ | ||
std::lock_guard<std::mutex> lock(lock_); | ||
|
||
auto pos = objects_.find(handle.get_local_rank()); | ||
CUGRAPH_EXPECTS(pos != objects_.end(), "Uninitialized wrapped object"); | ||
|
||
return pos->second; | ||
} | ||
|
||
/** | ||
* @brief Get the pointer to an object for a particular thread from this wrapper | ||
* | ||
* @param handle Handle is used to identify the GPU we associated this object with | ||
* @return Shared pointer the wrapped object | ||
*/ | ||
wrapped_t const& get(cugraph::mtmg::handle_t const& handle) const | ||
{ | ||
std::lock_guard<std::mutex> lock(lock_); | ||
|
||
auto pos = objects_.find(handle.get_local_rank()); | ||
|
||
CUGRAPH_EXPECTS(pos != objects_.end(), "Uninitialized wrapped object"); | ||
|
||
return pos->second; | ||
} | ||
|
||
protected: | ||
mutable std::mutex lock_{}; | ||
std::map<int, wrapped_t> objects_{}; | ||
}; | ||
|
||
} // namespace detail | ||
} // namespace mtmg | ||
} // namespace cugraph |
Oops, something went wrong.