Skip to content

Commit

Permalink
[DO NOT MERGE] PoC for the new design of cuda::mr::memory_resource
Browse files Browse the repository at this point in the history
  • Loading branch information
miscco committed Sep 26, 2022
1 parent d212232 commit 2c85df6
Show file tree
Hide file tree
Showing 9 changed files with 202 additions and 138 deletions.
11 changes: 9 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -58,13 +58,16 @@ rapids_find_package(
rapids_cpm_init()
include(cmake/thirdparty/get_spdlog.cmake)
include(cmake/thirdparty/get_thrust.cmake)
include(cmake/thirdparty/get_libcudacxx.cmake)

# library targets
add_library(rmm INTERFACE)
add_library(rmm::rmm ALIAS rmm)

target_include_directories(rmm INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>")
target_include_directories(
rmm
INTERFACE "$<BUILD_INTERFACE:${LIBCUDACXX_INCLUDE_DIR}>"
"$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>" "$<INSTALL_INTERFACE:include>")

if(CUDA_STATIC_RUNTIME)
message(STATUS "RMM: Enabling static linking of cudart")
Expand Down Expand Up @@ -109,6 +112,10 @@ include(CPack)
# install export targets
install(TARGETS rmm EXPORT rmm-exports)
install(DIRECTORY include/rmm/ DESTINATION include/rmm)
install(
DIRECTORY ${RMM_GENERATED_INCLUDE_DIR}/include/libcxx
${RMM_GENERATED_INCLUDE_DIR}/include/libcudacxx
DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/rmm)
install(FILES ${RMM_BINARY_DIR}/include/rmm/version_config.hpp DESTINATION include/rmm)

set(doc_string
Expand Down
33 changes: 33 additions & 0 deletions cmake/thirdparty/get_libcudacxx.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
# =============================================================================
# Copyright (c) 2020, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software distributed under the License
# is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
# or implied. See the License for the specific language governing permissions and limitations under
# the License.
# =============================================================================

# Use CPM to find or clone libcudacxx
function(find_and_configure_libcudacxx VERSION)
rapids_cpm_find(
libcudacxx ${VERSION}
GIT_REPOSITORY https://github.com/miscco/libcudacxx.git
GIT_TAG memory_resource # ${VERSION}
GIT_SHALLOW TRUE DOWNLOAD_ONLY TRUE)

set(LIBCUDACXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/include"
PARENT_SCOPE)
set(LIBCXX_INCLUDE_DIR
"${libcudacxx_SOURCE_DIR}/libcxx/include"
PARENT_SCOPE)
endfunction()

set(RMM_MIN_VERSION_libcudacxx 1.5.0)

find_and_configure_libcudacxx(${RMM_MIN_VERSION_libcudacxx})
6 changes: 6 additions & 0 deletions include/rmm/cuda_stream_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@
#include <atomic>
#include <cstddef>
#include <cstdint>
#include <cuda/stream_ref>

namespace rmm {

Expand Down Expand Up @@ -60,6 +61,11 @@ class cuda_stream_view {
* @brief Implicit conversion to cudaStream_t.
*/
constexpr operator cudaStream_t() const noexcept { return value(); }

/**
* @brief Implicit conversion to stream_ref.
*/
operator cuda::stream_ref() const noexcept { return value(); }

/**
* @brief Return true if the wrapped stream is the CUDA per-thread default stream.
Expand Down
95 changes: 93 additions & 2 deletions include/rmm/mr/device/device_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
#include <cstddef>
#include <utility>

#include <cuda/memory_resource>

namespace rmm::mr {

/**
Expand Down Expand Up @@ -88,6 +90,28 @@ class device_memory_resource {
device_memory_resource(device_memory_resource&&) noexcept = default;
device_memory_resource& operator=(device_memory_resource&&) noexcept = default;

/**
* @brief Allocates memory of size at least \p bytes.
*
* The returned pointer will have at minimum 256 byte alignment.
*
* If supported, this operation may optionally be executed on a stream.
* Otherwise, the stream is ignored and the null stream is used.
*
* @throws `rmm::bad_alloc` When the requested `bytes` cannot be allocated on
* the specified `stream`.
*
* @param bytes The size of the allocation
* @param alignment The desired alignment, will be rounded up to `allocation_size_alignment`
* @param stream Stream on which to perform allocation
* @return void* Pointer to the newly allocated memory
*/
void* allocate(std::size_t bytes, std::size_t alignment = 0)
{
const auto align = std::min(allocation_size_alignment, alignment);
return do_allocate(rmm::detail::align_up(bytes, align), cuda_stream_view{});
}

/**
* @brief Allocates memory of size at least \p bytes.
*
Expand All @@ -103,11 +127,36 @@ class device_memory_resource {
* @param stream Stream on which to perform allocation
* @return void* Pointer to the newly allocated memory
*/
void* allocate(std::size_t bytes, cuda_stream_view stream = cuda_stream_view{})
void* allocate(std::size_t bytes, cuda_stream_view stream)
{
return do_allocate(rmm::detail::align_up(bytes, allocation_size_alignment), stream);
}

/**
* @brief Deallocate memory pointed to by \p p.
*
* `p` must have been returned by a prior call to `allocate(bytes,stream)` on
* a `device_memory_resource` that compares equal to `*this`, and the storage
* it points to must not yet have been deallocated, otherwise behavior is
* undefined.
*
* If supported, this operation may optionally be executed on a stream.
* Otherwise, the stream is ignored and the null stream is used.
*
* @throws Nothing.
*
* @param p Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the
* value of `bytes` that was passed to the `allocate` call that returned `p`.
* @param alignment The alignment of the allocation. must be the one passed to `allocate`
* @param stream Stream on which to perform deallocation
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = 0)
{
const auto align = std::min(allocation_size_alignment, alignment);
do_deallocate(ptr, rmm::detail::align_up(bytes, align), cuda_stream_view{});
}

/**
* @brief Deallocate memory pointed to by \p p.
*
Expand All @@ -126,7 +175,7 @@ class device_memory_resource {
* value of `bytes` that was passed to the `allocate` call that returned `p`.
* @param stream Stream on which to perform deallocation
*/
void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream = cuda_stream_view{})
void deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream)
{
do_deallocate(ptr, rmm::detail::align_up(bytes, allocation_size_alignment), stream);
}
Expand All @@ -148,6 +197,42 @@ class device_memory_resource {
{
return do_is_equal(other);
}

/**
* @brief Compare this resource to another.
*
* Two device_memory_resources compare equal if and only if memory allocated
* from one device_memory_resource can be deallocated from the other and vice
* versa.
*
* By default, simply checks if \p *this and \p other refer to the same
* object, i.e., does not check if they are two objects of the same class.
*
* @param other The other resource to compare to
* @returns If the two resources are equivalent
*/
[[nodiscard]] bool operator==(device_memory_resource const& other) const noexcept
{
return do_is_equal(other);
}

/**
* @brief Compare this resource to another.
*
* Two device_memory_resources compare equal if and only if memory allocated
* from one device_memory_resource can be deallocated from the other and vice
* versa.
*
* By default, simply checks if \p *this and \p other do not refer to the same
* object, i.e., does not check if they are two objects of the same class.
*
* @param other The other resource to compare to
* @returns If the two resources are equivalent
*/
[[nodiscard]] bool operator!=(device_memory_resource const& other) const noexcept
{
return !do_is_equal(other);
}

/**
* @brief Query whether the resource supports use of non-null CUDA streams for
Expand Down Expand Up @@ -176,6 +261,12 @@ class device_memory_resource {
{
return do_get_mem_info(stream);
}

/**
* @brief Signal that this resource allocates device accessible memory.
*/
friend void get_property(device_memory_resource const&, cuda::mr::device_accessible) noexcept
{}

private:
// All allocations are padded to a multiple of allocation_size_alignment bytes.
Expand Down
136 changes: 28 additions & 108 deletions include/rmm/mr/host/host_memory_resource.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <cstddef>
#include <utility>

#include <cuda/memory_resource>

namespace rmm::mr {

/**
Expand All @@ -26,24 +28,11 @@ namespace rmm::mr {
* This is based on `std::pmr::memory_resource`:
* https://en.cppreference.com/w/cpp/memory/memory_resource
*
* When C++17 is available for use in RMM, `rmm::host_memory_resource` should
* inherit from `std::pmr::memory_resource`.
*
* This class serves as the interface that all host memory resource
* implementations must satisfy.
*
* There are two private, pure virtual functions that all derived classes must
* implement: `do_allocate` and `do_deallocate`. Optionally, derived classes may
* also override `is_equal`. By default, `is_equal` simply performs an identity
* comparison.
*
* The public, non-virtual functions `allocate`, `deallocate`, and `is_equal`
* simply call the private virtual functions. The reason for this is to allow
* implementing shared, default behavior in the base class. For example, the
* base class' `allocate` function may log every allocation, no matter what
* derived class implementation is used.
* This class acts as a convenience utility class that handles equality_comparable_with and
* defines the `host_accessible` property
*
*/
template <class Derived>
class host_memory_resource {
public:
host_memory_resource() = default;
Expand All @@ -54,110 +43,41 @@ class host_memory_resource {
host_memory_resource& operator=(host_memory_resource&&) noexcept = default;

/**
* @brief Allocates memory on the host of size at least `bytes` bytes.
*
* The returned storage is aligned to the specified `alignment` if supported, and to
* `alignof(std::max_align_t)` otherwise.
*
* @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
*
* @param bytes The size of the allocation
* @param alignment Alignment of the allocation
* @return void* Pointer to the newly allocated memory
*/
void* allocate(std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
return do_allocate(bytes, alignment);
}

/**
* @brief Deallocate memory pointed to by `ptr`.
* @brief Compare this resource to another.
*
* `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a
* `host_memory_resource` that compares equal to `*this`, and the storage it points to must not
* yet have been deallocated, otherwise behavior is undefined.
* Two host_memory_resources compare equal if and only if memory allocated from one
* host_memory_resource can be deallocated from the other and vice versa.
*
* @throws Nothing.
* By default, simply checks if `left` and `right` refer to the same object, i.e., does not check
* whether they are two objects of the same class.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes`
* that was passed to the `allocate` call that returned `ptr`.
* @param alignment Alignment of the allocation. This must be equal to the value of `alignment`
* that was passed to the `allocate` call that returned `ptr`.
* @param stream Stream on which to perform deallocation
* @param left This resource
* @param right The other resource to compare to
* @return true If the two resources are equivalent
*/
void deallocate(void* ptr, std::size_t bytes, std::size_t alignment = alignof(std::max_align_t))
{
do_deallocate(ptr, bytes, alignment);
template<class Other, std::enable_if_t<std::is_same_v<Other, Derived>, int> = 0>
[[nodiscard]] friend bool operator==(const Derived& left, const Other& right) noexcept {
return &left == &right;
}

/**
* @brief Compare this resource to another.
*
* This synthesizes the inequality operator in case there is non defined but equality is defined
*
* Two `host_memory_resource`s compare equal if and only if memory allocated from one
* `host_memory_resource` can be deallocated from the other and vice versa.
*
* By default, simply checks if \p *this and \p other refer to the same object, i.e., does not
* check if they are two objects of the same class.
*
* @param other The other resource to compare to
* @returns true if the two resources are equivalent
* @param left A resource of derived type
* @param right A different compatible resource
* @returns If the two resources are not equivalent
*/
[[nodiscard]] bool is_equal(host_memory_resource const& other) const noexcept
{
return do_is_equal(other);
template<class T, class = std::void_t<decltype(std::declval<const Derived&>() == std::declval<const T&>())>>
[[nodiscard]] friend bool operator!=(const Derived& left, const T& right) noexcept {
return !(left == right);
}

private:
/**
* @brief Allocates memory on the host of size at least `bytes` bytes.
*
* The returned storage is aligned to the specified `alignment` if supported, and to
* `alignof(std::max_align_t)` otherwise.
*
* @throws std::bad_alloc When the requested `bytes` and `alignment` cannot be allocated.
*
* @param bytes The size of the allocation
* @param alignment Alignment of the allocation
* @return void* Pointer to the newly allocated memory
*/
virtual void* do_allocate(std::size_t bytes,
std::size_t alignment = alignof(std::max_align_t)) = 0;

/**
* @brief Deallocate memory pointed to by `ptr`.
*
* `ptr` must have been returned by a prior call to `allocate(bytes,alignment)` on a
* `host_memory_resource` that compares equal to `*this`, and the storage it points to must not
* yet have been deallocated, otherwise behavior is undefined.
*
* @throws Nothing.
*
* @param ptr Pointer to be deallocated
* @param bytes The size in bytes of the allocation. This must be equal to the value of `bytes`
* that was passed to the `allocate` call that returned `ptr`.
* @param alignment Alignment of the allocation. This must be equal to the value of `alignment`
* that was passed to the `allocate` call that returned `ptr`.
*/
virtual void do_deallocate(void* ptr,
std::size_t bytes,
std::size_t alignment = alignof(std::max_align_t)) = 0;


/**
* @brief Compare this resource to another.
*
* Two host_memory_resources compare equal if and only if memory allocated from one
* host_memory_resource can be deallocated from the other and vice versa.
*
* By default, simply checks if `*this` and `other` refer to the same object, i.e., does not check
* whether they are two objects of the same class.
*
* @param other The other resource to compare to
* @return true If the two resources are equivalent
* @brief Signal that this resource allocates host accessible memory.
*/
[[nodiscard]] virtual bool do_is_equal(host_memory_resource const& other) const noexcept
{
return this == &other;
}
friend void get_property(Derived const&, cuda::mr::host_accessible) noexcept
{}
};
} // namespace rmm::mr
Loading

0 comments on commit 2c85df6

Please sign in to comment.