From 64aa9410ec942312f924964647efc7e9e34b392d Mon Sep 17 00:00:00 2001 From: Mark Harris <783069+harrism@users.noreply.github.com> Date: Tue, 16 Jan 2024 07:49:27 +1100 Subject: [PATCH] Require explicit pool size in `pool_memory_resource` and move some things out of detail namespace (#1417) Fixes #1416. - ~Deprecates existing ctors of `pool_memory_resource` that provide optional parameter for the initial pool size.~ - Adds new ctors that require an explicit initial pool size. - We don't yet deprecate anything in this PR because that would break builds of some RAPIDS libraries. We will follow up with PRs to cuDF, cuGraph and anything else needed to remove deprecated usages after this PR is merged. - Adds a new utility `fraction_of_available_device_memory` that calculates the specified fraction of free memory on the current CUDA device. This is now used in tests to provide an explicit pool size and can be used to produce the previous behavior of `pool_memory_resource` for consumers of the library. - Moves `available_device_memory` from a detail header to `cuda_device.hpp` so it is now publicly usable, along with the above utility. - Temporarily adds `detail::available_device_memory` as an alias of the above in order to keep cudf and cugraph building until we can update them. - Duplicates commonly externally used alignment functions that are currently in `rmm::detail` to the public `rmm` namespace. The detail versions will be removed after cuDF and cuGraph are updated to not use them. Authors: - Mark Harris (https://github.com/harrism) - Lawrence Mitchell (https://github.com/wence-) Approvers: - Michael Schellenberger Costa (https://github.com/miscco) - Lawrence Mitchell (https://github.com/wence-) - Jake Hemstad (https://github.com/jrhemstad) URL: https://github.com/rapidsai/rmm/pull/1417 --- .clang-tidy | 4 +- README.md | 10 +- .../device_uvector/device_uvector_bench.cu | 9 +- .../multi_stream_allocations_bench.cu | 6 +- .../random_allocations/random_allocations.cpp | 8 +- benchmarks/replay/replay.cpp | 4 +- doxygen/Doxyfile | 2 +- include/doxygen_groups.h | 3 +- include/rmm/aligned.hpp | 119 +++++++++++++++++ include/rmm/cuda_device.hpp | 46 ++++++- include/rmm/detail/aligned.hpp | 12 +- include/rmm/detail/cuda_util.hpp | 31 ----- .../mr/device/aligned_resource_adaptor.hpp | 18 +-- .../rmm/mr/device/arena_memory_resource.hpp | 7 +- .../rmm/mr/device/binning_memory_resource.hpp | 7 +- .../mr/device/cuda_async_memory_resource.hpp | 5 +- .../cuda_async_view_memory_resource.hpp | 3 +- include/rmm/mr/device/detail/arena.hpp | 12 +- .../detail/stream_ordered_memory_resource.hpp | 8 +- .../rmm/mr/device/device_memory_resource.hpp | 8 +- .../mr/device/fixed_size_memory_resource.hpp | 9 +- .../mr/device/limiting_resource_adaptor.hpp | 10 +- include/rmm/mr/device/per_device_resource.hpp | 12 +- .../rmm/mr/device/pool_memory_resource.hpp | 121 +++++++++++------- include/rmm/mr/host/new_delete_resource.hpp | 12 +- .../rmm/mr/host/pinned_memory_resource.hpp | 10 +- python/docs/conf.py | 8 +- python/docs/librmm_docs/deprecated.rst | 5 + python/docs/librmm_docs/index.rst | 2 + python/docs/librmm_docs/utilities.rst | 5 + python/rmm/_lib/memory_resource.pyx | 11 +- tests/container_multidevice_tests.cu | 4 +- tests/mr/device/aligned_mr_tests.cpp | 7 +- tests/mr/device/arena_mr_tests.cpp | 16 +-- tests/mr/device/failure_callback_mr_tests.cpp | 9 +- tests/mr/device/mr_ref_test.hpp | 20 +-- tests/mr/device/mr_test.hpp | 16 ++- tests/mr/device/pool_mr_tests.cpp | 22 ++-- tests/mr/host/mr_ref_tests.cpp | 6 +- tests/mr/host/mr_tests.cpp | 6 +- tests/mr/host/pinned_pool_mr_tests.cpp | 5 +- 41 files changed, 420 insertions(+), 218 deletions(-) create mode 100644 include/rmm/aligned.hpp delete mode 100644 include/rmm/detail/cuda_util.hpp create mode 100644 python/docs/librmm_docs/deprecated.rst create mode 100644 python/docs/librmm_docs/utilities.rst diff --git a/.clang-tidy b/.clang-tidy index 9b3f844c9..70a0bea16 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -62,8 +62,8 @@ CheckOptions: value: 'alignment' - key: cppcoreguidelines-avoid-magic-numbers.IgnorePowersOf2IntegerValues value: '1' - - key: readability-magic-numbers.IgnorePowersOf2IntegerValues - value: '1' + - key: cppcoreguidelines-avoid-magic-numbers.IgnoredIntegerValues + value: "0;1;2;3;4;50;100" - key: cppcoreguidelines-avoid-do-while.IgnoreMacros value: 'true' ... diff --git a/README.md b/README.md index e033ef56f..a1b85d33c 100644 --- a/README.md +++ b/README.md @@ -332,7 +332,9 @@ Accessing and modifying the default resource is done through two functions: ```c++ rmm::mr::cuda_memory_resource cuda_mr; // Construct a resource that uses a coalescing best-fit pool allocator -rmm::mr::pool_memory_resource pool_mr{&cuda_mr}; +// With the pool initially half of available device memory +auto initial_size = rmm::percent_of_free_device_memory(50); +rmm::mr::pool_memory_resource pool_mr{&cuda_mr, initial_size}; rmm::mr::set_current_device_resource(&pool_mr); // Updates the current device resource pointer to `pool_mr` rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource(); // Points to `pool_mr` ``` @@ -351,11 +353,13 @@ per-device resources. Here is an example loop that creates `unique_ptr`s to `poo objects for each device and sets them as the per-device resource for that device. ```c++ -std::vector> per_device_pools; +using pool_mr = rmm::mr::pool_memory_resource; +std::vector> per_device_pools; for(int i = 0; i < N; ++i) { cudaSetDevice(i); // set device i before creating MR // Use a vector of unique_ptr to maintain the lifetime of the MRs - per_device_pools.push_back(std::make_unique()); + // Note: for brevity, omitting creation of upstream and computing initial_size + per_device_pools.push_back(std::make_unique(upstream, initial_size)); // Set the per-device resource for device i set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); } diff --git a/benchmarks/device_uvector/device_uvector_bench.cu b/benchmarks/device_uvector/device_uvector_bench.cu index 454db81a5..8b7f9a5ba 100644 --- a/benchmarks/device_uvector/device_uvector_bench.cu +++ b/benchmarks/device_uvector/device_uvector_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include "../synchronization/synchronization.hpp" +#include #include #include #include @@ -38,7 +39,8 @@ void BM_UvectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{ + &cuda_mr, rmm::percent_of_free_device_memory(50)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) @@ -59,7 +61,8 @@ BENCHMARK(BM_UvectorSizeConstruction) void BM_ThrustVectorSizeConstruction(benchmark::State& state) { rmm::mr::cuda_memory_resource cuda_mr{}; - rmm::mr::pool_memory_resource mr{&cuda_mr}; + rmm::mr::pool_memory_resource mr{ + &cuda_mr, rmm::percent_of_free_device_memory(50)}; rmm::mr::set_current_device_resource(&mr); for (auto _ : state) { // NOLINT(clang-analyzer-deadcode.DeadStores) diff --git a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu index 5ed1b31f9..4943e507f 100644 --- a/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu +++ b/benchmarks/multi_stream_allocations/multi_stream_allocations_bench.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -100,7 +101,8 @@ inline auto make_cuda_async() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/benchmarks/random_allocations/random_allocations.cpp b/benchmarks/random_allocations/random_allocations.cpp index 470442830..2856cd323 100644 --- a/benchmarks/random_allocations/random_allocations.cpp +++ b/benchmarks/random_allocations/random_allocations.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2022, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,6 +16,7 @@ #include +#include #include #include #include @@ -165,12 +166,13 @@ inline auto make_cuda_async() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() { - auto free = rmm::detail::available_device_memory().first; + auto free = rmm::available_device_memory().first; constexpr auto reserve{64UL << 20}; // Leave some space for CUDA overhead. return rmm::mr::make_owning_wrapper(make_cuda(), free - reserve); } diff --git a/benchmarks/replay/replay.cpp b/benchmarks/replay/replay.cpp index 320811875..253708ace 100644 --- a/benchmarks/replay/replay.cpp +++ b/benchmarks/replay/replay.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -61,7 +61,7 @@ inline auto make_pool(std::size_t simulated_size) return rmm::mr::make_owning_wrapper( make_simulated(simulated_size), simulated_size, simulated_size); } - return rmm::mr::make_owning_wrapper(make_cuda()); + return rmm::mr::make_owning_wrapper(make_cuda(), 0); } inline auto make_arena(std::size_t simulated_size) diff --git a/doxygen/Doxyfile b/doxygen/Doxyfile index 149603f59..513f15875 100644 --- a/doxygen/Doxyfile +++ b/doxygen/Doxyfile @@ -504,7 +504,7 @@ EXTRACT_PACKAGE = NO # included in the documentation. # The default value is: NO. -EXTRACT_STATIC = NO +EXTRACT_STATIC = YES # If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined # locally in source files will be included in the documentation. If set to NO, diff --git a/include/doxygen_groups.h b/include/doxygen_groups.h index be5eaf17f..70ec0cd68 100644 --- a/include/doxygen_groups.h +++ b/include/doxygen_groups.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -41,4 +41,5 @@ * @defgroup errors Errors * @defgroup logging Logging * @defgroup thrust_integrations Thrust Integrations + * @defgroup utilities Utilities */ diff --git a/include/rmm/aligned.hpp b/include/rmm/aligned.hpp new file mode 100644 index 000000000..7a0feaabf --- /dev/null +++ b/include/rmm/aligned.hpp @@ -0,0 +1,119 @@ +/* + * Copyright (c) 2020-2024, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace rmm { + +/** + * @addtogroup utilities + * @{ + * @file + */ + +/** + * @brief Default alignment used for host memory allocated by RMM. + * + */ +static constexpr std::size_t RMM_DEFAULT_HOST_ALIGNMENT{alignof(std::max_align_t)}; + +/** + * @brief Default alignment used for CUDA memory allocation. + * + */ +static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; + +/** + * @brief Returns whether or not `value` is a power of 2. + * + * @param[in] value to check. + * + * @return Whether the input a power of two with non-negative exponent + */ +constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } + +/** + * @brief Returns whether or not `alignment` is a valid memory alignment. + * + * @param[in] alignment to check + * + * @return Whether the alignment is valid + */ +constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(alignment); } + +/** + * @brief Align up to nearest multiple of specified power of 2 + * + * @param[in] value value to align + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return (value + (alignment - 1)) & ~(alignment - 1); +} + +/** + * @brief Align down to the nearest multiple of specified power of 2 + * + * @param[in] value value to align + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return Return the aligned value, as one would expect + */ +constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return value & ~(alignment - 1); +} + +/** + * @brief Checks whether a value is aligned to a multiple of a specified power of 2 + * + * @param[in] value value to check for alignment + * @param[in] alignment amount, in bytes, must be a power of 2 + * + * @return true if aligned + */ +constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept +{ + assert(is_supported_alignment(alignment)); + return value == align_down(value, alignment); +} + +/** + * @brief Checks whether the provided pointer is aligned to a specified @p alignment + * + * @param[in] ptr pointer to check for alignment + * @param[in] alignment required alignment in bytes, must be a power of 2 + * + * @return true if the pointer is aligned + */ +inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) +{ + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) + return is_aligned(reinterpret_cast(ptr), alignment); +} + +/** @} */ // end of group + +} // namespace rmm diff --git a/include/rmm/cuda_device.hpp b/include/rmm/cuda_device.hpp index 054bbb920..565d86926 100644 --- a/include/rmm/cuda_device.hpp +++ b/include/rmm/cuda_device.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include @@ -102,6 +103,49 @@ inline int get_num_cuda_devices() return num_dev; } +/** + * @brief Returns the available and total device memory in bytes for the current device + * + * @return The available and total device memory in bytes for the current device as a std::pair. + */ +inline std::pair available_device_memory() +{ + std::size_t free{}; + std::size_t total{}; + RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); + return {free, total}; +} + +namespace detail { + +/** + * @brief Returns the available and total device memory in bytes for the current device + * + * @deprecated Use rmm::available_device_memory() instead. + * + * @return The available and total device memory in bytes for the current device as a std::pair. + */ +//[[deprecated("Use `rmm::available_device_memory` instead.")]] // +const auto available_device_memory = rmm::available_device_memory; + +} // namespace detail + +/** + * @brief Returns the approximate specified percent of available device memory on the current CUDA + * device, aligned (down) to the nearest CUDA allocation size. + * + * @param percent The percent of free memory to return. + * + * @return The recommended initial device memory pool size in bytes. + */ +inline std::size_t percent_of_free_device_memory(int percent) +{ + [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); + auto fraction = static_cast(percent) / 100.0; + return rmm::align_down(static_cast(static_cast(free) * fraction), + rmm::CUDA_ALLOCATION_ALIGNMENT); +} + /** * @brief RAII class that sets the current CUDA device to the specified device on construction * and restores the previous device on destruction. diff --git a/include/rmm/detail/aligned.hpp b/include/rmm/detail/aligned.hpp index 321be53b5..54d287bfb 100644 --- a/include/rmm/detail/aligned.hpp +++ b/include/rmm/detail/aligned.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -40,7 +40,7 @@ static constexpr std::size_t CUDA_ALLOCATION_ALIGNMENT{256}; * @brief Returns whether or not `n` is a power of 2. * */ -constexpr bool is_pow2(std::size_t value) { return (0 == (value & (value - 1))); } +constexpr bool is_pow2(std::size_t value) { return (value != 0U) && ((value & (value - 1)) == 0U); } /** * @brief Returns whether or not `alignment` is a valid memory alignment. @@ -51,7 +51,7 @@ constexpr bool is_supported_alignment(std::size_t alignment) { return is_pow2(al /** * @brief Align up to nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -65,7 +65,7 @@ constexpr std::size_t align_up(std::size_t value, std::size_t alignment) noexcep /** * @brief Align down to the nearest multiple of specified power of 2 * - * @param[in] v value to align + * @param[in] value value to align * @param[in] alignment amount, in bytes, must be a power of 2 * * @return Return the aligned value, as one would expect @@ -79,7 +79,7 @@ constexpr std::size_t align_down(std::size_t value, std::size_t alignment) noexc /** * @brief Checks whether a value is aligned to a multiple of a specified power of 2 * - * @param[in] v value to check for alignment + * @param[in] value value to check for alignment * @param[in] alignment amount, in bytes, must be a power of 2 * * @return true if aligned @@ -93,7 +93,7 @@ constexpr bool is_aligned(std::size_t value, std::size_t alignment) noexcept inline bool is_pointer_aligned(void* ptr, std::size_t alignment = CUDA_ALLOCATION_ALIGNMENT) { // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) - return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); + return rmm::detail::is_aligned(reinterpret_cast(ptr), alignment); } /** diff --git a/include/rmm/detail/cuda_util.hpp b/include/rmm/detail/cuda_util.hpp deleted file mode 100644 index 613b8d156..000000000 --- a/include/rmm/detail/cuda_util.hpp +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (c) 2021, NVIDIA CORPORATION. - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -#pragma once - -#include - -namespace rmm::detail { - -/// Gets the available and total device memory in bytes for the current device -inline std::pair available_device_memory() -{ - std::size_t free{}; - std::size_t total{}; - RMM_CUDA_TRY(cudaMemGetInfo(&free, &total)); - return {free, total}; -} - -} // namespace rmm::detail diff --git a/include/rmm/mr/device/aligned_resource_adaptor.hpp b/include/rmm/mr/device/aligned_resource_adaptor.hpp index 05e9915cc..be7c3036c 100644 --- a/include/rmm/mr/device/aligned_resource_adaptor.hpp +++ b/include/rmm/mr/device/aligned_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include @@ -65,12 +65,12 @@ class aligned_resource_adaptor final : public device_memory_resource { * are aligned. */ explicit aligned_resource_adaptor(Upstream* upstream, - std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT, + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT, std::size_t alignment_threshold = default_alignment_threshold) : upstream_{upstream}, alignment_{alignment}, alignment_threshold_{alignment_threshold} { RMM_EXPECTS(nullptr != upstream, "Unexpected null upstream resource pointer."); - RMM_EXPECTS(rmm::detail::is_supported_alignment(alignment), + RMM_EXPECTS(rmm::is_supported_alignment(alignment), "Allocation alignment is not a power of 2."); } @@ -127,14 +127,14 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { return upstream_->allocate(bytes, stream); } auto const size = upstream_allocation_size(bytes); void* pointer = upstream_->allocate(size, stream); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast) auto const address = reinterpret_cast(pointer); - auto const aligned_address = rmm::detail::align_up(address, alignment_); + auto const aligned_address = rmm::align_up(address, alignment_); // NOLINTNEXTLINE(cppcoreguidelines-pro-type-reinterpret-cast,performance-no-int-to-ptr) void* aligned_pointer = reinterpret_cast(aligned_address); if (pointer != aligned_pointer) { @@ -153,7 +153,7 @@ class aligned_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - if (alignment_ == rmm::detail::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { + if (alignment_ == rmm::CUDA_ALLOCATION_ALIGNMENT || bytes < alignment_threshold_) { upstream_->deallocate(ptr, bytes, stream); } else { { @@ -208,8 +208,8 @@ class aligned_resource_adaptor final : public device_memory_resource { */ std::size_t upstream_allocation_size(std::size_t bytes) const { - auto const aligned_size = rmm::detail::align_up(bytes, alignment_); - return aligned_size + alignment_ - rmm::detail::CUDA_ALLOCATION_ALIGNMENT; + auto const aligned_size = rmm::align_up(bytes, alignment_); + return aligned_size + alignment_ - rmm::CUDA_ALLOCATION_ALIGNMENT; } Upstream* upstream_; ///< The upstream resource used for satisfying allocation requests diff --git a/include/rmm/mr/device/arena_memory_resource.hpp b/include/rmm/mr/device/arena_memory_resource.hpp index 929b8454f..1b821b440 100644 --- a/include/rmm/mr/device/arena_memory_resource.hpp +++ b/include/rmm/mr/device/arena_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2022, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -145,7 +146,7 @@ class arena_memory_resource final : public device_memory_resource { #ifdef RMM_ARENA_USE_SIZE_CLASSES bytes = rmm::mr::detail::arena::align_to_size_class(bytes); #else - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); #endif auto& arena = get_arena(stream); @@ -195,7 +196,7 @@ class arena_memory_resource final : public device_memory_resource { #ifdef RMM_ARENA_USE_SIZE_CLASSES bytes = rmm::mr::detail::arena::align_to_size_class(bytes); #else - bytes = rmm::detail::align_up(bytes, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + bytes = rmm::align_up(bytes, rmm::CUDA_ALLOCATION_ALIGNMENT); #endif auto& arena = get_arena(stream); diff --git a/include/rmm/mr/device/binning_memory_resource.hpp b/include/rmm/mr/device/binning_memory_resource.hpp index c2e1621a6..2a9975b18 100644 --- a/include/rmm/mr/device/binning_memory_resource.hpp +++ b/include/rmm/mr/device/binning_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -138,8 +138,7 @@ class binning_memory_resource final : public device_memory_resource { */ void add_bin(std::size_t allocation_size, device_memory_resource* bin_resource = nullptr) { - allocation_size = - rmm::detail::align_up(allocation_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + allocation_size = rmm::align_up(allocation_size, rmm::CUDA_ALLOCATION_ALIGNMENT); if (nullptr != bin_resource) { resource_bins_.insert({allocation_size, bin_resource}); diff --git a/include/rmm/mr/device/cuda_async_memory_resource.hpp b/include/rmm/mr/device/cuda_async_memory_resource.hpp index de31c7dc4..f8295c6f6 100644 --- a/include/rmm/mr/device/cuda_async_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include -#include #include #include #include @@ -120,7 +119,7 @@ class cuda_async_memory_resource final : public device_memory_resource { pool_handle(), cudaMemPoolReuseAllowOpportunistic, &disabled)); } - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); // Need an l-value to take address to pass to cudaMemPoolSetAttribute uint64_t threshold = release_threshold.value_or(total); diff --git a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp index 825fcab1e..562944669 100644 --- a/include/rmm/mr/device/cuda_async_view_memory_resource.hpp +++ b/include/rmm/mr/device/cuda_async_view_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +17,6 @@ #include #include -#include #include #include #include diff --git a/include/rmm/mr/device/detail/arena.hpp b/include/rmm/mr/device/detail/arena.hpp index d8da58493..c7965ca34 100644 --- a/include/rmm/mr/device/detail/arena.hpp +++ b/include/rmm/mr/device/detail/arena.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,9 +16,9 @@ #pragma once +#include +#include #include -#include -#include #include #include #include @@ -508,8 +508,8 @@ class global_arena final { : upstream_mr_{upstream_mr} { RMM_EXPECTS(nullptr != upstream_mr_, "Unexpected null upstream pointer."); - auto const size = rmm::detail::align_down(arena_size.value_or(default_size()), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const size = + rmm::align_down(arena_size.value_or(default_size()), rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size >= superblock::minimum_size, "Arena size smaller than minimum superblock size."); initialize(size); @@ -692,7 +692,7 @@ class global_arena final { */ constexpr std::size_t default_size() const { - auto const [free, total] = rmm::detail::available_device_memory(); + auto const [free, total] = rmm::available_device_memory(); return free / 2; } diff --git a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp index a57bf1c6d..1d6829cb5 100644 --- a/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp +++ b/include/rmm/mr/device/detail/stream_ordered_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -207,7 +207,7 @@ class stream_ordered_memory_resource : public crtp, public device_ auto stream_event = get_event(stream); - size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); RMM_EXPECTS(size <= this->underlying().get_maximum_allocation_size(), "Maximum allocation size exceeded", rmm::out_of_memory); @@ -241,7 +241,7 @@ class stream_ordered_memory_resource : public crtp, public device_ lock_guard lock(mtx_); auto stream_event = get_event(stream); - size = rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + size = rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); auto const block = this->underlying().free_block(ptr, size); // TODO: cudaEventRecord has significant overhead on deallocations. For the non-PTDS case diff --git a/include/rmm/mr/device/device_memory_resource.hpp b/include/rmm/mr/device/device_memory_resource.hpp index 63e5f39a4..e3014b6c3 100644 --- a/include/rmm/mr/device/device_memory_resource.hpp +++ b/include/rmm/mr/device/device_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -78,10 +78,12 @@ namespace rmm::mr { * device. * * @code{.cpp} - * std::vector> per_device_pools; + * using pool_mr = rmm::mr::pool_memory_resource; + * std::vector> per_device_pools; * for(int i = 0; i < N; ++i) { * cudaSetDevice(i); - * per_device_pools.push_back(std::make_unique()); + * // Note: for brevity, omitting creation of upstream and computing initial_size + * per_device_pools.push_back(std::make_unique(upstream, initial_size)); * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); * } * @endcode diff --git a/include/rmm/mr/device/fixed_size_memory_resource.hpp b/include/rmm/mr/device/fixed_size_memory_resource.hpp index 01fb8a6bc..91cc95c53 100644 --- a/include/rmm/mr/device/fixed_size_memory_resource.hpp +++ b/include/rmm/mr/device/fixed_size_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,8 +15,8 @@ */ #pragma once +#include #include -#include #include #include #include @@ -77,7 +77,7 @@ class fixed_size_memory_resource std::size_t block_size = default_block_size, std::size_t blocks_to_preallocate = default_blocks_to_preallocate) : upstream_mr_{upstream_mr}, - block_size_{rmm::detail::align_up(block_size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT)}, + block_size_{rmm::align_up(block_size, rmm::CUDA_ALLOCATION_ALIGNMENT)}, upstream_chunk_size_{block_size * blocks_to_preallocate} { // allocate initial blocks and insert into free list @@ -207,8 +207,7 @@ class fixed_size_memory_resource { // Deallocating a fixed-size block just inserts it in the free list, which is // handled by the parent class - RMM_LOGGING_ASSERT(rmm::detail::align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT) <= - block_size_); + RMM_LOGGING_ASSERT(rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT) <= block_size_); return block_type{ptr}; } diff --git a/include/rmm/mr/device/limiting_resource_adaptor.hpp b/include/rmm/mr/device/limiting_resource_adaptor.hpp index 6573956d0..2123c3cac 100644 --- a/include/rmm/mr/device/limiting_resource_adaptor.hpp +++ b/include/rmm/mr/device/limiting_resource_adaptor.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,7 @@ */ #pragma once -#include +#include #include #include @@ -54,7 +54,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ limiting_resource_adaptor(Upstream* upstream, std::size_t allocation_limit, - std::size_t alignment = rmm::detail::CUDA_ALLOCATION_ALIGNMENT) + std::size_t alignment = rmm::CUDA_ALLOCATION_ALIGNMENT) : allocation_limit_{allocation_limit}, allocated_bytes_(0), alignment_(alignment), @@ -134,7 +134,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void* do_allocate(std::size_t bytes, cuda_stream_view stream) override { - auto const proposed_size = rmm::detail::align_up(bytes, alignment_); + auto const proposed_size = rmm::align_up(bytes, alignment_); auto const old = allocated_bytes_.fetch_add(proposed_size); if (old + proposed_size <= allocation_limit_) { try { @@ -158,7 +158,7 @@ class limiting_resource_adaptor final : public device_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, cuda_stream_view stream) override { - std::size_t allocated_size = rmm::detail::align_up(bytes, alignment_); + std::size_t allocated_size = rmm::align_up(bytes, alignment_); upstream_->deallocate(ptr, bytes, stream); allocated_bytes_ -= allocated_size; } diff --git a/include/rmm/mr/device/per_device_resource.hpp b/include/rmm/mr/device/per_device_resource.hpp index 139389f0c..a56a784a1 100644 --- a/include/rmm/mr/device/per_device_resource.hpp +++ b/include/rmm/mr/device/per_device_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -69,6 +69,16 @@ * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); * } * @endcode + * @code{.cpp} + * using pool_mr = rmm::mr::pool_memory_resource; + * std::vector> per_device_pools; + * for(int i = 0; i < N; ++i) { + * cudaSetDevice(i); + * // Note: for brevity, omitting creation of upstream and computing initial_size + * per_device_pools.push_back(std::make_unique(upstream, initial_size)); + * set_per_device_resource(cuda_device_id{i}, &per_device_pools.back()); + * } + * @endcode */ namespace rmm::mr { diff --git a/include/rmm/mr/device/pool_memory_resource.hpp b/include/rmm/mr/device/pool_memory_resource.hpp index 20b250524..c0317cf57 100644 --- a/include/rmm/mr/device/pool_memory_resource.hpp +++ b/include/rmm/mr/device/pool_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2023, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,8 @@ */ #pragma once +#include #include -#include -#include #include #include #include @@ -110,10 +109,37 @@ class pool_memory_resource final friend class detail::stream_ordered_memory_resource, detail::coalescing_free_list>; + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory + * pool using `upstream_mr`. + * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available memory from the upstream resource. + */ + //[[deprecated("Must specify initial_pool_size")]] // + explicit pool_memory_resource(Upstream* upstream_mr, + thrust::optional initial_pool_size = thrust::nullopt, + thrust::optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) + { + } + /** * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using * `upstream_mr`. * + * @deprecated Use the constructor that takes an explicit initial pool size instead. + * * @throws rmm::logic_error if `upstream_mr == nullptr` * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. @@ -121,24 +147,46 @@ class pool_memory_resource final * multiple of pool_memory_resource::allocation_alignment bytes. * * @param upstream_mr The memory_resource from which to allocate blocks for the pool. - * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the - * available memory on the current device. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to zero. * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all - * of the available memory on the current device. + * of the available memory from the upstream resource. */ - explicit pool_memory_resource(Upstream* upstream_mr, + template , int> = 0> + //[[deprecated("Must specify initial_pool_size")]] // + explicit pool_memory_resource(Upstream2& upstream_mr, thrust::optional initial_pool_size = thrust::nullopt, thrust::optional maximum_pool_size = thrust::nullopt) + : pool_memory_resource(upstream_mr, initial_pool_size.value_or(0), maximum_pool_size) + { + } + + /** + * @brief Construct a `pool_memory_resource` and allocate the initial device memory pool using + * `upstream_mr`. + * + * @throws rmm::logic_error if `upstream_mr == nullptr` + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a + * multiple of pool_memory_resource::allocation_alignment bytes. + * + * @param upstream_mr The memory_resource from which to allocate blocks for the pool. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. + * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all + * of the available from the upstream resource. + */ + explicit pool_memory_resource(Upstream* upstream_mr, + std::size_t initial_pool_size, + thrust::optional maximum_pool_size = thrust::nullopt) : upstream_mr_{[upstream_mr]() { RMM_EXPECTS(nullptr != upstream_mr, "Unexpected null upstream pointer."); return upstream_mr; }()} { - RMM_EXPECTS(rmm::detail::is_aligned(initial_pool_size.value_or(0), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::is_aligned(initial_pool_size, rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Initial pool size required to be a multiple of 256 bytes"); - RMM_EXPECTS(rmm::detail::is_aligned(maximum_pool_size.value_or(0), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT), + RMM_EXPECTS(rmm::is_aligned(maximum_pool_size.value_or(0), rmm::CUDA_ALLOCATION_ALIGNMENT), "Error, Maximum pool size required to be a multiple of 256 bytes"); initialize_pool(initial_pool_size, maximum_pool_size); @@ -149,21 +197,20 @@ class pool_memory_resource final * `upstream_mr`. * * @throws rmm::logic_error if `upstream_mr == nullptr` - * @throws rmm::logic_error if `initial_pool_size` is neither the default nor aligned to a - * multiple of pool_memory_resource::allocation_alignment bytes. + * @throws rmm::logic_error if `initial_pool_size` is not aligned to a multiple of + * pool_memory_resource::allocation_alignment bytes. * @throws rmm::logic_error if `maximum_pool_size` is neither the default nor aligned to a * multiple of pool_memory_resource::allocation_alignment bytes. * * @param upstream_mr The memory_resource from which to allocate blocks for the pool. - * @param initial_pool_size Minimum size, in bytes, of the initial pool. Defaults to half of the - * available memory on the current device. + * @param initial_pool_size Minimum size, in bytes, of the initial pool. * @param maximum_pool_size Maximum size, in bytes, that the pool can grow to. Defaults to all - * of the available memory on the current device. + * of the available memory from the upstream resource. */ template , int> = 0> explicit pool_memory_resource(Upstream2& upstream_mr, - thrust::optional initial_pool_size = thrust::nullopt, + std::size_t initial_pool_size, thrust::optional maximum_pool_size = thrust::nullopt) : pool_memory_resource(cuda::std::addressof(upstream_mr), initial_pool_size, maximum_pool_size) { @@ -276,38 +323,22 @@ class pool_memory_resource final /** * @brief Allocate initial memory for the pool * - * If initial_size is unset, then queries the upstream memory resource for available memory if - * upstream supports `get_mem_info`, or queries the device (using CUDA API) for available memory - * if not. Then attempts to initialize to half the available memory. - * - * If initial_size is set, then tries to initialize the pool to that size. - * * @param initial_size The optional initial size for the pool * @param maximum_size The optional maximum size for the pool + * + * @throws logic_error if @p initial_size is larger than @p maximum_size (if set). */ - // NOLINTNEXTLINE(bugprone-easily-swappable-parameters) - void initialize_pool(thrust::optional initial_size, - thrust::optional maximum_size) + void initialize_pool(std::size_t initial_size, thrust::optional maximum_size) { - auto const try_size = [&]() { - if (not initial_size.has_value()) { - auto const [free, total] = (get_upstream()->supports_get_mem_info()) - ? get_upstream()->get_mem_info(cuda_stream_legacy) - : rmm::detail::available_device_memory(); - return rmm::detail::align_up(std::min(free, total / 2), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - } - return initial_size.value(); - }(); - current_pool_size_ = 0; // try_to_expand will set this if it succeeds maximum_pool_size_ = maximum_size; - RMM_EXPECTS(try_size <= maximum_pool_size_.value_or(std::numeric_limits::max()), - "Initial pool size exceeds the maximum pool size!"); + RMM_EXPECTS( + initial_size <= maximum_pool_size_.value_or(std::numeric_limits::max()), + "Initial pool size exceeds the maximum pool size!"); - if (try_size > 0) { - auto const block = try_to_expand(try_size, try_size, cuda_stream_legacy); + if (initial_size > 0) { + auto const block = try_to_expand(initial_size, initial_size, cuda_stream_legacy); this->insert_block(block, cuda_stream_legacy); } } @@ -346,9 +377,9 @@ class pool_memory_resource final { if (maximum_pool_size_.has_value()) { auto const unaligned_remaining = maximum_pool_size_.value() - pool_size(); - using rmm::detail::align_up; - auto const remaining = align_up(unaligned_remaining, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); - auto const aligned_size = align_up(size, rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + using rmm::align_up; + auto const remaining = align_up(unaligned_remaining, rmm::CUDA_ALLOCATION_ALIGNMENT); + auto const aligned_size = align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT); return (aligned_size <= remaining) ? std::max(aligned_size, remaining / 2) : 0; } return std::max(size, pool_size()); @@ -416,7 +447,7 @@ class pool_memory_resource final RMM_LOGGING_ASSERT(iter != allocated_blocks_.end()); auto block = *iter; - RMM_LOGGING_ASSERT(block.size() == rmm::detail::align_up(size, allocation_alignment)); + RMM_LOGGING_ASSERT(block.size() == rmm::align_up(size, allocation_alignment)); allocated_blocks_.erase(iter); return block; diff --git a/include/rmm/mr/host/new_delete_resource.hpp b/include/rmm/mr/host/new_delete_resource.hpp index 044f74063..4bb272df3 100644 --- a/include/rmm/mr/host/new_delete_resource.hpp +++ b/include/rmm/mr/host/new_delete_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,6 +17,7 @@ #include +#include #include #include @@ -58,12 +59,11 @@ class new_delete_resource final : public host_memory_resource { * @return Pointer to the newly allocated memory */ void* do_allocate(std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { // If the requested alignment isn't supported, use default - alignment = (rmm::detail::is_supported_alignment(alignment)) - ? alignment - : rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; return rmm::detail::aligned_allocate( bytes, alignment, [](std::size_t size) { return ::operator new(size); }); @@ -84,7 +84,7 @@ class new_delete_resource final : public host_memory_resource { */ void do_deallocate(void* ptr, std::size_t bytes, - std::size_t alignment = rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT) override + std::size_t alignment = rmm::RMM_DEFAULT_HOST_ALIGNMENT) override { rmm::detail::aligned_deallocate( ptr, bytes, alignment, [](void* ptr) { ::operator delete(ptr); }); diff --git a/include/rmm/mr/host/pinned_memory_resource.hpp b/include/rmm/mr/host/pinned_memory_resource.hpp index e49767faf..b5c273ef5 100644 --- a/include/rmm/mr/host/pinned_memory_resource.hpp +++ b/include/rmm/mr/host/pinned_memory_resource.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,6 +15,7 @@ */ #pragma once +#include #include #include #include @@ -114,7 +115,7 @@ class pinned_memory_resource final : public host_memory_resource { */ void deallocate_async(void* ptr, std::size_t bytes, std::size_t alignment, cuda_stream_view) { - do_deallocate(ptr, rmm::detail::align_up(bytes, alignment)); + do_deallocate(ptr, rmm::align_up(bytes, alignment)); } /** @@ -143,9 +144,8 @@ class pinned_memory_resource final : public host_memory_resource { if (0 == bytes) { return nullptr; } // If the requested alignment isn't supported, use default - alignment = (rmm::detail::is_supported_alignment(alignment)) - ? alignment - : rmm::detail::RMM_DEFAULT_HOST_ALIGNMENT; + alignment = + (rmm::is_supported_alignment(alignment)) ? alignment : rmm::RMM_DEFAULT_HOST_ALIGNMENT; return rmm::detail::aligned_allocate(bytes, alignment, [](std::size_t size) { void* ptr{nullptr}; diff --git a/python/docs/conf.py b/python/docs/conf.py index ba5aa6d20..b4c141eb4 100644 --- a/python/docs/conf.py +++ b/python/docs/conf.py @@ -248,14 +248,16 @@ def on_missing_reference(app, env, node, contnode): if match := re.search("(.*)<.*>", reftarget): reftarget = match.group(1) + # This is the document we're linking _from_, and hence where + # we should try and resolve the xref wrt. + refdoc = node.get("refdoc") # Try to find the target prefixed with e.g. namespaces in case that's # all that's missing. Include the empty prefix in case we're searching # for a stripped template. extra_prefixes = ["rmm::", "rmm::mr::", "mr::", ""] - for (name, dispname, type, docname, anchor, priority) in env.domains[ + for (name, dispname, typ, docname, anchor, priority) in env.domains[ "cpp" ].get_objects(): - for prefix in extra_prefixes: if ( name == f"{prefix}{reftarget}" @@ -263,7 +265,7 @@ def on_missing_reference(app, env, node, contnode): ): return env.domains["cpp"].resolve_xref( env, - docname, + refdoc, app.builder, node["reftype"], name, diff --git a/python/docs/librmm_docs/deprecated.rst b/python/docs/librmm_docs/deprecated.rst new file mode 100644 index 000000000..b5ed90caa --- /dev/null +++ b/python/docs/librmm_docs/deprecated.rst @@ -0,0 +1,5 @@ +Deprecated functionality +======================== + +.. doxygenpage:: deprecated + :content-only: diff --git a/python/docs/librmm_docs/index.rst b/python/docs/librmm_docs/index.rst index 6afd94d2e..2b61deb9f 100644 --- a/python/docs/librmm_docs/index.rst +++ b/python/docs/librmm_docs/index.rst @@ -17,6 +17,8 @@ librmm Documentation cuda_streams errors logging + utilities + deprecated .. doxygennamespace:: rmm diff --git a/python/docs/librmm_docs/utilities.rst b/python/docs/librmm_docs/utilities.rst new file mode 100644 index 000000000..25b455746 --- /dev/null +++ b/python/docs/librmm_docs/utilities.rst @@ -0,0 +1,5 @@ +Utilities +============ + +.. doxygengroup:: utilities + :members: diff --git a/python/rmm/_lib/memory_resource.pyx b/python/rmm/_lib/memory_resource.pyx index ce7f45e19..690e2e338 100644 --- a/python/rmm/_lib/memory_resource.pyx +++ b/python/rmm/_lib/memory_resource.pyx @@ -120,12 +120,15 @@ cdef extern from "rmm/mr/device/cuda_async_memory_resource.hpp" \ win32 win32_kmt +cdef extern from "rmm/cuda_device.hpp" namespace "rmm" nogil: + size_t percent_of_free_device_memory(int percent) except + + cdef extern from "rmm/mr/device/pool_memory_resource.hpp" \ namespace "rmm::mr" nogil: cdef cppclass pool_memory_resource[Upstream](device_memory_resource): pool_memory_resource( Upstream* upstream_mr, - optional[size_t] initial_pool_size, + size_t initial_pool_size, optional[size_t] maximum_pool_size) except + size_t pool_size() @@ -369,12 +372,12 @@ cdef class PoolMemoryResource(UpstreamResourceAdaptor): initial_pool_size=None, maximum_pool_size=None ): - cdef optional[size_t] c_initial_pool_size + cdef size_t c_initial_pool_size cdef optional[size_t] c_maximum_pool_size c_initial_pool_size = ( - optional[size_t]() if + percent_of_free_device_memory(50) if initial_pool_size is None - else make_optional[size_t](initial_pool_size) + else initial_pool_size ) c_maximum_pool_size = ( optional[size_t]() if diff --git a/tests/container_multidevice_tests.cu b/tests/container_multidevice_tests.cu index 9de9ddf40..e58ba53a2 100644 --- a/tests/container_multidevice_tests.cu +++ b/tests/container_multidevice_tests.cu @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,12 +15,12 @@ */ #include "device_check_resource_adaptor.hpp" -#include "rmm/mr/device/per_device_resource.hpp" #include #include #include #include +#include #include diff --git a/tests/mr/device/aligned_mr_tests.cpp b/tests/mr/device/aligned_mr_tests.cpp index dfcdfa72f..5fbb4b8f1 100644 --- a/tests/mr/device/aligned_mr_tests.cpp +++ b/tests/mr/device/aligned_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,7 +15,8 @@ */ #include "../../mock_resource.hpp" -#include + +#include #include #include #include @@ -223,7 +224,7 @@ TEST(AlignedTest, AlignRealPointer) auto const threshold{65536}; aligned_real mr{rmm::mr::get_current_device_resource(), alignment, threshold}; void* alloc = mr.allocate(threshold); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc, alignment)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc, alignment)); mr.deallocate(alloc, threshold); } diff --git a/tests/mr/device/arena_mr_tests.cpp b/tests/mr/device/arena_mr_tests.cpp index 7525cac9f..1068e0cf0 100644 --- a/tests/mr/device/arena_mr_tests.cpp +++ b/tests/mr/device/arena_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,9 +15,10 @@ */ #include "../../byte_literals.hpp" + +#include +#include #include -#include -#include #include #include #include @@ -487,10 +488,9 @@ TEST_F(ArenaTest, SizeSmallerThanSuperblockSize) // NOLINT TEST_F(ArenaTest, AllocateNinetyPercent) // NOLINT { EXPECT_NO_THROW([]() { // NOLINT(cppcoreguidelines-avoid-goto) - auto const free = rmm::detail::available_device_memory().first; - auto const ninety_percent = - rmm::detail::align_up(static_cast(static_cast(free) * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const free = rmm::available_device_memory().first; + auto const ninety_percent = rmm::align_up( + static_cast(static_cast(free) * 0.9), rmm::CUDA_ALLOCATION_ALIGNMENT); arena_mr mr(rmm::mr::get_current_device_resource(), ninety_percent); }()); } @@ -501,7 +501,7 @@ TEST_F(ArenaTest, SmallMediumLarge) // NOLINT arena_mr mr(rmm::mr::get_current_device_resource()); auto* small = mr.allocate(256); auto* medium = mr.allocate(64_MiB); - auto const free = rmm::detail::available_device_memory().first; + auto const free = rmm::available_device_memory().first; auto* large = mr.allocate(free / 3); mr.deallocate(small, 256); mr.deallocate(medium, 64_MiB); diff --git a/tests/mr/device/failure_callback_mr_tests.cpp b/tests/mr/device/failure_callback_mr_tests.cpp index bb5484c69..79acd5c7e 100644 --- a/tests/mr/device/failure_callback_mr_tests.cpp +++ b/tests/mr/device/failure_callback_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021, NVIDIA CORPORATION. + * Copyright (c) 2021-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -15,16 +15,17 @@ */ #include "../../byte_literals.hpp" -#include "rmm/cuda_stream_view.hpp" -#include "rmm/mr/device/device_memory_resource.hpp" -#include +#include #include #include +#include #include #include +#include + namespace rmm::test { namespace { diff --git a/tests/mr/device/mr_ref_test.hpp b/tests/mr/device/mr_ref_test.hpp index 804c710a5..25ff76891 100644 --- a/tests/mr/device/mr_ref_test.hpp +++ b/tests/mr/device/mr_ref_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,10 @@ #include "../../byte_literals.hpp" +#include +#include #include #include -#include #include #include #include @@ -78,7 +79,7 @@ inline void test_allocate(resource_ref ref, std::size_t bytes) try { void* ptr = ref.allocate(bytes); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); ref.deallocate(ptr, bytes); } catch (rmm::out_of_memory const& e) { @@ -94,7 +95,7 @@ inline void test_allocate_async(async_resource_ref ref, void* ptr = ref.allocate_async(bytes, stream); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); ref.deallocate_async(ptr, bytes, stream); if (not stream.is_default()) { stream.synchronize(); } @@ -202,7 +203,7 @@ inline void test_random_allocations(resource_ref ref, alloc.size = distribution(generator); EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [&ref](allocation& alloc) { @@ -228,7 +229,7 @@ inline void test_random_async_allocations(async_resource_ref ref, EXPECT_NO_THROW(alloc.ptr = ref.allocate(alloc.size)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, &ref](allocation& alloc) { @@ -269,7 +270,7 @@ inline void test_mixed_random_allocation_free(resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate(size), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -316,7 +317,7 @@ inline void test_mixed_random_async_allocation_free(async_resource_ref ref, EXPECT_NO_THROW(allocations.emplace_back(ref.allocate_async(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -379,7 +380,8 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/tests/mr/device/mr_test.hpp b/tests/mr/device/mr_test.hpp index 03f880e72..ef4b4bc80 100644 --- a/tests/mr/device/mr_test.hpp +++ b/tests/mr/device/mr_test.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -18,9 +18,10 @@ #include "../../byte_literals.hpp" +#include +#include #include #include -#include #include #include #include @@ -74,7 +75,7 @@ inline void test_get_current_device_resource() EXPECT_NE(nullptr, rmm::mr::get_current_device_resource()); void* ptr = rmm::mr::get_current_device_resource()->allocate(1_MiB); EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); rmm::mr::get_current_device_resource()->deallocate(ptr, 1_MiB); } @@ -86,7 +87,7 @@ inline void test_allocate(rmm::mr::device_memory_resource* mr, void* ptr = mr->allocate(bytes); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(ptr)); EXPECT_TRUE(is_device_memory(ptr)); mr->deallocate(ptr, bytes); if (not stream.is_default()) { stream.synchronize(); } @@ -154,7 +155,7 @@ inline void test_random_allocations(rmm::mr::device_memory_resource* mr, EXPECT_NO_THROW(alloc.ptr = mr->allocate(alloc.size, stream)); if (not stream.is_default()) { stream.synchronize(); } EXPECT_NE(nullptr, alloc.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(alloc.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(alloc.ptr)); }); std::for_each(allocations.begin(), allocations.end(), [stream, mr](allocation& alloc) { @@ -196,7 +197,7 @@ inline void test_mixed_random_allocation_free(rmm::mr::device_memory_resource* m EXPECT_NO_THROW(allocations.emplace_back(mr->allocate(size, stream), size)); auto new_allocation = allocations.back(); EXPECT_NE(nullptr, new_allocation.ptr); - EXPECT_TRUE(rmm::detail::is_pointer_aligned(new_allocation.ptr)); + EXPECT_TRUE(rmm::is_pointer_aligned(new_allocation.ptr)); } else { auto const index = static_cast(index_distribution(generator) % active_allocations); active_allocations--; @@ -257,7 +258,8 @@ inline auto make_managed() { return std::make_shared(make_cuda()); + return rmm::mr::make_owning_wrapper( + make_cuda(), rmm::percent_of_free_device_memory(50)); } inline auto make_arena() diff --git a/tests/mr/device/pool_mr_tests.cpp b/tests/mr/device/pool_mr_tests.cpp index 2f32889d0..a2793386f 100644 --- a/tests/mr/device/pool_mr_tests.cpp +++ b/tests/mr/device/pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021, NVIDIA CORPORATION. + * Copyright (c) 2020-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -14,9 +14,8 @@ * limitations under the License. */ +#include #include -#include -#include #include #include #include @@ -39,7 +38,7 @@ using limiting_mr = rmm::mr::limiting_resource_adaptor(static_cast(free) * 0.9), - rmm::detail::CUDA_ALLOCATION_ALIGNMENT); + auto const ninety_percent_pool = rmm::percent_of_free_device_memory(90); pool_mr mr{rmm::mr::get_current_device_resource(), ninety_percent_pool}; }; EXPECT_NO_THROW(allocate_ninety()); @@ -83,9 +80,8 @@ TEST(PoolTest, AllocateNinetyPercent) TEST(PoolTest, TwoLargeBuffers) { auto two_large = []() { - auto const [free, total] = rmm::detail::available_device_memory(); - (void)total; - pool_mr mr{rmm::mr::get_current_device_resource()}; + [[maybe_unused]] auto const [free, total] = rmm::available_device_memory(); + pool_mr mr{rmm::mr::get_current_device_resource(), rmm::percent_of_free_device_memory(50)}; auto* ptr1 = mr.allocate(free / 4); auto* ptr2 = mr.allocate(free / 4); mr.deallocate(ptr1, free / 4); @@ -158,8 +154,8 @@ TEST(PoolTest, NonAlignedPoolSize) TEST(PoolTest, UpstreamDoesntSupportMemInfo) { cuda_mr cuda; - pool_mr mr1(&cuda); - pool_mr mr2(&mr1); + pool_mr mr1(&cuda, 0); + pool_mr mr2(&mr1, 0); auto* ptr = mr2.allocate(1024); mr2.deallocate(ptr, 1024); } diff --git a/tests/mr/host/mr_ref_tests.cpp b/tests/mr/host/mr_ref_tests.cpp index 6563eb635..416641f18 100644 --- a/tests/mr/host/mr_ref_tests.cpp +++ b/tests/mr/host/mr_ref_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include "../../byte_literals.hpp" -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace rmm::test { namespace { inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - return rmm::detail::is_pointer_aligned(ptr, alignment); + return rmm::is_pointer_aligned(ptr, alignment); } // Returns true if a pointer points to a device memory or managed memory allocation. diff --git a/tests/mr/host/mr_tests.cpp b/tests/mr/host/mr_tests.cpp index 678d6aeb8..e0078c920 100644 --- a/tests/mr/host/mr_tests.cpp +++ b/tests/mr/host/mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,7 @@ #include "../../byte_literals.hpp" -#include +#include #include #include #include @@ -35,7 +35,7 @@ namespace rmm::test { namespace { inline bool is_aligned(void* ptr, std::size_t alignment = alignof(std::max_align_t)) { - return rmm::detail::is_pointer_aligned(ptr, alignment); + return rmm::is_pointer_aligned(ptr, alignment); } // Returns true if a pointer points to a device memory or managed memory allocation. diff --git a/tests/mr/host/pinned_pool_mr_tests.cpp b/tests/mr/host/pinned_pool_mr_tests.cpp index dcdae37fa..d10b85e72 100644 --- a/tests/mr/host/pinned_pool_mr_tests.cpp +++ b/tests/mr/host/pinned_pool_mr_tests.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,7 +16,6 @@ #include #include -#include #include #include #include @@ -33,7 +32,7 @@ using pool_mr = rmm::mr::pool_memory_resource; TEST(PinnedPoolTest, ThrowOnNullUpstream) { - auto construct_nullptr = []() { pool_mr mr{nullptr}; }; + auto construct_nullptr = []() { pool_mr mr{nullptr, 1024}; }; EXPECT_THROW(construct_nullptr(), rmm::logic_error); }