Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Factoring out allocator cache
Browse files Browse the repository at this point in the history
hkaiser committed Sep 17, 2024
1 parent f8d92b0 commit 5f69e24
Showing 18 changed files with 210 additions and 165 deletions.
2 changes: 1 addition & 1 deletion libs/core/allocator_support/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -42,7 +42,7 @@ set(allocator_support_compat_headers
)
# cmake-format: on

set(allocator_support_sources)
set(allocator_support_sources thread_local_caching_allocator.cpp)

include(HPX_AddModule)
add_hpx_module(
Original file line number Diff line number Diff line change
@@ -10,6 +10,7 @@
#include <hpx/allocator_support/config/defines.hpp>

#include <cstddef>
#include <functional>
#include <memory>
#include <new>
#include <type_traits>
@@ -21,9 +22,18 @@ namespace hpx::util {
!((defined(HPX_HAVE_CUDA) && defined(__CUDACC__)) || \
defined(HPX_HAVE_HIP))

namespace detail {

HPX_CORE_EXPORT void init_allocator_cache(
std::function<void()>&& clear_cache);
HPX_CORE_EXPORT std::pair<void*, std::size_t>
allocate_from_cache() noexcept;
HPX_CORE_EXPORT [[nodiscard]] bool cache_empty() noexcept;
HPX_CORE_EXPORT void return_to_cache(void* p, std::size_t n) noexcept;
} // namespace detail

///////////////////////////////////////////////////////////////////////////
template <template <typename, typename> class Stack, typename T = char,
typename Allocator = std::allocator<T>>
template <typename T = char, typename Allocator = std::allocator<T>>
struct thread_local_caching_allocator
{
HPX_NO_UNIQUE_ADDRESS Allocator alloc;
@@ -39,7 +49,7 @@ namespace hpx::util {
template <typename U>
struct rebind
{
using other = thread_local_caching_allocator<Stack, U,
using other = thread_local_caching_allocator<U,
typename traits::template rebind_alloc<U>>;
};

@@ -51,91 +61,36 @@ namespace hpx::util {
using propagate_on_container_swap =
typename traits::propagate_on_container_swap;

private:
struct allocated_cache
explicit thread_local_caching_allocator(
Allocator const& alloc = Allocator{}) noexcept(noexcept(std::
is_nothrow_copy_constructible_v<Allocator>))
: alloc(alloc)
{
explicit allocated_cache(Allocator const& a) noexcept(
noexcept(std::is_nothrow_copy_constructible_v<Allocator>))
: alloc(a)
, data(0)
{
}

allocated_cache(allocated_cache const&) = delete;
allocated_cache(allocated_cache&&) = delete;
allocated_cache& operator=(allocated_cache const&) = delete;
allocated_cache& operator=(allocated_cache&&) = delete;

~allocated_cache()
{
clear_cache();
}

pointer allocate(size_type n)
{
pointer p;
std::pair<T*, size_type> pair;
if (data.pop(pair))
// Note: capturing the allocator will be ok only as long as it
// doesn't have any state as this lambda will be possibly called
// very late during destruction of the thread_local cache.
static_assert(std::is_empty_v<Allocator>,
"Please don't use allocators with state in conjunction with "
"the thread_local_caching_allocator");

auto f = [alloc]() mutable {
while (!detail::cache_empty())
{
p = pair.first;
}
else
{
p = traits::allocate(alloc, n);
if (p == nullptr)
auto [p, n] = detail::allocate_from_cache();
if (p != nullptr)
{
throw std::bad_alloc();
traits::deallocate(const_cast<Allocator&>(alloc),
static_cast<char*>(p), n);
}
}
};

++allocated;
return p;
}

void deallocate(pointer p, size_type n) noexcept
{
data.push(std::make_pair(p, n));
if (++deallocated > 2 * (allocated + 16))
{
clear_cache();
allocated = 0;
deallocated = 0;
}
}

private:
void clear_cache() noexcept
{
std::pair<T*, size_type> p;
while (data.pop(p))
{
traits::deallocate(alloc, p.first, p.second);
}
}

HPX_NO_UNIQUE_ADDRESS Allocator alloc;
Stack<std::pair<T*, size_type>, Allocator> data;
std::size_t allocated = 0;
std::size_t deallocated = 0;
};

allocated_cache& cache()
{
thread_local allocated_cache allocated_data(alloc);
return allocated_data;
}

public:
explicit thread_local_caching_allocator(
Allocator const& alloc = Allocator{}) noexcept(noexcept(std::
is_nothrow_copy_constructible_v<Allocator>))
: alloc(alloc)
{
detail::init_allocator_cache(HPX_MOVE(f));
}

template <typename U, typename Alloc>
explicit thread_local_caching_allocator(
thread_local_caching_allocator<Stack, U, Alloc> const&
thread_local_caching_allocator<U, Alloc> const&
rhs) noexcept(noexcept(std::
is_nothrow_copy_constructible_v<Alloc>))
: alloc(rhs.alloc)
@@ -155,16 +110,28 @@ namespace hpx::util {

[[nodiscard]] pointer allocate(size_type n, void const* = nullptr)
{
n *= sizeof(T);

if (max_size() < n)
{
throw std::bad_array_new_length();
}
return cache().allocate(n);

auto [p, _] = detail::allocate_from_cache();

Check notice

Code scanning / CodeQL

Unused local variable Note

Variable _ is not used.
if (p == nullptr)
{
p = traits::allocate(alloc, n);
if (p == nullptr)
{
throw std::bad_alloc();
}
}
return static_cast<pointer>(p);
}

void deallocate(pointer p, size_type n) noexcept
{
cache().deallocate(p, n);
detail::return_to_cache(p, n * sizeof(T));
}

[[nodiscard]] constexpr size_type max_size() noexcept
@@ -199,8 +166,7 @@ namespace hpx::util {
}
};
#else
template <template <typename, typename> class Stack, typename T = char,
typename Allocator = std::allocator<T>>
template <typename T = char, typename Allocator = std::allocator<T>>
using thread_local_caching_allocator = Allocator;
#endif
} // namespace hpx::util
111 changes: 111 additions & 0 deletions libs/core/allocator_support/src/thread_local_caching_allocator.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
// Copyright (c) 2023-2024 Hartmut Kaiser
//
// SPDX-License-Identifier: BSL-1.0
// Distributed under the Boost Software License, Version 1.0. (See accompanying
// file LICENSE_1_0.txt or copy at http://www.boost.org/LICENSE_1_0.txt)

#include <hpx/config.hpp>
#include <hpx/allocator_support/config/defines.hpp>

#if defined(HPX_ALLOCATOR_SUPPORT_HAVE_CACHING) && \
!((defined(HPX_HAVE_CUDA) && defined(__CUDACC__)) || \
defined(HPX_HAVE_HIP))

#include <hpx/allocator_support/thread_local_caching_allocator.hpp>

#include <cstddef>
#include <functional>
#include <stack>
#include <utility>

namespace hpx::util::detail {

///////////////////////////////////////////////////////////////////////////
struct allocated_cache
{
explicit allocated_cache() noexcept = default;

void init(std::function<void()>&& clear) noexcept
{
if (!clear_cache) // initialize once
{
clear_cache = HPX_MOVE(clear);
}
}

allocated_cache(allocated_cache const&) = delete;
allocated_cache(allocated_cache&&) = delete;
allocated_cache& operator=(allocated_cache const&) = delete;
allocated_cache& operator=(allocated_cache&&) = delete;

~allocated_cache()
{
clear_cache();
}

std::pair<void*, std::size_t> allocate() noexcept
{
std::pair<void*, std::size_t> p{nullptr, 0};
if (!data.empty())
{
p = data.top();
data.pop();

++allocated;
}
return p;
}

void deallocate(void* p, std::size_t n) noexcept
{
data.emplace(p, n);
if (++deallocated > 2 * (allocated + 16))
{
clear_cache();

allocated = 0;
deallocated = 0;
}
}

[[nodiscard]] bool empty() const noexcept
{
return data.empty();
}

private:
std::stack<std::pair<void*, std::size_t>> data;
std::size_t allocated = 0;
std::size_t deallocated = 0;
std::function<void()> clear_cache;
};

///////////////////////////////////////////////////////////////////////////
allocated_cache& cache()
{
thread_local allocated_cache allocated_data;
return allocated_data;
}

void init_allocator_cache(std::function<void()>&& clear_cache)
{
cache().init(HPX_MOVE(clear_cache));
}

std::pair<void*, std::size_t> allocate_from_cache() noexcept
{
return cache().allocate();
}

void return_to_cache(void* p, std::size_t const n) noexcept
{
cache().deallocate(p, n);
}

bool cache_empty() noexcept
{
return cache().empty();
}
} // namespace hpx::util::detail

#endif
2 changes: 1 addition & 1 deletion libs/core/async_base/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -34,6 +34,6 @@ add_hpx_module(
COMPAT_HEADERS ${async_base_compat_headers}
SOURCES ${async_base_sources}
MODULE_DEPENDENCIES hpx_allocator_support hpx_concepts hpx_config
hpx_concurrency hpx_coroutines hpx_tag_invoke
hpx_coroutines hpx_tag_invoke
CMAKE_SUBDIRS examples tests
)
7 changes: 2 additions & 5 deletions libs/core/async_base/include/hpx/async_base/dataflow.hpp
Original file line number Diff line number Diff line change
@@ -29,7 +29,6 @@ namespace hpx {
#else

#include <hpx/config.hpp>
#include <hpx/concurrency/stack.hpp>
#include <hpx/modules/allocator_support.hpp>
#include <hpx/modules/concepts.hpp>
#include <hpx/modules/tag_invoke.hpp>
@@ -61,14 +60,12 @@ namespace hpx {
// clang-format on
friend constexpr HPX_FORCEINLINE auto tag_fallback_invoke(
dataflow_t tag, F&& f, Ts&&... ts)
-> decltype(tag(hpx::util::thread_local_caching_allocator<
hpx::lockfree::variable_size_stack, char,
-> decltype(tag(hpx::util::thread_local_caching_allocator<char,
hpx::util::internal_allocator<>>{},
HPX_FORWARD(F, f), HPX_FORWARD(Ts, ts)...))
{
using allocator_type =
hpx::util::thread_local_caching_allocator<
hpx::lockfree::variable_size_stack, char,
hpx::util::thread_local_caching_allocator<char,
hpx::util::internal_allocator<>>;
return hpx::functional::tag_invoke(tag, allocator_type{},
HPX_FORWARD(F, f), HPX_FORWARD(Ts, ts)...);
1 change: 0 additions & 1 deletion libs/core/async_combinators/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -43,7 +43,6 @@ add_hpx_module(
EXCLUDE_FROM_GLOBAL_HEADER "hpx/async_combinators/future_wait.hpp"
MODULE_DEPENDENCIES
hpx_async_base
hpx_concurrency
hpx_config
hpx_errors
hpx_futures
Original file line number Diff line number Diff line change
@@ -131,7 +131,6 @@ namespace hpx {
#include <hpx/config.hpp>
#include <hpx/allocator_support/internal_allocator.hpp>
#include <hpx/allocator_support/thread_local_caching_allocator.hpp>
#include <hpx/concurrency/stack.hpp>
#include <hpx/datastructures/tuple.hpp>
#include <hpx/functional/tag_invoke.hpp>
#include <hpx/futures/detail/future_data.hpp>
@@ -229,8 +228,7 @@ namespace hpx::lcos::detail {
using frame_type = async_when_all_frame<result_type>;
using no_addref = typename frame_type::base_type::init_no_addref;

using allocator_type = hpx::util::thread_local_caching_allocator<
hpx::lockfree::variable_size_stack, char,
using allocator_type = hpx::util::thread_local_caching_allocator<char,
hpx::util::internal_allocator<>>;
auto frame = hpx::util::traverse_pack_async_allocator(allocator_type{},
hpx::util::async_traverse_in_place_tag<frame_type>{}, no_addref{},
1 change: 0 additions & 1 deletion libs/core/execution/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -160,7 +160,6 @@ add_hpx_module(
MODULE_DEPENDENCIES
hpx_async_base
hpx_async_combinators
hpx_concurrency
hpx_config
hpx_threading
hpx_pack_traversal
Original file line number Diff line number Diff line change
@@ -13,7 +13,6 @@
#include <hpx/assert.hpp>
#include <hpx/async_base/launch_policy.hpp>
#include <hpx/async_base/traits/is_launch_policy.hpp>
#include <hpx/concurrency/stack.hpp>
#include <hpx/coroutines/thread_enums.hpp>
#include <hpx/execution/traits/executor_traits.hpp>
#include <hpx/execution/traits/future_then_result_exec.hpp>
@@ -65,9 +64,9 @@ namespace hpx::lcos::detail {
using continuation_result_type =
hpx::util::invoke_result_t<F, Future>;

using allocator_type = hpx::util::thread_local_caching_allocator<
hpx::lockfree::variable_size_stack, char,
hpx::util::internal_allocator<>>;
using allocator_type =
hpx::util::thread_local_caching_allocator<char,
hpx::util::internal_allocator<>>;

hpx::traits::detail::shared_state_ptr_t<result_type> p =
detail::make_continuation_alloc<continuation_result_type>(
Loading

0 comments on commit 5f69e24

Please sign in to comment.