From 2f29bdbe281e2f81452a17d65d8ddc661d121109 Mon Sep 17 00:00:00 2001
From: Francesco Biscani
Date: Thu, 1 Aug 2024 14:48:49 +0200
Subject: [PATCH] Allow the LLVM cache machinery to cache multiple modules at a
 time.

---
 include/heyoka/llvm_state.hpp |  9 ++--
 src/llvm_state.cpp            | 26 +++++++----
 src/llvm_state_mem_cache.cpp  | 82 +++++++++++++++++++++++++----------
 3 files changed, 81 insertions(+), 36 deletions(-)

diff --git a/include/heyoka/llvm_state.hpp b/include/heyoka/llvm_state.hpp
index 2bf169838..6cf3dae84 100644
--- a/include/heyoka/llvm_state.hpp
+++ b/include/heyoka/llvm_state.hpp
@@ -12,6 +12,7 @@
 #include
 
 #include
+#include
 #include
 #include
 #include
@@ -338,12 +339,14 @@ namespace detail
 
 // The value contained in the in-memory cache.
 struct llvm_mc_value {
-    std::string opt_bc, opt_ir, obj;
+    std::vector<std::string> opt_bc, opt_ir, obj;
+
+    std::size_t total_size() const;
 };
 
 // Cache lookup and insertion.
-std::optional<llvm_mc_value> llvm_state_mem_cache_lookup(const std::string &, unsigned);
-void llvm_state_mem_cache_try_insert(std::string, unsigned, llvm_mc_value);
+std::optional<llvm_mc_value> llvm_state_mem_cache_lookup(const std::vector<std::string> &, unsigned);
+void llvm_state_mem_cache_try_insert(std::vector<std::string>, unsigned, llvm_mc_value);
 
 } // namespace detail
 
diff --git a/src/llvm_state.cpp b/src/llvm_state.cpp
index 659711721..f096fd0c7 100644
--- a/src/llvm_state.cpp
+++ b/src/llvm_state.cpp
@@ -1325,11 +1325,15 @@ void llvm_state::compile()
     // to fix the module and re-attempt compilation without having
     // altered the module and without having already added the trigger
     // function.
+    // NOTE: this function does its own cleanup, no need to
+    // start the try catch block yet.
     add_obj_trigger();
 
     try {
         // Fetch the bitcode *before* optimisation.
         auto orig_bc = get_bc();
+        std::vector<std::string> obc;
+        obc.push_back(std::move(orig_bc));
 
         // Combine m_opt_level, m_force_avx512, m_slp_vectorize and m_c_model into a single value,
         // as they all affect codegen.
@@ -1341,22 +1345,26 @@
         assert(m_opt_level <= 3u);
         assert(static_cast<unsigned>(m_c_model) <= 7u);
         static_assert(std::numeric_limits<unsigned>::digits >= 7u);
-        const auto olevel = m_opt_level + (static_cast<unsigned>(m_force_avx512) << 2)
-                            + (static_cast<unsigned>(m_slp_vectorize) << 3) + (static_cast<unsigned>(m_c_model) << 4);
+        const auto comp_flag = m_opt_level + (static_cast<unsigned>(m_force_avx512) << 2)
+                               + (static_cast<unsigned>(m_slp_vectorize) << 3)
+                               + (static_cast<unsigned>(m_c_model) << 4);
 
-        if (auto cached_data = detail::llvm_state_mem_cache_lookup(orig_bc, olevel)) {
+        if (auto cached_data = detail::llvm_state_mem_cache_lookup(obc, comp_flag)) {
             // Cache hit.
 
             // Assign the snapshots.
-            m_ir_snapshot = std::move(cached_data->opt_ir);
-            m_bc_snapshot = std::move(cached_data->opt_bc);
+            assert(cached_data->opt_ir.size() == 1u);
+            assert(cached_data->opt_bc.size() == 1u);
+            assert(cached_data->obj.size() == 1u);
+            m_ir_snapshot = std::move(cached_data->opt_ir[0]);
+            m_bc_snapshot = std::move(cached_data->opt_bc[0]);
 
             // Clear out module and builder.
             m_module.reset();
             m_builder.reset();
 
             // Assign the object file.
-            detail::llvm_state_add_obj_to_jit(*m_jitter, std::move(cached_data->obj));
+            detail::llvm_state_add_obj_to_jit(*m_jitter, std::move(cached_data->obj[0]));
         } else {
             sw.reset();
 
@@ -1372,10 +1380,10 @@
 
             logger->trace("materialisation runtime: {}", sw);
 
-            // Try to insert orig_bc into the cache.
-            detail::llvm_state_mem_cache_try_insert(std::move(orig_bc), olevel,
+            // Try to insert obc into the cache.
+            detail::llvm_state_mem_cache_try_insert(std::move(obc), comp_flag,
                                                     // NOLINTNEXTLINE(bugprone-unchecked-optional-access)
-                                                    {m_bc_snapshot, m_ir_snapshot, *m_jitter->m_object_file});
+                                                    {{m_bc_snapshot}, {m_ir_snapshot}, {*m_jitter->m_object_file}});
         }
         // LCOV_EXCL_START
     } catch (...) {
diff --git a/src/llvm_state_mem_cache.cpp b/src/llvm_state_mem_cache.cpp
index e3135ae25..88ba8ea0f 100644
--- a/src/llvm_state_mem_cache.cpp
+++ b/src/llvm_state_mem_cache.cpp
@@ -17,7 +17,7 @@
 #include
 #include
 #include
-#include
+#include
 #include
 #include
 
@@ -28,7 +28,8 @@
 #include
 
 // This in-memory cache maps the bitcode
-// of an LLVM module and an optimisation level to:
+// of one or more LLVM modules and an integer flag
+// (representing several compilation settings) to:
 //
 // - the optimised version of the bitcode,
 // - the textual IR corresponding
@@ -43,6 +44,26 @@ HEYOKA_BEGIN_NAMESPACE
 namespace detail
 {
 
+// Helper to compute the total size in bytes
+// of the data contained in an llvm_mc_value.
+// Will throw on overflow.
+std::size_t llvm_mc_value::total_size() const
+{
+    assert(!opt_bc.empty());
+    assert(opt_bc.size() == opt_ir.size());
+    assert(opt_bc.size() == obj.size());
+
+    boost::safe_numerics::safe<std::size_t> ret = 0;
+
+    for (decltype(opt_bc.size()) i = 0; i < opt_bc.size(); ++i) {
+        ret += opt_bc[i].size();
+        ret += opt_ir[i].size();
+        ret += obj[i].size();
+    }
+
+    return ret;
+}
+
 namespace
 {
 
@@ -56,16 +77,33 @@
 HEYOKA_CONSTINIT std::mutex mem_cache_mutex;
 
 // Definition of the data structures for the cache.
-using lru_queue_t = std::list<std::pair<std::string, unsigned>>;
+using lru_queue_t = std::list<std::pair<std::vector<std::string>, unsigned>>;
 
 using lru_key_t = lru_queue_t::iterator;
 
+// Implementation of hashing for std::pair<std::vector<std::string>, unsigned> and
+// its heterogeneous counterpart.
+template <typename T>
+auto cache_key_hasher(const T &k) noexcept
+{
+    assert(!k.first.empty());
+
+    // Combine the bitcodes.
+    auto seed = std::hash<std::string>{}(k.first[0]);
+    for (decltype(k.first.size()) i = 1; i < k.first.size(); ++i) {
+        boost::hash_combine(seed, k.first[i]);
+    }
+
+    // Combine with the compilation flag.
+    boost::hash_combine(seed, static_cast<std::size_t>(k.second));
+
+    return seed;
+}
+
 struct lru_hasher {
     std::size_t operator()(const lru_key_t &k) const noexcept
     {
-        auto seed = std::hash<std::string>{}(k->first);
-        boost::hash_combine(seed, k->second);
-        return seed;
+        return cache_key_hasher(*k);
     }
 };
 
@@ -96,16 +134,16 @@ HEYOKA_CONSTINIT std::uint64_t mem_cache_limit = 2147483648ull;
 
 // Machinery for heterogeneous lookup into the cache.
 // NOTE: this function MUST be invoked while holding the global lock.
-auto llvm_state_mem_cache_hl(const std::string &bc, unsigned opt_level)
+auto llvm_state_mem_cache_hl(const std::vector<std::string> &bc, unsigned comp_flag)
 {
-    using compat_key_t = std::pair<const std::string &, unsigned>;
+    // NOTE: the heterogeneous version of the key replaces std::vector<std::string>
+    // with a const reference.
+    using compat_key_t = std::pair<const std::vector<std::string> &, unsigned>;
 
     struct compat_hasher {
         std::size_t operator()(const compat_key_t &k) const noexcept
         {
-            auto seed = std::hash<std::string>{}(k.first);
-            boost::hash_combine(seed, k.second);
-            return seed;
+            return cache_key_hasher(k);
         }
     };
 
@@ -120,7 +158,7 @@ auto llvm_state_mem_cache_hl(const std::string &bc, unsigned opt_level)
         }
     };
 
-    return lru_map.find(std::make_pair(std::cref(bc), opt_level), compat_hasher{}, compat_cmp{});
+    return lru_map.find(std::make_pair(std::cref(bc), comp_flag), compat_hasher{}, compat_cmp{});
 }
 
 // Debug function to run sanity checks on the cache.
@@ -131,15 +169,13 @@ void llvm_state_mem_cache_sanity_checks()
 
     // Check that the computed size of the cache is consistent with mem_cache_size.
     assert(std::accumulate(lru_map.begin(), lru_map.end(), boost::safe_numerics::safe<std::uint64_t>(0),
-                           [](const auto &a, const auto &p) {
-                               return a + p.second.opt_bc.size() + p.second.opt_ir.size() + p.second.obj.size();
-                           })
+                           [](const auto &a, const auto &p) { return a + p.second.total_size(); })
            == mem_cache_size);
 }
 
 } // namespace
 
-std::optional<llvm_mc_value> llvm_state_mem_cache_lookup(const std::string &bc, unsigned opt_level)
+std::optional<llvm_mc_value> llvm_state_mem_cache_lookup(const std::vector<std::string> &bc, unsigned comp_flag)
 {
     // Lock down.
     const std::lock_guard lock(mem_cache_mutex);
@@ -147,7 +183,7 @@ std::optional<llvm_mc_value> llvm_state_mem_cache_lookup(const std::string &bc,
     // Sanity checks.
     llvm_state_mem_cache_sanity_checks();
 
-    if (const auto it = llvm_state_mem_cache_hl(bc, opt_level); it == lru_map.end()) {
+    if (const auto it = llvm_state_mem_cache_hl(bc, comp_flag); it == lru_map.end()) {
         // Cache miss.
         return {};
     } else {
@@ -163,7 +199,7 @@ std::optional<llvm_mc_value> llvm_state_mem_cache_lookup(const std::string &bc,
     }
 }
 
-void llvm_state_mem_cache_try_insert(std::string bc, unsigned opt_level, llvm_mc_value val)
+void llvm_state_mem_cache_try_insert(std::vector<std::string> bc, unsigned comp_flag, llvm_mc_value val)
 {
     // Lock down.
     const std::lock_guard lock(mem_cache_mutex);
@@ -174,7 +210,7 @@ void llvm_state_mem_cache_try_insert(std::string bc, unsigned opt_level, llvm_mc
     // Do a first lookup to check if bc is already in the cache.
     // This could happen, e.g., if two threads are compiling the same
     // code concurrently.
-    if (const auto it = llvm_state_mem_cache_hl(bc, opt_level); it != lru_map.end()) {
+    if (const auto it = llvm_state_mem_cache_hl(bc, comp_flag); it != lru_map.end()) {
         assert(val.opt_bc == it->second.opt_bc);
         assert(val.opt_ir == it->second.opt_ir);
         assert(val.obj == it->second.obj);
@@ -183,8 +219,7 @@ void llvm_state_mem_cache_try_insert(std::string bc, unsigned opt_level, llvm_mc
     }
 
     // Compute the new cache size.
-    auto new_cache_size = static_cast<std::uint64_t>(boost::safe_numerics::safe<std::uint64_t>(mem_cache_size)
-                                                     + val.opt_bc.size() + val.opt_ir.size() + val.obj.size());
+    auto new_cache_size = boost::safe_numerics::safe<std::uint64_t>(mem_cache_size) + val.total_size();
 
     // Remove items from the cache if we are exceeding
     // the limit.
@@ -195,8 +230,7 @@ void llvm_state_mem_cache_try_insert(std::string bc, unsigned opt_level, llvm_mc
         const auto &cur_val = cur_it->second;
         // NOTE: no possibility of overflow here, as cur_size is guaranteed
        // not to be greater than mem_cache_size.
-        const auto cur_size
-            = static_cast<std::uint64_t>(cur_val.opt_bc.size()) + cur_val.opt_ir.size() + cur_val.obj.size();
+        const auto cur_size = cur_val.total_size();
 
         // NOTE: the next 4 lines cannot throw, which ensures that the
         // cache cannot be left in an inconsistent state.
@@ -222,7 +256,7 @@ void llvm_state_mem_cache_try_insert(std::string bc, unsigned opt_level, llvm_mc
     // Add the new item to the front of the queue.
     // NOTE: if this throws, we have not modified lru_map yet,
    // no cleanup needed.
-    lru_queue.emplace_front(std::move(bc), opt_level);
+    lru_queue.emplace_front(std::move(bc), comp_flag);
 
     // Add the new item to the map.
     try {
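Note on the combined compilation flag (illustrative, not part of the patch): comp_flag packs the four codegen settings that affect the cached output into a single unsigned value, which is why the patch asserts that unsigned has at least 7 value bits: the optimisation level occupies bits 0-1, force_avx512 bit 2, slp_vectorize bit 3 and the code model bits 4-6. A minimal standalone sketch of the packing; the function name and parameters here are hypothetical stand-ins for the llvm_state members m_opt_level, m_force_avx512, m_slp_vectorize and m_c_model:

    #include <cassert>
    #include <limits>

    // Illustrative only: pack the codegen settings into a single flag,
    // mirroring the bit layout used in the patch.
    unsigned make_comp_flag(unsigned opt_level, bool force_avx512, bool slp_vectorize, unsigned c_model)
    {
        assert(opt_level <= 3u); // fits in bits 0-1
        assert(c_model <= 7u);   // fits in bits 4-6
        static_assert(std::numeric_limits<unsigned>::digits >= 7u);

        // Bits 0-1: optimisation level, bit 2: force_avx512,
        // bit 3: slp_vectorize, bits 4-6: code model.
        return opt_level + (static_cast<unsigned>(force_avx512) << 2)
               + (static_cast<unsigned>(slp_vectorize) << 3) + (c_model << 4);
    }

Two states compiled from identical bitcode but with different settings therefore map to different cache keys.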
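Note on the cache key hashing (a self-contained sketch, not heyoka API): with multiple modules the key becomes the vector of bitcodes plus the compilation flag, and cache_key_hasher seeds the hash with the first bitcode and folds in the remaining bitcodes and the flag via boost::hash_combine. Because the helper is templated over the key type, the owning key stored in the LRU queue and the non-owning heterogeneous lookup key (a const reference to the caller's vector, avoiding a copy of potentially large bitcode strings) hash identically. The idea in isolation, with a hypothetical function name:

    #include <cassert>
    #include <cstddef>
    #include <functional>
    #include <string>
    #include <vector>

    #include <boost/functional/hash.hpp>

    // Illustrative only: hash a (bitcodes, compilation flag) cache key.
    std::size_t hash_cache_key(const std::vector<std::string> &bitcodes, unsigned flag)
    {
        assert(!bitcodes.empty());

        // Seed with the first bitcode, then fold in the remaining ones.
        auto seed = std::hash<std::string>{}(bitcodes[0]);
        for (std::size_t i = 1; i < bitcodes.size(); ++i) {
            boost::hash_combine(seed, bitcodes[i]);
        }

        // Finally fold in the compilation flag.
        boost::hash_combine(seed, static_cast<std::size_t>(flag));

        return seed;
    }

Equal keys hash equally regardless of which representation is used, which is what the heterogeneous find into the LRU map relies on.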