From 7ef81304c0c361750ab8647bd2075a0dcc5ddb09 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Wo=CC=81jcik?= <3044353+pwojcikdev@users.noreply.github.com> Date: Fri, 18 Oct 2024 17:26:07 +0200 Subject: [PATCH] Deferred confirming set --- nano/lib/stats_enums.hpp | 3 ++ nano/node/confirming_set.cpp | 99 +++++++++++++++++++++++++++++++++--- nano/node/confirming_set.hpp | 15 +++++- nano/node/node.cpp | 2 +- 4 files changed, 111 insertions(+), 8 deletions(-) diff --git a/nano/lib/stats_enums.hpp b/nano/lib/stats_enums.hpp index 1e3889d180..c233218f6e 100644 --- a/nano/lib/stats_enums.hpp +++ b/nano/lib/stats_enums.hpp @@ -137,6 +137,8 @@ enum class detail empty, done, retry, + requeued, + evicted, // processing queue queue, @@ -506,6 +508,7 @@ enum class detail cementing, cemented_hash, cementing_failed, + deferred_failed, // election_state passive, diff --git a/nano/node/confirming_set.cpp b/nano/node/confirming_set.cpp index 4d8d145084..73b356ed78 100644 --- a/nano/node/confirming_set.cpp +++ b/nano/node/confirming_set.cpp @@ -1,5 +1,7 @@ +#include #include #include +#include #include #include #include @@ -7,9 +9,10 @@ #include #include -nano::confirming_set::confirming_set (confirming_set_config const & config_a, nano::ledger & ledger_a, nano::stats & stats_a, nano::logger & logger_a) : +nano::confirming_set::confirming_set (confirming_set_config const & config_a, nano::ledger & ledger_a, nano::block_processor & block_processor_a, nano::stats & stats_a, nano::logger & logger_a) : config{ config_a }, ledger{ ledger_a }, + block_processor{ block_processor_a }, stats{ stats_a }, logger{ logger_a }, notification_workers{ 1, nano::thread_role::name::confirmation_height_notifications } @@ -20,6 +23,28 @@ nano::confirming_set::confirming_set (confirming_set_config const & config_a, na cemented_observers.notify (context.block); } }); + + // Requeue blocks that failed to cement immediately due to missing ledger blocks + block_processor.batch_processed.add ([this] (auto const & batch) { + bool should_notify = false; + { + std::lock_guard lock{ mutex }; + for (auto const & [result, context] : batch) + { + if (auto it = deferred.get ().find (context.block->hash ()); it != deferred.get ().end ()) + { + stats.inc (nano::stat::type::confirming_set, nano::stat::detail::requeued); + set.push_back (*it); + deferred.get ().erase (it); + should_notify = true; + } + } + } + if (should_notify) + { + condition.notify_all (); + } + }); } nano::confirming_set::~confirming_set () @@ -78,13 +103,14 @@ void nano::confirming_set::stop () bool nano::confirming_set::contains (nano::block_hash const & hash) const { std::lock_guard lock{ mutex }; - return set.get ().contains (hash) || current.contains (hash); + return set.get ().contains (hash) || deferred.get ().contains (hash) || current.contains (hash); } std::size_t nano::confirming_set::size () const { + // Do not report deferred blocks, as they are not currently being processed (and might never be requeued) std::lock_guard lock{ mutex }; - return set.size (); + return set.size () + current.size (); } void nano::confirming_set::run () @@ -94,6 +120,9 @@ void nano::confirming_set::run () { stats.inc (nano::stat::type::confirming_set, nano::stat::detail::loop); + cleanup (lock); + debug_assert (lock.owns_lock ()); + if (!set.empty ()) { run_batch (lock); @@ -121,6 +150,46 @@ auto nano::confirming_set::next_batch (size_t max_count) -> std::deque return results; } +void nano::confirming_set::cleanup (std::unique_lock & lock) +{ + debug_assert (lock.owns_lock ()); + debug_assert (!mutex.try_lock ()); + + auto const cutoff = std::chrono::steady_clock::now () - config.deferred_age_cutoff; + std::deque evicted; + + auto should_evict = [&] (entry const & entry) { + return entry.timestamp < cutoff; + }; + + // Iterate in sequenced (insertion) order + for (auto it = deferred.begin (), end = deferred.end (); it != end;) + { + if (should_evict (*it) || deferred.size () > config.max_deferred) + { + stats.inc (nano::stat::type::confirming_set, nano::stat::detail::evicted); + debug_assert (ledger.any.block_exists (ledger.tx_begin_read (), it->hash)); + evicted.push_back (*it); + it = deferred.erase (it); + } + else + { + break; // Entries are sequenced, so we can stop here and avoid unnecessary iteration + } + } + + // Notify about evicted blocks so that other components can perform necessary cleanup + if (!evicted.empty ()) + { + lock.unlock (); + for (auto const & entry : evicted) + { + cementing_failed.notify (entry.hash); + } + lock.lock (); + } +} + void nano::confirming_set::run_batch (std::unique_lock & lock) { debug_assert (lock.owns_lock ()); @@ -134,9 +203,9 @@ void nano::confirming_set::run_batch (std::unique_lock & lock) // Keep track of the blocks we're currently cementing, so that the .contains (...) check is accurate debug_assert (current.empty ()); - for (auto const & [hash, election] : batch) + for (auto const & entry : batch) { - current.insert (hash); + current.insert (entry.hash); } lock.unlock (); @@ -175,10 +244,14 @@ void nano::confirming_set::run_batch (std::unique_lock & lock) } }; + std::deque failed; { auto transaction = ledger.tx_begin_write (nano::store::writer::confirmation_height); - for (auto const & [hash, election] : batch) + for (auto const & entry : batch) { + auto const & hash = entry.hash; + auto const & election = entry.election; + size_t cemented_count = 0; bool success = false; do @@ -233,8 +306,20 @@ void nano::confirming_set::run_batch (std::unique_lock & lock) { stats.inc (nano::stat::type::confirming_set, nano::stat::detail::cementing_failed); logger.debug (nano::log::type::confirming_set, "Failed to cement block: {}", hash.to_string ()); + failed.push_back (entry); } } + + lock.lock (); + + // Requeue failed blocks for processing later + // Add them to the deferred set while still holding the exclusive database write transaction to avoid block processor races + for (auto const & entry : failed) + { + deferred.push_back (entry); + } + + lock.unlock (); } notify (); @@ -242,6 +327,7 @@ void nano::confirming_set::run_batch (std::unique_lock & lock) already_cemented.notify (already); + // Clear current set only after the transaction is committed lock.lock (); current.clear (); lock.unlock (); @@ -253,6 +339,7 @@ nano::container_info nano::confirming_set::container_info () const nano::container_info info; info.put ("set", set); + info.put ("deferred", deferred); info.add ("notification_workers", notification_workers.container_info ()); return info; } diff --git a/nano/node/confirming_set.hpp b/nano/node/confirming_set.hpp index 99569ce1e6..9e52960808 100644 --- a/nano/node/confirming_set.hpp +++ b/nano/node/confirming_set.hpp @@ -34,6 +34,11 @@ class confirming_set_config final /** Maximum number of dependent blocks to be stored in memory during processing */ size_t max_blocks{ 128 * 1024 }; size_t max_queued_notifications{ 8 }; + + /** Maximum number of failed blocks to wait for requeuing */ + size_t max_deferred{ 16 * 1024 }; + /** Max age of deferred blocks before they are dropped */ + std::chrono::seconds deferred_age_cutoff{ 15min }; }; /** @@ -45,7 +50,7 @@ class confirming_set final friend class confirmation_height_pruned_source_Test; public: - confirming_set (confirming_set_config const &, nano::ledger &, nano::stats &, nano::logger &); + confirming_set (confirming_set_config const &, nano::ledger &, nano::block_processor &, nano::stats &, nano::logger &); ~confirming_set (); void start (); @@ -69,12 +74,14 @@ class confirming_set final nano::observer_set const &> batch_cemented; nano::observer_set const &> already_cemented; + nano::observer_set cementing_failed; nano::observer_set> cemented_observers; private: // Dependencies confirming_set_config const & config; nano::ledger & ledger; + nano::block_processor & block_processor; nano::stats & stats; nano::logger & logger; @@ -83,11 +90,13 @@ class confirming_set final { nano::block_hash hash; std::shared_ptr election; + std::chrono::steady_clock::time_point timestamp{ std::chrono::steady_clock::now () }; }; void run (); void run_batch (std::unique_lock &); std::deque next_batch (size_t max_count); + void cleanup (std::unique_lock &); private: // clang-format off @@ -102,7 +111,11 @@ class confirming_set final >>; // clang-format on + // Blocks that are ready to be cemented ordered_entries set; + // Blocks that could not be cemented immediately (e.g. waiting for rollbacks to complete) + ordered_entries deferred; + // Blocks that are being cemented in the current batch std::unordered_set current; nano::thread_pool notification_workers; diff --git a/nano/node/node.cpp b/nano/node/node.cpp index 3cb0e03534..c57f9c0a97 100644 --- a/nano/node/node.cpp +++ b/nano/node/node.cpp @@ -115,7 +115,7 @@ nano::node::node (std::shared_ptr io_ctx_a, std::filesy port_mapping_impl{ std::make_unique (*this) }, port_mapping{ *port_mapping_impl }, block_processor (*this), - confirming_set_impl{ std::make_unique (config.confirming_set, ledger, stats, logger) }, + confirming_set_impl{ std::make_unique (config.confirming_set, ledger, block_processor, stats, logger) }, confirming_set{ *confirming_set_impl }, active_impl{ std::make_unique (*this, confirming_set, block_processor) }, active{ *active_impl },