From 7cb0c0a8096e8f4eec14aafd35dda8a7ea573035 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20W=C3=B3jcik?= <3044353+pwojcikdev@users.noreply.github.com> Date: Fri, 6 Sep 2024 16:21:35 +0200 Subject: [PATCH] Cementing fixes (#4722) * Adjust max cemented blocks per iteration * Stats * Refresh long running transactions * Graceful shutdown for `confirming_set` * Fix compilation * Fix assert --- nano/core_test/ledger.cpp | 60 ++++++++++++++++++++------- nano/core_test/request_aggregator.cpp | 7 +++- nano/lib/stats_enums.hpp | 3 +- nano/node/confirming_set.cpp | 13 +++++- nano/node/confirming_set.hpp | 4 +- nano/secure/ledger.cpp | 19 +++++++-- nano/secure/ledger.hpp | 4 +- nano/test_common/testutil.cpp | 3 +- 8 files changed, 84 insertions(+), 29 deletions(-) diff --git a/nano/core_test/ledger.cpp b/nano/core_test/ledger.cpp index 1156ad11a8..e7e0dfc3dc 100644 --- a/nano/core_test/ledger.cpp +++ b/nano/core_test/ledger.cpp @@ -5529,7 +5529,12 @@ TEST (ledger, cement_unbounded) auto ctx = nano::test::ledger_diamond (5); auto & ledger = ctx.ledger (); auto bottom = ctx.blocks ().back (); - auto confirmed = ledger.confirm (ledger.tx_begin_write (), bottom->hash ()); + + std::deque> confirmed; + { + auto tx = ledger.tx_begin_write (); + confirmed = ledger.confirm (tx, bottom->hash ()); + } ASSERT_TRUE (ledger.confirmed.block_exists (ledger.tx_begin_read (), bottom->hash ())); // Check that all blocks got confirmed in a single call ASSERT_TRUE (std::all_of (ctx.blocks ().begin (), ctx.blocks ().end (), [&] (auto const & block) { @@ -5547,8 +5552,13 @@ TEST (ledger, cement_bounded) auto & ledger = ctx.ledger (); auto bottom = ctx.blocks ().back (); - // This should only cement some of the dependencies - auto confirmed1 = ledger.confirm (ledger.tx_begin_write (), bottom->hash (), /* max cementing batch size */ 3); + std::deque> confirmed1, confirmed2, confirmed3; + + { + // This should only cement some of the dependencies + auto tx = ledger.tx_begin_write (); + confirmed1 = ledger.confirm (tx, bottom->hash (), /* max cementing batch size */ 3); + } ASSERT_FALSE (ledger.confirmed.block_exists (ledger.tx_begin_read (), bottom->hash ())); ASSERT_EQ (confirmed1.size (), 3); // Only topmost dependencies should get cemented during this call @@ -5556,8 +5566,11 @@ TEST (ledger, cement_bounded) return ledger.dependents_confirmed (ledger.tx_begin_read (), *block); })); - // This should cement a few more dependencies - auto confirmed2 = ledger.confirm (ledger.tx_begin_write (), bottom->hash (), /* max cementing batch size */ 16); + { + // This should cement a few more dependencies + auto tx = ledger.tx_begin_write (); + confirmed2 = ledger.confirm (tx, bottom->hash (), /* max cementing batch size */ 16); + } ASSERT_FALSE (ledger.confirmed.block_exists (ledger.tx_begin_read (), bottom->hash ())); ASSERT_EQ (confirmed2.size (), 16); // Only topmost dependencies should get cemented during this call @@ -5565,8 +5578,11 @@ TEST (ledger, cement_bounded) return ledger.dependents_confirmed (ledger.tx_begin_read (), *block); })); - // This should cement the remaining dependencies - auto confirmed3 = ledger.confirm (ledger.tx_begin_write (), bottom->hash (), /* max cementing batch size */ 64); + { + // This should cement the remaining dependencies + auto tx = ledger.tx_begin_write (); + confirmed3 = ledger.confirm (tx, bottom->hash (), /* max cementing batch size */ 64); + } ASSERT_TRUE (ledger.confirmed.block_exists (ledger.tx_begin_read (), bottom->hash ())); ASSERT_LE (confirmed3.size (), 64); // Every block in the ledger should be cemented @@ -5575,15 +5591,20 @@ TEST (ledger, cement_bounded) })); } -// Tests that bounded cementing works when mumber of blocks is large but tree height is small (recursion stack is small) +// Tests that bounded cementing works when number of blocks is large but tree height is small (recursion stack is small) TEST (ledger, cement_bounded_diamond) { auto ctx = nano::test::ledger_diamond (4); auto & ledger = ctx.ledger (); auto bottom = ctx.blocks ().back (); - // This should only cement some of the dependencies - auto confirmed1 = ledger.confirm (ledger.tx_begin_write (), bottom->hash (), /* max cementing batch size */ 3); + std::deque> confirmed1, confirmed2, confirmed3, confirmed4; + + { + // This should only cement some of the dependencies + auto tx = ledger.tx_begin_write (); + confirmed1 = ledger.confirm (tx, bottom->hash (), /* max cementing batch size */ 3); + } ASSERT_FALSE (ledger.confirmed.block_exists (ledger.tx_begin_read (), bottom->hash ())); ASSERT_EQ (confirmed1.size (), 3); // Only topmost dependencies should get cemented during this call @@ -5591,8 +5612,11 @@ TEST (ledger, cement_bounded_diamond) return ledger.dependents_confirmed (ledger.tx_begin_read (), *block); })); - // This should cement a few more dependencies - auto confirmed2 = ledger.confirm (ledger.tx_begin_write (), bottom->hash (), /* max cementing batch size */ 16); + { + // This should cement a few more dependencies + auto tx = ledger.tx_begin_write (); + confirmed2 = ledger.confirm (tx, bottom->hash (), /* max cementing batch size */ 16); + } ASSERT_FALSE (ledger.confirmed.block_exists (ledger.tx_begin_read (), bottom->hash ())); ASSERT_EQ (confirmed2.size (), 16); // Only topmost dependencies should get cemented during this call @@ -5600,8 +5624,11 @@ TEST (ledger, cement_bounded_diamond) return ledger.dependents_confirmed (ledger.tx_begin_read (), *block); })); - // A few more bounded calls should confirm the whole tree - auto confirmed3 = ledger.confirm (ledger.tx_begin_write (), bottom->hash (), /* max cementing batch size */ 64); + { + // A few more bounded calls should confirm the whole tree + auto tx = ledger.tx_begin_write (); + confirmed3 = ledger.confirm (tx, bottom->hash (), /* max cementing batch size */ 64); + } ASSERT_FALSE (ledger.confirmed.block_exists (ledger.tx_begin_read (), bottom->hash ())); ASSERT_EQ (confirmed3.size (), 64); // Only topmost dependencies should get cemented during this call @@ -5609,7 +5636,10 @@ TEST (ledger, cement_bounded_diamond) return ledger.dependents_confirmed (ledger.tx_begin_read (), *block); })); - auto confirmed4 = ledger.confirm (ledger.tx_begin_write (), bottom->hash (), /* max cementing batch size */ 64); + { + auto tx = ledger.tx_begin_write (); + confirmed4 = ledger.confirm (tx, bottom->hash (), /* max cementing batch size */ 64); + } ASSERT_TRUE (ledger.confirmed.block_exists (ledger.tx_begin_read (), bottom->hash ())); ASSERT_LT (confirmed4.size (), 64); // Every block in the ledger should be cemented diff --git a/nano/core_test/request_aggregator.cpp b/nano/core_test/request_aggregator.cpp index 5cc40202bb..fcb580ef9a 100644 --- a/nano/core_test/request_aggregator.cpp +++ b/nano/core_test/request_aggregator.cpp @@ -290,8 +290,11 @@ TEST (request_aggregator, split) ASSERT_EQ (nano::block_status::progress, node.ledger.process (node.ledger.tx_begin_write (), block)); request.emplace_back (block->hash (), block->root ()); } - // Confirm all blocks - node.ledger.confirm (node.ledger.tx_begin_write (), blocks.back ()->hash ()); + { + // Confirm all blocks + auto tx = node.ledger.tx_begin_write (); + node.ledger.confirm (tx, blocks.back ()->hash ()); + } ASSERT_TIMELY_EQ (5s, max_vbh + 2, node.ledger.cemented_count ()); ASSERT_EQ (max_vbh + 1, request.size ()); auto client = std::make_shared (node); diff --git a/nano/lib/stats_enums.hpp b/nano/lib/stats_enums.hpp index 9b27ee8ad8..64e7f97165 100644 --- a/nano/lib/stats_enums.hpp +++ b/nano/lib/stats_enums.hpp @@ -492,7 +492,8 @@ enum class detail notify_already_cemented, notify_intermediate, already_cemented, - cementing_hash, + cementing, + cemented_hash, // election_state passive, diff --git a/nano/node/confirming_set.cpp b/nano/node/confirming_set.cpp index 49f6daff7f..867aa80e1c 100644 --- a/nano/node/confirming_set.cpp +++ b/nano/node/confirming_set.cpp @@ -133,10 +133,11 @@ void nano::confirming_set::run_batch (std::unique_lock & lock) notification.already_cemented.swap (already); std::unique_lock lock{ mutex }; + while (notification_workers.num_queued_tasks () >= config.max_queued_notifications) { stats.inc (nano::stat::type::confirming_set, nano::stat::detail::cooldown); - condition.wait_for (lock, 100ms, [this] { return stopped; }); + condition.wait_for (lock, 100ms, [this] { return stopped.load (); }); if (stopped) { return; @@ -168,11 +169,17 @@ void nano::confirming_set::run_batch (std::unique_lock & lock) { transaction.refresh_if_needed (); - stats.inc (nano::stat::type::confirming_set, nano::stat::detail::cementing_hash); + // Cementing deep dependency chains might take a long time, allow for graceful shutdown, ignore notifications + if (stopped) + { + return; + } // Issue notifications here, so that `cemented` set is not too large before we add more blocks notify_maybe (transaction); + stats.inc (nano::stat::type::confirming_set, nano::stat::detail::cementing); + auto added = ledger.confirm (transaction, hash, config.max_blocks); if (!added.empty ()) { @@ -190,6 +197,8 @@ void nano::confirming_set::run_batch (std::unique_lock & lock) debug_assert (ledger.confirmed.block_exists (transaction, hash)); } } while (!ledger.confirmed.block_exists (transaction, hash)); + + stats.inc (nano::stat::type::confirming_set, nano::stat::detail::cemented_hash); } } diff --git a/nano/node/confirming_set.hpp b/nano/node/confirming_set.hpp index cb33915518..2f363b51e6 100644 --- a/nano/node/confirming_set.hpp +++ b/nano/node/confirming_set.hpp @@ -20,7 +20,7 @@ class confirming_set_config final public: /** Maximum number of dependent blocks to be stored in memory during processing */ - size_t max_blocks{ 64 * 128 }; + size_t max_blocks{ 128 * 1024 }; size_t max_queued_notifications{ 8 }; }; @@ -75,7 +75,7 @@ class confirming_set final nano::thread_pool notification_workers; - bool stopped{ false }; + std::atomic stopped{ false }; mutable std::mutex mutex; std::condition_variable condition; std::thread thread; diff --git a/nano/secure/ledger.cpp b/nano/secure/ledger.cpp index e61ff59119..822cc32c93 100644 --- a/nano/secure/ledger.cpp +++ b/nano/secure/ledger.cpp @@ -814,12 +814,12 @@ nano::uint128_t nano::ledger::account_receivable (secure::transaction const & tr // Both stack and result set are bounded to limit maximum memory usage // Callers must ensure that the target block was confirmed, and if not, call this function multiple times -std::deque> nano::ledger::confirm (secure::write_transaction const & transaction, nano::block_hash const & hash, size_t max_blocks) +std::deque> nano::ledger::confirm (secure::write_transaction & transaction, nano::block_hash const & target_hash, size_t max_blocks) { std::deque> result; std::deque stack; - stack.push_back (hash); + stack.push_back (target_hash); while (!stack.empty ()) { auto hash = stack.back (); @@ -831,6 +831,8 @@ std::deque> nano::ledger::confirm (secure::write_tr { if (!dependent.is_zero () && !confirmed.block_exists_or_pruned (transaction, dependent)) { + stats.inc (nano::stat::type::confirmation_height, nano::stat::detail::dependent_unconfirmed); + stack.push_back (dependent); // Limit the stack size to avoid excessive memory usage @@ -849,7 +851,7 @@ std::deque> nano::ledger::confirm (secure::write_tr { // We must only confirm blocks that have their dependencies confirmed debug_assert (dependents_confirmed (transaction, *block)); - confirm (transaction, *block); + confirm_one (transaction, *block); result.push_back (block); } } @@ -858,6 +860,14 @@ std::deque> nano::ledger::confirm (secure::write_tr // Unconfirmed dependencies were added } + // Refresh the transaction to avoid long-running transactions + // Ensure that the block wasn't rolled back during the refresh + bool refreshed = transaction.refresh_if_needed (); + if (refreshed) + { + release_assert (any.block_exists (transaction, target_hash), "block was rolled back during cementing"); + } + // Early return might leave parts of the dependency tree unconfirmed if (result.size () >= max_blocks) { @@ -868,12 +878,13 @@ std::deque> nano::ledger::confirm (secure::write_tr return result; } -void nano::ledger::confirm (secure::write_transaction const & transaction, nano::block const & block) +void nano::ledger::confirm_one (secure::write_transaction & transaction, nano::block const & block) { debug_assert ((!store.confirmation_height.get (transaction, block.account ()) && block.sideband ().height == 1) || store.confirmation_height.get (transaction, block.account ()).value ().height + 1 == block.sideband ().height); confirmation_height_info info{ block.sideband ().height, block.hash () }; store.confirmation_height.put (transaction, block.account (), info); ++cache.cemented_count; + stats.inc (nano::stat::type::confirmation_height, nano::stat::detail::blocks_confirmed); } diff --git a/nano/secure/ledger.hpp b/nano/secure/ledger.hpp index 99336c56ba..3419731e11 100644 --- a/nano/secure/ledger.hpp +++ b/nano/secure/ledger.hpp @@ -60,7 +60,7 @@ class ledger final std::string block_text (nano::block_hash const &); std::pair hash_root_random (secure::transaction const &) const; std::optional pending_info (secure::transaction const &, nano::pending_key const & key) const; - std::deque> confirm (secure::write_transaction const &, nano::block_hash const & hash, size_t max_blocks = 1024 * 128); + std::deque> confirm (secure::write_transaction &, nano::block_hash const & hash, size_t max_blocks = 1024 * 128); nano::block_status process (secure::write_transaction const &, std::shared_ptr block); bool rollback (secure::write_transaction const &, nano::block_hash const &, std::vector> &); bool rollback (secure::write_transaction const &, nano::block_hash const &); @@ -100,7 +100,7 @@ class ledger final private: void initialize (nano::generate_cache_flags const &); - void confirm (secure::write_transaction const & transaction, nano::block const & block); + void confirm_one (secure::write_transaction &, nano::block const & block); std::unique_ptr any_impl; std::unique_ptr confirmed_impl; diff --git a/nano/test_common/testutil.cpp b/nano/test_common/testutil.cpp index d77eefc47b..59554846fa 100644 --- a/nano/test_common/testutil.cpp +++ b/nano/test_common/testutil.cpp @@ -138,7 +138,8 @@ void nano::test::confirm (nano::ledger & ledger, std::shared_ptr co void nano::test::confirm (nano::ledger & ledger, nano::block_hash const & hash) { - ledger.confirm (ledger.tx_begin_write (), hash); + auto transaction = ledger.tx_begin_write (); + ledger.confirm (transaction, hash); } bool nano::test::block_or_pruned_all_exists (nano::node & node, std::vector hashes)