From 4581625fc2295ddfead98794e095b2a022e8bdd6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Piotr=20Wo=CC=81jcik?= <3044353+pwojcikdev@users.noreply.github.com> Date: Tue, 2 Jul 2024 18:47:49 +0200 Subject: [PATCH] Log node status periodically --- nano/lib/logging_enums.hpp | 1 + nano/lib/thread_roles.cpp | 3 + nano/lib/thread_roles.hpp | 1 + nano/node/CMakeLists.txt | 6 +- nano/node/active_elections.cpp | 8 +++ nano/node/active_elections.hpp | 3 +- nano/node/monitor.cpp | 126 +++++++++++++++++++++++++++++++++ nano/node/monitor.hpp | 52 ++++++++++++++ nano/node/node.cpp | 5 ++ nano/node/node.hpp | 4 ++ nano/node/nodeconfig.cpp | 10 +++ nano/node/nodeconfig.hpp | 2 + 12 files changed, 218 insertions(+), 3 deletions(-) create mode 100644 nano/node/monitor.cpp create mode 100644 nano/node/monitor.hpp diff --git a/nano/lib/logging_enums.hpp b/nano/lib/logging_enums.hpp index b2b00b350c..7eecf47cf4 100644 --- a/nano/lib/logging_enums.hpp +++ b/nano/lib/logging_enums.hpp @@ -80,6 +80,7 @@ enum class type peer_history, message_processor, local_block_broadcaster, + monitor, // bootstrap bulk_pull_client, diff --git a/nano/lib/thread_roles.cpp b/nano/lib/thread_roles.cpp index 2948caa217..0c68f7d4b5 100644 --- a/nano/lib/thread_roles.cpp +++ b/nano/lib/thread_roles.cpp @@ -157,6 +157,9 @@ std::string nano::thread_role::get_string (nano::thread_role::name role) case nano::thread_role::name::vote_router: thread_role_name_string = "Vote router"; break; + case nano::thread_role::name::monitor: + thread_role_name_string = "Monitor"; + break; default: debug_assert (false && "nano::thread_role::get_string unhandled thread role"); } diff --git a/nano/lib/thread_roles.hpp b/nano/lib/thread_roles.hpp index b6e3196cac..61efd84575 100644 --- a/nano/lib/thread_roles.hpp +++ b/nano/lib/thread_roles.hpp @@ -59,6 +59,7 @@ enum class name port_mapping, stats, vote_router, + monitor, }; std::string_view to_string (name); diff --git a/nano/node/CMakeLists.txt b/nano/node/CMakeLists.txt index 535f917b67..37646b575b 100644 --- a/nano/node/CMakeLists.txt +++ b/nano/node/CMakeLists.txt @@ -101,6 +101,10 @@ add_library( make_store.cpp message_processor.hpp message_processor.cpp + messages.hpp + messages.cpp + monitor.hpp + monitor.cpp network.hpp network.cpp nodeconfig.hpp @@ -196,8 +200,6 @@ add_library( websocketconfig.cpp websocket_stream.hpp websocket_stream.cpp - messages.hpp - messages.cpp xorshift.hpp) target_link_libraries( diff --git a/nano/node/active_elections.cpp b/nano/node/active_elections.cpp index aa183428e8..3f7d7baeec 100644 --- a/nano/node/active_elections.cpp +++ b/nano/node/active_elections.cpp @@ -515,6 +515,14 @@ std::size_t nano::active_elections::size () const return roots.size (); } +std::size_t nano::active_elections::size (nano::election_behavior behavior) const +{ + nano::lock_guard lock{ mutex }; + auto count = count_by_behavior[behavior]; + debug_assert (count >= 0); + return static_cast (count); +} + bool nano::active_elections::publish (std::shared_ptr const & block_a) { nano::unique_lock lock{ mutex }; diff --git a/nano/node/active_elections.hpp b/nano/node/active_elections.hpp index a4cb478a38..96e47def58 100644 --- a/nano/node/active_elections.hpp +++ b/nano/node/active_elections.hpp @@ -120,6 +120,7 @@ class active_elections final bool erase (nano::qualified_root const &); bool empty () const; std::size_t size () const; + std::size_t size (nano::election_behavior) const; bool publish (std::shared_ptr const &); /** @@ -172,7 +173,7 @@ class active_elections final std::chrono::seconds const election_time_to_live; /** Keeps track of number of elections by election behavior (normal, hinted, optimistic) */ - nano::enum_array count_by_behavior; + nano::enum_array count_by_behavior{}; nano::condition_variable condition; bool stopped{ false }; diff --git a/nano/node/monitor.cpp b/nano/node/monitor.cpp new file mode 100644 index 0000000000..92d19ffe8d --- /dev/null +++ b/nano/node/monitor.cpp @@ -0,0 +1,126 @@ +#include "nano/secure/ledger.hpp" + +#include +#include +#include +#include + +nano::monitor::monitor (nano::monitor_config const & config_a, nano::node & node_a) : + config{ config_a }, + node{ node_a }, + logger{ node_a.logger } +{ +} + +nano::monitor::~monitor () +{ + debug_assert (!thread.joinable ()); +} + +void nano::monitor::start () +{ + if (!config.enabled) + { + return; + } + + thread = std::thread ([this] () { + nano::thread_role::set (nano::thread_role::name::monitor); + run (); + }); +} + +void nano::monitor::stop () +{ + { + nano::lock_guard guard{ mutex }; + stopped = true; + } + condition.notify_all (); + if (thread.joinable ()) + { + thread.join (); + } +} + +void nano::monitor::run () +{ + std::unique_lock lock{ mutex }; + while (!stopped) + { + run_one (); + condition.wait_until (lock, std::chrono::steady_clock::now () + config.interval, [this] { return stopped; }); + } +} + +void nano::monitor::run_one () +{ + // Node status: + // - blocks (confirmed, total) + // - blocks rate (over last 5m, peak over last 5m) + // - peers + // - stake (online, peered, trended, quorum needed) + // - elections active (normal, hinted, optimistic) + // - election stats over last 5m (confirmed, dropped) + + auto const now = std::chrono::steady_clock::now (); + auto blocks_cemented = node.ledger.cemented_count (); + auto blocks_total = node.ledger.block_count (); + + // Wait for node to warm up before logging + if (last_time != std::chrono::steady_clock::time_point{}) + { + // TODO: Maybe emphasize somehow that confirmed doesn't need to be equal to total; backlog is OK + logger.info (nano::log::type::monitor, "Blocks confirmed: {} | total: {}", + blocks_cemented, + blocks_total); + + // Calculate the rates + auto elapsed_seconds = std::chrono::duration_cast (now - last_time).count (); + auto blocks_confirmed_rate = static_cast (blocks_cemented - last_blocks_cemented) / elapsed_seconds; + auto blocks_checked_rate = static_cast (blocks_total - last_blocks_total) / elapsed_seconds; + + logger.info (nano::log::type::monitor, "Blocks rate (average over last {}s): confirmed {:.2f}/s | total {:.2f}/s", + elapsed_seconds, + blocks_confirmed_rate, + blocks_checked_rate); + + logger.info (nano::log::type::monitor, "Peers: {} (stake peered: {} | stake online: {} | quorum: {})", + node.network.size (), + nano::uint128_union{ node.rep_crawler.total_weight () }.format_balance (Mxrb_ratio, 1, true), + nano::uint128_union{ node.online_reps.online () }.format_balance (Mxrb_ratio, 1, true), + nano::uint128_union{ node.online_reps.delta () }.format_balance (Mxrb_ratio, 1, true)); + + logger.info (nano::log::type::monitor, "Elections active: {} (priority: {} | hinted: {} | optimistic: {})", + node.active.size (), + node.active.size (nano::election_behavior::priority), + node.active.size (nano::election_behavior::hinted), + node.active.size (nano::election_behavior::optimistic)); + } + + last_time = now; + last_blocks_cemented = blocks_cemented; + last_blocks_total = blocks_total; +} + +/* + * monitor_config + */ + +nano::error nano::monitor_config::serialize (nano::tomlconfig & toml) const +{ + toml.put ("enable", enabled, "Enable or disable periodic node status logging\ntype:bool"); + toml.put ("interval", interval.count (), "Interval between status logs\ntype:seconds"); + + return toml.get_error (); +} + +nano::error nano::monitor_config::deserialize (nano::tomlconfig & toml) +{ + toml.get ("enabled", enabled); + auto interval_l = interval.count (); + toml.get ("interval", interval_l); + interval = std::chrono::seconds{ interval_l }; + + return toml.get_error (); +} \ No newline at end of file diff --git a/nano/node/monitor.hpp b/nano/node/monitor.hpp new file mode 100644 index 0000000000..a76f753bbc --- /dev/null +++ b/nano/node/monitor.hpp @@ -0,0 +1,52 @@ +#pragma once + +#include +#include + +#include +#include + +using namespace std::chrono_literals; + +namespace nano +{ +class monitor_config final +{ +public: + nano::error deserialize (nano::tomlconfig &); + nano::error serialize (nano::tomlconfig &) const; + +public: + bool enabled{ true }; + std::chrono::seconds interval{ 60s }; +}; + +class monitor final +{ +public: + monitor (monitor_config const &, nano::node &); + ~monitor (); + + void start (); + void stop (); + +private: // Dependencies + monitor_config const & config; + nano::node & node; + nano::logger & logger; + +private: + void run (); + void run_one (); + + std::chrono::steady_clock::time_point last_time{}; + + size_t last_blocks_cemented{ 0 }; + size_t last_blocks_total{ 0 }; + + bool stopped{ false }; + nano::condition_variable condition; + mutable nano::mutex mutex; + std::thread thread; +}; +} \ No newline at end of file diff --git a/nano/node/node.cpp b/nano/node/node.cpp index 109de8c102..8abc0ac89a 100644 --- a/nano/node/node.cpp +++ b/nano/node/node.cpp @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include @@ -223,6 +224,8 @@ nano::node::node (std::shared_ptr io_ctx_a, std::filesy process_live_dispatcher{ ledger, scheduler.priority, vote_cache, websocket }, peer_history_impl{ std::make_unique (config.peer_history, store, network, logger, stats) }, peer_history{ *peer_history_impl }, + monitor_impl{ std::make_unique (config.monitor, *this) }, + monitor{ *monitor_impl }, startup_time (std::chrono::steady_clock::now ()), node_seq (seq) { @@ -721,6 +724,7 @@ void nano::node::start () local_block_broadcaster.start (); peer_history.start (); vote_router.start (); + monitor.start (); add_initial_peers (); } @@ -770,6 +774,7 @@ void nano::node::stop () local_block_broadcaster.stop (); message_processor.stop (); network.stop (); // Stop network last to avoid killing in-use sockets + monitor.stop (); // work pool is not stopped on purpose due to testing setup diff --git a/nano/node/node.hpp b/nano/node/node.hpp index 10494aa83d..6b8d2eec94 100644 --- a/nano/node/node.hpp +++ b/nano/node/node.hpp @@ -45,6 +45,7 @@ namespace nano class active_elections; class confirming_set; class message_processor; +class monitor; class node; class vote_processor; class vote_cache_processor; @@ -216,7 +217,10 @@ class node final : public std::enable_shared_from_this nano::process_live_dispatcher process_live_dispatcher; std::unique_ptr peer_history_impl; nano::peer_history & peer_history; + std::unique_ptr monitor_impl; + nano::monitor & monitor; +public: std::chrono::steady_clock::time_point const startup_time; std::chrono::seconds unchecked_cutoff = std::chrono::seconds (7 * 24 * 60 * 60); // Week std::atomic unresponsive_work_peers{ false }; diff --git a/nano/node/nodeconfig.cpp b/nano/node/nodeconfig.cpp index 0eef9a5674..10adbddeaa 100644 --- a/nano/node/nodeconfig.cpp +++ b/nano/node/nodeconfig.cpp @@ -249,6 +249,10 @@ nano::error nano::node_config::serialize_toml (nano::tomlconfig & toml) const message_processor.serialize (message_processor_l); toml.put_child ("message_processor", message_processor_l); + nano::tomlconfig monitor_l; + monitor.serialize (monitor_l); + toml.put_child ("monitor", monitor_l); + return toml.get_error (); } @@ -366,6 +370,12 @@ nano::error nano::node_config::deserialize_toml (nano::tomlconfig & toml) message_processor.deserialize (config_l); } + if (toml.has_key ("monitor")) + { + auto config_l = toml.get_required_child ("monitor"); + monitor.deserialize (config_l); + } + if (toml.has_key ("work_peers")) { work_peers.clear (); diff --git a/nano/node/nodeconfig.hpp b/nano/node/nodeconfig.hpp index a71953b690..8cada5ae08 100644 --- a/nano/node/nodeconfig.hpp +++ b/nano/node/nodeconfig.hpp @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -154,6 +155,7 @@ class node_config nano::message_processor_config message_processor; nano::network_config network; nano::local_block_broadcaster_config local_block_broadcaster; + nano::monitor_config monitor; public: std::string serialize_frontiers_confirmation (nano::frontiers_confirmation_mode) const;