diff --git a/src/v/cluster/health_monitor_backend.cc b/src/v/cluster/health_monitor_backend.cc index 0ad5f74b9681..88ef8b740085 100644 --- a/src/v/cluster/health_monitor_backend.cc +++ b/src/v/cluster/health_monitor_backend.cc @@ -295,7 +295,7 @@ health_monitor_backend::get_cluster_health( } ss::future -health_monitor_backend::get_cluster_disk_health( +health_monitor_backend::get_cluster_data_disk_health( force_refresh refresh, model::timeout_clock::time_point deadline) { auto ec = co_await maybe_refresh_cluster_health(refresh, deadline); if (ec) { @@ -308,7 +308,7 @@ health_monitor_backend::get_cluster_disk_health( // operate, I guess. co_return storage::disk_space_alert::ok; } - co_return _reports_disk_health; + co_return _reports_data_disk_health; } ss::future @@ -444,8 +444,8 @@ ss::future health_monitor_backend::collect_cluster_health() { auto old_reports = std::exchange(_reports, {}); - // update nodes reports and cache cluster-level disk health - storage::disk_space_alert cluster_disk_health + // update nodes reports and cache cluster-level data disk health + storage::disk_space_alert cluster_data_disk_health = storage::disk_space_alert::ok; for (auto& r : reports) { if (r) { @@ -471,14 +471,14 @@ ss::future health_monitor_backend::collect_cluster_health() { for (auto& cb : _node_callbacks) { cb.second(r.value(), old_report); } - cluster_disk_health = storage::max_severity( - r.value().local_state.get_disk_alert(), cluster_disk_health); + cluster_data_disk_health = storage::max_severity( + r.value().local_state.data_disk.alert, cluster_data_disk_health); _reports.emplace( id, ss::make_lw_shared(std::move(r.value()))); } } - _reports_disk_health = cluster_disk_health; + _reports_data_disk_health = cluster_data_disk_health; if (config::shard_local_cfg().enable_usage()) { vlog(clusterlog.info, "collecting cloud health statistics"); diff --git a/src/v/cluster/health_monitor_backend.h b/src/v/cluster/health_monitor_backend.h index 470178deb9f4..dc55fb82e0f9 100644 --- a/src/v/cluster/health_monitor_backend.h +++ b/src/v/cluster/health_monitor_backend.h @@ -65,7 +65,7 @@ class health_monitor_backend { ss::future> get_cluster_health( cluster_report_filter, force_refresh, model::timeout_clock::time_point); - ss::future get_cluster_disk_health( + ss::future get_cluster_data_disk_health( force_refresh refresh, model::timeout_clock::time_point deadline); ss::future> collect_current_node_health(); @@ -182,7 +182,7 @@ class health_monitor_backend { status_cache_t _status; report_cache_t _reports; - storage::disk_space_alert _reports_disk_health + storage::disk_space_alert _reports_data_disk_health = storage::disk_space_alert::ok; std::optional _bytes_in_cloud_storage; diff --git a/src/v/cluster/health_monitor_frontend.cc b/src/v/cluster/health_monitor_frontend.cc index 756109975c00..f5906bc5743c 100644 --- a/src/v/cluster/health_monitor_frontend.cc +++ b/src/v/cluster/health_monitor_frontend.cc @@ -60,8 +60,9 @@ health_monitor_frontend::get_cluster_health( }); } -storage::disk_space_alert health_monitor_frontend::get_cluster_disk_health() { - return _cluster_disk_health; +storage::disk_space_alert +health_monitor_frontend::get_cluster_data_disk_health() { + return _cluster_data_disk_health; } /** @@ -100,23 +101,24 @@ health_monitor_frontend::get_cluster_health_overview( ss::future<> health_monitor_frontend::update_other_shards( const storage::disk_space_alert dsa) { - co_await container().invoke_on_others( - [dsa](health_monitor_frontend& fe) { fe._cluster_disk_health = dsa; }); + co_await container().invoke_on_others([dsa](health_monitor_frontend& fe) { + fe._cluster_data_disk_health = dsa; + }); } ss::future<> health_monitor_frontend::update_frontend_and_backend_cache() { auto deadline = model::time_from_now(default_timeout); auto disk_health = co_await dispatch_to_backend( [deadline](health_monitor_backend& be) { - return be.get_cluster_disk_health(force_refresh::no, deadline); + return be.get_cluster_data_disk_health(force_refresh::no, deadline); }); - if (disk_health != _cluster_disk_health) { + if (disk_health != _cluster_data_disk_health) { vlog( clusterlog.debug, - "Update disk health cache {} -> {}", - _cluster_disk_health, + "Update data disk health cache {} -> {}", + _cluster_data_disk_health, disk_health); - _cluster_disk_health = disk_health; + _cluster_data_disk_health = disk_health; co_await update_other_shards(disk_health); } } diff --git a/src/v/cluster/health_monitor_frontend.h b/src/v/cluster/health_monitor_frontend.h index 7ea0df709bc7..018200b5f266 100644 --- a/src/v/cluster/health_monitor_frontend.h +++ b/src/v/cluster/health_monitor_frontend.h @@ -57,7 +57,7 @@ class health_monitor_frontend ss::future> get_cluster_health( cluster_report_filter, force_refresh, model::timeout_clock::time_point); - storage::disk_space_alert get_cluster_disk_health(); + storage::disk_space_alert get_cluster_data_disk_health(); // Collects or return cached version of current node health report. ss::future> get_current_node_health(); @@ -101,7 +101,7 @@ class health_monitor_frontend config::binding _alive_timeout; // Currently the worst / max of all nodes' disk space state - storage::disk_space_alert _cluster_disk_health{ + storage::disk_space_alert _cluster_data_disk_health{ storage::disk_space_alert::ok}; ss::timer _refresh_timer; ss::gate _refresh_gate; diff --git a/src/v/cluster/metadata_cache.cc b/src/v/cluster/metadata_cache.cc index 034583c438cc..c49ceb6d2742 100644 --- a/src/v/cluster/metadata_cache.cc +++ b/src/v/cluster/metadata_cache.cc @@ -149,7 +149,7 @@ std::vector metadata_cache::node_ids() const { } bool metadata_cache::should_reject_writes() const { - return _health_monitor.local().get_cluster_disk_health() + return _health_monitor.local().get_cluster_data_disk_health() == storage::disk_space_alert::degraded; } diff --git a/tests/rptest/tests/full_disk_test.py b/tests/rptest/tests/full_disk_test.py index 79397282a1bf..19954d56a21e 100644 --- a/tests/rptest/tests/full_disk_test.py +++ b/tests/rptest/tests/full_disk_test.py @@ -147,7 +147,7 @@ def check_health_monitor_frontend(disk_space_change: str): # Looking for a log statement about a change in disk space. # This is a check for the health monitor frontend because # that structure logs disk space alerts. - pattern = f"Update disk health cache {disk_space_change}" + pattern = f"Update data disk health cache {disk_space_change}" wait_until( lambda: self.redpanda.search_log_any(pattern), timeout_sec=5,