From 667b60fb509aaba865b1c846cc5b70caff7a22a2 Mon Sep 17 00:00:00 2001 From: WenyXu Date: Sat, 2 Dec 2023 03:35:04 +0000 Subject: [PATCH] chore: add logs and metrics --- src/datanode/src/alive_keeper.rs | 6 ++++- src/datanode/src/heartbeat.rs | 2 +- .../src/handler/region_lease_handler.rs | 9 ++++++++ src/meta-srv/src/metrics.rs | 16 ++++++++++++++ src/meta-srv/src/region/lease_keeper.rs | 22 +++++++++++++++++-- src/meta-srv/src/service/heartbeat.rs | 5 ++++- 6 files changed, 55 insertions(+), 5 deletions(-) diff --git a/src/datanode/src/alive_keeper.rs b/src/datanode/src/alive_keeper.rs index d8b0b7c50d67..e8bb653d6968 100644 --- a/src/datanode/src/alive_keeper.rs +++ b/src/datanode/src/alive_keeper.rs @@ -129,8 +129,12 @@ impl RegionAliveKeeper { let (role, region_id) = (region.role().into(), RegionId::from(region.region_id)); if let Some(handle) = self.find_handle(region_id).await { handle.reset_deadline(role, deadline).await; + } else { + warn!( + "Trying to renew the lease for region {region_id}, the keeper handler is not found!" + ); + // Else the region alive keeper might be triggered by lagging messages, we can safely ignore it. } - // Else the region alive keeper might be triggered by lagging messages, we can safely ignore it. } } diff --git a/src/datanode/src/heartbeat.rs b/src/datanode/src/heartbeat.rs index 4e2242288c05..1764662930a1 100644 --- a/src/datanode/src/heartbeat.rs +++ b/src/datanode/src/heartbeat.rs @@ -72,9 +72,9 @@ impl HeartbeatTask { opts.heartbeat.interval.as_millis() as u64, )); let resp_handler_executor = Arc::new(HandlerGroupExecutor::new(vec![ + region_alive_keeper.clone(), Arc::new(ParseMailboxMessageHandler), Arc::new(RegionHeartbeatResponseHandler::new(region_server.clone())), - region_alive_keeper.clone(), ])); Ok(Self { diff --git a/src/meta-srv/src/handler/region_lease_handler.rs b/src/meta-srv/src/handler/region_lease_handler.rs index 67fcba4519e4..9402adc61461 100644 --- a/src/meta-srv/src/handler/region_lease_handler.rs +++ b/src/meta-srv/src/handler/region_lease_handler.rs @@ -18,6 +18,7 @@ use std::sync::Arc; use api::v1::meta::{HeartbeatRequest, RegionLease, Role}; use async_trait::async_trait; use common_meta::key::TableMetadataManagerRef; +use common_telemetry::info; use store_api::region_engine::{GrantedRegion, RegionRole}; use store_api::storage::RegionId; @@ -123,6 +124,10 @@ impl HeartbeatHandler for RegionLeaseHandler { &leaders, RegionRole::Leader, ); + info!( + "Granting region lease, found closable leader regions: {:?} on datanode {}", + closable, datanode_id + ); inactive_regions.extend(closable); let followers = followers.into_iter().flatten().collect::>(); @@ -144,6 +149,10 @@ impl HeartbeatHandler for RegionLeaseHandler { &followers, RegionRole::Follower, ); + info!( + "Granting region lease, found closable follower regions {:?} on datanode {}", + closable, datanode_id + ); inactive_regions.extend(closable); acc.inactive_region_ids = inactive_regions; diff --git a/src/meta-srv/src/metrics.rs b/src/meta-srv/src/metrics.rs index 8c73c9a7c171..8949149ba558 100644 --- a/src/meta-srv/src/metrics.rs +++ b/src/meta-srv/src/metrics.rs @@ -34,4 +34,20 @@ lazy_static! { pub static ref METRIC_META_LEADER_CACHED_KV_LOAD: HistogramVec = register_histogram_vec!("meta_leader_cache_kv_load", "meta load cache", &["prefix"]) .unwrap(); + pub static ref METRIC_META_LOAD_FOLLOWER_METADATA: Histogram = register_histogram!( + "meta_load_follower_metadata", + "meta load follower regions metadata elapsed" + ) + .unwrap(); + pub static ref METRIC_META_LOAD_LEADER_METADATA: Histogram = register_histogram!( + "meta_load_leader_metadata", + "meta load leader regions metadata elapsed" + ) + .unwrap(); + pub static ref METRIC_META_HANDLE_HEARTBEAT: HistogramVec = register_histogram_vec!( + "meta_handle_heartbeat", + "meta handle heartbeat elapsed", + &["pusher"] + ) + .unwrap(); } diff --git a/src/meta-srv/src/region/lease_keeper.rs b/src/meta-srv/src/region/lease_keeper.rs index f9471f7e07e1..06bc4cf382d2 100644 --- a/src/meta-srv/src/region/lease_keeper.rs +++ b/src/meta-srv/src/region/lease_keeper.rs @@ -21,11 +21,13 @@ use std::sync::{Arc, RwLock}; use common_meta::key::table_route::TableRouteValue; use common_meta::key::TableMetadataManagerRef; use common_meta::DatanodeId; +use common_telemetry::warn; use snafu::ResultExt; use store_api::storage::{RegionId, TableId}; use self::mito::find_staled_leader_regions; use crate::error::{self, Result}; +use crate::metrics; use crate::region::lease_keeper::utils::find_staled_follower_regions; pub type RegionLeaseKeeperRef = Arc; @@ -89,7 +91,11 @@ impl RegionLeaseKeeper { ) -> Result<(HashSet, HashSet)> { let tables = self.collect_tables(datanode_regions); let table_ids = tables.keys().copied().collect::>(); - let metadata_subset = self.collect_tables_metadata(&table_ids).await?; + + let metadata_subset = { + let _timer = metrics::METRIC_META_LOAD_LEADER_METADATA.start_timer(); + self.collect_tables_metadata(&table_ids).await? + }; let mut closable_set = HashSet::new(); let mut downgradable_set = HashSet::new(); @@ -104,6 +110,10 @@ impl RegionLeaseKeeper { downgradable_set.extend(downgradable); closable_set.extend(closable); } else { + warn!( + "The table {} metadata is not found, appends closable leader regions: {:?}", + table_id, regions + ); // If table metadata is not found. closable_set.extend(regions); } @@ -128,7 +138,11 @@ impl RegionLeaseKeeper { ) -> Result<(HashSet, HashSet)> { let tables = self.collect_tables(datanode_regions); let table_ids = tables.keys().copied().collect::>(); - let metadata_subset = self.collect_tables_metadata(&table_ids).await?; + + let metadata_subset = { + let _timer = metrics::METRIC_META_LOAD_FOLLOWER_METADATA.start_timer(); + self.collect_tables_metadata(&table_ids).await? + }; let mut upgradable_set = HashSet::new(); let mut closable_set = HashSet::new(); @@ -143,6 +157,10 @@ impl RegionLeaseKeeper { upgradable_set.extend(upgradable); closable_set.extend(closable); } else { + warn!( + "The table {} metadata is not found, appends closable followers regions: {:?}", + table_id, regions + ); // If table metadata is not found. closable_set.extend(regions); } diff --git a/src/meta-srv/src/service/heartbeat.rs b/src/meta-srv/src/service/heartbeat.rs index 2e5d82ac37f1..e4a7e08e7478 100644 --- a/src/meta-srv/src/service/heartbeat.rs +++ b/src/meta-srv/src/service/heartbeat.rs @@ -27,11 +27,11 @@ use tokio::sync::mpsc; use tokio_stream::wrappers::ReceiverStream; use tonic::{Request, Response, Streaming}; -use crate::error; use crate::error::Result; use crate::handler::Pusher; use crate::metasrv::{Context, MetaSrv}; use crate::service::{GrpcResult, GrpcStream}; +use crate::{error, metrics}; #[async_trait::async_trait] impl heartbeat_server::Heartbeat for MetaSrv { @@ -69,6 +69,9 @@ impl heartbeat_server::Heartbeat for MetaSrv { handler_group.register(&key, pusher).await; pusher_key = Some(key); } + // Safety: must exist. + let _timer = metrics::METRIC_META_HANDLE_HEARTBEAT + .with_label_values(&[pusher_key.as_ref().unwrap()]); let res = handler_group .handle(req, ctx.clone())