Skip to content

Commit

Permalink
raftstore: calculate the slow score by considering individual disk pe…
Browse files Browse the repository at this point in the history
…rformance factors (tikv#17801) (tikv#17912)

close tikv#17884

This pr introduces an extra and individual inspector to detect whether there exists I/O hung issues on kvdb disk, if
the kvdb is deployed with a separate mount path.

Signed-off-by: ti-chi-bot <[email protected]>
Signed-off-by: lucasliang <[email protected]>

Co-authored-by: lucasliang <[email protected]>
  • Loading branch information
ti-chi-bot and LykxSassinator authored Dec 2, 2024
1 parent eb66485 commit 99279b8
Show file tree
Hide file tree
Showing 24 changed files with 712 additions and 167 deletions.
2 changes: 2 additions & 0 deletions components/health_controller/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
//! that are specific to different modules, increasing the complexity and
//! possibility to misuse of `HealthController`.
#![feature(div_duration)]

pub mod reporters;
pub mod slow_score;
pub mod trend;
Expand Down
104 changes: 87 additions & 17 deletions components/health_controller/src/reporters.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use prometheus::IntGauge;
use crate::{
slow_score::{SlowScore, SlowScoreTickResult},
trend::{RequestPerSecRecorder, Trend},
types::InspectFactor,
HealthController, HealthControllerInner, RaftstoreDuration,
};

Expand All @@ -27,6 +28,7 @@ pub struct RaftstoreReporterConfig {
/// worker) is expected to tick it. But the interval is necessary in
/// some internal calculations.
pub inspect_interval: Duration,
pub inspect_kvdb_interval: Duration,

pub unsensitive_cause: f64,
pub unsensitive_result: f64,
Expand All @@ -43,9 +45,72 @@ pub struct RaftstoreReporterConfig {
pub result_l2_gap_gauges: IntGauge,
}

/// A unified slow score that combines multiple slow scores.
///
/// It calculates the final slow score of a store by picking the maximum
/// score among multiple factors. Each factor represents a different aspect of
/// the store's performance. Typically, we have two factors: Raft Disk I/O and
/// KvDB Disk I/O. If there are more factors in the future, we can add them
/// here.
#[derive(Default)]
pub struct UnifiedSlowScore {
factors: Vec<SlowScore>,
}

impl UnifiedSlowScore {
pub fn new(cfg: &RaftstoreReporterConfig) -> Self {
let mut unified_slow_score = UnifiedSlowScore::default();
// The first factor is for Raft Disk I/O.
unified_slow_score
.factors
.push(SlowScore::new(cfg.inspect_interval));
// The second factor is for KvDB Disk I/O.
unified_slow_score
.factors
.push(SlowScore::new_with_extra_config(
cfg.inspect_kvdb_interval,
0.6,
));
unified_slow_score
}

#[inline]
pub fn record(
&mut self,
id: u64,
factor: InspectFactor,
duration: &RaftstoreDuration,
not_busy: bool,
) {
self.factors[factor as usize].record(id, duration.delays_on_disk_io(false), not_busy);
}

#[inline]
pub fn get(&self, factor: InspectFactor) -> &SlowScore {
&self.factors[factor as usize]
}

#[inline]
pub fn get_mut(&mut self, factor: InspectFactor) -> &mut SlowScore {
&mut self.factors[factor as usize]
}

// Returns the maximum score of all factors.
pub fn get_score(&self) -> f64 {
self.factors
.iter()
.map(|factor| factor.get())
.fold(1.0, f64::max)
}

pub fn last_tick_finished(&self) -> bool {
self.factors.iter().all(SlowScore::last_tick_finished)
}
}

pub struct RaftstoreReporter {
health_controller_inner: Arc<HealthControllerInner>,
slow_score: SlowScore,
slow_score: UnifiedSlowScore,
slow_trend: SlowTrendStatistics,
is_healthy: bool,
}
Expand All @@ -56,18 +121,14 @@ impl RaftstoreReporter {
pub fn new(health_controller: &HealthController, cfg: RaftstoreReporterConfig) -> Self {
Self {
health_controller_inner: health_controller.inner.clone(),
slow_score: SlowScore::new(cfg.inspect_interval),
slow_score: UnifiedSlowScore::new(&cfg),
slow_trend: SlowTrendStatistics::new(cfg),
is_healthy: true,
}
}

pub fn get_tick_interval(&self) -> Duration {
self.slow_score.get_inspect_interval()
}

pub fn get_slow_score(&self) -> f64 {
self.slow_score.get()
self.slow_score.get_score()
}

pub fn get_slow_trend(&self) -> &SlowTrendStatistics {
Expand All @@ -77,17 +138,18 @@ impl RaftstoreReporter {
pub fn record_raftstore_duration(
&mut self,
id: u64,
factor: InspectFactor,
duration: RaftstoreDuration,
store_not_busy: bool,
) {
// Fine-tuned, `SlowScore` only takes the I/O jitters on the disk into account.
self.slow_score
.record(id, duration.delays_on_disk_io(false), store_not_busy);
.record(id, factor, &duration, store_not_busy);
self.slow_trend.record(duration);

// Publish slow score to health controller
self.health_controller_inner
.update_raftstore_slow_score(self.slow_score.get());
.update_raftstore_slow_score(self.slow_score.get_score());
}

fn is_healthy(&self) -> bool {
Expand All @@ -109,34 +171,42 @@ impl RaftstoreReporter {
}
}

pub fn tick(&mut self, store_maybe_busy: bool) -> SlowScoreTickResult {
pub fn tick(&mut self, store_maybe_busy: bool, factor: InspectFactor) -> SlowScoreTickResult {
// Record a fairly great value when timeout
self.slow_trend.slow_cause.record(500_000, Instant::now());

// healthy: The health status of the current store.
// all_ticks_finished: The last tick of all factors is finished.
// factor_tick_finished: The last tick of the current factor is finished.
let (healthy, all_ticks_finished, factor_tick_finished) = (
self.is_healthy(),
self.slow_score.last_tick_finished(),
self.slow_score.get(factor).last_tick_finished(),
);
// The health status is recovered to serving as long as any tick
// does not timeout.
if !self.is_healthy() && self.slow_score.last_tick_finished() {
if !healthy && all_ticks_finished {
self.set_is_healthy(true);
}
if !self.slow_score.last_tick_finished() {
if !all_ticks_finished {
// If the last tick is not finished, it means that the current store might
// be busy on handling requests or delayed on I/O operations. And only when
// the current store is not busy, it should record the last_tick as a timeout.
if !store_maybe_busy {
self.slow_score.record_timeout();
if !store_maybe_busy && !factor_tick_finished {
self.slow_score.get_mut(factor).record_timeout();
}
}

let slow_score_tick_result = self.slow_score.tick();
let slow_score_tick_result = self.slow_score.get_mut(factor).tick();
if slow_score_tick_result.updated_score.is_some() && !slow_score_tick_result.has_new_record
{
self.set_is_healthy(false);
}

// Publish the slow score to health controller
if let Some(slow_score_value) = slow_score_tick_result.updated_score {
if slow_score_tick_result.updated_score.is_some() {
self.health_controller_inner
.update_raftstore_slow_score(slow_score_value);
.update_raftstore_slow_score(self.slow_score.get_score());
}

slow_score_tick_result
Expand Down
79 changes: 78 additions & 1 deletion components/health_controller/src/slow_score.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,12 @@ use std::{

use ordered_float::OrderedFloat;

/// Interval for updating the slow score.
const UPDATE_INTERVALS: Duration = Duration::from_secs(10);
/// Recovery intervals for the slow score.
/// If the score has reached 100 and there is no timeout inspecting requests
/// during this interval, the score will go back to 1 after 5min.
const RECOVERY_INTERVALS: Duration = Duration::from_secs(60 * 5);
// Slow score is a value that represents the speed of a store and ranges in [1,
// 100]. It is maintained in the AIMD way.
// If there are some inspecting requests timeout during a round, by default the
Expand Down Expand Up @@ -45,7 +51,7 @@ impl SlowScore {

inspect_interval,
ratio_thresh: OrderedFloat(0.1),
min_ttr: Duration::from_secs(5 * 60),
min_ttr: RECOVERY_INTERVALS,
last_record_time: Instant::now(),
last_update_time: Instant::now(),
round_ticks: 30,
Expand All @@ -54,6 +60,29 @@ impl SlowScore {
}
}

// Only for kvdb.
pub fn new_with_extra_config(inspect_interval: Duration, timeout_ratio: f64) -> SlowScore {
SlowScore {
value: OrderedFloat(1.0),

timeout_requests: 0,
total_requests: 0,

inspect_interval,
ratio_thresh: OrderedFloat(timeout_ratio),
min_ttr: RECOVERY_INTERVALS,
last_record_time: Instant::now(),
last_update_time: Instant::now(),
// The minimal round ticks is 1 for kvdb.
round_ticks: cmp::max(
UPDATE_INTERVALS.div_duration_f64(inspect_interval) as u64,
1_u64,
),
last_tick_id: 0,
last_tick_finished: true,
}
}

pub fn record(&mut self, id: u64, duration: Duration, not_busy: bool) {
self.last_record_time = Instant::now();
if id != self.last_tick_id {
Expand Down Expand Up @@ -207,4 +236,52 @@ mod tests {
slow_score.update_impl(Duration::from_secs(57))
);
}

#[test]
fn test_slow_score_extra() {
let mut slow_score = SlowScore::new_with_extra_config(Duration::from_millis(1000), 0.6);
slow_score.timeout_requests = 1;
slow_score.total_requests = 10;
let score = slow_score.update_impl(Duration::from_secs(10));
assert!(score > OrderedFloat(1.16));
assert!(score < OrderedFloat(1.17));

slow_score.timeout_requests = 2;
slow_score.total_requests = 10;
let score = slow_score.update_impl(Duration::from_secs(10));
assert!(score > OrderedFloat(1.5));
assert!(score < OrderedFloat(1.6));

slow_score.timeout_requests = 0;
slow_score.total_requests = 100;
assert_eq!(
OrderedFloat(1.0),
slow_score.update_impl(Duration::from_secs(57))
);

slow_score.timeout_requests = 3;
slow_score.total_requests = 10;
assert_eq!(
OrderedFloat(1.5),
slow_score.update_impl(Duration::from_secs(10))
);

slow_score.timeout_requests = 6;
slow_score.total_requests = 10;
assert_eq!(
OrderedFloat(3.0),
slow_score.update_impl(Duration::from_secs(10))
);

slow_score.timeout_requests = 10;
slow_score.total_requests = 10;
assert_eq!(
OrderedFloat(6.0),
slow_score.update_impl(Duration::from_secs(10))
);

// Test too large inspect interval.
let slow_score = SlowScore::new_with_extra_config(Duration::from_secs(11), 0.1);
assert_eq!(slow_score.round_ticks, 1);
}
}
16 changes: 16 additions & 0 deletions components/health_controller/src/types.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,22 @@ impl RaftstoreDuration {
}
}

#[derive(Debug, Clone, Copy, PartialEq)]
pub enum InspectFactor {
RaftDisk = 0,
KvDisk,
// TODO: Add more factors, like network io.
}

impl InspectFactor {
pub fn as_str(&self) -> &str {
match *self {
InspectFactor::RaftDisk => "raft",
InspectFactor::KvDisk => "kvdb",
}
}
}

/// Used to inspect the latency of all stages of raftstore.
pub struct LatencyInspector {
id: u64,
Expand Down
5 changes: 3 additions & 2 deletions components/raftstore-v2/src/worker/pd/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use causal_ts::CausalTsProviderImpl;
use collections::HashMap;
use concurrency_manager::ConcurrencyManager;
use engine_traits::{KvEngine, RaftEngine, TabletRegistry};
use health_controller::types::{LatencyInspector, RaftstoreDuration};
use health_controller::types::{InspectFactor, LatencyInspector, RaftstoreDuration};
use kvproto::{metapb, pdpb};
use pd_client::{BucketStat, PdClient};
use raftstore::store::{
Expand Down Expand Up @@ -254,6 +254,7 @@ where
let mut stats_monitor = PdStatsMonitor::new(
store_heartbeat_interval / NUM_COLLECT_STORE_INFOS_PER_HEARTBEAT,
cfg.value().inspect_interval.0,
std::time::Duration::default(),
PdReporter::new(pd_scheduler, logger.clone()),
);
stats_monitor.start(auto_split_controller, collector_reg_handle)?;
Expand Down Expand Up @@ -428,7 +429,7 @@ impl StoreStatsReporter for PdReporter {
}
}

fn update_latency_stats(&self, timer_tick: u64) {
fn update_latency_stats(&self, timer_tick: u64, _factor: InspectFactor) {
// Tick slowness statistics.
{
if let Err(e) = self.scheduler.schedule(Task::TickSlownessStats) {
Expand Down
Loading

0 comments on commit 99279b8

Please sign in to comment.