Skip to content

Commit

Permalink
do not let failure list grow without bound
Browse files (browse the repository at this point in the history)
  • Loading branch information
davepacheco committed Jul 3, 2024
1 parent d02316a commit 34a814d
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 5 deletions.
2 changes: 1 addition & 1 deletion nexus/db-queries/src/db/saga_recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ use std::collections::BTreeSet;
use std::sync::Arc;
use steno::SagaId;

- /// Describes the result [`recover`]
+ /// Describes the result of [`recover()`]
pub struct SagasRecovered {
recovered: BTreeMap<SagaId, BoxFuture<'static, Result<(), Error>>>,
skipped: BTreeSet<SagaId>,
Expand Down
15 changes: 11 additions & 4 deletions nexus/src/app/background/tasks/saga_recovery.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,14 @@ use nexus_db_queries::db::DataStore;
use serde::Serialize;
use slog_error_chain::InlineErrorChain;
use std::collections::BTreeMap;
+ use std::collections::VecDeque;
use std::sync::Arc;
use steno::SagaId;
use uuid::Uuid;

+ /// Maximum number of recent failures to keep track of for debugging
+ const N_FAILED_SAGA_HISTORY: usize = 16;

/// Background task that recovers sagas assigned to this Nexus
///
/// Normally, this task only does anything of note once, when Nexus starts up.
Expand All @@ -39,7 +43,7 @@ pub struct SagaRecovery {
registry: Arc<ActionRegistry>,

sagas_recovered: BTreeMap<SagaId, DateTime<Utc>>,
- recent_failures: Vec<RecoveryFailure>,
+ recent_failures: VecDeque<RecoveryFailure>,
last_pass: LastPass,
}

Expand All @@ -48,7 +52,7 @@ pub struct SagaRecovery {
#[derive(Clone, Serialize)]
pub struct SagaRecoveryTaskStatus {
all_recovered: BTreeMap<SagaId, DateTime<Utc>>,
- recent_failures: Vec<RecoveryFailure>,
+ recent_failures: VecDeque<RecoveryFailure>,
last_pass: LastPass,
}

Expand Down Expand Up @@ -85,7 +89,7 @@ impl SagaRecovery {
sec,
registry,
sagas_recovered: BTreeMap::new(),
- recent_failures: Vec::new(),
+ recent_failures: VecDeque::with_capacity(N_FAILED_SAGA_HISTORY),
last_pass: LastPass::NeverStarted,
}
}
Expand Down Expand Up @@ -149,7 +153,10 @@ impl BackgroundTask for SagaRecovery {
}

for (saga_id, error) in ok.iter_failed() {
- self.recent_failures.push(RecoveryFailure {
+ if self.recent_failures.len() == N_FAILED_SAGA_HISTORY {
+ let _ = self.recent_failures.pop_front();
+ }
+ self.recent_failures.push_back(RecoveryFailure {
time: now,
saga_id,
message: InlineErrorChain::new(error).to_string(),
Expand Down

0 comments on commit 34a814d

Please sign in to comment.