From 078e1f0dbde62eadc12fa324146c14cfaae9574c Mon Sep 17 00:00:00 2001 From: Rain Date: Thu, 12 Oct 2023 20:11:48 -0700 Subject: [PATCH] [update-engine] a few improvements to event buffers (#4164) * For failures, track the parent key that failed (similar to aborts). * Track the last root event index that causes data for a step to be updated -- we're going to use this in the line-based displayer. * Add tests. Depends on #4163. --- update-engine/src/buffer.rs | 349 ++++++++++++++++++++++++++++------ wicket/src/ui/panes/update.rs | 22 ++- 2 files changed, 307 insertions(+), 64 deletions(-) diff --git a/update-engine/src/buffer.rs b/update-engine/src/buffer.rs index 1779ef7da6..d1028ff8cc 100644 --- a/update-engine/src/buffer.rs +++ b/update-engine/src/buffer.rs @@ -6,6 +6,7 @@ use std::{ collections::{HashMap, VecDeque}, + fmt, time::Duration, }; @@ -96,7 +97,7 @@ impl EventBuffer { /// /// This might cause older low-priority events to fall off the list. pub fn add_step_event(&mut self, event: StepEvent) { - self.event_store.handle_step_event(event, self.max_low_priority); + self.event_store.handle_root_step_event(event, self.max_low_priority); } /// Returns the root execution ID, if this event buffer is aware of any @@ -132,7 +133,8 @@ impl EventBuffer { let mut step_events = Vec::new(); let mut progress_events = Vec::new(); for (_, step_data) in self.steps().as_slice() { - step_events.extend(step_data.step_events_since(last_seen).cloned()); + step_events + .extend(step_data.step_events_since_impl(last_seen).cloned()); progress_events .extend(step_data.step_status.progress_event().cloned()); } @@ -161,7 +163,7 @@ impl EventBuffer { /// have been reported before a sender shuts down. pub fn has_pending_events_since(&self, last_seen: Option) -> bool { for (_, step_data) in self.steps().as_slice() { - if step_data.step_events_since(last_seen).next().is_some() { + if step_data.step_events_since_impl(last_seen).next().is_some() { return true; } } @@ -223,8 +225,8 @@ impl EventStore { }) } - /// Handles a step event. - fn handle_step_event( + /// Handles a non-nested step event. + fn handle_root_step_event( &mut self, event: StepEvent, max_low_priority: usize, @@ -234,12 +236,17 @@ impl EventStore { return; } + // This is a non-nested step event so the event index is a root event + // index. + let root_event_index = RootEventIndex(event.event_index); + let actions = - self.recurse_for_step_event(&event, 0, None, event.event_index); + self.recurse_for_step_event(&event, 0, None, root_event_index); if let Some(new_execution) = actions.new_execution { if new_execution.nest_level == 0 { self.root_execution_id = Some(new_execution.execution_id); } + let total_steps = new_execution.steps_to_add.len(); for (new_step_key, new_step, sort_key) in new_execution.steps_to_add { // These are brand new steps so their keys shouldn't exist in the @@ -249,6 +256,8 @@ impl EventStore { new_step, sort_key, new_execution.nest_level, + total_steps, + root_event_index, ) }); } @@ -302,7 +311,7 @@ impl EventStore { event: &StepEvent, nest_level: usize, parent_sort_key: Option<&StepSortKey>, - root_event_index: usize, + root_event_index: RootEventIndex, ) -> RecurseActions { let mut new_execution = None; let (step_key, progress_key) = match &event.kind { @@ -318,7 +327,7 @@ impl EventStore { }; let sort_key = StepSortKey::new( parent_sort_key, - root_event_index, + root_event_index.0, step.index, ); let step_node = self.add_step_node(step_key); @@ -360,7 +369,7 @@ impl EventStore { attempt_elapsed: *attempt_elapsed, }; // Mark this key and all child keys completed. - self.mark_step_key_completed(key, info); + self.mark_step_key_completed(key, info, root_event_index); // Register the next step in the event map. let next_key = StepKey { @@ -400,7 +409,7 @@ impl EventStore { attempt_elapsed: *attempt_elapsed, }; // Mark this key and all child keys completed. - self.mark_execution_id_completed(key, info); + self.mark_execution_id_completed(key, info, root_event_index); (Some(key), Some(key)) } @@ -426,7 +435,7 @@ impl EventStore { step_elapsed: *step_elapsed, attempt_elapsed: *attempt_elapsed, }; - self.mark_step_failed(key, info); + self.mark_step_failed(key, info, root_event_index); (Some(key), Some(key)) } @@ -450,7 +459,7 @@ impl EventStore { step_elapsed: *step_elapsed, attempt_elapsed: *attempt_elapsed, }; - self.mark_step_aborted(key, info); + self.mark_step_aborted(key, info, root_event_index); (Some(key), Some(key)) } @@ -524,11 +533,12 @@ impl EventStore { &mut self, root_key: StepKey, info: CompletionInfo, + root_event_index: RootEventIndex, ) { if let Some(value) = self.map.get_mut(&root_key) { // Completion status only applies to the root key. Nodes reachable // from this node are still marked as complete, but without status. - value.mark_completed(Some(info)); + value.mark_completed(Some(info), root_event_index); } // Mark anything reachable from this node as completed. @@ -538,7 +548,7 @@ impl EventStore { if let EventTreeNode::Step(key) = key { if key != root_key { if let Some(value) = self.map.get_mut(&key) { - value.mark_completed(None); + value.mark_completed(None, root_event_index); } } } @@ -549,10 +559,11 @@ impl EventStore { &mut self, root_key: StepKey, info: CompletionInfo, + root_event_index: RootEventIndex, ) { if let Some(value) = self.map.get_mut(&root_key) { // Completion status only applies to the root key. - value.mark_completed(Some(info)); + value.mark_completed(Some(info), root_event_index); } let mut dfs = DfsPostOrder::new( @@ -563,58 +574,87 @@ impl EventStore { if let EventTreeNode::Step(key) = key { if key != root_key { if let Some(value) = self.map.get_mut(&key) { - value.mark_completed(None); + value.mark_completed(None, root_event_index); } } } } } - fn mark_step_failed(&mut self, root_key: StepKey, info: FailureInfo) { - self.mark_step_failed_impl(root_key, |value, kind| { + fn mark_step_failed( + &mut self, + root_key: StepKey, + info: FailureInfo, + root_event_index: RootEventIndex, + ) { + self.mark_step_failed_impl(root_key, root_event_index, |value, kind| { match kind { MarkStepFailedImplKind::Root => { - value.mark_failed(Some(info.clone())); + value.mark_failed( + FailureReason::StepFailed(info.clone()), + root_event_index, + ); } MarkStepFailedImplKind::Descendant => { - value.mark_failed(None); + value.mark_failed( + FailureReason::ParentFailed { parent_step: root_key }, + root_event_index, + ); } MarkStepFailedImplKind::Future => { value.mark_will_not_be_run( WillNotBeRunReason::PreviousStepFailed { step: root_key, }, + root_event_index, ); } }; }) } - fn mark_step_aborted(&mut self, root_key: StepKey, info: AbortInfo) { - self.mark_step_failed_impl(root_key, |value, kind| { - match kind { - MarkStepFailedImplKind::Root => { - value.mark_aborted(AbortReason::StepAborted(info.clone())); - } - MarkStepFailedImplKind::Descendant => { - value.mark_aborted(AbortReason::ParentAborted { - parent_step: root_key, - }); - } - MarkStepFailedImplKind::Future => { - value.mark_will_not_be_run( - WillNotBeRunReason::PreviousStepAborted { - step: root_key, - }, - ); - } - }; - }); + fn mark_step_aborted( + &mut self, + root_key: StepKey, + info: AbortInfo, + root_event_index: RootEventIndex, + ) { + self.mark_step_failed_impl( + root_key, + root_event_index, + |value, kind| { + match kind { + MarkStepFailedImplKind::Root => { + value.mark_aborted( + AbortReason::StepAborted(info.clone()), + root_event_index, + ); + } + MarkStepFailedImplKind::Descendant => { + value.mark_aborted( + AbortReason::ParentAborted { + parent_step: root_key, + }, + root_event_index, + ); + } + MarkStepFailedImplKind::Future => { + value.mark_will_not_be_run( + WillNotBeRunReason::PreviousStepAborted { + step: root_key, + }, + root_event_index, + ); + } + }; + }, + ); } fn mark_step_failed_impl( &mut self, root_key: StepKey, + root_event_index: RootEventIndex, mut cb: impl FnMut(&mut EventBufferStepData, MarkStepFailedImplKind), ) { if let Some(value) = self.map.get_mut(&root_key) { @@ -627,7 +667,7 @@ impl EventStore { for index in 0..root_key.index { let key = StepKey { execution_id: root_key.execution_id, index }; if let Some(value) = self.map.get_mut(&key) { - value.mark_completed(None); + value.mark_completed(None, root_event_index); } } @@ -744,10 +784,17 @@ impl<'buf, S: StepSpec> EventBufferSteps<'buf, S> { pub struct EventBufferStepData { step_info: StepInfo, sort_key: StepSortKey, + // XXX: nest_level and total_steps are common to each execution, but are + // stored separately here. Should we store them in a separate map + // indexed by execution ID? nest_level: usize, - // Invariant: stored in order sorted by event_index. + total_steps: usize, + // Invariant: stored in order sorted by leaf event index. high_priority: Vec>, step_status: StepStatus, + // The last root event index that caused the data within this step to be + // updated. + last_root_event_index: RootEventIndex, } impl EventBufferStepData { @@ -755,36 +802,65 @@ impl EventBufferStepData { step_info: StepInfo, sort_key: StepSortKey, nest_level: usize, + total_steps: usize, + root_event_index: RootEventIndex, ) -> Self { Self { step_info, sort_key, nest_level, + total_steps, high_priority: Vec::new(), step_status: StepStatus::NotStarted, + last_root_event_index: root_event_index, } } + #[inline] pub fn step_info(&self) -> &StepInfo { &self.step_info } + #[inline] pub fn nest_level(&self) -> usize { self.nest_level } + #[inline] + pub fn total_steps(&self) -> usize { + self.total_steps + } + + #[inline] pub fn step_status(&self) -> &StepStatus { &self.step_status } + #[inline] + pub fn last_root_event_index(&self) -> RootEventIndex { + self.last_root_event_index + } + + #[inline] fn sort_key(&self) -> &StepSortKey { &self.sort_key } + /// Returns step events since the provided event index. + pub fn step_events_since( + &self, + last_seen: Option, + ) -> Vec<&StepEvent> { + let mut events: Vec<_> = + self.step_events_since_impl(last_seen).collect(); + events.sort_unstable_by_key(|event| event.event_index); + events + } + // Returns step events since the provided event index. // // Does not necessarily return results in sorted order. - fn step_events_since( + fn step_events_since_impl( &self, last_seen: Option, ) -> impl Iterator> { @@ -799,11 +875,12 @@ impl EventBufferStepData { iter.chain(iter2) } - fn add_high_priority_step_event(&mut self, event: StepEvent) { + fn add_high_priority_step_event(&mut self, root_event: StepEvent) { + let root_event_index = RootEventIndex(root_event.event_index); // Dedup by the *leaf index* in case nested reports aren't deduped // coming in. match self.high_priority.binary_search_by(|probe| { - probe.leaf_event_index().cmp(&event.leaf_event_index()) + probe.leaf_event_index().cmp(&root_event.leaf_event_index()) }) { Ok(_) => { // This is a duplicate. @@ -811,16 +888,19 @@ impl EventBufferStepData { Err(index) => { // index is typically the last element, so this should be quite // efficient. - self.high_priority.insert(index, event); + self.update_root_event_index(root_event_index); + self.high_priority.insert(index, root_event); } } } fn add_low_priority_step_event( &mut self, - event: StepEvent, + root_event: StepEvent, max_low_priority: usize, ) { + let root_event_index = RootEventIndex(root_event.event_index); + let mut updated = false; match &mut self.step_status { StepStatus::NotStarted => { unreachable!( @@ -831,7 +911,7 @@ impl EventBufferStepData { // Dedup by the *leaf index* in case nested reports aren't // deduped coming in. match low_priority.binary_search_by(|probe| { - probe.leaf_event_index().cmp(&event.leaf_event_index()) + probe.leaf_event_index().cmp(&root_event.leaf_event_index()) }) { Ok(_) => { // This is a duplicate. @@ -839,7 +919,8 @@ impl EventBufferStepData { Err(index) => { // The index is almost always at the end, so this is // efficient enough. - low_priority.insert(index, event); + low_priority.insert(index, root_event); + updated = true; } } @@ -857,12 +938,21 @@ impl EventBufferStepData { // likely duplicate events. } } + + if updated { + self.update_root_event_index(root_event_index); + } } - fn mark_completed(&mut self, status: Option) { + fn mark_completed( + &mut self, + status: Option, + root_event_index: RootEventIndex, + ) { match self.step_status { StepStatus::NotStarted | StepStatus::Running { .. } => { self.step_status = StepStatus::Completed { info: status }; + self.update_root_event_index(root_event_index); } StepStatus::Completed { .. } | StepStatus::Failed { .. } @@ -874,10 +964,15 @@ impl EventBufferStepData { } } - fn mark_failed(&mut self, info: Option) { + fn mark_failed( + &mut self, + reason: FailureReason, + root_event_index: RootEventIndex, + ) { match self.step_status { StepStatus::NotStarted | StepStatus::Running { .. } => { - self.step_status = StepStatus::Failed { info }; + self.step_status = StepStatus::Failed { reason }; + self.update_root_event_index(root_event_index); } StepStatus::Completed { .. } | StepStatus::Failed { .. } @@ -889,7 +984,11 @@ impl EventBufferStepData { } } - fn mark_aborted(&mut self, reason: AbortReason) { + fn mark_aborted( + &mut self, + reason: AbortReason, + root_event_index: RootEventIndex, + ) { match &mut self.step_status { StepStatus::NotStarted => { match reason { @@ -909,12 +1008,14 @@ impl EventBufferStepData { }; } } + self.update_root_event_index(root_event_index); } StepStatus::Running { progress_event, .. } => { self.step_status = StepStatus::Aborted { reason, last_progress: Some(progress_event.clone()), }; + self.update_root_event_index(root_event_index); } StepStatus::Completed { .. } | StepStatus::Failed { .. } @@ -926,10 +1027,15 @@ impl EventBufferStepData { } } - fn mark_will_not_be_run(&mut self, reason: WillNotBeRunReason) { + fn mark_will_not_be_run( + &mut self, + reason: WillNotBeRunReason, + root_event_index: RootEventIndex, + ) { match self.step_status { StepStatus::NotStarted => { self.step_status = StepStatus::WillNotBeRun { reason }; + self.update_root_event_index(root_event_index); } StepStatus::Running { .. } => { // This is a weird situation. We should never encounter it in @@ -966,6 +1072,15 @@ impl EventBufferStepData { } } } + + fn update_root_event_index(&mut self, root_event_index: RootEventIndex) { + debug_assert!( + root_event_index >= self.last_root_event_index, + "event index must be monotonically increasing" + ); + self.last_root_event_index = + self.last_root_event_index.max(root_event_index); + } } /// The step status as last seen by events. @@ -990,8 +1105,8 @@ pub enum StepStatus { /// The step has failed. Failed { - /// Failure information. - info: Option, + /// The reason for the failure. + reason: FailureReason, }, /// Execution was aborted while this step was running. @@ -1053,6 +1168,17 @@ pub struct CompletionInfo { pub attempt_elapsed: Duration, } +#[derive(Clone, Debug)] +pub enum FailureReason { + /// This step failed. + StepFailed(FailureInfo), + /// A parent step failed. + ParentFailed { + /// The parent step that failed. + parent_step: StepKey, + }, +} + #[derive(Clone, Debug)] pub struct FailureInfo { pub total_attempts: usize, @@ -1230,12 +1356,24 @@ pub struct StepKey { pub index: usize, } +/// A newtype to track root event indexes within [`EventBuffer`]s, to ensure +/// that we aren't mixing them with leaf event indexes in this code. +#[derive(Clone, Copy, Debug, Eq, PartialEq, PartialOrd, Ord, Hash)] +pub struct RootEventIndex(pub usize); + +impl fmt::Display for RootEventIndex { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.0) + } +} + #[cfg(test)] mod tests { use std::collections::HashSet; use anyhow::{bail, ensure, Context}; use futures::StreamExt; + use indexmap::IndexSet; use omicron_test_utils::dev::test_setup_log; use serde::{de::IntoDeserializer, Deserialize}; use tokio::sync::mpsc; @@ -1546,6 +1684,18 @@ mod tests { reported_step_events.extend(report.step_events); last_seen = report.last_seen; + // Ensure that the last root index was updated for this + // event's corresponding steps, but not for any others. + if let Event::Step(event) = event { + check_last_root_event_index(event, &buffer) + .with_context(|| { + format!( + "{description}, at index {i} (time {time}):\ + error with last root event index" + ) + })?; + } + // Call last_seen without feeding a new event in to ensure that // a report with no step events is produced. let report = buffer.generate_report_since(last_seen); @@ -1625,8 +1775,7 @@ mod tests { let mut last_seen_opt = with_deltas.then_some(None); for (i, event) in self.generated_events.iter().enumerate() { - // Going to use event_added in an upcoming commit. - let _event_added = (event_fn)(&mut buffer, event); + let event_added = (event_fn)(&mut buffer, event); let report = match last_seen_opt { Some(last_seen) => buffer.generate_report_since(last_seen), @@ -1646,6 +1795,18 @@ mod tests { }) .unwrap(); + if let Event::Step(event) = event { + if event_added { + check_last_root_event_index(event, &buffer) + .with_context(|| { + format!( + "{description}, at index {i}: \ + error with last root event index" + ) + })?; + } + } + receive_buffer.add_event_report(report.clone()); let this_step_events = receive_buffer.generate_report().step_events; @@ -1832,6 +1993,78 @@ mod tests { } } + fn check_last_root_event_index( + event: &StepEvent, + buffer: &EventBuffer, + ) -> anyhow::Result<()> { + let root_event_index = RootEventIndex(event.event_index); + let event_step_keys = step_keys(event); + let steps = buffer.steps(); + for (step_key, data) in steps.as_slice() { + let data_index = data.last_root_event_index(); + if event_step_keys.contains(step_key) { + ensure!( + data_index == root_event_index, + "last_root_event_index should have been updated \ + but wasn't (actual: {data_index}, expected: {root_event_index}) \ + for step {step_key:?} (event: {event:?})", + ); + } else { + ensure!( + data_index < root_event_index, + "last_root_event_index should *not* have been updated \ + but was (current: {data_index}, new: {root_event_index}) \ + for step {step_key:?} (event: {event:?})", + ); + } + } + + Ok(()) + } + + /// Returns the step keys that this step event would cause updates against, + /// in order from root to leaf. + fn step_keys(event: &StepEvent) -> IndexSet { + let mut out = IndexSet::new(); + step_keys_impl(event, &mut out); + out + } + + fn step_keys_impl( + event: &StepEvent, + out: &mut IndexSet, + ) { + match &event.kind { + StepEventKind::NoStepsDefined | StepEventKind::Unknown => {} + StepEventKind::ExecutionStarted { steps, .. } => { + for step in steps { + out.insert(StepKey { + execution_id: event.execution_id, + index: step.index, + }); + } + } + StepEventKind::ProgressReset { step, .. } + | StepEventKind::AttemptRetry { step, .. } + | StepEventKind::StepCompleted { step, .. } + | StepEventKind::ExecutionCompleted { last_step: step, .. } + | StepEventKind::ExecutionFailed { failed_step: step, .. } + | StepEventKind::ExecutionAborted { aborted_step: step, .. } => { + out.insert(StepKey { + execution_id: event.execution_id, + index: step.info.index, + }); + } + StepEventKind::Nested { step, event, .. } => { + out.insert(StepKey { + execution_id: event.execution_id, + index: step.info.index, + }); + step_keys_impl(event, out); + } + } + } + #[derive(Copy, Clone, Debug)] #[allow(unused)] enum WithDeltas { diff --git a/wicket/src/ui/panes/update.rs b/wicket/src/ui/panes/update.rs index ea68cb4a16..da6f10cf88 100644 --- a/wicket/src/ui/panes/update.rs +++ b/wicket/src/ui/panes/update.rs @@ -29,7 +29,7 @@ use ratatui::widgets::{ use slog::{info, o, Logger}; use tui_tree_widget::{Tree, TreeItem, TreeState}; use update_engine::{ - AbortReason, ExecutionStatus, StepKey, WillNotBeRunReason, + AbortReason, ExecutionStatus, FailureReason, StepKey, WillNotBeRunReason, }; use wicket_common::update_events::{ EventBuffer, EventReport, ProgressEvent, StepOutcome, StepStatus, @@ -340,7 +340,7 @@ impl UpdatePane { Span::styled("Completed", style::successful_update_bold()), ])); } - StepStatus::Failed { info: Some(info) } => { + StepStatus::Failed { reason: FailureReason::StepFailed(info) } => { let mut spans = vec![ Span::styled("Status: ", style::selected()), Span::styled("Failed", style::failed_update_bold()), @@ -381,13 +381,23 @@ impl UpdatePane { } } } - StepStatus::Failed { info: None } => { - // No information is available, so all we can do is say that - // this step failed. - let spans = vec![ + StepStatus::Failed { + reason: FailureReason::ParentFailed { parent_step }, + } => { + let mut spans = vec![ Span::styled("Status: ", style::selected()), Span::styled("Failed", style::failed_update_bold()), ]; + if let Some(value) = id_state.event_buffer.get(parent_step) { + spans.push(Span::styled( + " at parent step ", + style::plain_text(), + )); + spans.push(Span::styled( + value.step_info().description.as_ref(), + style::selected(), + )); + } body.lines.push(Line::from(spans)); } StepStatus::Aborted {