diff --git a/Cargo.lock b/Cargo.lock
index f10181f752..1eea7f9154 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -9761,6 +9761,7 @@ dependencies = [
"sha3",
"sled-hardware-types",
"slog",
+ "strum",
"thiserror",
"toml 0.8.19",
"uuid",
@@ -12117,6 +12118,7 @@ dependencies = [
"schemars",
"serde",
"serde_json",
+ "sled-agent-types",
"sled-hardware-types",
"slog",
"update-engine",
diff --git a/clients/wicketd-client/Cargo.toml b/clients/wicketd-client/Cargo.toml
index 0e55acd8bb..5e52eedb49 100644
--- a/clients/wicketd-client/Cargo.toml
+++ b/clients/wicketd-client/Cargo.toml
@@ -19,6 +19,7 @@ reqwest = { workspace = true, features = ["rustls-tls", "stream"] }
schemars.workspace = true
serde.workspace = true
serde_json.workspace = true
+sled-agent-types.workspace = true
sled-hardware-types.workspace = true
slog.workspace = true
update-engine.workspace = true
diff --git a/clients/wicketd-client/src/lib.rs b/clients/wicketd-client/src/lib.rs
index 7a07ecd6a5..40b60ac612 100644
--- a/clients/wicketd-client/src/lib.rs
+++ b/clients/wicketd-client/src/lib.rs
@@ -67,6 +67,7 @@ progenitor::generate_api!(
RotSlot = wicket_common::inventory::RotSlot,
RotState = wicket_common::inventory::RotState,
RouteConfig = omicron_common::api::internal::shared::RouteConfig,
+ RssStep = sled_agent_types::rack_ops::RssStep,
SpComponentCaboose = wicket_common::inventory::SpComponentCaboose,
SpComponentInfo = wicket_common::inventory::SpComponentInfo,
SpIdentifier = wicket_common::inventory::SpIdentifier,
diff --git a/openapi/bootstrap-agent.json b/openapi/bootstrap-agent.json
index bd928001bb..6b4d2093a1 100644
--- a/openapi/bootstrap-agent.json
+++ b/openapi/bootstrap-agent.json
@@ -1070,11 +1070,15 @@
"enum": [
"initializing"
]
+ },
+ "step": {
+ "$ref": "#/components/schemas/RssStep"
}
},
"required": [
"id",
- "status"
+ "status",
+ "step"
]
},
{
@@ -1280,6 +1284,235 @@
"nexthop"
]
},
+ "RssStep": {
+ "description": "Steps we go through during initial rack setup. Keep this list in order that they happen.",
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "requested"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "starting"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "load_existing_plan"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "create_sled_plan"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "init_trust_quorum"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "network_config_update"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "sled_init"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "ensure_storage"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "init_dns"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "configure_dns"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "init_ntp"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "wait_for_time_sync"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "wait_for_database"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "cluster_init"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "zones_init"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "nexus_handoff"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ }
+ ]
+ },
"SemverVersion": {
"type": "string",
"pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$"
diff --git a/openapi/wicketd.json b/openapi/wicketd.json
index 87cfe045d3..6d17d9c071 100644
--- a/openapi/wicketd.json
+++ b/openapi/wicketd.json
@@ -2680,7 +2680,7 @@
]
},
"RackOperationStatus": {
- "description": "Current status of any rack-level operation being performed by this bootstrap agent.\n\nJSON schema
\n\n```json { \"description\": \"Current status of any rack-level operation being performed by this bootstrap agent.\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initializing\" ] } } }, { \"description\": \"`id` will be none if the rack was already initialized on startup.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_panicked\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"resetting\" ] } } }, { \"description\": \"`reset_id` will be None if the rack is in an uninitialized-on-startup, or Some if it is in an uninitialized state due to a reset operation completing.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"reset_id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"uninitialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": 
\"#/components/schemas/TypedUuidForRackResetKind\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_panicked\" ] } } } ] } ``` ",
+ "description": "Current status of any rack-level operation being performed by this bootstrap agent.\n\nJSON schema
\n\n```json { \"description\": \"Current status of any rack-level operation being performed by this bootstrap agent.\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"id\", \"status\", \"step\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initializing\" ] }, \"step\": { \"$ref\": \"#/components/schemas/RssStep\" } } }, { \"description\": \"`id` will be none if the rack was already initialized on startup.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackInitKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"initialization_panicked\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"resetting\" ] } } }, { \"description\": \"`reset_id` will be None if the rack is in an uninitialized-on-startup, or Some if it is in an uninitialized state due to a reset operation completing.\", \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"reset_id\": { \"allOf\": [ { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" } ] }, \"status\": { \"type\": \"string\", \"enum\": [ \"uninitialized\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"message\", \"status\" ], 
\"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" }, \"message\": { \"type\": \"string\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_failed\" ] } } }, { \"type\": \"object\", \"required\": [ \"id\", \"status\" ], \"properties\": { \"id\": { \"$ref\": \"#/components/schemas/TypedUuidForRackResetKind\" }, \"status\": { \"type\": \"string\", \"enum\": [ \"reset_panicked\" ] } } } ] } ``` ",
"oneOf": [
{
"type": "object",
@@ -2693,11 +2693,15 @@
"enum": [
"initializing"
]
+ },
+ "step": {
+ "$ref": "#/components/schemas/RssStep"
}
},
"required": [
"id",
- "status"
+ "status",
+ "step"
]
},
{
@@ -3150,6 +3154,235 @@
"nexthop"
]
},
+ "RssStep": {
+ "description": "Steps we go through during initial rack setup. Keep this list in order that they happen.\n\nJSON schema
\n\n```json { \"description\": \"Steps we go through during initial rack setup. Keep this list in order that they happen.\", \"oneOf\": [ { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"requested\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"starting\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"load_existing_plan\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"create_sled_plan\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"init_trust_quorum\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"network_config_update\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"sled_init\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"ensure_storage\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"init_dns\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"configure_dns\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"init_ntp\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"wait_for_time_sync\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { 
\"type\": \"string\", \"enum\": [ \"wait_for_database\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"cluster_init\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"zones_init\" ] } } }, { \"type\": \"object\", \"required\": [ \"status\" ], \"properties\": { \"status\": { \"type\": \"string\", \"enum\": [ \"nexus_handoff\" ] } } } ] } ``` ",
+ "oneOf": [
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "requested"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "starting"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "load_existing_plan"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "create_sled_plan"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "init_trust_quorum"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "network_config_update"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "sled_init"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "ensure_storage"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "init_dns"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "configure_dns"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "init_ntp"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "wait_for_time_sync"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "wait_for_database"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "cluster_init"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "zones_init"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ },
+ {
+ "type": "object",
+ "properties": {
+ "status": {
+ "type": "string",
+ "enum": [
+ "nexus_handoff"
+ ]
+ }
+ },
+ "required": [
+ "status"
+ ]
+ }
+ ]
+ },
"SemverVersion": {
"type": "string",
"pattern": "^(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)(?:-((?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*)(?:\\.(?:0|[1-9]\\d*|\\d*[a-zA-Z-][0-9a-zA-Z-]*))*))?(?:\\+([0-9a-zA-Z-]+(?:\\.[0-9a-zA-Z-]+)*))?$"
diff --git a/sled-agent/src/bootstrap/rack_ops.rs b/sled-agent/src/bootstrap/rack_ops.rs
index 3eb00b419a..cd59aa4849 100644
--- a/sled-agent/src/bootstrap/rack_ops.rs
+++ b/sled-agent/src/bootstrap/rack_ops.rs
@@ -10,7 +10,7 @@ use bootstore::schemes::v0 as bootstore;
use omicron_uuid_kinds::RackInitUuid;
use omicron_uuid_kinds::RackResetUuid;
use sled_agent_types::rack_init::RackInitializeRequest;
-use sled_agent_types::rack_ops::RackOperationStatus;
+use sled_agent_types::rack_ops::{RackOperationStatus, RssStep};
use sled_storage::manager::StorageHandle;
use slog::Logger;
use std::mem;
@@ -19,6 +19,7 @@ use std::sync::Arc;
use std::sync::Mutex;
use tokio::sync::oneshot;
use tokio::sync::oneshot::error::TryRecvError;
+use tokio::sync::watch;
#[derive(Debug, Clone, thiserror::Error)]
pub enum RssAccessError {
@@ -62,7 +63,7 @@ impl RssAccess {
let mut status = self.status.lock().unwrap();
match &mut *status {
- RssStatus::Initializing { id, completion } => {
+ RssStatus::Initializing { id, completion, step_rx } => {
let id = *id;
// This is our only chance to notice the initialization task has
// panicked: if it dropped the sending half of `completion`
@@ -75,7 +76,11 @@ impl RssAccess {
}
Err(TryRecvError::Empty) => {
// Initialization task is still running
- RackOperationStatus::Initializing { id }
+ // Update the step we are on.
+ RackOperationStatus::Initializing {
+ id,
+ step: *step_rx.borrow(),
+ }
}
Err(TryRecvError::Closed) => {
// Initialization task has panicked!
@@ -171,9 +176,9 @@ impl RssAccess {
RssStatus::Uninitialized { .. } => {
let (completion_tx, completion) = oneshot::channel();
let id = RackInitUuid::new_v4();
- *status = RssStatus::Initializing { id, completion };
+ let (step_tx, step_rx) = watch::channel(RssStep::Requested);
+ *status = RssStatus::Initializing { id, completion, step_rx };
mem::drop(status);
-
let parent_log = parent_log.clone();
let storage_manager = storage_manager.clone();
let bootstore_node_handle = bootstore_node_handle.clone();
@@ -185,6 +190,7 @@ impl RssAccess {
storage_manager,
bootstore_node_handle,
request,
+ step_tx,
)
.await;
let new_status = match result {
@@ -284,6 +290,9 @@ enum RssStatus {
Initializing {
id: RackInitUuid,
completion: oneshot::Receiver<()>,
+ // Used by the RSS task to update us with what step it is on.
+ // This holds the current RSS step.
+ step_rx: watch::Receiver,
},
Resetting {
id: RackResetUuid,
@@ -313,6 +322,7 @@ async fn rack_initialize(
storage_manager: StorageHandle,
bootstore_node_handle: bootstore::NodeHandle,
request: RackInitializeRequest,
+ step_tx: watch::Sender,
) -> Result<(), SetupServiceError> {
RssHandle::run_rss(
parent_log,
@@ -320,6 +330,7 @@ async fn rack_initialize(
global_zone_bootstrap_ip,
storage_manager,
bootstore_node_handle,
+ step_tx,
)
.await
}
diff --git a/sled-agent/src/bootstrap/rss_handle.rs b/sled-agent/src/bootstrap/rss_handle.rs
index eee7eed085..0cf6054ca2 100644
--- a/sled-agent/src/bootstrap/rss_handle.rs
+++ b/sled-agent/src/bootstrap/rss_handle.rs
@@ -15,6 +15,7 @@ use omicron_common::backoff::retry_notify;
use omicron_common::backoff::retry_policy_local;
use omicron_common::backoff::BackoffError;
use sled_agent_types::rack_init::RackInitializeRequest;
+use sled_agent_types::rack_ops::RssStep;
use sled_agent_types::sled::StartSledAgentRequest;
use sled_storage::manager::StorageHandle;
use slog::Logger;
@@ -22,6 +23,7 @@ use std::net::Ipv6Addr;
use std::net::SocketAddrV6;
use tokio::sync::mpsc;
use tokio::sync::oneshot;
+use tokio::sync::watch;
use tokio::task::JoinHandle;
pub(super) struct RssHandle {
@@ -48,6 +50,7 @@ impl RssHandle {
our_bootstrap_address: Ipv6Addr,
storage_manager: StorageHandle,
bootstore: bootstore::NodeHandle,
+ step_tx: watch::Sender,
) -> Result<(), SetupServiceError> {
let (tx, rx) = rss_channel(our_bootstrap_address);
@@ -57,6 +60,7 @@ impl RssHandle {
storage_manager,
tx,
bootstore,
+ step_tx,
);
let log = log.new(o!("component" => "BootstrapAgentRssHandler"));
rx.await_local_rss_request(&log).await;
diff --git a/sled-agent/src/rack_setup/service.rs b/sled-agent/src/rack_setup/service.rs
index 3f73e55d0f..17b75c4334 100644
--- a/sled-agent/src/rack_setup/service.rs
+++ b/sled-agent/src/rack_setup/service.rs
@@ -118,6 +118,7 @@ use sled_agent_types::early_networking::{
use sled_agent_types::rack_init::{
BootstrapAddressDiscovery, RackInitializeRequest as Config,
};
+use sled_agent_types::rack_ops::RssStep;
use sled_agent_types::sled::StartSledAgentRequest;
use sled_agent_types::time_sync::TimeSync;
use sled_hardware_types::underlay::BootstrapInterface;
@@ -130,8 +131,25 @@ use std::iter;
use std::net::{Ipv6Addr, SocketAddrV6};
use std::time::Duration;
use thiserror::Error;
+use tokio::sync::watch;
use uuid::Uuid;
+/// Tracks the current RSS step and publishes it to whoever is watching.
+///
+/// Wraps the sending half of a [`watch`] channel that carries an
+/// [`RssStep`]. Each published step replaces the value previously held by
+/// the channel.
+pub struct RssProgress {
+    /// Sending half of the channel holding the current step.
+    step_tx: watch::Sender<RssStep>,
+}
+
+impl RssProgress {
+    /// Wraps `step_tx` and immediately publishes [`RssStep::Starting`].
+    pub fn new(step_tx: watch::Sender<RssStep>) -> Self {
+        // `send_replace` stores the value without requiring a live
+        // receiver, so there is no error to handle here.
+        step_tx.send_replace(RssStep::Starting);
+        RssProgress { step_tx }
+    }
+
+    /// Publishes `new_step` as the current step, replacing the previous one.
+    pub fn update(&mut self, new_step: RssStep) {
+        self.step_tx.send_replace(new_step);
+    }
+}
+
/// Describes errors which may occur while operating the setup service.
#[derive(Error, Debug)]
pub enum SetupServiceError {
@@ -224,6 +242,7 @@ impl RackSetupService {
storage_manager: StorageHandle,
local_bootstrap_agent: BootstrapAgentHandle,
bootstore: bootstore::NodeHandle,
+ step_tx: watch::Sender,
) -> Self {
let handle = tokio::task::spawn(async move {
let svc = ServiceInner::new(log.clone());
@@ -233,6 +252,7 @@ impl RackSetupService {
&storage_manager,
local_bootstrap_agent,
bootstore,
+ step_tx,
)
.await
{
@@ -1049,8 +1069,10 @@ impl ServiceInner {
storage_manager: &StorageHandle,
local_bootstrap_agent: BootstrapAgentHandle,
bootstore: bootstore::NodeHandle,
+ step_tx: watch::Sender,
) -> Result<(), SetupServiceError> {
info!(self.log, "Injecting RSS configuration: {:#?}", config);
+ let mut rss_step = RssProgress::new(step_tx);
let resolver = DnsResolver::new_from_subnet(
self.log.new(o!("component" => "DnsResolver")),
@@ -1081,6 +1103,7 @@ impl ServiceInner {
"RSS configuration looks like it has already been applied",
);
+ rss_step.update(RssStep::LoadExistingPlan);
let sled_plan = SledPlan::load(&self.log, storage_manager)
.await?
.expect("Sled plan should exist if completed marker exists");
@@ -1100,6 +1123,7 @@ impl ServiceInner {
let nexus_address =
resolver.lookup_socket_v6(ServiceName::Nexus).await?;
+ rss_step.update(RssStep::NexusHandoff);
self.handoff_to_nexus(
&config,
&sled_plan,
@@ -1113,6 +1137,7 @@ impl ServiceInner {
info!(self.log, "RSS configuration has not been fully applied yet");
}
+ rss_step.update(RssStep::CreateSledPlan);
// Wait for either:
// - All the peers to re-load an old plan (if one exists)
// - Enough peers to create a new plan (if one does not exist)
@@ -1163,6 +1188,7 @@ impl ServiceInner {
};
let config = &plan.config;
+ rss_step.update(RssStep::InitTrustQuorum);
// Initialize the trust quorum if there are peers configured.
if let Some(peers) = &config.trust_quorum_peers {
let initial_membership: BTreeSet<_> =
@@ -1185,8 +1211,10 @@ impl ServiceInner {
},
};
info!(self.log, "Writing Rack Network Configuration to bootstore");
+ rss_step.update(RssStep::NetworkConfigUpdate);
bootstore.update_network_config(early_network_config.into()).await?;
+ rss_step.update(RssStep::SledInit);
// Forward the sled initialization requests to our sled-agent.
local_bootstrap_agent
.initialize_sleds(
@@ -1223,6 +1251,7 @@ impl ServiceInner {
.await?
};
+ rss_step.update(RssStep::EnsureStorage);
// Before we can ask for any services, we need to ensure that storage is
// operational.
self.ensure_storage_config_at_least(&service_plan).await?;
@@ -1239,7 +1268,9 @@ impl ServiceInner {
matches!(zone_type, OmicronZoneType::InternalDns { .. })
},
);
+ rss_step.update(RssStep::InitDns);
self.ensure_zone_config_at_least(v2generator.sled_configs()).await?;
+ rss_step.update(RssStep::ConfigureDns);
self.initialize_internal_dns_records(&service_plan).await?;
// Ask MGS in each switch zone which switch it is.
@@ -1247,6 +1278,7 @@ impl ServiceInner {
.lookup_switch_zone_underlay_addrs(&resolver)
.await;
+ rss_step.update(RssStep::InitNtp);
// Next start up the NTP services.
let v3generator = v2generator.new_version_with(
DeployStepVersion::V3_DNS_AND_NTP,
@@ -1260,11 +1292,13 @@ impl ServiceInner {
);
self.ensure_zone_config_at_least(v3generator.sled_configs()).await?;
+ rss_step.update(RssStep::WaitForTimeSync);
// Wait until time is synchronized on all sleds before proceeding.
self.wait_for_timesync(&sled_addresses).await?;
info!(self.log, "Finished setting up Internal DNS and NTP");
+ rss_step.update(RssStep::WaitForDatabase);
// Wait until Cockroach has been initialized before running Nexus.
let v4generator = v3generator.new_version_with(
DeployStepVersion::V4_COCKROACHDB,
@@ -1276,9 +1310,11 @@ impl ServiceInner {
// Now that datasets and zones have started for CockroachDB,
// perform one-time initialization of the cluster.
+ rss_step.update(RssStep::ClusterInit);
self.initialize_cockroach(&service_plan).await?;
// Issue the rest of the zone initialization requests.
+ rss_step.update(RssStep::ZonesInit);
let v5generator = v4generator
.new_version_with(DeployStepVersion::V5_EVERYTHING, &|_| true);
self.ensure_zone_config_at_least(v5generator.sled_configs()).await?;
@@ -1296,6 +1332,7 @@ impl ServiceInner {
let nexus_address =
resolver.lookup_socket_v6(ServiceName::Nexus).await?;
+ rss_step.update(RssStep::NexusHandoff);
// At this point, even if we reboot, we must not try to manage sleds,
// services, or DNS records.
self.handoff_to_nexus(
diff --git a/sled-agent/types/Cargo.toml b/sled-agent/types/Cargo.toml
index e01d40db28..b33cdc8651 100644
--- a/sled-agent/types/Cargo.toml
+++ b/sled-agent/types/Cargo.toml
@@ -29,6 +29,7 @@ serde_json.workspace = true
sha3.workspace = true
sled-hardware-types.workspace = true
slog.workspace = true
+strum.workspace = true
thiserror.workspace = true
toml.workspace = true
uuid.workspace = true
diff --git a/sled-agent/types/src/rack_ops.rs b/sled-agent/types/src/rack_ops.rs
index d8c0fa1c88..3ff56a02f0 100644
--- a/sled-agent/types/src/rack_ops.rs
+++ b/sled-agent/types/src/rack_ops.rs
@@ -5,16 +5,16 @@
use omicron_uuid_kinds::{RackInitUuid, RackResetUuid};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
+use strum::{EnumCount, EnumIter, IntoEnumIterator};
/// Current status of any rack-level operation being performed by this bootstrap
/// agent.
-#[derive(
- Clone, Debug, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema,
-)]
+#[derive(Clone, PartialEq, Eq, Hash, Serialize, Deserialize, JsonSchema)]
#[serde(tag = "status", rename_all = "snake_case")]
pub enum RackOperationStatus {
Initializing {
id: RackInitUuid,
+ step: RssStep,
},
/// `id` will be none if the rack was already initialized on startup.
Initialized {
@@ -44,3 +44,55 @@ pub enum RackOperationStatus {
id: RackResetUuid,
},
}
+
+/// Steps we go through during initial rack setup.
+/// Keep this list in order that they happen.
+//
+// NOTE: the `///` text above also appears verbatim as the `RssStep`
+// description in openapi/bootstrap-agent.json and openapi/wicketd.json, so
+// rewording it changes those documents too. Use `//` comments for notes that
+// should stay out of the schema.
+//
+// Declaration order is significant: `RssStep::index` reports a variant's
+// position in `RssStep::iter()`, and the derived `PartialOrd`/`Ord` compare
+// variants by their declaration order.
+#[derive(
+ Copy,
+ Clone,
+ Debug,
+ Deserialize,
+ EnumCount,
+ EnumIter,
+ Eq,
+ Hash,
+ JsonSchema,
+ Ord,
+ PartialEq,
+ PartialOrd,
+ Serialize,
+)]
+// Serialized as an object whose `status` field holds the snake_case variant
+// name, e.g. `{"status": "sled_init"}`.
+#[serde(tag = "status", rename_all = "snake_case")]
+pub enum RssStep {
+ // Initial value of the step channel, set before the RSS task publishes
+ // anything.
+ Requested,
+ // Published by `RssProgress::new`.
+ Starting,
+ // Published before loading a previously written sled plan.
+ LoadExistingPlan,
+ // Published before waiting for peers and creating/loading the sled plan.
+ CreateSledPlan,
+ // Published before initializing the trust quorum.
+ InitTrustQuorum,
+ // Published before writing the network config to the bootstore.
+ NetworkConfigUpdate,
+ // Published before forwarding sled initialization requests.
+ SledInit,
+ // Published before ensuring the storage config.
+ EnsureStorage,
+ // Published before ensuring the internal DNS zone config.
+ InitDns,
+ // Published before initializing the internal DNS records.
+ ConfigureDns,
+ // Published before starting the NTP services.
+ InitNtp,
+ // Published before waiting for time synchronization on all sleds.
+ WaitForTimeSync,
+ // Published before bringing up the CockroachDB zones.
+ WaitForDatabase,
+ // Published before the one-time CockroachDB cluster initialization.
+ ClusterInit,
+ // Published before issuing the remaining zone initialization requests.
+ ZonesInit,
+ // Published before handing off to Nexus.
+ NexusHandoff,
+}
+
+impl RssStep {
+    /// Returns the total number of steps, i.e. the number of `RssStep`
+    /// variants.
+    ///
+    /// The result is the same for every variant; `self` is not inspected.
+    pub fn max_step(&self) -> usize {
+        RssStep::COUNT
+    }
+
+    /// Returns the zero-based position of this step within
+    /// `RssStep::iter()`.
+    ///
+    /// Because the position is zero-based it is always strictly less than
+    /// [`RssStep::max_step`]; callers that display progress as
+    /// `index/max_step` may want to add one.
+    pub fn index(&self) -> usize {
+        // Every variant is yielded by `iter()`, so the `0` fallback is not
+        // expected to be taken; it is kept to match the previous behavior.
+        RssStep::iter().position(|variant| variant == *self).unwrap_or(0)
+    }
+}
diff --git a/wicket/src/ui/panes/rack_setup.rs b/wicket/src/ui/panes/rack_setup.rs
index cc6a2c5621..cbf66a1cf3 100644
--- a/wicket/src/ui/panes/rack_setup.rs
+++ b/wicket/src/ui/panes/rack_setup.rs
@@ -431,11 +431,21 @@ fn draw_rack_status_details_popup(
style::plain_text(),
)]));
}
- Ok(RackOperationStatus::Initializing { id }) => {
+ Ok(RackOperationStatus::Initializing { id, step }) => {
body.lines.push(Line::from(vec![
status,
Span::styled("Initializing", style::plain_text()),
]));
+ let max = step.max_step();
+ let index = step.index();
+ body.lines.push(Line::from(vec![Span::styled(
+ format!("Current step: {}/{}", index, max),
+ style::plain_text(),
+ )]));
+ body.lines.push(Line::from(vec![Span::styled(
+ format!("Current operation: {:?}", step),
+ style::plain_text(),
+ )]));
body.lines.push(Line::from(vec![Span::styled(
format!("Current operation ID: {}", id),
style::plain_text(),
@@ -632,8 +642,11 @@ fn rss_config_text<'a>(
Ok(RackOperationStatus::Initialized { .. }) => {
Span::styled("Initialized", ok_style)
}
- Ok(RackOperationStatus::Initializing { .. }) => {
- Span::styled("Initializing", warn_style)
+ Ok(RackOperationStatus::Initializing { step, .. }) => {
+ let max = step.max_step();
+ let index = step.index();
+ let msg = format!("Initializing: Step {}/{}", index, max);
+ Span::styled(msg, warn_style)
}
Ok(RackOperationStatus::Resetting { .. }) => {
Span::styled("Resetting", warn_style)