diff --git a/clients/nexus-client/src/lib.rs b/clients/nexus-client/src/lib.rs index 6546af8673..acf282a1f9 100644 --- a/clients/nexus-client/src/lib.rs +++ b/clients/nexus-client/src/lib.rs @@ -83,21 +83,34 @@ impl From for omicron_common::api::external::DiskState { } } -impl From - for omicron_common::api::external::InstanceState -{ - fn from(s: types::InstanceState) -> Self { +impl From for types::VmmState { + fn from(s: omicron_common::api::internal::nexus::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Input; match s { - types::InstanceState::Creating => Self::Creating, - types::InstanceState::Starting => Self::Starting, - types::InstanceState::Running => Self::Running, - types::InstanceState::Stopping => Self::Stopping, - types::InstanceState::Stopped => Self::Stopped, - types::InstanceState::Rebooting => Self::Rebooting, - types::InstanceState::Migrating => Self::Migrating, - types::InstanceState::Repairing => Self::Repairing, - types::InstanceState::Failed => Self::Failed, - types::InstanceState::Destroyed => Self::Destroyed, + Input::Starting => types::VmmState::Starting, + Input::Running => types::VmmState::Running, + Input::Stopping => types::VmmState::Stopping, + Input::Stopped => types::VmmState::Stopped, + Input::Rebooting => types::VmmState::Rebooting, + Input::Migrating => types::VmmState::Migrating, + Input::Failed => types::VmmState::Failed, + Input::Destroyed => types::VmmState::Destroyed, + } + } +} + +impl From for omicron_common::api::internal::nexus::VmmState { + fn from(s: types::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Output; + match s { + types::VmmState::Starting => Output::Starting, + types::VmmState::Running => Output::Running, + types::VmmState::Stopping => Output::Stopping, + types::VmmState::Stopped => Output::Stopped, + types::VmmState::Rebooting => Output::Rebooting, + types::VmmState::Migrating => Output::Migrating, + types::VmmState::Failed => Output::Failed, + 
types::VmmState::Destroyed => Output::Destroyed, } } } @@ -140,26 +153,6 @@ impl From } } -impl From - for types::InstanceState -{ - fn from(s: omicron_common::api::external::InstanceState) -> Self { - use omicron_common::api::external::InstanceState; - match s { - InstanceState::Creating => Self::Creating, - InstanceState::Starting => Self::Starting, - InstanceState::Running => Self::Running, - InstanceState::Stopping => Self::Stopping, - InstanceState::Stopped => Self::Stopped, - InstanceState::Rebooting => Self::Rebooting, - InstanceState::Migrating => Self::Migrating, - InstanceState::Repairing => Self::Repairing, - InstanceState::Failed => Self::Failed, - InstanceState::Destroyed => Self::Destroyed, - } - } -} - impl From for types::DiskRuntimeState { @@ -192,25 +185,6 @@ impl From for types::DiskState { } } -impl From<&types::InstanceState> - for omicron_common::api::external::InstanceState -{ - fn from(state: &types::InstanceState) -> Self { - match state { - types::InstanceState::Creating => Self::Creating, - types::InstanceState::Starting => Self::Starting, - types::InstanceState::Running => Self::Running, - types::InstanceState::Stopping => Self::Stopping, - types::InstanceState::Stopped => Self::Stopped, - types::InstanceState::Rebooting => Self::Rebooting, - types::InstanceState::Migrating => Self::Migrating, - types::InstanceState::Repairing => Self::Repairing, - types::InstanceState::Failed => Self::Failed, - types::InstanceState::Destroyed => Self::Destroyed, - } - } -} - impl From for types::ProducerKind { diff --git a/clients/sled-agent-client/src/lib.rs b/clients/sled-agent-client/src/lib.rs index 300e3713ea..862ae00cc9 100644 --- a/clients/sled-agent-client/src/lib.rs +++ b/clients/sled-agent-client/src/lib.rs @@ -257,22 +257,18 @@ impl From } } -impl From - for types::InstanceState -{ - fn from(s: omicron_common::api::external::InstanceState) -> Self { - use omicron_common::api::external::InstanceState::*; +impl From for types::VmmState { + fn 
from(s: omicron_common::api::internal::nexus::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Input; match s { - Creating => Self::Creating, - Starting => Self::Starting, - Running => Self::Running, - Stopping => Self::Stopping, - Stopped => Self::Stopped, - Rebooting => Self::Rebooting, - Migrating => Self::Migrating, - Repairing => Self::Repairing, - Failed => Self::Failed, - Destroyed => Self::Destroyed, + Input::Starting => types::VmmState::Starting, + Input::Running => types::VmmState::Running, + Input::Stopping => types::VmmState::Stopping, + Input::Stopped => types::VmmState::Stopped, + Input::Rebooting => types::VmmState::Rebooting, + Input::Migrating => types::VmmState::Migrating, + Input::Failed => types::VmmState::Failed, + Input::Destroyed => types::VmmState::Destroyed, } } } @@ -299,6 +295,22 @@ impl From } } +impl From for omicron_common::api::internal::nexus::VmmState { + fn from(s: types::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Output; + match s { + types::VmmState::Starting => Output::Starting, + types::VmmState::Running => Output::Running, + types::VmmState::Stopping => Output::Stopping, + types::VmmState::Stopped => Output::Stopped, + types::VmmState::Rebooting => Output::Rebooting, + types::VmmState::Migrating => Output::Migrating, + types::VmmState::Failed => Output::Failed, + types::VmmState::Destroyed => Output::Destroyed, + } + } +} + impl From for omicron_common::api::internal::nexus::VmmRuntimeState { @@ -319,26 +331,6 @@ impl From } } -impl From - for omicron_common::api::external::InstanceState -{ - fn from(s: types::InstanceState) -> Self { - use types::InstanceState::*; - match s { - Creating => Self::Creating, - Starting => Self::Starting, - Running => Self::Running, - Stopping => Self::Stopping, - Stopped => Self::Stopped, - Rebooting => Self::Rebooting, - Migrating => Self::Migrating, - Repairing => Self::Repairing, - Failed => Self::Failed, - Destroyed => 
Self::Destroyed, - } - } -} - impl From for omicron_common::api::external::InstanceCpuCount { diff --git a/common/src/api/external/mod.rs b/common/src/api/external/mod.rs index 07a7776f1e..6b171b59fe 100644 --- a/common/src/api/external/mod.rs +++ b/common/src/api/external/mod.rs @@ -996,6 +996,22 @@ pub enum InstanceState { Destroyed, } +impl From for InstanceState { + fn from(state: crate::api::internal::nexus::VmmState) -> Self { + use crate::api::internal::nexus::VmmState as InternalVmmState; + match state { + InternalVmmState::Starting => Self::Starting, + InternalVmmState::Running => Self::Running, + InternalVmmState::Stopping => Self::Stopping, + InternalVmmState::Stopped => Self::Stopped, + InternalVmmState::Rebooting => Self::Rebooting, + InternalVmmState::Migrating => Self::Migrating, + InternalVmmState::Failed => Self::Failed, + InternalVmmState::Destroyed => Self::Destroyed, + } + } +} + impl Display for InstanceState { fn fmt(&self, f: &mut Formatter) -> FormatResult { write!(f, "{}", self.label()) diff --git a/common/src/api/internal/nexus.rs b/common/src/api/internal/nexus.rs index de611262bf..b569437f43 100644 --- a/common/src/api/internal/nexus.rs +++ b/common/src/api/internal/nexus.rs @@ -6,7 +6,7 @@ use crate::api::external::{ ByteCount, DiskState, Generation, Hostname, InstanceCpuCount, - InstanceState, SemverVersion, Vni, + SemverVersion, Vni, }; use chrono::{DateTime, Utc}; use omicron_uuid_kinds::DownstairsRegionKind; @@ -60,11 +60,36 @@ pub struct InstanceRuntimeState { pub time_updated: DateTime, } +/// One of the states that a VMM can be in. +#[derive( + Copy, Clone, Debug, Deserialize, Serialize, JsonSchema, Eq, PartialEq, +)] +#[serde(rename_all = "snake_case")] +pub enum VmmState { + /// The VMM is initializing and has not started running guest CPUs yet. + Starting, + /// The VMM has finished initializing and may be running guest CPUs. + Running, + /// The VMM is shutting down. 
+ Stopping, + /// The VMM's guest has stopped, and the guest will not run again, but the + /// VMM process may not have released all of its resources yet. + Stopped, + /// The VMM is being restarted or its guest OS is rebooting. + Rebooting, + /// The VMM is part of a live migration. + Migrating, + /// The VMM process reported an internal failure. + Failed, + /// The VMM process has been destroyed and its resources have been released. + Destroyed, +} + /// The dynamic runtime properties of an individual VMM process. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)] pub struct VmmRuntimeState { /// The last state reported by this VMM. - pub state: InstanceState, + pub state: VmmState, /// The generation number for this VMM's state. pub gen: Generation, /// Timestamp for the VMM's state. diff --git a/nexus/db-model/src/instance.rs b/nexus/db-model/src/instance.rs index 8f110aff71..99c0126174 100644 --- a/nexus/db-model/src/instance.rs +++ b/nexus/db-model/src/instance.rs @@ -72,9 +72,7 @@ impl Instance { InstanceIdentity::new(instance_id, params.identity.clone()); let runtime_state = InstanceRuntimeState::new( - InstanceState::new( - omicron_common::api::external::InstanceState::Creating, - ), + InstanceState::Creating, identity.time_modified, ); @@ -138,13 +136,6 @@ impl DatastoreAttachTargetConfig for Instance { // `diesel::prelude::AsChangeset`. #[diesel(table_name = instance, treat_none_as_null = true)] pub struct InstanceRuntimeState { - /// The instance state to fall back on if asked to compute this instance's - /// state while it has no active VMM. - /// - /// This field is guarded by the instance's `gen` field. - #[diesel(column_name = state)] - pub nexus_state: InstanceState, - /// The time at which the runtime state was last updated. This is distinct /// from the time the record was last modified, because some updates don't /// modify the runtime state. 
@@ -197,6 +188,13 @@ pub struct InstanceRuntimeState { /// lock was not held is still valid when setting the lock ID. #[diesel(column_name = updater_gen)] pub updater_gen: Generation, + + /// The "internal" state of this instance. The instance's externally-visible + /// state may be delegated to the instance's active VMM, if it has one. + /// + /// This field is guarded by the instance's `gen` field. + #[diesel(column_name = state)] + pub nexus_state: InstanceState, } impl InstanceRuntimeState { @@ -221,13 +219,13 @@ impl From state: omicron_common::api::internal::nexus::InstanceRuntimeState, ) -> Self { let nexus_state = if state.propolis_id.is_some() { - omicron_common::api::external::InstanceState::Running + InstanceState::Vmm } else { - omicron_common::api::external::InstanceState::Stopped + InstanceState::NoVmm }; Self { - nexus_state: InstanceState::new(nexus_state), + nexus_state, time_updated: state.time_updated, gen: state.gen.into(), propolis_id: state.propolis_id, diff --git a/nexus/db-model/src/instance_state.rs b/nexus/db-model/src/instance_state.rs index dca809758f..673b06e2cd 100644 --- a/nexus/db-model/src/instance_state.rs +++ b/nexus/db-model/src/instance_state.rs @@ -2,72 +2,60 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-use super::impl_enum_wrapper; +use super::impl_enum_type; use omicron_common::api::external; use serde::Deserialize; use serde::Serialize; use std::fmt; -use std::io::Write; -impl_enum_wrapper!( +impl_enum_type!( #[derive(SqlType, Debug)] - #[diesel(postgres_type(name = "instance_state", schema = "public"))] + #[diesel(postgres_type(name = "instance_state_v2", schema = "public"))] pub struct InstanceStateEnum; - #[derive(Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)] + #[derive(Copy, Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)] #[diesel(sql_type = InstanceStateEnum)] - pub struct InstanceState(pub external::InstanceState); + pub enum InstanceState; // Enum values Creating => b"creating" - Starting => b"starting" - Running => b"running" - Stopping => b"stopping" - Stopped => b"stopped" - Rebooting => b"rebooting" - Migrating => b"migrating" - Repairing => b"repairing" + NoVmm => b"no_vmm" + Vmm => b"vmm" Failed => b"failed" Destroyed => b"destroyed" ); impl InstanceState { - pub fn new(state: external::InstanceState) -> Self { - Self(state) + pub fn state(&self) -> external::InstanceState { + external::InstanceState::from(*self) } - pub fn state(&self) -> &external::InstanceState { - &self.0 + pub fn label(&self) -> &'static str { + match self { + InstanceState::Creating => "creating", + InstanceState::NoVmm => "no VMM", + InstanceState::Vmm => "VMM", + InstanceState::Failed => "failed", + InstanceState::Destroyed => "destroyed", + } } } impl fmt::Display for InstanceState { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { - write!(f, "{}", self.0) + write!(f, "{}", self.label()) } } -impl From for sled_agent_client::types::InstanceState { - fn from(s: InstanceState) -> Self { - use external::InstanceState::*; - use sled_agent_client::types::InstanceState as Output; - match s.0 { - Creating => Output::Creating, - Starting => Output::Starting, - Running => Output::Running, - Stopping => 
Output::Stopping, - Stopped => Output::Stopped, - Rebooting => Output::Rebooting, - Migrating => Output::Migrating, - Repairing => Output::Repairing, - Failed => Output::Failed, - Destroyed => Output::Destroyed, +impl From for omicron_common::api::external::InstanceState { + fn from(value: InstanceState) -> Self { + use omicron_common::api::external::InstanceState as Output; + match value { + InstanceState::Creating => Output::Creating, + InstanceState::NoVmm => Output::Stopped, + InstanceState::Vmm => Output::Running, + InstanceState::Failed => Output::Failed, + InstanceState::Destroyed => Output::Destroyed, } } } - -impl From for InstanceState { - fn from(state: external::InstanceState) -> Self { - Self::new(state) - } -} diff --git a/nexus/db-model/src/lib.rs b/nexus/db-model/src/lib.rs index 51fd0f6c9e..040882a8f0 100644 --- a/nexus/db-model/src/lib.rs +++ b/nexus/db-model/src/lib.rs @@ -56,6 +56,7 @@ mod semver_version; mod switch_interface; mod switch_port; mod v2p_mapping; +mod vmm_state; // These actually represent subqueries, not real table. // However, they must be defined in the same crate as our tables // for join-based marker trait generation. @@ -197,6 +198,7 @@ pub use v2p_mapping::*; pub use virtual_provisioning_collection::*; pub use virtual_provisioning_resource::*; pub use vmm::*; +pub use vmm_state::*; pub use vni::*; pub use volume::*; pub use volume_repair::*; diff --git a/nexus/db-model/src/schema.rs b/nexus/db-model/src/schema.rs index 8a00ce6e37..dedb0efc62 100644 --- a/nexus/db-model/src/schema.rs +++ b/nexus/db-model/src/schema.rs @@ -424,7 +424,6 @@ table! { memory -> Int8, hostname -> Text, boot_on_fault -> Bool, - state -> crate::InstanceStateEnum, time_state_updated -> Timestamptz, state_generation -> Int8, active_propolis_id -> Nullable, @@ -432,6 +431,7 @@ table! { migration_id -> Nullable, updater_id -> Nullable, updater_gen-> Int8, + state -> crate::InstanceStateEnum, } } @@ -444,9 +444,9 @@ table! 
{ sled_id -> Uuid, propolis_ip -> Inet, propolis_port -> Int4, - state -> crate::InstanceStateEnum, time_state_updated -> Timestamptz, state_generation -> Int8, + state -> crate::VmmStateEnum, } } joinable!(vmm -> sled (sled_id)); @@ -459,7 +459,7 @@ table! { project_name -> Text, time_created -> Timestamptz, time_modified -> Timestamptz, - state -> crate::InstanceStateEnum, + state -> crate::VmmStateEnum, active_sled_id -> Uuid, migration_id -> Nullable, ncpus -> Int8, diff --git a/nexus/db-model/src/schema_versions.rs b/nexus/db-model/src/schema_versions.rs index 4465c3aacf..09039c952b 100644 --- a/nexus/db-model/src/schema_versions.rs +++ b/nexus/db-model/src/schema_versions.rs @@ -17,7 +17,7 @@ use std::collections::BTreeMap; /// /// This must be updated when you change the database schema. Refer to /// schema/crdb/README.adoc in the root of this repository for details. -pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(69, 0, 0); +pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(70, 0, 0); /// List of all past database schema versions, in *reverse* order /// @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy> = Lazy::new(|| { // | leaving the first copy as an example for the next person. 
// v // KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"), + KnownVersion::new(70, "separate-instance-and-vmm-states"), KnownVersion::new(69, "expose-stage0"), KnownVersion::new(68, "filter-v2p-mapping-by-instance-state"), KnownVersion::new(67, "add-instance-updater-lock"), diff --git a/nexus/db-model/src/sled_instance.rs b/nexus/db-model/src/sled_instance.rs index bbc92ddf18..1415c38eea 100644 --- a/nexus/db-model/src/sled_instance.rs +++ b/nexus/db-model/src/sled_instance.rs @@ -1,6 +1,6 @@ use crate::schema::sled_instance; -use crate::InstanceState; use crate::Name; +use crate::VmmState; use db_macros::Asset; use nexus_types::external_api::views; use nexus_types::identity::Asset; @@ -21,7 +21,7 @@ pub struct SledInstance { pub silo_name: Name, pub project_name: Name, - pub state: InstanceState, + pub state: VmmState, pub ncpus: i64, pub memory: i64, } @@ -34,7 +34,7 @@ impl From for views::SledInstance { active_sled_id: sled_instance.active_sled_id, silo_name: sled_instance.silo_name.into(), project_name: sled_instance.project_name.into(), - state: *sled_instance.state.state(), + state: sled_instance.state.into(), migration_id: sled_instance.migration_id, ncpus: sled_instance.ncpus, memory: sled_instance.memory, diff --git a/nexus/db-model/src/vmm.rs b/nexus/db-model/src/vmm.rs index ca3be120d4..cfa1d43759 100644 --- a/nexus/db-model/src/vmm.rs +++ b/nexus/db-model/src/vmm.rs @@ -12,7 +12,7 @@ //! state updates to each other without sending parameters that are useless to //! sled agent or that sled agent will never update (like the sled ID). 
-use super::{Generation, InstanceState}; +use super::{Generation, VmmState}; use crate::schema::vmm; use crate::SqlU16; use chrono::{DateTime, Utc}; @@ -68,12 +68,10 @@ impl Vmm { propolis_port: u16, initial_state: VmmInitialState, ) -> Self { - use omicron_common::api::external::InstanceState as ApiInstanceState; - let now = Utc::now(); - let api_state = match initial_state { - VmmInitialState::Starting => ApiInstanceState::Starting, - VmmInitialState::Migrating => ApiInstanceState::Migrating, + let state = match initial_state { + VmmInitialState::Starting => VmmState::Starting, + VmmInitialState::Migrating => VmmState::Migrating, }; Self { @@ -85,7 +83,7 @@ impl Vmm { propolis_ip, propolis_port: SqlU16(propolis_port), runtime: VmmRuntimeState { - state: InstanceState::new(api_state), + state, time_state_updated: now, gen: Generation::new(), }, @@ -106,16 +104,16 @@ impl Vmm { )] #[diesel(table_name = vmm)] pub struct VmmRuntimeState { - /// The state of this VMM. If this VMM is the active VMM for a given - /// instance, this state is the instance's logical state. - pub state: InstanceState, - /// The time at which this state was most recently updated. pub time_state_updated: DateTime, /// The generation number protecting this VMM's state and update time. #[diesel(column_name = state_generation)] pub gen: Generation, + + /// The state of this VMM. If this VMM is the active VMM for a given + /// instance, this state is the instance's logical state. 
+ pub state: VmmState, } impl From @@ -125,7 +123,7 @@ impl From value: omicron_common::api::internal::nexus::VmmRuntimeState, ) -> Self { Self { - state: InstanceState::new(value.state), + state: value.state.into(), time_state_updated: value.time_updated, gen: value.gen.into(), } diff --git a/nexus/db-model/src/vmm_state.rs b/nexus/db-model/src/vmm_state.rs new file mode 100644 index 0000000000..f737f48f69 --- /dev/null +++ b/nexus/db-model/src/vmm_state.rs @@ -0,0 +1,114 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use super::impl_enum_type; +use serde::Deserialize; +use serde::Serialize; +use std::fmt; + +impl_enum_type!( + #[derive(SqlType, Debug)] + #[diesel(postgres_type(name = "vmm_state", schema = "public"))] + pub struct VmmStateEnum; + + #[derive(Copy, Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)] + #[diesel(sql_type = VmmStateEnum)] + pub enum VmmState; + + Starting => b"starting" + Running => b"running" + Stopping => b"stopping" + Stopped => b"stopped" + Rebooting => b"rebooting" + Migrating => b"migrating" + Failed => b"failed" + Destroyed => b"destroyed" +); + +impl VmmState { + pub fn label(&self) -> &'static str { + match self { + VmmState::Starting => "starting", + VmmState::Running => "running", + VmmState::Stopping => "stopping", + VmmState::Stopped => "stopped", + VmmState::Rebooting => "rebooting", + VmmState::Migrating => "migrating", + VmmState::Failed => "failed", + VmmState::Destroyed => "destroyed", + } + } +} + +impl fmt::Display for VmmState { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + write!(f, "{}", self.label()) + } +} + +impl From for omicron_common::api::internal::nexus::VmmState { + fn from(value: VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as Output; + match value { + VmmState::Starting => 
Output::Starting, + VmmState::Running => Output::Running, + VmmState::Stopping => Output::Stopping, + VmmState::Stopped => Output::Stopped, + VmmState::Rebooting => Output::Rebooting, + VmmState::Migrating => Output::Migrating, + VmmState::Failed => Output::Failed, + VmmState::Destroyed => Output::Destroyed, + } + } +} + +impl From for sled_agent_client::types::VmmState { + fn from(value: VmmState) -> Self { + use sled_agent_client::types::VmmState as Output; + match value { + VmmState::Starting => Output::Starting, + VmmState::Running => Output::Running, + VmmState::Stopping => Output::Stopping, + VmmState::Stopped => Output::Stopped, + VmmState::Rebooting => Output::Rebooting, + VmmState::Migrating => Output::Migrating, + VmmState::Failed => Output::Failed, + VmmState::Destroyed => Output::Destroyed, + } + } +} + +impl From for VmmState { + fn from(value: omicron_common::api::internal::nexus::VmmState) -> Self { + use omicron_common::api::internal::nexus::VmmState as ApiState; + use VmmState as Output; + match value { + ApiState::Starting => Output::Starting, + ApiState::Running => Output::Running, + ApiState::Stopping => Output::Stopping, + ApiState::Stopped => Output::Stopped, + ApiState::Rebooting => Output::Rebooting, + ApiState::Migrating => Output::Migrating, + ApiState::Failed => Output::Failed, + ApiState::Destroyed => Output::Destroyed, + } + } +} + +impl From for omicron_common::api::external::InstanceState { + fn from(value: VmmState) -> Self { + use omicron_common::api::external::InstanceState as Output; + + match value { + VmmState::Starting => Output::Starting, + VmmState::Running => Output::Running, + VmmState::Stopping => Output::Stopping, + VmmState::Stopped => Output::Stopped, + VmmState::Rebooting => Output::Rebooting, + VmmState::Migrating => Output::Migrating, + VmmState::Failed => Output::Failed, + VmmState::Destroyed => Output::Destroyed, + } + } +} diff --git a/nexus/db-queries/src/db/datastore/disk.rs 
b/nexus/db-queries/src/db/datastore/disk.rs index e1d504761c..de3d40969b 100644 --- a/nexus/db-queries/src/db/datastore/disk.rs +++ b/nexus/db-queries/src/db/datastore/disk.rs @@ -183,8 +183,8 @@ impl DataStore { // // We currently only permit attaching disks to stopped instances. let ok_to_attach_instance_states = vec![ - db::model::InstanceState(api::external::InstanceState::Creating), - db::model::InstanceState(api::external::InstanceState::Stopped), + db::model::InstanceState::Creating, + db::model::InstanceState::NoVmm, ]; let attach_update = DiskSetClauseForAttach::new(authz_instance.id()); @@ -321,9 +321,9 @@ impl DataStore { // // We currently only permit detaching disks from stopped instances. let ok_to_detach_instance_states = vec![ - db::model::InstanceState(api::external::InstanceState::Creating), - db::model::InstanceState(api::external::InstanceState::Stopped), - db::model::InstanceState(api::external::InstanceState::Failed), + db::model::InstanceState::Creating, + db::model::InstanceState::NoVmm, + db::model::InstanceState::Failed, ]; let detached_label = api::external::DiskState::Detached.label(); diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index c3cd45669f..8dda8041fa 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -31,7 +31,6 @@ use crate::db::queries::external_ip::NextExternalIp; use crate::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE; use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES; use crate::db::queries::external_ip::SAFE_TO_ATTACH_INSTANCE_STATES_CREATING; -use crate::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use async_bb8_diesel::AsyncRunQueryDsl; @@ -479,11 +478,6 @@ impl DataStore { } Err(match &collection.runtime_state.nexus_state { - state if 
SAFE_TRANSIENT_INSTANCE_STATES.contains(&state) - => Error::unavail(&format!( - "tried to attach {kind} IP while instance was changing state: \ - attach will be safe to retry once start/stop completes" - )), state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => { if attached_count >= i64::from(MAX_EXTERNAL_IPS_PLUS_SNAT) { Error::invalid_request(&format!( @@ -608,10 +602,6 @@ impl DataStore { } match collection.runtime_state.nexus_state { - state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state) => Error::unavail(&format!( - "tried to attach {kind} IP while instance was changing state: \ - detach will be safe to retry once start/stop completes" - )), state if SAFE_TO_ATTACH_INSTANCE_STATES.contains(&state) => { Error::internal_error(&format!("failed to detach {kind} IP")) }, diff --git a/nexus/db-queries/src/db/datastore/instance.rs b/nexus/db-queries/src/db/datastore/instance.rs index 3b655e5bb9..b9989fe31c 100644 --- a/nexus/db-queries/src/db/datastore/instance.rs +++ b/nexus/db-queries/src/db/datastore/instance.rs @@ -74,9 +74,9 @@ impl InstanceAndActiveVmm { &self, ) -> omicron_common::api::external::InstanceState { if let Some(vmm) = &self.vmm { - vmm.runtime.state.0 + vmm.runtime.state.into() } else { - self.instance.runtime().nexus_state.0 + self.instance.runtime().nexus_state.into() } } } @@ -89,11 +89,13 @@ impl From<(Instance, Option)> for InstanceAndActiveVmm { impl From for omicron_common::api::external::Instance { fn from(value: InstanceAndActiveVmm) -> Self { - let (run_state, time_run_state_updated) = if let Some(vmm) = value.vmm { - (vmm.runtime.state, vmm.runtime.time_state_updated) + let run_state: omicron_common::api::external::InstanceState; + let time_run_state_updated: chrono::DateTime; + (run_state, time_run_state_updated) = if let Some(vmm) = value.vmm { + (vmm.runtime.state.into(), vmm.runtime.time_state_updated) } else { ( - value.instance.runtime_state.nexus_state.clone(), + value.instance.runtime_state.nexus_state.into(), 
value.instance.runtime_state.time_updated, ) }; @@ -109,7 +111,7 @@ impl From for omicron_common::api::external::Instance { .parse() .expect("found invalid hostname in the database"), runtime: omicron_common::api::external::InstanceRuntimeState { - run_state: *run_state.state(), + run_state, time_run_state_updated, }, } @@ -196,8 +198,8 @@ impl DataStore { })?; bail_unless!( - instance.runtime().nexus_state.state() - == &api::external::InstanceState::Creating, + instance.runtime().nexus_state + == nexus_db_model::InstanceState::Creating, "newly-created Instance has unexpected state: {:?}", instance.runtime().nexus_state ); @@ -477,13 +479,12 @@ impl DataStore { // instance must be "stopped" or "failed" in order to delete it. The // delete operation sets "time_deleted" (just like with other objects) // and also sets the state to "destroyed". - use api::external::InstanceState as ApiInstanceState; use db::model::InstanceState as DbInstanceState; use db::schema::{disk, instance}; - let stopped = DbInstanceState::new(ApiInstanceState::Stopped); - let failed = DbInstanceState::new(ApiInstanceState::Failed); - let destroyed = DbInstanceState::new(ApiInstanceState::Destroyed); + let stopped = DbInstanceState::NoVmm; + let failed = DbInstanceState::Failed; + let destroyed = DbInstanceState::Destroyed; let ok_to_delete_instance_states = vec![stopped, failed]; let detached_label = api::external::DiskState::Detached.label(); diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 3ea2945b2f..3076afa39f 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -33,7 +33,6 @@ use diesel::prelude::*; use diesel::result::Error as DieselError; use nexus_db_model::ServiceNetworkInterface; use nexus_types::identity::Resource; -use omicron_common::api::external; use omicron_common::api::external::http_pagination::PaginatedBy; use 
omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; @@ -681,8 +680,7 @@ impl DataStore { .filter(dsl::time_deleted.is_null()) .select(Instance::as_select()) }; - let stopped = - db::model::InstanceState::new(external::InstanceState::Stopped); + let stopped = db::model::InstanceState::NoVmm; // This is the actual query to update the target interface. // Unlike Postgres, CockroachDB doesn't support inserting or updating a view @@ -713,7 +711,6 @@ impl DataStore { self.transaction_retry_wrapper("instance_update_network_interface") .transaction(&conn, |conn| { let err = err.clone(); - let stopped = stopped.clone(); let update_target_query = update_target_query.clone(); async move { let instance_runtime = @@ -759,7 +756,6 @@ impl DataStore { self.transaction_retry_wrapper("instance_update_network_interface") .transaction(&conn, |conn| { let err = err.clone(); - let stopped = stopped.clone(); let update_target_query = update_target_query.clone(); async move { let instance_state = @@ -897,7 +893,7 @@ mod tests { .addr_iter() .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) .take(10); - let mut macs = external::MacAddr::iter_system(); + let mut macs = omicron_common::api::external::MacAddr::iter_system(); let mut service_nics = Vec::new(); for (i, ip) in ip_range.enumerate() { let name = format!("service-nic-{i}"); @@ -905,7 +901,7 @@ mod tests { Uuid::new_v4(), Uuid::new_v4(), NEXUS_VPC_SUBNET.clone(), - external::IdentityMetadataCreateParams { + omicron_common::api::external::IdentityMetadataCreateParams { name: name.parse().unwrap(), description: name, }, diff --git a/nexus/db-queries/src/db/datastore/vmm.rs b/nexus/db-queries/src/db/datastore/vmm.rs index b8fb47de26..bcb615411e 100644 --- a/nexus/db-queries/src/db/datastore/vmm.rs +++ b/nexus/db-queries/src/db/datastore/vmm.rs @@ -9,9 +9,9 @@ use crate::authz; use crate::context::OpContext; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use 
crate::db::model::InstanceState as DbInstanceState; use crate::db::model::Vmm; use crate::db::model::VmmRuntimeState; +use crate::db::model::VmmState as DbVmmState; use crate::db::pagination::paginated; use crate::db::schema::vmm::dsl; use crate::db::update_and_check::UpdateAndCheck; @@ -22,7 +22,6 @@ use diesel::prelude::*; use omicron_common::api::external::CreateResult; use omicron_common::api::external::DataPageParams; use omicron_common::api::external::Error; -use omicron_common::api::external::InstanceState as ApiInstanceState; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; @@ -55,10 +54,7 @@ impl DataStore { opctx: &OpContext, vmm_id: &Uuid, ) -> UpdateResult { - let valid_states = vec![ - DbInstanceState::new(ApiInstanceState::Destroyed), - DbInstanceState::new(ApiInstanceState::Failed), - ]; + let valid_states = vec![DbVmmState::Destroyed, DbVmmState::Failed]; let updated = diesel::update(dsl::vmm) .filter(dsl::id.eq(*vmm_id)) @@ -190,7 +186,7 @@ impl DataStore { pagparams: &DataPageParams<'_, Uuid>, ) -> ListResultVec { use crate::db::schema::instance::dsl as instance_dsl; - let destroyed = DbInstanceState::new(ApiInstanceState::Destroyed); + let destroyed = DbVmmState::Destroyed; paginated(dsl::vmm, dsl::id, pagparams) // In order to be considered "abandoned", a VMM must be: // - in the `Destroyed` state diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 7d5e254aac..9bb6a44ea7 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -29,34 +29,28 @@ use diesel::RunQueryDsl; use nexus_db_model::InstanceState as DbInstanceState; use nexus_db_model::IpAttachState; use nexus_db_model::IpAttachStateEnum; +use nexus_db_model::VmmState as DbVmmState; use omicron_common::address::NUM_SOURCE_NAT_PORTS; use omicron_common::api::external; -use 
omicron_common::api::external::InstanceState as ApiInstanceState; use uuid::Uuid; // Broadly, we want users to be able to attach/detach at will // once an instance is created and functional. -pub const SAFE_TO_ATTACH_INSTANCE_STATES_CREATING: [DbInstanceState; 3] = [ - DbInstanceState(ApiInstanceState::Stopped), - DbInstanceState(ApiInstanceState::Running), - DbInstanceState(ApiInstanceState::Creating), -]; -pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] = [ - DbInstanceState(ApiInstanceState::Stopped), - DbInstanceState(ApiInstanceState::Running), -]; +pub const SAFE_TO_ATTACH_INSTANCE_STATES_CREATING: [DbInstanceState; 3] = + [DbInstanceState::NoVmm, DbInstanceState::Vmm, DbInstanceState::Creating]; +pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] = + [DbInstanceState::NoVmm, DbInstanceState::Vmm]; // If we're in a state which will naturally resolve to either // stopped/running, we want users to know that the request can be // retried safely via Error::unavail. // TODO: We currently stop if there's a migration or other state change. // There may be a good case for RPWing // external_ip_state -> { NAT RPW, sled-agent } in future. -pub const SAFE_TRANSIENT_INSTANCE_STATES: [DbInstanceState; 5] = [ - DbInstanceState(ApiInstanceState::Starting), - DbInstanceState(ApiInstanceState::Stopping), - DbInstanceState(ApiInstanceState::Creating), - DbInstanceState(ApiInstanceState::Rebooting), - DbInstanceState(ApiInstanceState::Migrating), +pub const SAFE_TRANSIENT_INSTANCE_STATES: [DbVmmState; 4] = [ + DbVmmState::Starting, + DbVmmState::Stopping, + DbVmmState::Rebooting, + DbVmmState::Migrating, ]; /// The maximum number of disks that can be attached to an instance. 
diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index 69c1827b6d..e7ce4ca61a 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -41,26 +41,26 @@ use uuid::Uuid; // States an instance must be in to operate on its network interfaces, in // most situations. -static INSTANCE_STOPPED: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Stopped)); +const INSTANCE_STOPPED: db::model::InstanceState = + db::model::InstanceState::NoVmm; -static INSTANCE_FAILED: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Failed)); +const INSTANCE_FAILED: db::model::InstanceState = + db::model::InstanceState::Failed; // An instance can be in the creating state while we manipulate its // interfaces. The intention is for this only to be the case during sagas. -static INSTANCE_CREATING: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Creating)); +const INSTANCE_CREATING: db::model::InstanceState = + db::model::InstanceState::Creating; // A sentinel value for the instance state when the instance actually does // not exist. -static INSTANCE_DESTROYED: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Destroyed)); +const INSTANCE_DESTROYED: db::model::InstanceState = + db::model::InstanceState::Destroyed; // A sentinel value for the instance state when the instance has an active // VMM, irrespective of that VMM's actual state. 
-static INSTANCE_RUNNING: Lazy = - Lazy::new(|| db::model::InstanceState(external::InstanceState::Running)); +const INSTANCE_RUNNING: db::model::InstanceState = + db::model::InstanceState::Vmm; static NO_INSTANCE_SENTINEL_STRING: Lazy = Lazy::new(|| String::from(NO_INSTANCE_SENTINEL)); @@ -1853,6 +1853,7 @@ mod tests { use crate::db::model; use crate::db::model::IncompleteNetworkInterface; use crate::db::model::Instance; + use crate::db::model::InstanceState; use crate::db::model::NetworkInterface; use crate::db::model::Project; use crate::db::model::VpcSubnet; @@ -1929,55 +1930,29 @@ mod tests { db_datastore: &DataStore, ) -> Instance { let instance = create_instance(opctx, project_id, db_datastore).await; - instance_set_state( - db_datastore, - instance, - external::InstanceState::Stopped, - ) - .await + instance_set_state(db_datastore, instance, InstanceState::NoVmm).await } async fn instance_set_state( db_datastore: &DataStore, mut instance: Instance, - state: external::InstanceState, + state: InstanceState, ) -> Instance { - let new_runtime = model::InstanceRuntimeState { - nexus_state: model::InstanceState::new(state), - gen: instance.runtime_state.gen.next().into(), - ..instance.runtime_state.clone() + let propolis_id = match state { + InstanceState::Vmm => Some(Uuid::new_v4()), + _ => None, }; - let res = db_datastore - .instance_update_runtime(&instance.id(), &new_runtime) - .await; - assert!(matches!(res, Ok(true)), "Failed to change instance state"); - instance.runtime_state = new_runtime; - instance - } - /// Sets or clears the active Propolis ID in the supplied instance record. - /// This can be used to exercise the "does this instance have an active - /// VMM?" test that determines in part whether an instance's network - /// interfaces can change. - /// - /// Note that this routine does not construct a VMM record for the - /// corresponding ID, so any functions that expect such a record to exist - /// will fail in strange and exciting ways. 
- async fn instance_set_active_vmm( - db_datastore: &DataStore, - mut instance: Instance, - propolis_id: Option, - ) -> Instance { let new_runtime = model::InstanceRuntimeState { + nexus_state: state, propolis_id, gen: instance.runtime_state.gen.next().into(), ..instance.runtime_state.clone() }; - let res = db_datastore .instance_update_runtime(&instance.id(), &new_runtime) .await; - assert!(matches!(res, Ok(true)), "Failed to change instance VMM ref"); + assert!(matches!(res, Ok(true)), "Failed to change instance state"); instance.runtime_state = new_runtime; instance } @@ -2102,13 +2077,13 @@ mod tests { &self.db_datastore, ) .await, - external::InstanceState::Stopped, + InstanceState::NoVmm, ) .await } async fn create_running_instance(&self) -> Instance { - let instance = instance_set_state( + instance_set_state( &self.db_datastore, create_instance( &self.opctx, @@ -2116,14 +2091,7 @@ mod tests { &self.db_datastore, ) .await, - external::InstanceState::Starting, - ) - .await; - - instance_set_active_vmm( - &self.db_datastore, - instance, - Some(Uuid::new_v4()), + InstanceState::Vmm, ) .await } diff --git a/nexus/src/app/background/abandoned_vmm_reaper.rs b/nexus/src/app/background/abandoned_vmm_reaper.rs index b24c543575..4685012e28 100644 --- a/nexus/src/app/background/abandoned_vmm_reaper.rs +++ b/nexus/src/app/background/abandoned_vmm_reaper.rs @@ -219,15 +219,14 @@ mod tests { use chrono::Utc; use nexus_db_model::ByteCount; use nexus_db_model::Generation; - use nexus_db_model::InstanceState; use nexus_db_model::Resources; use nexus_db_model::SledResource; use nexus_db_model::SledResourceKind; use nexus_db_model::Vmm; use nexus_db_model::VmmRuntimeState; + use nexus_db_model::VmmState; use nexus_test_utils::resource_helpers; use nexus_test_utils_macros::nexus_test; - use omicron_common::api::external::InstanceState as ApiInstanceState; use uuid::Uuid; type ControlPlaneTestContext = @@ -269,9 +268,7 @@ mod tests { propolis_ip: "::1".parse().unwrap(), 
propolis_port: 12345.into(), runtime: VmmRuntimeState { - state: InstanceState::new( - ApiInstanceState::Destroyed - ), + state: VmmState::Destroyed, time_state_updated: Utc::now(), gen: Generation::new(), } diff --git a/nexus/src/app/background/instance_watcher.rs b/nexus/src/app/background/instance_watcher.rs index d473ea8e99..c4eda68594 100644 --- a/nexus/src/app/background/instance_watcher.rs +++ b/nexus/src/app/background/instance_watcher.rs @@ -142,7 +142,7 @@ impl InstanceWatcher { let new_runtime_state: SledInstanceState = state.into(); check.outcome = - CheckOutcome::Success(new_runtime_state.vmm_state.state); + CheckOutcome::Success(new_runtime_state.vmm_state.state.into()); slog::debug!( opctx.log, "updating instance state"; diff --git a/nexus/src/app/instance.rs b/nexus/src/app/instance.rs index 63b080b436..1132f1f5b8 100644 --- a/nexus/src/app/instance.rs +++ b/nexus/src/app/instance.rs @@ -20,6 +20,7 @@ use futures::future::Fuse; use futures::{FutureExt, SinkExt, StreamExt}; use nexus_db_model::IpAttachState; use nexus_db_model::IpKind; +use nexus_db_model::VmmState as DbVmmState; use nexus_db_queries::authn; use nexus_db_queries::authz; use nexus_db_queries::context::OpContext; @@ -43,6 +44,7 @@ use omicron_common::api::external::LookupResult; use omicron_common::api::external::NameOrId; use omicron_common::api::external::UpdateResult; use omicron_common::api::internal::nexus; +use omicron_common::api::internal::nexus::VmmState; use omicron_common::api::internal::shared::SourceNatConfig; use propolis_client::support::tungstenite::protocol::frame::coding::CloseCode; use propolis_client::support::tungstenite::protocol::CloseFrame; @@ -477,7 +479,7 @@ impl super::Nexus { let (instance, vmm) = (state.instance(), state.vmm()); if vmm.is_none() - || vmm.as_ref().unwrap().runtime.state.0 != InstanceState::Running + || vmm.as_ref().unwrap().runtime.state != DbVmmState::Running { return Err(Error::invalid_request( "instance must be running before it can 
migrate", @@ -707,16 +709,16 @@ impl super::Nexus { let (instance, vmm) = (state.instance(), state.vmm()); if let Some(vmm) = vmm { - match vmm.runtime.state.0 { - InstanceState::Starting - | InstanceState::Running - | InstanceState::Rebooting => { + match vmm.runtime.state { + DbVmmState::Starting + | DbVmmState::Running + | DbVmmState::Rebooting => { debug!(self.log, "asked to start an active instance"; "instance_id" => %authz_instance.id()); return Ok(state); } - InstanceState::Stopped => { + DbVmmState::Stopped => { let propolis_id = instance .runtime() .propolis_id @@ -733,7 +735,7 @@ impl super::Nexus { _ => { return Err(Error::conflict(&format!( "instance is in state {} but must be {} to be started", - vmm.runtime.state.0, + vmm.runtime.state, InstanceState::Stopped ))); } @@ -841,9 +843,9 @@ impl super::Nexus { requested: &InstanceStateChangeRequest, ) -> Result { let effective_state = if let Some(vmm) = vmm_state { - vmm.runtime.state.0 + vmm.runtime.state.into() } else { - instance_state.runtime().nexus_state.0 + instance_state.runtime().nexus_state.into() }; // Requests that operate on active instances have to be directed to the @@ -1362,7 +1364,7 @@ impl super::Nexus { "error" => ?reason); let new_runtime = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new(InstanceState::Failed), + nexus_state: db::model::InstanceState::Failed, // TODO(#4226): Clearing the Propolis ID is required to allow the // instance to be deleted, but this doesn't actually terminate the @@ -1647,24 +1649,22 @@ impl super::Nexus { let (instance, vmm) = (state.instance(), state.vmm()); if let Some(vmm) = vmm { - match vmm.runtime.state.0 { - InstanceState::Running - | InstanceState::Rebooting - | InstanceState::Migrating - | InstanceState::Repairing => { + match vmm.runtime.state { + DbVmmState::Running + | DbVmmState::Rebooting + | DbVmmState::Migrating => { Ok(SocketAddr::new(vmm.propolis_ip.ip(), vmm.propolis_port.into())) } - InstanceState::Creating - 
| InstanceState::Starting - | InstanceState::Stopping - | InstanceState::Stopped - | InstanceState::Failed => { + DbVmmState::Starting + | DbVmmState::Stopping + | DbVmmState::Stopped + | DbVmmState::Failed => { Err(Error::invalid_request(format!( "cannot connect to serial console of instance in state \"{}\"", - vmm.runtime.state.0, + vmm.runtime.state, ))) } - InstanceState::Destroyed => Err(Error::invalid_request( + DbVmmState::Destroyed => Err(Error::invalid_request( "cannot connect to serial console of destroyed instance", )), } @@ -2092,7 +2092,7 @@ pub(crate) async fn notify_instance_updated( if result.is_ok() { let propolis_terminated = matches!( new_runtime_state.vmm_state.state, - InstanceState::Destroyed | InstanceState::Failed + VmmState::Destroyed | VmmState::Failed ); if propolis_terminated { diff --git a/nexus/src/app/sagas/instance_common.rs b/nexus/src/app/sagas/instance_common.rs index b941739393..ba9854c146 100644 --- a/nexus/src/app/sagas/instance_common.rs +++ b/nexus/src/app/sagas/instance_common.rs @@ -9,14 +9,12 @@ use std::net::{IpAddr, Ipv6Addr}; use crate::Nexus; use chrono::Utc; use nexus_db_model::{ - ByteCount, ExternalIp, IpAttachState, Ipv4NatEntry, - SledReservationConstraints, SledResource, + ByteCount, ExternalIp, InstanceState, IpAttachState, Ipv4NatEntry, + SledReservationConstraints, SledResource, VmmState, }; use nexus_db_queries::authz; use nexus_db_queries::db::lookup::LookupPath; -use nexus_db_queries::db::queries::external_ip::SAFE_TRANSIENT_INSTANCE_STATES; use nexus_db_queries::{authn, context::OpContext, db, db::DataStore}; -use omicron_common::api::external::InstanceState; use omicron_common::api::external::{Error, NameOrId}; use serde::{Deserialize, Serialize}; use steno::ActionError; @@ -125,7 +123,7 @@ pub async fn destroy_vmm_record( prev_record: &db::model::Vmm, ) -> Result<(), anyhow::Error> { let new_runtime = db::model::VmmRuntimeState { - state: db::model::InstanceState(InstanceState::Destroyed), + state: 
db::model::VmmState::Destroyed, time_state_updated: Utc::now(), gen: prev_record.runtime.gen.next().into(), }; @@ -201,6 +199,16 @@ pub async fn instance_ip_move_state( } } +/// Yields the sled on which an instance is found to be running so that IP +/// attachment and detachment operations can be propagated there. +/// +/// # Preconditions +/// +/// To synchronize correctly with other concurrent operations on an instance, +/// the calling saga must have placed the IP it is attaching or detaching into +/// the Attaching or Detaching state so that concurrent attempts to start the +/// instance will notice that the IP state is in flux and ask the caller to +/// retry. pub async fn instance_ip_get_instance_state( sagactx: &NexusActionContext, serialized_authn: &authn::saga::Serialized, @@ -220,15 +228,20 @@ pub async fn instance_ip_get_instance_state( .await .map_err(ActionError::action_failed)?; - let found_state = inst_and_vmm.instance().runtime_state.nexus_state.0; + let found_vmm_state = + inst_and_vmm.vmm().as_ref().map(|vmm| vmm.runtime.state); + let found_instance_state = + inst_and_vmm.instance().runtime_state.nexus_state; let mut sled_id = inst_and_vmm.sled_id(); + slog::debug!( + osagactx.log(), "evaluating instance state for IP attach/detach"; + "instance_state" => ?found_instance_state, + "vmm_state" => ?found_vmm_state + ); + // Arriving here means we started in a correct state (running/stopped). // We need to consider how we interact with the other sagas/ops: - // - starting: our claim on an IP will block it from moving past - // DPD_ensure and instance_start will undo. If we complete - // before then, it can move past and will fill in routes/opte. - // Act as though we have no sled_id. // - stopping: this is not sagaized, and the propolis/sled-agent might // go away. 
Act as though stopped if we catch it here, // otherwise convert OPTE ensure to 'service unavailable' @@ -236,30 +249,91 @@ pub async fn instance_ip_get_instance_state( // - deleting: can only be called from stopped -- we won't push to dpd // or sled-agent, and IP record might be deleted or forcibly // detached. Catch here just in case. - match found_state { - InstanceState::Stopped - | InstanceState::Starting - | InstanceState::Stopping => { + // - starting: see below. + match (found_instance_state, found_vmm_state) { + // If there's no VMM, the instance is definitely not on any sled. + (InstanceState::NoVmm, _) => { sled_id = None; } - InstanceState::Running => {} - state if SAFE_TRANSIENT_INSTANCE_STATES.contains(&state.into()) => { + + // If the instance is running normally or rebooting, it's resident on + // the sled given by its VMM record. + ( + InstanceState::Vmm, + Some(VmmState::Running) | Some(VmmState::Rebooting), + ) => {} + + // If the VMM is in the Stopping, Migrating, or Starting states, its + // sled assignment is in doubt, so report a transient state error and + // ask the caller to retry. + // + // Although an instance with a Starting VMM has a sled assignment, + // there's no way to tell at this point whether or not there's a + // concurrent instance-start saga that has passed the point where it + // sends IP assignments to the instance's new sled: + // + // - If the start saga is still in progress and hasn't pushed any IP + // information to the instance's new sled yet, then either of two + // things can happen: + // - This function's caller can finish modifying IPs before the start + // saga propagates IP information to the sled. In this case the + // calling saga should do nothing--the start saga will send the + // right IP set to the sled. + // - If the start saga "wins" the race, it will see that the instance + // still has an attaching/detaching IP and bail out. 
+ // - If the start saga is already done, and Nexus is just waiting for + // the VMM to report that it's Running, the calling saga needs to + // send the IP change to the instance's sled. + // + // There's no way to distinguish these cases, so if a VMM is Starting, + // block the attach/detach. + ( + InstanceState::Vmm, + Some(state @ VmmState::Starting) + | Some(state @ VmmState::Migrating) + | Some(state @ VmmState::Stopping) + | Some(state @ VmmState::Stopped), + ) => { return Err(ActionError::action_failed(Error::unavail(&format!( "can't {verb} in transient state {state}" - )))) + )))); } - InstanceState::Destroyed => { + (InstanceState::Destroyed, _) => { return Err(ActionError::action_failed(Error::not_found_by_id( omicron_common::api::external::ResourceType::Instance, &authz_instance.id(), ))) } - // Final cases are repairing/failed. - _ => { + (InstanceState::Creating, _) => { + return Err(ActionError::action_failed(Error::invalid_request( + "cannot modify instance IPs, instance is still being created", + ))) + } + (InstanceState::Failed, _) + | (InstanceState::Vmm, Some(VmmState::Failed)) => { return Err(ActionError::action_failed(Error::invalid_request( "cannot modify instance IPs, instance is in unhealthy state", ))) } + + // This case represents an inconsistency in the database. It should + // never happen, but don't blow up Nexus if it somehow does. 
+ (InstanceState::Vmm, None) => { + return Err(ActionError::action_failed(Error::internal_error( + &format!( + "instance {} is in the 'VMM' state but has no VMM ID", + authz_instance.id(), + ), + ))); + } + (InstanceState::Vmm, Some(VmmState::Destroyed)) => { + return Err(ActionError::action_failed(Error::internal_error( + &format!( + "instance {} points to destroyed VMM", + authz_instance.id(), + ), + ))); + } } Ok(sled_id) diff --git a/nexus/src/app/sagas/instance_create.rs b/nexus/src/app/sagas/instance_create.rs index a6771f65a0..f336a01f0c 100644 --- a/nexus/src/app/sagas/instance_create.rs +++ b/nexus/src/app/sagas/instance_create.rs @@ -18,7 +18,6 @@ use nexus_db_queries::{authn, authz, db}; use nexus_defaults::DEFAULT_PRIMARY_NIC_NAME; use nexus_types::external_api::params::InstanceDiskAttachment; use omicron_common::api::external::IdentityMetadataCreateParams; -use omicron_common::api::external::InstanceState; use omicron_common::api::external::Name; use omicron_common::api::external::NameOrId; use omicron_common::api::external::{Error, InternalContext}; @@ -994,7 +993,7 @@ async fn sic_delete_instance_record( }; let runtime_state = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new(InstanceState::Failed), + nexus_state: db::model::InstanceState::Failed, // Must update the generation, or the database query will fail. // // The runtime state of the instance record is only changed as a result @@ -1029,13 +1028,13 @@ async fn sic_move_to_stopped( let instance_record = sagactx.lookup::("instance_record")?; - // Create a new generation of the isntance record with the Stopped state and + // Create a new generation of the instance record with the no-VMM state and // try to write it back to the database. If this node is replayed, or the // instance has already changed state by the time this step is reached, this // update will (correctly) be ignored because its generation number is out // of date. 
let new_state = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new(InstanceState::Stopped), + nexus_state: db::model::InstanceState::NoVmm, gen: db::model::Generation::from( instance_record.runtime_state.gen.next(), ), diff --git a/nexus/src/app/sagas/instance_ip_attach.rs b/nexus/src/app/sagas/instance_ip_attach.rs index 3332b71274..f8edf37dc4 100644 --- a/nexus/src/app/sagas/instance_ip_attach.rs +++ b/nexus/src/app/sagas/instance_ip_attach.rs @@ -420,6 +420,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + for use_float in [false, true] { let params = new_test_params(&opctx, datastore, use_float).await; @@ -509,6 +515,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + for use_float in [false, true] { test_helpers::action_failure_can_unwind::( nexus, @@ -535,6 +547,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + for use_float in [false, true] { test_helpers::action_failure_can_unwind_idempotently::< SagaInstanceIpAttach, @@ -561,9 +579,15 @@ pub(crate) mod test { let opctx = test_helpers::test_opctx(cptestctx); let datastore = &nexus.db_datastore; let _project_id = ip_manip_test_setup(&client).await; - let _instance = + let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + for use_float in [false, true] { let params = new_test_params(&opctx, datastore, use_float).await; let dag = create_saga_dag::(params).unwrap(); diff --git 
a/nexus/src/app/sagas/instance_ip_detach.rs b/nexus/src/app/sagas/instance_ip_detach.rs index 2f1d76c853..9625d77bf9 100644 --- a/nexus/src/app/sagas/instance_ip_detach.rs +++ b/nexus/src/app/sagas/instance_ip_detach.rs @@ -391,6 +391,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + attach_instance_ips(nexus, &opctx).await; for use_float in [false, true] { @@ -484,6 +490,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + attach_instance_ips(nexus, &opctx).await; for use_float in [false, true] { @@ -512,6 +524,12 @@ pub(crate) mod test { let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + attach_instance_ips(nexus, &opctx).await; for use_float in [false, true] { @@ -540,9 +558,15 @@ pub(crate) mod test { let opctx = test_helpers::test_opctx(cptestctx); let datastore = &nexus.db_datastore; let _project_id = ip_manip_test_setup(&client).await; - let _instance = + let instance = create_instance(client, PROJECT_NAME, INSTANCE_NAME).await; + crate::app::sagas::test_helpers::instance_simulate( + cptestctx, + &instance.identity.id, + ) + .await; + attach_instance_ips(nexus, &opctx).await; for use_float in [false, true] { diff --git a/nexus/src/app/sagas/instance_migrate.rs b/nexus/src/app/sagas/instance_migrate.rs index 1cfd170faf..db1d838014 100644 --- a/nexus/src/app/sagas/instance_migrate.rs +++ b/nexus/src/app/sagas/instance_migrate.rs @@ -755,8 +755,8 @@ mod tests { let new_instance = new_state.instance(); let new_vmm = new_state.vmm().as_ref(); assert_eq!( - new_instance.runtime().nexus_state.0, - 
omicron_common::api::external::InstanceState::Stopped + new_instance.runtime().nexus_state, + nexus_db_model::InstanceState::NoVmm, ); assert!(new_instance.runtime().propolis_id.is_none()); assert!(new_vmm.is_none()); diff --git a/nexus/src/app/sagas/instance_start.rs b/nexus/src/app/sagas/instance_start.rs index e7caedfc9c..d67ff02c20 100644 --- a/nexus/src/app/sagas/instance_start.rs +++ b/nexus/src/app/sagas/instance_start.rs @@ -16,7 +16,7 @@ use crate::app::sagas::declare_saga_actions; use chrono::Utc; use nexus_db_queries::db::{identity::Resource, lookup::LookupPath}; use nexus_db_queries::{authn, authz, db}; -use omicron_common::api::external::{Error, InstanceState}; +use omicron_common::api::external::Error; use serde::{Deserialize, Serialize}; use slog::info; use steno::{ActionError, Node}; @@ -260,9 +260,7 @@ async fn sis_move_to_starting( // be running before Propolis thinks it has started.) None => { let new_runtime = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new( - InstanceState::Running, - ), + nexus_state: db::model::InstanceState::Vmm, propolis_id: Some(propolis_id), time_updated: Utc::now(), gen: db_instance.runtime().gen.next().into(), @@ -300,7 +298,7 @@ async fn sis_move_to_starting_undo( "instance_id" => %instance_id); let new_runtime = db::model::InstanceRuntimeState { - nexus_state: db::model::InstanceState::new(InstanceState::Stopped), + nexus_state: db::model::InstanceState::NoVmm, propolis_id: None, gen: db_instance.runtime_state.gen.next().into(), ..db_instance.runtime_state @@ -762,10 +760,9 @@ mod test { .as_ref() .expect("running instance should have a vmm") .runtime - .state - .0; + .state; - assert_eq!(vmm_state, InstanceState::Running); + assert_eq!(vmm_state, nexus_db_model::VmmState::Running); } #[nexus_test(server = crate::Server)] @@ -818,8 +815,8 @@ mod test { assert!(new_db_instance.runtime().propolis_id.is_none()); assert_eq!( - new_db_instance.runtime().nexus_state.0, - InstanceState::Stopped 
+ new_db_instance.runtime().nexus_state, + nexus_db_model::InstanceState::NoVmm ); assert!(test_helpers::no_virtual_provisioning_resource_records_exist(cptestctx).await); @@ -861,10 +858,9 @@ mod test { .as_ref() .expect("running instance should have a vmm") .runtime - .state - .0; + .state; - assert_eq!(vmm_state, InstanceState::Running); + assert_eq!(vmm_state, nexus_db_model::VmmState::Running); } /// Tests that if a start saga unwinds because sled agent returned failure @@ -930,7 +926,7 @@ mod test { assert_eq!( db_instance.instance().runtime_state.nexus_state, - nexus_db_model::InstanceState(InstanceState::Stopped) + nexus_db_model::InstanceState::NoVmm ); assert!(db_instance.vmm().is_none()); diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index f8b56b3522..41e1793fab 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -2041,13 +2041,9 @@ mod test { .as_ref() .expect("running instance should have a sled") .runtime - .state - .0; + .state; - assert_eq!( - new_state, - omicron_common::api::external::InstanceState::Running - ); + assert_eq!(new_state, nexus_db_model::VmmState::Running); instance_state } diff --git a/nexus/tests/integration_tests/schema.rs b/nexus/tests/integration_tests/schema.rs index 89d2e274c5..bf73855ea7 100644 --- a/nexus/tests/integration_tests/schema.rs +++ b/nexus/tests/integration_tests/schema.rs @@ -1021,7 +1021,6 @@ async fn dbinit_equals_sum_of_all_up() { observed_schema.pretty_assert_eq(&expected_schema); assert_eq!(observed_data, expected_data); - crdb.cleanup().await.unwrap(); logctx.cleanup_successful(); } @@ -1054,6 +1053,17 @@ const SLED2: Uuid = Uuid::from_u128(0x2222513d_5c3d_4647_83b0_8f3515da7be1); // "7AC4" -> "Rack" const RACK1: Uuid = Uuid::from_u128(0x11117ac4_5c3d_4647_83b0_8f3515da7be1); +// "6701" -> "Proj"ect +const PROJECT: Uuid = Uuid::from_u128(0x11116701_5c3d_4647_83b0_8f3515da7be1); + +// "1257" -> "Inst"ance +const 
INSTANCE1: Uuid = Uuid::from_u128(0x11111257_5c3d_4647_83b0_8f3515da7be1); +const INSTANCE2: Uuid = Uuid::from_u128(0x22221257_5c3d_4647_83b0_8f3515da7be1); +const INSTANCE3: Uuid = Uuid::from_u128(0x33331257_5c3d_4647_83b0_8f3515da7be1); + +// "67060115" -> "Prop"olis +const PROPOLIS: Uuid = Uuid::from_u128(0x11116706_5c3d_4647_83b0_8f3515da7be1); + fn before_23_0_0(client: &Client) -> BoxFuture<'_, ()> { Box::pin(async move { // Create two silos @@ -1219,6 +1229,93 @@ fn after_37_0_1(client: &Client) -> BoxFuture<'_, ()> { }) } +fn before_70_0_0(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async move { + client + .batch_execute(&format!( + " + INSERT INTO instance (id, name, description, time_created, + time_modified, time_deleted, project_id, user_data, state, + time_state_updated, state_generation, active_propolis_id, + target_propolis_id, migration_id, ncpus, memory, hostname, + boot_on_fault, updater_id, updater_gen) VALUES + + ('{INSTANCE1}', 'inst1', '', now(), now(), NULL, '{PROJECT}', '', + 'stopped', now(), 1, NULL, NULL, NULL, 2, 1073741824, 'inst1', false, + NULL, 1), + ('{INSTANCE2}', 'inst2', '', now(), now(), NULL, '{PROJECT}', '', + 'running', now(), 1, '{PROPOLIS}', NULL, NULL, 2, 1073741824, 'inst2', + false, NULL, 1), + ('{INSTANCE3}', 'inst3', '', now(), now(), NULL, '{PROJECT}', '', + 'failed', now(), 1, NULL, NULL, NULL, 2, 1073741824, 'inst3', false, + NULL, 1); + " + )) + .await + .expect("failed to create instances"); + + client + .batch_execute(&format!( + " + INSERT INTO vmm (id, time_created, time_deleted, instance_id, state, + time_state_updated, state_generation, sled_id, propolis_ip, + propolis_port) VALUES + + ('{PROPOLIS}', now(), NULL, '{INSTANCE2}', 'running', now(), 1, + '{SLED1}', 'fd00:1122:3344:200::1', '12400'); + " + )) + .await + .expect("failed to create VMMs"); + }) +} + +fn after_70_0_0(client: &Client) -> BoxFuture<'_, ()> { + Box::pin(async { + let rows = client + .query("SELECT state FROM instance ORDER BY id", 
&[]) + .await + .expect("failed to load instance states"); + let instance_states = process_rows(&rows); + + assert_eq!( + instance_states[0].values, + vec![ColumnValue::new( + "state", + SqlEnum::from(("instance_state_v2", "no_vmm")) + )] + ); + assert_eq!( + instance_states[1].values, + vec![ColumnValue::new( + "state", + SqlEnum::from(("instance_state_v2", "vmm")) + )] + ); + assert_eq!( + instance_states[2].values, + vec![ColumnValue::new( + "state", + SqlEnum::from(("instance_state_v2", "failed")) + )] + ); + + let rows = client + .query("SELECT state FROM vmm ORDER BY id", &[]) + .await + .expect("failed to load VMM states"); + let vmm_states = process_rows(&rows); + + assert_eq!( + vmm_states[0].values, + vec![ColumnValue::new( + "state", + SqlEnum::from(("vmm_state", "running")) + )] + ); + }) +} + // Lazily initializes all migration checks. The combination of Rust function // pointers and async makes defining a static table fairly painful, so we're // using lazy initialization instead. 
@@ -1240,6 +1337,10 @@ fn get_migration_checks() -> BTreeMap { SemverVersion(semver::Version::parse("37.0.1").unwrap()), DataMigrationFns { before: None, after: after_37_0_1 }, ); + map.insert( + SemverVersion(semver::Version::parse("70.0.0").unwrap()), + DataMigrationFns { before: Some(before_70_0_0), after: after_70_0_0 }, + ); map } diff --git a/openapi/nexus-internal.json b/openapi/nexus-internal.json index 828378eaba..637334483d 100644 --- a/openapi/nexus-internal.json +++ b/openapi/nexus-internal.json @@ -3179,81 +3179,6 @@ "time_updated" ] }, - "InstanceState": { - "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", - "oneOf": [ - { - "description": "The instance is being created.", - "type": "string", - "enum": [ - "creating" - ] - }, - { - "description": "The instance is currently starting up.", - "type": "string", - "enum": [ - "starting" - ] - }, - { - "description": "The instance is currently running.", - "type": "string", - "enum": [ - "running" - ] - }, - { - "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", - "type": "string", - "enum": [ - "stopping" - ] - }, - { - "description": "The instance is currently stopped.", - "type": "string", - "enum": [ - "stopped" - ] - }, - { - "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", - "type": "string", - "enum": [ - "rebooting" - ] - }, - { - "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis is ready to continue execution.", - "type": "string", - "enum": [ - "migrating" - ] - }, - { - "description": "The instance is attempting to recover from a failure.", - "type": "string", - 
"enum": [ - "repairing" - ] - }, - { - "description": "The instance has encountered a failure.", - "type": "string", - "enum": [ - "failed" - ] - }, - { - "description": "The instance has been deleted.", - "type": "string", - "enum": [ - "destroyed" - ] - } - ] - }, "IpKind": { "type": "string", "enum": [ @@ -4996,7 +4921,7 @@ "description": "The last state reported by this VMM.", "allOf": [ { - "$ref": "#/components/schemas/InstanceState" + "$ref": "#/components/schemas/VmmState" } ] }, @@ -5012,6 +4937,67 @@ "time_updated" ] }, + "VmmState": { + "description": "One of the states that a VMM can be in.", + "oneOf": [ + { + "description": "The VMM is initializing and has not started running guest CPUs yet.", + "type": "string", + "enum": [ + "starting" + ] + }, + { + "description": "The VMM has finished initializing and may be running guest CPUs.", + "type": "string", + "enum": [ + "running" + ] + }, + { + "description": "The VMM is shutting down.", + "type": "string", + "enum": [ + "stopping" + ] + }, + { + "description": "The VMM's guest has stopped, and the guest will not run again, but the VMM process may not have released all of its resources yet.", + "type": "string", + "enum": [ + "stopped" + ] + }, + { + "description": "The VMM is being restarted or its guest OS is rebooting.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The VMM is part of a live migration.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The VMM process reported an internal failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The VMM process has been destroyed and its resources have been released.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, "Vni": { "description": "A Geneve Virtual Network Identifier", "type": "integer", diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index b975f16484..68513345e2 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json 
@@ -3138,81 +3138,6 @@ "time_updated" ] }, - "InstanceState": { - "description": "Running state of an Instance (primarily: booted or stopped)\n\nThis typically reflects whether it's starting, running, stopping, or stopped, but also includes states related to the Instance's lifecycle", - "oneOf": [ - { - "description": "The instance is being created.", - "type": "string", - "enum": [ - "creating" - ] - }, - { - "description": "The instance is currently starting up.", - "type": "string", - "enum": [ - "starting" - ] - }, - { - "description": "The instance is currently running.", - "type": "string", - "enum": [ - "running" - ] - }, - { - "description": "The instance has been requested to stop and a transition to \"Stopped\" is imminent.", - "type": "string", - "enum": [ - "stopping" - ] - }, - { - "description": "The instance is currently stopped.", - "type": "string", - "enum": [ - "stopped" - ] - }, - { - "description": "The instance is in the process of rebooting - it will remain in the \"rebooting\" state until the VM is starting once more.", - "type": "string", - "enum": [ - "rebooting" - ] - }, - { - "description": "The instance is in the process of migrating - it will remain in the \"migrating\" state until the migration process is complete and the destination propolis is ready to continue execution.", - "type": "string", - "enum": [ - "migrating" - ] - }, - { - "description": "The instance is attempting to recover from a failure.", - "type": "string", - "enum": [ - "repairing" - ] - }, - { - "description": "The instance has encountered a failure.", - "type": "string", - "enum": [ - "failed" - ] - }, - { - "description": "The instance has been deleted.", - "type": "string", - "enum": [ - "destroyed" - ] - } - ] - }, "InstanceStateRequested": { "description": "Requestable running state of an Instance.\n\nA subset of [`omicron_common::api::external::InstanceState`].", "oneOf": [ @@ -4560,7 +4485,7 @@ "description": "The last state reported by this VMM.", "allOf": 
[ { - "$ref": "#/components/schemas/InstanceState" + "$ref": "#/components/schemas/VmmState" } ] }, @@ -4576,6 +4501,67 @@ "time_updated" ] }, + "VmmState": { + "description": "One of the states that a VMM can be in.", + "oneOf": [ + { + "description": "The VMM is initializing and has not started running guest CPUs yet.", + "type": "string", + "enum": [ + "starting" + ] + }, + { + "description": "The VMM has finished initializing and may be running guest CPUs.", + "type": "string", + "enum": [ + "running" + ] + }, + { + "description": "The VMM is shutting down.", + "type": "string", + "enum": [ + "stopping" + ] + }, + { + "description": "The VMM's guest has stopped, and the guest will not run again, but the VMM process may not have released all of its resources yet.", + "type": "string", + "enum": [ + "stopped" + ] + }, + { + "description": "The VMM is being restarted or its guest OS is rebooting.", + "type": "string", + "enum": [ + "rebooting" + ] + }, + { + "description": "The VMM is part of a live migration.", + "type": "string", + "enum": [ + "migrating" + ] + }, + { + "description": "The VMM process reported an internal failure.", + "type": "string", + "enum": [ + "failed" + ] + }, + { + "description": "The VMM process has been destroyed and its resources have been released.", + "type": "string", + "enum": [ + "destroyed" + ] + } + ] + }, "Vni": { "description": "A Geneve Virtual Network Identifier", "type": "integer", diff --git a/schema/crdb/dbinit.sql b/schema/crdb/dbinit.sql index b759f86f1b..7bda66c5f2 100644 --- a/schema/crdb/dbinit.sql +++ b/schema/crdb/dbinit.sql @@ -954,15 +954,33 @@ CREATE UNIQUE INDEX IF NOT EXISTS lookup_project_by_silo ON omicron.public.proje * Instances */ -CREATE TYPE IF NOT EXISTS omicron.public.instance_state AS ENUM ( +CREATE TYPE IF NOT EXISTS omicron.public.instance_state_v2 AS ENUM ( + /* The instance exists in the DB but its create saga is still in flight. */ 'creating', + + /* + * The instance has no active VMM. 
Corresponds to the "stopped" external + * state. + */ + 'no_vmm', + + /* The instance's state is derived from its active VMM's state. */ + 'vmm', + + /* Something bad happened while trying to interact with the instance. */ + 'failed', + + /* The instance has been destroyed. */ + 'destroyed' +); + +CREATE TYPE IF NOT EXISTS omicron.public.vmm_state AS ENUM ( 'starting', 'running', 'stopping', 'stopped', 'rebooting', 'migrating', - 'repairing', 'failed', 'destroyed' ); @@ -989,8 +1007,7 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance ( /* user data for instance initialization systems (e.g. cloud-init) */ user_data BYTES NOT NULL, - /* The state of the instance when it has no active VMM. */ - state omicron.public.instance_state NOT NULL, + /* The last-updated time and generation for the instance's state. */ time_state_updated TIMESTAMPTZ NOT NULL, state_generation INT NOT NULL, @@ -1014,8 +1031,21 @@ CREATE TABLE IF NOT EXISTS omicron.public.instance ( updater_id UUID, /* Generation of the instance updater lock */ - updater_gen INT NOT NULL DEFAULT 0 + updater_gen INT NOT NULL DEFAULT 0, + /* + * The internal instance state. If this is 'vmm', the externally-visible + * instance state is derived from its active VMM's state. This column is + * distant from its generation number and update time because it is + * deleted and recreated by the schema upgrade process; see the + * `separate-instance-and-vmm-states` schema change for details. 
+ */ + state omicron.public.instance_state_v2 NOT NULL, + + CONSTRAINT vmm_iff_active_propolis CHECK ( + ((state = 'vmm') AND (active_propolis_id IS NOT NULL)) OR + ((state != 'vmm') AND (active_propolis_id IS NULL)) + ) ); -- Names for instances within a project should be unique @@ -3477,12 +3507,12 @@ CREATE TABLE IF NOT EXISTS omicron.public.vmm ( time_created TIMESTAMPTZ NOT NULL, time_deleted TIMESTAMPTZ, instance_id UUID NOT NULL, - state omicron.public.instance_state NOT NULL, time_state_updated TIMESTAMPTZ NOT NULL, state_generation INT NOT NULL, sled_id UUID NOT NULL, propolis_ip INET NOT NULL, - propolis_port INT4 NOT NULL CHECK (propolis_port BETWEEN 0 AND 65535) DEFAULT 12400 + propolis_port INT4 NOT NULL CHECK (propolis_port BETWEEN 0 AND 65535) DEFAULT 12400, + state omicron.public.vmm_state NOT NULL ); CREATE INDEX IF NOT EXISTS lookup_vmms_by_sled_id ON omicron.public.vmm ( @@ -4045,7 +4075,7 @@ INSERT INTO omicron.public.db_metadata ( version, target_version ) VALUES - (TRUE, NOW(), NOW(), '69.0.0', NULL) + (TRUE, NOW(), NOW(), '70.0.0', NULL) ON CONFLICT DO NOTHING; COMMIT; diff --git a/schema/crdb/separate-instance-and-vmm-states/README.adoc b/schema/crdb/separate-instance-and-vmm-states/README.adoc new file mode 100644 index 0000000000..f005ef0b29 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/README.adoc @@ -0,0 +1,64 @@ +# Overview + +This schema change splits the "instance state" enum that instances and VMMs +share into two enums, one for instance states and one for VMM states. Variants +used by only one of these objects only appear in the corresponding enum. This +upgrade also adds a database-level constraint that requires that an instance's +state reports that it has an active VMM if and only if it has an active Propolis +ID. + +This change is mechanically tricky for two reasons. First, the states instances +and VMMs have after an upgrade depends on the state that they have before the +upgrade. 
(While this upgrade is supposed to take place offline, past experience +with instances that are stuck in a non-Stopped state shows that we can't take +for granted that all instances and VMMs will be Stopped at upgrade time.) +Second, Postgres and/or CRDB don't support all the schema change primitives we +might use to deprecate the old state column. Specifically: + +* CockroachDB doesn't support altering column types without enabling an + experimental flag + (see https://github.com/cockroachdb/cockroach/issues/49329?version=v22.1). +* Postgres doesn't support removing enum variants (adding and renaming are OK), + so we can't shrink and directly reuse the existing instance state enum without + leaving a set of "reserved"/"unused" variants around. +* Even if it did, Postgres doesn't support the `IF EXISTS` qualifier for many + `ALTER TYPE` and `ALTER TABLE` statements, e.g. `ALTER TABLE RENAME COLUMN` + and `ALTER TYPE RENAME TO`. There are ways to work around this (e.g. put the + statement into a user-defined function or code block and catch the relevant + exceptions from it), but CockroachDB v22.1.9 doesn't support UDFs (support + was added in v22.2). + +These limitations make it hard to change the schema idempotently. To get around +this, the change uses the following general procedure to change a column's type +from one enum to another: + +. Create a new enum with the variants of interest. +. Create a new temporary column to hold the old object state. (Adding a column + supports `IF NOT EXISTS`). +. Copy the old object state to the temporary column. +. Drop the old column (this supports `IF EXISTS`). +. Recreate the state column with the new type. +. Populate the column's values using the data saved in the temporary column. +. Add a `NOT NULL` qualifier to the new column. +. Drop the temporary column. + +Note that deleting and recreating columns this way (instead of modifying them in +place) changes their column indices in the affected table. 
These columns need to +be moved to the (current) ends of the table definitions in dbinit.sql, or the +schema upgrade tests will fail. + +# Upgrade steps + +The individual transactions in this upgrade do the following: + +* `up01` and `up02` drop views that depend on the `state` column in the `vmm` + table. +* `up03` through `up10` change the `instance` table's state enum using the + procedure described above. +* `up11` through `up18` upgrade the `vmm` table. +* `up19` and `up21` recreate the views deleted at the beginning of this + procedure. +* `up20` deletes the now-unused `instance_state` enum. +* `up22` adds a constraint to the `instance` table that requires that an + instance be in the `vmm` state if and only if it has a non-NULL active + Propolis ID. diff --git a/schema/crdb/separate-instance-and-vmm-states/up01.sql b/schema/crdb/separate-instance-and-vmm-states/up01.sql new file mode 100644 index 0000000000..1d8fee0b05 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up01.sql @@ -0,0 +1 @@ +DROP VIEW IF EXISTS omicron.public.sled_instance; diff --git a/schema/crdb/separate-instance-and-vmm-states/up02.sql b/schema/crdb/separate-instance-and-vmm-states/up02.sql new file mode 100644 index 0000000000..aebe0119f5 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up02.sql @@ -0,0 +1 @@ +DROP VIEW IF EXISTS omicron.public.v2p_mapping_view; diff --git a/schema/crdb/separate-instance-and-vmm-states/up03.sql b/schema/crdb/separate-instance-and-vmm-states/up03.sql new file mode 100644 index 0000000000..d7f40f8c5c --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up03.sql @@ -0,0 +1,7 @@ +CREATE TYPE IF NOT EXISTS omicron.public.instance_state_v2 AS ENUM ( + 'creating', + 'no_vmm', + 'vmm', + 'failed', + 'destroyed' +); diff --git a/schema/crdb/separate-instance-and-vmm-states/up04.sql b/schema/crdb/separate-instance-and-vmm-states/up04.sql new file mode 100644 index 0000000000..c2e2e59191 --- /dev/null +++ 
b/schema/crdb/separate-instance-and-vmm-states/up04.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance ADD COLUMN IF NOT EXISTS downlevel_state omicron.public.instance_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up05.sql b/schema/crdb/separate-instance-and-vmm-states/up05.sql new file mode 100644 index 0000000000..89d57a3260 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up05.sql @@ -0,0 +1,2 @@ +SET LOCAL disallow_full_table_scans = off; +UPDATE omicron.public.instance SET downlevel_state = state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up06.sql b/schema/crdb/separate-instance-and-vmm-states/up06.sql new file mode 100644 index 0000000000..b3d61de712 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up06.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance DROP COLUMN IF EXISTS state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up07.sql b/schema/crdb/separate-instance-and-vmm-states/up07.sql new file mode 100644 index 0000000000..8a21a819be --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up07.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance ADD COLUMN IF NOT EXISTS state omicron.public.instance_state_v2; diff --git a/schema/crdb/separate-instance-and-vmm-states/up08.sql b/schema/crdb/separate-instance-and-vmm-states/up08.sql new file mode 100644 index 0000000000..82346ee6a4 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up08.sql @@ -0,0 +1,8 @@ +SET LOCAL disallow_full_table_scans = off; +UPDATE omicron.public.instance SET state = CASE + WHEN downlevel_state = 'creating' THEN 'creating' + WHEN downlevel_state = 'failed' THEN 'failed' + WHEN downlevel_state = 'destroyed' THEN 'destroyed' + WHEN active_propolis_id IS NOT NULL THEN 'vmm' + ELSE 'no_vmm' +END; diff --git a/schema/crdb/separate-instance-and-vmm-states/up09.sql b/schema/crdb/separate-instance-and-vmm-states/up09.sql new file mode 100644 index 0000000000..1036c339b8 --- 
/dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up09.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance ALTER COLUMN state SET NOT NULL; diff --git a/schema/crdb/separate-instance-and-vmm-states/up10.sql b/schema/crdb/separate-instance-and-vmm-states/up10.sql new file mode 100644 index 0000000000..fbce77e29c --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up10.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.instance DROP COLUMN IF EXISTS downlevel_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up11.sql b/schema/crdb/separate-instance-and-vmm-states/up11.sql new file mode 100644 index 0000000000..39aa085b6d --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up11.sql @@ -0,0 +1,10 @@ +CREATE TYPE IF NOT EXISTS omicron.public.vmm_state AS ENUM ( + 'starting', + 'running', + 'stopping', + 'stopped', + 'rebooting', + 'migrating', + 'failed', + 'destroyed' +); diff --git a/schema/crdb/separate-instance-and-vmm-states/up12.sql b/schema/crdb/separate-instance-and-vmm-states/up12.sql new file mode 100644 index 0000000000..8bfa1e7623 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up12.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm ADD COLUMN IF NOT EXISTS downlevel_state omicron.public.instance_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up13.sql b/schema/crdb/separate-instance-and-vmm-states/up13.sql new file mode 100644 index 0000000000..8f906bcb27 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up13.sql @@ -0,0 +1,2 @@ +SET LOCAL disallow_full_table_scans = off; +UPDATE omicron.public.vmm SET downlevel_state = state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up14.sql b/schema/crdb/separate-instance-and-vmm-states/up14.sql new file mode 100644 index 0000000000..65d2e99f38 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up14.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm DROP COLUMN IF EXISTS state; diff --git 
a/schema/crdb/separate-instance-and-vmm-states/up15.sql b/schema/crdb/separate-instance-and-vmm-states/up15.sql new file mode 100644 index 0000000000..3f3f80d508 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up15.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm ADD COLUMN IF NOT EXISTS state omicron.public.vmm_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up16.sql b/schema/crdb/separate-instance-and-vmm-states/up16.sql new file mode 100644 index 0000000000..0e7b8f8a82 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up16.sql @@ -0,0 +1,2 @@ +SET LOCAL disallow_full_table_scans = off; +UPDATE omicron.public.vmm SET state = downlevel_state::text::vmm_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up17.sql b/schema/crdb/separate-instance-and-vmm-states/up17.sql new file mode 100644 index 0000000000..7cc912ba99 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up17.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm ALTER COLUMN state SET NOT NULL; diff --git a/schema/crdb/separate-instance-and-vmm-states/up18.sql b/schema/crdb/separate-instance-and-vmm-states/up18.sql new file mode 100644 index 0000000000..349f05d7ec --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up18.sql @@ -0,0 +1 @@ +ALTER TABLE omicron.public.vmm DROP COLUMN IF EXISTS downlevel_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up19.sql b/schema/crdb/separate-instance-and-vmm-states/up19.sql new file mode 100644 index 0000000000..b1a96ece52 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up19.sql @@ -0,0 +1,23 @@ +CREATE OR REPLACE VIEW omicron.public.sled_instance +AS SELECT + instance.id, + instance.name, + silo.name as silo_name, + project.name as project_name, + vmm.sled_id as active_sled_id, + instance.time_created, + instance.time_modified, + instance.migration_id, + instance.ncpus, + instance.memory, + vmm.state +FROM + omicron.public.instance AS instance + 
JOIN omicron.public.project AS project ON + instance.project_id = project.id + JOIN omicron.public.silo AS silo ON + project.silo_id = silo.id + JOIN omicron.public.vmm AS vmm ON + instance.active_propolis_id = vmm.id +WHERE + instance.time_deleted IS NULL AND vmm.time_deleted IS NULL; diff --git a/schema/crdb/separate-instance-and-vmm-states/up20.sql b/schema/crdb/separate-instance-and-vmm-states/up20.sql new file mode 100644 index 0000000000..7575559a2f --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up20.sql @@ -0,0 +1 @@ +DROP TYPE IF EXISTS omicron.public.instance_state; diff --git a/schema/crdb/separate-instance-and-vmm-states/up21.sql b/schema/crdb/separate-instance-and-vmm-states/up21.sql new file mode 100644 index 0000000000..a0fe7b7a48 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up21.sql @@ -0,0 +1,42 @@ +CREATE VIEW IF NOT EXISTS omicron.public.v2p_mapping_view +AS +WITH VmV2pMappings AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.vmm vmm ON n.parent_id = vmm.instance_id + JOIN omicron.public.sled s ON vmm.sled_id = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'instance' + AND (vmm.state = 'running' OR vmm.state = 'starting') + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +), +ProbeV2pMapping AS ( + SELECT + n.id as nic_id, + s.id as sled_id, + s.ip as sled_ip, + v.vni, + n.mac, + n.ip + FROM omicron.public.network_interface n + JOIN omicron.public.vpc_subnet vs ON vs.id = n.subnet_id + JOIN omicron.public.vpc v ON v.id = n.vpc_id + JOIN omicron.public.probe p ON n.parent_id = p.id + JOIN omicron.public.sled s ON p.sled = s.id + WHERE n.time_deleted IS NULL + AND n.kind = 'probe' + AND s.sled_policy = 'in_service' + AND s.sled_state = 'active' +) +SELECT nic_id, sled_id, sled_ip, 
vni, mac, ip FROM VmV2pMappings +UNION +SELECT nic_id, sled_id, sled_ip, vni, mac, ip FROM ProbeV2pMapping; diff --git a/schema/crdb/separate-instance-and-vmm-states/up22.sql b/schema/crdb/separate-instance-and-vmm-states/up22.sql new file mode 100644 index 0000000000..cf884a9c68 --- /dev/null +++ b/schema/crdb/separate-instance-and-vmm-states/up22.sql @@ -0,0 +1,5 @@ +ALTER TABLE omicron.public.instance +ADD CONSTRAINT IF NOT EXISTS vmm_iff_active_propolis CHECK ( + ((state = 'vmm') AND (active_propolis_id IS NOT NULL)) OR + ((state != 'vmm') AND (active_propolis_id IS NULL)) +) diff --git a/sled-agent/src/common/instance.rs b/sled-agent/src/common/instance.rs index d7ee8982e0..62af337c4c 100644 --- a/sled-agent/src/common/instance.rs +++ b/sled-agent/src/common/instance.rs @@ -6,9 +6,8 @@ use crate::params::InstanceMigrationSourceParams; use chrono::{DateTime, Utc}; -use omicron_common::api::external::InstanceState as ApiInstanceState; use omicron_common::api::internal::nexus::{ - InstanceRuntimeState, SledInstanceState, VmmRuntimeState, + InstanceRuntimeState, SledInstanceState, VmmRuntimeState, VmmState, }; use propolis_client::types::{ InstanceState as PropolisApiState, InstanceStateMonitorResponse, @@ -35,7 +34,7 @@ impl From for PropolisInstanceState { } } -impl From for ApiInstanceState { +impl From for VmmState { fn from(value: PropolisInstanceState) -> Self { use propolis_client::types::InstanceState as State; match value.0 { @@ -43,25 +42,28 @@ impl From for ApiInstanceState { // when an instance has an active VMM. A Propolis that is "creating" // its virtual machine objects is "starting" from the external API's // perspective. 
- State::Creating | State::Starting => ApiInstanceState::Starting, - State::Running => ApiInstanceState::Running, - State::Stopping => ApiInstanceState::Stopping, + State::Creating | State::Starting => VmmState::Starting, + State::Running => VmmState::Running, + State::Stopping => VmmState::Stopping, // A Propolis that is stopped but not yet destroyed should still // appear to be Stopping from an external API perspective, since // they cannot be restarted yet. Instances become logically Stopped // once Propolis reports that the VM is Destroyed (see below). - State::Stopped => ApiInstanceState::Stopping, - State::Rebooting => ApiInstanceState::Rebooting, - State::Migrating => ApiInstanceState::Migrating, - State::Repairing => ApiInstanceState::Repairing, - State::Failed => ApiInstanceState::Failed, + State::Stopped => VmmState::Stopping, + State::Rebooting => VmmState::Rebooting, + State::Migrating => VmmState::Migrating, + State::Failed => VmmState::Failed, // Nexus needs to learn when a VM has entered the "destroyed" state // so that it can release its resource reservation. When this // happens, this module also clears the active VMM ID from the // instance record, which will accordingly set the Nexus-owned // instance state to Stopped, preventing this state from being used // as an externally-visible instance state. - State::Destroyed => ApiInstanceState::Destroyed, + State::Destroyed => VmmState::Destroyed, + // Propolis never actually uses the Repairing state, so this should + // be unreachable, but since these states come from another process, + // program defensively and convert Repairing to Running. 
+ State::Repairing => VmmState::Running, } } } @@ -170,11 +172,11 @@ pub enum PublishedVmmState { Rebooting, } -impl From for ApiInstanceState { +impl From for VmmState { fn from(value: PublishedVmmState) -> Self { match value { - PublishedVmmState::Stopping => ApiInstanceState::Stopping, - PublishedVmmState::Rebooting => ApiInstanceState::Rebooting, + PublishedVmmState::Stopping => VmmState::Stopping, + PublishedVmmState::Rebooting => VmmState::Rebooting, } } } @@ -525,7 +527,7 @@ mod test { }; let vmm = VmmRuntimeState { - state: ApiInstanceState::Starting, + state: VmmState::Starting, gen: Generation::new(), time_updated: now, }; @@ -535,7 +537,7 @@ mod test { fn make_migration_source_instance() -> InstanceStates { let mut state = make_instance(); - state.vmm.state = ApiInstanceState::Migrating; + state.vmm.state = VmmState::Migrating; state.instance.migration_id = Some(Uuid::new_v4()); state.instance.dst_propolis_id = Some(Uuid::new_v4()); state @@ -543,7 +545,7 @@ mod test { fn make_migration_target_instance() -> InstanceStates { let mut state = make_instance(); - state.vmm.state = ApiInstanceState::Migrating; + state.vmm.state = VmmState::Migrating; state.instance.migration_id = Some(Uuid::new_v4()); state.propolis_id = Uuid::new_v4(); state.instance.dst_propolis_id = Some(state.propolis_id); @@ -661,7 +663,7 @@ mod test { // The Stopped state is translated internally to Stopping to prevent // external viewers from perceiving that the instance is stopped before // the VMM is fully retired. 
- assert_eq!(state.vmm.state, ApiInstanceState::Stopping); + assert_eq!(state.vmm.state, VmmState::Stopping); assert!(state.vmm.gen > prev.vmm.gen); let prev = state.clone(); @@ -672,7 +674,7 @@ mod test { )); assert_state_change_has_gen_change(&prev, &state); assert_eq!(state.instance.gen, prev.instance.gen); - assert_eq!(state.vmm.state, ApiInstanceState::Destroyed); + assert_eq!(state.vmm.state, VmmState::Destroyed); assert!(state.vmm.gen > prev.vmm.gen); } @@ -695,7 +697,7 @@ mod test { )); assert_state_change_has_gen_change(&prev, &state); assert_eq!(state.instance.gen, prev.instance.gen); - assert_eq!(state.vmm.state, ApiInstanceState::Failed); + assert_eq!(state.vmm.state, VmmState::Failed); assert!(state.vmm.gen > prev.vmm.gen); } @@ -734,7 +736,7 @@ mod test { assert!(state.instance.migration_id.is_none()); assert!(state.instance.dst_propolis_id.is_none()); assert!(state.instance.gen > prev.instance.gen); - assert_eq!(state.vmm.state, ApiInstanceState::Running); + assert_eq!(state.vmm.state, VmmState::Running); assert!(state.vmm.gen > prev.vmm.gen); // Pretend Nexus set some new migration IDs. 
@@ -766,7 +768,7 @@ mod test { state.instance.dst_propolis_id.unwrap(), prev.instance.dst_propolis_id.unwrap() ); - assert_eq!(state.vmm.state, ApiInstanceState::Migrating); + assert_eq!(state.vmm.state, VmmState::Migrating); assert!(state.vmm.gen > prev.vmm.gen); assert_eq!(state.instance.gen, prev.instance.gen); @@ -778,7 +780,7 @@ mod test { observed.migration_status = ObservedMigrationStatus::Succeeded; assert!(state.apply_propolis_observation(&observed).is_none()); assert_state_change_has_gen_change(&prev, &state); - assert_eq!(state.vmm.state, ApiInstanceState::Migrating); + assert_eq!(state.vmm.state, VmmState::Migrating); assert!(state.vmm.gen > prev.vmm.gen); assert_eq!(state.instance.migration_id, prev.instance.migration_id); assert_eq!( diff --git a/sled-agent/src/instance.rs b/sled-agent/src/instance.rs index 271eceb556..c6c567595d 100644 --- a/sled-agent/src/instance.rs +++ b/sled-agent/src/instance.rs @@ -1539,9 +1539,9 @@ mod tests { use illumos_utils::zone::__mock_MockZones::__id::Context as MockZonesIdContext; use internal_dns::resolver::Resolver; use omicron_common::api::external::{ - ByteCount, Generation, Hostname, InstanceCpuCount, InstanceState, + ByteCount, Generation, Hostname, InstanceCpuCount, }; - use omicron_common::api::internal::nexus::InstanceProperties; + use omicron_common::api::internal::nexus::{InstanceProperties, VmmState}; use omicron_common::FileKv; use sled_storage::manager_test_harness::StorageManagerTestHarness; use std::net::Ipv6Addr; @@ -1781,7 +1781,7 @@ mod tests { time_updated: Default::default(), }, vmm_runtime: VmmRuntimeState { - state: InstanceState::Creating, + state: VmmState::Starting, gen: Generation::new(), time_updated: Default::default(), }, @@ -1880,7 +1880,7 @@ mod tests { TIMEOUT_DURATION, state_rx.wait_for(|maybe_state| match maybe_state { ReceivedInstanceState::InstancePut(sled_inst_state) => { - sled_inst_state.vmm_state.state == InstanceState::Running + sled_inst_state.vmm_state.state == 
VmmState::Running } _ => false, }), @@ -1953,7 +1953,7 @@ mod tests { .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?"); if let ReceivedInstanceState::InstancePut(SledInstanceState { - vmm_state: VmmRuntimeState { state: InstanceState::Running, .. }, + vmm_state: VmmRuntimeState { state: VmmState::Running, .. }, .. }) = state_rx.borrow().to_owned() { @@ -2035,7 +2035,7 @@ mod tests { .expect_err("*should've* timed out waiting for Instance::put_state, but didn't?"); if let ReceivedInstanceState::InstancePut(SledInstanceState { - vmm_state: VmmRuntimeState { state: InstanceState::Running, .. }, + vmm_state: VmmRuntimeState { state: VmmState::Running, .. }, .. }) = state_rx.borrow().to_owned() { @@ -2138,7 +2138,7 @@ mod tests { TIMEOUT_DURATION, state_rx.wait_for(|maybe_state| match maybe_state { ReceivedInstanceState::InstancePut(sled_inst_state) => { - sled_inst_state.vmm_state.state == InstanceState::Running + sled_inst_state.vmm_state.state == VmmState::Running } _ => false, }), diff --git a/sled-agent/src/sim/collection.rs b/sled-agent/src/sim/collection.rs index bbc3e440ab..f5be31bd37 100644 --- a/sled-agent/src/sim/collection.rs +++ b/sled-agent/src/sim/collection.rs @@ -421,11 +421,11 @@ mod test { use omicron_common::api::external::DiskState; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; - use omicron_common::api::external::InstanceState; use omicron_common::api::internal::nexus::DiskRuntimeState; use omicron_common::api::internal::nexus::InstanceRuntimeState; use omicron_common::api::internal::nexus::SledInstanceState; use omicron_common::api::internal::nexus::VmmRuntimeState; + use omicron_common::api::internal::nexus::VmmState; use omicron_test_utils::dev::test_setup_log; use uuid::Uuid; @@ -442,7 +442,7 @@ mod test { }; let vmm_state = VmmRuntimeState { - state: InstanceState::Starting, + state: VmmState::Starting, gen: Generation::new(), time_updated: Utc::now(), }; @@ -478,7 
+478,7 @@ mod test { let r1 = instance.object.current(); info!(logctx.log, "new instance"; "state" => ?r1); - assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.state, VmmState::Starting); assert_eq!(r1.vmm_state.gen, Generation::new()); // There's no asynchronous transition going on yet so a @@ -508,7 +508,7 @@ mod test { ); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(rnext.instance_state.propolis_id.is_none()); - assert_eq!(rnext.vmm_state.state, InstanceState::Destroyed); + assert_eq!(rnext.vmm_state.state, VmmState::Destroyed); assert!(rx.try_next().is_err()); logctx.cleanup_successful(); @@ -524,7 +524,7 @@ mod test { let r1 = instance.object.current(); info!(logctx.log, "new instance"; "state" => ?r1); - assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.state, VmmState::Starting); assert_eq!(r1.vmm_state.gen, Generation::new()); // There's no asynchronous transition going on yet so a @@ -553,7 +553,7 @@ mod test { let rnext = instance.object.current(); assert_eq!(rnext.vmm_state.gen, rprev.vmm_state.gen); assert_eq!(rnext.vmm_state.time_updated, rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Starting); + assert_eq!(rnext.vmm_state.state, VmmState::Starting); rprev = rnext; // Now poke the instance. It should transition to Running. 
@@ -563,8 +563,8 @@ mod test { assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_none()); assert!(rx.try_next().is_err()); - assert_eq!(rprev.vmm_state.state, InstanceState::Starting); - assert_eq!(rnext.vmm_state.state, InstanceState::Running); + assert_eq!(rprev.vmm_state.state, VmmState::Starting); + assert_eq!(rnext.vmm_state.state, VmmState::Running); assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); rprev = rnext; @@ -596,7 +596,7 @@ mod test { let rnext = instance.object.current(); assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); + assert_eq!(rnext.vmm_state.state, VmmState::Stopping); rprev = rnext; // Propolis publishes its own transition to Stopping before it publishes @@ -606,8 +606,8 @@ mod test { assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_some()); - assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); - assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); + assert_eq!(rprev.vmm_state.state, VmmState::Stopping); + assert_eq!(rnext.vmm_state.state, VmmState::Stopping); rprev = rnext; // The Stopping-to-Stopped transition is masked from external viewers of @@ -618,8 +618,8 @@ mod test { assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); assert!(instance.object.desired().is_some()); - assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); - assert_eq!(rnext.vmm_state.state, InstanceState::Stopping); + assert_eq!(rprev.vmm_state.state, VmmState::Stopping); + assert_eq!(rnext.vmm_state.state, VmmState::Stopping); rprev = rnext; // ...and Stopped (internally) goes to Destroyed. 
This transition is @@ -629,8 +629,8 @@ mod test { let rnext = instance.object.current(); assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated >= rprev.vmm_state.time_updated); - assert_eq!(rprev.vmm_state.state, InstanceState::Stopping); - assert_eq!(rnext.vmm_state.state, InstanceState::Destroyed); + assert_eq!(rprev.vmm_state.state, VmmState::Stopping); + assert_eq!(rnext.vmm_state.state, VmmState::Destroyed); assert!(rnext.instance_state.gen > rprev.instance_state.gen); logctx.cleanup_successful(); } @@ -645,7 +645,7 @@ mod test { let r1 = instance.object.current(); info!(logctx.log, "new instance"; "state" => ?r1); - assert_eq!(r1.vmm_state.state, InstanceState::Starting); + assert_eq!(r1.vmm_state.state, VmmState::Starting); assert_eq!(r1.vmm_state.gen, Generation::new()); assert!(instance .transition(InstanceStateRequested::Running) @@ -670,7 +670,7 @@ mod test { let (rprev, rnext) = (rnext, instance.object.current()); assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Rebooting); + assert_eq!(rnext.vmm_state.state, VmmState::Rebooting); instance.transition_finish(); let (rprev, rnext) = (rnext, instance.object.current()); @@ -681,7 +681,7 @@ mod test { assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Rebooting); + assert_eq!(rnext.vmm_state.state, VmmState::Rebooting); assert!(instance.object.desired().is_some()); instance.transition_finish(); let (rprev, rnext) = (rnext, instance.object.current()); @@ -693,7 +693,7 @@ mod test { assert!(rnext.vmm_state.gen > rprev.vmm_state.gen); assert!(rnext.vmm_state.time_updated > rprev.vmm_state.time_updated); - assert_eq!(rnext.vmm_state.state, InstanceState::Running); + assert_eq!(rnext.vmm_state.state, VmmState::Running); 
logctx.cleanup_successful(); } diff --git a/sled-agent/src/sim/instance.rs b/sled-agent/src/sim/instance.rs index 8b00adce60..ed88dbcc6f 100644 --- a/sled-agent/src/sim/instance.rs +++ b/sled-agent/src/sim/instance.rs @@ -14,10 +14,9 @@ use chrono::Utc; use nexus_client; use omicron_common::api::external::Error; use omicron_common::api::external::Generation; -use omicron_common::api::external::InstanceState as ApiInstanceState; use omicron_common::api::external::ResourceType; use omicron_common::api::internal::nexus::{ - InstanceRuntimeState, SledInstanceState, + InstanceRuntimeState, SledInstanceState, VmmState, }; use propolis_client::types::{ InstanceMigrateStatusResponse as PropolisMigrateStatus, @@ -32,7 +31,7 @@ use crate::common::instance::{Action as InstanceAction, InstanceStates}; #[derive(Clone, Debug)] enum MonitorChange { - InstanceState(PropolisInstanceState), + PropolisState(PropolisInstanceState), MigrateStatus(PropolisMigrateStatus), } @@ -68,7 +67,7 @@ struct SimInstanceInner { impl SimInstanceInner { /// Pushes a Propolis instance state transition to the state change queue. fn queue_propolis_state(&mut self, propolis_state: PropolisInstanceState) { - self.queue.push_back(MonitorChange::InstanceState(propolis_state)); + self.queue.push_back(MonitorChange::PropolisState(propolis_state)); } /// Pushes a Propolis migration status to the state change queue. 
@@ -85,7 +84,7 @@ impl SimInstanceInner { self.queue .iter() .filter_map(|entry| match entry { - MonitorChange::InstanceState(state) => Some(state), + MonitorChange::PropolisState(state) => Some(state), _ => None, }) .last() @@ -111,7 +110,7 @@ impl SimInstanceInner { self ))); } - if self.state.vmm().state != ApiInstanceState::Migrating { + if self.state.vmm().state != VmmState::Migrating { return Err(Error::invalid_request(&format!( "can't request migration in for a vmm that wasn't \ created in the migrating state (current state: {:?})", @@ -142,32 +141,25 @@ impl SimInstanceInner { } InstanceStateRequested::Running => { match self.next_resting_state() { - // It's only valid to request the Running state after - // successfully registering a VMM, and a registered VMM - // should never be in the Creating state. - ApiInstanceState::Creating => unreachable!( - "VMMs should never try to reach the Creating state" - ), - ApiInstanceState::Starting => { + VmmState::Starting => { self.queue_propolis_state( PropolisInstanceState::Running, ); } - ApiInstanceState::Running - | ApiInstanceState::Rebooting - | ApiInstanceState::Migrating => {} + VmmState::Running + | VmmState::Rebooting + | VmmState::Migrating => {} // Propolis forbids direct transitions from a stopped state // back to a running state. Callers who want to restart a // stopped instance must recreate it. 
- ApiInstanceState::Stopping - | ApiInstanceState::Stopped - | ApiInstanceState::Repairing - | ApiInstanceState::Failed - | ApiInstanceState::Destroyed => { + VmmState::Stopping + | VmmState::Stopped + | VmmState::Failed + | VmmState::Destroyed => { return Err(Error::invalid_request(&format!( "can't request state Running with pending resting \ - state {} (current state: {:?})", + state {:?} (current state: {:?})", self.next_resting_state(), self ))) @@ -176,13 +168,10 @@ impl SimInstanceInner { } InstanceStateRequested::Stopped => { match self.next_resting_state() { - ApiInstanceState::Creating => unreachable!( - "VMMs should never try to reach the Creating state" - ), - ApiInstanceState::Starting => { + VmmState::Starting => { self.state.terminate_rudely(); } - ApiInstanceState::Running => { + VmmState::Running => { self.state.transition_vmm( PublishedVmmState::Stopping, Utc::now(), @@ -199,13 +188,13 @@ impl SimInstanceInner { } // Idempotently allow requests to stop an instance that is // already stopping. - ApiInstanceState::Stopping - | ApiInstanceState::Stopped - | ApiInstanceState::Destroyed => {} + VmmState::Stopping + | VmmState::Stopped + | VmmState::Destroyed => {} _ => { return Err(Error::invalid_request(&format!( "can't request state Stopped with pending resting \ - state {} (current state: {:?})", + state {:?} (current state: {:?})", self.next_resting_state(), self ))) @@ -213,10 +202,10 @@ impl SimInstanceInner { } } InstanceStateRequested::Reboot => match self.next_resting_state() { - ApiInstanceState::Running => { + VmmState::Running => { // Further requests to reboot are ignored if the instance // is currently rebooting or about to reboot. 
- if self.state.vmm().state != ApiInstanceState::Rebooting + if self.state.vmm().state != VmmState::Rebooting && !self.reboot_pending() { self.state.transition_vmm( @@ -233,7 +222,7 @@ impl SimInstanceInner { } _ => { return Err(Error::invalid_request(&format!( - "can't request Reboot with pending resting state {} \ + "can't request Reboot with pending resting state {:?} \ (current state: {:?})", self.next_resting_state(), self @@ -249,7 +238,7 @@ impl SimInstanceInner { fn execute_desired_transition(&mut self) -> Option { if let Some(change) = self.queue.pop_front() { match change { - MonitorChange::InstanceState(state) => { + MonitorChange::PropolisState(state) => { if matches!(state, PropolisInstanceState::Destroyed) { self.destroyed = true; } @@ -305,25 +294,26 @@ impl SimInstanceInner { /// Returns the "resting" state the simulated instance will reach if its /// queue is drained. - fn next_resting_state(&self) -> ApiInstanceState { + fn next_resting_state(&self) -> VmmState { if self.queue.is_empty() { self.state.vmm().state } else { if let Some(last_state) = self.last_queued_instance_state() { - use ApiInstanceState as ApiState; use PropolisInstanceState as PropolisState; match last_state { PropolisState::Creating | PropolisState::Starting => { - ApiState::Starting + VmmState::Starting + } + PropolisState::Running => VmmState::Running, + PropolisState::Stopping => VmmState::Stopping, + PropolisState::Stopped => VmmState::Stopped, + PropolisState::Rebooting => VmmState::Rebooting, + PropolisState::Migrating => VmmState::Migrating, + PropolisState::Failed => VmmState::Failed, + PropolisState::Destroyed => VmmState::Destroyed, + PropolisState::Repairing => { + unreachable!("Propolis doesn't use the Repairing state") } - PropolisState::Running => ApiState::Running, - PropolisState::Stopping => ApiState::Stopping, - PropolisState::Stopped => ApiState::Stopped, - PropolisState::Rebooting => ApiState::Rebooting, - PropolisState::Migrating => ApiState::Migrating, - 
PropolisState::Repairing => ApiState::Repairing, - PropolisState::Failed => ApiState::Failed, - PropolisState::Destroyed => ApiState::Destroyed, } } else { self.state.vmm().state @@ -337,7 +327,7 @@ impl SimInstanceInner { self.queue.iter().any(|s| { matches!( s, - MonitorChange::InstanceState(PropolisInstanceState::Rebooting) + MonitorChange::PropolisState(PropolisInstanceState::Rebooting) ) }) } @@ -419,7 +409,7 @@ impl Simulatable for SimInstance { fn new(current: SledInstanceState) -> Self { assert!(matches!( current.vmm_state.state, - ApiInstanceState::Starting | ApiInstanceState::Migrating), + VmmState::Starting | VmmState::Migrating), "new VMMs should always be registered in the Starting or Migrating \ state (supplied state: {:?})", current.vmm_state.state