Skip to content

Commit

Permalink
[nexus] add InstanceState::SagaUnwound
Browse files Browse the repository at this point in the history
This branch is part of ongoing work on the `instance-update` saga (see
PR #5749); I've factored it out into a separate PR. This is largely
because this branch makes mechanical changes to a bunch of different
files that aren't directly related to the core change of #5749, and I'd
like to avoid the additional merge conflicts that are likely when these
changes remain un-merged for a long time.
  • Loading branch information
hawkw committed May 31, 2024
1 parent b0dfd53 commit 486103b
Show file tree
Hide file tree
Showing 14 changed files with 463 additions and 173 deletions.
113 changes: 91 additions & 22 deletions nexus/db-model/src/instance_state.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,21 +2,20 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use super::impl_enum_wrapper;
use super::impl_enum_type;
use omicron_common::api::external;
use serde::Deserialize;
use serde::Serialize;
use std::fmt;
use std::io::Write;

impl_enum_wrapper!(
impl_enum_type!(
#[derive(SqlType, Debug)]
#[diesel(postgres_type(name = "instance_state", schema = "public"))]
pub struct InstanceStateEnum;

#[derive(Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)]
#[derive(Copy, Clone, Debug, PartialEq, AsExpression, FromSqlRow, Serialize, Deserialize)]
#[diesel(sql_type = InstanceStateEnum)]
pub struct InstanceState(pub external::InstanceState);
pub enum InstanceState;

// Enum values
Creating => b"creating"
Expand All @@ -29,45 +28,115 @@ impl_enum_wrapper!(
Repairing => b"repairing"
Failed => b"failed"
Destroyed => b"destroyed"
SagaUnwound => b"saga_unwound"
);

impl InstanceState {
pub fn new(state: external::InstanceState) -> Self {
Self(state)
Self::from(state)
}

pub fn state(&self) -> &external::InstanceState {
&self.0
pub fn state(&self) -> external::InstanceState {
external::InstanceState::from(*self)
}

pub fn label(&self) -> &'static str {
match self {
InstanceState::Creating => "creating",
InstanceState::Starting => "starting",
InstanceState::Running => "running",
InstanceState::Stopping => "stopping",
InstanceState::Stopped => "stopped",
InstanceState::Rebooting => "rebooting",
InstanceState::Migrating => "migrating",
InstanceState::Repairing => "repairing",
InstanceState::Failed => "failed",
InstanceState::Destroyed => "destroyed",
InstanceState::SagaUnwound => "saga_unwound",
}
}
}

impl fmt::Display for InstanceState {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
write!(f, "{}", self.0)
write!(f, "{}", self.label())
}
}

impl From<InstanceState> for sled_agent_client::types::InstanceState {
fn from(s: InstanceState) -> Self {
use external::InstanceState::*;
use sled_agent_client::types::InstanceState as Output;
match s.0 {
Creating => Output::Creating,
Starting => Output::Starting,
Running => Output::Running,
Stopping => Output::Stopping,
Stopped => Output::Stopped,
Rebooting => Output::Rebooting,
Migrating => Output::Migrating,
Repairing => Output::Repairing,
Failed => Output::Failed,
Destroyed => Output::Destroyed,
match s {
InstanceState::Creating => Output::Creating,
InstanceState::Starting => Output::Starting,
InstanceState::Running => Output::Running,
InstanceState::Stopping => Output::Stopping,
InstanceState::Stopped => Output::Stopped,
InstanceState::Rebooting => Output::Rebooting,
InstanceState::Migrating => Output::Migrating,
InstanceState::Repairing => Output::Repairing,
InstanceState::Failed => Output::Failed,
InstanceState::Destroyed | InstanceState::SagaUnwound => {
Output::Destroyed
}
}
}
}

impl From<external::InstanceState> for InstanceState {
fn from(state: external::InstanceState) -> Self {
Self::new(state)
match state {
external::InstanceState::Creating => InstanceState::Creating,
external::InstanceState::Starting => InstanceState::Starting,
external::InstanceState::Running => InstanceState::Running,
external::InstanceState::Stopping => InstanceState::Stopping,
external::InstanceState::Stopped => InstanceState::Stopped,
external::InstanceState::Rebooting => InstanceState::Rebooting,
external::InstanceState::Migrating => InstanceState::Migrating,
external::InstanceState::Repairing => InstanceState::Repairing,
external::InstanceState::Failed => InstanceState::Failed,
external::InstanceState::Destroyed => InstanceState::Destroyed,
}
}
}

impl From<InstanceState> for external::InstanceState {
fn from(state: InstanceState) -> Self {
match state {
InstanceState::Creating => external::InstanceState::Creating,
InstanceState::Starting => external::InstanceState::Starting,
InstanceState::Running => external::InstanceState::Running,
InstanceState::Stopping => external::InstanceState::Stopping,
InstanceState::Stopped => external::InstanceState::Stopped,
InstanceState::Rebooting => external::InstanceState::Rebooting,
InstanceState::Migrating => external::InstanceState::Migrating,
InstanceState::Repairing => external::InstanceState::Repairing,
InstanceState::Failed => external::InstanceState::Failed,
InstanceState::Destroyed | InstanceState::SagaUnwound => {
external::InstanceState::Destroyed
}
}
}
}

impl PartialEq<external::InstanceState> for InstanceState {
fn eq(&self, other: &external::InstanceState) -> bool {
match (self, other) {
(InstanceState::Creating, external::InstanceState::Creating)
| (InstanceState::Starting, external::InstanceState::Starting)
| (InstanceState::Running, external::InstanceState::Running)
| (InstanceState::Stopping, external::InstanceState::Stopping)
| (InstanceState::Stopped, external::InstanceState::Stopped)
| (InstanceState::Rebooting, external::InstanceState::Rebooting)
| (InstanceState::Migrating, external::InstanceState::Migrating)
| (InstanceState::Repairing, external::InstanceState::Repairing)
| (InstanceState::Failed, external::InstanceState::Failed)
| (InstanceState::Destroyed, external::InstanceState::Destroyed)
| (
InstanceState::SagaUnwound,
external::InstanceState::Destroyed,
) => true,
_ => false,
}
}
}
2 changes: 1 addition & 1 deletion nexus/db-model/src/sled_instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@ impl From<SledInstance> for views::SledInstance {
active_sled_id: sled_instance.active_sled_id,
silo_name: sled_instance.silo_name.into(),
project_name: sled_instance.project_name.into(),
state: *sled_instance.state.state(),
state: sled_instance.state.state(),
migration_id: sled_instance.migration_id,
ncpus: sled_instance.ncpus,
memory: sled_instance.memory,
Expand Down
10 changes: 5 additions & 5 deletions nexus/db-queries/src/db/datastore/disk.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,8 +183,8 @@ impl DataStore {
//
// We currently only permit attaching disks to stopped instances.
let ok_to_attach_instance_states = vec![
db::model::InstanceState(api::external::InstanceState::Creating),
db::model::InstanceState(api::external::InstanceState::Stopped),
db::model::InstanceState::Creating,
db::model::InstanceState::Stopped,
];

let attach_update = DiskSetClauseForAttach::new(authz_instance.id());
Expand Down Expand Up @@ -321,9 +321,9 @@ impl DataStore {
//
// We currently only permit detaching disks from stopped instances.
let ok_to_detach_instance_states = vec![
db::model::InstanceState(api::external::InstanceState::Creating),
db::model::InstanceState(api::external::InstanceState::Stopped),
db::model::InstanceState(api::external::InstanceState::Failed),
db::model::InstanceState::Creating,
db::model::InstanceState::Stopped,
db::model::InstanceState::Failed,
];

let detached_label = api::external::DiskState::Detached.label();
Expand Down
62 changes: 43 additions & 19 deletions nexus/db-queries/src/db/datastore/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ use crate::db::lookup::LookupPath;
use crate::db::model::Generation;
use crate::db::model::Instance;
use crate::db::model::InstanceRuntimeState;
use crate::db::model::InstanceState;
use crate::db::model::Name;
use crate::db::model::Project;
use crate::db::model::Sled;
Expand All @@ -30,13 +31,14 @@ use crate::db::update_and_check::UpdateAndCheck;
use crate::db::update_and_check::UpdateAndQueryResult;
use crate::db::update_and_check::UpdateStatus;
use async_bb8_diesel::AsyncRunQueryDsl;
use chrono::Utc;
use chrono::{DateTime, Utc};
use diesel::prelude::*;
use nexus_db_model::ApplySledFilterExt;
use nexus_db_model::Disk;
use nexus_db_model::VmmRuntimeState;
use nexus_types::deployment::SledFilter;
use omicron_common::api;
use omicron_common::api::external;
use omicron_common::api::external::http_pagination::PaginatedBy;
use omicron_common::api::external::CreateResult;
use omicron_common::api::external::DataPageParams;
Expand Down Expand Up @@ -70,13 +72,39 @@ impl InstanceAndActiveVmm {
self.vmm.as_ref().map(|v| v.sled_id)
}

pub fn effective_state(
&self,
) -> omicron_common::api::external::InstanceState {
if let Some(vmm) = &self.vmm {
vmm.runtime.state.0
} else {
self.instance.runtime().nexus_state.0
pub fn effective_state(&self) -> external::InstanceState {
Self::determine_effective_state(&self.instance, &self.vmm).0
}

pub fn determine_effective_state(
instance: &Instance,
vmm: &Option<Vmm>,
) -> (external::InstanceState, DateTime<Utc>) {
match vmm.as_ref().map(|v| &v.runtime) {
// If an active VMM is `Stopped`, the instance must be recast as
// `Stopping`, as no start saga can proceed until the active VMM has
// been `Destroyed`.
Some(VmmRuntimeState {
state: InstanceState::Stopped,
time_state_updated,
..
}) => (external::InstanceState::Stopping, *time_state_updated),
// If the active VMM is `SagaUnwound`, then the instance should be
// treated as `Stopped`.
Some(VmmRuntimeState {
state: InstanceState::SagaUnwound,
time_state_updated,
..
}) => (external::InstanceState::Stopped, *time_state_updated),
// If the active VMM is `SagaUnwound`, then the instance should be
// treated as `Stopped`.
Some(VmmRuntimeState { state, time_state_updated, .. }) => {
(external::InstanceState::from(*state), *time_state_updated)
}
None => (
external::InstanceState::Stopped,
instance.runtime_state.time_updated,
),
}
}
}
Expand All @@ -89,15 +117,11 @@ impl From<(Instance, Option<Vmm>)> for InstanceAndActiveVmm {

impl From<InstanceAndActiveVmm> for omicron_common::api::external::Instance {
fn from(value: InstanceAndActiveVmm) -> Self {
let (run_state, time_run_state_updated) = if let Some(vmm) = value.vmm {
(vmm.runtime.state, vmm.runtime.time_state_updated)
} else {
(
value.instance.runtime_state.nexus_state.clone(),
value.instance.runtime_state.time_updated,
)
};

let (run_state, time_run_state_updated) =
InstanceAndActiveVmm::determine_effective_state(
&value.instance,
&value.vmm,
);
Self {
identity: value.instance.identity(),
project_id: value.instance.project_id,
Expand All @@ -109,7 +133,7 @@ impl From<InstanceAndActiveVmm> for omicron_common::api::external::Instance {
.parse()
.expect("found invalid hostname in the database"),
runtime: omicron_common::api::external::InstanceRuntimeState {
run_state: *run_state.state(),
run_state,
time_run_state_updated,
},
}
Expand Down Expand Up @@ -197,7 +221,7 @@ impl DataStore {

bail_unless!(
instance.runtime().nexus_state.state()
== &api::external::InstanceState::Creating,
== api::external::InstanceState::Creating,
"newly-created Instance has unexpected state: {:?}",
instance.runtime().nexus_state
);
Expand Down
2 changes: 0 additions & 2 deletions nexus/db-queries/src/db/datastore/network_interface.rs
Original file line number Diff line number Diff line change
Expand Up @@ -713,7 +713,6 @@ impl DataStore {
self.transaction_retry_wrapper("instance_update_network_interface")
.transaction(&conn, |conn| {
let err = err.clone();
let stopped = stopped.clone();
let update_target_query = update_target_query.clone();
async move {
let instance_runtime =
Expand Down Expand Up @@ -759,7 +758,6 @@ impl DataStore {
self.transaction_retry_wrapper("instance_update_network_interface")
.transaction(&conn, |conn| {
let err = err.clone();
let stopped = stopped.clone();
let update_target_query = update_target_query.clone();
async move {
let instance_state =
Expand Down
12 changes: 6 additions & 6 deletions nexus/db-queries/src/db/datastore/vmm.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ use diesel::prelude::*;
use omicron_common::api::external::CreateResult;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
use omicron_common::api::external::InstanceState as ApiInstanceState;
use omicron_common::api::external::ListResultVec;
use omicron_common::api::external::LookupResult;
use omicron_common::api::external::LookupType;
Expand Down Expand Up @@ -55,14 +54,15 @@ impl DataStore {
opctx: &OpContext,
vmm_id: &Uuid,
) -> UpdateResult<bool> {
let valid_states = vec![
DbInstanceState::new(ApiInstanceState::Destroyed),
DbInstanceState::new(ApiInstanceState::Failed),
const VALID_STATES: &[DbInstanceState] = &[
DbInstanceState::Destroyed,
DbInstanceState::Failed,
DbInstanceState::SagaUnwound,
];

let updated = diesel::update(dsl::vmm)
.filter(dsl::id.eq(*vmm_id))
.filter(dsl::state.eq_any(valid_states))
.filter(dsl::state.eq_any(VALID_STATES))
.filter(dsl::time_deleted.is_null())
.set(dsl::time_deleted.eq(Utc::now()))
.check_if_exists::<Vmm>(*vmm_id)
Expand Down Expand Up @@ -190,7 +190,7 @@ impl DataStore {
pagparams: &DataPageParams<'_, Uuid>,
) -> ListResultVec<Vmm> {
use crate::db::schema::instance::dsl as instance_dsl;
let destroyed = DbInstanceState::new(ApiInstanceState::Destroyed);
let destroyed = DbInstanceState::Destroyed;
paginated(dsl::vmm, dsl::id, pagparams)
// In order to be considered "abandoned", a VMM must be:
// - in the `Destroyed` state
Expand Down
23 changes: 10 additions & 13 deletions nexus/db-queries/src/db/queries/external_ip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -31,32 +31,29 @@ use nexus_db_model::IpAttachState;
use nexus_db_model::IpAttachStateEnum;
use omicron_common::address::NUM_SOURCE_NAT_PORTS;
use omicron_common::api::external;
use omicron_common::api::external::InstanceState as ApiInstanceState;
use uuid::Uuid;

// Broadly, we want users to be able to attach/detach at will
// once an instance is created and functional.
pub const SAFE_TO_ATTACH_INSTANCE_STATES_CREATING: [DbInstanceState; 3] = [
DbInstanceState(ApiInstanceState::Stopped),
DbInstanceState(ApiInstanceState::Running),
DbInstanceState(ApiInstanceState::Creating),
];
pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] = [
DbInstanceState(ApiInstanceState::Stopped),
DbInstanceState(ApiInstanceState::Running),
DbInstanceState::Stopped,
DbInstanceState::Running,
DbInstanceState::Creating,
];
pub const SAFE_TO_ATTACH_INSTANCE_STATES: [DbInstanceState; 2] =
[DbInstanceState::Stopped, DbInstanceState::Running];
// If we're in a state which will naturally resolve to either
// stopped/running, we want users to know that the request can be
// retried safely via Error::unavail.
// TODO: We currently stop if there's a migration or other state change.
// There may be a good case for RPWing
// external_ip_state -> { NAT RPW, sled-agent } in future.
pub const SAFE_TRANSIENT_INSTANCE_STATES: [DbInstanceState; 5] = [
DbInstanceState(ApiInstanceState::Starting),
DbInstanceState(ApiInstanceState::Stopping),
DbInstanceState(ApiInstanceState::Creating),
DbInstanceState(ApiInstanceState::Rebooting),
DbInstanceState(ApiInstanceState::Migrating),
DbInstanceState::Starting,
DbInstanceState::Stopping,
DbInstanceState::Creating,
DbInstanceState::Rebooting,
DbInstanceState::Migrating,
];

/// The maximum number of disks that can be attached to an instance.
Expand Down
Loading

0 comments on commit 486103b

Please sign in to comment.