Skip to content

Commit

Permalink
sled-agent api plumbing
Browse files Browse the repository at this point in the history
  • Loading branch information
hawkw committed Jun 6, 2024
1 parent d57060f commit 77e3080
Show file tree
Hide file tree
Showing 7 changed files with 197 additions and 64 deletions.
43 changes: 42 additions & 1 deletion clients/nexus-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,7 +149,48 @@ impl From<omicron_common::api::internal::nexus::SledInstanceState>
instance_state: s.instance_state.into(),
propolis_id: s.propolis_id,
vmm_state: s.vmm_state.into(),
migration_state: None,
migration_state: s.migration_state.map(Into::into),
}
}
}

/// Converts the internal Nexus API's migration runtime state into the
/// generated nexus-client type.
impl From<omicron_common::api::internal::nexus::MigrationRuntimeState>
    for types::MigrationRuntimeState
{
    fn from(
        value: omicron_common::api::internal::nexus::MigrationRuntimeState,
    ) -> Self {
        // Destructure so that a field added to the source type becomes a
        // compile error here rather than a silently-dropped value.
        let omicron_common::api::internal::nexus::MigrationRuntimeState {
            migration_id,
            role,
            state,
            gen,
            time_updated,
        } = value;
        Self {
            migration_id,
            role: role.into(),
            state: state.into(),
            gen: gen.into(),
            time_updated,
        }
    }
}

/// Converts the internal Nexus API's migration role into the generated
/// nexus-client type.
impl From<omicron_common::api::internal::nexus::MigrationRole>
    for types::MigrationRole
{
    fn from(role: omicron_common::api::internal::nexus::MigrationRole) -> Self {
        // The two enums have identical variants; map them one-to-one.
        type Input = omicron_common::api::internal::nexus::MigrationRole;
        match role {
            Input::Source => types::MigrationRole::Source,
            Input::Target => types::MigrationRole::Target,
        }
    }
}

/// Converts the internal Nexus API's migration state into the generated
/// nexus-client type.
impl From<omicron_common::api::internal::nexus::MigrationState>
    for types::MigrationState
{
    fn from(
        state: omicron_common::api::internal::nexus::MigrationState,
    ) -> Self {
        // The two enums have identical variants; map them one-to-one.
        type Input = omicron_common::api::internal::nexus::MigrationState;
        match state {
            Input::InProgress => types::MigrationState::InProgress,
            Input::Completed => types::MigrationState::Completed,
            Input::Failed => types::MigrationState::Failed,
        }
    }
}
Expand Down
41 changes: 40 additions & 1 deletion clients/sled-agent-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,7 +327,46 @@ impl From<types::SledInstanceState>
instance_state: s.instance_state.into(),
propolis_id: s.propolis_id,
vmm_state: s.vmm_state.into(),
migration_state: None,
migration_state: s.migration_state.map(Into::into),
}
}
}

/// Converts the generated sled-agent-client's migration runtime state into
/// the internal Nexus API type.
impl From<types::MigrationRuntimeState>
    for omicron_common::api::internal::nexus::MigrationRuntimeState
{
    fn from(value: types::MigrationRuntimeState) -> Self {
        // Destructure so that a field added to the client type becomes a
        // compile error here rather than a silently-dropped value.
        let types::MigrationRuntimeState {
            migration_id,
            state,
            role,
            gen,
            time_updated,
        } = value;
        Self {
            migration_id,
            state: state.into(),
            role: role.into(),
            gen,
            time_updated,
        }
    }
}

/// Converts the generated sled-agent-client's migration role into the
/// internal Nexus API type.
impl From<types::MigrationRole>
    for omicron_common::api::internal::nexus::MigrationRole
{
    fn from(role: types::MigrationRole) -> Self {
        // The two enums have identical variants; map them one-to-one.
        match role {
            types::MigrationRole::Source => Self::Source,
            types::MigrationRole::Target => Self::Target,
        }
    }
}

/// Converts the generated sled-agent-client's migration state into the
/// internal Nexus API type.
impl From<types::MigrationState>
    for omicron_common::api::internal::nexus::MigrationState
{
    fn from(state: types::MigrationState) -> Self {
        // The two enums have identical variants; map them one-to-one.
        match state {
            types::MigrationState::InProgress => Self::InProgress,
            types::MigrationState::Completed => Self::Completed,
            types::MigrationState::Failed => Self::Failed,
        }
    }
}
Expand Down
5 changes: 3 additions & 2 deletions nexus/db-model/src/migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ use crate::schema::migration;
use crate::MigrationState;
use chrono::DateTime;
use chrono::Utc;
use omicron_common::api::internal::nexus;
use serde::Deserialize;
use serde::Serialize;
use uuid::Uuid;
Expand Down Expand Up @@ -62,11 +63,11 @@ impl Migration {
Self {
id: migration_id,
time_created: Utc::now(),
source_state: MigrationState::InProgress,
source_state: nexus::MigrationState::InProgress.into(),
source_propolis_id,
source_gen: Generation::new(),
time_source_updated: None,
target_state: MigrationState::InProgress,
target_state: nexus::MigrationState::InProgress.into(),
target_propolis_id,
target_gen: Generation::new(),
time_target_updated: None,
Expand Down
4 changes: 3 additions & 1 deletion nexus/db-queries/src/db/datastore/migration.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,8 @@ use crate::db::update_and_check::UpdateAndCheck;
use crate::db::update_and_check::UpdateStatus;
use async_bb8_diesel::AsyncRunQueryDsl;
use chrono::Utc;
use diesel::prelude::*;
use omicron_common::api::external::CreateResult;
use omicron_common::api::external::Error;
use omicron_common::api::external::UpdateResult;
use uuid::Uuid;

Expand All @@ -30,6 +30,8 @@ impl DataStore {
.values(migration)
.on_conflict(dsl::id)
.do_update()
// Setting a column equal to itself is a no-op write; it turns the
// conflict action into a real `DO UPDATE` so the upsert returns the
// existing row, where `DO NOTHING` would return no rows.
// NOTE(review): this is the standard Diesel upsert idiom — confirm the
// returned row is what callers expect on conflict.
.set(dsl::time_created.eq(dsl::time_created))
.returning(Migration::as_returning())
.get_result_async(&*self.pool_connection_authorized(opctx).await?)
.await
Expand Down
1 change: 1 addition & 0 deletions nexus/db-queries/src/db/datastore/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ pub mod instance;
mod inventory;
mod ip_pool;
mod ipv4_nat_entry;
mod migration;
mod network_interface;
mod oximeter;
mod physical_disk;
Expand Down
15 changes: 9 additions & 6 deletions nexus/src/app/sagas/instance_migrate.rs
Original file line number Diff line number Diff line change
Expand Up @@ -216,11 +216,14 @@ async fn sim_create_migration_record(

osagactx
.datastore()
.migration_insert(db::model::Migration::new(
migration_id,
source_propolis_id,
target_propolis_id,
))
.migration_insert(
&opctx,
db::model::Migration::new(
migration_id,
source_propolis_id,
target_propolis_id,
),
)
.await
.map_err(ActionError::action_failed)
}
Expand All @@ -239,7 +242,7 @@ async fn sim_delete_migration_record(

info!(osagactx.log(), "deleting migration record";
"migration_id" => %migration_id);
osagactx.datastore().migration_mark_deleted(migration_id).await?;
osagactx.datastore().migration_mark_deleted(&opctx, migration_id).await?;
Ok(())
}

Expand Down
152 changes: 99 additions & 53 deletions sled-agent/src/common/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
use crate::params::InstanceMigrationSourceParams;
use chrono::{DateTime, Utc};
use omicron_common::api::external::Generation;
use omicron_common::api::internal::nexus::{
InstanceRuntimeState, SledInstanceState, VmmRuntimeState, VmmState,
InstanceRuntimeState, MigrationRole, MigrationRuntimeState, MigrationState,
SledInstanceState, VmmRuntimeState, VmmState,
};
use propolis_client::types::{
InstanceState as PropolisApiState, InstanceStateMonitorResponse,
MigrationState,
MigrationState as PropolisMigrationState,
};
use uuid::Uuid;

Expand All @@ -21,6 +23,7 @@ pub struct InstanceStates {
instance: InstanceRuntimeState,
vmm: VmmRuntimeState,
propolis_id: Uuid,
migration: Option<MigrationRuntimeState>,
}

/// Newtype to allow conversion from Propolis API states (returned by the
Expand Down Expand Up @@ -123,10 +126,10 @@ impl ObservedPropolisState {
if this_id == propolis_migration.migration_id =>
{
match propolis_migration.state {
MigrationState::Finish => {
PropolisMigrationState::Finish => {
ObservedMigrationStatus::Succeeded
}
MigrationState::Error => {
PropolisMigrationState::Error => {
ObservedMigrationStatus::Failed
}
_ => ObservedMigrationStatus::InProgress,
Expand Down Expand Up @@ -208,7 +211,7 @@ impl InstanceStates {
vmm: VmmRuntimeState,
propolis_id: Uuid,
) -> Self {
InstanceStates { instance, vmm, propolis_id }
InstanceStates { instance, vmm, propolis_id, migration: None }
}

pub fn instance(&self) -> &InstanceRuntimeState {
Expand All @@ -231,7 +234,7 @@ impl InstanceStates {
instance_state: self.instance.clone(),
vmm_state: self.vmm.clone(),
propolis_id: self.propolis_id,
migration_state: None, // TODO(eliza): add this
migration_state: self.migration.clone(),
}
}

Expand All @@ -257,56 +260,82 @@ impl InstanceStates {
// Update the instance record to reflect the result of any completed
// migration.
match observed.migration_status {
ObservedMigrationStatus::Succeeded => match self.propolis_role() {
// This is a successful migration out. Point the instance to the
// target VMM, but don't clear migration IDs; let the target do
// that so that the instance will continue to appear to be
// migrating until it is safe to migrate again.
PropolisRole::Active => {
self.switch_propolis_id_to_target(observed.time);

assert_eq!(self.propolis_role(), PropolisRole::Retired);
ObservedMigrationStatus::Succeeded => {
if let Some(ref mut migration) = self.migration {
migration.state = MigrationState::Completed;
migration.gen = migration.gen.next();
migration.time_updated = observed.time;
} else {
// XXX(eliza): we observed a successful migration without having an
// active migration state. This is almost certainly a bug, but
// panicking here seems too drastic, and this function has no
// `slog` logger available to emit a warning with.
}
match self.propolis_role() {
// This is a successful migration out. Point the instance to the
// target VMM, but don't clear migration IDs; let the target do
// that so that the instance will continue to appear to be
// migrating until it is safe to migrate again.
PropolisRole::Active => {
self.switch_propolis_id_to_target(observed.time);

assert_eq!(self.propolis_role(), PropolisRole::Retired);
}

// This is a successful migration in. Point the instance to the
// target VMM and clear migration IDs so that another migration
// in can begin. Propolis will continue reporting that this
// migration was successful, but because its ID has been
// discarded the observed migration status will change from
// Succeeded to NoMigration.
//
// Note that these calls increment the instance's generation
// number twice. This is by design and allows the target's
// migration-ID-clearing update to overtake the source's update.
PropolisRole::MigrationTarget => {
self.switch_propolis_id_to_target(observed.time);
self.clear_migration_ids(observed.time);

assert_eq!(self.propolis_role(), PropolisRole::Active);
}
// This is a successful migration in. Point the instance to the
// target VMM and clear migration IDs so that another migration
// in can begin. Propolis will continue reporting that this
// migration was successful, but because its ID has been
// discarded the observed migration status will change from
// Succeeded to NoMigration.
//
// Note that these calls increment the instance's generation
// number twice. This is by design and allows the target's
// migration-ID-clearing update to overtake the source's update.
PropolisRole::MigrationTarget => {
self.switch_propolis_id_to_target(observed.time);
self.clear_migration_ids(observed.time);

assert_eq!(self.propolis_role(), PropolisRole::Active);
}

// This is a migration source that previously reported success
// and removed itself from the active Propolis position. Don't
// touch the instance.
PropolisRole::Retired => {}
},
ObservedMigrationStatus::Failed => match self.propolis_role() {
// This is a failed migration out. Clear migration IDs so that
// Nexus can try again.
PropolisRole::Active => {
self.clear_migration_ids(observed.time);
// This is a migration source that previously reported success
// and removed itself from the active Propolis position. Don't
// touch the instance.
PropolisRole::Retired => {}
}
}
ObservedMigrationStatus::Failed => {
if let Some(ref mut migration) = self.migration {
migration.state = MigrationState::Failed;
migration.gen = migration.gen.next();
migration.time_updated = observed.time;
} else {
// XXX(eliza): we observed a failed migration without having an
// active migration state. This is almost certainly a bug, but
// panicking here seems too drastic, and this function has no
// `slog` logger available to emit a warning with.
}
match self.propolis_role() {
// This is a failed migration out. Clear migration IDs so that
// Nexus can try again.
PropolisRole::Active => {
self.clear_migration_ids(observed.time);
}

// This is a failed migration in. Leave the migration IDs alone
// so that the migration won't appear to have concluded until
// the source is ready to start a new one.
PropolisRole::MigrationTarget => {}
// This is a failed migration in. Leave the migration IDs alone
// so that the migration won't appear to have concluded until
// the source is ready to start a new one.
PropolisRole::MigrationTarget => {}

// This VMM was part of a failed migration and was subsequently
// removed from the instance record entirely. There's nothing to
// update.
PropolisRole::Retired => {}
},
// This VMM was part of a failed migration and was subsequently
// removed from the instance record entirely. There's nothing to
// update.
PropolisRole::Retired => {}
}
}
ObservedMigrationStatus::NoMigration
| ObservedMigrationStatus::InProgress
| ObservedMigrationStatus::Pending => {}
Expand Down Expand Up @@ -432,12 +461,29 @@ impl InstanceStates {
ids: &Option<InstanceMigrationSourceParams>,
now: DateTime<Utc>,
) {
if let Some(ids) = ids {
self.instance.migration_id = Some(ids.migration_id);
self.instance.dst_propolis_id = Some(ids.dst_propolis_id);
if let Some(InstanceMigrationSourceParams {
migration_id,
dst_propolis_id,
}) = *ids
{
self.instance.migration_id = Some(migration_id);
self.instance.dst_propolis_id = Some(dst_propolis_id);
let role = if dst_propolis_id == self.propolis_id {
MigrationRole::Target
} else {
MigrationRole::Source
};
self.migration = Some(MigrationRuntimeState {
migration_id,
state: MigrationState::InProgress,
role,
gen: Generation::new(),
time_updated: now,
})
} else {
self.instance.migration_id = None;
self.instance.dst_propolis_id = None;
self.migration = None;
}

self.instance.gen = self.instance.gen.next();
Expand Down

0 comments on commit 77e3080

Please sign in to comment.