Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

add migration table and explicit migration tracking in sled-agent #5859

Merged
merged 34 commits into from
Jun 12, 2024
Merged
Show file tree
Hide file tree
Changes from 20 commits
Commits
Show all changes
34 commits
Select commit Hold shift + click to select a range
370b73c
add migration table
hawkw Jun 5, 2024
d017eb0
add migration state to nexus API
hawkw Jun 5, 2024
9362651
okay i really hopep the CTE works
hawkw Jun 5, 2024
e6e5e14
unbreak CTE, et cetera
hawkw Jun 5, 2024
c8a0c7c
fix migration name
hawkw Jun 5, 2024
00c2c93
add generation numbers
hawkw Jun 5, 2024
7f95255
regen openapi again
hawkw Jun 6, 2024
6304817
add timestamps
hawkw Jun 6, 2024
d57060f
actually create and delete migration records
hawkw Jun 6, 2024
77e3080
sled-agent api plumbing
hawkw Jun 6, 2024
6b63d22
fix CTE syntax error
hawkw Jun 6, 2024
7f3c7fa
add test that migration state updates
hawkw Jun 6, 2024
45d7844
clippy
hawkw Jun 6, 2024
c6311fb
make simulated sled agent complete src migration
hawkw Jun 7, 2024
43254ad
do the actual correct simulated migration
hawkw Jun 7, 2024
f649f82
Merge branch 'main' into eliza/migration-table
hawkw Jun 7, 2024
fd5e49c
Update common/src/api/internal/nexus.rs
hawkw Jun 7, 2024
631ba0f
add pending state
hawkw Jun 7, 2024
1aecc88
fix sled-agent sim panicking when unsetting migration
hawkw Jun 7, 2024
270368f
fix unexpected null
hawkw Jun 7, 2024
47cfd55
fix places where i got source/target backwards
hawkw Jun 10, 2024
f399eb1
misc. @gjcolombo review feedback
hawkw Jun 10, 2024
8e75658
@smklein's review feedback
hawkw Jun 10, 2024
762febc
update tests to expect migration states
hawkw Jun 11, 2024
7634307
update tests, start out in pending
hawkw Jun 11, 2024
e4f7c6b
don't generate spurious state transitions
hawkw Jun 11, 2024
9d0de2b
delete records when a migration completes
hawkw Jun 11, 2024
f6c9875
report migration failure if an in-progress vmm vanishes
hawkw Jun 11, 2024
77a6dfe
update comment
hawkw Jun 11, 2024
29eae36
fix comment
hawkw Jun 11, 2024
18ccc7e
rm code from other branch
hawkw Jun 11, 2024
00ad191
oops
hawkw Jun 11, 2024
4e3e4f2
Merge branch 'main' into eliza/migration-table
hawkw Jun 12, 2024
a1ca946
Update sled-agent/src/common/instance.rs
hawkw Jun 12, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 43 additions & 0 deletions clients/nexus-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -149,6 +149,49 @@ impl From<omicron_common::api::internal::nexus::SledInstanceState>
instance_state: s.instance_state.into(),
propolis_id: s.propolis_id,
vmm_state: s.vmm_state.into(),
migration_state: s.migration_state.map(Into::into),
}
}
}

impl From<omicron_common::api::internal::nexus::MigrationRuntimeState>
for types::MigrationRuntimeState
{
fn from(
s: omicron_common::api::internal::nexus::MigrationRuntimeState,
) -> Self {
Self {
migration_id: s.migration_id,
role: s.role.into(),
state: s.state.into(),
gen: s.gen,
time_updated: s.time_updated,
}
}
}

impl From<omicron_common::api::internal::nexus::MigrationRole>
for types::MigrationRole
{
fn from(s: omicron_common::api::internal::nexus::MigrationRole) -> Self {
use omicron_common::api::internal::nexus::MigrationRole as Input;
match s {
Input::Source => Self::Source,
Input::Target => Self::Target,
}
}
}

impl From<omicron_common::api::internal::nexus::MigrationState>
for types::MigrationState
{
fn from(s: omicron_common::api::internal::nexus::MigrationState) -> Self {
use omicron_common::api::internal::nexus::MigrationState as Input;
match s {
Input::Pending => Self::Pending,
Input::InProgress => Self::InProgress,
Input::Completed => Self::Completed,
Input::Failed => Self::Failed,
}
}
}
Expand Down
41 changes: 41 additions & 0 deletions clients/sled-agent-client/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -327,6 +327,47 @@ impl From<types::SledInstanceState>
instance_state: s.instance_state.into(),
propolis_id: s.propolis_id,
vmm_state: s.vmm_state.into(),
migration_state: s.migration_state.map(Into::into),
}
}
}

impl From<types::MigrationRuntimeState>
for omicron_common::api::internal::nexus::MigrationRuntimeState
{
fn from(s: types::MigrationRuntimeState) -> Self {
Self {
migration_id: s.migration_id,
state: s.state.into(),
role: s.role.into(),
gen: s.gen,
time_updated: s.time_updated,
}
}
}

impl From<types::MigrationRole>
for omicron_common::api::internal::nexus::MigrationRole
{
fn from(r: types::MigrationRole) -> Self {
use omicron_common::api::internal::nexus::MigrationRole as Output;
match r {
types::MigrationRole::Source => Output::Source,
types::MigrationRole::Target => Output::Target,
}
}
}

impl From<types::MigrationState>
for omicron_common::api::internal::nexus::MigrationState
{
fn from(s: types::MigrationState) -> Self {
use omicron_common::api::internal::nexus::MigrationState as Output;
match s {
types::MigrationState::Pending => Output::Pending,
types::MigrationState::InProgress => Output::InProgress,
types::MigrationState::Failed => Output::Failed,
types::MigrationState::Completed => Output::Completed,
}
}
}
Expand Down
86 changes: 86 additions & 0 deletions common/src/api/internal/nexus.rs
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ use omicron_uuid_kinds::UpstairsSessionKind;
use parse_display::{Display, FromStr};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use std::fmt;
use std::net::SocketAddr;
use std::time::Duration;
use strum::{EnumIter, IntoEnumIterator};
Expand Down Expand Up @@ -108,6 +109,91 @@ pub struct SledInstanceState {

/// The most recent state of the sled's VMM process.
pub vmm_state: VmmRuntimeState,

/// The current state of any in-progress migration for this instance, as
/// understood by this sled.
pub migration_state: Option<MigrationRuntimeState>,
}

/// An update from a sled regarding the state of a migration, indicating the
/// role of the VMM whose migration state was updated.
#[derive(Clone, Debug, Deserialize, Serialize, JsonSchema)]
pub struct MigrationRuntimeState {
pub migration_id: Uuid,
pub state: MigrationState,
pub role: MigrationRole,
pub gen: Generation,

/// Timestamp for the migration state update.
pub time_updated: DateTime<Utc>,
}

/// The state of an instance's live migration.
#[derive(
Clone,
Copy,
Debug,
Default,
PartialEq,
Eq,
Deserialize,
Serialize,
JsonSchema,
)]
#[serde(rename_all = "snake_case")]
pub enum MigrationState {
hawkw marked this conversation as resolved.
Show resolved Hide resolved
/// The migration has not started for this VMM.
#[default]
Pending,
/// The migration is in progress.
InProgress,
/// The migration has failed.
Failed,
/// The migration has completed.
Completed,
}

impl MigrationState {
pub fn label(&self) -> &'static str {
match self {
Self::Pending => "pending",
Self::InProgress => "in_progress",
Self::Completed => "completed",
Self::Failed => "failed",
}
}
}

impl fmt::Display for MigrationState {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.label())
}
}

#[derive(
Clone, Copy, Debug, PartialEq, Eq, Deserialize, Serialize, JsonSchema,
)]
#[serde(rename_all = "snake_case")]
pub enum MigrationRole {
/// This update concerns the source VMM of a migration.
Source,
/// This update concerns the target VMM of a migration.
Target,
}

impl MigrationRole {
pub fn label(&self) -> &'static str {
match self {
Self::Source => "source",
Self::Target => "target",
}
}
}

impl fmt::Display for MigrationRole {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
f.write_str(self.label())
}
}

// Oximeter producer/collector objects.
Expand Down
4 changes: 4 additions & 0 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ pub mod ipv6;
mod ipv6net;
mod l4_port_range;
mod macaddr;
mod migration;
mod migration_state;
mod name;
mod network_interface;
mod oximeter_info;
Expand Down Expand Up @@ -152,6 +154,8 @@ pub use ipv4net::*;
pub use ipv6::*;
pub use ipv6net::*;
pub use l4_port_range::*;
pub use migration::*;
pub use migration_state::*;
pub use name::*;
pub use network_interface::*;
pub use oximeter_info::*;
Expand Down
80 changes: 80 additions & 0 deletions nexus/db-model/src/migration.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use super::Generation;
use crate::schema::migration;
use crate::MigrationState;
use chrono::DateTime;
use chrono::Utc;
use omicron_common::api::internal::nexus;
use serde::Deserialize;
use serde::Serialize;
use uuid::Uuid;

/// The state of a migration as understood by Nexus.
#[derive(
Clone, Debug, Queryable, Insertable, Selectable, Serialize, Deserialize,
)]
#[diesel(table_name = migration)]
pub struct Migration {
/// The migration's UUID.
///
/// This is the primary key of the migration table and is referenced by the
/// `instance` table's `migration_id` field.
pub id: Uuid,

/// The time at which this migration record was created.
pub time_created: DateTime<Utc>,

/// The time at which this migration record was deleted,
pub time_deleted: Option<DateTime<Utc>>,

/// The time at which the source VMM state was last updated.

hawkw marked this conversation as resolved.
Show resolved Hide resolved
/// The state of the migration source VMM.
pub source_state: MigrationState,

/// The ID of the migration source VMM.
pub source_propolis_id: Uuid,

/// The generation number for the source state.
pub source_gen: Generation,

/// The time the source VMM state was most recently updated.
pub time_source_updated: Option<DateTime<Utc>>,

/// The state of the migration target VMM.
pub target_state: MigrationState,
Comment on lines +45 to +46
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'm not opposed to starting with the "state" being the same at source and destination, but I wonder if we'll eventually want different types here. Seems possible that there's a source-only or target-only state in the future.

(No change needed, just musing)

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now, the intention for this table is to store pretty low-resolution data about the migration state, basically just whether it's completed or failed, so we're not including more detailed migration status reported by Propolis. So, we don't really expect these to include states that are specific to one side of the migration currently, although I suppose we could...


/// The ID of the migration target VMM.
pub target_propolis_id: Uuid,

/// The generation number for the target state.
pub target_gen: Generation,

/// The time the target VMM state was most recently updated.
pub time_target_updated: Option<DateTime<Utc>>,
}

impl Migration {
pub fn new(
migration_id: Uuid,
source_propolis_id: Uuid,
target_propolis_id: Uuid,
) -> Self {
Self {
id: migration_id,
time_created: Utc::now(),
time_deleted: None,
source_state: nexus::MigrationState::Pending.into(),
source_propolis_id,
source_gen: Generation::new(),
time_source_updated: None,
target_state: nexus::MigrationState::Pending.into(),
target_propolis_id,
target_gen: Generation::new(),
time_target_updated: None,
}
}
}
40 changes: 40 additions & 0 deletions nexus/db-model/src/migration_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Database representation of a migration's state as understood by Nexus.

use super::impl_enum_wrapper;
use omicron_common::api::internal::nexus;
use serde::Deserialize;
use serde::Serialize;
use std::fmt;
use std::io::Write;

impl_enum_wrapper!(
#[derive(Clone, SqlType, Debug, QueryId)]
#[diesel(postgres_type(name = "migration_state", schema = "public"))]
pub struct MigrationStateEnum;

#[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq, Eq)]
#[diesel(sql_type = MigrationStateEnum)]
pub struct MigrationState(pub nexus::MigrationState);

// Enum values
Pending => b"pending"
InProgress => b"in_progress"
Completed => b"completed"
Failed => b"failed"
);

impl fmt::Display for MigrationState {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.0, f)
}
}

impl From<nexus::MigrationState> for MigrationState {
fn from(s: nexus::MigrationState) -> Self {
Self(s)
}
}
19 changes: 19 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1758,6 +1758,25 @@ table! {
}
}

table! {
migration (id) {
id -> Uuid,
time_created -> Timestamptz,
time_deleted -> Nullable<Timestamptz>,
source_state -> crate::MigrationStateEnum,
source_propolis_id -> Uuid,
source_gen -> Int8,
time_source_updated -> Nullable<Timestamptz>,
target_state -> crate::MigrationStateEnum,
target_propolis_id -> Uuid,
target_gen -> Int8,
time_target_updated -> Nullable<Timestamptz>,
}
}

allow_tables_to_appear_in_same_query!(instance, migration);
joinable!(instance -> migration (migration_id));

allow_tables_to_appear_in_same_query!(
ip_pool_range,
ip_pool,
Expand Down
3 changes: 2 additions & 1 deletion nexus/db-model/src/schema_versions.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ use std::collections::BTreeMap;
///
/// This must be updated when you change the database schema. Refer to
/// schema/crdb/README.adoc in the root of this repository for details.
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(72, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(73, 0, 0);

/// List of all past database schema versions, in *reverse* order
///
Expand All @@ -29,6 +29,7 @@ static KNOWN_VERSIONS: Lazy<Vec<KnownVersion>> = Lazy::new(|| {
// | leaving the first copy as an example for the next person.
// v
// KnownVersion::new(next_int, "unique-dirname-with-the-sql-files"),
KnownVersion::new(73, "add-migration-table"),
KnownVersion::new(72, "fix-provisioning-counters"),
KnownVersion::new(71, "add-saga-unwound-vmm-state"),
KnownVersion::new(70, "separate-instance-and-vmm-states"),
Expand Down
Loading
Loading