Skip to content

Commit

Permalink
[nexus] add basic support for expunged sled policy and decommissioned…
Browse files Browse the repository at this point in the history
… sled state (#5032)

This PR does a few things:

* Migrates our current `sled_provision_state` to a new `sled_policy` enum, which has more information (per [RFD 457](https://rfd.shared.oxide.computer/rfd/0457)). This PR implements the expunged state, not the graceful removal state.
* Adds a `sled_state` enum, which describes Nexus's view of the sled. This PR adds the `active` and `decommissioned` states.
* Adds **internal** code to move around between valid states.
* Makes the blueprint execution code aware of sleds eligible for discretionary services.
* Adds tests for all of this new stuff, as well as valid and invalid state transitions -- and also makes sure that if we _do_ end up in an invalid state, things don't break down.

Not done here, but in future PRs (to try and keep this PR a manageable size):

* We'll add the endpoint to mark the sled as expunged (this is an irreversible operation and will need the appropriate warnings): #5134
* We'll add blueprint code to start removing sleds.
* We'll also remove the sled `time_deleted` because it has a lifecycle too complicated to be described that way -- instead, we'll add a `time_decommissioned` field: #5131
  • Loading branch information
sunshowers authored Feb 23, 2024
1 parent a07cae6 commit a6ef7f9
Show file tree
Hide file tree
Showing 53 changed files with 2,115 additions and 468 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 4 additions & 2 deletions nexus/blueprint-execution/src/dns.rs
Original file line number Diff line number Diff line change
Expand Up @@ -328,7 +328,8 @@ mod test {
use nexus_types::deployment::Policy;
use nexus_types::deployment::SledResources;
use nexus_types::deployment::ZpoolName;
use nexus_types::external_api::views::SledProvisionState;
use nexus_types::external_api::views::SledPolicy;
use nexus_types::external_api::views::SledState;
use nexus_types::internal_api::params::DnsConfigParams;
use nexus_types::internal_api::params::DnsConfigZone;
use nexus_types::internal_api::params::DnsRecord;
Expand Down Expand Up @@ -409,7 +410,8 @@ mod test {
.zip(possible_sled_subnets)
.map(|(sled_id, subnet)| {
let sled_resources = SledResources {
provision_state: SledProvisionState::Provisionable,
policy: SledPolicy::provisionable(),
state: SledState::Active,
zpools: BTreeSet::from([ZpoolName::from_str(&format!(
"oxp_{}",
Uuid::new_v4()
Expand Down
6 changes: 4 additions & 2 deletions nexus/db-model/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -73,9 +73,10 @@ mod silo_user;
mod silo_user_password_hash;
mod sled;
mod sled_instance;
mod sled_provision_state;
mod sled_policy;
mod sled_resource;
mod sled_resource_kind;
mod sled_state;
mod sled_underlay_subnet_allocation;
mod snapshot;
mod ssh_key;
Expand Down Expand Up @@ -161,9 +162,10 @@ pub use silo_user::*;
pub use silo_user_password_hash::*;
pub use sled::*;
pub use sled_instance::*;
pub use sled_provision_state::*;
pub use sled_policy::to_db_sled_policy; // Do not expose DbSledPolicy
pub use sled_resource::*;
pub use sled_resource_kind::*;
pub use sled_state::*;
pub use sled_underlay_subnet_allocation::*;
pub use snapshot::*;
pub use ssh_key::*;
Expand Down
5 changes: 3 additions & 2 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ use omicron_common::api::external::SemverVersion;
///
/// This should be updated whenever the schema is changed. For more details,
/// refer to: schema/crdb/README.adoc
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(36, 0, 0);
pub const SCHEMA_VERSION: SemverVersion = SemverVersion::new(37, 0, 1);

table! {
disk (id) {
Expand Down Expand Up @@ -824,7 +824,8 @@ table! {
ip -> Inet,
port -> Int4,
last_used_address -> Inet,
provision_state -> crate::SledProvisionStateEnum,
sled_policy -> crate::sled_policy::SledPolicyEnum,
sled_state -> crate::SledStateEnum,
}
}

Expand Down
31 changes: 23 additions & 8 deletions nexus/db-model/src/sled.rs
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,11 @@
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use super::{ByteCount, Generation, SqlU16, SqlU32};
use super::{ByteCount, Generation, SledState, SqlU16, SqlU32};
use crate::collection::DatastoreCollectionConfig;
use crate::ipv6;
use crate::schema::{physical_disk, service, sled, zpool};
use crate::{ipv6, SledProvisionState};
use crate::sled_policy::DbSledPolicy;
use chrono::{DateTime, Utc};
use db_macros::Asset;
use nexus_types::{external_api::shared, external_api::views, identity::Asset};
Expand Down Expand Up @@ -60,7 +61,11 @@ pub struct Sled {
/// The last IP address provided to a propolis instance on this sled
pub last_used_address: ipv6::Ipv6Addr,

provision_state: SledProvisionState,
#[diesel(column_name = sled_policy)]
policy: DbSledPolicy,

#[diesel(column_name = sled_state)]
state: SledState,
}

impl Sled {
Expand All @@ -84,8 +89,15 @@ impl Sled {
&self.serial_number
}

pub fn provision_state(&self) -> SledProvisionState {
self.provision_state
/// The policy here is the `views::SledPolicy` because we expect external
/// users to always use that.
pub fn policy(&self) -> views::SledPolicy {
self.policy.into()
}

/// Returns the sled's state.
pub fn state(&self) -> SledState {
self.state
}
}

Expand All @@ -99,7 +111,8 @@ impl From<Sled> for views::Sled {
part: sled.part_number,
revision: sled.revision,
},
provision_state: sled.provision_state.into(),
policy: sled.policy.into(),
state: sled.state.into(),
usable_hardware_threads: sled.usable_hardware_threads.0,
usable_physical_ram: *sled.usable_physical_ram,
}
Expand Down Expand Up @@ -197,8 +210,10 @@ impl SledUpdate {
serial_number: self.serial_number,
part_number: self.part_number,
revision: self.revision,
// By default, sleds start as provisionable.
provision_state: SledProvisionState::Provisionable,
// By default, sleds start in-service.
policy: DbSledPolicy::InService,
// Currently, new sleds start in the "active" state.
state: SledState::Active,
usable_hardware_threads: self.usable_hardware_threads,
usable_physical_ram: self.usable_physical_ram,
reservoir_size: self.reservoir_size,
Expand Down
68 changes: 68 additions & 0 deletions nexus/db-model/src/sled_policy.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Database representation of a sled's operator-defined policy.
//!
//! This is related to, but different from `SledState`: a sled's **policy** is
//! its disposition as specified by the operator, while its **state** refers to
//! what's currently on it, as determined by Nexus.
//!
//! For example, a sled might be in the `Active` state, but have a policy of
//! `Expunged` -- this would mean that Nexus knows about resources currently
//! provisioned on the sled, but the operator has said that it should be marked
//! as gone.
use super::impl_enum_type;
use nexus_types::external_api::views::{SledPolicy, SledProvisionPolicy};
use serde::{Deserialize, Serialize};

// Declares the Diesel SQL type marker (`SledPolicyEnum`) and the Rust enum
// (`DbSledPolicy`) stored in columns of the Postgres type `public.sled_policy`.
impl_enum_type!(
// Marker type naming the Postgres enum type for Diesel.
#[derive(Clone, SqlType, Debug, QueryId)]
#[diesel(postgres_type(name = "sled_policy", schema = "public"))]
pub struct SledPolicyEnum;

/// Flat, database-side form of a sled's operator-defined policy.
///
/// This type is not actually public, because [`SledPolicy`] has a somewhat
/// different, friendlier shape while being equivalent -- external code
/// should always use [`SledPolicy`].
///
/// However, it must be marked `pub` to avoid errors like "crate-private
/// type `DbSledPolicy` in public interface". Marking this type `pub`,
/// without actually making it public, tricks rustc in a desirable way.
#[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq)]
#[diesel(sql_type = SledPolicyEnum)]
pub enum DbSledPolicy;

// Enum values
// Each line pairs a Rust variant with a byte-string label (presumably the
// label of the SQL enum value -- confirm against the schema).
//
// Correspondence with `SledPolicy`, per the conversions in this module:
//   InService   <-> SledPolicy::InService { Provisionable }
//   NoProvision <-> SledPolicy::InService { NonProvisionable }
//   Expunged    <-> SledPolicy::Expunged
InService => b"in_service"
NoProvision => b"no_provision"
Expunged => b"expunged"
);

/// Flattens a [`SledPolicy`] into the [`DbSledPolicy`] form that is stored in
/// the database.
///
/// The two in-service provision policies map to separate database variants:
/// `Provisionable` becomes `InService` and `NonProvisionable` becomes
/// `NoProvision`. `Expunged` maps to `Expunged`.
pub fn to_db_sled_policy(policy: SledPolicy) -> DbSledPolicy {
    match policy {
        SledPolicy::Expunged => DbSledPolicy::Expunged,
        // An in-service sled is distinguished only by its provision policy.
        SledPolicy::InService { provision_policy } => match provision_policy {
            SledProvisionPolicy::Provisionable => DbSledPolicy::InService,
            SledProvisionPolicy::NonProvisionable => DbSledPolicy::NoProvision,
        },
    }
}

/// Expands the flat database form back into the structured [`SledPolicy`].
impl From<DbSledPolicy> for SledPolicy {
    fn from(policy: DbSledPolicy) -> Self {
        // `Expunged` carries no provision policy, so it is handled up front;
        // the other two variants are both "in service" and differ only in
        // which provision policy they carry.
        let provision_policy = match policy {
            DbSledPolicy::Expunged => return SledPolicy::Expunged,
            DbSledPolicy::InService => SledProvisionPolicy::Provisionable,
            DbSledPolicy::NoProvision => SledProvisionPolicy::NonProvisionable,
        };
        SledPolicy::InService { provision_policy }
    }
}
53 changes: 0 additions & 53 deletions nexus/db-model/src/sled_provision_state.rs

This file was deleted.

59 changes: 59 additions & 0 deletions nexus/db-model/src/sled_state.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
// This Source Code Form is subject to the terms of the Mozilla Public
// License, v. 2.0. If a copy of the MPL was not distributed with this
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

//! Database representation of a sled's state as understood by Nexus.
//!
//! This is related to, but different from `SledPolicy`: a sled's **policy** is
//! its disposition as specified by the operator, while its **state** refers to
//! what's currently on it, as determined by Nexus.
//!
//! For example, a sled might be in the `Active` state, but have a policy of
//! `Expunged` -- this would mean that Nexus knows about resources currently
//! provisioned on the sled, but the operator has said that it should be marked
//! as gone.
use super::impl_enum_type;
use nexus_types::external_api::views;
use serde::{Deserialize, Serialize};
use std::fmt;
use strum::EnumIter;

// Declares the Diesel SQL type marker (`SledStateEnum`) and the Rust enum
// (`SledState`) stored in columns of the Postgres type `public.sled_state`.
impl_enum_type!(
// Marker type naming the Postgres enum type for Diesel.
#[derive(Clone, SqlType, Debug, QueryId)]
#[diesel(postgres_type(name = "sled_state", schema = "public"))]
pub struct SledStateEnum;

// Database-side sled state. It converts to and from `views::SledState`
// variant-for-variant (see the `From` impls in this module).
#[derive(Clone, Copy, Debug, AsExpression, FromSqlRow, Serialize, Deserialize, PartialEq, Eq, EnumIter)]
#[diesel(sql_type = SledStateEnum)]
pub enum SledState;

// Enum values
// Each line pairs a Rust variant with a byte-string label (presumably the
// label of the SQL enum value -- confirm against the schema).
Active => b"active"
Decommissioned => b"decommissioned"
);

impl fmt::Display for SledState {
    /// Formats the state by converting it to `views::SledState` and
    /// delegating to that type's formatting, so the text is defined in one
    /// place (nexus-types).
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let view: views::SledState = (*self).into();
        view.fmt(f)
    }
}

/// Maps the database-side state onto the view type, variant for variant.
impl From<SledState> for views::SledState {
    fn from(db_state: SledState) -> Self {
        match db_state {
            SledState::Decommissioned => Self::Decommissioned,
            SledState::Active => Self::Active,
        }
    }
}

/// Maps the view type onto the database-side state, variant for variant.
impl From<views::SledState> for SledState {
    fn from(view_state: views::SledState) -> Self {
        match view_state {
            views::SledState::Decommissioned => Self::Decommissioned,
            views::SledState::Active => Self::Active,
        }
    }
}
3 changes: 2 additions & 1 deletion nexus/db-queries/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ sled-agent-client.workspace = true
slog.workspace = true
static_assertions.workspace = true
steno.workspace = true
strum.workspace = true
swrite.workspace = true
thiserror.workspace = true
tokio = { workspace = true, features = ["full"] }
Expand Down Expand Up @@ -76,10 +77,10 @@ omicron-test-utils.workspace = true
openapiv3.workspace = true
pem.workspace = true
petgraph.workspace = true
predicates.workspace = true
pretty_assertions.workspace = true
rcgen.workspace = true
regex.workspace = true
rustls.workspace = true
strum.workspace = true
subprocess.workspace = true
term.workspace = true
3 changes: 2 additions & 1 deletion nexus/db-queries/src/authz/context.rs
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,8 @@ mod test {
let logctx = dev::test_setup_log("test_unregistered_resource");
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) =
crate::db::datastore::datastore_test(&logctx, &db).await;
crate::db::datastore::test_utils::datastore_test(&logctx, &db)
.await;

// Define a resource that we "forget" to register with Oso.
use super::AuthorizedResource;
Expand Down
6 changes: 4 additions & 2 deletions nexus/db-queries/src/authz/policy_test/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -62,7 +62,8 @@ use uuid::Uuid;
async fn test_iam_roles_behavior() {
let logctx = dev::test_setup_log("test_iam_roles");
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) = db::datastore::datastore_test(&logctx, &db).await;
let (opctx, datastore) =
db::datastore::test_utils::datastore_test(&logctx, &db).await;

// Before we can create the resources, users, and role assignments that we
// need, we must grant the "test-privileged" user privileges to fetch and
Expand Down Expand Up @@ -328,7 +329,8 @@ async fn test_conferred_roles() {
// To start, this test looks a lot like the test above.
let logctx = dev::test_setup_log("test_conferred_roles");
let mut db = test_setup_database(&logctx.log).await;
let (opctx, datastore) = db::datastore::datastore_test(&logctx, &db).await;
let (opctx, datastore) =
db::datastore::test_utils::datastore_test(&logctx, &db).await;

// Before we can create the resources, users, and role assignments that we
// need, we must grant the "test-privileged" user privileges to fetch and
Expand Down
Loading

0 comments on commit a6ef7f9

Please sign in to comment.