Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

RSS <-> Nexus handoff: Set an initial (disabled) target blueprint #5244

Merged
merged 13 commits into from
Mar 15, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

30 changes: 26 additions & 4 deletions common/src/api/external/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -650,16 +650,28 @@ impl From<ByteCount> for i64 {
pub struct Generation(u64);

impl Generation {
pub fn new() -> Generation {
pub const fn new() -> Generation {
Generation(1)
}

pub fn next(&self) -> Generation {
pub const fn from_u32(value: u32) -> Generation {
// `as` is a little distasteful because it allows lossy conversion, but
// (a) we know converting `u32` to `u64` will always succeed
// losslessly, and (b) it allows to make this function `const`, unlike
// if we were to use `u64::from(value)`.
Generation(value as u64)
}

pub const fn next(&self) -> Generation {
// It should technically be an operational error if this wraps or even
// exceeds the value allowed by an i64. But it seems unlikely enough to
// happen in practice that we can probably feel safe with this.
let next_gen = self.0 + 1;
assert!(next_gen <= u64::try_from(i64::MAX).unwrap());
// `as` is a little distasteful because it allows lossy conversion, but
// (a) we know converting `i64::MAX` to `u64` will always succeed
// losslessly, and (b) it allows to make this function `const`, unlike
// if we were to use `u64::try_from(i64::MAX).unwrap()`.
assert!(next_gen <= i64::MAX as u64);
Generation(next_gen)
}
}
Expand Down Expand Up @@ -697,11 +709,21 @@ impl TryFrom<i64> for Generation {
fn try_from(value: i64) -> Result<Self, Self::Error> {
Ok(Generation(
u64::try_from(value)
.map_err(|_| anyhow!("generation number too large"))?,
.map_err(|_| anyhow!("negative generation number"))?,
))
}
}

impl TryFrom<u64> for Generation {
type Error = anyhow::Error;

fn try_from(value: u64) -> Result<Self, Self::Error> {
i64::try_from(value)
.map_err(|_| anyhow!("generation number too large"))?;
Ok(Generation(value))
}
}

/// An RFC-1035-compliant hostname.
#[derive(
Clone, Debug, Deserialize, Display, Eq, PartialEq, SerializeDisplay,
Expand Down
30 changes: 25 additions & 5 deletions nexus/db-queries/src/db/datastore/deployment.rs
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,17 @@ impl DataStore {
&self,
opctx: &OpContext,
blueprint: &Blueprint,
) -> Result<(), Error> {
let conn = self.pool_connection_authorized(opctx).await?;
Self::blueprint_insert_on_connection(&conn, opctx, blueprint).await
}

/// Variant of [Self::blueprint_insert] which may be called from a
/// transaction context.
pub(crate) async fn blueprint_insert_on_connection(
conn: &async_bb8_diesel::Connection<DbConnection>,
opctx: &OpContext,
blueprint: &Blueprint,
) -> Result<(), Error> {
opctx
.authorize(authz::Action::Modify, &authz::BLUEPRINT_CONFIG)
Expand Down Expand Up @@ -152,8 +163,7 @@ impl DataStore {
// batch rather than making a bunch of round-trips to the database.
// We'd do that if we had an interface for doing that with bound
// parameters, etc. See oxidecomputer/omicron#973.
let pool = self.pool_connection_authorized(opctx).await?;
pool.transaction_async(|conn| async move {
conn.transaction_async(|conn| async move {
// Insert the row for the blueprint.
{
use db::schema::blueprint::dsl;
Expand Down Expand Up @@ -620,6 +630,18 @@ impl DataStore {
&self,
opctx: &OpContext,
target: BlueprintTarget,
) -> Result<(), Error> {
let conn = self.pool_connection_authorized(opctx).await?;
Self::blueprint_target_set_current_on_connection(&conn, opctx, target)
.await
}

/// Variant of [Self::blueprint_target_set_current] which may be called from
/// a transaction context.
pub(crate) async fn blueprint_target_set_current_on_connection(
conn: &async_bb8_diesel::Connection<DbConnection>,
opctx: &OpContext,
target: BlueprintTarget,
) -> Result<(), Error> {
opctx
.authorize(authz::Action::Modify, &authz::BLUEPRINT_CONFIG)
Expand All @@ -631,10 +653,8 @@ impl DataStore {
time_made_target: target.time_made_target,
};

let conn = self.pool_connection_authorized(opctx).await?;

query
.execute_async(&*conn)
.execute_async(conn)
.await
.map_err(|e| Error::from(query.decode_error(e)))?;

Expand Down
66 changes: 64 additions & 2 deletions nexus/db-queries/src/db/datastore/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ use nexus_db_model::PasswordHashString;
use nexus_db_model::SiloUser;
use nexus_db_model::SiloUserPasswordHash;
use nexus_db_model::SledUnderlaySubnetAllocation;
use nexus_types::deployment::Blueprint;
use nexus_types::deployment::BlueprintTarget;
use nexus_types::external_api::params as external_params;
use nexus_types::external_api::shared;
use nexus_types::external_api::shared::IdentityType;
Expand All @@ -68,6 +70,7 @@ use uuid::Uuid;
pub struct RackInit {
pub rack_id: Uuid,
pub rack_subnet: IpNetwork,
pub blueprint: Blueprint,
pub services: Vec<internal_params::ServicePutRequest>,
pub datasets: Vec<Dataset>,
pub service_ip_pool_ranges: Vec<IpRange>,
Expand All @@ -85,6 +88,8 @@ pub struct RackInit {
enum RackInitError {
AddingIp(Error),
AddingNic(Error),
BlueprintInsert(Error),
BlueprintTargetSet(Error),
ServiceInsert(Error),
DatasetInsert { err: AsyncInsertError, zpool_id: Uuid },
RackUpdate { err: DieselError, rack_id: Uuid },
Expand Down Expand Up @@ -126,6 +131,12 @@ impl From<RackInitError> for Error {
RackInitError::ServiceInsert(err) => Error::internal_error(
&format!("failed to insert Service record: {:#}", err),
),
RackInitError::BlueprintInsert(err) => Error::internal_error(
&format!("failed to insert Blueprint: {:#}", err),
),
RackInitError::BlueprintTargetSet(err) => Error::internal_error(
&format!("failed to insert set target Blueprint: {:#}", err),
),
RackInitError::RackUpdate { err, rack_id } => {
public_error_from_diesel(
err,
Expand Down Expand Up @@ -583,6 +594,7 @@ impl DataStore {
let service_pool = service_pool.clone();
async move {
let rack_id = rack_init.rack_id;
let blueprint = rack_init.blueprint;
let services = rack_init.services;
let datasets = rack_init.datasets;
let service_ip_pool_ranges = rack_init.service_ip_pool_ranges;
Expand All @@ -608,7 +620,7 @@ impl DataStore {
return Ok::<_, DieselError>(rack);
}

// Otherwise, insert services and datasets.
// Otherwise, insert blueprint and datasets.

// Set up the IP pool for internal services.
for range in service_ip_pool_ranges {
Expand All @@ -629,6 +641,46 @@ impl DataStore {
})?;
}

// Insert the RSS-generated blueprint.
Self::blueprint_insert_on_connection(
&conn,
opctx,
&blueprint,
).await
jgallagher marked this conversation as resolved.
Show resolved Hide resolved
.map_err(|e| {
warn!(
log,
"Initializing Rack: Failed to insert blueprint"
);
err.set(RackInitError::BlueprintInsert(e)).unwrap();
DieselError::RollbackTransaction
})?;

// Mark the RSS-generated blueprint as the current target,
// DISABLED. We may change this to enabled in the future
// when more of Reconfigurator is automated, but for now we
// require a support operation to enable it.
Self::blueprint_target_set_current_on_connection(
&conn,
opctx,
BlueprintTarget {
target_id: blueprint.id,
enabled: false,
time_made_target: Utc::now(),
},
Comment on lines +677 to +681
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We wouldn't really expect anything to be able to concurrently set a different target while we're doing this operation, would we? As in, other than this pathway, is "setting the target" still an operator/developer-driven operation?

Why I ask: I'm just curious if some other background task in Nexus could attempt to concurrently set a blueprint and cause this blueprint_target_set call to fail

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In principle a background task could sneak in, set a blueprint target, and cause this to fail. In practice the only thing that could do that is a support operator:

  1. A blueprint can only be made the initial target if its parent blueprint is None
  2. The only ways to get a blueprint with a parent of None are to generate one from an inventory collection (which is expected to only be a support-driven operator, forever, as once we do it to deployed systems and merge this change it will never be required again) or to construct one by hand, which is what RSS is doing on this PR.

)
.await
.map_err(|e| {
warn!(
log,
"Initializing Rack: \
Failed to set blueprint as target"
jgallagher marked this conversation as resolved.
Show resolved Hide resolved
);
err.set(RackInitError::BlueprintTargetSet(e))
.unwrap();
DieselError::RollbackTransaction
})?;

// Allocate records for all services.
for service in services {
self.rack_populate_service_records(
Expand Down Expand Up @@ -882,7 +934,7 @@ mod test {
};
use omicron_common::api::internal::shared::SourceNatConfig;
use omicron_test_utils::dev;
use std::collections::HashMap;
use std::collections::{BTreeMap, BTreeSet, HashMap};
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr, SocketAddrV6};
use std::num::NonZeroU32;

Expand All @@ -893,6 +945,16 @@ mod test {
RackInit {
rack_id: Uuid::parse_str(nexus_test_utils::RACK_UUID).unwrap(),
rack_subnet: nexus_test_utils::RACK_SUBNET.parse().unwrap(),
blueprint: Blueprint {
id: Uuid::new_v4(),
omicron_zones: BTreeMap::new(),
zones_in_service: BTreeSet::new(),
parent_blueprint_id: None,
internal_dns_version: Generation::new().next(),
time_created: Utc::now(),
creator: "test suite".to_string(),
comment: "test suite".to_string(),
},
services: vec![],
datasets: vec![],
service_ip_pool_ranges: vec![],
Expand Down
4 changes: 2 additions & 2 deletions nexus/reconfigurator/planning/src/blueprint_builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -687,8 +687,8 @@ impl<'a> BlueprintZones<'a> {
} else {
// The first generation is reserved to mean the one
// containing no zones. See
// OMICRON_ZONES_CONFIG_INITIAL_GENERATION. So we start
// with the next one.
// OmicronZonesConfig::INITIAL_GENERATION. So we start with the
// next one.
OmicronZonesConfig {
generation: Generation::new().next(),
zones: vec![],
Expand Down
2 changes: 1 addition & 1 deletion nexus/reconfigurator/planning/src/planner.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ mod test {
let (sled_id, sled_zones) = sleds[0];
// We have defined elsewhere that the first generation contains no
// zones. So the first one with zones must be newer. See
// OMICRON_ZONES_CONFIG_INITIAL_GENERATION.
// OmicronZonesConfig::INITIAL_GENERATION.
assert!(sled_zones.generation > Generation::new());
assert_eq!(sled_id, new_sled_id);
assert_eq!(sled_zones.zones.len(), 1);
Expand Down
29 changes: 17 additions & 12 deletions nexus/src/app/background/blueprint_load.rs
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,7 @@ mod test {
nexus_test_utils::ControlPlaneTestContext<crate::Server>;

fn create_blueprint(
parent_blueprint_id: Option<Uuid>,
parent_blueprint_id: Uuid,
) -> (BlueprintTarget, Blueprint) {
let id = Uuid::new_v4();
(
Expand All @@ -206,7 +206,7 @@ mod test {
id,
omicron_zones: BTreeMap::new(),
zones_in_service: BTreeSet::new(),
parent_blueprint_id,
parent_blueprint_id: Some(parent_blueprint_id),
internal_dns_version: Generation::new(),
time_created: now_db_precision(),
creator: "test".to_string(),
Expand Down Expand Up @@ -236,26 +236,31 @@ mod test {
let mut task = TargetBlueprintLoader::new(datastore.clone());
let mut rx = task.watcher();

// We expect an appropriate status with no blueprint in the datastore
// We expect to see the initial blueprint set up by nexus-test-utils
// (emulating RSS).
let value = task.activate(&opctx).await;
assert_eq!(json!({"status": "no target blueprint"}), value);
assert!(rx.borrow().is_none());
let initial_blueprint =
rx.borrow_and_update().clone().expect("no initial blueprint");
let update = serde_json::from_value::<TargetUpdate>(value).unwrap();
assert_eq!(update.target_id, initial_blueprint.1.id);
assert_eq!(update.status, "first target blueprint");

let (target, blueprint) = create_blueprint(None);
let (target, blueprint) = create_blueprint(update.target_id);

// Inserting a blueprint, but not making it the target returns the same
// status
// Inserting a blueprint, but not making it the target return status
// indicating that the target hasn't changed
datastore.blueprint_insert(&opctx, &blueprint).await.unwrap();
let value = task.activate(&opctx).await;
assert_eq!(json!({"status": "no target blueprint"}), value);
assert!(rx.borrow().is_none());
let update = serde_json::from_value::<TargetUpdate>(value).unwrap();
assert_eq!(update.target_id, initial_blueprint.1.id);
assert_eq!(update.status, "target blueprint unchanged");

// Setting a target blueprint makes the loader see it and broadcast it
datastore.blueprint_target_set_current(&opctx, target).await.unwrap();
let value = task.activate(&opctx).await;
let update = serde_json::from_value::<TargetUpdate>(value).unwrap();
assert_eq!(update.target_id, blueprint.id);
assert_eq!(update.status, "first target blueprint");
assert_eq!(update.status, "target blueprint updated");
let rx_update = rx.borrow_and_update().clone().unwrap();
assert_eq!(rx_update.0, target);
assert_eq!(rx_update.1, blueprint);
Expand All @@ -268,7 +273,7 @@ mod test {
assert_eq!(false, rx.has_changed().unwrap());

// Adding a new blueprint and updating the target triggers a change
let (new_target, new_blueprint) = create_blueprint(Some(blueprint.id));
let (new_target, new_blueprint) = create_blueprint(blueprint.id);
datastore.blueprint_insert(&opctx, &new_blueprint).await.unwrap();
datastore
.blueprint_target_set_current(&opctx, new_target)
Expand Down
1 change: 1 addition & 0 deletions nexus/src/app/rack.rs
Original file line number Diff line number Diff line change
Expand Up @@ -221,6 +221,7 @@ impl super::Nexus {
RackInit {
rack_subnet: rack_network_config.rack_subnet.into(),
rack_id,
blueprint: request.blueprint,
services: request.services,
datasets,
service_ip_pool_ranges,
Expand Down
3 changes: 3 additions & 0 deletions nexus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ use dropshot::ConfigDropshot;
use external_api::http_entrypoints::external_api;
use internal_api::http_entrypoints::internal_api;
use nexus_config::NexusConfig;
use nexus_types::deployment::Blueprint;
use nexus_types::external_api::views::SledProvisionPolicy;
use nexus_types::internal_api::params::ServiceKind;
use omicron_common::address::IpRange;
Expand Down Expand Up @@ -232,6 +233,7 @@ impl nexus_test_interface::NexusServer for Server {
async fn start(
internal_server: InternalServer,
config: &NexusConfig,
blueprint: Blueprint,
services: Vec<nexus_types::internal_api::params::ServicePutRequest>,
datasets: Vec<nexus_types::internal_api::params::DatasetCreateRequest>,
internal_dns_zone_config: nexus_types::internal_api::params::DnsConfigParams,
Expand Down Expand Up @@ -276,6 +278,7 @@ impl nexus_test_interface::NexusServer for Server {
&opctx,
config.deployment.rack_id,
internal_api::params::RackInitializationRequest {
blueprint,
services,
datasets,
internal_services_ip_pool_ranges,
Expand Down
2 changes: 2 additions & 0 deletions nexus/test-interface/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@

use async_trait::async_trait;
use nexus_config::NexusConfig;
use nexus_types::deployment::Blueprint;
use slog::Logger;
use std::net::{SocketAddr, SocketAddrV6};
use uuid::Uuid;
Expand All @@ -50,6 +51,7 @@ pub trait NexusServer: Send + Sync + 'static {
async fn start(
internal_server: Self::InternalServer,
config: &NexusConfig,
blueprint: Blueprint,
services: Vec<nexus_types::internal_api::params::ServicePutRequest>,
datasets: Vec<nexus_types::internal_api::params::DatasetCreateRequest>,
internal_dns_config: nexus_types::internal_api::params::DnsConfigParams,
Expand Down
Loading
Loading