Skip to content

Commit

Permalink
Working idempotent double attach/detach.
Browse files Browse the repository at this point in the history
This required that we drop the non-null parent constraint on ephemeral
IPs, but I think that's worth it in the name of consistency.
  • Loading branch information
FelixMcFelix committed Dec 28, 2023
1 parent c259342 commit 21d5776
Show file tree
Hide file tree
Showing 14 changed files with 382 additions and 124 deletions.
6 changes: 3 additions & 3 deletions nexus/db-model/src/external_ip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ impl IncompleteExternalIp {
}
}

pub fn for_ephemeral(id: Uuid, instance_id: Uuid, pool_id: Uuid) -> Self {
pub fn for_ephemeral(id: Uuid, pool_id: Uuid) -> Self {
let kind = IpKind::Ephemeral;
Self {
id,
Expand All @@ -192,7 +192,7 @@ impl IncompleteExternalIp {
time_created: Utc::now(),
kind,
is_service: false,
parent_id: Some(instance_id),
parent_id: None,
pool_id,
project_id: None,
explicit_ip: None,
Expand Down Expand Up @@ -402,7 +402,7 @@ impl IpKind {
pub fn initial_state(&self) -> IpAttachState {
match &self {
IpKind::SNat => IpAttachState::Attached,
IpKind::Ephemeral => IpAttachState::Attaching,
IpKind::Ephemeral => IpAttachState::Detached,
IpKind::Floating => IpAttachState::Detached,
}
}
Expand Down
2 changes: 2 additions & 0 deletions nexus/db-model/src/schema.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1406,6 +1406,8 @@ allow_tables_to_appear_in_same_query!(

allow_tables_to_appear_in_same_query!(dns_zone, dns_version, dns_name);
allow_tables_to_appear_in_same_query!(external_ip, service);
allow_tables_to_appear_in_same_query!(external_ip, instance);
joinable!(external_ip -> instance (parent_id));

allow_tables_to_appear_in_same_query!(
switch_port,
Expand Down
266 changes: 207 additions & 59 deletions nexus/db-queries/src/db/datastore/external_ip.rs

Large diffs are not rendered by default.

18 changes: 14 additions & 4 deletions nexus/db-queries/src/db/queries/external_ip.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1065,9 +1065,11 @@ mod tests {
Uuid::new_v4(),
instance_id,
/* pool_name = */ None,
true,
)
.await
.expect("Failed to allocate Ephemeral IP when there is space");
.expect("Failed to allocate Ephemeral IP when there is space")
.0;
assert_eq!(ephemeral_ip.ip.ip(), range.last_address());
assert_eq!(ephemeral_ip.first_port.0, 0);
assert_eq!(ephemeral_ip.last_port.0, super::MAX_PORT);
Expand Down Expand Up @@ -1105,6 +1107,7 @@ mod tests {
Uuid::new_v4(),
instance_id,
/* pool_name = */ None,
true,
)
.await;
assert!(
Expand Down Expand Up @@ -1250,9 +1253,11 @@ mod tests {
id,
instance_id,
pool_name,
true,
)
.await
.expect("Failed to allocate instance ephemeral IP address");
.expect("Failed to allocate instance ephemeral IP address")
.0;
assert_eq!(ip.kind, IpKind::Ephemeral);
assert_eq!(ip.ip.ip(), range.first_address());
assert_eq!(ip.first_port.0, 0);
Expand Down Expand Up @@ -1780,9 +1785,11 @@ mod tests {
id,
instance_id,
pool_name,
true,
)
.await
.expect("Failed to allocate instance ephemeral IP address");
.expect("Failed to allocate instance ephemeral IP address")
.0;
assert_eq!(ip.kind, IpKind::Ephemeral);
assert_eq!(ip.ip.ip(), second_range.first_address());
assert_eq!(ip.first_port.0, 0);
Expand Down Expand Up @@ -1823,9 +1830,11 @@ mod tests {
Uuid::new_v4(),
instance_id,
pool_name.clone(),
true,
)
.await
.expect("Failed to allocate instance ephemeral IP address");
.expect("Failed to allocate instance ephemeral IP address")
.0;
println!("{ip:#?}");
if let IpAddr::V4(addr) = ip.ip.ip() {
assert_eq!(addr.octets()[3], octet);
Expand All @@ -1842,6 +1851,7 @@ mod tests {
Uuid::new_v4(),
Uuid::new_v4(),
pool_name,
true,
)
.await
.expect_err("Should not use IP addresses from a different pool");
Expand Down
4 changes: 2 additions & 2 deletions nexus/src/app/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1942,7 +1942,7 @@ impl super::Nexus {
opctx: &OpContext,
instance_lookup: &lookup::Instance<'_>,
ext_ip: &params::ExternalIpDelete,
) -> UpdateResult<views::ExternalIp> {
) -> UpdateResult<Option<views::ExternalIp>> {
let (.., authz_project, authz_instance) =
instance_lookup.lookup_for(authz::Action::Modify).await?;

Expand All @@ -1960,7 +1960,7 @@ impl super::Nexus {
.await?;

saga_outputs
.lookup_node_output::<views::ExternalIp>("output")
.lookup_node_output::<Option<views::ExternalIp>>("output")
.map_err(|e| Error::internal_error(&format!("{:#}", &e)))
.internal_context("looking up output from ip attach saga")
}
Expand Down
25 changes: 13 additions & 12 deletions nexus/src/app/instance_network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -342,18 +342,19 @@ impl super::Nexus {
.instance_lookup_external_ips(&opctx, instance_id)
.await?;

let (ips_of_interest, must_all_be_attached) =
if let Some(wanted_id) = ip_filter {
if let Some(ip) = ips.iter().find(|v| v.id == wanted_id) {
(std::slice::from_ref(ip), false)
} else {
return Err(Error::internal_error(&format!(
"failed to find external ip address with id: {wanted_id}",
)));
}
let (ips_of_interest, must_all_be_attached) = if let Some(wanted_id) =
ip_filter
{
if let Some(ip) = ips.iter().find(|v| v.id == wanted_id) {
(std::slice::from_ref(ip), false)
} else {
(&ips[..], true)
};
return Err(Error::internal_error(&format!(
"failed to find external ip address with id: {wanted_id}, saw {ips:?}",
)));
}
} else {
(&ips[..], true)
};

// This is performed so that an IP attach/detach will block the
// instance_start saga. Return service unavailable to indicate
Expand Down Expand Up @@ -482,7 +483,7 @@ impl super::Nexus {
std::slice::from_ref(ip)
} else {
return Err(Error::internal_error(&format!(
"failed to find external ip address with id: {wanted_id}",
"failed to find external ip address with id: {wanted_id}, saw {external_ips:?}",
)));
}
} else {
Expand Down
70 changes: 63 additions & 7 deletions nexus/src/app/sagas/instance_common.rs
Original file line number Diff line number Diff line change
Expand Up @@ -151,6 +151,19 @@ pub struct InstanceStateForIp {
pub state: InstanceState,
}

/// External IP state needed for IP attach/detachment.
///
/// This holds a record of the mid-processing external IP, where possible.
/// there are cases where this might not be known (e.g., double detach of an
/// ephemeral IP).
/// In particular we need to explicitly no-op if not `do_saga`, to prevent
/// failures borne from instance state changes from knocking out a valid IP binding.
#[derive(Debug, Deserialize, Serialize)]
pub struct ModifyStateForExternalIp {
pub external_ip: Option<ExternalIp>,
pub do_saga: bool,
}

/// Move an external IP from one state to another as a saga operation,
/// returning `Ok(true)` if the record was successfully moved and `Ok(false)`
/// if the record was lost.
Expand All @@ -168,7 +181,16 @@ pub async fn instance_ip_move_state(
let opctx =
crate::context::op_context_for_saga_action(&sagactx, serialized_authn);

let new_ip = sagactx.lookup::<ExternalIp>("target_ip")?;
let new_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;

if !new_ip.do_saga {
return Ok(true);
}
let Some(new_ip) = new_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};

match datastore
.external_ip_complete_op(&opctx, new_ip.id, new_ip.kind, from, to)
Expand Down Expand Up @@ -265,7 +287,15 @@ pub async fn instance_ip_add_nat(
return Ok(());
};

let target_ip = sagactx.lookup::<ExternalIp>("target_ip")?;
let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
if !target_ip.do_saga {
return Ok(());
}
let Some(target_ip) = target_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};

// Querying sleds requires fleet access; use the instance allocator context
// for this.
Expand Down Expand Up @@ -305,7 +335,15 @@ pub async fn instance_ip_remove_nat(
return Ok(());
};

let target_ip = sagactx.lookup::<ExternalIp>("target_ip")?;
let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
if !target_ip.do_saga {
return Ok(());
}
let Some(target_ip) = target_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};

osagactx
.nexus()
Expand All @@ -329,9 +367,18 @@ pub async fn instance_ip_add_opte(
return Ok(());
};

let new_ip = sagactx.lookup::<ExternalIp>("target_ip")?;
let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
if !target_ip.do_saga {
return Ok(());
}
let Some(target_ip) = target_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};

let sled_agent_body =
new_ip.try_into().map_err(ActionError::action_failed)?;
target_ip.try_into().map_err(ActionError::action_failed)?;

osagactx
.nexus()
Expand Down Expand Up @@ -369,9 +416,18 @@ pub async fn instance_ip_remove_opte(
return Ok(());
};

let new_ip = sagactx.lookup::<ExternalIp>("target_ip")?;
let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;
if !target_ip.do_saga {
return Ok(());
}
let Some(target_ip) = target_ip.external_ip else {
return Err(ActionError::action_failed(Error::internal_error(
"tried to `do_saga` without valid external IP",
)));
};

let sled_agent_body =
new_ip.try_into().map_err(ActionError::action_failed)?;
target_ip.try_into().map_err(ActionError::action_failed)?;

osagactx
.nexus()
Expand Down
3 changes: 3 additions & 0 deletions nexus/src/app/sagas/instance_create.rs
Original file line number Diff line number Diff line change
Expand Up @@ -634,9 +634,11 @@ async fn sic_allocate_instance_external_ip(
ip_id,
instance_id,
pool_name,
true,
)
.await
.map_err(ActionError::action_failed)?
.0
}
// Set the parent of an existing floating IP to the new instance's ID.
params::ExternalIpCreate::Floating { ref floating_ip_name } => {
Expand All @@ -652,6 +654,7 @@ async fn sic_allocate_instance_external_ip(
.floating_ip_begin_attach(&opctx, &authz_fip, instance_id, true)
.await
.map_err(ActionError::action_failed)?
.0
}
};

Expand Down
30 changes: 24 additions & 6 deletions nexus/src/app/sagas/instance_ip_attach.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,16 @@
use super::instance_common::{
instance_ip_add_nat, instance_ip_add_opte, instance_ip_get_instance_state,
instance_ip_move_state, instance_ip_remove_nat, instance_ip_remove_opte,
InstanceStateForIp,
InstanceStateForIp, ModifyStateForExternalIp,
};
use super::{ActionRegistry, NexusActionContext, NexusSaga};
use crate::app::sagas::declare_saga_actions;
use crate::app::{authn, authz, db};
use crate::external_api::params;
use nexus_db_model::{ExternalIp, IpAttachState};
use nexus_db_model::IpAttachState;
use nexus_db_queries::db::lookup::LookupPath;
use nexus_types::external_api::views;
use omicron_common::api::external::Error;
use serde::Deserialize;
use serde::Serialize;
use steno::ActionError;
Expand Down Expand Up @@ -80,7 +81,7 @@ pub struct Params {

async fn siia_begin_attach_ip(
sagactx: NexusActionContext,
) -> Result<ExternalIp, ActionError> {
) -> Result<ModifyStateForExternalIp, ActionError> {
let osagactx = sagactx.user_data();
let datastore = osagactx.datastore();
let params = sagactx.saga_params::<Params>()?;
Expand All @@ -100,9 +101,14 @@ async fn siia_begin_attach_ip(
Uuid::new_v4(),
params.authz_instance.id(),
pool_name,
false,
)
.await
.map_err(ActionError::action_failed)
.map(|(external_ip, do_saga)| ModifyStateForExternalIp {
external_ip: Some(external_ip),
do_saga,
})
}
// Set the parent of an existing floating IP to the new instance's ID.
params::ExternalIpCreate::Floating { ref floating_ip_name } => {
Expand All @@ -123,6 +129,10 @@ async fn siia_begin_attach_ip(
)
.await
.map_err(ActionError::action_failed)
.map(|(external_ip, do_saga)| ModifyStateForExternalIp {
external_ip: Some(external_ip),
do_saga,
})
}
}
}
Expand Down Expand Up @@ -213,7 +223,7 @@ async fn siia_complete_attach(
) -> Result<views::ExternalIp, ActionError> {
let log = sagactx.user_data().log();
let params = sagactx.saga_params::<Params>()?;
let target_ip = sagactx.lookup::<ExternalIp>("target_ip")?;
let target_ip = sagactx.lookup::<ModifyStateForExternalIp>("target_ip")?;

if !instance_ip_move_state(
&sagactx,
Expand All @@ -229,7 +239,15 @@ async fn siia_complete_attach(
)
}

target_ip.try_into().map_err(ActionError::action_failed)
target_ip
.external_ip
.ok_or_else(|| {
Error::internal_error(
"must always have a defined external IP during instance attach",
)
})
.and_then(TryInto::try_into)
.map_err(ActionError::action_failed)
}

#[derive(Debug)]
Expand Down Expand Up @@ -264,7 +282,7 @@ pub(crate) mod test {
ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper,
};
use dropshot::test_util::ClientTestContext;
use nexus_db_model::{IpKind, Name};
use nexus_db_model::{ExternalIp, IpKind, Name};
use nexus_db_queries::context::OpContext;
use nexus_test_utils::resource_helpers::{
create_floating_ip, create_instance, create_project, populate_ip_pool,
Expand Down
Loading

0 comments on commit 21d5776

Please sign in to comment.