Skip to content

Commit

Permalink
Clippy + neuter errors in undo path
Browse files Browse the repository at this point in the history
  • Loading branch information
FelixMcFelix committed Dec 28, 2023
1 parent 9db79b9 commit c259342
Show file tree
Hide file tree
Showing 3 changed files with 102 additions and 70 deletions.
88 changes: 56 additions & 32 deletions nexus/src/app/sagas/instance_ip_attach.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,13 @@ use uuid::Uuid;
// sled-agent as temporary errors and unwinding. For the delete case, we
// allow the attach/detach completion to have a missing record.
// See `instance_common::instance_ip_get_instance_state` for more info.
//
// One more consequence of sled state being able to change beneath us
// is that the central undo actions (DPD/OPTE state) *must* be best-effort.
// This is not bad per-se: instance stop does not itself remove NAT routing
// rules. The only reason these should fail is because an instance has stopped,
// at which point there's no good in e.g. adding another entry to a non-existent
// sled-agent regardless.

declare_saga_actions! {
instance_ip_attach;
Expand Down Expand Up @@ -71,9 +78,6 @@ pub struct Params {
pub serialized_authn: authn::saga::Serialized,
}

// TODO: factor this out for attach, detach, and instance create
// to share an impl.

async fn siia_begin_attach_ip(
sagactx: NexusActionContext,
) -> Result<ExternalIp, ActionError> {
Expand Down Expand Up @@ -169,13 +173,17 @@ async fn siia_nat(sagactx: NexusActionContext) -> Result<(), ActionError> {
async fn siia_nat_undo(
sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
let log = sagactx.user_data().log();
let params = sagactx.saga_params::<Params>()?;
instance_ip_remove_nat(
if let Err(e) = instance_ip_remove_nat(
&sagactx,
&params.serialized_authn,
&params.authz_instance,
)
.await?;
.await
{
error!(log, "siia_nat_undo: failed to notify DPD: {e}");
}

Ok(())
}
Expand All @@ -190,8 +198,13 @@ async fn siia_update_opte(
async fn siia_update_opte_undo(
sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
let log = sagactx.user_data().log();
let params = sagactx.saga_params::<Params>()?;
instance_ip_remove_opte(&sagactx, &params.authz_instance).await?;
if let Err(e) =
instance_ip_remove_opte(&sagactx, &params.authz_instance).await
{
error!(log, "siia_update_opte_undo: failed to notify sled-agent: {e}");
}
Ok(())
}

Expand Down Expand Up @@ -219,8 +232,6 @@ async fn siia_complete_attach(
target_ip.try_into().map_err(ActionError::action_failed)
}

// TODO: backout changes if run state changed illegally?

#[derive(Debug)]
pub struct SagaInstanceIpAttach;
impl NexusSaga for SagaInstanceIpAttach {
Expand All @@ -247,27 +258,19 @@ impl NexusSaga for SagaInstanceIpAttach {
#[cfg(test)]
pub(crate) mod test {
use super::*;
use crate::app::{
saga::create_saga_dag,
sagas::test_helpers::{self, instance_simulate},
};
use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection};
use crate::app::{saga::create_saga_dag, sagas::test_helpers};
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::{
ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper,
};
use dropshot::test_util::ClientTestContext;
use nexus_db_model::{IpKind, Name};
use nexus_db_queries::context::OpContext;
use nexus_test_utils::resource_helpers::{
create_disk, create_floating_ip, create_instance, create_project,
object_create, populate_ip_pool,
create_floating_ip, create_instance, create_project, populate_ip_pool,
};
use nexus_test_utils_macros::nexus_test;
use nexus_types::external_api::params::ExternalIpCreate;
use omicron_common::api::external::{
ByteCount, IdentityMetadataCreateParams, InstanceCpuCount,
SimpleIdentity,
};
use omicron_common::api::external::SimpleIdentity;

type ControlPlaneTestContext =
nexus_test_utils::ControlPlaneTestContext<crate::Server>;
Expand Down Expand Up @@ -327,11 +330,12 @@ pub(crate) mod test {
let client = &cptestctx.external_client;
let apictx = &cptestctx.server.apictx();
let nexus = &apictx.nexus;
let sled_agent = &cptestctx.sled_agent.sled_agent;

let opctx = test_helpers::test_opctx(cptestctx);
let datastore = &nexus.db_datastore;
let project_id = ip_manip_test_setup(&client).await;
let _instance =
let _project_id = ip_manip_test_setup(&client).await;
let instance =
create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;

for use_float in [false, true] {
Expand All @@ -340,9 +344,31 @@ pub(crate) mod test {
let dag = create_saga_dag::<SagaInstanceIpAttach>(params).unwrap();
let saga = nexus.create_runnable_saga(dag).await.unwrap();
nexus.run_saga(saga).await.expect("Attach saga should succeed");

// TODO: assert sled agent, dpd happy, ...
}

let instance_id = instance.id();

// Sled agent has a record of the new external IPs.
let mut eips = sled_agent.external_ips.lock().await;
let my_eips = eips.entry(instance_id).or_default();
assert!(my_eips.iter().any(|v| matches!(
v,
omicron_sled_agent::params::InstanceExternalIpBody::Floating(_)
)));
assert!(my_eips.iter().any(|v| matches!(
v,
omicron_sled_agent::params::InstanceExternalIpBody::Ephemeral(_)
)));

// DB has records for SNAT plus the new IPs.
let db_eips = datastore
.instance_lookup_external_ips(&opctx, instance_id)
.await
.unwrap();
assert_eq!(db_eips.len(), 3);
assert!(db_eips.iter().any(|v| v.kind == IpKind::Ephemeral));
assert!(db_eips.iter().any(|v| v.kind == IpKind::Floating));
assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat));
}

pub(crate) async fn verify_clean_slate(
Expand Down Expand Up @@ -381,10 +407,9 @@ pub(crate) mod test {
.is_none());

// No IP bindings remain on sled-agent.
let eips = &*sled_agent.external_ips.lock().await;
for (_nic_id, eip_set) in eips {
assert!(eip_set.is_empty());
}
let mut eips = sled_agent.external_ips.lock().await;
let my_eips = eips.entry(instance_id).or_default();
assert!(my_eips.is_empty());
}

#[nexus_test(server = crate::Server)]
Expand All @@ -398,7 +423,7 @@ pub(crate) mod test {

let opctx = test_helpers::test_opctx(cptestctx);
let datastore = &nexus.db_datastore;
let project_id = ip_manip_test_setup(&client).await;
let _project_id = ip_manip_test_setup(&client).await;
let instance =
create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;

Expand All @@ -424,7 +449,7 @@ pub(crate) mod test {

let opctx = test_helpers::test_opctx(cptestctx);
let datastore = &nexus.db_datastore;
let project_id = ip_manip_test_setup(&client).await;
let _project_id = ip_manip_test_setup(&client).await;
let instance =
create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;

Expand All @@ -447,14 +472,13 @@ pub(crate) mod test {
async fn test_actions_succeed_idempotently(
cptestctx: &ControlPlaneTestContext,
) {
let log = &cptestctx.logctx.log;
let client = &cptestctx.external_client;
let apictx = &cptestctx.server.apictx();
let nexus = &apictx.nexus;

let opctx = test_helpers::test_opctx(cptestctx);
let datastore = &nexus.db_datastore;
let project_id = ip_manip_test_setup(&client).await;
let _project_id = ip_manip_test_setup(&client).await;
let _instance =
create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;

Expand Down
66 changes: 39 additions & 27 deletions nexus/src/app/sagas/instance_ip_detach.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,25 +14,14 @@ use crate::external_api::params;
use nexus_db_model::{ExternalIp, IpAttachState, IpKind};
use nexus_db_queries::db::lookup::LookupPath;
use nexus_types::external_api::views;
use omicron_common::api::external::{Error, InstanceState};
use omicron_common::api::external::Error;
use serde::Deserialize;
use serde::Serialize;
use steno::ActionError;
use uuid::Uuid;

// rough sequence of evts:
// - take temp ownership of instance while interacting w/ sled agent
// -> mark instance migration id as Some(0) if None
// - Withdraw routes
// -> ensure_dpd... (?) Do we actually need to?
// -> must precede OPTE: host may change its sending
// behaviour prematurely
// - Deregister addr in OPTE
// -> Put addr in sled-agent endpoint
// - Detach and Delete EIP iff. Ephemeral
// -> why so late? Risk that we can't recover our IP in an unwind.
// - free up migration_id of instance.
// -> mark instance migration id as None
// This runs on similar logic to instance IP attach: see its head
// comment for an explanation of the structure wrt. other sagas.

declare_saga_actions! {
instance_ip_detach;
Expand Down Expand Up @@ -177,13 +166,17 @@ async fn siid_nat(sagactx: NexusActionContext) -> Result<(), ActionError> {
async fn siid_nat_undo(
sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
let log = sagactx.user_data().log();
let params = sagactx.saga_params::<Params>()?;
instance_ip_add_nat(
if let Err(e) = instance_ip_add_nat(
&sagactx,
&params.serialized_authn,
&params.authz_instance,
)
.await?;
.await
{
error!(log, "siid_nat_undo: failed to notify DPD: {e}");
}

Ok(())
}
Expand All @@ -198,8 +191,12 @@ async fn siid_update_opte(
async fn siid_update_opte_undo(
sagactx: NexusActionContext,
) -> Result<(), anyhow::Error> {
let log = sagactx.user_data().log();
let params = sagactx.saga_params::<Params>()?;
instance_ip_add_opte(&sagactx, &params.authz_instance).await?;
if let Err(e) = instance_ip_add_opte(&sagactx, &params.authz_instance).await
{
error!(log, "siid_update_opte_undo: failed to notify sled-agent: {e}");
}
Ok(())
}

Expand All @@ -220,7 +217,7 @@ async fn siid_complete_detach(
{
warn!(
log,
"siia_complete_attach: external IP was deleted or call was idempotent"
"siid_complete_attach: external IP was deleted or call was idempotent"
)
}

Expand Down Expand Up @@ -263,7 +260,7 @@ pub(crate) mod test {
},
Nexus,
};
use async_bb8_diesel::{AsyncRunQueryDsl, AsyncSimpleConnection};
use async_bb8_diesel::AsyncRunQueryDsl;
use diesel::{
ExpressionMethods, OptionalExtension, QueryDsl, SelectableHelper,
};
Expand Down Expand Up @@ -342,11 +339,12 @@ pub(crate) mod test {
let client = &cptestctx.external_client;
let apictx = &cptestctx.server.apictx();
let nexus = &apictx.nexus;
let sled_agent = &cptestctx.sled_agent.sled_agent;

let opctx = test_helpers::test_opctx(cptestctx);
let datastore = &nexus.db_datastore;
let _ = ip_manip_test_setup(&client).await;
let _instance =
let instance =
create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;

attach_instance_ips(nexus, &opctx).await;
Expand All @@ -358,6 +356,21 @@ pub(crate) mod test {
let saga = nexus.create_runnable_saga(dag).await.unwrap();
nexus.run_saga(saga).await.expect("Detach saga should succeed");
}

let instance_id = instance.id();

// Sled agent has removed its records of the external IPs.
let mut eips = sled_agent.external_ips.lock().await;
let my_eips = eips.entry(instance_id).or_default();
assert!(my_eips.is_empty());

// DB only has record for SNAT.
let db_eips = datastore
.instance_lookup_external_ips(&opctx, instance_id)
.await
.unwrap();
assert_eq!(db_eips.len(), 1);
assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat));
}

pub(crate) async fn verify_clean_slate(
Expand Down Expand Up @@ -401,9 +414,9 @@ pub(crate) mod test {
.await
.unwrap();
assert_eq!(db_eips.len(), 3);
assert!(db_eips.iter().find(|v| v.kind == IpKind::Ephemeral).is_some());
assert!(db_eips.iter().find(|v| v.kind == IpKind::Floating).is_some());
assert!(db_eips.iter().find(|v| v.kind == IpKind::SNat).is_some());
assert!(db_eips.iter().any(|v| v.kind == IpKind::Ephemeral));
assert!(db_eips.iter().any(|v| v.kind == IpKind::Floating));
assert!(db_eips.iter().any(|v| v.kind == IpKind::SNat));

// No IP bindings remain on sled-agent.
let eips = &*sled_agent.external_ips.lock().await;
Expand All @@ -423,7 +436,7 @@ pub(crate) mod test {

let opctx = test_helpers::test_opctx(cptestctx);
let datastore = &nexus.db_datastore;
let project_id = ip_manip_test_setup(&client).await;
let _project_id = ip_manip_test_setup(&client).await;
let instance =
create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;

Expand Down Expand Up @@ -451,7 +464,7 @@ pub(crate) mod test {

let opctx = test_helpers::test_opctx(cptestctx);
let datastore = &nexus.db_datastore;
let project_id = ip_manip_test_setup(&client).await;
let _project_id = ip_manip_test_setup(&client).await;
let instance =
create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;

Expand All @@ -476,14 +489,13 @@ pub(crate) mod test {
async fn test_actions_succeed_idempotently(
cptestctx: &ControlPlaneTestContext,
) {
let log = &cptestctx.logctx.log;
let client = &cptestctx.external_client;
let apictx = &cptestctx.server.apictx();
let nexus = &apictx.nexus;

let opctx = test_helpers::test_opctx(cptestctx);
let datastore = &nexus.db_datastore;
let project_id = ip_manip_test_setup(&client).await;
let _project_id = ip_manip_test_setup(&client).await;
let _instance =
create_instance(client, PROJECT_NAME, INSTANCE_NAME).await;

Expand Down
18 changes: 7 additions & 11 deletions sled-agent/src/sim/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -640,17 +640,13 @@ impl SledAgent {
// High-level behaviour: this should always succeed UNLESS
// trying to add a double ephemeral.
if let InstanceExternalIpBody::Ephemeral(curr_ip) = &body_args {
if my_eips
.iter()
.find(|v| {
if let InstanceExternalIpBody::Ephemeral(other_ip) = v {
curr_ip != other_ip
} else {
false
}
})
.is_some()
{
if my_eips.iter().any(|v| {
if let InstanceExternalIpBody::Ephemeral(other_ip) = v {
curr_ip != other_ip
} else {
false
}
}) {
return Err(Error::invalid_request("cannot replace exisitng ephemeral IP without explicit removal"));
}
}
Expand Down

0 comments on commit c259342

Please sign in to comment.