Skip to content

Commit

Permalink
nexus: don't require sled ID to delete v2p mappings (#4139)
Browse files Browse the repository at this point in the history
Nexus's `delete_instance_v2p_mappings` fetches the target instance's DB
record to get the instance's current sled ID. This allows this function
to (a) skip the sled when deleting V2P mappings, and (b) obtain the
sled's physical host IP to pass into a `SetVirtualNetworkInterfaceHost`
parameter block to pass to sled agent.

Change `delete_instance_v2p_mappings` not to need the sled ID for either
of these purposes:

1. It's already safe to ask a sled to delete a V2P mapping that its XDE
driver has already deleted. (Currently, this is true because explicitly
asking to destroy a V2P mapping is actually a no-op. But even if it
weren't, Nexus already assumes this request is idempotent, since it can
be invoked from the instance deletion saga.)
2. The physical host IP is not necessary for the `oxide-vpc` lib to
identify a V2P mapping for removal; only the VNI and virtual IP are
needed (see `VpcMappings::del` in the OPTE repo).

This helps to clear a path for upcoming changes that will make an
instance's sled ID optional (i.e. it is possible for an instance not to
have one, or for the last-known location of a running instance to be
more challenging to reason about than this routine presupposes).

Tested: cargo test, including integration test updates for tests that
are affected by this behavior; ran an instance on a dev cluster,
verified it was externally accessible, verified the expected sled agent
calls were seen when stopping/deleting the instance, and verified a new
instance that reused the previous instance's external IP was also
reachable.
  • Loading branch information
gjcolombo authored Sep 26, 2023
1 parent 3019a1b commit 9a1f6bc
Show file tree
Hide file tree
Showing 9 changed files with 72 additions and 43 deletions.
12 changes: 12 additions & 0 deletions illumos-utils/src/opte/params.rs
Original file line number Diff line number Diff line change
Expand Up @@ -38,3 +38,15 @@ pub struct SetVirtualNetworkInterfaceHost {
pub physical_host_ip: Ipv6Addr,
pub vni: external::Vni,
}

/// The data needed to identify a virtual IP for which a sled maintains an OPTE
/// virtual-to-physical mapping such that that mapping can be deleted.
#[derive(Clone, Debug, Serialize, Deserialize, JsonSchema, PartialEq)]
pub struct DeleteVirtualNetworkInterfaceHost {
/// The virtual IP whose mapping should be deleted.
pub virtual_ip: IpAddr,

/// The VNI for the network containing the virtual IP whose mapping should
/// be deleted.
pub vni: external::Vni,
}
6 changes: 4 additions & 2 deletions illumos-utils/src/opte/port_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
use crate::opte::default_boundary_services;
use crate::opte::opte_firewall_rules;
use crate::opte::params::DeleteVirtualNetworkInterfaceHost;
use crate::opte::params::SetVirtualNetworkInterfaceHost;
use crate::opte::params::VpcFirewallRule;
use crate::opte::Error;
Expand Down Expand Up @@ -480,17 +481,18 @@ impl PortManager {
#[cfg(target_os = "illumos")]
pub fn unset_virtual_nic_host(
&self,
_mapping: &SetVirtualNetworkInterfaceHost,
_mapping: &DeleteVirtualNetworkInterfaceHost,
) -> Result<(), Error> {
// TODO requires https://github.com/oxidecomputer/opte/issues/332

slog::warn!(self.inner.log, "unset_virtual_nic_host unimplmented");
Ok(())
}

#[cfg(not(target_os = "illumos"))]
pub fn unset_virtual_nic_host(
&self,
_mapping: &SetVirtualNetworkInterfaceHost,
_mapping: &DeleteVirtualNetworkInterfaceHost,
) -> Result<(), Error> {
info!(self.inner.log, "Ignoring unset of virtual NIC mapping");
Ok(())
Expand Down
32 changes: 6 additions & 26 deletions nexus/src/app/instance_network.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ use nexus_db_queries::db::lookup::LookupPath;
use omicron_common::api::external::DataPageParams;
use omicron_common::api::external::Error;
use omicron_common::api::internal::shared::SwitchLocation;
use sled_agent_client::types::DeleteVirtualNetworkInterfaceHost;
use sled_agent_client::types::SetVirtualNetworkInterfaceHost;
use std::collections::HashSet;
use std::str::FromStr;
Expand Down Expand Up @@ -186,26 +187,16 @@ impl super::Nexus {
// For every sled that isn't the sled this instance was allocated to, delete
// the virtual to physical mapping for each of this instance's NICs. If
// there isn't a V2P mapping, del_v2p should be a no-op.
let (.., authz_instance, db_instance) =
LookupPath::new(&opctx, &self.db_datastore)
.instance_id(instance_id)
.fetch_for(authz::Action::Read)
.await?;
let (.., authz_instance) = LookupPath::new(&opctx, &self.db_datastore)
.instance_id(instance_id)
.lookup_for(authz::Action::Read)
.await?;

let instance_nics = self
.db_datastore
.derive_guest_network_interface_info(&opctx, &authz_instance)
.await?;

// Lookup the physical host IP of the sled hosting this instance
let instance_sled_id = db_instance.runtime().sled_id;
let physical_host_ip = *self
.sled_lookup(&self.opctx_alloc, &instance_sled_id)?
.fetch()
.await?
.1
.ip;

let mut last_sled_id: Option<Uuid> = None;

loop {
Expand All @@ -221,22 +212,11 @@ impl super::Nexus {
Vec::with_capacity(sleds_page.len() * instance_nics.len());

for sled in &sleds_page {
// del_v2p not required for sled instance was allocated to, OPTE
// currently does that automatically
//
// TODO(#3107): Remove this when XDE stops deleting mappings
// implicitly.
if sled.id() == instance_sled_id {
continue;
}

for nic in &instance_nics {
let client = self.sled_client(&sled.id()).await?;
let nic_id = nic.id;
let mapping = SetVirtualNetworkInterfaceHost {
let mapping = DeleteVirtualNetworkInterfaceHost {
virtual_ip: nic.ip,
virtual_mac: nic.mac.clone(),
physical_host_ip,
vni: nic.vni.clone(),
};

Expand Down
7 changes: 1 addition & 6 deletions nexus/tests/integration_tests/instances.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3661,12 +3661,7 @@ async fn test_instance_v2p_mappings(cptestctx: &ControlPlaneTestContext) {
// Validate that every sled no longer has the V2P mapping for this instance
for sled_agent in &sled_agents {
let v2p_mappings = sled_agent.v2p_mappings.lock().await;
if sled_agent.id != db_instance.runtime().sled_id {
assert!(!v2p_mappings.is_empty());
assert!(v2p_mappings.get(&nics[0].identity.id).unwrap().is_empty());
} else {
assert!(v2p_mappings.is_empty());
}
assert!(v2p_mappings.is_empty());
}
}

Expand Down
25 changes: 24 additions & 1 deletion openapi/sled-agent.json
Original file line number Diff line number Diff line change
Expand Up @@ -442,7 +442,7 @@
"content": {
"application/json": {
"schema": {
"$ref": "#/components/schemas/SetVirtualNetworkInterfaceHost"
"$ref": "#/components/schemas/DeleteVirtualNetworkInterfaceHost"
}
}
},
Expand Down Expand Up @@ -1171,6 +1171,29 @@
"service_address"
]
},
"DeleteVirtualNetworkInterfaceHost": {
"description": "The data needed to identify a virtual IP for which a sled maintains an OPTE virtual-to-physical mapping such that that mapping can be deleted.",
"type": "object",
"properties": {
"virtual_ip": {
"description": "The virtual IP whose mapping should be deleted.",
"type": "string",
"format": "ip"
},
"vni": {
"description": "The VNI for the network containing the virtual IP whose mapping should be deleted.",
"allOf": [
{
"$ref": "#/components/schemas/Vni"
}
]
}
},
"required": [
"virtual_ip",
"vni"
]
},
"DiskEnsureBody": {
"description": "Sent from to a sled agent to establish the runtime state of a Disk",
"type": "object",
Expand Down
6 changes: 4 additions & 2 deletions sled-agent/src/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,9 @@ use dropshot::{
HttpResponseDeleted, HttpResponseHeaders, HttpResponseOk,
HttpResponseUpdatedNoContent, Path, Query, RequestContext, TypedBody,
};
use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost;
use illumos_utils::opte::params::{
DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost,
};
use omicron_common::api::external::Error;
use omicron_common::api::internal::nexus::DiskRuntimeState;
use omicron_common::api::internal::nexus::InstanceRuntimeState;
Expand Down Expand Up @@ -600,7 +602,7 @@ async fn set_v2p(
async fn del_v2p(
rqctx: RequestContext<SledAgent>,
_path_params: Path<V2pPathParam>,
body: TypedBody<SetVirtualNetworkInterfaceHost>,
body: TypedBody<DeleteVirtualNetworkInterfaceHost>,
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
let sa = rqctx.context();
let body_args = body.into_inner();
Expand Down
3 changes: 2 additions & 1 deletion sled-agent/src/sim/http_entrypoints.rs
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ use dropshot::HttpResponseUpdatedNoContent;
use dropshot::Path;
use dropshot::RequestContext;
use dropshot::TypedBody;
use illumos_utils::opte::params::DeleteVirtualNetworkInterfaceHost;
use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost;
use omicron_common::api::internal::nexus::DiskRuntimeState;
use omicron_common::api::internal::nexus::InstanceRuntimeState;
Expand Down Expand Up @@ -307,7 +308,7 @@ async fn set_v2p(
async fn del_v2p(
rqctx: RequestContext<Arc<SledAgent>>,
path_params: Path<V2pPathParam>,
body: TypedBody<SetVirtualNetworkInterfaceHost>,
body: TypedBody<DeleteVirtualNetworkInterfaceHost>,
) -> Result<HttpResponseUpdatedNoContent, HttpError> {
let sa = rqctx.context();
let interface_id = path_params.into_inner().interface_id;
Expand Down
18 changes: 15 additions & 3 deletions sled-agent/src/sim/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ use std::str::FromStr;

use crucible_client_types::VolumeConstructionRequest;
use dropshot::HttpServer;
use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost;
use illumos_utils::opte::params::{
DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost,
};
use nexus_client::types::PhysicalDiskKind;
use omicron_common::address::PROPOLIS_PORT;
use propolis_client::Client as PropolisClient;
Expand Down Expand Up @@ -574,11 +576,21 @@ impl SledAgent {
pub async fn unset_virtual_nic_host(
&self,
interface_id: Uuid,
mapping: &SetVirtualNetworkInterfaceHost,
mapping: &DeleteVirtualNetworkInterfaceHost,
) -> Result<(), Error> {
let mut v2p_mappings = self.v2p_mappings.lock().await;
let vec = v2p_mappings.entry(interface_id).or_default();
vec.retain(|x| x != mapping);
vec.retain(|x| {
x.virtual_ip != mapping.virtual_ip || x.vni != mapping.vni
});

// If the last entry was removed, remove the entire interface ID so that
// tests don't have to distinguish never-created entries from
// previously-extant-but-now-empty entries.
if vec.is_empty() {
v2p_mappings.remove(&interface_id);
}

Ok(())
}

Expand Down
6 changes: 4 additions & 2 deletions sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,9 @@ use crate::zone_bundle::BundleError;
use bootstore::schemes::v0 as bootstore;
use camino::Utf8PathBuf;
use dropshot::HttpError;
use illumos_utils::opte::params::SetVirtualNetworkInterfaceHost;
use illumos_utils::opte::params::{
DeleteVirtualNetworkInterfaceHost, SetVirtualNetworkInterfaceHost,
};
use illumos_utils::opte::PortManager;
use illumos_utils::zone::PROPOLIS_ZONE_PREFIX;
use illumos_utils::zone::ZONE_PREFIX;
Expand Down Expand Up @@ -878,7 +880,7 @@ impl SledAgent {

pub async fn unset_virtual_nic_host(
&self,
mapping: &SetVirtualNetworkInterfaceHost,
mapping: &DeleteVirtualNetworkInterfaceHost,
) -> Result<(), Error> {
self.inner
.port_manager
Expand Down

0 comments on commit 9a1f6bc

Please sign in to comment.