Skip to content

Commit

Permalink
Setup the VMM reservoir in a background task
Browse files Browse the repository at this point in the history
Fixes the first part of #5121
  • Loading branch information
andrewjstone committed Feb 22, 2024
1 parent a5be09f commit 1d0ed8c
Show file tree
Hide file tree
Showing 6 changed files with 286 additions and 135 deletions.
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions sled-agent/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dpd-client.workspace = true
display-error-chain.workspace = true
dropshot.workspace = true
flate2.workspace = true
flume.workspace = true
futures.workspace = true
glob.workspace = true
hex.workspace = true
Expand Down
112 changes: 11 additions & 101 deletions sled-agent/src/instance_manager.rs
Original file line number Diff line number Diff line change
Expand Up @@ -13,24 +13,24 @@ use crate::params::{
InstanceHardware, InstanceMigrationSourceParams, InstancePutStateResponse,
InstanceStateRequested, InstanceUnregisterResponse,
};
use crate::vmm_reservoir::VmmReservoirManagerHandle;
use crate::zone_bundle::BundleError;
use crate::zone_bundle::ZoneBundler;
use omicron_common::api::external::ByteCount;

use anyhow::anyhow;
use illumos_utils::dladm::Etherstub;
use illumos_utils::link::VnicAllocator;
use illumos_utils::opte::PortManager;
use illumos_utils::running_zone::ZoneBuilderFactory;
use illumos_utils::vmm_reservoir;
use omicron_common::api::external::ByteCount;
use omicron_common::api::internal::nexus::InstanceRuntimeState;
use omicron_common::api::internal::nexus::SledInstanceState;
use omicron_common::api::internal::nexus::VmmRuntimeState;
use sled_storage::manager::StorageHandle;
use slog::Logger;
use std::collections::BTreeMap;
use std::net::SocketAddr;
use std::sync::{Arc, Mutex};
use std::sync::Arc;
use tokio::sync::{mpsc, oneshot};
use uuid::Uuid;

Expand All @@ -48,12 +48,6 @@ pub enum Error {
#[error("OPTE port management error: {0}")]
Opte(#[from] illumos_utils::opte::Error),

#[error("Failed to create reservoir: {0}")]
Reservoir(#[from] vmm_reservoir::Error),

#[error("Invalid reservoir configuration: {0}")]
ReservoirConfig(String),

#[error("Cannot find data link: {0}")]
Underlay(#[from] sled_hardware::underlay::Error),

Expand All @@ -72,12 +66,6 @@ pub enum Error {
RequestDropped(#[from] oneshot::error::RecvError),
}

pub enum ReservoirMode {
None,
Size(u32),
Percentage(u8),
}

pub(crate) struct InstanceManagerServices {
pub nexus_client: NexusClientWithResolver,
pub vnic_allocator: VnicAllocator<Etherstub>,
Expand All @@ -90,14 +78,8 @@ pub(crate) struct InstanceManagerServices {
// Describes the internals of the "InstanceManager", though most of the
// instance manager's state exists within the "InstanceManagerRunner" structure.
struct InstanceManagerInternal {
log: Logger,
tx: mpsc::Sender<InstanceManagerRequest>,
// NOTE: Arguably, this field could be "owned" by the InstanceManagerRunner.
// It was not moved there, and the reservoir functions were not converted to
// use the message-passing interface (see: "InstanceManagerRequest") because
// callers of "get/set reservoir size" are not async, and (in the case of
// getting the size) they also do not expect a "Result" type.
reservoir_size: Mutex<ByteCount>,
vmm_reservoir_manager: VmmReservoirManagerHandle,

#[allow(dead_code)]
runner_handle: tokio::task::JoinHandle<()>,
Expand All @@ -110,6 +92,7 @@ pub struct InstanceManager {

impl InstanceManager {
/// Initializes a new [`InstanceManager`] object.
#[allow(clippy::too_many_arguments)]
pub fn new(
log: Logger,
nexus_client: NexusClientWithResolver,
Expand All @@ -118,6 +101,7 @@ impl InstanceManager {
storage: StorageHandle,
zone_bundler: ZoneBundler,
zone_builder_factory: ZoneBuilderFactory,
vmm_reservoir_manager: VmmReservoirManagerHandle,
) -> Result<InstanceManager, Error> {
let (tx, rx) = mpsc::channel(QUEUE_SIZE);
let (terminate_tx, terminate_rx) = mpsc::unbounded_channel();
Expand All @@ -142,91 +126,13 @@ impl InstanceManager {

Ok(Self {
inner: Arc::new(InstanceManagerInternal {
log,
tx,
// no reservoir size set on startup
reservoir_size: Mutex::new(ByteCount::from_kibibytes_u32(0)),
vmm_reservoir_manager,
runner_handle,
}),
})
}

/// Sets the VMM reservoir to the requested percentage of usable physical
/// RAM or to a size in MiB. Either mode will round down to the nearest
/// aligned size required by the control plane.
pub fn set_reservoir_size(
&self,
hardware: &sled_hardware::HardwareManager,
mode: ReservoirMode,
) -> Result<(), Error> {
let hardware_physical_ram_bytes = hardware.usable_physical_ram_bytes();
let req_bytes = match mode {
ReservoirMode::None => return Ok(()),
ReservoirMode::Size(mb) => {
let bytes = ByteCount::from_mebibytes_u32(mb).to_bytes();
if bytes > hardware_physical_ram_bytes {
return Err(Error::ReservoirConfig(format!(
"cannot specify a reservoir of {bytes} bytes when \
physical memory is {hardware_physical_ram_bytes} bytes",
)));
}
bytes
}
ReservoirMode::Percentage(percent) => {
if !matches!(percent, 1..=99) {
return Err(Error::ReservoirConfig(format!(
"VMM reservoir percentage of {} must be between 0 and \
100",
percent
)));
};
(hardware_physical_ram_bytes as f64 * (percent as f64 / 100.0))
.floor() as u64
}
};

let req_bytes_aligned = vmm_reservoir::align_reservoir_size(req_bytes);

if req_bytes_aligned == 0 {
warn!(
self.inner.log,
"Requested reservoir size of {} bytes < minimum aligned size \
of {} bytes",
req_bytes,
vmm_reservoir::RESERVOIR_SZ_ALIGN
);
return Ok(());
}

// The max ByteCount value is i64::MAX, which is ~8 million TiB.
// As this value is either a percentage of DRAM or a size in MiB
// represented as a u32, constructing this should always work.
let reservoir_size = ByteCount::try_from(req_bytes_aligned).unwrap();
if let ReservoirMode::Percentage(percent) = mode {
info!(
self.inner.log,
"{}% of {} physical ram = {} bytes)",
percent,
hardware_physical_ram_bytes,
req_bytes,
);
}
info!(
self.inner.log,
"Setting reservoir size to {reservoir_size} bytes"
);
vmm_reservoir::ReservoirControl::set(reservoir_size)?;

*self.inner.reservoir_size.lock().unwrap() = reservoir_size;

Ok(())
}

/// Returns the last-set size of the reservoir
pub fn reservoir_size(&self) -> ByteCount {
*self.inner.reservoir_size.lock().unwrap()
}

pub async fn ensure_registered(
&self,
instance_id: Uuid,
Expand Down Expand Up @@ -379,6 +285,10 @@ impl InstanceManager {
.map_err(|_| Error::FailedSendInstanceManagerClosed)?;
rx.await?
}

pub fn reservoir_size(&self) -> ByteCount {
self.inner.vmm_reservoir_manager.reservoir_size()
}
}

// Most requests that can be sent to the "InstanceManagerRunner" task.
Expand Down
1 change: 1 addition & 0 deletions sled-agent/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,7 @@ mod smf_helper;
mod storage_monitor;
mod swap_device;
mod updates;
mod vmm_reservoir;
mod zone_bundle;

#[cfg(test)]
Expand Down
50 changes: 16 additions & 34 deletions sled-agent/src/sled_agent.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use crate::bootstrap::early_networking::{
};
use crate::bootstrap::params::{BaseboardId, StartSledAgentRequest};
use crate::config::Config;
use crate::instance_manager::{InstanceManager, ReservoirMode};
use crate::instance_manager::InstanceManager;
use crate::long_running_tasks::LongRunningTaskHandles;
use crate::metrics::MetricsManager;
use crate::nexus::{ConvertInto, NexusClientWithResolver, NexusRequestQueue};
Expand All @@ -25,6 +25,7 @@ use crate::params::{
use crate::services::{self, ServiceManager};
use crate::storage_monitor::UnderlayAccess;
use crate::updates::{ConfigUpdates, UpdateManager};
use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager};
use crate::zone_bundle;
use crate::zone_bundle::BundleError;
use bootstore::schemes::v0 as bootstore;
Expand Down Expand Up @@ -413,6 +414,19 @@ impl SledAgent {
.map_err(|_| ())
.expect("Failed to send to StorageMonitor");

// Configure the VMM reservoir as either a percentage of DRAM or as an
// exact size in MiB.
let reservoir_mode = ReservoirMode::from_config(
config.vmm_reservoir_percentage,
config.vmm_reservoir_size_mb,
);

let vmm_reservoir_manager = VmmReservoirManager::spawn(
&log,
long_running_task_handles.hardware_manager.clone(),
reservoir_mode,
);

let instances = InstanceManager::new(
parent_log.clone(),
nexus_client.clone(),
Expand All @@ -421,41 +435,9 @@ impl SledAgent {
storage_manager.clone(),
long_running_task_handles.zone_bundler.clone(),
ZoneBuilderFactory::default(),
vmm_reservoir_manager,
)?;

// Configure the VMM reservoir as either a percentage of DRAM or as an
// exact size in MiB.
let reservoir_mode = match (
config.vmm_reservoir_percentage,
config.vmm_reservoir_size_mb,
) {
(None, None) => ReservoirMode::None,
(Some(p), None) => ReservoirMode::Percentage(p),
(None, Some(mb)) => ReservoirMode::Size(mb),
(Some(_), Some(_)) => panic!(
"only one of vmm_reservoir_percentage and \
vmm_reservoir_size_mb is allowed"
),
};

match reservoir_mode {
ReservoirMode::None => warn!(log, "Not using VMM reservoir"),
ReservoirMode::Size(0) | ReservoirMode::Percentage(0) => {
warn!(log, "Not using VMM reservoir (size 0 bytes requested)")
}
_ => {
instances
.set_reservoir_size(
&long_running_task_handles.hardware_manager,
reservoir_mode,
)
.map_err(|e| {
error!(log, "Failed to setup VMM reservoir: {e}");
e
})?;
}
}

let update_config = ConfigUpdates {
zone_artifact_path: Utf8PathBuf::from("/opt/oxide"),
};
Expand Down
Loading

0 comments on commit 1d0ed8c

Please sign in to comment.