diff --git a/crates/viona-api/src/lib.rs b/crates/viona-api/src/lib.rs index 7e2a54551..aad2dde93 100644 --- a/crates/viona-api/src/lib.rs +++ b/crates/viona-api/src/lib.rs @@ -65,7 +65,7 @@ impl VionaFd { Ok(vers as u32) } - /// Check VMM ioctl command against those known to not require any + /// Check viona ioctl command against those known to not require any /// copyin/copyout to function. const fn ioctl_usize_safe(cmd: i32) -> bool { matches!( @@ -76,7 +76,8 @@ impl VionaFd { | ioctls::VNA_IOC_RING_PAUSE | ioctls::VNA_IOC_RING_INTR_CLR | ioctls::VNA_IOC_VERSION - | ioctls::VNA_IOC_SET_PROMISC + | ioctls::VNA_IOC_SET_NOTIFY_IOP + | ioctls::VNA_IOC_SET_PROMISC, ) } } diff --git a/lib/propolis/src/hw/pci/bar.rs b/lib/propolis/src/hw/pci/bar.rs index c1dfe8939..bbe5370a8 100644 --- a/lib/propolis/src/hw/pci/bar.rs +++ b/lib/propolis/src/hw/pci/bar.rs @@ -121,33 +121,35 @@ impl Bars { &mut self, bar: BarN, val: u32, - ) -> Option<(BarDefine, u64, u64)> { + ) -> Option { let idx = bar as usize; let ent = &mut self.entries[idx]; - let (def, old, new) = match ent.kind { + let (id, def, val_old, val_new) = match ent.kind { EntryKind::Empty => return None, EntryKind::Pio(size) => { let mask = u32::from(!(size - 1)); let old = ent.value; ent.value = u64::from(val & mask); - (BarDefine::Pio(size), old, ent.value) + (bar, BarDefine::Pio(size), old, ent.value) } EntryKind::Mmio(size) => { let mask = !(size - 1); let old = ent.value; ent.value = u64::from(val & mask); - (BarDefine::Mmio(size), old, ent.value) + (bar, BarDefine::Mmio(size), old, ent.value) } EntryKind::Mmio64(size) => { let old = ent.value; let mask = !(size - 1) as u32; let low = val & mask; ent.value = (old & (0xffffffff << 32)) | u64::from(low); - (BarDefine::Mmio64(size), old, ent.value) + (bar, BarDefine::Mmio64(size), old, ent.value) } EntryKind::Mmio64High => { assert!(idx > 0); - let ent = &mut self.entries[idx - 1]; + let real_idx = idx - 1; + let id = BarN::from_repr(real_idx as u8).unwrap(); + let ent = &mut self.entries[real_idx]; let size = match ent.kind { EntryKind::Mmio64(sz) => sz, _ => panic!(), @@ -156,11 +158,11 @@ impl Bars { let old = ent.value; let high = ((u64::from(val) << 32) & mask) & 0xffffffff00000000; ent.value = high | (old & 0xffffffff); - (BarDefine::Mmio64(size), old, ent.value) + (id, BarDefine::Mmio64(size), old, ent.value) } }; - if old != new { - return Some((def, old, new)); + if val_old != val_new { + return Some(WriteResult { id, def, val_old, val_new }); } None } @@ -287,6 +289,18 @@ impl Bars { } } +/// Result from a write to a BAR +pub struct WriteResult { + /// Identifier of the actual impacted BAR. + /// + /// If write was to the high word of a 64-bit BAR, this would hold the + /// `BarN` for the lower word. + pub id: BarN, + pub def: BarDefine, + pub val_old: u64, + pub val_new: u64, +} + pub mod migrate { use serde::{Deserialize, Serialize}; diff --git a/lib/propolis/src/hw/pci/device.rs b/lib/propolis/src/hw/pci/device.rs index 4907aab70..32ca014b8 100644 --- a/lib/propolis/src/hw/pci/device.rs +++ b/lib/propolis/src/hw/pci/device.rs @@ -15,11 +15,11 @@ use crate::migrate::*; use crate::util::regmap::{Flags, RegMap}; use lazy_static::lazy_static; +use strum::IntoEnumIterator; pub trait Device: Send + Sync + 'static { fn device_state(&self) -> &DeviceState; - #[allow(unused_variables)] fn bar_rw(&self, bar: BarN, rwo: RWOp) { match rwo { RWOp::Read(ro) => { @@ -30,7 +30,6 @@ pub trait Device: Send + Sync + 'static { } } } - #[allow(unused_variables)] fn cfg_rw(&self, region: u8, rwo: RWOp) { match rwo { RWOp::Read(ro) => { @@ -46,6 +45,13 @@ pub trait Device: Send + Sync + 'static { fn interrupt_mode_change(&self, mode: IntrMode) {} #[allow(unused_variables)] fn msi_update(&self, info: MsiUpdate) {} + + /// Notification that configuration of BAR(s) has changed, either due to + /// writes to the BARs themselves, or an overall status change (via the + /// Command register or a device reset). + #[allow(unused_variables)] + fn bar_update(&self, bstate: BarState) {} + // TODO // fn cap_read(&self); // fn cap_write(&self); @@ -181,6 +187,18 @@ impl State { fn attached(&self) -> &bus::Attachment { self.attach.as_ref().unwrap() } + /// Is MMIO access decoding enabled? + fn mmio_en(&self) -> bool { + self.reg_command.contains(RegCmd::MMIO_EN) + } + /// Is PIO access decoding enabled? + fn pio_en(&self) -> bool { + self.reg_command.contains(RegCmd::IO_EN) + } + /// Given the device state, is decoding enabled for a specified [BarDefine] + fn decoding_active(&self, bar: &BarDefine) -> bool { + (bar.is_pio() && self.pio_en()) || (bar.is_mmio() && self.mmio_en()) + } } pub(super) struct Cap { @@ -375,15 +393,17 @@ impl DeviceState { StdCfgReg::Bar(bar) => { let val = wo.read_u32(); let mut state = self.state.lock().unwrap(); - if let Some((def, _old, new)) = state.bars.reg_write(*bar, val) - { - let pio_en = state.reg_command.contains(RegCmd::IO_EN); - let mmio_en = state.reg_command.contains(RegCmd::MMIO_EN); - + if let Some(res) = state.bars.reg_write(*bar, val) { let attach = state.attached(); - if (pio_en && def.is_pio()) || (mmio_en && def.is_mmio()) { - attach.bar_unregister(*bar); - attach.bar_register(*bar, def, new); + if state.decoding_active(&res.def) { + attach.bar_unregister(res.id); + attach.bar_register(res.id, res.def, res.val_new); + dev.bar_update(BarState { + id: res.id, + def: res.def, + value: res.val_new, + decode_en: true, + }); } } } @@ -425,6 +445,8 @@ impl DeviceState { // Update BAR registrations if diff.intersects(RegCmd::IO_EN | RegCmd::MMIO_EN) { + let pio_en = val.contains(RegCmd::IO_EN); + let mmio_en = val.contains(RegCmd::MMIO_EN); for n in BarN::iter() { let bar = state.bars.get(n); if bar.is_none() { @@ -433,18 +455,30 @@ impl DeviceState { let (def, v) = bar.unwrap(); if diff.contains(RegCmd::IO_EN) && def.is_pio() { - if val.contains(RegCmd::IO_EN) { + if pio_en { attach.bar_register(n, def, v); } else { attach.bar_unregister(n); } + dev.bar_update(BarState { + id: n, + def, + value: v, + decode_en: pio_en, + }); } if diff.contains(RegCmd::MMIO_EN) && def.is_mmio() { - if val.contains(RegCmd::MMIO_EN) { + if mmio_en { attach.bar_register(n, def, v); } else { attach.bar_unregister(n); } + dev.bar_update(BarState { + id: n, + def, + value: v, + decode_en: mmio_en, + }); } } } @@ -482,6 +516,17 @@ impl DeviceState { self.which_intr_mode(&state) } + pub(crate) fn bar(&self, id: BarN) -> Option { + let state = self.state.lock().unwrap(); + state.bars.get(id).map(|(def, value)| { + let decode_en = match def { + BarDefine::Pio(_) => state.pio_en(), + BarDefine::Mmio(_) | BarDefine::Mmio64(_) => state.mmio_en(), + }; + BarState { id, def, value, decode_en } + }) + } + fn cfg_cap_rw(&self, dev: &dyn Device, id: &CfgReg, rwo: RWOp) { match id { CfgReg::CapId(i) => { @@ -608,10 +653,7 @@ impl DeviceState { let attach = inner.attached(); for n in BarN::iter() { if let Some((def, addr)) = inner.bars.get(n) { - let pio_en = inner.reg_command.contains(RegCmd::IO_EN); - let mmio_en = inner.reg_command.contains(RegCmd::MMIO_EN); - - if (pio_en && def.is_pio()) || (mmio_en && def.is_mmio()) { + if inner.decoding_active(&def) { attach.bar_register(n, def, addr); } } @@ -1093,6 +1135,15 @@ impl Clone for MsixHdl { } } +/// Describes the state of a BAR +pub struct BarState { + pub id: BarN, + pub def: BarDefine, + pub value: u64, + /// Is decoding for this BAR enabled in the device control? + pub decode_en: bool, +} + pub struct Builder { ident: Ident, lintr_support: bool, diff --git a/lib/propolis/src/hw/pci/mod.rs b/lib/propolis/src/hw/pci/mod.rs index 161d018c0..fb07ea53b 100644 --- a/lib/propolis/src/hw/pci/mod.rs +++ b/lib/propolis/src/hw/pci/mod.rs @@ -12,7 +12,7 @@ use std::sync::{Arc, Mutex}; use crate::common::*; use crate::intr_pins::IntrPin; -use strum::FromRepr; +use strum::{EnumIter, FromRepr}; pub mod bar; pub mod bits; @@ -244,7 +244,9 @@ impl Display for Bdf { } } -#[derive(Copy, Clone, Eq, PartialEq, Debug, Ord, PartialOrd, FromRepr)] +#[derive( + Copy, Clone, Eq, PartialEq, Debug, Ord, PartialOrd, FromRepr, EnumIter, +)] #[repr(u8)] pub enum BarN { BAR0 = 0, @@ -254,23 +256,6 @@ pub enum BarN { BAR4, BAR5, } -impl BarN { - fn iter() -> BarIter { - BarIter { n: 0 } - } -} -struct BarIter { - n: u8, -} -impl Iterator for BarIter { - type Item = BarN; - - fn next(&mut self) -> Option { - let res = BarN::from_repr(self.n)?; - self.n += 1; - Some(res) - } -} #[repr(u8)] #[derive(Copy, Clone)] diff --git a/lib/propolis/src/hw/virtio/pci.rs b/lib/propolis/src/hw/virtio/pci.rs index 1a75bdc5b..7e1005798 100644 --- a/lib/propolis/src/hw/virtio/pci.rs +++ b/lib/propolis/src/hw/virtio/pci.rs @@ -88,6 +88,26 @@ impl VirtioState { pub trait PciVirtio: VirtioDevice + Send + Sync + 'static { fn virtio_state(&self) -> &PciVirtioState; fn pci_state(&self) -> &pci::DeviceState; + + #[allow(unused_variables)] + /// Notification that the IO port representing the queue notification + /// register in the device BAR has changed. + fn notify_port_update(&self, state: Option) {} + + /// Noticiation from the PCI emulation that one of the BARs has undergone a + /// change of configuration + fn bar_update(&self, bstate: pci::BarState) { + if bstate.id == pci::BarN::BAR0 { + // Notify the device about the location (if any) of the queue notify + // register in the containing BAR region. + let port = if bstate.decode_en { + Some(bstate.value as u16 + LEGACY_REG_OFF_QUEUE_NOTIFY as u16) + } else { + None + }; + self.notify_port_update(port); + } + } } impl pci::Device for D { @@ -163,6 +183,10 @@ impl pci::Device for D { state.intr_mode_updating = false; vs.state_cv.notify_all(); } + + fn bar_update(&self, bstate: pci::BarState) { + PciVirtio::bar_update(self, bstate); + } } pub struct PciVirtioState { @@ -422,9 +446,9 @@ impl PciVirtioState { /// Indicate to the guest that the VirtIO device has encountered an error of /// some sort and requires a reset. - pub fn set_needs_reset(&self, _dev: &dyn VirtioDevice) { + pub fn set_needs_reset(&self, dev: &dyn VirtioDevice) { let mut state = self.state.lock().unwrap(); - self.needs_reset_locked(_dev, &mut state); + self.needs_reset_locked(dev, &mut state); } fn queue_notify(&self, dev: &dyn VirtioDevice, queue: u16) { @@ -618,6 +642,12 @@ impl MigrateMulti for dyn PciVirtio { // to the VirtIO state. vs.set_intr_mode(ps, ps.get_intr_mode().into(), true); + // Perform a (potentially spurious) update notification for the BAR + // containing the virtio registers. This ensures that anything + // interested in the placement of that BAR (such as the notify-port + // logic) is kept well aware + self.bar_update(ps.bar(pci::BarN::BAR0).unwrap()); + Ok(()) } } @@ -790,6 +820,7 @@ enum VirtioTop { const LEGACY_REG_SZ: usize = 0x18; const LEGACY_REG_SZ_NO_MSIX: usize = 0x14; +const LEGACY_REG_OFF_QUEUE_NOTIFY: usize = 0x10; #[derive(Copy, Clone, Eq, PartialEq, Debug)] enum LegacyReg { diff --git a/lib/propolis/src/hw/virtio/viona.rs b/lib/propolis/src/hw/virtio/viona.rs index 4276aac51..c2576bef3 100644 --- a/lib/propolis/src/hw/virtio/viona.rs +++ b/lib/propolis/src/hw/virtio/viona.rs @@ -12,6 +12,7 @@ use std::sync::{Arc, Condvar, Mutex, Weak}; use crate::common::*; use crate::hw::pci; +use crate::lifecycle::{self, IndicatedState}; use crate::migrate::*; use crate::util::regmap::RegMap; use crate::vmm::VmmHdl; @@ -70,11 +71,16 @@ enum VRingState { struct Inner { poller: Option, + iop_state: Option, vring_state: [VRingState; 2], } impl Inner { fn new() -> Self { - Self { poller: None, vring_state: [Default::default(); 2] } + Self { + poller: None, + iop_state: None, + vring_state: [Default::default(); 2], + } } /// Get the `VRingState` for a given VirtQueue @@ -91,6 +97,7 @@ impl Inner { pub struct PciVirtioViona { virtio_state: PciVirtioState, pci_state: pci::DeviceState, + indicator: lifecycle::Indicator, dev_features: u32, mac_addr: [u8; ETHERADDRL], @@ -138,6 +145,7 @@ impl PciVirtioViona { let mut this = PciVirtioViona { virtio_state, pci_state, + indicator: Default::default(), dev_features, mac_addr: [0; ETHERADDRL], @@ -322,6 +330,21 @@ impl PciVirtioViona { drop(inner); wait_state.wait_stopped(); } + + // Transition the emulation to a "running" state, either at initial start-up + // or resumption from a "paused" state. + fn run(&self) { + self.poller_start(); + if self.queues_restart().is_err() { + self.virtio_state.set_needs_reset(self); + self.notify_port_update(None); + } else { + // If all is well with the queue restart, attempt to wire up the + // notification ioport again. + let state = self.inner.lock().unwrap(); + let _ = self.hdl.set_notify_iop(state.iop_state); + } + } } impl VirtioDevice for PciVirtioViona { fn cfg_rw(&self, mut rwo: RWOp) { @@ -441,20 +464,25 @@ impl Lifecycle for PciVirtioViona { self.virtio_state.reset(self); } fn start(&self) -> anyhow::Result<()> { - // This device initializes into a paused state. Starting it is - // equivalent to resuming it. - self.resume(); + self.run(); + self.indicator.start(); Ok(()) } fn pause(&self) { self.poller_stop(false); self.queues_sync(); + + // In case the device is being paused because of a pending instance + // reinitialization (as part of a reboot/reset), the notification ioport + // binding must be torn down. Bhyve will emit failure of an attempted + // reinitialization operation if any ioport hooks persist at that time. + let _ = self.hdl.set_notify_iop(None); + + self.indicator.pause(); } fn resume(&self) { - self.poller_start(); - if self.queues_restart().is_err() { - self.virtio_state.set_needs_reset(self); - } + self.run(); + self.indicator.resume(); } fn halt(&self) { self.poller_stop(true); @@ -462,6 +490,7 @@ impl Lifecycle for PciVirtioViona { // destruction. self.queues_kill(); let _ = self.hdl.delete(); + self.indicator.halt(); } fn migrate(&self) -> Migrator { Migrator::Multi(self) @@ -475,6 +504,18 @@ impl PciVirtio for PciVirtioViona { fn pci_state(&self) -> &pci::DeviceState { &self.pci_state } + fn notify_port_update(&self, port: Option) { + let mut state = self.inner.lock().unwrap(); + state.iop_state = port; + + // The notification ioport for the device can change due to guest + // action, or other administrative tasks within propolis. We want to + // update the in-kernel IO port hook only in the former case, when the + // device emulation is actually running. + if self.indicator.state() == IndicatedState::Run { + let _ = self.hdl.set_notify_iop(port); + } + } } impl MigrateMulti for PciVirtioViona { @@ -648,6 +689,23 @@ impl VionaHdl { Ok(()) } + /// Set the IO port to which viona attaches for virtqueue notifications + /// + /// The viona driver is able to install an IO port hook in the associated VM + /// at a specified address in order to process `out` operations which would + /// result in the in-kernel emulated virtqueues being notified of available + /// buffers. + /// + /// With a non-zero argument, viona will attempt to attach such a hook. + /// When the argument is zero, any existing hook is torn down. + fn set_notify_iop(&self, port: Option) -> io::Result<()> { + self.0.ioctl_usize( + viona_api::VNA_IOC_SET_NOTIFY_IOP, + port.unwrap_or(0) as usize, + )?; + Ok(()) + } + /// Set the desired promiscuity level on this interface. #[cfg(feature = "falcon")] fn set_promisc(&self, p: viona_api::viona_promisc_t) -> io::Result<()> { diff --git a/lib/propolis/src/lifecycle.rs b/lib/propolis/src/lifecycle.rs index 0192c04f1..1cc2fafc3 100644 --- a/lib/propolis/src/lifecycle.rs +++ b/lib/propolis/src/lifecycle.rs @@ -2,6 +2,8 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. +use std::sync::atomic::{AtomicUsize, Ordering}; + use futures::future::{self, BoxFuture}; use crate::migrate::Migrator; @@ -87,3 +89,96 @@ pub trait Lifecycle: Send + Sync + 'static { Migrator::Empty } } + +/// Indicator for tracking [Lifecycle] states. +/// +/// As implementors of the [Lifecycle] trait are driven through various state +/// changes when called through its functions, the underlying resource may want +/// to keep track of which state it is in. Device emulation may wish to +/// postpone certain setup tasks when undergoing state import during a +/// migration, for example. +/// +/// The [Indicator] is meant as a convenience tool for such implementors to +/// simply track the current state by calling the appropriate methods +/// corresponding to their [Lifecycle] counterparts. +#[derive(Default)] +pub struct Indicator(AtomicUsize); +impl Indicator { + pub const fn new() -> Self { + Self(AtomicUsize::new(IndicatedState::Init as usize)) + } + /// Indicate that holder is has started + /// + /// To be called as part of the implementor's [Lifecycle::start()] method + pub fn start(&self) { + self.state_change(IndicatedState::Run); + } + /// Indicate that holder is has paused + /// + /// To be called as when [Lifecycle::pause()] has been called on the + /// implementor _and_ that pause has completed (if there is async work + /// required for such a state). + pub fn pause(&self) { + self.state_change(IndicatedState::Pause); + } + /// Indicate that holder is has resumed + /// + /// To be called as part of the implementor's [Lifecycle::resume()] method + pub fn resume(&self) { + self.state_change(IndicatedState::Run); + } + /// Indicate that holder is has halted + /// + /// To be called as part of the implementor's [Lifecycle::halt()] method + pub fn halt(&self) { + self.state_change(IndicatedState::Halt); + } + /// Get the currently indicated state for the holder + pub fn state(&self) -> IndicatedState { + IndicatedState::from_usize(self.0.load(Ordering::SeqCst)) + } + + fn state_change(&self, new: IndicatedState) { + let old = self.0.swap(new as usize, Ordering::SeqCst); + IndicatedState::assert_valid_transition( + IndicatedState::from_usize(old), + new, + ); + } +} + +/// Represents the current state held in [Indicator] +#[derive(Copy, Clone, Eq, PartialEq, strum::FromRepr, Debug)] +#[repr(usize)] +pub enum IndicatedState { + /// Lifecycle entity has been created, but has not been + /// (started)[Lifecycle::start()] yet. + Init, + /// Lifecycle entity is running + Run, + /// Lifecycle entity is paused + Pause, + /// Lifecycle entity has halted + Halt, +} +impl IndicatedState { + const fn valid_transition(old: Self, new: Self) -> bool { + use IndicatedState::*; + match (old, new) { + (Init, Run) | (Init, Halt) => true, + (Run, Pause) => true, + (Pause, Run) | (Pause, Halt) => true, + _ => false, + } + } + fn from_usize(raw: usize) -> Self { + IndicatedState::from_repr(raw) + .expect("raw value {raw} should be valid IndicatedState") + } + fn assert_valid_transition(old: Self, new: Self) { + assert!( + Self::valid_transition(old, new), + "transition from {old:?} to {new:?} is not valid" + ); + } +}