From 4a03e4f5444ac023837ff6e07412a8e0b2713ce8 Mon Sep 17 00:00:00 2001 From: Alexandru Agache Date: Tue, 15 Dec 2020 12:41:00 +0200 Subject: [PATCH] add net support Signed-off-by: Alexandru Agache --- coverage_config_x86_64.json | 4 +- src/devices/src/virtio/block/device.rs | 10 +- src/devices/src/virtio/block/mod.rs | 2 +- src/devices/src/virtio/mod.rs | 27 +- src/devices/src/virtio/net/bindings.rs | 269 +++++++++++++++++++ src/devices/src/virtio/net/device.rs | 150 +++++++++++ src/devices/src/virtio/net/mod.rs | 48 ++++ src/devices/src/virtio/net/queue_handler.rs | 95 +++++++ src/devices/src/virtio/net/simple_handler.rs | 186 +++++++++++++ src/devices/src/virtio/net/tap.rs | 203 ++++++++++++++ src/vmm/src/lib.rs | 78 +++++- 11 files changed, 1061 insertions(+), 11 deletions(-) create mode 100644 src/devices/src/virtio/net/bindings.rs create mode 100644 src/devices/src/virtio/net/device.rs create mode 100644 src/devices/src/virtio/net/mod.rs create mode 100644 src/devices/src/virtio/net/queue_handler.rs create mode 100644 src/devices/src/virtio/net/simple_handler.rs create mode 100644 src/devices/src/virtio/net/tap.rs diff --git a/coverage_config_x86_64.json b/coverage_config_x86_64.json index f147e8fa..1841115a 100644 --- a/coverage_config_x86_64.json +++ b/coverage_config_x86_64.json @@ -1,5 +1,5 @@ { - "coverage_score": 80.8, - "exclude_path": "msr_index.rs,mpspec.rs,tests/", + "coverage_score": 69.3, + "exclude_path": "msr_index.rs,mpspec.rs,tests/,src/devices/src/virtio/net/bindings.rs", "crate_features": "" } diff --git a/src/devices/src/virtio/block/device.rs b/src/devices/src/virtio/block/device.rs index c04f03e0..3b2ee1ed 100644 --- a/src/devices/src/virtio/block/device.rs +++ b/src/devices/src/virtio/block/device.rs @@ -47,7 +47,7 @@ where let config_space = build_config_space(&args.file_path)?; let virtio_cfg = VirtioConfig::new(device_features, queues, config_space); - let common_cfg = CommonConfig::new(virtio_cfg, env).map_err(Error::Generic)?; + let common_cfg = CommonConfig::new(virtio_cfg, env).map_err(Error::Virtio)?; Ok(Block { cfg: common_cfg, @@ -70,10 +70,10 @@ where // Register the device on the MMIO bus. env.register_mmio_device(block.clone()) - .map_err(Error::Generic)?; + .map_err(Error::Virtio)?; env.insert_cmdline_str(args.cmdline_config_substring()) - .map_err(Error::Generic)?; + .map_err(Error::Virtio)?; Ok(block) } @@ -128,14 +128,14 @@ impl VirtioDeviceActions for Bloc disk, }; - let mut ioevents = self.cfg.prepare_activate().map_err(Error::Generic)?; + let mut ioevents = self.cfg.prepare_activate().map_err(Error::Virtio)?; let handler = Arc::new(Mutex::new(QueueHandler { inner, ioeventfd: ioevents.remove(0), })); - self.cfg.finalize_activate(handler).map_err(Error::Generic) + self.cfg.finalize_activate(handler).map_err(Error::Virtio) } fn reset(&mut self) -> Result<()> { diff --git a/src/devices/src/virtio/block/mod.rs b/src/devices/src/virtio/block/mod.rs index b7a21c98..82a13918 100644 --- a/src/devices/src/virtio/block/mod.rs +++ b/src/devices/src/virtio/block/mod.rs @@ -31,7 +31,7 @@ const SECTOR_SHIFT: u8 = 9; #[derive(Debug)] pub enum Error { Backend(stdio_executor::Error), - Generic(crate::virtio::Error), + Virtio(crate::virtio::Error), OpenFile(io::Error), Seek(io::Error), } diff --git a/src/devices/src/virtio/mod.rs b/src/devices/src/virtio/mod.rs index df474c2f..931bdfa3 100644 --- a/src/devices/src/virtio/mod.rs +++ b/src/devices/src/virtio/mod.rs @@ -4,6 +4,7 @@ // We're only providing virtio over MMIO devices for now, but we aim to add PCI support as well. pub mod block; +pub mod net; use std::convert::TryFrom; use std::io; @@ -17,7 +18,7 @@ use event_manager::{ }; use kvm_ioctls::{IoEventAddress, VmFd}; use linux_loader::cmdline::Cmdline; -use vm_device::bus::{self, MmioRange}; +use vm_device::bus::{self, MmioAddress, MmioRange}; use vm_device::device_manager::MmioManager; use vm_device::DeviceMmio; use vm_memory::{GuestAddress, GuestAddressSpace}; @@ -58,6 +59,7 @@ pub enum Error { Cmdline(linux_loader::cmdline::Error), Endpoint(EvmgrError), EventFd(io::Error), + Overflow, QueuesNotValid, RegisterIoevent(errno::Error), RegisterIrqfd(errno::Error), @@ -73,6 +75,24 @@ pub struct MmioConfig { pub gsi: u32, } +impl MmioConfig { + pub fn new(base: u64, size: u64, gsi: u32) -> Result { + MmioRange::new(MmioAddress(base), size) + .map(|range| MmioConfig { range, gsi }) + .map_err(Error::Bus) + } + + pub fn next(&self) -> Result { + let range = self.range; + let next_start = range + .base() + .0 + .checked_add(range.size()) + .ok_or(Error::Overflow)?; + Self::new(next_start, range.size(), self.gsi + 1) + } +} + // Represents the environment the devices in this crate current expect in order to be created // and registered with the appropriate buses/handlers/etc. We're always passing a mmio_cfg object // for now, and we'll re-evaluate the mechanism for exposing environment (i.e. maybe we'll do it @@ -313,6 +333,9 @@ pub(crate) mod tests { } } + // Skipping until adding Arm support and figuring out how make the irqchip creation in the + // `Mock` object work there as well. + #[cfg_attr(target_arch = "aarch64", ignore)] #[test] fn test_env() { // Just a dummy device we're going to register on the bus. @@ -348,6 +371,8 @@ pub(crate) mod tests { assert!(mock.kernel_cmdline.as_str().ends_with("ending_string")); } + // Ignoring until aarch64 support is here. + #[cfg_attr(target_arch = "aarch64", ignore)] #[test] fn test_common_config() { let mut mock = EnvMock::new(); diff --git a/src/devices/src/virtio/net/bindings.rs b/src/devices/src/virtio/net/bindings.rs new file mode 100644 index 00000000..3c0d8b87 --- /dev/null +++ b/src/devices/src/virtio/net/bindings.rs @@ -0,0 +1,269 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +// The following are manually copied from crosvm/firecracker. In the latter, they can be found as +// part of the `net_gen` local crate. We should figure out how to proceed going forward (i.e. +// create some bindings of our own, put them in a common crate, etc). + +#![allow(clippy::all)] +#![allow(non_upper_case_globals)] +#![allow(non_camel_case_types)] +#![allow(non_snake_case)] + +pub const TUN_F_CSUM: ::std::os::raw::c_uint = 1; +pub const TUN_F_TSO4: ::std::os::raw::c_uint = 2; +pub const TUN_F_TSO6: ::std::os::raw::c_uint = 4; +pub const TUN_F_UFO: ::std::os::raw::c_uint = 16; + +#[repr(C)] +pub struct __BindgenUnionField(::std::marker::PhantomData); +impl __BindgenUnionField { + #[inline] + pub fn new() -> Self { + __BindgenUnionField(::std::marker::PhantomData) + } + #[inline] + pub unsafe fn as_ref(&self) -> &T { + ::std::mem::transmute(self) + } + #[inline] + pub unsafe fn as_mut(&mut self) -> &mut T { + ::std::mem::transmute(self) + } +} +impl ::std::default::Default for __BindgenUnionField { + #[inline] + fn default() -> Self { + Self::new() + } +} +impl ::std::clone::Clone for __BindgenUnionField { + #[inline] + fn clone(&self) -> Self { + Self::new() + } +} +impl ::std::marker::Copy for __BindgenUnionField {} +impl ::std::fmt::Debug for __BindgenUnionField { + fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> ::std::fmt::Result { + fmt.write_str("__BindgenUnionField") + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifreq { + pub ifr_ifrn: ifreq__bindgen_ty_1, + pub ifr_ifru: ifreq__bindgen_ty_2, +} + +impl Clone for ifreq { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifreq__bindgen_ty_1 { + pub ifrn_name: __BindgenUnionField<[::std::os::raw::c_uchar; 16usize]>, + pub bindgen_union_field: [u8; 16usize], +} + +impl Clone for ifreq__bindgen_ty_1 { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifreq__bindgen_ty_2 { + pub ifru_addr: __BindgenUnionField, + pub ifru_dstaddr: __BindgenUnionField, + pub ifru_broadaddr: __BindgenUnionField, + pub ifru_netmask: __BindgenUnionField, + pub ifru_hwaddr: __BindgenUnionField, + pub ifru_flags: __BindgenUnionField<::std::os::raw::c_short>, + pub ifru_ivalue: __BindgenUnionField<::std::os::raw::c_int>, + pub ifru_mtu: __BindgenUnionField<::std::os::raw::c_int>, + pub ifru_map: __BindgenUnionField, + pub ifru_slave: __BindgenUnionField<[::std::os::raw::c_char; 16usize]>, + pub ifru_newname: __BindgenUnionField<[::std::os::raw::c_char; 16usize]>, + pub ifru_data: __BindgenUnionField<*mut ::std::os::raw::c_void>, + pub ifru_settings: __BindgenUnionField, + pub bindgen_union_field: [u64; 3usize], +} + +impl Clone for ifreq__bindgen_ty_2 { + fn clone(&self) -> Self { + *self + } +} + +pub type sa_family_t = ::std::os::raw::c_ushort; + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct sockaddr { + pub sa_family: sa_family_t, + pub sa_data: [::std::os::raw::c_char; 14usize], +} + +impl Clone for sockaddr { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct if_settings { + pub type_: ::std::os::raw::c_uint, + pub size: ::std::os::raw::c_uint, + pub ifs_ifsu: if_settings__bindgen_ty_1, +} + +impl Clone for if_settings { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct if_settings__bindgen_ty_1 { + pub raw_hdlc: __BindgenUnionField<*mut raw_hdlc_proto>, + pub cisco: __BindgenUnionField<*mut cisco_proto>, + pub fr: __BindgenUnionField<*mut fr_proto>, + pub fr_pvc: __BindgenUnionField<*mut fr_proto_pvc>, + pub fr_pvc_info: __BindgenUnionField<*mut fr_proto_pvc_info>, + pub sync: __BindgenUnionField<*mut sync_serial_settings>, + pub te1: __BindgenUnionField<*mut te1_settings>, + pub bindgen_union_field: u64, +} + +impl Clone for if_settings__bindgen_ty_1 { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct ifmap { + pub mem_start: ::std::os::raw::c_ulong, + pub mem_end: ::std::os::raw::c_ulong, + pub base_addr: ::std::os::raw::c_ushort, + pub irq: ::std::os::raw::c_uchar, + pub dma: ::std::os::raw::c_uchar, + pub port: ::std::os::raw::c_uchar, +} + +impl Clone for ifmap { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct raw_hdlc_proto { + pub encoding: ::std::os::raw::c_ushort, + pub parity: ::std::os::raw::c_ushort, +} + +impl Clone for raw_hdlc_proto { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct cisco_proto { + pub interval: ::std::os::raw::c_uint, + pub timeout: ::std::os::raw::c_uint, +} + +impl Clone for cisco_proto { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto { + pub t391: ::std::os::raw::c_uint, + pub t392: ::std::os::raw::c_uint, + pub n391: ::std::os::raw::c_uint, + pub n392: ::std::os::raw::c_uint, + pub n393: ::std::os::raw::c_uint, + pub lmi: ::std::os::raw::c_ushort, + pub dce: ::std::os::raw::c_ushort, +} + +impl Clone for fr_proto { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto_pvc { + pub dlci: ::std::os::raw::c_uint, +} + +impl Clone for fr_proto_pvc { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct fr_proto_pvc_info { + pub dlci: ::std::os::raw::c_uint, + pub master: [::std::os::raw::c_char; 16usize], +} + +impl Clone for fr_proto_pvc_info { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct sync_serial_settings { + pub clock_rate: ::std::os::raw::c_uint, + pub clock_type: ::std::os::raw::c_uint, + pub loopback: ::std::os::raw::c_ushort, +} + +impl Clone for sync_serial_settings { + fn clone(&self) -> Self { + *self + } +} + +#[repr(C)] +#[derive(Debug, Default, Copy)] +pub struct te1_settings { + pub clock_rate: ::std::os::raw::c_uint, + pub clock_type: ::std::os::raw::c_uint, + pub loopback: ::std::os::raw::c_ushort, + pub slot_map: ::std::os::raw::c_uint, +} + +impl Clone for te1_settings { + fn clone(&self) -> Self { + *self + } +} diff --git a/src/devices/src/virtio/net/device.rs b/src/devices/src/virtio/net/device.rs new file mode 100644 index 00000000..7bddc531 --- /dev/null +++ b/src/devices/src/virtio/net/device.rs @@ -0,0 +1,150 @@ +// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +use std::borrow::{Borrow, BorrowMut}; +use std::ops::DerefMut; +use std::sync::{Arc, Mutex}; + +use vm_device::bus::MmioAddress; +use vm_device::device_manager::MmioManager; +use vm_device::{DeviceMmio, MutDeviceMmio}; +use vm_memory::GuestAddressSpace; +use vm_virtio::device::{VirtioConfig, VirtioDeviceActions, VirtioDeviceType, VirtioMmioDevice}; +use vm_virtio::Queue; + +use crate::virtio::features::{VIRTIO_F_IN_ORDER, VIRTIO_F_RING_EVENT_IDX, VIRTIO_F_VERSION_1}; +use crate::virtio::net::features::*; +use crate::virtio::net::{Error, NetArgs, Result, NET_DEVICE_ID, VIRTIO_NET_HDR_SIZE}; +use crate::virtio::{CommonConfig, Env, SingleFdSignalQueue, QUEUE_MAX_SIZE}; + +use super::bindings; +use super::queue_handler::QueueHandler; +use super::simple_handler::SimpleHandler; +use super::tap::Tap; + +pub struct Net { + cfg: CommonConfig, + tap_name: String, +} + +impl Net +where + M: GuestAddressSpace + Clone + Send + 'static, +{ + pub fn new(env: &mut Env, args: &NetArgs) -> Result>> + where + // We're using this (more convoluted) bound so we can pass both references and smart + // pointers such as mutex guards here. + B: DerefMut, + B::Target: MmioManager>, + { + let device_features = (1 << VIRTIO_F_VERSION_1) + | (1 << VIRTIO_F_RING_EVENT_IDX) + | (1 << VIRTIO_F_IN_ORDER) + | (1 << VIRTIO_NET_F_CSUM) + | (1 << VIRTIO_NET_F_GUEST_CSUM) + | (1 << VIRTIO_NET_F_GUEST_TSO4) + | (1 << VIRTIO_NET_F_GUEST_TSO6) + | (1 << VIRTIO_NET_F_GUEST_UFO) + | (1 << VIRTIO_NET_F_HOST_TSO4) + | (1 << VIRTIO_NET_F_HOST_TSO6) + | (1 << VIRTIO_NET_F_HOST_UFO); + + // An rx/tx queue pair. + let queues = vec![Queue::new(env.mem.clone(), QUEUE_MAX_SIZE); 2]; + // TODO: We'll need a minimal config space to support setting an explicit MAC addr + // on the guest interface at least. We use an empty one for now. + let config_space = Vec::new(); + let virtio_cfg = VirtioConfig::new(device_features, queues, config_space); + + let common_cfg = CommonConfig::new(virtio_cfg, env).map_err(Error::Virtio)?; + + let net = Arc::new(Mutex::new(Net { + cfg: common_cfg, + tap_name: args.tap_name.clone(), + })); + + env.register_mmio_device(net.clone()) + .map_err(Error::Virtio)?; + + Ok(net) + } +} + +impl VirtioDeviceType for Net { + fn device_type(&self) -> u32 { + NET_DEVICE_ID + } +} + +impl Borrow> for Net { + fn borrow(&self) -> &VirtioConfig { + &self.cfg.virtio + } +} + +impl BorrowMut> for Net { + fn borrow_mut(&mut self) -> &mut VirtioConfig { + &mut self.cfg.virtio + } +} + +impl VirtioDeviceActions for Net { + type E = Error; + + fn activate(&mut self) -> Result<()> { + let rxq = self.cfg.virtio.queues[0].clone(); + let txq = self.cfg.virtio.queues[1].clone(); + + let tap = Tap::open_named(self.tap_name.as_str()).map_err(Error::Tap)?; + + // Set offload flags to match the relevant virtio features of the device (for now, + // statically set in the constructor. + tap.set_offload( + bindings::TUN_F_CSUM + | bindings::TUN_F_UFO + | bindings::TUN_F_TSO4 + | bindings::TUN_F_TSO6, + ) + .map_err(Error::Tap)?; + + // The layout of the header is specified in the standard and is 12 bytes in size. We + // should define this somewhere. + tap.set_vnet_hdr_size(VIRTIO_NET_HDR_SIZE as i32) + .map_err(Error::Tap)?; + + let driver_notify = SingleFdSignalQueue { + irqfd: self.cfg.irqfd.clone(), + interrupt_status: self.cfg.virtio.interrupt_status.clone(), + }; + + let inner = SimpleHandler::new(driver_notify, rxq, txq, tap); + + let mut ioevents = self.cfg.prepare_activate().map_err(Error::Virtio)?; + + let handler = Arc::new(Mutex::new(QueueHandler { + inner, + rx_ioevent: ioevents.remove(0), + tx_ioevent: ioevents.remove(0), + })); + + self.cfg.finalize_activate(handler).map_err(Error::Virtio) + } + + fn reset(&mut self) -> std::result::Result<(), Error> { + // Not implemented for now. + Ok(()) + } +} + +impl VirtioMmioDevice for Net {} + +impl MutDeviceMmio for Net { + fn mmio_read(&mut self, _base: MmioAddress, offset: u64, data: &mut [u8]) { + self.read(offset, data); + } + + fn mmio_write(&mut self, _base: MmioAddress, offset: u64, data: &[u8]) { + self.write(offset, data); + } +} diff --git a/src/devices/src/virtio/net/mod.rs b/src/devices/src/virtio/net/mod.rs new file mode 100644 index 00000000..32cb89ae --- /dev/null +++ b/src/devices/src/virtio/net/mod.rs @@ -0,0 +1,48 @@ +// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +mod bindings; +mod device; +mod queue_handler; +mod simple_handler; +pub mod tap; + +pub use device::Net; + +// TODO: Move relevant defines to vm-virtio crate. + +// Values taken from the virtio standard (section 5.1.3 of the 1.1 version). +pub mod features { + pub const VIRTIO_NET_F_CSUM: u64 = 0; + pub const VIRTIO_NET_F_GUEST_CSUM: u64 = 1; + pub const VIRTIO_NET_F_GUEST_TSO4: u64 = 7; + pub const VIRTIO_NET_F_GUEST_TSO6: u64 = 8; + pub const VIRTIO_NET_F_GUEST_UFO: u64 = 10; + pub const VIRTIO_NET_F_HOST_TSO4: u64 = 11; + pub const VIRTIO_NET_F_HOST_TSO6: u64 = 12; + pub const VIRTIO_NET_F_HOST_UFO: u64 = 14; +} + +// Size of the `virtio_net_hdr` structure defined by the standard. +pub const VIRTIO_NET_HDR_SIZE: usize = 12; + +// Net device ID as defined by the standard. +pub const NET_DEVICE_ID: u32 = 1; + +// Prob have to find better names here, but these basically represent the order of the queues. +// If the net device has a single RX/TX pair, then the former has index 0 and the latter 1. When +// the device has multiqueue support, then RX queues have indices 2k, and TX queues 2k+1. +const RXQ_INDEX: u16 = 0; +const TXQ_INDEX: u16 = 1; + +#[derive(Debug)] +pub enum Error { + Virtio(crate::virtio::Error), + Tap(tap::Error), +} + +pub type Result = std::result::Result; + +pub struct NetArgs { + pub tap_name: String, +} diff --git a/src/devices/src/virtio/net/queue_handler.rs b/src/devices/src/virtio/net/queue_handler.rs new file mode 100644 index 00000000..4759c2bf --- /dev/null +++ b/src/devices/src/virtio/net/queue_handler.rs @@ -0,0 +1,95 @@ +// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +use event_manager::{EventOps, Events, MutEventSubscriber}; +use log::error; +use vm_memory::GuestAddressSpace; +use vmm_sys_util::epoll::EventSet; +use vmm_sys_util::eventfd::EventFd; + +use crate::virtio::SingleFdSignalQueue; + +use super::simple_handler::SimpleHandler; + +const TAPFD_DATA: u32 = 0; +const RX_IOEVENT_DATA: u32 = 1; +const TX_IOEVENT_DATA: u32 = 2; + +pub struct QueueHandler { + pub inner: SimpleHandler, + pub rx_ioevent: EventFd, + pub tx_ioevent: EventFd, +} + +impl QueueHandler { + // Helper method that receives an error message to be logged and the `ops` handle + // which is used to unregister all events. + fn handle_error>(&self, s: S, ops: &mut EventOps) { + error!("{}", s.as_ref()); + ops.remove(Events::empty(&self.rx_ioevent)) + .expect("Failed to remove rx ioevent"); + ops.remove(Events::empty(&self.tx_ioevent)) + .expect("Failed to remove tx ioevent"); + ops.remove(Events::empty(&self.inner.tap)) + .expect("Failed to remove tap event"); + } +} + +impl MutEventSubscriber for QueueHandler { + fn process(&mut self, events: Events, ops: &mut EventOps) { + // TODO: We can also consider panicking on the errors that cannot be generated + // or influenced. + + if events.event_set() != EventSet::IN { + self.handle_error("Unexpected event_set", ops); + return; + } + + match events.data() { + TAPFD_DATA => { + if let Err(e) = self.inner.process_tap() { + self.handle_error(format!("Process tap error {:?}", e), ops); + } + } + RX_IOEVENT_DATA => { + if self.rx_ioevent.read().is_err() { + self.handle_error("Rx ioevent read", ops); + } else if let Err(e) = self.inner.process_rxq() { + self.handle_error(format!("Process rx error {:?}", e), ops); + } + } + TX_IOEVENT_DATA => { + if self.tx_ioevent.read().is_err() { + self.handle_error("Tx ioevent read", ops); + } + if let Err(e) = self.inner.process_txq() { + self.handle_error(format!("Process tx error {:?}", e), ops); + } + } + _ => self.handle_error("Unexpected data", ops), + } + } + + fn init(&mut self, ops: &mut EventOps) { + ops.add(Events::with_data( + &self.inner.tap, + TAPFD_DATA, + EventSet::IN | EventSet::EDGE_TRIGGERED, + )) + .expect("Unable to add tapfd"); + + ops.add(Events::with_data( + &self.rx_ioevent, + RX_IOEVENT_DATA, + EventSet::IN, + )) + .expect("Unable to add rxfd"); + + ops.add(Events::with_data( + &self.tx_ioevent, + TX_IOEVENT_DATA, + EventSet::IN, + )) + .expect("Unable to add txfd"); + } +} diff --git a/src/devices/src/virtio/net/simple_handler.rs b/src/devices/src/virtio/net/simple_handler.rs new file mode 100644 index 00000000..7466bb06 --- /dev/null +++ b/src/devices/src/virtio/net/simple_handler.rs @@ -0,0 +1,186 @@ +// Copyright 2020 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 OR BSD-3-Clause + +use std::cmp; +use std::io::{self, Read, Write}; +use std::result; + +use log::warn; +use vm_memory::{Bytes, GuestAddressSpace}; +use vm_virtio::{DescriptorChain, Queue}; + +use crate::virtio::net::tap::Tap; +use crate::virtio::net::{RXQ_INDEX, TXQ_INDEX}; +use crate::virtio::SignalUsedQueue; + +// According to the standard: "If the VIRTIO_NET_F_GUEST_TSO4, VIRTIO_NET_F_GUEST_TSO6 or +// VIRTIO_NET_F_GUEST_UFO features are used, the maximum incoming packet will be to 65550 +// bytes long (the maximum size of a TCP or UDP packet, plus the 14 byte ethernet header), +// otherwise 1514 bytes. The 12-byte struct virtio_net_hdr is prepended to this, making for +// 65562 or 1526 bytes." For transmission, the standard states "The header and packet are added +// as one output descriptor to the transmitq, and the device is notified of the new entry". +// We assume the TX frame will not exceed this size either. +const MAX_BUFFER_SIZE: usize = 65562; + +#[derive(Debug)] +pub enum Error { + GuestMemory(vm_memory::GuestMemoryError), + Queue(vm_virtio::Error), + Tap(io::Error), +} + +impl From for Error { + fn from(e: vm_virtio::Error) -> Self { + Error::Queue(e) + } +} + +// A simple handler implementation for a RX/TX queue pair, which does not make assumptions about +// the way queue notification is implemented. The backend is not yet generic (we always assume a +// `Tap` object), but we're looking at improving that going forward. +// TODO: Find a better name. +pub struct SimpleHandler { + pub driver_notify: S, + pub rxq: Queue, + pub rxbuf_current: usize, + pub rxbuf: [u8; MAX_BUFFER_SIZE], + pub txq: Queue, + pub txbuf: [u8; MAX_BUFFER_SIZE], + pub tap: Tap, +} + +impl SimpleHandler { + pub fn new(driver_notify: S, rxq: Queue, txq: Queue, tap: Tap) -> Self { + SimpleHandler { + driver_notify, + rxq, + rxbuf_current: 0, + rxbuf: [0u8; MAX_BUFFER_SIZE], + txq, + txbuf: [0u8; MAX_BUFFER_SIZE], + tap, + } + } + + // Have to see how to approach error handling for the `Queue` implementation in particular, + // because many situations are not really recoverable. We should consider reporting them based + // on the metrics/events solution when they appear, and not propagate them further unless + // it's really useful/necessary. + fn write_frame_to_guest(&mut self) -> result::Result { + let num_bytes = self.rxbuf_current; + + let mut chain = match self.rxq.iter()?.next() { + Some(c) => c, + _ => return Ok(false), + }; + + let mut count = 0; + let buf = &mut self.rxbuf[..num_bytes]; + + while let Some(desc) = chain.next() { + let left = buf.len() - count; + + if left == 0 { + break; + } + + let len = cmp::min(left, desc.len() as usize); + chain + .memory() + .write_slice(&buf[count..count + len], desc.addr()) + .map_err(Error::GuestMemory)?; + + count += len; + } + + if count != buf.len() { + // The frame was too large for the chain. + warn!("rx frame too large"); + } + + self.rxq.add_used(chain.head_index(), count as u32)?; + + self.rxbuf_current = 0; + + Ok(true) + } + + pub fn process_tap(&mut self) -> result::Result<(), Error> { + loop { + if self.rxbuf_current == 0 { + match self.tap.read(&mut self.rxbuf) { + Ok(n) => self.rxbuf_current = n, + Err(_) => { + // TODO: Do something (logs, metrics, etc.) in response to an error when + // reading from tap. EAGAIN means there's nothing available to read anymore + // (because we open the TAP as non-blocking). + break; + } + } + } + + if !self.write_frame_to_guest()? && !self.rxq.enable_notification()? { + break; + } + } + + if self.rxq.needs_notification()? { + self.driver_notify.signal_used_queue(RXQ_INDEX); + } + + Ok(()) + } + + fn send_frame_from_chain( + &mut self, + chain: &mut DescriptorChain, + ) -> result::Result { + let mut count = 0; + + while let Some(desc) = chain.next() { + let left = self.txbuf.len() - count; + let len = desc.len() as usize; + + if len > left { + warn!("tx frame too large"); + break; + } + + chain + .memory() + .read_slice(&mut self.txbuf[count..count + len], desc.addr()) + .map_err(Error::GuestMemory)?; + + count += len; + } + + self.tap.write(&self.txbuf[..count]).map_err(Error::Tap)?; + + Ok(count as u32) + } + + pub fn process_txq(&mut self) -> result::Result<(), Error> { + loop { + self.txq.disable_notification()?; + + while let Some(mut chain) = self.txq.iter()?.next() { + self.send_frame_from_chain(&mut chain)?; + + self.txq.add_used(chain.head_index(), 0)?; + + if self.txq.needs_notification()? { + self.driver_notify.signal_used_queue(TXQ_INDEX); + } + } + + if !self.txq.enable_notification()? { + return Ok(()); + } + } + } + + pub fn process_rxq(&mut self) -> result::Result<(), Error> { + self.rxq.disable_notification()?; + self.process_tap() + } +} diff --git a/src/devices/src/virtio/net/tap.rs b/src/devices/src/virtio/net/tap.rs new file mode 100644 index 00000000..9c29498f --- /dev/null +++ b/src/devices/src/virtio/net/tap.rs @@ -0,0 +1,203 @@ +// Copyright 2018 Amazon.com, Inc. or its affiliates. All Rights Reserved. +// SPDX-License-Identifier: Apache-2.0 +// +// Portions Copyright 2017 The Chromium OS Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the THIRD-PARTY file. + +// We should add a tap abstraction to rust-vmm as well. Using this one, which is copied from +// Firecracker until then. + +use std::fs::File; +use std::io::{Error as IoError, Read, Result as IoResult, Write}; +use std::os::raw::{c_char, c_int, c_uint, c_ulong}; +use std::os::unix::io::{AsRawFd, FromRawFd, RawFd}; + +use vmm_sys_util::ioctl::{ioctl_with_mut_ref, ioctl_with_ref, ioctl_with_val}; +use vmm_sys_util::{ioctl_expr, ioctl_ioc_nr, ioctl_iow_nr}; + +use super::bindings::ifreq; + +// As defined in the Linux UAPI: +// https://elixir.bootlin.com/linux/v4.17/source/include/uapi/linux/if.h#L33 +const IFACE_NAME_MAX_LEN: usize = 16; + +// Taken from firecracker net_gen/if_tun.rs ... we should see what to do about the net related +// bindings overall for rust-vmm. +const IFF_TAP: ::std::os::raw::c_uint = 2; +const IFF_NO_PI: ::std::os::raw::c_uint = 4096; +const IFF_VNET_HDR: ::std::os::raw::c_uint = 16384; + +/// List of errors the tap implementation can throw. +#[derive(Debug)] +pub enum Error { + /// Unable to create tap interface. + CreateTap(IoError), + /// Invalid interface name. + InvalidIfname, + /// ioctl failed. + IoctlError(IoError), + /// Couldn't open /dev/net/tun. + OpenTun(IoError), +} + +pub type Result = ::std::result::Result; + +const TUNTAP: ::std::os::raw::c_uint = 84; +ioctl_iow_nr!(TUNSETIFF, TUNTAP, 202, ::std::os::raw::c_int); +ioctl_iow_nr!(TUNSETOFFLOAD, TUNTAP, 208, ::std::os::raw::c_uint); +ioctl_iow_nr!(TUNSETVNETHDRSZ, TUNTAP, 216, ::std::os::raw::c_int); + +/// Handle for a network tap interface. +/// +/// For now, this simply wraps the file descriptor for the tap device so methods +/// can run ioctls on the interface. The tap interface fd will be closed when +/// Tap goes out of scope, and the kernel will clean up the interface automatically. +#[derive(Debug)] +pub struct Tap { + tap_file: File, + pub(crate) if_name: [u8; IFACE_NAME_MAX_LEN], +} + +// Returns a byte vector representing the contents of a null terminated C string which +// contains if_name. +fn build_terminated_if_name(if_name: &str) -> Result<[u8; IFACE_NAME_MAX_LEN]> { + // Convert the string slice to bytes, and shadow the variable, + // since we no longer need the &str version. + let if_name = if_name.as_bytes(); + + if if_name.len() >= IFACE_NAME_MAX_LEN { + return Err(Error::InvalidIfname); + } + + let mut terminated_if_name = [b'\0'; IFACE_NAME_MAX_LEN]; + terminated_if_name[..if_name.len()].copy_from_slice(if_name); + + Ok(terminated_if_name) +} + +pub struct IfReqBuilder(ifreq); + +impl IfReqBuilder { + #[allow(clippy::new_without_default)] + pub fn new() -> Self { + Self(Default::default()) + } + + pub fn if_name(mut self, if_name: &[u8; IFACE_NAME_MAX_LEN]) -> Self { + // Since we don't call as_mut on the same union field more than once, this block is safe. + let ifrn_name = unsafe { self.0.ifr_ifrn.ifrn_name.as_mut() }; + ifrn_name.copy_from_slice(if_name.as_ref()); + + self + } + + pub(crate) fn flags(mut self, flags: i16) -> Self { + // Since we don't call as_mut on the same union field more than once, this block is safe. + let ifru_flags = unsafe { self.0.ifr_ifru.ifru_flags.as_mut() }; + *ifru_flags = flags; + + self + } + + pub(crate) fn execute(mut self, socket: &F, ioctl: u64) -> Result { + // ioctl is safe. Called with a valid socket fd, and we check the return. + let ret = unsafe { ioctl_with_mut_ref(socket, ioctl, &mut self.0) }; + if ret < 0 { + return Err(Error::IoctlError(IoError::last_os_error())); + } + + Ok(self.0) + } +} + +impl Tap { + /// Create a TUN/TAP device given the interface name. + /// # Arguments + /// + /// * `if_name` - the name of the interface. + pub fn open_named(if_name: &str) -> Result { + let terminated_if_name = build_terminated_if_name(if_name)?; + + let fd = unsafe { + // Open calls are safe because we give a constant null-terminated + // string and verify the result. + libc::open( + b"/dev/net/tun\0".as_ptr() as *const c_char, + libc::O_RDWR | libc::O_NONBLOCK | libc::O_CLOEXEC, + ) + }; + if fd < 0 { + return Err(Error::OpenTun(IoError::last_os_error())); + } + // We just checked that the fd is valid. + let tuntap = unsafe { File::from_raw_fd(fd) }; + + let ifreq = IfReqBuilder::new() + .if_name(&terminated_if_name) + .flags((IFF_TAP | IFF_NO_PI | IFF_VNET_HDR) as i16) + .execute(&tuntap, TUNSETIFF())?; + + // Safe since only the name is accessed, and it's cloned out. + Ok(Tap { + tap_file: tuntap, + if_name: unsafe { *ifreq.ifr_ifrn.ifrn_name.as_ref() }, + }) + } + + pub fn if_name_as_str(&self) -> &str { + let len = self + .if_name + .iter() + .position(|x| *x == 0) + .unwrap_or(IFACE_NAME_MAX_LEN); + std::str::from_utf8(&self.if_name[..len]).unwrap_or("") + } + + /// Set the offload flags for the tap interface. + pub fn set_offload(&self, flags: c_uint) -> Result<()> { + // ioctl is safe. Called with a valid tap fd, and we check the return. + let ret = unsafe { ioctl_with_val(&self.tap_file, TUNSETOFFLOAD(), c_ulong::from(flags)) }; + if ret < 0 { + return Err(Error::IoctlError(IoError::last_os_error())); + } + + Ok(()) + } + + /// Set the size of the vnet hdr. + pub fn set_vnet_hdr_size(&self, size: c_int) -> Result<()> { + // ioctl is safe. Called with a valid tap fd, and we check the return. + let ret = unsafe { ioctl_with_ref(&self.tap_file, TUNSETVNETHDRSZ(), &size) }; + if ret < 0 { + return Err(Error::IoctlError(IoError::last_os_error())); + } + + Ok(()) + } +} + +impl Read for Tap { + fn read(&mut self, buf: &mut [u8]) -> IoResult { + self.tap_file.read(buf) + } +} + +impl Write for Tap { + fn write(&mut self, buf: &[u8]) -> IoResult { + self.tap_file.write(&buf) + } + + fn flush(&mut self) -> IoResult<()> { + Ok(()) + } +} + +impl AsRawFd for Tap { + fn as_raw_fd(&self) -> RawFd { + self.tap_file.as_raw_fd() + } +} + +// TODO: If we don't end up using an external abstraction for `Tap` interfaces, add unit tests +// based on a mock framework that do not require elevated privileges to run. diff --git a/src/vmm/src/lib.rs b/src/vmm/src/lib.rs index 112f1685..7b302c54 100644 --- a/src/vmm/src/lib.rs +++ b/src/vmm/src/lib.rs @@ -42,7 +42,9 @@ use vmm_sys_util::{epoll::EventSet, eventfd::EventFd, terminal::Terminal}; use boot::build_bootparams; pub use config::*; use devices::virtio::block::{self, BlockArgs}; +use devices::virtio::net::{self, NetArgs}; use devices::virtio::{Env, MmioConfig}; + use serial::SerialWrapper; use vm_vcpu::vcpu::{cpuid::filter_cpuid, VcpuState}; use vm_vcpu::vm::{self, ExitHandler, KvmVm, VmState}; @@ -97,6 +99,8 @@ pub enum Error { IO(io::Error), /// Failed to load kernel. KernelLoad(loader::Error), + /// Failed to create net device. + Net(net::Error), /// Address stored in the rip registry does not fit in guest memory. RipOutOfGuestMemory, /// Invalid KVM API version. @@ -125,6 +129,7 @@ impl std::convert::From for Error { pub type Result = std::result::Result; type Block = block::Block>; +type Net = net::Net>; /// A live VMM. pub struct VMM { @@ -141,6 +146,7 @@ pub struct VMM { event_mgr: EventManager>>, exit_handler: WrappedExitHandler, block_devices: Vec>>, + net_devices: Vec>>, } // The `VmmExitHandler` is used as the mechanism for exiting from the event manager loop. @@ -231,11 +237,14 @@ impl TryFrom for VMM { kernel_cfg: config.kernel_config, exit_handler: wrapped_exit_handler, block_devices: Vec::new(), + net_devices: Vec::new(), }; vmm.create_vcpus(&config.vcpu_config)?; vmm.add_serial_console()?; + // Adding the virtio devices. We'll come up with a cleaner abstraction for `Env`. + // We transiently define a mut `Cmdline` object here to pass for device creation // (devices expect a `&mut Cmdline` to leverage the newly added virtio_mmio // functionality), until we figure out how this fits with the rest of the vmm, which @@ -243,8 +252,12 @@ impl TryFrom for VMM { // somehow ASAP. let mut cmdline = cmdline::Cmdline::new(4096); - if let Some(block_cfg) = config.block_config.as_ref() { - vmm.add_block_device(block_cfg, &mut cmdline)?; + if let Some(cfg) = config.block_config.as_ref() { + vmm.add_block_device(cfg, &mut cmdline)?; + } + + if let Some(cfg) = config.net_config.as_ref() { + vmm.add_net_device(cfg, &mut cmdline)?; } if !cmdline.as_str().is_empty() { @@ -398,6 +411,10 @@ impl VMM { Ok(()) } + // All methods that add a virtio device use hardcoded addresses and interrupts for now, and + // only support a single device. We need to expand this, but it looks like a good match if we + // can do it after figuring out how to better separate concerns and make the VMM agnostic of + // the actual device types. fn add_block_device( &mut self, cfg: &BlockConfig, @@ -433,6 +450,34 @@ impl VMM { Ok(()) } + fn add_net_device(&mut self, cfg: &NetConfig, cmdline: &mut cmdline::Cmdline) -> Result<()> { + let mem = Arc::new(self.guest_memory.clone()); + + let range = MmioRange::new(MmioAddress(MMIO_GAP_START + 0x1000), 0x1000).unwrap(); + let mmio_cfg = MmioConfig { range, gsi: 6 }; + + let mut guard = self.device_mgr.lock().unwrap(); + + let mut env = Env { + mem, + vm_fd: self.vm.vm_fd(), + event_mgr: &mut self.event_mgr, + mmio_mgr: guard.deref_mut(), + mmio_cfg, + kernel_cmdline: cmdline, + }; + + let args = NetArgs { + tap_name: cfg.tap_name.clone(), + }; + + // We can also hold this somewhere if we need to keep the handle for later. + let net = Net::new(&mut env, &args).map_err(Error::Net)?; + self.net_devices.push(net); + + Ok(()) + } + // Helper function that computes the kernel_load_addr needed by the // VcpuState when creating the Vcpus. fn compute_kernel_load_addr(&self, kernel_load: &KernelLoaderResult) -> Result { @@ -582,6 +627,7 @@ mod tests { kernel_cfg: vmm_config.kernel_config, exit_handler, block_devices: Vec::new(), + net_devices: Vec::new(), } } @@ -814,4 +860,32 @@ mod tests { // MMIO range. assert!(vmm.add_block_device(&block_config, &mut cmdline).is_err()); } + + #[test] + fn test_add_net() { + let vmm_config = default_vmm_config(); + let mut vmm = mock_vmm(vmm_config); + + // The device only attempts to open the tap interface during activation, so we can + // specify any name here for now. + let cfg = NetConfig { + tap_name: "imaginary_tap".to_owned(), + }; + + { + let mut cmdline = cmdline::Cmdline::new(4096); + assert!(vmm.add_net_device(&cfg, &mut cmdline).is_ok()); + assert_eq!(vmm.net_devices.len(), 1); + assert!(cmdline.as_str().contains("virtio")); + } + + { + // The current implementation of the VMM does not allow more than one net device + // as we are hard coding the `MmioConfig`. + let mut cmdline = cmdline::Cmdline::new(4096); + // This currently fails because a device is already registered with the provided + // MMIO range. + assert!(vmm.add_net_device(&cfg, &mut cmdline).is_err()); + } + } }