From c3e1198d77aeade173b495b1a8dec8622b52260e Mon Sep 17 00:00:00 2001 From: Harshavardhan Unnibhavi Date: Thu, 9 Sep 2021 23:55:41 +0200 Subject: [PATCH 1/2] Inflight I/O: Implement missing traits This commit implements the get and set inflight fd members of the VhostUserSlaveReqHandlerMut trait, which is used to pass inflight I/O queue tracking regions as memfds. Fixes #14. Signed-off-by: Harshavardhan Unnibhavi --- src/handler.rs | 517 +++++++++++++++++++++++++++++++++++-- tests/vhost-user-server.rs | 2 +- 2 files changed, 497 insertions(+), 22 deletions(-) diff --git a/src/handler.rs b/src/handler.rs index f61faaa..47102be 100644 --- a/src/handler.rs +++ b/src/handler.rs @@ -6,12 +6,17 @@ use std::error; use std::fs::File; use std::io; -use std::os::unix::io::AsRawFd; +use std::mem; +use std::os::unix::io::{AsRawFd, FromRawFd}; +use std::slice; use std::sync::Arc; use std::thread; +use libc::c_void; + use vhost::vhost_user::message::{ - VhostUserConfigFlags, VhostUserMemoryRegion, VhostUserProtocolFeatures, + DescStatePacked, DescStateSplit, QueueRegionPacked, QueueRegionSplit, VhostUserConfigFlags, + VhostUserInflight, VhostUserMemoryRegion, VhostUserProtocolFeatures, VhostUserSingleMemoryRegion, VhostUserVirtioFeatures, VhostUserVringAddrFlags, VhostUserVringState, }; @@ -19,11 +24,13 @@ use vhost::vhost_user::{ Error as VhostUserError, Result as VhostUserResult, SlaveFsCacheReq, VhostUserSlaveReqHandlerMut, }; +use virtio_bindings::bindings::virtio_net::VIRTIO_F_RING_PACKED; use virtio_bindings::bindings::virtio_ring::VIRTIO_RING_F_EVENT_IDX; use vm_memory::bitmap::Bitmap; use vm_memory::mmap::NewBitmap; use vm_memory::{ - FileOffset, GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestRegionMmap, MmapRegion, + Address, FileOffset, GuestAddress, GuestAddressSpace, GuestMemoryMmap, GuestRegionMmap, + MmapRegion, }; use vmm_sys_util::epoll::EventSet; @@ -71,6 +78,38 @@ struct AddrMapping { gpa_base: u64, } +#[derive(Default)] +struct InflightFdState { + inflight_file: Option, + inflight_mapping_addr: Option, + inflight_mmap_size: usize, +} + +impl InflightFdState { + fn new() -> Self { + Self::default() + } + + fn set_inflight_state( + &mut self, + inflight_file: Option, + inflight_mapping_addr: Option, + inflight_mmap_size: usize, + ) { + self.inflight_file = inflight_file; + self.inflight_mapping_addr = inflight_mapping_addr; + self.inflight_mmap_size = inflight_mmap_size; + } + + fn get_inflight_mapping_addr(&self) -> Option { + self.inflight_mapping_addr + } + + fn get_inflight_mmap_size(&self) -> usize { + self.inflight_mmap_size + } +} + pub struct VhostUserHandler { backend: S, handlers: Vec>>, @@ -85,6 +124,7 @@ pub struct VhostUserHandler { atomic_mem: GM, vrings: Vec, worker_threads: Vec>>, + inflight_state: InflightFdState, } // Ensure VhostUserHandler: Clone + Send + Sync + 'static. @@ -143,6 +183,7 @@ where atomic_mem, vrings, worker_threads, + inflight_state: InflightFdState::new(), }) } } @@ -492,24 +533,6 @@ where self.backend.set_slave_req_fd(vu_req); } - fn get_inflight_fd( - &mut self, - _inflight: &vhost::vhost_user::message::VhostUserInflight, - ) -> VhostUserResult<(vhost::vhost_user::message::VhostUserInflight, File)> { - // Assume the backend hasn't negotiated the inflight feature; it - // wouldn't be correct for the backend to do so, as we don't (yet) - // provide a way for it to handle such requests. - Err(VhostUserError::InvalidOperation) - } - - fn set_inflight_fd( - &mut self, - _inflight: &vhost::vhost_user::message::VhostUserInflight, - _file: File, - ) -> VhostUserResult<()> { - Err(VhostUserError::InvalidOperation) - } - fn get_max_mem_slots(&mut self) -> VhostUserResult { Ok(MAX_MEM_SLOTS) } @@ -577,6 +600,237 @@ where Ok(()) } + + fn get_inflight_fd( + &mut self, + inflight: &vhost::vhost_user::message::VhostUserInflight, + ) -> VhostUserResult<(vhost::vhost_user::message::VhostUserInflight, File)> { + // Total size of the inflight queue region + let total_mmap_size = + self.get_inflight_queue_size(inflight.queue_size) * inflight.num_queues as usize; + + // Create a memfd region to hold the queues for inflight I/O tracking + let (dup_file, total_mmap_size) = self.memfd_alloc(total_mmap_size)?; + + let dup_inflight_file = match dup_file { + Some(f) => f, + None => return Err(VhostUserError::SlaveInternalError), + }; + + Ok(( + VhostUserInflight { + mmap_size: total_mmap_size as u64, + mmap_offset: 0, + num_queues: inflight.num_queues, + queue_size: inflight.queue_size, + }, + dup_inflight_file, + )) + } + + fn set_inflight_fd( + &mut self, + inflight: &vhost::vhost_user::message::VhostUserInflight, + file: File, + ) -> VhostUserResult<()> { + let ret_val = -1; + + // Need to unmap any previously mmaped regions as closing the + // associated file doesn't unmap it automatically. + // unsafe as extern munmap is called + if let Some(inflight_addr) = self.inflight_state.get_inflight_mapping_addr() { + unsafe { + libc::munmap( + inflight_addr.raw_value() as *mut c_void, + self.inflight_state.get_inflight_mmap_size(), + ) + }; + } + + let mmap_size = inflight.mmap_size; + let mmap_offset = inflight.mmap_offset; + + // mmap the file to the memfd region, unsafe as extern mmap is called + let mmap_ptr = unsafe { + libc::mmap( + std::ptr::null_mut::(), + mmap_size as usize, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + file.as_raw_fd(), + mmap_offset as i64, + ) + }; + + if mmap_ptr == ret_val as *mut c_void { + self.inflight_state.set_inflight_state(None, None, 0); + return Err(VhostUserError::SlaveInternalError); + } else { + self.inflight_state.set_inflight_state( + Some(file), + Some(GuestAddress::new(mmap_ptr as u64)), + mmap_size as usize, + ); + } + + self.set_inflight_region_desc_num(inflight.num_queues, inflight.queue_size); + + Ok(()) + } +} + +impl VhostUserHandler +where + S: VhostUserBackend, + V: VringT>, + B: NewBitmap + Clone, +{ + fn get_inflight_queue_size(&mut self, queue_size: u16) -> usize { + let queue_region_size; + let descr_state_size; + let virtio_features = self.get_features().unwrap(); + + if virtio_features & (1 << VIRTIO_F_RING_PACKED) == 0 { + // Use descriptor and queue states for split virtqueues + queue_region_size = mem::size_of::(); + descr_state_size = mem::size_of::(); + } else { + // Use descriptor and queue states for packed virtqueues + queue_region_size = mem::size_of::(); + descr_state_size = mem::size_of::(); + } + queue_region_size + descr_state_size * queue_size as usize + } + + fn memfd_alloc(&mut self, mmap_size: usize) -> VhostUserResult<(Option, usize)> { + let mut ret_val; + let inflight_file; + let dup_inflight_file; + + // create an anonymous file and return a file descriptor to it. + // unsafe as extern syscall funciton is called + ret_val = unsafe { + libc::syscall( + libc::SYS_memfd_create, + &std::ffi::CString::new("inflight-region").unwrap(), + libc::MFD_ALLOW_SEALING, + ) + }; + + if ret_val == -1 { + return Err(VhostUserError::MemFdCreateError); + } + + // safe as this is the sole owner of the file descriptor returned from memfd_create + inflight_file = unsafe { File::from_raw_fd(ret_val as i32) }; + + // truncate the inflight_file to "mmap_size" length. + // unsafe as extern "ftruncate" is called + ret_val = unsafe { libc::ftruncate(inflight_file.as_raw_fd(), mmap_size as i64) } as i64; + + if ret_val == -1 { + return Err(VhostUserError::FileTrucateError); + } + + // place seals to restrict further modifications to the inflight_file file. + // unsafe as extern fcntl is called. + ret_val = unsafe { + libc::fcntl( + inflight_file.as_raw_fd(), + libc::F_ADD_SEALS, + libc::F_SEAL_GROW | libc::F_SEAL_SHRINK | libc::F_SEAL_SEAL, + ) + } as i64; + + if ret_val == -1 { + return Err(VhostUserError::MemFdSealError); + } + + // duplicate the inflight_file file descriptor. + // unsafe as extern dup is called. + ret_val = unsafe { libc::dup(inflight_file.as_raw_fd()).into() }; + + if ret_val == -1 { + return Err(VhostUserError::MemFdCreateError); + } + + // safe as this is the sole owner of the dup'ed memfd + dup_inflight_file = unsafe { File::from_raw_fd(ret_val as i32) }; + + ret_val = -1; + + // map the inflight_file into memory. + // unsafe as extern mmap is called + let mmap_ptr = unsafe { + libc::mmap( + std::ptr::null_mut(), + mmap_size, + libc::PROT_READ | libc::PROT_WRITE, + libc::MAP_SHARED, + inflight_file.as_raw_fd(), + 0, + ) + }; + + if mmap_ptr == ret_val as *mut c_void { + self.inflight_state.set_inflight_state(None, None, 0); + Ok((None, 0)) + } else { + // Zero out the memory mapped region, unsafe as extern memset is called + unsafe { libc::memset(mmap_ptr, 0, mmap_size) }; + + self.inflight_state.set_inflight_state( + Some(inflight_file), + Some(GuestAddress::new(mmap_ptr as u64)), + mmap_size, + ); + + Ok((Some(dup_inflight_file), mmap_size)) + } + } + + fn set_desc_num_packed(&mut self, inflight_region: u64, num_queues: u16, queue_size: u16) { + let regions = unsafe { + slice::from_raw_parts_mut( + inflight_region as *mut QueueRegionPacked, + num_queues as usize, + ) + }; + + for r in regions.iter_mut() { + r.desc_num = queue_size; + } + } + + fn set_desc_num_split(&mut self, inflight_region: u64, num_queues: u16, queue_size: u16) { + let regions = unsafe { + slice::from_raw_parts_mut( + inflight_region as *mut QueueRegionSplit, + num_queues as usize, + ) + }; + + for r in regions.iter_mut() { + r.desc_num = queue_size; + } + } + + fn set_inflight_region_desc_num(&mut self, num_queues: u16, queue_size: u16) { + // unwrap is safe as we check that there are no errors in mapping the memfd file in the caller + // by ensuring that the mapping address is valid. + let inflight_region = self + .inflight_state + .get_inflight_mapping_addr() + .unwrap() + .raw_value(); + + let virtio_features = self.get_features().unwrap(); + + match virtio_features & (1 << VIRTIO_F_RING_PACKED) { + 0 => self.set_desc_num_split(inflight_region, num_queues, queue_size), + _ => self.set_desc_num_packed(inflight_region, num_queues, queue_size), + }; + } } impl Drop for VhostUserHandler { @@ -591,3 +845,224 @@ impl Drop for VhostUserHandler { } } } + +#[cfg(test)] +pub mod tests { + use super::*; + use crate::VringRwLock; + use std::{os::unix::prelude::RawFd, result}; + use vm_memory::GuestMemoryAtomic; + use vmm_sys_util::eventfd::EventFd; + + #[derive(Clone)] + pub struct TestVhostBackend { + packed: bool, + } + + impl TestVhostBackend { + fn new(packed: bool) -> Self { + TestVhostBackend { packed } + } + } + + impl VhostUserBackend for TestVhostBackend { + fn num_queues(&self) -> usize { + 2 + } + + fn max_queue_size(&self) -> usize { + 256 + } + + fn features(&self) -> u64 { + if !self.packed { + return !(1 << VIRTIO_F_RING_PACKED); + } + 0xffff_ffff_ffff_ffff + } + + fn protocol_features(&self) -> VhostUserProtocolFeatures { + VhostUserProtocolFeatures::all() + } + + fn set_event_idx(&self, _enabled: bool) {} + + fn update_memory( + &self, + _mem: GuestMemoryAtomic, + ) -> result::Result<(), io::Error> { + Ok(()) + } + + fn exit_event(&self, _thread_index: usize) -> Option { + let event_fd = EventFd::new(0).unwrap(); + + Some(event_fd) + } + + fn handle_event( + &self, + _device_event: u16, + _evset: EventSet, + _vrings: &[VringRwLock], + _thread_id: usize, + ) -> result::Result { + Ok(true) + } + } + + fn check_memfd_flags(memfd_file: RawFd, dup_memfd_file: RawFd) { + let memfd_seals = unsafe { libc::fcntl(memfd_file, libc::F_GET_SEALS) }; + let dup_memfd_seals = unsafe { libc::fcntl(dup_memfd_file, libc::F_GET_SEALS) }; + + assert_eq!(memfd_seals, dup_memfd_seals); + assert_ne!(0, memfd_seals & libc::F_SEAL_GROW); + assert_ne!(0, memfd_seals & libc::F_SEAL_SHRINK); + assert_ne!(0, memfd_seals & libc::F_SEAL_SEAL); + } + + fn dummy_set_inflight_region( + vhost_user_handler: &mut VhostUserHandler, + ) { + vhost_user_handler.set_inflight_region_desc_num( + vhost_user_handler.backend.num_queues() as u16, + vhost_user_handler.backend.max_queue_size() as u16, + ); + } + + #[test] + fn test_get_inflight_queue_size() { + let mut vhost_user_handler_packed = VhostUserHandler::new( + TestVhostBackend::new(true), + GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ), + ) + .unwrap(); + let packed_queue_size = vhost_user_handler_packed + .get_inflight_queue_size(vhost_user_handler_packed.backend.max_queue_size() as u16); + + assert_eq!(packed_queue_size, 8229); + + let mut vhost_user_handler_split = VhostUserHandler::new( + TestVhostBackend::new(false), + GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ), + ) + .unwrap(); + let split_queue_size = vhost_user_handler_split + .get_inflight_queue_size(vhost_user_handler_split.backend.max_queue_size() as u16); + assert_eq!(split_queue_size, 4120); + } + + #[test] + fn test_memfd_alloc() { + let mut vhost_user_handler_packed = VhostUserHandler::new( + TestVhostBackend::new(true), + GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ), + ) + .unwrap(); + + if let Ok((dup_memfd_file, _)) = vhost_user_handler_packed.memfd_alloc(1024) { + if dup_memfd_file.is_some() { + check_memfd_flags( + vhost_user_handler_packed + .inflight_state + .inflight_file + .as_ref() + .unwrap() + .try_clone() + .unwrap() + .as_raw_fd(), + dup_memfd_file.as_ref().unwrap().as_raw_fd(), + ); + + assert_eq!( + 1024, + vhost_user_handler_packed + .inflight_state + .inflight_file + .as_ref() + .unwrap() + .metadata() + .unwrap() + .len() + ); + + assert_eq!( + vhost_user_handler_packed + .inflight_state + .inflight_file + .as_ref() + .unwrap() + .metadata() + .unwrap() + .len(), + dup_memfd_file.as_ref().unwrap().metadata().unwrap().len() + ); + } + } + } + + #[test] + fn test_set_inflight_desc_num() { + let mut vhost_user_handler_packed = VhostUserHandler::new( + TestVhostBackend::new(true), + GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ), + ) + .unwrap(); + + if let Ok((_, _)) = vhost_user_handler_packed.memfd_alloc(8229) { + dummy_set_inflight_region(&mut vhost_user_handler_packed); + + let raw_ptr = vhost_user_handler_packed + .inflight_state + .get_inflight_mapping_addr() + .unwrap() + .raw_value() as *mut QueueRegionPacked; + + for i in 0..vhost_user_handler_packed.backend.num_queues() { + unsafe { + let queue_region = raw_ptr.add(i); + assert_eq!( + vhost_user_handler_packed.backend.max_queue_size() as u16, + std::ptr::read_unaligned(std::ptr::addr_of!((*queue_region).desc_num)) + ); + } + } + } + + let mut vhost_user_handler_split = VhostUserHandler::new( + TestVhostBackend::new(false), + GuestMemoryAtomic::new( + GuestMemoryMmap::<()>::from_ranges(&[(GuestAddress(0x100000), 0x10000)]).unwrap(), + ), + ) + .unwrap(); + + if let Ok((_, _)) = vhost_user_handler_split.memfd_alloc(4120) { + dummy_set_inflight_region(&mut vhost_user_handler_split); + + let raw_ptr = vhost_user_handler_split + .inflight_state + .get_inflight_mapping_addr() + .unwrap() + .raw_value() as *mut QueueRegionSplit; + + for i in 0..vhost_user_handler_split.backend.num_queues() { + unsafe { + let queue_region = raw_ptr.add(i); + assert_eq!( + vhost_user_handler_split.backend.max_queue_size() as u16, + std::ptr::read_unaligned(std::ptr::addr_of!((*queue_region).desc_num)) + ); + } + } + } + } +} diff --git a/tests/vhost-user-server.rs b/tests/vhost-user-server.rs index 3065647..b262798 100644 --- a/tests/vhost-user-server.rs +++ b/tests/vhost-user-server.rs @@ -288,7 +288,7 @@ fn vhost_user_get_inflight(path: &Path, barrier: Arc) { num_queues: 1, queue_size: 256, }; - assert!(master.get_inflight_fd(&inflight).is_err()); + assert!(master.get_inflight_fd(&inflight).is_ok()); } #[test] From 89e34929774429cce4329ff4f9e3f358a10a5e3d Mon Sep 17 00:00:00 2001 From: Harshavardhan Unnibhavi Date: Wed, 22 Dec 2021 15:32:14 +0100 Subject: [PATCH 2/2] x86_64 coverage: reduce by 1.4 Signed-off-by: Harshavardhan Unnibhavi --- coverage_config_x86_64.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/coverage_config_x86_64.json b/coverage_config_x86_64.json index 7abd6c6..0e94b25 100644 --- a/coverage_config_x86_64.json +++ b/coverage_config_x86_64.json @@ -1,5 +1,5 @@ { - "coverage_score": 87.7, + "coverage_score": 86.3, "exclude_path": "", "crate_features": "" }