From 801dec46dbe3daf4b4918422a4b76c6a6c3854b0 Mon Sep 17 00:00:00 2001 From: Florian Hartung Date: Thu, 22 Feb 2024 15:06:47 +0100 Subject: [PATCH 1/9] refactor(syscall): write out abbreviations for request and response --- core/src/syscall.rs | 56 +++++++++++++-------------- hypervisor/src/hypervisor/syscall.rs | 46 +++++++++++----------- partition/src/syscall.rs | 57 +++++++++++++++------------- 3 files changed, 83 insertions(+), 76 deletions(-) diff --git a/core/src/syscall.rs b/core/src/syscall.rs index 8dcb474..b11d27a 100644 --- a/core/src/syscall.rs +++ b/core/src/syscall.rs @@ -179,21 +179,21 @@ pub enum ApexSyscall { } #[derive(Debug, PartialEq)] -pub struct SyscallRequ { +pub struct SyscallRequest { pub id: ApexSyscall, pub params: Vec, } #[derive(Debug, PartialEq)] -pub struct SyscallResp { +pub struct SyscallResponse { pub id: ApexSyscall, pub status: u64, } -impl SyscallRequ { - /// Serializes a SyscallRequ into its binary representation +impl SyscallRequest { + /// Serializes a SyscallRequest into its binary representation /// - /// The format for serializing a SyscallRequ is defined as follows: + /// The format for serializing a SyscallRequest is defined as follows: /// ```text /// id [u64] /// nparams [u8] @@ -212,7 +212,7 @@ impl SyscallRequ { Ok(serialized) } - /// Deserializes a serialized SyscallRequ back into its internal + /// Deserializes a serialized SyscallRequest back into its internal /// representation pub fn deserialize(serialized: &Vec) -> Result { let mut serialized: &[u8] = serialized; @@ -226,14 +226,14 @@ impl SyscallRequ { params.push(serialized.read_u64::()?); } - Ok(SyscallRequ { id, params }) + Ok(SyscallRequest { id, params }) } } -impl SyscallResp { - /// Serializes a SyscallResp into its binary representation +impl SyscallResponse { + /// Serializes a SyscallResponse into its binary representation /// - /// The format for serializing a SyscallResp is defined as follows: + /// The format for serializing a SyscallResponse is defined as follows: /// ```text /// id [u64] /// status [u64] @@ -248,7 +248,7 @@ impl SyscallResp { Ok(serialized) } - /// Deserializes a serialized SyscallResp back into its internal + /// Deserializes a serialized SyscallResponse back into its internal /// representation pub fn deserialize(serialized: &Vec) -> Result { let mut serialized: &[u8] = serialized; @@ -257,7 +257,7 @@ impl SyscallResp { .ok_or(anyhow!("deserialization of ApexSyscall failed"))?; let status = serialized.read_u64::()?; - Ok(SyscallResp { id, status }) + Ok(SyscallResponse { id, status }) } } @@ -266,12 +266,12 @@ mod tests { use super::*; #[test] - fn test_serialize_requ() { - let requ = SyscallRequ { + fn test_serialize_request() { + let request = SyscallRequest { id: ApexSyscall::Start, params: vec![1, 2, 3], }; - let serialized = requ.serialize().unwrap(); + let serialized = request.serialize().unwrap(); let mut serialized: &[u8] = &serialized; let id = serialized.read_u64::().unwrap(); @@ -290,12 +290,12 @@ mod tests { } #[test] - fn test_serialize_resp() { - let resp = SyscallResp { + fn test_serialize_response() { + let response = SyscallResponse { id: ApexSyscall::Start, status: 42, }; - let serialized = resp.serialize().unwrap(); + let serialized = response.serialize().unwrap(); let mut serialized: &[u8] = &serialized; let id = serialized.read_u64::().unwrap(); @@ -307,26 +307,26 @@ mod tests { } #[test] - fn test_deserialize_requ() { - let requ = SyscallRequ { + fn test_deserialize_request() { + let request = SyscallRequest { id: ApexSyscall::Start, params: vec![1, 2, 3], }; - let serialized = requ.serialize().unwrap(); - let deserialized = SyscallRequ::deserialize(&serialized).unwrap(); - assert_eq!(requ, deserialized); + let serialized = request.serialize().unwrap(); + let deserialized = SyscallRequest::deserialize(&serialized).unwrap(); + assert_eq!(request, deserialized); assert!(!serialized.is_empty()); } #[test] - fn test_deserialize_resp() { - let resp = SyscallResp { + fn test_deserialize_response() { + let response = SyscallResponse { id: ApexSyscall::Start, status: 42, }; - let serialized = resp.serialize().unwrap(); - let deserialized = SyscallResp::deserialize(&serialized).unwrap(); - assert_eq!(resp, deserialized); + let serialized = response.serialize().unwrap(); + let deserialized = SyscallResponse::deserialize(&serialized).unwrap(); + assert_eq!(response, deserialized); assert!(!serialized.is_empty()); } } diff --git a/hypervisor/src/hypervisor/syscall.rs b/hypervisor/src/hypervisor/syscall.rs index 52bd14a..31be990 100644 --- a/hypervisor/src/hypervisor/syscall.rs +++ b/hypervisor/src/hypervisor/syscall.rs @@ -5,14 +5,15 @@ use std::num::NonZeroUsize; use std::os::fd::{AsRawFd, BorrowedFd, FromRawFd, OwnedFd, RawFd}; use std::time::{Duration, Instant}; -use a653rs_linux_core::mfd::{Mfd, Seals}; -use a653rs_linux_core::syscall::{SyscallRequ, SyscallResp}; use anyhow::{anyhow, bail, Result}; use libc::EINTR; use nix::sys::socket::{recvmsg, ControlMessageOwned, MsgFlags}; use nix::{cmsg_space, unistd}; use polling::{Event, Events, Poller}; +use a653rs_linux_core::mfd::{Mfd, Seals}; +use a653rs_linux_core::syscall::{SyscallRequest, SyscallResponse}; + /// Receives an FD triple from fd // TODO: Use generics here fn recv_fd_triple(fd: BorrowedFd) -> Result<[OwnedFd; 3]> { @@ -78,21 +79,21 @@ pub fn handle(fd: BorrowedFd, timeout: Option) -> Result { assert!(res); } - let [requ_fd, resp_fd, event_fd] = recv_fd_triple(fd)?; - let mut requ_fd = Mfd::from_fd(requ_fd)?; - let mut resp_fd = Mfd::from_fd(resp_fd)?; + let [request_fd, resp_fd, event_fd] = recv_fd_triple(fd)?; + let mut request_fd = Mfd::from_fd(request_fd)?; + let mut response_fd = Mfd::from_fd(resp_fd)?; // Fetch the request - let requ = SyscallRequ::deserialize(&requ_fd.read_all()?)?; - debug!("Received system call {:?}", requ); + let request = SyscallRequest::deserialize(&request_fd.read_all()?)?; + debug!("Received system call {:?}", request); // Write the response (dummy response right now) - let resp = SyscallResp { - id: requ.id, + let response = SyscallResponse { + id: request.id, status: 0, }; - resp_fd.write(&resp.serialize()?)?; - resp_fd.finalize(Seals::Readable)?; + response_fd.write(&response.serialize()?)?; + response_fd.finalize(Seals::Readable)?; // Trigger the event let buf = 1_u64.to_ne_bytes(); @@ -109,12 +110,13 @@ mod tests { use std::io::IoSlice; use std::os::fd::{AsFd, AsRawFd}; - use a653rs_linux_core::syscall::ApexSyscall; use nix::sys::eventfd::{eventfd, EfdFlags}; use nix::sys::socket::{ sendmsg, socketpair, AddressFamily, ControlMessage, SockFlag, SockType, }; + use a653rs_linux_core::syscall::ApexSyscall; + use super::*; #[test] @@ -128,14 +130,14 @@ mod tests { .unwrap(); let request_thread = std::thread::spawn(move || { - let mut requ_fd = Mfd::create("requ").unwrap(); - let mut resp_fd = Mfd::create("resp").unwrap(); + let mut request_fd = Mfd::create("requ").unwrap(); + let mut response_fd = Mfd::create("resp").unwrap(); let event_fd = eventfd(0, EfdFlags::empty()).unwrap(); // Initialize the request fd - requ_fd + request_fd .write( - &SyscallRequ { + &SyscallRequest { id: ApexSyscall::Start, params: vec![1, 2, 3], } @@ -143,13 +145,13 @@ mod tests { .unwrap(), ) .unwrap(); - requ_fd.finalize(Seals::Readable).unwrap(); + request_fd.finalize(Seals::Readable).unwrap(); // Send the fds to the responder { let fds = [ - requ_fd.as_raw_fd(), - resp_fd.as_raw_fd(), + request_fd.as_raw_fd(), + response_fd.as_raw_fd(), event_fd.as_raw_fd(), ]; let cmsg = [ControlMessage::ScmRights(&fds)]; @@ -169,9 +171,9 @@ mod tests { assert_eq!(events.len(), 1); } - let resp = SyscallResp::deserialize(&resp_fd.read_all().unwrap()).unwrap(); - assert_eq!(resp.id, ApexSyscall::Start); - assert_eq!(resp.status, 0); + let response = SyscallResponse::deserialize(&response_fd.read_all().unwrap()).unwrap(); + assert_eq!(response.id, ApexSyscall::Start); + assert_eq!(response.status, 0); }); let response_thread = std::thread::spawn(move || { diff --git a/partition/src/syscall.rs b/partition/src/syscall.rs index ea8a8b8..228ae31 100644 --- a/partition/src/syscall.rs +++ b/partition/src/syscall.rs @@ -6,14 +6,15 @@ use std::io::IoSlice; use std::num::NonZeroUsize; use std::os::fd::{AsFd, AsRawFd, BorrowedFd}; -use a653rs_linux_core::mfd::{Mfd, Seals}; -use a653rs_linux_core::syscall::{SyscallRequ, SyscallResp}; use anyhow::Result; use nix::libc::EINTR; use nix::sys::eventfd::{self, EfdFlags}; use nix::sys::socket::{sendmsg, ControlMessage, MsgFlags}; use polling::{Event, Events, Poller}; +use a653rs_linux_core::mfd::{Mfd, Seals}; +use a653rs_linux_core::syscall::{SyscallRequest, SyscallResponse}; + use crate::SYSCALL; /// Sends a vector of file descriptors through a Unix socket @@ -52,27 +53,30 @@ fn wait_event(event_fd: BorrowedFd) -> Result<()> { Ok(()) } -fn execute_fd(fd: BorrowedFd, requ: SyscallRequ) -> Result { +fn execute_fd(fd: BorrowedFd, request: SyscallRequest) -> Result { // Create the file descriptor triple - let mut requ_fd = Mfd::create("requ")?; - let mut resp_fd = Mfd::create("resp")?; + let mut request_fd = Mfd::create("requ")?; + let mut response_fd = Mfd::create("resp")?; let event_fd = eventfd::eventfd(0, EfdFlags::empty())?; // Initialize the request file descriptor - requ_fd.write(&requ.serialize()?)?; - requ_fd.finalize(Seals::Readable)?; + request_fd.write(&request.serialize()?)?; + request_fd.finalize(Seals::Readable)?; // Send the file descriptors to the hypervisor - send_fds(fd, [requ_fd.as_fd(), resp_fd.as_fd(), event_fd.as_fd()])?; + send_fds( + fd, + [request_fd.as_fd(), response_fd.as_fd(), event_fd.as_fd()], + )?; wait_event(event_fd.as_fd())?; - let resp = SyscallResp::deserialize(&resp_fd.read_all()?)?; - Ok(resp) + let response = SyscallResponse::deserialize(&response_fd.read_all()?)?; + Ok(response) } -pub fn execute(requ: SyscallRequ) -> Result { - execute_fd(SYSCALL.as_fd(), requ) +pub fn execute(request: SyscallRequest) -> Result { + execute_fd(SYSCALL.as_fd(), request) } #[cfg(test)] @@ -80,12 +84,13 @@ mod tests { use std::io::IoSliceMut; use std::os::fd::{FromRawFd, OwnedFd, RawFd}; - use a653rs_linux_core::syscall::ApexSyscall; use nix::sys::socket::{ recvmsg, socketpair, AddressFamily, ControlMessageOwned, SockFlag, SockType, }; use nix::{cmsg_space, unistd}; + use a653rs_linux_core::syscall::ApexSyscall; + use super::*; #[test] @@ -99,17 +104,17 @@ mod tests { .unwrap(); let request_thread = std::thread::spawn(move || { - let resp = execute_fd( + let response = execute_fd( requester.as_fd(), - SyscallRequ { + SyscallRequest { id: ApexSyscall::Start, params: vec![1, 2, 42], }, ) .unwrap(); - assert_eq!(resp.id, ApexSyscall::Start); - assert_eq!(resp.status, 42); + assert_eq!(response.id, ApexSyscall::Start); + assert_eq!(response.status, 42); }); let response_thread = std::thread::spawn(move || { // Receive the file descriptors @@ -132,19 +137,19 @@ mod tests { _ => panic!("unknown cmsg received"), }; - let [req, resp, event_fd] = fds.try_into().unwrap(); - let mut requ_fd = Mfd::from_fd(req).unwrap(); - let mut resp_fd = Mfd::from_fd(resp).unwrap(); + let [request, response, event_fd] = fds.try_into().unwrap(); + let mut request_fd = Mfd::from_fd(request).unwrap(); + let mut response_fd = Mfd::from_fd(response).unwrap(); // Fetch the request - let requ = SyscallRequ::deserialize(&requ_fd.read_all().unwrap()).unwrap(); - assert_eq!(requ.id, ApexSyscall::Start); - assert_eq!(requ.params, vec![1, 2, 42]); + let request = SyscallRequest::deserialize(&request_fd.read_all().unwrap()).unwrap(); + assert_eq!(request.id, ApexSyscall::Start); + assert_eq!(request.params, vec![1, 2, 42]); // Write the response - resp_fd + response_fd .write( - &SyscallResp { + &SyscallResponse { id: ApexSyscall::Start, status: 42, } @@ -152,7 +157,7 @@ mod tests { .unwrap(), ) .unwrap(); - resp_fd.finalize(Seals::Readable).unwrap(); + response_fd.finalize(Seals::Readable).unwrap(); // Trigger the eventfd let buf = 1_u64.to_ne_bytes(); From 3a772a63c8b522e3832cbebad9495ef2ab48c366 Mon Sep 17 00:00:00 2001 From: Florian Hartung Date: Tue, 27 Feb 2024 10:36:44 +0100 Subject: [PATCH 2/9] refactor(hypervisor): change partition's tmpfs location --- hypervisor/src/hypervisor/partition.rs | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/hypervisor/src/hypervisor/partition.rs b/hypervisor/src/hypervisor/partition.rs index 9a22002..6a5e6f7 100644 --- a/hypervisor/src/hypervisor/partition.rs +++ b/hypervisor/src/hypervisor/partition.rs @@ -12,7 +12,6 @@ use anyhow::{anyhow, Context}; use clone3::Clone3; use itertools::Itertools; use nix::mount::{umount2, MntFlags, MsFlags}; -use nix::sys::socket::{self, bind, AddressFamily, SockFlag, SockType, UnixAddr}; use nix::unistd::{chdir, close, pivot_root, setgid, setuid, Gid, Pid, Uid}; use polling::{Event, Events, Poller}; use procfs::process::Process; @@ -28,7 +27,6 @@ use a653rs_linux_core::health_event::PartitionCall; use a653rs_linux_core::ipc::{channel_pair, io_pair, IoReceiver, IoSender, IpcReceiver}; use a653rs_linux_core::partition::{PartitionConstants, SamplingConstant}; use a653rs_linux_core::sampling::Sampling; -use a653rs_linux_core::syscall::SYSCALL_SOCKET_PATH; pub use mounting::FileMounter; use crate::hypervisor::config::Partition as PartitionConfig; @@ -224,36 +222,21 @@ impl Run { // TODO: Check for duplicate mounts + let tmpfs_path = base.working_dir.path().join("tmpfs"); for m in mounts { debug!("mounting {:?}", &m); - m.mount(base.working_dir.path()) + m.mount(&tmpfs_path) .context("failed to mount") .typ(SystemError::Panic)?; } // Change working directory and root (unmount old root) - chdir(base.working_dir.path()).unwrap(); + chdir(&tmpfs_path).unwrap(); pivot_root(".", ".").unwrap(); umount2(".", MntFlags::MNT_DETACH).unwrap(); //umount("old").unwrap(); chdir("/").unwrap(); - // After we've performed the pseudo chroot, we can create the - // Unix domain socket - let syscall_socket = socket::socket( - AddressFamily::Unix, - SockType::Datagram, - SockFlag::SOCK_CLOEXEC, - None, - ) - .unwrap(); - - bind( - syscall_socket.as_raw_fd(), - &UnixAddr::new(SYSCALL_SOCKET_PATH).unwrap(), - ) - .unwrap(); - let constants: RawFd = PartitionConstants { name: base.name.clone(), identifier: base.id, From 0d26cbd81c17fc6e724e6d50665630ed5cf61b2b Mon Sep 17 00:00:00 2001 From: Sven Friedrich Date: Tue, 20 Feb 2024 14:04:27 +0100 Subject: [PATCH 3/9] feat(core): extend cgroup with threaded functions --- core/src/cgroup.rs | 59 ++++++++++++++++++++++---- hypervisor/src/hypervisor/mod.rs | 4 +- hypervisor/src/hypervisor/partition.rs | 4 +- partition/src/process.rs | 4 +- 4 files changed, 58 insertions(+), 13 deletions(-) diff --git a/core/src/cgroup.rs b/core/src/cgroup.rs index ef9fd20..3eeec0c 100644 --- a/core/src/cgroup.rs +++ b/core/src/cgroup.rs @@ -72,8 +72,15 @@ impl CGroup { Self::new_root(&self.path, name) } + /// Creates a threaded sub-cgroup inside this one + pub fn new_threaded(&self, name: &str) -> anyhow::Result { + let cgroup = Self::new_root(&self.path, name)?; + cgroup.set_threaded()?; + Ok(cgroup) + } + /// Moves a process to this cgroup - pub fn mv(&self, pid: Pid) -> anyhow::Result<()> { + pub fn mv_proc(&self, pid: Pid) -> anyhow::Result<()> { trace!("Move {pid:?} to {}", self.get_path().display()); if !is_cgroup(&self.path)? { bail!("{} is not a valid cgroup", self.path.display()); @@ -83,6 +90,28 @@ impl CGroup { Ok(()) } + /// Moves a thread to this cgroup + pub fn mv_thread(&self, pid: Pid) -> anyhow::Result<()> { + trace!("Move {pid:?} to {}", self.get_path().display()); + if !is_cgroup(&self.path)? { + bail!("{} is not a valid cgroup", self.path.display()); + } + + fs::write(self.path.join("cgroup.threads"), pid.to_string())?; + Ok(()) + } + + /// Changes the cgroups type to "threaded" + fn set_threaded(&self) -> anyhow::Result<()> { + trace!("Change type of {} to threaded", self.get_path().display()); + if !is_cgroup(&self.path)? { + bail!("{} is not a valid cgroup", self.path.display()); + } + + fs::write(self.path.join("cgroup.type"), "threaded")?; + Ok(()) + } + /// Returns all PIDs associated with this cgroup pub fn get_pids(&self) -> anyhow::Result> { if !is_cgroup(&self.path)? { @@ -97,6 +126,20 @@ impl CGroup { Ok(pids) } + /// Returns all TIDs associated with this cgroup + pub fn get_tids(&self) -> anyhow::Result> { + if !is_cgroup(&self.path)? { + bail!("{} is not a valid cgroup", self.path.display()); + } + + let pids: Vec = fs::read(self.path.join("cgroup.threads"))? + .lines() + .map(|line| Pid::from_raw(line.unwrap().parse().unwrap())) + .collect(); + + Ok(pids) + } + /// Checks whether this cgroup is populated pub fn populated(&self) -> anyhow::Result { if !is_cgroup(&self.path)? { @@ -323,8 +366,8 @@ mod tests { let cg1 = CGroup::new_root(get_path(), &gen_name()).unwrap(); let cg2 = cg1.new(&gen_name()).unwrap(); - cg1.mv(pid).unwrap(); - cg2.mv(pid).unwrap(); + cg1.mv_proc(pid).unwrap(); + cg2.mv_proc(pid).unwrap(); proc.kill().unwrap(); cg1.rm().unwrap(); @@ -341,14 +384,14 @@ mod tests { assert!(cg1.get_pids().unwrap().is_empty()); assert!(cg2.get_pids().unwrap().is_empty()); - cg1.mv(pid).unwrap(); + cg1.mv_proc(pid).unwrap(); let pids = cg1.get_pids().unwrap(); assert!(!pids.is_empty()); assert!(cg2.get_pids().unwrap().is_empty()); assert_eq!(pids.len(), 1); assert_eq!(pids[0], pid); - cg2.mv(pid).unwrap(); + cg2.mv_proc(pid).unwrap(); let pids = cg2.get_pids().unwrap(); assert!(!pids.is_empty()); assert!(cg1.get_pids().unwrap().is_empty()); @@ -369,7 +412,7 @@ mod tests { assert!(!cg.populated().unwrap()); assert_eq!(cg.populated().unwrap(), !cg.get_pids().unwrap().is_empty()); - cg.mv(pid).unwrap(); + cg.mv_proc(pid).unwrap(); assert!(cg.populated().unwrap()); assert_eq!(cg.populated().unwrap(), !cg.get_pids().unwrap().is_empty()); @@ -394,7 +437,7 @@ mod tests { assert!(!cg.frozen().unwrap()); // Do the same with a non-empty cgroup - cg.mv(pid).unwrap(); + cg.mv_proc(pid).unwrap(); cg.freeze().unwrap(); assert!(cg.frozen().unwrap()); cg.unfreeze().unwrap(); @@ -415,7 +458,7 @@ mod tests { cg.kill().unwrap(); // Do the same with a non-empty cgroup - cg.mv(pid).unwrap(); + cg.mv_proc(pid).unwrap(); assert!(cg.populated().unwrap()); cg.kill().unwrap(); diff --git a/hypervisor/src/hypervisor/mod.rs b/hypervisor/src/hypervisor/mod.rs index d1e533b..9dc8c40 100644 --- a/hypervisor/src/hypervisor/mod.rs +++ b/hypervisor/src/hypervisor/mod.rs @@ -104,7 +104,7 @@ impl Hypervisor { pub fn run(mut self) -> LeveledResult<()> { self.cg - .mv(nix::unistd::getpid()) + .mv_proc(nix::unistd::getpid()) .typ(SystemError::CGroup) .lev(ErrorLevel::ModuleInit)?; @@ -178,7 +178,7 @@ impl Drop for Hypervisor { // requires that the cgroup must have been deleted externally if let Err(e) = CGroup::import_root(&self.prev_cg) .unwrap() - .mv(nix::unistd::getpid()) + .mv_proc(nix::unistd::getpid()) { error!("{e}") } diff --git a/hypervisor/src/hypervisor/partition.rs b/hypervisor/src/hypervisor/partition.rs index 6a5e6f7..17d1a6f 100644 --- a/hypervisor/src/hypervisor/partition.rs +++ b/hypervisor/src/hypervisor/partition.rs @@ -398,7 +398,9 @@ impl Run { } // Move main process to own cgroup - self.cgroup_main.mv(self.main).typ(SystemError::CGroup)?; + self.cgroup_main + .mv_proc(self.main) + .typ(SystemError::CGroup)?; self.freeze_aperiodic()?; self.freeze_periodic()?; diff --git a/partition/src/process.rs b/partition/src/process.rs index 5123cb2..f849520 100644 --- a/partition/src/process.rs +++ b/partition/src/process.rs @@ -191,7 +191,7 @@ impl Process { safemem::write_bytes(stack, 0); let cbk = Box::new(move || { let cg = self.cg().unwrap(); - cg.mv(getpid()).unwrap(); + cg.mv_proc(getpid()).unwrap(); (self.attr.entry_point)(); 0 }); @@ -201,7 +201,7 @@ impl Process { nix::sched::clone(cbk, stack, CloneFlags::empty(), Some(SIGCHLD)) .lev_typ(SystemError::Panic, ErrorLevel::Partition)? }; - cg.mv(child).unwrap(); + cg.mv_proc(child).unwrap(); self.pid.write(&child).lev(ErrorLevel::Partition)?; From dae772b0575ba7ffd555f2162c70928fbd5a861d Mon Sep 17 00:00:00 2001 From: Sven Friedrich Date: Tue, 20 Feb 2024 14:16:56 +0100 Subject: [PATCH 4/9] refactor: place processes in own sub-cgroup --- core/src/partition.rs | 1 + hypervisor/src/hypervisor/partition.rs | 8 +++++--- partition/src/process.rs | 12 ++++++++++-- 3 files changed, 16 insertions(+), 5 deletions(-) diff --git a/core/src/partition.rs b/core/src/partition.rs index 8de9e67..13f72eb 100644 --- a/core/src/partition.rs +++ b/core/src/partition.rs @@ -37,6 +37,7 @@ pub struct SamplingConstant { impl PartitionConstants { pub const PARTITION_CONSTANTS_FD: &'static str = "PARTITION_CONSTANTS_FD"; + pub const PROCESSES_CGROUP: &'static str = "processes"; pub const APERIODIC_PROCESS_CGROUP: &'static str = "aperiodic"; pub const PERIODIC_PROCESS_CGROUP: &'static str = "periodic"; diff --git a/hypervisor/src/hypervisor/partition.rs b/hypervisor/src/hypervisor/partition.rs index 17d1a6f..2caf1a1 100644 --- a/hypervisor/src/hypervisor/partition.rs +++ b/hypervisor/src/hypervisor/partition.rs @@ -72,12 +72,14 @@ impl Run { pub fn new(base: &Base, condition: StartCondition, warm_start: bool) -> TypedResult { trace!("Create new \"Run\" for \"{}\" partition", base.name()); let cgroup_main = base.cgroup.new("main").typ(SystemError::CGroup)?; - let cgroup_periodic = base + let cgroup_processes = base .cgroup + .new(PartitionConstants::PROCESSES_CGROUP) + .typ(SystemError::CGroup)?; + let cgroup_periodic = cgroup_processes .new(PartitionConstants::PERIODIC_PROCESS_CGROUP) .typ(SystemError::CGroup)?; - let cgroup_aperiodic = base - .cgroup + let cgroup_aperiodic = cgroup_processes .new(PartitionConstants::APERIODIC_PROCESS_CGROUP) .typ(SystemError::CGroup)?; diff --git a/partition/src/process.rs b/partition/src/process.rs index f849520..a5eea03 100644 --- a/partition/src/process.rs +++ b/partition/src/process.rs @@ -213,9 +213,17 @@ impl Process { pub(crate) fn cg(&self) -> TypedResult { let cg_name = if self.periodic { - PartitionConstants::PERIODIC_PROCESS_CGROUP + format!( + "{}/{}", + PartitionConstants::PROCESSES_CGROUP, + PartitionConstants::PERIODIC_PROCESS_CGROUP + ) } else { - PartitionConstants::APERIODIC_PROCESS_CGROUP + format!( + "{}/{}", + PartitionConstants::PROCESSES_CGROUP, + PartitionConstants::APERIODIC_PROCESS_CGROUP + ) }; let path = cgroup::mount_point().typ(SystemError::CGroup)?; From a9505ee5ecba1a8ef97d968ed7d98c16e242c2cb Mon Sep 17 00:00:00 2001 From: Sven Friedrich Date: Wed, 21 Feb 2024 15:34:35 +0100 Subject: [PATCH 5/9] feat!: threaded partitions Inside of partitions threads are used for "processes" instead of processes. This means that arinc "processes" now share an address space --- Cargo.lock | 7 - core/src/partition.rs | 1 + hypervisor/src/hypervisor/partition.rs | 49 +++--- partition/Cargo.toml | 1 - partition/src/apex.rs | 4 +- partition/src/lib.rs | 37 +---- partition/src/process.rs | 215 +++++++------------------ 7 files changed, 92 insertions(+), 222 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 5335b24..a897583 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -27,7 +27,6 @@ dependencies = [ "once_cell", "polling", "procfs", - "safemem", "tinyvec", ] @@ -1048,12 +1047,6 @@ version = "1.0.16" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "f98d2aa92eebf49b69786be48e4477826b256916e84a57ff2a4f21923b48eb4c" -[[package]] -name = "safemem" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072" - [[package]] name = "same-file" version = "1.0.6" diff --git a/core/src/partition.rs b/core/src/partition.rs index 13f72eb..5c78787 100644 --- a/core/src/partition.rs +++ b/core/src/partition.rs @@ -38,6 +38,7 @@ pub struct SamplingConstant { impl PartitionConstants { pub const PARTITION_CONSTANTS_FD: &'static str = "PARTITION_CONSTANTS_FD"; pub const PROCESSES_CGROUP: &'static str = "processes"; + pub const MAIN_PROCESS_CGROUP: &'static str = "main"; pub const APERIODIC_PROCESS_CGROUP: &'static str = "aperiodic"; pub const PERIODIC_PROCESS_CGROUP: &'static str = "periodic"; diff --git a/hypervisor/src/hypervisor/partition.rs b/hypervisor/src/hypervisor/partition.rs index 2caf1a1..d5c163d 100644 --- a/hypervisor/src/hypervisor/partition.rs +++ b/hypervisor/src/hypervisor/partition.rs @@ -1,6 +1,7 @@ use std::collections::HashMap; use std::net::{TcpStream, UdpSocket}; use std::os::unix::prelude::{AsRawFd, FromRawFd, OwnedFd, PermissionsExt, RawFd}; +use std::os::unix::process::CommandExt; use std::path::{self, Path, PathBuf}; use std::process::{Command, Stdio}; use std::thread::sleep; @@ -12,12 +13,12 @@ use anyhow::{anyhow, Context}; use clone3::Clone3; use itertools::Itertools; use nix::mount::{umount2, MntFlags, MsFlags}; -use nix::unistd::{chdir, close, pivot_root, setgid, setuid, Gid, Pid, Uid}; +use nix::unistd::{chdir, close, getpid, pivot_root, setgid, setuid, Gid, Pid, Uid}; use polling::{Event, Events, Poller}; use procfs::process::Process; use tempfile::{tempdir, TempDir}; -use a653rs_linux_core::cgroup::CGroup; +use a653rs_linux_core::cgroup::{self, CGroup}; use a653rs_linux_core::error::{ ErrorLevel, LeveledResult, ResultExt, SystemError, TypedError, TypedResult, TypedResultExt, }; @@ -49,11 +50,11 @@ pub enum TransitionAction { // Struct for holding information of a partition which is not in Idle Mode #[derive(Debug)] pub(crate) struct Run { - cgroup_main: CGroup, + _cgroup_main: CGroup, cgroup_aperiodic: CGroup, cgroup_periodic: CGroup, - main: Pid, + _main: Pid, periodic: bool, aperiodic: bool, @@ -71,16 +72,18 @@ pub(crate) struct Run { impl Run { pub fn new(base: &Base, condition: StartCondition, warm_start: bool) -> TypedResult { trace!("Create new \"Run\" for \"{}\" partition", base.name()); - let cgroup_main = base.cgroup.new("main").typ(SystemError::CGroup)?; let cgroup_processes = base .cgroup .new(PartitionConstants::PROCESSES_CGROUP) .typ(SystemError::CGroup)?; + let cgroup_main = cgroup_processes + .new_threaded(PartitionConstants::MAIN_PROCESS_CGROUP) + .typ(SystemError::CGroup)?; let cgroup_periodic = cgroup_processes - .new(PartitionConstants::PERIODIC_PROCESS_CGROUP) + .new_threaded(PartitionConstants::PERIODIC_PROCESS_CGROUP) .typ(SystemError::CGroup)?; let cgroup_aperiodic = cgroup_processes - .new(PartitionConstants::APERIODIC_PROCESS_CGROUP) + .new_threaded(PartitionConstants::APERIODIC_PROCESS_CGROUP) .typ(SystemError::CGroup)?; let real_uid = nix::unistd::getuid(); @@ -260,7 +263,8 @@ impl Run { .unwrap(); // Run binary - let mut handle = Command::new("/bin") + let mut command = Command::new("/bin"); + let mut command = command .stdout(Stdio::null()) .stdin(Stdio::null()) .stderr(Stdio::null()) @@ -268,11 +272,21 @@ impl Run { .env( PartitionConstants::PARTITION_CONSTANTS_FD, constants.to_string(), - ) - .spawn() - .unwrap(); - handle.wait().unwrap(); - + ); + unsafe { + let path = cgroup::mount_point().typ(SystemError::CGroup)?; + let path = path + .join(PartitionConstants::PROCESSES_CGROUP) + .join(PartitionConstants::MAIN_PROCESS_CGROUP); + let cgroup_main = CGroup::import_root(path).typ(SystemError::CGroup).unwrap(); + + command = command.pre_exec(move || { + cgroup_main + .mv_proc(getpid()) + .map_err(|e| std::io::Error::new(std::io::ErrorKind::Other, e)) + }); + } + command.exec(); unsafe { libc::_exit(0) }; } child => child, @@ -286,10 +300,10 @@ impl Run { let pid = Pid::from_raw(pid); Ok(Run { - cgroup_main, + _cgroup_main: cgroup_main, cgroup_aperiodic, cgroup_periodic, - main: pid, + _main: pid, mode, mode_file, call_rx, @@ -399,11 +413,6 @@ impl Run { self.periodic = true; } - // Move main process to own cgroup - self.cgroup_main - .mv_proc(self.main) - .typ(SystemError::CGroup)?; - self.freeze_aperiodic()?; self.freeze_periodic()?; diff --git a/partition/Cargo.toml b/partition/Cargo.toml index 81bb96f..83f3de6 100644 --- a/partition/Cargo.toml +++ b/partition/Cargo.toml @@ -22,5 +22,4 @@ once_cell.workspace = true lazy_static = "1.4" anyhow = "1.0" log.workspace = true -safemem = "0.3" tinyvec = "1.6" diff --git a/partition/src/apex.rs b/partition/src/apex.rs index e8ad0a5..ce25215 100644 --- a/partition/src/apex.rs +++ b/partition/src/apex.rs @@ -66,8 +66,8 @@ impl ApexProcessP4 for ApexLinuxPartition { fn start(process_id: ProcessId) -> Result<(), ErrorReturnCode> { let proc = match process_id { - 1 => APERIODIC_PROCESS.read().unwrap(), - 2 => PERIODIC_PROCESS.read().unwrap(), + 1 => APERIODIC_PROCESS.get(), + 2 => PERIODIC_PROCESS.get(), _ => None, }; diff --git a/partition/src/lib.rs b/partition/src/lib.rs index 51a31fc..f7b161e 100644 --- a/partition/src/lib.rs +++ b/partition/src/lib.rs @@ -10,6 +10,7 @@ extern crate log; #[cfg(feature = "socket")] use std::net::{TcpStream, UdpSocket}; +use std::sync::Arc; #[cfg(feature = "socket")] use a653rs_linux_core::ipc::IoReceiver; @@ -24,9 +25,8 @@ use a653rs_linux_core::health_event::PartitionCall; use a653rs_linux_core::ipc::IpcSender; use a653rs_linux_core::partition::*; use a653rs_linux_core::syscall::SYSCALL_SOCKET_PATH; -use memmap2::{MmapMut, MmapOptions}; use nix::sys::socket::{self, connect, AddressFamily, SockFlag, SockType, UnixAddr}; -use once_cell::sync::Lazy; +use once_cell::sync::{Lazy, OnceCell}; use process::Process; use tinyvec::ArrayVec; @@ -36,8 +36,6 @@ pub mod syscall; //mod scheduler; pub(crate) mod process; -const APERIODIC_PROCESS_FILE: &str = "aperiodic"; -const PERIODIC_PROCESS_FILE: &str = "periodic"; const SAMPLING_PORTS_FILE: &str = "sampling_channels"; // const MAX_SAMPLING_PORTS: usize = 32; @@ -54,27 +52,8 @@ pub(crate) static SYSTEM_TIME: Lazy = Lazy::new(|| { pub(crate) static PARTITION_MODE: Lazy> = Lazy::new(|| TempFile::::try_from(CONSTANTS.partition_mode_fd).unwrap()); -pub(crate) static APERIODIC_PROCESS: Lazy>> = Lazy::new(|| { - // TODO Get rid of get_memfd? Use env instead? - if let Ok(fd) = get_memfd(APERIODIC_PROCESS_FILE) { - TempFile::try_from(fd).unwrap() - } else { - let file: TempFile> = TempFile::create(APERIODIC_PROCESS_FILE).unwrap(); - file.write(&None).unwrap(); - file - } -}); - -// TODO generate in hypervisor -pub(crate) static PERIODIC_PROCESS: Lazy>> = Lazy::new(|| { - if let Ok(fd) = get_memfd(PERIODIC_PROCESS_FILE) { - TempFile::try_from(fd).unwrap() - } else { - let file: TempFile> = TempFile::create(PERIODIC_PROCESS_FILE).unwrap(); - file.write(&None).unwrap(); - file - } -}); +pub(crate) static PERIODIC_PROCESS: OnceCell> = OnceCell::new(); +pub(crate) static APERIODIC_PROCESS: OnceCell> = OnceCell::new(); pub(crate) type SamplingPortsType = (usize, Duration); pub(crate) static SAMPLING_PORTS: Lazy>> = @@ -99,14 +78,6 @@ pub(crate) static UDP_IO_RX: Lazy> = pub(crate) static TCP_IO_RX: Lazy> = Lazy::new(|| unsafe { IoReceiver::::from_raw_fd(CONSTANTS.io_fd) }); -pub(crate) static SIGNAL_STACK: Lazy = Lazy::new(|| { - MmapOptions::new() - .stack() - .len(nix::libc::SIGSTKSZ) - .map_anon() - .unwrap() -}); - pub(crate) static SYSCALL: Lazy = Lazy::new(|| { let syscall_socket = socket::socket( AddressFamily::Unix, diff --git a/partition/src/process.rs b/partition/src/process.rs index a5eea03..bcb827d 100644 --- a/partition/src/process.rs +++ b/partition/src/process.rs @@ -1,7 +1,6 @@ -use std::mem::forget; -use std::os::unix::prelude::{FromRawFd, IntoRawFd, OwnedFd}; -use std::ptr::null_mut; -use std::sync::Mutex; +use std::sync::atomic::{AtomicBool, AtomicI32}; +use std::sync::{Arc, Barrier, Mutex}; +use std::thread::Builder; use a653rs::bindings::*; use a653rs::prelude::{ProcessAttribute, SystemTime}; @@ -10,41 +9,21 @@ use a653rs_linux_core::cgroup::CGroup; use a653rs_linux_core::error::{ ErrorLevel, LeveledResult, ResultExt, SystemError, TypedResult, TypedResultExt, }; -use a653rs_linux_core::fd::PidFd; -use a653rs_linux_core::file::TempFile; use a653rs_linux_core::partition::PartitionConstants; use anyhow::anyhow; -use memmap2::MmapOptions; -use nix::libc::{stack_t, SIGCHLD}; -use nix::sched::CloneFlags; -use nix::sys::signal::{sigaction, SaFlags, SigAction, SigHandler, Signal}; -use nix::sys::signalfd::SigSet; -use nix::unistd::{getpid, Pid}; -use once_cell::sync::{Lazy, OnceCell}; +use nix::unistd::{gettid, Pid}; -use crate::partition::ApexLinuxPartition; -use crate::{APERIODIC_PROCESS, PERIODIC_PROCESS, SIGNAL_STACK}; - -//use crate::{APERIODIC_PROCESS, PERIODIC_PROCESS}; - -#[derive(Debug, Clone, Copy)] -struct StackPtr(*mut [u8]); - -unsafe impl Sync for StackPtr {} -unsafe impl Send for StackPtr {} - -static STACKS: [OnceCell; 2] = [OnceCell::new(), OnceCell::new()]; - -static SYNC: Lazy> = Lazy::new(|| Mutex::new(Default::default())); +use crate::{APERIODIC_PROCESS, PERIODIC_PROCESS}; #[repr(C)] #[derive(Debug, Clone)] pub(crate) struct Process { id: i32, attr: ProcessAttribute, - activated: TempFile, - pid: TempFile, + activated: Arc, + pid: Arc, periodic: bool, + stack_size: usize, } impl Process { @@ -59,81 +38,45 @@ impl Process { .stack_size .try_into() .lev_typ(SystemError::Panic, ErrorLevel::Partition)?; - let mut stack = MmapOptions::new() - .stack() - .len(stack_size) - .map_anon() - .lev_typ(SystemError::Panic, ErrorLevel::Partition)?; let periodic = attr.period != SystemTime::Infinite; let id = periodic as i32 + 1; - let guard = SYNC - .lock() - .map_err(|e| anyhow!("{e:?}")) - .lev_typ(SystemError::Panic, ErrorLevel::Partition)?; - let proc_file = if periodic { - PERIODIC_PROCESS.clone() + &PERIODIC_PROCESS } else { - APERIODIC_PROCESS.clone() + &APERIODIC_PROCESS }; - if proc_file.read().lev(ErrorLevel::Partition)?.is_some() { - return Err(anyhow!("Process type already exists. Periodic: {periodic}")) - .lev_typ(SystemError::Panic, ErrorLevel::Partition); - } - - // Files for dropping fd - let mut fds = Vec::new(); - let activated = TempFile::create(format!("state_{name}")).lev(ErrorLevel::Partition)?; - fds.push(unsafe { OwnedFd::from_raw_fd(activated.fd()) }); - activated.write(&false).lev(ErrorLevel::Partition)?; - let pid = TempFile::create(format!("pid_{name}")).lev(ErrorLevel::Partition)?; - fds.push(unsafe { OwnedFd::from_raw_fd(pid.fd()) }); - - let process = Self { + let res = proc_file.try_insert(Arc::new(Self { id, attr, - activated, - pid, + activated: Arc::new(AtomicBool::new(false)), + pid: Arc::new(AtomicI32::new(0)), periodic, - }; - - proc_file.write(&Some(process)).lev(ErrorLevel::Partition)?; - - // We can unwrap because it was already checked that the cell is empty - STACKS[periodic as usize] - .set(StackPtr(stack.as_mut())) - .unwrap(); - - drop(guard); - - // dissolve files into fds - for f in fds { - f.into_raw_fd(); + stack_size, + })); + if res.is_ok() { + trace!("Created process \"{name}\" with id: {id}"); + Ok(id as ProcessId) + } else { + Err(anyhow!("Process type already exists. Periodic: {periodic}")) + .lev_typ(SystemError::Panic, ErrorLevel::Partition) } - // forget stack ptr so we do not call munmap - forget(stack); - - trace!("Created process \"{name}\" with id: {id}"); - Ok(id as ProcessId) } - pub(crate) fn get_self() -> Option { - if let Ok(Some(p)) = APERIODIC_PROCESS.read() { - if let Ok(id) = p.pid.read() { - if id == nix::unistd::getpid() { - return Some(p); - } + pub(crate) fn get_self() -> Option> { + if let Some(p) = APERIODIC_PROCESS.get() { + let id = p.pid.load(std::sync::atomic::Ordering::Relaxed); + if id == nix::unistd::gettid().as_raw() { + return Some(p.clone()); } } - if let Ok(Some(p)) = PERIODIC_PROCESS.read() { - if let Ok(id) = p.pid.read() { - if id == nix::unistd::getpid() { - return Some(p); - } + if let Some(p) = PERIODIC_PROCESS.get() { + let id = p.pid.load(std::sync::atomic::Ordering::Relaxed); + if id == nix::unistd::gettid().as_raw() { + return Some(p.clone()); } } @@ -147,87 +90,49 @@ impl Process { .lev_typ(SystemError::Panic, ErrorLevel::Partition) } - pub fn start(&self) -> LeveledResult { + pub fn start(&self) -> LeveledResult<()> { let name = self.name()?; trace!("Start Process \"{name}\""); - unsafe { - let stack = stack_t { - ss_sp: SIGNAL_STACK.as_ptr() as *mut nix::libc::c_void, - ss_flags: 0, - ss_size: nix::libc::SIGSTKSZ, - }; - nix::libc::sigaltstack(&stack, null_mut()); - - let report_sigsegv_action = SigAction::new( - SigHandler::Handler(handle_sigsegv), - SaFlags::SA_ONSTACK, - SigSet::empty(), - ); - sigaction(Signal::SIGSEGV, &report_sigsegv_action).unwrap(); - - let report_sigfpe_action = SigAction::new( - SigHandler::Handler(handle_sigfpe), - SaFlags::SA_ONSTACK, - SigSet::empty(), - ); - sigaction(Signal::SIGFPE, &report_sigfpe_action).unwrap(); - } let cg = self.cg().lev(ErrorLevel::Partition)?; cg.freeze() .typ(SystemError::CGroup) .lev(ErrorLevel::Partition)?; - let stack = unsafe { - STACKS[self.periodic as usize] - .get() - .expect("TODO: Do not expect here") - .0 - .as_mut() - .expect("TODO: Do not expect here") - }; - - //let stack_size = self.attr.stack_size as u64; - safemem::write_bytes(stack, 0); - let cbk = Box::new(move || { - let cg = self.cg().unwrap(); - cg.mv_proc(getpid()).unwrap(); - (self.attr.entry_point)(); - 0 - }); - - // Make extra sure that the process is in the cgroup - let child = unsafe { - nix::sched::clone(cbk, stack, CloneFlags::empty(), Some(SIGCHLD)) - .lev_typ(SystemError::Panic, ErrorLevel::Partition)? - }; - cg.mv_proc(child).unwrap(); - - self.pid.write(&child).lev(ErrorLevel::Partition)?; - - let pidfd = PidFd::try_from(child).lev(ErrorLevel::Partition)?; + let sync = Arc::new((Barrier::new(2), Mutex::new(()))); + let s = sync.clone(); + let entry = self.attr.entry_point; + let proc = self.pid.clone(); + let _thread = Builder::new() + .name(name.to_string()) + .stack_size(self.stack_size) + .spawn(move || { + proc.store(gettid().as_raw(), std::sync::atomic::Ordering::Relaxed); + s.0.wait(); + drop(s.1.lock().unwrap()); + (entry)(); + }) + .lev_typ(SystemError::Panic, ErrorLevel::Partition)?; + let lock = sync.1.lock().unwrap(); + sync.0.wait(); + let pid = Pid::from_raw(self.pid.load(std::sync::atomic::Ordering::Relaxed)); + cg.mv_thread(pid).unwrap(); + drop(lock); - trace!("Started process \"{name}\" with pid: {child}"); - Ok(pidfd) + Ok(()) } pub(crate) fn cg(&self) -> TypedResult { let cg_name = if self.periodic { - format!( - "{}/{}", - PartitionConstants::PROCESSES_CGROUP, - PartitionConstants::PERIODIC_PROCESS_CGROUP - ) + PartitionConstants::PERIODIC_PROCESS_CGROUP } else { - format!( - "{}/{}", - PartitionConstants::PROCESSES_CGROUP, - PartitionConstants::APERIODIC_PROCESS_CGROUP - ) + PartitionConstants::APERIODIC_PROCESS_CGROUP }; let path = cgroup::mount_point().typ(SystemError::CGroup)?; - let path = path.join(cg_name); + let path = path + .join(PartitionConstants::PROCESSES_CGROUP) + .join(cg_name); CGroup::import_root(path).typ(SystemError::CGroup) } @@ -236,11 +141,3 @@ impl Process { self.periodic } } - -extern "C" fn handle_sigsegv(_: i32) { - ApexLinuxPartition::raise_system_error(SystemError::Segmentation); -} - -extern "C" fn handle_sigfpe(_: i32) { - ApexLinuxPartition::raise_system_error(SystemError::FloatingPoint); -} From 9e2c61f47473e3886cc7d1b2b9e9b283c1b9a59f Mon Sep 17 00:00:00 2001 From: Sven Friedrich Date: Wed, 21 Feb 2024 16:13:10 +0100 Subject: [PATCH 6/9] feat: use socket file for partition ipc --- core/src/ipc.rs | 46 ++++++++++++++++---------- core/src/partition.rs | 2 +- hypervisor/src/hypervisor/partition.rs | 16 ++++++--- partition/src/lib.rs | 9 ++--- 4 files changed, 47 insertions(+), 26 deletions(-) diff --git a/core/src/ipc.rs b/core/src/ipc.rs index c97a930..f4cd77e 100644 --- a/core/src/ipc.rs +++ b/core/src/ipc.rs @@ -4,6 +4,7 @@ use std::marker::PhantomData; use std::os::fd::{AsFd, BorrowedFd, OwnedFd}; use std::os::unix::net::UnixDatagram; use std::os::unix::prelude::{AsRawFd, FromRawFd, RawFd}; +use std::path::Path; use std::time::Duration; use anyhow::Error; @@ -90,24 +91,17 @@ where } } -/// Create a pair consisting of an IpcSender and an IpcReceiver -pub fn channel_pair() -> TypedResult<(IpcSender, IpcReceiver)> -where - T: for<'de> Deserialize<'de> + Serialize, -{ - trace!("Create IPC channel pair"); - let (tx, rx) = socketpair( - AddressFamily::Unix, - SockType::Datagram, - None, - SockFlag::SOCK_NONBLOCK, - ) - .typ(SystemError::Panic)?; - - let tx = IpcSender::from(tx); - let rx = IpcReceiver::from(rx); +pub fn bind_receiver(path: &Path) -> TypedResult> { + let socket = UnixDatagram::bind(path).typ(SystemError::Panic)?; + socket.set_nonblocking(true).typ(SystemError::Panic)?; + Ok(IpcReceiver::from(socket)) +} - Ok((tx, rx)) +pub fn connect_sender(path: &Path) -> TypedResult> { + let socket = UnixDatagram::unbound().typ(SystemError::Panic)?; + socket.connect(path).typ(SystemError::Panic)?; + socket.set_nonblocking(true).typ(SystemError::Panic)?; + Ok(IpcSender::from(socket)) } impl AsRawFd for IpcSender { @@ -134,6 +128,24 @@ impl AsFd for IpcReceiver { } } +impl From for IpcReceiver { + fn from(value: UnixDatagram) -> Self { + Self { + socket: value, + _p: PhantomData, + } + } +} + +impl From for IpcSender { + fn from(value: UnixDatagram) -> Self { + Self { + socket: value, + _p: PhantomData, + } + } +} + impl From for IpcSender { fn from(value: OwnedFd) -> Self { Self { diff --git a/core/src/partition.rs b/core/src/partition.rs index 5c78787..407c4db 100644 --- a/core/src/partition.rs +++ b/core/src/partition.rs @@ -17,7 +17,6 @@ pub struct PartitionConstants { pub period: Duration, pub duration: Duration, pub start_condition: StartCondition, - pub sender_fd: RawFd, pub start_time_fd: RawFd, pub partition_mode_fd: RawFd, @@ -41,6 +40,7 @@ impl PartitionConstants { pub const MAIN_PROCESS_CGROUP: &'static str = "main"; pub const APERIODIC_PROCESS_CGROUP: &'static str = "aperiodic"; pub const PERIODIC_PROCESS_CGROUP: &'static str = "periodic"; + pub const IPC_SENDER: &'static str = "/.inner/ipc"; pub fn open() -> TypedResult { let fd = std::env::var(Self::PARTITION_CONSTANTS_FD) diff --git a/hypervisor/src/hypervisor/partition.rs b/hypervisor/src/hypervisor/partition.rs index d5c163d..89a0de5 100644 --- a/hypervisor/src/hypervisor/partition.rs +++ b/hypervisor/src/hypervisor/partition.rs @@ -25,7 +25,7 @@ use a653rs_linux_core::error::{ use a653rs_linux_core::file::TempFile; use a653rs_linux_core::health::{ModuleRecoveryAction, PartitionHMTable, RecoveryAction}; use a653rs_linux_core::health_event::PartitionCall; -use a653rs_linux_core::ipc::{channel_pair, io_pair, IoReceiver, IoSender, IpcReceiver}; +use a653rs_linux_core::ipc::{bind_receiver, io_pair, IoReceiver, IoSender, IpcReceiver}; use a653rs_linux_core::partition::{PartitionConstants, SamplingConstant}; use a653rs_linux_core::sampling::Sampling; pub use mounting::FileMounter; @@ -94,7 +94,12 @@ impl Run { .ok_or_else(|| anyhow!("SystemTime was not set")) .typ(SystemError::Panic)?; - let (call_tx, call_rx) = channel_pair::()?; + let ipc_path = base + .working_dir + .path() + .join(PartitionConstants::IPC_SENDER.trim_start_matches('/')); + std::fs::create_dir_all(ipc_path.parent().unwrap()).typ(SystemError::Panic)?; + let call_rx = bind_receiver::(&ipc_path)?; // TODO add a `::new(warm_start: bool)->Self` function to `OperatingMode`, use // it here @@ -154,13 +159,14 @@ impl Run { let mut keep = base.sampling_fds(); keep.push(sys_time.as_raw_fd()); - keep.push(call_tx.as_raw_fd()); keep.push(mode_file.as_raw_fd()); keep.push(udp_io_rx.as_raw_fd()); keep.push(tcp_io_rx.as_raw_fd()); Partition::release_fds(&keep).unwrap(); + let ipc_path_inner: PathBuf = PartitionConstants::IPC_SENDER[1..].into(); + // Mount the required mounts let mut mounts = vec![ // Mount working directory as tmpfs @@ -208,6 +214,9 @@ impl Run { None, ) .unwrap(), + // IPC Socket for Syscalls + FileMounter::new(Some(ipc_path), ipc_path_inner, None, MsFlags::MS_BIND, None) + .unwrap(), ]; for (source, target) in base.mounts.iter().cloned() { @@ -248,7 +257,6 @@ impl Run { period: base.period, duration: base.duration, start_condition: condition, - sender_fd: call_tx.as_raw_fd(), start_time_fd: sys_time.as_raw_fd(), partition_mode_fd: mode_file.as_raw_fd(), io_fd: udp_io_rx.as_raw_fd(), diff --git a/partition/src/lib.rs b/partition/src/lib.rs index f7b161e..d69d975 100644 --- a/partition/src/lib.rs +++ b/partition/src/lib.rs @@ -10,19 +10,20 @@ extern crate log; #[cfg(feature = "socket")] use std::net::{TcpStream, UdpSocket}; -use std::sync::Arc; +#[cfg(feature = "socket")] +use std::os::fd::FromRawFd; #[cfg(feature = "socket")] use a653rs_linux_core::ipc::IoReceiver; use std::os::fd::{AsRawFd, OwnedFd}; -use std::os::unix::prelude::FromRawFd; +use std::sync::Arc; use std::time::{Duration, Instant}; use a653rs::prelude::OperatingMode; use a653rs_linux_core::file::{get_memfd, TempFile}; use a653rs_linux_core::health_event::PartitionCall; -use a653rs_linux_core::ipc::IpcSender; +use a653rs_linux_core::ipc::{self, IpcSender}; use a653rs_linux_core::partition::*; use a653rs_linux_core::syscall::SYSCALL_SOCKET_PATH; use nix::sys::socket::{self, connect, AddressFamily, SockFlag, SockType, UnixAddr}; @@ -68,7 +69,7 @@ pub(crate) static SAMPLING_PORTS: Lazy> = - Lazy::new(|| unsafe { IpcSender::from_raw_fd(CONSTANTS.sender_fd) }); + Lazy::new(|| ipc::connect_sender(PartitionConstants::IPC_SENDER.as_ref()).unwrap()); #[cfg(feature = "socket")] pub(crate) static UDP_IO_RX: Lazy> = From 959e3f732e06a9248c6842b72c9f54829a49b792 Mon Sep 17 00:00:00 2001 From: Sven Friedrich Date: Tue, 27 Feb 2024 12:50:05 +0100 Subject: [PATCH 7/9] feat(hypervisor): specialize `FileMounter` --- Cargo.lock | 1 + Cargo.toml | 1 + core/Cargo.toml | 2 +- hypervisor/Cargo.toml | 1 + hypervisor/src/hypervisor/partition.rs | 56 ++--------- .../src/hypervisor/partition/mounting.rs | 93 +++++++++++++------ 6 files changed, 81 insertions(+), 73 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index a897583..083556b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -60,6 +60,7 @@ dependencies = [ "a653rs", "a653rs-linux-core", "anyhow", + "bytesize", "clap", "clone3", "humantime", diff --git a/Cargo.toml b/Cargo.toml index e21c77f..92137d7 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -26,3 +26,4 @@ procfs = "0.16" polling = "3.4" itertools = "0.12.1" once_cell = "1.19" +bytesize = "1.1" diff --git a/core/Cargo.toml b/core/Cargo.toml index d0399e7..2aac840 100644 --- a/core/Cargo.toml +++ b/core/Cargo.toml @@ -22,7 +22,7 @@ serde = { version = "1.0", features = ["derive"] } memfd = "0.6" bincode = "1.3" thiserror = "1.0" -bytesize = {version = "1.1.0", features = ["serde"]} +bytesize = {workspace = true, features = ["serde"]} byteorder = "1.4.3" enum_primitive = "0.1" diff --git a/hypervisor/Cargo.toml b/hypervisor/Cargo.toml index d7dbc55..16aac51 100644 --- a/hypervisor/Cargo.toml +++ b/hypervisor/Cargo.toml @@ -20,6 +20,7 @@ procfs.workspace = true polling.workspace = true itertools.workspace = true once_cell.workspace = true +bytesize.workspace = true anyhow = "1.0" tempfile = "3.3" clone3 = "0.2" diff --git a/hypervisor/src/hypervisor/partition.rs b/hypervisor/src/hypervisor/partition.rs index 89a0de5..b4b6031 100644 --- a/hypervisor/src/hypervisor/partition.rs +++ b/hypervisor/src/hypervisor/partition.rs @@ -10,9 +10,10 @@ use std::time::{Duration, Instant}; use a653rs::bindings::{PartitionId, PortDirection}; use a653rs::prelude::{OperatingMode, StartCondition}; use anyhow::{anyhow, Context}; +use bytesize::ByteSize; use clone3::Clone3; use itertools::Itertools; -use nix::mount::{umount2, MntFlags, MsFlags}; +use nix::mount::{umount2, MntFlags}; use nix::unistd::{chdir, close, getpid, pivot_root, setgid, setuid, Gid, Pid, Uid}; use polling::{Event, Events, Poller}; use procfs::process::Process; @@ -170,53 +171,17 @@ impl Run { // Mount the required mounts let mut mounts = vec![ // Mount working directory as tmpfs - FileMounter::new( - None, - "".into(), - Some("tmpfs".into()), - MsFlags::empty(), - Some("size=500k".to_owned()), - ) - .unwrap(), + FileMounter::tmpfs("", ByteSize::kb(500)), // Mount binary - FileMounter::new( - Some(base.bin.clone()), - "bin".into(), - None, - MsFlags::MS_RDONLY | MsFlags::MS_BIND, - None, - ) - .unwrap(), + FileMounter::bind_ro(&base.bin, "/bin").unwrap(), // Mount /dev/null (for stdio::null) - FileMounter::new( - Some("/dev/null".into()), - "dev/null".into(), - None, - MsFlags::MS_RDONLY | MsFlags::MS_BIND, - None, - ) - .unwrap(), + FileMounter::bind_ro("/dev/null", "/dev/null").unwrap(), // Mount proc - FileMounter::new( - Some("/proc".into()), - "proc".into(), - Some("proc".into()), - MsFlags::empty(), - None, - ) - .unwrap(), + FileMounter::proc(), // Mount CGroup v2 - FileMounter::new( - None, - "sys/fs/cgroup".into(), - Some("cgroup2".into()), - MsFlags::empty(), - None, - ) - .unwrap(), + FileMounter::cgroup(), // IPC Socket for Syscalls - FileMounter::new(Some(ipc_path), ipc_path_inner, None, MsFlags::MS_BIND, None) - .unwrap(), + FileMounter::bind_rw(ipc_path, ipc_path_inner).unwrap(), ]; for (source, target) in base.mounts.iter().cloned() { @@ -225,10 +190,9 @@ impl Run { let relative_target = target .strip_prefix("/") .context("target paths for mounting must be absolute") - .typ(SystemError::Panic)? - .to_path_buf(); + .typ(SystemError::Panic)?; - let file_mounter = FileMounter::from_paths(source, relative_target) + let file_mounter = FileMounter::bind_rw(source, relative_target) .context("failed to initialize file mounter") .typ(SystemError::Panic)?; mounts.push(file_mounter); diff --git a/hypervisor/src/hypervisor/partition/mounting.rs b/hypervisor/src/hypervisor/partition/mounting.rs index 1442eba..057f5b0 100644 --- a/hypervisor/src/hypervisor/partition/mounting.rs +++ b/hypervisor/src/hypervisor/partition/mounting.rs @@ -2,6 +2,7 @@ use std::fs; use std::path::{Path, PathBuf}; use anyhow::{bail, Context}; +use bytesize::ByteSize; use nix::mount::{mount, MsFlags}; /// Information about the files that are to be mounted @@ -19,10 +20,16 @@ pub struct FileMounter { impl FileMounter { // Mount (and consume) a device pub fn mount(self, base_dir: &Path) -> anyhow::Result<()> { - let target: &PathBuf = &base_dir.join(self.target); + let relative_target = self.target.strip_prefix("/").unwrap_or(&self.target); + println!("{relative_target:?}"); + let target: &PathBuf = &base_dir.join(relative_target); let fstype = self.fstype.map(PathBuf::from); let data = self.data.map(PathBuf::from); + if let Some(src) = &self.source { + Self::exists(src)?; + } + if self.is_dir { trace!("Creating directory {}", target.display()); fs::create_dir_all(target).context("failed to create target directory")?; @@ -47,38 +54,72 @@ impl FileMounter { .context("failed to make `nix::mount()` call") } - /// Creates a new `FileMounter` from a source path and a relative target - /// path. - pub fn from_paths(source: PathBuf, target: PathBuf) -> anyhow::Result { - Self::new(Some(source), target, None, MsFlags::MS_BIND, None) + fn exists>(path: T) -> anyhow::Result<()> { + if !path.as_ref().exists() { + bail!( + "source file/dir does not exist: {}", + path.as_ref().display() + ) + } + Ok(()) } - pub fn new( - source: Option, - target: PathBuf, - fstype: Option, - flags: MsFlags, - data: Option, - ) -> anyhow::Result { - if let Some(source) = source.as_ref() { - if !source.exists() { - bail!("source file/dir does not exist: {}", source.display()) - } + pub fn tmpfs>(target: T, size: ByteSize) -> Self { + FileMounter { + source: None, + target: target.as_ref().to_path_buf(), + fstype: Some("tmpfs".into()), + flags: MsFlags::empty(), + data: Some(format!("size={}", size.0)), + is_dir: true, } + } - if target.is_absolute() { - bail!("target path cannot be absolute because it will later be appended to a base directory"); + pub fn cgroup() -> Self { + FileMounter { + source: None, + target: "/sys/fs/cgroup".into(), + fstype: Some("cgroup2".into()), + flags: MsFlags::empty(), + data: None, + is_dir: true, } + } - let is_dir = source.as_ref().map_or(true, |source| source.is_dir()); + pub fn proc() -> Self { + FileMounter { + source: Some("/proc".into()), + target: "/proc".into(), + fstype: Some("proc".into()), + flags: MsFlags::empty(), + data: None, + is_dir: true, + } + } - Ok(Self { - source, - target, - fstype, - flags, - data, - is_dir, + pub fn bind_ro, U: AsRef>(source: T, target: U) -> anyhow::Result { + Self::exists(&source)?; + + Ok(FileMounter { + source: Some(source.as_ref().to_path_buf()), + target: target.as_ref().to_path_buf(), + fstype: None, + flags: MsFlags::MS_RDONLY | MsFlags::MS_BIND, + data: None, + is_dir: source.as_ref().is_dir(), + }) + } + + pub fn bind_rw, U: AsRef>(source: T, target: U) -> anyhow::Result { + Self::exists(&source)?; + + Ok(FileMounter { + source: Some(source.as_ref().to_path_buf()), + target: target.as_ref().to_path_buf(), + fstype: None, + flags: MsFlags::MS_BIND, + data: None, + is_dir: source.as_ref().is_dir(), }) } } From fc19383d24cd287a9827ca88d5666169be0f45b8 Mon Sep 17 00:00:00 2001 From: Florian Hartung Date: Thu, 22 Feb 2024 11:16:22 +0100 Subject: [PATCH 8/9] refactor(partition): docs & using channel instead of Barrier --- Cargo.lock | 257 +++++++++++++++++++++++++++++++++++++-- partition/Cargo.toml | 1 + partition/src/process.rs | 41 +++++-- 3 files changed, 273 insertions(+), 26 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 083556b..4fe4c43 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -25,6 +25,7 @@ dependencies = [ "memmap2", "nix", "once_cell", + "oneshot", "polling", "procfs", "tinyvec", @@ -281,7 +282,7 @@ dependencies = [ "android-tzdata", "iana-time-zone", "num-traits 0.2.18", - "windows-targets", + "windows-targets 0.52.0", ] [[package]] @@ -523,6 +524,19 @@ dependencies = [ "serde", ] +[[package]] +name = "generator" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5cc16584ff22b460a382b7feec54b23d2908d858152e5739a120b949293bd74e" +dependencies = [ + "cc", + "libc", + "log", + "rustversion", + "windows", +] + [[package]] name = "getrandom" version = "0.2.12" @@ -697,6 +711,29 @@ version = "0.4.20" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f" +[[package]] +name = "loom" +version = "0.5.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ff50ecb28bb86013e935fb6683ab1f6d3a20016f123c76fd4c27470076ac30f5" +dependencies = [ + "cfg-if 1.0.0", + "generator", + "pin-utils", + "scoped-tls", + "tracing", + "tracing-subscriber", +] + +[[package]] +name = "matchers" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8263075bb86c5a1b1427b5ae862e8889656f126e9f77c484496e8b47cf5c5558" +dependencies = [ + "regex-automata 0.1.10", +] + [[package]] name = "memchr" version = "2.7.1" @@ -751,6 +788,16 @@ dependencies = [ "memoffset", ] +[[package]] +name = "nu-ansi-term" +version = "0.46.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77a8165726e8236064dbb45459242600304b42a5ea24ee2948e18e023bf7ba84" +dependencies = [ + "overload", + "winapi", +] + [[package]] name = "num" version = "0.4.1" @@ -841,12 +888,33 @@ version = "1.19.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +[[package]] +name = "oneshot" +version = "0.1.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6f6640c6bda7731b1fdbab747981a0f896dd1fedaf9f4a53fa237a04a84431f4" +dependencies = [ + "loom", +] + +[[package]] +name = "overload" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b15813163c1d831bf4a13c3610c05c0d03b39feb07f7e09fa234dac9b15aaf39" + [[package]] name = "pin-project-lite" version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8afb450f006bf6385ca15ef45d71d2288452bc3683ce2e2cacc0d18e4be60b58" +[[package]] +name = "pin-utils" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" + [[package]] name = "ping_client" version = "0.1.0" @@ -1002,8 +1070,17 @@ checksum = "b62dbe01f0b06f9d8dc7d49e05a0785f153b00b2c227856282f671e0318c9b15" dependencies = [ "aho-corasick", "memchr", - "regex-automata", - "regex-syntax", + "regex-automata 0.4.5", + "regex-syntax 0.8.2", +] + +[[package]] +name = "regex-automata" +version = "0.1.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6c230d73fb8d8c1b9c0b3135c5142a8acee3a0558fb8db5cf1cb65f8d7862132" +dependencies = [ + "regex-syntax 0.6.29", ] [[package]] @@ -1014,9 +1091,15 @@ checksum = "5bb987efffd3c6d0d8f5f89510bb458559eab11e4f869acb20bf845e016259cd" dependencies = [ "aho-corasick", "memchr", - "regex-syntax", + "regex-syntax 0.8.2", ] +[[package]] +name = "regex-syntax" +version = "0.6.29" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f162c6dd7b008981e4d40210aca20b4bd0f9b60ca9271061b07f78537722f2e1" + [[package]] name = "regex-syntax" version = "0.8.2" @@ -1057,6 +1140,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scoped-tls" +version = "1.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" + [[package]] name = "serde" version = "1.0.196" @@ -1090,6 +1179,21 @@ dependencies = [ "unsafe-libyaml", ] +[[package]] +name = "sharded-slab" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +dependencies = [ + "lazy_static", +] + +[[package]] +name = "smallvec" +version = "1.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6ecd384b10a64542d77071bd64bd7b231f4ed5940fba55e98c3de13824cf3d7" + [[package]] name = "static_assertions" version = "1.1.0" @@ -1193,6 +1297,16 @@ dependencies = [ "syn 2.0.48", ] +[[package]] +name = "thread_local" +version = "1.1.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8b9ef9bad013ada3808854ceac7b46812a6465ba368859a37e2100283d2d719c" +dependencies = [ + "cfg-if 1.0.0", + "once_cell", +] + [[package]] name = "tinyvec" version = "1.6.0" @@ -1206,14 +1320,59 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c3523ab5a71916ccf420eebdf5521fcef02141234bbc0b8a49f2fdc4544364ef" dependencies = [ "pin-project-lite", + "tracing-attributes", "tracing-core", ] +[[package]] +name = "tracing-attributes" +version = "0.1.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "34704c8d6ebcbc939824180af020566b01a7c01f80641264eba0999f6c2b6be7" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.48", +] + [[package]] name = "tracing-core" version = "0.1.32" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c06d3da6113f116aaee68e4d601191614c9053067f9ab7f6edbcb161237daa54" +dependencies = [ + "once_cell", + "valuable", +] + +[[package]] +name = "tracing-log" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" +dependencies = [ + "log", + "once_cell", + "tracing-core", +] + +[[package]] +name = "tracing-subscriber" +version = "0.3.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ad0f048c97dbd9faa9b7df56362b8ebcaa52adb06b498c050d2f4e32f90a7a8b" +dependencies = [ + "matchers", + "nu-ansi-term", + "once_cell", + "regex", + "sharded-slab", + "smallvec", + "thread_local", + "tracing", + "tracing-core", + "tracing-log", +] [[package]] name = "uapi" @@ -1265,6 +1424,12 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "711b9620af191e0cdc7468a8d14e709c3dcdb115b36f838e601583af800a370a" +[[package]] +name = "valuable" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "830b7e5d4d90034032940e4ace0d9a9a057e7a45cd94e6c007832e39edb82f6d" + [[package]] name = "walkdir" version = "2.4.0" @@ -1379,13 +1544,22 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f" +[[package]] +name = "windows" +version = "0.48.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e686886bc078bc1b0b600cac0147aadb815089b6e4da64016cbd754b6342700f" +dependencies = [ + "windows-targets 0.48.5", +] + [[package]] name = "windows-core" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "33ab640c8d7e35bf8ba19b884ba838ceb4fba93a4e8c65a9059d08afcfc683d9" dependencies = [ - "windows-targets", + "windows-targets 0.52.0", ] [[package]] @@ -1394,7 +1568,22 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d" dependencies = [ - "windows-targets", + "windows-targets 0.52.0", +] + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm 0.48.5", + "windows_aarch64_msvc 0.48.5", + "windows_i686_gnu 0.48.5", + "windows_i686_msvc 0.48.5", + "windows_x86_64_gnu 0.48.5", + "windows_x86_64_gnullvm 0.48.5", + "windows_x86_64_msvc 0.48.5", ] [[package]] @@ -1403,51 +1592,93 @@ version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a18201040b24831fbb9e4eb208f8892e1f50a37feb53cc7ff887feb8f50e7cd" dependencies = [ - "windows_aarch64_gnullvm", - "windows_aarch64_msvc", - "windows_i686_gnu", - "windows_i686_msvc", - "windows_x86_64_gnu", - "windows_x86_64_gnullvm", - "windows_x86_64_msvc", + "windows_aarch64_gnullvm 0.52.0", + "windows_aarch64_msvc 0.52.0", + "windows_i686_gnu 0.52.0", + "windows_i686_msvc 0.52.0", + "windows_x86_64_gnu 0.52.0", + "windows_x86_64_gnullvm 0.52.0", + "windows_x86_64_msvc 0.52.0", ] +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + [[package]] name = "windows_aarch64_gnullvm" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cb7764e35d4db8a7921e09562a0304bf2f93e0a51bfccee0bd0bb0b666b015ea" +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + [[package]] name = "windows_aarch64_msvc" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "bbaa0368d4f1d2aaefc55b6fcfee13f41544ddf36801e793edbbfd7d7df075ef" +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + [[package]] name = "windows_i686_gnu" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a28637cb1fa3560a16915793afb20081aba2c92ee8af57b4d5f28e4b3e7df313" +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + [[package]] name = "windows_i686_msvc" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ffe5e8e31046ce6230cc7215707b816e339ff4d4d67c65dffa206fd0f7aa7b9a" +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + [[package]] name = "windows_x86_64_gnu" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3d6fa32db2bc4a2f5abeacf2b69f7992cd09dca97498da74a151a3132c26befd" +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + [[package]] name = "windows_x86_64_gnullvm" version = "0.52.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1a657e1e9d3f514745a572a6846d3c7aa7dbe1658c056ed9c3344c4109a6949e" +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" + [[package]] name = "windows_x86_64_msvc" version = "0.52.0" diff --git a/partition/Cargo.toml b/partition/Cargo.toml index 83f3de6..454059a 100644 --- a/partition/Cargo.toml +++ b/partition/Cargo.toml @@ -23,3 +23,4 @@ lazy_static = "1.4" anyhow = "1.0" log.workspace = true tinyvec = "1.6" +oneshot = "0.1.6" diff --git a/partition/src/process.rs b/partition/src/process.rs index bcb827d..d4ca83e 100644 --- a/partition/src/process.rs +++ b/partition/src/process.rs @@ -1,17 +1,18 @@ -use std::sync::atomic::{AtomicBool, AtomicI32}; -use std::sync::{Arc, Barrier, Mutex}; +use std::sync::atomic::{AtomicBool, AtomicI32, Ordering}; +use std::sync::{Arc, Mutex}; use std::thread::Builder; use a653rs::bindings::*; use a653rs::prelude::{ProcessAttribute, SystemTime}; +use anyhow::anyhow; +use nix::unistd::{gettid, Pid}; + use a653rs_linux_core::cgroup; use a653rs_linux_core::cgroup::CGroup; use a653rs_linux_core::error::{ ErrorLevel, LeveledResult, ResultExt, SystemError, TypedResult, TypedResultExt, }; use a653rs_linux_core::partition::PartitionConstants; -use anyhow::anyhow; -use nix::unistd::{gettid, Pid}; use crate::{APERIODIC_PROCESS, PERIODIC_PROCESS}; @@ -99,24 +100,38 @@ impl Process { .typ(SystemError::CGroup) .lev(ErrorLevel::Partition)?; - let sync = Arc::new((Barrier::new(2), Mutex::new(()))); - let s = sync.clone(); let entry = self.attr.entry_point; - let proc = self.pid.clone(); + + // A mutex required for freezing the thread right before execution of `entry`. + let sync = Arc::new(Mutex::new(())); + let sync2 = Arc::clone(&sync); + // A channel for the thread to send its thread id. + let (pid_tx, pid_rx) = oneshot::channel(); + + // Before spawning the thread, lock the `sync` mutex so the thread cannot + // execute `entry` yet. + let lock = sync.lock().unwrap(); let _thread = Builder::new() .name(name.to_string()) .stack_size(self.stack_size) .spawn(move || { - proc.store(gettid().as_raw(), std::sync::atomic::Ordering::Relaxed); - s.0.wait(); - drop(s.1.lock().unwrap()); + pid_tx.send(gettid().as_raw()).unwrap(); + + // We want this thread to be frozen right here before the entry function gets + // executed. To do that, we wait for the `sync` mutex to unlock. During the wait + // period this thread is then moved to the frozen cgroup. + drop(sync2.lock().unwrap()); (entry)(); }) .lev_typ(SystemError::Panic, ErrorLevel::Partition)?; - let lock = sync.1.lock().unwrap(); - sync.0.wait(); - let pid = Pid::from_raw(self.pid.load(std::sync::atomic::Ordering::Relaxed)); + // Receive thread id and store it + let pid_raw = pid_rx.recv().unwrap(); + self.pid.store(pid_raw, Ordering::Relaxed); + let pid = Pid::from_raw(pid_raw); + // Freeze thread by moving it to the cgroup cg.mv_thread(pid).unwrap(); + // Now unlock the `sync` mutex, so the thread can continue execution when the + // cgroup is unfrozen. drop(lock); Ok(()) From c318df6f1b66e44348cbc98608e350bb7fec4bfe Mon Sep 17 00:00:00 2001 From: Florian Hartung Date: Thu, 22 Feb 2024 11:26:02 +0100 Subject: [PATCH 9/9] fix(partition): use safer atomic orderings Replace `Ordering::Relaxed` with `Ordering::SeqCst` as a relaxed ordering may have unexpected behaviour. Also, the performance hit should not be significant. --- partition/src/process.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/partition/src/process.rs b/partition/src/process.rs index d4ca83e..a4c5ab1 100644 --- a/partition/src/process.rs +++ b/partition/src/process.rs @@ -68,14 +68,14 @@ impl Process { pub(crate) fn get_self() -> Option> { if let Some(p) = APERIODIC_PROCESS.get() { - let id = p.pid.load(std::sync::atomic::Ordering::Relaxed); + let id = p.pid.load(Ordering::SeqCst); if id == nix::unistd::gettid().as_raw() { return Some(p.clone()); } } if let Some(p) = PERIODIC_PROCESS.get() { - let id = p.pid.load(std::sync::atomic::Ordering::Relaxed); + let id = p.pid.load(Ordering::SeqCst); if id == nix::unistd::gettid().as_raw() { return Some(p.clone()); } @@ -126,7 +126,7 @@ impl Process { .lev_typ(SystemError::Panic, ErrorLevel::Partition)?; // Receive thread id and store it let pid_raw = pid_rx.recv().unwrap(); - self.pid.store(pid_raw, Ordering::Relaxed); + self.pid.store(pid_raw, Ordering::SeqCst); let pid = Pid::from_raw(pid_raw); // Freeze thread by moving it to the cgroup cg.mv_thread(pid).unwrap();