From 08bfcfd2caa2c5ed633cd0f7786188644be2d520 Mon Sep 17 00:00:00 2001
From: "Panagiotis \"Ivory\" Vasilopoulos"
Date: Tue, 25 Jun 2024 15:35:35 +0200
Subject: [PATCH] ASLR: Lay down the foundations

- Move x86_64-related paging code to src/arch/x86_64/paging
- Tests: x86_64-related paging tests should use a guest_address that is not 0
- Tests: Move them into separate files, use appropriate 'use' directives
- Tests: Use init_guest_mem wrapper in test_virt_to_phys
- Fix kernel memory loading
- Add guest_address getter in UhyveVm
- Change names of constants to clarify their purpose
- Use u64 for arch::RAM_START instead of GuestVirtAddr
- Remove pagetable_l0 from virt_to_phys function
- Various `cargo fmt`-related changes
- aarch64: Blindly replace constant names and apply the same RAM_START change

We currently rely on the guest_address field of MmapMemory to calculate
the offsets during the initialization of the VM and when converting
virtual addresses to physical addresses. The latter is intended to be
temporary: we should eventually read the value from the CR3 register,
but that is too complex for the time being because of the differences
between the architectures.

Although this revision does work with relocatable binaries, it does not
make use of this functionality _just_ yet.

Fixes #719.

Co-authored-by: Jonathan
---
 src/arch/aarch64/mod.rs       |  45 ++++----
 src/arch/x86_64/mod.rs        | 189 +++++++---------------------
 src/arch/x86_64/paging/mod.rs | 153 +++++++++++++++++++++++++++
 src/consts.rs                 |  19 ++--
 src/hypercall.rs              |  23 ++---
 src/linux/gdb/breakpoints.rs  |  11 +-
 src/linux/gdb/mod.rs          |   6 +-
 src/linux/x86_64/kvm_cpu.rs   |  24 +++--
 src/macos/aarch64/vcpu.rs     |  19 +++-
 src/macos/x86_64/vcpu.rs      |  29 ++++--
 src/vm.rs                     |  29 +++++-
 tests/gdb.rs                  |   2 +-
 12 files changed, 319 insertions(+), 230 deletions(-)
 create mode 100644 src/arch/x86_64/paging/mod.rs

diff --git a/src/arch/aarch64/mod.rs b/src/arch/aarch64/mod.rs
index 636c1265..8576a8b2 100644
--- a/src/arch/aarch64/mod.rs
+++ b/src/arch/aarch64/mod.rs
@@ -4,7 +4,7 @@ use bitflags::bitflags;
 use uhyve_interface::{GuestPhysAddr, GuestVirtAddr};
 
 use crate::{
-	consts::{BOOT_INFO_ADDR, BOOT_PGT},
+	consts::{BOOT_INFO_ADDR_OFFSET, PGT_OFFSET},
 	mem::MmapMemory,
 	paging::PagetableError,
 };
@@ -115,7 +115,6 @@ fn is_valid_address(virtual_address: GuestVirtAddr) -> bool {
 pub fn virt_to_phys(
 	addr: GuestVirtAddr,
 	mem: &MmapMemory,
-	pagetable_l0: GuestPhysAddr,
 ) -> Result<GuestPhysAddr, PagetableError> {
 	if !is_valid_address(addr) {
 		return Err(PagetableError::InvalidAddress);
 	}
@@ -133,7 +132,7 @@ pub fn virt_to_phys(
 	// - We are page_aligned, and thus also PageTableEntry aligned.
 	let mut pagetable: &[PageTableEntry] = unsafe {
 		std::mem::transmute::<&[u8], &[PageTableEntry]>(
-			mem.slice_at(pagetable_l0, PAGE_SIZE).unwrap(),
+			mem.slice_at(mem.guest_address, PAGE_SIZE).unwrap(),
 		)
 	};
 	// TODO: Depending on the virtual address length and granule (defined in TCR register by TG and TxSZ), we could reduce the number of pagetable walks. Hermit doesn't do this at the moment.
@@ -155,43 +154,43 @@ pub fn virt_to_phys( Ok(pte.address()) } -pub fn init_guest_mem(mem: &mut [u8]) { +pub fn init_guest_mem(mem: &mut [u8], _guest_address: u64) { let mem_addr = std::ptr::addr_of_mut!(mem[0]); - assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 512 * size_of::()); + assert!(mem.len() >= PGT_OFFSET as usize + 512 * size_of::()); let pgt_slice = unsafe { - std::slice::from_raw_parts_mut(mem_addr.offset(BOOT_PGT.as_u64() as isize) as *mut u64, 512) + std::slice::from_raw_parts_mut(mem_addr.offset(PGT_OFFSET as isize) as *mut u64, 512) }; pgt_slice.fill(0); - pgt_slice[0] = BOOT_PGT.as_u64() + 0x1000 + PT_PT; - pgt_slice[511] = BOOT_PGT.as_u64() + PT_PT + PT_SELF; + pgt_slice[0] = PGT_OFFSET + 0x1000 + PT_PT; + pgt_slice[511] = PGT_OFFSET + PT_PT + PT_SELF; - assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x1000 + 512 * size_of::()); + assert!(mem.len() >= PGT_OFFSET as usize + 0x1000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x1000) as *mut u64, + mem_addr.offset(PGT_OFFSET as isize + 0x1000) as *mut u64, 512, ) }; pgt_slice.fill(0); - pgt_slice[0] = BOOT_PGT.as_u64() + 0x2000 + PT_PT; + pgt_slice[0] = PGT_OFFSET + 0x2000 + PT_PT; - assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x2000 + 512 * size_of::()); + assert!(mem.len() >= PGT_OFFSET as usize + 0x2000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x2000) as *mut u64, + mem_addr.offset(PGT_OFFSET as isize + 0x2000) as *mut u64, 512, ) }; pgt_slice.fill(0); - pgt_slice[0] = BOOT_PGT.as_u64() + 0x3000 + PT_PT; - pgt_slice[1] = BOOT_PGT.as_u64() + 0x4000 + PT_PT; - pgt_slice[2] = BOOT_PGT.as_u64() + 0x5000 + PT_PT; + pgt_slice[0] = PGT_OFFSET + 0x3000 + PT_PT; + pgt_slice[1] = PGT_OFFSET + 0x4000 + PT_PT; + pgt_slice[2] = PGT_OFFSET + 0x5000 + PT_PT; - assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x3000 + 512 * size_of::()); + assert!(mem.len() >= PGT_OFFSET as usize + 0x3000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x3000) as *mut u64, + mem_addr.offset(PGT_OFFSET as isize + 0x3000) as *mut u64, 512, ) }; @@ -199,12 +198,12 @@ pub fn init_guest_mem(mem: &mut [u8]) { // map Uhyve ports into the virtual address space pgt_slice[0] = PT_MEM_CD; // map BootInfo into the virtual address space - pgt_slice[BOOT_INFO_ADDR.as_u64() as usize / PAGE_SIZE] = BOOT_INFO_ADDR.as_u64() + PT_MEM; + pgt_slice[BOOT_INFO_ADDR_OFFSET as usize / PAGE_SIZE] = BOOT_INFO_ADDR_OFFSET + PT_MEM; - assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x4000 + 512 * size_of::()); + assert!(mem.len() >= PGT_OFFSET as usize + 0x4000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x4000) as *mut u64, + mem_addr.offset(PGT_OFFSET as isize + 0x4000) as *mut u64, 512, ) }; @@ -212,10 +211,10 @@ pub fn init_guest_mem(mem: &mut [u8]) { *i = 0x200000u64 + (idx * PAGE_SIZE) as u64 + PT_MEM; } - assert!(mem.len() >= BOOT_PGT.as_u64() as usize + 0x5000 + 512 * size_of::()); + assert!(mem.len() >= PGT_OFFSET as usize + 0x5000 + 512 * size_of::()); let pgt_slice = unsafe { std::slice::from_raw_parts_mut( - mem_addr.offset(BOOT_PGT.as_u64() as isize + 0x5000) as *mut u64, + mem_addr.offset(PGT_OFFSET as isize + 0x5000) as *mut u64, 512, ) }; diff --git a/src/arch/x86_64/mod.rs b/src/arch/x86_64/mod.rs index ff2c82f5..4d0a13d5 100644 
--- a/src/arch/x86_64/mod.rs +++ b/src/arch/x86_64/mod.rs @@ -1,3 +1,4 @@ +pub mod paging; pub mod registers; use core::arch::x86_64::_rdtsc as rdtsc; @@ -10,17 +11,15 @@ use log::{debug, warn}; use raw_cpuid::{CpuId, CpuIdReaderNative}; use thiserror::Error; use uhyve_interface::{GuestPhysAddr, GuestVirtAddr}; -use x86_64::{ - structures::paging::{ - page_table::{FrameError, PageTableEntry}, - Page, PageTable, PageTableFlags, PageTableIndex, Size2MiB, - }, - PhysAddr, +use x86_64::structures::paging::{ + page_table::{FrameError, PageTableEntry}, + PageTable, PageTableIndex, }; -use crate::{consts::*, mem::MmapMemory, paging::PagetableError}; +use crate::{consts::PML4_OFFSET, mem::MmapMemory, paging::PagetableError}; pub const RAM_START: GuestPhysAddr = GuestPhysAddr::new(0x00); + const MHZ_TO_HZ: u64 = 1000000; const KHZ_TO_HZ: u64 = 1000; @@ -111,92 +110,10 @@ pub fn get_cpu_frequency_from_os() -> std::result::Result u64 { - ((base & 0xff000000u64) << (56 - 24)) - | ((flags & 0x0000f0ffu64) << 40) - | ((limit & 0x000f0000u64) << (48 - 16)) - | ((base & 0x00ffffffu64) << 16) - | (limit & 0x0000ffffu64) -} - -pub const MIN_PHYSMEM_SIZE: usize = BOOT_PDE.as_u64() as usize + 0x1000; - -/// Creates the pagetables and the GDT in the guest memory space. -/// -/// The memory slice must be larger than [`MIN_PHYSMEM_SIZE`]. -/// Also, the memory `mem` needs to be zeroed for [`PAGE_SIZE`] bytes at the -/// offsets [`BOOT_PML4`] and [`BOOT_PDPTE`], otherwise the integrity of the -/// pagetables and thus the integrity of the guest's memory is not ensured -pub fn initialize_pagetables(mem: &mut [u8]) { - assert!(mem.len() >= MIN_PHYSMEM_SIZE); - let mem_addr = std::ptr::addr_of_mut!(mem[0]); - - let (gdt_entry, pml4, pdpte, pde); - // Safety: - // We only operate in `mem`, which is plain bytes and we have ownership of - // these and it is asserted to be large enough. - unsafe { - gdt_entry = mem_addr - .add(BOOT_GDT.as_u64() as usize) - .cast::<[u64; 3]>() - .as_mut() - .unwrap(); - - pml4 = mem_addr - .add(BOOT_PML4.as_u64() as usize) - .cast::() - .as_mut() - .unwrap(); - pdpte = mem_addr - .add(BOOT_PDPTE.as_u64() as usize) - .cast::() - .as_mut() - .unwrap(); - pde = mem_addr - .add(BOOT_PDE.as_u64() as usize) - .cast::() - .as_mut() - .unwrap(); - - /* For simplicity we currently use 2MB pages and only a single - PML4/PDPTE/PDE. 
*/ - - // per default is the memory zeroed, which we allocate by the system - // call mmap, so the following is not necessary: - /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); - libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ - } - // initialize GDT - gdt_entry[BOOT_GDT_NULL] = 0; - gdt_entry[BOOT_GDT_CODE] = create_gdt_entry(0xA09B, 0, 0xFFFFF); - gdt_entry[BOOT_GDT_DATA] = create_gdt_entry(0xC093, 0, 0xFFFFF); - - pml4[0].set_addr( - BOOT_PDPTE, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pml4[511].set_addr( - BOOT_PML4, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE, - ); - pdpte[0].set_addr(BOOT_PDE, PageTableFlags::PRESENT | PageTableFlags::WRITABLE); - - for i in 0..512 { - let addr = PhysAddr::new(i as u64 * Page::::SIZE); - pde[i].set_addr( - addr, - PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, - ); - } -} - /// Converts a virtual address in the guest to a physical address in the guest pub fn virt_to_phys( addr: GuestVirtAddr, mem: &MmapMemory, - pagetable_l0: GuestPhysAddr, ) -> Result { /// Number of Offset bits of a virtual address for a 4 KiB page, which are shifted away to get its Page Frame Number (PFN). pub const PAGE_BITS: u64 = 12; @@ -204,8 +121,12 @@ pub fn virt_to_phys( /// Number of bits of the index in each table (PML4, PDPT, PDT, PGT). pub const PAGE_MAP_BITS: usize = 9; - let mut page_table = - unsafe { (mem.host_address(pagetable_l0).unwrap() as *mut PageTable).as_mut() }.unwrap(); + let mut page_table = unsafe { + (mem.host_address(GuestPhysAddr::new(mem.guest_address.as_u64() + PML4_OFFSET)) + .unwrap() as *mut PageTable) + .as_mut() + } + .unwrap(); let mut page_bits = 39; let mut entry = PageTableEntry::new(); @@ -232,14 +153,18 @@ pub fn virt_to_phys( Ok(entry.addr() + (addr.as_u64() & !((!0u64) << PAGE_BITS))) } -pub fn init_guest_mem(mem: &mut [u8]) { +pub fn init_guest_mem(mem: &mut [u8], guest_address: u64) { // TODO: we should maybe return an error on failure (e.g., the memory is too small) - initialize_pagetables(mem); + paging::initialize_pagetables(mem, guest_address); } #[cfg(test)] mod tests { + use x86_64::structures::paging::PageTableFlags; + use super::*; + use crate::consts::{MIN_PHYSMEM_SIZE, PDE_OFFSET, PDPTE_OFFSET, PML4_OFFSET}; + // test is derived from // https://github.com/gz/rust-cpuid/blob/master/examples/tsc_frequency.rs #[test] @@ -320,81 +245,43 @@ mod tests { } #[test] - fn test_pagetable_initialization() { - let mut mem: Vec = vec![0; MIN_PHYSMEM_SIZE]; - initialize_pagetables((&mut mem[0..MIN_PHYSMEM_SIZE]).try_into().unwrap()); - - // Test pagetable setup - let addr_pdpte = u64::from_le_bytes( - mem[(BOOT_PML4.as_u64() as usize)..(BOOT_PML4.as_u64() as usize + 8)] - .try_into() - .unwrap(), - ); - assert_eq!( - addr_pdpte, - BOOT_PDPTE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() - ); - let addr_pde = u64::from_le_bytes( - mem[(BOOT_PDPTE.as_u64() as usize)..(BOOT_PDPTE.as_u64() as usize + 8)] - .try_into() - .unwrap(), + fn test_virt_to_phys() { + let guest_address = 0x11111000; + let mem = MmapMemory::new( + 0, + MIN_PHYSMEM_SIZE * 2, + GuestPhysAddr::new(guest_address), + true, + true, ); - assert_eq!( - addr_pde, - BOOT_PDE.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + init_guest_mem( + unsafe { mem.as_slice_mut() }.try_into().unwrap(), + guest_address, ); - for i in (0..4096).step_by(8) { - let addr = 
BOOT_PDE.as_u64() as usize + i; - let entry = u64::from_le_bytes(mem[addr..(addr + 8)].try_into().unwrap()); - assert!( - PageTableFlags::from_bits_truncate(entry) - .difference( - PageTableFlags::PRESENT - | PageTableFlags::WRITABLE - | PageTableFlags::HUGE_PAGE - ) - .is_empty(), - "Pagetable bits at {addr:#x} are incorrect" - ) - } - - // Test GDT - let gdt_results = [0x0, 0xAF9B000000FFFF, 0xCF93000000FFFF]; - for (i, res) in gdt_results.iter().enumerate() { - let gdt_addr = BOOT_GDT.as_u64() as usize + i * 8; - let gdt_entry = u64::from_le_bytes(mem[gdt_addr..gdt_addr + 8].try_into().unwrap()); - assert_eq!(*res, gdt_entry); - } - } - - #[test] - fn test_virt_to_phys() { - let mem = MmapMemory::new(0, MIN_PHYSMEM_SIZE * 2, GuestPhysAddr::new(0), true, true); - initialize_pagetables(unsafe { mem.as_slice_mut() }.try_into().unwrap()); - // Get the address of the first entry in PML4 (the address of the PML4 itself) let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000); - let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); - assert_eq!(p_addr, BOOT_PML4); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, GuestPhysAddr::new(guest_address + PML4_OFFSET)); // The last entry on the PML4 is the address of the PML4 with flags let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFFFF000 | (4096 - 8)); - let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); assert_eq!( mem.read::(p_addr).unwrap(), - BOOT_PML4.as_u64() | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + (guest_address + PML4_OFFSET) + | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() ); // the first entry on the 3rd level entry in the pagetables is the address of the boot pdpte let virt_addr = GuestVirtAddr::new(0xFFFFFFFFFFE00000); - let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); - assert_eq!(p_addr, BOOT_PDPTE); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, GuestPhysAddr::new(guest_address + PDPTE_OFFSET)); // the first entry on the 2rd level entry in the pagetables is the address of the boot pde let virt_addr = GuestVirtAddr::new(0xFFFFFFFFC0000000); - let p_addr = virt_to_phys(virt_addr, &mem, BOOT_PML4).unwrap(); - assert_eq!(p_addr, BOOT_PDE); + let p_addr = virt_to_phys(virt_addr, &mem).unwrap(); + assert_eq!(p_addr, GuestPhysAddr::new(guest_address + PDE_OFFSET)); // That address points to a huge page assert!( PageTableFlags::from_bits_truncate(mem.read::(p_addr).unwrap()).contains( diff --git a/src/arch/x86_64/paging/mod.rs b/src/arch/x86_64/paging/mod.rs new file mode 100644 index 00000000..1502b1ad --- /dev/null +++ b/src/arch/x86_64/paging/mod.rs @@ -0,0 +1,153 @@ +use uhyve_interface::GuestPhysAddr; +use x86_64::{ + structures::paging::{Page, PageTable, PageTableFlags, Size2MiB}, + PhysAddr, +}; + +use crate::consts::*; + +/// Creates the pagetables and the GDT in the guest memory space. +/// +/// The memory slice must be larger than [`MIN_PHYSMEM_SIZE`]. 
+/// Also, the memory `mem` needs to be zeroed for [`PAGE_SIZE`] bytes at the +/// offsets [`BOOT_PML4`] and [`BOOT_PDPTE`], otherwise the integrity of the +/// pagetables and thus the integrity of the guest's memory is not ensured +pub fn initialize_pagetables(mem: &mut [u8], guest_address: u64) { + assert!(mem.len() >= MIN_PHYSMEM_SIZE); + let mem_addr = std::ptr::addr_of_mut!(mem[0]); + + let (gdt_entry, pml4, pdpte, pde); + // Safety: + // We only operate in `mem`, which is plain bytes and we have ownership of + // these and it is asserted to be large enough. + unsafe { + gdt_entry = mem_addr + .add(GDT_OFFSET as usize) + .cast::<[u64; 3]>() + .as_mut() + .unwrap(); + + pml4 = mem_addr + .add(PML4_OFFSET as usize) + .cast::() + .as_mut() + .unwrap(); + pdpte = mem_addr + .add(PDPTE_OFFSET as usize) + .cast::() + .as_mut() + .unwrap(); + pde = mem_addr + .add(PDE_OFFSET as usize) + .cast::() + .as_mut() + .unwrap(); + + /* For simplicity we currently use 2MB pages and only a single + PML4/PDPTE/PDE. */ + + // per default is the memory zeroed, which we allocate by the system + // call mmap, so the following is not necessary: + /*libc::memset(pml4 as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pdpte as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE); + libc::memset(pde as *mut _ as *mut libc::c_void, 0x00, PAGE_SIZE);*/ + } + // initialize GDT + gdt_entry[BOOT_GDT_NULL] = 0; + gdt_entry[BOOT_GDT_CODE] = create_gdt_entry(0xA09B, 0, 0xFFFFF); + gdt_entry[BOOT_GDT_DATA] = create_gdt_entry(0xC093, 0, 0xFFFFF); + + pml4[0].set_addr( + GuestPhysAddr::new(guest_address + PDPTE_OFFSET), + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pml4[511].set_addr( + GuestPhysAddr::new(guest_address + PML4_OFFSET), + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + pdpte[0].set_addr( + GuestPhysAddr::new(guest_address + PDE_OFFSET), + PageTableFlags::PRESENT | PageTableFlags::WRITABLE, + ); + + for i in 0..512 { + let addr = PhysAddr::new(i as u64 * Page::::SIZE); + pde[i].set_addr( + addr, + PageTableFlags::PRESENT | PageTableFlags::WRITABLE | PageTableFlags::HUGE_PAGE, + ); + } +} + +// Constructor for a conventional segment GDT (or LDT) entry +pub fn create_gdt_entry(flags: u64, base: u64, limit: u64) -> u64 { + ((base & 0xff000000u64) << (56 - 24)) + | ((flags & 0x0000f0ffu64) << 40) + | ((limit & 0x000f0000u64) << (48 - 16)) + | ((base & 0x00ffffffu64) << 16) + | (limit & 0x0000ffffu64) +} + +#[cfg(test)] +mod tests { + use super::*; + use crate::consts::{GDT_OFFSET, PDE_OFFSET, PDPTE_OFFSET, PML4_OFFSET}; + + #[test] + fn test_pagetable_initialization() { + let guest_address = 0x15000; + + let mut mem: Vec = vec![0; MIN_PHYSMEM_SIZE]; + // This will return a pagetable setup that we will check. 
+ initialize_pagetables( + (&mut mem[0..MIN_PHYSMEM_SIZE]).try_into().unwrap(), + guest_address, + ); + + // Check PDPTE address + let addr_pdpte = u64::from_le_bytes( + mem[(PML4_OFFSET as usize)..(PML4_OFFSET as usize + 8)] + .try_into() + .unwrap(), + ); + assert_eq!( + addr_pdpte - guest_address, + PDPTE_OFFSET | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + + // Check PDE + let addr_pde = u64::from_le_bytes( + mem[(PDPTE_OFFSET as usize)..(PDPTE_OFFSET as usize + 8)] + .try_into() + .unwrap(), + ); + assert_eq!( + addr_pde - guest_address, + PDE_OFFSET | (PageTableFlags::PRESENT | PageTableFlags::WRITABLE).bits() + ); + + // Check PDE's pagetable bits + for i in (0..4096).step_by(8) { + let pde_addr = (PDE_OFFSET) as usize + i; + let entry = u64::from_le_bytes(mem[pde_addr..(pde_addr + 8)].try_into().unwrap()); + assert!( + PageTableFlags::from_bits_truncate(entry) + .difference( + PageTableFlags::PRESENT + | PageTableFlags::WRITABLE + | PageTableFlags::HUGE_PAGE + ) + .is_empty(), + "Pagetable bits at {pde_addr:#x} are incorrect" + ) + } + + // Test GDT + let gdt_results = [0x0, 0xAF9B000000FFFF, 0xCF93000000FFFF]; + for (i, res) in gdt_results.iter().enumerate() { + let gdt_addr = GDT_OFFSET as usize + i * 8; + let gdt_entry = u64::from_le_bytes(mem[gdt_addr..gdt_addr + 8].try_into().unwrap()); + assert_eq!(*res, gdt_entry); + } + } +} diff --git a/src/consts.rs b/src/consts.rs index b33727f3..5a139f66 100644 --- a/src/consts.rs +++ b/src/consts.rs @@ -1,19 +1,22 @@ -use uhyve_interface::GuestPhysAddr; - pub const PAGE_SIZE: usize = 0x1000; pub const GDT_KERNEL_CODE: u16 = 1; pub const GDT_KERNEL_DATA: u16 = 2; pub const APIC_DEFAULT_BASE: u64 = 0xfee00000; -pub const BOOT_GDT: GuestPhysAddr = GuestPhysAddr::new(0x1000); + pub const BOOT_GDT_NULL: usize = 0; pub const BOOT_GDT_CODE: usize = 1; pub const BOOT_GDT_DATA: usize = 2; pub const BOOT_GDT_MAX: usize = 3; -pub const BOOT_PML4: GuestPhysAddr = GuestPhysAddr::new(0x10000); -pub const BOOT_PGT: GuestPhysAddr = BOOT_PML4; -pub const BOOT_PDPTE: GuestPhysAddr = GuestPhysAddr::new(0x11000); -pub const BOOT_PDE: GuestPhysAddr = GuestPhysAddr::new(0x12000); -pub const BOOT_INFO_ADDR: GuestPhysAddr = GuestPhysAddr::new(0x9000); + +// guest_address + OFFSET +pub const GDT_OFFSET: u64 = 0x1000; +pub const PML4_OFFSET: u64 = 0x10000; +pub const PGT_OFFSET: u64 = 0x10000; +pub const PDPTE_OFFSET: u64 = 0x11000; +pub const PDE_OFFSET: u64 = 0x12000; +pub const BOOT_INFO_ADDR_OFFSET: u64 = 0x9000; +pub const MIN_PHYSMEM_SIZE: usize = 0x13000; + pub const EFER_SCE: u64 = 1; /* System Call Extensions */ pub const EFER_LME: u64 = 1 << 8; /* Long mode enable */ pub const EFER_LMA: u64 = 1 << 10; /* Long mode active (read-only) */ diff --git a/src/hypercall.rs b/src/hypercall.rs index 0205419c..47921ec6 100644 --- a/src/hypercall.rs +++ b/src/hypercall.rs @@ -7,7 +7,6 @@ use std::{ use uhyve_interface::{parameters::*, GuestPhysAddr, Hypercall, HypercallAddress, MAX_ARGC_ENVC}; use crate::{ - consts::BOOT_PML4, mem::{MemoryError, MmapMemory}, virt_to_phys, }; @@ -102,7 +101,7 @@ pub fn read(mem: &MmapMemory, sysread: &mut ReadPrams) { unsafe { let bytes_read = libc::read( sysread.fd, - mem.host_address(virt_to_phys(sysread.buf, mem, BOOT_PML4).unwrap()) + mem.host_address(virt_to_phys(sysread.buf, mem).unwrap()) .unwrap() as *mut libc::c_void, sysread.len, ); @@ -121,17 +120,15 @@ pub fn write(mem: &MmapMemory, syswrite: &WriteParams) -> io::Result<()> { unsafe { let step = libc::write( syswrite.fd, - mem.host_address( - 
virt_to_phys(syswrite.buf + bytes_written as u64, mem, BOOT_PML4).unwrap(), - ) - .map_err(|e| match e { - MemoryError::BoundsViolation => { - unreachable!("Bounds violation after host_address function") - } - MemoryError::WrongMemoryError => { - Error::new(ErrorKind::AddrNotAvailable, e.to_string()) - } - })? as *const libc::c_void, + mem.host_address(virt_to_phys(syswrite.buf + bytes_written as u64, mem).unwrap()) + .map_err(|e| match e { + MemoryError::BoundsViolation => { + unreachable!("Bounds violation after host_address function") + } + MemoryError::WrongMemoryError => { + Error::new(ErrorKind::AddrNotAvailable, e.to_string()) + } + })? as *const libc::c_void, syswrite.len - bytes_written, ); if step >= 0 { diff --git a/src/linux/gdb/breakpoints.rs b/src/linux/gdb/breakpoints.rs index 454f9cdf..1d84f999 100644 --- a/src/linux/gdb/breakpoints.rs +++ b/src/linux/gdb/breakpoints.rs @@ -4,10 +4,7 @@ use gdbstub::target::{self, ext::breakpoints::WatchKind, TargetResult}; use uhyve_interface::GuestVirtAddr; use super::GdbUhyve; -use crate::{ - arch::x86_64::{registers, virt_to_phys}, - consts::BOOT_PML4, -}; +use crate::arch::x86_64::{registers, virt_to_phys}; #[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)] pub struct SwBreakpoint { addr: u64, @@ -55,8 +52,7 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { // Safety: mem is not altered during the lifetime of `instructions` let instructions = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem, BOOT_PML4) - .map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem).map_err(|_err| ())?, kind, ) } @@ -76,8 +72,7 @@ impl target::ext::breakpoints::SwBreakpoint for GdbUhyve { // Safety: mem is not altered during the lifetime of `instructions` let instructions = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem, BOOT_PML4) - .map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(addr), &self.vm.mem).map_err(|_err| ())?, kind, ) } diff --git a/src/linux/gdb/mod.rs b/src/linux/gdb/mod.rs index 5dd525f0..29f08e3a 100644 --- a/src/linux/gdb/mod.rs +++ b/src/linux/gdb/mod.rs @@ -30,7 +30,6 @@ use self::breakpoints::SwBreakpoints; use super::HypervisorError; use crate::{ arch::x86_64::{registers::debug::HwBreakpoints, virt_to_phys}, - consts::BOOT_PML4, linux::{x86_64::kvm_cpu::KvmCpu, KickSignal}, vcpu::{VcpuStopReason, VirtualCPU}, vm::UhyveVm, @@ -131,7 +130,7 @@ impl SingleThreadBase for GdbUhyve { // Safety: mem is copied to data before mem can be modified. let src = unsafe { self.vm.mem.slice_at( - virt_to_phys(guest_addr, &self.vm.mem, BOOT_PML4).map_err(|_err| ())?, + virt_to_phys(guest_addr, &self.vm.mem).map_err(|_err| ())?, data.len(), ) } @@ -144,8 +143,7 @@ impl SingleThreadBase for GdbUhyve { // Safety: self.vm.mem is not altered during the lifetime of mem. 
let mem = unsafe { self.vm.mem.slice_at_mut( - virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem, BOOT_PML4) - .map_err(|_err| ())?, + virt_to_phys(GuestVirtAddr::new(start_addr), &self.vm.mem).map_err(|_err| ())?, data.len(), ) } diff --git a/src/linux/x86_64/kvm_cpu.rs b/src/linux/x86_64/kvm_cpu.rs index 8ef03734..a1285a64 100644 --- a/src/linux/x86_64/kvm_cpu.rs +++ b/src/linux/x86_64/kvm_cpu.rs @@ -229,6 +229,7 @@ impl KvmCpu { &self, entry_point: u64, stack_address: u64, + guest_address: u64, cpu_id: u32, ) -> Result<(), kvm_ioctls::Error> { //debug!("Setup long mode"); @@ -241,7 +242,7 @@ impl KvmCpu { | Cr0Flags::PAGING; sregs.cr0 = cr0.bits(); - sregs.cr3 = BOOT_PML4.as_u64(); + sregs.cr3 = guest_address + PML4_OFFSET; let cr4 = Cr4Flags::PHYSICAL_ADDRESS_EXTENSION; sregs.cr4 = cr4.bits(); @@ -272,7 +273,7 @@ impl KvmCpu { sregs.ss = seg; //sregs.fs = seg; //sregs.gs = seg; - sregs.gdt.base = BOOT_GDT.as_u64(); + sregs.gdt.base = guest_address + GDT_OFFSET; sregs.gdt.limit = ((std::mem::size_of::() * BOOT_GDT_MAX) - 1) as u16; self.vcpu.set_sregs(&sregs)?; @@ -280,7 +281,7 @@ impl KvmCpu { let mut regs = self.vcpu.get_regs()?; regs.rflags = 2; regs.rip = entry_point; - regs.rdi = BOOT_INFO_ADDR.as_u64(); + regs.rdi = guest_address + BOOT_INFO_ADDR_OFFSET; regs.rsi = cpu_id.into(); regs.rsp = stack_address; @@ -305,8 +306,14 @@ impl KvmCpu { &mut self.vcpu } - fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { - self.setup_long_mode(entry_point, stack_address, cpu_id)?; + fn init( + &mut self, + entry_point: u64, + stack_address: u64, + guest_address: u64, + cpu_id: u32, + ) -> HypervisorResult<()> { + self.setup_long_mode(entry_point, stack_address, guest_address, cpu_id)?; self.setup_cpuid()?; // be sure that the multiprocessor is runable @@ -335,7 +342,12 @@ impl VirtualCPU for KvmCpu { parent_vm: parent_vm.clone(), pci_addr: None, }; - kvcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; + kvcpu.init( + parent_vm.get_entry_point(), + parent_vm.stack_address(), + parent_vm.guest_address(), + id, + )?; Ok(kvcpu) } diff --git a/src/macos/aarch64/vcpu.rs b/src/macos/aarch64/vcpu.rs index 35d86869..622e4ad0 100644 --- a/src/macos/aarch64/vcpu.rs +++ b/src/macos/aarch64/vcpu.rs @@ -26,7 +26,13 @@ pub struct XhyveCpu { } impl XhyveCpu { - fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { + fn init( + &mut self, + entry_point: u64, + stack_address: u64, + guest_address: u64, + cpu_id: u32, + ) -> HypervisorResult<()> { debug!("Initialize VirtualCPU"); /* pstate = all interrupts masked */ @@ -36,7 +42,7 @@ impl XhyveCpu { self.vcpu .write_system_register(SystemRegister::SP_EL1, stack_address)?; self.vcpu - .write_register(Register::X0, BOOT_INFO_ADDR.as_u64())?; + .write_register(Register::X0, guest_address + BOOT_INFO_ADDR_OFFSET)?; self.vcpu.write_register(Register::X1, cpu_id.into())?; /* @@ -88,7 +94,7 @@ impl XhyveCpu { self.vcpu .write_system_register(SystemRegister::TTBR1_EL1, 0)?; self.vcpu - .write_system_register(SystemRegister::TTBR0_EL1, BOOT_PGT.as_u64())?; + .write_system_register(SystemRegister::TTBR0_EL1, guest_address + PGT_OFFSET)?; /* * Prepare system control register (SCTRL) @@ -142,7 +148,12 @@ impl VirtualCPU for XhyveCpu { parent_vm: parent_vm.clone(), vcpu: xhypervisor::VirtualCpu::new().unwrap(), }; - vcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; + vcpu.init( + parent_vm.get_entry_point(), + parent_vm.stack_address(), + 
parent_vm.guest_address(), + id, + )?; Ok(vcpu) } diff --git a/src/macos/x86_64/vcpu.rs b/src/macos/x86_64/vcpu.rs index 1cfaf230..38507ed3 100644 --- a/src/macos/x86_64/vcpu.rs +++ b/src/macos/x86_64/vcpu.rs @@ -164,7 +164,7 @@ pub struct XhyveCpu { } impl XhyveCpu { - fn setup_system_gdt(&mut self) -> Result<(), xhypervisor::Error> { + fn setup_system_gdt(&mut self, guest_address: u64) -> Result<(), xhypervisor::Error> { debug!("Setup GDT"); self.vcpu.write_vmcs(VMCS_GUEST_CS_LIMIT, 0)?; @@ -187,7 +187,7 @@ impl XhyveCpu { self.vcpu.write_vmcs(VMCS_GUEST_GS_AR, 0x4093)?; self.vcpu - .write_vmcs(VMCS_GUEST_GDTR_BASE, BOOT_GDT.as_u64())?; + .write_vmcs(VMCS_GUEST_GDTR_BASE, guest_address + GDT_OFFSET)?; self.vcpu.write_vmcs( VMCS_GUEST_GDTR_LIMIT, ((std::mem::size_of::() * BOOT_GDT_MAX) - 1) as u64, @@ -233,7 +233,7 @@ impl XhyveCpu { Ok(()) } - fn setup_system_64bit(&mut self) -> Result<(), xhypervisor::Error> { + fn setup_system_64bit(&mut self, guest_address: u64) -> Result<(), xhypervisor::Error> { debug!("Setup 64bit mode"); let cr0 = Cr0Flags::PROTECTED_MODE_ENABLE @@ -256,7 +256,7 @@ impl XhyveCpu { self.vcpu.write_register(&Register::CR0, cr0.bits())?; self.vcpu.write_register(&Register::CR4, cr4.bits())?; self.vcpu - .write_register(&Register::CR3, BOOT_PML4.as_u64())?; + .write_register(&Register::CR3, guest_address + PML4_OFFSET)?; self.vcpu.write_register(&Register::DR7, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_SYSENTER_ESP, 0)?; self.vcpu.write_vmcs(VMCS_GUEST_SYSENTER_EIP, 0)?; @@ -594,7 +594,13 @@ impl XhyveCpu { &self.vcpu } - fn init(&mut self, entry_point: u64, stack_address: u64, cpu_id: u32) -> HypervisorResult<()> { + fn init( + &mut self, + entry_point: u64, + stack_address: u64, + guest_address: u64, + cpu_id: u32, + ) -> HypervisorResult<()> { self.setup_capabilities()?; self.setup_msr()?; @@ -615,7 +621,7 @@ impl XhyveCpu { self.vcpu.write_register(&Register::RDX, 0)?; self.vcpu.write_register(&Register::RSI, cpu_id.into())?; self.vcpu - .write_register(&Register::RDI, BOOT_INFO_ADDR.as_u64())?; + .write_register(&Register::RDI, guest_address + BOOT_INFO_ADDR_OFFSET)?; self.vcpu.write_register(&Register::R8, 0)?; self.vcpu.write_register(&Register::R9, 0)?; self.vcpu.write_register(&Register::R10, 0)?; @@ -624,8 +630,8 @@ impl XhyveCpu { self.vcpu.write_register(&Register::R13, 0)?; self.vcpu.write_register(&Register::R14, 0)?; self.vcpu.write_register(&Register::R15, 0)?; - self.setup_system_gdt()?; - self.setup_system_64bit()?; + self.setup_system_gdt(guest_address)?; + self.setup_system_64bit(guest_address)?; Ok(()) } @@ -639,7 +645,12 @@ impl VirtualCPU for XhyveCpu { vcpu: xhypervisor::VirtualCpu::new().unwrap(), apic_base: APIC_DEFAULT_BASE, }; - vcpu.init(parent_vm.get_entry_point(), parent_vm.stack_address(), id)?; + vcpu.init( + parent_vm.get_entry_point(), + parent_vm.stack_address(), + parent_vm.guest_address(), + id, + )?; Ok(vcpu) } diff --git a/src/vm.rs b/src/vm.rs index e058cbb7..1a9bfd48 100644 --- a/src/vm.rs +++ b/src/vm.rs @@ -75,6 +75,7 @@ pub struct UhyveVm { offset: u64, entry_point: u64, stack_address: u64, + guest_address: u64, pub mem: Arc, num_cpus: u32, path: PathBuf, @@ -90,8 +91,18 @@ impl UhyveVm { pub fn new(kernel_path: PathBuf, params: Params) -> HypervisorResult> { let memory_size = params.memory_size.get(); + // TODO: Move functionality to load_kernel. We don't know whether the binaries are relocatable yet. + // TODO: Use random address instead of arch::RAM_START here. 
#[cfg(target_os = "linux")] + #[cfg(target_arch = "x86_64")] let mem = MmapMemory::new(0, memory_size, arch::RAM_START, params.thp, params.ksm); + + // TODO: guest_address is only taken into account on Linux platforms. + // TODO: Before changing this, fix init_guest_mem in `src/arch/aarch64/mod.rs` + #[cfg(target_os = "linux")] + #[cfg(not(target_arch = "x86_64"))] + let mem = MmapMemory::new(0, memory_size, arch::RAM_START, params.thp, params.ksm); + #[cfg(not(target_os = "linux"))] let mem = MmapMemory::new(0, memory_size, arch::RAM_START, false, false); @@ -119,6 +130,7 @@ impl UhyveVm { offset: 0, entry_point: 0, stack_address: 0, + guest_address: mem.guest_address.as_u64(), mem: mem.into(), num_cpus: cpu_count, path: kernel_path, @@ -152,6 +164,10 @@ impl UhyveVm { self.stack_address } + pub fn guest_address(&self) -> u64 { + self.guest_address + } + /// Returns the number of cores for the vm. pub fn num_cpus(&self) -> u32 { self.num_cpus @@ -172,6 +188,7 @@ impl UhyveVm { unsafe { self.mem.as_slice_mut() } // slice only lives during this fn call .try_into() .expect("Guest memory is not large enough for pagetables"), + self.guest_address, ); } @@ -179,8 +196,13 @@ impl UhyveVm { let elf = fs::read(self.kernel_path())?; let object = KernelObject::parse(&elf).map_err(LoadKernelError::ParseKernelError)?; + // The offset of the kernel in the Memory. Must be larger than BOOT_INFO_OFFSET + KERNEL_STACK_SIZE + let kernel_offset = 0x40_000_usize; // TODO: should be a random start address, if we have a relocatable executable - let kernel_start_address = object.start_addr().unwrap_or(0x400000) as usize; + let kernel_start_address = object + .start_addr() + .unwrap_or(self.mem.guest_address.as_u64() + kernel_offset as u64) + as usize; let kernel_end_address = kernel_start_address + object.mem_size(); self.offset = kernel_start_address as u64; @@ -194,7 +216,7 @@ impl UhyveVm { } = object.load_kernel( // Safety: Slice only lives during this fn call, so no aliasing happens &mut unsafe { self.mem.as_slice_uninit_mut() } - [kernel_start_address..kernel_end_address], + [self.offset as usize..object.mem_size() + self.offset as usize], kernel_start_address as u64, ); self.entry_point = entry_point; @@ -219,7 +241,7 @@ impl UhyveVm { }; unsafe { let raw_boot_info_ptr = - self.mem.host_address.add(BOOT_INFO_ADDR.as_u64() as usize) as *mut RawBootInfo; + self.mem.host_address.add(BOOT_INFO_ADDR_OFFSET as usize) as *mut RawBootInfo; *raw_boot_info_ptr = RawBootInfo::from(boot_info); self.boot_info = raw_boot_info_ptr; } @@ -239,6 +261,7 @@ impl fmt::Debug for UhyveVm { f.debug_struct("UhyveVm") .field("entry_point", &self.entry_point) .field("stack_address", &self.stack_address) + .field("guest_address", &self.guest_address) .field("mem", &self.mem) .field("num_cpus", &self.num_cpus) .field("path", &self.path) diff --git a/tests/gdb.rs b/tests/gdb.rs index b3593f6d..66079e3f 100644 --- a/tests/gdb.rs +++ b/tests/gdb.rs @@ -43,7 +43,7 @@ fn gdb() -> io::Result<()> { write!( &mut command_file, "target remote :{port} -symbol-file {bin_path} -o 0x400000 +symbol-file {bin_path} -o 0x40000 break gdb::main continue