diff --git a/elfloader-tool/include/arch-arm/64/mode/structures.h b/elfloader-tool/include/arch-arm/64/mode/structures.h
index aaa8bced..dbc7a49f 100644
--- a/elfloader-tool/include/arch-arm/64/mode/structures.h
+++ b/elfloader-tool/include/arch-arm/64/mode/structures.h
@@ -6,6 +6,12 @@
 #pragma once
 
+/* ARM VMSAv8-64 (with a fully populated last level) has the same number of PTEs
+ * in all levels (we don't use concatenated pagetables in the ELFloader) and each
+ * table entry is always eight bytes in size.
+ */
+#define BITS_PER_LEVEL (PAGE_BITS - 3)
+
 #define ARM_1GB_BLOCK_BITS 30
 #define ARM_2MB_BLOCK_BITS 21
@@ -26,9 +32,5 @@
 #define GET_PMD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS)) & MASK(PMD_BITS))
 
 extern uint64_t _boot_pgd_up[BIT(PGD_BITS)];
-extern uint64_t _boot_pud_up[BIT(PUD_BITS)];
-extern uint64_t _boot_pmd_up[BIT(PMD_BITS)];
-
 extern uint64_t _boot_pgd_down[BIT(PGD_BITS)];
-extern uint64_t _boot_pud_down[BIT(PUD_BITS)];
diff --git a/elfloader-tool/include/arch-arm/elfloader.h b/elfloader-tool/include/arch-arm/elfloader.h
index 93293a75..ceab4796 100644
--- a/elfloader-tool/include/arch-arm/elfloader.h
+++ b/elfloader-tool/include/arch-arm/elfloader.h
@@ -22,7 +22,22 @@ typedef void (*init_arm_kernel_t)(word_t ui_p_reg_start,
 
 /* Enable the mmu. */
 extern void arm_enable_mmu(void);
+
+/* These functions are very similar; however, there are some small differences
+ * between the ARMv8 and the legacy implementation.
+ *
+ * The new ARMv8 implementation:
+ * - disables the MMU itself, to keep the time spent with the MMU off short.
+ * - is only meant to be used if seL4 runs in EL2.
+ */
+#if defined(CONFIG_ARCH_AARCH64)
+/* Switches the MMU-related state: pagetables, MAIR & TCR, etc. Also works if
+ * the MMU was initially off. EL2 translation regime only.
+ */
+extern void arm_switch_to_hyp_tables(void);
+#else
 extern void arm_enable_hyp_mmu(void);
+#endif
 
 /* Setup boot VSpace. */
diff --git a/elfloader-tool/include/drivers/uart.h b/elfloader-tool/include/drivers/uart.h
index 1fa9f970..74ce4b16 100644
--- a/elfloader-tool/include/drivers/uart.h
+++ b/elfloader-tool/include/drivers/uart.h
@@ -6,6 +6,7 @@
 #pragma once
 
+#include
 #include
 
 #define dev_get_uart(dev) ((struct elfloader_uart_ops *)(dev->drv->ops))
@@ -16,3 +17,7 @@ struct elfloader_uart_ops {
 volatile void *uart_get_mmio(void);
 void uart_set_out(struct elfloader_device *out);
 
+#if defined(CONFIG_ARCH_AARCH64)
+/* Implemented in mmu.c */
+void mmu_set_uart_base(volatile void *base);
+#endif
diff --git a/elfloader-tool/src/arch-arm/64/mmu.c b/elfloader-tool/src/arch-arm/64/mmu.c
index e927f3a6..c70b5466 100644
--- a/elfloader-tool/src/arch-arm/64/mmu.c
+++ b/elfloader-tool/src/arch-arm/64/mmu.c
@@ -11,88 +11,415 @@
 #include
 #include
 #include
+#include
 #include
-#include
+#include /* dsb() */
+#include
 
-/*
-* Create a "boot" page table, which contains a 1:1 mapping below
-* the kernel's first vaddr, and a virtual-to-physical mapping above the
-* kernel's first vaddr.
-*/
-void init_boot_vspace(struct image_info *kernel_info)
+/* Note: "typeof()" is a GCC extension that is supported by Clang, too. */
+#define READ_ONCE(x) (*(const volatile typeof(x) *)&(x))
+#define WRITE_ONCE(var, value) \
+    *((volatile typeof(var) *)(&(var))) = (value);
+
+
+//#define DEBUG_PAGETABLES
+
+#ifndef DEBUG_PAGETABLES
+#define dbg_printf(...) /* empty */
+static void dgb_print_2M_mapping_details(const char *map_name UNUSED,
+                                         paddr_t pa UNUSED, size_t size UNUSED) {}
+#else
+#define dbg_printf(...) \
printf(__VA_ARGS__) + +static int dgb_print_2M_mapping_indices(paddr_t pa) { - word_t i; + return printf("%u.%u.%u.X", + GET_PGD_INDEX(pa), + GET_PUD_INDEX(pa), + GET_PMD_INDEX(pa)); +} - vaddr_t first_vaddr = kernel_info->virt_region_start; - vaddr_t last_vaddr = kernel_info->virt_region_end; - paddr_t first_paddr = kernel_info->phys_region_start; +static void dgb_print_2M_mapping_details(const char *map_name, paddr_t pa, size_t size) +{ + int cnt = 0; + paddr_t pa_start = pa; + size_t orig_sz = size; - _boot_pgd_down[0] = ((uintptr_t)_boot_pud_down) | BIT(1) | BIT(0); /* its a page table */ + pa = ROUND_DOWN(pa, ARM_2MB_BLOCK_BITS); + size += (pa_start - pa); + size = ROUND_UP(size, ARM_2MB_BLOCK_BITS); - for (i = 0; i < BIT(PUD_BITS); i++) { - _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) - | BIT(10) /* access flag */ - | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ - | BIT(0); /* 1G block */ + cnt += dgb_print_2M_mapping_indices(pa); + if (orig_sz) { + while (cnt < 11) { + printf(" "); + cnt++; + } + cnt += printf("--"); + while (cnt < 16) { + printf(" "); + cnt++; + } + cnt += dgb_print_2M_mapping_indices(pa + size - 1); + } + while (cnt < 27) { + printf(" "); + cnt++; } + if (orig_sz) { + printf("PA 0x%lx - 0x%lx (size: %lu MiB): %s\n", pa, pa + size - 1, size / 1024u / 1024, map_name); + } else { + /* No range given, just a single 2 MiB page */ + printf("PA 0x%lx: %s\n", pa, map_name); + } +} +#endif /* DEBUG_PAGETABLES */ - _boot_pgd_up[GET_PGD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pud_up) | BIT(1) | BIT(0); /* its a page table */ +/* Page allocator. Contains a fixed number of pages. All page-aligned. No returning possible. */ - _boot_pud_up[GET_PUD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pmd_up) | BIT(1) | BIT(0); /* its a page table */ +#define NUM_PAGES 7 +static char pages[BIT(PAGE_BITS) * NUM_PAGES] ALIGN(BIT(PGD_SIZE_BITS)); +static unsigned page_cnt; - /* We only map in 1 GiB, so check that the kernel doesn't cross 1GiB boundary. */ - if ((first_vaddr & ~MASK(ARM_1GB_BLOCK_BITS)) != (last_vaddr & ~MASK(ARM_1GB_BLOCK_BITS))) { - printf("We only map 1GiB, but kernel vaddr range covers multiple GiB.\n"); - abort(); +static void *get_page(void) +{ + void *ret = NULL; + + if (page_cnt == 0) { + dbg_printf("get_page(): pages @ 0x%p\n", pages); } - for (i = GET_PMD_INDEX(first_vaddr); i < BIT(PMD_BITS); i++) { - _boot_pmd_up[i] = first_paddr - | BIT(10) /* access flag */ + + if (page_cnt < NUM_PAGES) { + ret = &pages[BIT(PAGE_BITS) * page_cnt]; + dbg_printf("get_page(): ret: 0x%p (%u->%u)\n", ret, page_cnt, page_cnt + 1); + page_cnt ++; + } + + return ret; +} + +/* Translate a PA to a VA such that when accessing the VA we end up at that PA. + * Usually done in OS kernels via a physical memory map which has a constant + * virt-to-phys offset. Here this is the same, since either the MMU is off or + * we're running on the identity mapping. + */ +static inline uint64_t pa_to_va(uint64_t pa) +{ + return pa; +} + +static inline uint64_t va_to_pa(uint64_t va) +{ + return va; +} + +typedef uint64_t pte_t; + +/* This can be used to clear unwanted bits from a PA that is supposed to be put + * into a PTE/PDE; or it can be used to extract the PA from a PTE/PDE. + */ +static inline uint64_t mask_pa(uint64_t pa) +{ + /* Mask out the upper 16 bits and lower 12 bits. Only 48-bit OA for now. 
+     */
+    return (pa & 0x0000FFFFFFFFF000);
+}
+
+static inline uintptr_t pde_to_paddr(uint64_t pde_val)
+{
+    /* ARM DDI 0487I.a, page D8-5124 */
+    return mask_pa(pde_val);
+}
+
+static inline uint64_t make_pde(uintptr_t pa)
+{
+    /* For now we set all (upper) attributes to zero */
+    return (mask_pa(pa) | BIT(1) | BIT(0));
+}
+
+/* Accepts a pointer; otherwise the same as make_pde() */
+static inline uint64_t make_pde_from_ptr(pte_t *pagetable_target)
+{
+    return make_pde(va_to_pa((uintptr_t)pagetable_target));
+}
+
+/* ARM DDI 0487I.a, section D8.5.2 */
+#define INNER_SHAREABLE 3
+static inline uint64_t make_pte(paddr_t pa, uint8_t mem_attr_index)
+{
+    /* Note: As per R_PYFVQ from the ARM spec, we can always safely set the
+     * shareability to inner, even for device-type memory.
+     */
+    return mask_pa(pa)
+           | BIT(10) /* access flag */
 #if CONFIG_MAX_NUM_NODES > 1
-            | (3 << 8) /* make sure the shareability is the same as the kernel's */
+           | (INNER_SHAREABLE << 8)
 #endif
-            | (MT_NORMAL << 2) /* MT_NORMAL memory */
-            | BIT(0); /* 2M block */
-        first_paddr += BIT(ARM_2MB_BLOCK_BITS);
+           | (mem_attr_index << 2)
+           | BIT(0); /* valid page/block mapping */
+}
+
+static inline _Bool pte_is_valid(pte_t pte)
+{
+    return (pte & 1);
+}
+
+static inline _Bool pte_is_block(pte_t pte)
+{
+    return ((pte & 3) == 1);
+}
+
+/* Make sure a PTE update is a single atomic store */
+static inline void pte_set(pte_t *ptep, pte_t val)
+{
+    WRITE_ONCE(*ptep, val);
+}
+
+static inline pte_t pte_get(pte_t *ptep)
+{
+    return READ_ONCE(*ptep);
+}
+
+static_assert(PGD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size");
+static_assert(PUD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size");
+static_assert(PMD_BITS == BITS_PER_LEVEL, "Mismatch in expected pagetable size");
+/* ARM VMSAv8-64: each table entry is always eight bytes in size */
+static_assert(PAGE_BITS == (BITS_PER_LEVEL + 3), "Mismatch in expected page size");
+
+/* A valid PA is at most 48 (or 52) bits wide, so the upper bits are always zero */
+#define INVALID_PA ((uint64_t)-1)
+static paddr_t walk_pagetables(vaddr_t va, uint64_t *l0_table,
+                               unsigned *level, pte_t **fault_pde)
+{
+    paddr_t ret = INVALID_PA;
+    /* All levels have the same size and therefore the same number of index bits
+     * (9 for the 4 KiB translation granule) on ARMv8.
+     */
+    uint64_t index_mask_bits = PGD_BITS + PUD_BITS + PMD_BITS + PAGE_BITS;
+    uint64_t *tbl = l0_table;
+
+    unsigned idx, lvl;
+    paddr_t pa;
+    pte_t pte;
+
+    /* Walk up to four levels */
+    for (lvl = 0; lvl <= 3; lvl++) {
+        idx = (va >> index_mask_bits) & MASK(BITS_PER_LEVEL);
+        pte = pte_get(&tbl[idx]);
+
+        if (!pte_is_valid(pte)) {
+            goto err_out;
+        } else if (pte_is_block(pte)) {
+            /* L0 giant pages (512 GiB) are not allowed by the architecture for
+             * a 4 KiB granule size and 48-bit OA. We don't support 52-bit OA.
+             */
+            if (lvl == 0) {
+                goto err_out;
+            }
+            break;
+        }
+        if (lvl == 3) {
+            /* ARM DDI 0487I.a, page D8-5126 (I_WYRBP), D8-5131 (I_VKPKF):
+             * If the PTE in the last level is valid, it is interpreted as a page
+             * table, irrespective of bit 1. This allows for the "loopback
+             * trick" - described in every (good) OS lecture at university :-)
+             * Other architectures like RISC-V have screwed this up with their
+             * pagetable format.
+             */
+            break;
+        }
+        /* We have a table descriptor. Descend to the next lower level */
+        pa = pde_to_paddr(pte);
+        vaddr_t va_next = pa_to_va(pa);
+        tbl = (uint64_t *)va_next;
+
+        index_mask_bits -= BITS_PER_LEVEL;
     }
-    /* Architecturally required barrier to make all writes to pagetable memories
-     * visible to the pagetable walker. See ARM DDI 0487I.a, section D8.2.6.
+    ret = (pa | (va & (MASK(index_mask_bits))));
+
+err_out:
+    *level = lvl;
+    *fault_pde = &tbl[idx];
+    return ret;
+}
+
+/* Returns NULL if there is already something mapped at the requested VA. Fills
+ * in page tables if needed until the desired level is reached.
+ */
+static pte_t *fill_pt_tree(vaddr_t va, uint64_t *l0_table, unsigned target_lvl)
+{
+    paddr_t pa;
+    unsigned lvl;
+    pte_t *fault_pde;
+
+    pa = walk_pagetables(va, l0_table, &lvl, &fault_pde);
+
+    while ((lvl < target_lvl) && (pa == INVALID_PA)) {
+        /* fault_pde points to the entry to write. Add a new pagetable */
+        pte_set(fault_pde, make_pde_from_ptr(get_page()));
+
+        pa = walk_pagetables(va, l0_table, &lvl, &fault_pde);
+    }
+
+    if ((lvl == target_lvl) && fault_pde && !pte_is_valid(pte_get(fault_pde))) {
+        return fault_pde;
+    }
+    return NULL;
+}
+
+extern char _text[];
+extern char _end[];
+
+extern size_t dtb_size;
+
+static inline void clean_inval_cl(void *addr)
+{
+    asm volatile("dc civac, %0\n\t" :: "r"(addr));
+}
+
+static void clean_inval_pagetables(void)
+{
+    dsb();
+    /* Whole image for now; EFI case: our image may have been loaded on the boot
+     * CPU with caches enabled (and still being dirty), but the secondary CPUs
+     * start with caches disabled. Further, assume the cache line size is >= 64
+     * bytes. Maybe this is too cautious. Can we relax this? */
+    for (vaddr_t va = (vaddr_t)_text; va < (vaddr_t)(_end); va += 64) {
+        clean_inval_cl((void *)va);
+    }
     dsb();
 }
 
-void init_hyp_boot_vspace(struct image_info *kernel_info)
+static void map_uart(paddr_t base)
+{
+    pte_t *pte;
+
+    base = ROUND_DOWN(base, ARM_2MB_BLOCK_BITS);
+    pte = fill_pt_tree(base, _boot_pgd_down, 2);
+    if (pte) {
+        pte_set(pte, make_pte(base, MT_DEVICE_nGnRnE));
+    } else {
+        printf("Unable to map the UART at PA 0x%lx\n", base);
+        abort();
+    }
+    dbg_printf("Done mapping UART at PA: 0x%lx\n", base);
+}
+
+
+static paddr_t uart_base_mmio;
+void mmu_set_uart_base(volatile void *base)
+{
+    uart_base_mmio = (paddr_t)base;
+}
+
+/*
+ * Create a "boot" page table, which contains a 1:1 mapping for the ELFloader
+ * and the DTB. In addition, create a mapping for the kernel image at its
+ * desired VA, backed by the physical memory the kernel was extracted to
+ * earlier.
+ */
+static void init_boot_vspace_impl(const struct image_info *kernel_info, _Bool has_one_va_range)
 {
-    word_t i;
-    word_t pmd_index;
+    /* We may be running with the MMU & caches off. Before we write new values,
+     * make sure to clean & invalidate all previous data in those locations.
+     */
+    clean_inval_pagetables();
+
+    /* Map the UART, using strongly ordered memory; one 2 MiB page; 1:1 VA/PA */
+    paddr_t uart_base = ROUND_DOWN(uart_base_mmio, ARM_2MB_BLOCK_BITS);
+    map_uart(uart_base);
+
+    /* Map the ELFloader image, using NORMAL memory; 1:1 VA/PA */
+    paddr_t start_paddr = ROUND_DOWN(((paddr_t)_text), ARM_2MB_BLOCK_BITS);
+    paddr_t end_paddr = ROUND_UP(((paddr_t)_end), ARM_2MB_BLOCK_BITS);
+
+    for (paddr_t pa = start_paddr; pa < end_paddr; pa += BIT(ARM_2MB_BLOCK_BITS)) {
+        pte_t *pte = fill_pt_tree(pa, _boot_pgd_down, 2);
+        if (pte) {
+            pte_set(pte, make_pte(pa, MT_NORMAL));
+        } else {
+            printf("Unable to map ELFloader at PA: 0x%lx\n", pa);
+            abort();
+        }
+        dbg_printf("Map Elfloader PA: 0x%lx\n", pa);
+    }
+    dbg_printf("Done mapping Elfloader\n");
+
+    paddr_t dtb_map_start, dtb_map_end;
+    if (dtb && (dtb_size > 0)) {
+        /* Device Tree Blob (DTB):
+         * A UEFI-supplied DTB lies outside of the image memory => add a mapping.
+         * For other DTBs the ELFloader saves the *target* address of the copied
+         * DTB in "dtb", so we also need to add a mapping in those cases.
+         */
+        paddr_t dtb_end = (paddr_t)dtb + dtb_size;
+
+        dtb_map_start = ROUND_DOWN((paddr_t)dtb, ARM_2MB_BLOCK_BITS);
+        dtb_map_end = ROUND_UP(dtb_end, ARM_2MB_BLOCK_BITS);
+        for (paddr_t pa = dtb_map_start; pa < dtb_map_end; pa += BIT(ARM_2MB_BLOCK_BITS)) {
+            pte_t *pte = fill_pt_tree(pa, _boot_pgd_down, 2);
+            if (pte) {
+                pte_set(pte, make_pte(pa, MT_NORMAL));
+            } else {
+                printf("Unable to map DTB at PA: 0x%lx\n", pa);
+            }
+            dbg_printf("Map DTB PA: 0x%lx\n", pa);
+        }
+        dbg_printf("Done mapping DTB\n");
+    }
+
+    /* Map the kernel */
     vaddr_t first_vaddr = kernel_info->virt_region_start;
+    vaddr_t last_vaddr = kernel_info->virt_region_end;
     paddr_t first_paddr = kernel_info->phys_region_start;
-    _boot_pgd_down[0] = ((uintptr_t)_boot_pud_down) | BIT(1) | BIT(0);
-    for (i = 0; i < BIT(PUD_BITS); i++) {
-        _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS)
-                            | BIT(10) /* access flag */
-                            | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */
-                            | BIT(0); /* 1G block */
+    uint64_t *l0_table = has_one_va_range ?
_boot_pgd_down : _boot_pgd_up; + paddr_t pa = first_paddr; + for (vaddr_t va = first_vaddr; va < last_vaddr; + va += BIT(ARM_2MB_BLOCK_BITS), + pa += BIT(ARM_2MB_BLOCK_BITS)) { + + pte_t *pte = fill_pt_tree(va, l0_table, 2); + if (pte) { + pte_set(pte, make_pte(pa, MT_NORMAL)); + } else { + printf("Unable to map kernel at VA/PA: 0x%lx / 0x%lx\n", va, pa); + } + dbg_printf("Map kernel VA -> PA: 0x%lx -> 0x%lx\n", va, pa); } + dbg_printf("Done mapping kernel\n"); - _boot_pgd_down[GET_PGD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pud_up) | BIT(1) | BIT(0); /* its a page table */ + dbg_printf("Mapping indices:\n"); + dgb_print_2M_mapping_details("UART", uart_base, /* one 2 MiB page */ 2u * 1024 * 1024); + dgb_print_2M_mapping_details("ELFloader image", (paddr_t)_text, (paddr_t)_end - (paddr_t)_text); + if (dtb && (dtb_size > 0)) { + dgb_print_2M_mapping_details("dtb", dtb_map_start, dtb_map_end - dtb_map_start - 1); + } - _boot_pud_up[GET_PUD_INDEX(first_vaddr)] - = ((uintptr_t)_boot_pmd_up) | BIT(1) | BIT(0); /* its a page table */ - pmd_index = GET_PMD_INDEX(first_vaddr); - for (i = pmd_index; i < BIT(PMD_BITS); i++) { - _boot_pmd_up[i] = (((i - pmd_index) << ARM_2MB_BLOCK_BITS) + first_paddr) - | BIT(10) /* access flag */ -#if CONFIG_MAX_NUM_NODES > 1 - | (3 << 8) -#endif - | (MT_NORMAL << 2) /* MT_NORMAL memory */ - | BIT(0); /* 2M block */ - } + /* Architecturally required barrier to make all writes to pagetable memories + * visible to the pagetable walker. See ARM DDI 0487I.a, section D8.2.6. + */ + dsb(); + + /* Maintenance again, just to be sure. This is only necessary for the secondary + * CPUs; they may come up with caches & MMU disabled. What they should usually + * do is enable caches & MMU together! The following code is only necessary + * if they enable ONLY the MMU first and after that they enable the cache. + * That would be totally ... well ... suboptimal, but we play "better safe + * than sorry" here. + */ + clean_inval_pagetables(); +} + +void init_boot_vspace(struct image_info *kernel_info) +{ + init_boot_vspace_impl(kernel_info, 0); +} + +void init_hyp_boot_vspace(struct image_info *kernel_info) +{ + init_boot_vspace_impl(kernel_info, 1); } diff --git a/elfloader-tool/src/arch-arm/64/structures.c b/elfloader-tool/src/arch-arm/64/structures.c index 654fc7dc..e24680f4 100644 --- a/elfloader-tool/src/arch-arm/64/structures.c +++ b/elfloader-tool/src/arch-arm/64/structures.c @@ -8,11 +8,6 @@ #include #include -/* Paging structures for kernel mapping */ +/* Top-level paging structures for kernel and identity mapping */ uint64_t _boot_pgd_up[BIT(PGD_BITS)] ALIGN(BIT(PGD_SIZE_BITS)); -uint64_t _boot_pud_up[BIT(PUD_BITS)] ALIGN(BIT(PUD_SIZE_BITS)); -uint64_t _boot_pmd_up[BIT(PMD_BITS)] ALIGN(BIT(PMD_SIZE_BITS)); - -/* Paging structures for identity mapping */ uint64_t _boot_pgd_down[BIT(PGD_BITS)] ALIGN(BIT(PGD_SIZE_BITS)); -uint64_t _boot_pud_down[BIT(PUD_BITS)] ALIGN(BIT(PUD_SIZE_BITS)); diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S index 8b9fd216..eb091f81 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/mmu-hyp.S @@ -21,12 +21,6 @@ .extern invalidate_icache .extern _boot_pgd_down -BEGIN_FUNC(disable_mmu_caches_hyp) - /* Assume D-cache already cleaned to PoC */ - disable_mmu sctlr_el2, x9 - ret -END_FUNC(disable_mmu_caches_hyp) - BEGIN_FUNC(clean_dcache_by_range) /* Ordering needed for strongly-ordered mem, not needed for NORMAL mem. 
  * See ARM DDI 0487I.a, page D7-5063.
@@ -76,7 +70,10 @@ BEGIN_FUNC(leave_hyp)
     eret
 END_FUNC(leave_hyp)
 
-BEGIN_FUNC(arm_enable_hyp_mmu)
+BEGIN_FUNC(arm_switch_to_hyp_tables)
+    /* Load the MAIR & TCR values and construct the TTBR address before
+     * disabling and re-enabling the MMU & caches.
+     */
     /*
      * DEVICE_nGnRnE 000 00000000
      * DEVICE_nGnRE 001 00000100
@@ -91,16 +88,28 @@ BEGIN_FUNC(arm_enable_hyp_mmu)
        MAIR(0x44, MT_NORMAL_NC) | \
        MAIR(0xff, MT_NORMAL) | \
        MAIR(0xaa, MT_NORMAL_WT)
-    msr mair_el2, x5
     ldr x8, =TCR_T0SZ(48) | TCR_IRGN0_WBWC | TCR_ORGN0_WBWC | TCR_SH0_ISH | TCR_TG0_4K | TCR_PS | TCR_EL2_RES1
+
+    /* Use x16 as a temporary register */
+    disable_mmu sctlr_el2, x16
+
+    msr mair_el2, x5
     msr tcr_el2, x8
     isb
+    /* For non-VHE, the "down" table contains both the kernel mapping and the 1:1 mapping. */
     adrp x8, _boot_pgd_down
     msr ttbr0_el2, x8
     isb
+    /* Invalidate TLBs */
+    dsb sy
     tlbi alle2is
+    tlbi vmalls12e1
+    dsb sy
+
+    tlbi vmalle1is
     dsb ish
     isb
@@ -113,4 +122,4 @@
     /* NOTE: enable_mmu already contains an isb after enabling. */
     ret
-END_FUNC(arm_enable_hyp_mmu)
+END_FUNC(arm_switch_to_hyp_tables)
diff --git a/elfloader-tool/src/arch-arm/smp_boot.c b/elfloader-tool/src/arch-arm/smp_boot.c
index 704d90d2..f795f3a7 100644
--- a/elfloader-tool/src/arch-arm/smp_boot.c
+++ b/elfloader-tool/src/arch-arm/smp_boot.c
@@ -51,20 +51,17 @@ void non_boot_main(void)
     if (is_hyp_mode()) {
         extern void leave_hyp(void);
-        extern void disable_mmu_caches_hyp(void);
-#ifdef CONFIG_ARCH_AARCH64
-        /* Disable the MMU and cacheability unconditionally on ARM64.
-         * The 32 bit ARM platforms do not expect the MMU to be turned
-         * off, so we leave them alone. */
-        disable_mmu_caches_hyp();
-#endif
 #ifndef CONFIG_ARM_HYPERVISOR_SUPPORT
         leave_hyp();
 #endif
     }
     /* Enable the MMU, and enter the kernel. */
     if (is_hyp_mode()) {
+#if defined(CONFIG_ARCH_AARCH64)
+        arm_switch_to_hyp_tables();
+#else
         arm_enable_hyp_mmu();
+#endif
     } else {
         arm_enable_mmu();
     }
diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c
index f63e9824..5b49d2a5 100644
--- a/elfloader-tool/src/arch-arm/sys_boot.c
+++ b/elfloader-tool/src/arch-arm/sys_boot.c
@@ -191,34 +191,31 @@ void continue_boot(int was_relocated)
     init_boot_vspace(&kernel_info);
 #endif
 
-    /* If in EL2, disable MMU and I/D cacheability unconditionally */
     if (is_hyp_mode()) {
-        extern void disable_mmu_caches_hyp(void);
         extern void clean_dcache_by_range(paddr_t start, paddr_t end);
 
         paddr_t start = kernel_info.phys_region_start;
         paddr_t end = kernel_info.phys_region_end;
         clean_dcache_by_range(start, end);
+
         start = (paddr_t)user_info.phys_region_start;
         end = (paddr_t)user_info.phys_region_end;
         clean_dcache_by_range(start, end);
+
        start = (paddr_t)_text;
        end = (paddr_t)_end;
        clean_dcache_by_range(start, end);
+
        if (dtb) {
            start = (paddr_t)dtb;
            end = start + dtb_size;
            clean_dcache_by_range(start, end);
        }
-#if defined(CONFIG_ARCH_AARCH64)
-        /* Disable the MMU and cacheability unconditionally on ARM64.
-         * The 32 bit ARM platforms do not expect the MMU to be turned
-         * off, so we leave them alone. */
-        disable_mmu_caches_hyp();
-#endif
-
-#if (defined(CONFIG_ARCH_ARM_V7A) || defined(CONFIG_ARCH_ARM_V8A)) && !defined(CONFIG_ARM_HYPERVISOR_SUPPORT)
+#if defined(CONFIG_ARM_HYPERVISOR_SUPPORT)
+        printf("Switch to hypervisor mapping\n");
+        arm_switch_to_hyp_tables();
+#else
         extern void leave_hyp(void);
         /* Switch to EL1, assume EL2 MMU already disabled for ARMv8.
*/ leave_hyp(); @@ -230,8 +227,7 @@ void continue_boot(int was_relocated) #endif /* CONFIG_MAX_NUM_NODES */ if (is_hyp_mode()) { - printf("Enabling hypervisor MMU and paging\n"); - arm_enable_hyp_mmu(); + /* Nothing to be done here, we already switched above */ } else { printf("Enabling MMU and paging\n"); arm_enable_mmu(); diff --git a/elfloader-tool/src/drivers/uart/common.c b/elfloader-tool/src/drivers/uart/common.c index a5c914ff..b6ec30af 100644 --- a/elfloader-tool/src/drivers/uart/common.c +++ b/elfloader-tool/src/drivers/uart/common.c @@ -18,6 +18,9 @@ void uart_set_out(struct elfloader_device *out) return; } uart_out = out; +#if defined(CONFIG_ARCH_AARCH64) + mmu_set_uart_base(out->region_bases[0]); +#endif } volatile void *uart_get_mmio(void)
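
Note (not part of the patch): for reviewers who want to sanity-check the descriptor layout, below is a minimal standalone C sketch of how the new mmu.c code splits a virtual address into per-level table indices and composes a 2 MiB block descriptor for the 4 KiB translation granule. The constants mirror the patch; MT_NORMAL_IDX and the sample address are illustrative assumptions, not values taken from the ELFloader headers.

#include <stdint.h>
#include <stdio.h>

#define PAGE_BITS          12
#define BITS_PER_LEVEL     (PAGE_BITS - 3)   /* 9 index bits per level */
#define ARM_1GB_BLOCK_BITS 30
#define ARM_2MB_BLOCK_BITS 21
#define MASK(n)            ((1ULL << (n)) - 1)
#define BIT(n)             (1ULL << (n))
#define MT_NORMAL_IDX      4                 /* assumed MAIR attribute index */

/* Same index extraction as GET_PGD/PUD/PMD_INDEX in structures.h */
static unsigned l0_index(uint64_t va) { return (va >> (ARM_1GB_BLOCK_BITS + BITS_PER_LEVEL)) & MASK(BITS_PER_LEVEL); }
static unsigned l1_index(uint64_t va) { return (va >> ARM_1GB_BLOCK_BITS) & MASK(BITS_PER_LEVEL); }
static unsigned l2_index(uint64_t va) { return (va >> ARM_2MB_BLOCK_BITS) & MASK(BITS_PER_LEVEL); }

/* 2 MiB block descriptor in the shape make_pte() builds: output address,
 * access flag (bit 10), MAIR attribute index (bits 4:2) and the valid bit.
 */
static uint64_t block_desc(uint64_t pa, unsigned attr_idx)
{
    return (pa & 0x0000FFFFFFFFF000ULL) | BIT(10) | ((uint64_t)attr_idx << 2) | BIT(0);
}

int main(void)
{
    uint64_t va = 0x40200000ULL; /* hypothetical 1:1-mapped load address */
    printf("L0=%u L1=%u L2=%u descriptor=0x%016llx\n",
           l0_index(va), l1_index(va), l2_index(va),
           (unsigned long long)block_desc(va, MT_NORMAL_IDX));
    return 0;
}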