diff --git a/elfloader-tool/include/arch-arm/64/mode/aarch64.h b/elfloader-tool/include/arch-arm/64/mode/aarch64.h new file mode 100644 index 00000000..e46611c4 --- /dev/null +++ b/elfloader-tool/include/arch-arm/64/mode/aarch64.h @@ -0,0 +1,65 @@ +/* + * Copyright 2023, NIO GmbH + * + * SPDX-License-Identifier: GPL-2.0-only + */ +#pragma once + +/* This file contains useful defines for assembly and C code. */ + +#define PSR_F_BIT 0x00000040 +#define PSR_I_BIT 0x00000080 +#define PSR_A_BIT 0x00000100 +#define PSR_D_BIT 0x00000200 + +#define PSR_MODE_EL0t 0x00000000 +#define PSR_MODE_EL1t 0x00000004 +#define PSR_MODE_EL1h 0x00000005 +#define PSR_MODE_EL2t 0x00000008 +#define PSR_MODE_EL2h 0x00000009 +#define PSR_MODE_SVC_32 0x00000013 + +#define TCR_T0SZ(x) ((64 - (x))) +#define TCR_T1SZ(x) ((64 - (x)) << 16) +#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) + +#define TCR_IRGN0_WBWC (1 << 8) +#define TCR_IRGN_NC ((0 << 8) | (0 << 24)) +#define TCR_IRGN_WBWA ((1 << 8) | (1 << 24)) +#define TCR_IRGN_WT ((2 << 8) | (2 << 24)) +#define TCR_IRGN_WBnWA ((3 << 8) | (3 << 24)) +#define TCR_IRGN_MASK ((3 << 8) | (3 << 24)) + +#define TCR_ORGN0_WBWC (1 << 10) +#define TCR_ORGN_NC ((0 << 10) | (0 << 26)) +#define TCR_ORGN_WBWA ((1 << 10) | (1 << 26)) +#define TCR_ORGN_WT ((2 << 10) | (2 << 26)) +#define TCR_ORGN_WBnWA ((3 << 10) | (3 << 26)) +#define TCR_ORGN_MASK ((3 << 10) | (3 << 26)) + +#define TCR_SH0_ISH (3 << 12) +#define TCR_SHARED ((3 << 12) | (3 << 28)) + +#define TCR_TG0_4K (0 << 14) +#define TCR_TG0_64K (1 << 14) +#define TCR_TG1_4K (2 << 30) +#define TCR_TG1_64K (3 << 30) + +#define TCR_PS_4G (0 << 16) +#define TCR_PS_64G (1 << 16) +#define TCR_PS_1T (2 << 16) +#define TCR_PS_4T (3 << 16) +#define TCR_PS_16T (4 << 16) +#define TCR_PS_256T (5 << 16) + +/* bits are reserved as 1 */ +#define TCR_EL2_RES1 ((1 << 23) | (1 << 31)) +#define TCR_ASID16 (1 << 36) + +#define MT_DEVICE_nGnRnE 0 +#define MT_DEVICE_nGnRE 1 +#define MT_DEVICE_GRE 2 +#define MT_NORMAL_NC 3 
+#define MT_NORMAL 4 +#define MT_NORMAL_WT 5 +#define MAIR(_attr, _mt) ((_attr) << ((_mt) * 8)) diff --git a/elfloader-tool/include/arch-arm/64/mode/assembler.h b/elfloader-tool/include/arch-arm/64/mode/assembler.h index 4f9972c0..75b5c555 100644 --- a/elfloader-tool/include/arch-arm/64/mode/assembler.h +++ b/elfloader-tool/include/arch-arm/64/mode/assembler.h @@ -9,63 +9,7 @@ /* This file contains useful macros for assembly code. */ #ifdef __ASSEMBLER__ - -#define PSR_F_BIT 0x00000040 -#define PSR_I_BIT 0x00000080 -#define PSR_A_BIT 0x00000100 -#define PSR_D_BIT 0x00000200 - -#define PSR_MODE_EL0t 0x00000000 -#define PSR_MODE_EL1t 0x00000004 -#define PSR_MODE_EL1h 0x00000005 -#define PSR_MODE_EL2t 0x00000008 -#define PSR_MODE_EL2h 0x00000009 -#define PSR_MODE_SVC_32 0x00000013 - -#define TCR_T0SZ(x) ((64 - (x))) -#define TCR_T1SZ(x) ((64 - (x)) << 16) -#define TCR_TxSZ(x) (TCR_T0SZ(x) | TCR_T1SZ(x)) - -#define TCR_IRGN0_WBWC (1 << 8) -#define TCR_IRGN_NC ((0 << 8) | (0 << 24)) -#define TCR_IRGN_WBWA ((1 << 8) | (1 << 24)) -#define TCR_IRGN_WT ((2 << 8) | (2 << 24)) -#define TCR_IRGN_WBnWA ((3 << 8) | (3 << 24)) -#define TCR_IRGN_MASK ((3 << 8) | (3 << 24)) - -#define TCR_ORGN0_WBWC (1 << 10) -#define TCR_ORGN_NC ((0 << 10) | (0 << 26)) -#define TCR_ORGN_WBWA ((1 << 10) | (1 << 26)) -#define TCR_ORGN_WT ((2 << 10) | (2 << 26)) -#define TCR_ORGN_WBnWA ((3 << 10) | (3 << 26)) -#define TCR_ORGN_MASK ((3 << 10) | (3 << 26)) - -#define TCR_SH0_ISH (3 << 12) -#define TCR_SHARED ((3 << 12) | (3 << 28)) - -#define TCR_TG0_4K (0 << 14) -#define TCR_TG0_64K (1 << 14) -#define TCR_TG1_4K (2 << 30) -#define TCR_TG1_64K (3 << 30) - -#define TCR_PS_4G (0 << 16) -#define TCR_PS_64G (1 << 16) -#define TCR_PS_1T (2 << 16) -#define TCR_PS_4T (3 << 16) -#define TCR_PS_16T (4 << 16) -#define TCR_PS_256T (5 << 16) - -/* bits are reserved as 1 */ -#define TCR_EL2_RES1 ((1 << 23) | (1 << 31)) -#define TCR_ASID16 (1 << 36) - -#define MT_DEVICE_nGnRnE 0 -#define MT_DEVICE_nGnRE 1 -#define 
MT_DEVICE_GRE 2 -#define MT_NORMAL_NC 3 -#define MT_NORMAL 4 -#define MT_NORMAL_WT 5 -#define MAIR(_attr, _mt) ((_attr) << ((_mt) * 8)) +#include .macro enable_mmu sctlr tmp mrs \tmp, \sctlr diff --git a/elfloader-tool/include/arch-arm/64/mode/structures.h b/elfloader-tool/include/arch-arm/64/mode/structures.h index f77ef93d..aaa8bced 100644 --- a/elfloader-tool/include/arch-arm/64/mode/structures.h +++ b/elfloader-tool/include/arch-arm/64/mode/structures.h @@ -21,9 +21,9 @@ #define PMD_BITS 9 #define PMD_SIZE_BITS (PMD_BITS + PMDE_SIZE_BITS) -#define GET_PGD_INDEX(x) (((x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS + PUD_BITS)) & MASK(PGD_BITS)) -#define GET_PUD_INDEX(x) (((x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS)) & MASK(PUD_BITS)) -#define GET_PMD_INDEX(x) (((x) >> (ARM_2MB_BLOCK_BITS)) & MASK(PMD_BITS)) +#define GET_PGD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS + PUD_BITS)) & MASK(PGD_BITS)) +#define GET_PUD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS + PMD_BITS)) & MASK(PUD_BITS)) +#define GET_PMD_INDEX(x) (((word_t)(x) >> (ARM_2MB_BLOCK_BITS)) & MASK(PMD_BITS)) extern uint64_t _boot_pgd_up[BIT(PGD_BITS)]; extern uint64_t _boot_pud_up[BIT(PUD_BITS)]; diff --git a/elfloader-tool/include/arch-arm/cpuid.h b/elfloader-tool/include/arch-arm/cpuid.h index f84612be..c0e1a6ce 100644 --- a/elfloader-tool/include/arch-arm/cpuid.h +++ b/elfloader-tool/include/arch-arm/cpuid.h @@ -12,7 +12,7 @@ uint32_t read_cpuid_id(void); /* read MP ID register from CPUID */ -uint32_t read_cpuid_mpidr(void); +word_t read_cpuid_mpidr(void); /* check if CPU is in HYP/EL2 mode */ word_t is_hyp_mode(void); diff --git a/elfloader-tool/src/arch-arm/64/cpuid.c b/elfloader-tool/src/arch-arm/64/cpuid.c index 66d9d09b..6c979a0f 100644 --- a/elfloader-tool/src/arch-arm/64/cpuid.c +++ b/elfloader-tool/src/arch-arm/64/cpuid.c @@ -6,6 +6,7 @@ #include #include +#include /* we only care about the affinity bits */ #define MPIDR_MASK (0xff00ffffff) diff --git a/elfloader-tool/src/arch-arm/64/crt0.S 
b/elfloader-tool/src/arch-arm/64/crt0.S index acd4de92..7d1249be 100644 --- a/elfloader-tool/src/arch-arm/64/crt0.S +++ b/elfloader-tool/src/arch-arm/64/crt0.S @@ -29,7 +29,7 @@ BEGIN_FUNC(_start) bl fixup_image_base mov x2, x0 /* restore original arguments for next step */ - ldp x0, x1, [sp, #-16]! + ldp x0, x1, [sp], #16 /* fixup_image_base returns 1 if no need to move */ cmp x2, #1 beq 1f diff --git a/elfloader-tool/src/arch-arm/64/mmu.c b/elfloader-tool/src/arch-arm/64/mmu.c index 75d3b0a5..d5169e61 100644 --- a/elfloader-tool/src/arch-arm/64/mmu.c +++ b/elfloader-tool/src/arch-arm/64/mmu.c @@ -11,6 +11,8 @@ #include #include #include +#include +#include /* * Create a "boot" page table, which contains a 1:1 mapping below @@ -30,7 +32,7 @@ void init_boot_vspace(struct image_info *kernel_info) for (i = 0; i < BIT(PUD_BITS); i++) { _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) | BIT(10) /* access flag */ - | (0 << 2) /* strongly ordered memory */ + | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ | BIT(0); /* 1G block */ } @@ -51,7 +53,7 @@ void init_boot_vspace(struct image_info *kernel_info) #if CONFIG_MAX_NUM_NODES > 1 | (3 << 8) /* make sure the shareability is the same as the kernel's */ #endif - | (4 << 2) /* MT_NORMAL memory */ + | (MT_NORMAL << 2) /* MT_NORMAL memory */ | BIT(0); /* 2M block */ first_paddr += BIT(ARM_2MB_BLOCK_BITS); } @@ -68,7 +70,7 @@ void init_hyp_boot_vspace(struct image_info *kernel_info) for (i = 0; i < BIT(PUD_BITS); i++) { _boot_pud_down[i] = (i << ARM_1GB_BLOCK_BITS) | BIT(10) /* access flag */ - | (0 << 2) /* strongly ordered memory */ + | (MT_DEVICE_nGnRnE << 2) /* strongly ordered memory */ | BIT(0); /* 1G block */ } @@ -85,7 +87,7 @@ void init_hyp_boot_vspace(struct image_info *kernel_info) #if CONFIG_MAX_NUM_NODES > 1 | (3 << 8) #endif - | (4 << 2) /* MT_NORMAL memory */ + | (MT_NORMAL << 2) /* MT_NORMAL memory */ | BIT(0); /* 2M block */ } } diff --git a/elfloader-tool/src/arch-arm/armv/armv8-a/64/smp.c 
b/elfloader-tool/src/arch-arm/armv/armv8-a/64/smp.c index a86b02b4..edbf07e5 100644 --- a/elfloader-tool/src/arch-arm/armv/armv8-a/64/smp.c +++ b/elfloader-tool/src/arch-arm/armv/armv8-a/64/smp.c @@ -35,6 +35,12 @@ void core_entry(uint64_t sp) int is_core_up(int i) { + /* Secondary core may be booted with caches disabled, + * this value might be written in memory, invalidate our + * copy and get a new one. */ + asm volatile("dc ivac, %0\n\t" + "dmb nsh\n\t" + :: "r"(&core_up[i])); return core_up[i] == i; } diff --git a/elfloader-tool/src/arch-arm/drivers/smp-psci.c b/elfloader-tool/src/arch-arm/drivers/smp-psci.c index ef3ea012..ae5fe951 100644 --- a/elfloader-tool/src/arch-arm/drivers/smp-psci.c +++ b/elfloader-tool/src/arch-arm/drivers/smp-psci.c @@ -3,6 +3,7 @@ * * SPDX-License-Identifier: GPL-2.0-only */ +#include #include #include #include @@ -24,7 +25,13 @@ static int smp_psci_cpu_on(UNUSED struct elfloader_device *dev, } secondary_data.entry = entry; secondary_data.stack = stack; - dmb(); +#if defined(CONFIG_ARCH_AARCH64) + /* If the secondary core caches are off, need to make sure that the info + * is clean to the physical memory so that the secondary cores can read it. + */ + asm volatile("dc cvac, %0" :: "r"(&secondary_data)); + dsb(); +#endif int ret = psci_cpu_on(cpu->cpu_id, (unsigned long)&secondary_startup, 0); if (ret != PSCI_SUCCESS) { printf("Failed to bring up core 0x%x with error %d\n", cpu->cpu_id, ret); diff --git a/elfloader-tool/src/arch-arm/smp_boot.c b/elfloader-tool/src/arch-arm/smp_boot.c index d429d113..da6743c6 100644 --- a/elfloader-tool/src/arch-arm/smp_boot.c +++ b/elfloader-tool/src/arch-arm/smp_boot.c @@ -24,7 +24,7 @@ static volatile int non_boot_lock = 0; void arm_disable_dcaches(void); extern void const *dtb; -extern uint32_t dtb_size; +extern size_t dtb_size; /* Entry point for all CPUs other than the initial. 
*/ void non_boot_main(void) @@ -34,7 +34,11 @@ void non_boot_main(void) #endif /* Spin until the first CPU has finished initialisation. */ while (!non_boot_lock) { -#ifndef CONFIG_ARCH_AARCH64 +#ifdef CONFIG_ARCH_AARCH64 + /* The compiler may optimize this loop away, add a dsb() + * to force a reload. */ + dsb(); +#else cpu_idle(); #endif } @@ -58,10 +62,10 @@ void non_boot_main(void) arm_enable_mmu(); } - /* Jump to the kernel. */ + /* Jump to the kernel. Note: Our DTB is smaller than 4 GiB. */ ((init_arm_kernel_t)kernel_info.virt_entry)(user_info.phys_region_start, user_info.phys_region_end, user_info.phys_virt_offset, - user_info.virt_entry, (paddr_t)dtb, dtb_size); + user_info.virt_entry, (paddr_t)dtb, (uint32_t)dtb_size); printf("AP Kernel returned back to the elf-loader.\n"); abort(); @@ -117,7 +121,13 @@ WEAK void init_cpus(void) abort(); } - while (!is_core_up(num_cpus)); + while (!is_core_up(num_cpus)) { +#if defined(CONFIG_ARCH_AARCH64) + /* The compiler may optimize this loop away, add a dsb() + * to force a reload. */ + dsb(); +#endif + } printf("Core %d is up with logic id %d\n", elfloader_cpus[i].cpu_id, num_cpus); num_cpus++; } @@ -134,6 +144,17 @@ void smp_boot(void) arm_disable_dcaches(); #endif init_cpus(); + +#if defined(CONFIG_ARCH_AARCH64) + dsb(); +#endif + non_boot_lock = 1; + +#if defined(CONFIG_ARCH_AARCH64) + /* Secondary CPUs may still run with MMU & caches off. Force the update to be visible. 
*/ + asm volatile("dc civac, %0\n\t" :: "r"(&non_boot_lock) : "memory");; +#endif + } #endif /* CONFIG_MAX_NUM_NODES */ diff --git a/elfloader-tool/src/arch-arm/sys_boot.c b/elfloader-tool/src/arch-arm/sys_boot.c index bf98aaf2..d3d6b9e0 100644 --- a/elfloader-tool/src/arch-arm/sys_boot.c +++ b/elfloader-tool/src/arch-arm/sys_boot.c @@ -22,8 +22,13 @@ /* 0xd00dfeed in big endian */ #define DTB_MAGIC (0xedfe0dd0) -/* Maximum alignment we need to preserve when relocating (64K) */ -#define MAX_ALIGN_BITS (14) +/* Maximum alignment we need to preserve when relocating (64K) + * + * The 64 kiB alignment is a maximum requirement for a stage2 + * concatenated pagetable. See Table G5-4 in ARM DDI 0487I.a, page + * G5-9186. + */ +#define MAX_ALIGN_BITS (16) #ifdef CONFIG_IMAGE_EFI ALIGN(BIT(PAGE_BITS)) VISIBLE @@ -216,17 +221,23 @@ void continue_boot(int was_relocated) arm_enable_mmu(); } - /* Enter kernel. The UART may no longer be accessible here. */ + /* The UART may no longer be accessible here. */ if ((uintptr_t)uart_get_mmio() < kernel_info.virt_region_start) { printf("Jumping to kernel-image entry point...\n\n"); } +#if defined(CONFIG_ARCH_AARCH64) + /* Clear D&A in DAIF */ + asm volatile("msr daifclr, #0xC\n\t"); +#endif + + /* Jump to the kernel. Note: Our DTB is smaller than 4 GiB. */ ((init_arm_kernel_t)kernel_info.virt_entry)(user_info.phys_region_start, user_info.phys_region_end, user_info.phys_virt_offset, user_info.virt_entry, (word_t)dtb, - dtb_size); + (uint32_t)dtb_size); /* We should never get here. 
*/ printf("ERROR: Kernel returned back to the ELF Loader\n"); diff --git a/elfloader-tool/src/binaries/efi/efi_init.c b/elfloader-tool/src/binaries/efi/efi_init.c index a177c083..47dc2651 100644 --- a/elfloader-tool/src/binaries/efi/efi_init.c +++ b/elfloader-tool/src/binaries/efi/efi_init.c @@ -4,18 +4,28 @@ * SPDX-License-Identifier: GPL-2.0-only */ +#include #include #include void *__application_handle = NULL; // current efi application handler efi_system_table_t *__efi_system_table = NULL; // current efi system table +static unsigned long efi_exit_bs_result = EFI_SUCCESS; +static unsigned long exit_boot_services(void); + +unsigned long efi_exit_boot_services(void) +{ + return efi_exit_bs_result; +} + extern void _start(void); unsigned int efi_main(uintptr_t application_handle, uintptr_t efi_system_table) { clear_bss(); __application_handle = (void *)application_handle; __efi_system_table = (efi_system_table_t *)efi_system_table; + efi_exit_bs_result = exit_boot_services(); _start(); return 0; } @@ -41,7 +51,7 @@ void *efi_get_fdt(void) * This means boot time services are not available anymore. We should store * system information e.g. current memory map and pass them to kernel. */ -unsigned long efi_exit_boot_services(void) +static unsigned long exit_boot_services(void) { unsigned long status; efi_memory_desc_t *memory_map; @@ -52,31 +62,44 @@ unsigned long efi_exit_boot_services(void) efi_boot_services_t *bts = get_efi_boot_services(); /* - * As the number of existing memeory segments are unknown, - * we need to resort to a trial and error to guess that. - * We start from 32 and increase it by one until get a valid value. + * As the number of existing memory segments is unknown, + * we need to start somewhere. The API then tells us how much space we need + * if it is not enough. 
*/ map_size = sizeof(*memory_map) * 32; -again: - status = bts->allocate_pool(EFI_LOADER_DATA, map_size, (void **)&memory_map); + do { + status = bts->allocate_pool(EFI_LOADER_DATA, map_size, (void **)&memory_map); + /* If the allocation fails, there is something wrong and we cannot continue */ + if (status != EFI_SUCCESS) { + return status; + } + + status = bts->get_memory_map(&map_size, memory_map, &key, &desc_size, &desc_version); + if (status != EFI_SUCCESS) { + bts->free_pool(memory_map); + memory_map = NULL; + + if (status == EFI_BUFFER_TOO_SMALL) { + /* Note: "map_size" is an IN/OUT-parameter and has been updated to the + * required size. We still add one more entry ("desc_size" is in bytes) + * due to the hint from the spec ("since allocation of the new buffer + * may potentially increase memory map size."). + */ + map_size += desc_size; + } else { + /* some other error; bail out! */ + return status; + } + } + } while (status == EFI_BUFFER_TOO_SMALL); - if (status != EFI_SUCCESS) - return status; - - status = bts->get_memory_map(&map_size, memory_map, &key, &desc_size, &desc_version); - if (status == EFI_BUFFER_TOO_SMALL) { - bts->free_pool(memory_map); - - map_size += sizeof(*memory_map); - goto again; - } + status = bts->exit_boot_services(__application_handle, key); - if (status != EFI_SUCCESS){ - bts->free_pool(memory_map); - return status; - } +#if defined(CONFIG_ARCH_AARCH64) + /* Now that we're free, mask all exceptions until we enter the kernel */ + asm volatile("msr daifset, #0xF\n\t"); +#endif - status = bts->exit_boot_services(__application_handle, key); return status; } diff --git a/elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S b/elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S index 13792b87..c735616b 100644 --- a/elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S +++ b/elfloader-tool/src/binaries/efi/gnuefi/crt0-efi-aarch64.S @@ -64,7 +64,7 @@ extra_header_fields: .short 0 // MinorSubsystemVersion .long 0 // 
Win32VersionValue - .long _edata - ImageBase // SizeOfImage + .long _end - ImageBase // SizeOfImage // Everything before the kernel image is considered part of the header .long _gnuefi_start - ImageBase // SizeOfHeaders diff --git a/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds b/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds index bbbc502f..268c1b3c 100644 --- a/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds +++ b/elfloader-tool/src/binaries/efi/gnuefi/elf_aarch64_efi.lds @@ -31,6 +31,9 @@ SECTIONS *(.data) *(.data1) *(.data.*) + __start__driver_list = .; + *(_driver_list) + __stop__driver_list = .; *(.got.plt) *(.got) diff --git a/elfloader-tool/src/common.c b/elfloader-tool/src/common.c index 9846422f..d351df06 100644 --- a/elfloader-tool/src/common.c +++ b/elfloader-tool/src/common.c @@ -468,29 +468,29 @@ int load_images( /* keep it page aligned */ next_phys_addr = dtb_phys_start = ROUND_UP(kernel_phys_end, PAGE_BITS); - size_t dtb_size = fdt_size(dtb); - if (0 == dtb_size) { + size_t dtb_sz = fdt_size(dtb); + if (0 == dtb_sz) { printf("ERROR: Invalid device tree blob supplied\n"); return -1; } /* Make sure this is a sane thing to do */ ret = ensure_phys_range_valid(next_phys_addr, - next_phys_addr + dtb_size); + next_phys_addr + dtb_sz); if (0 != ret) { printf("ERROR: Physical address of DTB invalid\n"); return -1; } - memmove((void *)next_phys_addr, dtb, dtb_size); - next_phys_addr += dtb_size; + memmove((void *)next_phys_addr, dtb, dtb_sz); + next_phys_addr += dtb_sz; next_phys_addr = ROUND_UP(next_phys_addr, PAGE_BITS); dtb_phys_end = next_phys_addr; printf("Loaded DTB from %p.\n", dtb); printf(" paddr=[%p..%p]\n", dtb_phys_start, dtb_phys_end - 1); *chosen_dtb = (void *)dtb_phys_start; - *chosen_dtb_size = dtb_size; + *chosen_dtb_size = dtb_sz; } else { next_phys_addr = ROUND_UP(kernel_phys_end, PAGE_BITS); }