From 175aa0b614df395758f810a0ce80b3e00a770340 Mon Sep 17 00:00:00 2001 From: David Garske Date: Thu, 22 Aug 2024 11:19:34 -0700 Subject: [PATCH] Fixes to support wolfBoot native make and gcc-arm cross compiler for Xilinx UltraScale+ MPSoC. ZD 18159 --- IDE/XilinxSDK/README.md | 10 +- arch.mk | 160 +++++++------ hal/raspi3.c | 11 - hal/zynq.c | 368 ++++------------------------ hal/zynq.h | 346 +++++++++++++++++++++++++++ hal/zynq.ld | 31 ++- src/boot_aarch64.c | 47 +++- src/boot_aarch64_start.S | 421 ++++++++++++++++++++++++++++----- src/boot_aarch64_translation.S | 244 +++++++++++++++++++ src/boot_aarch64_vectors.S | 404 +++++++++++++++++++++++++++++++ 10 files changed, 1556 insertions(+), 486 deletions(-) create mode 100644 hal/zynq.h create mode 100644 src/boot_aarch64_translation.S create mode 100644 src/boot_aarch64_vectors.S diff --git a/IDE/XilinxSDK/README.md b/IDE/XilinxSDK/README.md index 09b3dd970..b06a7c73c 100644 --- a/IDE/XilinxSDK/README.md +++ b/IDE/XilinxSDK/README.md @@ -91,9 +91,13 @@ Xilinx uses a `bootgen` tool for generating a boot binary image that has Xilinx * Use "offset=" option to place the application into a specific location in flash. * Use "load=" option to have FSBL load into specific location in RAM. -Generating a boot.bin (from boot.bif). -Run the Xilinx -> Vitis Shell and cd into the workspace root. +Default install locations for bootgen tools: +* Linux: `/tools/Xilinx/Vitis/2022.1/bin` +* Windows: `C:\Xilinx\Vitis\2022.1\bin` + +Open the Vitis Shell from the IDE by using file menu "Xilinx" -> "Vitis Shell". +Generating a boot.bin (from boot.bif). 
Example boot.bif in workspace root: ``` @@ -107,6 +111,8 @@ the_ROM_image: } ``` +From the workspace root: + ```sh bootgen -image boot.bif -arch zynqmp -w -o BOOT.bin diff --git a/arch.mk b/arch.mk index 92dbe8319..2cbcc7217 100644 --- a/arch.mk +++ b/arch.mk @@ -54,18 +54,34 @@ ifeq ($(ARCH),x86_64) endif endif -## ARM +## ARM Cortex-A ifeq ($(ARCH),AARCH64) CROSS_COMPILE?=aarch64-none-elf- - CFLAGS+=-DARCH_AARCH64 -march=armv8-a - OBJS+=src/boot_aarch64.o src/boot_aarch64_start.o - CFLAGS+=-DNO_QNX + CFLAGS+=-DARCH_AARCH64 + OBJS+=src/boot_aarch64.o src/boot_aarch64_start.o src/boot_aarch64_vectors.o src/boot_aarch64_translation.o + + ifeq ($(TARGET),zynq) + CFLAGS+=-march=armv8-a+crypto -DCORTEX_A53 + CFLAGS+=-DNO_QNX + + # Support detection and skip of U-Boot legacy header + CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY + CFLAGS+=-DWOLFBOOT_DUALBOOT + endif + ifeq ($(SPMATH),1) MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_arm64.o endif + ifeq ($(NO_ASM),0) + ARCH_FLAGS=-mstrict-align + CFLAGS+=$(ARCH_FLAGS) -DWOLFSSL_ARMASM -DWC_HASH_DATA_ALIGNMENT=8 + WOLFCRYPT_OBJS += lib/wolfssl/wolfcrypt/src/port/arm/armv8-sha256.o \ + lib/wolfssl/wolfcrypt/src/port/arm/armv8-aes.o + endif endif +## ARM Cortex-M ifeq ($(ARCH),ARM) CROSS_COMPILE?=arm-none-eabi- CFLAGS+=-mthumb -mlittle-endian -mthumb-interwork -DARCH_ARM @@ -137,7 +153,6 @@ ifeq ($(ARCH),ARM) endif endif - ifeq ($(TARGET),stm32l5) CORTEX_M33=1 CFLAGS+=-Ihal @@ -177,93 +192,92 @@ ifeq ($(ARCH),ARM) endif ## Cortex-M CPU -ifeq ($(CORTEX_M33),1) - CFLAGS+=-mcpu=cortex-m33 -DCORTEX_M33 - LDFLAGS+=-mcpu=cortex-m33 - ifeq ($(TZEN),1) - OBJS+=hal/stm32_tz.o - CFLAGS+=-mcmse - ifeq ($(WOLFCRYPT_TZ),1) - SECURE_OBJS+=./src/wc_callable.o - SECURE_OBJS+=./lib/wolfssl/wolfcrypt/src/random.o - CFLAGS+=-DWOLFCRYPT_SECURE_MODE - SECURE_LDFLAGS+=-Wl,--cmse-implib -Wl,--out-implib=./src/wc_secure_calls.o - endif - endif # TZEN=1 - ifeq ($(NO_ASM),1) - ifeq ($(SPMATH),1) - ifeq 
($(NO_ASM),1) - MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o - else - CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM - MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o + ifeq ($(CORTEX_M33),1) + CFLAGS+=-mcpu=cortex-m33 -DCORTEX_M33 + LDFLAGS+=-mcpu=cortex-m33 + ifeq ($(TZEN),1) + OBJS+=hal/stm32_tz.o + CFLAGS+=-mcmse + ifeq ($(WOLFCRYPT_TZ),1) + SECURE_OBJS+=./src/wc_callable.o + SECURE_OBJS+=./lib/wolfssl/wolfcrypt/src/random.o + CFLAGS+=-DWOLFCRYPT_SECURE_MODE + SECURE_LDFLAGS+=-Wl,--cmse-implib -Wl,--out-implib=./src/wc_secure_calls.o endif - endif - else - ifeq ($(SPMATH),1) - CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM - MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o - endif - endif -else - ifeq ($(CORTEX_M7),1) - CFLAGS+=-mcpu=cortex-m7 - LDFLAGS+=-mcpu=cortex-m7 - ifeq ($(SPMATH),1) - ifeq ($(NO_ASM),1) - MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o - else + endif # TZEN=1 + ifeq ($(NO_ASM),1) + ifeq ($(SPMATH),1) + ifeq ($(NO_ASM),1) + MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o + else + CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM + MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o + endif + endif + else + ifeq ($(SPMATH),1) CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o endif endif else - ifeq ($(CORTEX_M0),1) - CFLAGS+=-mcpu=cortex-m0 - LDFLAGS+=-mcpu=cortex-m0 + ifeq ($(CORTEX_M7),1) + CFLAGS+=-mcpu=cortex-m7 + LDFLAGS+=-mcpu=cortex-m7 ifeq ($(SPMATH),1) ifeq ($(NO_ASM),1) MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o else - CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_THUMB_ASM - MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_armthumb.o + CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM + MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o endif endif else - ifeq ($(CORTEX_M3),1) - - CFLAGS+=-mcpu=cortex-m3 - LDFLAGS+=-mcpu=cortex-m3 - ifeq ($(NO_ASM),1) - ifeq ($(SPMATH),1) + ifeq ($(CORTEX_M0),1) + CFLAGS+=-mcpu=cortex-m0 
+ LDFLAGS+=-mcpu=cortex-m0 + ifeq ($(SPMATH),1) + ifeq ($(NO_ASM),1) MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o + else + CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_THUMB_ASM + MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_armthumb.o endif - else - ifeq ($(SPMATH),1) - CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM -DWOLFSSL_SP_NO_UMAAL - MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o - endif - endif - else - # default Cortex M4 - CFLAGS+=-mcpu=cortex-m4 - LDFLAGS+=-mcpu=cortex-m4 - ifeq ($(NO_ASM),1) - ifeq ($(SPMATH),1) - MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o endif else - CFLAGS+=-fomit-frame-pointer # required with debug builds only - ifeq ($(SPMATH),1) - CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM - MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o + ifeq ($(CORTEX_M3),1) + CFLAGS+=-mcpu=cortex-m3 + LDFLAGS+=-mcpu=cortex-m3 + ifeq ($(NO_ASM),1) + ifeq ($(SPMATH),1) + MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o + endif + else + ifeq ($(SPMATH),1) + CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM -DWOLFSSL_SP_NO_UMAAL + MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o + endif + endif + else + # default Cortex M4 + CFLAGS+=-mcpu=cortex-m4 + LDFLAGS+=-mcpu=cortex-m4 + ifeq ($(NO_ASM),1) + ifeq ($(SPMATH),1) + MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_c32.o + endif + else + CFLAGS+=-fomit-frame-pointer # required with debug builds only + ifeq ($(SPMATH),1) + CFLAGS+=-DWOLFSSL_SP_ASM -DWOLFSSL_SP_ARM_CORTEX_M_ASM + MATH_OBJS += ./lib/wolfssl/wolfcrypt/src/sp_cortexm.o + endif + endif endif endif endif endif endif -endif -endif ifeq ($(TZEN),1) CFLAGS+=-DTZEN @@ -698,12 +712,6 @@ ifeq ($(TARGET),nxp_p1021) SPI_TARGET=nxp endif -ifeq ($(TARGET),zynq) - # Support detection and skip of U-Boot legecy header */ - CFLAGS+=-DWOLFBOOT_UBOOT_LEGACY - CFLAGS+=-DWOLFBOOT_DUALBOOT -endif - ifeq ($(TARGET),ti_hercules) # HALCoGen Source and Include? 
CORTEX_R5=1 diff --git a/hal/raspi3.c b/hal/raspi3.c index 6ec9d5543..fbe204ba3 100644 --- a/hal/raspi3.c +++ b/hal/raspi3.c @@ -86,17 +86,6 @@ void* hal_get_dts_update_address(void) return NULL; /* Not yet supported */ } -/* QSPI functions */ -void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) -{ -} - - -void zynq_init(uint32_t cpu_clock) -{ -} - - /* public HAL functions */ void hal_init(void) diff --git a/hal/zynq.c b/hal/zynq.c index fd1b31ac9..702237608 100644 --- a/hal/zynq.c +++ b/hal/zynq.c @@ -1,6 +1,6 @@ /* zynq.c * - * Copyright (C) 2021 wolfSSL Inc. + * Copyright (C) 2024 wolfSSL Inc. * * This file is part of wolfBoot. * @@ -19,8 +19,13 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ -#include -#include +#ifdef TARGET_zynq + +#include "hal/zynq.h" + +#ifndef ARCH_AARCH64 +# error "wolfBoot zynq HAL: wrong architecture selected. Please compile with ARCH=AARCH64." +#endif #if defined(__QNXNTO__) && !defined(NO_QNX) #define USE_QNX @@ -31,9 +36,9 @@ #include #include "image.h" #include "printf.h" -#ifndef ARCH_AARCH64 -# error "wolfBoot zynq HAL: wrong architecture selected. Please compile with ARCH=AARCH64." -#endif + +#include +#include #ifdef USE_XQSPIPSU /* Xilinx BSP Driver */ @@ -48,214 +53,6 @@ /* QSPI bare-metal */ #endif - -/* QSPI bare-metal driver */ -#define CORTEXA53_0_CPU_CLK_FREQ_HZ 1099989014 -#define CORTEXA53_0_TIMESTAMP_CLK_FREQ 99998999 - -/* Generic Quad-SPI */ -#define QSPI_BASE 0xFF0F0000UL -#define LQSPI_EN (*((volatile uint32_t*)(QSPI_BASE + 0x14))) /* SPI enable: 0: disable the SPI, 1: enable the SPI */ -#define GQSPI_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x100))) /* configuration register. */ -#define GQSPI_ISR (*((volatile uint32_t*)(QSPI_BASE + 0x104))) /* interrupt status register. */ -#define GQSPI_IER (*((volatile uint32_t*)(QSPI_BASE + 0x108))) /* interrupt enable register. */ -#define GQSPI_IDR (*((volatile uint32_t*)(QSPI_BASE + 0x10C))) /* interrupt disable register. 
*/ -#define GQSPI_IMR (*((volatile uint32_t*)(QSPI_BASE + 0x110))) /* interrupt unmask register. */ -#define GQSPI_EN (*((volatile uint32_t*)(QSPI_BASE + 0x114))) /* enable register. */ -#define GQSPI_TXD (*((volatile uint32_t*)(QSPI_BASE + 0x11C))) /* TX data register. Keyhole addresses for the transmit data FIFO. */ -#define GQSPI_RXD (*((volatile uint32_t*)(QSPI_BASE + 0x120))) /* RX data register. */ -#define GQSPI_TX_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x128))) /* TXFIFO Threshold Level register: (bits 5:0) Defines the level at which the TX_FIFO_NOT_FULL interrupt is generated */ -#define GQSPI_RX_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x12C))) /* RXFIFO threshold level register: (bits 5:0) Defines the level at which the RX_FIFO_NOT_EMPTY interrupt is generated */ -#define GQSPI_GPIO (*((volatile uint32_t*)(QSPI_BASE + 0x130))) -#define GQSPI_LPBK_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x138))) /* adjusting the internal loopback clock delay for read data capturing */ -#define GQSPI_GEN_FIFO (*((volatile uint32_t*)(QSPI_BASE + 0x140))) /* generic FIFO data register. Keyhole addresses for the generic FIFO. */ -#define GQSPI_SEL (*((volatile uint32_t*)(QSPI_BASE + 0x144))) /* select register. */ -#define GQSPI_FIFO_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x14C))) /* FIFO control register. */ -#define GQSPI_GF_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x150))) /* generic FIFO threshold level register: (bits 4:0) Defines the level at which the GEN_FIFO_NOT_FULL interrupt is generated */ -#define GQSPI_POLL_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x154))) /* poll configuration register */ -#define GQSPI_P_TIMEOUT (*((volatile uint32_t*)(QSPI_BASE + 0x158))) /* poll timeout register. */ -#define GQSPI_XFER_STS (*((volatile uint32_t*)(QSPI_BASE + 0x15C))) /* transfer status register. 
*/ -#define QSPI_DATA_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x1F8))) /* adjusting the internal receive data delay for read data capturing */ -#define GQSPI_MOD_ID (*((volatile uint32_t*)(QSPI_BASE + 0x1FC))) -#define QSPIDMA_DST_STS (*((volatile uint32_t*)(QSPI_BASE + 0x808))) -#define QSPIDMA_DST_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x80C))) -#define QSPIDMA_DST_I_STS (*((volatile uint32_t*)(QSPI_BASE + 0x814))) -#define QSPIDMA_DST_CTRL2 (*((volatile uint32_t*)(QSPI_BASE + 0x824))) - -/* GQSPI Registers */ -/* GQSPI_CFG: Configuration registers */ -#define GQSPI_CFG_CLK_POL (1UL << 1) /* Clock polarity outside QSPI word: 0: QSPI clock is quiescent low, 1: QSPI clock is quiescent high */ -#define GQSPI_CFG_CLK_PH (1UL << 2) /* Clock phase: 1: the QSPI clock is inactive outside the word, 0: the QSPI clock is active outside the word */ -/* 000: divide by 2, 001: divide by 4, 010: divide by 8, - 011: divide by 16, 100: divide by 32, 101: divide by 64, - 110: divide by 128, 111: divide by 256 */ -#define GQSPI_CFG_BAUD_RATE_DIV_MASK (7UL << 3) -#define GQSPI_CFG_BAUD_RATE_DIV(d) ((d << 3) & GQSPI_CFG_BAUD_RATE_DIV_MASK) -#define GQSPI_CFG_WP_HOLD (1UL << 19) /* If set, Holdb and WPn pins are actively driven by the qspi controller in 1-bit and 2-bit modes. 
*/ -#define GQSPI_CFG_EN_POLL_TIMEOUT (1UL << 20) /* Poll Timeout Enable: 0: disable, 1: enable */ -#define GQSPI_CFG_ENDIAN (1UL << 26) /* Endian format transmit data register: 0: little endian, 1: big endian */ -#define GQSPI_CFG_START_GEN_FIFO (1UL << 28) /* Trigger Generic FIFO Command Execution: 0:disable executing requests, 1: enable executing requests */ -#define GQSPI_CFG_GEN_FIFO_START_MODE (1UL << 29) /* Start mode of Generic FIFO: 0: Auto Start Mode, 1: Manual Start Mode */ -#define GQSPI_CFG_MODE_EN_MASK (3UL << 30) /* Flash memory interface mode control: 00: IO mode, 10: DMA mode */ -#define GQSPI_CFG_MODE_EN(m) ((m << 30) & GQSPI_CFG_MODE_EN_MASK) -#define GQSPI_CFG_MODE_EN_IO GQSPI_CFG_MODE_EN(0) -#define GQSPI_CFG_MODE_EN_DMA GQSPI_CFG_MODE_EN(2) - -/* GQSPI_ISR / GQSPI_IER / GQSPI_IDR / GQSPI_IMR: Interrupt registers */ -#define GQSPI_IXR_RX_FIFO_EMPTY (1UL << 11) -#define GQSPI_IXR_GEN_FIFO_FULL (1UL << 10) -#define GQSPI_IXR_GEN_FIFO_NOT_FULL (1UL << 9) -#define GQSPI_IXR_TX_FIFO_EMPTY (1UL << 8) -#define GQSPI_IXR_GEN_FIFO_EMPTY (1UL << 7) -#define GQSPI_IXR_RX_FIFO_FULL (1UL << 5) -#define GQSPI_IXR_RX_FIFO_NOT_EMPTY (1UL << 4) -#define GQSPI_IXR_TX_FIFO_FULL (1UL << 3) -#define GQSPI_IXR_TX_FIFO_NOT_FULL (1UL << 2) -#define GQSPI_IXR_POLL_TIME_EXPIRE (1UL << 1) - -#define GQSPI_IXR_ALL_MASK (GQSPI_IXR_POLL_TIME_EXPIRE | GQSPI_IXR_TX_FIFO_NOT_FULL | \ - GQSPI_IXR_TX_FIFO_FULL | GQSPI_IXR_RX_FIFO_NOT_EMPTY | GQSPI_IXR_RX_FIFO_FULL | \ - GQSPI_IXR_GEN_FIFO_EMPTY | GQSPI_IXR_TX_FIFO_EMPTY | GQSPI_IXR_GEN_FIFO_NOT_FULL | \ - GQSPI_IXR_GEN_FIFO_FULL | GQSPI_IXR_RX_FIFO_EMPTY) -#define GQSPI_ISR_WR_TO_CLR_MASK 0x00000002U - -/* GQSPI_GEN_FIFO: FIFO data register */ -/* bits 0-7: Length in bytes (except when GQSPI_GEN_FIFO_EXP_MASK is set length as 255 chunks) */ -#define GQSPI_GEN_FIFO_IMM_MASK (0xFFUL) /* Immediate Data Field */ -#define GQSPI_GEN_FIFO_IMM(imm) (imm & GQSPI_GEN_FIFO_IMM_MASK) -#define GQSPI_GEN_FIFO_DATA_XFER (1UL << 8) /* Indicates 
IMM is size, otherwise byte is sent directly in IMM reg */ -#define GQSPI_GEN_FIFO_EXP_MASK (1UL << 9) /* Length is Exponent (length / 255) */ -#define GQSPI_GEN_FIFO_MODE_MASK (3UL << 10) -#define GQSPI_GEN_FIFO_MODE(m) ((m << 10) & GQSPI_GEN_FIFO_MODE_MASK) -#define GQSPI_GEN_FIFO_MODE_SPI GQSPI_GEN_FIFO_MODE(1) -#define GQSPI_GEN_FIFO_MODE_DSPI GQSPI_GEN_FIFO_MODE(2) -#define GQSPI_GEN_FIFO_MODE_QSPI GQSPI_GEN_FIFO_MODE(3) -#define GQSPI_GEN_FIFO_CS_MASK (3UL << 12) -#define GQSPI_GEN_FIFO_CS(c) ((c << 12) & GQSPI_GEN_FIFO_CS_MASK) -#define GQSPI_GEN_FIFO_CS_LOWER GQSPI_GEN_FIFO_CS(1) -#define GQSPI_GEN_FIFO_CS_UPPER GQSPI_GEN_FIFO_CS(2) -#define GQSPI_GEN_FIFO_CS_BOTH GQSPI_GEN_FIFO_CS(3) -#define GQSPI_GEN_FIFO_BUS_MASK (3UL << 14) -#define GQSPI_GEN_FIFO_BUS(b) ((b << 14) & GQSPI_GEN_FIFO_BUS_MASK) -#define GQSPI_GEN_FIFO_BUS_LOW GQSPI_GEN_FIFO_BUS(1) -#define GQSPI_GEN_FIFO_BUS_UP GQSPI_GEN_FIFO_BUS(2) -#define GQSPI_GEN_FIFO_BUS_BOTH GQSPI_GEN_FIFO_BUS(3) -#define GQSPI_GEN_FIFO_TX (1UL << 16) -#define GQSPI_GEN_FIFO_RX (1UL << 17) -#define GQSPI_GEN_FIFO_STRIPE (1UL << 18) /* Stripe data across the lower and upper data buses. */ -#define GQSPI_GEN_FIFO_POLL (1UL << 19) - -/* GQSPI_FIFO_CTRL */ -#define GQSPI_FIFO_CTRL_RST_GEN_FIFO (1UL << 0) -#define GQSPI_FIFO_CTRL_RST_TX_FIFO (1UL << 1) -#define GQSPI_FIFO_CTRL_RST_RX_FIFO (1UL << 2) - -/* QSPIDMA_DST_CTRL */ -#define QSPIDMA_DST_CTRL_DEF 0x403FFA00UL -#define QSPIDMA_DST_CTRL2_DEF 0x081BFFF8UL - -/* QSPIDMA_DST_STS */ -#define QSPIDMA_DST_STS_WTC 0xE000U - -/* QSPIDMA_DST_I_STS */ -#define QSPIDMA_DST_I_STS_ALL_MASK 0xFEU - -/* IOP System-level Control */ -#define IOU_SLCR_BASSE 0xFF180000 -#define IOU_TAPDLY_BYPASS (*((volatile uint32_t*)(IOU_SLCR_BASSE + 0x390))) -#define IOU_TAPDLY_BYPASS_LQSPI_RX (1UL << 2) /* LQSPI Tap Delay Enable on Rx Clock signal. 0: enable. 1: disable (bypass tap delay). 
*/ - - -/* Configuration used for bare-metal only */ -#define GQSPI_CLK_FREQ_HZ 124987511 -#define GQSPI_CLK_DIV 2 /* (CLK / (2 << val) = BUS) - DIV 2 = 37.5 MHz */ -#define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) - num of clock cycles foes in IMM */ -#define GQSPI_FIFO_WORD_SZ 4 -#define GQSPI_TIMEOUT_TRIES 100000 -#define QSPI_FLASH_READY_TRIES 1000 - - -/* QSPI Configuration */ -#ifndef GQSPI_QSPI_MODE -#define GQSPI_QSPI_MODE GQSPI_GEN_FIFO_MODE_QSPI -#endif -#ifndef GQPI_USE_DUAL_PARALLEL -#define GQPI_USE_DUAL_PARALLEL 1 /* 0=no stripe, 1=stripe */ -#endif -#ifndef GQPI_USE_4BYTE_ADDR -#define GQPI_USE_4BYTE_ADDR 1 -#endif -#ifndef GQSPI_DUMMY_READ -#define GQSPI_DUMMY_READ (8*8) /* Number of dummy clock cycles for reads */ -#endif - - - -/* Flash Parameters: - * Micron Serial NOR Flash Memory 64KB Sector Erase MT25QU512ABB - * Stacked device (two 512Mb (64MB)) - * Dual Parallel so total addressable size is double - */ -#ifndef FLASH_DEVICE_SIZE - #ifdef ZCU102 - /* 64*2 (dual parallel) = 128MB */ - #define FLASH_DEVICE_SIZE (2 * 64 * 1024 * 1024) /* MT25QU512ABB */ - #else - /* 128*2 (dual parallel) = 256MB */ - #define FLASH_DEVICE_SIZE (2 * 128 * 1024 * 1024) /* MT25QU01GBBB */ - #endif -#endif -#ifndef FLASH_PAGE_SIZE - #ifdef ZCU102 - #define FLASH_PAGE_SIZE 256 /* MT25QU512ABB */ - #else - #define FLASH_PAGE_SIZE 512 /* MT25QU01GBBB */ - #endif -#endif -#define FLASH_NUM_SECTORS (FLASH_DEVICE_SIZE/WOLFBOOT_SECTOR_SIZE) - - -/* Flash Commands */ -#define WRITE_ENABLE_CMD 0x06U -#define READ_SR_CMD 0x05U -#define WRITE_DISABLE_CMD 0x04U -#define READ_ID_CMD 0x9FU -#define MULTI_IO_READ_ID_CMD 0xAFU -#define READ_FSR_CMD 0x70U -#define ENTER_QSPI_MODE_CMD 0x35U -#define EXIT_QSPI_MODE_CMD 0xF5U -#define ENTER_4B_ADDR_MODE_CMD 0xB7U -#define EXIT_4B_ADDR_MODE_CMD 0xE9U - -#define FAST_READ_CMD 0x0BU -#define DUAL_READ_CMD 0x3BU -#define QUAD_READ_CMD 0x6BU -#define FAST_READ_4B_CMD 0x0CU -#define DUAL_READ_4B_CMD 0x3CU -#define QUAD_READ_4B_CMD 
0x6CU - -#define PAGE_PROG_CMD 0x02U -#define DUAL_PROG_CMD 0xA2U -#define QUAD_PROG_CMD 0x22U -#define PAGE_PROG_4B_CMD 0x12U -#define DUAL_PROG_4B_CMD 0x12U -#define QUAD_PROG_4B_CMD 0x34U - -#define SEC_ERASE_CMD 0xD8U -#define SEC_4K_ERASE_CMD 0x20U -#define RESET_ENABLE_CMD 0x66U -#define RESET_MEMORY_CMD 0x99U - -#define WRITE_EN_MASK 0x02 /* 0=Write Enabled, 1=Disabled Write */ -#define FLASH_READY_MASK 0x80 /* 0=Busy, 1=Ready */ - - -/* Return Codes */ -#define GQSPI_CODE_SUCCESS 0 -#define GQSPI_CODE_FAILED -100 -#define GQSPI_CODE_TIMEOUT -101 - - /* QSPI Slave Device Information */ typedef struct QspiDev { uint32_t mode; /* GQSPI_GEN_FIFO_MODE_SPI, GQSPI_GEN_FIFO_MODE_DSPI or GQSPI_GEN_FIFO_MODE_QSPI */ @@ -279,84 +76,35 @@ static int qspi_wait_we(QspiDev_t* dev); static int test_ext_flash(QspiDev_t* dev); #endif -/* eFUSE support */ -#define ZYNQMP_EFUSE_BASE 0xFFCC0000 -#define ZYNQMP_EFUSE_STATUS (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x0008))) -#define ZYNQMP_EFUSE_SEC_CTRL (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x1058))) -#define ZYNQMP_EFUSE_PPK0_0 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A0))) -#define ZYNQMP_EFUSE_PPK0_1 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A4))) -#define ZYNQMP_EFUSE_PPK0_2 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A8))) -#define ZYNQMP_EFUSE_PPK0_3 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10AC))) -#define ZYNQMP_EFUSE_PPK0_4 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B0))) -#define ZYNQMP_EFUSE_PPK0_5 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B4))) -#define ZYNQMP_EFUSE_PPK0_6 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B8))) -#define ZYNQMP_EFUSE_PPK0_7 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10BC))) -#define ZYNQMP_EFUSE_PPK0_8 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C0))) -#define ZYNQMP_EFUSE_PPK0_9 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C4))) -#define ZYNQMP_EFUSE_PPK0_10 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C8))) -#define 
ZYNQMP_EFUSE_PPK0_11 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10CC))) - -/* eFUSE STATUS Registers */ -#define ZYNQMP_EFUSE_STATUS_CACHE_DONE (1UL << 5) -#define ZYNQMP_EFUSE_STATUS_CACHE_LOAD (1UL << 4) - -/* eFUSE SEC_CTRL Registers */ -#define ZYNQMP_EFUSE_SEC_CTRL_PPK1_INVLD (3UL << 30) /* Revokes PPK1 */ -#define ZYNQMP_EFUSE_SEC_CTRL_PPK1_WRLK (1UL << 29) /* Locks writing to PPK1 eFuses */ -#define ZYNQMP_EFUSE_SEC_CTRL_PPK0_INVLD (3UL << 27) /* Revokes PPK0 */ -#define ZYNQMP_EFUSE_SEC_CTRL_PPK0_WRLK (1UL << 26) /* Locks writing to PPK0 eFuses */ -#define ZYNQMP_EFUSE_SEC_CTRL_RSA_EN (15UL << 11) /* Enables RSA Authentication during boot. All boots must be authenticated */ -#define ZYNQMP_EFUSE_SEC_CTRL_SEC_LOCK (1UL << 10) /* Disables the reboot into JTAG mode when doing a secure lockdown. */ -#define ZYNQMP_EFUSE_SEC_CTRL_JTAG_DIS (1UL << 5) /* Disables the JTAG controller. The only instructions available are BYPASS and IDCODE. */ -#define ZYNQMP_EFUSE_SEC_CTRL_ENC_ONLY (1UL << 2) /* Requires all boots to be encrypted using the eFuse key. 
*/ -#define ZYNQMP_EFUSE_SEC_CTRL_AES_WRLK (1UL << 1) /* Locks writing to the AES key section of eFuse */ -#define ZYNQMP_EFUSE_SEC_CTRL_AES_RDLK (1UL << 0) /* Locks the AES key CRC check function */ - - #ifdef DEBUG_UART -/* UART Support for Debugging */ -#define ZYNQMP_UART0_BASE 0xFF000000 -#define ZYNQMP_UART1_BASE 0xFF010000 - -#define ZYNQMP_UART_CR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x0))) -#define ZYNQMP_UART_MR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x4))) -#define ZYNQMP_UART_SR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x2C))) -#define ZYNQMP_UART_FIFO (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x30))) -#define ZYNQMP_UART_BR_GEN (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x18))) /* 2 - 65535: baud_sample */ -#define ZYNQMP_UART_BR_DIV (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x34))) /* 4 - 255: Baud rate */ - -/* UART Control Registers */ -#define ZYNQMP_UART_CR_TX_EN 0x00000010 /* TX enabled */ -#define ZYNQMP_UART_CR_RX_EN 0x00000004 /* RX enabled */ -#define ZYNQMP_UART_CR_TXRST 0x00000002 /* TX logic reset */ -#define ZYNQMP_UART_CR_RXRST 0x00000001 /* RX logic reset */ - -/* UART Mode Registers */ -#define ZYNQMP_UART_MR_PARITY_NONE 0x00000020 /* No parity */ - -/* UART Channel Status Register (read only) */ -#define ZYNQMP_UART_SR_TXFULL 0x00000010U /* TX FIFO full */ -#define ZYNQMP_UART_SR_TXEMPTY 0x00000008U /* TX FIFO empty */ -#define ZYNQMP_UART_SR_RXFULL 0x00000004U /* RX FIFO full */ -#define ZYNQMP_UART_SR_RXEMPTY 0x00000002U /* RX FIFO empty */ - -/* UART Configuration */ -#define UART_MASTER_CLOCK 100000000 -#define DEBUG_UART_BASE ZYNQMP_UART1_BASE -#define DEBUG_UART_BAUD 115200 -#define DEBUG_UART_DIV 4 - void uart_init(void) { - /* Enable TX/RX and Reset */ - ZYNQMP_UART_CR = (ZYNQMP_UART_CR_TX_EN | ZYNQMP_UART_CR_RX_EN | - ZYNQMP_UART_CR_TXRST | ZYNQMP_UART_CR_RXRST); + /* Disable Interrupts */ + ZYNQMP_UART_IDR = ZYNQMP_UART_ISR_MASK; + /* Disable TX/RX */ + ZYNQMP_UART_CR = (ZYNQMP_UART_CR_TX_DIS | 
ZYNQMP_UART_CR_RX_DIS); + /* Clear ISR */ + ZYNQMP_UART_ISR = ZYNQMP_UART_ISR_MASK; + /* 8-bits, no parity */ ZYNQMP_UART_MR = ZYNQMP_UART_MR_PARITY_NONE; + /* FIFO Trigger Level */ + ZYNQMP_UART_RXWM = 32; /* half of 64 byte FIFO */ + ZYNQMP_UART_TXWM = 32; /* half of 64 byte FIFO */ + + /* RX Timeout - disable */ + ZYNQMP_UART_RXTOUT = 0; + /* baud (115200) = master clk / (BR_GEN * (BR_DIV + 1)) */ + ZYNQMP_UART_BR_GEN = UART_MASTER_CLOCK / (DEBUG_UART_BAUD * (DEBUG_UART_DIV+1)); ZYNQMP_UART_BR_DIV = DEBUG_UART_DIV; - ZYNQMP_UART_BR_GEN = UART_MASTER_CLOCK / DEBUG_UART_BAUD / (DEBUG_UART_DIV+1); + + /* Reset TX/RX */ + ZYNQMP_UART_CR = (ZYNQMP_UART_CR_TXRST | ZYNQMP_UART_CR_RXRST); + + /* Enable TX/RX */ + ZYNQMP_UART_CR = (ZYNQMP_UART_CR_TX_EN | ZYNQMP_UART_CR_RX_EN); } void uart_write(const char* buf, uint32_t sz) @@ -366,10 +114,10 @@ void uart_write(const char* buf, uint32_t sz) char c = buf[pos++]; if (c == '\n') { /* handle CRLF */ while (ZYNQMP_UART_SR & ZYNQMP_UART_SR_TXFULL); - ZYNQMP_UART_SR = '\r'; + ZYNQMP_UART_FIFO = '\r'; } while (ZYNQMP_UART_SR & ZYNQMP_UART_SR_TXFULL); - ZYNQMP_UART_SR = c; + ZYNQMP_UART_FIFO = c; } /* Wait till TX Fifo is empty */ while (!(ZYNQMP_UART_SR & ZYNQMP_UART_SR_TXEMPTY)); @@ -778,8 +526,7 @@ static int qspi_transfer(QspiDev_t* pDev, return ret; } - -#endif +#endif /* QSPI Implementation */ static int qspi_flash_read_id(QspiDev_t* dev, uint8_t* id, uint32_t idSz) { @@ -1139,33 +886,6 @@ void qspi_init(uint32_t cpu_clock, uint32_t flash_freq) } -void zynq_init(uint32_t cpu_clock) -{ - qspi_init(cpu_clock, 0); -} - -void zynq_exit(void) -{ - int ret; - -#if GQPI_USE_4BYTE_ADDR == 1 - /* Exit 4-byte address mode */ - ret = qspi_exit_4byte_addr(&mDev); - if (ret != GQSPI_CODE_SUCCESS) - return; -#endif - -#ifdef USE_QNX - if (mDev.qnx) { - xzynq_qspi_close(mDev.qnx); - mDev.qnx = NULL; - } -#endif - - (void)ret; -} - - /* public HAL functions */ void hal_init(void) { @@ -1182,12 +902,24 @@ void hal_init(void) asm 
volatile("msr cntfrq_el0, %0" : : "r" (cpu_freq) : "memory"); #endif - zynq_init(cpu_freq); + qspi_init(cpu_freq, 0); } void hal_prepare_boot(void) { - zynq_exit(); +#if GQPI_USE_4BYTE_ADDR == 1 + /* Exit 4-byte address mode */ + int ret = qspi_exit_4byte_addr(&mDev); + if (ret != GQSPI_CODE_SUCCESS) + return; +#endif + +#ifdef USE_QNX + if (mDev.qnx) { + xzynq_qspi_close(mDev.qnx); + mDev.qnx = NULL; + } +#endif } /* Flash functions must be relocated to RAM for execution */ @@ -1403,3 +1135,5 @@ static int test_ext_flash(QspiDev_t* dev) return ret; } #endif /* TEST_EXT_FLASH */ + +#endif /* TARGET_zynq */ diff --git a/hal/zynq.h b/hal/zynq.h new file mode 100644 index 000000000..6e0a7b29f --- /dev/null +++ b/hal/zynq.h @@ -0,0 +1,346 @@ +/* zynq.h + * + * Copyright (C) 2024 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef _ZYNQMP_H_ +#define _ZYNQMP_H_ + +#define EL3 1 +#define EL1_NONSECURE 0 +#define HYP_GUEST 0 +#if 1 + #define FREERTOS_BSP +#endif + +/* Errata: 855873: An eviction might overtake a cache clean operation */ +#define CONFIG_ARM_ERRATA_855873 1 + +#define XPAR_PSU_DDR_0_S_AXI_BASEADDR 0x00000000 +#define XPAR_PSU_DDR_0_S_AXI_HIGHADDR 0x7FFFFFFF +#define XPAR_PSU_DDR_1_S_AXI_BASEADDR 0x800000000 +#define XPAR_PSU_DDR_1_S_AXI_HIGHADDR 0x87FFFFFFF + +/* Clocking */ +#define CORTEXA53_0_CPU_CLK_FREQ_HZ 1199880127 +#define CORTEXA53_0_TIMESTAMP_CLK_FREQ 99990005 +#define UART_MASTER_CLOCK 99990005 +#define GQSPI_CLK_FREQ_HZ 124987511 + +/* IOP System-level Control */ +#define IOU_SLCR_BASSE 0xFF180000 +#define IOU_TAPDLY_BYPASS (*((volatile uint32_t*)(IOU_SLCR_BASSE + 0x390))) +#define IOU_TAPDLY_BYPASS_LQSPI_RX (1UL << 2) /* LQSPI Tap Delay Enable on Rx Clock signal. 0: enable. 1: disable (bypass tap delay). */ + +/* QSPI bare-metal driver */ +/* Generic Quad-SPI */ +#define QSPI_BASE 0xFF0F0000UL +#define LQSPI_EN (*((volatile uint32_t*)(QSPI_BASE + 0x14))) /* SPI enable: 0: disable the SPI, 1: enable the SPI */ +#define GQSPI_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x100))) /* configuration register. */ +#define GQSPI_ISR (*((volatile uint32_t*)(QSPI_BASE + 0x104))) /* interrupt status register. */ +#define GQSPI_IER (*((volatile uint32_t*)(QSPI_BASE + 0x108))) /* interrupt enable register. */ +#define GQSPI_IDR (*((volatile uint32_t*)(QSPI_BASE + 0x10C))) /* interrupt disable register. */ +#define GQSPI_IMR (*((volatile uint32_t*)(QSPI_BASE + 0x110))) /* interrupt unmask register. */ +#define GQSPI_EN (*((volatile uint32_t*)(QSPI_BASE + 0x114))) /* enable register. 
*/ +#define GQSPI_TXD (*((volatile uint32_t*)(QSPI_BASE + 0x11C))) /* TX data register. Keyhole addresses for the transmit data FIFO. */ +#define GQSPI_RXD (*((volatile uint32_t*)(QSPI_BASE + 0x120))) /* RX data register. */ +#define GQSPI_TX_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x128))) /* TXFIFO Threshold Level register: (bits 5:0) Defines the level at which the TX_FIFO_NOT_FULL interrupt is generated */ +#define GQSPI_RX_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x12C))) /* RXFIFO threshold level register: (bits 5:0) Defines the level at which the RX_FIFO_NOT_EMPTY interrupt is generated */ +#define GQSPI_GPIO (*((volatile uint32_t*)(QSPI_BASE + 0x130))) +#define GQSPI_LPBK_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x138))) /* adjusting the internal loopback clock delay for read data capturing */ +#define GQSPI_GEN_FIFO (*((volatile uint32_t*)(QSPI_BASE + 0x140))) /* generic FIFO data register. Keyhole addresses for the generic FIFO. */ +#define GQSPI_SEL (*((volatile uint32_t*)(QSPI_BASE + 0x144))) /* select register. */ +#define GQSPI_FIFO_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x14C))) /* FIFO control register. */ +#define GQSPI_GF_THRESH (*((volatile uint32_t*)(QSPI_BASE + 0x150))) /* generic FIFO threshold level register: (bits 4:0) Defines the level at which the GEN_FIFO_NOT_FULL interrupt is generated */ +#define GQSPI_POLL_CFG (*((volatile uint32_t*)(QSPI_BASE + 0x154))) /* poll configuration register */ +#define GQSPI_P_TIMEOUT (*((volatile uint32_t*)(QSPI_BASE + 0x158))) /* poll timeout register. */ +#define GQSPI_XFER_STS (*((volatile uint32_t*)(QSPI_BASE + 0x15C))) /* transfer status register. 
*/ +#define QSPI_DATA_DLY_ADJ (*((volatile uint32_t*)(QSPI_BASE + 0x1F8))) /* adjusting the internal receive data delay for read data capturing */ +#define GQSPI_MOD_ID (*((volatile uint32_t*)(QSPI_BASE + 0x1FC))) +#define QSPIDMA_DST_STS (*((volatile uint32_t*)(QSPI_BASE + 0x808))) +#define QSPIDMA_DST_CTRL (*((volatile uint32_t*)(QSPI_BASE + 0x80C))) +#define QSPIDMA_DST_I_STS (*((volatile uint32_t*)(QSPI_BASE + 0x814))) +#define QSPIDMA_DST_CTRL2 (*((volatile uint32_t*)(QSPI_BASE + 0x824))) + +/* GQSPI Registers */ +/* GQSPI_CFG: Configuration registers */ +#define GQSPI_CFG_CLK_POL (1UL << 1) /* Clock polarity outside QSPI word: 0: QSPI clock is quiescent low, 1: QSPI clock is quiescent high */ +#define GQSPI_CFG_CLK_PH (1UL << 2) /* Clock phase: 1: the QSPI clock is inactive outside the word, 0: the QSPI clock is active outside the word */ +/* 000: divide by 2, 001: divide by 4, 010: divide by 8, + 011: divide by 16, 100: divide by 32, 101: divide by 64, + 110: divide by 128, 111: divide by 256 */ +#define GQSPI_CFG_BAUD_RATE_DIV_MASK (7UL << 3) +#define GQSPI_CFG_BAUD_RATE_DIV(d) ((d << 3) & GQSPI_CFG_BAUD_RATE_DIV_MASK) +#define GQSPI_CFG_WP_HOLD (1UL << 19) /* If set, Holdb and WPn pins are actively driven by the qspi controller in 1-bit and 2-bit modes. 
*/ +#define GQSPI_CFG_EN_POLL_TIMEOUT (1UL << 20) /* Poll Timeout Enable: 0: disable, 1: enable */ +#define GQSPI_CFG_ENDIAN (1UL << 26) /* Endian format transmit data register: 0: little endian, 1: big endian */ +#define GQSPI_CFG_START_GEN_FIFO (1UL << 28) /* Trigger Generic FIFO Command Execution: 0:disable executing requests, 1: enable executing requests */ +#define GQSPI_CFG_GEN_FIFO_START_MODE (1UL << 29) /* Start mode of Generic FIFO: 0: Auto Start Mode, 1: Manual Start Mode */ +#define GQSPI_CFG_MODE_EN_MASK (3UL << 30) /* Flash memory interface mode control: 00: IO mode, 10: DMA mode */ +#define GQSPI_CFG_MODE_EN(m) ((((unsigned long)(m)) << 30) & GQSPI_CFG_MODE_EN_MASK) /* cast first: (2 << 30) overflows a signed int */ +#define GQSPI_CFG_MODE_EN_IO GQSPI_CFG_MODE_EN(0) +#define GQSPI_CFG_MODE_EN_DMA GQSPI_CFG_MODE_EN(2) + +/* GQSPI_ISR / GQSPI_IER / GQSPI_IDR / GQSPI_IMR: Interrupt registers */ +#define GQSPI_IXR_RX_FIFO_EMPTY (1UL << 11) +#define GQSPI_IXR_GEN_FIFO_FULL (1UL << 10) +#define GQSPI_IXR_GEN_FIFO_NOT_FULL (1UL << 9) +#define GQSPI_IXR_TX_FIFO_EMPTY (1UL << 8) +#define GQSPI_IXR_GEN_FIFO_EMPTY (1UL << 7) +#define GQSPI_IXR_RX_FIFO_FULL (1UL << 5) +#define GQSPI_IXR_RX_FIFO_NOT_EMPTY (1UL << 4) +#define GQSPI_IXR_TX_FIFO_FULL (1UL << 3) +#define GQSPI_IXR_TX_FIFO_NOT_FULL (1UL << 2) +#define GQSPI_IXR_POLL_TIME_EXPIRE (1UL << 1) + +#define GQSPI_IXR_ALL_MASK (GQSPI_IXR_POLL_TIME_EXPIRE | GQSPI_IXR_TX_FIFO_NOT_FULL | \ + GQSPI_IXR_TX_FIFO_FULL | GQSPI_IXR_RX_FIFO_NOT_EMPTY | GQSPI_IXR_RX_FIFO_FULL | \ + GQSPI_IXR_GEN_FIFO_EMPTY | GQSPI_IXR_TX_FIFO_EMPTY | GQSPI_IXR_GEN_FIFO_NOT_FULL | \ + GQSPI_IXR_GEN_FIFO_FULL | GQSPI_IXR_RX_FIFO_EMPTY) +#define GQSPI_ISR_WR_TO_CLR_MASK 0x00000002U + +/* GQSPI_GEN_FIFO: FIFO data register */ +/* bits 0-7: Length in bytes (except when GQSPI_GEN_FIFO_EXP_MASK is set length as 255 chunks) */ +#define GQSPI_GEN_FIFO_IMM_MASK (0xFFUL) /* Immediate Data Field */ +#define GQSPI_GEN_FIFO_IMM(imm) ((imm) & GQSPI_GEN_FIFO_IMM_MASK) +#define GQSPI_GEN_FIFO_DATA_XFER (1UL << 8) /* Indicates
IMM is size, otherwise byte is sent directly in IMM reg */ +#define GQSPI_GEN_FIFO_EXP_MASK (1UL << 9) /* Length is Exponent (length / 255) */ +#define GQSPI_GEN_FIFO_MODE_MASK (3UL << 10) +#define GQSPI_GEN_FIFO_MODE(m) (((m) << 10) & GQSPI_GEN_FIFO_MODE_MASK) +#define GQSPI_GEN_FIFO_MODE_SPI GQSPI_GEN_FIFO_MODE(1) +#define GQSPI_GEN_FIFO_MODE_DSPI GQSPI_GEN_FIFO_MODE(2) +#define GQSPI_GEN_FIFO_MODE_QSPI GQSPI_GEN_FIFO_MODE(3) +#define GQSPI_GEN_FIFO_CS_MASK (3UL << 12) +#define GQSPI_GEN_FIFO_CS(c) (((c) << 12) & GQSPI_GEN_FIFO_CS_MASK) +#define GQSPI_GEN_FIFO_CS_LOWER GQSPI_GEN_FIFO_CS(1) +#define GQSPI_GEN_FIFO_CS_UPPER GQSPI_GEN_FIFO_CS(2) +#define GQSPI_GEN_FIFO_CS_BOTH GQSPI_GEN_FIFO_CS(3) +#define GQSPI_GEN_FIFO_BUS_MASK (3UL << 14) +#define GQSPI_GEN_FIFO_BUS(b) (((b) << 14) & GQSPI_GEN_FIFO_BUS_MASK) +#define GQSPI_GEN_FIFO_BUS_LOW GQSPI_GEN_FIFO_BUS(1) +#define GQSPI_GEN_FIFO_BUS_UP GQSPI_GEN_FIFO_BUS(2) +#define GQSPI_GEN_FIFO_BUS_BOTH GQSPI_GEN_FIFO_BUS(3) +#define GQSPI_GEN_FIFO_TX (1UL << 16) +#define GQSPI_GEN_FIFO_RX (1UL << 17) +#define GQSPI_GEN_FIFO_STRIPE (1UL << 18) /* Stripe data across the lower and upper data buses.
*/ +#define GQSPI_GEN_FIFO_POLL (1UL << 19) + +/* GQSPI_FIFO_CTRL */ +#define GQSPI_FIFO_CTRL_RST_GEN_FIFO (1UL << 0) +#define GQSPI_FIFO_CTRL_RST_TX_FIFO (1UL << 1) +#define GQSPI_FIFO_CTRL_RST_RX_FIFO (1UL << 2) + +/* QSPIDMA_DST_CTRL */ +#define QSPIDMA_DST_CTRL_DEF 0x403FFA00UL +#define QSPIDMA_DST_CTRL2_DEF 0x081BFFF8UL + +/* QSPIDMA_DST_STS */ +#define QSPIDMA_DST_STS_WTC 0xE000U + +/* QSPIDMA_DST_I_STS */ +#define QSPIDMA_DST_I_STS_ALL_MASK 0xFEU + +/* QSPI Configuration (bare-metal only) */ +#define GQSPI_CLK_DIV 2 /* (CLK / (2 << val) = BUS) - DIV 2 = 37.5 MHz */ +#define GQSPI_CS_ASSERT_CLOCKS 5 /* CS Setup Time (tCSS) - num of clock cycles, goes in IMM */ +#define GQSPI_FIFO_WORD_SZ 4 +#define GQSPI_TIMEOUT_TRIES 100000 +#define QSPI_FLASH_READY_TRIES 1000 + +/* QSPI Configuration */ +#ifndef GQSPI_QSPI_MODE +#define GQSPI_QSPI_MODE GQSPI_GEN_FIFO_MODE_QSPI +#endif +#ifndef GQPI_USE_DUAL_PARALLEL +#define GQPI_USE_DUAL_PARALLEL 1 /* 0=no stripe, 1=stripe */ +#endif +#ifndef GQPI_USE_4BYTE_ADDR +#define GQPI_USE_4BYTE_ADDR 1 +#endif +#ifndef GQSPI_DUMMY_READ +#define GQSPI_DUMMY_READ (8*8) /* Number of dummy clock cycles for reads */ +#endif + + + +/* Flash Parameters: + * Micron Serial NOR Flash Memory 64KB Sector Erase MT25QU512ABB + * Stacked device (two 512Mb (64MB)) + * Dual Parallel so total addressable size is double + */ +#ifndef FLASH_DEVICE_SIZE + #ifdef ZCU102 + /* 64*2 (dual parallel) = 128MB */ + #define FLASH_DEVICE_SIZE (2 * 64 * 1024 * 1024) /* MT25QU512ABB */ + #else + /* 128*2 (dual parallel) = 256MB */ + #define FLASH_DEVICE_SIZE (2 * 128 * 1024 * 1024) /* MT25QU01GBBB */ + #endif +#endif +#ifndef FLASH_PAGE_SIZE + #ifdef ZCU102 + #define FLASH_PAGE_SIZE 256 /* MT25QU512ABB */ + #else + #define FLASH_PAGE_SIZE 512 /* MT25QU01GBBB */ + #endif +#endif +#define FLASH_NUM_SECTORS (FLASH_DEVICE_SIZE/WOLFBOOT_SECTOR_SIZE) + + +/* Flash Commands */ +#define WRITE_ENABLE_CMD 0x06U +#define READ_SR_CMD 0x05U +#define WRITE_DISABLE_CMD 0x04U 
+#define READ_ID_CMD 0x9FU +#define MULTI_IO_READ_ID_CMD 0xAFU +#define READ_FSR_CMD 0x70U +#define ENTER_QSPI_MODE_CMD 0x35U +#define EXIT_QSPI_MODE_CMD 0xF5U +#define ENTER_4B_ADDR_MODE_CMD 0xB7U +#define EXIT_4B_ADDR_MODE_CMD 0xE9U + +#define FAST_READ_CMD 0x0BU +#define DUAL_READ_CMD 0x3BU +#define QUAD_READ_CMD 0x6BU +#define FAST_READ_4B_CMD 0x0CU +#define DUAL_READ_4B_CMD 0x3CU +#define QUAD_READ_4B_CMD 0x6CU + +#define PAGE_PROG_CMD 0x02U +#define DUAL_PROG_CMD 0xA2U +#define QUAD_PROG_CMD 0x22U +#define PAGE_PROG_4B_CMD 0x12U +#define DUAL_PROG_4B_CMD 0x12U +#define QUAD_PROG_4B_CMD 0x34U + +#define SEC_ERASE_CMD 0xD8U +#define SEC_4K_ERASE_CMD 0x20U +#define RESET_ENABLE_CMD 0x66U +#define RESET_MEMORY_CMD 0x99U + +#define WRITE_EN_MASK 0x02 /* 0=Write Enabled, 1=Disabled Write */ +#define FLASH_READY_MASK 0x80 /* 0=Busy, 1=Ready */ + + +/* Return Codes */ +#define GQSPI_CODE_SUCCESS 0 +#define GQSPI_CODE_FAILED -100 +#define GQSPI_CODE_TIMEOUT -101 + + + +/* eFUSE support */ +#define ZYNQMP_EFUSE_BASE 0xFFCC0000 +#define ZYNQMP_EFUSE_STATUS (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x0008))) +#define ZYNQMP_EFUSE_SEC_CTRL (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x1058))) +#define ZYNQMP_EFUSE_PPK0_0 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A0))) +#define ZYNQMP_EFUSE_PPK0_1 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A4))) +#define ZYNQMP_EFUSE_PPK0_2 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10A8))) +#define ZYNQMP_EFUSE_PPK0_3 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10AC))) +#define ZYNQMP_EFUSE_PPK0_4 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B0))) +#define ZYNQMP_EFUSE_PPK0_5 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B4))) +#define ZYNQMP_EFUSE_PPK0_6 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10B8))) +#define ZYNQMP_EFUSE_PPK0_7 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10BC))) +#define ZYNQMP_EFUSE_PPK0_8 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C0))) +#define ZYNQMP_EFUSE_PPK0_9 (*((volatile 
uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C4))) +#define ZYNQMP_EFUSE_PPK0_10 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10C8))) +#define ZYNQMP_EFUSE_PPK0_11 (*((volatile uint32_t*)(ZYNQMP_EFUSE_BASE + 0x10CC))) + +/* eFUSE STATUS Registers */ +#define ZYNQMP_EFUSE_STATUS_CACHE_DONE (1UL << 5) +#define ZYNQMP_EFUSE_STATUS_CACHE_LOAD (1UL << 4) + +/* eFUSE SEC_CTRL Registers */ +#define ZYNQMP_EFUSE_SEC_CTRL_PPK1_INVLD (3UL << 30) /* Revokes PPK1 */ +#define ZYNQMP_EFUSE_SEC_CTRL_PPK1_WRLK (1UL << 29) /* Locks writing to PPK1 eFuses */ +#define ZYNQMP_EFUSE_SEC_CTRL_PPK0_INVLD (3UL << 27) /* Revokes PPK0 */ +#define ZYNQMP_EFUSE_SEC_CTRL_PPK0_WRLK (1UL << 26) /* Locks writing to PPK0 eFuses */ +#define ZYNQMP_EFUSE_SEC_CTRL_RSA_EN (15UL << 11) /* Enables RSA Authentication during boot. All boots must be authenticated */ +#define ZYNQMP_EFUSE_SEC_CTRL_SEC_LOCK (1UL << 10) /* Disables the reboot into JTAG mode when doing a secure lockdown. */ +#define ZYNQMP_EFUSE_SEC_CTRL_JTAG_DIS (1UL << 5) /* Disables the JTAG controller. The only instructions available are BYPASS and IDCODE. */ +#define ZYNQMP_EFUSE_SEC_CTRL_ENC_ONLY (1UL << 2) /* Requires all boots to be encrypted using the eFuse key. 
*/ +#define ZYNQMP_EFUSE_SEC_CTRL_AES_WRLK (1UL << 1) /* Locks writing to the AES key section of eFuse */ +#define ZYNQMP_EFUSE_SEC_CTRL_AES_RDLK (1UL << 0) /* Locks the AES key CRC check function */ + + +/* UART Support */ +#define ZYNQMP_UART0_BASE 0xFF000000 +#define ZYNQMP_UART1_BASE 0xFF010000 + +#define ZYNQMP_UART_CR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x00))) +#define ZYNQMP_UART_MR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x04))) +#define ZYNQMP_UART_IDR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x0C))) /* Interrupt Disable Register */ +#define ZYNQMP_UART_ISR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x14))) /* Interrupt Status Register */ +#define ZYNQMP_UART_RXTOUT (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x1C))) +#define ZYNQMP_UART_RXWM (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x20))) +#define ZYNQMP_UART_TXWM (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x44))) +#define ZYNQMP_UART_SR (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x2C))) +#define ZYNQMP_UART_FIFO (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x30))) +#define ZYNQMP_UART_BR_GEN (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x18))) /* 2 - 65535: baud_sample */ +#define ZYNQMP_UART_BR_DIV (*((volatile uint32_t*)(DEBUG_UART_BASE + 0x34))) /* 4 - 255: Baud rate */ + + +/* UART Control Registers */ +#define ZYNQMP_UART_CR_TX_DIS 0x00000020 /* TX disable */ +#define ZYNQMP_UART_CR_TX_EN 0x00000010 /* TX enabled */ +#define ZYNQMP_UART_CR_RX_DIS 0x00000008 /* RX disable */ +#define ZYNQMP_UART_CR_RX_EN 0x00000004 /* RX enabled */ +#define ZYNQMP_UART_CR_TXRST 0x00000002 /* TX logic reset */ +#define ZYNQMP_UART_CR_RXRST 0x00000001 /* RX logic reset */ + +/* UART ISR Mask 0-13 bits */ +#define ZYNQMP_UART_ISR_MASK 0x3FFF + +/* UART Mode Registers */ +#define ZYNQMP_UART_MR_PARITY_NONE 0x00000020 /* No parity */ + +/* UART Channel Status Register (read only) */ +#define ZYNQMP_UART_SR_TXFULL 0x00000010U /* TX FIFO full */ +#define ZYNQMP_UART_SR_TXEMPTY 0x00000008U /* TX FIFO empty */ +#define 
ZYNQMP_UART_SR_RXFULL 0x00000004U /* RX FIFO full */ +#define ZYNQMP_UART_SR_RXEMPTY 0x00000002U /* RX FIFO empty */ + +/* UART Configuration */ +#if defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 0 + #define DEBUG_UART_BASE ZYNQMP_UART0_BASE +#elif defined(DEBUG_UART_NUM) && DEBUG_UART_NUM == 1 + #define DEBUG_UART_BASE ZYNQMP_UART1_BASE +#endif +#ifndef DEBUG_UART_BASE + /* default to UART0 */ + #define DEBUG_UART_BASE ZYNQMP_UART0_BASE +#endif + +#ifndef DEBUG_UART_BAUD + #define DEBUG_UART_BAUD 115200 + #define DEBUG_UART_DIV 6 +#endif + + +#define GICD_BASE 0xF9010000 +#define GICD_CTLR 0x0000 +#define GICD_TYPER 0x0004 +#define GICD_SGIR 0x0F00 +#define GICD_IGROUPRn 0x0080 + +#define GICC_BASE 0xF9020000 +#define GICC_PMR 0x0004 + + +#endif /* _ZYNQMP_H_ */ diff --git a/hal/zynq.ld b/hal/zynq.ld index ac37289a4..3042a9acb 100644 --- a/hal/zynq.ld +++ b/hal/zynq.ld @@ -11,8 +11,10 @@ _EL2_STACK_SIZE = DEFINED(_EL2_STACK_SIZE) ? _EL2_STACK_SIZE : 1024; /* Define Memories in the system */ MEMORY { - psu_ddr_0_MEM_0 : ORIGIN = 0x40000000, LENGTH = 0x100000 - + psu_ddr_0_MEM_0 : ORIGIN = 0x0, LENGTH = 0x7FF00000 + psu_ddr_1_MEM_0 : ORIGIN = 0x800000000, LENGTH = 0x80000000 + psu_ocm_ram_0_MEM_0 : ORIGIN = 0xFFFC0000, LENGTH = 0x40000 + psu_qspi_linear_0_MEM_0 : ORIGIN = 0xC0000000, LENGTH = 0x20000000 } /* Specify the default entry point to the program */ @@ -119,6 +121,10 @@ SECTIONS *(.got2) } > psu_ddr_0_MEM_0 +.note.gnu.build-id : { + KEEP (*(.note.gnu.build-id)) +} > psu_ddr_0_MEM_0 + .ctors : { . = ALIGN(64); __CTOR_LIST__ = .; @@ -227,6 +233,16 @@ SECTIONS __sdata_end = .; } > psu_ddr_0_MEM_0 +.sbss (NOLOAD) : { + . = ALIGN(64); + __sbss_start = .; + *(.sbss) + *(.sbss.*) + *(.gnu.linkonce.sb.*) + . = ALIGN(64); + __sbss_end = .; +} > psu_ddr_0_MEM_0 + .tdata : { . = ALIGN(64); __tdata_start = .; @@ -245,17 +261,6 @@ SECTIONS __tbss_end = .; } > psu_ddr_0_MEM_0 - -.sbss (NOLOAD) : { - . 
= ALIGN(64); - __sbss_start = .; - *(.sbss) - *(.sbss.*) - *(.gnu.linkonce.sb.*) - . = ALIGN(64); - __sbss_end = .; -} > psu_ddr_0_MEM_0 - .bss (NOLOAD) : { . = ALIGN(64); __bss_start__ = .; diff --git a/src/boot_aarch64.c b/src/boot_aarch64.c index 837e1838a..97abff8c7 100644 --- a/src/boot_aarch64.c +++ b/src/boot_aarch64.c @@ -1,6 +1,6 @@ /* boot_aarch64.c * - * Copyright (C) 2021 wolfSSL Inc. + * Copyright (C) 2024 wolfSSL Inc. * * This file is part of wolfBoot. * @@ -25,17 +25,22 @@ #include "loader.h" #include "wolfboot/wolfboot.h" +/* Linker exported variables */ extern unsigned int __bss_start__; extern unsigned int __bss_end__; -static volatile unsigned int cpu_id; -extern unsigned int *END_STACK; +#ifndef NO_XIP +extern unsigned int _stored_data; +extern unsigned int _start_data; +extern unsigned int _end_data; +#endif extern void main(void); extern void gicv2_init_secure(void); -void boot_entry_C(void) +void boot_entry_C(void) { - register unsigned int *dst; + register unsigned int *dst, *src; + /* Initialize the BSS section to 0 */ dst = &__bss_start__; while (dst < (unsigned int *)&__bss_end__) { @@ -43,6 +48,21 @@ void boot_entry_C(void) dst++; } +#ifndef NO_XIP + /* Copy data section from flash to RAM if necessary */ + src = (unsigned int*)&_stored_data; + dst = (unsigned int*)&_start_data; + if(src!=dst) { + while (dst < (unsigned int *)&_end_data) { + *dst = *src; + dst++; + src++; + } + } +#else + (void)src; +#endif + /* Run wolfboot! */ main(); } @@ -102,3 +122,20 @@ void RAMFUNCTION arch_reboot(void) } #endif + +void SynchronousInterrupt(void) +{ + +} +void IRQInterrupt(void) +{ + +} +void FIQInterrupt(void) +{ + +} +void SErrorInterrupt(void) +{ + +} \ No newline at end of file diff --git a/src/boot_aarch64_start.S b/src/boot_aarch64_start.S index 0d1494ba3..ac4b7a15d 100644 --- a/src/boot_aarch64_start.S +++ b/src/boot_aarch64_start.S @@ -1,6 +1,6 @@ /** * Aarch64 bootup - * Copyright (C) 2021 wolfSSL Inc. + * Copyright (C) 2024 wolfSSL Inc. 
* * This file is part of wolfBoot. * @@ -19,83 +19,380 @@ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA */ +/* Code is adapted from the default AMD/Xilinx boot.S */ -#define GICD_BASE 0xF9010000 -#define GICD_CTLR 0x0000 -#define GICD_TYPER 0x0004 -#define GICD_SGIR 0x0F00 -#define GICD_IGROUPRn 0x0080 - -#define GICC_BASE 0xF9020000 -#define GICC_PMR 0x0004 +#ifdef TARGET_zynq +#include "hal/zynq.h" +#endif #ifndef USE_BUILTIN_STARTUP -.section ".boot" + +.globl MMUTableL0 +.globl MMUTableL1 +.globl MMUTableL2 +.global _prestart +.global _boot + +.global __el3_stack +.global __el2_stack +.global __el1_stack +.global __el0_stack .global _vector_table -_vector_table: - mov x21, x0 // read ATAG/FDT address -4: ldr x1, =_vector_table // get start of .text in x1 - // Read current EL - mrs x0, CurrentEL - and x0, x0, #0x0C +.set EL3_stack, __el3_stack +.set EL2_stack, __el2_stack +.set EL1_stack, __el1_stack +.set EL0_stack, __el0_stack + +.set TT_S1_FAULT, 0x0 +.set TT_S1_TABLE, 0x3 + +.set L0Table, MMUTableL0 +.set L1Table, MMUTableL1 +.set L2Table, MMUTableL2 +.set vector_base, _vector_table +.set rvbar_base, 0xFD5C0040 + +# Cortex-A53 timestamp clock frequency +.set counterfreq, 99990005 + +.set MODE_EL1, 0x5 +.set DAIF_BIT, 0x1C0 + +.section .boot,"ax" +_boot: + mov x0, #0 + mov x1, #0 + mov x2, #0 + mov x3, #0 + mov x4, #0 + mov x5, #0 + mov x6, #0 + mov x7, #0 + mov x8, #0 + mov x9, #0 + mov x10, #0 + mov x11, #0 + mov x12, #0 + mov x13, #0 + mov x14, #0 + mov x15, #0 + mov x16, #0 + mov x17, #0 + mov x18, #0 + mov x19, #0 + mov x20, #0 + mov x21, #0 + mov x22, #0 + mov x23, #0 + mov x24, #0 + mov x25, #0 + mov x26, #0 + mov x27, #0 + mov x28, #0 + mov x29, #0 + mov x30, #0 + + /* Init EL3 or EL1 */ + mrs x0, currentEL + cmp x0, #0xC + beq InitEL3 + + cmp x0, #0x4 + beq InitEL1 + + /* go to error if current exception level is neither EL3 nor EL1 */ + b error + +InitEL3: +#if defined(EL3) && EL3 == 1 + /* Set vector table base 
address */ + ldr x1, =vector_base + msr VBAR_EL3,x1 + + /* Set reset vector address */ + /* Get the cpu ID */ + mrs x0, MPIDR_EL1 + and x0, x0, #0xFF + mov w0, w0 + ldr w2, =rvbar_base + /* calculate the rvbar base address for particular CPU core */ + mov w3, #0x8 + mul w0, w0, w3 + add w2, w2, w0 + /* store vector base address to RVBAR */ + str x1, [x2] + + /* Define stack pointer for current exception level */ + ldr x2,=EL3_stack + mov sp,x2 + + /* Enable Trapping of SIMD/FPU register for standalone BSP */ + mov x0, #0 +#ifndef FREERTOS_BSP + orr x0, x0, #(0x1 << 10) +#endif + msr CPTR_EL3, x0 + isb + + /* Clear FPUStatus variable to make sure that it contains current + * status of FPU i.e. disabled. In case of a warm restart execution + * when bss sections are not cleared, it may contain previously updated + * value which does not hold true now. + */ +#ifndef FREERTOS_BSP + ldr x0,=FPUStatus + str xzr, [x0] +#endif + /* Configure SCR_EL3 */ + mov w1, #0 /* Initial value of register is unknown */ + orr w1, w1, #(1 << 11) /* Set ST bit (Secure EL1 can access CNTPS_TVAL_EL1, CNTPS_CTL_EL1 & CNTPS_CVAL_EL1) */ + orr w1, w1, #(1 << 10) /* Set RW bit (EL1 is AArch64, as this is the Secure world) */ + orr w1, w1, #(1 << 3) /* Set EA bit (SError routed to EL3) */ + orr w1, w1, #(1 << 2) /* Set FIQ bit (FIQs routed to EL3) */ + orr w1, w1, #(1 << 1) /* Set IRQ bit (IRQs routed to EL3) */ + msr SCR_EL3, x1 + + /*configure cpu auxiliary control register EL1 */ + ldr x0,=0x80CA000 /* L1 Data prefetch control - 5, Enable device split throttle, 2 independent data prefetch streams */ +#if CONFIG_ARM_ERRATA_855873 + /* Set ENDCCASCI bit in CPUACTLR_EL1 register, to execute data + * cache clean operations as data cache clean and invalidate + */ + orr x0, x0, #(1 << 44) /* Set ENDCCASCI bit */ +#endif + msr S3_1_C15_C2_0, x0 /* CPUACTLR_EL1 */ + + /* program the counter frequency */ + ldr x0,=counterfreq + msr CNTFRQ_EL0, x0 + + /*Enable hardware coherency between cores*/ + mrs 
x0, S3_1_c15_c2_1 /* Read EL1 CPU Extended Control Register */ + orr x0, x0, #(1 << 6) /* Set the SMPEN bit */ + msr S3_1_c15_c2_1, x0 /* Write EL1 CPU Extended Control Register */ + isb + + tlbi ALLE3 + ic IALLU /* Invalidate I cache to PoU */ + bl invalidate_dcaches + dsb sy + isb + + ldr x1, =L0Table /* Get address of level 0 for TTBR0_EL3 */ + msr TTBR0_EL3, x1 /* Set TTBR0_EL3 */ + + /********************************************** + * Set up memory attributes + * This equates to: + * 0 = b01000100 = Normal, Inner/Outer Non-Cacheable + * 1 = b11111111 = Normal, Inner/Outer WB/WA/RA + * 2 = b00000000 = Device-nGnRnE + * 3 = b00000100 = Device-nGnRE + * 4 = b10111011 = Normal, Inner/Outer WT/WA/RA + **********************************************/ + ldr x1, =0x000000BB0400FF44 + msr MAIR_EL3, x1 + + /********************************************** + * Set up TCR_EL3 + * Physical Address Size PS = 010 -> 40bits 1TB + * Granual Size TG0 = 00 -> 4KB + * size offset of the memory region T0SZ = 24 -> (region size 2^(64-24) = 2^40) + ***************************************************/ + ldr x1,=0x80823518 - // EL == 3? - cmp x0, #12 - bne 2f -3: mrs x2, scr_el3 - orr x2, x2, 0x0F // scr_el3 |= NS|IRQ|FIQ|EA - msr scr_el3, x2 + msr TCR_EL3, x1 + isb - msr cptr_el3, xzr // enable FP/SIMD + /* Enable SError Exception for asynchronous abort */ + mrs x1,DAIF + bic x1,x1,#(0x1<<8) + msr DAIF,x1 - // EL == 1? -2: cmp x0, #4 - beq 1f + /* Configure SCTLR_EL3 */ + mov x1, #0 /* Most of the SCTLR_EL3 bits are unknown at reset */ + orr x1, x1, #(1 << 12) /* Enable I cache */ + orr x1, x1, #(1 << 3) /* Enable SP alignment check */ + orr x1, x1, #(1 << 2) /* Enable caches */ + orr x1, x1, #(1 << 0) /* Enable MMU */ + msr SCTLR_EL3, x1 + dsb sy + isb - // EL == 2? 
- mov x2, #3 << 20 - msr cptr_el2, x2 /* Enable FP/SIMD */ - b 0f + b boot_entry_C /* jump to start */ +#else + /* present exception level and selected exception level mismatch */ + b error +#endif -1: mov x0, #3 << 20 - msr cpacr_el1, x0 // Enable FP/SIMD for EL1 - msr sp_el1, x1 +InitEL1: +#if defined(EL1_NONSECURE) && EL1_NONSECURE == 1 + /* Set vector table base address */ + ldr x1, =vector_base + msr VBAR_EL1,x1 - /* Suspend slave CPUs */ -0: mrs x3, mpidr_el1 // read MPIDR_EL1 - and x3, x3, #3 // CPUID = MPIDR_EL1 & 0x03 - cbz x3, 8f // if 0, branch forward -7: wfi // infinite sleep - b 7b + /* Trap floating point access only in case of standalone BSP */ +#ifdef FREERTOS_BSP + mrs x0, CPACR_EL1 + orr x0, x0, #(0x3 << 20) + msr CPACR_EL1, x0 +#else + mrs x0, CPACR_EL1 + bic x0, x0, #(0x3 << 20) + msr CPACR_EL1, x0 +#endif + isb -8: mov sp, x1 // set stack pointer - bl boot_entry_C // boot_entry_C never returns - b 7b // go to sleep anyhow in case. -#endif /* USE_BUILTIN_STARTUP */ + /* Clear FPUStatus variable to make sure that it contains current + * status of FPU i.e. disabled. In case of a warm restart execution + * when bss sections are not cleared, it may contain previously updated + * value which does not hold true now. 
+ */ +#ifndef FREERTOS_BSP + ldr x0,=FPUStatus + str xzr, [x0] +#endif + /* Define stack pointer for current exception level */ + ldr x2,=EL1_stack + mov sp,x2 + /* Disable MMU first */ + mov x1,#0x0 + msr SCTLR_EL1, x1 + isb + + TLBI VMALLE1 + + ic IALLU /* Invalidate I cache to PoU */ + bl invalidate_dcaches + dsb sy + isb + + ldr x1, =L0Table /* Get address of level 0 for TTBR0_EL1 */ + msr TTBR0_EL1, x1 /* Set TTBR0_EL1 */ + + /********************************************** + * Set up memory attributes + * This equates to: + * 0 = b01000100 = Normal, Inner/Outer Non-Cacheable + * 1 = b11111111 = Normal, Inner/Outer WB/WA/RA + * 2 = b00000000 = Device-nGnRnE + * 3 = b00000100 = Device-nGnRE + * 4 = b10111011 = Normal, Inner/Outer WT/WA/RA + **********************************************/ + ldr x1, =0x000000BB0400FF44 + msr MAIR_EL1, x1 + + /********************************************** + * Set up TCR_EL1 + * Intermediate Physical Address Size IPS = 010 -> 40bits 1TB + * Granule Size TG0 = 00 -> 4KB + * size offset of the memory region T0SZ = 24 -> (region size 2^(64-24) = 2^40) + ***************************************************/ + ldr x1,=0x285800518 + + msr TCR_EL1, x1 + isb + + /* Enable SError Exception for asynchronous abort */ + mrs x1,DAIF + bic x1,x1,#(0x1<<8) + msr DAIF,x1 + + /* Enable MMU */ + mov x1,#0x0 + orr x1, x1, #(1 << 18) /* Set WFE non trapping */ + orr x1, x1, #(1 << 17) /* Set WFI non trapping */ + orr x1, x1, #(1 << 5) /* Set CP15 barrier enabled */ + orr x1, x1, #(1 << 12) /* Set I bit */ + orr x1, x1, #(1 << 2) /* Set C bit */ + orr x1, x1, #(1 << 0) /* Set M bit */ + msr SCTLR_EL1, x1 + isb + + bl boot_entry_C /* jump to start */ +#else + /* present exception level and selected exception level mismatch */ + b error +#endif + +error: b error + + +invalidate_dcaches: + dmb ISH + mrs x0, CLIDR_EL1 /* x0 = CLIDR */ + ubfx w2, w0, #24, #3 /* w2 = CLIDR.LoC */ + cmp w2, #0 /* LoC is 0? 
*/ + b.eq invalidatecaches_end /* No cleaning required and enable MMU */ + mov w1, #0 /* w1 = level iterator */ + +invalidatecaches_flush_level: + add w3, w1, w1, lsl #1 /* w3 = w1 * 3 (right-shift for cache type) */ + lsr w3, w0, w3 /* w3 = w0 >> w3 */ + ubfx w3, w3, #0, #3 /* w3 = cache type of this level */ + cmp w3, #2 /* No cache at this level? */ + b.lt invalidatecaches_next_level + + lsl w4, w1, #1 + msr CSSELR_EL1, x4 /* Select current cache level in CSSELR */ + isb /* ISB required to reflect new CSIDR */ + mrs x4, CCSIDR_EL1 /* w4 = CSIDR */ + + ubfx w3, w4, #0, #3 + add w3, w3, #4 /* w3 = log2(line size in bytes); CCSIDR.LineSize = log2(bytes) - 4 */ + ubfx w5, w4, #13, #15 + ubfx w4, w4, #3, #10 /* w4 = Way number */ + clz w6, w4 /* w6 = 32 - log2(number of ways) */ + +invalidatecaches_flush_set: + mov w8, w4 /* w8 = Way number */ +invalidatecaches_flush_way: + lsl w7, w1, #1 /* Fill level field */ + lsl w9, w5, w3 + orr w7, w7, w9 /* Fill index field */ + lsl w9, w8, w6 + orr w7, w7, w9 /* Fill way field */ + dc CISW, x7 /* Clean and invalidate by set/way */ + subs w8, w8, #1 /* Decrement way */ + b.ge invalidatecaches_flush_way + subs w5, w5, #1 /* Decrement set */ + b.ge invalidatecaches_flush_set + +invalidatecaches_next_level: + add w1, w1, #1 /* Next level */ + cmp w2, w1 + b.gt invalidatecaches_flush_level + +invalidatecaches_end: + ret + +/* .end removed: GNU as stops assembling at .end, which would silently drop gicv2_init_secure below */ + +#endif /* !USE_BUILTIN_STARTUP */ /* Initialize GIC 400 (GICv2) */ .global gicv2_init_secure gicv2_init_secure: - ldr x0, =GICD_BASE - mov w9, #0x3 /* EnableGrp0 | EnableGrp1 */ - str w9, [x0, GICD_CTLR] /* Secure GICD_CTLR */ - ldr w9, [x0, GICD_TYPER] - and w10, w9, #0x1f /* ITLinesNumber */ - cbz w10, 1f /* No SPIs */ - add x11, x0, GICD_IGROUPRn - mov w9, #~0 /* Config SPIs as Grp1 */ - str w9, [x11], #0x4 -0: str w9, [x11], #0x4 - sub w10, w10, #0x1 - cbnz w10, 0b - - ldr x1, =GICC_BASE /* GICC_CTLR */ - mov w0, #3 /* EnableGrp0 | EnableGrp1 */ - str w0, [x1] - - mov w0, #1 << 7 /* Allow NS access to GICC_PMR */ - str w0, [x1, 
#4] /* GICC_PMR */ + ldr x0, =GICD_BASE + mov w9, #0x3 /* EnableGrp0 | EnableGrp1 */ + str w9, [x0, GICD_CTLR] /* Secure GICD_CTLR */ + ldr w9, [x0, GICD_TYPER] + and w10, w9, #0x1f /* ITLinesNumber */ + cbz w10, 1f /* No SPIs */ + add x11, x0, GICD_IGROUPRn + mov w9, #~0 /* Config SPIs as Grp1 */ + str w9, [x11], #0x4 +0: str w9, [x11], #0x4 + sub w10, w10, #0x1 + cbnz w10, 0b + + ldr x1, =GICC_BASE /* GICC_CTLR */ + mov w0, #3 /* EnableGrp0 | EnableGrp1 */ + str w0, [x1] + + mov w0, #1 << 7 /* Allow NS access to GICC_PMR */ + str w0, [x1, #4] /* GICC_PMR */ 1: ret diff --git a/src/boot_aarch64_translation.S b/src/boot_aarch64_translation.S new file mode 100644 index 000000000..34e697226 --- /dev/null +++ b/src/boot_aarch64_translation.S @@ -0,0 +1,244 @@ +/** + * Aarch64 bootup + * Copyright (C) 2024 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +#ifndef USE_BUILTIN_STARTUP + +/* +* translation_table.S contains a static page table required by MMU for +* cortex-A53. This translation table is flat mapped (input address = output +* address) with default memory attributes defined for zynq ultrascale+ +* architecture. 
It utilizes translation granule size of 4KB with 2MB section +* size for initial 4GB memory and 1GB section size for memory after 4GB. +* The overview of translation table memory attributes is described below. +* +*| | Memory Range | Definition in Translation Table | +*|-----------------------|-----------------------------|-----------------------------------| +*| DDR | 0x0000000000 - 0x007FFFFFFF | Normal write-back Cacheable | +*| PL | 0x0080000000 - 0x00BFFFFFFF | Strongly Ordered | +*| QSPI, lower PCIe | 0x00C0000000 - 0x00EFFFFFFF | Strongly Ordered | +*| Reserved | 0x00F0000000 - 0x00F7FFFFFF | Unassigned | +*| STM Coresight | 0x00F8000000 - 0x00F8FFFFFF | Strongly Ordered | +*| GIC | 0x00F9000000 - 0x00F91FFFFF | Strongly Ordered | +*| Reserved | 0x00F9200000 - 0x00FCFFFFFF | Unassigned | +*| FPS, LPS slaves | 0x00FD000000 - 0x00FFBFFFFF | Strongly Ordered | +*| CSU, PMU | 0x00FFC00000 - 0x00FFDFFFFF | Strongly Ordered | +*| TCM, OCM | 0x00FFE00000 - 0x00FFFFFFFF | Normal inner write-back cacheable | +*| Reserved | 0x0100000000 - 0x03FFFFFFFF | Unassigned | +*| PL, PCIe | 0x0400000000 - 0x07FFFFFFFF | Strongly Ordered | +*| DDR | 0x0800000000 - 0x0FFFFFFFFF | Normal inner write-back cacheable | +*| PL, PCIe | 0x1000000000 - 0xBFFFFFFFFF | Strongly Ordered | +*| Reserved | 0xC000000000 - 0xFFFFFFFFFF | Unassigned | +* +* For DDR region 0x0000000000 - 0x007FFFFFFF, a system where DDR is less than +* 2GB, region after DDR and before PL is marked as undefined/reserved in +* translation table. Region 0xF9100000 - 0xF91FFFFF is reserved memory in +* 0x00F9000000 - 0x00F91FFFFF range, but it is marked as strongly ordered +* because minimum section size in translation table section is 2MB. Region +* 0x00FFC00000 - 0x00FFDFFFFF contains CSU and PMU memory which are marked as +* Device since it is less than 1MB and falls in a region with device memory. 
+*/ + +/* Code is adapted from the default AMD/Xilinx translation_table.S */ + +#ifdef TARGET_zynq +#include "hal/zynq.h" +#endif + +.globl MMUTableL0 +.globl MMUTableL1 +.globl MMUTableL2 + +.set reserved, 0x0 /* Fault*/ +#if EL1_NONSECURE +.set Memory, 0x405 | (2 << 8) | (0x0) /* normal writeback write allocate outer shared read write */ +#else +.set Memory, 0x405 | (3 << 8) | (0x0) /* normal writeback write allocate inner shared read write */ +#endif +.set Device, 0x409 | (1 << 53) | (1 << 54) | (0x0) /* strongly ordered read write non executable*/ +.section .mmu_tbl0,"a" + +MMUTableL0: + +.set SECT, MMUTableL1 /* 0x0000_0000 - 0x7F_FFFF_FFFF */ +.8byte SECT + 0x3 +.set SECT, MMUTableL1+0x1000 /* 0x80_0000_0000 - 0xFF_FFFF_FFFF */ +.8byte SECT + 0x3 + +.section .mmu_tbl1,"a" + +MMUTableL1: + +.set SECT, MMUTableL2 /* 0x0000_0000 - 0x3FFF_FFFF */ +.8byte SECT + 0x3 /* 1GB DDR */ + +.rept 0x3 /* 0x4000_0000 - 0xFFFF_FFFF */ +.set SECT, SECT + 0x1000 /*1GB DDR, 1GB PL, 2GB other devices n memory */ +.8byte SECT + 0x3 +.endr + +.set SECT,0x100000000 +.rept 0xC /* 0x0001_0000_0000 - 0x0003_FFFF_FFFF */ +.8byte SECT + reserved /* 12GB Reserved */ +.set SECT, SECT + 0x40000000 +.endr + +.rept 0x10 /* 0x0004_0000_0000 - 0x0007_FFFF_FFFF */ +.8byte SECT + Device /* 8GB PL, 8GB PCIe */ +.set SECT, SECT + 0x40000000 +.endr + + +#ifdef XPAR_PSU_DDR_1_S_AXI_BASEADDR +.set DDR_1_START, XPAR_PSU_DDR_1_S_AXI_BASEADDR +.set DDR_1_END, XPAR_PSU_DDR_1_S_AXI_HIGHADDR +.set DDR_1_SIZE, (DDR_1_END - DDR_1_START)+1 +.if DDR_1_SIZE > 0x800000000 +/* If DDR size is larger than 32GB, truncate to 32GB */ +.set DDR_1_REG, 0x20 +.else +.set DDR_1_REG, DDR_1_SIZE/0x40000000 +.endif +#else +.set DDR_1_REG, 0 +#warning "There's no DDR_1 in the HW design. 
MMU translation table marks 32 GB DDR address space as undefined" +#endif + +.set UNDEF_1_REG, 0x20 - DDR_1_REG + +.rept DDR_1_REG /* DDR based on size in hdf*/ +.8byte SECT + Memory +.set SECT, SECT+0x40000000 +.endr + +.rept UNDEF_1_REG /* reserved for region where ddr is absent */ +.8byte SECT + reserved +.set SECT, SECT+0x40000000 +.endr + +.rept 0x1C0 /* 0x0010_0000_0000 - 0x007F_FFFF_FFFF */ +.8byte SECT + Device /* 448 GB PL */ +.set SECT, SECT + 0x40000000 +.endr + + +.rept 0x100 /* 0x0080_0000_0000 - 0x00BF_FFFF_FFFF */ +.8byte SECT + Device /* 256GB PCIe */ +.set SECT, SECT + 0x40000000 +.endr + + +.rept 0x100 /* 0x00C0_0000_0000 - 0x00FF_FFFF_FFFF */ +.8byte SECT + reserved /* 256GB reserved */ +.set SECT, SECT + 0x40000000 +.endr + + +.section .mmu_tbl2,"a" + +MMUTableL2: + +.set SECT, 0 + +#ifdef XPAR_PSU_DDR_0_S_AXI_BASEADDR +.set DDR_0_START, XPAR_PSU_DDR_0_S_AXI_BASEADDR +.set DDR_0_END, XPAR_PSU_DDR_0_S_AXI_HIGHADDR +.set DDR_0_SIZE, (DDR_0_END - DDR_0_START)+1 +.if DDR_0_SIZE > 0x80000000 +/* If DDR size is larger than 2GB, truncate to 2GB */ +.set DDR_0_REG, 0x400 +.else +.set DDR_0_REG, DDR_0_SIZE/0x200000 +.endif +#else +.set DDR_0_REG, 0 +#warning "There's no DDR_0 in the HW design. 
MMU translation table marks 2 GB DDR address space as undefined" +#endif + +.set UNDEF_0_REG, 0x400 - DDR_0_REG + +.rept DDR_0_REG /* DDR based on size in hdf*/ +.8byte SECT + Memory +.set SECT, SECT+0x200000 +.endr + +.rept UNDEF_0_REG /* reserved for region where ddr is absent */ +.8byte SECT + reserved +.set SECT, SECT+0x200000 +.endr + +.rept 0x0200 /* 0x8000_0000 - 0xBFFF_FFFF */ +.8byte SECT + Device /* 1GB lower PL */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x0100 /* 0xC000_0000 - 0xDFFF_FFFF */ +.8byte SECT + Device /* 512MB QSPI */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x080 /* 0xE000_0000 - 0xEFFF_FFFF */ +.8byte SECT + Device /* 256MB lower PCIe */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x040 /* 0xF000_0000 - 0xF7FF_FFFF */ +.8byte SECT + reserved /* 128MB Reserved */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x8 /* 0xF800_0000 - 0xF8FF_FFFF */ +.8byte SECT + Device /* 16MB coresight */ +.set SECT, SECT+0x200000 +.endr + +/* 1MB RPU LLP is marked for 2MB region as the minimum block size in +translation table is 2MB and adjacent 63MB reserved region is +converted to 62MB */ + +.rept 0x1 /* 0xF900_0000 - 0xF91F_FFFF */ +.8byte SECT + Device /* 2MB RPU low latency port */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x1F /* 0xF920_0000 - 0xFCFF_FFFF */ +.8byte SECT + reserved /* 62MB Reserved */ +.set SECT, SECT+0x200000 +.endr + +.rept 0x8 /* 0xFD00_0000 - 0xFDFF_FFFF */ +.8byte SECT + Device /* 16MB FPS */ +.set SECT, SECT+0x200000 +.endr + +.rept 0xE /* 0xFE00_0000 - 0xFFBF_FFFF */ +.8byte SECT + Device /* 28MB LPS */ +.set SECT, SECT+0x200000 +.endr + + /* 0xFFC0_0000 - 0xFFDF_FFFF */ +.8byte SECT + Device /*2MB PMU/CSU */ + +.set SECT, SECT+0x200000 /* 0xFFE0_0000 - 0xFFFF_FFFF*/ +.8byte SECT + Memory /*2MB OCM/TCM*/ + +.end + +#endif /* !USE_BUILTIN_STARTUP */ diff --git a/src/boot_aarch64_vectors.S b/src/boot_aarch64_vectors.S new file mode 100644 index 000000000..02500e79d --- /dev/null +++ b/src/boot_aarch64_vectors.S @@ -0,0 +1,404 @@ +/** + 
* Aarch64 bootup + * Copyright (C) 2024 wolfSSL Inc. + * + * This file is part of wolfBoot. + * + * wolfBoot is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 3 of the License, or + * (at your option) any later version. + * + * wolfBoot is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1335, USA + */ + +/* Code is adapted from the default AMD/Xilinx asm_vectors.S */ + +#ifndef USE_BUILTIN_STARTUP + +#ifdef TARGET_zynq +#include "hal/zynq.h" +#endif + +.org 0 +.text + +.globl _boot +.globl _vector_table + +.globl FIQInterrupt +.globl IRQInterrupt +.globl SErrorInterrupt +.globl SynchronousInterrupt +.globl FPUStatus + +/* + * FPUContextSize is the size in bytes of the array where floating point registers + * are stored when required: q0-q31 (16 bytes each) plus the FPCR/FPSR pair (16 bytes) + * gives 528. The default size corresponds to the case when there is no nested + * interrupt. If nested interrupts in the application use floating point operations, + * FPUContextSize needs to be increased as per requirement. + */ + +.set FPUContextSize, 528 + +.macro saveregister /* pushes x0-x19, x29 and x30 as 11 pairs, 16 bytes per push */ + stp X0,X1, [sp,#-0x10]! + stp X2,X3, [sp,#-0x10]! + stp X4,X5, [sp,#-0x10]! + stp X6,X7, [sp,#-0x10]! + stp X8,X9, [sp,#-0x10]! + stp X10,X11, [sp,#-0x10]! + stp X12,X13, [sp,#-0x10]! + stp X14,X15, [sp,#-0x10]! + stp X16,X17, [sp,#-0x10]! + stp X18,X19, [sp,#-0x10]! + stp X29,X30, [sp,#-0x10]!
+.endm + +.macro restoreregister /* pops x29/x30, then x19 down to x0, mirroring saveregister */ + ldp X29,X30, [sp], #0x10 + ldp X18,X19, [sp], #0x10 + ldp X16,X17, [sp], #0x10 + ldp X14,X15, [sp], #0x10 + ldp X12,X13, [sp], #0x10 + ldp X10,X11, [sp], #0x10 + ldp X8,X9, [sp], #0x10 + ldp X6,X7, [sp], #0x10 + ldp X4,X5, [sp], #0x10 + ldp X2,X3, [sp], #0x10 + ldp X0,X1, [sp], #0x10 +.endm + +.macro savefloatregister /* clobbers x0-x3 */ + +/* Load the floating point context array address from FPUContextBase */ + ldr x1,=FPUContextBase + ldr x0, [x1] + +/* Save all the floating point registers (q0-q31), then FPCR and FPSR, to the array */ + stp q0,q1, [x0], #0x20 + stp q2,q3, [x0], #0x20 + stp q4,q5, [x0], #0x20 + stp q6,q7, [x0], #0x20 + stp q8,q9, [x0], #0x20 + stp q10,q11, [x0], #0x20 + stp q12,q13, [x0], #0x20 + stp q14,q15, [x0], #0x20 + stp q16,q17, [x0], #0x20 + stp q18,q19, [x0], #0x20 + stp q20,q21, [x0], #0x20 + stp q22,q23, [x0], #0x20 + stp q24,q25, [x0], #0x20 + stp q26,q27, [x0], #0x20 + stp q28,q29, [x0], #0x20 + stp q30,q31, [x0], #0x20 + mrs x2, FPCR + mrs x3, FPSR + stp x2, x3, [x0], #0x10 + +/* Store the advanced address of the floating point context array back to FPUContextBase */ + str x0, [x1] +.endm + +.macro restorefloatregister /* clobbers x0-x3 */ + +/* Load the current address of the floating point context array from FPUContextBase */ + ldr x1,=FPUContextBase + ldr x0, [x1] + +/* Restore FPCR and FPSR, then all the floating point registers, walking the array backwards */ + ldp x2, x3, [x0,#-0x10]! + msr FPCR, x2 + msr FPSR, x3 + ldp q30,q31, [x0,#-0x20]! + ldp q28,q29, [x0,#-0x20]! + ldp q26,q27, [x0,#-0x20]! + ldp q24,q25, [x0,#-0x20]! + ldp q22,q23, [x0,#-0x20]! + ldp q20,q21, [x0,#-0x20]! + ldp q18,q19, [x0,#-0x20]! + ldp q16,q17, [x0,#-0x20]! + ldp q14,q15, [x0,#-0x20]! + ldp q12,q13, [x0,#-0x20]! + ldp q10,q11, [x0,#-0x20]! + ldp q8,q9, [x0,#-0x20]! + ldp q6,q7, [x0,#-0x20]! + ldp q4,q5, [x0,#-0x20]! + ldp q2,q3, [x0,#-0x20]! + ldp q0,q1, [x0,#-0x20]!
+ +/* Store the rewound address of the floating point context array back to FPUContextBase */ + str x0, [x1] +.endm + +.macro exception_return /* eret; versal builds additionally emit dsb nsh and isb after it */ + eret +#if defined (versal) + dsb nsh + isb +#endif +.endm + +.org 0 + +.section .vectors, "a" + +_vector_table: +.set VBAR, _vector_table +.org VBAR +/* + * If the application is built for XEN GUEST as EL1 Non-secure, the following image + * header is required by XEN. + */ +.if (HYP_GUEST == 1) + + /* Valid Image header. */ + /* HW reset vector. */ + ldr x16, =_boot + br x16 +#if defined (versal) + dsb nsh + isb +#endif + /* text offset. */ + .dword 0 + /* image size. */ + .dword 0 + /* flags. */ + .dword 8 + /* RES0 */ + .dword 0 + .dword 0 + .dword 0 + + /* magic */ + .dword 0x644d5241 + /* RES0 */ + .dword 0 + /* End of Image header. */ +.endif + + b _boot /* entry at the start of the table (after the optional image header) jumps to _boot */ +.org (VBAR + 0x200) /* each .org below pins one entry to a fixed offset from _vector_table */ + b SynchronousInterruptHandler + +.org (VBAR + 0x280) + b IRQInterruptHandler + +.org (VBAR + 0x300) + b FIQInterruptHandler + +.org (VBAR + 0x380) + b SErrorInterruptHandler + + +SynchronousInterruptHandler: + saveregister + +/* Check whether the synchronous exception was caused by a trapped floating point access: ESR bits [31:26] equal to 0x7. */ +.if (EL3 == 1) + mrs x0, ESR_EL3 +.else + mrs x0, ESR_EL1 +.endif + and x0, x0, #(0x3F << 26) + mov x1, #(0x7 << 26) + cmp x0, x1 +/* If the exception is not due to floating point access, go to the synchronous handler */ + bne synchronoushandler + +/* + * If the exception occurred due to floating point access, enable the floating point + * access i.e. do not trap floating point instructions + */ + .if (EL3 == 1) + mrs x1,CPTR_EL3 + bic x1, x1, #(0x1<<10) + msr CPTR_EL3, x1 +.else + mrs x1,CPACR_EL1 + orr x1, x1, #(0x1<<20) + msr CPACR_EL1, x1 +.endif + isb + +/* If the floating point access was previously enabled, store FPU context + * registers(storefloat). + */ + ldr x0, =FPUStatus + ldrb w1,[x0] + cbnz w1, storefloat +/* + * If the floating point access was not enabled previously, save the status of + * floating point accessibility i.e.
enabled and store floating point context + * array address(FPUContext) to FPUContextBase. + */ + mov w1, #0x1 + strb w1, [x0] + ldr x0, =FPUContext + ldr x1, =FPUContextBase + str x0,[x1] + b restorecontext /* NOTE(review): nothing is saved on this first-trap path, yet the IRQ/FIQ epilogue will still see FP enabled and run restorefloatregister, which pre-decrements from FPUContext - confirm this case is handled by startup code */ +storefloat: + savefloatregister + b restorecontext +synchronoushandler: + bl SynchronousInterrupt +restorecontext: + restoreregister + exception_return + +IRQInterruptHandler: + + saveregister +/* Save CPTR_EL3 or CPACR_EL1, plus ELR and SPSR of the current EL, on the stack */ + .if (EL3 == 1) + mrs x0, CPTR_EL3 + mrs x1, ELR_EL3 + mrs x2, SPSR_EL3 +.else + mrs x0, CPACR_EL1 + mrs x1, ELR_EL1 + mrs x2, SPSR_EL1 +.endif + stp x0, x1, [sp,#-0x10]! + str x2, [sp,#-0x10]! /* single register still takes a 16-byte slot */ + +/* Trap floating point access while the interrupt handler runs */ + .if (EL3 == 1) + mrs x1,CPTR_EL3 + orr x1, x1, #(0x1<<10) + msr CPTR_EL3, x1 +.else + mrs x1,CPACR_EL1 + bic x1, x1, #(0x1<<20) + msr CPACR_EL1, x1 +.endif + isb + + bl IRQInterrupt +/* + * If floating point access was enabled during interrupt handling (the trap bit set + * above was changed back by SynchronousInterruptHandler), restore floating point registers. + */ + + .if (EL3 == 1) + mrs x0, CPTR_EL3 + ands x0, x0, #(0x1<<10) + bne RestorePrevState /* bit 10 still set: skip the FP restore */ +.else + mrs x0,CPACR_EL1 + ands x0, x0, #(0x1<<20) + beq RestorePrevState /* bit 20 still clear: skip the FP restore */ +.endif + + restorefloatregister + +/* Restore SPSR, ELR and CPTR_EL3 or CPACR_EL1 from the stack, in reverse push order */ +RestorePrevState: + ldr x2,[sp],0x10 + ldp x0, x1, [sp],0x10 + .if (EL3 == 1) + msr CPTR_EL3, x0 + msr ELR_EL3, x1 + msr SPSR_EL3, x2 +.else + msr CPACR_EL1, x0 + msr ELR_EL1, x1 + msr SPSR_EL1, x2 +.endif + restoreregister + exception_return + +FIQInterruptHandler: + + saveregister +/* Save CPTR_EL3 or CPACR_EL1, plus ELR and SPSR of the current EL, on the stack */ + .if (EL3 == 1) + mrs x0, CPTR_EL3 + mrs x1, ELR_EL3 + mrs x2, SPSR_EL3 +.else + mrs x0, CPACR_EL1 + mrs x1, ELR_EL1 + mrs x2, SPSR_EL1 +.endif + stp x0, x1, [sp,#-0x10]! + str x2, [sp,#-0x10]!
+ +/* Trap floating point access while the interrupt handler runs */ + .if (EL3 == 1) + mrs x1,CPTR_EL3 + orr x1, x1, #(0x1<<10) + msr CPTR_EL3, x1 +.else + mrs x1,CPACR_EL1 + bic x1, x1, #(0x1<<20) + msr CPACR_EL1, x1 +.endif + isb + bl FIQInterrupt + /* + * If floating point access was enabled during interrupt handling (the trap bit set + * above was changed back by SynchronousInterruptHandler), restore floating point registers. + */ + + .if (EL3 == 1) + mrs x0, CPTR_EL3 + ands x0, x0, #(0x1<<10) + bne RestorePrevStatefiq /* bit 10 still set: skip the FP restore */ +.else + mrs x0,CPACR_EL1 + ands x0, x0, #(0x1<<20) + beq RestorePrevStatefiq /* bit 20 still clear: skip the FP restore */ +.endif + + restorefloatregister + + /* Restore SPSR, ELR and CPTR_EL3 or CPACR_EL1 from the stack, in reverse push order */ +RestorePrevStatefiq: + ldr x2,[sp],0x10 + ldp x0, x1, [sp],0x10 + .if (EL3 == 1) + msr CPTR_EL3, x0 + msr ELR_EL3, x1 + msr SPSR_EL3, x2 +.else + msr CPACR_EL1, x0 + msr ELR_EL1, x1 + msr SPSR_EL1, x2 +.endif + restoreregister + exception_return + +SErrorInterruptHandler: + + saveregister + + bl SErrorInterrupt + + restoreregister + + exception_return + + +.align 8 +/* Array to store floating point registers, followed by FPCR and FPSR, per saved context */ +FPUContext: .skip FPUContextSize + +/* Stores the current address within the floating point context array */ +FPUContextBase: .skip 8 + +FPUStatus: .skip 1 /* set to 1 by SynchronousInterruptHandler on the first trapped floating point access */ + +.end + +#endif /* !USE_BUILTIN_STARTUP */