From 8e08387ea0ad78cdb944609493f3d2cab582931e Mon Sep 17 00:00:00 2001 From: ChinYikMing Date: Fri, 27 Dec 2024 04:33:15 +0800 Subject: [PATCH] Handle signals properly Page faults trigger a trap, which is handled by do_page_fault(). This function calls lock_mm_and_find_vma() to locate and validate the virtual memory area (VMA), returning the VMA if valid, or NULL otherwise. Typically, attempts to read or write to a NULL VMA result in a NULL return. If the VMA is invalid, bad_area_nosemaphore() is invoked, which checks whether the fault originated in kernel or user space. For user-space faults, a SIGSEGV signal is sent to the user process via do_trap(), which determines if the signal should be ignored or blocked, and if not, adds it to the task's pending signal list. Kernel-space faults cause the kernel to crash via die_kernel_fault(). Before returning to user space (via the resume_userspace label), pending work (indicated by the _TIF_WORK_MASK mask) is processed by do_work_pending(). Signals are handled by do_signal(), which in turn calls handle_signal(). handle_signal() creates a signal handler frame that will be jumped to upon returning to user space. This frame creation process might modifies the Control and Status Register (CSR) SEPC. If there are a signal pending, the SEPC CSR overwritten the original trap/fault PC. This caused an assertion failure in get_ppn_and_offset() when running the vi program, reported in [1]. To address this, a variable last_csr_sepc was introduced to store the original SEPC CSR value before entering the trap path. After returning to user space, last_csr_sepc is compared with the current SEPC CSR value. If they differ, the fault ld/st instruction returns early and jumps to the signal handler frame. This commit prevents emulator crashes when the guest OS accesses invalid memory. Consequently, reads or writes to a NULL value now correctly result in a segmentation fault. In addition, two user-space programs: mem_null_read and mem_null_write are bundled into the rootfs for verification. Original behaviour: 1. $ make system ENABLE_SYSTEM=1 -j$(nproc) # run system emulation 2. $ mem_null_read # Emulator crashes 3. $ mem_null_write # Emulator crashes 4. $ vi # Emulator crashes Patch Reproduce / Testing procedure: 1. $ make system ENABLE_SYSTEM=1 -j$(nproc) # run system emulation 2. $ mem_null_read # NULL read causes SIGSEGV with no crashing 3. $ mem_null_write # NULL write causes SIGSEGV with no crashing 4. $ vi # w/o filename causes SIGSEGV with no crashing [1] #508 --- src/emulate.c | 12 +++++++-- src/riscv_private.h | 6 +++++ src/system.c | 59 ++++++++++++++++++++++++++++++++++++++++----- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/src/emulate.c b/src/emulate.c index 00317e4c..f1d4b2c2 100644 --- a/src/emulate.c +++ b/src/emulate.c @@ -46,6 +46,7 @@ static bool need_clear_block_map = false; static uint32_t reloc_enable_mmu_jalr_addr; static bool reloc_enable_mmu = false; bool need_retranslate = false; +bool need_handle_signal = false; #endif static void rv_trap_default_handler(riscv_t *rv) @@ -379,8 +380,12 @@ static uint32_t peripheral_update_ctr = 64; { \ IIF(RV32_HAS(SYSTEM))(ctr++;, ) cycle++; \ code; \ - nextop: \ - PC += __rv_insn_##inst##_len; \ + IIF(RV32_HAS(SYSTEM)) \ + ( \ + if (need_handle_signal) { \ + need_handle_signal = false; \ + return true; \ + }, ) nextop : PC += __rv_insn_##inst##_len; \ IIF(RV32_HAS(SYSTEM)) \ (IIF(RV32_HAS(JIT))( \ , if (unlikely(need_clear_block_map)) { \ @@ -1219,6 +1224,9 @@ static void _trap_handler(riscv_t *rv) mode = rv->csr_stvec & 0x3; cause = rv->csr_scause; rv->csr_sepc = rv->PC; +#if RV32_HAS(SYSTEM) + rv->last_csr_sepc = rv->csr_sepc; +#endif } else { /* machine */ const uint32_t mstatus_mie = (rv->csr_mstatus & MSTATUS_MIE) >> MSTATUS_MIE_SHIFT; diff --git a/src/riscv_private.h b/src/riscv_private.h index 0ae6f279..4e052760 100644 --- a/src/riscv_private.h +++ b/src/riscv_private.h @@ -201,6 +201,12 @@ struct riscv_internal { #if RV32_HAS(SYSTEM) /* The flag is used to indicate the current emulation is in a trap */ bool is_trapped; + + /* + * The flag that stores the SEPC CSR at the trap point for corectly + * executing signal handler. + */ + uint32_t last_csr_sepc; #endif }; diff --git a/src/system.c b/src/system.c index 62ecbc11..f0bc0359 100644 --- a/src/system.c +++ b/src/system.c @@ -25,6 +25,17 @@ void emu_update_uart_interrupts(riscv_t *rv) plic_update_interrupts(attr->plic); } +/* + * Linux kernel might create signal frame when returning from trap + * handling, which modifies the SEPC CSR. Thus, the fault instruction + * cannot always redo. For example, invalid memory access causes SIGSEGV. + */ +extern bool need_handle_signal; +#define CHECK_PENDING_SIGNAL(rv, signal_flag) \ + do { \ + signal_flag = (rv->csr_sepc != rv->last_csr_sepc); \ + } while (0) + #define MMIO_R 1 #define MMIO_W 0 @@ -297,8 +308,14 @@ static uint32_t mmu_read_w(riscv_t *rv, const uint32_t addr) uint32_t level; pte_t *pte = mmu_walk(rv, addr, &level); bool ok = MMU_FAULT_CHECK(read, rv, pte, addr, PTE_R); - if (unlikely(!ok)) + if (unlikely(!ok)) { +#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) + CHECK_PENDING_SIGNAL(rv, need_handle_signal); + if (need_handle_signal) + return 0; +#endif pte = mmu_walk(rv, addr, &level); + } { get_ppn_and_offset(); @@ -323,8 +340,14 @@ static uint16_t mmu_read_s(riscv_t *rv, const uint32_t addr) uint32_t level; pte_t *pte = mmu_walk(rv, addr, &level); bool ok = MMU_FAULT_CHECK(read, rv, pte, addr, PTE_R); - if (unlikely(!ok)) + if (unlikely(!ok)) { +#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) + CHECK_PENDING_SIGNAL(rv, need_handle_signal); + if (need_handle_signal) + return 0; +#endif pte = mmu_walk(rv, addr, &level); + } get_ppn_and_offset(); return memory_read_s(ppn | offset); @@ -338,8 +361,14 @@ static uint8_t mmu_read_b(riscv_t *rv, const uint32_t addr) uint32_t level; pte_t *pte = mmu_walk(rv, addr, &level); bool ok = MMU_FAULT_CHECK(read, rv, pte, addr, PTE_R); - if (unlikely(!ok)) + if (unlikely(!ok)) { +#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) + CHECK_PENDING_SIGNAL(rv, need_handle_signal); + if (need_handle_signal) + return 0; +#endif pte = mmu_walk(rv, addr, &level); + } { get_ppn_and_offset(); @@ -364,8 +393,14 @@ static void mmu_write_w(riscv_t *rv, const uint32_t addr, const uint32_t val) uint32_t level; pte_t *pte = mmu_walk(rv, addr, &level); bool ok = MMU_FAULT_CHECK(write, rv, pte, addr, PTE_W); - if (unlikely(!ok)) + if (unlikely(!ok)) { +#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) + CHECK_PENDING_SIGNAL(rv, need_handle_signal); + if (need_handle_signal) + return; +#endif pte = mmu_walk(rv, addr, &level); + } { get_ppn_and_offset(); @@ -390,8 +425,14 @@ static void mmu_write_s(riscv_t *rv, const uint32_t addr, const uint16_t val) uint32_t level; pte_t *pte = mmu_walk(rv, addr, &level); bool ok = MMU_FAULT_CHECK(write, rv, pte, addr, PTE_W); - if (unlikely(!ok)) + if (unlikely(!ok)) { +#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) + CHECK_PENDING_SIGNAL(rv, need_handle_signal); + if (need_handle_signal) + return; +#endif pte = mmu_walk(rv, addr, &level); + } get_ppn_and_offset(); memory_write_s(ppn | offset, (uint8_t *) &val); @@ -405,8 +446,14 @@ static void mmu_write_b(riscv_t *rv, const uint32_t addr, const uint8_t val) uint32_t level; pte_t *pte = mmu_walk(rv, addr, &level); bool ok = MMU_FAULT_CHECK(write, rv, pte, addr, PTE_W); - if (unlikely(!ok)) + if (unlikely(!ok)) { +#if RV32_HAS(SYSTEM) && !RV32_HAS(ELF_LOADER) + CHECK_PENDING_SIGNAL(rv, need_handle_signal); + if (need_handle_signal) + return; +#endif pte = mmu_walk(rv, addr, &level); + } { get_ppn_and_offset();