From 9e00743aba832f3f30ecb017d3345baf1f372140 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Feb 2024 10:46:43 +0100 Subject: [PATCH 1/8] powerpc/trace: Restrict hash_fault trace event to HASH MMU 'perf list' on powerpc 8xx shows an event named "1:hash_fault". This event is pointless because trace_hash_fault() is called only from mm/book3s64/hash_utils.c Only define it when CONFIG_PPC_64S_HASH_MMU is selected. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/85a86e51b4ab26ce4b592984cc0a0851a3cc9479.1708076780.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/trace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/trace.h b/arch/powerpc/include/asm/trace.h index 82cc2c6704e6e..d9ac3a4f46e1f 100644 --- a/arch/powerpc/include/asm/trace.h +++ b/arch/powerpc/include/asm/trace.h @@ -267,6 +267,7 @@ TRACE_EVENT_FN(opal_exit, ); #endif +#ifdef CONFIG_PPC_64S_HASH_MMU TRACE_EVENT(hash_fault, TP_PROTO(unsigned long addr, unsigned long access, unsigned long trap), @@ -286,7 +287,7 @@ TRACE_EVENT(hash_fault, TP_printk("hash fault with addr 0x%lx and access = 0x%lx trap = 0x%lx", __entry->addr, __entry->access, __entry->trap) ); - +#endif TRACE_EVENT(tlbie, From d5835fb60bad641dbae64fe30c02f10857bf4647 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Feb 2024 11:10:36 +0100 Subject: [PATCH 2/8] powerpc: Use user_mode() macro when possible There is a nice macro to check user mode. Use it instead of open coding anding with MSR_PR to increase readability and avoid having to comment what that anding is for. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/fbf74887dcf1f1ba9e1680fc3247cbb581b00662.1708078228.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/interrupt.h | 2 +- arch/powerpc/kernel/syscall.c | 2 +- arch/powerpc/kernel/traps.c | 4 ++-- arch/powerpc/lib/sstep.c | 23 +++++++++++------------ arch/powerpc/perf/core-book3s.c | 2 +- arch/powerpc/xmon/xmon.c | 4 ++-- 6 files changed, 18 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index a4196ab1d0167..7b610864b3645 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -97,7 +97,7 @@ DECLARE_STATIC_KEY_FALSE(interrupt_exit_not_reentrant); static inline bool is_implicit_soft_masked(struct pt_regs *regs) { - if (regs->msr & MSR_PR) + if (user_mode(regs)) return false; if (regs->nip >= (unsigned long)__end_soft_masked) diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c index 77fedb190c936..f6f868e817e63 100644 --- a/arch/powerpc/kernel/syscall.c +++ b/arch/powerpc/kernel/syscall.c @@ -31,7 +31,7 @@ notrace long system_call_exception(struct pt_regs *regs, unsigned long r0) user_exit_irqoff(); BUG_ON(regs_is_unrecoverable(regs)); - BUG_ON(!(regs->msr & MSR_PR)); + BUG_ON(!user_mode(regs)); BUG_ON(arch_irq_disabled_regs(regs)); #ifdef CONFIG_PPC_PKEY diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index 11e062b47d3f8..f23430adb68ad 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -404,7 +404,7 @@ noinstr void hv_nmi_check_nonrecoverable(struct pt_regs *regs) return; if (!(regs->msr & MSR_HV)) return; - if (regs->msr & MSR_PR) + if (user_mode(regs)) return; /* @@ -1510,7 +1510,7 @@ static void do_program_check(struct pt_regs *regs) if (!is_kernel_addr(bugaddr) && !(regs->msr & MSR_IR)) bugaddr += 
PAGE_OFFSET; - if (!(regs->msr & MSR_PR) && /* not user-mode */ + if (!user_mode(regs) && report_bug(bugaddr, regs) == BUG_TRAP_TYPE_WARN) { regs_add_return_ip(regs, 4); return; diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 5766180f5380a..e65f3fb68d06b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1429,7 +1429,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; case 18: /* rfid, scary */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = RFI; return 0; @@ -1742,13 +1742,13 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 1; #endif case 83: /* mfmsr */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MFMSR; op->reg = rd; return 0; case 146: /* mtmsr */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MTMSR; op->reg = rd; @@ -1756,7 +1756,7 @@ int analyse_instr(struct instruction_op *op, const struct pt_regs *regs, return 0; #ifdef CONFIG_PPC64 case 178: /* mtmsrd */ - if (regs->msr & MSR_PR) + if (user_mode(regs)) goto priv; op->type = MTMSR; op->reg = rd; @@ -3437,14 +3437,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) * stored in the thread_struct. If the instruction is in * the kernel, we must not touch the state in the thread_struct. */ - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) + if (!user_mode(regs) && !(regs->msr & MSR_FP)) return 0; err = do_fp_load(op, ea, regs, cross_endian); break; #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) + if (!user_mode(regs) && !(regs->msr & MSR_VEC)) return 0; err = do_vec_load(op->reg, ea, size, regs, cross_endian); break; @@ -3459,7 +3459,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) */ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; - if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) + if (!user_mode(regs) && !(regs->msr & msrbit)) return 0; err = do_vsx_load(op, ea, regs, cross_endian); break; @@ -3495,8 +3495,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) } #endif if ((op->type & UPDATE) && size == sizeof(long) && - op->reg == 1 && op->update_reg == 1 && - !(regs->msr & MSR_PR) && + op->reg == 1 && op->update_reg == 1 && !user_mode(regs) && ea >= regs->gpr[1] - STACK_INT_FRAME_SIZE) { err = handle_stack_update(ea, regs); break; @@ -3508,14 +3507,14 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_FP)) + if (!user_mode(regs) && !(regs->msr & MSR_FP)) return 0; err = do_fp_store(op, ea, regs, cross_endian); break; #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (!(regs->msr & MSR_PR) && !(regs->msr & MSR_VEC)) + if (!user_mode(regs) && !(regs->msr & MSR_VEC)) return 0; err = do_vec_store(op->reg, ea, size, regs, cross_endian); break; @@ -3530,7 +3529,7 @@ int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op) */ if (op->reg >= 32 && (op->vsx_flags & VSX_CHECK_VEC)) msrbit = MSR_VEC; - if (!(regs->msr & MSR_PR) && !(regs->msr & msrbit)) + if (!user_mode(regs) && !(regs->msr & msrbit)) return 0; err = do_vsx_store(op, ea, regs, cross_endian); break; diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 6f0d46c530275..6b5f8a94e7d89 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -256,7 +256,7 @@ static 
bool regs_sipr(struct pt_regs *regs) static inline u32 perf_flags_from_msr(struct pt_regs *regs) { - if (regs->msr & MSR_PR) + if (user_mode(regs)) return PERF_RECORD_MISC_USER; if ((regs->msr & MSR_HV) && freeze_events_kernel != MMCR0_FCHV) return PERF_RECORD_MISC_HYPERVISOR; diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index f413c220165c0..c85fa3f0dd3b1 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -1818,8 +1818,8 @@ static void print_bug_trap(struct pt_regs *regs) const struct bug_entry *bug; unsigned long addr; - if (regs->msr & MSR_PR) - return; /* not in kernel */ + if (user_mode(regs)) + return; addr = regs->nip; /* address of trap instruction */ if (!is_kernel_addr(addr)) return; From 09ca1b11716f96461a4675eb0418d5cb97687389 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Feb 2024 11:12:05 +0100 Subject: [PATCH 3/8] powerpc: Implement set_memory_rox() Same as x86 and s390, add set_memory_rox() to avoid doing one pass with set_memory_ro() and a second pass with set_memory_x(). See commit 60463628c9e0 ("x86/mm: Implement native set_memory_rox()") and commit 22e99fa56443 ("s390/mm: implement set_memory_rox()") for more information. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/dc9a794f82ab62572d7d0be5cb4b8b27920a4f78.1708078316.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/set_memory.h | 7 +++++++ arch/powerpc/mm/pageattr.c | 4 ++++ 2 files changed, 11 insertions(+) diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index 7ebc807aa8cc8..9a025b776a4b3 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -8,6 +8,7 @@ #define SET_MEMORY_X 3 #define SET_MEMORY_NP 4 /* Set memory non present */ #define SET_MEMORY_P 5 /* Set memory present */ +#define SET_MEMORY_ROX 6 int change_memory_attr(unsigned long addr, int numpages, long action); @@ -41,4 +42,10 @@ static inline int set_memory_p(unsigned long addr, int numpages) return change_memory_attr(addr, numpages, SET_MEMORY_P); } +static inline int set_memory_rox(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_ROX); +} +#define set_memory_rox set_memory_rox + #endif diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index 6163e484bc6d4..421db7c4f2a48 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -38,6 +38,10 @@ static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) /* Don't clear DIRTY bit */ pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_RO); break; + case SET_MEMORY_ROX: + /* Don't clear DIRTY bit */ + pte_update_delta(ptep, addr, _PAGE_KERNEL_RW & ~_PAGE_DIRTY, _PAGE_KERNEL_ROX); + break; case SET_MEMORY_RW: pte_update_delta(ptep, addr, _PAGE_KERNEL_RO, _PAGE_KERNEL_RW); break; From f7f18e30b468458b2611ca65d745b50edcda9f43 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Feb 2024 11:13:28 +0100 Subject: [PATCH 4/8] powerpc/kprobes: Handle error returned by set_memory_rox() set_memory_rox() can fail. In case it fails, free allocated memory and return NULL. 
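The fix follows the usual allocate, protect, unwind-on-failure shape. As a minimal sketch (assuming the page comes from module_alloc(), which matches the module_memfree() unwind in the diff below; the strict_module_rwx_enabled() guard from the real function is omitted here):

	void *page = module_alloc(PAGE_SIZE);

	if (!page)
		return NULL;

	/* Sketch only: set_memory_rox() may fail, e.g. when the page
	 * table walk cannot apply the new protections. Never hand out
	 * a page whose protections are not what the caller expects.
	 */
	if (set_memory_rox((unsigned long)page, 1)) {
		module_memfree(page);
		return NULL;
	}

	return page;
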
Link: https://github.com/KSPP/linux/issues/7 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/b4907cf4339bd086abc40430d91311436cb0c18e.1708078401.git.christophe.leroy@csgroup.eu --- arch/powerpc/kernel/kprobes.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index b20ee72e873a1..bbca90a5e2ec0 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -134,10 +134,16 @@ void *alloc_insn_page(void) if (!page) return NULL; - if (strict_module_rwx_enabled()) - set_memory_rox((unsigned long)page, 1); + if (strict_module_rwx_enabled()) { + int err = set_memory_rox((unsigned long)page, 1); + if (err) + goto error; + } return page; +error: + module_memfree(page); + return NULL; } int arch_prepare_kprobe(struct kprobe *p) From 8f17bd2f4196eedd32e84bb4b7c3c1e4850c3dc0 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Feb 2024 11:16:26 +0100 Subject: [PATCH 5/8] powerpc: Handle error in mark_rodata_ro() and mark_initmem_nx() mark_rodata_ro() and mark_initmem_nx() use functions that can fail like set_memory_nx() and set_memory_ro(), leading to a not protected kernel. In case of failure, panic. Link: https://github.com/KSPP/linux/issues/7 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/836f75710daef12dfea55f8fb6055d7fdaf716e3.1708078577.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s32/mmu.c | 7 ++++-- arch/powerpc/mm/mmu_decl.h | 8 +++---- arch/powerpc/mm/nohash/8xx.c | 33 +++++++++++++++++----------- arch/powerpc/mm/nohash/e500.c | 10 ++++++--- arch/powerpc/mm/pgtable_32.c | 39 ++++++++++++++++++++++++---------- 5 files changed, 65 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/mm/book3s32/mmu.c b/arch/powerpc/mm/book3s32/mmu.c index 5445587bfe841..100f999871bc3 100644 --- a/arch/powerpc/mm/book3s32/mmu.c +++ b/arch/powerpc/mm/book3s32/mmu.c @@ -193,7 +193,7 @@ static bool is_module_segment(unsigned long addr) return true; } -void mmu_mark_initmem_nx(void) +int mmu_mark_initmem_nx(void) { int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 8 : 4; int i; @@ -230,9 +230,10 @@ void mmu_mark_initmem_nx(void) mtsr(mfsr(i << 28) | 0x10000000, i << 28); } + return 0; } -void mmu_mark_rodata_ro(void) +int mmu_mark_rodata_ro(void) { int nb = mmu_has_feature(MMU_FTR_USE_HIGH_BATS) ? 
8 : 4; int i; @@ -245,6 +246,8 @@ void mmu_mark_rodata_ro(void) } update_bats(); + + return 0; } /* diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 72341b9fb5521..6107e4944509b 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -160,11 +160,11 @@ static inline unsigned long p_block_mapped(phys_addr_t pa) { return 0; } #endif #if defined(CONFIG_PPC_BOOK3S_32) || defined(CONFIG_PPC_8xx) || defined(CONFIG_PPC_E500) -void mmu_mark_initmem_nx(void); -void mmu_mark_rodata_ro(void); +int mmu_mark_initmem_nx(void); +int mmu_mark_rodata_ro(void); #else -static inline void mmu_mark_initmem_nx(void) { } -static inline void mmu_mark_rodata_ro(void) { } +static inline int mmu_mark_initmem_nx(void) { return 0; } +static inline int mmu_mark_rodata_ro(void) { return 0; } #endif #ifdef CONFIG_PPC_8xx diff --git a/arch/powerpc/mm/nohash/8xx.c b/arch/powerpc/mm/nohash/8xx.c index 6be6421086ed9..43d4842bb1c7a 100644 --- a/arch/powerpc/mm/nohash/8xx.c +++ b/arch/powerpc/mm/nohash/8xx.c @@ -119,23 +119,26 @@ void __init mmu_mapin_immr(void) PAGE_KERNEL_NCG, MMU_PAGE_512K, true); } -static void mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, - pgprot_t prot, bool new) +static int mmu_mapin_ram_chunk(unsigned long offset, unsigned long top, + pgprot_t prot, bool new) { unsigned long v = PAGE_OFFSET + offset; unsigned long p = offset; + int err = 0; WARN_ON(!IS_ALIGNED(offset, SZ_512K) || !IS_ALIGNED(top, SZ_512K)); - for (; p < ALIGN(p, SZ_8M) && p < top; p += SZ_512K, v += SZ_512K) - __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); - for (; p < ALIGN_DOWN(top, SZ_8M) && p < top; p += SZ_8M, v += SZ_8M) - __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); - for (; p < ALIGN_DOWN(top, SZ_512K) && p < top; p += SZ_512K, v += SZ_512K) - __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN(p, SZ_8M) && p < top && !err; p += SZ_512K, v += SZ_512K) + err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); + for (; p < ALIGN_DOWN(top, SZ_8M) && p < top && !err; p += SZ_8M, v += SZ_8M) + err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_8M, new); + for (; p < ALIGN_DOWN(top, SZ_512K) && p < top && !err; p += SZ_512K, v += SZ_512K) + err = __early_map_kernel_hugepage(v, p, prot, MMU_PAGE_512K, new); if (!new) flush_tlb_kernel_range(PAGE_OFFSET + v, PAGE_OFFSET + top); + + return err; } unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) @@ -166,27 +169,33 @@ unsigned long __init mmu_mapin_ram(unsigned long base, unsigned long top) return top; } -void mmu_mark_initmem_nx(void) +int mmu_mark_initmem_nx(void) { unsigned long etext8 = ALIGN(__pa(_etext), SZ_8M); unsigned long sinittext = __pa(_sinittext); unsigned long boundary = strict_kernel_rwx_enabled() ? 
sinittext : etext8; unsigned long einittext8 = ALIGN(__pa(_einittext), SZ_8M); + int err = 0; if (!debug_pagealloc_enabled_or_kfence()) - mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); + err = mmu_mapin_ram_chunk(boundary, einittext8, PAGE_KERNEL, false); mmu_pin_tlb(block_mapped_ram, false); + + return err; } #ifdef CONFIG_STRICT_KERNEL_RWX -void mmu_mark_rodata_ro(void) +int mmu_mark_rodata_ro(void) { unsigned long sinittext = __pa(_sinittext); + int err; - mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); + err = mmu_mapin_ram_chunk(0, sinittext, PAGE_KERNEL_ROX, false); if (IS_ENABLED(CONFIG_PIN_TLB_DATA)) mmu_pin_tlb(block_mapped_ram, true); + + return err; } #endif diff --git a/arch/powerpc/mm/nohash/e500.c b/arch/powerpc/mm/nohash/e500.c index 921c3521ec113..266fb22131fc1 100644 --- a/arch/powerpc/mm/nohash/e500.c +++ b/arch/powerpc/mm/nohash/e500.c @@ -285,19 +285,23 @@ void __init adjust_total_lowmem(void) } #ifdef CONFIG_STRICT_KERNEL_RWX -void mmu_mark_rodata_ro(void) +int mmu_mark_rodata_ro(void) { unsigned long remapped; remapped = map_mem_in_cams(__max_low_memory, CONFIG_LOWMEM_CAM_NUM, false, false); - WARN_ON(__max_low_memory != remapped); + if (WARN_ON(__max_low_memory != remapped)) + return -EINVAL; + + return 0; } #endif -void mmu_mark_initmem_nx(void) +int mmu_mark_initmem_nx(void) { /* Everything is done in mmu_mark_rodata_ro() */ + return 0; } void setup_initial_memory_limit(phys_addr_t first_memblock_base, diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index 5c02fd08d61ef..e94919853ca33 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -130,32 +130,41 @@ void __init mapin_ram(void) } } -void mark_initmem_nx(void) +static int __mark_initmem_nx(void) { unsigned long numpages = PFN_UP((unsigned long)_einittext) - PFN_DOWN((unsigned long)_sinittext); + int err; - mmu_mark_initmem_nx(); + err = mmu_mark_initmem_nx(); if (!v_block_mapped((unsigned long)_sinittext)) { - set_memory_nx((unsigned long)_sinittext, numpages); - set_memory_rw((unsigned long)_sinittext, numpages); + err = set_memory_nx((unsigned long)_sinittext, numpages); + if (err) + return err; + err = set_memory_rw((unsigned long)_sinittext, numpages); } + return err; +} + +void mark_initmem_nx(void) +{ + int err = __mark_initmem_nx(); + + if (err) + panic("%s() failed, err = %d\n", __func__, err); } #ifdef CONFIG_STRICT_KERNEL_RWX -void mark_rodata_ro(void) +static int __mark_rodata_ro(void) { unsigned long numpages; if (IS_ENABLED(CONFIG_STRICT_MODULE_RWX) && mmu_has_feature(MMU_FTR_HPTE_TABLE)) pr_warn("This platform has HASH MMU, STRICT_MODULE_RWX won't work\n"); - if (v_block_mapped((unsigned long)_stext + 1)) { - mmu_mark_rodata_ro(); - ptdump_check_wx(); - return; - } + if (v_block_mapped((unsigned long)_stext + 1)) + return mmu_mark_rodata_ro(); /* * mark text and rodata as read only. 
__end_rodata is set by @@ -165,7 +174,15 @@ void mark_rodata_ro(void) numpages = PFN_UP((unsigned long)__end_rodata) - PFN_DOWN((unsigned long)_stext); - set_memory_ro((unsigned long)_stext, numpages); + return set_memory_ro((unsigned long)_stext, numpages); +} + +void mark_rodata_ro(void) +{ + int err = __mark_rodata_ro(); + + if (err) + panic("%s() failed, err = %d\n", __func__, err); // mark_initmem_nx() should have already run by now ptdump_check_wx(); From c8a196c41f7ace0d6fb0f8c5fbb2e3e3e8749818 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Feb 2024 11:17:33 +0100 Subject: [PATCH 6/8] powerpc: Refactor __kernel_map_pages() __kernel_map_pages() is almost identical for PPC32 and RADIX. Refactor it. On PPC32 it is not needed for KFENCE, but to keep it simple just make it similar to PPC64. Move the prototype of hash__kernel_map_pages() into mmu_decl.h to allow IS_ENABLED() to work on 32-bit. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/3656d47c53bff577739dac536dbae31fff52f6d8.1708078640.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/book3s/64/hash.h | 2 -- arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ---------- arch/powerpc/include/asm/book3s/64/radix.h | 2 -- arch/powerpc/mm/book3s64/radix_pgtable.c | 14 -------------- arch/powerpc/mm/mmu_decl.h | 2 ++ arch/powerpc/mm/pageattr.c | 20 ++++++++++++++++++++ arch/powerpc/mm/pgtable_32.c | 15 --------------- 7 files changed, 22 insertions(+), 43 deletions(-) diff --git a/arch/powerpc/include/asm/book3s/64/hash.h b/arch/powerpc/include/asm/book3s/64/hash.h index 6e70ae5116318..faf3e3b4e4b2b 100644 --- a/arch/powerpc/include/asm/book3s/64/hash.h +++ b/arch/powerpc/include/asm/book3s/64/hash.h @@ -269,8 +269,6 @@ int hash__create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); int hash__remove_section_mapping(unsigned long start, unsigned long end); -void hash__kernel_map_pages(struct page *page, int numpages, int enable); - #endif /* !__ASSEMBLY__ */ #endif /* __KERNEL__ */ #endif /* _ASM_POWERPC_BOOK3S_64_HASH_H */ diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 927d585652bc7..62c43d3d80ecc 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1027,16 +1027,6 @@ static inline void vmemmap_remove_mapping(unsigned long start, } #endif -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) -static inline void __kernel_map_pages(struct page *page, int numpages, int enable) -{ - if (radix_enabled()) - radix__kernel_map_pages(page, numpages, enable); - else - hash__kernel_map_pages(page, numpages, enable); -} -#endif - static inline pte_t pmd_pte(pmd_t pmd) { return __pte_raw(pmd_raw(pmd)); diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h index 357e23a403d34..8f55ff74bb680 100644 --- a/arch/powerpc/include/asm/book3s/64/radix.h +++ b/arch/powerpc/include/asm/book3s/64/radix.h @@ -362,8 +362,6 @@ int radix__create_section_mapping(unsigned long start, unsigned long end, int radix__remove_section_mapping(unsigned long start, unsigned long end); #endif /* CONFIG_MEMORY_HOTPLUG */ -void radix__kernel_map_pages(struct page *page, int numpages, int enable); - #ifdef CONFIG_ARCH_WANT_OPTIMIZE_DAX_VMEMMAP #define vmemmap_can_optimize vmemmap_can_optimize bool vmemmap_can_optimize(struct vmem_altmap *altmap, struct dev_pagemap *pgmap); diff --git 
a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index c6a4ac766b2bf..e16e2fd104c55 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -1339,20 +1339,6 @@ void __ref radix__vmemmap_free(unsigned long start, unsigned long end, #endif #endif -#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) -void radix__kernel_map_pages(struct page *page, int numpages, int enable) -{ - unsigned long addr; - - addr = (unsigned long)page_address(page); - - if (enable) - set_memory_p(addr, numpages); - else - set_memory_np(addr, numpages); -} -#endif - #ifdef CONFIG_TRANSPARENT_HUGEPAGE unsigned long radix__pmd_hugepage_update(struct mm_struct *mm, unsigned long addr, diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index 6107e4944509b..f5fd95701ec97 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -186,3 +186,5 @@ static inline bool debug_pagealloc_enabled_or_kfence(void) int create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); #endif + +void hash__kernel_map_pages(struct page *page, int numpages, int enable); diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index 421db7c4f2a48..8a9d24218b74b 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -14,6 +14,7 @@ #include #include +#include static pte_basic_t pte_update_delta(pte_t *ptep, unsigned long addr, unsigned long old, unsigned long new) @@ -101,3 +102,22 @@ int change_memory_attr(unsigned long addr, int numpages, long action) return apply_to_existing_page_range(&init_mm, start, size, change_page_attr, (void *)action); } + +#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KFENCE) +#ifdef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC +void __kernel_map_pages(struct page *page, int numpages, int enable) +{ + unsigned long addr = (unsigned long)page_address(page); + + if (PageHighMem(page)) + return; + + if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled()) + hash__kernel_map_pages(page, numpages, enable); + else if (enable) + set_memory_p(addr, numpages); + else + set_memory_np(addr, numpages); +} +#endif +#endif diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c index e94919853ca33..4be97b4a44f92 100644 --- a/arch/powerpc/mm/pgtable_32.c +++ b/arch/powerpc/mm/pgtable_32.c @@ -188,18 +188,3 @@ void mark_rodata_ro(void) ptdump_check_wx(); } #endif - -#if defined(CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC) && defined(CONFIG_DEBUG_PAGEALLOC) -void __kernel_map_pages(struct page *page, int numpages, int enable) -{ - unsigned long addr = (unsigned long)page_address(page); - - if (PageHighMem(page)) - return; - - if (enable) - set_memory_p(addr, numpages); - else - set_memory_np(addr, numpages); -} -#endif /* CONFIG_DEBUG_PAGEALLOC */ From 009cf11d4aab7663d0af9044de7258acddcd99dd Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Fri, 16 Feb 2024 11:17:34 +0100 Subject: [PATCH 7/8] powerpc: Don't ignore errors from set_memory_{n}p() in __kernel_map_pages() set_memory_p() and set_memory_np() can fail. As mentioned in linux/mm.h: /* * To support DEBUG_PAGEALLOC architecture must ensure that * __kernel_map_pages() never fails */ So panic in case set_memory_p() or set_memory_np() fail in __kernel_map_pages(). 
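For background on why the hook must not fail: the page allocator reaches __kernel_map_pages() through void wrappers in include/linux/mm.h, along the lines of this slightly simplified copy of the helper in recent kernels:

	/* The wrapper returns void, so the arch hook has nowhere to
	 * report an error back to the page allocator; panicking is
	 * the only honest option on failure.
	 */
	static inline void debug_pagealloc_map_pages(struct page *page,
						     int numpages)
	{
		if (debug_pagealloc_enabled_static())
			__kernel_map_pages(page, numpages, 1);
	}
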
Link: https://github.com/KSPP/linux/issues/7 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://msgid.link/20ef75884aa6a636e8298736f3d1056b0793d3d9.1708078640.git.christophe.leroy@csgroup.eu --- arch/powerpc/mm/book3s64/hash_utils.c | 3 ++- arch/powerpc/mm/mmu_decl.h | 2 +- arch/powerpc/mm/pageattr.c | 10 +++++++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c index 0626a25b0d728..01c3b4b652410 100644 --- a/arch/powerpc/mm/book3s64/hash_utils.c +++ b/arch/powerpc/mm/book3s64/hash_utils.c @@ -2172,7 +2172,7 @@ static void kernel_unmap_linear_page(unsigned long vaddr, unsigned long lmi) mmu_kernel_ssize, 0); } -void hash__kernel_map_pages(struct page *page, int numpages, int enable) +int hash__kernel_map_pages(struct page *page, int numpages, int enable) { unsigned long flags, vaddr, lmi; int i; @@ -2189,6 +2189,7 @@ void hash__kernel_map_pages(struct page *page, int numpages, int enable) kernel_unmap_linear_page(vaddr, lmi); } local_irq_restore(flags); + return 0; } #endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_KFENCE */ diff --git a/arch/powerpc/mm/mmu_decl.h b/arch/powerpc/mm/mmu_decl.h index f5fd95701ec97..cd4dfac0397b5 100644 --- a/arch/powerpc/mm/mmu_decl.h +++ b/arch/powerpc/mm/mmu_decl.h @@ -187,4 +187,4 @@ int create_section_mapping(unsigned long start, unsigned long end, int nid, pgprot_t prot); #endif -void hash__kernel_map_pages(struct page *page, int numpages, int enable); +int hash__kernel_map_pages(struct page *page, int numpages, int enable); diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c index 8a9d24218b74b..ac22bf28086fa 100644 --- a/arch/powerpc/mm/pageattr.c +++ b/arch/powerpc/mm/pageattr.c @@ -107,17 +107,21 @@ int change_memory_attr(unsigned long addr, int numpages, long action) #ifdef CONFIG_ARCH_SUPPORTS_DEBUG_PAGEALLOC void __kernel_map_pages(struct page *page, int numpages, int enable) { + int err; unsigned long addr = (unsigned long)page_address(page); if (PageHighMem(page)) return; if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled()) - hash__kernel_map_pages(page, numpages, enable); + err = hash__kernel_map_pages(page, numpages, enable); else if (enable) - set_memory_p(addr, numpages); + err = set_memory_p(addr, numpages); else - set_memory_np(addr, numpages); + err = set_memory_np(addr, numpages); + + if (err) + panic("%s: changing memory protections failed\n", __func__); } #endif #endif From 362c297127ccc2838da19a0a098cbe24a34d7708 Mon Sep 17 00:00:00 2001 From: Brian King Date: Wed, 17 Jan 2024 15:46:32 -0600 Subject: [PATCH 8/8] powerpc: Enable support for 32 bit MSI-X vectors Some devices are not capable of addressing 64 bits via DMA, which includes MSI-X vectors. This allows us to ensure these devices use MSI-X vectors in 32 bit space. 
Signed-off-by: Brian King Signed-off-by: Michael Ellerman Link: https://msgid.link/20240117214632.134539-1-brking@linux.vnet.ibm.com --- arch/powerpc/platforms/pseries/msi.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 423ee1d5bd944..6dfb55b52d363 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -26,6 +26,7 @@ static int query_token, change_token; #define RTAS_CHANGE_MSI_FN 3 #define RTAS_CHANGE_MSIX_FN 4 #define RTAS_CHANGE_32MSI_FN 5 +#define RTAS_CHANGE_32MSIX_FN 6 /* RTAS Helpers */ @@ -41,7 +42,7 @@ static int rtas_change_msi(struct pci_dn *pdn, u32 func, u32 num_irqs) seq_num = 1; do { if (func == RTAS_CHANGE_MSI_FN || func == RTAS_CHANGE_MSIX_FN || - func == RTAS_CHANGE_32MSI_FN) + func == RTAS_CHANGE_32MSI_FN || func == RTAS_CHANGE_32MSIX_FN) rc = rtas_call(change_token, 6, 4, rtas_ret, addr, BUID_HI(buid), BUID_LO(buid), func, num_irqs, seq_num); @@ -406,8 +407,12 @@ static int rtas_prepare_msi_irqs(struct pci_dev *pdev, int nvec_in, int type, if (use_32bit_msi_hack && rc > 0) rtas_hack_32bit_msi_gen2(pdev); - } else - rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); + } else { + if (pdev->no_64bit_msi) + rc = rtas_change_msi(pdn, RTAS_CHANGE_32MSIX_FN, nvec); + else + rc = rtas_change_msi(pdn, RTAS_CHANGE_MSIX_FN, nvec); + } if (rc != nvec) { if (nvec != nvec_in) {
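
The net effect of this last patch is a four-way selection of the ibm,change-msi function number. In simplified form (the real MSI path also applies quota checks and a 32-bit hack for older firmware, omitted here):

	u32 func;

	if (type == PCI_CAP_ID_MSIX)
		func = pdev->no_64bit_msi ? RTAS_CHANGE_32MSIX_FN
					  : RTAS_CHANGE_MSIX_FN;
	else
		func = pdev->no_64bit_msi ? RTAS_CHANGE_32MSI_FN
					  : RTAS_CHANGE_MSI_FN;

	rc = rtas_change_msi(pdn, func, nvec);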