From c39b1dcf055d420a498d1047c645b776e4d1a7aa Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Oct 2024 10:39:28 +0200 Subject: [PATCH 1/3] powerpc/vdso: Add a page for non-time data The page containing VDSO time data is swapped with the one containing TIME namespace data when a process uses a non-root time namespace. For other data like powerpc specific data and RNG data, it means tracking whether time namespace is the root one or not to know which page to use. Simplify the logic behind by moving time data out of first data page so that the first data page which contains everything else always remains the first page. Time data is in the second or third page depending on selected time namespace. While we are playing with get_datapage macro, directly take into account the data offset inside the macro instead of adding that offset afterwards. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/0557d3ec898c1d0ea2fc59fa8757618e524c5d94.1727858295.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso_datapage.h | 24 +++++++----------------- arch/powerpc/kernel/vdso.c | 16 ++++++++++------ arch/powerpc/kernel/vdso/cacheflush.S | 2 +- arch/powerpc/kernel/vdso/datapage.S | 4 ++-- arch/powerpc/kernel/vdso/getrandom.S | 3 +-- arch/powerpc/kernel/vdso/gettimeofday.S | 5 ++--- arch/powerpc/kernel/vdso/vdso32.lds.S | 2 +- arch/powerpc/kernel/vdso/vdso64.lds.S | 2 +- 8 files changed, 25 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/include/asm/vdso_datapage.h b/arch/powerpc/include/asm/vdso_datapage.h index 248dee138f7bf..33069afccb3c6 100644 --- a/arch/powerpc/include/asm/vdso_datapage.h +++ b/arch/powerpc/include/asm/vdso_datapage.h @@ -82,8 +82,9 @@ struct vdso_arch_data { __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ __u32 compat_syscall_map[SYSCALL_MAP_SIZE]; /* Map of compat syscalls */ - struct vdso_data data[CS_BASES]; struct vdso_rng_data rng_data; + + struct vdso_data data[CS_BASES] __aligned(1 << CONFIG_PAGE_SHIFT); }; #else /* CONFIG_PPC64 */ @@ -95,8 +96,9 @@ struct vdso_arch_data { __u64 tb_ticks_per_sec; /* Timebase tics / sec 0x38 */ __u32 syscall_map[SYSCALL_MAP_SIZE]; /* Map of syscalls */ __u32 compat_syscall_map[0]; /* No compat syscalls on PPC32 */ - struct vdso_data data[CS_BASES]; struct vdso_rng_data rng_data; + + struct vdso_data data[CS_BASES] __aligned(1 << CONFIG_PAGE_SHIFT); }; #endif /* CONFIG_PPC64 */ @@ -105,29 +107,17 @@ extern struct vdso_arch_data *vdso_data; #else /* __ASSEMBLY__ */ -.macro get_datapage ptr +.macro get_datapage ptr offset=0 bcl 20, 31, .+4 999: mflr \ptr - addis \ptr, \ptr, (_vdso_datapage - 999b)@ha - addi \ptr, \ptr, (_vdso_datapage - 999b)@l + addis \ptr, \ptr, (_vdso_datapage - 999b + \offset)@ha + addi \ptr, \ptr, (_vdso_datapage - 999b + \offset)@l .endm #include #include -.macro get_realdatapage ptr scratch - get_datapage \ptr -#ifdef CONFIG_TIME_NS - lwz \scratch, VDSO_CLOCKMODE_OFFSET(\ptr) - xoris \scratch, \scratch, VDSO_CLOCKMODE_TIMENS@h - xori \scratch, \scratch, VDSO_CLOCKMODE_TIMENS@l - cntlzw \scratch, \scratch - rlwinm \scratch, \scratch, PAGE_SHIFT - 5, 1 << PAGE_SHIFT - add \ptr, \ptr, \scratch -#endif -.endm - #endif /* __ASSEMBLY__ */ #endif /* __KERNEL__ */ diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index ee4b9d676cff5..6166c1862c06f 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -48,12 +48,13 @@ long sys_ni_syscall(void); */ static union { struct vdso_arch_data data; - u8 page[PAGE_SIZE]; + u8 page[2 * PAGE_SIZE]; } vdso_data_store __page_aligned_data; struct vdso_arch_data *vdso_data = &vdso_data_store.data; enum vvar_pages { - VVAR_DATA_PAGE_OFFSET, + VVAR_BASE_PAGE_OFFSET, + VVAR_TIME_PAGE_OFFSET, VVAR_TIMENS_PAGE_OFFSET, VVAR_NR_PAGES, }; @@ -119,7 +120,7 @@ static struct vm_special_mapping vdso64_spec __ro_after_init = { #ifdef CONFIG_TIME_NS struct vdso_data *arch_get_vdso_data(void *vvar_page) { - return ((struct vdso_arch_data *)vvar_page)->data; + return vvar_page; } /* @@ -153,11 +154,14 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, unsigned long pfn; switch (vmf->pgoff) { - case VVAR_DATA_PAGE_OFFSET: + case VVAR_BASE_PAGE_OFFSET: + pfn = virt_to_pfn(vdso_data); + break; + case VVAR_TIME_PAGE_OFFSET: if (timens_page) pfn = page_to_pfn(timens_page); else - pfn = virt_to_pfn(vdso_data); + pfn = virt_to_pfn(vdso_data->data); break; #ifdef CONFIG_TIME_NS case VVAR_TIMENS_PAGE_OFFSET: @@ -170,7 +174,7 @@ static vm_fault_t vvar_fault(const struct vm_special_mapping *sm, */ if (!timens_page) return VM_FAULT_SIGBUS; - pfn = virt_to_pfn(vdso_data); + pfn = virt_to_pfn(vdso_data->data); break; #endif /* CONFIG_TIME_NS */ default: diff --git a/arch/powerpc/kernel/vdso/cacheflush.S b/arch/powerpc/kernel/vdso/cacheflush.S index 3b2479bd2f9a1..0085ae464dac9 100644 --- a/arch/powerpc/kernel/vdso/cacheflush.S +++ b/arch/powerpc/kernel/vdso/cacheflush.S @@ -30,7 +30,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_COHERENT_ICACHE) #ifdef CONFIG_PPC64 mflr r12 .cfi_register lr,r12 - get_realdatapage r10, r11 + get_datapage r10 mtlr r12 .cfi_restore lr #endif diff --git a/arch/powerpc/kernel/vdso/datapage.S b/arch/powerpc/kernel/vdso/datapage.S index 2b19b6201a33a..db8e167f01667 100644 --- a/arch/powerpc/kernel/vdso/datapage.S +++ b/arch/powerpc/kernel/vdso/datapage.S @@ -28,7 +28,7 @@ V_FUNCTION_BEGIN(__kernel_get_syscall_map) mflr r12 .cfi_register lr,r12 mr. r4,r3 - get_realdatapage r3, r11 + get_datapage r3 mtlr r12 #ifdef __powerpc64__ addi r3,r3,CFG_SYSCALL_MAP64 @@ -52,7 +52,7 @@ V_FUNCTION_BEGIN(__kernel_get_tbfreq) .cfi_startproc mflr r12 .cfi_register lr,r12 - get_realdatapage r3, r11 + get_datapage r3 #ifndef __powerpc64__ lwz r4,(CFG_TB_TICKS_PER_SEC + 4)(r3) #endif diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S index f3bbf931931ca..3deddcf89f99b 100644 --- a/arch/powerpc/kernel/vdso/getrandom.S +++ b/arch/powerpc/kernel/vdso/getrandom.S @@ -31,8 +31,7 @@ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif - get_realdatapage r8, r11 - addi r8, r8, VDSO_RNG_DATA_OFFSET + get_datapage r8 VDSO_RNG_DATA_OFFSET bl CFUNC(DOTSYM(\funct)) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ diff --git a/arch/powerpc/kernel/vdso/gettimeofday.S b/arch/powerpc/kernel/vdso/gettimeofday.S index 5540d7021fa25..5333848322ca6 100644 --- a/arch/powerpc/kernel/vdso/gettimeofday.S +++ b/arch/powerpc/kernel/vdso/gettimeofday.S @@ -32,11 +32,10 @@ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif - get_datapage r5 .ifeq \call_time - addi r5, r5, VDSO_DATA_OFFSET + get_datapage r5 VDSO_DATA_OFFSET .else - addi r4, r5, VDSO_DATA_OFFSET + get_datapage r4 VDSO_DATA_OFFSET .endif bl CFUNC(DOTSYM(\funct)) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) diff --git a/arch/powerpc/kernel/vdso/vdso32.lds.S b/arch/powerpc/kernel/vdso/vdso32.lds.S index 7b41d5d256e81..1a1b0b6d681a9 100644 --- a/arch/powerpc/kernel/vdso/vdso32.lds.S +++ b/arch/powerpc/kernel/vdso/vdso32.lds.S @@ -16,7 +16,7 @@ OUTPUT_ARCH(powerpc:common) SECTIONS { - PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); + PROVIDE(_vdso_datapage = . - 3 * PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text diff --git a/arch/powerpc/kernel/vdso/vdso64.lds.S b/arch/powerpc/kernel/vdso/vdso64.lds.S index 9481e4b892ed1..e21b5506cad62 100644 --- a/arch/powerpc/kernel/vdso/vdso64.lds.S +++ b/arch/powerpc/kernel/vdso/vdso64.lds.S @@ -16,7 +16,7 @@ OUTPUT_ARCH(powerpc:common64) SECTIONS { - PROVIDE(_vdso_datapage = . - 2 * PAGE_SIZE); + PROVIDE(_vdso_datapage = . - 3 * PAGE_SIZE); . = SIZEOF_HEADERS; .hash : { *(.hash) } :text From 4e3fa1aecb2c1f128f7289272fe2947e4396f1ce Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Wed, 2 Oct 2024 10:39:29 +0200 Subject: [PATCH 2/3] powerpc/vdso: Implement __arch_get_vdso_rng_data() VDSO time functions do not call any other function, so they don't need to save/restore LR. However, retrieving the address of VDSO data page requires using LR hence saving then restoring it, which can be heavy on some CPUs. On the other hand, VDSO functions on powerpc are not standard functions and require a wrapper function to call C VDSO functions. And that wrapper has to save and restore LR in order to call the C VDSO function, so retrieving VDSO data page address in that wrapper doesn't require additional save/restore of LR. For random VDSO functions it is a bit different. Because the function calls __arch_chacha20_blocks_nostack(), it saves and restores LR. Retrieving VDSO data page address can then be done there without additional save/restore of LR. So lets implement __arch_get_vdso_rng_data() and simplify the wrapper. It starts paving the way for the day powerpc will implement a more standard ABI for VDSO functions. Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/a1a9bd0df508f1b5c04684b7366940577dfc6262.1727858295.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso/getrandom.h | 16 ++++++++++++++-- arch/powerpc/kernel/asm-offsets.c | 1 - arch/powerpc/kernel/vdso/getrandom.S | 1 - arch/powerpc/kernel/vdso/vgetrandom.c | 4 ++-- 4 files changed, 16 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/vdso/getrandom.h b/arch/powerpc/include/asm/vdso/getrandom.h index 501d6bb14e8a7..80ce0709725eb 100644 --- a/arch/powerpc/include/asm/vdso/getrandom.h +++ b/arch/powerpc/include/asm/vdso/getrandom.h @@ -7,6 +7,8 @@ #ifndef __ASSEMBLY__ +#include + static __always_inline int do_syscall_3(const unsigned long _r0, const unsigned long _r3, const unsigned long _r4, const unsigned long _r5) { @@ -43,11 +45,21 @@ static __always_inline ssize_t getrandom_syscall(void *buffer, size_t len, unsig static __always_inline struct vdso_rng_data *__arch_get_vdso_rng_data(void) { - return NULL; + struct vdso_arch_data *data; + + asm ( + " bcl 20, 31, .+4 ;" + "0: mflr %0 ;" + " addis %0, %0, (_vdso_datapage - 0b)@ha ;" + " addi %0, %0, (_vdso_datapage - 0b)@l ;" + : "=r" (data) : : "lr" + ); + + return &data->rng_data; } ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, - size_t opaque_len, const struct vdso_rng_data *vd); + size_t opaque_len); #endif /* !__ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 131a8cc10dbe8..7b3feb6bc2103 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -335,7 +335,6 @@ int main(void) /* datapage offsets for use by vdso */ OFFSET(VDSO_DATA_OFFSET, vdso_arch_data, data); - OFFSET(VDSO_RNG_DATA_OFFSET, vdso_arch_data, rng_data); OFFSET(CFG_TB_TICKS_PER_SEC, vdso_arch_data, tb_ticks_per_sec); #ifdef CONFIG_PPC64 OFFSET(CFG_ICACHE_BLOCKSZ, vdso_arch_data, icache_block_size); diff --git a/arch/powerpc/kernel/vdso/getrandom.S b/arch/powerpc/kernel/vdso/getrandom.S index 3deddcf89f99b..a80d9fb436f7e 100644 --- a/arch/powerpc/kernel/vdso/getrandom.S +++ b/arch/powerpc/kernel/vdso/getrandom.S @@ -31,7 +31,6 @@ PPC_STL r2, PPC_MIN_STKFRM + STK_GOT(r1) .cfi_rel_offset r2, PPC_MIN_STKFRM + STK_GOT #endif - get_datapage r8 VDSO_RNG_DATA_OFFSET bl CFUNC(DOTSYM(\funct)) PPC_LL r0, PPC_MIN_STKFRM + PPC_LR_STKOFF(r1) #ifdef __powerpc64__ diff --git a/arch/powerpc/kernel/vdso/vgetrandom.c b/arch/powerpc/kernel/vdso/vgetrandom.c index 5f855d45fb7bf..cc79b960a5413 100644 --- a/arch/powerpc/kernel/vdso/vgetrandom.c +++ b/arch/powerpc/kernel/vdso/vgetrandom.c @@ -8,7 +8,7 @@ #include ssize_t __c_kernel_getrandom(void *buffer, size_t len, unsigned int flags, void *opaque_state, - size_t opaque_len, const struct vdso_rng_data *vd) + size_t opaque_len) { - return __cvdso_getrandom_data(vd, buffer, len, flags, opaque_state, opaque_len); + return __cvdso_getrandom(buffer, len, flags, opaque_state, opaque_len); } From 0161bd38c24312853ed5ae9a425a1c41c4ac674a Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Thu, 10 Oct 2024 00:17:57 +0200 Subject: [PATCH 3/3] powerpc/vdso: Flag VDSO64 entry points as functions On powerpc64 as shown below by readelf, vDSO functions symbols have type NOTYPE. $ powerpc64-linux-gnu-readelf -a arch/powerpc/kernel/vdso/vdso64.so.dbg ELF Header: Magic: 7f 45 4c 46 02 02 01 00 00 00 00 00 00 00 00 00 Class: ELF64 Data: 2's complement, big endian Version: 1 (current) OS/ABI: UNIX - System V ABI Version: 0 Type: DYN (Shared object file) Machine: PowerPC64 Version: 0x1 ... Symbol table '.dynsym' contains 12 entries: Num: Value Size Type Bind Vis Ndx Name ... 1: 0000000000000524 84 NOTYPE GLOBAL DEFAULT 8 __[...]@@LINUX_2.6.15 ... 4: 0000000000000000 0 OBJECT GLOBAL DEFAULT ABS LINUX_2.6.15 5: 00000000000006c0 48 NOTYPE GLOBAL DEFAULT 8 __[...]@@LINUX_2.6.15 Symbol table '.symtab' contains 56 entries: Num: Value Size Type Bind Vis Ndx Name ... 45: 0000000000000000 0 OBJECT GLOBAL DEFAULT ABS LINUX_2.6.15 46: 00000000000006c0 48 NOTYPE GLOBAL DEFAULT 8 __kernel_getcpu 47: 0000000000000524 84 NOTYPE GLOBAL DEFAULT 8 __kernel_clock_getres To overcome that, commit ba83b3239e65 ("selftests: vDSO: fix vDSO symbols lookup for powerpc64") was applied to have selftests also look for NOTYPE symbols, but the correct fix should be to flag VDSO entry points as functions. The original commit that brought VDSO support into powerpc/64 has the following explanation: Note that the symbols exposed by the vDSO aren't "normal" function symbols, apps can't be expected to link against them directly, the vDSO's are both seen as if they were linked at 0 and the symbols just contain offsets to the various functions. This is done on purpose to avoid a relocation step (ppc64 functions normally have descriptors with abs addresses in them). When glibc uses those functions, it's expected to use it's own trampolines that know how to reach them. The descriptors it's talking about are the OPD function descriptors used on ABI v1 (big endian). But it would be more correct for a text symbol to have type function, even if there's no function descriptor for it. glibc has a special case already for handling the VDSO symbols which creates a fake opd pointing at the kernel symbol. So changing the VDSO symbol type to function shouldn't affect that. For ABI v2, there is no function descriptors and VDSO functions can safely have function type. So lets flag VDSO entry points as functions and revert the selftest change. Link: https://github.com/mpe/linux-fullhistory/commit/5f2dd691b62da9d9cc54b938f8b29c22c93cb805 Fixes: ba83b3239e65 ("selftests: vDSO: fix vDSO symbols lookup for powerpc64") Signed-off-by: Christophe Leroy Reviewed-By: Segher Boessenkool Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/b6ad2f1ee9887af3ca5ecade2a56f4acda517a85.1728512263.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/vdso.h | 1 + tools/testing/selftests/vDSO/parse_vdso.c | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/vdso.h b/arch/powerpc/include/asm/vdso.h index 7650b6ce14c85..8d972bc98b55f 100644 --- a/arch/powerpc/include/asm/vdso.h +++ b/arch/powerpc/include/asm/vdso.h @@ -25,6 +25,7 @@ int vdso_getcpu_init(void); #ifdef __VDSO64__ #define V_FUNCTION_BEGIN(name) \ .globl name; \ + .type name,@function; \ name: \ #define V_FUNCTION_END(name) \ diff --git a/tools/testing/selftests/vDSO/parse_vdso.c b/tools/testing/selftests/vDSO/parse_vdso.c index 7dd5668ea8a6e..28f35620c4991 100644 --- a/tools/testing/selftests/vDSO/parse_vdso.c +++ b/tools/testing/selftests/vDSO/parse_vdso.c @@ -222,8 +222,7 @@ void *vdso_sym(const char *version, const char *name) ELF(Sym) *sym = &vdso_info.symtab[chain]; /* Check for a defined global or weak function w/ right name. */ - if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC && - ELF64_ST_TYPE(sym->st_info) != STT_NOTYPE) + if (ELF64_ST_TYPE(sym->st_info) != STT_FUNC) continue; if (ELF64_ST_BIND(sym->st_info) != STB_GLOBAL && ELF64_ST_BIND(sym->st_info) != STB_WEAK)