From 5e296fc37e1afa3fb38b886fe2bf7737777d0f61 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Oct 2024 16:17:00 +1100 Subject: [PATCH 01/55] powerpc/64: Drop IPI_PRIORITY from asm-offsets The last use of IPI_PRIORITY in asm was removed in commit 37f55d30df2e ("KVM: PPC: Book3S HV: Convert kvmppc_read_intr to a C function"). Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241009051701.132282-1-mpe@ellerman.id.au --- arch/powerpc/kernel/asm-offsets.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7b3feb6bc2103..d8fe97662a028 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -596,7 +596,6 @@ int main(void) HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); - DEFINE(IPI_PRIORITY, IPI_PRIORITY); OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr); OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar); OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); From 3c9670df7f7e871f0d2c2208d2ce79f6cfbca0f6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Oct 2024 16:18:25 +1100 Subject: [PATCH 02/55] powerpc/machdep: Drop include of seq_file.h Drop the include of seq_file.h in machdep.h, replace it with a forward declaration of struct seq_file, which is all that's required. Add direct includes of seq_file.h to some files that were getting seq_file.h via machdep.h. 
Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241009051826.132805-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/platforms/52xx/efika.c | 1 + arch/powerpc/platforms/embedded6xx/linkstation.c | 1 + arch/powerpc/platforms/embedded6xx/mvme5100.c | 1 + arch/powerpc/platforms/pseries/lpar.c | 1 + arch/powerpc/platforms/pseries/msi.c | 1 + arch/powerpc/platforms/pseries/papr_scm.c | 1 + arch/powerpc/sysdev/xive/spapr.c | 1 + 8 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 1862f94335ee8..3326730cd300a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -4,7 +4,6 @@ #ifdef __KERNEL__ #include -#include #include #include #include @@ -18,6 +17,7 @@ struct file; struct pci_controller; struct kimage; struct pci_host_bridge; +struct seq_file; struct machdep_calls { const char *name; diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 37a67120f257c..a7172f9ebaad9 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index e265f026eee2a..4012f206ec63d 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 00bec0f051be1..5ca41972ef221 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -14,6 +14,7 @@ #include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/lpar.c 
b/arch/powerpc/platforms/pseries/lpar.c index c1d8bee8f7018..0c428f1ae7127 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 6dfb55b52d363..fdc2f7f38dc9a 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 9e297f88adc5d..f84ac9fbe203c 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f2fa985a2c771..5aedbe3e8e6a2 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include From b23b9edf64b6387334aa2f8687cca6792b0d9d6c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Oct 2024 16:18:26 +1100 Subject: [PATCH 03/55] powerpc/machdep: Drop include of dma-mapping.h Drop the include of dma-mapping.h in machdep.h, replace it with forward declarations of struct device and struct pci_dev, and include time64.h and page.h which are required for time64_t and pgprot_t respectively. Add direct includes of some other headers to some files that were getting them via machdep.h. 
Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241009051826.132805-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/machdep.h | 6 +++++- arch/powerpc/kernel/sysfs.c | 1 + arch/powerpc/platforms/pseries/svm.c | 1 + drivers/cpuidle/cpuidle-pseries.c | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 3326730cd300a..3298eec123a32 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -5,15 +5,19 @@ #include #include -#include #include +#include + +#include struct pt_regs; struct pci_bus; +struct device; struct device_node; struct iommu_table; struct rtc_time; struct file; +struct pci_dev; struct pci_controller; struct kimage; struct pci_host_bridge; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index b842c83ab497d..6b3dd6decdf90 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 3b4045d508ec8..10b8eb6bff393 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 14db9b7d985d1..f68c65f1d023f 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -22,6 +22,7 @@ #include #include #include +#include static struct cpuidle_driver pseries_idle_driver = { .name = "pseries_idle", From cadae3a45d23aa4f6485938a67cbc47aaaa25e38 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 19 Aug 2024 22:24:01 +1000 Subject: [PATCH 04/55] powerpc/pseries: Fix dtl_access_lock to be a rw_semaphore The dtl_access_lock needs to be a rw_semaphore, a sleeping lock, because the code
calls kmalloc() while holding it, which can sleep: # echo 1 > /proc/powerpc/vcpudispatch_stats BUG: sleeping function called from invalid context at include/linux/sched/mm.h:337 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 199, name: sh preempt_count: 1, expected: 0 3 locks held by sh/199: #0: c00000000a0743f8 (sb_writers#3){.+.+}-{0:0}, at: vfs_write+0x324/0x438 #1: c0000000028c7058 (dtl_enable_mutex){+.+.}-{3:3}, at: vcpudispatch_stats_write+0xd4/0x5f4 #2: c0000000028c70b8 (dtl_access_lock){+.+.}-{2:2}, at: vcpudispatch_stats_write+0x220/0x5f4 CPU: 0 PID: 199 Comm: sh Not tainted 6.10.0-rc4 #152 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries Call Trace: dump_stack_lvl+0x130/0x148 (unreliable) __might_resched+0x174/0x410 kmem_cache_alloc_noprof+0x340/0x3d0 alloc_dtl_buffers+0x124/0x1ac vcpudispatch_stats_write+0x2a8/0x5f4 proc_reg_write+0xf4/0x150 vfs_write+0xfc/0x438 ksys_write+0x88/0x148 system_call_exception+0x1c4/0x5a0 system_call_common+0xf4/0x258 Fixes: 06220d78f24a ("powerpc/pseries: Introduce rwlock to gatekeep DTLB usage") Tested-by: Kajol Jain Reviewed-by: Nysal Jan K.A Reviewed-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240819122401.513203-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/dtl.h | 4 ++-- arch/powerpc/platforms/pseries/dtl.c | 8 ++++---- arch/powerpc/platforms/pseries/lpar.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h index d6f43d149f8dc..a5c21bc623cb0 100644 --- a/arch/powerpc/include/asm/dtl.h +++ b/arch/powerpc/include/asm/dtl.h @@ -1,8 +1,8 @@ #ifndef _ASM_POWERPC_DTL_H #define _ASM_POWERPC_DTL_H +#include #include -#include /* * Layout of entries in the hypervisor's dispatch trace log buffer. 
@@ -35,7 +35,7 @@ struct dtl_entry { #define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT) extern struct kmem_cache *dtl_cache; -extern rwlock_t dtl_access_lock; +extern struct rw_semaphore dtl_access_lock; extern void register_dtl_buffer(int cpu); extern void alloc_dtl_buffers(unsigned long *time_limit); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 8cb9d36ea4915..f293588b8c7b5 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -191,7 +191,7 @@ static int dtl_enable(struct dtl *dtl) return -EBUSY; /* ensure there are no other conflicting dtl users */ - if (!read_trylock(&dtl_access_lock)) + if (!down_read_trylock(&dtl_access_lock)) return -EBUSY; n_entries = dtl_buf_entries; @@ -199,7 +199,7 @@ static int dtl_enable(struct dtl *dtl) if (!buf) { printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", __func__, dtl->cpu); - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); return -ENOMEM; } @@ -217,7 +217,7 @@ static int dtl_enable(struct dtl *dtl) spin_unlock(&dtl->lock); if (rc) { - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); kmem_cache_free(dtl_cache, buf); } @@ -232,7 +232,7 @@ static void dtl_disable(struct dtl *dtl) dtl->buf = NULL; dtl->buf_entries = 0; spin_unlock(&dtl->lock); - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); } /* file interface */ diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0c428f1ae7127..6a415febc53b7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -170,7 +170,7 @@ struct vcpu_dispatch_data { */ #define NR_CPUS_H NR_CPUS -DEFINE_RWLOCK(dtl_access_lock); +DECLARE_RWSEM(dtl_access_lock); static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data); static DEFINE_PER_CPU(u64, dtl_entry_ridx); static DEFINE_PER_CPU(struct dtl_worker, dtl_workers); @@ -464,7 +464,7 @@ static int 
dtl_worker_enable(unsigned long *time_limit) { int rc = 0, state; - if (!write_trylock(&dtl_access_lock)) { + if (!down_write_trylock(&dtl_access_lock)) { rc = -EBUSY; goto out; } @@ -480,7 +480,7 @@ static int dtl_worker_enable(unsigned long *time_limit) pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n"); free_dtl_buffers(time_limit); reset_global_dtl_mask(); - write_unlock(&dtl_access_lock); + up_write(&dtl_access_lock); rc = -EINVAL; goto out; } @@ -495,7 +495,7 @@ static void dtl_worker_disable(unsigned long *time_limit) cpuhp_remove_state(dtl_worker_state); free_dtl_buffers(time_limit); reset_global_dtl_mask(); - write_unlock(&dtl_access_lock); + up_write(&dtl_access_lock); } static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p, From c7182a0bdec16cdea912b5a3aea9a80f4f657b7d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Oct 2024 16:38:06 +1100 Subject: [PATCH 05/55] powerpc/boot: Remove bogus reference to lilo The help text refers to lilo, but the install script does not run lilo and never has. The reference to lilo seems to have come originally from arch/ppc/Makefile, but it was not true there either. Remove it. Reported-by: Thorsten Leemhuis Link: https://fosstodon.org/@kernellogger/113032940928131612 Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241009053806.135807-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index cbb353ddacb7a..1c5c28d58e944 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -352,7 +352,7 @@ define archhelp echo ' install - Install kernel using' echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' - echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' install to $$(INSTALL_PATH)' echo ' *_defconfig - Select default config from arch/powerpc/configs' echo '' echo ' Targets with
embed a device tree blob inside the image' From 62f8f307c80e99ab18d38aa1a5bbbc18128ee5f8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 13 Oct 2024 21:29:57 +1100 Subject: [PATCH 06/55] powerpc/64: Remove maple platform The maple platform was added in 2004 [1], to support the "Maple" 970FX evaluation board. It was later used for IBM JS20/JS21 machines, as well as the Bimini machine, aka "Yellow Dog Powerstation". Sadly all those machines have passed into memory, and there's been no evidence for years that anyone is still using any of them. Remove the platform and related code. It can always be reinstated if there's interest. Note that this has no impact on support for 970FX based Power Macs. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux-fullhistory.git/commit/?id=f0d068d65c5e555ffcfbc189de32598f6f00770c Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241013102957.548291-1-mpe@ellerman.id.au --- Documentation/arch/powerpc/booting.rst | 4 +- arch/powerpc/Kconfig.debug | 6 - arch/powerpc/boot/.gitignore | 1 - arch/powerpc/boot/Makefile | 3 +- arch/powerpc/boot/wrapper | 7 +- arch/powerpc/configs/maple_defconfig | 111 ---- arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/include/asm/udbg.h | 1 - arch/powerpc/kernel/misc_64.S | 8 +- arch/powerpc/kernel/prom_init.c | 86 ---- arch/powerpc/kernel/udbg.c | 3 - arch/powerpc/kernel/udbg_16550.c | 23 - arch/powerpc/platforms/Kconfig | 1 - arch/powerpc/platforms/Makefile | 1 - arch/powerpc/platforms/maple/Kconfig | 19 - arch/powerpc/platforms/maple/Makefile | 2 - arch/powerpc/platforms/maple/maple.h | 14 - arch/powerpc/platforms/maple/pci.c | 672 ------------------------- arch/powerpc/platforms/maple/setup.c | 363 ------------- arch/powerpc/platforms/maple/time.c | 170 ------- 20 files changed, 8 insertions(+), 1488 deletions(-) delete mode 100644 arch/powerpc/configs/maple_defconfig delete mode 100644 arch/powerpc/platforms/maple/Kconfig delete mode 100644 
arch/powerpc/platforms/maple/Makefile delete mode 100644 arch/powerpc/platforms/maple/maple.h delete mode 100644 arch/powerpc/platforms/maple/pci.c delete mode 100644 arch/powerpc/platforms/maple/setup.c delete mode 100644 arch/powerpc/platforms/maple/time.c diff --git a/Documentation/arch/powerpc/booting.rst b/Documentation/arch/powerpc/booting.rst index 11aa440f98cc9..472e97891aef2 100644 --- a/Documentation/arch/powerpc/booting.rst +++ b/Documentation/arch/powerpc/booting.rst @@ -93,8 +93,8 @@ given platform based on the content of the device-tree. Thus, you should: a) add your platform support as a _boolean_ option in - arch/powerpc/Kconfig, following the example of PPC_PSERIES, - PPC_PMAC and PPC_MAPLE. The latter is probably a good + arch/powerpc/Kconfig, following the example of PPC_PSERIES + and PPC_PMAC. The latter is probably a good example of a board support to start from. b) create your main platform file as diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 0bbec4afc0d59..20d05605fa83f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -223,12 +223,6 @@ config PPC_EARLY_DEBUG_RTAS_CONSOLE help Select this to enable early debugging via the RTAS console. -config PPC_EARLY_DEBUG_MAPLE - bool "Maple real mode" - depends on PPC_MAPLE - help - Select this to enable early debugging for Maple. 
- config PPC_EARLY_DEBUG_PAS_REALMODE bool "PA Semi real mode" depends on PPC_PASEMI diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index a4716d138cfc0..5a867f23fe7f7 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -30,7 +30,6 @@ zImage.coff zImage.epapr zImage.holly zImage.*lds -zImage.maple zImage.miboot zImage.pmac zImage.pseries diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index fa8518067d38e..1ff6ad4f6cd27 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -276,7 +276,6 @@ quiet_cmd_wrap = WRAP $@ image-$(CONFIG_PPC_PSERIES) += zImage.pseries image-$(CONFIG_PPC_POWERNV) += zImage.pseries -image-$(CONFIG_PPC_MAPLE) += zImage.maple image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries image-$(CONFIG_PPC_PS3) += dtbImage.ps3 image-$(CONFIG_PPC_CHRP) += zImage.chrp @@ -444,7 +443,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \ zImage.miboot zImage.pmac zImage.pseries \ - zImage.maple simpleImage.* otheros.bld + simpleImage.* otheros.bld # clean up files cached by wrapper clean-kernel-base := vmlinux.strip vmlinux.bin diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index b1f5549a3c9c4..1db60fe13802d 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -271,11 +271,6 @@ pseries) fi make_space=n ;; -maple) - platformo="$object/of.o $object/epapr.o" - link_address='0x400000' - make_space=n - ;; pmac|chrp) platformo="$object/of.o $object/epapr.o" make_space=n @@ -517,7 +512,7 @@ fi # post-processing needed for some platforms case "$platform" in -pseries|chrp|maple) +pseries|chrp) $objbin/addnote "$ofile" ;; coff) diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig deleted file mode 100644 index c821a97f4a899..0000000000000 --- 
a/arch/powerpc/configs/maple_defconfig +++ /dev/null @@ -1,111 +0,0 @@ -CONFIG_PPC64=y -CONFIG_SMP=y -CONFIG_NR_CPUS=4 -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_KPROBES=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y -# CONFIG_PPC_POWERNV is not set -# CONFIG_PPC_PSERIES is not set -# CONFIG_PPC_PMAC is not set -CONFIG_PPC_MAPLE=y -CONFIG_UDBG_RTAS_CONSOLE=y -CONFIG_GEN_RTC=y -CONFIG_KEXEC=y -CONFIG_IRQ_ALL_CPUS=y -CONFIG_PPC_4K_PAGES=y -CONFIG_PCI_MSI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IPV6 is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -# CONFIG_SCSI_PROC_FS is not set -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_IPR=y -CONFIG_ATA=y -CONFIG_PATA_AMD=y -CONFIG_ATA_GENERIC=y -CONFIG_NETDEVICES=y -CONFIG_AMD8111_ETH=y -CONFIG_TIGON3=y -CONFIG_E1000=y -CONFIG_USB_PEGASUS=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_HVC_RTAS=y -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_AMD8111=y -# CONFIG_VGA_CONSOLE is not set -CONFIG_HID_GYRATION=y -CONFIG_HID_PANTHERLORD=y -CONFIG_HID_PETALYNX=y -CONFIG_HID_SAMSUNG=y -CONFIG_HID_SUNPLUS=y -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -# CONFIG_USB_EHCI_HCD_PPC_OF is not set -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_UHCI_HCD=y -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_KEYSPAN=y -CONFIG_USB_SERIAL_TI=m 
-CONFIG_EXT2_FS=y -CONFIG_EXT4_FS=y -CONFIG_FS_DAX=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_DEFAULT="utf-8" -CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACK_USAGE=y -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y -CONFIG_BOOTX_TEXT=y -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_PCBC=m -# CONFIG_CRYPTO_HW is not set -CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index a5e3e7f97f4d7..f39c0d000c438 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -44,7 +44,6 @@ CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y CONFIG_PAPR_SCM=m CONFIG_PPC_SVM=y -CONFIG_PPC_MAPLE=y CONFIG_PPC_PASEMI=y CONFIG_PPC_PASEMI_IOMMU=y CONFIG_PPC_PS3=y diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 289023f7a6567..a8681b12864fd 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -38,7 +38,6 @@ void __init udbg_early_init(void); void __init udbg_init_debug_lpar(void); void __init udbg_init_debug_lpar_hvsi(void); void __init udbg_init_pmac_realmode(void); -void __init udbg_init_maple_realmode(void); void __init udbg_init_pas_realmode(void); void __init udbg_init_rtas_panel(void); void __init udbg_init_rtas_console(void); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 91123e102db40..a997c7f43dc01 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -74,7 +74,7 @@ _GLOBAL(rmci_off) blr #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +#ifdef CONFIG_PPC_PMAC /* * Do an IO access in real mode @@ -137,7 +137,7 @@ _GLOBAL(real_writeb) sync isync blr -#endif /* 
defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ +#endif // CONFIG_PPC_PMAC #ifdef CONFIG_PPC_PASEMI @@ -174,7 +174,7 @@ _GLOBAL(real_205_writeb) #endif /* CONFIG_PPC_PASEMI */ -#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE) +#ifdef CONFIG_CPU_FREQ_PMAC64 /* * SCOM access functions for 970 (FX only for now) * @@ -243,7 +243,7 @@ _GLOBAL(scom970_write) /* restore interrupts */ mtmsrd r5,1 blr -#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */ +#endif // CONFIG_CPU_FREQ_PMAC64 /* kexec_wait(phys_cpu) * diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index fbb68fc28ed3a..73210e5bcfa77 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2792,90 +2792,6 @@ static void __init flatten_device_tree(void) dt_struct_start, dt_struct_end); } -#ifdef CONFIG_PPC_MAPLE -/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property. - * The values are bad, and it doesn't even have the right number of cells. 
*/ -static void __init fixup_device_tree_maple(void) -{ - phandle isa; - u32 rloc = 0x01002000; /* IO space; PCI device = 4 */ - u32 isa_ranges[6]; - char *name; - - name = "/ht@0/isa@4"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(isa)) { - name = "/ht@0/isa@6"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - rloc = 0x01003000; /* IO space; PCI device = 6 */ - } - if (!PHANDLE_VALID(isa)) - return; - - if (prom_getproplen(isa, "ranges") != 12) - return; - if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) - == PROM_ERROR) - return; - - if (isa_ranges[0] != 0x1 || - isa_ranges[1] != 0xf4000000 || - isa_ranges[2] != 0x00010000) - return; - - prom_printf("Fixing up bogus ISA range on Maple/Apache...\n"); - - isa_ranges[0] = 0x1; - isa_ranges[1] = 0x0; - isa_ranges[2] = rloc; - isa_ranges[3] = 0x0; - isa_ranges[4] = 0x0; - isa_ranges[5] = 0x00010000; - prom_setprop(isa, name, "ranges", - isa_ranges, sizeof(isa_ranges)); -} - -#define CPC925_MC_START 0xf8000000 -#define CPC925_MC_LENGTH 0x1000000 -/* The values for memory-controller don't have right number of cells */ -static void __init fixup_device_tree_maple_memory_controller(void) -{ - phandle mc; - u32 mc_reg[4]; - char *name = "/hostbridge@f8000000"; - u32 ac, sc; - - mc = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(mc)) - return; - - if (prom_getproplen(mc, "reg") != 8) - return; - - prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac)); - prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc)); - if ((ac != 2) || (sc != 2)) - return; - - if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR) - return; - - if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH) - return; - - prom_printf("Fixing up bogus hostbridge on Maple...\n"); - - mc_reg[0] = 0x0; - mc_reg[1] = CPC925_MC_START; - mc_reg[2] = 0x0; - mc_reg[3] = CPC925_MC_LENGTH; - prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg)); -} -#else -#define 
fixup_device_tree_maple() -#define fixup_device_tree_maple_memory_controller() -#endif - #ifdef CONFIG_PPC_CHRP /* * Pegasos and BriQ lacks the "ranges" property in the isa node @@ -3193,8 +3109,6 @@ static inline void fixup_device_tree_pasemi(void) { } static void __init fixup_device_tree(void) { - fixup_device_tree_maple(); - fixup_device_tree_maple_memory_controller(); fixup_device_tree_chrp(); fixup_device_tree_pmac(); fixup_device_tree_efika(); diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 4b99208f5adcd..0a72a537f879e 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -39,9 +39,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE) /* RTAS console debug */ udbg_init_rtas_console(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) - /* Maple real mode debug */ - udbg_init_maple_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 313802aff5713..dfe8ed2192e8e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -205,29 +205,6 @@ void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) udbg_use_uart(); } -#ifdef CONFIG_PPC_MAPLE - -#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8) - -static u8 udbg_uart_in_maple(unsigned int reg) -{ - return real_readb(UDBG_UART_MAPLE_ADDR + reg); -} - -static void udbg_uart_out_maple(unsigned int reg, u8 val) -{ - real_writeb(val, UDBG_UART_MAPLE_ADDR + reg); -} - -void __init udbg_init_maple_realmode(void) -{ - udbg_uart_in = udbg_uart_in_maple; - udbg_uart_out = udbg_uart_out_maple; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_MAPLE */ - #ifdef CONFIG_PPC_PASEMI #define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL) diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 
1112a58316191..a454149ae02fc 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -7,7 +7,6 @@ source "arch/powerpc/platforms/chrp/Kconfig" source "arch/powerpc/platforms/512x/Kconfig" source "arch/powerpc/platforms/52xx/Kconfig" source "arch/powerpc/platforms/powermac/Kconfig" -source "arch/powerpc/platforms/maple/Kconfig" source "arch/powerpc/platforms/pasemi/Kconfig" source "arch/powerpc/platforms/ps3/Kconfig" source "arch/powerpc/platforms/cell/Kconfig" diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 786d374bff317..3cee4a842736d 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -14,7 +14,6 @@ obj-$(CONFIG_FSL_SOC_BOOKE) += 85xx/ obj-$(CONFIG_PPC_86xx) += 86xx/ obj-$(CONFIG_PPC_POWERNV) += powernv/ obj-$(CONFIG_PPC_PSERIES) += pseries/ -obj-$(CONFIG_PPC_MAPLE) += maple/ obj-$(CONFIG_PPC_PASEMI) += pasemi/ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig deleted file mode 100644 index 4c058cc57c901..0000000000000 --- a/arch/powerpc/platforms/maple/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -config PPC_MAPLE - depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN - bool "Maple 970FX Evaluation Board" - select FORCE_PCI - select MPIC - select U3_DART - select MPIC_U3_HT_IRQS - select GENERIC_TBSYNC - select PPC_UDBG_16550 - select PPC_970_NAP - select PPC_64S_HASH_MMU - select PPC_HASH_MMU_NATIVE - select PPC_RTAS - select MMIO_NVRAM - select ATA_NONSTANDARD if ATA - help - This option enables support for the Maple 970FX Evaluation Board. 
- For more information, refer to diff --git a/arch/powerpc/platforms/maple/Makefile b/arch/powerpc/platforms/maple/Makefile deleted file mode 100644 index 19f35ab828a7e..0000000000000 --- a/arch/powerpc/platforms/maple/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-y += setup.o pci.o time.o diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h deleted file mode 100644 index 8ddbaa4ebd0b4..0000000000000 --- a/arch/powerpc/platforms/maple/maple.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Declarations for maple-specific code. - * - * Maple is the name of a PPC970 evaluation board. - */ -extern int maple_set_rtc_time(struct rtc_time *tm); -extern void maple_get_rtc_time(struct rtc_time *tm); -extern time64_t maple_get_boot_time(void); -extern void maple_pci_init(void); -extern void maple_pci_irq_fixup(struct pci_dev *dev); -extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); - -extern struct pci_controller_ops maple_pci_controller_ops; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c deleted file mode 100644 index b9ff37c7f6f01..0000000000000 --- a/arch/powerpc/platforms/maple/pci.c +++ /dev/null @@ -1,672 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "maple.h" - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) 
-#endif - -static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; - -static int __init fixup_one_level_bus_range(struct device_node *node, int higher) -{ - for (; node; node = node->sibling) { - const int *bus_range; - const unsigned int *class_code; - int len; - - /* For PCI<->PCI bridges or CardBus bridges, we go down */ - class_code = of_get_property(node, "class-code", NULL); - if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && - (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) - continue; - bus_range = of_get_property(node, "bus-range", &len); - if (bus_range != NULL && len > 2 * sizeof(int)) { - if (bus_range[1] > higher) - higher = bus_range[1]; - } - higher = fixup_one_level_bus_range(node->child, higher); - } - return higher; -} - -/* This routine fixes the "bus-range" property of all bridges in the - * system since they tend to have their "last" member wrong on macs - * - * Note that the bus numbers manipulated here are OF bus numbers, they - * are not Linux bus numbers. 
- */ -static void __init fixup_bus_range(struct device_node *bridge) -{ - int *bus_range; - struct property *prop; - int len; - - /* Lookup the "bus-range" property for the hose */ - prop = of_find_property(bridge, "bus-range", &len); - if (prop == NULL || prop->value == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %pOF\n", - bridge); - return; - } - bus_range = prop->value; - bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); -} - - -static unsigned long u3_agp_cfa0(u8 devfn, u8 off) -{ - return (1 << (unsigned long)PCI_SLOT(devfn)) | - ((unsigned long)PCI_FUNC(devfn) << 8) | - ((unsigned long)off & 0xFCUL); -} - -static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off) -{ - return ((unsigned long)bus << 16) | - ((unsigned long)devfn << 8) | - ((unsigned long)off & 0xFCUL) | - 1UL; -} - -static volatile void __iomem *u3_agp_cfg_access(struct pci_controller* hose, - u8 bus, u8 dev_fn, u8 offset) -{ - unsigned int caddr; - - if (bus == hose->first_busno) { - if (dev_fn < (11 << 3)) - return NULL; - caddr = u3_agp_cfa0(dev_fn, offset); - } else - caddr = u3_agp_cfa1(bus, dev_fn, offset); - - /* Uninorth will return garbage if we don't read back the value ! */ - do { - out_le32(hose->cfg_addr, caddr); - } while (in_le32(hose->cfg_addr) != caddr); - - offset &= 0x07; - return hose->cfg_data + offset; -} - -static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. 
- */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u3_agp_pci_ops = -{ - .read = u3_agp_read_config, - .write = u3_agp_write_config, -}; - -static unsigned long u3_ht_cfa0(u8 devfn, u8 off) -{ - return (devfn << 8) | off; -} - -static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off) -{ - return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL; -} - -static volatile void __iomem *u3_ht_cfg_access(struct pci_controller* hose, - u8 bus, u8 devfn, u8 offset) -{ - if (bus == hose->first_busno) { - if (PCI_SLOT(devfn) == 0) - return NULL; - return hose->cfg_data + u3_ht_cfa0(devfn, offset); - } else - return hose->cfg_data + u3_ht_cfa1(bus, devfn, offset); -} - -static int u3_ht_root_read_config(struct pci_controller *hose, u8 offset, - int len, u32 *val) -{ - volatile void __iomem *addr; - - addr = hose->cfg_addr; - addr += ((offset & ~3) << 2) + (4 - len - (offset & 3)); - - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_be16(addr); - break; - default: - *val = in_be32(addr); - break; - } - - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_root_write_config(struct pci_controller *hose, u8 offset, - int len, u32 val) -{ - 
volatile void __iomem *addr; - - addr = hose->cfg_addr + ((offset & ~3) << 2) + (4 - len - (offset & 3)); - - if (offset >= PCI_BASE_ADDRESS_0 && offset < PCI_CAPABILITY_LIST) - return PCIBIOS_SUCCESSFUL; - - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_be16(addr, val); - break; - default: - out_be32(addr, val); - break; - } - - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) - return u3_ht_root_read_config(hose, offset, len, val); - - if (offset > 0xff) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) - return u3_ht_root_write_config(hose, offset, len, val); - - if (offset > 0xff) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. 
- */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u3_ht_pci_ops = -{ - .read = u3_ht_read_config, - .write = u3_ht_write_config, -}; - -static unsigned int u4_pcie_cfa0(unsigned int devfn, unsigned int off) -{ - return (1 << PCI_SLOT(devfn)) | - (PCI_FUNC(devfn) << 8) | - ((off >> 8) << 28) | - (off & 0xfcu); -} - -static unsigned int u4_pcie_cfa1(unsigned int bus, unsigned int devfn, - unsigned int off) -{ - return (bus << 16) | - (devfn << 8) | - ((off >> 8) << 28) | - (off & 0xfcu) | 1u; -} - -static volatile void __iomem *u4_pcie_cfg_access(struct pci_controller* hose, - u8 bus, u8 dev_fn, int offset) -{ - unsigned int caddr; - - if (bus == hose->first_busno) - caddr = u4_pcie_cfa0(dev_fn, offset); - else - caddr = u4_pcie_cfa1(bus, dev_fn, offset); - - /* Uninorth will return garbage if we don't read back the value ! */ - do { - out_le32(hose->cfg_addr, caddr); - } while (in_le32(hose->cfg_addr) != caddr); - - offset &= 0x03; - return hose->cfg_data + offset; -} - -static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. 
- */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} -static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u4_pcie_pci_ops = -{ - .read = u4_pcie_read_config, - .write = u4_pcie_write_config, -}; - -static void __init setup_u3_agp(struct pci_controller* hose) -{ - /* On G5, we move AGP up to high bus number so we don't need - * to reassign bus numbers for HT. If we ever have P2P bridges - * on AGP, we'll have to move pci_assign_all_buses to the - * pci_controller structure so we enable it for AGP and not for - * HT childs. - * We hard code the address because of the different size of - * the reg address cell, we shall fix that by killing struct - * reg_property and using some accessor functions instead - */ - hose->first_busno = 0xf0; - hose->last_busno = 0xff; - hose->ops = &u3_agp_pci_ops; - hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); - hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); - - u3_agp = hose; -} - -static void __init setup_u4_pcie(struct pci_controller* hose) -{ - /* We currently only implement the "non-atomic" config space, to - * be optimised later. 
- */ - hose->ops = &u4_pcie_pci_ops; - hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); - hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); - - u4_pcie = hose; -} - -static void __init setup_u3_ht(struct pci_controller* hose) -{ - hose->ops = &u3_ht_pci_ops; - - /* We hard code the address because of the different size of - * the reg address cell, we shall fix that by killing struct - * reg_property and using some accessor functions instead - */ - hose->cfg_data = ioremap(0xf2000000, 0x02000000); - hose->cfg_addr = ioremap(0xf8070000, 0x1000); - - hose->first_busno = 0; - hose->last_busno = 0xef; - - u3_ht = hose; -} - -static int __init maple_add_bridge(struct device_node *dev) -{ - int len; - struct pci_controller *hose; - char* disp_name; - const int *bus_range; - int primary = 1; - - DBG("Adding PCI host bridge %pOF\n", dev); - - bus_range = of_get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n", - dev); - } - - hose = pcibios_alloc_controller(dev); - if (hose == NULL) - return -ENOMEM; - hose->first_busno = bus_range ? bus_range[0] : 0; - hose->last_busno = bus_range ? bus_range[1] : 0xff; - hose->controller_ops = maple_pci_controller_ops; - - disp_name = NULL; - if (of_device_is_compatible(dev, "u3-agp")) { - setup_u3_agp(hose); - disp_name = "U3-AGP"; - primary = 0; - } else if (of_device_is_compatible(dev, "u3-ht")) { - setup_u3_ht(hose); - disp_name = "U3-HT"; - primary = 1; - } else if (of_device_is_compatible(dev, "u4-pcie")) { - setup_u4_pcie(hose); - disp_name = "U4-PCIE"; - primary = 0; - } - printk(KERN_INFO "Found %s PCI host bridge. 
Firmware bus number: %d->%d\n", - disp_name, hose->first_busno, hose->last_busno); - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(hose, dev, primary); - - /* Fixup "bus-range" OF property */ - fixup_bus_range(dev); - - /* Check for legacy IOs */ - isa_bridge_find_early(hose); - - /* create pci_dn's for DT nodes under this PHB */ - pci_devs_phb_init_dynamic(hose); - - return 0; -} - - -void maple_pci_irq_fixup(struct pci_dev *dev) -{ - DBG(" -> maple_pci_irq_fixup\n"); - - /* Fixup IRQ for PCIe host */ - if (u4_pcie != NULL && dev->bus->number == 0 && - pci_bus_to_host(dev->bus) == u4_pcie) { - printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n"); - dev->irq = irq_create_mapping(NULL, 1); - if (dev->irq) - irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW); - } - - /* Hide AMD8111 IDE interrupt when in legacy mode so - * the driver calls pci_get_legacy_ide_irq() - */ - if (dev->vendor == PCI_VENDOR_ID_AMD && - dev->device == PCI_DEVICE_ID_AMD_8111_IDE && - (dev->class & 5) != 5) { - dev->irq = 0; - } - - DBG(" <- maple_pci_irq_fixup\n"); -} - -static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge) -{ - struct pci_controller *hose = pci_bus_to_host(bridge->bus); - struct device_node *np, *child; - - if (hose != u3_agp) - return 0; - - /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We - * assume there is no P2P bridge on the AGP bus, which should be a - * safe assumptions hopefully. - */ - np = hose->dn; - PCI_DN(np)->busno = 0xf0; - for_each_child_of_node(np, child) - PCI_DN(child)->busno = 0xf0; - - return 0; -} - -void __init maple_pci_init(void) -{ - struct device_node *np, *root; - struct device_node *ht = NULL; - - /* Probe root PCI hosts, that is on U3 the AGP host and the - * HyperTransport host. 
That one is actually "kept" around - * and actually added last as its resource management relies - * on the AGP resources to have been setup first - */ - root = of_find_node_by_path("/"); - if (root == NULL) { - printk(KERN_CRIT "maple_find_bridges: can't find root of device tree\n"); - return; - } - for_each_child_of_node(root, np) { - if (!of_node_is_type(np, "pci") && !of_node_is_type(np, "ht")) - continue; - if ((of_device_is_compatible(np, "u4-pcie") || - of_device_is_compatible(np, "u3-agp")) && - maple_add_bridge(np) == 0) - of_node_get(np); - - if (of_device_is_compatible(np, "u3-ht")) { - of_node_get(np); - ht = np; - } - } - of_node_put(root); - - /* Now setup the HyperTransport host if we found any - */ - if (ht && maple_add_bridge(ht) != 0) - of_node_put(ht); - - ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare; - - /* Tell pci.c to not change any resource allocations. */ - pci_add_flags(PCI_PROBE_ONLY); -} - -int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) -{ - struct device_node *np; - unsigned int defirq = channel ? 15 : 14; - unsigned int irq; - - if (pdev->vendor != PCI_VENDOR_ID_AMD || - pdev->device != PCI_DEVICE_ID_AMD_8111_IDE) - return defirq; - - np = pci_device_to_OF_node(pdev); - if (np == NULL) { - printk("Failed to locate OF node for IDE %s\n", - pci_name(pdev)); - return defirq; - } - irq = irq_of_parse_and_map(np, channel & 0x1); - if (!irq) { - printk("Failed to map onboard IDE interrupt for channel %d\n", - channel); - return defirq; - } - return irq; -} - -static void quirk_ipr_msi(struct pci_dev *dev) -{ - /* Something prevents MSIs from the IPR from working on Bimini, - * and the driver has no smarts to recover. So disable MSI - * on it for now. 
*/ - - if (machine_is(maple)) { - dev->no_msi = 1; - dev_info(&dev->dev, "Quirk disabled MSI\n"); - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN, - quirk_ipr_msi); - -struct pci_controller_ops maple_pci_controller_ops = { -}; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c deleted file mode 100644 index f329a03edf4a6..0000000000000 --- a/arch/powerpc/platforms/maple/setup.c +++ /dev/null @@ -1,363 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Maple (970 eval board) setup code - * - * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), - * IBM Corp. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "maple.h" - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) 
-#endif - -static unsigned long maple_find_nvram_base(void) -{ - struct device_node *rtcs; - unsigned long result = 0; - - /* find NVRAM device */ - rtcs = of_find_compatible_node(NULL, "nvram", "AMD8111"); - if (rtcs) { - struct resource r; - if (of_address_to_resource(rtcs, 0, &r)) { - printk(KERN_EMERG "Maple: Unable to translate NVRAM" - " address\n"); - goto bail; - } - if (!(r.flags & IORESOURCE_IO)) { - printk(KERN_EMERG "Maple: NVRAM address isn't PIO!\n"); - goto bail; - } - result = r.start; - } else - printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); - bail: - of_node_put(rtcs); - return result; -} - -static void __noreturn maple_restart(char *cmd) -{ - unsigned int maple_nvram_base; - const unsigned int *maple_nvram_offset, *maple_nvram_command; - struct device_node *sp; - - maple_nvram_base = maple_find_nvram_base(); - if (maple_nvram_base == 0) - goto fail; - - /* find service processor device */ - sp = of_find_node_by_name(NULL, "service-processor"); - if (!sp) { - printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); - goto fail; - } - maple_nvram_offset = of_get_property(sp, "restart-addr", NULL); - maple_nvram_command = of_get_property(sp, "restart-value", NULL); - of_node_put(sp); - - /* send command */ - outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); - for (;;) ; - fail: - printk(KERN_EMERG "Maple: Manual Restart Required\n"); - for (;;) ; -} - -static void __noreturn maple_power_off(void) -{ - unsigned int maple_nvram_base; - const unsigned int *maple_nvram_offset, *maple_nvram_command; - struct device_node *sp; - - maple_nvram_base = maple_find_nvram_base(); - if (maple_nvram_base == 0) - goto fail; - - /* find service processor device */ - sp = of_find_node_by_name(NULL, "service-processor"); - if (!sp) { - printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); - goto fail; - } - maple_nvram_offset = of_get_property(sp, "power-off-addr", NULL); - maple_nvram_command = of_get_property(sp, 
"power-off-value", NULL); - of_node_put(sp); - - /* send command */ - outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); - for (;;) ; - fail: - printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); - for (;;) ; -} - -static void __noreturn maple_halt(void) -{ - maple_power_off(); -} - -#ifdef CONFIG_SMP -static struct smp_ops_t maple_smp_ops = { - .probe = smp_mpic_probe, - .message_pass = smp_mpic_message_pass, - .kick_cpu = smp_generic_kick_cpu, - .setup_cpu = smp_mpic_setup_cpu, - .give_timebase = smp_generic_give_timebase, - .take_timebase = smp_generic_take_timebase, -}; -#endif /* CONFIG_SMP */ - -static void __init maple_use_rtas_reboot_and_halt_if_present(void) -{ - if (rtas_function_implemented(RTAS_FN_SYSTEM_REBOOT) && - rtas_function_implemented(RTAS_FN_POWER_OFF)) { - ppc_md.restart = rtas_restart; - pm_power_off = rtas_power_off; - ppc_md.halt = rtas_halt; - } -} - -static void __init maple_setup_arch(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - - /* Setup SMP callback */ -#ifdef CONFIG_SMP - smp_ops = &maple_smp_ops; -#endif - maple_use_rtas_reboot_and_halt_if_present(); - - printk(KERN_DEBUG "Using native/NAP idle loop\n"); - - mmio_nvram_init(); -} - -/* - * This is almost identical to pSeries and CHRP. We need to make that - * code generic at one point, with appropriate bits in the device-tree to - * identify the presence of an HT APIC - */ -static void __init maple_init_IRQ(void) -{ - struct device_node *root, *np, *mpic_node = NULL; - const unsigned int *opprop; - unsigned long openpic_addr = 0; - int naddr, n, i, opplen, has_isus = 0; - struct mpic *mpic; - unsigned int flags = 0; - - /* Locate MPIC in the device-tree. 
Note that there is a bug - * in Maple device-tree where the type of the controller is - * open-pic and not interrupt-controller - */ - - for_each_node_by_type(np, "interrupt-controller") - if (of_device_is_compatible(np, "open-pic")) { - mpic_node = np; - break; - } - if (mpic_node == NULL) - for_each_node_by_type(np, "open-pic") { - mpic_node = np; - break; - } - if (mpic_node == NULL) { - printk(KERN_ERR - "Failed to locate the MPIC interrupt controller\n"); - return; - } - - /* Find address list in /platform-open-pic */ - root = of_find_node_by_path("/"); - naddr = of_n_addr_cells(root); - opprop = of_get_property(root, "platform-open-pic", &opplen); - if (opprop) { - openpic_addr = of_read_number(opprop, naddr); - has_isus = (opplen > naddr); - printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n", - openpic_addr, has_isus); - } - - BUG_ON(openpic_addr == 0); - - /* Check for a big endian MPIC */ - if (of_property_read_bool(np, "big-endian")) - flags |= MPIC_BIG_ENDIAN; - - /* XXX Maple specific bits */ - flags |= MPIC_U3_HT_IRQS; - /* All U3/U4 are big-endian, older SLOF firmware doesn't encode this */ - flags |= MPIC_BIG_ENDIAN; - - /* Setup the openpic driver. More device-tree junks, we hard code no - * ISUs for now. I'll have to revisit some stuffs with the folks doing - * the firmware for those - */ - mpic = mpic_alloc(mpic_node, openpic_addr, flags, - /*has_isus ? 16 :*/ 0, 0, " MPIC "); - BUG_ON(mpic == NULL); - - /* Add ISUs */ - opplen /= sizeof(u32); - for (n = 0, i = naddr; i < opplen; i += naddr, n++) { - unsigned long isuaddr = of_read_number(opprop + i, naddr); - mpic_assign_isu(mpic, n, isuaddr); - } - - /* All ISUs are setup, complete initialization */ - mpic_init(mpic); - ppc_md.get_irq = mpic_get_irq; - of_node_put(mpic_node); - of_node_put(root); -} - -static void __init maple_progress(char *s, unsigned short hex) -{ - printk("*** %04x : %s\n", hex, s ? 
s : ""); -} - - -/* - * Called very early, MMU is off, device-tree isn't unflattened - */ -static int __init maple_probe(void) -{ - if (!of_machine_is_compatible("Momentum,Maple") && - !of_machine_is_compatible("Momentum,Apache")) - return 0; - - pm_power_off = maple_power_off; - - iommu_init_early_dart(&maple_pci_controller_ops); - - return 1; -} - -#ifdef CONFIG_EDAC -/* - * Register a platform device for CPC925 memory controller on - * all boards with U3H (CPC925) bridge. - */ -static int __init maple_cpc925_edac_setup(void) -{ - struct platform_device *pdev; - struct device_node *np = NULL; - struct resource r; - int ret; - volatile void __iomem *mem; - u32 rev; - - np = of_find_node_by_type(NULL, "memory-controller"); - if (!np) { - printk(KERN_ERR "%s: Unable to find memory-controller node\n", - __func__); - return -ENODEV; - } - - ret = of_address_to_resource(np, 0, &r); - of_node_put(np); - - if (ret < 0) { - printk(KERN_ERR "%s: Unable to get memory-controller reg\n", - __func__); - return -ENODEV; - } - - mem = ioremap(r.start, resource_size(&r)); - if (!mem) { - printk(KERN_ERR "%s: Unable to map memory-controller memory\n", - __func__); - return -ENOMEM; - } - - rev = __raw_readl(mem); - iounmap(mem); - - if (rev < 0x34 || rev > 0x3f) { /* U3H */ - printk(KERN_ERR "%s: Non-CPC925(U3H) bridge revision: %02x\n", - __func__, rev); - return 0; - } - - pdev = platform_device_register_simple("cpc925_edac", 0, &r, 1); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); - - printk(KERN_INFO "%s: CPC925 platform device created\n", __func__); - - return 0; -} -machine_device_initcall(maple, maple_cpc925_edac_setup); -#endif - -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .discover_phbs = maple_pci_init, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = 
maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .progress = maple_progress, - .power_save = power4_idle, -}; diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c deleted file mode 100644 index 91606411d2e08..0000000000000 --- a/arch/powerpc/platforms/maple/time.c +++ /dev/null @@ -1,170 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), - * IBM Corp. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "maple.h" - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -static int maple_rtc_addr; - -static int maple_clock_read(int addr) -{ - outb_p(addr, maple_rtc_addr); - return inb_p(maple_rtc_addr+1); -} - -static void maple_clock_write(unsigned long val, int addr) -{ - outb_p(addr, maple_rtc_addr); - outb_p(val, maple_rtc_addr+1); -} - -void maple_get_rtc_time(struct rtc_time *tm) -{ - do { - tm->tm_sec = maple_clock_read(RTC_SECONDS); - tm->tm_min = maple_clock_read(RTC_MINUTES); - tm->tm_hour = maple_clock_read(RTC_HOURS); - tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH); - tm->tm_mon = maple_clock_read(RTC_MONTH); - tm->tm_year = maple_clock_read(RTC_YEAR); - } while (tm->tm_sec != maple_clock_read(RTC_SECONDS)); - - if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY) - || RTC_ALWAYS_BCD) { - tm->tm_sec = bcd2bin(tm->tm_sec); - tm->tm_min = bcd2bin(tm->tm_min); - tm->tm_hour = bcd2bin(tm->tm_hour); - tm->tm_mday = bcd2bin(tm->tm_mday); - tm->tm_mon = bcd2bin(tm->tm_mon); - tm->tm_year = bcd2bin(tm->tm_year); - } - if ((tm->tm_year + 1900) < 1970) - tm->tm_year += 100; - - tm->tm_wday = -1; -} - -int maple_set_rtc_time(struct rtc_time *tm) -{ - unsigned char save_control, save_freq_select; - int sec, min, 
hour, mon, mday, year; - - spin_lock(&rtc_lock); - - save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */ - - maple_clock_write((save_control|RTC_SET), RTC_CONTROL); - - save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */ - - maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - sec = tm->tm_sec; - min = tm->tm_min; - hour = tm->tm_hour; - mon = tm->tm_mon; - mday = tm->tm_mday; - year = tm->tm_year; - - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - sec = bin2bcd(sec); - min = bin2bcd(min); - hour = bin2bcd(hour); - mon = bin2bcd(mon); - mday = bin2bcd(mday); - year = bin2bcd(year); - } - maple_clock_write(sec, RTC_SECONDS); - maple_clock_write(min, RTC_MINUTES); - maple_clock_write(hour, RTC_HOURS); - maple_clock_write(mon, RTC_MONTH); - maple_clock_write(mday, RTC_DAY_OF_MONTH); - maple_clock_write(year, RTC_YEAR); - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... 
-- Markus Kuhn - */ - maple_clock_write(save_control, RTC_CONTROL); - maple_clock_write(save_freq_select, RTC_FREQ_SELECT); - - spin_unlock(&rtc_lock); - - return 0; -} - -static struct resource rtc_iores = { - .name = "rtc", - .flags = IORESOURCE_IO | IORESOURCE_BUSY, -}; - -time64_t __init maple_get_boot_time(void) -{ - struct rtc_time tm; - struct device_node *rtcs; - - rtcs = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00"); - if (rtcs) { - struct resource r; - if (of_address_to_resource(rtcs, 0, &r)) { - printk(KERN_EMERG "Maple: Unable to translate RTC" - " address\n"); - goto bail; - } - if (!(r.flags & IORESOURCE_IO)) { - printk(KERN_EMERG "Maple: RTC address isn't PIO!\n"); - goto bail; - } - maple_rtc_addr = r.start; - printk(KERN_INFO "Maple: Found RTC at IO 0x%x\n", - maple_rtc_addr); - } - bail: - of_node_put(rtcs); - if (maple_rtc_addr == 0) { - maple_rtc_addr = RTC_PORT(0); /* legacy address */ - printk(KERN_INFO "Maple: No device node for RTC, assuming " - "legacy address (0x%x)\n", maple_rtc_addr); - } - - rtc_iores.start = maple_rtc_addr; - rtc_iores.end = maple_rtc_addr + 7; - request_resource(&ioport_resource, &rtc_iores); - - maple_get_rtc_time(&tm); - return rtc_tm_to_time64(&tm); -} - From 0b9846529e29ba988ce88b98df633de79675fcb3 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:34 +0530 Subject: [PATCH 07/55] powerpc/trace: Account for -fpatchable-function-entry support by toolchain So far, we have relied on the fact that gcc supports both -mprofile-kernel, as well as -fpatchable-function-entry, and clang supports neither. Our Makefile only checks for CONFIG_MPROFILE_KERNEL to decide which files to build. Clang has a feature request out [*] to implement -fpatchable-function-entry, and is unlikely to support -mprofile-kernel. Update our Makefile checks so that we pick up the correct files to build once clang picks up support for -fpatchable-function-entry. 
[*] https://github.com/llvm/llvm-project/issues/57031 Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-2-hbathini@linux.ibm.com --- arch/powerpc/kernel/trace/Makefile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 125f4ca588b98..d6c3885453bda 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -9,12 +9,15 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o -ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o +ifdef CONFIG_FUNCTION_TRACER +obj32-y += ftrace.o ftrace_entry.o +ifeq ($(CONFIG_MPROFILE_KERNEL)$(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY),) +obj64-y += ftrace_64_pg.o ftrace_64_pg_entry.o else -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_64_pg_entry.o +obj64-y += ftrace.o ftrace_entry.o +endif endif + obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64) += $(obj64-y) From be87d713eaddf0421ccd61cc060c4c29bc36fc9b Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:35 +0530 Subject: [PATCH 08/55] powerpc/kprobes: Use ftrace to determine if a probe is at function entry Rather than hard-coding the offset into a function to be used to determine if a kprobe is at function entry, use ftrace_location() to determine the ftrace location within the function and categorize all instructions till that offset to be function entry. For functions that cannot be traced, we fall back to using a fixed offset of 8 (two instructions) to categorize a probe as being at function entry for 64-bit elfv2, unless we are using pcrel. 
Acked-by: Masami Hiramatsu (Google) Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-3-hbathini@linux.ibm.com --- arch/powerpc/kernel/kprobes.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index f8aa91bc3b175..bf382c459e1f0 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -105,24 +105,22 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } -static bool arch_kprobe_on_func_entry(unsigned long offset) +static bool arch_kprobe_on_func_entry(unsigned long addr, unsigned long offset) { -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else + unsigned long ip = ftrace_location(addr); + + if (ip) + return offset <= (ip - addr); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return offset <= 8; return !offset; -#endif } /* XXX try and fold the magic of kprobe_lookup_name() in this */ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - *on_func_entry = arch_kprobe_on_func_entry(offset); + *on_func_entry = arch_kprobe_on_func_entry(addr, offset); return (kprobe_opcode_t *)(addr + offset); } From 161d62c2b067c4071cb515efe16475171e1c051e Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:36 +0530 Subject: [PATCH 09/55] powerpc64/ftrace: Nop out additional 'std' instruction emitted by gcc v5.x Gcc v5.x emits a 3-instruction sequence for -mprofile-kernel: mflr r0 std r0, 16(r1) bl _mcount Gcc v6.x moved to a simpler 2-instruction sequence by removing the 'std' instruction. The store saved the return address in the LR save area in the caller stack frame for stack unwinding. 
However, with dynamic ftrace, we no longer have a call to _mcount on kernel boot when ftrace is not enabled. When ftrace is enabled, that store is performed within ftrace_caller(). As such, the additional 'std' instruction is redundant. Nop it out on kernel boot. With this change, we now use the same 2-instruction profiling sequence with both -mprofile-kernel, as well as -fpatchable-function-entry on 64-bit powerpc. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-4-hbathini@linux.ibm.com --- arch/powerpc/kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d8d6b4fd9a14c..2ef504700e8d7 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -246,8 +246,12 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ ret = ftrace_read_inst(ip - 4, &old); if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) { + /* Gcc v5.x emit the additional 'std' instruction, gcc v6.x don't */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)), + ppc_inst(PPC_RAW_NOP())); } } else { return -EINVAL; From 654b3fa61b817a46037197b73a7ac6d36d01df7e Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:37 +0530 Subject: [PATCH 10/55] powerpc32/ftrace: Unify 32-bit and 64-bit ftrace entry code On 32-bit powerpc, gcc generates a three instruction sequence for function profiling: mflr r0 stw r0, 4(r1) bl _mcount On kernel boot, the call to _mcount() is nop-ed out, to be patched back in when ftrace is actually enabled. The 'stw' instruction therefore is not necessary unless ftrace is enabled. 
Nop it out during ftrace init. When ftrace is enabled, we want the 'stw' so that stack unwinding works properly. Perform the same within the ftrace handler, similar to 64-bit powerpc. Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-5-hbathini@linux.ibm.com --- arch/powerpc/kernel/trace/ftrace.c | 6 ++++-- arch/powerpc/kernel/trace/ftrace_entry.S | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2ef504700e8d7..8c3e523e4f964 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -240,8 +240,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) } else if (IS_ENABLED(CONFIG_PPC32)) { /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - if (!ret) - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)), + ppc_inst(PPC_RAW_NOP())); } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ ret = ftrace_read_inst(ip - 4, &old); diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 76dbe9fd2c0f2..244a1c7bb1e8e 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -33,6 +33,8 @@ * and then arrange for the ftrace function to be called. 
*/ .macro ftrace_regs_entry allregs + /* Save the original return address in A's stack frame */ + PPC_STL r0, LRSAVE(r1) /* Create a minimal stack frame for representing B */ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) @@ -44,8 +46,6 @@ SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 - /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 From c12cfe9dee077763708e0a5cf3aca02a85b1e8ba Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:38 +0530 Subject: [PATCH 11/55] powerpc/module_64: Convert #ifdef to IS_ENABLED() Minor refactor for converting #ifdef to IS_ENABLED(). Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-6-hbathini@linux.ibm.com --- arch/powerpc/kernel/module_64.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index e9bab599d0c27..1db88409bd955 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -241,14 +241,8 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, } } -#ifdef CONFIG_DYNAMIC_FTRACE - /* make the trampoline to the ftrace_caller */ - relocs++; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - /* an additional one for ftrace_regs_caller */ - relocs++; -#endif -#endif + /* stubs for ftrace_caller and ftrace_regs_caller */ + relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); From 8b0dc1305ea0bbb015b560193cdd76fd4100f062 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:39 +0530 Subject: [PATCH 12/55] powerpc/ftrace: Remove pointer to struct module from dyn_arch_ftrace Pointer to struct module is only relevant for 
ftrace records belonging to kernel modules. Having this field in dyn_arch_ftrace wastes memory for all ftrace records belonging to the kernel. Remove the same in favour of looking up the module from the ftrace record address, similar to other architectures. Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-7-hbathini@linux.ibm.com --- arch/powerpc/include/asm/ftrace.h | 1 - arch/powerpc/kernel/trace/ftrace.c | 49 +++++++++-------- arch/powerpc/kernel/trace/ftrace_64_pg.c | 69 ++++++++++-------------- 3 files changed, 56 insertions(+), 63 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 559560286e6d0..278d4548e8f10 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -24,7 +24,6 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, struct module; struct dyn_ftrace; struct dyn_arch_ftrace { - struct module *mod; }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 8c3e523e4f964..fe0546fbac8e1 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -106,28 +106,43 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } +#ifdef CONFIG_MODULES +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + struct module *mod = NULL; + + preempt_disable(); + mod = __module_text_address(ip); + preempt_enable(); + + if (!mod) + pr_err("No module loaded at addr=%lx\n", ip); + + return (addr == (unsigned long)ftrace_caller ? 
mod->arch.tramp : mod->arch.tramp_regs); +} +#else +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + return 0; +} +#endif + static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) { unsigned long ip = rec->ip; unsigned long stub; - if (is_offset_in_branch_range(addr - ip)) { + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; -#ifdef CONFIG_MODULES - } else if (rec->arch.mod) { - /* Module code would be going to one of the module stubs */ - stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp : - rec->arch.mod->arch.tramp_regs); -#endif - } else if (core_kernel_text(ip)) { + else if (core_kernel_text(ip)) /* We would be branching to one of our ftrace stubs */ stub = find_ftrace_tramp(ip); - if (!stub) { - pr_err("0x%lx: No ftrace stubs reachable\n", ip); - return -EINVAL; - } - } else { + else + stub = ftrace_lookup_module_stub(ip, addr); + + if (!stub) { + pr_err("0x%lx: No ftrace stubs reachable\n", ip); return -EINVAL; } @@ -262,14 +277,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) if (ret) return ret; - if (!core_kernel_text(ip)) { - if (!mod) { - pr_err("0x%lx: No module provided for non-kernel address\n", ip); - return -EFAULT; - } - rec->arch.mod = mod; - } - /* Nop-out the ftrace location */ new = ppc_inst(PPC_RAW_NOP()); addr = MCOUNT_ADDR; diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 12fab1803bcf4..8a551dfca3d04 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -116,6 +116,20 @@ static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) } #ifdef CONFIG_MODULES +static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) +{ + struct module *mod; + + preempt_disable(); + mod = __module_text_address(rec->ip); + preempt_enable(); + + if (!mod) + pr_err("No module loaded at 
addr=%lx\n", rec->ip); + + return mod; +} + static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -124,6 +138,12 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; ppc_inst_t op, pop; + if (!mod) { + mod = ftrace_lookup_module(rec); + if (!mod) + return -EINVAL; + } + /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); @@ -366,27 +386,6 @@ int ftrace_make_nop(struct module *mod, return -EINVAL; } - /* - * Out of range jumps are called from modules. - * We should either already have a pointer to the module - * or it has been passed in. - */ - if (!rec->arch.mod) { - if (!mod) { - pr_err("No module loaded addr=%lx\n", addr); - return -EFAULT; - } - rec->arch.mod = mod; - } else if (mod) { - if (mod != rec->arch.mod) { - pr_err("Record mod %p not equal to passed in mod %p\n", - rec->arch.mod, mod); - return -EINVAL; - } - /* nothing to do if mod == rec->arch.mod */ - } else - mod = rec->arch.mod; - return __ftrace_make_nop(mod, rec, addr); } @@ -411,7 +410,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ppc_inst_t op[2]; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; /* read where this goes */ if (copy_inst_from_kernel_nofault(op, ip)) @@ -533,16 +535,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - /* - * Out of range jumps are called from modules. - * Being that we are converting from nop, it had better - * already have a module defined. 
- */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - return __ftrace_make_call(rec, addr); } @@ -555,7 +547,10 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, ppc_inst_t op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; /* If we never set up ftrace trampolines, then bail */ if (!mod->arch.tramp || !mod->arch.tramp_regs) { @@ -668,14 +663,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return -EINVAL; } - /* - * Out of range jumps are called from modules. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - return __ftrace_modify_call(rec, old_addr, addr); } #endif From 1d59bd2fc07f0b2e643b2a07405cf0717b93984f Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:40 +0530 Subject: [PATCH 13/55] powerpc/ftrace: Skip instruction patching if the instructions are the same To simplify upcoming changes to ftrace, add a check to skip actual instruction patching if the old and new instructions are the same. We still validate that the instruction is what we expect, but don't actually patch the same instruction again. 
Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-8-hbathini@linux.ibm.com --- arch/powerpc/kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index fe0546fbac8e1..719517265d39a 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -82,7 +82,7 @@ static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_ { int ret = ftrace_validate_inst(ip, old); - if (!ret) + if (!ret && !ppc_inst_equal(old, new)) ret = patch_instruction((u32 *)ip, new); return ret; From ed6144656bb1ea29ad83671b48a21c89e7873b8a Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:41 +0530 Subject: [PATCH 14/55] powerpc/ftrace: Move ftrace stub used for init text before _einittext Move the ftrace stub used to cover inittext before _einittext so that it is within kernel text, as seen through core_kernel_text(). This is required for a subsequent change to ftrace. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-9-hbathini@linux.ibm.com --- arch/powerpc/kernel/vmlinux.lds.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7ab4e2fb28b1e..b4c9decc7a75c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -265,14 +265,13 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT - + *(.tramp.ftrace.init); /* *.init.text might be RO so we must ensure this section ends on * a page boundary. */ . 
= ALIGN(PAGE_SIZE); _einittext = .; - *(.tramp.ftrace.init); } :text /* .exit.text is discarded at runtime, not link time, From 9670f6d2097c4f97e15c67920dfddc664d7ee91c Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:42 +0530 Subject: [PATCH 15/55] powerpc64/bpf: Fold bpf_jit_emit_func_call_hlp() into bpf_jit_emit_func_call_rel() Commit 61688a82e047 ("powerpc/bpf: enable kfunc call") enhanced bpf_jit_emit_func_call_hlp() to handle calls out to module region, where bpf progs are generated. The only difference now between bpf_jit_emit_func_call_hlp() and bpf_jit_emit_func_call_rel() is in handling of the initial pass where target function address is not known. Fold that logic into bpf_jit_emit_func_call_hlp() and rename it to bpf_jit_emit_func_call_rel() to simplify bpf function call JIT code. We don't actually need to load/restore TOC across a call out to a different kernel helper or to a different bpf program since they all work with the kernel TOC. We only need to do it if we have to call out to a module function. So, guard TOC load/restore with appropriate conditions. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-10-hbathini@linux.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 61 +++++++++---------------------- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 2cbcdf93cc197..f3be024fc6854 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -202,14 +202,22 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -static int -bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) { unsigned long func_addr = func ? 
ppc_function_entry((void *)func) : 0; long reladdr; - if (WARN_ON_ONCE(!kernel_text_address(func_addr))) - return -EINVAL; + /* bpf to bpf call, func is not known in the initial pass. Emit 5 nops as a placeholder */ + if (!func) { + for (int i = 0; i < 5; i++) + EMIT(PPC_RAW_NOP()); + /* elfv1 needs an additional instruction to load addr from descriptor */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + return 0; + } #ifdef CONFIG_PPC_KERNEL_PCREL reladdr = func_addr - local_paca->kernelbase; @@ -266,7 +274,8 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, * We can clobber r2 since we get called through a * function pointer (so caller will save/restore r2). */ - EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); + if (is_module_text_address(func_addr)) + EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); } else { PPC_LI64(_R12, func); EMIT(PPC_RAW_MTCTR(_R12)); @@ -276,46 +285,14 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, * Load r2 with kernel TOC as kernel TOC is used if function address falls * within core kernel text. */ - EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); + if (is_module_text_address(func_addr)) + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); } #endif return 0; } -int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) -{ - unsigned int i, ctx_idx = ctx->idx; - - if (WARN_ON_ONCE(func && is_module_text_address(func))) - return -EINVAL; - - /* skip past descriptor if elf v1 */ - func += FUNCTION_DESCR_SIZE; - - /* Load function address into r12 */ - PPC_LI64(_R12, func); - - /* For bpf-to-bpf function calls, the callee's address is unknown - * until the last extra pass. 
As seen above, we use PPC_LI64() to - * load the callee's address, but this may optimize the number of - * instructions required based on the nature of the address. - * - * Since we don't want the number of instructions emitted to increase, - * we pad the optimized PPC_LI64() call with NOPs to guarantee that - * we always have a five-instruction sequence, which is the maximum - * that PPC_LI64() can emit. - */ - if (!image) - for (i = ctx->idx - ctx_idx; i < 5; i++) - EMIT(PPC_RAW_NOP()); - - EMIT(PPC_RAW_MTCTR(_R12)); - EMIT(PPC_RAW_BCTRL()); - - return 0; -} - static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* @@ -1102,11 +1079,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code if (ret < 0) return ret; - if (func_addr_fixed) - ret = bpf_jit_emit_func_call_hlp(image, fimage, ctx, func_addr); - else - ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); - + ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); if (ret) return ret; From 782f46cbce5328da9380f166bd31cd17a04a7b10 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:43 +0530 Subject: [PATCH 16/55] powerpc/ftrace: Add a postlink script to validate function tracer Function tracer on powerpc can only work with vmlinux having a .text size of up to ~64MB due to powerpc branch instruction having a limited relative branch range of 32MB. Today, this is only detected on kernel boot when ftrace is init'ed. Add a post-link script to check the size of .text so that we can detect this at build time, and break the build if necessary. We add a dependency on !COMPILE_TEST for CONFIG_HAVE_FUNCTION_TRACER so that allyesconfig and other test builds can continue to work without enabling ftrace. 
Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-11-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 2 +- arch/powerpc/Makefile.postlink | 8 +++++ arch/powerpc/tools/ftrace_check.sh | 50 ++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100755 arch/powerpc/tools/ftrace_check.sh diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6aaca48955a34..bb99ec9da63c9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -243,7 +243,7 @@ config PPC select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if PPC64 || (PPC32 && CC_IS_GCC) + select HAVE_FUNCTION_TRACER if !COMPILE_TEST && (PPC64 || (PPC32 && CC_IS_GCC)) select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC select HAVE_GENERIC_VDSO select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index ae5a4256b03d8..bb601be361736 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -24,6 +24,9 @@ else $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" endif +quiet_cmd_ftrace_check = CHKFTRC $@ + cmd_ftrace_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/ftrace_check.sh "$(NM)" "$@" + # `@true` prevents complaint when there is nothing to be done vmlinux: FORCE @@ -34,6 +37,11 @@ endif ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) endif +ifdef CONFIG_FUNCTION_TRACER +ifndef CONFIG_PPC64_ELF_ABI_V1 + $(call cmd,ftrace_check) +endif +endif clean: rm -f .tmp_symbols.txt diff --git a/arch/powerpc/tools/ftrace_check.sh b/arch/powerpc/tools/ftrace_check.sh new file mode 100755 index 0000000000000..405e7e3066175 --- /dev/null +++ b/arch/powerpc/tools/ftrace_check.sh @@ -0,0 
+1,50 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# +# This script checks vmlinux to ensure that all functions can call ftrace_caller() either directly, +# or through the stub, ftrace_tramp_text, at the end of kernel text. + +# Error out if any command fails +set -e + +# Allow for verbose output +if [ "$V" = "1" ]; then + set -x +fi + +if [ $# -lt 2 ]; then + echo "$0 [path to nm] [path to vmlinux]" 1>&2 + exit 1 +fi + +# Have Kbuild supply the path to nm so we handle cross compilation. +nm="$1" +vmlinux="$2" + +stext_addr=$($nm "$vmlinux" | grep -e " [TA] _stext$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') +ftrace_caller_addr=$($nm "$vmlinux" | grep -e " T ftrace_caller$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') +ftrace_tramp_addr=$($nm "$vmlinux" | grep -e " T ftrace_tramp_text$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') + +ftrace_caller_offset=$(echo "ibase=16;$ftrace_caller_addr - $stext_addr" | bc) +ftrace_tramp_offset=$(echo "ibase=16;$ftrace_tramp_addr - $ftrace_caller_addr" | bc) +sz_32m=$(printf "%d" 0x2000000) +sz_64m=$(printf "%d" 0x4000000) + +# ftrace_caller - _stext < 32M +if [ "$ftrace_caller_offset" -ge "$sz_32m" ]; then + echo "ERROR: ftrace_caller (0x$ftrace_caller_addr) is beyond 32MiB of _stext" 1>&2 + echo "ERROR: consider disabling CONFIG_FUNCTION_TRACER, or reducing the size \ + of kernel text" 1>&2 + exit 1 +fi + +# ftrace_tramp_text - ftrace_caller < 64M +if [ "$ftrace_tramp_offset" -ge "$sz_64m" ]; then + echo "ERROR: kernel text extends beyond 64MiB from ftrace_caller" 1>&2 + echo "ERROR: consider disabling CONFIG_FUNCTION_TRACER, or reducing the size \ + of kernel text" 1>&2 + exit 1 +fi From 1198c9c689cfdaa2d08eb508c13ff116043f07b7 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:44 +0530 Subject: [PATCH 17/55] kbuild: Add generic hook for architectures to use before the final vmlinux link On powerpc, we would like to be able to make a pass on vmlinux.o and generate a new 
object file to be linked into vmlinux. Add a generic pass in Makefile.vmlinux that architectures can use for this purpose. Architectures need to select CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX and must provide arch/<arch>/tools/Makefile with a vmlinux.arch.o target, which will be invoked prior to the final vmlinux link step. Acked-by: Masahiro Yamada Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-12-hbathini@linux.ibm.com --- arch/Kconfig | 6 ++++++ scripts/Makefile.vmlinux | 7 +++++++ scripts/link-vmlinux.sh | 7 ++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index 8af374ea1adc2..a1538927c8c18 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1684,4 +1684,10 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT config ARCH_NEED_CMPXCHG_1_EMU bool +config ARCH_WANTS_PRE_LINK_VMLINUX + bool + help + An architecture can select this if it provides arch/<arch>/tools/Makefile + with vmlinux.arch.o target to be linked into vmlinux.
+ endmenu diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 1284f05555b97..dddad554e9127 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -22,6 +22,13 @@ targets += .vmlinux.export.o vmlinux: .vmlinux.export.o endif +ifdef CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX +vmlinux: arch/$(SRCARCH)/tools/vmlinux.arch.o + +arch/$(SRCARCH)/tools/vmlinux.arch.o: vmlinux.o FORCE + $(Q)$(MAKE) $(build)=arch/$(SRCARCH)/tools $@ +endif + ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Final link of vmlinux with optional arch pass after final link diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index a9b3f34a78d2c..a3c634b2f348f 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -100,7 +100,7 @@ vmlinux_link() ${ld} ${ldflags} -o ${output} \ ${wl}--whole-archive ${objs} ${wl}--no-whole-archive \ ${wl}--start-group ${libs} ${wl}--end-group \ - ${kallsymso} ${btf_vmlinux_bin_o} ${ldlibs} + ${kallsymso} ${btf_vmlinux_bin_o} ${arch_vmlinux_o} ${ldlibs} } # generate .BTF typeinfo from DWARF debuginfo @@ -198,6 +198,11 @@ fi ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init init/version-timestamp.o +arch_vmlinux_o= +if is_enabled CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX; then + arch_vmlinux_o=arch/${SRCARCH}/tools/vmlinux.arch.o +fi + btf_vmlinux_bin_o= kallsymso= strip_debug= From eec37961a56aa4f3fe1c33ffd48eec7d1bb0c009 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:45 +0530 Subject: [PATCH 18/55] powerpc64/ftrace: Move ftrace sequence out of line Function profile sequence on powerpc includes two instructions at the beginning of each function: mflr r0 bl ftrace_caller The call to ftrace_caller() gets nop'ed out during kernel boot and is patched in when ftrace is enabled. Given the sequence, we cannot return from ftrace_caller with 'blr' as we need to keep LR and r0 intact. This results in link stack (return address predictor) imbalance when ftrace is enabled. 
To address that, we would like to use a three-instruction sequence: mflr r0 bl ftrace_caller mtlr r0 Furthermore, to support DYNAMIC_FTRACE_WITH_CALL_OPS, we need to reserve two instruction slots before the function. This results in a total of five instruction slots to be reserved for ftrace use on each function that is traced. Move the function profile sequence out-of-line to minimize its impact. To do this, we reserve a single nop at function entry using -fpatchable-function-entry=1 and add a pass on vmlinux.o to determine the total number of functions that can be traced. This is then used to generate a .S file reserving the appropriate amount of space for use as ftrace stubs, which is built and linked into vmlinux. On bootup, the stub space is split into separate stubs per function and populated with the proper instruction sequence. A pointer to the associated stub is maintained in dyn_arch_ftrace. For modules, space for ftrace stubs is reserved from the generic module stub space. This is restricted to and enabled by default only on 64-bit powerpc, though there are some changes to accommodate 32-bit powerpc. This is done so that 32-bit powerpc could choose to opt into this based on further tests and benchmarks. As an example, after this patch, kernel functions will have a single nop at function entry: <kernel_clone>: addis r2,r12,467 addi r2,r2,-16028 nop mfocrf r11,8 ... When ftrace is enabled, the nop is converted to an unconditional branch to the stub associated with that function: <kernel_clone>: addis r2,r12,467 addi r2,r2,-16028 b ftrace_ool_stub_text_end+0x11b28 mfocrf r11,8 ... The associated stub: <ftrace_ool_stub_text_end+0x11b28>: mflr r0 bl ftrace_caller mtlr r0 b kernel_clone+0xc ... This change showed an improvement of ~10% in the null_syscall benchmark on a Power 10 system with ftrace enabled.
Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-13-hbathini@linux.ibm.com --- arch/powerpc/Kbuild | 2 +- arch/powerpc/Kconfig | 4 + arch/powerpc/Makefile | 4 + arch/powerpc/include/asm/ftrace.h | 11 ++ arch/powerpc/include/asm/module.h | 5 + arch/powerpc/kernel/asm-offsets.c | 4 + arch/powerpc/kernel/module_64.c | 58 +++++++- arch/powerpc/kernel/trace/ftrace.c | 162 +++++++++++++++++++-- arch/powerpc/kernel/trace/ftrace_entry.S | 116 +++++++++++---- arch/powerpc/tools/.gitignore | 2 + arch/powerpc/tools/Makefile | 9 ++ arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 41 ++++++ 12 files changed, 380 insertions(+), 38 deletions(-) create mode 100644 arch/powerpc/tools/.gitignore create mode 100644 arch/powerpc/tools/Makefile create mode 100755 arch/powerpc/tools/ftrace-gen-ool-stubs.sh diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild index 571f260b08423..b010ccb071b6d 100644 --- a/arch/powerpc/Kbuild +++ b/arch/powerpc/Kbuild @@ -19,4 +19,4 @@ obj-$(CONFIG_KEXEC_CORE) += kexec/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ # for cleaning -subdir- += boot +subdir- += boot tools diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bb99ec9da63c9..c995f3fe19e9a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -569,6 +569,10 @@ config ARCH_USING_PATCHABLE_FUNCTION_ENTRY def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN +config PPC_FTRACE_OUT_OF_LINE + def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY + select ARCH_WANTS_PRE_LINK_VMLINUX + config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1c5c28d58e944..a52167830e8b6 100644 
--- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -148,7 +148,11 @@ CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float) ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY +ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +CC_FLAGS_FTRACE := -fpatchable-function-entry=1 +else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif else CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 278d4548e8f10..bdbafc668b20a 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -24,6 +24,10 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, struct module; struct dyn_ftrace; struct dyn_arch_ftrace { +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* pointer to the associated out-of-line stub */ + unsigned long ool_stub; +#endif }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -130,6 +134,13 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } #ifdef CONFIG_FUNCTION_TRACER extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +struct ftrace_ool_stub { + u32 insn[4]; +}; +extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_inittext[]; +extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_inittext_count; +#endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); #else diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 300c777cc3075..9ee70a4a0fde1 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -47,6 +47,11 @@ struct mod_arch_specific { #ifdef CONFIG_DYNAMIC_FTRACE unsigned long tramp; unsigned long tramp_regs; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + struct ftrace_ool_stub *ool_stubs; + unsigned int ool_stub_count; + unsigned int ool_stub_index; +#endif 
#endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d8fe97662a028..340f69bfcaa7f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -675,5 +675,9 @@ int main(void) DEFINE(BPT_SIZE, BPT_SIZE); #endif +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 1db88409bd955..6816e9967cab6 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -205,7 +205,9 @@ static int relacmp(const void *_x, const void *_y) /* Get size of potential trampolines required. */ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs) + const Elf64_Shdr *sechdrs, + char *secstrings, + struct module *me) { /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; @@ -244,6 +246,24 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, /* stubs for ftrace_caller and ftrace_regs_caller */ relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* stubs for the function tracer */ + for (i = 1; i < hdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, "__patchable_function_entries")) { + me->arch.ool_stub_count = sechdrs[i].sh_size / sizeof(unsigned long); + me->arch.ool_stub_index = 0; + relocs += roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / + sizeof(struct ppc64_stub_entry); + break; + } + } + if (i == hdr->e_shnum) { + pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); + return -ENOEXEC; + } +#endif + pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -454,7 +474,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, #endif /* Override the 
stubs size */ - sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs, secstrings, me); return 0; } @@ -1079,6 +1099,37 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return 0; } +static int setup_ftrace_ool_stubs(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + unsigned int i, total_stubs, num_stubs; + struct ppc64_stub_entry *stub; + + total_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stub); + num_stubs = roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / sizeof(struct ppc64_stub_entry); + + /* Find the next available entry */ + stub = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; stub_func_addr(stub[i].funcdata); i++) + if (WARN_ON(i >= total_stubs)) + return -1; + + if (WARN_ON(i + num_stubs > total_stubs)) + return -1; + + stub += i; + me->arch.ool_stubs = (struct ftrace_ool_stub *)stub; + + /* reserve stubs */ + for (i = 0; i < num_stubs; i++) + if (patch_u32((void *)&stub->funcdata, PPC_RAW_NOP())) + return -1; +#endif + + return 0; +} + int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.tramp = stub_for_addr(sechdrs, @@ -1097,6 +1148,9 @@ int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) if (!mod->arch.tramp) return -ENOENT; + if (setup_ftrace_ool_stubs(sechdrs, mod->arch.tramp, mod)) + return -ENOENT; + return 0; } #endif diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 719517265d39a..1fee074388cc6 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -37,7 +37,8 @@ unsigned long ftrace_call_adjust(unsigned long addr) if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) return 0; - if 
(IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && + !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) addr += MCOUNT_INSN_SIZE; return addr; @@ -127,11 +128,25 @@ static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long a } #endif +static unsigned long ftrace_get_ool_stub(struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + return rec->arch.ool_stub; +#else + BUILD_BUG(); +#endif +} + static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) { - unsigned long ip = rec->ip; + unsigned long ip; unsigned long stub; + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + else + ip = rec->ip; + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; @@ -142,7 +157,7 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ stub = ftrace_lookup_module_stub(ip, addr); if (!stub) { - pr_err("0x%lx: No ftrace stubs reachable\n", ip); + pr_err("0x%lx (0x%lx): No ftrace stubs reachable\n", ip, rec->ip); return -EINVAL; } @@ -150,6 +165,92 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ return 0; } +static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + static int ool_stub_text_end_index, ool_stub_inittext_index; + int ret = 0, ool_stub_count, *ool_stub_index; + ppc_inst_t inst; + /* + * See ftrace_entry.S if changing the below instruction sequence, as we rely on + * decoding the last branch instruction here to recover the correct function ip. 
+ */ + struct ftrace_ool_stub *ool_stub, ool_stub_template = { + .insn = { + PPC_RAW_MFLR(_R0), + PPC_RAW_NOP(), /* bl ftrace_caller */ + PPC_RAW_MTLR(_R0), + PPC_RAW_NOP() /* b rec->ip + 4 */ + } + }; + + WARN_ON(rec->arch.ool_stub); + + if (is_kernel_inittext(rec->ip)) { + ool_stub = ftrace_ool_stub_inittext; + ool_stub_index = &ool_stub_inittext_index; + ool_stub_count = ftrace_ool_stub_inittext_count; + } else if (is_kernel_text(rec->ip)) { + ool_stub = ftrace_ool_stub_text_end; + ool_stub_index = &ool_stub_text_end_index; + ool_stub_count = ftrace_ool_stub_text_end_count; +#ifdef CONFIG_MODULES + } else if (mod) { + ool_stub = mod->arch.ool_stubs; + ool_stub_index = &mod->arch.ool_stub_index; + ool_stub_count = mod->arch.ool_stub_count; +#endif + } else { + return -EINVAL; + } + + ool_stub += (*ool_stub_index)++; + + if (WARN_ON(*ool_stub_index > ool_stub_count)) + return -EINVAL; + + if (!is_offset_in_branch_range((long)rec->ip - (long)&ool_stub->insn[0]) || + !is_offset_in_branch_range((long)(rec->ip + MCOUNT_INSN_SIZE) - + (long)&ool_stub->insn[3])) { + pr_err("%s: ftrace ool stub out of range (%p -> %p).\n", + __func__, (void *)rec->ip, (void *)&ool_stub->insn[0]); + return -EINVAL; + } + + rec->arch.ool_stub = (unsigned long)&ool_stub->insn[0]; + + /* bl ftrace_caller */ + if (!mod) + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &inst); +#ifdef CONFIG_MODULES + else + /* + * We can't use ftrace_get_call_inst() since that uses + * __module_text_address(rec->ip) to look up the module. + * But, since the module is not fully formed at this stage, + * the lookup fails. We know the target though, so generate + * the branch inst directly. 
+ */ + inst = ftrace_create_branch_inst(ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE, + mod->arch.tramp, 1); +#endif + ool_stub_template.insn[1] = ppc_inst_val(inst); + + /* b rec->ip + 4 */ + if (!ret && create_branch(&inst, &ool_stub->insn[3], rec->ip + MCOUNT_INSN_SIZE, 0)) + return -EINVAL; + ool_stub_template.insn[3] = ppc_inst_val(inst); + + if (!ret) + ret = patch_instructions((u32 *)ool_stub, (u32 *)&ool_stub_template, + sizeof(ool_stub_template), false); + + return ret; +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + BUILD_BUG(); +#endif +} + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -162,18 +263,29 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { ppc_inst_t old, new; - int ret; + unsigned long ip = rec->ip; + int ret = 0; /* This can only ever be called during module load */ - if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip))) + if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(ip))) return -EINVAL; old = ppc_inst(PPC_RAW_NOP()); - ret = ftrace_get_call_inst(rec, addr, &new); - if (ret) - return ret; + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &old); + } + + ret |= ftrace_get_call_inst(rec, addr, &new); + + if (!ret) + ret = ftrace_modify_code(ip, old, new); - return ftrace_modify_code(rec->ip, old, new); + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), + ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip))); + + return ret; } int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -206,6 +318,13 @@ void ftrace_replace_code(int enable) new_addr = 
ftrace_get_addr_new(rec); update = ftrace_update_record(rec, enable); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && update != FTRACE_UPDATE_IGNORE) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &nop_inst); + if (ret) + goto out; + } + switch (update) { case FTRACE_UPDATE_IGNORE: default: @@ -230,6 +349,24 @@ void ftrace_replace_code(int enable) if (!ret) ret = ftrace_modify_code(ip, old, new); + + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && + (update == FTRACE_UPDATE_MAKE_NOP || update == FTRACE_UPDATE_MAKE_CALL)) { + /* Update the actual ftrace location */ + call_inst = ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - + (long)rec->ip)); + nop_inst = ppc_inst(PPC_RAW_NOP()); + ip = rec->ip; + + if (update == FTRACE_UPDATE_MAKE_NOP) + ret = ftrace_modify_code(ip, call_inst, nop_inst); + else + ret = ftrace_modify_code(ip, nop_inst, call_inst); + + if (ret) + goto out; + } + if (ret) goto out; } @@ -249,7 +386,8 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) /* Verify instructions surrounding the ftrace location */ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { /* Expect nops */ - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); + if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); if (!ret) ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP())); } else if (IS_ENABLED(CONFIG_PPC32)) { @@ -277,6 +415,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) if (ret) return ret; + /* Set up out-of-line stub */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return ftrace_init_ool_stub(mod, rec); + /* Nop-out the ftrace location */ new = ppc_inst(PPC_RAW_NOP()); addr = MCOUNT_ADDR; diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 244a1c7bb1e8e..5b2fc6483dce9 100644 --- 
a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -56,7 +56,7 @@ SAVE_GPR(2, r1) SAVE_GPRS(11, 31, r1) .else -#ifdef CONFIG_LIVEPATCH_64 +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) SAVE_GPR(14, r1) #endif .endif @@ -78,10 +78,6 @@ /* Get the _mcount() call site out of LR */ mflr r7 - /* Save it as pt_regs->nip */ - PPC_STL r7, _NIP(r1) - /* Also save it in B's stackframe header for proper unwind */ - PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) /* Save the read LR in pt_regs->link */ PPC_STL r0, _LINK(r1) @@ -96,16 +92,6 @@ lwz r5,function_trace_op@l(r3) #endif -#ifdef CONFIG_LIVEPATCH_64 - mr r14, r7 /* remember old NIP */ -#endif - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - /* Save special regs */ PPC_STL r8, _MSR(r1) .if \allregs == 1 @@ -114,17 +100,69 @@ PPC_STL r11, _CCR(r1) .endif +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Save our real return address in nvr for return */ + .if \allregs == 0 + SAVE_GPR(15, r1) + .endif + mr r15, r7 + /* + * We want the ftrace location in the function, but our lr (in r7) + * points at the 'mtlr r0' instruction in the out of line stub. To + * recover the ftrace location, we read the branch instruction in the + * stub, and adjust our lr by the branch offset. + * + * See ftrace_init_ool_stub() for the profile sequence. + */ + lwz r8, MCOUNT_INSN_SIZE(r7) + slwi r8, r8, 6 + srawi r8, r8, 6 + add r3, r7, r8 + /* + * Override our nip to point past the branch in the original function. + * This allows reliable stack trace and the ftrace stack tracer to work as-is. 
+ */ + addi r7, r3, MCOUNT_INSN_SIZE +#else + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE +#endif + + /* Save NIP as pt_regs->nip */ + PPC_STL r7, _NIP(r1) + /* Also save it in B's stackframe header for proper unwind */ + PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + mr r14, r7 /* remember old NIP */ +#endif + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + /* Load &pt_regs in r6 for call below */ addi r6, r1, STACK_INT_FRAME_REGS .endm .macro ftrace_regs_exit allregs +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE /* Load ctr with the possibly modified NIP */ PPC_LL r3, _NIP(r1) mtctr r3 #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ +#endif +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + /* Load LR with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) + cmpd r14, r3 /* has NIP been altered? */ + bne- 1f + + mr r3, r15 + .if \allregs == 0 + REST_GPR(15, r1) + .endif +1: mtlr r3 #endif /* Restore gprs */ @@ -132,14 +170,16 @@ REST_GPRS(2, 31, r1) .else REST_GPRS(3, 10, r1) -#ifdef CONFIG_LIVEPATCH_64 +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) REST_GPR(14, r1) #endif .endif /* Restore possibly modified LR */ PPC_LL r0, _LINK(r1) +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE mtlr r0 +#endif #ifdef CONFIG_PPC64 /* Restore callee's TOC */ @@ -153,7 +193,16 @@ /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif - bctr /* jump after _mcount site */ + /* jump after _mcount site */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* + * Return with blr to keep the link stack balanced. The function profiling sequence + * uses 'mtlr r0' to restore LR. 
+ */ + blr +#else + bctr +#endif .endm _GLOBAL(ftrace_regs_caller) @@ -177,6 +226,11 @@ _GLOBAL(ftrace_stub) #ifdef CONFIG_PPC64 ftrace_no_trace: +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + blr +#else mflr r3 mtctr r3 REST_GPR(3, r1) @@ -184,6 +238,7 @@ ftrace_no_trace: mtlr r0 bctr #endif +#endif #ifdef CONFIG_LIVEPATCH_64 /* @@ -194,11 +249,17 @@ ftrace_no_trace: * We get here when a function A, calls another function B, but B has * been live patched with a new function C. * - * On entry: - * - we have no stack frame and can not allocate one + * On entry, we have no stack frame and can not allocate one. + * + * With PPC_FTRACE_OUT_OF_LINE=n, on entry: * - LR points back to the original caller (in A) * - CTR holds the new NIP in C * - r0, r11 & r12 are free + * + * With PPC_FTRACE_OUT_OF_LINE=y, on entry: + * - r0 points back to the original caller (in A) + * - LR holds the new NIP in C + * - r11 & r12 are free */ livepatch_handler: ld r12, PACA_THREAD_INFO(r13) @@ -208,18 +269,23 @@ livepatch_handler: addi r11, r11, 24 std r11, TI_livepatch_sp(r12) - /* Save toc & real LR on livepatch stack */ - std r2, -24(r11) - mflr r12 - std r12, -16(r11) - /* Store stack end marker */ lis r12, STACK_END_MAGIC@h ori r12, r12, STACK_END_MAGIC@l std r12, -8(r11) - /* Put ctr in r12 for global entry and branch there */ + /* Save toc & real LR on livepatch stack */ + std r2, -24(r11) +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + mflr r12 + std r12, -16(r11) mfctr r12 +#else + std r0, -16(r11) + mflr r12 + /* Put ctr in r12 for global entry and branch there */ + mtctr r12 +#endif bctrl /* diff --git a/arch/powerpc/tools/.gitignore b/arch/powerpc/tools/.gitignore new file mode 100644 index 0000000000000..ec380a14a09aa --- /dev/null +++ b/arch/powerpc/tools/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/vmlinux.arch.S diff --git a/arch/powerpc/tools/Makefile b/arch/powerpc/tools/Makefile new file 
mode 100644 index 0000000000000..d2e7ecd5f46f9 --- /dev/null +++ b/arch/powerpc/tools/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +quiet_cmd_gen_ftrace_ool_stubs = GEN $@ + cmd_gen_ftrace_ool_stubs = $< "$(CONFIG_64BIT)" "$(OBJDUMP)" vmlinux.o $@ + +$(obj)/vmlinux.arch.S: $(src)/ftrace-gen-ool-stubs.sh vmlinux.o FORCE + $(call if_changed,gen_ftrace_ool_stubs) + +targets += vmlinux.arch.S diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh new file mode 100755 index 0000000000000..96e1ca5803e4b --- /dev/null +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -0,0 +1,41 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# Error out on error +set -e + +is_64bit="$1" +objdump="$2" +vmlinux_o="$3" +arch_vmlinux_S="$4" + +RELOCATION=R_PPC64_ADDR64 +if [ -z "$is_64bit" ]; then + RELOCATION=R_PPC_ADDR32 +fi + +num_ool_stubs_text=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | + grep -v ".init.text" | grep -c "$RELOCATION") +num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | + grep ".init.text" | grep -c "$RELOCATION") + +cat > "$arch_vmlinux_S" <<EOF +#include <asm/asm-offsets.h> +#include <linux/linkage.h> + +.pushsection .tramp.ftrace.text,"aw" +SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text) + +SYM_CODE_START(ftrace_ool_stub_text_end) + .space $num_ool_stubs_text * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_text_end) +.popsection + +.pushsection .tramp.ftrace.init,"aw" +SYM_DATA(ftrace_ool_stub_inittext_count, .long $num_ool_stubs_inittext) + +SYM_CODE_START(ftrace_ool_stub_inittext) + .space $num_ool_stubs_inittext * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_inittext) +.popsection +EOF From cf9bc0efcce2c324314cf7f5138c08f85ef7b5eb Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:46 +0530 Subject: [PATCH 19/55] powerpc64/ftrace: Support .text larger than 32MB with out-of-line stubs We are restricted to a .text size of ~32MB when
using out-of-line function profile sequence. Allow this to be extended up to the previous limit of ~64MB by reserving space in the middle of .text. A new config option CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE is introduced to specify the number of function stubs that are reserved in .text. On boot, ftrace utilizes stubs from this area first before using the stub area at the end of .text. A ppc64le defconfig has ~44k functions that can be traced. A more conservative value of 32k functions is chosen as the default value of PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE so that we do not allot more space than necessary by default. If building a kernel that only has 32k trace-able functions, we won't allot any more space at the end of .text during the pass on vmlinux.o. Otherwise, only the remaining functions get space for stubs at the end of .text. This default value should help cover a .text size of ~48MB in total (including space reserved at the end of .text which can cover up to 32MB), which should be sufficient for most common builds. For a very small kernel build, this can be set to 0. Or, this can be bumped up to a larger value to support vmlinux .text size up to ~64MB. 
Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-14-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 12 ++++++++++++ arch/powerpc/include/asm/ftrace.h | 6 ++++-- arch/powerpc/kernel/trace/ftrace.c | 21 +++++++++++++++++---- arch/powerpc/kernel/trace/ftrace_entry.S | 8 ++++++++ arch/powerpc/tools/Makefile | 3 ++- arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 21 +++++++++++++++------ 6 files changed, 58 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c995f3fe19e9a..50d418f72ee8e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -573,6 +573,18 @@ config PPC_FTRACE_OUT_OF_LINE def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY select ARCH_WANTS_PRE_LINK_VMLINUX +config PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE + int "Number of ftrace out-of-line stubs to reserve within .text" + depends on PPC_FTRACE_OUT_OF_LINE + default 32768 + help + Number of stubs to reserve for use by ftrace. This space is + reserved within .text, and is distinct from any additional space + added at the end of .text before the final vmlinux link. Set to + zero to have stubs only be generated at the end of vmlinux (only + if the size of vmlinux is less than 32MB). Set to a higher value + if building vmlinux larger than 48MB. 
+ config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index bdbafc668b20a..28f3590ca7808 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -138,8 +138,10 @@ extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; struct ftrace_ool_stub { u32 insn[4]; }; -extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_inittext[]; -extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_inittext_count; +extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_text[], + ftrace_ool_stub_inittext[]; +extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, + ftrace_ool_stub_inittext_count; #endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 1fee074388cc6..bee2c54a8c047 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -168,7 +168,7 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) { #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE - static int ool_stub_text_end_index, ool_stub_inittext_index; + static int ool_stub_text_index, ool_stub_text_end_index, ool_stub_inittext_index; int ret = 0, ool_stub_count, *ool_stub_index; ppc_inst_t inst; /* @@ -191,9 +191,22 @@ static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) ool_stub_index = &ool_stub_inittext_index; ool_stub_count = ftrace_ool_stub_inittext_count; } else if (is_kernel_text(rec->ip)) { - ool_stub = ftrace_ool_stub_text_end; - ool_stub_index = &ool_stub_text_end_index; - ool_stub_count = ftrace_ool_stub_text_end_count; + /* + * ftrace records are sorted, so we first use up the stub area 
within .text + * (ftrace_ool_stub_text) before using the area at the end of .text + * (ftrace_ool_stub_text_end), unless the stub is out of range of the record. + */ + if (ool_stub_text_index >= ftrace_ool_stub_text_count || + !is_offset_in_branch_range((long)rec->ip - + (long)&ftrace_ool_stub_text[ool_stub_text_index])) { + ool_stub = ftrace_ool_stub_text_end; + ool_stub_index = &ool_stub_text_end_index; + ool_stub_count = ftrace_ool_stub_text_end_count; + } else { + ool_stub = ftrace_ool_stub_text; + ool_stub_index = &ool_stub_text_index; + ool_stub_count = ftrace_ool_stub_text_count; + } #ifdef CONFIG_MODULES } else if (mod) { ool_stub = mod->arch.ool_stubs; diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 5b2fc6483dce9..a6bf7f8410403 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -374,6 +374,14 @@ _GLOBAL(return_to_handler) blr #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE) + +SYM_CODE_START(ftrace_ool_stub_text) + .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_text) +#endif + .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: diff --git a/arch/powerpc/tools/Makefile b/arch/powerpc/tools/Makefile index d2e7ecd5f46f9..e1f7afcd9fdfd 100644 --- a/arch/powerpc/tools/Makefile +++ b/arch/powerpc/tools/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-or-later quiet_cmd_gen_ftrace_ool_stubs = GEN $@ - cmd_gen_ftrace_ool_stubs = $< "$(CONFIG_64BIT)" "$(OBJDUMP)" vmlinux.o $@ + cmd_gen_ftrace_ool_stubs = $< "$(CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE)" "$(CONFIG_64BIT)" \ + "$(OBJDUMP)" vmlinux.o $@ $(obj)/vmlinux.arch.S: $(src)/ftrace-gen-ool-stubs.sh vmlinux.o FORCE $(call if_changed,gen_ftrace_ool_stubs) diff --git 
a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh index 96e1ca5803e4b..6a201df83524e 100755 --- a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -4,10 +4,11 @@ # Error out on error set -e -is_64bit="$1" -objdump="$2" -vmlinux_o="$3" -arch_vmlinux_S="$4" +num_ool_stubs_text_builtin="$1" +is_64bit="$2" +objdump="$3" +vmlinux_o="$4" +arch_vmlinux_S="$5" RELOCATION=R_PPC64_ADDR64 if [ -z "$is_64bit" ]; then @@ -19,15 +20,23 @@ num_ool_stubs_text=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | grep ".init.text" | grep -c "$RELOCATION") +if [ "$num_ool_stubs_text" -gt "$num_ool_stubs_text_builtin" ]; then + num_ool_stubs_text_end=$((num_ool_stubs_text - num_ool_stubs_text_builtin)) +else + num_ool_stubs_text_end=0 +fi + cat > "$arch_vmlinux_S" <<EOF #include <asm/asm-offsets.h> #include <linux/linkage.h> .pushsection .tramp.ftrace.text,"aw" -SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text) +SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text_end) SYM_CODE_START(ftrace_ool_stub_text_end) - .space $num_ool_stubs_text * FTRACE_OOL_STUB_SIZE +#if $num_ool_stubs_text_end + .space $num_ool_stubs_text_end * FTRACE_OOL_STUB_SIZE +#endif SYM_CODE_END(ftrace_ool_stub_text_end) .popsection From e717754f0bb5c5347aac82232691340955735ce1 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:47 +0530 Subject: [PATCH 20/55] powerpc/ftrace: Add support for DYNAMIC_FTRACE_WITH_CALL_OPS Implement support for DYNAMIC_FTRACE_WITH_CALL_OPS similar to the arm64 implementation. This works by patching-in a pointer to an associated ftrace_ops structure before each traceable function. If multiple ftrace_ops are associated with a call site, then a special ftrace_list_ops is used to enable iterating over all the registered ftrace_ops.
If no ftrace_ops are associated with a call site, then a special ftrace_nop_ops structure is used to render the ftrace call as a no-op. ftrace trampoline can then read the associated ftrace_ops for a call site by loading from an offset from the LR, and branch directly to the associated function. The primary advantage with this approach is that we don't have to iterate over all the registered ftrace_ops for call sites that have a single ftrace_ops registered. This is the equivalent of implementing support for dynamic ftrace trampolines, which set up a special ftrace trampoline for each registered ftrace_ops and have individual call sites branch into those directly. A secondary advantage is that this gives us a way to add support for direct ftrace callers without having to resort to using stubs. The address of the direct call trampoline can be loaded from the ftrace_ops structure. To support this, we reserve a nop before each function on 32-bit powerpc. For 64-bit powerpc, two nops are reserved before each out-of-line stub. During ftrace activation, we update this location with the associated ftrace_ops pointer. Then, on ftrace entry, we load from this location and call into ftrace_ops->func(). For 64-bit powerpc, we ensure that the out-of-line stub area is doubleword aligned so that ftrace_ops address can be updated atomically. 
Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-15-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/Makefile | 4 ++ arch/powerpc/include/asm/ftrace.h | 5 +- arch/powerpc/kernel/asm-offsets.c | 4 ++ arch/powerpc/kernel/trace/ftrace.c | 59 +++++++++++++++++++++- arch/powerpc/kernel/trace/ftrace_entry.S | 36 ++++++++++--- arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 5 +- 7 files changed, 102 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 50d418f72ee8e..79bd5a375527d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -234,6 +234,7 @@ config PPC select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 + select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if PPC_FTRACE_OUT_OF_LINE || (PPC32 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY) select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a52167830e8b6..99af7953e8442 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -151,8 +151,12 @@ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE CC_FLAGS_FTRACE := -fpatchable-function-entry=1 else +ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS # PPC32 only +CC_FLAGS_FTRACE := -fpatchable-function-entry=3,1 +else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 endif +endif else CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 28f3590ca7808..1ad1328cf4e31 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -136,8 +136,11 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } extern unsigned 
int ftrace_tramp_text[], ftrace_tramp_init[]; #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE struct ftrace_ool_stub { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + struct ftrace_ops *ftrace_op; +#endif u32 insn[4]; -}; +} __aligned(sizeof(unsigned long)); extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_text[], ftrace_ool_stub_inittext[]; extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 340f69bfcaa7f..318349f78820e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -679,5 +679,9 @@ int main(void) DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#endif + return 0; } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index bee2c54a8c047..9090d1a21600e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -38,8 +38,11 @@ unsigned long ftrace_call_adjust(unsigned long addr) return 0; if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && - !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { addr += MCOUNT_INSN_SIZE; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + addr += MCOUNT_INSN_SIZE; + } return addr; } @@ -264,6 +267,46 @@ static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) #endif } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *powerpc_rec_get_ops(struct dyn_ftrace *rec) +{ + const struct ftrace_ops *ops = NULL; + + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); + } + + if (!ops) + ops = &ftrace_list_ops; + + return ops; +} + +static int ftrace_rec_set_ops(struct dyn_ftrace *rec, const struct ftrace_ops *ops) +{ + if 
(IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return patch_ulong((void *)(ftrace_get_ool_stub(rec) - sizeof(unsigned long)), + (unsigned long)ops); + else + return patch_ulong((void *)(rec->ip - MCOUNT_INSN_SIZE - sizeof(unsigned long)), + (unsigned long)ops); +} + +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} + +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, powerpc_rec_get_ops(rec)); +} +#else +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -294,6 +337,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (!ret) ret = ftrace_modify_code(ip, old, new); + ret = ftrace_rec_update_ops(rec); + if (ret) + return ret; + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip))); @@ -345,16 +392,19 @@ void ftrace_replace_code(int enable) case FTRACE_UPDATE_MODIFY_CALL: ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst); ret |= ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); old = call_inst; new = new_call_inst; break; case FTRACE_UPDATE_MAKE_NOP: ret = ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_set_nop_ops(rec); old = call_inst; new = nop_inst; break; case FTRACE_UPDATE_MAKE_CALL: ret = ftrace_get_call_inst(rec, new_addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); old = nop_inst; new = call_inst; break; @@ -470,6 +520,13 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ppc_inst_t old, new; int ret; + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't 
have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; + old = ppc_inst_read((u32 *)&ftrace_call); new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index a6bf7f8410403..ff376c9903081 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -85,11 +85,21 @@ /* Save callee's TOC in the ABI compliant location */ std r2, STK_GOT(r1) LOAD_PACA_TOC() /* get kernel TOC in r2 */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* r7 points to the instruction following the call to ftrace */ + PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r7) + PPC_LL r12, FTRACE_OPS_FUNC(r5) + mtctr r12 +#else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */ +#ifdef CONFIG_PPC64 LOAD_REG_ADDR(r3, function_trace_op) ld r5,0(r3) #else lis r3,function_trace_op@ha lwz r5,function_trace_op@l(r3) +#endif #endif /* Save special regs */ @@ -205,20 +215,30 @@ #endif .endm -_GLOBAL(ftrace_regs_caller) - ftrace_regs_entry 1 - /* ftrace_call(r3, r4, r5, r6) */ +.macro ftrace_regs_func allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + bctrl +#else + .if \allregs == 1 .globl ftrace_regs_call ftrace_regs_call: + .else +.globl ftrace_call +ftrace_call: + .endif + /* ftrace_call(r3, r4, r5, r6) */ bl ftrace_stub +#endif +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + ftrace_regs_func 1 ftrace_regs_exit 1 _GLOBAL(ftrace_caller) ftrace_regs_entry 0 - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub + ftrace_regs_func 0 ftrace_regs_exit 0 _GLOBAL(ftrace_stub) @@ -377,7 +397,7 @@ _GLOBAL(return_to_handler) #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE) -SYM_CODE_START(ftrace_ool_stub_text) +SYM_START(ftrace_ool_stub_text, 
SYM_L_GLOBAL, .balign SZL) .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE SYM_CODE_END(ftrace_ool_stub_text) #endif diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh index 6a201df83524e..950a7778324b6 100755 --- a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -28,12 +28,13 @@ fi cat > "$arch_vmlinux_S" < +#include #include .pushsection .tramp.ftrace.text,"aw" SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text_end) -SYM_CODE_START(ftrace_ool_stub_text_end) +SYM_START(ftrace_ool_stub_text_end, SYM_L_GLOBAL, .balign SZL) #if $num_ool_stubs_text_end .space $num_ool_stubs_text_end * FTRACE_OOL_STUB_SIZE #endif @@ -43,7 +44,7 @@ SYM_CODE_END(ftrace_ool_stub_text_end) .pushsection .tramp.ftrace.init,"aw" SYM_DATA(ftrace_ool_stub_inittext_count, .long $num_ool_stubs_inittext) -SYM_CODE_START(ftrace_ool_stub_inittext) +SYM_START(ftrace_ool_stub_inittext, SYM_L_GLOBAL, .balign SZL) .space $num_ool_stubs_inittext * FTRACE_OOL_STUB_SIZE SYM_CODE_END(ftrace_ool_stub_inittext) .popsection From a52f6043a2238d656ddd23ce0499cf4f12645faa Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:48 +0530 Subject: [PATCH 21/55] powerpc/ftrace: Add support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS Add support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS similar to the arm64 implementation. ftrace direct calls allow custom trampolines to be called into directly from function ftrace call sites, bypassing the ftrace trampoline completely. This functionality is currently utilized by BPF trampolines to hook into kernel function entries. Since we have limited relative branch range, we support ftrace direct calls through support for DYNAMIC_FTRACE_WITH_CALL_OPS. In this approach, ftrace trampoline is not entirely bypassed. 
Rather, it is re-purposed into a stub that reads direct_call field from the associated ftrace_ops structure and branches into that, if it is not NULL. For this, it is sufficient if we can ensure that the ftrace trampoline is reachable from all traceable functions. When multiple ftrace_ops are associated with a call site, we utilize a call back to set pt_regs->orig_gpr3 that can then be tested on the return path from the ftrace trampoline to branch into the direct caller. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-16-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/ftrace.h | 16 ++++ arch/powerpc/kernel/asm-offsets.c | 3 + arch/powerpc/kernel/trace/ftrace.c | 11 +++ arch/powerpc/kernel/trace/ftrace_entry.S | 114 +++++++++++++++++------ 5 files changed, 116 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 79bd5a375527d..73e9e42b2e756 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -235,6 +235,7 @@ config PPC select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if PPC_FTRACE_OUT_OF_LINE || (PPC32 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 1ad1328cf4e31..5eb7631355a16 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -148,6 +148,22 @@ extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, #endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); + +#ifdef 
CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When an ftrace registered caller is tracing a function that is also set by a + * register_ftrace_direct() call, it needs to be differentiated in the + * ftrace_caller trampoline so that the direct call can be invoked after the + * other ftrace ops. To do this, place the direct caller in the orig_gpr3 field + * of pt_regs. This tells ftrace_caller that there's a direct caller. + */ +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) +{ + struct pt_regs *regs = &fregs->regs; + + regs->orig_gpr3 = addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #else static inline void ftrace_free_init_tramp(void) { } static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 318349f78820e..ae198b2d9b8c6 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -681,6 +681,9 @@ int main(void) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + OFFSET(FTRACE_OPS_DIRECT_CALL, ftrace_ops, direct_call); +#endif #endif return 0; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 9090d1a21600e..051f3db146066 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -150,6 +150,17 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ else ip = rec->ip; + if (!is_offset_in_branch_range(addr - ip) && addr != FTRACE_ADDR && + addr != FTRACE_REGS_ADDR) { + /* This can only happen with ftrace direct */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + pr_err("0x%lx (0x%lx): Unexpected target address 0x%lx\n", + ip, rec->ip, addr); + return -EINVAL; + } + addr = FTRACE_ADDR; + } + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = 
addr; diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index ff376c9903081..2c1b24100eca2 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -33,14 +33,38 @@ * and then arrange for the ftrace function to be called. */ .macro ftrace_regs_entry allregs - /* Save the original return address in A's stack frame */ - PPC_STL r0, LRSAVE(r1) /* Create a minimal stack frame for representing B */ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) /* Create our stack frame + pt_regs */ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) + .if \allregs == 1 + SAVE_GPRS(11, 12, r1) + .endif + + /* Get the _mcount() call site out of LR */ + mflr r11 + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Load the ftrace_op */ + PPC_LL r12, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + + /* Load direct_call from the ftrace_op */ + PPC_LL r12, FTRACE_OPS_DIRECT_CALL(r12) + PPC_LCMPI r12, 0 + .if \allregs == 1 + bne .Lftrace_direct_call_regs + .else + bne .Lftrace_direct_call + .endif +#endif + + /* Save the previous LR in pt_regs->link */ + PPC_STL r0, _LINK(r1) + /* Also save it in A's stack frame */ + PPC_STL r0, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE+LRSAVE(r1) + /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) SAVE_GPRS(3, 10, r1) @@ -54,7 +78,7 @@ .if \allregs == 1 SAVE_GPR(2, r1) - SAVE_GPRS(11, 31, r1) + SAVE_GPRS(13, 31, r1) .else #if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) SAVE_GPR(14, r1) @@ -67,20 +91,15 @@ .if \allregs == 1 /* Load special regs for save below */ + mfcr r7 mfmsr r8 mfctr r9 mfxer r10 - mfcr r11 .else /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ li r8, 0 .endif - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save the read LR in pt_regs->link */ - PPC_STL r0, _LINK(r1) - #ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, STK_GOT(r1) @@ -88,8 +107,8 @@ #endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS - 
/* r7 points to the instruction following the call to ftrace */ - PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r7) + /* r11 points to the instruction following the call to ftrace */ + PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) PPC_LL r12, FTRACE_OPS_FUNC(r5) mtctr r12 #else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */ @@ -105,45 +124,51 @@ /* Save special regs */ PPC_STL r8, _MSR(r1) .if \allregs == 1 + PPC_STL r7, _CCR(r1) PPC_STL r9, _CTR(r1) PPC_STL r10, _XER(r1) - PPC_STL r11, _CCR(r1) .endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Clear orig_gpr3 to later detect ftrace_direct call */ + li r7, 0 + PPC_STL r7, ORIG_GPR3(r1) +#endif + #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE /* Save our real return address in nvr for return */ .if \allregs == 0 SAVE_GPR(15, r1) .endif - mr r15, r7 + mr r15, r11 /* - * We want the ftrace location in the function, but our lr (in r7) + * We want the ftrace location in the function, but our lr (in r11) * points at the 'mtlr r0' instruction in the out of line stub. To * recover the ftrace location, we read the branch instruction in the * stub, and adjust our lr by the branch offset. * * See ftrace_init_ool_stub() for the profile sequence. */ - lwz r8, MCOUNT_INSN_SIZE(r7) + lwz r8, MCOUNT_INSN_SIZE(r11) slwi r8, r8, 6 srawi r8, r8, 6 - add r3, r7, r8 + add r3, r11, r8 /* * Override our nip to point past the branch in the original function. * This allows reliable stack trace and the ftrace stack tracer to work as-is. 
*/ - addi r7, r3, MCOUNT_INSN_SIZE + addi r11, r3, MCOUNT_INSN_SIZE #else /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE + subi r3, r11, MCOUNT_INSN_SIZE #endif /* Save NIP as pt_regs->nip */ - PPC_STL r7, _NIP(r1) + PPC_STL r11, _NIP(r1) /* Also save it in B's stackframe header for proper unwind */ - PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) + PPC_STL r11, LRSAVE+SWITCH_FRAME_SIZE(r1) #if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) - mr r14, r7 /* remember old NIP */ + mr r14, r11 /* remember old NIP */ #endif /* Put the original return address in r4 as parent_ip */ @@ -154,14 +179,32 @@ .endm .macro ftrace_regs_exit allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Check orig_gpr3 to detect ftrace_direct call */ + PPC_LL r3, ORIG_GPR3(r1) + PPC_LCMPI cr1, r3, 0 + mtctr r3 +#endif + + /* Restore possibly modified LR */ + PPC_LL r0, _LINK(r1) + #ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE /* Load ctr with the possibly modified NIP */ PPC_LL r3, _NIP(r1) - mtctr r3 - #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ #endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + beq cr1,2f + mtlr r3 + b 3f +#endif +2: mtctr r3 + mtlr r0 +3: + #else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ /* Load LR with the possibly modified NIP */ PPC_LL r3, _NIP(r1) @@ -185,12 +228,6 @@ #endif .endif - /* Restore possibly modified LR */ - PPC_LL r0, _LINK(r1) -#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE - mtlr r0 -#endif - #ifdef CONFIG_PPC64 /* Restore callee's TOC */ ld r2, STK_GOT(r1) @@ -203,8 +240,12 @@ /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif + /* jump after _mcount site */ #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnectr cr1 +#endif /* * Return with blr to keep the link stack balanced. The function profiling sequence * uses 'mtlr r0' to restore LR. 
@@ -260,6 +301,21 @@ ftrace_no_trace: #endif #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +.Lftrace_direct_call_regs: + mtctr r12 + REST_GPRS(11, 12, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +.Lftrace_direct_call: + mtctr r12 + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +SYM_FUNC_START(ftrace_stub_direct_tramp) + blr +SYM_FUNC_END(ftrace_stub_direct_tramp) +#endif + #ifdef CONFIG_LIVEPATCH_64 /* * This function runs in the mcount context, between two functions. As From 71db948b9d2744e92124720f682ed2c26f0de75b Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:49 +0530 Subject: [PATCH 22/55] samples/ftrace: Add support for ftrace direct samples on powerpc Add powerpc 32-bit and 64-bit samples for ftrace direct. This serves to show the sample instruction sequence to be used by ftrace direct calls to adhere to the ftrace ABI. On 64-bit powerpc, TOC setup requires some additional work. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-17-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 2 + samples/ftrace/ftrace-direct-modify.c | 85 +++++++++++++++- samples/ftrace/ftrace-direct-multi-modify.c | 101 +++++++++++++++++++- samples/ftrace/ftrace-direct-multi.c | 79 ++++++++++++++- samples/ftrace/ftrace-direct-too.c | 83 +++++++++++++++- samples/ftrace/ftrace-direct.c | 69 ++++++++++++- 6 files changed, 414 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 73e9e42b2e756..a9dd4c39ec009 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -275,6 +275,8 @@ config PPC select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECT if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_SETUP_PER_CPU_AREA if PPC64 select HAVE_SOFTIRQ_ON_OWN_STACK select 
HAVE_STACKPROTECTOR if PPC32 && $(cc-option,$(m32-flag) -mstack-protector-guard=tls -mstack-protector-guard-reg=r2 -mstack-protector-guard-offset=0) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 81220390851a3..cfea7a38befb0 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -2,7 +2,7 @@ #include #include #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -199,6 +199,89 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE +" bl my_direct_func1\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, 
"__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE +" bl my_direct_func2\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static unsigned long my_tramp = (unsigned long)my_tramp1; diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index f943e40d57fd3..8f7986d698d87 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -2,7 +2,7 @@ #include #include #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -225,6 +225,105 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#define 
PPC_FTRACE_RECOVER_IP \ +" lwz 8, 4(3)\n" \ +" li 9, 6\n" \ +" slw 8, 8, 9\n" \ +" sraw 8, 8, 9\n" \ +" add 3, 3, 8\n" \ +" addi 3, 3, 4\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#define PPC_FTRACE_RECOVER_IP "" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func1\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func2\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC + " .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static unsigned long my_tramp = (unsigned 
long)my_tramp1; static unsigned long tramps[2] = { (unsigned long)my_tramp1, diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index aed6df2927ce1..db326c81a27dd 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -4,7 +4,7 @@ #include /* for handle_mm_fault() */ #include #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -141,6 +141,83 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#define PPC_FTRACE_RECOVER_IP \ +" lwz 8, 4(3)\n" \ +" li 9, 6\n" \ +" slw 8, 8, 9\n" \ +" sraw 8, 8, 9\n" \ +" add 3, 3, 8\n" \ +" addi 3, 3, 4\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#define PPC_FTRACE_RECOVER_IP "" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" 
mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static int __init ftrace_direct_multi_init(void) diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 6ff546a5d7eb0..3d0fa260332d4 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -3,7 +3,7 @@ #include /* for handle_mm_fault() */ #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -153,6 +153,87 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 64 +#define STACK_FRAME_ARG1 32 +#define STACK_FRAME_ARG2 40 +#define STACK_FRAME_ARG3 48 +#define STACK_FRAME_ARG4 56 +#else +#define STACK_FRAME_SIZE 32 +#define STACK_FRAME_ARG1 16 +#define STACK_FRAME_ARG2 20 +#define STACK_FRAME_ARG3 24 +#define STACK_FRAME_ARG4 28 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 
0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_ARG1)"(1)\n" + PPC_STL" 4, "__stringify(STACK_FRAME_ARG2)"(1)\n" + PPC_STL" 5, "__stringify(STACK_FRAME_ARG3)"(1)\n" + PPC_STL" 6, "__stringify(STACK_FRAME_ARG4)"(1)\n" +" bl my_direct_func\n" + PPC_LL" 6, "__stringify(STACK_FRAME_ARG4)"(1)\n" + PPC_LL" 5, "__stringify(STACK_FRAME_ARG3)"(1)\n" + PPC_LL" 4, "__stringify(STACK_FRAME_ARG2)"(1)\n" + PPC_LL" 3, "__stringify(STACK_FRAME_ARG1)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static int __init ftrace_direct_init(void) diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index ef0945670e1eb..956834b0d19ac 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -3,7 +3,7 @@ #include /* for wake_up_process() */ #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -134,6 +134,73 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" 
+#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" bl my_direct_func\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static int __init ftrace_direct_init(void) From d243b62b7bd3d5314382d3b54e4992226245e936 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:50 +0530 Subject: [PATCH 23/55] powerpc64/bpf: Add support for bpf trampolines Add support for bpf_arch_text_poke() and arch_prepare_bpf_trampoline() for 64-bit powerpc. While the code is generic, BPF trampolines are only enabled on 64-bit powerpc. 32-bit powerpc will need testing and some updates. 
BPF Trampolines adhere to the existing ftrace ABI utilizing a two-instruction profiling sequence, as well as the newer ABI utilizing a three-instruction profiling sequence enabling return with a 'blr'. The trampoline code itself closely follows the x86 implementation. BPF prog JIT is extended to mimic the 64-bit powerpc approach for ftrace having a single nop at function entry, followed by the function profiling sequence out-of-line and a separate long branch stub for calls to trampolines that are out of range. A dummy_tramp is provided to simplify synchronization, similar to arm64. When attaching a bpf trampoline to a bpf prog, we can patch up to three things: - the nop at bpf prog entry to go to the out-of-line stub - the instruction in the out-of-line stub to either call the bpf trampoline directly, or to branch to the long_branch stub. - the trampoline address before the long_branch stub. We do not need any synchronization here since we always have a valid branch target regardless of the order in which the above stores are seen. dummy_tramp ensures that the long_branch stub goes to a valid destination on other cpus, even when the branch to the long_branch stub is seen before the updated trampoline address. However, when detaching a bpf trampoline from a bpf prog, or if changing the bpf trampoline address, we need synchronization to ensure that other cpus can no longer branch into the older trampoline so that it can be safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus make forward progress, but we still need to ensure that other cpus execute isync (or some CSI) so that they don't go back into the trampoline again. While here, update the stale comment that describes the redzone usage in the ppc64 BPF JIT.
Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-18-hbathini@linux.ibm.com --- arch/powerpc/include/asm/ppc-opcode.h | 14 + arch/powerpc/net/bpf_jit.h | 17 + arch/powerpc/net/bpf_jit_comp.c | 847 +++++++++++++++++++++++++- arch/powerpc/net/bpf_jit_comp32.c | 7 +- arch/powerpc/net/bpf_jit_comp64.c | 11 +- 5 files changed, 891 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index b98a9e982c03b..4312bcb913a42 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -587,12 +587,26 @@ #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) #define PPC_RAW_EIEIO() (0x7c0006ac) +/* bcl 20,31,$+4 */ +#define PPC_RAW_BCL4() (0x429f0005) #define PPC_RAW_BRANCH(offset) (0x48000000 | PPC_LI(offset)) #define PPC_RAW_BL(offset) (0x48000001 | PPC_LI(offset)) #define PPC_RAW_TW(t0, a, b) (0x7c000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0) #define PPC_RAW_SETB(t, bfa) (0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2)) +#ifdef CONFIG_PPC32 +#define PPC_RAW_STL PPC_RAW_STW +#define PPC_RAW_STLU PPC_RAW_STWU +#define PPC_RAW_LL PPC_RAW_LWZ +#define PPC_RAW_CMPLI PPC_RAW_CMPWI +#else +#define PPC_RAW_STL PPC_RAW_STD +#define PPC_RAW_STLU PPC_RAW_STDU +#define PPC_RAW_LL PPC_RAW_LD +#define PPC_RAW_CMPLI PPC_RAW_CMPDI +#endif + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index cdea5dccaefe7..6beacaec63d30 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -12,6 +12,7 @@ #include #include +#include #ifdef CONFIG_PPC64_ELF_ABI_V1 #define FUNCTION_DESCR_SIZE 24 @@ -21,6 +22,9 
@@ #define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4) +#define SZL sizeof(unsigned long) +#define BPF_INSN_SAFETY 64 + #define PLANT_INSTR(d, idx, instr) \ do { if (d) { (d)[idx] = instr; } idx++; } while (0) #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) @@ -81,6 +85,18 @@ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0xffff)); \ } } while (0) +#define PPC_LI_ADDR PPC_LI64 + +#ifndef CONFIG_PPC_KERNEL_PCREL +#define PPC64_LOAD_PACA() \ + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))) +#else +#define PPC64_LOAD_PACA() do {} while (0) +#endif +#else +#define PPC_LI64(d, i) BUILD_BUG() +#define PPC_LI_ADDR PPC_LI32 +#define PPC64_LOAD_PACA() BUILD_BUG() #endif /* @@ -165,6 +181,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code u32 *addrs, int pass, bool extra_pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); +void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 2a36cc2e7e9e2..28e2fd8b7900a 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -22,11 +22,81 @@ #include "bpf_jit.h" +/* These offsets are from bpf prog end and stay the same across progs */ +static int bpf_jit_ool_stub, bpf_jit_long_branch_stub; + static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } +void dummy_tramp(void); + +asm ( +" .pushsection .text, \"ax\", @progbits ;" +" .global dummy_tramp ;" +" .type dummy_tramp, @function ;" +"dummy_tramp: ;" +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +" blr ;" +#else +/* LR is always in r11, so we don't need a 'mflr r11' here */ +" mtctr 11 ;" +" mtlr 0 
;" +" bctr ;" +#endif +" .size dummy_tramp, .-dummy_tramp ;" +" .popsection ;" +); + +void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx) +{ + int ool_stub_idx, long_branch_stub_idx; + + /* + * Out-of-line stub: + * mflr r0 + * [b|bl] tramp + * mtlr r0 // only with CONFIG_PPC_FTRACE_OUT_OF_LINE + * b bpf_func + 4 + */ + ool_stub_idx = ctx->idx; + EMIT(PPC_RAW_MFLR(_R0)); + EMIT(PPC_RAW_NOP()); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + EMIT(PPC_RAW_MTLR(_R0)); + WARN_ON_ONCE(!is_offset_in_branch_range(4 - (long)ctx->idx * 4)); + EMIT(PPC_RAW_BRANCH(4 - (long)ctx->idx * 4)); + + /* + * Long branch stub: + * .long + * mflr r11 + * bcl 20,31,$+4 + * mflr r12 + * ld r12, -8-SZL(r12) + * mtctr r12 + * mtlr r11 // needed to retain ftrace ABI + * bctr + */ + if (image) + *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp; + ctx->idx += SZL / 4; + long_branch_stub_idx = ctx->idx; + EMIT(PPC_RAW_MFLR(_R11)); + EMIT(PPC_RAW_BCL4()); + EMIT(PPC_RAW_MFLR(_R12)); + EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL)); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_MTLR(_R11)); + EMIT(PPC_RAW_BCTR()); + + if (!bpf_jit_ool_stub) { + bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4; + bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4; + } +} + int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) { if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { @@ -222,7 +292,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) fp->bpf_func = (void *)fimage; fp->jited = 1; - fp->jited_len = proglen + FUNCTION_DESCR_SIZE; + fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE; if (!fp->is_func || extra_pass) { if (bpf_jit_binary_pack_finalize(fhdr, hdr)) { @@ -369,3 +439,778 @@ bool bpf_jit_supports_far_kfunc_call(void) { return IS_ENABLED(CONFIG_PPC64); } + +void *arch_alloc_bpf_trampoline(unsigned int size) +{ + return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns); +} + 
+void arch_free_bpf_trampoline(void *image, unsigned int size) +{ + bpf_prog_pack_free(image, size); +} + +int arch_protect_bpf_trampoline(void *image, unsigned int size) +{ + return 0; +} + +static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx, + struct bpf_tramp_link *l, int regs_off, int retval_off, + int run_ctx_off, bool save_ret) +{ + struct bpf_prog *p = l->link.prog; + ppc_inst_t branch_insn; + u32 jmp_idx; + int ret = 0; + + /* Save cookie */ + if (IS_ENABLED(CONFIG_PPC64)) { + PPC_LI64(_R3, l->cookie); + EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx, + bpf_cookie))); + } else { + PPC_LI32(_R3, l->cookie >> 32); + PPC_LI32(_R4, l->cookie); + EMIT(PPC_RAW_STW(_R3, _R1, + run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie))); + EMIT(PPC_RAW_STW(_R4, _R1, + run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie) + 4)); + } + + /* __bpf_prog_enter(p, &bpf_tramp_run_ctx) */ + PPC_LI_ADDR(_R3, p); + EMIT(PPC_RAW_MR(_R25, _R3)); + EMIT(PPC_RAW_ADDI(_R4, _R1, run_ctx_off)); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)bpf_trampoline_enter(p)); + if (ret) + return ret; + + /* Remember prog start time returned by __bpf_prog_enter */ + EMIT(PPC_RAW_MR(_R26, _R3)); + + /* + * if (__bpf_prog_enter(p) == 0) + * goto skip_exec_of_prog; + * + * Emit a nop to be later patched with conditional branch, once offset is known + */ + EMIT(PPC_RAW_CMPLI(_R3, 0)); + jmp_idx = ctx->idx; + EMIT(PPC_RAW_NOP()); + + /* p->bpf_func(ctx) */ + EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off)); + if (!p->jited) + PPC_LI_ADDR(_R4, (unsigned long)p->insnsi); + if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func, + BRANCH_SET_LINK)) { + if (image) + image[ctx->idx] = ppc_inst_val(branch_insn); + ctx->idx++; + } else { + EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + } + + if (save_ret) + 
EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + + /* Fix up branch */ + if (image) { + if (create_cond_branch(&branch_insn, &image[jmp_idx], + (unsigned long)&image[ctx->idx], COND_EQ << 16)) + return -EINVAL; + image[jmp_idx] = ppc_inst_val(branch_insn); + } + + /* __bpf_prog_exit(p, start_time, &bpf_tramp_run_ctx) */ + EMIT(PPC_RAW_MR(_R3, _R25)); + EMIT(PPC_RAW_MR(_R4, _R26)); + EMIT(PPC_RAW_ADDI(_R5, _R1, run_ctx_off)); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)bpf_trampoline_exit(p)); + + return ret; +} + +static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx, + struct bpf_tramp_links *tl, int regs_off, int retval_off, + int run_ctx_off, u32 *branches) +{ + int i; + + /* + * The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + EMIT(PPC_RAW_LI(_R3, 0)); + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off, + run_ctx_off, true)) + return -EINVAL; + + /* + * mod_ret prog stored return value after prog ctx. Emit: + * if (*(u64 *)(ret_val) != 0) + * goto do_fexit; + */ + EMIT(PPC_RAW_LL(_R3, _R1, retval_off)); + EMIT(PPC_RAW_CMPLI(_R3, 0)); + + /* + * Save the location of the branch and generate a nop, which is + * replaced with a conditional jump once do_fexit (i.e. the + * start of the fexit invocation) is finalized. 
+ */ + branches[i] = ctx->idx; + EMIT(PPC_RAW_NOP()); + } + + return 0; +} + +static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int r4_off) +{ + if (IS_ENABLED(CONFIG_PPC64)) { + /* See bpf_jit_stack_tailcallcnt() */ + int tailcallcnt_offset = 6 * 8; + + EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); + EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset)); + } else { + /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ + EMIT(PPC_RAW_LL(_R4, _R1, r4_off)); + } +} + +static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int r4_off) +{ + if (IS_ENABLED(CONFIG_PPC64)) { + /* See bpf_jit_stack_tailcallcnt() */ + int tailcallcnt_offset = 6 * 8; + + EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset)); + EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); + } else { + /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ + EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); + } +} + +static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset, + int nr_regs, int regs_off) +{ + int param_save_area_offset; + + param_save_area_offset = func_frame_offset; /* the two frames we alloted */ + param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ + + for (int i = 0; i < nr_regs; i++) { + if (i < 8) { + EMIT(PPC_RAW_STL(_R3 + i, _R1, regs_off + i * SZL)); + } else { + EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL)); + EMIT(PPC_RAW_STL(_R3, _R1, regs_off + i * SZL)); + } + } +} + +/* Used when restoring just the register parameters when returning back */ +static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context *ctx, + int nr_regs, int regs_off) +{ + for (int i = 0; i < nr_regs && i < 8; i++) + EMIT(PPC_RAW_LL(_R3 + i, _R1, regs_off + i * SZL)); +} + +/* Used when we call into the traced function. 
Replicate parameter save area */ +static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int nr_regs, int regs_off) +{ + int param_save_area_offset; + + param_save_area_offset = func_frame_offset; /* the two frames we alloted */ + param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ + + for (int i = 8; i < nr_regs; i++) { + EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL)); + EMIT(PPC_RAW_STL(_R3, _R1, STACK_FRAME_MIN_SIZE + i * SZL)); + } + bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off); +} + +static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image, + void *rw_image_end, void *ro_image, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0; + int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct codegen_context codegen_ctx, *ctx; + u32 *image = (u32 *)rw_image; + ppc_inst_t branch_insn; + u32 *branches = NULL; + bool save_ret; + + if (IS_ENABLED(CONFIG_PPC32)) + return -EOPNOTSUPP; + + nr_regs = m->nr_args; + /* Extra registers for struct arguments */ + for (i = 0; i < m->nr_args; i++) + if (m->arg_size[i] > SZL) + nr_regs += round_up(m->arg_size[i], SZL) / SZL - 1; + + if (nr_regs > MAX_BPF_FUNC_ARGS) + return -EOPNOTSUPP; + + ctx = &codegen_ctx; + memset(ctx, 0, sizeof(*ctx)); + + /* + * Generated stack layout: + * + * func prev back chain [ back chain ] + * [ ] + * bpf prog redzone/tailcallcnt [ ... 
] 64 bytes (64-bit powerpc) + * [ ] -- + * LR save area [ r0 save (64-bit) ] | header + * [ r0 save (32-bit) ] | + * dummy frame for unwind [ back chain 1 ] -- + * [ padding ] align stack frame + * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc + * alt_lr_off [ real lr (ool stub)] optional - actual lr + * [ r26 ] + * nvr_off [ r25 ] nvr save area + * retval_off [ return value ] + * [ reg argN ] + * [ ... ] + * regs_off [ reg_arg1 ] prog ctx context + * nregs_off [ args count ] + * ip_off [ traced function ] + * [ ... ] + * run_ctx_off [ bpf_tramp_run_ctx ] + * [ reg argN ] + * [ ... ] + * param_save_area [ reg_arg1 ] min 8 doublewords, per ABI + * [ TOC save (64-bit) ] -- + * [ LR save (64-bit) ] | header + * [ LR save (32-bit) ] | + * bpf trampoline frame [ back chain 2 ] -- + * + */ + + /* Minimum stack frame header */ + bpf_frame_size = STACK_FRAME_MIN_SIZE; + + /* + * Room for parameter save area. + * + * As per the ABI, this is required if we call into the traced + * function (BPF_TRAMP_F_CALL_ORIG): + * - if the function takes more than 8 arguments for the rest to spill onto the stack + * - or, if the function has variadic arguments + * - or, if this function's prototype was not available to the caller + * + * Reserve space for at least 8 registers for now. This can be optimized later. + */ + bpf_frame_size += (nr_regs > 8 ? 
nr_regs : 8) * SZL; + + /* Room for struct bpf_tramp_run_ctx */ + run_ctx_off = bpf_frame_size; + bpf_frame_size += round_up(sizeof(struct bpf_tramp_run_ctx), SZL); + + /* Room for IP address argument */ + ip_off = bpf_frame_size; + if (flags & BPF_TRAMP_F_IP_ARG) + bpf_frame_size += SZL; + + /* Room for args count */ + nregs_off = bpf_frame_size; + bpf_frame_size += SZL; + + /* Room for args */ + regs_off = bpf_frame_size; + bpf_frame_size += nr_regs * SZL; + + /* Room for return value of func_addr or fentry prog */ + retval_off = bpf_frame_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + bpf_frame_size += SZL; + + /* Room for nvr save area */ + nvr_off = bpf_frame_size; + bpf_frame_size += 2 * SZL; + + /* Optional save area for actual LR in case of ool ftrace */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + alt_lr_off = bpf_frame_size; + bpf_frame_size += SZL; + } + + if (IS_ENABLED(CONFIG_PPC32)) { + if (nr_regs < 2) { + r4_off = bpf_frame_size; + bpf_frame_size += SZL; + } else { + r4_off = regs_off + SZL; + } + } + + /* Padding to align stack frame, if any */ + bpf_frame_size = round_up(bpf_frame_size, SZL * 2); + + /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */ + bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64; + + /* Offset to the traced function's stack frame */ + func_frame_offset = bpf_dummy_frame_size + bpf_frame_size; + + /* Create dummy frame for unwind, store original return value */ + EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF)); + /* Protect red zone where tail call count goes */ + EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size)); + + /* Create our stack frame */ + EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size)); + + /* 64-bit: Save TOC and load kernel TOC */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + EMIT(PPC_RAW_STD(_R2, _R1, 24)); + PPC64_LOAD_PACA(); + } + + /* 32-bit: save tail call count in r4 */ + if 
(IS_ENABLED(CONFIG_PPC32) && nr_regs < 2) + EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); + + bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off); + + /* Save our return address */ + EMIT(PPC_RAW_MFLR(_R3)); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off)); + else + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + + /* + * Save ip address of the traced function. + * We could recover this from LR, but we will need to address for OOL trampoline, + * and optional GEP area. + */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) { + EMIT(PPC_RAW_LWZ(_R4, _R3, 4)); + EMIT(PPC_RAW_SLWI(_R4, _R4, 6)); + EMIT(PPC_RAW_SRAWI(_R4, _R4, 6)); + EMIT(PPC_RAW_ADD(_R3, _R3, _R4)); + EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); + } + + if (flags & BPF_TRAMP_F_IP_ARG) + EMIT(PPC_RAW_STL(_R3, _R1, ip_off)); + + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + /* Fake our LR for unwind */ + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + + /* Save function arg count -- see bpf_get_func_arg_cnt() */ + EMIT(PPC_RAW_LI(_R3, nr_regs)); + EMIT(PPC_RAW_STL(_R3, _R1, nregs_off)); + + /* Save nv regs */ + EMIT(PPC_RAW_STL(_R25, _R1, nvr_off)); + EMIT(PPC_RAW_STL(_R26, _R1, nvr_off + SZL)); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + PPC_LI_ADDR(_R3, (unsigned long)im); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)__bpf_tramp_enter); + if (ret) + return ret; + } + + for (i = 0; i < fentry->nr_links; i++) + if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off, + run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET)) + return -EINVAL; + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL); + if (!branches) + return -ENOMEM; + + if (invoke_bpf_mod_ret(image, ro_image, ctx, fmod_ret, regs_off, retval_off, + run_ctx_off, branches)) { + ret = -EINVAL; + goto cleanup; + } + } + + /* Call the traced function */ 
+ if (flags & BPF_TRAMP_F_CALL_ORIG) { + /* + * The address in LR save area points to the correct point in the original function + * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction + * sequence + */ + EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTCTR(_R3)); + + /* Replicate tail_call_cnt before calling the original BPF prog */ + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + + /* Restore args */ + bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off); + + /* Restore TOC for 64-bit */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + EMIT(PPC_RAW_LD(_R2, _R1, 24)); + EMIT(PPC_RAW_BCTRL()); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + PPC64_LOAD_PACA(); + + /* Store return value for bpf prog to access */ + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + + /* Restore updated tail_call_cnt */ + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + + /* Reserve space to patch branch instruction to skip fexit progs */ + im->ip_after_call = &((u32 *)ro_image)[ctx->idx]; + EMIT(PPC_RAW_NOP()); + } + + /* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */ + for (i = 0; i < fmod_ret->nr_links && image; i++) { + if (create_cond_branch(&branch_insn, &image[branches[i]], + (unsigned long)&image[ctx->idx], COND_NE << 16)) { + ret = -EINVAL; + goto cleanup; + } + + image[branches[i]] = ppc_inst_val(branch_insn); + } + + for (i = 0; i < fexit->nr_links; i++) + if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off, + run_ctx_off, false)) { + ret = -EINVAL; + goto cleanup; + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = &((u32 *)ro_image)[ctx->idx]; + PPC_LI_ADDR(_R3, im); + ret = bpf_jit_emit_func_call_rel(image, ro_image, 
ctx, + (unsigned long)__bpf_tramp_exit); + if (ret) + goto cleanup; + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off); + + /* Restore return value of func_addr or fentry prog */ + if (save_ret) + EMIT(PPC_RAW_LL(_R3, _R1, retval_off)); + + /* Restore nv regs */ + EMIT(PPC_RAW_LL(_R26, _R1, nvr_off + SZL)); + EMIT(PPC_RAW_LL(_R25, _R1, nvr_off)); + + /* Epilogue */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + EMIT(PPC_RAW_LD(_R2, _R1, 24)); + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* Skip the traced function and return to parent */ + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_BLR()); + } else { + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_BLR()); + } else { + EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTCTR(_R0)); + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_BCTR()); + } + } + + /* Make sure the trampoline generation logic doesn't overflow */ + if (image && WARN_ON_ONCE(&image[ctx->idx] > (u32 *)rw_image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; + goto cleanup; + } + ret = ctx->idx * 4 + BPF_INSN_SAFETY * 4; + +cleanup: + kfree(branches); + return ret; +} + +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) +{ + struct bpf_tramp_image im; + void *image; + int ret; + + /* + * Allocate a temporary buffer for __arch_prepare_bpf_trampoline(). + * This will NOT cause fragmentation in direct map, as we do not + * call set_memory_*() on this buffer. 
+ * + * We cannot use kvmalloc here, because we need image to be in + * module memory range. + */ + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!image) + return -ENOMEM; + + ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image, + m, flags, tlinks, func_addr); + bpf_jit_free_exec(image); + + return ret; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + u32 size = image_end - image; + void *rw_image, *tmp; + int ret; + + /* + * rw_image doesn't need to be in module memory range, so we can + * use kvmalloc. + */ + rw_image = kvmalloc(size, GFP_KERNEL); + if (!rw_image) + return -ENOMEM; + + ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m, + flags, tlinks, func_addr); + if (ret < 0) + goto out; + + if (bpf_jit_enable > 1) + bpf_jit_dump(1, ret - BPF_INSN_SAFETY * 4, 1, rw_image); + + tmp = bpf_arch_text_copy(image, rw_image, size); + if (IS_ERR(tmp)) + ret = PTR_ERR(tmp); + +out: + kvfree(rw_image); + return ret; +} + +static int bpf_modify_inst(void *ip, ppc_inst_t old_inst, ppc_inst_t new_inst) +{ + ppc_inst_t org_inst; + + if (copy_inst_from_kernel_nofault(&org_inst, ip)) { + pr_err("0x%lx: fetching instruction failed\n", (unsigned long)ip); + return -EFAULT; + } + + if (!ppc_inst_equal(org_inst, old_inst)) { + pr_err("0x%lx: expected (%08lx) != found (%08lx)\n", + (unsigned long)ip, ppc_inst_as_ulong(old_inst), ppc_inst_as_ulong(org_inst)); + return -EINVAL; + } + + if (ppc_inst_equal(old_inst, new_inst)) + return 0; + + return patch_instruction(ip, new_inst); +} + +static void do_isync(void *info __maybe_unused) +{ + isync(); +} + +/* + * A 3-step process for bpf prog entry: + * 1. At bpf prog entry, a single nop/b: + * bpf_func: + * [nop|b] ool_stub + * 2. 
Out-of-line stub: + * ool_stub: + * mflr r0 + * [b|bl] bpf_tramp/long_branch_stub + * mtlr r0 // CONFIG_PPC_FTRACE_OUT_OF_LINE only + * b bpf_func + 4 + * 3. Long branch stub: + * long_branch_stub: + * .long bpf_tramp_addr/dummy_tramp + * mflr r11 + * bcl 20,31,$+4 + * mflr r12 + * ld r12, -16(r12) + * mtctr r12 + * mtlr r11 // needed to retain ftrace ABI + * bctr + * + * dummy_tramp is used to reduce synchronization requirements. + * + * When attaching a bpf trampoline to a bpf prog, we do not need any + * synchronization here since we always have a valid branch target regardless + * of the order in which the above stores are seen. dummy_tramp ensures that + * the long_branch stub goes to a valid destination on other cpus, even when + * the branch to the long_branch stub is seen before the updated trampoline + * address. + * + * However, when detaching a bpf trampoline from a bpf prog, or if changing + * the bpf trampoline address, we need synchronization to ensure that other + * cpus can no longer branch into the older trampoline so that it can be + * safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus + * make forward progress, but we still need to ensure that other cpus + * execute isync (or some CSI) so that they don't go back into the + * trampoline again. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + unsigned long bpf_func, bpf_func_end, size, offset; + ppc_inst_t old_inst, new_inst; + int ret = 0, branch_flags; + char name[KSYM_NAME_LEN]; + + if (IS_ENABLED(CONFIG_PPC32)) + return -EOPNOTSUPP; + + bpf_func = (unsigned long)ip; + branch_flags = poke_type == BPF_MOD_CALL ? 
BRANCH_SET_LINK : 0; + + /* We currently only support poking bpf programs */ + if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) { + pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func); + return -EOPNOTSUPP; + } + + /* + * If we are not poking at bpf prog entry, then we are simply patching in/out + * an unconditional branch instruction at im->ip_after_call + */ + if (offset) { + if (poke_type != BPF_MOD_JUMP) { + pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__, + bpf_func); + return -EOPNOTSUPP; + } + old_inst = ppc_inst(PPC_RAW_NOP()); + if (old_addr) + if (create_branch(&old_inst, ip, (unsigned long)old_addr, 0)) + return -ERANGE; + new_inst = ppc_inst(PPC_RAW_NOP()); + if (new_addr) + if (create_branch(&new_inst, ip, (unsigned long)new_addr, 0)) + return -ERANGE; + mutex_lock(&text_mutex); + ret = bpf_modify_inst(ip, old_inst, new_inst); + mutex_unlock(&text_mutex); + + /* Make sure all cpus see the new instruction */ + smp_call_function(do_isync, NULL, 1); + return ret; + } + + bpf_func_end = bpf_func + size; + + /* Address of the jmp/call instruction in the out-of-line stub */ + ip = (void *)(bpf_func_end - bpf_jit_ool_stub + 4); + + if (!is_offset_in_branch_range((long)ip - 4 - bpf_func)) { + pr_err("%s (0x%lx): bpf prog too large, ool stub out of branch range\n", __func__, + bpf_func); + return -ERANGE; + } + + old_inst = ppc_inst(PPC_RAW_NOP()); + if (old_addr) { + if (is_offset_in_branch_range(ip - old_addr)) + create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags); + else + create_branch(&old_inst, ip, bpf_func_end - bpf_jit_long_branch_stub, + branch_flags); + } + new_inst = ppc_inst(PPC_RAW_NOP()); + if (new_addr) { + if (is_offset_in_branch_range(ip - new_addr)) + create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags); + else + create_branch(&new_inst, ip, bpf_func_end - bpf_jit_long_branch_stub, + branch_flags); + } + + mutex_lock(&text_mutex); + + /* + * 1. 
Update the address in the long branch stub: + * If new_addr is out of range, we will have to use the long branch stub, so patch new_addr + * here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here. + */ + if ((new_addr && !is_offset_in_branch_range(new_addr - ip)) || + (old_addr && !is_offset_in_branch_range(old_addr - ip))) + ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL), + (new_addr && !is_offset_in_branch_range(new_addr - ip)) ? + (unsigned long)new_addr : (unsigned long)dummy_tramp); + if (ret) + goto out; + + /* 2. Update the branch/call in the out-of-line stub */ + ret = bpf_modify_inst(ip, old_inst, new_inst); + if (ret) + goto out; + + /* 3. Update instruction at bpf prog entry */ + ip = (void *)bpf_func; + if (!old_addr || !new_addr) { + if (!old_addr) { + old_inst = ppc_inst(PPC_RAW_NOP()); + create_branch(&new_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0); + } else { + new_inst = ppc_inst(PPC_RAW_NOP()); + create_branch(&old_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0); + } + ret = bpf_modify_inst(ip, old_inst, new_inst); + } + +out: + mutex_unlock(&text_mutex); + + /* + * Sync only if we are not attaching a trampoline to a bpf prog so the older + * trampoline can be freed safely. + */ + if (old_addr) + smp_call_function(do_isync, NULL, 1); + + return ret; +} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index a0c4f1bde83e8..c4db278dae360 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -127,13 +127,16 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + /* Instruction for trampoline attach */ + EMIT(PPC_RAW_NOP()); + /* Initialize tail_call_cnt, to be skipped if we do tail calls. 
*/ if (ctx->seen & SEEN_TAILCALL) EMIT(PPC_RAW_LI(_R4, 0)); else EMIT(PPC_RAW_NOP()); -#define BPF_TAILCALL_PROLOGUE_SIZE 4 +#define BPF_TAILCALL_PROLOGUE_SIZE 8 if (bpf_has_stack_frame(ctx)) EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); @@ -198,6 +201,8 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); EMIT(PPC_RAW_BLR()); + + bpf_jit_build_fentry_stubs(image, ctx); } /* Relative offset needs to be calculated based on final image location */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index f3be024fc6854..233703b06d7c9 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -84,7 +84,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) } /* - * When not setting up our own stackframe, the redzone usage is: + * When not setting up our own stackframe, the redzone (288 bytes) usage is: * * [ prev sp ] <------------- * [ ... ] | @@ -92,7 +92,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ nv gpr save area ] 5*8 * [ tail_call_cnt ] 8 * [ local_tmp_var ] 16 - * [ unused red zone ] 208 bytes protected + * [ unused red zone ] 224 */ static int bpf_jit_stack_local(struct codegen_context *ctx) { @@ -126,6 +126,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + /* Instruction for trampoline attach */ + EMIT(PPC_RAW_NOP()); + #ifndef CONFIG_PPC_KERNEL_PCREL if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); @@ -200,6 +203,8 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); EMIT(PPC_RAW_BLR()); + + bpf_jit_build_fentry_stubs(image, ctx); } int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) @@ -303,7 +308,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, 
u32 o */ int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); int b2p_index = bpf_to_ppc(BPF_REG_3); - int bpf_tailcall_prologue_size = 8; + int bpf_tailcall_prologue_size = 12; if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) bpf_tailcall_prologue_size += 4; /* skip past the toc load */ From d677ce521334d8f1f327cafc8b1b7854b0833158 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 30 Oct 2024 11:41:37 -0700 Subject: [PATCH 24/55] powerpc/vdso: Drop -mstack-protector-guard flags in 32-bit files with clang Under certain conditions, the 64-bit '-mstack-protector-guard' flags may end up in the 32-bit vDSO flags, resulting in build failures due to the structure of clang's argument parsing of the stack protector options, which validates the arguments of the stack protector guard flags unconditionally in the frontend, choking on the 64-bit values when targeting 32-bit: clang: error: invalid value 'r13' in 'mstack-protector-guard-reg=', expected one of: r2 clang: error: invalid value 'r13' in 'mstack-protector-guard-reg=', expected one of: r2 make[3]: *** [arch/powerpc/kernel/vdso/Makefile:85: arch/powerpc/kernel/vdso/vgettimeofday-32.o] Error 1 make[3]: *** [arch/powerpc/kernel/vdso/Makefile:87: arch/powerpc/kernel/vdso/vgetrandom-32.o] Error 1 Remove these flags by adding them to the CC32FLAGSREMOVE variable, which already handles situations similar to this. Additionally, reformat and align a comment better for the expanding CONFIG_CC_IS_CLANG block. 
Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030-powerpc-vdso-drop-stackp-flags-clang-v1-1-d95e7376d29c@kernel.org --- arch/powerpc/kernel/vdso/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 31ca5a5470047..c568cad6a22e6 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -54,10 +54,14 @@ ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -W CC32FLAGS := -m32 CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc - # This flag is supported by clang for 64-bit but not 32-bit so it will cause - # an unused command line flag warning for this file. ifdef CONFIG_CC_IS_CLANG +# This flag is supported by clang for 64-bit but not 32-bit so it will cause +# an unused command line flag warning for this file. CC32FLAGSREMOVE += -fno-stack-clash-protection +# -mstack-protector-guard values from the 64-bit build are not valid for the +# 32-bit one. clang validates the values passed to these arguments during +# parsing, even when -fno-stack-protector is passed afterwards. +CC32FLAGSREMOVE += -mstack-protector-guard% endif LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 AS32FLAGS := -D__VDSO32__ From 2866949ec889cf383c481119c617b9cead733070 Mon Sep 17 00:00:00 2001 From: Paulo Miguel Almeida Date: Sat, 19 Oct 2024 15:13:49 +1300 Subject: [PATCH 25/55] powerpc/ps3: replace open-coded sysfs_emit function sysfs_emit() helper function should be used when formatting the value to be returned to user space. 
This patch replaces open-coded sysfs_emit() in sysfs .show() callbacks Link: https://github.com/KSPP/linux/issues/105 Signed-off-by: Paulo Miguel Almeida Reviewed-by: Geert Uytterhoeven Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/ZxMV3YvSulJFZ8rk@mail.google.com --- arch/powerpc/platforms/ps3/system-bus.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index b9a7d9bae687e..afbaabf182d01 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -453,10 +453,9 @@ static ssize_t modalias_show(struct device *_dev, struct device_attribute *a, char *buf) { struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); - int len = snprintf(buf, PAGE_SIZE, "ps3:%d:%d\n", dev->match_id, - dev->match_sub_id); - return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; + return sysfs_emit(buf, "ps3:%d:%d\n", dev->match_id, + dev->match_sub_id); } static DEVICE_ATTR_RO(modalias); From f1c774ba91054a749573781f9e8fd652b9a1f633 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Aug 2024 16:33:12 +1000 Subject: [PATCH 26/55] powerpc/modules: start/end_opd are only needed for ABI v1 The start_opd/end_opd members of struct mod_arch_specific are only needed for kernels built using ELF ABI v1. Guard them with an ifdef to save a little bit of space on ELF ABI v2 kernels. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240812063312.730496-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/module.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 9ee70a4a0fde1..e1ee5026ac4af 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -35,9 +35,11 @@ struct mod_arch_specific { bool toc_fixed; /* Have we fixed up .TOC.? 
*/ #endif +#ifdef CONFIG_PPC64_ELF_ABI_V1 /* For module function descriptor dereference */ unsigned long start_opd; unsigned long end_opd; +#endif #else /* powerpc64 */ /* Indices of PLT sections within module. */ unsigned int core_plt_section; From 19e0a70e6c3c1bf800b8ce9eb45864aa9e1e2781 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 27 Oct 2024 23:22:17 +0100 Subject: [PATCH 27/55] powerpc: Use str_enabled_disabled() helper function Remove hard-coded strings by using the str_enabled_disabled() helper function. Signed-off-by: Thorsten Blum Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241027222219.1173-2-thorsten.blum@linux.dev --- arch/powerpc/kernel/secure_boot.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 9e0efb657f393..3a28795b4ed82 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -5,6 +5,7 @@ */ #include #include +#include #include static struct device_node *get_ppc_fw_sb_node(void) @@ -38,7 +39,7 @@ bool is_ppc_secureboot_enabled(void) of_node_put(node); out: - pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Secure boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } @@ -62,7 +63,7 @@ bool is_ppc_trustedboot_enabled(void) of_node_put(node); out: - pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Trusted boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } From 2abbd6d5fbe0eae3752b44c963248e19292e5104 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 7 Sep 2024 17:40:41 +0200 Subject: [PATCH 28/55] powerpc: Add __must_check to set_memory_...() After the following powerpc commits, all calls to set_memory_...() functions check returned value. 
- Commit 8f17bd2f4196 ("powerpc: Handle error in mark_rodata_ro() and mark_initmem_nx()") - Commit f7f18e30b468 ("powerpc/kprobes: Handle error returned by set_memory_rox()") - Commit 009cf11d4aab ("powerpc: Don't ignore errors from set_memory_{n}p() in __kernel_map_pages()") - Commit 9cbacb834b4a ("powerpc: Don't ignore errors from set_memory_{n}p() in __kernel_map_pages()") - Commit 78cb0945f714 ("powerpc: Handle error in mark_rodata_ro() and mark_initmem_nx()") All calls in core parts of the kernel also always check returned value, can be looked at with following query: $ git grep -w -e set_memory_ro -e set_memory_rw -e set_memory_x -e set_memory_nx -e set_memory_rox `find . -maxdepth 1 -type d | grep -v arch | grep /` It is now possible to flag those functions with __must_check to make sure no new unchecked call is added. Link: https://github.com/KSPP/linux/issues/7 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/775dae48064a661554802ed24ed5bdffe1784724.1725723351.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/set_memory.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index 9a025b776a4b3..9c8d5747755da 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -12,37 +12,37 @@ int change_memory_attr(unsigned long addr, int numpages, long action); -static inline int set_memory_ro(unsigned long addr, int numpages) +static inline int __must_check set_memory_ro(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_RO); } -static inline int set_memory_rw(unsigned long addr, int numpages) +static inline int __must_check set_memory_rw(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_RW); } -static inline int set_memory_nx(unsigned long addr, int numpages) +static inline int
__must_check set_memory_nx(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_NX); } -static inline int set_memory_x(unsigned long addr, int numpages) +static inline int __must_check set_memory_x(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_X); } -static inline int set_memory_np(unsigned long addr, int numpages) +static inline int __must_check set_memory_np(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_NP); } -static inline int set_memory_p(unsigned long addr, int numpages) +static inline int __must_check set_memory_p(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_P); } -static inline int set_memory_rox(unsigned long addr, int numpages) +static inline int __must_check set_memory_rox(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_ROX); } From da6ffe855b5a05f29222e3d4ffa4b549413e33a4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Nov 2024 14:26:25 +0100 Subject: [PATCH 29/55] powerpc/ps3: Mark ps3_setup_uhc_device() __init ps3_setup_uhc_device() is only called from ps3_setup_ehci_device() and ps3_setup_ohci_device(), which are both marked __init. Hence replace the former's __ref marker by __init. Note that before commit bd721ea73e1f9655 ("treewide: replace obsolete _refok by __ref"), the function was marked __init_refok, which probably should have been __init in the first place. 
Signed-off-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/31fe9435056fcfbf82c3a01693be278d5ce4ad0f.1730899557.git.geert+renesas@glider.be --- arch/powerpc/platforms/ps3/device-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index b18e1c92e554c..61722133eb2d3 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -178,7 +178,7 @@ static int __init ps3_setup_gelic_device( return result; } -static int __ref ps3_setup_uhc_device( +static int __init ps3_setup_uhc_device( const struct ps3_repository_device *repo, enum ps3_match_id match_id, enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type) { From 3b9bde403aafa55dcbe7dc250b95af917610f139 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:49 +1100 Subject: [PATCH 30/55] selftests/powerpc: Lower run time of count_stcx_fail test The count_stcx_fail test runs for close to or just over 2 minutes, which means it sometimes times out. That's overkill for a test that just demonstrates some PMU counters are working. Drop the 64 billion instruction case, to lower the runtime to ~30s. 
Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/pmu/count_stcx_fail.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 2070a1e2b3a57..d8dd9a9c6c1b6 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -144,9 +144,6 @@ static int test_body(void) /* Run for 16Bi instructions */ FAIL_IF(do_count_loop(events, 16000000000, overhead, true)); - /* Run for 64Bi instructions */ - FAIL_IF(do_count_loop(events, 64000000000, overhead, true)); - event_close(&events[0]); event_close(&events[1]); From 5543d595954eefb3a6faa18a6dc7b1b3d6022052 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:50 +1100 Subject: [PATCH 31/55] selftests/powerpc: Give all tests 2 minutes timeout Each of the powerpc selftests runs with a timeout of 2 minutes by default (see tools/testing/selftests/powerpc/harness.c). But when tests are run with run_kselftest.sh it uses a timeout of 45 seconds, meaning some tests run OK standalone but fail when run with the test runner. So tell run_kselftest.sh to give each test 130 seconds, that should allow the tests to complete, or be killed by the powerpc test harness after 2 minutes. If for some reason the harness fails, or for the few tests that don't use the harness, the 130 second timeout should catch them if they get stuck. 
Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-2-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/alignment/settings | 1 + tools/testing/selftests/powerpc/cache_shape/settings | 1 + tools/testing/selftests/powerpc/copyloops/settings | 1 + tools/testing/selftests/powerpc/dexcr/settings | 1 + tools/testing/selftests/powerpc/dscr/settings | 1 + tools/testing/selftests/powerpc/lib/settings | 1 + tools/testing/selftests/powerpc/math/settings | 1 + tools/testing/selftests/powerpc/mce/settings | 1 + tools/testing/selftests/powerpc/mm/settings | 1 + tools/testing/selftests/powerpc/nx-gzip/settings | 1 + tools/testing/selftests/powerpc/papr_attributes/settings | 1 + tools/testing/selftests/powerpc/papr_sysparm/settings | 1 + tools/testing/selftests/powerpc/papr_vpd/settings | 1 + tools/testing/selftests/powerpc/pmu/settings | 1 + tools/testing/selftests/powerpc/primitives/settings | 1 + tools/testing/selftests/powerpc/ptrace/settings | 1 + tools/testing/selftests/powerpc/scripts/settings | 1 + tools/testing/selftests/powerpc/security/settings | 1 + tools/testing/selftests/powerpc/stringloops/settings | 1 + tools/testing/selftests/powerpc/switch_endian/settings | 1 + tools/testing/selftests/powerpc/syscalls/settings | 1 + tools/testing/selftests/powerpc/vphn/settings | 1 + 22 files changed, 22 insertions(+) create mode 100644 tools/testing/selftests/powerpc/alignment/settings create mode 100644 tools/testing/selftests/powerpc/cache_shape/settings create mode 100644 tools/testing/selftests/powerpc/copyloops/settings create mode 100644 tools/testing/selftests/powerpc/dexcr/settings create mode 100644 tools/testing/selftests/powerpc/dscr/settings create mode 100644 tools/testing/selftests/powerpc/lib/settings create mode 100644 tools/testing/selftests/powerpc/math/settings create mode 100644 tools/testing/selftests/powerpc/mce/settings create mode 100644 tools/testing/selftests/powerpc/mm/settings create mode 100644 
tools/testing/selftests/powerpc/nx-gzip/settings create mode 100644 tools/testing/selftests/powerpc/papr_attributes/settings create mode 100644 tools/testing/selftests/powerpc/papr_sysparm/settings create mode 100644 tools/testing/selftests/powerpc/papr_vpd/settings create mode 100644 tools/testing/selftests/powerpc/pmu/settings create mode 100644 tools/testing/selftests/powerpc/primitives/settings create mode 100644 tools/testing/selftests/powerpc/ptrace/settings create mode 100644 tools/testing/selftests/powerpc/scripts/settings create mode 100644 tools/testing/selftests/powerpc/security/settings create mode 100644 tools/testing/selftests/powerpc/stringloops/settings create mode 100644 tools/testing/selftests/powerpc/switch_endian/settings create mode 100644 tools/testing/selftests/powerpc/syscalls/settings create mode 100644 tools/testing/selftests/powerpc/vphn/settings diff --git a/tools/testing/selftests/powerpc/alignment/settings b/tools/testing/selftests/powerpc/alignment/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/cache_shape/settings b/tools/testing/selftests/powerpc/cache_shape/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/copyloops/settings b/tools/testing/selftests/powerpc/copyloops/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dexcr/settings b/tools/testing/selftests/powerpc/dexcr/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/settings @@ -0,0 +1 @@ +timeout=130 diff --git 
a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/lib/settings b/tools/testing/selftests/powerpc/lib/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/lib/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/math/settings b/tools/testing/selftests/powerpc/math/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mce/settings b/tools/testing/selftests/powerpc/mce/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mm/settings b/tools/testing/selftests/powerpc/mm/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/nx-gzip/settings b/tools/testing/selftests/powerpc/nx-gzip/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_attributes/settings b/tools/testing/selftests/powerpc/papr_attributes/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_sysparm/settings b/tools/testing/selftests/powerpc/papr_sysparm/settings new file mode 100644 index 0000000000000..2e85661833189 --- 
/dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_vpd/settings b/tools/testing/selftests/powerpc/papr_vpd/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/pmu/settings b/tools/testing/selftests/powerpc/pmu/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/primitives/settings b/tools/testing/selftests/powerpc/primitives/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/ptrace/settings b/tools/testing/selftests/powerpc/ptrace/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/scripts/settings b/tools/testing/selftests/powerpc/scripts/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/scripts/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/security/settings b/tools/testing/selftests/powerpc/security/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/stringloops/settings b/tools/testing/selftests/powerpc/stringloops/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git 
a/tools/testing/selftests/powerpc/switch_endian/settings b/tools/testing/selftests/powerpc/switch_endian/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/syscalls/settings b/tools/testing/selftests/powerpc/syscalls/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/syscalls/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/vphn/settings b/tools/testing/selftests/powerpc/vphn/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/settings @@ -0,0 +1 @@ +timeout=130 From d5f578f90a34d85f1cabd4c27af1b2d9fbffe64b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:51 +1100 Subject: [PATCH 32/55] selftests/powerpc: Fix 32-bit BE build errors on Ubuntu 24.04 Starting with Ubuntu 24.04, building the selftests with the big endian compiler (which defaults to 32-bit) fails with errors: stack_expansion_ldst.c:178:37: error: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'rlim_t' {aka 'long long unsigned int'} subpage_prot.c:214:38: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'off_t' {aka 'long long int'} Prior to 24.04 rlim_t was long unsigned int, and off_t was long int. Cast to unsigned long long and long long before passing to printf to avoid the errors. 
Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-3-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c | 2 +- tools/testing/selftests/powerpc/mm/subpage_prot.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c index ed9143990888d..9c0d343d71375 100644 --- a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c @@ -175,7 +175,7 @@ static int test(void) page_size = getpagesize(); getrlimit(RLIMIT_STACK, &rlimit); - printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur); + printf("Stack rlimit is 0x%llx\n", (unsigned long long)rlimit.rlim_cur); printf("Testing loads ...\n"); test_one_type(LOAD, page_size, rlimit.rlim_cur); diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 3ae77ba93208f..8cf9fd5fed1c5 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -211,8 +211,8 @@ int test_file(void) perror("failed to map file"); return 1; } - printf("allocated %s for 0x%lx bytes at %p\n", - file_name, filesize, fileblock); + printf("allocated %s for 0x%llx bytes at %p\n", + file_name, (long long)filesize, fileblock); printf("testing file map...\n"); From c6a75555b4b2643365a007b7162a670d69aa28fe Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:52 +1100 Subject: [PATCH 33/55] selftests/powerpc: Return errors from all tests Fix some tests which weren't returning an error code from main. Although these tests only ever return success, they can still fail if they time out and the harness kills them. If that happens they still return success to the shell, which is incorrect and confuses the higher level error reporting. 
Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-4-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/signal/sigfuz.c | 2 +- .../testing/selftests/powerpc/tm/tm-signal-context-force-tm.c | 2 +- tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c index 08f9afe3b95c4..c101b1391696e 100644 --- a/tools/testing/selftests/powerpc/signal/sigfuz.c +++ b/tools/testing/selftests/powerpc/signal/sigfuz.c @@ -321,5 +321,5 @@ int main(int argc, char **argv) if (!args) args = ARG_COMPLETE; - test_harness(signal_fuzzer, "signal_fuzzer"); + return test_harness(signal_fuzzer, "signal_fuzzer"); } diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c index 421cb082f6bef..0a4bc479ae39b 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -176,5 +176,5 @@ int tm_signal_context_force_tm(void) int main(int argc, char **argv) { - test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); + return test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); } diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c index 06b801906f275..968864b052ece 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c @@ -46,6 +46,5 @@ int tm_signal_sigreturn_nt(void) int main(int argc, char **argv) { - test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); + return test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); } - From a8a54a65cac4f8202df36f925b6746328802d05f Mon Sep 17 00:00:00 2001 From: Michael 
Ellerman Date: Thu, 7 Nov 2024 00:04:53 +1100 Subject: [PATCH 34/55] selftests/powerpc: Detect taint change in mitigation patching test Currently the mitigation patching test errors out if the kernel is tainted prior to the test running. That causes the test to fail unnecessarily if some other test has caused the kernel to be tainted, or if a proprietary or force module is loaded for example. Instead just warn if the kernel is tainted to begin with, and only report a change in the taint state as an error in the test. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-5-mpe@ellerman.id.au --- .../selftests/powerpc/security/mitigation-patching.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index f43aa4b77fbaa..9a4612e2e9537 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -36,8 +36,7 @@ fi tainted=$(cat /proc/sys/kernel/tainted) if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel already tainted!" >&2 - exit 1 + echo "Warning: kernel already tainted! ($tainted)" >&2 fi mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush" @@ -68,9 +67,10 @@ fi echo "Waiting for timeout ..." wait +orig_tainted=$tainted tainted=$(cat /proc/sys/kernel/tainted) -if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel became tainted!" >&2 +if [[ "$tainted" != "$orig_tainted" ]]; then + echo "Error: kernel newly tainted, before ($orig_tainted) after ($tainted)" >&2 exit 1 fi From 817a763a07f2407ca43b2134d067e7c0576f1b79 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 6 Nov 2024 15:26:39 -0600 Subject: [PATCH 35/55] powerpc/44x: Use for_each_of_range() iterator Simplify the ppc44x PCI dma-ranges parsing to use the for_each_of_range() iterator. 
Signed-off-by: Rob Herring (Arm) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106212640.341677-1-robh@kernel.org --- arch/powerpc/platforms/44x/pci.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/44x/pci.c b/arch/powerpc/platforms/44x/pci.c index db6d33ca753f1..364aeb86ab64e 100644 --- a/arch/powerpc/platforms/44x/pci.c +++ b/arch/powerpc/platforms/44x/pci.c @@ -94,10 +94,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, struct resource *res) { u64 size; - const u32 *ranges; - int rlen; - int pna = of_n_addr_cells(hose->dn); - int np = pna + 5; + struct of_range_parser parser; + struct of_range range; /* Default */ res->start = 0; @@ -105,18 +103,15 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, res->end = size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; - /* Get dma-ranges property */ - ranges = of_get_property(hose->dn, "dma-ranges", &rlen); - if (ranges == NULL) + if (of_pci_dma_range_parser_init(&parser, hose->dn)) goto out; - /* Walk it */ - while ((rlen -= np * 4) >= 0) { - u32 pci_space = ranges[0]; - u64 pci_addr = of_read_number(ranges + 1, 2); - u64 cpu_addr = of_translate_dma_address(hose->dn, ranges + 3); - size = of_read_number(ranges + pna + 3, 2); - ranges += np; + for_each_of_range(&parser, &range) { + u32 pci_space = range.flags; + u64 pci_addr = range.bus_addr; + u64 cpu_addr = range.cpu_addr; + size = range.size; + if (cpu_addr == OF_BAD_ADDR || size == 0) continue; From f3ef7dbda9b589cdad833001e4366eb80977b7f1 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 6 Nov 2024 15:26:46 -0600 Subject: [PATCH 36/55] powerpc/cell: Use for_each_of_range() iterator Simplify the cell_iommu_get_fixed_address() dma-ranges parsing to use the for_each_of_range() iterator. 
Signed-off-by: Rob Herring (Arm) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106212647.341857-1-robh@kernel.org --- arch/powerpc/platforms/cell/iommu.c | 49 ++++++++++------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 4cd9c0de22c2b..62c9679b8ca33 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -779,58 +779,41 @@ static int __init cell_iommu_init_disabled(void) static u64 cell_iommu_get_fixed_address(struct device *dev) { - u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR; + u64 best_size, dev_addr = OF_BAD_ADDR; struct device_node *np; - const u32 *ranges = NULL; - int i, len, best, naddr, nsize, pna, range_size; + struct of_range_parser parser; + struct of_range range; /* We can be called for platform devices that have no of_node */ np = of_node_get(dev->of_node); if (!np) goto out; - while (1) { - naddr = of_n_addr_cells(np); - nsize = of_n_size_cells(np); - np = of_get_next_parent(np); - if (!np) - break; - - ranges = of_get_property(np, "dma-ranges", &len); + while ((np = of_get_next_parent(np))) { + if (of_pci_dma_range_parser_init(&parser, np)) + continue; - /* Ignore empty ranges, they imply no translation required */ - if (ranges && len > 0) + if (of_range_count(&parser)) break; } - if (!ranges) { + if (!np) { dev_dbg(dev, "iommu: no dma-ranges found\n"); goto out; } - len /= sizeof(u32); - - pna = of_n_addr_cells(np); - range_size = naddr + nsize + pna; - - /* dma-ranges format: - * child addr : naddr cells - * parent addr : pna cells - * size : nsize cells - */ - for (i = 0, best = -1, best_size = 0; i < len; i += range_size) { - cpu_addr = of_translate_dma_address(np, ranges + i + naddr); - size = of_read_number(ranges + i + naddr + pna, nsize); + best_size = 0; + for_each_of_range(&parser, &range) { + if (!range.cpu_addr) + continue; - if (cpu_addr == 0 && size > 
best_size) { - best = i; - best_size = size; + if (range.size > best_size) { + best_size = range.size; + dev_addr = range.bus_addr; } } - if (best >= 0) { - dev_addr = of_read_number(ranges + best, naddr); - } else + if (!best_size) dev_dbg(dev, "iommu: no suitable range found!\n"); out: From cfec8463d9a19ec043845525fe5fd675e59a8aab Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 22:16:30 +1100 Subject: [PATCH 37/55] powerpc/ftrace: Fix ftrace bug with KASAN=y Booting a KASAN=y kernel with the recently added ftrace out-of-line support causes a warning at boot: ------------[ cut here ]------------ Stub index overflow (1729 > 1728) WARNING: CPU: 0 PID: 0 at arch/powerpc/kernel/trace/ftrace.c:209 ftrace_init_nop+0x408/0x444 ... NIP ftrace_init_nop+0x408/0x444 LR ftrace_init_nop+0x404/0x444 Call Trace: ftrace_init_nop+0x404/0x444 (unreliable) ftrace_process_locs+0x544/0x8a0 ftrace_init+0xb4/0x22c start_kernel+0x1dc/0x4d4 start_here_common+0x1c/0x20 ... ftrace failed to modify [] _sub_I_65535_1+0x8/0x3c actual: 00:00:00:60 Initializing ftrace call sites ftrace record flags: 0 (0) expected tramp: c00000000008b418 ------------[ cut here ]------------ The function in question, _sub_I_65535_1 is some sort of trampoline generated for KASAN, and is in the .text.startup section. That section is part of INIT_TEXT, meaning is_kernel_inittext() returns true for it. But the script that determines how many out-of-line ftrace stubs are needed doesn't consider .text.startup as inittext, leading to there not being enough space for the init stubs. Conversely the logic to calculate how many stubs are needed for the text section isn't filtering out the symbols in .text.startup and so ends up over counting. Fix both problems by calculating the total number of stubs first, then the number that count as inittext, and then subtract the latter from the former to get the count for the text section.
Fixes: eec37961a56a ("powerpc64/ftrace: Move ftrace sequence out of line") Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241107111630.31068-1-mpe@ellerman.id.au --- arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh index 950a7778324b6..bac186bdf64a7 100755 --- a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -15,10 +15,11 @@ if [ -z "$is_64bit" ]; then RELOCATION=R_PPC_ADDR32 fi -num_ool_stubs_text=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | - grep -v ".init.text" | grep -c "$RELOCATION") +num_ool_stubs_total=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | + grep -c "$RELOCATION") num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | - grep ".init.text" | grep -c "$RELOCATION") + grep -e ".init.text" -e ".text.startup" | grep -c "$RELOCATION") +num_ool_stubs_text=$((num_ool_stubs_total - num_ool_stubs_inittext)) if [ "$num_ool_stubs_text" -gt "$num_ool_stubs_text_builtin" ]; then num_ool_stubs_text_end=$((num_ool_stubs_text - num_ool_stubs_text_builtin)) From f4892c68ecc1cf45e41a78820dd2eebccc945b66 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 7 Nov 2024 11:28:16 +0530 Subject: [PATCH 38/55] powerpc/fadump: allocate memory for additional parameters early Memory for passing additional parameters to fadump capture kernel is allocated during subsys_initcall level, using memblock. But as slab is already available by this time, allocation happens via the buddy allocator. This may work for radix MMU but is likely to fail in most cases for hash MMU as hash MMU needs this memory in the first memory block for it to be accessible in real mode in the capture kernel (second boot). So, allocate memory for additional parameters area as soon as MMU mode is obvious. 
Fixes: 683eab94da75 ("powerpc/fadump: setup additional parameters for dump capture kernel") Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/lkml/a70e4064-a040-447b-8556-1fd02f19383d@linux.vnet.ibm.com/T/#u Signed-off-by: Hari Bathini Signed-off-by: Sourabh Jain Tested-by: Venkat Rao Bagalkote Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241107055817.489795-1-sourabhjain@linux.ibm.com --- arch/powerpc/include/asm/fadump.h | 2 ++ arch/powerpc/kernel/fadump.c | 15 ++++++++++----- arch/powerpc/kernel/prom.c | 3 +++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 3638f04447f59..a48f54dde4f65 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -19,6 +19,7 @@ extern int is_fadump_active(void); extern int should_fadump_crash(void); extern void crash_fadump(struct pt_regs *, const char *); extern void fadump_cleanup(void); +void fadump_setup_param_area(void); extern void fadump_append_bootargs(void); #else /* CONFIG_FA_DUMP */ @@ -26,6 +27,7 @@ static inline int is_fadump_active(void) { return 0; } static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } static inline void fadump_cleanup(void) { } +static inline void fadump_setup_param_area(void) { } static inline void fadump_append_bootargs(void) { } #endif /* !CONFIG_FA_DUMP */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index c42f89862893e..5583017bbfbce 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1587,6 +1587,12 @@ static void __init fadump_init_files(void) return; } + if (fw_dump.param_area) { + rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr); + if (rc) + pr_err("unable to create bootargs_append sysfs file (%d)\n", rc); + } + debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, 
&fadump_region_fops); @@ -1741,7 +1747,7 @@ static void __init fadump_process(void) * Reserve memory to store additional parameters to be passed * for fadump/capture kernel. */ -static void __init fadump_setup_param_area(void) +void __init fadump_setup_param_area(void) { phys_addr_t range_start, range_end; @@ -1749,7 +1755,7 @@ static void __init fadump_setup_param_area(void) return; /* This memory can't be used by PFW or bootloader as it is shared across kernels */ - if (radix_enabled()) { + if (early_radix_enabled()) { /* * Anywhere in the upper half should be good enough as all memory * is accessible in real mode. @@ -1777,12 +1783,12 @@ static void __init fadump_setup_param_area(void) COMMAND_LINE_SIZE, range_start, range_end); - if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) { + if (!fw_dump.param_area) { pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); return; } - memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE); + memset((void *)fw_dump.param_area, 0, COMMAND_LINE_SIZE); } /* @@ -1808,7 +1814,6 @@ int __init setup_fadump(void) } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { - fadump_setup_param_area(); fw_dump.ops->fadump_init_mem_struct(&fw_dump); register_fadump(); } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 0be07ed407c70..47db1b1aef254 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -908,6 +908,9 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); + /* Setup param area for passing additional parameters to fadump capture kernel. 
*/ + fadump_setup_param_area(); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); From fb90dca828b6070709093934c6dec56489a2d91d Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Thu, 7 Nov 2024 11:28:17 +0530 Subject: [PATCH 39/55] fadump: reserve param area if below boot_mem_top The param area is a memory region where the kernel places additional command-line arguments for fadump kernel. Currently, the param memory area is reserved in fadump kernel if it is above boot_mem_top. However, it should be reserved if it is below boot_mem_top because the fadump kernel already reserves memory from boot_mem_top to the end of DRAM. Currently, there is no impact from not reserving param memory if it is below boot_mem_top, as it is not used after the early boot phase of the fadump kernel. However, if this changes in the future, it could lead to issues in the fadump kernel. Fixes: 3416c9daa6b1 ("powerpc/fadump: pass additional parameters when fadump is active") Acked-by: Hari Bathini Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241107055817.489795-2-sourabhjain@linux.ibm.com --- arch/powerpc/kernel/fadump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 5583017bbfbce..4b371c738213c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -153,7 +153,7 @@ void __init fadump_append_bootargs(void) if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) return; - if (fw_dump.param_area >= fw_dump.boot_mem_top) { + if (fw_dump.param_area < fw_dump.boot_mem_top) { if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { pr_warn("WARNING: Can't use additional parameters area!\n"); fw_dump.param_area = 0; From 44e5d21e6d3fd2a1fed7f0327cf72e99397e2eaf Mon Sep 17 00:00:00 2001 From: Gautam Menghani 
Date: Fri, 8 Nov 2024 15:18:37 +0530 Subject: [PATCH 40/55] powerpc/pseries: Fix KVM guest detection for disabling hardlockup detector As per the kernel documentation[1], hardlockup detector should be disabled in KVM guests as it may give false positives. On PPC, hardlockup detector is enabled inside KVM guests because disable_hardlockup_detector() is marked as early_initcall and it relies on kvm_guest static key (is_kvm_guest()) which is initialized later during boot by check_kvm_guest(), which is a core_initcall. check_kvm_guest() is also called in pSeries_smp_probe(), which is called before initcalls, but it is skipped if KVM guest does not have doorbell support or if the guest is launched with SMT=1. Call check_kvm_guest() in disable_hardlockup_detector() so that is_kvm_guest() check goes through fine and hardlockup detector can be disabled inside the KVM guest. [1]: Documentation/admin-guide/sysctl/kernel.rst Fixes: 633c8e9800f3 ("powerpc/pseries: Enable hardlockup watchdog for PowerVM partitions") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Gautam Menghani Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241108094839.33084-1-gautam@linux.ibm.com --- arch/powerpc/kernel/setup_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 22f83fbbc762a..1edc7cd68c10d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -920,6 +920,7 @@ static int __init disable_hardlockup_detector(void) hardlockup_detector_disable(); #else if (firmware_has_feature(FW_FEATURE_LPAR)) { + check_kvm_guest(); if (is_kvm_guest()) hardlockup_detector_disable(); } From 5b881c1f83792f5db421124171b06f1b8f1fe075 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Sat, 9 Nov 2024 00:23:27 +0800 Subject: [PATCH 41/55] powerpc/irq: use seq_put_decimal_ull_width() for decimal values On a system with n CPUs and m interrupts, there will be n*m decimal values yielded 
via seq_printf(.."%10u "..) which is less efficient than seq_put_decimal_ull_width(), stress reading /proc/interrupts indicates ~30% performance improvement with this patch. Signed-off-by: David Wang <00107082@163.com> [mpe: Flesh out change log based on original submission] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/all/20241103080552.4787-1-00107082@163.com Link: https://patch.msgid.link/20241108162327.9887-1-00107082@163.com --- arch/powerpc/kernel/irq.c | 44 +++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2e1600a8bbbbf..a0e8b998c9b52 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -89,69 +89,69 @@ int arch_show_interrupts(struct seq_file *p, int prec) #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized) { - seq_printf(p, "%*s: ", prec, "TAU"); + seq_printf(p, "%*s:", prec, "TAU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); + seq_put_decimal_ull_width(p, " ", tau_interrupts(j), 10); seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); } #endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_event, 10); seq_printf(p, " Local timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "BCT"); + seq_printf(p, "%*s:", prec, "BCT"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).broadcast_irqs_event, 10); seq_printf(p, " Broadcast timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, 
j).timer_irqs_others); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_others, 10); seq_printf(p, " Local timer interrupts for others\n"); - seq_printf(p, "%*s: ", prec, "SPU"); + seq_printf(p, "%*s:", prec, "SPU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).spurious_irqs, 10); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "PMI"); + seq_printf(p, "%*s:", prec, "PMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).pmu_irqs, 10); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "MCE"); + seq_printf(p, "%*s:", prec, "MCE"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).mce_exceptions, 10); seq_printf(p, " Machine check exceptions\n"); #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { - seq_printf(p, "%*s: ", prec, "HMI"); + seq_printf(p, "%*s:", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); + seq_put_decimal_ull_width(p, " ", paca_ptrs[j]->hmi_irqs, 10); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } #endif - seq_printf(p, "%*s: ", prec, "NMI"); + seq_printf(p, "%*s:", prec, "NMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).sreset_irqs, 10); seq_printf(p, " System Reset interrupts\n"); #ifdef CONFIG_PPC_WATCHDOG - seq_printf(p, "%*s: ", prec, "WDG"); + seq_printf(p, "%*s:", prec, "WDG"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).soft_nmi_irqs, 10); seq_printf(p, " Watchdog soft-NMI interrupts\n"); #endif #ifdef CONFIG_PPC_DOORBELL if 
(cpu_has_feature(CPU_FTR_DBELL)) { - seq_printf(p, "%*s: ", prec, "DBL"); + seq_printf(p, "%*s:", prec, "DBL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).doorbell_irqs, 10); seq_printf(p, " Doorbell interrupts\n"); } #endif From fae2987e67786a6358c0ef47189b12ff19e9543a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 12 Nov 2024 19:51:48 +1100 Subject: [PATCH 42/55] cpufreq: maple: Remove maple driver This driver is no longer buildable since the PPC_MAPLE platform was removed, see commit 62f8f307c80e ("powerpc/64: Remove maple platform"). Remove the driver. Note that the comment in the driver says it supports "SMU & 970FX based G5 Macs", but that's not true, that comment was copied from pmac64-cpufreq.c, which still exists and continues to support those machines. Acked-by: Viresh Kumar Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241112085148.415574-1-mpe@ellerman.id.au --- drivers/cpufreq/Kconfig.powerpc | 7 - drivers/cpufreq/Makefile | 1 - drivers/cpufreq/maple-cpufreq.c | 242 -------------------------------- 3 files changed, 250 deletions(-) delete mode 100644 drivers/cpufreq/maple-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc index 58151ca566958..eb678fa5260aa 100644 --- a/drivers/cpufreq/Kconfig.powerpc +++ b/drivers/cpufreq/Kconfig.powerpc @@ -17,13 +17,6 @@ config CPU_FREQ_CBE_PMI frequencies. Using PMI, the processor will not only be able to run at lower speed, but also at lower core voltage. -config CPU_FREQ_MAPLE - bool "Support for Maple 970FX Evaluation Board" - depends on PPC_MAPLE - help - This adds support for frequency switching on Maple 970FX - Evaluation Board and compatible boards (IBM JS2x blades). 
- config CPU_FREQ_PMAC bool "Support for Apple PowerBooks" depends on ADB_PMU && PPC32 diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 0f184031dd123..1a8f787db7e21 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -92,7 +92,6 @@ obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o obj-$(CONFIG_CPU_FREQ_CBE) += ppc-cbe-cpufreq.o ppc-cbe-cpufreq-y += ppc_cbe_cpufreq_pervasive.o ppc_cbe_cpufreq.o obj-$(CONFIG_CPU_FREQ_CBE_PMI) += ppc_cbe_cpufreq_pmi.o -obj-$(CONFIG_CPU_FREQ_MAPLE) += maple-cpufreq.o obj-$(CONFIG_QORIQ_CPUFREQ) += qoriq-cpufreq.o obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c deleted file mode 100644 index 690da85c4865a..0000000000000 --- a/drivers/cpufreq/maple-cpufreq.c +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2011 Dmitry Eremin-Solenikov - * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt - * and Markus Demleitner - * - * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs, - * that is iMac G5 and latest single CPU desktop. - */ - -#undef DEBUG - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DBG(fmt...) 
pr_debug(fmt) - -/* see 970FX user manual */ - -#define SCOM_PCR 0x0aa001 /* PCR scom addr */ - -#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */ -#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */ -#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */ -#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */ -#define PCR_SPEED_MASK 0x000e0000U /* speed mask */ -#define PCR_SPEED_SHIFT 17 -#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */ -#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */ -#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */ -#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */ -#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */ -#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */ - -#define SCOM_PSR 0x408001 /* PSR scom addr */ -/* warning: PSR is a 64 bits register */ -#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */ -#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */ -#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */ -#define PSR_CUR_SPEED_SHIFT (56) - -/* - * The G5 only supports two frequencies (Quarter speed is not supported) - */ -#define CPUFREQ_HIGH 0 -#define CPUFREQ_LOW 1 - -static struct cpufreq_frequency_table maple_cpu_freqs[] = { - {0, CPUFREQ_HIGH, 0}, - {0, CPUFREQ_LOW, 0}, - {0, 0, CPUFREQ_TABLE_END}, -}; - -/* Power mode data is an array of the 32 bits PCR values to use for - * the various frequencies, retrieved from the device-tree - */ -static int maple_pmode_cur; - -static const u32 *maple_pmode_data; -static int maple_pmode_max; - -/* - * SCOM based frequency switching for 970FX rev3 - */ -static int maple_scom_switch_freq(int speed_mode) -{ - unsigned long flags; - int to; - - local_irq_save(flags); - - /* Clear PCR high */ - scom970_write(SCOM_PCR, 0); - /* Clear PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0); - /* Set PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | - 
maple_pmode_data[speed_mode]); - - /* Wait for completion */ - for (to = 0; to < 10; to++) { - unsigned long psr = scom970_read(SCOM_PSR); - - if ((psr & PSR_CMD_RECEIVED) == 0 && - (((psr >> PSR_CUR_SPEED_SHIFT) ^ - (maple_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3) - == 0) - break; - if (psr & PSR_CMD_COMPLETED) - break; - udelay(100); - } - - local_irq_restore(flags); - - maple_pmode_cur = speed_mode; - ppc_proc_freq = maple_cpu_freqs[speed_mode].frequency * 1000ul; - - return 0; -} - -static int maple_scom_query_freq(void) -{ - unsigned long psr = scom970_read(SCOM_PSR); - int i; - - for (i = 0; i <= maple_pmode_max; i++) - if ((((psr >> PSR_CUR_SPEED_SHIFT) ^ - (maple_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0) - break; - return i; -} - -/* - * Common interface to the cpufreq core - */ - -static int maple_cpufreq_target(struct cpufreq_policy *policy, - unsigned int index) -{ - return maple_scom_switch_freq(index); -} - -static unsigned int maple_cpufreq_get_speed(unsigned int cpu) -{ - return maple_cpu_freqs[maple_pmode_cur].frequency; -} - -static int maple_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - cpufreq_generic_init(policy, maple_cpu_freqs, 12000); - return 0; -} - -static struct cpufreq_driver maple_cpufreq_driver = { - .name = "maple", - .flags = CPUFREQ_CONST_LOOPS, - .init = maple_cpufreq_cpu_init, - .verify = cpufreq_generic_frequency_table_verify, - .target_index = maple_cpufreq_target, - .get = maple_cpufreq_get_speed, - .attr = cpufreq_generic_attr, -}; - -static int __init maple_cpufreq_init(void) -{ - struct device_node *cpunode; - unsigned int psize; - unsigned long max_freq; - const u32 *valp; - u32 pvr_hi; - int rc = -ENODEV; - - /* - * Behave here like powermac driver which checks machine compatibility - * to ease merging of two drivers in future. 
- */ - if (!of_machine_is_compatible("Momentum,Maple") && - !of_machine_is_compatible("Momentum,Apache")) - return 0; - - /* Get first CPU node */ - cpunode = of_cpu_device_node_get(0); - if (cpunode == NULL) { - pr_err("Can't find any CPU 0 node\n"); - goto bail_noprops; - } - - /* Check 970FX for now */ - /* we actually don't care on which CPU to access PVR */ - pvr_hi = PVR_VER(mfspr(SPRN_PVR)); - if (pvr_hi != 0x3c && pvr_hi != 0x44) { - pr_err("Unsupported CPU version (%x)\n", pvr_hi); - goto bail_noprops; - } - - /* Look for the powertune data in the device-tree */ - /* - * On Maple this property is provided by PIBS in dual-processor config, - * not provided by PIBS in CPU0 config and also not provided by SLOF, - * so YMMV - */ - maple_pmode_data = of_get_property(cpunode, "power-mode-data", &psize); - if (!maple_pmode_data) { - DBG("No power-mode-data !\n"); - goto bail_noprops; - } - maple_pmode_max = psize / sizeof(u32) - 1; - - /* - * From what I see, clock-frequency is always the maximal frequency. - * The current driver can not slew sysclk yet, so we really only deal - * with powertune steps for now. We also only implement full freq and - * half freq in this version. So far, I haven't yet seen a machine - * supporting anything else. - */ - valp = of_get_property(cpunode, "clock-frequency", NULL); - if (!valp) - goto bail_noprops; - max_freq = (*valp)/1000; - maple_cpu_freqs[0].frequency = max_freq; - maple_cpu_freqs[1].frequency = max_freq/2; - - /* Force apply current frequency to make sure everything is in - * sync (voltage is right for example). Firmware may leave us with - * a strange setting ... 
- */ - msleep(10); - maple_pmode_cur = -1; - maple_scom_switch_freq(maple_scom_query_freq()); - - pr_info("Registering Maple CPU frequency driver\n"); - pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", - maple_cpu_freqs[1].frequency/1000, - maple_cpu_freqs[0].frequency/1000, - maple_cpu_freqs[maple_pmode_cur].frequency/1000); - - rc = cpufreq_register_driver(&maple_cpufreq_driver); - -bail_noprops: - of_node_put(cpunode); - - return rc; -} - -module_init(maple_cpufreq_init); - - -MODULE_DESCRIPTION("cpufreq driver for Maple 970FX/970MP boards"); -MODULE_LICENSE("GPL"); From be6b0eb5c46d85e360c0ff8bdde1aaa199a8fb6d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 12 Nov 2024 22:48:05 +1100 Subject: [PATCH 43/55] powerpc/cell: Remove dead extern declaration for spu_priv1_beat_ops spu_priv1_beat_ops were removed in commit bf4981a00636 ("powerpc: Remove the celleb support"), remove the unneeded extern. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241112114805.453894-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/spu_priv1.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/include/asm/spu_priv1.h b/arch/powerpc/include/asm/spu_priv1.h index 2167d756e6d59..6fee411d973d9 100644 --- a/arch/powerpc/include/asm/spu_priv1.h +++ b/arch/powerpc/include/asm/spu_priv1.h @@ -216,7 +216,6 @@ spu_disable_spu (struct spu_context *ctx) */ extern const struct spu_priv1_ops spu_priv1_mmio_ops; -extern const struct spu_priv1_ops spu_priv1_beat_ops; extern const struct spu_management_ops spu_management_of_ops; From d7a82238cb8c77d4ed8cc97cd556c5f3e64bc749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 13 Nov 2024 09:06:58 +0100 Subject: [PATCH 44/55] powerpc/vdso: Remove unused clockmode asm offsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These offsets are not used anymore, delete them. 
Fixes: c39b1dcf055d ("powerpc/vdso: Add a page for non-time data") Signed-off-by: Thomas Weißschuh Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241113-vdso-powerpc-asm-offsets-v1-1-3f7e589f090d@linutronix.de --- arch/powerpc/kernel/asm-offsets.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ae198b2d9b8c6..7a390bd4f4af3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -346,8 +346,6 @@ int main(void) #else OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map); #endif - OFFSET(VDSO_CLOCKMODE_OFFSET, vdso_arch_data, data[0].clock_mode); - DEFINE(VDSO_CLOCKMODE_TIMENS, VDSO_CLOCKMODE_TIMENS); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); From a26c4dbb3d9c1821cb0fc11cb2dbc32d5bf3463b Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 1 Oct 2024 15:03:49 +0200 Subject: [PATCH 45/55] powerpc/sstep: make emulate_vsx_load and emulate_vsx_store static These functions are not used outside of sstep.c Fixes: 350779a29f11 ("powerpc: Handle most loads and stores in instruction emulation code") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241001130356.14664-1-msuchanek@suse.de --- arch/powerpc/include/asm/sstep.h | 5 ----- arch/powerpc/lib/sstep.c | 12 ++++-------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 50950deedb873..e3d0e714ff280 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -173,9 +173,4 @@ int emulate_step(struct pt_regs *regs, ppc_inst_t instr); */ extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op); -extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem, bool cross_endian); -extern void emulate_vsx_store(struct instruction_op 
*op, - const union vsx_reg *reg, void *mem, - bool cross_endian); extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e65f3fb68d06b..ac3ee19531d8a 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -780,8 +780,8 @@ static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea, #endif /* __powerpc64 */ #ifdef CONFIG_VSX -void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem, bool rev) +static nokprobe_inline void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, + const void *mem, bool rev) { int size, read_size; int i, j; @@ -863,11 +863,9 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, break; } } -EXPORT_SYMBOL_GPL(emulate_vsx_load); -NOKPROBE_SYMBOL(emulate_vsx_load); -void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, - void *mem, bool rev) +static nokprobe_inline void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, + void *mem, bool rev) { int size, write_size; int i, j; @@ -955,8 +953,6 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, break; } } -EXPORT_SYMBOL_GPL(emulate_vsx_store); -NOKPROBE_SYMBOL(emulate_vsx_store); static nokprobe_inline int do_vsx_load(struct instruction_op *op, unsigned long ea, struct pt_regs *regs, From 276e036e5844116e563fa90f676c625bb742cc57 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 30 Sep 2024 13:20:57 +0200 Subject: [PATCH 46/55] powerpc/ps3: Reorganize kerneldoc parameter names Reorganize kerneldoc parameter names to match the parameter order in the function header. Problems identified using Coccinelle. 
Signed-off-by: Julia Lawall Reviewed-by: Geert Uytterhoeven Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240930112121.95324-12-Julia.Lawall@inria.fr --- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/ps3/repository.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 49871427f599d..af3fe9f04f24c 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -378,9 +378,9 @@ int ps3_send_event_locally(unsigned int virq) /** * ps3_sb_event_receive_port_setup - Setup a system bus event receive port. + * @dev: The system bus device instance. * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be * serviced on. - * @dev: The system bus device instance. * @virq: The assigned Linux virq. * * An event irq represents a virtual device interrupt. The interrupt_id diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index 1abe33fbe5290..b8c030eab1384 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -940,7 +940,7 @@ int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port) /** * ps3_repository_read_boot_dat_info - Get address and size of cell_ext_os_area. - * address: lpar address of cell_ext_os_area + * @lpar_addr: lpar address of cell_ext_os_area * @size: size of cell_ext_os_area */ From bfd9c145533ba9cb6f504670aa8e75542c8ee54f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 14 Oct 2024 11:55:03 +0100 Subject: [PATCH 47/55] powerpc/ep8248e: Use %pa to format resource_size_t The correct format string for resource_size_t is %pa which acts on the address of the variable to be formatted [1]. 
[1] https://elixir.bootlin.com/linux/v6.11.3/source/Documentation/core-api/printk-formats.rst#L229 Introduced by commit 9d9326d3bc0e ("phy: Change mii_bus id field to a string") Flagged by gcc-14 as: arch/powerpc/platforms/82xx/ep8248e.c: In function 'ep8248e_mdio_probe': arch/powerpc/platforms/82xx/ep8248e.c:131:46: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t' {aka 'long long unsigned int'} [-Wformat=] 131 | snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); | ~^ ~~~~~~~~~ | | | | | resource_size_t {aka long long unsigned int} | unsigned int | %llx No functional change intended. Compile tested only. Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/netdev/711d7f6d-b785-7560-f4dc-c6aad2cce99@linux-m68k.org/ Signed-off-by: Simon Horman Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241014-ep8248e-pa-fmt-v1-1-009ea0dcc18f@kernel.org --- arch/powerpc/platforms/82xx/ep8248e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 3dc65ce1f175d..8f918916e6318 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -128,7 +128,7 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev) bus->name = "ep8248e-mdio-bitbang"; bus->parent = &ofdev->dev; - snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); + snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res.start); ret = of_mdiobus_register(bus, ofdev->dev.of_node); if (ret) From b196db2f536645eda7684655f3fae913e33fda4b Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Chaurasiya Date: Fri, 25 Oct 2024 00:42:33 +0530 Subject: [PATCH 48/55] powerpc/xmon: symbol lookup length fixed Currently this cannot look up symbols beyond 64 characters in some cases like "ls", "lp" and "t". Fix this by using KSYM_NAME_LEN instead of a fixed 64 characters. Signed-off-by: Mukesh Kumar Chaurasiya Signed-off-by: Michael Ellerman
Link: https://patch.msgid.link/20241024191232.1570894-2-mchauras@linux.ibm.com --- arch/powerpc/xmon/xmon.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e6cddbb2305f8..22b8b5cc4df05 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3662,7 +3662,7 @@ symbol_lookup(void) int type = inchar(); unsigned long addr, cpu; void __percpu *ptr = NULL; - static char tmp[64]; + static char tmp[KSYM_NAME_LEN]; switch (type) { case 'a': @@ -3671,7 +3671,7 @@ symbol_lookup(void) termch = 0; break; case 's': - getstring(tmp, 64); + getstring(tmp, KSYM_NAME_LEN); if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); @@ -3686,7 +3686,7 @@ symbol_lookup(void) termch = 0; break; case 'p': - getstring(tmp, 64); + getstring(tmp, KSYM_NAME_LEN); if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); From 7ca93aa9204b706e4afcd4fae0dc8798500598d5 Mon Sep 17 00:00:00 2001 From: zhang jiao Date: Mon, 30 Sep 2024 09:27:57 +0800 Subject: [PATCH 49/55] selftests/powerpc: Remove the path after initialization. If there were no anomalies noted, then we can simply remove the log file and return, but only after the path variable has been initialized.
Signed-off-by: zhang jiao Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240930012757.2395-1-zhangjiao2@cmss.chinamobile.com --- tools/testing/selftests/powerpc/mm/tlbie_test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index 48344a74b2128..35f0098399ccd 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -313,16 +313,16 @@ static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) fclose(f); - if (nr_anamolies == 0) { - remove(path); - return; - } - sprintf(logfile, logfilename, tid); strcpy(path, logdir); strcat(path, separator); strcat(path, logfile); + if (nr_anamolies == 0) { + remove(path); + return; + } + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", tid, nr_anamolies, path); } From 6da1cab4f5f8eb778fd61f0eb6ca5b0a011dd44d Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Thu, 26 Sep 2024 12:26:22 +0300 Subject: [PATCH 50/55] powerpc/xive: Use cpumask_intersects() Replace `cpumask_any_and(a, b) >= nr_cpu_ids` with the more readable `!cpumask_intersects(a, b)`. 
Comparison between cpumask_any_and() and cpumask_intersects() The cpumask_any_and() function expands using FIND_FIRST_BIT(), resulting in a loop that iterates through each bit of the bitmask: for (idx = 0; idx * BITS_PER_LONG < sz; idx++) { val = (FETCH); if (val) { sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(val)), sz); break; } } The cpumask_intersects() function expands using __bitmap_intersects(), resulting in that the first loop iterates through each long word of the bitmask, and the second through each bit within a long word: unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) return true; if (bits % BITS_PER_LONG) if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) return true; Conclusion: cpumask_intersects() is at least as efficient as cpumask_any_and(), if not more so, as it typically performs fewer loops and comparisons. Signed-off-by: Costa Shulyupin Reviewed-by: Ming Lei Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240926092623.399577-2-costa.shul@redhat.com --- arch/powerpc/sysdev/xive/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index fa01818c1972c..a6c388bdf5d08 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -726,7 +726,7 @@ static int xive_irq_set_affinity(struct irq_data *d, pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq); /* Is this valid ? 
*/ - if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) + if (!cpumask_intersects(cpumask, cpu_online_mask)) return -EINVAL; /* From f20b0a03674cef555a5f48b65f81b82868b17cdd Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 27 Sep 2024 11:52:03 +0200 Subject: [PATCH 51/55] powerpc: remove dead config options for MPC85xx platform support Commit 384e338a9187 ("powerpc: drop MPC8540_ADS and MPC8560_ADS platform support") and commit b751ed04bc5e ("powerpc: drop MPC85xx_CDS platform support") removed the platform support for MPC8540_ADS, MPC8560_ADS and MPC85xx_CDS in the source tree, but did not remove the config options in the Kconfig file. Hence, these three config options have had no effect since then. Drop these three dead config options. Fixes: 384e338a9187 ("powerpc: drop MPC8540_ADS and MPC8560_ADS platform support") Fixes: b751ed04bc5e ("powerpc: drop MPC85xx_CDS platform support") Signed-off-by: Lukas Bulwahn Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240927095203.392365-1-lukas.bulwahn@redhat.com --- arch/powerpc/platforms/85xx/Kconfig | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 9315a3b69d6df..604c1b4b6d45c 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -40,27 +40,6 @@ config BSC9132_QDS and dual StarCore SC3850 DSP cores.
Manufacturer : Freescale Semiconductor, Inc -config MPC8540_ADS - bool "Freescale MPC8540 ADS" - select DEFAULT_UIMAGE - help - This option enables support for the MPC 8540 ADS board - -config MPC8560_ADS - bool "Freescale MPC8560 ADS" - select DEFAULT_UIMAGE - select CPM2 - help - This option enables support for the MPC 8560 ADS board - -config MPC85xx_CDS - bool "Freescale MPC85xx CDS" - select DEFAULT_UIMAGE - select PPC_I8259 - select HAVE_RAPIDIO - help - This option enables support for the MPC85xx CDS board - config MPC85xx_MDS bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS" select DEFAULT_UIMAGE From 2e716f5cdebed2fb98cafffaf626645c2e922dbb Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 11 Oct 2024 18:10:06 +0200 Subject: [PATCH 52/55] powerpc/powermac: Use of_property_match_string() in pmac_has_backlight_type() Replace an of_get_property() call by of_property_match_string() so that this function implementation can be simplified. Suggested-by: Christophe Leroy Link: https://lore.kernel.org/linuxppc-dev/d9bdc1b6-ea7e-47aa-80aa-02ae649abf72@csgroup.eu/ Suggested-by: Michael Ellerman Link: https://lore.kernel.org/linuxppc-dev/87cyk97ufp.fsf@mail.lhotse/ Signed-off-by: Markus Elfring Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/ede25e03-7a14-4787-ae1b-4fc9290add5a@web.de --- arch/powerpc/platforms/powermac/backlight.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c index 12bc01353bd3c..79741370c40c6 100644 --- a/arch/powerpc/platforms/powermac/backlight.c +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -57,18 +57,10 @@ struct backlight_device *pmac_backlight; int pmac_has_backlight_type(const char *type) { struct device_node* bk_node = of_find_node_by_name(NULL, "backlight"); + int i = of_property_match_string(bk_node, "backlight-control", type); - if (bk_node) { - const 
char *prop = of_get_property(bk_node, - "backlight-control", NULL); - if (prop && strncmp(prop, type, strlen(type)) == 0) { - of_node_put(bk_node); - return 1; - } - of_node_put(bk_node); - } - - return 0; + of_node_put(bk_node); + return i >= 0; } static void pmac_backlight_key_worker(struct work_struct *work) From 352268dc6da7b422022541c2cf846663110f775c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 3 Oct 2024 21:06:42 +0200 Subject: [PATCH 53/55] macintosh: Use common error handling code in via_pmu_led_init() Add a jump target so that a bit of exception handling can be better reused at the end of this function implementation. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/189b93e2-4e81-438d-9c77-cbe4d9d7a0d9@web.de --- drivers/macintosh/via-pmu-led.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/macintosh/via-pmu-led.c b/drivers/macintosh/via-pmu-led.c index a4fb16d7db3c1..fc1af74b65967 100644 --- a/drivers/macintosh/via-pmu-led.c +++ b/drivers/macintosh/via-pmu-led.c @@ -92,18 +92,15 @@ static int __init via_pmu_led_init(void) if (dt == NULL) return -ENODEV; model = of_get_property(dt, "model", NULL); - if (model == NULL) { - of_node_put(dt); - return -ENODEV; - } + if (!model) + goto put_node; + if (strncmp(model, "PowerBook", strlen("PowerBook")) != 0 && strncmp(model, "iBook", strlen("iBook")) != 0 && strcmp(model, "PowerMac7,2") != 0 && - strcmp(model, "PowerMac7,3") != 0) { - of_node_put(dt); - /* ignore */ - return -ENODEV; - } + strcmp(model, "PowerMac7,3") != 0) + goto put_node; + of_node_put(dt); spin_lock_init(&pmu_blink_lock); @@ -112,6 +109,10 @@ static int __init via_pmu_led_init(void) pmu_blink_req.done = pmu_req_done; return led_classdev_register(NULL, &pmu_led); + +put_node: + of_node_put(dt); + return -ENODEV; } late_initcall(via_pmu_led_init); From 
83b5a407fbb73e6965adfb4bd0a803724bf87f96 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Mon, 30 Sep 2024 15:56:28 +0800 Subject: [PATCH 54/55] powerpc/kexec: Fix return of uninitialized variable of_property_read_u64() can fail and leave the variable uninitialized, which will then be used. Return error if reading the property failed. Fixes: 2e6bd221d96f ("powerpc/kexec_file: Enable early kernel OPAL calls") Signed-off-by: Zhang Zekun Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240930075628.125138-1-zhangzekun11@huawei.com --- arch/powerpc/kexec/file_load_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 9738adabeb1fe..dc65c13911577 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -736,13 +736,18 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, if (dn) { u64 val; - of_property_read_u64(dn, "opal-base-address", &val); + ret = of_property_read_u64(dn, "opal-base-address", &val); + if (ret) + goto out; + ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val, sizeof(val), false); if (ret) goto out; - of_property_read_u64(dn, "opal-entry-address", &val); + ret = of_property_read_u64(dn, "opal-entry-address", &val); + if (ret) + goto out; ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val, sizeof(val), false); } From e9d3270007b13acd34de4256970ffe457efc6c65 Mon Sep 17 00:00:00 2001 From: Shen Lichuan Date: Mon, 30 Sep 2024 10:32:34 +0800 Subject: [PATCH 55/55] ps3: Correct some typos in comments Fixed some typos that were currently identified with codespell, the details are as follows: drivers/ps3/ps3-lpm.c:94: rigths ==> rights drivers/ps3/ps3-sys-manager.c:365: acnowledge ==> acknowledge drivers/ps3/ps3-vuart.c:470: remaning ==> remaining drivers/ps3/ps3-vuart.c:471: transmision ==> transmission drivers/ps3/sys-manager-core.c:15: Staticly ==> Statically 
Signed-off-by: Shen Lichuan Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240930023234.7457-1-shenlichuan@vivo.com --- drivers/ps3/ps3-lpm.c | 2 +- drivers/ps3/ps3-sys-manager.c | 2 +- drivers/ps3/ps3-vuart.c | 4 ++-- drivers/ps3/sys-manager-core.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index 200ad8751860a..188ae25726740 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -91,7 +91,7 @@ struct ps3_lpm_shadow_regs { * struct ps3_lpm_priv - Private lpm device data. * * @open: An atomic variable indicating the lpm driver has been opened. - * @rights: The lpm rigths granted by the system policy module. A logical + * @rights: The lpm rights granted by the system policy module. A logical * OR of enum ps3_lpm_rights. * @node_id: The node id of a BE processor whose performance monitor this * lpar has the right to use. diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c index ad8ef59dea340..ab798b52910eb 100644 --- a/drivers/ps3/ps3-sys-manager.c +++ b/drivers/ps3/ps3-sys-manager.c @@ -362,7 +362,7 @@ static int ps3_sys_manager_send_request_shutdown( * ps3_sys_manager_send_response - Send a 'response' to the system manager. * @status: zero = success, others fail. * - * The guest sends this message to the system manager to acnowledge success or + * The guest sends this message to the system manager to acknowledge success or * failure of a command sent by the system manager. */ diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index 6328abd51ffad..5cb92535a4a14 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -467,8 +467,8 @@ struct list_buffer { * * If the port is idle on entry as much of the incoming data is written to * the port as the port will accept. Otherwise a list buffer is created - * and any remaning incoming data is copied to that buffer. 
The buffer is - * then enqueued for transmision via the transmit interrupt. + * and any remaining incoming data is copied to that buffer. The buffer is + * then enqueued for transmission via the transmit interrupt. */ int ps3_vuart_write(struct ps3_system_bus_device *dev, const void *buf, diff --git a/drivers/ps3/sys-manager-core.c b/drivers/ps3/sys-manager-core.c index e061b7d0632bd..f50032ad97024 100644 --- a/drivers/ps3/sys-manager-core.c +++ b/drivers/ps3/sys-manager-core.c @@ -12,7 +12,7 @@ #include /** - * Staticly linked routines that allow late binding of a loaded sys-manager + * Statically linked routines that allow late binding of a loaded sys-manager * module. */