From 1db6a4e8a3fc8ccaa4690272935e02831dc6d40d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Sun, 13 Oct 2024 22:17:00 +0200 Subject: [PATCH 01/69] KVM: PPC: replace call_rcu by kfree_rcu for simple kmem_cache_free callback Since SLOB was removed and since commit 6c6c47b063b5 ("mm, slab: call kvfree_rcu_barrier() from kmem_cache_destroy()"), it is not necessary to use call_rcu when the callback only performs kmem_cache_free. Use kfree_rcu() directly. The changes were made using Coccinelle. Signed-off-by: Julia Lawall Acked-by: Paul E. McKenney Acked-by: Vlastimil Babka Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241013201704.49576-14-Julia.Lawall@inria.fr --- arch/powerpc/kvm/book3s_mmu_hpte.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index ce79ac33e8d3b..d904e13e069bc 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -92,12 +92,6 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte) spin_unlock(&vcpu3s->mmu_lock); } -static void free_pte_rcu(struct rcu_head *head) -{ - struct hpte_cache *pte = container_of(head, struct hpte_cache, rcu_head); - kmem_cache_free(hpte_cache, pte); -} - static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) { struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu); @@ -126,7 +120,7 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte) spin_unlock(&vcpu3s->mmu_lock); - call_rcu(&pte->rcu_head, free_pte_rcu); + kfree_rcu(pte, rcu_head); } static void kvmppc_mmu_pte_flush_all(struct kvm_vcpu *vcpu) From 5e296fc37e1afa3fb38b886fe2bf7737777d0f61 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Oct 2024 16:17:00 +1100 Subject: [PATCH 02/69] powerpc/64: Drop IPI_PRIORITY from asm-offsets The last use of IPI_PRIORITY in asm was removed in commit 37f55d30df2e ("KVM: PPC: Book3S HV: Convert kvmppc_read_intr to a C function"). Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241009051701.132282-1-mpe@ellerman.id.au --- arch/powerpc/kernel/asm-offsets.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 7b3feb6bc2103..d8fe97662a028 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -596,7 +596,6 @@ int main(void) HSTATE_FIELD(HSTATE_DABR, dabr); HSTATE_FIELD(HSTATE_DECEXP, dec_expires); HSTATE_FIELD(HSTATE_SPLIT_MODE, kvm_split_mode); - DEFINE(IPI_PRIORITY, IPI_PRIORITY); OFFSET(KVM_SPLIT_RPR, kvm_split_mode, rpr); OFFSET(KVM_SPLIT_PMMAR, kvm_split_mode, pmmar); OFFSET(KVM_SPLIT_LDBAR, kvm_split_mode, ldbar); From 3c9670df7f7e871f0d2c2208d2ce79f6cfbca0f6 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Oct 2024 16:18:25 +1100 Subject: [PATCH 03/69] powerpc/machdep: Drop include of seq_file.h Drop the include of seq_file.h in machdep.h, replace it with a forward declaration of struct seq_file, which is all that's required. Add direct includes of seq_file.h to some files that were getting seq_file.h via machdep.h. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241009051826.132805-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/platforms/52xx/efika.c | 1 + arch/powerpc/platforms/embedded6xx/linkstation.c | 1 + arch/powerpc/platforms/embedded6xx/mvme5100.c | 1 + arch/powerpc/platforms/pseries/lpar.c | 1 + arch/powerpc/platforms/pseries/msi.c | 1 + arch/powerpc/platforms/pseries/papr_scm.c | 1 + arch/powerpc/sysdev/xive/spapr.c | 1 + 8 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 1862f94335ee8..3326730cd300a 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -4,7 +4,6 @@ #ifdef __KERNEL__ #include -#include #include #include #include @@ -18,6 +17,7 @@ struct file; struct pci_controller; struct kimage; struct pci_host_bridge; +struct seq_file; struct machdep_calls { const char *name; diff --git a/arch/powerpc/platforms/52xx/efika.c b/arch/powerpc/platforms/52xx/efika.c index 37a67120f257c..a7172f9ebaad9 100644 --- a/arch/powerpc/platforms/52xx/efika.c +++ b/arch/powerpc/platforms/52xx/efika.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/linkstation.c b/arch/powerpc/platforms/embedded6xx/linkstation.c index e265f026eee2a..4012f206ec63d 100644 --- a/arch/powerpc/platforms/embedded6xx/linkstation.c +++ b/arch/powerpc/platforms/embedded6xx/linkstation.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/embedded6xx/mvme5100.c b/arch/powerpc/platforms/embedded6xx/mvme5100.c index 00bec0f051be1..5ca41972ef221 100644 --- a/arch/powerpc/platforms/embedded6xx/mvme5100.c +++ b/arch/powerpc/platforms/embedded6xx/mvme5100.c @@ -14,6 +14,7 @@ #include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index c1d8bee8f7018..0c428f1ae7127 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/msi.c b/arch/powerpc/platforms/pseries/msi.c index 6dfb55b52d363..fdc2f7f38dc9a 100644 --- a/arch/powerpc/platforms/pseries/msi.c +++ b/arch/powerpc/platforms/pseries/msi.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index 9e297f88adc5d..f84ac9fbe203c 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/sysdev/xive/spapr.c b/arch/powerpc/sysdev/xive/spapr.c index f2fa985a2c771..5aedbe3e8e6a2 100644 --- a/arch/powerpc/sysdev/xive/spapr.c +++ b/arch/powerpc/sysdev/xive/spapr.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include From b23b9edf64b6387334aa2f8687cca6792b0d9d6c Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Oct 2024 16:18:26 +1100 Subject: [PATCH 04/69] powerpc/machdep: Drop include of dma-mapping.h Drop the include of dma-mapping.h in machdep.h, replace it with forward declarations of struct device and struct pci_dev, and include time64.h and page.h which are required for time64_t and pgprot_t respectively. Add direct includes of some other headers to some files that were getting them via machdep.h. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241009051826.132805-2-mpe@ellerman.id.au --- arch/powerpc/include/asm/machdep.h | 6 +++++- arch/powerpc/kernel/sysfs.c | 1 + arch/powerpc/platforms/pseries/svm.c | 1 + drivers/cpuidle/cpuidle-pseries.c | 1 + 4 files changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 3326730cd300a..3298eec123a32 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -5,15 +5,19 @@ #include #include -#include #include +#include + +#include struct pt_regs; struct pci_bus; +struct device; struct device_node; struct iommu_table; struct rtc_time; struct file; +struct pci_dev; struct pci_controller; struct kimage; struct pci_host_bridge; diff --git a/arch/powerpc/kernel/sysfs.c b/arch/powerpc/kernel/sysfs.c index b842c83ab497d..6b3dd6decdf90 100644 --- a/arch/powerpc/kernel/sysfs.c +++ b/arch/powerpc/kernel/sysfs.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/svm.c b/arch/powerpc/platforms/pseries/svm.c index 3b4045d508ec8..10b8eb6bff393 100644 --- a/arch/powerpc/platforms/pseries/svm.c +++ b/arch/powerpc/platforms/pseries/svm.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c index 14db9b7d985d1..f68c65f1d023f 100644 --- a/drivers/cpuidle/cpuidle-pseries.c +++ b/drivers/cpuidle/cpuidle-pseries.c @@ -22,6 +22,7 @@ #include #include #include +#include static struct cpuidle_driver pseries_idle_driver = { .name = "pseries_idle", From cadae3a45d23aa4f6485938a67cbc47aaaa25e38 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 19 Aug 2024 22:24:01 +1000 Subject: [PATCH 05/69] powerpc/pseries: Fix dtl_access_lock to be a rw_semaphore The dtl_access_lock needs to be a rw_sempahore, a sleeping lock, because the code calls kmalloc() while holding it, which can sleep: # echo 1 > /proc/powerpc/vcpudispatch_stats BUG: sleeping function called from invalid context at include/linux/sched/mm.h:337 in_atomic(): 1, irqs_disabled(): 0, non_block: 0, pid: 199, name: sh preempt_count: 1, expected: 0 3 locks held by sh/199: #0: c00000000a0743f8 (sb_writers#3){.+.+}-{0:0}, at: vfs_write+0x324/0x438 #1: c0000000028c7058 (dtl_enable_mutex){+.+.}-{3:3}, at: vcpudispatch_stats_write+0xd4/0x5f4 #2: c0000000028c70b8 (dtl_access_lock){+.+.}-{2:2}, at: vcpudispatch_stats_write+0x220/0x5f4 CPU: 0 PID: 199 Comm: sh Not tainted 6.10.0-rc4 #152 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries Call Trace: dump_stack_lvl+0x130/0x148 (unreliable) __might_resched+0x174/0x410 kmem_cache_alloc_noprof+0x340/0x3d0 alloc_dtl_buffers+0x124/0x1ac vcpudispatch_stats_write+0x2a8/0x5f4 proc_reg_write+0xf4/0x150 vfs_write+0xfc/0x438 ksys_write+0x88/0x148 system_call_exception+0x1c4/0x5a0 system_call_common+0xf4/0x258 Fixes: 06220d78f24a ("powerpc/pseries: Introduce rwlock to gatekeep DTLB usage") Tested-by: Kajol Jain Reviewed-by: Nysal Jan K.A Reviewed-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240819122401.513203-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/dtl.h | 4 ++-- arch/powerpc/platforms/pseries/dtl.c | 8 ++++---- arch/powerpc/platforms/pseries/lpar.c | 8 ++++---- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/include/asm/dtl.h b/arch/powerpc/include/asm/dtl.h index d6f43d149f8dc..a5c21bc623cb0 100644 --- a/arch/powerpc/include/asm/dtl.h +++ b/arch/powerpc/include/asm/dtl.h @@ -1,8 +1,8 @@ #ifndef _ASM_POWERPC_DTL_H #define _ASM_POWERPC_DTL_H +#include #include -#include /* * Layout of entries in the hypervisor's dispatch trace log buffer. @@ -35,7 +35,7 @@ struct dtl_entry { #define DTL_LOG_ALL (DTL_LOG_CEDE | DTL_LOG_PREEMPT | DTL_LOG_FAULT) extern struct kmem_cache *dtl_cache; -extern rwlock_t dtl_access_lock; +extern struct rw_semaphore dtl_access_lock; extern void register_dtl_buffer(int cpu); extern void alloc_dtl_buffers(unsigned long *time_limit); diff --git a/arch/powerpc/platforms/pseries/dtl.c b/arch/powerpc/platforms/pseries/dtl.c index 8cb9d36ea4915..f293588b8c7b5 100644 --- a/arch/powerpc/platforms/pseries/dtl.c +++ b/arch/powerpc/platforms/pseries/dtl.c @@ -191,7 +191,7 @@ static int dtl_enable(struct dtl *dtl) return -EBUSY; /* ensure there are no other conflicting dtl users */ - if (!read_trylock(&dtl_access_lock)) + if (!down_read_trylock(&dtl_access_lock)) return -EBUSY; n_entries = dtl_buf_entries; @@ -199,7 +199,7 @@ static int dtl_enable(struct dtl *dtl) if (!buf) { printk(KERN_WARNING "%s: buffer alloc failed for cpu %d\n", __func__, dtl->cpu); - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); return -ENOMEM; } @@ -217,7 +217,7 @@ static int dtl_enable(struct dtl *dtl) spin_unlock(&dtl->lock); if (rc) { - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); kmem_cache_free(dtl_cache, buf); } @@ -232,7 +232,7 @@ static void dtl_disable(struct dtl *dtl) dtl->buf = NULL; dtl->buf_entries = 0; spin_unlock(&dtl->lock); - read_unlock(&dtl_access_lock); + up_read(&dtl_access_lock); } /* file interface */ diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 0c428f1ae7127..6a415febc53b7 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -170,7 +170,7 @@ struct vcpu_dispatch_data { */ #define NR_CPUS_H NR_CPUS -DEFINE_RWLOCK(dtl_access_lock); +DECLARE_RWSEM(dtl_access_lock); static DEFINE_PER_CPU(struct vcpu_dispatch_data, vcpu_disp_data); static DEFINE_PER_CPU(u64, dtl_entry_ridx); static DEFINE_PER_CPU(struct dtl_worker, dtl_workers); @@ -464,7 +464,7 @@ static int dtl_worker_enable(unsigned long *time_limit) { int rc = 0, state; - if (!write_trylock(&dtl_access_lock)) { + if (!down_write_trylock(&dtl_access_lock)) { rc = -EBUSY; goto out; } @@ -480,7 +480,7 @@ static int dtl_worker_enable(unsigned long *time_limit) pr_err("vcpudispatch_stats: unable to setup workqueue for DTL processing\n"); free_dtl_buffers(time_limit); reset_global_dtl_mask(); - write_unlock(&dtl_access_lock); + up_write(&dtl_access_lock); rc = -EINVAL; goto out; } @@ -495,7 +495,7 @@ static void dtl_worker_disable(unsigned long *time_limit) cpuhp_remove_state(dtl_worker_state); free_dtl_buffers(time_limit); reset_global_dtl_mask(); - write_unlock(&dtl_access_lock); + up_write(&dtl_access_lock); } static ssize_t vcpudispatch_stats_write(struct file *file, const char __user *p, From c7182a0bdec16cdea912b5a3aea9a80f4f657b7d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Wed, 9 Oct 2024 16:38:06 +1100 Subject: [PATCH 06/69] powerpc/boot: Remove bogus reference to lilo The help text refers to lilo, but the install script does not run lilo and never has. The reference to lilo seems to have come originally from arch/ppc/Makefile, but it was not true there either. Remove it. Reported-by: Thorsten Leemhuis Link: https://fosstodon.org/@kernellogger/113032940928131612 Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241009053806.135807-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index cbb353ddacb7a..1c5c28d58e944 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -352,7 +352,7 @@ define archhelp echo ' install - Install kernel using' echo ' (your) ~/bin/$(INSTALLKERNEL) or' echo ' (distribution) /sbin/$(INSTALLKERNEL) or' - echo ' install to $$(INSTALL_PATH) and run lilo' + echo ' install to $$(INSTALL_PATH)' echo ' *_defconfig - Select default config from arch/powerpc/configs' echo '' echo ' Targets with
embed a device tree blob inside the image' From 62f8f307c80e99ab18d38aa1a5bbbc18128ee5f8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Sun, 13 Oct 2024 21:29:57 +1100 Subject: [PATCH 07/69] powerpc/64: Remove maple platform The maple platform was added in 2004 [1], to support the "Maple" 970FX evaluation board. It was later used for IBM JS20/JS21 machines, as well as the Bimini machine, aka "Yellow Dog Powerstation". Sadly all those machines have passed into memory, and there's been no evidence for years that anyone is still using any of them. Remove the platform and related code. It can always be reinstated if there's interest. Note that this has no impact on support for 970FX based Power Macs. [1]: https://git.kernel.org/pub/scm/linux/kernel/git/mpe/linux-fullhistory.git/commit/?id=f0d068d65c5e555ffcfbc189de32598f6f00770c Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241013102957.548291-1-mpe@ellerman.id.au --- Documentation/arch/powerpc/booting.rst | 4 +- arch/powerpc/Kconfig.debug | 6 - arch/powerpc/boot/.gitignore | 1 - arch/powerpc/boot/Makefile | 3 +- arch/powerpc/boot/wrapper | 7 +- arch/powerpc/configs/maple_defconfig | 111 ---- arch/powerpc/configs/ppc64_defconfig | 1 - arch/powerpc/include/asm/udbg.h | 1 - arch/powerpc/kernel/misc_64.S | 8 +- arch/powerpc/kernel/prom_init.c | 86 ---- arch/powerpc/kernel/udbg.c | 3 - arch/powerpc/kernel/udbg_16550.c | 23 - arch/powerpc/platforms/Kconfig | 1 - arch/powerpc/platforms/Makefile | 1 - arch/powerpc/platforms/maple/Kconfig | 19 - arch/powerpc/platforms/maple/Makefile | 2 - arch/powerpc/platforms/maple/maple.h | 14 - arch/powerpc/platforms/maple/pci.c | 672 ------------------------- arch/powerpc/platforms/maple/setup.c | 363 ------------- arch/powerpc/platforms/maple/time.c | 170 ------- 20 files changed, 8 insertions(+), 1488 deletions(-) delete mode 100644 arch/powerpc/configs/maple_defconfig delete mode 100644 arch/powerpc/platforms/maple/Kconfig delete mode 100644 arch/powerpc/platforms/maple/Makefile delete mode 100644 arch/powerpc/platforms/maple/maple.h delete mode 100644 arch/powerpc/platforms/maple/pci.c delete mode 100644 arch/powerpc/platforms/maple/setup.c delete mode 100644 arch/powerpc/platforms/maple/time.c diff --git a/Documentation/arch/powerpc/booting.rst b/Documentation/arch/powerpc/booting.rst index 11aa440f98cc9..472e97891aef2 100644 --- a/Documentation/arch/powerpc/booting.rst +++ b/Documentation/arch/powerpc/booting.rst @@ -93,8 +93,8 @@ given platform based on the content of the device-tree. Thus, you should: a) add your platform support as a _boolean_ option in - arch/powerpc/Kconfig, following the example of PPC_PSERIES, - PPC_PMAC and PPC_MAPLE. The latter is probably a good + arch/powerpc/Kconfig, following the example of PPC_PSERIES + and PPC_PMAC. The latter is probably a good example of a board support to start from. b) create your main platform file as diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index 0bbec4afc0d59..20d05605fa83f 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -223,12 +223,6 @@ config PPC_EARLY_DEBUG_RTAS_CONSOLE help Select this to enable early debugging via the RTAS console. -config PPC_EARLY_DEBUG_MAPLE - bool "Maple real mode" - depends on PPC_MAPLE - help - Select this to enable early debugging for Maple. - config PPC_EARLY_DEBUG_PAS_REALMODE bool "PA Semi real mode" depends on PPC_PASEMI diff --git a/arch/powerpc/boot/.gitignore b/arch/powerpc/boot/.gitignore index a4716d138cfc0..5a867f23fe7f7 100644 --- a/arch/powerpc/boot/.gitignore +++ b/arch/powerpc/boot/.gitignore @@ -30,7 +30,6 @@ zImage.coff zImage.epapr zImage.holly zImage.*lds -zImage.maple zImage.miboot zImage.pmac zImage.pseries diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index fa8518067d38e..1ff6ad4f6cd27 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -276,7 +276,6 @@ quiet_cmd_wrap = WRAP $@ image-$(CONFIG_PPC_PSERIES) += zImage.pseries image-$(CONFIG_PPC_POWERNV) += zImage.pseries -image-$(CONFIG_PPC_MAPLE) += zImage.maple image-$(CONFIG_PPC_IBM_CELL_BLADE) += zImage.pseries image-$(CONFIG_PPC_PS3) += dtbImage.ps3 image-$(CONFIG_PPC_CHRP) += zImage.chrp @@ -444,7 +443,7 @@ $(obj)/zImage.initrd: $(addprefix $(obj)/, $(initrd-y)) clean-files += $(image-) $(initrd-) cuImage.* dtbImage.* treeImage.* \ zImage zImage.initrd zImage.chrp zImage.coff zImage.holly \ zImage.miboot zImage.pmac zImage.pseries \ - zImage.maple simpleImage.* otheros.bld + simpleImage.* otheros.bld # clean up files cached by wrapper clean-kernel-base := vmlinux.strip vmlinux.bin diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper index b1f5549a3c9c4..1db60fe13802d 100755 --- a/arch/powerpc/boot/wrapper +++ b/arch/powerpc/boot/wrapper @@ -271,11 +271,6 @@ pseries) fi make_space=n ;; -maple) - platformo="$object/of.o $object/epapr.o" - link_address='0x400000' - make_space=n - ;; pmac|chrp) platformo="$object/of.o $object/epapr.o" make_space=n @@ -517,7 +512,7 @@ fi # post-processing needed for some platforms case "$platform" in -pseries|chrp|maple) +pseries|chrp) $objbin/addnote "$ofile" ;; coff) diff --git a/arch/powerpc/configs/maple_defconfig b/arch/powerpc/configs/maple_defconfig deleted file mode 100644 index c821a97f4a899..0000000000000 --- a/arch/powerpc/configs/maple_defconfig +++ /dev/null @@ -1,111 +0,0 @@ -CONFIG_PPC64=y -CONFIG_SMP=y -CONFIG_NR_CPUS=4 -CONFIG_SYSVIPC=y -CONFIG_POSIX_MQUEUE=y -CONFIG_NO_HZ=y -CONFIG_HIGH_RES_TIMERS=y -CONFIG_IKCONFIG=y -CONFIG_IKCONFIG_PROC=y -# CONFIG_COMPAT_BRK is not set -CONFIG_PROFILING=y -CONFIG_KPROBES=y -CONFIG_MODULES=y -CONFIG_MODULE_UNLOAD=y -CONFIG_MODVERSIONS=y -CONFIG_MODULE_SRCVERSION_ALL=y -# CONFIG_BLK_DEV_BSG is not set -CONFIG_PARTITION_ADVANCED=y -CONFIG_MAC_PARTITION=y -# CONFIG_PPC_POWERNV is not set -# CONFIG_PPC_PSERIES is not set -# CONFIG_PPC_PMAC is not set -CONFIG_PPC_MAPLE=y -CONFIG_UDBG_RTAS_CONSOLE=y -CONFIG_GEN_RTC=y -CONFIG_KEXEC=y -CONFIG_IRQ_ALL_CPUS=y -CONFIG_PPC_4K_PAGES=y -CONFIG_PCI_MSI=y -CONFIG_NET=y -CONFIG_PACKET=y -CONFIG_UNIX=y -CONFIG_XFRM_USER=m -CONFIG_INET=y -CONFIG_IP_MULTICAST=y -CONFIG_IP_PNP=y -CONFIG_IP_PNP_DHCP=y -# CONFIG_IPV6 is not set -CONFIG_BLK_DEV_RAM=y -CONFIG_BLK_DEV_RAM_SIZE=8192 -# CONFIG_SCSI_PROC_FS is not set -CONFIG_BLK_DEV_SD=y -CONFIG_BLK_DEV_SR=y -CONFIG_CHR_DEV_SG=y -CONFIG_SCSI_IPR=y -CONFIG_ATA=y -CONFIG_PATA_AMD=y -CONFIG_ATA_GENERIC=y -CONFIG_NETDEVICES=y -CONFIG_AMD8111_ETH=y -CONFIG_TIGON3=y -CONFIG_E1000=y -CONFIG_USB_PEGASUS=y -# CONFIG_INPUT_KEYBOARD is not set -# CONFIG_INPUT_MOUSE is not set -# CONFIG_SERIO is not set -CONFIG_SERIAL_8250=y -CONFIG_SERIAL_8250_CONSOLE=y -CONFIG_HVC_RTAS=y -# CONFIG_HW_RANDOM is not set -CONFIG_I2C=y -CONFIG_I2C_CHARDEV=y -CONFIG_I2C_AMD8111=y -# CONFIG_VGA_CONSOLE is not set -CONFIG_HID_GYRATION=y -CONFIG_HID_PANTHERLORD=y -CONFIG_HID_PETALYNX=y -CONFIG_HID_SAMSUNG=y -CONFIG_HID_SUNPLUS=y -CONFIG_USB=y -CONFIG_USB_MON=y -CONFIG_USB_EHCI_HCD=y -CONFIG_USB_EHCI_ROOT_HUB_TT=y -# CONFIG_USB_EHCI_HCD_PPC_OF is not set -CONFIG_USB_OHCI_HCD=y -CONFIG_USB_UHCI_HCD=y -CONFIG_USB_SERIAL=y -CONFIG_USB_SERIAL_GENERIC=y -CONFIG_USB_SERIAL_CYPRESS_M8=m -CONFIG_USB_SERIAL_GARMIN=m -CONFIG_USB_SERIAL_IPW=m -CONFIG_USB_SERIAL_KEYSPAN=y -CONFIG_USB_SERIAL_TI=m -CONFIG_EXT2_FS=y -CONFIG_EXT4_FS=y -CONFIG_FS_DAX=y -CONFIG_MSDOS_FS=y -CONFIG_VFAT_FS=y -CONFIG_PROC_KCORE=y -CONFIG_TMPFS=y -CONFIG_HUGETLBFS=y -CONFIG_CRAMFS=y -CONFIG_NFS_FS=y -CONFIG_NFS_V3_ACL=y -CONFIG_NFS_V4=y -CONFIG_ROOT_NFS=y -CONFIG_NLS_DEFAULT="utf-8" -CONFIG_NLS_UTF8=y -CONFIG_CRC_CCITT=y -CONFIG_CRC_T10DIF=y -CONFIG_MAGIC_SYSRQ=y -CONFIG_DEBUG_KERNEL=y -CONFIG_DEBUG_STACK_USAGE=y -CONFIG_DEBUG_STACKOVERFLOW=y -CONFIG_XMON=y -CONFIG_XMON_DEFAULT=y -CONFIG_BOOTX_TEXT=y -CONFIG_CRYPTO_ECB=m -CONFIG_CRYPTO_PCBC=m -# CONFIG_CRYPTO_HW is not set -CONFIG_PRINTK_TIME=y diff --git a/arch/powerpc/configs/ppc64_defconfig b/arch/powerpc/configs/ppc64_defconfig index a5e3e7f97f4d7..f39c0d000c438 100644 --- a/arch/powerpc/configs/ppc64_defconfig +++ b/arch/powerpc/configs/ppc64_defconfig @@ -44,7 +44,6 @@ CONFIG_PPC_SMLPAR=y CONFIG_IBMEBUS=y CONFIG_PAPR_SCM=m CONFIG_PPC_SVM=y -CONFIG_PPC_MAPLE=y CONFIG_PPC_PASEMI=y CONFIG_PPC_PASEMI_IOMMU=y CONFIG_PPC_PS3=y diff --git a/arch/powerpc/include/asm/udbg.h b/arch/powerpc/include/asm/udbg.h index 289023f7a6567..a8681b12864fd 100644 --- a/arch/powerpc/include/asm/udbg.h +++ b/arch/powerpc/include/asm/udbg.h @@ -38,7 +38,6 @@ void __init udbg_early_init(void); void __init udbg_init_debug_lpar(void); void __init udbg_init_debug_lpar_hvsi(void); void __init udbg_init_pmac_realmode(void); -void __init udbg_init_maple_realmode(void); void __init udbg_init_pas_realmode(void); void __init udbg_init_rtas_panel(void); void __init udbg_init_rtas_console(void); diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S index 91123e102db40..a997c7f43dc01 100644 --- a/arch/powerpc/kernel/misc_64.S +++ b/arch/powerpc/kernel/misc_64.S @@ -74,7 +74,7 @@ _GLOBAL(rmci_off) blr #endif /* CONFIG_PPC_EARLY_DEBUG_BOOTX */ -#if defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) +#ifdef CONFIG_PPC_PMAC /* * Do an IO access in real mode @@ -137,7 +137,7 @@ _GLOBAL(real_writeb) sync isync blr -#endif /* defined(CONFIG_PPC_PMAC) || defined(CONFIG_PPC_MAPLE) */ +#endif // CONFIG_PPC_PMAC #ifdef CONFIG_PPC_PASEMI @@ -174,7 +174,7 @@ _GLOBAL(real_205_writeb) #endif /* CONFIG_PPC_PASEMI */ -#if defined(CONFIG_CPU_FREQ_PMAC64) || defined(CONFIG_CPU_FREQ_MAPLE) +#ifdef CONFIG_CPU_FREQ_PMAC64 /* * SCOM access functions for 970 (FX only for now) * @@ -243,7 +243,7 @@ _GLOBAL(scom970_write) /* restore interrupts */ mtmsrd r5,1 blr -#endif /* CONFIG_CPU_FREQ_PMAC64 || CONFIG_CPU_FREQ_MAPLE */ +#endif // CONFIG_CPU_FREQ_PMAC64 /* kexec_wait(phys_cpu) * diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index fbb68fc28ed3a..73210e5bcfa77 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2792,90 +2792,6 @@ static void __init flatten_device_tree(void) dt_struct_start, dt_struct_end); } -#ifdef CONFIG_PPC_MAPLE -/* PIBS Version 1.05.0000 04/26/2005 has an incorrect /ht/isa/ranges property. - * The values are bad, and it doesn't even have the right number of cells. */ -static void __init fixup_device_tree_maple(void) -{ - phandle isa; - u32 rloc = 0x01002000; /* IO space; PCI device = 4 */ - u32 isa_ranges[6]; - char *name; - - name = "/ht@0/isa@4"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(isa)) { - name = "/ht@0/isa@6"; - isa = call_prom("finddevice", 1, 1, ADDR(name)); - rloc = 0x01003000; /* IO space; PCI device = 6 */ - } - if (!PHANDLE_VALID(isa)) - return; - - if (prom_getproplen(isa, "ranges") != 12) - return; - if (prom_getprop(isa, "ranges", isa_ranges, sizeof(isa_ranges)) - == PROM_ERROR) - return; - - if (isa_ranges[0] != 0x1 || - isa_ranges[1] != 0xf4000000 || - isa_ranges[2] != 0x00010000) - return; - - prom_printf("Fixing up bogus ISA range on Maple/Apache...\n"); - - isa_ranges[0] = 0x1; - isa_ranges[1] = 0x0; - isa_ranges[2] = rloc; - isa_ranges[3] = 0x0; - isa_ranges[4] = 0x0; - isa_ranges[5] = 0x00010000; - prom_setprop(isa, name, "ranges", - isa_ranges, sizeof(isa_ranges)); -} - -#define CPC925_MC_START 0xf8000000 -#define CPC925_MC_LENGTH 0x1000000 -/* The values for memory-controller don't have right number of cells */ -static void __init fixup_device_tree_maple_memory_controller(void) -{ - phandle mc; - u32 mc_reg[4]; - char *name = "/hostbridge@f8000000"; - u32 ac, sc; - - mc = call_prom("finddevice", 1, 1, ADDR(name)); - if (!PHANDLE_VALID(mc)) - return; - - if (prom_getproplen(mc, "reg") != 8) - return; - - prom_getprop(prom.root, "#address-cells", &ac, sizeof(ac)); - prom_getprop(prom.root, "#size-cells", &sc, sizeof(sc)); - if ((ac != 2) || (sc != 2)) - return; - - if (prom_getprop(mc, "reg", mc_reg, sizeof(mc_reg)) == PROM_ERROR) - return; - - if (mc_reg[0] != CPC925_MC_START || mc_reg[1] != CPC925_MC_LENGTH) - return; - - prom_printf("Fixing up bogus hostbridge on Maple...\n"); - - mc_reg[0] = 0x0; - mc_reg[1] = CPC925_MC_START; - mc_reg[2] = 0x0; - mc_reg[3] = CPC925_MC_LENGTH; - prom_setprop(mc, name, "reg", mc_reg, sizeof(mc_reg)); -} -#else -#define fixup_device_tree_maple() -#define fixup_device_tree_maple_memory_controller() -#endif - #ifdef CONFIG_PPC_CHRP /* * Pegasos and BriQ lacks the "ranges" property in the isa node @@ -3193,8 +3109,6 @@ static inline void fixup_device_tree_pasemi(void) { } static void __init fixup_device_tree(void) { - fixup_device_tree_maple(); - fixup_device_tree_maple_memory_controller(); fixup_device_tree_chrp(); fixup_device_tree_pmac(); fixup_device_tree_efika(); diff --git a/arch/powerpc/kernel/udbg.c b/arch/powerpc/kernel/udbg.c index 4b99208f5adcd..0a72a537f879e 100644 --- a/arch/powerpc/kernel/udbg.c +++ b/arch/powerpc/kernel/udbg.c @@ -39,9 +39,6 @@ void __init udbg_early_init(void) #elif defined(CONFIG_PPC_EARLY_DEBUG_RTAS_CONSOLE) /* RTAS console debug */ udbg_init_rtas_console(); -#elif defined(CONFIG_PPC_EARLY_DEBUG_MAPLE) - /* Maple real mode debug */ - udbg_init_maple_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_PAS_REALMODE) udbg_init_pas_realmode(); #elif defined(CONFIG_PPC_EARLY_DEBUG_BOOTX) diff --git a/arch/powerpc/kernel/udbg_16550.c b/arch/powerpc/kernel/udbg_16550.c index 313802aff5713..dfe8ed2192e8e 100644 --- a/arch/powerpc/kernel/udbg_16550.c +++ b/arch/powerpc/kernel/udbg_16550.c @@ -205,29 +205,6 @@ void __init udbg_uart_init_mmio(void __iomem *addr, unsigned int stride) udbg_use_uart(); } -#ifdef CONFIG_PPC_MAPLE - -#define UDBG_UART_MAPLE_ADDR ((void __iomem *)0xf40003f8) - -static u8 udbg_uart_in_maple(unsigned int reg) -{ - return real_readb(UDBG_UART_MAPLE_ADDR + reg); -} - -static void udbg_uart_out_maple(unsigned int reg, u8 val) -{ - real_writeb(val, UDBG_UART_MAPLE_ADDR + reg); -} - -void __init udbg_init_maple_realmode(void) -{ - udbg_uart_in = udbg_uart_in_maple; - udbg_uart_out = udbg_uart_out_maple; - udbg_use_uart(); -} - -#endif /* CONFIG_PPC_MAPLE */ - #ifdef CONFIG_PPC_PASEMI #define UDBG_UART_PAS_ADDR ((void __iomem *)0xfcff03f8UL) diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index 1112a58316191..a454149ae02fc 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -7,7 +7,6 @@ source "arch/powerpc/platforms/chrp/Kconfig" source "arch/powerpc/platforms/512x/Kconfig" source "arch/powerpc/platforms/52xx/Kconfig" source "arch/powerpc/platforms/powermac/Kconfig" -source "arch/powerpc/platforms/maple/Kconfig" source "arch/powerpc/platforms/pasemi/Kconfig" source "arch/powerpc/platforms/ps3/Kconfig" source "arch/powerpc/platforms/cell/Kconfig" diff --git a/arch/powerpc/platforms/Makefile b/arch/powerpc/platforms/Makefile index 786d374bff317..3cee4a842736d 100644 --- a/arch/powerpc/platforms/Makefile +++ b/arch/powerpc/platforms/Makefile @@ -14,7 +14,6 @@ obj-$(CONFIG_FSL_SOC_BOOKE) += 85xx/ obj-$(CONFIG_PPC_86xx) += 86xx/ obj-$(CONFIG_PPC_POWERNV) += powernv/ obj-$(CONFIG_PPC_PSERIES) += pseries/ -obj-$(CONFIG_PPC_MAPLE) += maple/ obj-$(CONFIG_PPC_PASEMI) += pasemi/ obj-$(CONFIG_PPC_CELL) += cell/ obj-$(CONFIG_PPC_PS3) += ps3/ diff --git a/arch/powerpc/platforms/maple/Kconfig b/arch/powerpc/platforms/maple/Kconfig deleted file mode 100644 index 4c058cc57c901..0000000000000 --- a/arch/powerpc/platforms/maple/Kconfig +++ /dev/null @@ -1,19 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0 -config PPC_MAPLE - depends on PPC64 && PPC_BOOK3S && CPU_BIG_ENDIAN - bool "Maple 970FX Evaluation Board" - select FORCE_PCI - select MPIC - select U3_DART - select MPIC_U3_HT_IRQS - select GENERIC_TBSYNC - select PPC_UDBG_16550 - select PPC_970_NAP - select PPC_64S_HASH_MMU - select PPC_HASH_MMU_NATIVE - select PPC_RTAS - select MMIO_NVRAM - select ATA_NONSTANDARD if ATA - help - This option enables support for the Maple 970FX Evaluation Board. - For more information, refer to diff --git a/arch/powerpc/platforms/maple/Makefile b/arch/powerpc/platforms/maple/Makefile deleted file mode 100644 index 19f35ab828a7e..0000000000000 --- a/arch/powerpc/platforms/maple/Makefile +++ /dev/null @@ -1,2 +0,0 @@ -# SPDX-License-Identifier: GPL-2.0-only -obj-y += setup.o pci.o time.o diff --git a/arch/powerpc/platforms/maple/maple.h b/arch/powerpc/platforms/maple/maple.h deleted file mode 100644 index 8ddbaa4ebd0b4..0000000000000 --- a/arch/powerpc/platforms/maple/maple.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Declarations for maple-specific code. - * - * Maple is the name of a PPC970 evaluation board. - */ -extern int maple_set_rtc_time(struct rtc_time *tm); -extern void maple_get_rtc_time(struct rtc_time *tm); -extern time64_t maple_get_boot_time(void); -extern void maple_pci_init(void); -extern void maple_pci_irq_fixup(struct pci_dev *dev); -extern int maple_pci_get_legacy_ide_irq(struct pci_dev *dev, int channel); - -extern struct pci_controller_ops maple_pci_controller_ops; diff --git a/arch/powerpc/platforms/maple/pci.c b/arch/powerpc/platforms/maple/pci.c deleted file mode 100644 index b9ff37c7f6f01..0000000000000 --- a/arch/powerpc/platforms/maple/pci.c +++ /dev/null @@ -1,672 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Copyright (C) 2004 Benjamin Herrenschmuidt (benh@kernel.crashing.org), - * IBM Corp. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include - -#include "maple.h" - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -static struct pci_controller *u3_agp, *u3_ht, *u4_pcie; - -static int __init fixup_one_level_bus_range(struct device_node *node, int higher) -{ - for (; node; node = node->sibling) { - const int *bus_range; - const unsigned int *class_code; - int len; - - /* For PCI<->PCI bridges or CardBus bridges, we go down */ - class_code = of_get_property(node, "class-code", NULL); - if (!class_code || ((*class_code >> 8) != PCI_CLASS_BRIDGE_PCI && - (*class_code >> 8) != PCI_CLASS_BRIDGE_CARDBUS)) - continue; - bus_range = of_get_property(node, "bus-range", &len); - if (bus_range != NULL && len > 2 * sizeof(int)) { - if (bus_range[1] > higher) - higher = bus_range[1]; - } - higher = fixup_one_level_bus_range(node->child, higher); - } - return higher; -} - -/* This routine fixes the "bus-range" property of all bridges in the - * system since they tend to have their "last" member wrong on macs - * - * Note that the bus numbers manipulated here are OF bus numbers, they - * are not Linux bus numbers. - */ -static void __init fixup_bus_range(struct device_node *bridge) -{ - int *bus_range; - struct property *prop; - int len; - - /* Lookup the "bus-range" property for the hose */ - prop = of_find_property(bridge, "bus-range", &len); - if (prop == NULL || prop->value == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %pOF\n", - bridge); - return; - } - bus_range = prop->value; - bus_range[1] = fixup_one_level_bus_range(bridge->child, bus_range[1]); -} - - -static unsigned long u3_agp_cfa0(u8 devfn, u8 off) -{ - return (1 << (unsigned long)PCI_SLOT(devfn)) | - ((unsigned long)PCI_FUNC(devfn) << 8) | - ((unsigned long)off & 0xFCUL); -} - -static unsigned long u3_agp_cfa1(u8 bus, u8 devfn, u8 off) -{ - return ((unsigned long)bus << 16) | - ((unsigned long)devfn << 8) | - ((unsigned long)off & 0xFCUL) | - 1UL; -} - -static volatile void __iomem *u3_agp_cfg_access(struct pci_controller* hose, - u8 bus, u8 dev_fn, u8 offset) -{ - unsigned int caddr; - - if (bus == hose->first_busno) { - if (dev_fn < (11 << 3)) - return NULL; - caddr = u3_agp_cfa0(dev_fn, offset); - } else - caddr = u3_agp_cfa1(bus, dev_fn, offset); - - /* Uninorth will return garbage if we don't read back the value ! */ - do { - out_le32(hose->cfg_addr, caddr); - } while (in_le32(hose->cfg_addr) != caddr); - - offset &= 0x07; - return hose->cfg_data + offset; -} - -static int u3_agp_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int u3_agp_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - addr = u3_agp_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u3_agp_pci_ops = -{ - .read = u3_agp_read_config, - .write = u3_agp_write_config, -}; - -static unsigned long u3_ht_cfa0(u8 devfn, u8 off) -{ - return (devfn << 8) | off; -} - -static unsigned long u3_ht_cfa1(u8 bus, u8 devfn, u8 off) -{ - return u3_ht_cfa0(devfn, off) + (bus << 16) + 0x01000000UL; -} - -static volatile void __iomem *u3_ht_cfg_access(struct pci_controller* hose, - u8 bus, u8 devfn, u8 offset) -{ - if (bus == hose->first_busno) { - if (PCI_SLOT(devfn) == 0) - return NULL; - return hose->cfg_data + u3_ht_cfa0(devfn, offset); - } else - return hose->cfg_data + u3_ht_cfa1(bus, devfn, offset); -} - -static int u3_ht_root_read_config(struct pci_controller *hose, u8 offset, - int len, u32 *val) -{ - volatile void __iomem *addr; - - addr = hose->cfg_addr; - addr += ((offset & ~3) << 2) + (4 - len - (offset & 3)); - - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_be16(addr); - break; - default: - *val = in_be32(addr); - break; - } - - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_root_write_config(struct pci_controller *hose, u8 offset, - int len, u32 val) -{ - volatile void __iomem *addr; - - addr = hose->cfg_addr + ((offset & ~3) << 2) + (4 - len - (offset & 3)); - - if (offset >= PCI_BASE_ADDRESS_0 && offset < PCI_CAPABILITY_LIST) - return PCIBIOS_SUCCESSFUL; - - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_be16(addr, val); - break; - default: - out_be32(addr, val); - break; - } - - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) - return u3_ht_root_read_config(hose, offset, len, val); - - if (offset > 0xff) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static int u3_ht_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - - if (bus->number == hose->first_busno && devfn == PCI_DEVFN(0, 0)) - return u3_ht_root_write_config(hose, offset, len, val); - - if (offset > 0xff) - return PCIBIOS_BAD_REGISTER_NUMBER; - - addr = u3_ht_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u3_ht_pci_ops = -{ - .read = u3_ht_read_config, - .write = u3_ht_write_config, -}; - -static unsigned int u4_pcie_cfa0(unsigned int devfn, unsigned int off) -{ - return (1 << PCI_SLOT(devfn)) | - (PCI_FUNC(devfn) << 8) | - ((off >> 8) << 28) | - (off & 0xfcu); -} - -static unsigned int u4_pcie_cfa1(unsigned int bus, unsigned int devfn, - unsigned int off) -{ - return (bus << 16) | - (devfn << 8) | - ((off >> 8) << 28) | - (off & 0xfcu) | 1u; -} - -static volatile void __iomem *u4_pcie_cfg_access(struct pci_controller* hose, - u8 bus, u8 dev_fn, int offset) -{ - unsigned int caddr; - - if (bus == hose->first_busno) - caddr = u4_pcie_cfa0(dev_fn, offset); - else - caddr = u4_pcie_cfa1(bus, dev_fn, offset); - - /* Uninorth will return garbage if we don't read back the value ! */ - do { - out_le32(hose->cfg_addr, caddr); - } while (in_le32(hose->cfg_addr) != caddr); - - offset &= 0x03; - return hose->cfg_data + offset; -} - -static int u4_pcie_read_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 *val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - *val = in_8(addr); - break; - case 2: - *val = in_le16(addr); - break; - default: - *val = in_le32(addr); - break; - } - return PCIBIOS_SUCCESSFUL; -} -static int u4_pcie_write_config(struct pci_bus *bus, unsigned int devfn, - int offset, int len, u32 val) -{ - struct pci_controller *hose; - volatile void __iomem *addr; - - hose = pci_bus_to_host(bus); - if (hose == NULL) - return PCIBIOS_DEVICE_NOT_FOUND; - if (offset >= 0x1000) - return PCIBIOS_BAD_REGISTER_NUMBER; - addr = u4_pcie_cfg_access(hose, bus->number, devfn, offset); - if (!addr) - return PCIBIOS_DEVICE_NOT_FOUND; - /* - * Note: the caller has already checked that offset is - * suitably aligned and that len is 1, 2 or 4. - */ - switch (len) { - case 1: - out_8(addr, val); - break; - case 2: - out_le16(addr, val); - break; - default: - out_le32(addr, val); - break; - } - return PCIBIOS_SUCCESSFUL; -} - -static struct pci_ops u4_pcie_pci_ops = -{ - .read = u4_pcie_read_config, - .write = u4_pcie_write_config, -}; - -static void __init setup_u3_agp(struct pci_controller* hose) -{ - /* On G5, we move AGP up to high bus number so we don't need - * to reassign bus numbers for HT. If we ever have P2P bridges - * on AGP, we'll have to move pci_assign_all_buses to the - * pci_controller structure so we enable it for AGP and not for - * HT childs. - * We hard code the address because of the different size of - * the reg address cell, we shall fix that by killing struct - * reg_property and using some accessor functions instead - */ - hose->first_busno = 0xf0; - hose->last_busno = 0xff; - hose->ops = &u3_agp_pci_ops; - hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); - hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); - - u3_agp = hose; -} - -static void __init setup_u4_pcie(struct pci_controller* hose) -{ - /* We currently only implement the "non-atomic" config space, to - * be optimised later. - */ - hose->ops = &u4_pcie_pci_ops; - hose->cfg_addr = ioremap(0xf0000000 + 0x800000, 0x1000); - hose->cfg_data = ioremap(0xf0000000 + 0xc00000, 0x1000); - - u4_pcie = hose; -} - -static void __init setup_u3_ht(struct pci_controller* hose) -{ - hose->ops = &u3_ht_pci_ops; - - /* We hard code the address because of the different size of - * the reg address cell, we shall fix that by killing struct - * reg_property and using some accessor functions instead - */ - hose->cfg_data = ioremap(0xf2000000, 0x02000000); - hose->cfg_addr = ioremap(0xf8070000, 0x1000); - - hose->first_busno = 0; - hose->last_busno = 0xef; - - u3_ht = hose; -} - -static int __init maple_add_bridge(struct device_node *dev) -{ - int len; - struct pci_controller *hose; - char* disp_name; - const int *bus_range; - int primary = 1; - - DBG("Adding PCI host bridge %pOF\n", dev); - - bus_range = of_get_property(dev, "bus-range", &len); - if (bus_range == NULL || len < 2 * sizeof(int)) { - printk(KERN_WARNING "Can't get bus-range for %pOF, assume bus 0\n", - dev); - } - - hose = pcibios_alloc_controller(dev); - if (hose == NULL) - return -ENOMEM; - hose->first_busno = bus_range ? bus_range[0] : 0; - hose->last_busno = bus_range ? bus_range[1] : 0xff; - hose->controller_ops = maple_pci_controller_ops; - - disp_name = NULL; - if (of_device_is_compatible(dev, "u3-agp")) { - setup_u3_agp(hose); - disp_name = "U3-AGP"; - primary = 0; - } else if (of_device_is_compatible(dev, "u3-ht")) { - setup_u3_ht(hose); - disp_name = "U3-HT"; - primary = 1; - } else if (of_device_is_compatible(dev, "u4-pcie")) { - setup_u4_pcie(hose); - disp_name = "U4-PCIE"; - primary = 0; - } - printk(KERN_INFO "Found %s PCI host bridge. Firmware bus number: %d->%d\n", - disp_name, hose->first_busno, hose->last_busno); - - /* Interpret the "ranges" property */ - /* This also maps the I/O region and sets isa_io/mem_base */ - pci_process_bridge_OF_ranges(hose, dev, primary); - - /* Fixup "bus-range" OF property */ - fixup_bus_range(dev); - - /* Check for legacy IOs */ - isa_bridge_find_early(hose); - - /* create pci_dn's for DT nodes under this PHB */ - pci_devs_phb_init_dynamic(hose); - - return 0; -} - - -void maple_pci_irq_fixup(struct pci_dev *dev) -{ - DBG(" -> maple_pci_irq_fixup\n"); - - /* Fixup IRQ for PCIe host */ - if (u4_pcie != NULL && dev->bus->number == 0 && - pci_bus_to_host(dev->bus) == u4_pcie) { - printk(KERN_DEBUG "Fixup U4 PCIe IRQ\n"); - dev->irq = irq_create_mapping(NULL, 1); - if (dev->irq) - irq_set_irq_type(dev->irq, IRQ_TYPE_LEVEL_LOW); - } - - /* Hide AMD8111 IDE interrupt when in legacy mode so - * the driver calls pci_get_legacy_ide_irq() - */ - if (dev->vendor == PCI_VENDOR_ID_AMD && - dev->device == PCI_DEVICE_ID_AMD_8111_IDE && - (dev->class & 5) != 5) { - dev->irq = 0; - } - - DBG(" <- maple_pci_irq_fixup\n"); -} - -static int maple_pci_root_bridge_prepare(struct pci_host_bridge *bridge) -{ - struct pci_controller *hose = pci_bus_to_host(bridge->bus); - struct device_node *np, *child; - - if (hose != u3_agp) - return 0; - - /* Fixup the PCI<->OF mapping for U3 AGP due to bus renumbering. We - * assume there is no P2P bridge on the AGP bus, which should be a - * safe assumptions hopefully. - */ - np = hose->dn; - PCI_DN(np)->busno = 0xf0; - for_each_child_of_node(np, child) - PCI_DN(child)->busno = 0xf0; - - return 0; -} - -void __init maple_pci_init(void) -{ - struct device_node *np, *root; - struct device_node *ht = NULL; - - /* Probe root PCI hosts, that is on U3 the AGP host and the - * HyperTransport host. That one is actually "kept" around - * and actually added last as its resource management relies - * on the AGP resources to have been setup first - */ - root = of_find_node_by_path("/"); - if (root == NULL) { - printk(KERN_CRIT "maple_find_bridges: can't find root of device tree\n"); - return; - } - for_each_child_of_node(root, np) { - if (!of_node_is_type(np, "pci") && !of_node_is_type(np, "ht")) - continue; - if ((of_device_is_compatible(np, "u4-pcie") || - of_device_is_compatible(np, "u3-agp")) && - maple_add_bridge(np) == 0) - of_node_get(np); - - if (of_device_is_compatible(np, "u3-ht")) { - of_node_get(np); - ht = np; - } - } - of_node_put(root); - - /* Now setup the HyperTransport host if we found any - */ - if (ht && maple_add_bridge(ht) != 0) - of_node_put(ht); - - ppc_md.pcibios_root_bridge_prepare = maple_pci_root_bridge_prepare; - - /* Tell pci.c to not change any resource allocations. */ - pci_add_flags(PCI_PROBE_ONLY); -} - -int maple_pci_get_legacy_ide_irq(struct pci_dev *pdev, int channel) -{ - struct device_node *np; - unsigned int defirq = channel ? 15 : 14; - unsigned int irq; - - if (pdev->vendor != PCI_VENDOR_ID_AMD || - pdev->device != PCI_DEVICE_ID_AMD_8111_IDE) - return defirq; - - np = pci_device_to_OF_node(pdev); - if (np == NULL) { - printk("Failed to locate OF node for IDE %s\n", - pci_name(pdev)); - return defirq; - } - irq = irq_of_parse_and_map(np, channel & 0x1); - if (!irq) { - printk("Failed to map onboard IDE interrupt for channel %d\n", - channel); - return defirq; - } - return irq; -} - -static void quirk_ipr_msi(struct pci_dev *dev) -{ - /* Something prevents MSIs from the IPR from working on Bimini, - * and the driver has no smarts to recover. So disable MSI - * on it for now. */ - - if (machine_is(maple)) { - dev->no_msi = 1; - dev_info(&dev->dev, "Quirk disabled MSI\n"); - } -} -DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_IBM, PCI_DEVICE_ID_IBM_OBSIDIAN, - quirk_ipr_msi); - -struct pci_controller_ops maple_pci_controller_ops = { -}; diff --git a/arch/powerpc/platforms/maple/setup.c b/arch/powerpc/platforms/maple/setup.c deleted file mode 100644 index f329a03edf4a6..0000000000000 --- a/arch/powerpc/platforms/maple/setup.c +++ /dev/null @@ -1,363 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * Maple (970 eval board) setup code - * - * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), - * IBM Corp. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "maple.h" - -#ifdef DEBUG -#define DBG(fmt...) udbg_printf(fmt) -#else -#define DBG(fmt...) -#endif - -static unsigned long maple_find_nvram_base(void) -{ - struct device_node *rtcs; - unsigned long result = 0; - - /* find NVRAM device */ - rtcs = of_find_compatible_node(NULL, "nvram", "AMD8111"); - if (rtcs) { - struct resource r; - if (of_address_to_resource(rtcs, 0, &r)) { - printk(KERN_EMERG "Maple: Unable to translate NVRAM" - " address\n"); - goto bail; - } - if (!(r.flags & IORESOURCE_IO)) { - printk(KERN_EMERG "Maple: NVRAM address isn't PIO!\n"); - goto bail; - } - result = r.start; - } else - printk(KERN_EMERG "Maple: Unable to find NVRAM\n"); - bail: - of_node_put(rtcs); - return result; -} - -static void __noreturn maple_restart(char *cmd) -{ - unsigned int maple_nvram_base; - const unsigned int *maple_nvram_offset, *maple_nvram_command; - struct device_node *sp; - - maple_nvram_base = maple_find_nvram_base(); - if (maple_nvram_base == 0) - goto fail; - - /* find service processor device */ - sp = of_find_node_by_name(NULL, "service-processor"); - if (!sp) { - printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); - goto fail; - } - maple_nvram_offset = of_get_property(sp, "restart-addr", NULL); - maple_nvram_command = of_get_property(sp, "restart-value", NULL); - of_node_put(sp); - - /* send command */ - outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); - for (;;) ; - fail: - printk(KERN_EMERG "Maple: Manual Restart Required\n"); - for (;;) ; -} - -static void __noreturn maple_power_off(void) -{ - unsigned int maple_nvram_base; - const unsigned int *maple_nvram_offset, *maple_nvram_command; - struct device_node *sp; - - maple_nvram_base = maple_find_nvram_base(); - if (maple_nvram_base == 0) - goto fail; - - /* find service processor device */ - sp = of_find_node_by_name(NULL, "service-processor"); - if (!sp) { - printk(KERN_EMERG "Maple: Unable to find Service Processor\n"); - goto fail; - } - maple_nvram_offset = of_get_property(sp, "power-off-addr", NULL); - maple_nvram_command = of_get_property(sp, "power-off-value", NULL); - of_node_put(sp); - - /* send command */ - outb_p(*maple_nvram_command, maple_nvram_base + *maple_nvram_offset); - for (;;) ; - fail: - printk(KERN_EMERG "Maple: Manual Power-Down Required\n"); - for (;;) ; -} - -static void __noreturn maple_halt(void) -{ - maple_power_off(); -} - -#ifdef CONFIG_SMP -static struct smp_ops_t maple_smp_ops = { - .probe = smp_mpic_probe, - .message_pass = smp_mpic_message_pass, - .kick_cpu = smp_generic_kick_cpu, - .setup_cpu = smp_mpic_setup_cpu, - .give_timebase = smp_generic_give_timebase, - .take_timebase = smp_generic_take_timebase, -}; -#endif /* CONFIG_SMP */ - -static void __init maple_use_rtas_reboot_and_halt_if_present(void) -{ - if (rtas_function_implemented(RTAS_FN_SYSTEM_REBOOT) && - rtas_function_implemented(RTAS_FN_POWER_OFF)) { - ppc_md.restart = rtas_restart; - pm_power_off = rtas_power_off; - ppc_md.halt = rtas_halt; - } -} - -static void __init maple_setup_arch(void) -{ - /* init to some ~sane value until calibrate_delay() runs */ - loops_per_jiffy = 50000000; - - /* Setup SMP callback */ -#ifdef CONFIG_SMP - smp_ops = &maple_smp_ops; -#endif - maple_use_rtas_reboot_and_halt_if_present(); - - printk(KERN_DEBUG "Using native/NAP idle loop\n"); - - mmio_nvram_init(); -} - -/* - * This is almost identical to pSeries and CHRP. We need to make that - * code generic at one point, with appropriate bits in the device-tree to - * identify the presence of an HT APIC - */ -static void __init maple_init_IRQ(void) -{ - struct device_node *root, *np, *mpic_node = NULL; - const unsigned int *opprop; - unsigned long openpic_addr = 0; - int naddr, n, i, opplen, has_isus = 0; - struct mpic *mpic; - unsigned int flags = 0; - - /* Locate MPIC in the device-tree. Note that there is a bug - * in Maple device-tree where the type of the controller is - * open-pic and not interrupt-controller - */ - - for_each_node_by_type(np, "interrupt-controller") - if (of_device_is_compatible(np, "open-pic")) { - mpic_node = np; - break; - } - if (mpic_node == NULL) - for_each_node_by_type(np, "open-pic") { - mpic_node = np; - break; - } - if (mpic_node == NULL) { - printk(KERN_ERR - "Failed to locate the MPIC interrupt controller\n"); - return; - } - - /* Find address list in /platform-open-pic */ - root = of_find_node_by_path("/"); - naddr = of_n_addr_cells(root); - opprop = of_get_property(root, "platform-open-pic", &opplen); - if (opprop) { - openpic_addr = of_read_number(opprop, naddr); - has_isus = (opplen > naddr); - printk(KERN_DEBUG "OpenPIC addr: %lx, has ISUs: %d\n", - openpic_addr, has_isus); - } - - BUG_ON(openpic_addr == 0); - - /* Check for a big endian MPIC */ - if (of_property_read_bool(np, "big-endian")) - flags |= MPIC_BIG_ENDIAN; - - /* XXX Maple specific bits */ - flags |= MPIC_U3_HT_IRQS; - /* All U3/U4 are big-endian, older SLOF firmware doesn't encode this */ - flags |= MPIC_BIG_ENDIAN; - - /* Setup the openpic driver. More device-tree junks, we hard code no - * ISUs for now. I'll have to revisit some stuffs with the folks doing - * the firmware for those - */ - mpic = mpic_alloc(mpic_node, openpic_addr, flags, - /*has_isus ? 16 :*/ 0, 0, " MPIC "); - BUG_ON(mpic == NULL); - - /* Add ISUs */ - opplen /= sizeof(u32); - for (n = 0, i = naddr; i < opplen; i += naddr, n++) { - unsigned long isuaddr = of_read_number(opprop + i, naddr); - mpic_assign_isu(mpic, n, isuaddr); - } - - /* All ISUs are setup, complete initialization */ - mpic_init(mpic); - ppc_md.get_irq = mpic_get_irq; - of_node_put(mpic_node); - of_node_put(root); -} - -static void __init maple_progress(char *s, unsigned short hex) -{ - printk("*** %04x : %s\n", hex, s ? s : ""); -} - - -/* - * Called very early, MMU is off, device-tree isn't unflattened - */ -static int __init maple_probe(void) -{ - if (!of_machine_is_compatible("Momentum,Maple") && - !of_machine_is_compatible("Momentum,Apache")) - return 0; - - pm_power_off = maple_power_off; - - iommu_init_early_dart(&maple_pci_controller_ops); - - return 1; -} - -#ifdef CONFIG_EDAC -/* - * Register a platform device for CPC925 memory controller on - * all boards with U3H (CPC925) bridge. - */ -static int __init maple_cpc925_edac_setup(void) -{ - struct platform_device *pdev; - struct device_node *np = NULL; - struct resource r; - int ret; - volatile void __iomem *mem; - u32 rev; - - np = of_find_node_by_type(NULL, "memory-controller"); - if (!np) { - printk(KERN_ERR "%s: Unable to find memory-controller node\n", - __func__); - return -ENODEV; - } - - ret = of_address_to_resource(np, 0, &r); - of_node_put(np); - - if (ret < 0) { - printk(KERN_ERR "%s: Unable to get memory-controller reg\n", - __func__); - return -ENODEV; - } - - mem = ioremap(r.start, resource_size(&r)); - if (!mem) { - printk(KERN_ERR "%s: Unable to map memory-controller memory\n", - __func__); - return -ENOMEM; - } - - rev = __raw_readl(mem); - iounmap(mem); - - if (rev < 0x34 || rev > 0x3f) { /* U3H */ - printk(KERN_ERR "%s: Non-CPC925(U3H) bridge revision: %02x\n", - __func__, rev); - return 0; - } - - pdev = platform_device_register_simple("cpc925_edac", 0, &r, 1); - if (IS_ERR(pdev)) - return PTR_ERR(pdev); - - printk(KERN_INFO "%s: CPC925 platform device created\n", __func__); - - return 0; -} -machine_device_initcall(maple, maple_cpc925_edac_setup); -#endif - -define_machine(maple) { - .name = "Maple", - .probe = maple_probe, - .setup_arch = maple_setup_arch, - .discover_phbs = maple_pci_init, - .init_IRQ = maple_init_IRQ, - .pci_irq_fixup = maple_pci_irq_fixup, - .pci_get_legacy_ide_irq = maple_pci_get_legacy_ide_irq, - .restart = maple_restart, - .halt = maple_halt, - .get_boot_time = maple_get_boot_time, - .set_rtc_time = maple_set_rtc_time, - .get_rtc_time = maple_get_rtc_time, - .progress = maple_progress, - .power_save = power4_idle, -}; diff --git a/arch/powerpc/platforms/maple/time.c b/arch/powerpc/platforms/maple/time.c deleted file mode 100644 index 91606411d2e08..0000000000000 --- a/arch/powerpc/platforms/maple/time.c +++ /dev/null @@ -1,170 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later -/* - * (c) Copyright 2004 Benjamin Herrenschmidt (benh@kernel.crashing.org), - * IBM Corp. - */ - -#undef DEBUG - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include -#include -#include -#include - -#include "maple.h" - -#ifdef DEBUG -#define DBG(x...) printk(x) -#else -#define DBG(x...) -#endif - -static int maple_rtc_addr; - -static int maple_clock_read(int addr) -{ - outb_p(addr, maple_rtc_addr); - return inb_p(maple_rtc_addr+1); -} - -static void maple_clock_write(unsigned long val, int addr) -{ - outb_p(addr, maple_rtc_addr); - outb_p(val, maple_rtc_addr+1); -} - -void maple_get_rtc_time(struct rtc_time *tm) -{ - do { - tm->tm_sec = maple_clock_read(RTC_SECONDS); - tm->tm_min = maple_clock_read(RTC_MINUTES); - tm->tm_hour = maple_clock_read(RTC_HOURS); - tm->tm_mday = maple_clock_read(RTC_DAY_OF_MONTH); - tm->tm_mon = maple_clock_read(RTC_MONTH); - tm->tm_year = maple_clock_read(RTC_YEAR); - } while (tm->tm_sec != maple_clock_read(RTC_SECONDS)); - - if (!(maple_clock_read(RTC_CONTROL) & RTC_DM_BINARY) - || RTC_ALWAYS_BCD) { - tm->tm_sec = bcd2bin(tm->tm_sec); - tm->tm_min = bcd2bin(tm->tm_min); - tm->tm_hour = bcd2bin(tm->tm_hour); - tm->tm_mday = bcd2bin(tm->tm_mday); - tm->tm_mon = bcd2bin(tm->tm_mon); - tm->tm_year = bcd2bin(tm->tm_year); - } - if ((tm->tm_year + 1900) < 1970) - tm->tm_year += 100; - - tm->tm_wday = -1; -} - -int maple_set_rtc_time(struct rtc_time *tm) -{ - unsigned char save_control, save_freq_select; - int sec, min, hour, mon, mday, year; - - spin_lock(&rtc_lock); - - save_control = maple_clock_read(RTC_CONTROL); /* tell the clock it's being set */ - - maple_clock_write((save_control|RTC_SET), RTC_CONTROL); - - save_freq_select = maple_clock_read(RTC_FREQ_SELECT); /* stop and reset prescaler */ - - maple_clock_write((save_freq_select|RTC_DIV_RESET2), RTC_FREQ_SELECT); - - sec = tm->tm_sec; - min = tm->tm_min; - hour = tm->tm_hour; - mon = tm->tm_mon; - mday = tm->tm_mday; - year = tm->tm_year; - - if (!(save_control & RTC_DM_BINARY) || RTC_ALWAYS_BCD) { - sec = bin2bcd(sec); - min = bin2bcd(min); - hour = bin2bcd(hour); - mon = bin2bcd(mon); - mday = bin2bcd(mday); - year = bin2bcd(year); - } - maple_clock_write(sec, RTC_SECONDS); - maple_clock_write(min, RTC_MINUTES); - maple_clock_write(hour, RTC_HOURS); - maple_clock_write(mon, RTC_MONTH); - maple_clock_write(mday, RTC_DAY_OF_MONTH); - maple_clock_write(year, RTC_YEAR); - - /* The following flags have to be released exactly in this order, - * otherwise the DS12887 (popular MC146818A clone with integrated - * battery and quartz) will not reset the oscillator and will not - * update precisely 500 ms later. You won't find this mentioned in - * the Dallas Semiconductor data sheets, but who believes data - * sheets anyway ... -- Markus Kuhn - */ - maple_clock_write(save_control, RTC_CONTROL); - maple_clock_write(save_freq_select, RTC_FREQ_SELECT); - - spin_unlock(&rtc_lock); - - return 0; -} - -static struct resource rtc_iores = { - .name = "rtc", - .flags = IORESOURCE_IO | IORESOURCE_BUSY, -}; - -time64_t __init maple_get_boot_time(void) -{ - struct rtc_time tm; - struct device_node *rtcs; - - rtcs = of_find_compatible_node(NULL, "rtc", "pnpPNP,b00"); - if (rtcs) { - struct resource r; - if (of_address_to_resource(rtcs, 0, &r)) { - printk(KERN_EMERG "Maple: Unable to translate RTC" - " address\n"); - goto bail; - } - if (!(r.flags & IORESOURCE_IO)) { - printk(KERN_EMERG "Maple: RTC address isn't PIO!\n"); - goto bail; - } - maple_rtc_addr = r.start; - printk(KERN_INFO "Maple: Found RTC at IO 0x%x\n", - maple_rtc_addr); - } - bail: - of_node_put(rtcs); - if (maple_rtc_addr == 0) { - maple_rtc_addr = RTC_PORT(0); /* legacy address */ - printk(KERN_INFO "Maple: No device node for RTC, assuming " - "legacy address (0x%x)\n", maple_rtc_addr); - } - - rtc_iores.start = maple_rtc_addr; - rtc_iores.end = maple_rtc_addr + 7; - request_resource(&ioport_resource, &rtc_iores); - - maple_get_rtc_time(&tm); - return rtc_tm_to_time64(&tm); -} - From 0b9846529e29ba988ce88b98df633de79675fcb3 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:34 +0530 Subject: [PATCH 08/69] powerpc/trace: Account for -fpatchable-function-entry support by toolchain So far, we have relied on the fact that gcc supports both -mprofile-kernel, as well as -fpatchable-function-entry, and clang supports neither. Our Makefile only checks for CONFIG_MPROFILE_KERNEL to decide which files to build. Clang has a feature request out [*] to implement -fpatchable-function-entry, and is unlikely to support -mprofile-kernel. Update our Makefile checks so that we pick up the correct files to build once clang picks up support for -fpatchable-function-entry. [*] https://github.com/llvm/llvm-project/issues/57031 Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-2-hbathini@linux.ibm.com --- arch/powerpc/kernel/trace/Makefile | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/trace/Makefile b/arch/powerpc/kernel/trace/Makefile index 125f4ca588b98..d6c3885453bda 100644 --- a/arch/powerpc/kernel/trace/Makefile +++ b/arch/powerpc/kernel/trace/Makefile @@ -9,12 +9,15 @@ CFLAGS_REMOVE_ftrace.o = $(CC_FLAGS_FTRACE) CFLAGS_REMOVE_ftrace_64_pg.o = $(CC_FLAGS_FTRACE) endif -obj32-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o -ifdef CONFIG_MPROFILE_KERNEL -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace.o ftrace_entry.o +ifdef CONFIG_FUNCTION_TRACER +obj32-y += ftrace.o ftrace_entry.o +ifeq ($(CONFIG_MPROFILE_KERNEL)$(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY),) +obj64-y += ftrace_64_pg.o ftrace_64_pg_entry.o else -obj64-$(CONFIG_FUNCTION_TRACER) += ftrace_64_pg.o ftrace_64_pg_entry.o +obj64-y += ftrace.o ftrace_entry.o +endif endif + obj-$(CONFIG_TRACING) += trace_clock.o obj-$(CONFIG_PPC64) += $(obj64-y) From be87d713eaddf0421ccd61cc060c4c29bc36fc9b Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:35 +0530 Subject: [PATCH 09/69] powerpc/kprobes: Use ftrace to determine if a probe is at function entry Rather than hard-coding the offset into a function to be used to determine if a kprobe is at function entry, use ftrace_location() to determine the ftrace location within the function and categorize all instructions till that offset to be function entry. For functions that cannot be traced, we fall back to using a fixed offset of 8 (two instructions) to categorize a probe as being at function entry for 64-bit elfv2, unless we are using pcrel. Acked-by: Masami Hiramatsu (Google) Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-3-hbathini@linux.ibm.com --- arch/powerpc/kernel/kprobes.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index f8aa91bc3b175..bf382c459e1f0 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -105,24 +105,22 @@ kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset) return addr; } -static bool arch_kprobe_on_func_entry(unsigned long offset) +static bool arch_kprobe_on_func_entry(unsigned long addr, unsigned long offset) { -#ifdef CONFIG_PPC64_ELF_ABI_V2 -#ifdef CONFIG_KPROBES_ON_FTRACE - return offset <= 16; -#else - return offset <= 8; -#endif -#else + unsigned long ip = ftrace_location(addr); + + if (ip) + return offset <= (ip - addr); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + return offset <= 8; return !offset; -#endif } /* XXX try and fold the magic of kprobe_lookup_name() in this */ kprobe_opcode_t *arch_adjust_kprobe_addr(unsigned long addr, unsigned long offset, bool *on_func_entry) { - *on_func_entry = arch_kprobe_on_func_entry(offset); + *on_func_entry = arch_kprobe_on_func_entry(addr, offset); return (kprobe_opcode_t *)(addr + offset); } From 161d62c2b067c4071cb515efe16475171e1c051e Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:36 +0530 Subject: [PATCH 10/69] powerpc64/ftrace: Nop out additional 'std' instruction emitted by gcc v5.x Gcc v5.x emits a 3-instruction sequence for -mprofile-kernel: mflr r0 std r0, 16(r1) bl _mcount Gcc v6.x moved to a simpler 2-instruction sequence by removing the 'std' instruction. The store saved the return address in the LR save area in the caller stack frame for stack unwinding. However, with dynamic ftrace, we no longer have a call to _mcount on kernel boot when ftrace is not enabled. When ftrace is enabled, that store is performed within ftrace_caller(). As such, the additional 'std' instruction is redundant. Nop it out on kernel boot. With this change, we now use the same 2-instruction profiling sequence with both -mprofile-kernel, as well as -fpatchable-function-entry on 64-bit powerpc. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-4-hbathini@linux.ibm.com --- arch/powerpc/kernel/trace/ftrace.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index d8d6b4fd9a14c..2ef504700e8d7 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -246,8 +246,12 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ ret = ftrace_read_inst(ip - 4, &old); if (!ret && !ppc_inst_equal(old, ppc_inst(PPC_RAW_MFLR(_R0)))) { + /* Gcc v5.x emit the additional 'std' instruction, gcc v6.x don't */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - ret |= ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STD(_R0, _R1, 16)), + ppc_inst(PPC_RAW_NOP())); } } else { return -EINVAL; From 654b3fa61b817a46037197b73a7ac6d36d01df7e Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:37 +0530 Subject: [PATCH 11/69] powerpc32/ftrace: Unify 32-bit and 64-bit ftrace entry code On 32-bit powerpc, gcc generates a three instruction sequence for function profiling: mflr r0 stw r0, 4(r1) bl _mcount On kernel boot, the call to _mcount() is nop-ed out, to be patched back in when ftrace is actually enabled. The 'stw' instruction therefore is not necessary unless ftrace is enabled. Nop it out during ftrace init. When ftrace is enabled, we want the 'stw' so that stack unwinding works properly. Perform the same within the ftrace handler, similar to 64-bit powerpc. Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-5-hbathini@linux.ibm.com --- arch/powerpc/kernel/trace/ftrace.c | 6 ++++-- arch/powerpc/kernel/trace/ftrace_entry.S | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 2ef504700e8d7..8c3e523e4f964 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -240,8 +240,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) } else if (IS_ENABLED(CONFIG_PPC32)) { /* Expected sequence: 'mflr r0', 'stw r0,4(r1)', 'bl _mcount' */ ret = ftrace_validate_inst(ip - 8, ppc_inst(PPC_RAW_MFLR(_R0))); - if (!ret) - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4))); + if (ret) + return ret; + ret = ftrace_modify_code(ip - 4, ppc_inst(PPC_RAW_STW(_R0, _R1, 4)), + ppc_inst(PPC_RAW_NOP())); } else if (IS_ENABLED(CONFIG_MPROFILE_KERNEL)) { /* Expected sequence: 'mflr r0', ['std r0,16(r1)'], 'bl _mcount' */ ret = ftrace_read_inst(ip - 4, &old); diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 76dbe9fd2c0f2..244a1c7bb1e8e 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -33,6 +33,8 @@ * and then arrange for the ftrace function to be called. */ .macro ftrace_regs_entry allregs + /* Save the original return address in A's stack frame */ + PPC_STL r0, LRSAVE(r1) /* Create a minimal stack frame for representing B */ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) @@ -44,8 +46,6 @@ SAVE_GPRS(3, 10, r1) #ifdef CONFIG_PPC64 - /* Save the original return address in A's stack frame */ - std r0, LRSAVE+SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE(r1) /* Ok to continue? */ lbz r3, PACA_FTRACE_ENABLED(r13) cmpdi r3, 0 From c12cfe9dee077763708e0a5cf3aca02a85b1e8ba Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:38 +0530 Subject: [PATCH 12/69] powerpc/module_64: Convert #ifdef to IS_ENABLED() Minor refactor for converting #ifdef to IS_ENABLED(). Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-6-hbathini@linux.ibm.com --- arch/powerpc/kernel/module_64.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index e9bab599d0c27..1db88409bd955 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -241,14 +241,8 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, } } -#ifdef CONFIG_DYNAMIC_FTRACE - /* make the trampoline to the ftrace_caller */ - relocs++; -#ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS - /* an additional one for ftrace_regs_caller */ - relocs++; -#endif -#endif + /* stubs for ftrace_caller and ftrace_regs_caller */ + relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); From 8b0dc1305ea0bbb015b560193cdd76fd4100f062 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:39 +0530 Subject: [PATCH 13/69] powerpc/ftrace: Remove pointer to struct module from dyn_arch_ftrace Pointer to struct module is only relevant for ftrace records belonging to kernel modules. Having this field in dyn_arch_ftrace wastes memory for all ftrace records belonging to the kernel. Remove the same in favour of looking up the module from the ftrace record address, similar to other architectures. Reviewed-by: Nicholas Piggin Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-7-hbathini@linux.ibm.com --- arch/powerpc/include/asm/ftrace.h | 1 - arch/powerpc/kernel/trace/ftrace.c | 49 +++++++++-------- arch/powerpc/kernel/trace/ftrace_64_pg.c | 69 ++++++++++-------------- 3 files changed, 56 insertions(+), 63 deletions(-) diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 559560286e6d0..278d4548e8f10 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -24,7 +24,6 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, struct module; struct dyn_ftrace; struct dyn_arch_ftrace { - struct module *mod; }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 8c3e523e4f964..fe0546fbac8e1 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -106,28 +106,43 @@ static unsigned long find_ftrace_tramp(unsigned long ip) return 0; } +#ifdef CONFIG_MODULES +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + struct module *mod = NULL; + + preempt_disable(); + mod = __module_text_address(ip); + preempt_enable(); + + if (!mod) + pr_err("No module loaded at addr=%lx\n", ip); + + return (addr == (unsigned long)ftrace_caller ? mod->arch.tramp : mod->arch.tramp_regs); +} +#else +static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long addr) +{ + return 0; +} +#endif + static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) { unsigned long ip = rec->ip; unsigned long stub; - if (is_offset_in_branch_range(addr - ip)) { + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; -#ifdef CONFIG_MODULES - } else if (rec->arch.mod) { - /* Module code would be going to one of the module stubs */ - stub = (addr == (unsigned long)ftrace_caller ? rec->arch.mod->arch.tramp : - rec->arch.mod->arch.tramp_regs); -#endif - } else if (core_kernel_text(ip)) { + else if (core_kernel_text(ip)) /* We would be branching to one of our ftrace stubs */ stub = find_ftrace_tramp(ip); - if (!stub) { - pr_err("0x%lx: No ftrace stubs reachable\n", ip); - return -EINVAL; - } - } else { + else + stub = ftrace_lookup_module_stub(ip, addr); + + if (!stub) { + pr_err("0x%lx: No ftrace stubs reachable\n", ip); return -EINVAL; } @@ -262,14 +277,6 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) if (ret) return ret; - if (!core_kernel_text(ip)) { - if (!mod) { - pr_err("0x%lx: No module provided for non-kernel address\n", ip); - return -EFAULT; - } - rec->arch.mod = mod; - } - /* Nop-out the ftrace location */ new = ppc_inst(PPC_RAW_NOP()); addr = MCOUNT_ADDR; diff --git a/arch/powerpc/kernel/trace/ftrace_64_pg.c b/arch/powerpc/kernel/trace/ftrace_64_pg.c index 12fab1803bcf4..8a551dfca3d04 100644 --- a/arch/powerpc/kernel/trace/ftrace_64_pg.c +++ b/arch/powerpc/kernel/trace/ftrace_64_pg.c @@ -116,6 +116,20 @@ static unsigned long find_bl_target(unsigned long ip, ppc_inst_t op) } #ifdef CONFIG_MODULES +static struct module *ftrace_lookup_module(struct dyn_ftrace *rec) +{ + struct module *mod; + + preempt_disable(); + mod = __module_text_address(rec->ip); + preempt_enable(); + + if (!mod) + pr_err("No module loaded at addr=%lx\n", rec->ip); + + return mod; +} + static int __ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -124,6 +138,12 @@ __ftrace_make_nop(struct module *mod, unsigned long ip = rec->ip; ppc_inst_t op, pop; + if (!mod) { + mod = ftrace_lookup_module(rec); + if (!mod) + return -EINVAL; + } + /* read where this goes */ if (copy_inst_from_kernel_nofault(&op, (void *)ip)) { pr_err("Fetching opcode failed.\n"); @@ -366,27 +386,6 @@ int ftrace_make_nop(struct module *mod, return -EINVAL; } - /* - * Out of range jumps are called from modules. - * We should either already have a pointer to the module - * or it has been passed in. - */ - if (!rec->arch.mod) { - if (!mod) { - pr_err("No module loaded addr=%lx\n", addr); - return -EFAULT; - } - rec->arch.mod = mod; - } else if (mod) { - if (mod != rec->arch.mod) { - pr_err("Record mod %p not equal to passed in mod %p\n", - rec->arch.mod, mod); - return -EINVAL; - } - /* nothing to do if mod == rec->arch.mod */ - } else - mod = rec->arch.mod; - return __ftrace_make_nop(mod, rec, addr); } @@ -411,7 +410,10 @@ __ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) ppc_inst_t op[2]; void *ip = (void *)rec->ip; unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; /* read where this goes */ if (copy_inst_from_kernel_nofault(op, ip)) @@ -533,16 +535,6 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) return -EINVAL; } - /* - * Out of range jumps are called from modules. - * Being that we are converting from nop, it had better - * already have a module defined. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - return __ftrace_make_call(rec, addr); } @@ -555,7 +547,10 @@ __ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, ppc_inst_t op; unsigned long ip = rec->ip; unsigned long entry, ptr, tramp; - struct module *mod = rec->arch.mod; + struct module *mod = ftrace_lookup_module(rec); + + if (!mod) + return -EINVAL; /* If we never set up ftrace trampolines, then bail */ if (!mod->arch.tramp || !mod->arch.tramp_regs) { @@ -668,14 +663,6 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, return -EINVAL; } - /* - * Out of range jumps are called from modules. - */ - if (!rec->arch.mod) { - pr_err("No module loaded\n"); - return -EINVAL; - } - return __ftrace_modify_call(rec, old_addr, addr); } #endif From 1d59bd2fc07f0b2e643b2a07405cf0717b93984f Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:40 +0530 Subject: [PATCH 14/69] powerpc/ftrace: Skip instruction patching if the instructions are the same To simplify upcoming changes to ftrace, add a check to skip actual instruction patching if the old and new instructions are the same. We still validate that the instruction is what we expect, but don't actually patch the same instruction again. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-8-hbathini@linux.ibm.com --- arch/powerpc/kernel/trace/ftrace.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index fe0546fbac8e1..719517265d39a 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -82,7 +82,7 @@ static inline int ftrace_modify_code(unsigned long ip, ppc_inst_t old, ppc_inst_ { int ret = ftrace_validate_inst(ip, old); - if (!ret) + if (!ret && !ppc_inst_equal(old, new)) ret = patch_instruction((u32 *)ip, new); return ret; From ed6144656bb1ea29ad83671b48a21c89e7873b8a Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:41 +0530 Subject: [PATCH 15/69] powerpc/ftrace: Move ftrace stub used for init text before _einittext Move the ftrace stub used to cover inittext before _einittext so that it is within kernel text, as seen through core_kernel_text(). This is required for a subsequent change to ftrace. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-9-hbathini@linux.ibm.com --- arch/powerpc/kernel/vmlinux.lds.S | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vmlinux.lds.S b/arch/powerpc/kernel/vmlinux.lds.S index 7ab4e2fb28b1e..b4c9decc7a75c 100644 --- a/arch/powerpc/kernel/vmlinux.lds.S +++ b/arch/powerpc/kernel/vmlinux.lds.S @@ -265,14 +265,13 @@ SECTIONS .init.text : AT(ADDR(.init.text) - LOAD_OFFSET) { _sinittext = .; INIT_TEXT - + *(.tramp.ftrace.init); /* *.init.text might be RO so we must ensure this section ends on * a page boundary. */ . = ALIGN(PAGE_SIZE); _einittext = .; - *(.tramp.ftrace.init); } :text /* .exit.text is discarded at runtime, not link time, From 9670f6d2097c4f97e15c67920dfddc664d7ee91c Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:42 +0530 Subject: [PATCH 16/69] powerpc64/bpf: Fold bpf_jit_emit_func_call_hlp() into bpf_jit_emit_func_call_rel() Commit 61688a82e047 ("powerpc/bpf: enable kfunc call") enhanced bpf_jit_emit_func_call_hlp() to handle calls out to module region, where bpf progs are generated. The only difference now between bpf_jit_emit_func_call_hlp() and bpf_jit_emit_func_call_rel() is in handling of the initial pass where target function address is not known. Fold that logic into bpf_jit_emit_func_call_hlp() and rename it to bpf_jit_emit_func_call_rel() to simplify bpf function call JIT code. We don't actually need to load/restore TOC across a call out to a different kernel helper or to a different bpf program since they all work with the kernel TOC. We only need to do it if we have to call out to a module function. So, guard TOC load/restore with appropriate conditions. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-10-hbathini@linux.ibm.com --- arch/powerpc/net/bpf_jit_comp64.c | 61 +++++++++---------------------- 1 file changed, 17 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 2cbcdf93cc197..f3be024fc6854 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -202,14 +202,22 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_BLR()); } -static int -bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) +int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) { unsigned long func_addr = func ? ppc_function_entry((void *)func) : 0; long reladdr; - if (WARN_ON_ONCE(!kernel_text_address(func_addr))) - return -EINVAL; + /* bpf to bpf call, func is not known in the initial pass. Emit 5 nops as a placeholder */ + if (!func) { + for (int i = 0; i < 5; i++) + EMIT(PPC_RAW_NOP()); + /* elfv1 needs an additional instruction to load addr from descriptor */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V1)) + EMIT(PPC_RAW_NOP()); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + return 0; + } #ifdef CONFIG_PPC_KERNEL_PCREL reladdr = func_addr - local_paca->kernelbase; @@ -266,7 +274,8 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, * We can clobber r2 since we get called through a * function pointer (so caller will save/restore r2). */ - EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); + if (is_module_text_address(func_addr)) + EMIT(PPC_RAW_LD(_R2, bpf_to_ppc(TMP_REG_2), 8)); } else { PPC_LI64(_R12, func); EMIT(PPC_RAW_MTCTR(_R12)); @@ -276,46 +285,14 @@ bpf_jit_emit_func_call_hlp(u32 *image, u32 *fimage, struct codegen_context *ctx, * Load r2 with kernel TOC as kernel TOC is used if function address falls * within core kernel text. */ - EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); + if (is_module_text_address(func_addr)) + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); } #endif return 0; } -int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) -{ - unsigned int i, ctx_idx = ctx->idx; - - if (WARN_ON_ONCE(func && is_module_text_address(func))) - return -EINVAL; - - /* skip past descriptor if elf v1 */ - func += FUNCTION_DESCR_SIZE; - - /* Load function address into r12 */ - PPC_LI64(_R12, func); - - /* For bpf-to-bpf function calls, the callee's address is unknown - * until the last extra pass. As seen above, we use PPC_LI64() to - * load the callee's address, but this may optimize the number of - * instructions required based on the nature of the address. - * - * Since we don't want the number of instructions emitted to increase, - * we pad the optimized PPC_LI64() call with NOPs to guarantee that - * we always have a five-instruction sequence, which is the maximum - * that PPC_LI64() can emit. - */ - if (!image) - for (i = ctx->idx - ctx_idx; i < 5; i++) - EMIT(PPC_RAW_NOP()); - - EMIT(PPC_RAW_MTCTR(_R12)); - EMIT(PPC_RAW_BCTRL()); - - return 0; -} - static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 out) { /* @@ -1102,11 +1079,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code if (ret < 0) return ret; - if (func_addr_fixed) - ret = bpf_jit_emit_func_call_hlp(image, fimage, ctx, func_addr); - else - ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); - + ret = bpf_jit_emit_func_call_rel(image, fimage, ctx, func_addr); if (ret) return ret; From 782f46cbce5328da9380f166bd31cd17a04a7b10 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:43 +0530 Subject: [PATCH 17/69] powerpc/ftrace: Add a postlink script to validate function tracer Function tracer on powerpc can only work with vmlinux having a .text size of up to ~64MB due to powerpc branch instruction having a limited relative branch range of 32MB. Today, this is only detected on kernel boot when ftrace is init'ed. Add a post-link script to check the size of .text so that we can detect this at build time, and break the build if necessary. We add a dependency on !COMPILE_TEST for CONFIG_HAVE_FUNCTION_TRACER so that allyesconfig and other test builds can continue to work without enabling ftrace. Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-11-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 2 +- arch/powerpc/Makefile.postlink | 8 +++++ arch/powerpc/tools/ftrace_check.sh | 50 ++++++++++++++++++++++++++++++ 3 files changed, 59 insertions(+), 1 deletion(-) create mode 100755 arch/powerpc/tools/ftrace_check.sh diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 6aaca48955a34..bb99ec9da63c9 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -243,7 +243,7 @@ config PPC select HAVE_FUNCTION_DESCRIPTORS if PPC64_ELF_ABI_V1 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_FUNCTION_TRACER if PPC64 || (PPC32 && CC_IS_GCC) + select HAVE_FUNCTION_TRACER if !COMPILE_TEST && (PPC64 || (PPC32 && CC_IS_GCC)) select HAVE_GCC_PLUGINS if GCC_VERSION >= 50200 # plugin support on gcc <= 5.1 is buggy on PPC select HAVE_GENERIC_VDSO select HAVE_HARDLOCKUP_DETECTOR_ARCH if PPC_BOOK3S_64 && SMP diff --git a/arch/powerpc/Makefile.postlink b/arch/powerpc/Makefile.postlink index ae5a4256b03d8..bb601be361736 100644 --- a/arch/powerpc/Makefile.postlink +++ b/arch/powerpc/Makefile.postlink @@ -24,6 +24,9 @@ else $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/relocs_check.sh "$(OBJDUMP)" "$(NM)" "$@" endif +quiet_cmd_ftrace_check = CHKFTRC $@ + cmd_ftrace_check = $(CONFIG_SHELL) $(srctree)/arch/powerpc/tools/ftrace_check.sh "$(NM)" "$@" + # `@true` prevents complaint when there is nothing to be done vmlinux: FORCE @@ -34,6 +37,11 @@ endif ifdef CONFIG_RELOCATABLE $(call if_changed,relocs_check) endif +ifdef CONFIG_FUNCTION_TRACER +ifndef CONFIG_PPC64_ELF_ABI_V1 + $(call cmd,ftrace_check) +endif +endif clean: rm -f .tmp_symbols.txt diff --git a/arch/powerpc/tools/ftrace_check.sh b/arch/powerpc/tools/ftrace_check.sh new file mode 100755 index 0000000000000..405e7e3066175 --- /dev/null +++ b/arch/powerpc/tools/ftrace_check.sh @@ -0,0 +1,50 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later +# +# This script checks vmlinux to ensure that all functions can call ftrace_caller() either directly, +# or through the stub, ftrace_tramp_text, at the end of kernel text. + +# Error out if any command fails +set -e + +# Allow for verbose output +if [ "$V" = "1" ]; then + set -x +fi + +if [ $# -lt 2 ]; then + echo "$0 [path to nm] [path to vmlinux]" 1>&2 + exit 1 +fi + +# Have Kbuild supply the path to nm so we handle cross compilation. +nm="$1" +vmlinux="$2" + +stext_addr=$($nm "$vmlinux" | grep -e " [TA] _stext$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') +ftrace_caller_addr=$($nm "$vmlinux" | grep -e " T ftrace_caller$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') +ftrace_tramp_addr=$($nm "$vmlinux" | grep -e " T ftrace_tramp_text$" | \ + cut -d' ' -f1 | tr '[:lower:]' '[:upper:]') + +ftrace_caller_offset=$(echo "ibase=16;$ftrace_caller_addr - $stext_addr" | bc) +ftrace_tramp_offset=$(echo "ibase=16;$ftrace_tramp_addr - $ftrace_caller_addr" | bc) +sz_32m=$(printf "%d" 0x2000000) +sz_64m=$(printf "%d" 0x4000000) + +# ftrace_caller - _stext < 32M +if [ "$ftrace_caller_offset" -ge "$sz_32m" ]; then + echo "ERROR: ftrace_caller (0x$ftrace_caller_addr) is beyond 32MiB of _stext" 1>&2 + echo "ERROR: consider disabling CONFIG_FUNCTION_TRACER, or reducing the size \ + of kernel text" 1>&2 + exit 1 +fi + +# ftrace_tramp_text - ftrace_caller < 64M +if [ "$ftrace_tramp_offset" -ge "$sz_64m" ]; then + echo "ERROR: kernel text extends beyond 64MiB from ftrace_caller" 1>&2 + echo "ERROR: consider disabling CONFIG_FUNCTION_TRACER, or reducing the size \ + of kernel text" 1>&2 + exit 1 +fi From 1198c9c689cfdaa2d08eb508c13ff116043f07b7 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:44 +0530 Subject: [PATCH 18/69] kbuild: Add generic hook for architectures to use before the final vmlinux link On powerpc, we would like to be able to make a pass on vmlinux.o and generate a new object file to be linked into vmlinux. Add a generic pass in Makefile.vmlinux that architectures can use for this purpose. Architectures need to select CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX and must provide arch//tools/Makefile with .arch.vmlinux.o target, which will be invoked prior to the final vmlinux link step. Acked-by: Masahiro Yamada Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-12-hbathini@linux.ibm.com --- arch/Kconfig | 6 ++++++ scripts/Makefile.vmlinux | 7 +++++++ scripts/link-vmlinux.sh | 7 ++++++- 3 files changed, 19 insertions(+), 1 deletion(-) diff --git a/arch/Kconfig b/arch/Kconfig index 8af374ea1adc2..a1538927c8c18 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -1684,4 +1684,10 @@ config CC_HAS_SANE_FUNCTION_ALIGNMENT config ARCH_NEED_CMPXCHG_1_EMU bool +config ARCH_WANTS_PRE_LINK_VMLINUX + bool + help + An architecture can select this if it provides arch//tools/Makefile + with .arch.vmlinux.o target to be linked into vmlinux. + endmenu diff --git a/scripts/Makefile.vmlinux b/scripts/Makefile.vmlinux index 1284f05555b97..dddad554e9127 100644 --- a/scripts/Makefile.vmlinux +++ b/scripts/Makefile.vmlinux @@ -22,6 +22,13 @@ targets += .vmlinux.export.o vmlinux: .vmlinux.export.o endif +ifdef CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX +vmlinux: arch/$(SRCARCH)/tools/vmlinux.arch.o + +arch/$(SRCARCH)/tools/vmlinux.arch.o: vmlinux.o FORCE + $(Q)$(MAKE) $(build)=arch/$(SRCARCH)/tools $@ +endif + ARCH_POSTLINK := $(wildcard $(srctree)/arch/$(SRCARCH)/Makefile.postlink) # Final link of vmlinux with optional arch pass after final link diff --git a/scripts/link-vmlinux.sh b/scripts/link-vmlinux.sh index a9b3f34a78d2c..a3c634b2f348f 100755 --- a/scripts/link-vmlinux.sh +++ b/scripts/link-vmlinux.sh @@ -100,7 +100,7 @@ vmlinux_link() ${ld} ${ldflags} -o ${output} \ ${wl}--whole-archive ${objs} ${wl}--no-whole-archive \ ${wl}--start-group ${libs} ${wl}--end-group \ - ${kallsymso} ${btf_vmlinux_bin_o} ${ldlibs} + ${kallsymso} ${btf_vmlinux_bin_o} ${arch_vmlinux_o} ${ldlibs} } # generate .BTF typeinfo from DWARF debuginfo @@ -198,6 +198,11 @@ fi ${MAKE} -f "${srctree}/scripts/Makefile.build" obj=init init/version-timestamp.o +arch_vmlinux_o= +if is_enabled CONFIG_ARCH_WANTS_PRE_LINK_VMLINUX; then + arch_vmlinux_o=arch/${SRCARCH}/tools/vmlinux.arch.o +fi + btf_vmlinux_bin_o= kallsymso= strip_debug= From eec37961a56aa4f3fe1c33ffd48eec7d1bb0c009 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:45 +0530 Subject: [PATCH 19/69] powerpc64/ftrace: Move ftrace sequence out of line Function profile sequence on powerpc includes two instructions at the beginning of each function: mflr r0 bl ftrace_caller The call to ftrace_caller() gets nop'ed out during kernel boot and is patched in when ftrace is enabled. Given the sequence, we cannot return from ftrace_caller with 'blr' as we need to keep LR and r0 intact. This results in link stack (return address predictor) imbalance when ftrace is enabled. To address that, we would like to use a three instruction sequence: mflr r0 bl ftrace_caller mtlr r0 Further more, to support DYNAMIC_FTRACE_WITH_CALL_OPS, we need to reserve two instruction slots before the function. This results in a total of five instruction slots to be reserved for ftrace use on each function that is traced. Move the function profile sequence out-of-line to minimize its impact. To do this, we reserve a single nop at function entry using -fpatchable-function-entry=1 and add a pass on vmlinux.o to determine the total number of functions that can be traced. This is then used to generate a .S file reserving the appropriate amount of space for use as ftrace stubs, which is built and linked into vmlinux. On bootup, the stub space is split into separate stubs per function and populated with the proper instruction sequence. A pointer to the associated stub is maintained in dyn_arch_ftrace. For modules, space for ftrace stubs is reserved from the generic module stub space. This is restricted to and enabled by default only on 64-bit powerpc, though there are some changes to accommodate 32-bit powerpc. This is done so that 32-bit powerpc could choose to opt into this based on further tests and benchmarks. As an example, after this patch, kernel functions will have a single nop at function entry: : addis r2,r12,467 addi r2,r2,-16028 nop mfocrf r11,8 ... When ftrace is enabled, the nop is converted to an unconditional branch to the stub associated with that function: : addis r2,r12,467 addi r2,r2,-16028 b ftrace_ool_stub_text_end+0x11b28 mfocrf r11,8 ... The associated stub: : mflr r0 bl ftrace_caller mtlr r0 b kernel_clone+0xc ... This change showed an improvement of ~10% in null_syscall benchmark on a Power 10 system with ftrace enabled. Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-13-hbathini@linux.ibm.com --- arch/powerpc/Kbuild | 2 +- arch/powerpc/Kconfig | 4 + arch/powerpc/Makefile | 4 + arch/powerpc/include/asm/ftrace.h | 11 ++ arch/powerpc/include/asm/module.h | 5 + arch/powerpc/kernel/asm-offsets.c | 4 + arch/powerpc/kernel/module_64.c | 58 +++++++- arch/powerpc/kernel/trace/ftrace.c | 162 +++++++++++++++++++-- arch/powerpc/kernel/trace/ftrace_entry.S | 116 +++++++++++---- arch/powerpc/tools/.gitignore | 2 + arch/powerpc/tools/Makefile | 9 ++ arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 41 ++++++ 12 files changed, 380 insertions(+), 38 deletions(-) create mode 100644 arch/powerpc/tools/.gitignore create mode 100644 arch/powerpc/tools/Makefile create mode 100755 arch/powerpc/tools/ftrace-gen-ool-stubs.sh diff --git a/arch/powerpc/Kbuild b/arch/powerpc/Kbuild index 571f260b08423..b010ccb071b6d 100644 --- a/arch/powerpc/Kbuild +++ b/arch/powerpc/Kbuild @@ -19,4 +19,4 @@ obj-$(CONFIG_KEXEC_CORE) += kexec/ obj-$(CONFIG_KEXEC_FILE) += purgatory/ # for cleaning -subdir- += boot +subdir- += boot tools diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index bb99ec9da63c9..c995f3fe19e9a 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -569,6 +569,10 @@ config ARCH_USING_PATCHABLE_FUNCTION_ENTRY def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mlittle-endian) if PPC64 && CPU_LITTLE_ENDIAN def_bool $(success,$(srctree)/arch/powerpc/tools/gcc-check-fpatchable-function-entry.sh $(CC) -mbig-endian) if PPC64 && CPU_BIG_ENDIAN +config PPC_FTRACE_OUT_OF_LINE + def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY + select ARCH_WANTS_PRE_LINK_VMLINUX + config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 1c5c28d58e944..a52167830e8b6 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -148,7 +148,11 @@ CC_FLAGS_NO_FPU := $(call cc-option,-msoft-float) ifdef CONFIG_FUNCTION_TRACER ifdef CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY +ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +CC_FLAGS_FTRACE := -fpatchable-function-entry=1 +else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 +endif else CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 278d4548e8f10..bdbafc668b20a 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -24,6 +24,10 @@ unsigned long prepare_ftrace_return(unsigned long parent, unsigned long ip, struct module; struct dyn_ftrace; struct dyn_arch_ftrace { +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* pointer to the associated out-of-line stub */ + unsigned long ool_stub; +#endif }; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_ARGS @@ -130,6 +134,13 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } #ifdef CONFIG_FUNCTION_TRACER extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +struct ftrace_ool_stub { + u32 insn[4]; +}; +extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_inittext[]; +extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_inittext_count; +#endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); #else diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 300c777cc3075..9ee70a4a0fde1 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -47,6 +47,11 @@ struct mod_arch_specific { #ifdef CONFIG_DYNAMIC_FTRACE unsigned long tramp; unsigned long tramp_regs; +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + struct ftrace_ool_stub *ool_stubs; + unsigned int ool_stub_count; + unsigned int ool_stub_index; +#endif #endif }; diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index d8fe97662a028..340f69bfcaa7f 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -675,5 +675,9 @@ int main(void) DEFINE(BPT_SIZE, BPT_SIZE); #endif +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); +#endif + return 0; } diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c index 1db88409bd955..6816e9967cab6 100644 --- a/arch/powerpc/kernel/module_64.c +++ b/arch/powerpc/kernel/module_64.c @@ -205,7 +205,9 @@ static int relacmp(const void *_x, const void *_y) /* Get size of potential trampolines required. */ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, - const Elf64_Shdr *sechdrs) + const Elf64_Shdr *sechdrs, + char *secstrings, + struct module *me) { /* One extra reloc so it's always 0-addr terminated */ unsigned long relocs = 1; @@ -244,6 +246,24 @@ static unsigned long get_stubs_size(const Elf64_Ehdr *hdr, /* stubs for ftrace_caller and ftrace_regs_caller */ relocs += IS_ENABLED(CONFIG_DYNAMIC_FTRACE) + IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_REGS); +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* stubs for the function tracer */ + for (i = 1; i < hdr->e_shnum; i++) { + if (!strcmp(secstrings + sechdrs[i].sh_name, "__patchable_function_entries")) { + me->arch.ool_stub_count = sechdrs[i].sh_size / sizeof(unsigned long); + me->arch.ool_stub_index = 0; + relocs += roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / + sizeof(struct ppc64_stub_entry); + break; + } + } + if (i == hdr->e_shnum) { + pr_err("%s: doesn't contain __patchable_function_entries.\n", me->name); + return -ENOEXEC; + } +#endif + pr_debug("Looks like a total of %lu stubs, max\n", relocs); return relocs * sizeof(struct ppc64_stub_entry); } @@ -454,7 +474,7 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr, #endif /* Override the stubs size */ - sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs); + sechdrs[me->arch.stubs_section].sh_size = get_stubs_size(hdr, sechdrs, secstrings, me); return 0; } @@ -1079,6 +1099,37 @@ int module_trampoline_target(struct module *mod, unsigned long addr, return 0; } +static int setup_ftrace_ool_stubs(const Elf64_Shdr *sechdrs, unsigned long addr, struct module *me) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + unsigned int i, total_stubs, num_stubs; + struct ppc64_stub_entry *stub; + + total_stubs = sechdrs[me->arch.stubs_section].sh_size / sizeof(*stub); + num_stubs = roundup(me->arch.ool_stub_count * sizeof(struct ftrace_ool_stub), + sizeof(struct ppc64_stub_entry)) / sizeof(struct ppc64_stub_entry); + + /* Find the next available entry */ + stub = (void *)sechdrs[me->arch.stubs_section].sh_addr; + for (i = 0; stub_func_addr(stub[i].funcdata); i++) + if (WARN_ON(i >= total_stubs)) + return -1; + + if (WARN_ON(i + num_stubs > total_stubs)) + return -1; + + stub += i; + me->arch.ool_stubs = (struct ftrace_ool_stub *)stub; + + /* reserve stubs */ + for (i = 0; i < num_stubs; i++) + if (patch_u32((void *)&stub->funcdata, PPC_RAW_NOP())) + return -1; +#endif + + return 0; +} + int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) { mod->arch.tramp = stub_for_addr(sechdrs, @@ -1097,6 +1148,9 @@ int module_finalize_ftrace(struct module *mod, const Elf_Shdr *sechdrs) if (!mod->arch.tramp) return -ENOENT; + if (setup_ftrace_ool_stubs(sechdrs, mod->arch.tramp, mod)) + return -ENOENT; + return 0; } #endif diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 719517265d39a..1fee074388cc6 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -37,7 +37,8 @@ unsigned long ftrace_call_adjust(unsigned long addr) if (addr >= (unsigned long)__exittext_begin && addr < (unsigned long)__exittext_end) return 0; - if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) + if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && + !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) addr += MCOUNT_INSN_SIZE; return addr; @@ -127,11 +128,25 @@ static unsigned long ftrace_lookup_module_stub(unsigned long ip, unsigned long a } #endif +static unsigned long ftrace_get_ool_stub(struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + return rec->arch.ool_stub; +#else + BUILD_BUG(); +#endif +} + static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_inst_t *call_inst) { - unsigned long ip = rec->ip; + unsigned long ip; unsigned long stub; + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + else + ip = rec->ip; + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; @@ -142,7 +157,7 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ stub = ftrace_lookup_module_stub(ip, addr); if (!stub) { - pr_err("0x%lx: No ftrace stubs reachable\n", ip); + pr_err("0x%lx (0x%lx): No ftrace stubs reachable\n", ip, rec->ip); return -EINVAL; } @@ -150,6 +165,92 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ return 0; } +static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) +{ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + static int ool_stub_text_end_index, ool_stub_inittext_index; + int ret = 0, ool_stub_count, *ool_stub_index; + ppc_inst_t inst; + /* + * See ftrace_entry.S if changing the below instruction sequence, as we rely on + * decoding the last branch instruction here to recover the correct function ip. + */ + struct ftrace_ool_stub *ool_stub, ool_stub_template = { + .insn = { + PPC_RAW_MFLR(_R0), + PPC_RAW_NOP(), /* bl ftrace_caller */ + PPC_RAW_MTLR(_R0), + PPC_RAW_NOP() /* b rec->ip + 4 */ + } + }; + + WARN_ON(rec->arch.ool_stub); + + if (is_kernel_inittext(rec->ip)) { + ool_stub = ftrace_ool_stub_inittext; + ool_stub_index = &ool_stub_inittext_index; + ool_stub_count = ftrace_ool_stub_inittext_count; + } else if (is_kernel_text(rec->ip)) { + ool_stub = ftrace_ool_stub_text_end; + ool_stub_index = &ool_stub_text_end_index; + ool_stub_count = ftrace_ool_stub_text_end_count; +#ifdef CONFIG_MODULES + } else if (mod) { + ool_stub = mod->arch.ool_stubs; + ool_stub_index = &mod->arch.ool_stub_index; + ool_stub_count = mod->arch.ool_stub_count; +#endif + } else { + return -EINVAL; + } + + ool_stub += (*ool_stub_index)++; + + if (WARN_ON(*ool_stub_index > ool_stub_count)) + return -EINVAL; + + if (!is_offset_in_branch_range((long)rec->ip - (long)&ool_stub->insn[0]) || + !is_offset_in_branch_range((long)(rec->ip + MCOUNT_INSN_SIZE) - + (long)&ool_stub->insn[3])) { + pr_err("%s: ftrace ool stub out of range (%p -> %p).\n", + __func__, (void *)rec->ip, (void *)&ool_stub->insn[0]); + return -EINVAL; + } + + rec->arch.ool_stub = (unsigned long)&ool_stub->insn[0]; + + /* bl ftrace_caller */ + if (!mod) + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &inst); +#ifdef CONFIG_MODULES + else + /* + * We can't use ftrace_get_call_inst() since that uses + * __module_text_address(rec->ip) to look up the module. + * But, since the module is not fully formed at this stage, + * the lookup fails. We know the target though, so generate + * the branch inst directly. + */ + inst = ftrace_create_branch_inst(ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE, + mod->arch.tramp, 1); +#endif + ool_stub_template.insn[1] = ppc_inst_val(inst); + + /* b rec->ip + 4 */ + if (!ret && create_branch(&inst, &ool_stub->insn[3], rec->ip + MCOUNT_INSN_SIZE, 0)) + return -EINVAL; + ool_stub_template.insn[3] = ppc_inst_val(inst); + + if (!ret) + ret = patch_instructions((u32 *)ool_stub, (u32 *)&ool_stub_template, + sizeof(ool_stub_template), false); + + return ret; +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + BUILD_BUG(); +#endif +} + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -162,18 +263,29 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { ppc_inst_t old, new; - int ret; + unsigned long ip = rec->ip; + int ret = 0; /* This can only ever be called during module load */ - if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(rec->ip))) + if (WARN_ON(!IS_ENABLED(CONFIG_MODULES) || core_kernel_text(ip))) return -EINVAL; old = ppc_inst(PPC_RAW_NOP()); - ret = ftrace_get_call_inst(rec, addr, &new); - if (ret) - return ret; + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; /* second instruction in stub */ + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &old); + } + + ret |= ftrace_get_call_inst(rec, addr, &new); + + if (!ret) + ret = ftrace_modify_code(ip, old, new); - return ftrace_modify_code(rec->ip, old, new); + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), + ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip))); + + return ret; } int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) @@ -206,6 +318,13 @@ void ftrace_replace_code(int enable) new_addr = ftrace_get_addr_new(rec); update = ftrace_update_record(rec, enable); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && update != FTRACE_UPDATE_IGNORE) { + ip = ftrace_get_ool_stub(rec) + MCOUNT_INSN_SIZE; + ret = ftrace_get_call_inst(rec, (unsigned long)ftrace_caller, &nop_inst); + if (ret) + goto out; + } + switch (update) { case FTRACE_UPDATE_IGNORE: default: @@ -230,6 +349,24 @@ void ftrace_replace_code(int enable) if (!ret) ret = ftrace_modify_code(ip, old, new); + + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) && + (update == FTRACE_UPDATE_MAKE_NOP || update == FTRACE_UPDATE_MAKE_CALL)) { + /* Update the actual ftrace location */ + call_inst = ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - + (long)rec->ip)); + nop_inst = ppc_inst(PPC_RAW_NOP()); + ip = rec->ip; + + if (update == FTRACE_UPDATE_MAKE_NOP) + ret = ftrace_modify_code(ip, call_inst, nop_inst); + else + ret = ftrace_modify_code(ip, nop_inst, call_inst); + + if (ret) + goto out; + } + if (ret) goto out; } @@ -249,7 +386,8 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) /* Verify instructions surrounding the ftrace location */ if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY)) { /* Expect nops */ - ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); + if (!IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + ret = ftrace_validate_inst(ip - 4, ppc_inst(PPC_RAW_NOP())); if (!ret) ret = ftrace_validate_inst(ip, ppc_inst(PPC_RAW_NOP())); } else if (IS_ENABLED(CONFIG_PPC32)) { @@ -277,6 +415,10 @@ int ftrace_init_nop(struct module *mod, struct dyn_ftrace *rec) if (ret) return ret; + /* Set up out-of-line stub */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return ftrace_init_ool_stub(mod, rec); + /* Nop-out the ftrace location */ new = ppc_inst(PPC_RAW_NOP()); addr = MCOUNT_ADDR; diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 244a1c7bb1e8e..5b2fc6483dce9 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -56,7 +56,7 @@ SAVE_GPR(2, r1) SAVE_GPRS(11, 31, r1) .else -#ifdef CONFIG_LIVEPATCH_64 +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) SAVE_GPR(14, r1) #endif .endif @@ -78,10 +78,6 @@ /* Get the _mcount() call site out of LR */ mflr r7 - /* Save it as pt_regs->nip */ - PPC_STL r7, _NIP(r1) - /* Also save it in B's stackframe header for proper unwind */ - PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) /* Save the read LR in pt_regs->link */ PPC_STL r0, _LINK(r1) @@ -96,16 +92,6 @@ lwz r5,function_trace_op@l(r3) #endif -#ifdef CONFIG_LIVEPATCH_64 - mr r14, r7 /* remember old NIP */ -#endif - - /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE - - /* Put the original return address in r4 as parent_ip */ - mr r4, r0 - /* Save special regs */ PPC_STL r8, _MSR(r1) .if \allregs == 1 @@ -114,17 +100,69 @@ PPC_STL r11, _CCR(r1) .endif +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* Save our real return address in nvr for return */ + .if \allregs == 0 + SAVE_GPR(15, r1) + .endif + mr r15, r7 + /* + * We want the ftrace location in the function, but our lr (in r7) + * points at the 'mtlr r0' instruction in the out of line stub. To + * recover the ftrace location, we read the branch instruction in the + * stub, and adjust our lr by the branch offset. + * + * See ftrace_init_ool_stub() for the profile sequence. + */ + lwz r8, MCOUNT_INSN_SIZE(r7) + slwi r8, r8, 6 + srawi r8, r8, 6 + add r3, r7, r8 + /* + * Override our nip to point past the branch in the original function. + * This allows reliable stack trace and the ftrace stack tracer to work as-is. + */ + addi r7, r3, MCOUNT_INSN_SIZE +#else + /* Calculate ip from nip-4 into r3 for call below */ + subi r3, r7, MCOUNT_INSN_SIZE +#endif + + /* Save NIP as pt_regs->nip */ + PPC_STL r7, _NIP(r1) + /* Also save it in B's stackframe header for proper unwind */ + PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) + mr r14, r7 /* remember old NIP */ +#endif + + /* Put the original return address in r4 as parent_ip */ + mr r4, r0 + /* Load &pt_regs in r6 for call below */ addi r6, r1, STACK_INT_FRAME_REGS .endm .macro ftrace_regs_exit allregs +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE /* Load ctr with the possibly modified NIP */ PPC_LL r3, _NIP(r1) mtctr r3 #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ +#endif +#else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ + /* Load LR with the possibly modified NIP */ + PPC_LL r3, _NIP(r1) + cmpd r14, r3 /* has NIP been altered? */ + bne- 1f + + mr r3, r15 + .if \allregs == 0 + REST_GPR(15, r1) + .endif +1: mtlr r3 #endif /* Restore gprs */ @@ -132,14 +170,16 @@ REST_GPRS(2, 31, r1) .else REST_GPRS(3, 10, r1) -#ifdef CONFIG_LIVEPATCH_64 +#if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) REST_GPR(14, r1) #endif .endif /* Restore possibly modified LR */ PPC_LL r0, _LINK(r1) +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE mtlr r0 +#endif #ifdef CONFIG_PPC64 /* Restore callee's TOC */ @@ -153,7 +193,16 @@ /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif - bctr /* jump after _mcount site */ + /* jump after _mcount site */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + /* + * Return with blr to keep the link stack balanced. The function profiling sequence + * uses 'mtlr r0' to restore LR. + */ + blr +#else + bctr +#endif .endm _GLOBAL(ftrace_regs_caller) @@ -177,6 +226,11 @@ _GLOBAL(ftrace_stub) #ifdef CONFIG_PPC64 ftrace_no_trace: +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE + REST_GPR(3, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + blr +#else mflr r3 mtctr r3 REST_GPR(3, r1) @@ -184,6 +238,7 @@ ftrace_no_trace: mtlr r0 bctr #endif +#endif #ifdef CONFIG_LIVEPATCH_64 /* @@ -194,11 +249,17 @@ ftrace_no_trace: * We get here when a function A, calls another function B, but B has * been live patched with a new function C. * - * On entry: - * - we have no stack frame and can not allocate one + * On entry, we have no stack frame and can not allocate one. + * + * With PPC_FTRACE_OUT_OF_LINE=n, on entry: * - LR points back to the original caller (in A) * - CTR holds the new NIP in C * - r0, r11 & r12 are free + * + * With PPC_FTRACE_OUT_OF_LINE=y, on entry: + * - r0 points back to the original caller (in A) + * - LR holds the new NIP in C + * - r11 & r12 are free */ livepatch_handler: ld r12, PACA_THREAD_INFO(r13) @@ -208,18 +269,23 @@ livepatch_handler: addi r11, r11, 24 std r11, TI_livepatch_sp(r12) - /* Save toc & real LR on livepatch stack */ - std r2, -24(r11) - mflr r12 - std r12, -16(r11) - /* Store stack end marker */ lis r12, STACK_END_MAGIC@h ori r12, r12, STACK_END_MAGIC@l std r12, -8(r11) - /* Put ctr in r12 for global entry and branch there */ + /* Save toc & real LR on livepatch stack */ + std r2, -24(r11) +#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE + mflr r12 + std r12, -16(r11) mfctr r12 +#else + std r0, -16(r11) + mflr r12 + /* Put ctr in r12 for global entry and branch there */ + mtctr r12 +#endif bctrl /* diff --git a/arch/powerpc/tools/.gitignore b/arch/powerpc/tools/.gitignore new file mode 100644 index 0000000000000..ec380a14a09aa --- /dev/null +++ b/arch/powerpc/tools/.gitignore @@ -0,0 +1,2 @@ +# SPDX-License-Identifier: GPL-2.0-only +/vmlinux.arch.S diff --git a/arch/powerpc/tools/Makefile b/arch/powerpc/tools/Makefile new file mode 100644 index 0000000000000..d2e7ecd5f46f9 --- /dev/null +++ b/arch/powerpc/tools/Makefile @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-or-later + +quiet_cmd_gen_ftrace_ool_stubs = GEN $@ + cmd_gen_ftrace_ool_stubs = $< "$(CONFIG_64BIT)" "$(OBJDUMP)" vmlinux.o $@ + +$(obj)/vmlinux.arch.S: $(src)/ftrace-gen-ool-stubs.sh vmlinux.o FORCE + $(call if_changed,gen_ftrace_ool_stubs) + +targets += vmlinux.arch.S diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh new file mode 100755 index 0000000000000..96e1ca5803e4b --- /dev/null +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -0,0 +1,41 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +# Error out on error +set -e + +is_64bit="$1" +objdump="$2" +vmlinux_o="$3" +arch_vmlinux_S="$4" + +RELOCATION=R_PPC64_ADDR64 +if [ -z "$is_64bit" ]; then + RELOCATION=R_PPC_ADDR32 +fi + +num_ool_stubs_text=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | + grep -v ".init.text" | grep -c "$RELOCATION") +num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | + grep ".init.text" | grep -c "$RELOCATION") + +cat > "$arch_vmlinux_S" < +#include + +.pushsection .tramp.ftrace.text,"aw" +SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text) + +SYM_CODE_START(ftrace_ool_stub_text_end) + .space $num_ool_stubs_text * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_text_end) +.popsection + +.pushsection .tramp.ftrace.init,"aw" +SYM_DATA(ftrace_ool_stub_inittext_count, .long $num_ool_stubs_inittext) + +SYM_CODE_START(ftrace_ool_stub_inittext) + .space $num_ool_stubs_inittext * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_inittext) +.popsection +EOF From cf9bc0efcce2c324314cf7f5138c08f85ef7b5eb Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:46 +0530 Subject: [PATCH 20/69] powerpc64/ftrace: Support .text larger than 32MB with out-of-line stubs We are restricted to a .text size of ~32MB when using out-of-line function profile sequence. Allow this to be extended up to the previous limit of ~64MB by reserving space in the middle of .text. A new config option CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE is introduced to specify the number of function stubs that are reserved in .text. On boot, ftrace utilizes stubs from this area first before using the stub area at the end of .text. A ppc64le defconfig has ~44k functions that can be traced. A more conservative value of 32k functions is chosen as the default value of PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE so that we do not allot more space than necessary by default. If building a kernel that only has 32k trace-able functions, we won't allot any more space at the end of .text during the pass on vmlinux.o. Otherwise, only the remaining functions get space for stubs at the end of .text. This default value should help cover a .text size of ~48MB in total (including space reserved at the end of .text which can cover up to 32MB), which should be sufficient for most common builds. For a very small kernel build, this can be set to 0. Or, this can be bumped up to a larger value to support vmlinux .text size up to ~64MB. Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-14-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 12 ++++++++++++ arch/powerpc/include/asm/ftrace.h | 6 ++++-- arch/powerpc/kernel/trace/ftrace.c | 21 +++++++++++++++++---- arch/powerpc/kernel/trace/ftrace_entry.S | 8 ++++++++ arch/powerpc/tools/Makefile | 3 ++- arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 21 +++++++++++++++------ 6 files changed, 58 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index c995f3fe19e9a..50d418f72ee8e 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -573,6 +573,18 @@ config PPC_FTRACE_OUT_OF_LINE def_bool PPC64 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY select ARCH_WANTS_PRE_LINK_VMLINUX +config PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE + int "Number of ftrace out-of-line stubs to reserve within .text" + depends on PPC_FTRACE_OUT_OF_LINE + default 32768 + help + Number of stubs to reserve for use by ftrace. This space is + reserved within .text, and is distinct from any additional space + added at the end of .text before the final vmlinux link. Set to + zero to have stubs only be generated at the end of vmlinux (only + if the size of vmlinux is less than 32MB). Set to a higher value + if building vmlinux larger than 48MB. + config HOTPLUG_CPU bool "Support for enabling/disabling CPUs" depends on SMP && (PPC_PSERIES || \ diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index bdbafc668b20a..28f3590ca7808 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -138,8 +138,10 @@ extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; struct ftrace_ool_stub { u32 insn[4]; }; -extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_inittext[]; -extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_inittext_count; +extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_text[], + ftrace_ool_stub_inittext[]; +extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, + ftrace_ool_stub_inittext_count; #endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 1fee074388cc6..bee2c54a8c047 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -168,7 +168,7 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) { #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE - static int ool_stub_text_end_index, ool_stub_inittext_index; + static int ool_stub_text_index, ool_stub_text_end_index, ool_stub_inittext_index; int ret = 0, ool_stub_count, *ool_stub_index; ppc_inst_t inst; /* @@ -191,9 +191,22 @@ static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) ool_stub_index = &ool_stub_inittext_index; ool_stub_count = ftrace_ool_stub_inittext_count; } else if (is_kernel_text(rec->ip)) { - ool_stub = ftrace_ool_stub_text_end; - ool_stub_index = &ool_stub_text_end_index; - ool_stub_count = ftrace_ool_stub_text_end_count; + /* + * ftrace records are sorted, so we first use up the stub area within .text + * (ftrace_ool_stub_text) before using the area at the end of .text + * (ftrace_ool_stub_text_end), unless the stub is out of range of the record. + */ + if (ool_stub_text_index >= ftrace_ool_stub_text_count || + !is_offset_in_branch_range((long)rec->ip - + (long)&ftrace_ool_stub_text[ool_stub_text_index])) { + ool_stub = ftrace_ool_stub_text_end; + ool_stub_index = &ool_stub_text_end_index; + ool_stub_count = ftrace_ool_stub_text_end_count; + } else { + ool_stub = ftrace_ool_stub_text; + ool_stub_index = &ool_stub_text_index; + ool_stub_count = ftrace_ool_stub_text_count; + } #ifdef CONFIG_MODULES } else if (mod) { ool_stub = mod->arch.ool_stubs; diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index 5b2fc6483dce9..a6bf7f8410403 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -374,6 +374,14 @@ _GLOBAL(return_to_handler) blr #endif /* CONFIG_FUNCTION_GRAPH_TRACER */ +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE) + +SYM_CODE_START(ftrace_ool_stub_text) + .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE +SYM_CODE_END(ftrace_ool_stub_text) +#endif + .pushsection ".tramp.ftrace.text","aw",@progbits; .globl ftrace_tramp_text ftrace_tramp_text: diff --git a/arch/powerpc/tools/Makefile b/arch/powerpc/tools/Makefile index d2e7ecd5f46f9..e1f7afcd9fdfd 100644 --- a/arch/powerpc/tools/Makefile +++ b/arch/powerpc/tools/Makefile @@ -1,7 +1,8 @@ # SPDX-License-Identifier: GPL-2.0-or-later quiet_cmd_gen_ftrace_ool_stubs = GEN $@ - cmd_gen_ftrace_ool_stubs = $< "$(CONFIG_64BIT)" "$(OBJDUMP)" vmlinux.o $@ + cmd_gen_ftrace_ool_stubs = $< "$(CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE)" "$(CONFIG_64BIT)" \ + "$(OBJDUMP)" vmlinux.o $@ $(obj)/vmlinux.arch.S: $(src)/ftrace-gen-ool-stubs.sh vmlinux.o FORCE $(call if_changed,gen_ftrace_ool_stubs) diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh index 96e1ca5803e4b..6a201df83524e 100755 --- a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -4,10 +4,11 @@ # Error out on error set -e -is_64bit="$1" -objdump="$2" -vmlinux_o="$3" -arch_vmlinux_S="$4" +num_ool_stubs_text_builtin="$1" +is_64bit="$2" +objdump="$3" +vmlinux_o="$4" +arch_vmlinux_S="$5" RELOCATION=R_PPC64_ADDR64 if [ -z "$is_64bit" ]; then @@ -19,15 +20,23 @@ num_ool_stubs_text=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | grep ".init.text" | grep -c "$RELOCATION") +if [ "$num_ool_stubs_text" -gt "$num_ool_stubs_text_builtin" ]; then + num_ool_stubs_text_end=$((num_ool_stubs_text - num_ool_stubs_text_builtin)) +else + num_ool_stubs_text_end=0 +fi + cat > "$arch_vmlinux_S" < #include .pushsection .tramp.ftrace.text,"aw" -SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text) +SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text_end) SYM_CODE_START(ftrace_ool_stub_text_end) - .space $num_ool_stubs_text * FTRACE_OOL_STUB_SIZE +#if $num_ool_stubs_text_end + .space $num_ool_stubs_text_end * FTRACE_OOL_STUB_SIZE +#endif SYM_CODE_END(ftrace_ool_stub_text_end) .popsection From e717754f0bb5c5347aac82232691340955735ce1 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:47 +0530 Subject: [PATCH 21/69] powerpc/ftrace: Add support for DYNAMIC_FTRACE_WITH_CALL_OPS Implement support for DYNAMIC_FTRACE_WITH_CALL_OPS similar to the arm64 implementation. This works by patching-in a pointer to an associated ftrace_ops structure before each traceable function. If multiple ftrace_ops are associated with a call site, then a special ftrace_list_ops is used to enable iterating over all the registered ftrace_ops. If no ftrace_ops are associated with a call site, then a special ftrace_nop_ops structure is used to render the ftrace call as a no-op. ftrace trampoline can then read the associated ftrace_ops for a call site by loading from an offset from the LR, and branch directly to the associated function. The primary advantage with this approach is that we don't have to iterate over all the registered ftrace_ops for call sites that have a single ftrace_ops registered. This is the equivalent of implementing support for dynamic ftrace trampolines, which set up a special ftrace trampoline for each registered ftrace_ops and have individual call sites branch into those directly. A secondary advantage is that this gives us a way to add support for direct ftrace callers without having to resort to using stubs. The address of the direct call trampoline can be loaded from the ftrace_ops structure. To support this, we reserve a nop before each function on 32-bit powerpc. For 64-bit powerpc, two nops are reserved before each out-of-line stub. During ftrace activation, we update this location with the associated ftrace_ops pointer. Then, on ftrace entry, we load from this location and call into ftrace_ops->func(). For 64-bit powerpc, we ensure that the out-of-line stub area is doubleword aligned so that ftrace_ops address can be updated atomically. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-15-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/Makefile | 4 ++ arch/powerpc/include/asm/ftrace.h | 5 +- arch/powerpc/kernel/asm-offsets.c | 4 ++ arch/powerpc/kernel/trace/ftrace.c | 59 +++++++++++++++++++++- arch/powerpc/kernel/trace/ftrace_entry.S | 36 ++++++++++--- arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 5 +- 7 files changed, 102 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 50d418f72ee8e..79bd5a375527d 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -234,6 +234,7 @@ config PPC select HAVE_DEBUG_STACKOVERFLOW select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 + select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if PPC_FTRACE_OUT_OF_LINE || (PPC32 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY) select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index a52167830e8b6..99af7953e8442 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -151,8 +151,12 @@ KBUILD_CPPFLAGS += -DCC_USING_PATCHABLE_FUNCTION_ENTRY ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE CC_FLAGS_FTRACE := -fpatchable-function-entry=1 else +ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS # PPC32 only +CC_FLAGS_FTRACE := -fpatchable-function-entry=3,1 +else CC_FLAGS_FTRACE := -fpatchable-function-entry=2 endif +endif else CC_FLAGS_FTRACE := -pg ifdef CONFIG_MPROFILE_KERNEL diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 28f3590ca7808..1ad1328cf4e31 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -136,8 +136,11 @@ static inline u8 this_cpu_get_ftrace_enabled(void) { return 1; } extern unsigned int ftrace_tramp_text[], ftrace_tramp_init[]; #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE struct ftrace_ool_stub { +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + struct ftrace_ops *ftrace_op; +#endif u32 insn[4]; -}; +} __aligned(sizeof(unsigned long)); extern struct ftrace_ool_stub ftrace_ool_stub_text_end[], ftrace_ool_stub_text[], ftrace_ool_stub_inittext[]; extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 340f69bfcaa7f..318349f78820e 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -679,5 +679,9 @@ int main(void) DEFINE(FTRACE_OOL_STUB_SIZE, sizeof(struct ftrace_ool_stub)); #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#endif + return 0; } diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index bee2c54a8c047..9090d1a21600e 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -38,8 +38,11 @@ unsigned long ftrace_call_adjust(unsigned long addr) return 0; if (IS_ENABLED(CONFIG_ARCH_USING_PATCHABLE_FUNCTION_ENTRY) && - !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + !IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { addr += MCOUNT_INSN_SIZE; + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + addr += MCOUNT_INSN_SIZE; + } return addr; } @@ -264,6 +267,46 @@ static int ftrace_init_ool_stub(struct module *mod, struct dyn_ftrace *rec) #endif } +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS +static const struct ftrace_ops *powerpc_rec_get_ops(struct dyn_ftrace *rec) +{ + const struct ftrace_ops *ops = NULL; + + if (rec->flags & FTRACE_FL_CALL_OPS_EN) { + ops = ftrace_find_unique_ops(rec); + WARN_ON_ONCE(!ops); + } + + if (!ops) + ops = &ftrace_list_ops; + + return ops; +} + +static int ftrace_rec_set_ops(struct dyn_ftrace *rec, const struct ftrace_ops *ops) +{ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + return patch_ulong((void *)(ftrace_get_ool_stub(rec) - sizeof(unsigned long)), + (unsigned long)ops); + else + return patch_ulong((void *)(rec->ip - MCOUNT_INSN_SIZE - sizeof(unsigned long)), + (unsigned long)ops); +} + +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, &ftrace_nop_ops); +} + +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) +{ + return ftrace_rec_set_ops(rec, powerpc_rec_get_ops(rec)); +} +#else +static int ftrace_rec_set_nop_ops(struct dyn_ftrace *rec) { return 0; } +static int ftrace_rec_update_ops(struct dyn_ftrace *rec) { return 0; } +#endif + #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) { @@ -294,6 +337,10 @@ int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) if (!ret) ret = ftrace_modify_code(ip, old, new); + ret = ftrace_rec_update_ops(rec); + if (ret) + return ret; + if (!ret && IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) ret = ftrace_modify_code(rec->ip, ppc_inst(PPC_RAW_NOP()), ppc_inst(PPC_RAW_BRANCH((long)ftrace_get_ool_stub(rec) - (long)rec->ip))); @@ -345,16 +392,19 @@ void ftrace_replace_code(int enable) case FTRACE_UPDATE_MODIFY_CALL: ret = ftrace_get_call_inst(rec, new_addr, &new_call_inst); ret |= ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); old = call_inst; new = new_call_inst; break; case FTRACE_UPDATE_MAKE_NOP: ret = ftrace_get_call_inst(rec, addr, &call_inst); + ret |= ftrace_rec_set_nop_ops(rec); old = call_inst; new = nop_inst; break; case FTRACE_UPDATE_MAKE_CALL: ret = ftrace_get_call_inst(rec, new_addr, &call_inst); + ret |= ftrace_rec_update_ops(rec); old = nop_inst; new = call_inst; break; @@ -470,6 +520,13 @@ int ftrace_update_ftrace_func(ftrace_func_t func) ppc_inst_t old, new; int ret; + /* + * When using CALL_OPS, the function to call is associated with the + * call site, and we don't have a global function pointer to update. + */ + if (IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS)) + return 0; + old = ppc_inst_read((u32 *)&ftrace_call); new = ftrace_create_branch_inst(ip, ppc_function_entry(func), 1); ret = ftrace_modify_code(ip, old, new); diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index a6bf7f8410403..ff376c9903081 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -85,11 +85,21 @@ /* Save callee's TOC in the ABI compliant location */ std r2, STK_GOT(r1) LOAD_PACA_TOC() /* get kernel TOC in r2 */ +#endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + /* r7 points to the instruction following the call to ftrace */ + PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r7) + PPC_LL r12, FTRACE_OPS_FUNC(r5) + mtctr r12 +#else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */ +#ifdef CONFIG_PPC64 LOAD_REG_ADDR(r3, function_trace_op) ld r5,0(r3) #else lis r3,function_trace_op@ha lwz r5,function_trace_op@l(r3) +#endif #endif /* Save special regs */ @@ -205,20 +215,30 @@ #endif .endm -_GLOBAL(ftrace_regs_caller) - ftrace_regs_entry 1 - /* ftrace_call(r3, r4, r5, r6) */ +.macro ftrace_regs_func allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS + bctrl +#else + .if \allregs == 1 .globl ftrace_regs_call ftrace_regs_call: + .else +.globl ftrace_call +ftrace_call: + .endif + /* ftrace_call(r3, r4, r5, r6) */ bl ftrace_stub +#endif +.endm + +_GLOBAL(ftrace_regs_caller) + ftrace_regs_entry 1 + ftrace_regs_func 1 ftrace_regs_exit 1 _GLOBAL(ftrace_caller) ftrace_regs_entry 0 - /* ftrace_call(r3, r4, r5, r6) */ -.globl ftrace_call -ftrace_call: - bl ftrace_stub + ftrace_regs_func 0 ftrace_regs_exit 0 _GLOBAL(ftrace_stub) @@ -377,7 +397,7 @@ _GLOBAL(return_to_handler) #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE SYM_DATA(ftrace_ool_stub_text_count, .long CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE) -SYM_CODE_START(ftrace_ool_stub_text) +SYM_START(ftrace_ool_stub_text, SYM_L_GLOBAL, .balign SZL) .space CONFIG_PPC_FTRACE_OUT_OF_LINE_NUM_RESERVE * FTRACE_OOL_STUB_SIZE SYM_CODE_END(ftrace_ool_stub_text) #endif diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh index 6a201df83524e..950a7778324b6 100755 --- a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -28,12 +28,13 @@ fi cat > "$arch_vmlinux_S" < +#include #include .pushsection .tramp.ftrace.text,"aw" SYM_DATA(ftrace_ool_stub_text_end_count, .long $num_ool_stubs_text_end) -SYM_CODE_START(ftrace_ool_stub_text_end) +SYM_START(ftrace_ool_stub_text_end, SYM_L_GLOBAL, .balign SZL) #if $num_ool_stubs_text_end .space $num_ool_stubs_text_end * FTRACE_OOL_STUB_SIZE #endif @@ -43,7 +44,7 @@ SYM_CODE_END(ftrace_ool_stub_text_end) .pushsection .tramp.ftrace.init,"aw" SYM_DATA(ftrace_ool_stub_inittext_count, .long $num_ool_stubs_inittext) -SYM_CODE_START(ftrace_ool_stub_inittext) +SYM_START(ftrace_ool_stub_inittext, SYM_L_GLOBAL, .balign SZL) .space $num_ool_stubs_inittext * FTRACE_OOL_STUB_SIZE SYM_CODE_END(ftrace_ool_stub_inittext) .popsection From a52f6043a2238d656ddd23ce0499cf4f12645faa Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:48 +0530 Subject: [PATCH 22/69] powerpc/ftrace: Add support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS Add support for DYNAMIC_FTRACE_WITH_DIRECT_CALLS similar to the arm64 implementation. ftrace direct calls allow custom trampolines to be called into directly from function ftrace call sites, bypassing the ftrace trampoline completely. This functionality is currently utilized by BPF trampolines to hook into kernel function entries. Since we have limited relative branch range, we support ftrace direct calls through support for DYNAMIC_FTRACE_WITH_CALL_OPS. In this approach, ftrace trampoline is not entirely bypassed. Rather, it is re-purposed into a stub that reads direct_call field from the associated ftrace_ops structure and branches into that, if it is not NULL. For this, it is sufficient if we can ensure that the ftrace trampoline is reachable from all traceable functions. When multiple ftrace_ops are associated with a call site, we utilize a call back to set pt_regs->orig_gpr3 that can then be tested on the return path from the ftrace trampoline to branch into the direct caller. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-16-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/ftrace.h | 16 ++++ arch/powerpc/kernel/asm-offsets.c | 3 + arch/powerpc/kernel/trace/ftrace.c | 11 +++ arch/powerpc/kernel/trace/ftrace_entry.S | 114 +++++++++++++++++------ 5 files changed, 116 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 79bd5a375527d..73e9e42b2e756 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -235,6 +235,7 @@ config PPC select HAVE_DYNAMIC_FTRACE select HAVE_DYNAMIC_FTRACE_WITH_ARGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS if PPC_FTRACE_OUT_OF_LINE || (PPC32 && ARCH_USING_PATCHABLE_FUNCTION_ENTRY) + select HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS if HAVE_DYNAMIC_FTRACE_WITH_CALL_OPS select HAVE_DYNAMIC_FTRACE_WITH_REGS if ARCH_USING_PATCHABLE_FUNCTION_ENTRY || MPROFILE_KERNEL || PPC32 select HAVE_EBPF_JIT select HAVE_EFFICIENT_UNALIGNED_ACCESS diff --git a/arch/powerpc/include/asm/ftrace.h b/arch/powerpc/include/asm/ftrace.h index 1ad1328cf4e31..5eb7631355a16 100644 --- a/arch/powerpc/include/asm/ftrace.h +++ b/arch/powerpc/include/asm/ftrace.h @@ -148,6 +148,22 @@ extern unsigned int ftrace_ool_stub_text_end_count, ftrace_ool_stub_text_count, #endif void ftrace_free_init_tramp(void); unsigned long ftrace_call_adjust(unsigned long addr); + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +/* + * When an ftrace registered caller is tracing a function that is also set by a + * register_ftrace_direct() call, it needs to be differentiated in the + * ftrace_caller trampoline so that the direct call can be invoked after the + * other ftrace ops. To do this, place the direct caller in the orig_gpr3 field + * of pt_regs. This tells ftrace_caller that there's a direct caller. + */ +static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs, unsigned long addr) +{ + struct pt_regs *regs = &fregs->regs; + + regs->orig_gpr3 = addr; +} +#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */ #else static inline void ftrace_free_init_tramp(void) { } static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 318349f78820e..ae198b2d9b8c6 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -681,6 +681,9 @@ int main(void) #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS OFFSET(FTRACE_OPS_FUNC, ftrace_ops, func); +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + OFFSET(FTRACE_OPS_DIRECT_CALL, ftrace_ops, direct_call); +#endif #endif return 0; diff --git a/arch/powerpc/kernel/trace/ftrace.c b/arch/powerpc/kernel/trace/ftrace.c index 9090d1a21600e..051f3db146066 100644 --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -150,6 +150,17 @@ static int ftrace_get_call_inst(struct dyn_ftrace *rec, unsigned long addr, ppc_ else ip = rec->ip; + if (!is_offset_in_branch_range(addr - ip) && addr != FTRACE_ADDR && + addr != FTRACE_REGS_ADDR) { + /* This can only happen with ftrace direct */ + if (!IS_ENABLED(CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS)) { + pr_err("0x%lx (0x%lx): Unexpected target address 0x%lx\n", + ip, rec->ip, addr); + return -EINVAL; + } + addr = FTRACE_ADDR; + } + if (is_offset_in_branch_range(addr - ip)) /* Within range */ stub = addr; diff --git a/arch/powerpc/kernel/trace/ftrace_entry.S b/arch/powerpc/kernel/trace/ftrace_entry.S index ff376c9903081..2c1b24100eca2 100644 --- a/arch/powerpc/kernel/trace/ftrace_entry.S +++ b/arch/powerpc/kernel/trace/ftrace_entry.S @@ -33,14 +33,38 @@ * and then arrange for the ftrace function to be called. */ .macro ftrace_regs_entry allregs - /* Save the original return address in A's stack frame */ - PPC_STL r0, LRSAVE(r1) /* Create a minimal stack frame for representing B */ PPC_STLU r1, -STACK_FRAME_MIN_SIZE(r1) /* Create our stack frame + pt_regs */ PPC_STLU r1,-SWITCH_FRAME_SIZE(r1) + .if \allregs == 1 + SAVE_GPRS(11, 12, r1) + .endif + + /* Get the _mcount() call site out of LR */ + mflr r11 + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Load the ftrace_op */ + PPC_LL r12, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) + + /* Load direct_call from the ftrace_op */ + PPC_LL r12, FTRACE_OPS_DIRECT_CALL(r12) + PPC_LCMPI r12, 0 + .if \allregs == 1 + bne .Lftrace_direct_call_regs + .else + bne .Lftrace_direct_call + .endif +#endif + + /* Save the previous LR in pt_regs->link */ + PPC_STL r0, _LINK(r1) + /* Also save it in A's stack frame */ + PPC_STL r0, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE+LRSAVE(r1) + /* Save all gprs to pt_regs */ SAVE_GPR(0, r1) SAVE_GPRS(3, 10, r1) @@ -54,7 +78,7 @@ .if \allregs == 1 SAVE_GPR(2, r1) - SAVE_GPRS(11, 31, r1) + SAVE_GPRS(13, 31, r1) .else #if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) SAVE_GPR(14, r1) @@ -67,20 +91,15 @@ .if \allregs == 1 /* Load special regs for save below */ + mfcr r7 mfmsr r8 mfctr r9 mfxer r10 - mfcr r11 .else /* Clear MSR to flag as ftrace_caller versus frace_regs_caller */ li r8, 0 .endif - /* Get the _mcount() call site out of LR */ - mflr r7 - /* Save the read LR in pt_regs->link */ - PPC_STL r0, _LINK(r1) - #ifdef CONFIG_PPC64 /* Save callee's TOC in the ABI compliant location */ std r2, STK_GOT(r1) @@ -88,8 +107,8 @@ #endif #ifdef CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS - /* r7 points to the instruction following the call to ftrace */ - PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r7) + /* r11 points to the instruction following the call to ftrace */ + PPC_LL r5, -(MCOUNT_INSN_SIZE*2 + SZL)(r11) PPC_LL r12, FTRACE_OPS_FUNC(r5) mtctr r12 #else /* !CONFIG_DYNAMIC_FTRACE_WITH_CALL_OPS */ @@ -105,45 +124,51 @@ /* Save special regs */ PPC_STL r8, _MSR(r1) .if \allregs == 1 + PPC_STL r7, _CCR(r1) PPC_STL r9, _CTR(r1) PPC_STL r10, _XER(r1) - PPC_STL r11, _CCR(r1) .endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Clear orig_gpr3 to later detect ftrace_direct call */ + li r7, 0 + PPC_STL r7, ORIG_GPR3(r1) +#endif + #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE /* Save our real return address in nvr for return */ .if \allregs == 0 SAVE_GPR(15, r1) .endif - mr r15, r7 + mr r15, r11 /* - * We want the ftrace location in the function, but our lr (in r7) + * We want the ftrace location in the function, but our lr (in r11) * points at the 'mtlr r0' instruction in the out of line stub. To * recover the ftrace location, we read the branch instruction in the * stub, and adjust our lr by the branch offset. * * See ftrace_init_ool_stub() for the profile sequence. */ - lwz r8, MCOUNT_INSN_SIZE(r7) + lwz r8, MCOUNT_INSN_SIZE(r11) slwi r8, r8, 6 srawi r8, r8, 6 - add r3, r7, r8 + add r3, r11, r8 /* * Override our nip to point past the branch in the original function. * This allows reliable stack trace and the ftrace stack tracer to work as-is. */ - addi r7, r3, MCOUNT_INSN_SIZE + addi r11, r3, MCOUNT_INSN_SIZE #else /* Calculate ip from nip-4 into r3 for call below */ - subi r3, r7, MCOUNT_INSN_SIZE + subi r3, r11, MCOUNT_INSN_SIZE #endif /* Save NIP as pt_regs->nip */ - PPC_STL r7, _NIP(r1) + PPC_STL r11, _NIP(r1) /* Also save it in B's stackframe header for proper unwind */ - PPC_STL r7, LRSAVE+SWITCH_FRAME_SIZE(r1) + PPC_STL r11, LRSAVE+SWITCH_FRAME_SIZE(r1) #if defined(CONFIG_LIVEPATCH_64) || defined(CONFIG_PPC_FTRACE_OUT_OF_LINE) - mr r14, r7 /* remember old NIP */ + mr r14, r11 /* remember old NIP */ #endif /* Put the original return address in r4 as parent_ip */ @@ -154,14 +179,32 @@ .endm .macro ftrace_regs_exit allregs +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + /* Check orig_gpr3 to detect ftrace_direct call */ + PPC_LL r3, ORIG_GPR3(r1) + PPC_LCMPI cr1, r3, 0 + mtctr r3 +#endif + + /* Restore possibly modified LR */ + PPC_LL r0, _LINK(r1) + #ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE /* Load ctr with the possibly modified NIP */ PPC_LL r3, _NIP(r1) - mtctr r3 - #ifdef CONFIG_LIVEPATCH_64 cmpd r14, r3 /* has NIP been altered? */ #endif + +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + beq cr1,2f + mtlr r3 + b 3f +#endif +2: mtctr r3 + mtlr r0 +3: + #else /* !CONFIG_PPC_FTRACE_OUT_OF_LINE */ /* Load LR with the possibly modified NIP */ PPC_LL r3, _NIP(r1) @@ -185,12 +228,6 @@ #endif .endif - /* Restore possibly modified LR */ - PPC_LL r0, _LINK(r1) -#ifndef CONFIG_PPC_FTRACE_OUT_OF_LINE - mtlr r0 -#endif - #ifdef CONFIG_PPC64 /* Restore callee's TOC */ ld r2, STK_GOT(r1) @@ -203,8 +240,12 @@ /* Based on the cmpd above, if the NIP was altered handle livepatch */ bne- livepatch_handler #endif + /* jump after _mcount site */ #ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + bnectr cr1 +#endif /* * Return with blr to keep the link stack balanced. The function profiling sequence * uses 'mtlr r0' to restore LR. @@ -260,6 +301,21 @@ ftrace_no_trace: #endif #endif +#ifdef CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS +.Lftrace_direct_call_regs: + mtctr r12 + REST_GPRS(11, 12, r1) + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +.Lftrace_direct_call: + mtctr r12 + addi r1, r1, SWITCH_FRAME_SIZE+STACK_FRAME_MIN_SIZE + bctr +SYM_FUNC_START(ftrace_stub_direct_tramp) + blr +SYM_FUNC_END(ftrace_stub_direct_tramp) +#endif + #ifdef CONFIG_LIVEPATCH_64 /* * This function runs in the mcount context, between two functions. As From 71db948b9d2744e92124720f682ed2c26f0de75b Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:49 +0530 Subject: [PATCH 23/69] samples/ftrace: Add support for ftrace direct samples on powerpc Add powerpc 32-bit and 64-bit samples for ftrace direct. This serves to show the sample instruction sequence to be used by ftrace direct calls to adhere to the ftrace ABI. On 64-bit powerpc, TOC setup requires some additional work. Signed-off-by: Naveen N Rao Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-17-hbathini@linux.ibm.com --- arch/powerpc/Kconfig | 2 + samples/ftrace/ftrace-direct-modify.c | 85 +++++++++++++++- samples/ftrace/ftrace-direct-multi-modify.c | 101 +++++++++++++++++++- samples/ftrace/ftrace-direct-multi.c | 79 ++++++++++++++- samples/ftrace/ftrace-direct-too.c | 83 +++++++++++++++- samples/ftrace/ftrace-direct.c | 69 ++++++++++++- 6 files changed, 414 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 73e9e42b2e756..a9dd4c39ec009 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -275,6 +275,8 @@ config PPC select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_RELIABLE_STACKTRACE select HAVE_RSEQ + select HAVE_SAMPLE_FTRACE_DIRECT if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS + select HAVE_SAMPLE_FTRACE_DIRECT_MULTI if HAVE_DYNAMIC_FTRACE_WITH_DIRECT_CALLS select HAVE_SETUP_PER_CPU_AREA if PPC64 select HAVE_SOFTIRQ_ON_OWN_STACK select HAVE_STACKPROTECTOR if PPC32 && $(cc-option,$(m32-flag) -mstack-protector-guard=tls -mstack-protector-guard-reg=r2 -mstack-protector-guard-offset=0) diff --git a/samples/ftrace/ftrace-direct-modify.c b/samples/ftrace/ftrace-direct-modify.c index 81220390851a3..cfea7a38befb0 100644 --- a/samples/ftrace/ftrace-direct-modify.c +++ b/samples/ftrace/ftrace-direct-modify.c @@ -2,7 +2,7 @@ #include #include #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -199,6 +199,89 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE +" bl my_direct_func1\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE +" bl my_direct_func2\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static unsigned long my_tramp = (unsigned long)my_tramp1; diff --git a/samples/ftrace/ftrace-direct-multi-modify.c b/samples/ftrace/ftrace-direct-multi-modify.c index f943e40d57fd3..8f7986d698d87 100644 --- a/samples/ftrace/ftrace-direct-multi-modify.c +++ b/samples/ftrace/ftrace-direct-multi-modify.c @@ -2,7 +2,7 @@ #include #include #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -225,6 +225,105 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#define PPC_FTRACE_RECOVER_IP \ +" lwz 8, 4(3)\n" \ +" li 9, 6\n" \ +" slw 8, 8, 9\n" \ +" sraw 8, 8, 9\n" \ +" add 3, 3, 8\n" \ +" addi 3, 3, 4\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#define PPC_FTRACE_RECOVER_IP "" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp1, @function\n" +" .globl my_tramp1\n" +" my_tramp1:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func1\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET +" .size my_tramp1, .-my_tramp1\n" + +" .type my_tramp2, @function\n" +" .globl my_tramp2\n" +" my_tramp2:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func2\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC + " .size my_tramp2, .-my_tramp2\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static unsigned long my_tramp = (unsigned long)my_tramp1; static unsigned long tramps[2] = { (unsigned long)my_tramp1, diff --git a/samples/ftrace/ftrace-direct-multi.c b/samples/ftrace/ftrace-direct-multi.c index aed6df2927ce1..db326c81a27dd 100644 --- a/samples/ftrace/ftrace-direct-multi.c +++ b/samples/ftrace/ftrace-direct-multi.c @@ -4,7 +4,7 @@ #include /* for handle_mm_fault() */ #include #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -141,6 +141,83 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#define PPC_FTRACE_RECOVER_IP \ +" lwz 8, 4(3)\n" \ +" li 9, 6\n" \ +" slw 8, 8, 9\n" \ +" sraw 8, 8, 9\n" \ +" add 3, 3, 8\n" \ +" addi 3, 3, 4\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#define PPC_FTRACE_RECOVER_IP "" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mr 3, 0\n" + PPC_FTRACE_RECOVER_IP +" bl my_direct_func\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static int __init ftrace_direct_multi_init(void) diff --git a/samples/ftrace/ftrace-direct-too.c b/samples/ftrace/ftrace-direct-too.c index 6ff546a5d7eb0..3d0fa260332d4 100644 --- a/samples/ftrace/ftrace-direct-too.c +++ b/samples/ftrace/ftrace-direct-too.c @@ -3,7 +3,7 @@ #include /* for handle_mm_fault() */ #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -153,6 +153,87 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 64 +#define STACK_FRAME_ARG1 32 +#define STACK_FRAME_ARG2 40 +#define STACK_FRAME_ARG3 48 +#define STACK_FRAME_ARG4 56 +#else +#define STACK_FRAME_SIZE 32 +#define STACK_FRAME_ARG1 16 +#define STACK_FRAME_ARG2 20 +#define STACK_FRAME_ARG3 24 +#define STACK_FRAME_ARG4 28 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_ARG1)"(1)\n" + PPC_STL" 4, "__stringify(STACK_FRAME_ARG2)"(1)\n" + PPC_STL" 5, "__stringify(STACK_FRAME_ARG3)"(1)\n" + PPC_STL" 6, "__stringify(STACK_FRAME_ARG4)"(1)\n" +" bl my_direct_func\n" + PPC_LL" 6, "__stringify(STACK_FRAME_ARG4)"(1)\n" + PPC_LL" 5, "__stringify(STACK_FRAME_ARG3)"(1)\n" + PPC_LL" 4, "__stringify(STACK_FRAME_ARG2)"(1)\n" + PPC_LL" 3, "__stringify(STACK_FRAME_ARG1)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static int __init ftrace_direct_init(void) diff --git a/samples/ftrace/ftrace-direct.c b/samples/ftrace/ftrace-direct.c index ef0945670e1eb..956834b0d19ac 100644 --- a/samples/ftrace/ftrace-direct.c +++ b/samples/ftrace/ftrace-direct.c @@ -3,7 +3,7 @@ #include /* for wake_up_process() */ #include -#ifndef CONFIG_ARM64 +#if !defined(CONFIG_ARM64) && !defined(CONFIG_PPC32) #include #endif @@ -134,6 +134,73 @@ asm ( #endif /* CONFIG_LOONGARCH */ +#ifdef CONFIG_PPC +#include + +#ifdef CONFIG_PPC64 +#define STACK_FRAME_SIZE 48 +#else +#define STACK_FRAME_SIZE 24 +#endif + +#if defined(CONFIG_PPC64_ELF_ABI_V2) && !defined(CONFIG_PPC_KERNEL_PCREL) +#define PPC64_TOC_SAVE_AND_UPDATE \ +" std 2, 24(1)\n" \ +" bcl 20, 31, 1f\n" \ +" 1: mflr 12\n" \ +" ld 2, (99f - 1b)(12)\n" +#define PPC64_TOC_RESTORE \ +" ld 2, 24(1)\n" +#define PPC64_TOC \ +" 99: .quad .TOC.@tocbase\n" +#else +#define PPC64_TOC_SAVE_AND_UPDATE "" +#define PPC64_TOC_RESTORE "" +#define PPC64_TOC "" +#endif + +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtlr 0\n" +#define PPC_FTRACE_RET \ +" blr\n" +#else +#define PPC_FTRACE_RESTORE_LR \ + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" \ +" mtctr 0\n" +#define PPC_FTRACE_RET \ +" mtlr 0\n" \ +" bctr\n" +#endif + +asm ( +" .pushsection .text, \"ax\", @progbits\n" +" .type my_tramp, @function\n" +" .globl my_tramp\n" +" my_tramp:\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" mflr 0\n" + PPC_STL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_STLU" 1, -"__stringify(STACK_FRAME_SIZE)"(1)\n" + PPC64_TOC_SAVE_AND_UPDATE + PPC_STL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" +" bl my_direct_func\n" + PPC_LL" 3, "__stringify(STACK_FRAME_MIN_SIZE)"(1)\n" + PPC64_TOC_RESTORE +" addi 1, 1, "__stringify(STACK_FRAME_SIZE)"\n" + PPC_FTRACE_RESTORE_LR +" addi 1, 1, "__stringify(STACK_FRAME_MIN_SIZE)"\n" + PPC_LL" 0, "__stringify(PPC_LR_STKOFF)"(1)\n" + PPC_FTRACE_RET + PPC64_TOC +" .size my_tramp, .-my_tramp\n" +" .popsection\n" +); + +#endif /* CONFIG_PPC */ + static struct ftrace_ops direct; static int __init ftrace_direct_init(void) From d243b62b7bd3d5314382d3b54e4992226245e936 Mon Sep 17 00:00:00 2001 From: Naveen N Rao Date: Wed, 30 Oct 2024 12:38:50 +0530 Subject: [PATCH 24/69] powerpc64/bpf: Add support for bpf trampolines Add support for bpf_arch_text_poke() and arch_prepare_bpf_trampoline() for 64-bit powerpc. While the code is generic, BPF trampolines are only enabled on 64-bit powerpc. 32-bit powerpc will need testing and some updates. BPF Trampolines adhere to the existing ftrace ABI utilizing a two-instruction profiling sequence, as well as the newer ABI utilizing a three-instruction profiling sequence enabling return with a 'blr'. The trampoline code itself closely follows x86 implementation. BPF prog JIT is extended to mimic 64-bit powerpc approach for ftrace having a single nop at function entry, followed by the function profiling sequence out-of-line and a separate long branch stub for calls to trampolines that are out of range. A dummy_tramp is provided to simplify synchronization similar to arm64. When attaching a bpf trampoline to a bpf prog, we can patch up to three things: - the nop at bpf prog entry to go to the out-of-line stub - the instruction in the out-of-line stub to either call the bpf trampoline directly, or to branch to the long_branch stub. - the trampoline address before the long_branch stub. We do not need any synchronization here since we always have a valid branch target regardless of the order in which the above stores are seen. dummy_tramp ensures that the long_branch stub goes to a valid destination on other cpus, even when the branch to the long_branch stub is seen before the updated trampoline address. However, when detaching a bpf trampoline from a bpf prog, or if changing the bpf trampoline address, we need synchronization to ensure that other cpus can no longer branch into the older trampoline so that it can be safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus make forward progress, but we still need to ensure that other cpus execute isync (or some CSI) so that they don't go back into the trampoline again. While here, update the stale comment that describes the redzone usage in ppc64 BPF JIT. Signed-off-by: Naveen N Rao Signed-off-by: Hari Bathini Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030070850.1361304-18-hbathini@linux.ibm.com --- arch/powerpc/include/asm/ppc-opcode.h | 14 + arch/powerpc/net/bpf_jit.h | 17 + arch/powerpc/net/bpf_jit_comp.c | 847 +++++++++++++++++++++++++- arch/powerpc/net/bpf_jit_comp32.c | 7 +- arch/powerpc/net/bpf_jit_comp64.c | 11 +- 5 files changed, 891 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index b98a9e982c03b..4312bcb913a42 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -587,12 +587,26 @@ #define PPC_RAW_MTSPR(spr, d) (0x7c0003a6 | ___PPC_RS(d) | __PPC_SPR(spr)) #define PPC_RAW_EIEIO() (0x7c0006ac) +/* bcl 20,31,$+4 */ +#define PPC_RAW_BCL4() (0x429f0005) #define PPC_RAW_BRANCH(offset) (0x48000000 | PPC_LI(offset)) #define PPC_RAW_BL(offset) (0x48000001 | PPC_LI(offset)) #define PPC_RAW_TW(t0, a, b) (0x7c000008 | ___PPC_RS(t0) | ___PPC_RA(a) | ___PPC_RB(b)) #define PPC_RAW_TRAP() PPC_RAW_TW(31, 0, 0) #define PPC_RAW_SETB(t, bfa) (0x7c000100 | ___PPC_RT(t) | ___PPC_RA((bfa) << 2)) +#ifdef CONFIG_PPC32 +#define PPC_RAW_STL PPC_RAW_STW +#define PPC_RAW_STLU PPC_RAW_STWU +#define PPC_RAW_LL PPC_RAW_LWZ +#define PPC_RAW_CMPLI PPC_RAW_CMPWI +#else +#define PPC_RAW_STL PPC_RAW_STD +#define PPC_RAW_STLU PPC_RAW_STDU +#define PPC_RAW_LL PPC_RAW_LD +#define PPC_RAW_CMPLI PPC_RAW_CMPDI +#endif + /* Deal with instructions that older assemblers aren't aware of */ #define PPC_BCCTR_FLUSH stringify_in_c(.long PPC_INST_BCCTR_FLUSH) #define PPC_CP_ABORT stringify_in_c(.long PPC_RAW_CP_ABORT) diff --git a/arch/powerpc/net/bpf_jit.h b/arch/powerpc/net/bpf_jit.h index cdea5dccaefe7..6beacaec63d30 100644 --- a/arch/powerpc/net/bpf_jit.h +++ b/arch/powerpc/net/bpf_jit.h @@ -12,6 +12,7 @@ #include #include +#include #ifdef CONFIG_PPC64_ELF_ABI_V1 #define FUNCTION_DESCR_SIZE 24 @@ -21,6 +22,9 @@ #define CTX_NIA(ctx) ((unsigned long)ctx->idx * 4) +#define SZL sizeof(unsigned long) +#define BPF_INSN_SAFETY 64 + #define PLANT_INSTR(d, idx, instr) \ do { if (d) { (d)[idx] = instr; } idx++; } while (0) #define EMIT(instr) PLANT_INSTR(image, ctx->idx, instr) @@ -81,6 +85,18 @@ EMIT(PPC_RAW_ORI(d, d, (uintptr_t)(i) & \ 0xffff)); \ } } while (0) +#define PPC_LI_ADDR PPC_LI64 + +#ifndef CONFIG_PPC_KERNEL_PCREL +#define PPC64_LOAD_PACA() \ + EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))) +#else +#define PPC64_LOAD_PACA() do {} while (0) +#endif +#else +#define PPC_LI64(d, i) BUILD_BUG() +#define PPC_LI_ADDR PPC_LI32 +#define PPC64_LOAD_PACA() BUILD_BUG() #endif /* @@ -165,6 +181,7 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code u32 *addrs, int pass, bool extra_pass); void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx); void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx); +void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx); void bpf_jit_realloc_regs(struct codegen_context *ctx); int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 2a36cc2e7e9e2..28e2fd8b7900a 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -22,11 +22,81 @@ #include "bpf_jit.h" +/* These offsets are from bpf prog end and stay the same across progs */ +static int bpf_jit_ool_stub, bpf_jit_long_branch_stub; + static void bpf_jit_fill_ill_insns(void *area, unsigned int size) { memset32(area, BREAKPOINT_INSTRUCTION, size / 4); } +void dummy_tramp(void); + +asm ( +" .pushsection .text, \"ax\", @progbits ;" +" .global dummy_tramp ;" +" .type dummy_tramp, @function ;" +"dummy_tramp: ;" +#ifdef CONFIG_PPC_FTRACE_OUT_OF_LINE +" blr ;" +#else +/* LR is always in r11, so we don't need a 'mflr r11' here */ +" mtctr 11 ;" +" mtlr 0 ;" +" bctr ;" +#endif +" .size dummy_tramp, .-dummy_tramp ;" +" .popsection ;" +); + +void bpf_jit_build_fentry_stubs(u32 *image, struct codegen_context *ctx) +{ + int ool_stub_idx, long_branch_stub_idx; + + /* + * Out-of-line stub: + * mflr r0 + * [b|bl] tramp + * mtlr r0 // only with CONFIG_PPC_FTRACE_OUT_OF_LINE + * b bpf_func + 4 + */ + ool_stub_idx = ctx->idx; + EMIT(PPC_RAW_MFLR(_R0)); + EMIT(PPC_RAW_NOP()); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + EMIT(PPC_RAW_MTLR(_R0)); + WARN_ON_ONCE(!is_offset_in_branch_range(4 - (long)ctx->idx * 4)); + EMIT(PPC_RAW_BRANCH(4 - (long)ctx->idx * 4)); + + /* + * Long branch stub: + * .long + * mflr r11 + * bcl 20,31,$+4 + * mflr r12 + * ld r12, -8-SZL(r12) + * mtctr r12 + * mtlr r11 // needed to retain ftrace ABI + * bctr + */ + if (image) + *((unsigned long *)&image[ctx->idx]) = (unsigned long)dummy_tramp; + ctx->idx += SZL / 4; + long_branch_stub_idx = ctx->idx; + EMIT(PPC_RAW_MFLR(_R11)); + EMIT(PPC_RAW_BCL4()); + EMIT(PPC_RAW_MFLR(_R12)); + EMIT(PPC_RAW_LL(_R12, _R12, -8-SZL)); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_MTLR(_R11)); + EMIT(PPC_RAW_BCTR()); + + if (!bpf_jit_ool_stub) { + bpf_jit_ool_stub = (ctx->idx - ool_stub_idx) * 4; + bpf_jit_long_branch_stub = (ctx->idx - long_branch_stub_idx) * 4; + } +} + int bpf_jit_emit_exit_insn(u32 *image, struct codegen_context *ctx, int tmp_reg, long exit_addr) { if (!exit_addr || is_offset_in_branch_range(exit_addr - (ctx->idx * 4))) { @@ -222,7 +292,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) fp->bpf_func = (void *)fimage; fp->jited = 1; - fp->jited_len = proglen + FUNCTION_DESCR_SIZE; + fp->jited_len = cgctx.idx * 4 + FUNCTION_DESCR_SIZE; if (!fp->is_func || extra_pass) { if (bpf_jit_binary_pack_finalize(fhdr, hdr)) { @@ -369,3 +439,778 @@ bool bpf_jit_supports_far_kfunc_call(void) { return IS_ENABLED(CONFIG_PPC64); } + +void *arch_alloc_bpf_trampoline(unsigned int size) +{ + return bpf_prog_pack_alloc(size, bpf_jit_fill_ill_insns); +} + +void arch_free_bpf_trampoline(void *image, unsigned int size) +{ + bpf_prog_pack_free(image, size); +} + +int arch_protect_bpf_trampoline(void *image, unsigned int size) +{ + return 0; +} + +static int invoke_bpf_prog(u32 *image, u32 *ro_image, struct codegen_context *ctx, + struct bpf_tramp_link *l, int regs_off, int retval_off, + int run_ctx_off, bool save_ret) +{ + struct bpf_prog *p = l->link.prog; + ppc_inst_t branch_insn; + u32 jmp_idx; + int ret = 0; + + /* Save cookie */ + if (IS_ENABLED(CONFIG_PPC64)) { + PPC_LI64(_R3, l->cookie); + EMIT(PPC_RAW_STD(_R3, _R1, run_ctx_off + offsetof(struct bpf_tramp_run_ctx, + bpf_cookie))); + } else { + PPC_LI32(_R3, l->cookie >> 32); + PPC_LI32(_R4, l->cookie); + EMIT(PPC_RAW_STW(_R3, _R1, + run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie))); + EMIT(PPC_RAW_STW(_R4, _R1, + run_ctx_off + offsetof(struct bpf_tramp_run_ctx, bpf_cookie) + 4)); + } + + /* __bpf_prog_enter(p, &bpf_tramp_run_ctx) */ + PPC_LI_ADDR(_R3, p); + EMIT(PPC_RAW_MR(_R25, _R3)); + EMIT(PPC_RAW_ADDI(_R4, _R1, run_ctx_off)); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)bpf_trampoline_enter(p)); + if (ret) + return ret; + + /* Remember prog start time returned by __bpf_prog_enter */ + EMIT(PPC_RAW_MR(_R26, _R3)); + + /* + * if (__bpf_prog_enter(p) == 0) + * goto skip_exec_of_prog; + * + * Emit a nop to be later patched with conditional branch, once offset is known + */ + EMIT(PPC_RAW_CMPLI(_R3, 0)); + jmp_idx = ctx->idx; + EMIT(PPC_RAW_NOP()); + + /* p->bpf_func(ctx) */ + EMIT(PPC_RAW_ADDI(_R3, _R1, regs_off)); + if (!p->jited) + PPC_LI_ADDR(_R4, (unsigned long)p->insnsi); + if (!create_branch(&branch_insn, (u32 *)&ro_image[ctx->idx], (unsigned long)p->bpf_func, + BRANCH_SET_LINK)) { + if (image) + image[ctx->idx] = ppc_inst_val(branch_insn); + ctx->idx++; + } else { + EMIT(PPC_RAW_LL(_R12, _R25, offsetof(struct bpf_prog, bpf_func))); + EMIT(PPC_RAW_MTCTR(_R12)); + EMIT(PPC_RAW_BCTRL()); + } + + if (save_ret) + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + + /* Fix up branch */ + if (image) { + if (create_cond_branch(&branch_insn, &image[jmp_idx], + (unsigned long)&image[ctx->idx], COND_EQ << 16)) + return -EINVAL; + image[jmp_idx] = ppc_inst_val(branch_insn); + } + + /* __bpf_prog_exit(p, start_time, &bpf_tramp_run_ctx) */ + EMIT(PPC_RAW_MR(_R3, _R25)); + EMIT(PPC_RAW_MR(_R4, _R26)); + EMIT(PPC_RAW_ADDI(_R5, _R1, run_ctx_off)); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)bpf_trampoline_exit(p)); + + return ret; +} + +static int invoke_bpf_mod_ret(u32 *image, u32 *ro_image, struct codegen_context *ctx, + struct bpf_tramp_links *tl, int regs_off, int retval_off, + int run_ctx_off, u32 *branches) +{ + int i; + + /* + * The first fmod_ret program will receive a garbage return value. + * Set this to 0 to avoid confusing the program. + */ + EMIT(PPC_RAW_LI(_R3, 0)); + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + for (i = 0; i < tl->nr_links; i++) { + if (invoke_bpf_prog(image, ro_image, ctx, tl->links[i], regs_off, retval_off, + run_ctx_off, true)) + return -EINVAL; + + /* + * mod_ret prog stored return value after prog ctx. Emit: + * if (*(u64 *)(ret_val) != 0) + * goto do_fexit; + */ + EMIT(PPC_RAW_LL(_R3, _R1, retval_off)); + EMIT(PPC_RAW_CMPLI(_R3, 0)); + + /* + * Save the location of the branch and generate a nop, which is + * replaced with a conditional jump once do_fexit (i.e. the + * start of the fexit invocation) is finalized. + */ + branches[i] = ctx->idx; + EMIT(PPC_RAW_NOP()); + } + + return 0; +} + +static void bpf_trampoline_setup_tail_call_cnt(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int r4_off) +{ + if (IS_ENABLED(CONFIG_PPC64)) { + /* See bpf_jit_stack_tailcallcnt() */ + int tailcallcnt_offset = 6 * 8; + + EMIT(PPC_RAW_LL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); + EMIT(PPC_RAW_STL(_R3, _R1, -tailcallcnt_offset)); + } else { + /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ + EMIT(PPC_RAW_LL(_R4, _R1, r4_off)); + } +} + +static void bpf_trampoline_restore_tail_call_cnt(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int r4_off) +{ + if (IS_ENABLED(CONFIG_PPC64)) { + /* See bpf_jit_stack_tailcallcnt() */ + int tailcallcnt_offset = 6 * 8; + + EMIT(PPC_RAW_LL(_R3, _R1, -tailcallcnt_offset)); + EMIT(PPC_RAW_STL(_R3, _R1, func_frame_offset - tailcallcnt_offset)); + } else { + /* See bpf_jit_stack_offsetof() and BPF_PPC_TC */ + EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); + } +} + +static void bpf_trampoline_save_args(u32 *image, struct codegen_context *ctx, int func_frame_offset, + int nr_regs, int regs_off) +{ + int param_save_area_offset; + + param_save_area_offset = func_frame_offset; /* the two frames we alloted */ + param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ + + for (int i = 0; i < nr_regs; i++) { + if (i < 8) { + EMIT(PPC_RAW_STL(_R3 + i, _R1, regs_off + i * SZL)); + } else { + EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL)); + EMIT(PPC_RAW_STL(_R3, _R1, regs_off + i * SZL)); + } + } +} + +/* Used when restoring just the register parameters when returning back */ +static void bpf_trampoline_restore_args_regs(u32 *image, struct codegen_context *ctx, + int nr_regs, int regs_off) +{ + for (int i = 0; i < nr_regs && i < 8; i++) + EMIT(PPC_RAW_LL(_R3 + i, _R1, regs_off + i * SZL)); +} + +/* Used when we call into the traced function. Replicate parameter save area */ +static void bpf_trampoline_restore_args_stack(u32 *image, struct codegen_context *ctx, + int func_frame_offset, int nr_regs, int regs_off) +{ + int param_save_area_offset; + + param_save_area_offset = func_frame_offset; /* the two frames we alloted */ + param_save_area_offset += STACK_FRAME_MIN_SIZE; /* param save area is past frame header */ + + for (int i = 8; i < nr_regs; i++) { + EMIT(PPC_RAW_LL(_R3, _R1, param_save_area_offset + i * SZL)); + EMIT(PPC_RAW_STL(_R3, _R1, STACK_FRAME_MIN_SIZE + i * SZL)); + } + bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off); +} + +static int __arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *rw_image, + void *rw_image_end, void *ro_image, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + int regs_off, nregs_off, ip_off, run_ctx_off, retval_off, nvr_off, alt_lr_off, r4_off = 0; + int i, ret, nr_regs, bpf_frame_size = 0, bpf_dummy_frame_size = 0, func_frame_offset; + struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; + struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; + struct codegen_context codegen_ctx, *ctx; + u32 *image = (u32 *)rw_image; + ppc_inst_t branch_insn; + u32 *branches = NULL; + bool save_ret; + + if (IS_ENABLED(CONFIG_PPC32)) + return -EOPNOTSUPP; + + nr_regs = m->nr_args; + /* Extra registers for struct arguments */ + for (i = 0; i < m->nr_args; i++) + if (m->arg_size[i] > SZL) + nr_regs += round_up(m->arg_size[i], SZL) / SZL - 1; + + if (nr_regs > MAX_BPF_FUNC_ARGS) + return -EOPNOTSUPP; + + ctx = &codegen_ctx; + memset(ctx, 0, sizeof(*ctx)); + + /* + * Generated stack layout: + * + * func prev back chain [ back chain ] + * [ ] + * bpf prog redzone/tailcallcnt [ ... ] 64 bytes (64-bit powerpc) + * [ ] -- + * LR save area [ r0 save (64-bit) ] | header + * [ r0 save (32-bit) ] | + * dummy frame for unwind [ back chain 1 ] -- + * [ padding ] align stack frame + * r4_off [ r4 (tailcallcnt) ] optional - 32-bit powerpc + * alt_lr_off [ real lr (ool stub)] optional - actual lr + * [ r26 ] + * nvr_off [ r25 ] nvr save area + * retval_off [ return value ] + * [ reg argN ] + * [ ... ] + * regs_off [ reg_arg1 ] prog ctx context + * nregs_off [ args count ] + * ip_off [ traced function ] + * [ ... ] + * run_ctx_off [ bpf_tramp_run_ctx ] + * [ reg argN ] + * [ ... ] + * param_save_area [ reg_arg1 ] min 8 doublewords, per ABI + * [ TOC save (64-bit) ] -- + * [ LR save (64-bit) ] | header + * [ LR save (32-bit) ] | + * bpf trampoline frame [ back chain 2 ] -- + * + */ + + /* Minimum stack frame header */ + bpf_frame_size = STACK_FRAME_MIN_SIZE; + + /* + * Room for parameter save area. + * + * As per the ABI, this is required if we call into the traced + * function (BPF_TRAMP_F_CALL_ORIG): + * - if the function takes more than 8 arguments for the rest to spill onto the stack + * - or, if the function has variadic arguments + * - or, if this functions's prototype was not available to the caller + * + * Reserve space for at least 8 registers for now. This can be optimized later. + */ + bpf_frame_size += (nr_regs > 8 ? nr_regs : 8) * SZL; + + /* Room for struct bpf_tramp_run_ctx */ + run_ctx_off = bpf_frame_size; + bpf_frame_size += round_up(sizeof(struct bpf_tramp_run_ctx), SZL); + + /* Room for IP address argument */ + ip_off = bpf_frame_size; + if (flags & BPF_TRAMP_F_IP_ARG) + bpf_frame_size += SZL; + + /* Room for args count */ + nregs_off = bpf_frame_size; + bpf_frame_size += SZL; + + /* Room for args */ + regs_off = bpf_frame_size; + bpf_frame_size += nr_regs * SZL; + + /* Room for return value of func_addr or fentry prog */ + retval_off = bpf_frame_size; + save_ret = flags & (BPF_TRAMP_F_CALL_ORIG | BPF_TRAMP_F_RET_FENTRY_RET); + if (save_ret) + bpf_frame_size += SZL; + + /* Room for nvr save area */ + nvr_off = bpf_frame_size; + bpf_frame_size += 2 * SZL; + + /* Optional save area for actual LR in case of ool ftrace */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + alt_lr_off = bpf_frame_size; + bpf_frame_size += SZL; + } + + if (IS_ENABLED(CONFIG_PPC32)) { + if (nr_regs < 2) { + r4_off = bpf_frame_size; + bpf_frame_size += SZL; + } else { + r4_off = regs_off + SZL; + } + } + + /* Padding to align stack frame, if any */ + bpf_frame_size = round_up(bpf_frame_size, SZL * 2); + + /* Dummy frame size for proper unwind - includes 64-bytes red zone for 64-bit powerpc */ + bpf_dummy_frame_size = STACK_FRAME_MIN_SIZE + 64; + + /* Offset to the traced function's stack frame */ + func_frame_offset = bpf_dummy_frame_size + bpf_frame_size; + + /* Create dummy frame for unwind, store original return value */ + EMIT(PPC_RAW_STL(_R0, _R1, PPC_LR_STKOFF)); + /* Protect red zone where tail call count goes */ + EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_dummy_frame_size)); + + /* Create our stack frame */ + EMIT(PPC_RAW_STLU(_R1, _R1, -bpf_frame_size)); + + /* 64-bit: Save TOC and load kernel TOC */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) { + EMIT(PPC_RAW_STD(_R2, _R1, 24)); + PPC64_LOAD_PACA(); + } + + /* 32-bit: save tail call count in r4 */ + if (IS_ENABLED(CONFIG_PPC32) && nr_regs < 2) + EMIT(PPC_RAW_STL(_R4, _R1, r4_off)); + + bpf_trampoline_save_args(image, ctx, func_frame_offset, nr_regs, regs_off); + + /* Save our return address */ + EMIT(PPC_RAW_MFLR(_R3)); + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + EMIT(PPC_RAW_STL(_R3, _R1, alt_lr_off)); + else + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + + /* + * Save ip address of the traced function. + * We could recover this from LR, but we will need to address for OOL trampoline, + * and optional GEP area. + */ + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE) || flags & BPF_TRAMP_F_IP_ARG) { + EMIT(PPC_RAW_LWZ(_R4, _R3, 4)); + EMIT(PPC_RAW_SLWI(_R4, _R4, 6)); + EMIT(PPC_RAW_SRAWI(_R4, _R4, 6)); + EMIT(PPC_RAW_ADD(_R3, _R3, _R4)); + EMIT(PPC_RAW_ADDI(_R3, _R3, 4)); + } + + if (flags & BPF_TRAMP_F_IP_ARG) + EMIT(PPC_RAW_STL(_R3, _R1, ip_off)); + + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) + /* Fake our LR for unwind */ + EMIT(PPC_RAW_STL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + + /* Save function arg count -- see bpf_get_func_arg_cnt() */ + EMIT(PPC_RAW_LI(_R3, nr_regs)); + EMIT(PPC_RAW_STL(_R3, _R1, nregs_off)); + + /* Save nv regs */ + EMIT(PPC_RAW_STL(_R25, _R1, nvr_off)); + EMIT(PPC_RAW_STL(_R26, _R1, nvr_off + SZL)); + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + PPC_LI_ADDR(_R3, (unsigned long)im); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)__bpf_tramp_enter); + if (ret) + return ret; + } + + for (i = 0; i < fentry->nr_links; i++) + if (invoke_bpf_prog(image, ro_image, ctx, fentry->links[i], regs_off, retval_off, + run_ctx_off, flags & BPF_TRAMP_F_RET_FENTRY_RET)) + return -EINVAL; + + if (fmod_ret->nr_links) { + branches = kcalloc(fmod_ret->nr_links, sizeof(u32), GFP_KERNEL); + if (!branches) + return -ENOMEM; + + if (invoke_bpf_mod_ret(image, ro_image, ctx, fmod_ret, regs_off, retval_off, + run_ctx_off, branches)) { + ret = -EINVAL; + goto cleanup; + } + } + + /* Call the traced function */ + if (flags & BPF_TRAMP_F_CALL_ORIG) { + /* + * The address in LR save area points to the correct point in the original function + * with both PPC_FTRACE_OUT_OF_LINE as well as with traditional ftrace instruction + * sequence + */ + EMIT(PPC_RAW_LL(_R3, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTCTR(_R3)); + + /* Replicate tail_call_cnt before calling the original BPF prog */ + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + bpf_trampoline_setup_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + + /* Restore args */ + bpf_trampoline_restore_args_stack(image, ctx, func_frame_offset, nr_regs, regs_off); + + /* Restore TOC for 64-bit */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + EMIT(PPC_RAW_LD(_R2, _R1, 24)); + EMIT(PPC_RAW_BCTRL()); + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + PPC64_LOAD_PACA(); + + /* Store return value for bpf prog to access */ + EMIT(PPC_RAW_STL(_R3, _R1, retval_off)); + + /* Restore updated tail_call_cnt */ + if (flags & BPF_TRAMP_F_TAIL_CALL_CTX) + bpf_trampoline_restore_tail_call_cnt(image, ctx, func_frame_offset, r4_off); + + /* Reserve space to patch branch instruction to skip fexit progs */ + im->ip_after_call = &((u32 *)ro_image)[ctx->idx]; + EMIT(PPC_RAW_NOP()); + } + + /* Update branches saved in invoke_bpf_mod_ret with address of do_fexit */ + for (i = 0; i < fmod_ret->nr_links && image; i++) { + if (create_cond_branch(&branch_insn, &image[branches[i]], + (unsigned long)&image[ctx->idx], COND_NE << 16)) { + ret = -EINVAL; + goto cleanup; + } + + image[branches[i]] = ppc_inst_val(branch_insn); + } + + for (i = 0; i < fexit->nr_links; i++) + if (invoke_bpf_prog(image, ro_image, ctx, fexit->links[i], regs_off, retval_off, + run_ctx_off, false)) { + ret = -EINVAL; + goto cleanup; + } + + if (flags & BPF_TRAMP_F_CALL_ORIG) { + im->ip_epilogue = &((u32 *)ro_image)[ctx->idx]; + PPC_LI_ADDR(_R3, im); + ret = bpf_jit_emit_func_call_rel(image, ro_image, ctx, + (unsigned long)__bpf_tramp_exit); + if (ret) + goto cleanup; + } + + if (flags & BPF_TRAMP_F_RESTORE_REGS) + bpf_trampoline_restore_args_regs(image, ctx, nr_regs, regs_off); + + /* Restore return value of func_addr or fentry prog */ + if (save_ret) + EMIT(PPC_RAW_LL(_R3, _R1, retval_off)); + + /* Restore nv regs */ + EMIT(PPC_RAW_LL(_R26, _R1, nvr_off + SZL)); + EMIT(PPC_RAW_LL(_R25, _R1, nvr_off)); + + /* Epilogue */ + if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2) && !IS_ENABLED(CONFIG_PPC_KERNEL_PCREL)) + EMIT(PPC_RAW_LD(_R2, _R1, 24)); + if (flags & BPF_TRAMP_F_SKIP_FRAME) { + /* Skip the traced function and return to parent */ + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_BLR()); + } else { + if (IS_ENABLED(CONFIG_PPC_FTRACE_OUT_OF_LINE)) { + EMIT(PPC_RAW_LL(_R0, _R1, alt_lr_off)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_BLR()); + } else { + EMIT(PPC_RAW_LL(_R0, _R1, bpf_frame_size + PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTCTR(_R0)); + EMIT(PPC_RAW_ADDI(_R1, _R1, func_frame_offset)); + EMIT(PPC_RAW_LL(_R0, _R1, PPC_LR_STKOFF)); + EMIT(PPC_RAW_MTLR(_R0)); + EMIT(PPC_RAW_BCTR()); + } + } + + /* Make sure the trampoline generation logic doesn't overflow */ + if (image && WARN_ON_ONCE(&image[ctx->idx] > (u32 *)rw_image_end - BPF_INSN_SAFETY)) { + ret = -EFAULT; + goto cleanup; + } + ret = ctx->idx * 4 + BPF_INSN_SAFETY * 4; + +cleanup: + kfree(branches); + return ret; +} + +int arch_bpf_trampoline_size(const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, void *func_addr) +{ + struct bpf_tramp_image im; + void *image; + int ret; + + /* + * Allocate a temporary buffer for __arch_prepare_bpf_trampoline(). + * This will NOT cause fragmentation in direct map, as we do not + * call set_memory_*() on this buffer. + * + * We cannot use kvmalloc here, because we need image to be in + * module memory range. + */ + image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!image) + return -ENOMEM; + + ret = __arch_prepare_bpf_trampoline(&im, image, image + PAGE_SIZE, image, + m, flags, tlinks, func_addr); + bpf_jit_free_exec(image); + + return ret; +} + +int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, + const struct btf_func_model *m, u32 flags, + struct bpf_tramp_links *tlinks, + void *func_addr) +{ + u32 size = image_end - image; + void *rw_image, *tmp; + int ret; + + /* + * rw_image doesn't need to be in module memory range, so we can + * use kvmalloc. + */ + rw_image = kvmalloc(size, GFP_KERNEL); + if (!rw_image) + return -ENOMEM; + + ret = __arch_prepare_bpf_trampoline(im, rw_image, rw_image + size, image, m, + flags, tlinks, func_addr); + if (ret < 0) + goto out; + + if (bpf_jit_enable > 1) + bpf_jit_dump(1, ret - BPF_INSN_SAFETY * 4, 1, rw_image); + + tmp = bpf_arch_text_copy(image, rw_image, size); + if (IS_ERR(tmp)) + ret = PTR_ERR(tmp); + +out: + kvfree(rw_image); + return ret; +} + +static int bpf_modify_inst(void *ip, ppc_inst_t old_inst, ppc_inst_t new_inst) +{ + ppc_inst_t org_inst; + + if (copy_inst_from_kernel_nofault(&org_inst, ip)) { + pr_err("0x%lx: fetching instruction failed\n", (unsigned long)ip); + return -EFAULT; + } + + if (!ppc_inst_equal(org_inst, old_inst)) { + pr_err("0x%lx: expected (%08lx) != found (%08lx)\n", + (unsigned long)ip, ppc_inst_as_ulong(old_inst), ppc_inst_as_ulong(org_inst)); + return -EINVAL; + } + + if (ppc_inst_equal(old_inst, new_inst)) + return 0; + + return patch_instruction(ip, new_inst); +} + +static void do_isync(void *info __maybe_unused) +{ + isync(); +} + +/* + * A 3-step process for bpf prog entry: + * 1. At bpf prog entry, a single nop/b: + * bpf_func: + * [nop|b] ool_stub + * 2. Out-of-line stub: + * ool_stub: + * mflr r0 + * [b|bl] / + * mtlr r0 // CONFIG_PPC_FTRACE_OUT_OF_LINE only + * b bpf_func + 4 + * 3. Long branch stub: + * long_branch_stub: + * .long / + * mflr r11 + * bcl 20,31,$+4 + * mflr r12 + * ld r12, -16(r12) + * mtctr r12 + * mtlr r11 // needed to retain ftrace ABI + * bctr + * + * dummy_tramp is used to reduce synchronization requirements. + * + * When attaching a bpf trampoline to a bpf prog, we do not need any + * synchronization here since we always have a valid branch target regardless + * of the order in which the above stores are seen. dummy_tramp ensures that + * the long_branch stub goes to a valid destination on other cpus, even when + * the branch to the long_branch stub is seen before the updated trampoline + * address. + * + * However, when detaching a bpf trampoline from a bpf prog, or if changing + * the bpf trampoline address, we need synchronization to ensure that other + * cpus can no longer branch into the older trampoline so that it can be + * safely freed. bpf_tramp_image_put() uses rcu_tasks to ensure all cpus + * make forward progress, but we still need to ensure that other cpus + * execute isync (or some CSI) so that they don't go back into the + * trampoline again. + */ +int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type poke_type, + void *old_addr, void *new_addr) +{ + unsigned long bpf_func, bpf_func_end, size, offset; + ppc_inst_t old_inst, new_inst; + int ret = 0, branch_flags; + char name[KSYM_NAME_LEN]; + + if (IS_ENABLED(CONFIG_PPC32)) + return -EOPNOTSUPP; + + bpf_func = (unsigned long)ip; + branch_flags = poke_type == BPF_MOD_CALL ? BRANCH_SET_LINK : 0; + + /* We currently only support poking bpf programs */ + if (!__bpf_address_lookup(bpf_func, &size, &offset, name)) { + pr_err("%s (0x%lx): kernel/modules are not supported\n", __func__, bpf_func); + return -EOPNOTSUPP; + } + + /* + * If we are not poking at bpf prog entry, then we are simply patching in/out + * an unconditional branch instruction at im->ip_after_call + */ + if (offset) { + if (poke_type != BPF_MOD_JUMP) { + pr_err("%s (0x%lx): calls are not supported in bpf prog body\n", __func__, + bpf_func); + return -EOPNOTSUPP; + } + old_inst = ppc_inst(PPC_RAW_NOP()); + if (old_addr) + if (create_branch(&old_inst, ip, (unsigned long)old_addr, 0)) + return -ERANGE; + new_inst = ppc_inst(PPC_RAW_NOP()); + if (new_addr) + if (create_branch(&new_inst, ip, (unsigned long)new_addr, 0)) + return -ERANGE; + mutex_lock(&text_mutex); + ret = bpf_modify_inst(ip, old_inst, new_inst); + mutex_unlock(&text_mutex); + + /* Make sure all cpus see the new instruction */ + smp_call_function(do_isync, NULL, 1); + return ret; + } + + bpf_func_end = bpf_func + size; + + /* Address of the jmp/call instruction in the out-of-line stub */ + ip = (void *)(bpf_func_end - bpf_jit_ool_stub + 4); + + if (!is_offset_in_branch_range((long)ip - 4 - bpf_func)) { + pr_err("%s (0x%lx): bpf prog too large, ool stub out of branch range\n", __func__, + bpf_func); + return -ERANGE; + } + + old_inst = ppc_inst(PPC_RAW_NOP()); + if (old_addr) { + if (is_offset_in_branch_range(ip - old_addr)) + create_branch(&old_inst, ip, (unsigned long)old_addr, branch_flags); + else + create_branch(&old_inst, ip, bpf_func_end - bpf_jit_long_branch_stub, + branch_flags); + } + new_inst = ppc_inst(PPC_RAW_NOP()); + if (new_addr) { + if (is_offset_in_branch_range(ip - new_addr)) + create_branch(&new_inst, ip, (unsigned long)new_addr, branch_flags); + else + create_branch(&new_inst, ip, bpf_func_end - bpf_jit_long_branch_stub, + branch_flags); + } + + mutex_lock(&text_mutex); + + /* + * 1. Update the address in the long branch stub: + * If new_addr is out of range, we will have to use the long branch stub, so patch new_addr + * here. Otherwise, revert to dummy_tramp, but only if we had patched old_addr here. + */ + if ((new_addr && !is_offset_in_branch_range(new_addr - ip)) || + (old_addr && !is_offset_in_branch_range(old_addr - ip))) + ret = patch_ulong((void *)(bpf_func_end - bpf_jit_long_branch_stub - SZL), + (new_addr && !is_offset_in_branch_range(new_addr - ip)) ? + (unsigned long)new_addr : (unsigned long)dummy_tramp); + if (ret) + goto out; + + /* 2. Update the branch/call in the out-of-line stub */ + ret = bpf_modify_inst(ip, old_inst, new_inst); + if (ret) + goto out; + + /* 3. Update instruction at bpf prog entry */ + ip = (void *)bpf_func; + if (!old_addr || !new_addr) { + if (!old_addr) { + old_inst = ppc_inst(PPC_RAW_NOP()); + create_branch(&new_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0); + } else { + new_inst = ppc_inst(PPC_RAW_NOP()); + create_branch(&old_inst, ip, bpf_func_end - bpf_jit_ool_stub, 0); + } + ret = bpf_modify_inst(ip, old_inst, new_inst); + } + +out: + mutex_unlock(&text_mutex); + + /* + * Sync only if we are not attaching a trampoline to a bpf prog so the older + * trampoline can be freed safely. + */ + if (old_addr) + smp_call_function(do_isync, NULL, 1); + + return ret; +} diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index a0c4f1bde83e8..c4db278dae360 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -127,13 +127,16 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + /* Instruction for trampoline attach */ + EMIT(PPC_RAW_NOP()); + /* Initialize tail_call_cnt, to be skipped if we do tail calls. */ if (ctx->seen & SEEN_TAILCALL) EMIT(PPC_RAW_LI(_R4, 0)); else EMIT(PPC_RAW_NOP()); -#define BPF_TAILCALL_PROLOGUE_SIZE 4 +#define BPF_TAILCALL_PROLOGUE_SIZE 8 if (bpf_has_stack_frame(ctx)) EMIT(PPC_RAW_STWU(_R1, _R1, -BPF_PPC_STACKFRAME(ctx))); @@ -198,6 +201,8 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) bpf_jit_emit_common_epilogue(image, ctx); EMIT(PPC_RAW_BLR()); + + bpf_jit_build_fentry_stubs(image, ctx); } /* Relative offset needs to be calculated based on final image location */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index f3be024fc6854..233703b06d7c9 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -84,7 +84,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) } /* - * When not setting up our own stackframe, the redzone usage is: + * When not setting up our own stackframe, the redzone (288 bytes) usage is: * * [ prev sp ] <------------- * [ ... ] | @@ -92,7 +92,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ nv gpr save area ] 5*8 * [ tail_call_cnt ] 8 * [ local_tmp_var ] 16 - * [ unused red zone ] 208 bytes protected + * [ unused red zone ] 224 */ static int bpf_jit_stack_local(struct codegen_context *ctx) { @@ -126,6 +126,9 @@ void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; + /* Instruction for trampoline attach */ + EMIT(PPC_RAW_NOP()); + #ifndef CONFIG_PPC_KERNEL_PCREL if (IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) EMIT(PPC_RAW_LD(_R2, _R13, offsetof(struct paca_struct, kernel_toc))); @@ -200,6 +203,8 @@ void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) EMIT(PPC_RAW_MR(_R3, bpf_to_ppc(BPF_REG_0))); EMIT(PPC_RAW_BLR()); + + bpf_jit_build_fentry_stubs(image, ctx); } int bpf_jit_emit_func_call_rel(u32 *image, u32 *fimage, struct codegen_context *ctx, u64 func) @@ -303,7 +308,7 @@ static int bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 o */ int b2p_bpf_array = bpf_to_ppc(BPF_REG_2); int b2p_index = bpf_to_ppc(BPF_REG_3); - int bpf_tailcall_prologue_size = 8; + int bpf_tailcall_prologue_size = 12; if (!IS_ENABLED(CONFIG_PPC_KERNEL_PCREL) && IS_ENABLED(CONFIG_PPC64_ELF_ABI_V2)) bpf_tailcall_prologue_size += 4; /* skip past the toc load */ From d677ce521334d8f1f327cafc8b1b7854b0833158 Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Wed, 30 Oct 2024 11:41:37 -0700 Subject: [PATCH 25/69] powerpc/vdso: Drop -mstack-protector-guard flags in 32-bit files with clang Under certain conditions, the 64-bit '-mstack-protector-guard' flags may end up in the 32-bit vDSO flags, resulting in build failures due to the structure of clang's argument parsing of the stack protector options, which validates the arguments of the stack protector guard flags unconditionally in the frontend, choking on the 64-bit values when targeting 32-bit: clang: error: invalid value 'r13' in 'mstack-protector-guard-reg=', expected one of: r2 clang: error: invalid value 'r13' in 'mstack-protector-guard-reg=', expected one of: r2 make[3]: *** [arch/powerpc/kernel/vdso/Makefile:85: arch/powerpc/kernel/vdso/vgettimeofday-32.o] Error 1 make[3]: *** [arch/powerpc/kernel/vdso/Makefile:87: arch/powerpc/kernel/vdso/vgetrandom-32.o] Error 1 Remove these flags by adding them to the CC32FLAGSREMOVE variable, which already handles situations similar to this. Additionally, reformat and align a comment better for the expanding CONFIG_CC_IS_CLANG block. Cc: stable@vger.kernel.org # v6.1+ Signed-off-by: Nathan Chancellor Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241030-powerpc-vdso-drop-stackp-flags-clang-v1-1-d95e7376d29c@kernel.org --- arch/powerpc/kernel/vdso/Makefile | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index 31ca5a5470047..c568cad6a22e6 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -54,10 +54,14 @@ ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -W CC32FLAGS := -m32 CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc - # This flag is supported by clang for 64-bit but not 32-bit so it will cause - # an unused command line flag warning for this file. ifdef CONFIG_CC_IS_CLANG +# This flag is supported by clang for 64-bit but not 32-bit so it will cause +# an unused command line flag warning for this file. CC32FLAGSREMOVE += -fno-stack-clash-protection +# -mstack-protector-guard values from the 64-bit build are not valid for the +# 32-bit one. clang validates the values passed to these arguments during +# parsing, even when -fno-stack-protector is passed afterwards. +CC32FLAGSREMOVE += -mstack-protector-guard% endif LD32FLAGS := -Wl,-soname=linux-vdso32.so.1 AS32FLAGS := -D__VDSO32__ From 2866949ec889cf383c481119c617b9cead733070 Mon Sep 17 00:00:00 2001 From: Paulo Miguel Almeida Date: Sat, 19 Oct 2024 15:13:49 +1300 Subject: [PATCH 26/69] powerpc/ps3: replace open-coded sysfs_emit function sysfs_emit() helper function should be used when formatting the value to be returned to user space. This patch replaces open-coded sysfs_emit() in sysfs .show() callbacks Link: https://github.com/KSPP/linux/issues/105 Signed-off-by: Paulo Miguel Almeida Reviewed-by: Geert Uytterhoeven Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/ZxMV3YvSulJFZ8rk@mail.google.com --- arch/powerpc/platforms/ps3/system-bus.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index b9a7d9bae687e..afbaabf182d01 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -453,10 +453,9 @@ static ssize_t modalias_show(struct device *_dev, struct device_attribute *a, char *buf) { struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev); - int len = snprintf(buf, PAGE_SIZE, "ps3:%d:%d\n", dev->match_id, - dev->match_sub_id); - return (len >= PAGE_SIZE) ? (PAGE_SIZE - 1) : len; + return sysfs_emit(buf, "ps3:%d:%d\n", dev->match_id, + dev->match_sub_id); } static DEVICE_ATTR_RO(modalias); From f1c774ba91054a749573781f9e8fd652b9a1f633 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 12 Aug 2024 16:33:12 +1000 Subject: [PATCH 27/69] powerpc/modules: start/end_opd are only needed for ABI v1 The start_opd/end_opd members of struct mod_arch_specific are only needed for kernels built using ELF ABI v1. Guard them with an ifdef to save a little bit of space on ELF ABI v2 kernels. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240812063312.730496-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/module.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/module.h b/arch/powerpc/include/asm/module.h index 9ee70a4a0fde1..e1ee5026ac4af 100644 --- a/arch/powerpc/include/asm/module.h +++ b/arch/powerpc/include/asm/module.h @@ -35,9 +35,11 @@ struct mod_arch_specific { bool toc_fixed; /* Have we fixed up .TOC.? */ #endif +#ifdef CONFIG_PPC64_ELF_ABI_V1 /* For module function descriptor dereference */ unsigned long start_opd; unsigned long end_opd; +#endif #else /* powerpc64 */ /* Indices of PLT sections within module. */ unsigned int core_plt_section; From 19e0a70e6c3c1bf800b8ce9eb45864aa9e1e2781 Mon Sep 17 00:00:00 2001 From: Thorsten Blum Date: Sun, 27 Oct 2024 23:22:17 +0100 Subject: [PATCH 28/69] powerpc: Use str_enabled_disabled() helper function Remove hard-coded strings by using the str_enabled_disabled() helper function. Signed-off-by: Thorsten Blum Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241027222219.1173-2-thorsten.blum@linux.dev --- arch/powerpc/kernel/secure_boot.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/secure_boot.c b/arch/powerpc/kernel/secure_boot.c index 9e0efb657f393..3a28795b4ed82 100644 --- a/arch/powerpc/kernel/secure_boot.c +++ b/arch/powerpc/kernel/secure_boot.c @@ -5,6 +5,7 @@ */ #include #include +#include #include static struct device_node *get_ppc_fw_sb_node(void) @@ -38,7 +39,7 @@ bool is_ppc_secureboot_enabled(void) of_node_put(node); out: - pr_info("Secure boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Secure boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } @@ -62,7 +63,7 @@ bool is_ppc_trustedboot_enabled(void) of_node_put(node); out: - pr_info("Trusted boot mode %s\n", enabled ? "enabled" : "disabled"); + pr_info("Trusted boot mode %s\n", str_enabled_disabled(enabled)); return enabled; } From 96e266e3bcd6ed03f0be62c2fcf92bf1e3dc8a6a Mon Sep 17 00:00:00 2001 From: Amit Machhiwal Date: Mon, 28 Oct 2024 15:46:22 +0530 Subject: [PATCH 29/69] KVM: PPC: Book3S HV: Add Power11 capability support for Nested PAPR guests The Power11 architected and raw mode support in Linux was merged in commit c2ed087ed35c ("powerpc: Add Power11 architected and raw mode"), and the corresponding support in QEMU is pending in [1], which is currently in its V6. Currently, booting a KVM guest inside a pseries LPAR (Logical Partition) on a kernel without P11 support results the guest boot in a Power10 compatibility mode (i.e., with logical PVR of Power10). However, booting a KVM guest on a kernel with P11 support causes the following boot crash. On a Power11 LPAR, the Power Hypervisor (L0) returns a support for both Power10 and Power11 capabilities through H_GUEST_GET_CAPABILITIES hcall. However, KVM currently supports only Power10 capabilities, resulting in only Power10 capabilities being set as "nested capabilities" via an H_GUEST_SET_CAPABILITIES hcall. In the guest entry path, gs_msg_ops_kvmhv_nestedv2_config_fill_info() is called by kvmhv_nestedv2_flush_vcpu() to fill the GSB (Guest State Buffer) elements. The arch_compat is set to the logical PVR of Power11, followed by an H_GUEST_SET_STATE hcall. This hcall returns H_INVALID_ELEMENT_VALUE as a return code when setting a Power11 logical PVR, as only Power10 capabilities were communicated as supported between PHYP and KVM, utimately resulting in the KVM guest boot crash. KVM: unknown exit, hardware reason ffffffffffffffea NIP 000000007daf97e0 LR 000000007daf1aec CTR 000000007daf1ab4 XER 0000000020040000 CPU#0 MSR 8000000000103000 HID0 0000000000000000 HF 6c002000 iidx 3 didx 3 TB 00000000 00000000 DECR 0 GPR00 8000000000003000 000000007e580e20 000000007db26700 0000000000000000 GPR04 00000000041a0c80 000000007df7f000 0000000000200000 000000007df7f000 GPR08 000000007db6d5d8 000000007e65fa90 000000007db6d5d0 0000000000003000 GPR12 8000000000000001 0000000000000000 0000000000000000 0000000000000000 GPR16 0000000000000000 0000000000000000 0000000000000000 0000000000000000 GPR20 0000000000000000 0000000000000000 0000000000000000 000000007db21a30 GPR24 000000007db65000 0000000000000000 0000000000000000 0000000000000003 GPR28 000000007db6d5e0 000000007db22220 000000007daf27ac 000000007db75000 CR 20000404 [ E - - - - G - G ] RES 000@ffffffffffffffff SRR0 000000007daf97e0 SRR1 8000000000102000 PVR 0000000000820200 VRSAVE 0000000000000000 SPRG0 0000000000000000 SPRG1 000000000000ff20 SPRG2 0000000000000000 SPRG3 0000000000000000 SPRG4 0000000000000000 SPRG5 0000000000000000 SPRG6 0000000000000000 SPRG7 0000000000000000 CFAR 0000000000000000 LPCR 0000000000020400 PTCR 0000000000000000 DAR 0000000000000000 DSISR 0000000000000000 Fix this by adding the Power11 capability support and the required plumbing in place. Note: * Booting a Power11 KVM nested PAPR guest requires [1] in QEMU. [1] https://lore.kernel.org/all/20240731055022.696051-1-adityag@linux.ibm.com/ Signed-off-by: Amit Machhiwal Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241028101622.741573-1-amachhiw@linux.ibm.com --- arch/powerpc/include/asm/cputable.h | 11 ++++++----- arch/powerpc/include/asm/hvcall.h | 1 + arch/powerpc/kvm/book3s_hv.c | 7 +++++-- arch/powerpc/kvm/book3s_hv_nested.c | 2 ++ arch/powerpc/kvm/book3s_hv_nestedv2.c | 4 +++- 5 files changed, 17 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 201218faed614..29a529d2ab8b4 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -193,6 +193,7 @@ static inline void cpu_feature_keys_init(void) { } #define CPU_FTR_ARCH_31 LONG_ASM_CONST(0x0004000000000000) #define CPU_FTR_DAWR1 LONG_ASM_CONST(0x0008000000000000) #define CPU_FTR_DEXCR_NPHIE LONG_ASM_CONST(0x0010000000000000) +#define CPU_FTR_P11_PVR LONG_ASM_CONST(0x0020000000000000) #ifndef __ASSEMBLY__ @@ -454,7 +455,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTR_DAWR | CPU_FTR_DAWR1 | \ CPU_FTR_DEXCR_NPHIE) -#define CPU_FTRS_POWER11 CPU_FTRS_POWER10 +#define CPU_FTRS_POWER11 (CPU_FTRS_POWER10 | CPU_FTR_P11_PVR) #define CPU_FTRS_CELL (CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ @@ -475,7 +476,7 @@ static inline void cpu_feature_keys_init(void) { } (CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_VSX_COMP | CPU_FTRS_POWER9 | \ CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | \ - CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10) + CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10 | CPU_FTRS_POWER11) #else #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | \ @@ -483,7 +484,7 @@ static inline void cpu_feature_keys_init(void) { } CPU_FTRS_POWER8 | CPU_FTRS_CELL | CPU_FTRS_PA6T | \ CPU_FTR_VSX_COMP | CPU_FTR_ALTIVEC_COMP | CPU_FTRS_POWER9 | \ CPU_FTRS_POWER9_DD2_1 | CPU_FTRS_POWER9_DD2_2 | \ - CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10) + CPU_FTRS_POWER9_DD2_3 | CPU_FTRS_POWER10 | CPU_FTRS_POWER11) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif #else @@ -547,7 +548,7 @@ enum { (CPU_FTRS_POSSIBLE & ~CPU_FTR_HVMODE & ~CPU_FTR_DBELL & \ CPU_FTRS_POWER7 & CPU_FTRS_POWER8E & CPU_FTRS_POWER8 & \ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \ - CPU_FTRS_POWER10 & CPU_FTRS_DT_CPU_BASE) + CPU_FTRS_POWER10 & CPU_FTRS_POWER11 & CPU_FTRS_DT_CPU_BASE) #else #define CPU_FTRS_ALWAYS \ (CPU_FTRS_PPC970 & CPU_FTRS_POWER5 & \ @@ -555,7 +556,7 @@ enum { CPU_FTRS_PA6T & CPU_FTRS_POWER8 & CPU_FTRS_POWER8E & \ ~CPU_FTR_HVMODE & ~CPU_FTR_DBELL & CPU_FTRS_POSSIBLE & \ CPU_FTRS_POWER9 & CPU_FTRS_POWER9_DD2_1 & CPU_FTRS_POWER9_DD2_2 & \ - CPU_FTRS_POWER10 & CPU_FTRS_DT_CPU_BASE) + CPU_FTRS_POWER10 & CPU_FTRS_POWER11 & CPU_FTRS_DT_CPU_BASE) #endif /* CONFIG_CPU_LITTLE_ENDIAN */ #endif #else diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 7a8495660c2f8..65d1f291393d2 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -495,6 +495,7 @@ #define H_GUEST_CAP_COPY_MEM (1UL<<(63-0)) #define H_GUEST_CAP_POWER9 (1UL<<(63-1)) #define H_GUEST_CAP_POWER10 (1UL<<(63-2)) +#define H_GUEST_CAP_POWER11 (1UL<<(63-3)) #define H_GUEST_CAP_BITMAP2 (1UL<<(63-63)) #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index ba0492f9de650..3cdd6d6df041a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -400,7 +400,10 @@ static inline unsigned long map_pcr_to_cap(unsigned long pcr) cap = H_GUEST_CAP_POWER9; break; case PCR_ARCH_31: - cap = H_GUEST_CAP_POWER10; + if (cpu_has_feature(CPU_FTR_P11_PVR)) + cap = H_GUEST_CAP_POWER11; + else + cap = H_GUEST_CAP_POWER10; break; default: break; @@ -415,7 +418,7 @@ static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) struct kvmppc_vcore *vc = vcpu->arch.vcore; /* We can (emulate) our own architecture version and anything older */ - if (cpu_has_feature(CPU_FTR_ARCH_31)) + if (cpu_has_feature(CPU_FTR_P11_PVR) || cpu_has_feature(CPU_FTR_ARCH_31)) host_pcr_bit = PCR_ARCH_31; else if (cpu_has_feature(CPU_FTR_ARCH_300)) host_pcr_bit = PCR_ARCH_300; diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 05f5220960c63..db54b259c251b 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -445,6 +445,8 @@ long kvmhv_nested_init(void) if (rc == H_SUCCESS) { unsigned long capabilities = 0; + if (cpu_has_feature(CPU_FTR_P11_PVR)) + capabilities |= H_GUEST_CAP_POWER11; if (cpu_has_feature(CPU_FTR_ARCH_31)) capabilities |= H_GUEST_CAP_POWER10; if (cpu_has_feature(CPU_FTR_ARCH_300)) diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index eeecea8f202b3..e5c7ce1fb7612 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -370,7 +370,9 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb, * default to L1's PVR. */ if (!vcpu->arch.vcore->arch_compat) { - if (cpu_has_feature(CPU_FTR_ARCH_31)) + if (cpu_has_feature(CPU_FTR_P11_PVR)) + arch_compat = PVR_ARCH_31_P11; + else if (cpu_has_feature(CPU_FTR_ARCH_31)) arch_compat = PVR_ARCH_31; else if (cpu_has_feature(CPU_FTR_ARCH_300)) arch_compat = PVR_ARCH_300; From 2abbd6d5fbe0eae3752b44c963248e19292e5104 Mon Sep 17 00:00:00 2001 From: Christophe Leroy Date: Sat, 7 Sep 2024 17:40:41 +0200 Subject: [PATCH 30/69] powerpc: Add __must_check to set_memory_...() After the following powerpc commits, all calls to set_memory_...() functions check returned value. - Commit 8f17bd2f4196 ("powerpc: Handle error in mark_rodata_ro() and mark_initmem_nx()") - Commit f7f18e30b468 ("powerpc/kprobes: Handle error returned by set_memory_rox()") - Commit 009cf11d4aab ("powerpc: Don't ignore errors from set_memory_{n}p() in __kernel_map_pages()") - Commit 9cbacb834b4a ("powerpc: Don't ignore errors from set_memory_{n}p() in __kernel_map_pages()") - Commit 78cb0945f714 ("powerpc: Handle error in mark_rodata_ro() and mark_initmem_nx()") All calls in core parts of the kernel also always check returned value, can be looked at with following query: $ git grep -w -e set_memory_ro -e set_memory_rw -e set_memory_x -e set_memory_nx -e set_memory_rox `find . -maxdepth 1 -type d | grep -v arch | grep /` It is now possible to flag those functions with __must_check to make sure no new unchecked call it added. Link: https://github.com/KSPP/linux/issues/7 Signed-off-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/775dae48064a661554802ed24ed5bdffe1784724.1725723351.git.christophe.leroy@csgroup.eu --- arch/powerpc/include/asm/set_memory.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h index 9a025b776a4b3..9c8d5747755da 100644 --- a/arch/powerpc/include/asm/set_memory.h +++ b/arch/powerpc/include/asm/set_memory.h @@ -12,37 +12,37 @@ int change_memory_attr(unsigned long addr, int numpages, long action); -static inline int set_memory_ro(unsigned long addr, int numpages) +static inline int __must_check set_memory_ro(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_RO); } -static inline int set_memory_rw(unsigned long addr, int numpages) +static inline int __must_check set_memory_rw(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_RW); } -static inline int set_memory_nx(unsigned long addr, int numpages) +static inline int __must_check set_memory_nx(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_NX); } -static inline int set_memory_x(unsigned long addr, int numpages) +static inline int __must_check set_memory_x(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_X); } -static inline int set_memory_np(unsigned long addr, int numpages) +static inline int __must_check set_memory_np(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_NP); } -static inline int set_memory_p(unsigned long addr, int numpages) +static inline int __must_check set_memory_p(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_P); } -static inline int set_memory_rox(unsigned long addr, int numpages) +static inline int __must_check set_memory_rox(unsigned long addr, int numpages) { return change_memory_attr(addr, numpages, SET_MEMORY_ROX); } From da6ffe855b5a05f29222e3d4ffa4b549413e33a4 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 6 Nov 2024 14:26:25 +0100 Subject: [PATCH 31/69] powerpc/ps3: Mark ps3_setup_uhc_device() __init ps3_setup_uhc_device() is only called from ps3_setup_ehci_device() and ps3_setup_ohci_device(), which are both marked __init. Hence replace the former's __ref marker by __init. Note that before commit bd721ea73e1f9655 ("treewide: replace obsolete _refok by __ref"), the function was marked __init_refok, which probably should have been __init in the first place. Signed-off-by: Geert Uytterhoeven Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/31fe9435056fcfbf82c3a01693be278d5ce4ad0f.1730899557.git.geert+renesas@glider.be --- arch/powerpc/platforms/ps3/device-init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/ps3/device-init.c b/arch/powerpc/platforms/ps3/device-init.c index b18e1c92e554c..61722133eb2d3 100644 --- a/arch/powerpc/platforms/ps3/device-init.c +++ b/arch/powerpc/platforms/ps3/device-init.c @@ -178,7 +178,7 @@ static int __init ps3_setup_gelic_device( return result; } -static int __ref ps3_setup_uhc_device( +static int __init ps3_setup_uhc_device( const struct ps3_repository_device *repo, enum ps3_match_id match_id, enum ps3_interrupt_type interrupt_type, enum ps3_reg_type reg_type) { From 3b9bde403aafa55dcbe7dc250b95af917610f139 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:49 +1100 Subject: [PATCH 32/69] selftests/powerpc: Lower run time of count_stcx_fail test The count_stcx_fail test runs for close to or just over 2 minutes, which means it sometimes times out. That's overkill for a test that just demonstrates some PMU counters are working. Drop the 64 billion instruction case, to lower the runtime to ~30s. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-1-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/pmu/count_stcx_fail.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c index 2070a1e2b3a57..d8dd9a9c6c1b6 100644 --- a/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c +++ b/tools/testing/selftests/powerpc/pmu/count_stcx_fail.c @@ -144,9 +144,6 @@ static int test_body(void) /* Run for 16Bi instructions */ FAIL_IF(do_count_loop(events, 16000000000, overhead, true)); - /* Run for 64Bi instructions */ - FAIL_IF(do_count_loop(events, 64000000000, overhead, true)); - event_close(&events[0]); event_close(&events[1]); From 5543d595954eefb3a6faa18a6dc7b1b3d6022052 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:50 +1100 Subject: [PATCH 33/69] selftests/powerpc: Give all tests 2 minutes timeout Each of the powerpc selftests runs with a timeout of 2 minutes by default (see tools/testing/selftests/powerpc/harness.c). But when tests are run with run_kselftest.sh it uses a timeout of 45 seconds, meaning some tests run OK standalone but fail when run with the test runner. So tell run_kselftest.sh to give each test 130 seconds, that should allow the tests to complete, or be killed by the powerpc test harness after 2 minutes. If for some reason the harness fails, or for the few tests that don't use the harness, the 130 second timeout should catch them if they get stuck. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-2-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/alignment/settings | 1 + tools/testing/selftests/powerpc/cache_shape/settings | 1 + tools/testing/selftests/powerpc/copyloops/settings | 1 + tools/testing/selftests/powerpc/dexcr/settings | 1 + tools/testing/selftests/powerpc/dscr/settings | 1 + tools/testing/selftests/powerpc/lib/settings | 1 + tools/testing/selftests/powerpc/math/settings | 1 + tools/testing/selftests/powerpc/mce/settings | 1 + tools/testing/selftests/powerpc/mm/settings | 1 + tools/testing/selftests/powerpc/nx-gzip/settings | 1 + tools/testing/selftests/powerpc/papr_attributes/settings | 1 + tools/testing/selftests/powerpc/papr_sysparm/settings | 1 + tools/testing/selftests/powerpc/papr_vpd/settings | 1 + tools/testing/selftests/powerpc/pmu/settings | 1 + tools/testing/selftests/powerpc/primitives/settings | 1 + tools/testing/selftests/powerpc/ptrace/settings | 1 + tools/testing/selftests/powerpc/scripts/settings | 1 + tools/testing/selftests/powerpc/security/settings | 1 + tools/testing/selftests/powerpc/stringloops/settings | 1 + tools/testing/selftests/powerpc/switch_endian/settings | 1 + tools/testing/selftests/powerpc/syscalls/settings | 1 + tools/testing/selftests/powerpc/vphn/settings | 1 + 22 files changed, 22 insertions(+) create mode 100644 tools/testing/selftests/powerpc/alignment/settings create mode 100644 tools/testing/selftests/powerpc/cache_shape/settings create mode 100644 tools/testing/selftests/powerpc/copyloops/settings create mode 100644 tools/testing/selftests/powerpc/dexcr/settings create mode 100644 tools/testing/selftests/powerpc/dscr/settings create mode 100644 tools/testing/selftests/powerpc/lib/settings create mode 100644 tools/testing/selftests/powerpc/math/settings create mode 100644 tools/testing/selftests/powerpc/mce/settings create mode 100644 tools/testing/selftests/powerpc/mm/settings create mode 100644 tools/testing/selftests/powerpc/nx-gzip/settings create mode 100644 tools/testing/selftests/powerpc/papr_attributes/settings create mode 100644 tools/testing/selftests/powerpc/papr_sysparm/settings create mode 100644 tools/testing/selftests/powerpc/papr_vpd/settings create mode 100644 tools/testing/selftests/powerpc/pmu/settings create mode 100644 tools/testing/selftests/powerpc/primitives/settings create mode 100644 tools/testing/selftests/powerpc/ptrace/settings create mode 100644 tools/testing/selftests/powerpc/scripts/settings create mode 100644 tools/testing/selftests/powerpc/security/settings create mode 100644 tools/testing/selftests/powerpc/stringloops/settings create mode 100644 tools/testing/selftests/powerpc/switch_endian/settings create mode 100644 tools/testing/selftests/powerpc/syscalls/settings create mode 100644 tools/testing/selftests/powerpc/vphn/settings diff --git a/tools/testing/selftests/powerpc/alignment/settings b/tools/testing/selftests/powerpc/alignment/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/alignment/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/cache_shape/settings b/tools/testing/selftests/powerpc/cache_shape/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/cache_shape/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/copyloops/settings b/tools/testing/selftests/powerpc/copyloops/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/copyloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dexcr/settings b/tools/testing/selftests/powerpc/dexcr/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/dexcr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/dscr/settings b/tools/testing/selftests/powerpc/dscr/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/lib/settings b/tools/testing/selftests/powerpc/lib/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/lib/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/math/settings b/tools/testing/selftests/powerpc/math/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/math/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mce/settings b/tools/testing/selftests/powerpc/mce/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/mce/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/mm/settings b/tools/testing/selftests/powerpc/mm/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/mm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/nx-gzip/settings b/tools/testing/selftests/powerpc/nx-gzip/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/nx-gzip/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_attributes/settings b/tools/testing/selftests/powerpc/papr_attributes/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_attributes/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_sysparm/settings b/tools/testing/selftests/powerpc/papr_sysparm/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_sysparm/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/papr_vpd/settings b/tools/testing/selftests/powerpc/papr_vpd/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/papr_vpd/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/pmu/settings b/tools/testing/selftests/powerpc/pmu/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/pmu/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/primitives/settings b/tools/testing/selftests/powerpc/primitives/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/primitives/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/ptrace/settings b/tools/testing/selftests/powerpc/ptrace/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/ptrace/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/scripts/settings b/tools/testing/selftests/powerpc/scripts/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/scripts/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/security/settings b/tools/testing/selftests/powerpc/security/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/security/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/stringloops/settings b/tools/testing/selftests/powerpc/stringloops/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/stringloops/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/switch_endian/settings b/tools/testing/selftests/powerpc/switch_endian/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/syscalls/settings b/tools/testing/selftests/powerpc/syscalls/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/syscalls/settings @@ -0,0 +1 @@ +timeout=130 diff --git a/tools/testing/selftests/powerpc/vphn/settings b/tools/testing/selftests/powerpc/vphn/settings new file mode 100644 index 0000000000000..2e85661833189 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/settings @@ -0,0 +1 @@ +timeout=130 From d5f578f90a34d85f1cabd4c27af1b2d9fbffe64b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:51 +1100 Subject: [PATCH 34/69] selftests/powerpc: Fix 32-bit BE build errors on Ubuntu 24.04 Starting with Ubuntu 24.04, building the selftests with the big endian compiler (which defaults to 32-bit) fails with errors: stack_expansion_ldst.c:178:37: error: format '%lx' expects argument of type 'long unsigned int', but argument 2 has type 'rlim_t' {aka 'long long unsigned int'} subpage_prot.c:214:38: error: format '%lx' expects argument of type 'long unsigned int', but argument 3 has type 'off_t' {aka 'long long int'} Prior to 24.04 rlim_t was long unsigned int, and off_t was long int. Cast to unsigned long long and long long before passing to printf to avoid the errors. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-3-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c | 2 +- tools/testing/selftests/powerpc/mm/subpage_prot.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c index ed9143990888d..9c0d343d71375 100644 --- a/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c +++ b/tools/testing/selftests/powerpc/mm/stack_expansion_ldst.c @@ -175,7 +175,7 @@ static int test(void) page_size = getpagesize(); getrlimit(RLIMIT_STACK, &rlimit); - printf("Stack rlimit is 0x%lx\n", rlimit.rlim_cur); + printf("Stack rlimit is 0x%llx\n", (unsigned long long)rlimit.rlim_cur); printf("Testing loads ...\n"); test_one_type(LOAD, page_size, rlimit.rlim_cur); diff --git a/tools/testing/selftests/powerpc/mm/subpage_prot.c b/tools/testing/selftests/powerpc/mm/subpage_prot.c index 3ae77ba93208f..8cf9fd5fed1c5 100644 --- a/tools/testing/selftests/powerpc/mm/subpage_prot.c +++ b/tools/testing/selftests/powerpc/mm/subpage_prot.c @@ -211,8 +211,8 @@ int test_file(void) perror("failed to map file"); return 1; } - printf("allocated %s for 0x%lx bytes at %p\n", - file_name, filesize, fileblock); + printf("allocated %s for 0x%llx bytes at %p\n", + file_name, (long long)filesize, fileblock); printf("testing file map...\n"); From c6a75555b4b2643365a007b7162a670d69aa28fe Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:52 +1100 Subject: [PATCH 35/69] selftests/powerpc: Return errors from all tests Fix some tests which weren't returning an error code from main. Although these tests only ever return success, they can still fail if they time out and the harness kills them. If that happens they still return success to the shell, which is incorrect and confuses the higher level error reporting. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-4-mpe@ellerman.id.au --- tools/testing/selftests/powerpc/signal/sigfuz.c | 2 +- .../testing/selftests/powerpc/tm/tm-signal-context-force-tm.c | 2 +- tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c | 3 +-- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c index 08f9afe3b95c4..c101b1391696e 100644 --- a/tools/testing/selftests/powerpc/signal/sigfuz.c +++ b/tools/testing/selftests/powerpc/signal/sigfuz.c @@ -321,5 +321,5 @@ int main(int argc, char **argv) if (!args) args = ARG_COMPLETE; - test_harness(signal_fuzzer, "signal_fuzzer"); + return test_harness(signal_fuzzer, "signal_fuzzer"); } diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c index 421cb082f6bef..0a4bc479ae39b 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-context-force-tm.c @@ -176,5 +176,5 @@ int tm_signal_context_force_tm(void) int main(int argc, char **argv) { - test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); + return test_harness(tm_signal_context_force_tm, "tm_signal_context_force_tm"); } diff --git a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c index 06b801906f275..968864b052ece 100644 --- a/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c +++ b/tools/testing/selftests/powerpc/tm/tm-signal-sigreturn-nt.c @@ -46,6 +46,5 @@ int tm_signal_sigreturn_nt(void) int main(int argc, char **argv) { - test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); + return test_harness(tm_signal_sigreturn_nt, "tm_signal_sigreturn_nt"); } - From a8a54a65cac4f8202df36f925b6746328802d05f Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 00:04:53 +1100 Subject: [PATCH 36/69] selftests/powerpc: Detect taint change in mitigation patching test Currently the mitigation patching test errors out if the kernel is tainted prior to the test running. That causes the test to fail unnecessarily if some other test has caused the kernel to be tainted, or if a proprietary or force module is loaded for example. Instead just warn if the kernel is tainted to begin with, and only report a change in the taint state as an error in the test. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106130453.1741013-5-mpe@ellerman.id.au --- .../selftests/powerpc/security/mitigation-patching.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/security/mitigation-patching.sh b/tools/testing/selftests/powerpc/security/mitigation-patching.sh index f43aa4b77fbaa..9a4612e2e9537 100755 --- a/tools/testing/selftests/powerpc/security/mitigation-patching.sh +++ b/tools/testing/selftests/powerpc/security/mitigation-patching.sh @@ -36,8 +36,7 @@ fi tainted=$(cat /proc/sys/kernel/tainted) if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel already tainted!" >&2 - exit 1 + echo "Warning: kernel already tainted! ($tainted)" >&2 fi mitigations="barrier_nospec stf_barrier count_cache_flush rfi_flush entry_flush uaccess_flush" @@ -68,9 +67,10 @@ fi echo "Waiting for timeout ..." wait +orig_tainted=$tainted tainted=$(cat /proc/sys/kernel/tainted) -if [[ "$tainted" -ne 0 ]]; then - echo "Error: kernel became tainted!" >&2 +if [[ "$tainted" != "$orig_tainted" ]]; then + echo "Error: kernel newly tainted, before ($orig_tainted) after ($tainted)" >&2 exit 1 fi From 817a763a07f2407ca43b2134d067e7c0576f1b79 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 6 Nov 2024 15:26:39 -0600 Subject: [PATCH 37/69] powerpc/44x: Use for_each_of_range() iterator Simplify the ppc44x PCI dma-ranges parsing to use the for_each_of_range() iterator. Signed-off-by: Rob Herring (Arm) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106212640.341677-1-robh@kernel.org --- arch/powerpc/platforms/44x/pci.c | 23 +++++++++-------------- 1 file changed, 9 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/platforms/44x/pci.c b/arch/powerpc/platforms/44x/pci.c index db6d33ca753f1..364aeb86ab64e 100644 --- a/arch/powerpc/platforms/44x/pci.c +++ b/arch/powerpc/platforms/44x/pci.c @@ -94,10 +94,8 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, struct resource *res) { u64 size; - const u32 *ranges; - int rlen; - int pna = of_n_addr_cells(hose->dn); - int np = pna + 5; + struct of_range_parser parser; + struct of_range range; /* Default */ res->start = 0; @@ -105,18 +103,15 @@ static int __init ppc4xx_parse_dma_ranges(struct pci_controller *hose, res->end = size - 1; res->flags = IORESOURCE_MEM | IORESOURCE_PREFETCH; - /* Get dma-ranges property */ - ranges = of_get_property(hose->dn, "dma-ranges", &rlen); - if (ranges == NULL) + if (of_pci_dma_range_parser_init(&parser, hose->dn)) goto out; - /* Walk it */ - while ((rlen -= np * 4) >= 0) { - u32 pci_space = ranges[0]; - u64 pci_addr = of_read_number(ranges + 1, 2); - u64 cpu_addr = of_translate_dma_address(hose->dn, ranges + 3); - size = of_read_number(ranges + pna + 3, 2); - ranges += np; + for_each_of_range(&parser, &range) { + u32 pci_space = range.flags; + u64 pci_addr = range.bus_addr; + u64 cpu_addr = range.cpu_addr; + size = range.size; + if (cpu_addr == OF_BAD_ADDR || size == 0) continue; From f3ef7dbda9b589cdad833001e4366eb80977b7f1 Mon Sep 17 00:00:00 2001 From: "Rob Herring (Arm)" Date: Wed, 6 Nov 2024 15:26:46 -0600 Subject: [PATCH 38/69] powerpc/cell: Use for_each_of_range() iterator Simplify the cell_iommu_get_fixed_address() dma-ranges parsing to use the for_each_of_range() iterator. Signed-off-by: Rob Herring (Arm) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241106212647.341857-1-robh@kernel.org --- arch/powerpc/platforms/cell/iommu.c | 49 ++++++++++------------------- 1 file changed, 16 insertions(+), 33 deletions(-) diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 4cd9c0de22c2b..62c9679b8ca33 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -779,58 +779,41 @@ static int __init cell_iommu_init_disabled(void) static u64 cell_iommu_get_fixed_address(struct device *dev) { - u64 cpu_addr, size, best_size, dev_addr = OF_BAD_ADDR; + u64 best_size, dev_addr = OF_BAD_ADDR; struct device_node *np; - const u32 *ranges = NULL; - int i, len, best, naddr, nsize, pna, range_size; + struct of_range_parser parser; + struct of_range range; /* We can be called for platform devices that have no of_node */ np = of_node_get(dev->of_node); if (!np) goto out; - while (1) { - naddr = of_n_addr_cells(np); - nsize = of_n_size_cells(np); - np = of_get_next_parent(np); - if (!np) - break; - - ranges = of_get_property(np, "dma-ranges", &len); + while ((np = of_get_next_parent(np))) { + if (of_pci_dma_range_parser_init(&parser, np)) + continue; - /* Ignore empty ranges, they imply no translation required */ - if (ranges && len > 0) + if (of_range_count(&parser)) break; } - if (!ranges) { + if (!np) { dev_dbg(dev, "iommu: no dma-ranges found\n"); goto out; } - len /= sizeof(u32); - - pna = of_n_addr_cells(np); - range_size = naddr + nsize + pna; - - /* dma-ranges format: - * child addr : naddr cells - * parent addr : pna cells - * size : nsize cells - */ - for (i = 0, best = -1, best_size = 0; i < len; i += range_size) { - cpu_addr = of_translate_dma_address(np, ranges + i + naddr); - size = of_read_number(ranges + i + naddr + pna, nsize); + best_size = 0; + for_each_of_range(&parser, &range) { + if (!range.cpu_addr) + continue; - if (cpu_addr == 0 && size > best_size) { - best = i; - best_size = size; + if (range.size > best_size) { + best_size = range.size; + dev_addr = range.bus_addr; } } - if (best >= 0) { - dev_addr = of_read_number(ranges + best, naddr); - } else + if (!best_size) dev_dbg(dev, "iommu: no suitable range found!\n"); out: From cfec8463d9a19ec043845525fe5fd675e59a8aab Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Thu, 7 Nov 2024 22:16:30 +1100 Subject: [PATCH 39/69] powerpc/ftrace: Fix ftrace bug with KASAN=y Booting a KASAN=y kernel with the recently added ftrace out-of-line support causes a warning at boot: ------------[ cut here ]------------ Stub index overflow (1729 > 1728) WARNING: CPU: 0 PID: 0 at arch/powerpc/kernel/trace/ftrace.c:209 ftrace_init_nop+0x408/0x444 ... NIP ftrace_init_nop+0x408/0x444 LR ftrace_init_nop+0x404/0x444 Call Trace: ftrace_init_nop+0x404/0x444 (unreliable) ftrace_process_locs+0x544/0x8a0 ftrace_init+0xb4/0x22c start_kernel+0x1dc/0x4d4 start_here_common+0x1c/0x20 ... ftrace failed to modify [] _sub_I_65535_1+0x8/0x3c actual: 00:00:00:60 Initializing ftrace call sites ftrace record flags: 0 (0) expected tramp: c00000000008b418 ------------[ cut here ]------------ The function in question, _sub_I_65535_1 is some sort of trampoline generated for KASAN, and is in the .text.startup section. That section is part of INIT_TEXT, meaning is_kernel_inittext() returns true for it. But the script that determines how many out-of-line ftrace stubs are needed isn't doesn't consider .text.startup as inittext, leading to there not being enough space for the init stubs. Conversely the logic to calculate how many stubs are needed for the text section isn't filtering out the symbols in .text.startup and so ends up over counting. Fix both problems by calculating the total number of stubs first, then the number that count as inittext, and then subtract the latter from the former to get the count for the text section. Fixes: eec37961a56a ("powerpc64/ftrace: Move ftrace sequence out of line") Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241107111630.31068-1-mpe@ellerman.id.au --- arch/powerpc/tools/ftrace-gen-ool-stubs.sh | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh index 950a7778324b6..bac186bdf64a7 100755 --- a/arch/powerpc/tools/ftrace-gen-ool-stubs.sh +++ b/arch/powerpc/tools/ftrace-gen-ool-stubs.sh @@ -15,10 +15,11 @@ if [ -z "$is_64bit" ]; then RELOCATION=R_PPC_ADDR32 fi -num_ool_stubs_text=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | - grep -v ".init.text" | grep -c "$RELOCATION") +num_ool_stubs_total=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | + grep -c "$RELOCATION") num_ool_stubs_inittext=$($objdump -r -j __patchable_function_entries "$vmlinux_o" | - grep ".init.text" | grep -c "$RELOCATION") + grep -e ".init.text" -e ".text.startup" | grep -c "$RELOCATION") +num_ool_stubs_text=$((num_ool_stubs_total - num_ool_stubs_inittext)) if [ "$num_ool_stubs_text" -gt "$num_ool_stubs_text_builtin" ]; then num_ool_stubs_text_end=$((num_ool_stubs_text - num_ool_stubs_text_builtin)) From f4892c68ecc1cf45e41a78820dd2eebccc945b66 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Thu, 7 Nov 2024 11:28:16 +0530 Subject: [PATCH 40/69] powerpc/fadump: allocate memory for additional parameters early Memory for passing additional parameters to fadump capture kernel is allocated during subsys_initcall level, using memblock. But as slab is already available by this time, allocation happens via the buddy allocator. This may work for radix MMU but is likely to fail in most cases for hash MMU as hash MMU needs this memory in the first memory block for it to be accessible in real mode in the capture kernel (second boot). So, allocate memory for additional parameters area as soon as MMU mode is obvious. Fixes: 683eab94da75 ("powerpc/fadump: setup additional parameters for dump capture kernel") Reported-by: Venkat Rao Bagalkote Closes: https://lore.kernel.org/lkml/a70e4064-a040-447b-8556-1fd02f19383d@linux.vnet.ibm.com/T/#u Signed-off-by: Hari Bathini Signed-off-by: Sourabh Jain Tested-by: Venkat Rao Bagalkote Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241107055817.489795-1-sourabhjain@linux.ibm.com --- arch/powerpc/include/asm/fadump.h | 2 ++ arch/powerpc/kernel/fadump.c | 15 ++++++++++----- arch/powerpc/kernel/prom.c | 3 +++ 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/fadump.h b/arch/powerpc/include/asm/fadump.h index 3638f04447f59..a48f54dde4f65 100644 --- a/arch/powerpc/include/asm/fadump.h +++ b/arch/powerpc/include/asm/fadump.h @@ -19,6 +19,7 @@ extern int is_fadump_active(void); extern int should_fadump_crash(void); extern void crash_fadump(struct pt_regs *, const char *); extern void fadump_cleanup(void); +void fadump_setup_param_area(void); extern void fadump_append_bootargs(void); #else /* CONFIG_FA_DUMP */ @@ -26,6 +27,7 @@ static inline int is_fadump_active(void) { return 0; } static inline int should_fadump_crash(void) { return 0; } static inline void crash_fadump(struct pt_regs *regs, const char *str) { } static inline void fadump_cleanup(void) { } +static inline void fadump_setup_param_area(void) { } static inline void fadump_append_bootargs(void) { } #endif /* !CONFIG_FA_DUMP */ diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index c42f89862893e..5583017bbfbce 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -1587,6 +1587,12 @@ static void __init fadump_init_files(void) return; } + if (fw_dump.param_area) { + rc = sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr); + if (rc) + pr_err("unable to create bootargs_append sysfs file (%d)\n", rc); + } + debugfs_create_file("fadump_region", 0444, arch_debugfs_dir, NULL, &fadump_region_fops); @@ -1741,7 +1747,7 @@ static void __init fadump_process(void) * Reserve memory to store additional parameters to be passed * for fadump/capture kernel. */ -static void __init fadump_setup_param_area(void) +void __init fadump_setup_param_area(void) { phys_addr_t range_start, range_end; @@ -1749,7 +1755,7 @@ static void __init fadump_setup_param_area(void) return; /* This memory can't be used by PFW or bootloader as it is shared across kernels */ - if (radix_enabled()) { + if (early_radix_enabled()) { /* * Anywhere in the upper half should be good enough as all memory * is accessible in real mode. @@ -1777,12 +1783,12 @@ static void __init fadump_setup_param_area(void) COMMAND_LINE_SIZE, range_start, range_end); - if (!fw_dump.param_area || sysfs_create_file(fadump_kobj, &bootargs_append_attr.attr)) { + if (!fw_dump.param_area) { pr_warn("WARNING: Could not setup area to pass additional parameters!\n"); return; } - memset(phys_to_virt(fw_dump.param_area), 0, COMMAND_LINE_SIZE); + memset((void *)fw_dump.param_area, 0, COMMAND_LINE_SIZE); } /* @@ -1808,7 +1814,6 @@ int __init setup_fadump(void) } /* Initialize the kernel dump memory structure and register with f/w */ else if (fw_dump.reserve_dump_area_size) { - fadump_setup_param_area(); fw_dump.ops->fadump_init_mem_struct(&fw_dump); register_fadump(); } diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c index 0be07ed407c70..47db1b1aef254 100644 --- a/arch/powerpc/kernel/prom.c +++ b/arch/powerpc/kernel/prom.c @@ -908,6 +908,9 @@ void __init early_init_devtree(void *params) mmu_early_init_devtree(); + /* Setup param area for passing additional parameters to fadump capture kernel. */ + fadump_setup_param_area(); + #ifdef CONFIG_PPC_POWERNV /* Scan and build the list of machine check recoverable ranges */ of_scan_flat_dt(early_init_dt_scan_recoverable_ranges, NULL); From fb90dca828b6070709093934c6dec56489a2d91d Mon Sep 17 00:00:00 2001 From: Sourabh Jain Date: Thu, 7 Nov 2024 11:28:17 +0530 Subject: [PATCH 41/69] fadump: reserve param area if below boot_mem_top The param area is a memory region where the kernel places additional command-line arguments for fadump kernel. Currently, the param memory area is reserved in fadump kernel if it is above boot_mem_top. However, it should be reserved if it is below boot_mem_top because the fadump kernel already reserves memory from boot_mem_top to the end of DRAM. Currently, there is no impact from not reserving param memory if it is below boot_mem_top, as it is not used after the early boot phase of the fadump kernel. However, if this changes in the future, it could lead to issues in the fadump kernel. Fixes: 3416c9daa6b1 ("powerpc/fadump: pass additional parameters when fadump is active") Acked-by: Hari Bathini Signed-off-by: Sourabh Jain Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241107055817.489795-2-sourabhjain@linux.ibm.com --- arch/powerpc/kernel/fadump.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/fadump.c b/arch/powerpc/kernel/fadump.c index 5583017bbfbce..4b371c738213c 100644 --- a/arch/powerpc/kernel/fadump.c +++ b/arch/powerpc/kernel/fadump.c @@ -153,7 +153,7 @@ void __init fadump_append_bootargs(void) if (!fw_dump.dump_active || !fw_dump.param_area_supported || !fw_dump.param_area) return; - if (fw_dump.param_area >= fw_dump.boot_mem_top) { + if (fw_dump.param_area < fw_dump.boot_mem_top) { if (memblock_reserve(fw_dump.param_area, COMMAND_LINE_SIZE)) { pr_warn("WARNING: Can't use additional parameters area!\n"); fw_dump.param_area = 0; From 44e5d21e6d3fd2a1fed7f0327cf72e99397e2eaf Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Fri, 8 Nov 2024 15:18:37 +0530 Subject: [PATCH 42/69] powerpc/pseries: Fix KVM guest detection for disabling hardlockup detector As per the kernel documentation[1], hardlockup detector should be disabled in KVM guests as it may give false positives. On PPC, hardlockup detector is enabled inside KVM guests because disable_hardlockup_detector() is marked as early_initcall and it relies on kvm_guest static key (is_kvm_guest()) which is initialized later during boot by check_kvm_guest(), which is a core_initcall. check_kvm_guest() is also called in pSeries_smp_probe(), which is called before initcalls, but it is skipped if KVM guest does not have doorbell support or if the guest is launched with SMT=1. Call check_kvm_guest() in disable_hardlockup_detector() so that is_kvm_guest() check goes through fine and hardlockup detector can be disabled inside the KVM guest. [1]: Documentation/admin-guide/sysctl/kernel.rst Fixes: 633c8e9800f3 ("powerpc/pseries: Enable hardlockup watchdog for PowerVM partitions") Cc: stable@vger.kernel.org # v5.14+ Signed-off-by: Gautam Menghani Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241108094839.33084-1-gautam@linux.ibm.com --- arch/powerpc/kernel/setup_64.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index 22f83fbbc762a..1edc7cd68c10d 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -920,6 +920,7 @@ static int __init disable_hardlockup_detector(void) hardlockup_detector_disable(); #else if (firmware_has_feature(FW_FEATURE_LPAR)) { + check_kvm_guest(); if (is_kvm_guest()) hardlockup_detector_disable(); } From 5b881c1f83792f5db421124171b06f1b8f1fe075 Mon Sep 17 00:00:00 2001 From: David Wang <00107082@163.com> Date: Sat, 9 Nov 2024 00:23:27 +0800 Subject: [PATCH 43/69] powerpc/irq: use seq_put_decimal_ull_width() for decimal values On a system with n CPUs and m interrupts, there will be n*m decimal values yielded via seq_printf(.."%10u "..) which is less efficient than seq_put_decimal_ull_width(), stress reading /proc/interrupts indicates ~30% performance improvement with this patch. Signed-off-by: David Wang <00107082@163.com> [mpe: Flesh out change log based on original submission] Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/all/20241103080552.4787-1-00107082@163.com Link: https://patch.msgid.link/20241108162327.9887-1-00107082@163.com --- arch/powerpc/kernel/irq.c | 44 +++++++++++++++++++-------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/arch/powerpc/kernel/irq.c b/arch/powerpc/kernel/irq.c index 2e1600a8bbbbf..a0e8b998c9b52 100644 --- a/arch/powerpc/kernel/irq.c +++ b/arch/powerpc/kernel/irq.c @@ -89,69 +89,69 @@ int arch_show_interrupts(struct seq_file *p, int prec) #if defined(CONFIG_PPC32) && defined(CONFIG_TAU_INT) if (tau_initialized) { - seq_printf(p, "%*s: ", prec, "TAU"); + seq_printf(p, "%*s:", prec, "TAU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", tau_interrupts(j)); + seq_put_decimal_ull_width(p, " ", tau_interrupts(j), 10); seq_puts(p, " PowerPC Thermal Assist (cpu temp)\n"); } #endif /* CONFIG_PPC32 && CONFIG_TAU_INT */ - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_event, 10); seq_printf(p, " Local timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "BCT"); + seq_printf(p, "%*s:", prec, "BCT"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).broadcast_irqs_event); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).broadcast_irqs_event, 10); seq_printf(p, " Broadcast timer interrupts for timer event device\n"); - seq_printf(p, "%*s: ", prec, "LOC"); + seq_printf(p, "%*s:", prec, "LOC"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).timer_irqs_others); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).timer_irqs_others, 10); seq_printf(p, " Local timer interrupts for others\n"); - seq_printf(p, "%*s: ", prec, "SPU"); + seq_printf(p, "%*s:", prec, "SPU"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).spurious_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).spurious_irqs, 10); seq_printf(p, " Spurious interrupts\n"); - seq_printf(p, "%*s: ", prec, "PMI"); + seq_printf(p, "%*s:", prec, "PMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).pmu_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).pmu_irqs, 10); seq_printf(p, " Performance monitoring interrupts\n"); - seq_printf(p, "%*s: ", prec, "MCE"); + seq_printf(p, "%*s:", prec, "MCE"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).mce_exceptions); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).mce_exceptions, 10); seq_printf(p, " Machine check exceptions\n"); #ifdef CONFIG_PPC_BOOK3S_64 if (cpu_has_feature(CPU_FTR_HVMODE)) { - seq_printf(p, "%*s: ", prec, "HMI"); + seq_printf(p, "%*s:", prec, "HMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", paca_ptrs[j]->hmi_irqs); + seq_put_decimal_ull_width(p, " ", paca_ptrs[j]->hmi_irqs, 10); seq_printf(p, " Hypervisor Maintenance Interrupts\n"); } #endif - seq_printf(p, "%*s: ", prec, "NMI"); + seq_printf(p, "%*s:", prec, "NMI"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).sreset_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).sreset_irqs, 10); seq_printf(p, " System Reset interrupts\n"); #ifdef CONFIG_PPC_WATCHDOG - seq_printf(p, "%*s: ", prec, "WDG"); + seq_printf(p, "%*s:", prec, "WDG"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).soft_nmi_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).soft_nmi_irqs, 10); seq_printf(p, " Watchdog soft-NMI interrupts\n"); #endif #ifdef CONFIG_PPC_DOORBELL if (cpu_has_feature(CPU_FTR_DBELL)) { - seq_printf(p, "%*s: ", prec, "DBL"); + seq_printf(p, "%*s:", prec, "DBL"); for_each_online_cpu(j) - seq_printf(p, "%10u ", per_cpu(irq_stat, j).doorbell_irqs); + seq_put_decimal_ull_width(p, " ", per_cpu(irq_stat, j).doorbell_irqs, 10); seq_printf(p, " Doorbell interrupts\n"); } #endif From fae2987e67786a6358c0ef47189b12ff19e9543a Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 12 Nov 2024 19:51:48 +1100 Subject: [PATCH 44/69] cpufreq: maple: Remove maple driver This driver is no longer buildable since the PPC_MAPLE platform was removed, see commit 62f8f307c80e ("powerpc/64: Remove maple platform"). Remove the driver. Note that the comment in the driver says it supports "SMU & 970FX based G5 Macs", but that's not true, that comment was copied from pmac64-cpufreq.c, which still exists and continues to support those machines. Acked-by: Viresh Kumar Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241112085148.415574-1-mpe@ellerman.id.au --- drivers/cpufreq/Kconfig.powerpc | 7 - drivers/cpufreq/Makefile | 1 - drivers/cpufreq/maple-cpufreq.c | 242 -------------------------------- 3 files changed, 250 deletions(-) delete mode 100644 drivers/cpufreq/maple-cpufreq.c diff --git a/drivers/cpufreq/Kconfig.powerpc b/drivers/cpufreq/Kconfig.powerpc index 58151ca566958..eb678fa5260aa 100644 --- a/drivers/cpufreq/Kconfig.powerpc +++ b/drivers/cpufreq/Kconfig.powerpc @@ -17,13 +17,6 @@ config CPU_FREQ_CBE_PMI frequencies. Using PMI, the processor will not only be able to run at lower speed, but also at lower core voltage. -config CPU_FREQ_MAPLE - bool "Support for Maple 970FX Evaluation Board" - depends on PPC_MAPLE - help - This adds support for frequency switching on Maple 970FX - Evaluation Board and compatible boards (IBM JS2x blades). - config CPU_FREQ_PMAC bool "Support for Apple PowerBooks" depends on ADB_PMU && PPC32 diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile index 0f184031dd123..1a8f787db7e21 100644 --- a/drivers/cpufreq/Makefile +++ b/drivers/cpufreq/Makefile @@ -92,7 +92,6 @@ obj-$(CONFIG_ARM_VEXPRESS_SPC_CPUFREQ) += vexpress-spc-cpufreq.o obj-$(CONFIG_CPU_FREQ_CBE) += ppc-cbe-cpufreq.o ppc-cbe-cpufreq-y += ppc_cbe_cpufreq_pervasive.o ppc_cbe_cpufreq.o obj-$(CONFIG_CPU_FREQ_CBE_PMI) += ppc_cbe_cpufreq_pmi.o -obj-$(CONFIG_CPU_FREQ_MAPLE) += maple-cpufreq.o obj-$(CONFIG_QORIQ_CPUFREQ) += qoriq-cpufreq.o obj-$(CONFIG_CPU_FREQ_PMAC) += pmac32-cpufreq.o obj-$(CONFIG_CPU_FREQ_PMAC64) += pmac64-cpufreq.o diff --git a/drivers/cpufreq/maple-cpufreq.c b/drivers/cpufreq/maple-cpufreq.c deleted file mode 100644 index 690da85c4865a..0000000000000 --- a/drivers/cpufreq/maple-cpufreq.c +++ /dev/null @@ -1,242 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * Copyright (C) 2011 Dmitry Eremin-Solenikov - * Copyright (C) 2002 - 2005 Benjamin Herrenschmidt - * and Markus Demleitner - * - * This driver adds basic cpufreq support for SMU & 970FX based G5 Macs, - * that is iMac G5 and latest single CPU desktop. - */ - -#undef DEBUG - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DBG(fmt...) pr_debug(fmt) - -/* see 970FX user manual */ - -#define SCOM_PCR 0x0aa001 /* PCR scom addr */ - -#define PCR_HILO_SELECT 0x80000000U /* 1 = PCR, 0 = PCRH */ -#define PCR_SPEED_FULL 0x00000000U /* 1:1 speed value */ -#define PCR_SPEED_HALF 0x00020000U /* 1:2 speed value */ -#define PCR_SPEED_QUARTER 0x00040000U /* 1:4 speed value */ -#define PCR_SPEED_MASK 0x000e0000U /* speed mask */ -#define PCR_SPEED_SHIFT 17 -#define PCR_FREQ_REQ_VALID 0x00010000U /* freq request valid */ -#define PCR_VOLT_REQ_VALID 0x00008000U /* volt request valid */ -#define PCR_TARGET_TIME_MASK 0x00006000U /* target time */ -#define PCR_STATLAT_MASK 0x00001f00U /* STATLAT value */ -#define PCR_SNOOPLAT_MASK 0x000000f0U /* SNOOPLAT value */ -#define PCR_SNOOPACC_MASK 0x0000000fU /* SNOOPACC value */ - -#define SCOM_PSR 0x408001 /* PSR scom addr */ -/* warning: PSR is a 64 bits register */ -#define PSR_CMD_RECEIVED 0x2000000000000000U /* command received */ -#define PSR_CMD_COMPLETED 0x1000000000000000U /* command completed */ -#define PSR_CUR_SPEED_MASK 0x0300000000000000U /* current speed */ -#define PSR_CUR_SPEED_SHIFT (56) - -/* - * The G5 only supports two frequencies (Quarter speed is not supported) - */ -#define CPUFREQ_HIGH 0 -#define CPUFREQ_LOW 1 - -static struct cpufreq_frequency_table maple_cpu_freqs[] = { - {0, CPUFREQ_HIGH, 0}, - {0, CPUFREQ_LOW, 0}, - {0, 0, CPUFREQ_TABLE_END}, -}; - -/* Power mode data is an array of the 32 bits PCR values to use for - * the various frequencies, retrieved from the device-tree - */ -static int maple_pmode_cur; - -static const u32 *maple_pmode_data; -static int maple_pmode_max; - -/* - * SCOM based frequency switching for 970FX rev3 - */ -static int maple_scom_switch_freq(int speed_mode) -{ - unsigned long flags; - int to; - - local_irq_save(flags); - - /* Clear PCR high */ - scom970_write(SCOM_PCR, 0); - /* Clear PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | 0); - /* Set PCR low */ - scom970_write(SCOM_PCR, PCR_HILO_SELECT | - maple_pmode_data[speed_mode]); - - /* Wait for completion */ - for (to = 0; to < 10; to++) { - unsigned long psr = scom970_read(SCOM_PSR); - - if ((psr & PSR_CMD_RECEIVED) == 0 && - (((psr >> PSR_CUR_SPEED_SHIFT) ^ - (maple_pmode_data[speed_mode] >> PCR_SPEED_SHIFT)) & 0x3) - == 0) - break; - if (psr & PSR_CMD_COMPLETED) - break; - udelay(100); - } - - local_irq_restore(flags); - - maple_pmode_cur = speed_mode; - ppc_proc_freq = maple_cpu_freqs[speed_mode].frequency * 1000ul; - - return 0; -} - -static int maple_scom_query_freq(void) -{ - unsigned long psr = scom970_read(SCOM_PSR); - int i; - - for (i = 0; i <= maple_pmode_max; i++) - if ((((psr >> PSR_CUR_SPEED_SHIFT) ^ - (maple_pmode_data[i] >> PCR_SPEED_SHIFT)) & 0x3) == 0) - break; - return i; -} - -/* - * Common interface to the cpufreq core - */ - -static int maple_cpufreq_target(struct cpufreq_policy *policy, - unsigned int index) -{ - return maple_scom_switch_freq(index); -} - -static unsigned int maple_cpufreq_get_speed(unsigned int cpu) -{ - return maple_cpu_freqs[maple_pmode_cur].frequency; -} - -static int maple_cpufreq_cpu_init(struct cpufreq_policy *policy) -{ - cpufreq_generic_init(policy, maple_cpu_freqs, 12000); - return 0; -} - -static struct cpufreq_driver maple_cpufreq_driver = { - .name = "maple", - .flags = CPUFREQ_CONST_LOOPS, - .init = maple_cpufreq_cpu_init, - .verify = cpufreq_generic_frequency_table_verify, - .target_index = maple_cpufreq_target, - .get = maple_cpufreq_get_speed, - .attr = cpufreq_generic_attr, -}; - -static int __init maple_cpufreq_init(void) -{ - struct device_node *cpunode; - unsigned int psize; - unsigned long max_freq; - const u32 *valp; - u32 pvr_hi; - int rc = -ENODEV; - - /* - * Behave here like powermac driver which checks machine compatibility - * to ease merging of two drivers in future. - */ - if (!of_machine_is_compatible("Momentum,Maple") && - !of_machine_is_compatible("Momentum,Apache")) - return 0; - - /* Get first CPU node */ - cpunode = of_cpu_device_node_get(0); - if (cpunode == NULL) { - pr_err("Can't find any CPU 0 node\n"); - goto bail_noprops; - } - - /* Check 970FX for now */ - /* we actually don't care on which CPU to access PVR */ - pvr_hi = PVR_VER(mfspr(SPRN_PVR)); - if (pvr_hi != 0x3c && pvr_hi != 0x44) { - pr_err("Unsupported CPU version (%x)\n", pvr_hi); - goto bail_noprops; - } - - /* Look for the powertune data in the device-tree */ - /* - * On Maple this property is provided by PIBS in dual-processor config, - * not provided by PIBS in CPU0 config and also not provided by SLOF, - * so YMMV - */ - maple_pmode_data = of_get_property(cpunode, "power-mode-data", &psize); - if (!maple_pmode_data) { - DBG("No power-mode-data !\n"); - goto bail_noprops; - } - maple_pmode_max = psize / sizeof(u32) - 1; - - /* - * From what I see, clock-frequency is always the maximal frequency. - * The current driver can not slew sysclk yet, so we really only deal - * with powertune steps for now. We also only implement full freq and - * half freq in this version. So far, I haven't yet seen a machine - * supporting anything else. - */ - valp = of_get_property(cpunode, "clock-frequency", NULL); - if (!valp) - goto bail_noprops; - max_freq = (*valp)/1000; - maple_cpu_freqs[0].frequency = max_freq; - maple_cpu_freqs[1].frequency = max_freq/2; - - /* Force apply current frequency to make sure everything is in - * sync (voltage is right for example). Firmware may leave us with - * a strange setting ... - */ - msleep(10); - maple_pmode_cur = -1; - maple_scom_switch_freq(maple_scom_query_freq()); - - pr_info("Registering Maple CPU frequency driver\n"); - pr_info("Low: %d Mhz, High: %d Mhz, Cur: %d MHz\n", - maple_cpu_freqs[1].frequency/1000, - maple_cpu_freqs[0].frequency/1000, - maple_cpu_freqs[maple_pmode_cur].frequency/1000); - - rc = cpufreq_register_driver(&maple_cpufreq_driver); - -bail_noprops: - of_node_put(cpunode); - - return rc; -} - -module_init(maple_cpufreq_init); - - -MODULE_DESCRIPTION("cpufreq driver for Maple 970FX/970MP boards"); -MODULE_LICENSE("GPL"); From be6b0eb5c46d85e360c0ff8bdde1aaa199a8fb6d Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 12 Nov 2024 22:48:05 +1100 Subject: [PATCH 45/69] powerpc/cell: Remove dead extern declaration for spu_priv1_beat_ops spu_priv1_beat_ops were removed in commit bf4981a00636 ("powerpc: Remove the celleb support"), remove the unneeded extern. Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241112114805.453894-1-mpe@ellerman.id.au --- arch/powerpc/include/asm/spu_priv1.h | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/include/asm/spu_priv1.h b/arch/powerpc/include/asm/spu_priv1.h index 2167d756e6d59..6fee411d973d9 100644 --- a/arch/powerpc/include/asm/spu_priv1.h +++ b/arch/powerpc/include/asm/spu_priv1.h @@ -216,7 +216,6 @@ spu_disable_spu (struct spu_context *ctx) */ extern const struct spu_priv1_ops spu_priv1_mmio_ops; -extern const struct spu_priv1_ops spu_priv1_beat_ops; extern const struct spu_management_ops spu_management_of_ops; From d7a82238cb8c77d4ed8cc97cd556c5f3e64bc749 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Thomas=20Wei=C3=9Fschuh?= Date: Wed, 13 Nov 2024 09:06:58 +0100 Subject: [PATCH 46/69] powerpc/vdso: Remove unused clockmode asm offsets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These offsets are not used anymore, delete them. Fixes: c39b1dcf055d ("powerpc/vdso: Add a page for non-time data") Signed-off-by: Thomas Weißschuh Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241113-vdso-powerpc-asm-offsets-v1-1-3f7e589f090d@linutronix.de --- arch/powerpc/kernel/asm-offsets.c | 2 -- 1 file changed, 2 deletions(-) diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index ae198b2d9b8c6..7a390bd4f4af3 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -346,8 +346,6 @@ int main(void) #else OFFSET(CFG_SYSCALL_MAP32, vdso_arch_data, syscall_map); #endif - OFFSET(VDSO_CLOCKMODE_OFFSET, vdso_arch_data, data[0].clock_mode); - DEFINE(VDSO_CLOCKMODE_TIMENS, VDSO_CLOCKMODE_TIMENS); #ifdef CONFIG_BUG DEFINE(BUG_ENTRY_SIZE, sizeof(struct bug_entry)); From ed351c57432122c4499be4f4aee8711d6fa93f3b Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Sat, 9 Nov 2024 12:02:55 +0530 Subject: [PATCH 47/69] Revert "KVM: PPC: Book3S HV Nested: Stop forwarding all HFUs to L1" This reverts commit 7c3ded5735141ff4d049747c9f76672a8b737c49. On PowerNV, when a nested guest tries to use a feature prohibited by HFSCR, the nested hypervisor (L1) should get a H_FAC_UNAVAILABLE trap so that L1 can emulate the feature. But with the change introduced by commit 7c3ded573514 ("KVM: PPC: Book3S HV Nested: Stop forwarding all HFUs to L1") the L1 ends up getting a H_EMUL_ASSIST because of which, the L1 ends up injecting a SIGILL when L2 (nested guest) tries to use doorbells. Reviewed-by: Nicholas Piggin Signed-off-by: Gautam Menghani Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241109063301.105289-2-gautam@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 31 ++----------------------------- 1 file changed, 2 insertions(+), 29 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3cdd6d6df041a..563cae92d45d3 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -2063,36 +2063,9 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) fallthrough; /* go to facility unavailable handler */ #endif - case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: { - u64 cause = vcpu->arch.hfscr >> 56; - - /* - * Only pass HFU interrupts to the L1 if the facility is - * permitted but disabled by the L1's HFSCR, otherwise - * the interrupt does not make sense to the L1 so turn - * it into a HEAI. - */ - if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) || - (vcpu->arch.nested_hfscr & (1UL << cause))) { - ppc_inst_t pinst; - vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST; - - /* - * If the fetch failed, return to guest and - * try executing it again. - */ - r = kvmppc_get_last_inst(vcpu, INST_GENERIC, &pinst); - vcpu->arch.emul_inst = ppc_inst_val(pinst); - if (r != EMULATE_DONE) - r = RESUME_GUEST; - else - r = RESUME_HOST; - } else { - r = RESUME_HOST; - } - + case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: + r = RESUME_HOST; break; - } case BOOK3S_INTERRUPT_HV_RM_HARD: vcpu->arch.trap = 0; From 0d3c6b28896f9889c8864dab469e0343a0ad1c0c Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Sat, 9 Nov 2024 12:02:56 +0530 Subject: [PATCH 48/69] KVM: PPC: Book3S HV: Stop using vc->dpdes for nested KVM guests commit 6398326b9ba1 ("KVM: PPC: Book3S HV P9: Stop using vc->dpdes") introduced an optimization to use only vcpu->doorbell_request for SMT emulation for Power9 and above guests, but the code for nested guests still relies on the old way of handling doorbells, due to which an L2 guest (see [1]) cannot be booted with XICS with SMT>1. The command to repro this issue is: // To be run in L1 qemu-system-ppc64 \ -drive file=rhel.qcow2,format=qcow2 \ -m 20G \ -smp 8,cores=1,threads=8 \ -cpu host \ -nographic \ -machine pseries,ic-mode=xics -accel kvm Fix the plumbing to utilize vcpu->doorbell_request instead of vcore->dpdes for nested KVM guests on P9 and above. [1] Terminology 1. L0 : PowerNV linux running with HV privileges 2. L1 : Pseries KVM guest running on top of L0 2. L2 : Nested KVM guest running on top of L1 Fixes: 6398326b9ba1 ("KVM: PPC: Book3S HV P9: Stop using vc->dpdes") Signed-off-by: Gautam Menghani Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241109063301.105289-3-gautam@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 9 +++++++++ arch/powerpc/kvm/book3s_hv_nested.c | 14 ++++++++++---- 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 563cae92d45d3..a27ad568d2d09 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4285,6 +4285,15 @@ static int kvmhv_vcpu_entry_p9_nested(struct kvm_vcpu *vcpu, u64 time_limit, uns } hvregs.hdec_expiry = time_limit; + /* + * hvregs has the doorbell status, so zero it here which + * enables us to receive doorbells when H_ENTER_NESTED is + * in progress for this vCPU + */ + + if (vcpu->arch.doorbell_request) + vcpu->arch.doorbell_request = 0; + /* * When setting DEC, we must always deal with irq_work_raise * via NMI vs setting DEC. The problem occurs right as we diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index db54b259c251b..ef97f58d0d973 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -32,7 +32,7 @@ void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr) struct kvmppc_vcore *vc = vcpu->arch.vcore; hr->pcr = vc->pcr | PCR_MASK; - hr->dpdes = vc->dpdes; + hr->dpdes = vcpu->arch.doorbell_request; hr->hfscr = vcpu->arch.hfscr; hr->tb_offset = vc->tb_offset; hr->dawr0 = vcpu->arch.dawr0; @@ -105,7 +105,7 @@ static void save_hv_return_state(struct kvm_vcpu *vcpu, { struct kvmppc_vcore *vc = vcpu->arch.vcore; - hr->dpdes = vc->dpdes; + hr->dpdes = vcpu->arch.doorbell_request; hr->purr = vcpu->arch.purr; hr->spurr = vcpu->arch.spurr; hr->ic = vcpu->arch.ic; @@ -143,7 +143,7 @@ static void restore_hv_regs(struct kvm_vcpu *vcpu, const struct hv_guest_state * struct kvmppc_vcore *vc = vcpu->arch.vcore; vc->pcr = hr->pcr | PCR_MASK; - vc->dpdes = hr->dpdes; + vcpu->arch.doorbell_request = hr->dpdes; vcpu->arch.hfscr = hr->hfscr; vcpu->arch.dawr0 = hr->dawr0; vcpu->arch.dawrx0 = hr->dawrx0; @@ -170,7 +170,13 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, { struct kvmppc_vcore *vc = vcpu->arch.vcore; - vc->dpdes = hr->dpdes; + /* + * This L2 vCPU might have received a doorbell while H_ENTER_NESTED was being handled. + * Make sure we preserve the doorbell if it was either: + * a) Sent after H_ENTER_NESTED was called on this vCPU (arch.doorbell_request would be 1) + * b) Doorbell was not handled and L2 exited for some other reason (hr->dpdes would be 1) + */ + vcpu->arch.doorbell_request = vcpu->arch.doorbell_request | hr->dpdes; vcpu->arch.hfscr = hr->hfscr; vcpu->arch.purr = hr->purr; vcpu->arch.spurr = hr->spurr; From 26686db69917399fa30e3b3135360771e90f83ec Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Sat, 9 Nov 2024 12:02:57 +0530 Subject: [PATCH 49/69] KVM: PPC: Book3S HV: Avoid returning to nested hypervisor on pending doorbells Commit 6398326b9ba1 ("KVM: PPC: Book3S HV P9: Stop using vc->dpdes") dropped the use of vcore->dpdes for msgsndp / SMT emulation. Prior to that commit, the below code at L1 level (see [1] for terminology) was responsible for setting vc->dpdes for the respective L2 vCPU: if (!nested) { kvmppc_core_prepare_to_enter(vcpu); if (vcpu->arch.doorbell_request) { vc->dpdes = 1; smp_wmb(); vcpu->arch.doorbell_request = 0; } L1 then sent vc->dpdes to L0 via kvmhv_save_hv_regs(), and while servicing H_ENTER_NESTED at L0, the below condition at L0 level made sure to abort and go back to L1 if vcpu->arch.doorbell_request = 1 so that L1 sets vc->dpdes as per above if condition: } else if (vcpu->arch.pending_exceptions || vcpu->arch.doorbell_request || xive_interrupt_pending(vcpu)) { vcpu->arch.ret = RESUME_HOST; goto out; } This worked fine since vcpu->arch.doorbell_request was used more like a flag and vc->dpdes was used to pass around the doorbell state. But after Commit 6398326b9ba1 ("KVM: PPC: Book3S HV P9: Stop using vc->dpdes"), vcpu->arch.doorbell_request is the only variable used to pass around doorbell state. With the plumbing for handling doorbells for nested guests updated to use vcpu->arch.doorbell_request over vc->dpdes, the above "else if" stops doorbells from working correctly as L0 aborts execution of L2 and instead goes back to L1. Remove vcpu->arch.doorbell_request from the above "else if" condition as it is no longer needed for L0 to correctly handle the doorbell status while running L2. [1] Terminology 1. L0 : PowerNV linux running with HV privileges 2. L1 : Pseries KVM guest running on top of L0 2. L2 : Nested KVM guest running on top of L1 Fixes: 6398326b9ba1 ("KVM: PPC: Book3S HV P9: Stop using vc->dpdes") Signed-off-by: Gautam Menghani Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241109063301.105289-4-gautam@linux.ibm.com --- arch/powerpc/kvm/book3s_hv.c | 1 - 1 file changed, 1 deletion(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a27ad568d2d09..64cce0fce2960 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4885,7 +4885,6 @@ int kvmhv_run_single_vcpu(struct kvm_vcpu *vcpu, u64 time_limit, lpcr |= LPCR_MER; } } else if (vcpu->arch.pending_exceptions || - vcpu->arch.doorbell_request || xive_interrupt_pending(vcpu)) { vcpu->arch.ret = RESUME_HOST; goto out; From a26c4dbb3d9c1821cb0fc11cb2dbc32d5bf3463b Mon Sep 17 00:00:00 2001 From: Michal Suchanek Date: Tue, 1 Oct 2024 15:03:49 +0200 Subject: [PATCH 50/69] powerpc/sstep: make emulate_vsx_load and emulate_vsx_store static These functions are not used outside of sstep.c Fixes: 350779a29f11 ("powerpc: Handle most loads and stores in instruction emulation code") Signed-off-by: Michal Suchanek Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241001130356.14664-1-msuchanek@suse.de --- arch/powerpc/include/asm/sstep.h | 5 ----- arch/powerpc/lib/sstep.c | 12 ++++-------- 2 files changed, 4 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/include/asm/sstep.h b/arch/powerpc/include/asm/sstep.h index 50950deedb873..e3d0e714ff280 100644 --- a/arch/powerpc/include/asm/sstep.h +++ b/arch/powerpc/include/asm/sstep.h @@ -173,9 +173,4 @@ int emulate_step(struct pt_regs *regs, ppc_inst_t instr); */ extern int emulate_loadstore(struct pt_regs *regs, struct instruction_op *op); -extern void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem, bool cross_endian); -extern void emulate_vsx_store(struct instruction_op *op, - const union vsx_reg *reg, void *mem, - bool cross_endian); extern int emulate_dcbz(unsigned long ea, struct pt_regs *regs); diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index e65f3fb68d06b..ac3ee19531d8a 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -780,8 +780,8 @@ static nokprobe_inline int emulate_stq(struct pt_regs *regs, unsigned long ea, #endif /* __powerpc64 */ #ifdef CONFIG_VSX -void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, - const void *mem, bool rev) +static nokprobe_inline void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, + const void *mem, bool rev) { int size, read_size; int i, j; @@ -863,11 +863,9 @@ void emulate_vsx_load(struct instruction_op *op, union vsx_reg *reg, break; } } -EXPORT_SYMBOL_GPL(emulate_vsx_load); -NOKPROBE_SYMBOL(emulate_vsx_load); -void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, - void *mem, bool rev) +static nokprobe_inline void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, + void *mem, bool rev) { int size, write_size; int i, j; @@ -955,8 +953,6 @@ void emulate_vsx_store(struct instruction_op *op, const union vsx_reg *reg, break; } } -EXPORT_SYMBOL_GPL(emulate_vsx_store); -NOKPROBE_SYMBOL(emulate_vsx_store); static nokprobe_inline int do_vsx_load(struct instruction_op *op, unsigned long ea, struct pt_regs *regs, From 590d2f9347f7974d7954400e5d937672fd844a8b Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Thu, 14 Nov 2024 14:20:20 +0530 Subject: [PATCH 51/69] KVM: PPC: Book3S HV: Fix kmv -> kvm typo Fix typo in the following kvm function names from: kmvhv_counters_tracepoint_regfunc -> kvmhv_counters_tracepoint_regfunc kmvhv_counters_tracepoint_unregfunc -> kvmhv_counters_tracepoint_unregfunc Fixes: e1f288d2f9c6 ("KVM: PPC: Book3S HV nestedv2: Add support for reading VPA counters for pseries guests") Reported-by: Madhavan Srinivasan Reviewed-by: Ritesh Harjani (IBM) Reviewed-by: Amit Machhiwal Signed-off-by: Kajol Jain Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241114085020.1147912-1-kjain@linux.ibm.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 4 ++-- arch/powerpc/kvm/book3s_hv.c | 4 ++-- arch/powerpc/kvm/trace_hv.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 2ef9a5f4e5d14..11065313d4c12 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -684,8 +684,8 @@ int kvmhv_nestedv2_set_ptbl_entry(unsigned long lpid, u64 dw0, u64 dw1); int kvmhv_nestedv2_parse_output(struct kvm_vcpu *vcpu); int kvmhv_nestedv2_set_vpa(struct kvm_vcpu *vcpu, unsigned long vpa); -int kmvhv_counters_tracepoint_regfunc(void); -void kmvhv_counters_tracepoint_unregfunc(void); +int kvmhv_counters_tracepoint_regfunc(void); +void kvmhv_counters_tracepoint_unregfunc(void); int kvmhv_get_l2_counters_status(void); void kvmhv_set_l2_counters_status(int cpu, bool status); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 64cce0fce2960..febb9c433f642 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4130,7 +4130,7 @@ void kvmhv_set_l2_counters_status(int cpu, bool status) lppaca_of(cpu).l2_counters_enable = 0; } -int kmvhv_counters_tracepoint_regfunc(void) +int kvmhv_counters_tracepoint_regfunc(void) { int cpu; @@ -4140,7 +4140,7 @@ int kmvhv_counters_tracepoint_regfunc(void) return 0; } -void kmvhv_counters_tracepoint_unregfunc(void) +void kvmhv_counters_tracepoint_unregfunc(void) { int cpu; diff --git a/arch/powerpc/kvm/trace_hv.h b/arch/powerpc/kvm/trace_hv.h index 77ebc724e6cdf..35fccaa575cc1 100644 --- a/arch/powerpc/kvm/trace_hv.h +++ b/arch/powerpc/kvm/trace_hv.h @@ -538,7 +538,7 @@ TRACE_EVENT_FN_COND(kvmppc_vcpu_stats, TP_printk("VCPU %d: l1_to_l2_cs_time=%llu ns l2_to_l1_cs_time=%llu ns l2_runtime=%llu ns", __entry->vcpu_id, __entry->l1_to_l2_cs, __entry->l2_to_l1_cs, __entry->l2_runtime), - kmvhv_counters_tracepoint_regfunc, kmvhv_counters_tracepoint_unregfunc + kvmhv_counters_tracepoint_regfunc, kvmhv_counters_tracepoint_unregfunc ); #endif #endif /* _TRACE_KVM_HV_H */ From 276e036e5844116e563fa90f676c625bb742cc57 Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Mon, 30 Sep 2024 13:20:57 +0200 Subject: [PATCH 52/69] powerpc/ps3: Reorganize kerneldoc parameter names Reorganize kerneldoc parameter names to match the parameter order in the function header. Problems identified using Coccinelle. Signed-off-by: Julia Lawall Reviewed-by: Geert Uytterhoeven Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240930112121.95324-12-Julia.Lawall@inria.fr --- arch/powerpc/platforms/ps3/interrupt.c | 2 +- arch/powerpc/platforms/ps3/repository.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/ps3/interrupt.c b/arch/powerpc/platforms/ps3/interrupt.c index 49871427f599d..af3fe9f04f24c 100644 --- a/arch/powerpc/platforms/ps3/interrupt.c +++ b/arch/powerpc/platforms/ps3/interrupt.c @@ -378,9 +378,9 @@ int ps3_send_event_locally(unsigned int virq) /** * ps3_sb_event_receive_port_setup - Setup a system bus event receive port. + * @dev: The system bus device instance. * @cpu: enum ps3_cpu_binding indicating the cpu the interrupt should be * serviced on. - * @dev: The system bus device instance. * @virq: The assigned Linux virq. * * An event irq represents a virtual device interrupt. The interrupt_id diff --git a/arch/powerpc/platforms/ps3/repository.c b/arch/powerpc/platforms/ps3/repository.c index 1abe33fbe5290..b8c030eab1384 100644 --- a/arch/powerpc/platforms/ps3/repository.c +++ b/arch/powerpc/platforms/ps3/repository.c @@ -940,7 +940,7 @@ int __init ps3_repository_read_vuart_sysmgr_port(unsigned int *port) /** * ps3_repository_read_boot_dat_info - Get address and size of cell_ext_os_area. - * address: lpar address of cell_ext_os_area + * @lpar_addr: lpar address of cell_ext_os_area * @size: size of cell_ext_os_area */ From bfd9c145533ba9cb6f504670aa8e75542c8ee54f Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Mon, 14 Oct 2024 11:55:03 +0100 Subject: [PATCH 53/69] powerpc/ep8248e: Use %pa to format resource_size_t The correct format string for resource_size_t is %pa which acts on the address of the variable to be formatted [1]. [1] https://elixir.bootlin.com/linux/v6.11.3/source/Documentation/core-api/printk-formats.rst#L229 Introduced by commit 9d9326d3bc0e ("phy: Change mii_bus id field to a string") Flagged by gcc-14 as: arch/powerpc/platforms/82xx/ep8248e.c: In function 'ep8248e_mdio_probe': arch/powerpc/platforms/82xx/ep8248e.c:131:46: warning: format '%x' expects argument of type 'unsigned int', but argument 4 has type 'resource_size_t' {aka 'long long unsigned int'} [-Wformat=] 131 | snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); | ~^ ~~~~~~~~~ | | | | | resource_size_t {aka long long unsigned int} | unsigned int | %llx No functional change intended. Compile tested only. Reported-by: Geert Uytterhoeven Link: https://lore.kernel.org/netdev/711d7f6d-b785-7560-f4dc-c6aad2cce99@linux-m68k.org/ Signed-off-by: Simon Horman Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241014-ep8248e-pa-fmt-v1-1-009ea0dcc18f@kernel.org --- arch/powerpc/platforms/82xx/ep8248e.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/82xx/ep8248e.c b/arch/powerpc/platforms/82xx/ep8248e.c index 3dc65ce1f175d..8f918916e6318 100644 --- a/arch/powerpc/platforms/82xx/ep8248e.c +++ b/arch/powerpc/platforms/82xx/ep8248e.c @@ -128,7 +128,7 @@ static int ep8248e_mdio_probe(struct platform_device *ofdev) bus->name = "ep8248e-mdio-bitbang"; bus->parent = &ofdev->dev; - snprintf(bus->id, MII_BUS_ID_SIZE, "%x", res.start); + snprintf(bus->id, MII_BUS_ID_SIZE, "%pa", &res.start); ret = of_mdiobus_register(bus, ofdev->dev.of_node); if (ret) From b196db2f536645eda7684655f3fae913e33fda4b Mon Sep 17 00:00:00 2001 From: Mukesh Kumar Chaurasiya Date: Fri, 25 Oct 2024 00:42:33 +0530 Subject: [PATCH 54/69] powerpc/xmon: symbol lookup length fixed Currently this cannot lookup symbol beyond 64 characters in some cases like "ls", "lp" and "t" Fix this by using KSYM_NAME_LEN instead of fixed 64 characters Signed-off-by: Mukesh Kumar Chaurasiya Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241024191232.1570894-2-mchauras@linux.ibm.com --- arch/powerpc/xmon/xmon.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index e6cddbb2305f8..22b8b5cc4df05 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -3662,7 +3662,7 @@ symbol_lookup(void) int type = inchar(); unsigned long addr, cpu; void __percpu *ptr = NULL; - static char tmp[64]; + static char tmp[KSYM_NAME_LEN]; switch (type) { case 'a': @@ -3671,7 +3671,7 @@ symbol_lookup(void) termch = 0; break; case 's': - getstring(tmp, 64); + getstring(tmp, KSYM_NAME_LEN); if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); @@ -3686,7 +3686,7 @@ symbol_lookup(void) termch = 0; break; case 'p': - getstring(tmp, 64); + getstring(tmp, KSYM_NAME_LEN); if (setjmp(bus_error_jmp) == 0) { catch_memory_errors = 1; sync(); From 7ca93aa9204b706e4afcd4fae0dc8798500598d5 Mon Sep 17 00:00:00 2001 From: zhang jiao Date: Mon, 30 Sep 2024 09:27:57 +0800 Subject: [PATCH 55/69] selftests/powerpc: Remove the path after initialization. If there were no anamolies noted, then we can simply remove the log file and return, but only after the path variable has been initialized. Signed-off-by: zhang jiao Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240930012757.2395-1-zhangjiao2@cmss.chinamobile.com --- tools/testing/selftests/powerpc/mm/tlbie_test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/powerpc/mm/tlbie_test.c b/tools/testing/selftests/powerpc/mm/tlbie_test.c index 48344a74b2128..35f0098399ccd 100644 --- a/tools/testing/selftests/powerpc/mm/tlbie_test.c +++ b/tools/testing/selftests/powerpc/mm/tlbie_test.c @@ -313,16 +313,16 @@ static inline void end_verification_log(unsigned int tid, unsigned nr_anamolies) fclose(f); - if (nr_anamolies == 0) { - remove(path); - return; - } - sprintf(logfile, logfilename, tid); strcpy(path, logdir); strcat(path, separator); strcat(path, logfile); + if (nr_anamolies == 0) { + remove(path); + return; + } + printf("Thread %02d chunk has %d corrupted words. For details check %s\n", tid, nr_anamolies, path); } From 6da1cab4f5f8eb778fd61f0eb6ca5b0a011dd44d Mon Sep 17 00:00:00 2001 From: Costa Shulyupin Date: Thu, 26 Sep 2024 12:26:22 +0300 Subject: [PATCH 56/69] powerpc/xive: Use cpumask_intersects() Replace `cpumask_any_and(a, b) >= nr_cpu_ids` with the more readable `!cpumask_intersects(a, b)`. Comparison between cpumask_any_and() and cpumask_intersects() The cpumask_any_and() function expands using FIND_FIRST_BIT(), resulting in a loop that iterates through each bit of the bitmask: for (idx = 0; idx * BITS_PER_LONG < sz; idx++) { val = (FETCH); if (val) { sz = min(idx * BITS_PER_LONG + __ffs(MUNGE(val)), sz); break; } } The cpumask_intersects() function expands using __bitmap_intersects(), resulting in that the first loop iterates through each long word of the bitmask, and the second through each bit within a long word: unsigned int k, lim = bits/BITS_PER_LONG; for (k = 0; k < lim; ++k) if (bitmap1[k] & bitmap2[k]) return true; if (bits % BITS_PER_LONG) if ((bitmap1[k] & bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) return true; Conclusion: cpumask_intersects() is at least as efficient as cpumask_any_and(), if not more so, as it typically performs fewer loops and comparisons. Signed-off-by: Costa Shulyupin Reviewed-by: Ming Lei Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240926092623.399577-2-costa.shul@redhat.com --- arch/powerpc/sysdev/xive/common.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/sysdev/xive/common.c b/arch/powerpc/sysdev/xive/common.c index fa01818c1972c..a6c388bdf5d08 100644 --- a/arch/powerpc/sysdev/xive/common.c +++ b/arch/powerpc/sysdev/xive/common.c @@ -726,7 +726,7 @@ static int xive_irq_set_affinity(struct irq_data *d, pr_debug("%s: irq %d/0x%x\n", __func__, d->irq, hw_irq); /* Is this valid ? */ - if (cpumask_any_and(cpumask, cpu_online_mask) >= nr_cpu_ids) + if (!cpumask_intersects(cpumask, cpu_online_mask)) return -EINVAL; /* From f20b0a03674cef555a5f48b65f81b82868b17cdd Mon Sep 17 00:00:00 2001 From: Lukas Bulwahn Date: Fri, 27 Sep 2024 11:52:03 +0200 Subject: [PATCH 57/69] powerpc: remove dead config options for MPC85xx platform support Commit 384e338a9187 ("powerpc: drop MPC8540_ADS and MPC8560_ADS platform support") and commit b751ed04bc5e ("powerpc: drop MPC85xx_CDS platform support") removes the platform support for MPC8540_ADS, MPC8560_ADS and MPC85xx_CDS in the source tree, but misses to remove the config options in the Kconfig file. Hence, these three config options are without any effect since then. Drop these three dead config options. Fixes: 384e338a9187 ("powerpc: drop MPC8540_ADS and MPC8560_ADS platform support") Fixes: b751ed04bc5e ("powerpc: drop MPC85xx_CDS platform support") Signed-off-by: Lukas Bulwahn Reviewed-by: Ritesh Harjani (IBM) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240927095203.392365-1-lukas.bulwahn@redhat.com --- arch/powerpc/platforms/85xx/Kconfig | 21 --------------------- 1 file changed, 21 deletions(-) diff --git a/arch/powerpc/platforms/85xx/Kconfig b/arch/powerpc/platforms/85xx/Kconfig index 9315a3b69d6df..604c1b4b6d45c 100644 --- a/arch/powerpc/platforms/85xx/Kconfig +++ b/arch/powerpc/platforms/85xx/Kconfig @@ -40,27 +40,6 @@ config BSC9132_QDS and dual StarCore SC3850 DSP cores. Manufacturer : Freescale Semiconductor, Inc -config MPC8540_ADS - bool "Freescale MPC8540 ADS" - select DEFAULT_UIMAGE - help - This option enables support for the MPC 8540 ADS board - -config MPC8560_ADS - bool "Freescale MPC8560 ADS" - select DEFAULT_UIMAGE - select CPM2 - help - This option enables support for the MPC 8560 ADS board - -config MPC85xx_CDS - bool "Freescale MPC85xx CDS" - select DEFAULT_UIMAGE - select PPC_I8259 - select HAVE_RAPIDIO - help - This option enables support for the MPC85xx CDS board - config MPC85xx_MDS bool "Freescale MPC8568 MDS / MPC8569 MDS / P1021 MDS" select DEFAULT_UIMAGE From 2e716f5cdebed2fb98cafffaf626645c2e922dbb Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Fri, 11 Oct 2024 18:10:06 +0200 Subject: [PATCH 58/69] powerpc/powermac: Use of_property_match_string() in pmac_has_backlight_type() Replace an of_get_property() call by of_property_match_string() so that this function implementation can be simplified. Suggested-by: Christophe Leroy Link: https://lore.kernel.org/linuxppc-dev/d9bdc1b6-ea7e-47aa-80aa-02ae649abf72@csgroup.eu/ Suggested-by: Michael Ellerman Link: https://lore.kernel.org/linuxppc-dev/87cyk97ufp.fsf@mail.lhotse/ Signed-off-by: Markus Elfring Reviewed-by: Christophe Leroy Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/ede25e03-7a14-4787-ae1b-4fc9290add5a@web.de --- arch/powerpc/platforms/powermac/backlight.c | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/arch/powerpc/platforms/powermac/backlight.c b/arch/powerpc/platforms/powermac/backlight.c index 12bc01353bd3c..79741370c40c6 100644 --- a/arch/powerpc/platforms/powermac/backlight.c +++ b/arch/powerpc/platforms/powermac/backlight.c @@ -57,18 +57,10 @@ struct backlight_device *pmac_backlight; int pmac_has_backlight_type(const char *type) { struct device_node* bk_node = of_find_node_by_name(NULL, "backlight"); + int i = of_property_match_string(bk_node, "backlight-control", type); - if (bk_node) { - const char *prop = of_get_property(bk_node, - "backlight-control", NULL); - if (prop && strncmp(prop, type, strlen(type)) == 0) { - of_node_put(bk_node); - return 1; - } - of_node_put(bk_node); - } - - return 0; + of_node_put(bk_node); + return i >= 0; } static void pmac_backlight_key_worker(struct work_struct *work) From 352268dc6da7b422022541c2cf846663110f775c Mon Sep 17 00:00:00 2001 From: Markus Elfring Date: Thu, 3 Oct 2024 21:06:42 +0200 Subject: [PATCH 59/69] macintosh: Use common error handling code in via_pmu_led_init() Add a jump target so that a bit of exception handling can be better reused at the end of this function implementation. This issue was detected by using the Coccinelle software. Signed-off-by: Markus Elfring Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/189b93e2-4e81-438d-9c77-cbe4d9d7a0d9@web.de --- drivers/macintosh/via-pmu-led.c | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/drivers/macintosh/via-pmu-led.c b/drivers/macintosh/via-pmu-led.c index a4fb16d7db3c1..fc1af74b65967 100644 --- a/drivers/macintosh/via-pmu-led.c +++ b/drivers/macintosh/via-pmu-led.c @@ -92,18 +92,15 @@ static int __init via_pmu_led_init(void) if (dt == NULL) return -ENODEV; model = of_get_property(dt, "model", NULL); - if (model == NULL) { - of_node_put(dt); - return -ENODEV; - } + if (!model) + goto put_node; + if (strncmp(model, "PowerBook", strlen("PowerBook")) != 0 && strncmp(model, "iBook", strlen("iBook")) != 0 && strcmp(model, "PowerMac7,2") != 0 && - strcmp(model, "PowerMac7,3") != 0) { - of_node_put(dt); - /* ignore */ - return -ENODEV; - } + strcmp(model, "PowerMac7,3") != 0) + goto put_node; + of_node_put(dt); spin_lock_init(&pmu_blink_lock); @@ -112,6 +109,10 @@ static int __init via_pmu_led_init(void) pmu_blink_req.done = pmu_req_done; return led_classdev_register(NULL, &pmu_led); + +put_node: + of_node_put(dt); + return -ENODEV; } late_initcall(via_pmu_led_init); From 83b5a407fbb73e6965adfb4bd0a803724bf87f96 Mon Sep 17 00:00:00 2001 From: Zhang Zekun Date: Mon, 30 Sep 2024 15:56:28 +0800 Subject: [PATCH 60/69] powerpc/kexec: Fix return of uninitialized variable of_property_read_u64() can fail and leave the variable uninitialized, which will then be used. Return error if reading the property failed. Fixes: 2e6bd221d96f ("powerpc/kexec_file: Enable early kernel OPAL calls") Signed-off-by: Zhang Zekun Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240930075628.125138-1-zhangzekun11@huawei.com --- arch/powerpc/kexec/file_load_64.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kexec/file_load_64.c b/arch/powerpc/kexec/file_load_64.c index 9738adabeb1fe..dc65c13911577 100644 --- a/arch/powerpc/kexec/file_load_64.c +++ b/arch/powerpc/kexec/file_load_64.c @@ -736,13 +736,18 @@ int setup_purgatory_ppc64(struct kimage *image, const void *slave_code, if (dn) { u64 val; - of_property_read_u64(dn, "opal-base-address", &val); + ret = of_property_read_u64(dn, "opal-base-address", &val); + if (ret) + goto out; + ret = kexec_purgatory_get_set_symbol(image, "opal_base", &val, sizeof(val), false); if (ret) goto out; - of_property_read_u64(dn, "opal-entry-address", &val); + ret = of_property_read_u64(dn, "opal-entry-address", &val); + if (ret) + goto out; ret = kexec_purgatory_get_set_symbol(image, "opal_entry", &val, sizeof(val), false); } From e9d3270007b13acd34de4256970ffe457efc6c65 Mon Sep 17 00:00:00 2001 From: Shen Lichuan Date: Mon, 30 Sep 2024 10:32:34 +0800 Subject: [PATCH 61/69] ps3: Correct some typos in comments Fixed some typos that were currently identified with codespell, the details are as follows: drivers/ps3/ps3-lpm.c:94: rigths ==> rights drivers/ps3/ps3-sys-manager.c:365: acnowledge ==> acknowledge drivers/ps3/ps3-vuart.c:470: remaning ==> remaining drivers/ps3/ps3-vuart.c:471: transmision ==> transmission drivers/ps3/sys-manager-core.c:15: Staticly ==> Statically Signed-off-by: Shen Lichuan Acked-by: Geoff Levand Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240930023234.7457-1-shenlichuan@vivo.com --- drivers/ps3/ps3-lpm.c | 2 +- drivers/ps3/ps3-sys-manager.c | 2 +- drivers/ps3/ps3-vuart.c | 4 ++-- drivers/ps3/sys-manager-core.c | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/ps3/ps3-lpm.c b/drivers/ps3/ps3-lpm.c index 200ad8751860a..188ae25726740 100644 --- a/drivers/ps3/ps3-lpm.c +++ b/drivers/ps3/ps3-lpm.c @@ -91,7 +91,7 @@ struct ps3_lpm_shadow_regs { * struct ps3_lpm_priv - Private lpm device data. * * @open: An atomic variable indicating the lpm driver has been opened. - * @rights: The lpm rigths granted by the system policy module. A logical + * @rights: The lpm rights granted by the system policy module. A logical * OR of enum ps3_lpm_rights. * @node_id: The node id of a BE processor whose performance monitor this * lpar has the right to use. diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c index ad8ef59dea340..ab798b52910eb 100644 --- a/drivers/ps3/ps3-sys-manager.c +++ b/drivers/ps3/ps3-sys-manager.c @@ -362,7 +362,7 @@ static int ps3_sys_manager_send_request_shutdown( * ps3_sys_manager_send_response - Send a 'response' to the system manager. * @status: zero = success, others fail. * - * The guest sends this message to the system manager to acnowledge success or + * The guest sends this message to the system manager to acknowledge success or * failure of a command sent by the system manager. */ diff --git a/drivers/ps3/ps3-vuart.c b/drivers/ps3/ps3-vuart.c index 6328abd51ffad..5cb92535a4a14 100644 --- a/drivers/ps3/ps3-vuart.c +++ b/drivers/ps3/ps3-vuart.c @@ -467,8 +467,8 @@ struct list_buffer { * * If the port is idle on entry as much of the incoming data is written to * the port as the port will accept. Otherwise a list buffer is created - * and any remaning incoming data is copied to that buffer. The buffer is - * then enqueued for transmision via the transmit interrupt. + * and any remaining incoming data is copied to that buffer. The buffer is + * then enqueued for transmission via the transmit interrupt. */ int ps3_vuart_write(struct ps3_system_bus_device *dev, const void *buf, diff --git a/drivers/ps3/sys-manager-core.c b/drivers/ps3/sys-manager-core.c index e061b7d0632bd..f50032ad97024 100644 --- a/drivers/ps3/sys-manager-core.c +++ b/drivers/ps3/sys-manager-core.c @@ -12,7 +12,7 @@ #include /** - * Staticly linked routines that allow late binding of a loaded sys-manager + * Statically linked routines that allow late binding of a loaded sys-manager * module. */ From 4aa5cc1e0012f784bc7f637458e597564833b425 Mon Sep 17 00:00:00 2001 From: David Hunter Date: Tue, 9 Jul 2024 10:35:53 -0400 Subject: [PATCH 62/69] powerpc-km82xx.c: replace of_node_put() with __free The use of the __free macro allows the cleanup to be based on scope instead of on another function called later. This makes the cleanup automatic and less susceptible to errors later. Signed-off-by: David Hunter [mpe: Fix over-long line & change log wording] Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20240709143553.117053-1-david.hunter.linux@gmail.com --- arch/powerpc/platforms/82xx/km82xx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/platforms/82xx/km82xx.c b/arch/powerpc/platforms/82xx/km82xx.c index c86da3f2b74bd..99f0f0f418767 100644 --- a/arch/powerpc/platforms/82xx/km82xx.c +++ b/arch/powerpc/platforms/82xx/km82xx.c @@ -27,15 +27,15 @@ static void __init km82xx_pic_init(void) { - struct device_node *np = of_find_compatible_node(NULL, NULL, - "fsl,pq2-pic"); + struct device_node *np __free(device_node); + np = of_find_compatible_node(NULL, NULL, "fsl,pq2-pic"); + if (!np) { pr_err("PIC init: can not find cpm-pic node\n"); return; } cpm2_pic_init(np); - of_node_put(np); } struct cpm_pin { From a5371018eefdd81f4152926a6d2b9480a75ac2a6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 7 Nov 2024 22:26:46 +1100 Subject: [PATCH 63/69] powerpc/Makefile: Allow overriding CPP Unlike all other arches, powerpc doesn't allow the user to override CPP, because it sets it unconditionally in the arch Makefile. This can lead to strange build failures. Instead add the required flags to KBUILD_CPPFLAGS, which are passed to CPP, CC and AS invocations by the generic Makefile logic. Reported-by: Arnd Bergmann Tested-by: Nathan Chancellor Reviewed-by: Nathan Chancellor Closes: https://lore.kernel.org/all/20240607061629.530301-1-arnd@kernel.org Signed-off-by: Arnd Bergmann [mpe: Rebase, write change log, add Arnd's SoB as communicated privately] Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241107112646.32401-1-mpe@ellerman.id.au --- arch/powerpc/Makefile | 7 +++---- arch/powerpc/kernel/vdso/Makefile | 2 +- 2 files changed, 4 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 99af7953e8442..41489483a602f 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -62,14 +62,14 @@ KBUILD_LDFLAGS_MODULE += arch/powerpc/lib/crtsavres.o endif ifdef CONFIG_CPU_LITTLE_ENDIAN -KBUILD_CFLAGS += -mlittle-endian +KBUILD_CPPFLAGS += -mlittle-endian KBUILD_LDFLAGS += -EL LDEMULATION := lppc GNUTARGET := powerpcle MULTIPLEWORD := -mno-multiple KBUILD_CFLAGS_MODULE += $(call cc-option,-mno-save-toc-indirect) else -KBUILD_CFLAGS += $(call cc-option,-mbig-endian) +KBUILD_CPPFLAGS += $(call cc-option,-mbig-endian) KBUILD_LDFLAGS += -EB LDEMULATION := ppc GNUTARGET := powerpc @@ -95,7 +95,7 @@ aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian ifeq ($(HAS_BIARCH),y) -KBUILD_CFLAGS += -m$(BITS) +KBUILD_CPPFLAGS += -m$(BITS) KBUILD_AFLAGS += -m$(BITS) KBUILD_LDFLAGS += -m elf$(BITS)$(LDEMULATION) endif @@ -176,7 +176,6 @@ KBUILD_CPPFLAGS += -I $(srctree)/arch/powerpc $(asinstr) KBUILD_AFLAGS += $(AFLAGS-y) KBUILD_CFLAGS += $(CC_FLAGS_NO_FPU) KBUILD_CFLAGS += $(CFLAGS-y) -CPP = $(CC) -E $(KBUILD_CFLAGS) CHECKFLAGS += -m$(BITS) -D__powerpc__ -D__powerpc$(BITS)__ ifdef CONFIG_CPU_BIG_ENDIAN diff --git a/arch/powerpc/kernel/vdso/Makefile b/arch/powerpc/kernel/vdso/Makefile index c568cad6a22e6..0e3ed6fb199ff 100644 --- a/arch/powerpc/kernel/vdso/Makefile +++ b/arch/powerpc/kernel/vdso/Makefile @@ -50,7 +50,7 @@ ldflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld) ldflags-$(CONFIG_LD_ORPHAN_WARN) += -Wl,--orphan-handling=$(CONFIG_LD_ORPHAN_WARN_LEVEL) # Filter flags that clang will warn are unused for linking -ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CFLAGS)) +ldflags-y += $(filter-out $(CC_AUTO_VAR_INIT_ZERO_ENABLER) $(CC_FLAGS_FTRACE) -Wa$(comma)%, $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS)) CC32FLAGS := -m32 CC32FLAGSREMOVE := -mcmodel=medium -mabi=elfv1 -mabi=elfv2 -mcall-aixdesc From 27aef9391bd3bc08eb07fe23e6c4d7c9afabe1e8 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 15 Nov 2024 15:54:42 +1100 Subject: [PATCH 64/69] MAINTAINERS: powerpc: Mark Maddy as "M" Mark Maddy as a co-maintainer, so that he can get a kernel.org account and help manage the powerpc tree on kernel.org. Acked-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241115045442.675721-1-mpe@ellerman.id.au --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index a097afd76ded4..313ce65c1eafe 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -13106,7 +13106,7 @@ M: Michael Ellerman R: Nicholas Piggin R: Christophe Leroy R: Naveen N Rao -R: Madhavan Srinivasan +M: Madhavan Srinivasan L: linuxppc-dev@lists.ozlabs.org S: Supported W: https://github.com/linuxppc/wiki/wiki From 176cda0619b6c17a553625f6e2fcbc3981ad667d Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 18 Nov 2024 17:11:11 +0530 Subject: [PATCH 65/69] powerpc/perf: Add perf interface to expose vpa counters To support performance measurement for KVM on PowerVM(KoP) feature, PowerVM hypervisor has added couple of new software counters in Virtual Process Area(VPA) of the partition. Commit e1f288d2f9c69 ("KVM: PPC: Book3S HV nestedv2: Add support for reading VPA counters for pseries guests") have updated the paca fields with corresponding changes. Proposed perf interface is to expose these new software counters for monitoring of context switch latencies and runtime aggregate. Perf interface driver is called "vpa_pmu" and it has dependency on KVM and perf, hence added new config called "VPA_PMU" which depends on "CONFIG_KVM_BOOK3S_64_HV" and "CONFIG_HV_PERF_CTRS". Since, kvm and kvm_host are currently compiled as built-in modules, this perf interface takes the same path and registered as a module. vpa_pmu perf interface needs access to some of the kvm functions and structures like kvmhv_get_l2_counters_status(), hence kvm_book3s_64.h and kvm_ppc.h are included. Below are the events added to monitor KoP: vpa_pmu/l1_to_l2_lat/ vpa_pmu/l2_to_l1_lat/ vpa_pmu/l2_runtime_agg/ and vpa_pmu driver supports only per-cpu monitoring with this patch. Example usage: [command]# perf stat -e vpa_pmu/l1_to_l2_lat/ -a -I 1000 1.001017682 727,200 vpa_pmu/l1_to_l2_lat/ 2.003540491 1,118,824 vpa_pmu/l1_to_l2_lat/ 3.005699458 1,919,726 vpa_pmu/l1_to_l2_lat/ 4.007827011 2,364,630 vpa_pmu/l1_to_l2_lat/ Signed-off-by: Kajol Jain Co-developed-by: Madhavan Srinivasan Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241118114114.208964-1-kjain@linux.ibm.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 3 + arch/powerpc/kvm/book3s_hv.c | 19 +++ arch/powerpc/perf/Makefile | 2 + arch/powerpc/perf/vpa-pmu.c | 194 +++++++++++++++++++++++ arch/powerpc/platforms/pseries/Kconfig | 14 ++ 5 files changed, 232 insertions(+) create mode 100644 arch/powerpc/perf/vpa-pmu.c diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 11065313d4c12..f620e3126d688 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -688,6 +688,9 @@ int kvmhv_counters_tracepoint_regfunc(void); void kvmhv_counters_tracepoint_unregfunc(void); int kvmhv_get_l2_counters_status(void); void kvmhv_set_l2_counters_status(int cpu, bool status); +u64 kvmhv_get_l1_to_l2_cs_time(void); +u64 kvmhv_get_l2_to_l1_cs_time(void); +u64 kvmhv_get_l2_runtime_agg(void); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index febb9c433f642..4843063df88cc 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4129,6 +4129,7 @@ void kvmhv_set_l2_counters_status(int cpu, bool status) else lppaca_of(cpu).l2_counters_enable = 0; } +EXPORT_SYMBOL(kvmhv_set_l2_counters_status); int kvmhv_counters_tracepoint_regfunc(void) { @@ -4168,6 +4169,24 @@ static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu) *l2_runtime_agg_ptr = l2_runtime_ns; } +u64 kvmhv_get_l1_to_l2_cs_time(void) +{ + return tb_to_ns(be64_to_cpu(get_lppaca()->l1_to_l2_cs_tb)); +} +EXPORT_SYMBOL(kvmhv_get_l1_to_l2_cs_time); + +u64 kvmhv_get_l2_to_l1_cs_time(void) +{ + return tb_to_ns(be64_to_cpu(get_lppaca()->l2_to_l1_cs_tb)); +} +EXPORT_SYMBOL(kvmhv_get_l2_to_l1_cs_time); + +u64 kvmhv_get_l2_runtime_agg(void) +{ + return tb_to_ns(be64_to_cpu(get_lppaca()->l2_runtime_tb)); +} +EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg); + #else int kvmhv_get_l2_counters_status(void) { diff --git a/arch/powerpc/perf/Makefile b/arch/powerpc/perf/Makefile index 4f53d0b97539b..ac2cf58d62dbb 100644 --- a/arch/powerpc/perf/Makefile +++ b/arch/powerpc/perf/Makefile @@ -16,6 +16,8 @@ obj-$(CONFIG_FSL_EMB_PERF_EVENT_E500) += e500-pmu.o e6500-pmu.o obj-$(CONFIG_HV_PERF_CTRS) += hv-24x7.o hv-gpci.o hv-common.o +obj-$(CONFIG_VPA_PMU) += vpa-pmu.o + obj-$(CONFIG_PPC_8xx) += 8xx-pmu.o obj-$(CONFIG_PPC64) += $(obj64-y) diff --git a/arch/powerpc/perf/vpa-pmu.c b/arch/powerpc/perf/vpa-pmu.c new file mode 100644 index 0000000000000..c143c626b4a02 --- /dev/null +++ b/arch/powerpc/perf/vpa-pmu.c @@ -0,0 +1,194 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Performance monitoring support for Virtual Processor Area(VPA) based counters + * + * Copyright (C) 2024 IBM Corporation + */ +#define pr_fmt(fmt) "vpa_pmu: " fmt + +#include +#include +#include +#include + +#define MODULE_VERS "1.0" +#define MODULE_NAME "pseries_vpa_pmu" + +#define EVENT(_name, _code) enum{_name = _code} + +#define VPA_PMU_EVENT_VAR(_id) event_attr_##_id +#define VPA_PMU_EVENT_PTR(_id) (&event_attr_##_id.attr.attr) + +static ssize_t vpa_pmu_events_sysfs_show(struct device *dev, + struct device_attribute *attr, char *page) +{ + struct perf_pmu_events_attr *pmu_attr; + + pmu_attr = container_of(attr, struct perf_pmu_events_attr, attr); + + return sprintf(page, "event=0x%02llx\n", pmu_attr->id); +} + +#define VPA_PMU_EVENT_ATTR(_name, _id) \ + PMU_EVENT_ATTR(_name, VPA_PMU_EVENT_VAR(_id), _id, \ + vpa_pmu_events_sysfs_show) + +EVENT(L1_TO_L2_CS_LAT, 0x1); +EVENT(L2_TO_L1_CS_LAT, 0x2); +EVENT(L2_RUNTIME_AGG, 0x3); + +VPA_PMU_EVENT_ATTR(l1_to_l2_lat, L1_TO_L2_CS_LAT); +VPA_PMU_EVENT_ATTR(l2_to_l1_lat, L2_TO_L1_CS_LAT); +VPA_PMU_EVENT_ATTR(l2_runtime_agg, L2_RUNTIME_AGG); + +static struct attribute *vpa_pmu_events_attr[] = { + VPA_PMU_EVENT_PTR(L1_TO_L2_CS_LAT), + VPA_PMU_EVENT_PTR(L2_TO_L1_CS_LAT), + VPA_PMU_EVENT_PTR(L2_RUNTIME_AGG), + NULL +}; + +static const struct attribute_group vpa_pmu_events_group = { + .name = "events", + .attrs = vpa_pmu_events_attr, +}; + +PMU_FORMAT_ATTR(event, "config:0-31"); +static struct attribute *vpa_pmu_format_attr[] = { + &format_attr_event.attr, + NULL, +}; + +static struct attribute_group vpa_pmu_format_group = { + .name = "format", + .attrs = vpa_pmu_format_attr, +}; + +static const struct attribute_group *vpa_pmu_attr_groups[] = { + &vpa_pmu_events_group, + &vpa_pmu_format_group, + NULL +}; + +static int vpa_pmu_event_init(struct perf_event *event) +{ + if (event->attr.type != event->pmu->type) + return -ENOENT; + + /* it does not support event sampling mode */ + if (is_sampling_event(event)) + return -EOPNOTSUPP; + + /* no branch sampling */ + if (has_branch_stack(event)) + return -EOPNOTSUPP; + + /* Invalid event code */ + if ((event->attr.config <= 0) || (event->attr.config > 3)) + return -EINVAL; + + return 0; +} + +static unsigned long get_counter_data(struct perf_event *event) +{ + unsigned int config = event->attr.config; + u64 data; + + switch (config) { + case L1_TO_L2_CS_LAT: + data = kvmhv_get_l1_to_l2_cs_time(); + break; + case L2_TO_L1_CS_LAT: + data = kvmhv_get_l2_to_l1_cs_time(); + break; + case L2_RUNTIME_AGG: + data = kvmhv_get_l2_runtime_agg(); + break; + default: + data = 0; + break; + } + + return data; +} + +static int vpa_pmu_add(struct perf_event *event, int flags) +{ + u64 data; + + kvmhv_set_l2_counters_status(smp_processor_id(), true); + + data = get_counter_data(event); + local64_set(&event->hw.prev_count, data); + + return 0; +} + +static void vpa_pmu_read(struct perf_event *event) +{ + u64 prev_data, new_data, final_data; + + prev_data = local64_read(&event->hw.prev_count); + new_data = get_counter_data(event); + final_data = new_data - prev_data; + + local64_add(final_data, &event->count); +} + +static void vpa_pmu_del(struct perf_event *event, int flags) +{ + vpa_pmu_read(event); + + /* + * Disable vpa counter accumulation + */ + kvmhv_set_l2_counters_status(smp_processor_id(), false); +} + +static struct pmu vpa_pmu = { + .task_ctx_nr = perf_sw_context, + .name = "vpa_pmu", + .event_init = vpa_pmu_event_init, + .add = vpa_pmu_add, + .del = vpa_pmu_del, + .read = vpa_pmu_read, + .attr_groups = vpa_pmu_attr_groups, + .capabilities = PERF_PMU_CAP_NO_EXCLUDE | PERF_PMU_CAP_NO_INTERRUPT, +}; + +static int __init pseries_vpa_pmu_init(void) +{ + /* + * List of current Linux on Power platforms and + * this driver is supported only in PowerVM LPAR + * (L1) platform. + * + * Enabled Linux on Power Platforms + * ---------------------------------------- + * [X] PowerVM LPAR (L1) + * [ ] KVM Guest On PowerVM KoP(L2) + * [ ] Baremetal(PowerNV) + * [ ] KVM Guest On PowerNV + */ + if (!firmware_has_feature(FW_FEATURE_LPAR) || is_kvm_guest()) + return -ENODEV; + + perf_pmu_register(&vpa_pmu, vpa_pmu.name, -1); + pr_info("Virtual Processor Area PMU registered.\n"); + + return 0; +} + +static void __exit pseries_vpa_pmu_cleanup(void) +{ + perf_pmu_unregister(&vpa_pmu); + pr_info("Virtual Processor Area PMU unregistered.\n"); +} + +module_init(pseries_vpa_pmu_init); +module_exit(pseries_vpa_pmu_cleanup); +MODULE_DESCRIPTION("Perf Driver for pSeries VPA pmu counter"); +MODULE_AUTHOR("Kajol Jain "); +MODULE_AUTHOR("Madhavan Srinivasan "); +MODULE_LICENSE("GPL"); diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index afc0f6a613372..42fc66e97539b 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -140,6 +140,20 @@ config HV_PERF_CTRS If unsure, select Y. +config VPA_PMU + tristate "VPA PMU events" + depends on KVM_BOOK3S_64_HV && HV_PERF_CTRS + help + Enable access to the VPA PMU counters via perf. This enables + code that support measurement for KVM on PowerVM(KoP) feature. + PAPR hypervisor has introduced three new counters in the VPA area + of LPAR CPUs for KVM L2 guest observability. Two for context switches + from host to guest and vice versa, and one counter for getting + the total time spent inside the KVM guest. This config enables code + that access these software counters via perf. + + If unsure, Select N. + config IBMVIO depends on PPC_PSERIES bool From 4ae0b32ecee730a41f65eb122bbb40fda7dca34a Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 18 Nov 2024 17:11:12 +0530 Subject: [PATCH 66/69] docs: ABI: sysfs-bus-event_source-devices-vpa-pmu: Document sysfs event format entries for vpa_pmu Details are added for the vpa_pmu event and format attributes in the ABI documentation. Signed-off-by: Kajol Jain Co-developed-by: Madhavan Srinivasan Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241118114114.208964-2-kjain@linux.ibm.com --- .../sysfs-bus-event_source-devices-vpa-pmu | 24 +++++++++++++++++++ 1 file changed, 24 insertions(+) create mode 100644 Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu diff --git a/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu new file mode 100644 index 0000000000000..8285263ff78dc --- /dev/null +++ b/Documentation/ABI/testing/sysfs-bus-event_source-devices-vpa-pmu @@ -0,0 +1,24 @@ +What: /sys/bus/event_source/devices/vpa_pmu/format +Date: November 2024 +Contact: Linux on PowerPC Developer List +Description: Read-only. Attribute group to describe the magic bits + that go into perf_event_attr.config for a particular pmu. + (See ABI/testing/sysfs-bus-event_source-devices-format). + + Each attribute under this group defines a bit range of the + perf_event_attr.config. Supported attribute are listed + below:: + event = "config:0-31" - event ID + + For example:: + + l1_to_l2_lat = "event=0x1" + +What: /sys/bus/event_source/devices/vpa_pmu/events +Date: November 2024 +Contact: Linux on PowerPC Developer List +Description: Read-only. Attribute group to describe performance monitoring + events for the Virtual Processor Area events. Each attribute + in this group describes a single performance monitoring event + supported by vpa_pmu. The name of the file is the name of + the event (See ABI/testing/sysfs-bus-event_source-devices-events). From 5f0b48c6a168994cc09d02888c2d939eba2af193 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 18 Nov 2024 17:11:13 +0530 Subject: [PATCH 67/69] powerpc/kvm: Add vpa latency counters to kvm_vcpu_arch Commit e1f288d2f9c69 ("KVM: PPC: Book3S HV nestedv2: Add support for reading VPA counters for pseries guests") introduced support for new Virtual Process Area(VPA) based software counters. These counters are useful when observing context switch latency of L1 <-> L2. It also added access to counters in lppaca, which is good enough to understand latency details per-cpu level. But to extend and aggregate per-process level(qemu) or per-pid/tid level(vcpu), these counters also needs to be added as part of kvm_vcpu_arch struct. Additional code added to update these new kvm_vcpu_arch variables in do_trace_nested_cs_time function. Signed-off-by: Kajol Jain Co-developed-by: Madhavan Srinivasan Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241118114114.208964-3-kjain@linux.ibm.com --- arch/powerpc/include/asm/kvm_host.h | 5 +++++ arch/powerpc/kvm/book3s_hv.c | 3 +++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 37e581c5b2013..6e1108f8fce6f 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -871,6 +871,11 @@ struct kvm_vcpu_arch { struct kvmhv_tb_accumulator cede_time; /* time napping inside guest */ #endif #endif /* CONFIG_KVM_BOOK3S_HV_EXIT_TIMING */ +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + u64 l1_to_l2_cs; + u64 l2_to_l1_cs; + u64 l2_runtime_agg; +#endif }; #define VCPU_FPR(vcpu, i) (vcpu)->arch.fp.fpr[i][TS_FPROFFSET] diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 4843063df88cc..613231c436e7b 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4167,6 +4167,9 @@ static void do_trace_nested_cs_time(struct kvm_vcpu *vcpu) *l1_to_l2_cs_ptr = l1_to_l2_ns; *l2_to_l1_cs_ptr = l2_to_l1_ns; *l2_runtime_agg_ptr = l2_runtime_ns; + vcpu->arch.l1_to_l2_cs = l1_to_l2_ns; + vcpu->arch.l2_to_l1_cs = l2_to_l1_ns; + vcpu->arch.l2_runtime_agg = l2_runtime_ns; } u64 kvmhv_get_l1_to_l2_cs_time(void) From f26f9933e3e31b2117b804b6b8932388db88a131 Mon Sep 17 00:00:00 2001 From: Kajol Jain Date: Mon, 18 Nov 2024 17:11:14 +0530 Subject: [PATCH 68/69] powerpc/perf: Add per-task/process monitoring to vpa_pmu driver Enhance the vpa_pmu driver with a feature to observe context switch latency event for both per-task (tid) and per-pid (pid) option. Couple of new helper functions are added to hide the abstraction of reading the context switch latency counter from kvm_vcpu_arch struct and these helper functions are defined in the "kvm/book3s_hv.c". "PERF_ATTACH_TASK" flag is used to decide whether to read the counter values from lppaca or kvm_vcpu_arch struct. Signed-off-by: Kajol Jain Co-developed-by: Madhavan Srinivasan Signed-off-by: Madhavan Srinivasan Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241118114114.208964-4-kjain@linux.ibm.com --- arch/powerpc/include/asm/kvm_book3s_64.h | 3 ++ arch/powerpc/kvm/book3s_hv.c | 45 ++++++++++++++++++++++++ arch/powerpc/perf/vpa-pmu.c | 15 ++++++-- 3 files changed, 60 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index f620e3126d688..b936e174eefd1 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -691,6 +691,9 @@ void kvmhv_set_l2_counters_status(int cpu, bool status); u64 kvmhv_get_l1_to_l2_cs_time(void); u64 kvmhv_get_l2_to_l1_cs_time(void); u64 kvmhv_get_l2_runtime_agg(void); +u64 kvmhv_get_l1_to_l2_cs_time_vcpu(void); +u64 kvmhv_get_l2_to_l1_cs_time_vcpu(void); +u64 kvmhv_get_l2_runtime_agg_vcpu(void); #endif /* CONFIG_KVM_BOOK3S_HV_POSSIBLE */ diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 613231c436e7b..d8938fdf19362 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4190,6 +4190,51 @@ u64 kvmhv_get_l2_runtime_agg(void) } EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg); +u64 kvmhv_get_l1_to_l2_cs_time_vcpu(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vcpu_arch *arch; + + vcpu = local_paca->kvm_hstate.kvm_vcpu; + if (vcpu) { + arch = &vcpu->arch; + return arch->l1_to_l2_cs; + } else { + return 0; + } +} +EXPORT_SYMBOL(kvmhv_get_l1_to_l2_cs_time_vcpu); + +u64 kvmhv_get_l2_to_l1_cs_time_vcpu(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vcpu_arch *arch; + + vcpu = local_paca->kvm_hstate.kvm_vcpu; + if (vcpu) { + arch = &vcpu->arch; + return arch->l2_to_l1_cs; + } else { + return 0; + } +} +EXPORT_SYMBOL(kvmhv_get_l2_to_l1_cs_time_vcpu); + +u64 kvmhv_get_l2_runtime_agg_vcpu(void) +{ + struct kvm_vcpu *vcpu; + struct kvm_vcpu_arch *arch; + + vcpu = local_paca->kvm_hstate.kvm_vcpu; + if (vcpu) { + arch = &vcpu->arch; + return arch->l2_runtime_agg; + } else { + return 0; + } +} +EXPORT_SYMBOL(kvmhv_get_l2_runtime_agg_vcpu); + #else int kvmhv_get_l2_counters_status(void) { diff --git a/arch/powerpc/perf/vpa-pmu.c b/arch/powerpc/perf/vpa-pmu.c index c143c626b4a02..6a5bfd2a13b5a 100644 --- a/arch/powerpc/perf/vpa-pmu.c +++ b/arch/powerpc/perf/vpa-pmu.c @@ -97,13 +97,22 @@ static unsigned long get_counter_data(struct perf_event *event) switch (config) { case L1_TO_L2_CS_LAT: - data = kvmhv_get_l1_to_l2_cs_time(); + if (event->attach_state & PERF_ATTACH_TASK) + data = kvmhv_get_l1_to_l2_cs_time_vcpu(); + else + data = kvmhv_get_l1_to_l2_cs_time(); break; case L2_TO_L1_CS_LAT: - data = kvmhv_get_l2_to_l1_cs_time(); + if (event->attach_state & PERF_ATTACH_TASK) + data = kvmhv_get_l2_to_l1_cs_time_vcpu(); + else + data = kvmhv_get_l2_to_l1_cs_time(); break; case L2_RUNTIME_AGG: - data = kvmhv_get_l2_runtime_agg(); + if (event->attach_state & PERF_ATTACH_TASK) + data = kvmhv_get_l2_runtime_agg_vcpu(); + else + data = kvmhv_get_l2_runtime_agg(); break; default: data = 0; From 3c592ce7991cdf03bc7d139d790ce58c82c5903b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Tue, 12 Nov 2024 19:41:34 +1100 Subject: [PATCH 69/69] EDAC/powerpc: Remove PPC_MAPLE drivers These two drivers are only buildable for the powerpc "maple" platform (CONFIG_PPC_MAPLE), which has now been removed, see commit 62f8f307c80e ("powerpc/64: Remove maple platform"). Remove the drivers. Acked-by: Borislav Petkov (AMD) Signed-off-by: Michael Ellerman Link: https://patch.msgid.link/20241112084134.411964-1-mpe@ellerman.id.au --- drivers/edac/Kconfig | 18 -- drivers/edac/Makefile | 2 - drivers/edac/amd8111_edac.c | 596 ------------------------------------ drivers/edac/amd8111_edac.h | 118 ------- drivers/edac/amd8131_edac.c | 358 ---------------------- drivers/edac/amd8131_edac.h | 107 ------- 6 files changed, 1199 deletions(-) delete mode 100644 drivers/edac/amd8111_edac.c delete mode 100644 drivers/edac/amd8111_edac.h delete mode 100644 drivers/edac/amd8131_edac.c delete mode 100644 drivers/edac/amd8131_edac.h diff --git a/drivers/edac/Kconfig b/drivers/edac/Kconfig index 81af6c344d6ba..06f7b43a6f788 100644 --- a/drivers/edac/Kconfig +++ b/drivers/edac/Kconfig @@ -311,24 +311,6 @@ config EDAC_CELL Cell Broadband Engine internal memory controller on platform without a hypervisor -config EDAC_AMD8131 - tristate "AMD8131 HyperTransport PCI-X Tunnel" - depends on PCI && PPC_MAPLE - help - Support for error detection and correction on the - AMD8131 HyperTransport PCI-X Tunnel chip. - Note, add more Kconfig dependency if it's adopted - on some machine other than Maple. - -config EDAC_AMD8111 - tristate "AMD8111 HyperTransport I/O Hub" - depends on PCI && PPC_MAPLE - help - Support for error detection and correction on the - AMD8111 HyperTransport I/O Hub chip. - Note, add more Kconfig dependency if it's adopted - on some machine other than Maple. - config EDAC_CPC925 tristate "IBM CPC925 Memory Controller (PPC970FX)" depends on PPC64 diff --git a/drivers/edac/Makefile b/drivers/edac/Makefile index faf310eec4a67..f9cf19d8d13d4 100644 --- a/drivers/edac/Makefile +++ b/drivers/edac/Makefile @@ -63,8 +63,6 @@ i10nm_edac-y := i10nm_base.o obj-$(CONFIG_EDAC_I10NM) += i10nm_edac.o skx_edac_common.o obj-$(CONFIG_EDAC_CELL) += cell_edac.o -obj-$(CONFIG_EDAC_AMD8111) += amd8111_edac.o -obj-$(CONFIG_EDAC_AMD8131) += amd8131_edac.o obj-$(CONFIG_EDAC_HIGHBANK_MC) += highbank_mc_edac.o obj-$(CONFIG_EDAC_HIGHBANK_L2) += highbank_l2_edac.o diff --git a/drivers/edac/amd8111_edac.c b/drivers/edac/amd8111_edac.c deleted file mode 100644 index a6d3013d58234..0000000000000 --- a/drivers/edac/amd8111_edac.c +++ /dev/null @@ -1,596 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * amd8111_edac.c, AMD8111 Hyper Transport chip EDAC kernel module - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao - * Benjamin Walsh - * Hu Yongqi - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "edac_module.h" -#include "amd8111_edac.h" - -#define AMD8111_EDAC_REVISION " Ver: 1.0.0" -#define AMD8111_EDAC_MOD_STR "amd8111_edac" - -#define PCI_DEVICE_ID_AMD_8111_PCI 0x7460 - -enum amd8111_edac_devs { - LPC_BRIDGE = 0, -}; - -enum amd8111_edac_pcis { - PCI_BRIDGE = 0, -}; - -/* Wrapper functions for accessing PCI configuration space */ -static int edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32) -{ - int ret; - - ret = pci_read_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); - - return ret; -} - -static void edac_pci_read_byte(struct pci_dev *dev, int reg, u8 *val8) -{ - int ret; - - ret = pci_read_config_byte(dev, reg, val8); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); -} - -static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32) -{ - int ret; - - ret = pci_write_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -static void edac_pci_write_byte(struct pci_dev *dev, int reg, u8 val8) -{ - int ret; - - ret = pci_write_config_byte(dev, reg, val8); - if (ret != 0) - printk(KERN_ERR AMD8111_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -/* - * device-specific methods for amd8111 PCI Bridge Controller - * - * Error Reporting and Handling for amd8111 chipset could be found - * in its datasheet 3.1.2 section, P37 - */ -static void amd8111_pci_bridge_init(struct amd8111_pci_info *pci_info) -{ - u32 val32; - struct pci_dev *dev = pci_info->dev; - - /* First clear error detection flags on the host interface */ - - /* Clear SSE/SMA/STA flags in the global status register*/ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - if (val32 & PCI_STSCMD_CLEAR_MASK) - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Clear CRC and Link Fail flags in HT Link Control reg */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - if (val32 & HT_LINK_CLEAR_MASK) - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Second clear all fault on the secondary interface */ - - /* Clear error flags in the memory-base limit reg. */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_CLEAR_MASK) - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - /* Clear Discard Timer Expired flag in Interrupt/Bridge Control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - if (val32 & PCI_INTBRG_CTRL_CLEAR_MASK) - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - - /* Last enable error detections */ - if (edac_op_state == EDAC_OPSTATE_POLL) { - /* Enable System Error reporting in global status register */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - val32 |= PCI_STSCMD_SERREN; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Enable CRC Sync flood packets to HyperTransport Link */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - val32 |= HT_LINK_CRCFEN; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Enable SSE reporting etc in Interrupt control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - val32 |= PCI_INTBRG_CTRL_POLL_MASK; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - } -} - -static void amd8111_pci_bridge_exit(struct amd8111_pci_info *pci_info) -{ - u32 val32; - struct pci_dev *dev = pci_info->dev; - - if (edac_op_state == EDAC_OPSTATE_POLL) { - /* Disable System Error reporting */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - val32 &= ~PCI_STSCMD_SERREN; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - /* Disable CRC flood packets */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - val32 &= ~HT_LINK_CRCFEN; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - /* Disable DTSERREN/MARSP/SERREN in Interrupt Control reg */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - val32 &= ~PCI_INTBRG_CTRL_POLL_MASK; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - } -} - -static void amd8111_pci_bridge_check(struct edac_pci_ctl_info *edac_dev) -{ - struct amd8111_pci_info *pci_info = edac_dev->pvt_info; - struct pci_dev *dev = pci_info->dev; - u32 val32; - - /* Check out PCI Bridge Status and Command Register */ - edac_pci_read_dword(dev, REG_PCI_STSCMD, &val32); - if (val32 & PCI_STSCMD_CLEAR_MASK) { - printk(KERN_INFO "Error(s) in PCI bridge status and command" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "SSE: %d, RMA: %d, RTA: %d\n", - (val32 & PCI_STSCMD_SSE) != 0, - (val32 & PCI_STSCMD_RMA) != 0, - (val32 & PCI_STSCMD_RTA) != 0); - - val32 |= PCI_STSCMD_CLEAR_MASK; - edac_pci_write_dword(dev, REG_PCI_STSCMD, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out HyperTransport Link Control Register */ - edac_pci_read_dword(dev, REG_HT_LINK, &val32); - if (val32 & HT_LINK_LKFAIL) { - printk(KERN_INFO "Error(s) in hypertransport link control" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "LKFAIL: %d\n", - (val32 & HT_LINK_LKFAIL) != 0); - - val32 |= HT_LINK_LKFAIL; - edac_pci_write_dword(dev, REG_HT_LINK, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out PCI Interrupt and Bridge Control Register */ - edac_pci_read_dword(dev, REG_PCI_INTBRG_CTRL, &val32); - if (val32 & PCI_INTBRG_CTRL_DTSTAT) { - printk(KERN_INFO "Error(s) in PCI interrupt and bridge control" - "register on device %s\n", pci_info->ctl_name); - printk(KERN_INFO "DTSTAT: %d\n", - (val32 & PCI_INTBRG_CTRL_DTSTAT) != 0); - - val32 |= PCI_INTBRG_CTRL_DTSTAT; - edac_pci_write_dword(dev, REG_PCI_INTBRG_CTRL, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check out PCI Bridge Memory Base-Limit Register */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_CLEAR_MASK) { - printk(KERN_INFO - "Error(s) in mem limit register on %s device\n", - pci_info->ctl_name); - printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n" - "RTA: %d, STA: %d, MDPE: %d\n", - (val32 & MEM_LIMIT_DPE) != 0, - (val32 & MEM_LIMIT_RSE) != 0, - (val32 & MEM_LIMIT_RMA) != 0, - (val32 & MEM_LIMIT_RTA) != 0, - (val32 & MEM_LIMIT_STA) != 0, - (val32 & MEM_LIMIT_MDPE) != 0); - - val32 |= MEM_LIMIT_CLEAR_MASK; - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } -} - -static struct resource *legacy_io_res; -static int at_compat_reg_broken; -#define LEGACY_NR_PORTS 1 - -/* device-specific methods for amd8111 LPC Bridge device */ -static void amd8111_lpc_bridge_init(struct amd8111_dev_info *dev_info) -{ - u8 val8; - struct pci_dev *dev = dev_info->dev; - - /* First clear REG_AT_COMPAT[SERR, IOCHK] if necessary */ - legacy_io_res = request_region(REG_AT_COMPAT, LEGACY_NR_PORTS, - AMD8111_EDAC_MOD_STR); - if (!legacy_io_res) - printk(KERN_INFO "%s: failed to request legacy I/O region " - "start %d, len %d\n", __func__, - REG_AT_COMPAT, LEGACY_NR_PORTS); - else { - val8 = __do_inb(REG_AT_COMPAT); - if (val8 == 0xff) { /* buggy port */ - printk(KERN_INFO "%s: port %d is buggy, not supported" - " by hardware?\n", __func__, REG_AT_COMPAT); - at_compat_reg_broken = 1; - release_region(REG_AT_COMPAT, LEGACY_NR_PORTS); - legacy_io_res = NULL; - } else { - u8 out8 = 0; - if (val8 & AT_COMPAT_SERR) - out8 = AT_COMPAT_CLRSERR; - if (val8 & AT_COMPAT_IOCHK) - out8 |= AT_COMPAT_CLRIOCHK; - if (out8 > 0) - __do_outb(out8, REG_AT_COMPAT); - } - } - - /* Second clear error flags on LPC bridge */ - edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8); - if (val8 & IO_CTRL_1_CLEAR_MASK) - edac_pci_write_byte(dev, REG_IO_CTRL_1, val8); -} - -static void amd8111_lpc_bridge_exit(struct amd8111_dev_info *dev_info) -{ - if (legacy_io_res) - release_region(REG_AT_COMPAT, LEGACY_NR_PORTS); -} - -static void amd8111_lpc_bridge_check(struct edac_device_ctl_info *edac_dev) -{ - struct amd8111_dev_info *dev_info = edac_dev->pvt_info; - struct pci_dev *dev = dev_info->dev; - u8 val8; - - edac_pci_read_byte(dev, REG_IO_CTRL_1, &val8); - if (val8 & IO_CTRL_1_CLEAR_MASK) { - printk(KERN_INFO - "Error(s) in IO control register on %s device\n", - dev_info->ctl_name); - printk(KERN_INFO "LPC ERR: %d, PW2LPC: %d\n", - (val8 & IO_CTRL_1_LPC_ERR) != 0, - (val8 & IO_CTRL_1_PW2LPC) != 0); - - val8 |= IO_CTRL_1_CLEAR_MASK; - edac_pci_write_byte(dev, REG_IO_CTRL_1, val8); - - edac_device_handle_ue(edac_dev, 0, 0, edac_dev->ctl_name); - } - - if (at_compat_reg_broken == 0) { - u8 out8 = 0; - val8 = __do_inb(REG_AT_COMPAT); - if (val8 & AT_COMPAT_SERR) - out8 = AT_COMPAT_CLRSERR; - if (val8 & AT_COMPAT_IOCHK) - out8 |= AT_COMPAT_CLRIOCHK; - if (out8 > 0) { - __do_outb(out8, REG_AT_COMPAT); - edac_device_handle_ue(edac_dev, 0, 0, - edac_dev->ctl_name); - } - } -} - -/* General devices represented by edac_device_ctl_info */ -static struct amd8111_dev_info amd8111_devices[] = { - [LPC_BRIDGE] = { - .err_dev = PCI_DEVICE_ID_AMD_8111_LPC, - .ctl_name = "lpc", - .init = amd8111_lpc_bridge_init, - .exit = amd8111_lpc_bridge_exit, - .check = amd8111_lpc_bridge_check, - }, - {0}, -}; - -/* PCI controllers represented by edac_pci_ctl_info */ -static struct amd8111_pci_info amd8111_pcis[] = { - [PCI_BRIDGE] = { - .err_dev = PCI_DEVICE_ID_AMD_8111_PCI, - .ctl_name = "AMD8111_PCI_Controller", - .init = amd8111_pci_bridge_init, - .exit = amd8111_pci_bridge_exit, - .check = amd8111_pci_bridge_check, - }, - {0}, -}; - -static int amd8111_dev_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - struct amd8111_dev_info *dev_info = &amd8111_devices[id->driver_data]; - int ret = -ENODEV; - - dev_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, - dev_info->err_dev, NULL); - - if (!dev_info->dev) { - printk(KERN_ERR "EDAC device not found:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - goto err; - } - - if (pci_enable_device(dev_info->dev)) { - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - goto err_dev_put; - } - - /* - * we do not allocate extra private structure for - * edac_device_ctl_info, but make use of existing - * one instead. - */ - dev_info->edac_idx = edac_device_alloc_index(); - dev_info->edac_dev = - edac_device_alloc_ctl_info(0, dev_info->ctl_name, 1, - NULL, 0, 0, dev_info->edac_idx); - if (!dev_info->edac_dev) { - ret = -ENOMEM; - goto err_dev_put; - } - - dev_info->edac_dev->pvt_info = dev_info; - dev_info->edac_dev->dev = &dev_info->dev->dev; - dev_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR; - dev_info->edac_dev->ctl_name = dev_info->ctl_name; - dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - dev_info->edac_dev->edac_check = dev_info->check; - - if (dev_info->init) - dev_info->init(dev_info); - - if (edac_device_add_device(dev_info->edac_dev) > 0) { - printk(KERN_ERR "failed to add edac_dev for %s\n", - dev_info->ctl_name); - goto err_edac_free_ctl; - } - - printk(KERN_INFO "added one edac_dev on AMD8111 " - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, dev_info->err_dev, - dev_info->ctl_name); - - return 0; - -err_edac_free_ctl: - edac_device_free_ctl_info(dev_info->edac_dev); -err_dev_put: - pci_dev_put(dev_info->dev); -err: - return ret; -} - -static void amd8111_dev_remove(struct pci_dev *dev) -{ - struct amd8111_dev_info *dev_info; - - for (dev_info = amd8111_devices; dev_info->err_dev; dev_info++) - if (dev_info->dev->device == dev->device) - break; - - if (!dev_info->err_dev) /* should never happen */ - return; - - if (dev_info->edac_dev) { - edac_device_del_device(dev_info->edac_dev->dev); - edac_device_free_ctl_info(dev_info->edac_dev); - } - - if (dev_info->exit) - dev_info->exit(dev_info); - - pci_dev_put(dev_info->dev); -} - -static int amd8111_pci_probe(struct pci_dev *dev, - const struct pci_device_id *id) -{ - struct amd8111_pci_info *pci_info = &amd8111_pcis[id->driver_data]; - int ret = -ENODEV; - - pci_info->dev = pci_get_device(PCI_VENDOR_ID_AMD, - pci_info->err_dev, NULL); - - if (!pci_info->dev) { - printk(KERN_ERR "EDAC device not found:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - goto err; - } - - if (pci_enable_device(pci_info->dev)) { - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - goto err_dev_put; - } - - /* - * we do not allocate extra private structure for - * edac_pci_ctl_info, but make use of existing - * one instead. - */ - pci_info->edac_idx = edac_pci_alloc_index(); - pci_info->edac_dev = edac_pci_alloc_ctl_info(0, pci_info->ctl_name); - if (!pci_info->edac_dev) { - ret = -ENOMEM; - goto err_dev_put; - } - - pci_info->edac_dev->pvt_info = pci_info; - pci_info->edac_dev->dev = &pci_info->dev->dev; - pci_info->edac_dev->mod_name = AMD8111_EDAC_MOD_STR; - pci_info->edac_dev->ctl_name = pci_info->ctl_name; - pci_info->edac_dev->dev_name = dev_name(&pci_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - pci_info->edac_dev->edac_check = pci_info->check; - - if (pci_info->init) - pci_info->init(pci_info); - - if (edac_pci_add_device(pci_info->edac_dev, pci_info->edac_idx) > 0) { - printk(KERN_ERR "failed to add edac_pci for %s\n", - pci_info->ctl_name); - goto err_edac_free_ctl; - } - - printk(KERN_INFO "added one edac_pci on AMD8111 " - "vendor %x, device %x, name %s\n", - PCI_VENDOR_ID_AMD, pci_info->err_dev, - pci_info->ctl_name); - - return 0; - -err_edac_free_ctl: - edac_pci_free_ctl_info(pci_info->edac_dev); -err_dev_put: - pci_dev_put(pci_info->dev); -err: - return ret; -} - -static void amd8111_pci_remove(struct pci_dev *dev) -{ - struct amd8111_pci_info *pci_info; - - for (pci_info = amd8111_pcis; pci_info->err_dev; pci_info++) - if (pci_info->dev->device == dev->device) - break; - - if (!pci_info->err_dev) /* should never happen */ - return; - - if (pci_info->edac_dev) { - edac_pci_del_device(pci_info->edac_dev->dev); - edac_pci_free_ctl_info(pci_info->edac_dev); - } - - if (pci_info->exit) - pci_info->exit(pci_info); - - pci_dev_put(pci_info->dev); -} - -/* PCI Device ID talbe for general EDAC device */ -static const struct pci_device_id amd8111_edac_dev_tbl[] = { - { - PCI_VEND_DEV(AMD, 8111_LPC), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = LPC_BRIDGE, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8111_edac_dev_tbl); - -static struct pci_driver amd8111_edac_dev_driver = { - .name = "AMD8111_EDAC_DEV", - .probe = amd8111_dev_probe, - .remove = amd8111_dev_remove, - .id_table = amd8111_edac_dev_tbl, -}; - -/* PCI Device ID table for EDAC PCI controller */ -static const struct pci_device_id amd8111_edac_pci_tbl[] = { - { - PCI_VEND_DEV(AMD, 8111_PCI), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = PCI_BRIDGE, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8111_edac_pci_tbl); - -static struct pci_driver amd8111_edac_pci_driver = { - .name = "AMD8111_EDAC_PCI", - .probe = amd8111_pci_probe, - .remove = amd8111_pci_remove, - .id_table = amd8111_edac_pci_tbl, -}; - -static int __init amd8111_edac_init(void) -{ - int val; - - printk(KERN_INFO "AMD8111 EDAC driver " AMD8111_EDAC_REVISION "\n"); - printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n"); - - /* Only POLL mode supported so far */ - edac_op_state = EDAC_OPSTATE_POLL; - - val = pci_register_driver(&amd8111_edac_dev_driver); - val |= pci_register_driver(&amd8111_edac_pci_driver); - - return val; -} - -static void __exit amd8111_edac_exit(void) -{ - pci_unregister_driver(&amd8111_edac_pci_driver); - pci_unregister_driver(&amd8111_edac_dev_driver); -} - - -module_init(amd8111_edac_init); -module_exit(amd8111_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Cao Qingtao "); -MODULE_DESCRIPTION("AMD8111 HyperTransport I/O Hub EDAC kernel module"); diff --git a/drivers/edac/amd8111_edac.h b/drivers/edac/amd8111_edac.h deleted file mode 100644 index 200cab1b3e425..0000000000000 --- a/drivers/edac/amd8111_edac.h +++ /dev/null @@ -1,118 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * amd8111_edac.h, EDAC defs for AMD8111 hypertransport chip - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao - * Benjamin Walsh - * Hu Yongqi - */ - -#ifndef _AMD8111_EDAC_H_ -#define _AMD8111_EDAC_H_ - -/************************************************************ - * PCI Bridge Status and Command Register, DevA:0x04 - ************************************************************/ -#define REG_PCI_STSCMD 0x04 -enum pci_stscmd_bits { - PCI_STSCMD_SSE = BIT(30), - PCI_STSCMD_RMA = BIT(29), - PCI_STSCMD_RTA = BIT(28), - PCI_STSCMD_SERREN = BIT(8), - PCI_STSCMD_CLEAR_MASK = (PCI_STSCMD_SSE | - PCI_STSCMD_RMA | - PCI_STSCMD_RTA) -}; - -/************************************************************ - * PCI Bridge Memory Base-Limit Register, DevA:0x1c - ************************************************************/ -#define REG_MEM_LIM 0x1c -enum mem_limit_bits { - MEM_LIMIT_DPE = BIT(31), - MEM_LIMIT_RSE = BIT(30), - MEM_LIMIT_RMA = BIT(29), - MEM_LIMIT_RTA = BIT(28), - MEM_LIMIT_STA = BIT(27), - MEM_LIMIT_MDPE = BIT(24), - MEM_LIMIT_CLEAR_MASK = (MEM_LIMIT_DPE | - MEM_LIMIT_RSE | - MEM_LIMIT_RMA | - MEM_LIMIT_RTA | - MEM_LIMIT_STA | - MEM_LIMIT_MDPE) -}; - -/************************************************************ - * HyperTransport Link Control Register, DevA:0xc4 - ************************************************************/ -#define REG_HT_LINK 0xc4 -enum ht_link_bits { - HT_LINK_LKFAIL = BIT(4), - HT_LINK_CRCFEN = BIT(1), - HT_LINK_CLEAR_MASK = (HT_LINK_LKFAIL) -}; - -/************************************************************ - * PCI Bridge Interrupt and Bridge Control, DevA:0x3c - ************************************************************/ -#define REG_PCI_INTBRG_CTRL 0x3c -enum pci_intbrg_ctrl_bits { - PCI_INTBRG_CTRL_DTSERREN = BIT(27), - PCI_INTBRG_CTRL_DTSTAT = BIT(26), - PCI_INTBRG_CTRL_MARSP = BIT(21), - PCI_INTBRG_CTRL_SERREN = BIT(17), - PCI_INTBRG_CTRL_PEREN = BIT(16), - PCI_INTBRG_CTRL_CLEAR_MASK = (PCI_INTBRG_CTRL_DTSTAT), - PCI_INTBRG_CTRL_POLL_MASK = (PCI_INTBRG_CTRL_DTSERREN | - PCI_INTBRG_CTRL_MARSP | - PCI_INTBRG_CTRL_SERREN) -}; - -/************************************************************ - * I/O Control 1 Register, DevB:0x40 - ************************************************************/ -#define REG_IO_CTRL_1 0x40 -enum io_ctrl_1_bits { - IO_CTRL_1_NMIONERR = BIT(7), - IO_CTRL_1_LPC_ERR = BIT(6), - IO_CTRL_1_PW2LPC = BIT(1), - IO_CTRL_1_CLEAR_MASK = (IO_CTRL_1_LPC_ERR | IO_CTRL_1_PW2LPC) -}; - -/************************************************************ - * Legacy I/O Space Registers - ************************************************************/ -#define REG_AT_COMPAT 0x61 -enum at_compat_bits { - AT_COMPAT_SERR = BIT(7), - AT_COMPAT_IOCHK = BIT(6), - AT_COMPAT_CLRIOCHK = BIT(3), - AT_COMPAT_CLRSERR = BIT(2), -}; - -struct amd8111_dev_info { - u16 err_dev; /* PCI Device ID */ - struct pci_dev *dev; - int edac_idx; /* device index */ - char *ctl_name; - struct edac_device_ctl_info *edac_dev; - void (*init)(struct amd8111_dev_info *dev_info); - void (*exit)(struct amd8111_dev_info *dev_info); - void (*check)(struct edac_device_ctl_info *edac_dev); -}; - -struct amd8111_pci_info { - u16 err_dev; /* PCI Device ID */ - struct pci_dev *dev; - int edac_idx; /* pci index */ - const char *ctl_name; - struct edac_pci_ctl_info *edac_dev; - void (*init)(struct amd8111_pci_info *dev_info); - void (*exit)(struct amd8111_pci_info *dev_info); - void (*check)(struct edac_pci_ctl_info *edac_dev); -}; - -#endif /* _AMD8111_EDAC_H_ */ diff --git a/drivers/edac/amd8131_edac.c b/drivers/edac/amd8131_edac.c deleted file mode 100644 index 28610ba514f4d..0000000000000 --- a/drivers/edac/amd8131_edac.c +++ /dev/null @@ -1,358 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-only -/* - * amd8131_edac.c, AMD8131 hypertransport chip EDAC kernel module - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao - * Benjamin Walsh - * Hu Yongqi - */ - -#include -#include -#include -#include -#include -#include -#include - -#include "edac_module.h" -#include "amd8131_edac.h" - -#define AMD8131_EDAC_REVISION " Ver: 1.0.0" -#define AMD8131_EDAC_MOD_STR "amd8131_edac" - -/* Wrapper functions for accessing PCI configuration space */ -static void edac_pci_read_dword(struct pci_dev *dev, int reg, u32 *val32) -{ - int ret; - - ret = pci_read_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8131_EDAC_MOD_STR - " PCI Access Read Error at 0x%x\n", reg); -} - -static void edac_pci_write_dword(struct pci_dev *dev, int reg, u32 val32) -{ - int ret; - - ret = pci_write_config_dword(dev, reg, val32); - if (ret != 0) - printk(KERN_ERR AMD8131_EDAC_MOD_STR - " PCI Access Write Error at 0x%x\n", reg); -} - -/* Support up to two AMD8131 chipsets on a platform */ -static struct amd8131_dev_info amd8131_devices[] = { - { - .inst = NORTH_A, - .devfn = DEVFN_PCIX_BRIDGE_NORTH_A, - .ctl_name = "AMD8131_PCIX_NORTH_A", - }, - { - .inst = NORTH_B, - .devfn = DEVFN_PCIX_BRIDGE_NORTH_B, - .ctl_name = "AMD8131_PCIX_NORTH_B", - }, - { - .inst = SOUTH_A, - .devfn = DEVFN_PCIX_BRIDGE_SOUTH_A, - .ctl_name = "AMD8131_PCIX_SOUTH_A", - }, - { - .inst = SOUTH_B, - .devfn = DEVFN_PCIX_BRIDGE_SOUTH_B, - .ctl_name = "AMD8131_PCIX_SOUTH_B", - }, - {.inst = NO_BRIDGE,}, -}; - -static void amd8131_pcix_init(struct amd8131_dev_info *dev_info) -{ - u32 val32; - struct pci_dev *dev = dev_info->dev; - - /* First clear error detection flags */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_MASK) - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - /* Clear Discard Timer Timedout flag */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - if (val32 & INT_CTLR_DTS) - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Clear CRC Error flag on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - if (val32 & LNK_CTRL_CRCERR_A) - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Clear CRC Error flag on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - if (val32 & LNK_CTRL_CRCERR_B) - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); - - /* - * Then enable all error detections. - * - * Setup Discard Timer Sync Flood Enable, - * System Error Enable and Parity Error Enable. - */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - val32 |= INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE; - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Enable overall SERR Error detection */ - edac_pci_read_dword(dev, REG_STS_CMD, &val32); - val32 |= STS_CMD_SERREN; - edac_pci_write_dword(dev, REG_STS_CMD, val32); - - /* Setup CRC Flood Enable for link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - val32 |= LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Setup CRC Flood Enable for link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - val32 |= LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); -} - -static void amd8131_pcix_exit(struct amd8131_dev_info *dev_info) -{ - u32 val32; - struct pci_dev *dev = dev_info->dev; - - /* Disable SERR, PERR and DTSE Error detection */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - val32 &= ~(INT_CTLR_PERR | INT_CTLR_SERR | INT_CTLR_DTSE); - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - /* Disable overall System Error detection */ - edac_pci_read_dword(dev, REG_STS_CMD, &val32); - val32 &= ~STS_CMD_SERREN; - edac_pci_write_dword(dev, REG_STS_CMD, val32); - - /* Disable CRC Sync Flood on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - val32 &= ~LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - /* Disable CRC Sync Flood on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - val32 &= ~LNK_CTRL_CRCFEN; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); -} - -static void amd8131_pcix_check(struct edac_pci_ctl_info *edac_dev) -{ - struct amd8131_dev_info *dev_info = edac_dev->pvt_info; - struct pci_dev *dev = dev_info->dev; - u32 val32; - - /* Check PCI-X Bridge Memory Base-Limit Register for errors */ - edac_pci_read_dword(dev, REG_MEM_LIM, &val32); - if (val32 & MEM_LIMIT_MASK) { - printk(KERN_INFO "Error(s) in mem limit register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "DPE: %d, RSE: %d, RMA: %d\n" - "RTA: %d, STA: %d, MDPE: %d\n", - val32 & MEM_LIMIT_DPE, - val32 & MEM_LIMIT_RSE, - val32 & MEM_LIMIT_RMA, - val32 & MEM_LIMIT_RTA, - val32 & MEM_LIMIT_STA, - val32 & MEM_LIMIT_MDPE); - - val32 |= MEM_LIMIT_MASK; - edac_pci_write_dword(dev, REG_MEM_LIM, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if Discard Timer timed out */ - edac_pci_read_dword(dev, REG_INT_CTLR, &val32); - if (val32 & INT_CTLR_DTS) { - printk(KERN_INFO "Error(s) in interrupt and control register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "DTS: %d\n", val32 & INT_CTLR_DTS); - - val32 |= INT_CTLR_DTS; - edac_pci_write_dword(dev, REG_INT_CTLR, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if CRC error happens on link side A */ - edac_pci_read_dword(dev, REG_LNK_CTRL_A, &val32); - if (val32 & LNK_CTRL_CRCERR_A) { - printk(KERN_INFO "Error(s) in link conf and control register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_A); - - val32 |= LNK_CTRL_CRCERR_A; - edac_pci_write_dword(dev, REG_LNK_CTRL_A, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } - - /* Check if CRC error happens on link side B */ - edac_pci_read_dword(dev, REG_LNK_CTRL_B, &val32); - if (val32 & LNK_CTRL_CRCERR_B) { - printk(KERN_INFO "Error(s) in link conf and control register " - "on %s bridge\n", dev_info->ctl_name); - printk(KERN_INFO "CRCERR: %d\n", val32 & LNK_CTRL_CRCERR_B); - - val32 |= LNK_CTRL_CRCERR_B; - edac_pci_write_dword(dev, REG_LNK_CTRL_B, val32); - - edac_pci_handle_npe(edac_dev, edac_dev->ctl_name); - } -} - -static struct amd8131_info amd8131_chipset = { - .err_dev = PCI_DEVICE_ID_AMD_8131_APIC, - .devices = amd8131_devices, - .init = amd8131_pcix_init, - .exit = amd8131_pcix_exit, - .check = amd8131_pcix_check, -}; - -/* - * There are 4 PCIX Bridges on ATCA-6101 that share the same PCI Device ID, - * so amd8131_probe() would be called by kernel 4 times, with different - * address of pci_dev for each of them each time. - */ -static int amd8131_probe(struct pci_dev *dev, const struct pci_device_id *id) -{ - struct amd8131_dev_info *dev_info; - - for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE; - dev_info++) - if (dev_info->devfn == dev->devfn) - break; - - if (dev_info->inst == NO_BRIDGE) /* should never happen */ - return -ENODEV; - - /* - * We can't call pci_get_device() as we are used to do because - * there are 4 of them but pci_dev_get() instead. - */ - dev_info->dev = pci_dev_get(dev); - - if (pci_enable_device(dev_info->dev)) { - pci_dev_put(dev_info->dev); - printk(KERN_ERR "failed to enable:" - "vendor %x, device %x, devfn %x, name %s\n", - PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev, - dev_info->devfn, dev_info->ctl_name); - return -ENODEV; - } - - /* - * we do not allocate extra private structure for - * edac_pci_ctl_info, but make use of existing - * one instead. - */ - dev_info->edac_idx = edac_pci_alloc_index(); - dev_info->edac_dev = edac_pci_alloc_ctl_info(0, dev_info->ctl_name); - if (!dev_info->edac_dev) - return -ENOMEM; - - dev_info->edac_dev->pvt_info = dev_info; - dev_info->edac_dev->dev = &dev_info->dev->dev; - dev_info->edac_dev->mod_name = AMD8131_EDAC_MOD_STR; - dev_info->edac_dev->ctl_name = dev_info->ctl_name; - dev_info->edac_dev->dev_name = dev_name(&dev_info->dev->dev); - - if (edac_op_state == EDAC_OPSTATE_POLL) - dev_info->edac_dev->edac_check = amd8131_chipset.check; - - if (amd8131_chipset.init) - amd8131_chipset.init(dev_info); - - if (edac_pci_add_device(dev_info->edac_dev, dev_info->edac_idx) > 0) { - printk(KERN_ERR "failed edac_pci_add_device() for %s\n", - dev_info->ctl_name); - edac_pci_free_ctl_info(dev_info->edac_dev); - return -ENODEV; - } - - printk(KERN_INFO "added one device on AMD8131 " - "vendor %x, device %x, devfn %x, name %s\n", - PCI_VENDOR_ID_AMD, amd8131_chipset.err_dev, - dev_info->devfn, dev_info->ctl_name); - - return 0; -} - -static void amd8131_remove(struct pci_dev *dev) -{ - struct amd8131_dev_info *dev_info; - - for (dev_info = amd8131_chipset.devices; dev_info->inst != NO_BRIDGE; - dev_info++) - if (dev_info->devfn == dev->devfn) - break; - - if (dev_info->inst == NO_BRIDGE) /* should never happen */ - return; - - if (dev_info->edac_dev) { - edac_pci_del_device(dev_info->edac_dev->dev); - edac_pci_free_ctl_info(dev_info->edac_dev); - } - - if (amd8131_chipset.exit) - amd8131_chipset.exit(dev_info); - - pci_dev_put(dev_info->dev); -} - -static const struct pci_device_id amd8131_edac_pci_tbl[] = { - { - PCI_VEND_DEV(AMD, 8131_BRIDGE), - .subvendor = PCI_ANY_ID, - .subdevice = PCI_ANY_ID, - .class = 0, - .class_mask = 0, - .driver_data = 0, - }, - { - 0, - } /* table is NULL-terminated */ -}; -MODULE_DEVICE_TABLE(pci, amd8131_edac_pci_tbl); - -static struct pci_driver amd8131_edac_driver = { - .name = AMD8131_EDAC_MOD_STR, - .probe = amd8131_probe, - .remove = amd8131_remove, - .id_table = amd8131_edac_pci_tbl, -}; - -static int __init amd8131_edac_init(void) -{ - printk(KERN_INFO "AMD8131 EDAC driver " AMD8131_EDAC_REVISION "\n"); - printk(KERN_INFO "\t(c) 2008 Wind River Systems, Inc.\n"); - - /* Only POLL mode supported so far */ - edac_op_state = EDAC_OPSTATE_POLL; - - return pci_register_driver(&amd8131_edac_driver); -} - -static void __exit amd8131_edac_exit(void) -{ - pci_unregister_driver(&amd8131_edac_driver); -} - -module_init(amd8131_edac_init); -module_exit(amd8131_edac_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Cao Qingtao "); -MODULE_DESCRIPTION("AMD8131 HyperTransport PCI-X Tunnel EDAC kernel module"); diff --git a/drivers/edac/amd8131_edac.h b/drivers/edac/amd8131_edac.h deleted file mode 100644 index 5f362abdaf12c..0000000000000 --- a/drivers/edac/amd8131_edac.h +++ /dev/null @@ -1,107 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * amd8131_edac.h, EDAC defs for AMD8131 hypertransport chip - * - * Copyright (c) 2008 Wind River Systems, Inc. - * - * Authors: Cao Qingtao - * Benjamin Walsh - * Hu Yongqi - */ - -#ifndef _AMD8131_EDAC_H_ -#define _AMD8131_EDAC_H_ - -#define DEVFN_PCIX_BRIDGE_NORTH_A 8 -#define DEVFN_PCIX_BRIDGE_NORTH_B 16 -#define DEVFN_PCIX_BRIDGE_SOUTH_A 24 -#define DEVFN_PCIX_BRIDGE_SOUTH_B 32 - -/************************************************************ - * PCI-X Bridge Status and Command Register, DevA:0x04 - ************************************************************/ -#define REG_STS_CMD 0x04 -enum sts_cmd_bits { - STS_CMD_SSE = BIT(30), - STS_CMD_SERREN = BIT(8) -}; - -/************************************************************ - * PCI-X Bridge Interrupt and Bridge Control Register, - ************************************************************/ -#define REG_INT_CTLR 0x3c -enum int_ctlr_bits { - INT_CTLR_DTSE = BIT(27), - INT_CTLR_DTS = BIT(26), - INT_CTLR_SERR = BIT(17), - INT_CTLR_PERR = BIT(16) -}; - -/************************************************************ - * PCI-X Bridge Memory Base-Limit Register, DevA:0x1C - ************************************************************/ -#define REG_MEM_LIM 0x1c -enum mem_limit_bits { - MEM_LIMIT_DPE = BIT(31), - MEM_LIMIT_RSE = BIT(30), - MEM_LIMIT_RMA = BIT(29), - MEM_LIMIT_RTA = BIT(28), - MEM_LIMIT_STA = BIT(27), - MEM_LIMIT_MDPE = BIT(24), - MEM_LIMIT_MASK = MEM_LIMIT_DPE|MEM_LIMIT_RSE|MEM_LIMIT_RMA| - MEM_LIMIT_RTA|MEM_LIMIT_STA|MEM_LIMIT_MDPE -}; - -/************************************************************ - * Link Configuration And Control Register, side A - ************************************************************/ -#define REG_LNK_CTRL_A 0xc4 - -/************************************************************ - * Link Configuration And Control Register, side B - ************************************************************/ -#define REG_LNK_CTRL_B 0xc8 - -enum lnk_ctrl_bits { - LNK_CTRL_CRCERR_A = BIT(9), - LNK_CTRL_CRCERR_B = BIT(8), - LNK_CTRL_CRCFEN = BIT(1) -}; - -enum pcix_bridge_inst { - NORTH_A = 0, - NORTH_B = 1, - SOUTH_A = 2, - SOUTH_B = 3, - NO_BRIDGE = 4 -}; - -struct amd8131_dev_info { - int devfn; - enum pcix_bridge_inst inst; - struct pci_dev *dev; - int edac_idx; /* pci device index */ - char *ctl_name; - struct edac_pci_ctl_info *edac_dev; -}; - -/* - * AMD8131 chipset has two pairs of PCIX Bridge and related IOAPIC - * Controller, and ATCA-6101 has two AMD8131 chipsets, so there are - * four PCIX Bridges on ATCA-6101 altogether. - * - * These PCIX Bridges share the same PCI Device ID and are all of - * Function Zero, they could be discrimated by their pci_dev->devfn. - * They share the same set of init/check/exit methods, and their - * private structures are collected in the devices[] array. - */ -struct amd8131_info { - u16 err_dev; /* PCI Device ID for AMD8131 APIC*/ - struct amd8131_dev_info *devices; - void (*init)(struct amd8131_dev_info *dev_info); - void (*exit)(struct amd8131_dev_info *dev_info); - void (*check)(struct edac_pci_ctl_info *edac_dev); -}; - -#endif /* _AMD8131_EDAC_H_ */ -