From ec92a349db9a688953a7b09c97a661b8fe30719c Mon Sep 17 00:00:00 2001 From: Puranjay Mohan Date: Mon, 13 May 2024 10:02:48 +0000 Subject: [PATCH 1/2] powerpc/bpf: enforce full ordering for ATOMIC operations with BPF_FETCH The Linux Kernel Memory Model [1][2] requires RMW operations that have a return value to be fully ordered. BPF atomic operations with BPF_FETCH (including BPF_XCHG and BPF_CMPXCHG) return a value back so they need to be JITed to fully ordered operations. POWERPC currently emits relaxed operations for these. We can show this by running the following litmus-test: PPC SB+atomic_add+fetch { 0:r0=x; (* dst reg assuming offset is 0 *) 0:r1=2; (* src reg *) 0:r2=1; 0:r4=y; (* P0 writes to this, P1 reads this *) 0:r5=z; (* P1 writes to this, P0 reads this *) 0:r6=0; 1:r2=1; 1:r4=y; 1:r5=z; } P0 | P1 ; stw r2, 0(r4) | stw r2,0(r5) ; | ; loop:lwarx r3, r6, r0 | ; mr r8, r3 | ; add r3, r3, r1 | sync ; stwcx. r3, r6, r0 | ; bne loop | ; mr r1, r8 | ; | ; lwa r7, 0(r5) | lwa r7,0(r4) ; ~exists(0:r7=0 /\ 1:r7=0) Witnesses Positive: 9 Negative: 3 Condition ~exists (0:r7=0 /\ 1:r7=0) Observation SB+atomic_add+fetch Sometimes 3 9 This test shows that the older store in P0 is reordered with a newer load to a different address. Although there is a RMW operation with fetch between them. Adding a sync before and after RMW fixes the issue: Witnesses Positive: 9 Negative: 0 Condition ~exists (0:r7=0 /\ 1:r7=0) Observation SB+atomic_add+fetch Never 0 9 [1] https://www.kernel.org/doc/Documentation/memory-barriers.txt [2] https://www.kernel.org/doc/Documentation/atomic_t.txt Fixes: aea7ef8a82c0 ("powerpc/bpf/32: add support for BPF_ATOMIC bitwise operations") Fixes: 2d9206b22743 ("powerpc/bpf/32: Add instructions for atomic_[cmp]xchg") Fixes: dbe6e2456fb0 ("powerpc/bpf/64: add support for atomic fetch operations") Fixes: 1e82dfaa7819 ("powerpc/bpf/64: Add instructions for atomic_[cmp]xchg") Cc: stable@vger.kernel.org # v6.0+ Signed-off-by: Puranjay Mohan Reviewed-by: Christophe Leroy Reviewed-by: Naveen N Rao Acked-by: Paul E. McKenney Signed-off-by: Michael Ellerman Link: https://msgid.link/20240513100248.110535-1-puranjay@kernel.org --- arch/powerpc/net/bpf_jit_comp32.c | 12 ++++++++++++ arch/powerpc/net/bpf_jit_comp64.c | 12 ++++++++++++ 2 files changed, 24 insertions(+) diff --git a/arch/powerpc/net/bpf_jit_comp32.c b/arch/powerpc/net/bpf_jit_comp32.c index 43b97032a91c0..a0c4f1bde83e8 100644 --- a/arch/powerpc/net/bpf_jit_comp32.c +++ b/arch/powerpc/net/bpf_jit_comp32.c @@ -900,6 +900,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code /* Get offset into TMP_REG */ EMIT(PPC_RAW_LI(tmp_reg, off)); + /* + * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync' + * before and after the operation. + * + * This is a requirement in the Linux Kernel Memory Model. + * See __cmpxchg_u32() in asm/cmpxchg.h as an example. + */ + if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP)) + EMIT(PPC_RAW_SYNC()); tmp_idx = ctx->idx * 4; /* load value from memory into r0 */ EMIT(PPC_RAW_LWARX(_R0, tmp_reg, dst_reg, 0)); @@ -953,6 +962,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code /* For the BPF_FETCH variant, get old data into src_reg */ if (imm & BPF_FETCH) { + /* Emit 'sync' to enforce full ordering */ + if (IS_ENABLED(CONFIG_SMP)) + EMIT(PPC_RAW_SYNC()); EMIT(PPC_RAW_MR(ret_reg, ax_reg)); if (!fp->aux->verifier_zext) EMIT(PPC_RAW_LI(ret_reg - 1, 0)); /* higher 32-bit */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 8afc14a4a1258..7703dcf48be86 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -846,6 +846,15 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code /* Get offset into TMP_REG_1 */ EMIT(PPC_RAW_LI(tmp1_reg, off)); + /* + * Enforce full ordering for operations with BPF_FETCH by emitting a 'sync' + * before and after the operation. + * + * This is a requirement in the Linux Kernel Memory Model. + * See __cmpxchg_u64() in asm/cmpxchg.h as an example. + */ + if ((imm & BPF_FETCH) && IS_ENABLED(CONFIG_SMP)) + EMIT(PPC_RAW_SYNC()); tmp_idx = ctx->idx * 4; /* load value from memory into TMP_REG_2 */ if (size == BPF_DW) @@ -908,6 +917,9 @@ int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, u32 *fimage, struct code PPC_BCC_SHORT(COND_NE, tmp_idx); if (imm & BPF_FETCH) { + /* Emit 'sync' to enforce full ordering */ + if (IS_ENABLED(CONFIG_SMP)) + EMIT(PPC_RAW_SYNC()); EMIT(PPC_RAW_MR(ret_reg, _R0)); /* * Skip unnecessary zero-extension for 32-bit cmpxchg. From c8ed83bfbdf560fb2191468f07b5bc1a499f689d Mon Sep 17 00:00:00 2001 From: Gautam Menghani Date: Wed, 22 May 2024 14:19:45 +0530 Subject: [PATCH 2/2] KVM: PPC: Book3S HV: Fix doorbell emulation by adding DPDES support Doorbell emulation is broken for KVM on PowerVM guests as support for DPDES was not added in the initial patch series. Due to this, a KVM on PowerVM guest cannot be booted with the XICS interrupt controller as doorbells are to be setup in the initial probe path when using XICS (pSeries_smp_probe()). Add DPDES support in the host KVM code to fix doorbell emulation. Fixes: 6ccbbc33f06a ("KVM: PPC: Add helper library for Guest State Buffers") Cc: stable@vger.kernel.org # v6.7+ Signed-off-by: Gautam Menghani Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/stable/20240522082838.121769-1-gautam%40linux.ibm.com --- Documentation/arch/powerpc/kvm-nested.rst | 4 +++- arch/powerpc/include/asm/guest-state-buffer.h | 3 ++- arch/powerpc/include/asm/kvm_book3s.h | 1 + arch/powerpc/kvm/book3s_hv.c | 14 +++++++++++++- arch/powerpc/kvm/book3s_hv_nestedv2.c | 7 +++++++ arch/powerpc/kvm/test-guest-state-buffer.c | 2 +- 6 files changed, 27 insertions(+), 4 deletions(-) diff --git a/Documentation/arch/powerpc/kvm-nested.rst b/Documentation/arch/powerpc/kvm-nested.rst index 630602a8aa008..5defd13cc6c17 100644 --- a/Documentation/arch/powerpc/kvm-nested.rst +++ b/Documentation/arch/powerpc/kvm-nested.rst @@ -546,7 +546,9 @@ table information. +--------+-------+----+--------+----------------------------------+ | 0x1052 | 0x08 | RW | T | CTRL | +--------+-------+----+--------+----------------------------------+ -| 0x1053-| | | | Reserved | +| 0x1053 | 0x08 | RW | T | DPDES | ++--------+-------+----+--------+----------------------------------+ +| 0x1054-| | | | Reserved | | 0x1FFF | | | | | +--------+-------+----+--------+----------------------------------+ | 0x2000 | 0x04 | RW | T | CR | diff --git a/arch/powerpc/include/asm/guest-state-buffer.h b/arch/powerpc/include/asm/guest-state-buffer.h index 808149f315763..d107abe1468fe 100644 --- a/arch/powerpc/include/asm/guest-state-buffer.h +++ b/arch/powerpc/include/asm/guest-state-buffer.h @@ -81,6 +81,7 @@ #define KVMPPC_GSID_HASHKEYR 0x1050 #define KVMPPC_GSID_HASHPKEYR 0x1051 #define KVMPPC_GSID_CTRL 0x1052 +#define KVMPPC_GSID_DPDES 0x1053 #define KVMPPC_GSID_CR 0x2000 #define KVMPPC_GSID_PIDR 0x2001 @@ -110,7 +111,7 @@ #define KVMPPC_GSE_META_COUNT (KVMPPC_GSE_META_END - KVMPPC_GSE_META_START + 1) #define KVMPPC_GSE_DW_REGS_START KVMPPC_GSID_GPR(0) -#define KVMPPC_GSE_DW_REGS_END KVMPPC_GSID_CTRL +#define KVMPPC_GSE_DW_REGS_END KVMPPC_GSID_DPDES #define KVMPPC_GSE_DW_REGS_COUNT \ (KVMPPC_GSE_DW_REGS_END - KVMPPC_GSE_DW_REGS_START + 1) diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 3e1e2a698c9e7..10618622d7efc 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -594,6 +594,7 @@ static inline u##size kvmppc_get_##reg(struct kvm_vcpu *vcpu) \ KVMPPC_BOOK3S_VCORE_ACCESSOR(vtb, 64, KVMPPC_GSID_VTB) +KVMPPC_BOOK3S_VCORE_ACCESSOR(dpdes, 64, KVMPPC_GSID_DPDES) KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(arch_compat, 32, KVMPPC_GSID_LOGICAL_PVR) KVMPPC_BOOK3S_VCORE_ACCESSOR_GET(lpcr, 64, KVMPPC_GSID_LPCR) KVMPPC_BOOK3S_VCORE_ACCESSOR_SET(tb_offset, 64, KVMPPC_GSID_TB_OFFSET) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index daaf7faf21a5e..a4e51a31dd5a0 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4116,6 +4116,11 @@ static int kvmhv_vcpu_entry_nestedv2(struct kvm_vcpu *vcpu, u64 time_limit, int trap; long rc; + if (vcpu->arch.doorbell_request) { + vcpu->arch.doorbell_request = 0; + kvmppc_set_dpdes(vcpu, 1); + } + io = &vcpu->arch.nestedv2_io; msr = mfmsr(); @@ -4278,9 +4283,16 @@ static int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, if (kvmhv_on_pseries()) { if (kvmhv_is_nestedv1()) trap = kvmhv_vcpu_entry_p9_nested(vcpu, time_limit, lpcr, tb); - else + else { trap = kvmhv_vcpu_entry_nestedv2(vcpu, time_limit, lpcr, tb); + /* Remember doorbell if it is pending */ + if (kvmppc_get_dpdes(vcpu)) { + vcpu->arch.doorbell_request = 1; + kvmppc_set_dpdes(vcpu, 0); + } + } + /* H_CEDE has to be handled now, not later */ if (trap == BOOK3S_INTERRUPT_SYSCALL && !nested && kvmppc_get_gpr(vcpu, 3) == H_CEDE) { diff --git a/arch/powerpc/kvm/book3s_hv_nestedv2.c b/arch/powerpc/kvm/book3s_hv_nestedv2.c index 1091f7a83b255..342f583147709 100644 --- a/arch/powerpc/kvm/book3s_hv_nestedv2.c +++ b/arch/powerpc/kvm/book3s_hv_nestedv2.c @@ -311,6 +311,10 @@ static int gs_msg_ops_vcpu_fill_info(struct kvmppc_gs_buff *gsb, rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.vcore->vtb); break; + case KVMPPC_GSID_DPDES: + rc = kvmppc_gse_put_u64(gsb, iden, + vcpu->arch.vcore->dpdes); + break; case KVMPPC_GSID_LPCR: rc = kvmppc_gse_put_u64(gsb, iden, vcpu->arch.vcore->lpcr); @@ -543,6 +547,9 @@ static int gs_msg_ops_vcpu_refresh_info(struct kvmppc_gs_msg *gsm, case KVMPPC_GSID_VTB: vcpu->arch.vcore->vtb = kvmppc_gse_get_u64(gse); break; + case KVMPPC_GSID_DPDES: + vcpu->arch.vcore->dpdes = kvmppc_gse_get_u64(gse); + break; case KVMPPC_GSID_LPCR: vcpu->arch.vcore->lpcr = kvmppc_gse_get_u64(gse); break; diff --git a/arch/powerpc/kvm/test-guest-state-buffer.c b/arch/powerpc/kvm/test-guest-state-buffer.c index 4720b8dc88379..91ae660cfe21d 100644 --- a/arch/powerpc/kvm/test-guest-state-buffer.c +++ b/arch/powerpc/kvm/test-guest-state-buffer.c @@ -151,7 +151,7 @@ static void test_gs_bitmap(struct kunit *test) i++; } - for (u16 iden = KVMPPC_GSID_GPR(0); iden <= KVMPPC_GSID_CTRL; iden++) { + for (u16 iden = KVMPPC_GSID_GPR(0); iden <= KVMPPC_GSID_DPDES; iden++) { kvmppc_gsbm_set(&gsbm, iden); kvmppc_gsbm_set(&gsbm1, iden); KUNIT_EXPECT_TRUE(test, kvmppc_gsbm_test(&gsbm, iden));