From a986fa57fd81a1430e00b3c6cf8a325d6f894a63 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 14 Jun 2024 22:29:10 +1000 Subject: [PATCH 1/2] KVM: PPC: Book3S HV: Prevent UAF in kvm_spapr_tce_attach_iommu_group() Al reported a possible use-after-free (UAF) in kvm_spapr_tce_attach_iommu_group(). It looks up `stt` from tablefd, but then continues to use it after doing fdput() on the returned fd. After the fdput() the tablefd is free to be closed by another thread. The close calls kvm_spapr_tce_release() and then release_spapr_tce_table() (via call_rcu()) which frees `stt`. Although there are calls to rcu_read_lock() in kvm_spapr_tce_attach_iommu_group() they are not sufficient to prevent the UAF, because `stt` is used outside the locked regions. With an artifcial delay after the fdput() and a userspace program which triggers the race, KASAN detects the UAF: BUG: KASAN: slab-use-after-free in kvm_spapr_tce_attach_iommu_group+0x298/0x720 [kvm] Read of size 4 at addr c000200027552c30 by task kvm-vfio/2505 CPU: 54 PID: 2505 Comm: kvm-vfio Not tainted 6.10.0-rc3-next-20240612-dirty #1 Hardware name: 8335-GTH POWER9 0x4e1202 opal:skiboot-v6.5.3-35-g1851b2a06 PowerNV Call Trace: dump_stack_lvl+0xb4/0x108 (unreliable) print_report+0x2b4/0x6ec kasan_report+0x118/0x2b0 __asan_load4+0xb8/0xd0 kvm_spapr_tce_attach_iommu_group+0x298/0x720 [kvm] kvm_vfio_set_attr+0x524/0xac0 [kvm] kvm_device_ioctl+0x144/0x240 [kvm] sys_ioctl+0x62c/0x1810 system_call_exception+0x190/0x440 system_call_vectored_common+0x15c/0x2ec ... Freed by task 0: ... kfree+0xec/0x3e0 release_spapr_tce_table+0xd4/0x11c [kvm] rcu_core+0x568/0x16a0 handle_softirqs+0x23c/0x920 do_softirq_own_stack+0x6c/0x90 do_softirq_own_stack+0x58/0x90 __irq_exit_rcu+0x218/0x2d0 irq_exit+0x30/0x80 arch_local_irq_restore+0x128/0x230 arch_local_irq_enable+0x1c/0x30 cpuidle_enter_state+0x134/0x5cc cpuidle_enter+0x6c/0xb0 call_cpuidle+0x7c/0x100 do_idle+0x394/0x410 cpu_startup_entry+0x60/0x70 start_secondary+0x3fc/0x410 start_secondary_prolog+0x10/0x14 Fix it by delaying the fdput() until `stt` is no longer in use, which is effectively the entire function. To keep the patch minimal add a call to fdput() at each of the existing return paths. Future work can convert the function to goto or __cleanup style cleanup. With the fix in place the test case no longer triggers the UAF. Reported-by: Al Viro Closes: https://lore.kernel.org/all/20240610024437.GA1464458@ZenIV/ Signed-off-by: Michael Ellerman Link: https://msgid.link/20240614122910.3499489-1-mpe@ellerman.id.au --- arch/powerpc/kvm/book3s_64_vio.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index b569ebaa590e2..3ff3de9a52acf 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -130,14 +130,16 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, } rcu_read_unlock(); - fdput(f); - - if (!found) + if (!found) { + fdput(f); return -EINVAL; + } table_group = iommu_group_get_iommudata(grp); - if (WARN_ON(!table_group)) + if (WARN_ON(!table_group)) { + fdput(f); return -EFAULT; + } for (i = 0; i < IOMMU_TABLE_GROUP_MAX_TABLES; ++i) { struct iommu_table *tbltmp = table_group->tables[i]; @@ -158,8 +160,10 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, break; } } - if (!tbl) + if (!tbl) { + fdput(f); return -EINVAL; + } rcu_read_lock(); list_for_each_entry_rcu(stit, &stt->iommu_tables, next) { @@ -170,6 +174,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, /* stit is being destroyed */ iommu_tce_table_put(tbl); rcu_read_unlock(); + fdput(f); return -ENOTTY; } /* @@ -177,6 +182,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, * its KVM reference counter and can return. */ rcu_read_unlock(); + fdput(f); return 0; } rcu_read_unlock(); @@ -184,6 +190,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, stit = kzalloc(sizeof(*stit), GFP_KERNEL); if (!stit) { iommu_tce_table_put(tbl); + fdput(f); return -ENOMEM; } @@ -192,6 +199,7 @@ long kvm_spapr_tce_attach_iommu_group(struct kvm *kvm, int tablefd, list_add_rcu(&stit->next, &stt->iommu_tables); + fdput(f); return 0; } From a10f9aeaa95c29b83caf3bc0aefd6164f384df23 Mon Sep 17 00:00:00 2001 From: Anjali K Date: Fri, 14 Jun 2024 23:08:44 +0530 Subject: [PATCH 2/2] powerpc/pseries: Whitelist dtl slub object for copying to userspace Reading the dispatch trace log from /sys/kernel/debug/powerpc/dtl/cpu-* results in a BUG() when the config CONFIG_HARDENED_USERCOPY is enabled as shown below. kernel BUG at mm/usercopy.c:102! Oops: Exception in kernel mode, sig: 5 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: xfs libcrc32c dm_service_time sd_mod t10_pi sg ibmvfc scsi_transport_fc ibmveth pseries_wdt dm_multipath dm_mirror dm_region_hash dm_log dm_mod fuse CPU: 27 PID: 1815 Comm: python3 Not tainted 6.10.0-rc3 #85 Hardware name: IBM,9040-MRX POWER10 (raw) 0x800200 0xf000006 of:IBM,FW1060.00 (NM1060_042) hv:phyp pSeries NIP: c0000000005d23d4 LR: c0000000005d23d0 CTR: 00000000006ee6f8 REGS: c000000120c078c0 TRAP: 0700 Not tainted (6.10.0-rc3) MSR: 8000000000029033 CR: 2828220f XER: 0000000e CFAR: c0000000001fdc80 IRQMASK: 0 [ ... GPRs omitted ... ] NIP [c0000000005d23d4] usercopy_abort+0x78/0xb0 LR [c0000000005d23d0] usercopy_abort+0x74/0xb0 Call Trace: usercopy_abort+0x74/0xb0 (unreliable) __check_heap_object+0xf8/0x120 check_heap_object+0x218/0x240 __check_object_size+0x84/0x1a4 dtl_file_read+0x17c/0x2c4 full_proxy_read+0x8c/0x110 vfs_read+0xdc/0x3a0 ksys_read+0x84/0x144 system_call_exception+0x124/0x330 system_call_vectored_common+0x15c/0x2ec --- interrupt: 3000 at 0x7fff81f3ab34 Commit 6d07d1cd300f ("usercopy: Restrict non-usercopy caches to size 0") requires that only whitelisted areas in slab/slub objects can be copied to userspace when usercopy hardening is enabled using CONFIG_HARDENED_USERCOPY. Dtl contains hypervisor dispatch events which are expected to be read by privileged users. Hence mark this safe for user access. Specify useroffset=0 and usersize=DISPATCH_LOG_BYTES to whitelist the entire object. Co-developed-by: Vishal Chourasia Signed-off-by: Vishal Chourasia Signed-off-by: Anjali K Reviewed-by: Srikar Dronamraju Signed-off-by: Michael Ellerman Link: https://msgid.link/20240614173844.746818-1-anjalik@linux.ibm.com --- arch/powerpc/platforms/pseries/setup.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/pseries/setup.c b/arch/powerpc/platforms/pseries/setup.c index 284a6fa04b0c2..cba40d9d12848 100644 --- a/arch/powerpc/platforms/pseries/setup.c +++ b/arch/powerpc/platforms/pseries/setup.c @@ -343,8 +343,8 @@ static int alloc_dispatch_log_kmem_cache(void) { void (*ctor)(void *) = get_dtl_cache_ctor(); - dtl_cache = kmem_cache_create("dtl", DISPATCH_LOG_BYTES, - DISPATCH_LOG_BYTES, 0, ctor); + dtl_cache = kmem_cache_create_usercopy("dtl", DISPATCH_LOG_BYTES, + DISPATCH_LOG_BYTES, 0, 0, DISPATCH_LOG_BYTES, ctor); if (!dtl_cache) { pr_warn("Failed to create dispatch trace log buffer cache\n"); pr_warn("Stolen time statistics will be unreliable\n");