Protect perf_guest_cbs with READ_ONCE/WRITE_ONCE to ensure it's not
reloaded between a !NULL check and a dereference, and wait for all
readers via syncrhonize_rcu() to prevent use-after-free, e.g. if the
callbacks are being unregistered during module unload. Because the
callbacks are global, it's possible for readers to run in parallel with
an unregister operation.
The bug has escaped notice because all dereferences of perf_guest_cbs
follow the same "perf_guest_cbs && perf_guest_cbs->is_in_guest()" pattern,
and it's extremely unlikely a compiler will reload perf_guest_cbs in this
sequence. Compilers do reload perf_guest_cbs for future derefs, e.g. for
->is_user_mode(), but the ->is_in_guest() guard all but guarantees the
PMI handler will win the race, e.g. to nullify perf_guest_cbs, KVM has to
completely exit the guest and teardown down all VMs before KVM start its
module unload / unregister sequence.
But with help, unloading kvm_intel can trigger a NULL pointer derference,
e.g. wrapping perf_guest_cbs with READ_ONCE in perf_misc_flags() while
spamming kvm_intel module load/unload leads to:
BUG: kernel NULL pointer dereference, address:
#PF: supervisor read access in kernel mode
#PF: error_code(0x) - not-present page
PGD 0 P4D 0
Oops: [#1] PREEMPT SMP
CPU: 6 PID: 1825 Comm: stress Not tainted 5.14.0-rc2+ #459
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 0.0.0 02/06/2015
RIP: 0010:perf_misc_flags+0x1c/0x70
Call Trace:
perf_prepare_sample+0x53/0x6b0
perf_event_output_forward+0x67/0x160
__perf_event_overflow+0x52/0xf0
handle_pmi_common+0x207/0x300
intel_pmu_handle_irq+0xcf/0x410
perf_event_nmi_handler+0x28/0x50
nmi_handle+0xc7/0x260
default_do_nmi+0x6b/0x170
exc_nmi+0x103/0x130
asm_exc_nmi+0x76/0xbf
Fixes: 39447b386c84 ("perf: Enhance perf to allow for guest statistic
collection from host")
Cc: sta...@vger.kernel.org
Signed-off-by: Sean Christopherson
---
arch/arm/kernel/perf_callchain.c | 17 +++--
arch/arm64/kernel/perf_callchain.c | 18 --
arch/csky/kernel/perf_callchain.c | 6 --
arch/nds32/kernel/perf_event_cpu.c | 17 +++--
arch/riscv/kernel/perf_callchain.c | 7 +--
arch/x86/events/core.c | 17 +++--
arch/x86/events/intel/core.c | 9 ++---
include/linux/perf_event.h | 8
kernel/events/core.c | 9 +++--
9 files changed, 75 insertions(+), 33 deletions(-)
diff --git a/arch/arm/kernel/perf_callchain.c b/arch/arm/kernel/perf_callchain.c
index 3b69a76d341e..1626dfc6f6ce 100644
--- a/arch/arm/kernel/perf_callchain.c
+++ b/arch/arm/kernel/perf_callchain.c
@@ -62,9 +62,10 @@ user_backtrace(struct frame_tail __user *tail,
void
perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs
*regs)
{
+ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
struct frame_tail __user *tail;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (guest_cbs && guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
@@ -98,9 +99,10 @@ callchain_trace(struct stackframe *fr,
void
perf_callchain_kernel(struct perf_callchain_entry_ctx *entry, struct pt_regs
*regs)
{
+ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
struct stackframe fr;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
+ if (guest_cbs && guest_cbs->is_in_guest()) {
/* We don't support guest os callchain now */
return;
}
@@ -111,18 +113,21 @@ perf_callchain_kernel(struct perf_callchain_entry_ctx
*entry, struct pt_regs *re
unsigned long perf_instruction_pointer(struct pt_regs *regs)
{
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest())
- return perf_guest_cbs->get_guest_ip();
+ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
+
+ if (guest_cbs && guest_cbs->is_in_guest())
+ return guest_cbs->get_guest_ip();
return instruction_pointer(regs);
}
unsigned long perf_misc_flags(struct pt_regs *regs)
{
+ struct perf_guest_info_callbacks *guest_cbs = perf_get_guest_cbs();
int misc = 0;
- if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) {
- if (perf_guest_cbs->is_user_mode())
+ if (guest_cbs && guest_cbs->is_in_guest()) {
+ if (guest_cbs->is_user_mode())
misc |= PERF_RECORD_MISC_GUEST_USER;
else
misc |= PERF_RECORD_MISC_GUEST_KERNEL;
diff --git a/arch/arm64/kernel/perf_callchain.c
b/arch/arm64/kernel/perf_callchain.c
index 4a72c2727309..86d9f2013172 100644
--- a/arch/arm64/kernel/perf_callchain.c
+++ b/arch/arm64/kernel/perf_callchain.c
@@ -102,7 +102,9 @@ compat_user_backtrace(struct compat_frame_tail __user *tail,