When sampling without PEBS, e.g.

    perf record -e cycles:u ...

on workloads that do a lot of kernel entries/exits, we see kernel samples even though :u is specified. This is due to skid: the sample is taken some time after the counter overflows, by which point execution may already have entered the kernel.

This is a security issue because it can leak kernel addresses even though kernel sampling support is disabled.

The patch drops the kernel samples if exclude_kernel is specified.

For example, tested on a Haswell desktop:

    perf record -e cycles:u <mgen>
    perf report --stdio

Before patch applied:

    99.77%  mgen  mgen              [.] buf_read
     0.20%  mgen  mgen              [.] rand_buf_init
     0.01%  mgen  [kernel.vmlinux]  [k] apic_timer_interrupt
     0.00%  mgen  mgen              [.] last_free_elem
     0.00%  mgen  libc-2.23.so      [.] __random_r
     0.00%  mgen  libc-2.23.so      [.] _int_malloc
     0.00%  mgen  mgen              [.] rand_array_init
     0.00%  mgen  [kernel.vmlinux]  [k] page_fault
     0.00%  mgen  libc-2.23.so      [.] __random
     0.00%  mgen  libc-2.23.so      [.] __strcasestr
     0.00%  mgen  ld-2.23.so        [.] strcmp
     0.00%  mgen  ld-2.23.so        [.] _dl_start
     0.00%  mgen  libc-2.23.so      [.] sched_setaffinity@@GLIBC_2.3.4
     0.00%  mgen  ld-2.23.so        [.] _start

We can see the kernel symbols apic_timer_interrupt and page_fault.

After patch applied:

    99.79%  mgen  mgen              [.] buf_read
     0.19%  mgen  mgen              [.] rand_buf_init
     0.00%  mgen  libc-2.23.so      [.] __random_r
     0.00%  mgen  mgen              [.] rand_array_init
     0.00%  mgen  mgen              [.] last_free_elem
     0.00%  mgen  libc-2.23.so      [.] vfprintf
     0.00%  mgen  libc-2.23.so      [.] rand
     0.00%  mgen  libc-2.23.so      [.] __random
     0.00%  mgen  libc-2.23.so      [.] _int_malloc
     0.00%  mgen  libc-2.23.so      [.] _IO_doallocbuf
     0.00%  mgen  ld-2.23.so        [.] do_lookup_x
     0.00%  mgen  ld-2.23.so        [.] open_verify.constprop.7
     0.00%  mgen  ld-2.23.so        [.] _dl_important_hwcaps
     0.00%  mgen  libc-2.23.so      [.] sched_setaffinity@@GLIBC_2.3.4
     0.00%  mgen  ld-2.23.so        [.] _start

There are only userspace symbols.
Signed-off-by: Jin Yao <yao....@linux.intel.com> --- arch/x86/events/core.c | 24 ++++++++++++++++++++++++ arch/x86/events/intel/core.c | 6 ++++++ arch/x86/events/perf_event.h | 2 ++ 3 files changed, 32 insertions(+) diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 580b60f..e6745e1 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -1463,6 +1463,12 @@ int x86_pmu_handle_irq(struct pt_regs *regs) if (!x86_perf_event_set_period(event)) continue; + /* + * For security, drop the skid kernel samples. + */ + if (skid_kernel_samples(event, regs)) + continue; + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); } @@ -1679,6 +1685,24 @@ ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, pmu_attr->event_str_noht); } +bool skid_kernel_samples(struct perf_event *event, struct pt_regs *regs) +{ + u64 ip; + + /* + * Without PEBS, we may get kernel samples even though + * exclude_kernel is specified due to skid in sampling. + */ + if ((event->attr.exclude_kernel) && + (event->attr.sample_type & PERF_SAMPLE_IP)) { + ip = perf_instruction_pointer(regs); + if (kernel_ip(ip)) + return true; + } + + return false; +} + EVENT_ATTR(cpu-cycles, CPU_CYCLES ); EVENT_ATTR(instructions, INSTRUCTIONS ); EVENT_ATTR(cache-references, CACHE_REFERENCES ); diff --git a/arch/x86/events/intel/core.c b/arch/x86/events/intel/core.c index a6d91d4..8e9c9e8 100644 --- a/arch/x86/events/intel/core.c +++ b/arch/x86/events/intel/core.c @@ -2193,6 +2193,12 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) if (has_branch_stack(event)) data.br_stack = &cpuc->lbr_stack; + /* + * For security, drop the skid kernel samples. 
+ */ + if (skid_kernel_samples(event, regs)) + continue; + if (perf_event_overflow(event, &data, regs)) x86_pmu_stop(event, 0); } diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index be3d362..73fe023 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -818,6 +818,8 @@ ssize_t events_sysfs_show(struct device *dev, struct device_attribute *attr, ssize_t events_ht_sysfs_show(struct device *dev, struct device_attribute *attr, char *page); +bool skid_kernel_samples(struct perf_event *event, struct pt_regs *regs); + #ifdef CONFIG_CPU_SUP_AMD int amd_pmu_init(void); -- 2.7.4