Some versions of Intel PT do not support tracing across VMXON, more
specifically, VMXON will clear TraceEn control bit and any attempt to
set it before VMXOFF will throw a #GP, which in the current state of
things will crash the kernel. Namely,

$ perf record -e intel_pt// kvm -nographic

on such a machine will kill it.

To avoid this, notify the intel_pt driver before VMXON and after
VMXOFF so that it knows when not to enable itself.

Signed-off-by: Alexander Shishkin <alexander.shish...@linux.intel.com>
---
 arch/x86/events/intel/pt.c        | 71 +++++++++++++++++++++++++++++++++------
 arch/x86/events/intel/pt.h        |  2 ++
 arch/x86/include/asm/perf_event.h |  4 +++
 arch/x86/kvm/vmx.c                |  4 +++
 4 files changed, 70 insertions(+), 11 deletions(-)

diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c
index 127f58c179..a16f2ebc12 100644
--- a/arch/x86/events/intel/pt.c
+++ b/arch/x86/events/intel/pt.c
@@ -136,9 +136,17 @@ static int __init pt_pmu_hw_init(void)
        struct dev_ext_attribute *de_attrs;
        struct attribute **attrs;
        size_t size;
+       u64 reg;
        int ret;
        long i;
 
+       if (test_cpu_cap(&boot_cpu_data, X86_FEATURE_VMX)) {
+               /* Intel SDM, 36.5 "Tracing post-VMXON" */
+               rdmsrl(MSR_IA32_VMX_MISC, reg);
+               if (reg & BIT(14))
+                       pt_pmu.vmx = true;
+       }
+
        attrs = NULL;
 
        for (i = 0; i < PT_CPUID_LEAVES; i++) {
@@ -269,20 +277,23 @@ static void pt_config(struct perf_event *event)
 
        reg |= (event->attr.config & PT_CONFIG_MASK);
 
+       event->hw.config = reg;
        wrmsrl(MSR_IA32_RTIT_CTL, reg);
 }
 
-static void pt_config_start(bool start)
+static void pt_config_stop(struct perf_event *event)
 {
-       u64 ctl;
+       u64 ctl = READ_ONCE(event->hw.config);
 
-       rdmsrl(MSR_IA32_RTIT_CTL, ctl);
-       if (start)
-               ctl |= RTIT_CTL_TRACEEN;
-       else
-               ctl &= ~RTIT_CTL_TRACEEN;
+       /* may be already stopped by a PMI*/
+       if (!(ctl & RTIT_CTL_TRACEEN))
+               return;
+
+       ctl ^= RTIT_CTL_TRACEEN;
        wrmsrl(MSR_IA32_RTIT_CTL, ctl);
 
+       WRITE_ONCE(event->hw.config, ctl);
+
        /*
         * A wrmsr that disables trace generation serializes other PT
         * registers and causes all data packets to be written to memory,
@@ -291,8 +302,7 @@ static void pt_config_start(bool start)
         * The below WMB, separating data store and aux_head store matches
         * the consumer's RMB that separates aux_head load and data load.
         */
-       if (!start)
-               wmb();
+       wmb();
 }
 
 static void pt_config_buffer(void *buf, unsigned int topa_idx,
@@ -922,11 +932,17 @@ void intel_pt_interrupt(void)
        if (!ACCESS_ONCE(pt->handle_nmi))
                return;
 
-       pt_config_start(false);
+       /*
+        * If VMX is on and PT does not support it, don't touch anything.
+        */
+       if (ACCESS_ONCE(pt->vmx_on))
+               return;
 
        if (!event)
                return;
 
+       pt_config_stop(event);
+
        buf = perf_get_aux(&pt->handle);
        if (!buf)
                return;
@@ -963,6 +979,35 @@ void intel_pt_interrupt(void)
        }
 }
 
+void intel_pt_vmxon(int entry)
+{
+       struct pt *pt = this_cpu_ptr(&pt_ctx);
+       struct perf_event *event;
+       unsigned long flags;
+
+       /* PT plays nice with VMX, do nothing */
+       if (pt_pmu.vmx)
+               return;
+
+       /*
+        * VMX entry will clear RTIT_CTL.TraceEn; we need to make
+        * sure to not try to set it while VMX is on. Disable
+        * interrupts to avoid racing with pmu callbacks;
+        * concurrent PMI should be handled fine.
+        */
+       local_irq_save(flags);
+       WRITE_ONCE(pt->vmx_on, entry);
+
+       if (entry) {
+               /* prevent pt_config_stop() from writing RTIT_CTL */
+               event = pt->handle.event;
+               if (event)
+                       event->hw.config = 0;
+       }
+       local_irq_restore(flags);
+}
+EXPORT_SYMBOL_GPL(intel_pt_vmxon);
+
 /*
  * PMU callbacks
  */
@@ -973,6 +1018,9 @@ static void pt_event_start(struct perf_event *event, int 
mode)
        struct pt *pt = this_cpu_ptr(&pt_ctx);
        struct pt_buffer *buf;
 
+       if (ACCESS_ONCE(pt->vmx_on))
+               return;
+
        buf = perf_aux_output_begin(&pt->handle, event);
        if (!buf)
                goto fail_stop;
@@ -1007,7 +1055,8 @@ static void pt_event_stop(struct perf_event *event, int 
mode)
         * see comment in intel_pt_interrupt().
         */
        ACCESS_ONCE(pt->handle_nmi) = 0;
-       pt_config_start(false);
+
+       pt_config_stop(event);
 
        if (event->hw.state == PERF_HES_STOPPED)
                return;
diff --git a/arch/x86/events/intel/pt.h b/arch/x86/events/intel/pt.h
index 336878a5d2..b0731630cd 100644
--- a/arch/x86/events/intel/pt.h
+++ b/arch/x86/events/intel/pt.h
@@ -65,6 +65,7 @@ enum pt_capabilities {
 struct pt_pmu {
        struct pmu              pmu;
        u32                     caps[PT_CPUID_REGS_NUM * PT_CPUID_LEAVES];
+       bool                    vmx;
 };
 
 /**
@@ -111,6 +112,7 @@ struct pt_buffer {
 struct pt {
        struct perf_output_handle handle;
        int                     handle_nmi;
+       int                     vmx_on;
 };
 
 #endif /* __INTEL_PT_H__ */
diff --git a/arch/x86/include/asm/perf_event.h 
b/arch/x86/include/asm/perf_event.h
index 5a2ed3ed2f..8d34c39982 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -285,6 +285,10 @@ static inline void perf_events_lapic_init(void)    { }
 static inline void perf_check_microcode(void) { }
 #endif
 
+#ifdef CONFIG_CPU_SUP_INTEL
+ extern void intel_pt_vmxon(int entry);
+#endif
+
 #if defined(CONFIG_PERF_EVENTS) && defined(CONFIG_CPU_SUP_AMD)
  extern void amd_pmu_enable_virt(void);
  extern void amd_pmu_disable_virt(void);
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1735ae9d68..744ea43b79 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3075,6 +3075,8 @@ static __init int vmx_disabled_by_bios(void)
 
 static void kvm_cpu_vmxon(u64 addr)
 {
+       intel_pt_vmxon(1);
+
        asm volatile (ASM_VMX_VMXON_RAX
                        : : "a"(&addr), "m"(addr)
                        : "memory", "cc");
@@ -3144,6 +3146,8 @@ static void vmclear_local_loaded_vmcss(void)
 static void kvm_cpu_vmxoff(void)
 {
        asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc");
+
+       intel_pt_vmxon(0);
 }
 
 static void hardware_disable(void)
-- 
2.8.0.rc3

Reply via email to