On 05/05/20 15:41, Thomas Gleixner wrote: > Move the functions which are inside the RCU off region into the > non-instrumentable text section. > > Signed-off-by: Thomas Gleixner <t...@linutronix.de> > Cc: Paolo Bonzini <pbonz...@redhat.com> > Cc: Sean Christopherson <sean.j.christopher...@intel.com> > --- > arch/x86/include/asm/hardirq.h | 4 - > arch/x86/include/asm/kvm_host.h | 8 +++ > arch/x86/kvm/vmx/ops.h | 4 + > arch/x86/kvm/vmx/vmenter.S | 5 + > arch/x86/kvm/vmx/vmx.c | 105 > ++++++++++++++++++++++------------------ > arch/x86/kvm/x86.c | 2 > 6 files changed, 79 insertions(+), 49 deletions(-) > > --- a/arch/x86/include/asm/hardirq.h > +++ b/arch/x86/include/asm/hardirq.h > @@ -67,12 +67,12 @@ static inline void kvm_set_cpu_l1tf_flus > __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 1); > } > > -static inline void kvm_clear_cpu_l1tf_flush_l1d(void) > +static __always_inline void kvm_clear_cpu_l1tf_flush_l1d(void) > { > __this_cpu_write(irq_stat.kvm_cpu_l1tf_flush_l1d, 0); > } > > -static inline bool kvm_get_cpu_l1tf_flush_l1d(void) > +static __always_inline bool kvm_get_cpu_l1tf_flush_l1d(void) > { > return __this_cpu_read(irq_stat.kvm_cpu_l1tf_flush_l1d); > } > --- a/arch/x86/include/asm/kvm_host.h > +++ b/arch/x86/include/asm/kvm_host.h > @@ -1601,7 +1601,15 @@ asmlinkage void kvm_spurious_fault(void) > insn "\n\t" \ > "jmp 668f \n\t" \ > "667: \n\t" \ > + "1: \n\t" \ > + ".pushsection .discard.instr_begin \n\t" \ > + ".long 1b - . \n\t" \ > + ".popsection \n\t" \ > "call kvm_spurious_fault \n\t" \ > + "1: \n\t" \ > + ".pushsection .discard.instr_end \n\t" \ > + ".long 1b - . \n\t" \ > + ".popsection \n\t" \ > "668: \n\t" \ > _ASM_EXTABLE(666b, 667b) > > --- a/arch/x86/kvm/vmx/ops.h > +++ b/arch/x86/kvm/vmx/ops.h > @@ -146,7 +146,9 @@ do { > \ > : : op1 : "cc" : error, fault); \ > return; \ > error: > \ > + instr_begin(); \ > insn##_error(error_args); \ > + instr_end(); \ > return; \ > fault: > \ > kvm_spurious_fault(); \ > @@ -161,7 +163,9 @@ do { > \ > : : op1, op2 : "cc" : error, fault); \ > return; \ > error: > \ > + instr_begin(); \ > insn##_error(error_args); \ > + instr_end(); \ > return; \ > fault: > \ > kvm_spurious_fault(); \ > --- a/arch/x86/kvm/vmx/vmenter.S > +++ b/arch/x86/kvm/vmx/vmenter.S > @@ -27,7 +27,7 @@ > #define VCPU_R15 __VCPU_REGS_R15 * WORD_SIZE > #endif > > - .text > +.section .noinstr.text, "ax" > > /** > * vmx_vmenter - VM-Enter the current loaded VMCS > @@ -231,6 +231,9 @@ SYM_FUNC_START(__vmx_vcpu_run) > jmp 1b > SYM_FUNC_END(__vmx_vcpu_run) > > + > +.section .text, "ax" > + > /** > * vmread_error_trampoline - Trampoline from inline asm to vmread_error() > * @field: VMCS field encoding that failed > --- a/arch/x86/kvm/vmx/vmx.c > +++ b/arch/x86/kvm/vmx/vmx.c > @@ -6000,7 +6000,7 @@ static int vmx_handle_exit(struct kvm_vc > * information but as all relevant affected CPUs have 32KiB L1D cache size > * there is no point in doing so. > */ > -static void vmx_l1d_flush(struct kvm_vcpu *vcpu) > +static noinstr void vmx_l1d_flush(struct kvm_vcpu *vcpu) > { > int size = PAGE_SIZE << L1D_CACHE_ORDER; > > @@ -6033,7 +6033,7 @@ static void vmx_l1d_flush(struct kvm_vcp > vcpu->stat.l1d_flush++; > > if (static_cpu_has(X86_FEATURE_FLUSH_L1D)) { > - wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); > + native_wrmsrl(MSR_IA32_FLUSH_CMD, L1D_FLUSH); > return; > } > > @@ -6514,7 +6514,7 @@ static void vmx_update_hv_timer(struct k > } > } > > -void vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long host_rsp) > +void noinstr vmx_update_host_rsp(struct vcpu_vmx *vmx, unsigned long > host_rsp) > { > if (unlikely(host_rsp != vmx->loaded_vmcs->host_state.rsp)) { > vmx->loaded_vmcs->host_state.rsp = host_rsp; > @@ -6524,6 +6524,61 @@ void vmx_update_host_rsp(struct vcpu_vmx > > bool __vmx_vcpu_run(struct vcpu_vmx *vmx, unsigned long *regs, bool > launched); > > +static noinstr void vmx_vcpu_enter_exit(struct kvm_vcpu *vcpu, > + struct vcpu_vmx *vmx) > +{ > + instr_begin(); > + /* > + * VMENTER enables interrupts (host state), but the kernel state is > + * interrupts disabled when this is invoked. Also tell RCU about > + * it. This is the same logic as for exit_to_user_mode(). > + * > + * 1) Trace interrupts on state > + * 2) Prepare lockdep with RCU on > + * 3) Invoke context tracking if enabled to adjust RCU state > + * 4) Tell lockdep that interrupts are enabled > + */ > + trace_hardirqs_on_prepare(); > + lockdep_hardirqs_on_prepare(CALLER_ADDR0); > + instr_end(); > + > + guest_enter_irqoff(); > + lockdep_hardirqs_on(CALLER_ADDR0); > + > + /* L1D Flush includes CPU buffer clear to mitigate MDS */ > + if (static_branch_unlikely(&vmx_l1d_should_flush)) > + vmx_l1d_flush(vcpu); > + else if (static_branch_unlikely(&mds_user_clear)) > + mds_clear_cpu_buffers(); > + > + if (vcpu->arch.cr2 != read_cr2()) > + write_cr2(vcpu->arch.cr2); > + > + vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, > + vmx->loaded_vmcs->launched); > + > + vcpu->arch.cr2 = read_cr2(); > + > + /* > + * VMEXIT disables interrupts (host state), but tracing and lockdep > + * have them in state 'on'. Same as enter_from_user_mode(). > + * > + * 1) Tell lockdep that interrupts are disabled > + * 2) Invoke context tracking if enabled to reactivate RCU > + * 3) Trace interrupts off state > + * > + * This needs to be done before the below as native_read_msr() > + * contains a tracepoint and x86_spec_ctrl_restore_host() calls > + * into world and some more. > + */ > + lockdep_hardirqs_off(CALLER_ADDR0); > + guest_exit_irqoff(); > + > + instr_begin(); > + trace_hardirqs_off_prepare(); > + instr_end(); > +} > + > static void vmx_vcpu_run(struct kvm_vcpu *vcpu) > { > struct vcpu_vmx *vmx = to_vmx(vcpu); > @@ -6604,49 +6659,9 @@ static void vmx_vcpu_run(struct kvm_vcpu > x86_spec_ctrl_set_guest(vmx->spec_ctrl, 0); > > /* > - * VMENTER enables interrupts (host state), but the kernel state is > - * interrupts disabled when this is invoked. Also tell RCU about > - * it. This is the same logic as for exit_to_user_mode(). > - * > - * 1) Trace interrupts on state > - * 2) Prepare lockdep with RCU on > - * 3) Invoke context tracking if enabled to adjust RCU state > - * 4) Tell lockdep that interrupts are enabled > + * The actual VMENTER/EXIT is in the .noinstr.text section. > */ > - trace_hardirqs_on_prepare(); > - lockdep_hardirqs_on_prepare(CALLER_ADDR0); > - guest_enter_irqoff(); > - lockdep_hardirqs_on(CALLER_ADDR0); > - > - /* L1D Flush includes CPU buffer clear to mitigate MDS */ > - if (static_branch_unlikely(&vmx_l1d_should_flush)) > - vmx_l1d_flush(vcpu); > - else if (static_branch_unlikely(&mds_user_clear)) > - mds_clear_cpu_buffers(); > - > - if (vcpu->arch.cr2 != read_cr2()) > - write_cr2(vcpu->arch.cr2); > - > - vmx->fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs, > - vmx->loaded_vmcs->launched); > - > - vcpu->arch.cr2 = read_cr2(); > - > - /* > - * VMEXIT disables interrupts (host state), but tracing and lockdep > - * have them in state 'on'. Same as enter_from_user_mode(). > - * > - * 1) Tell lockdep that interrupts are disabled > - * 2) Invoke context tracking if enabled to reactivate RCU > - * 3) Trace interrupts off state > - * > - * This needs to be done before the below as native_read_msr() > - * contains a tracepoint and x86_spec_ctrl_restore_host() calls > - * into world and some more. > - */ > - lockdep_hardirqs_off(CALLER_ADDR0); > - guest_exit_irqoff(); > - trace_hardirqs_off_prepare(); > + vmx_vcpu_enter_exit(vcpu, vmx); > > /* > * We do not use IBRS in the kernel. If this vCPU has used the > --- a/arch/x86/kvm/x86.c > +++ b/arch/x86/kvm/x86.c > @@ -381,7 +381,7 @@ int kvm_set_apic_base(struct kvm_vcpu *v > } > EXPORT_SYMBOL_GPL(kvm_set_apic_base); > > -asmlinkage __visible void kvm_spurious_fault(void) > +asmlinkage __visible noinstr void kvm_spurious_fault(void) > { > /* Fault while not rebooting. We want the trace. */ > BUG_ON(!kvm_rebooting); >
Acked-by: Paolo Bonzini <pbonz...@redhat.com>