On 17/02/2017 01:45, Bandan Das wrote:
> Paolo Bonzini <pbonz...@redhat.com> writes:
> 
>> The FPU is always active now when running KVM.
> 
> The lazy code was a performance optimization, correct?
> Is this just dormant code that is being removed? Maybe
> mentioning the reasoning in a little more detail would be
> a good idea.

Lazy FPU support was removed completely from arch/x86.  Apparently,
things such as SSE-optimized mem* and str* functions made it much less
useful: the FPU is in use practically all the time, so trapping the
first FPU access of each time slice only adds overhead.  With the
arch/x86 support gone, the KVM code is unnecessary too.
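Here is a rough user-space sketch of the trade-off, if it helps; the
names (struct vcpu, load_guest_fpu, run_slice_*) are made up for the
example and are not KVM code.  Under lazy switching the guest FPU is
loaded only after a #NM trap taken through CR0.TS; under eager
switching it is loaded unconditionally before every entry:

/*
 * Illustrative user-space model only, not KVM code.  It assumes that
 * every time slice touches the FPU, which is what SSE-optimized mem*
 * and str* functions cause in practice.
 */
#include <stdbool.h>
#include <stdio.h>

struct vcpu {
        bool fpu_loaded;        /* has the guest FPU state been loaded? */
        unsigned long traps;    /* simulated #NM exits */
        unsigned long loads;    /* FPU state loads performed */
};

static void load_guest_fpu(struct vcpu *v)
{
        v->fpu_loaded = true;
        v->loads++;
}

/* Lazy: enter with CR0.TS set; the first FPU access traps (#NM). */
static void run_slice_lazy(struct vcpu *v, bool touches_fpu)
{
        v->fpu_loaded = false;
        if (touches_fpu) {
                v->traps++;             /* #NM vmexit */
                load_guest_fpu(v);      /* fpu_activate */
        }
}

/* Eager: what vcpu_enter_guest does after this patch. */
static void run_slice_eager(struct vcpu *v, bool touches_fpu)
{
        (void)touches_fpu;
        load_guest_fpu(v);              /* unconditional */
}

int main(void)
{
        struct vcpu lazy = { 0 }, eager = { 0 };
        int i;

        for (i = 0; i < 1000; i++) {
                run_slice_lazy(&lazy, true);
                run_slice_eager(&eager, true);
        }
        printf("lazy:  %lu loads, %lu traps\n", lazy.loads, lazy.traps);
        printf("eager: %lu loads, %lu traps\n", eager.loads, eager.traps);
        return 0;
}

Both variants do the same 1000 loads, but the lazy one pays 1000 extra
traps on top; that is the degenerate case that motivated the arch/x86
removal.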

Paolo

> The removal itself looks clean. I was really hoping that you
> would have forgotten to remove "fpu_active" from struct kvm_vcpu,
> but you hadn't ;)
> 
> Bandan
> 
>> Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
>> ---
>>  arch/x86/include/asm/kvm_host.h |   3 --
>>  arch/x86/kvm/cpuid.c            |   2 -
>>  arch/x86/kvm/svm.c              |  43 ++-------------
>>  arch/x86/kvm/vmx.c              | 112 ++++++----------------------------------
>>  arch/x86/kvm/x86.c              |   7 +--
>>  include/linux/kvm_host.h        |   1 -
>>  6 files changed, 19 insertions(+), 149 deletions(-)
>>
>> diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
>> index e4f13e714bcf..74ef58c8ff53 100644
>> --- a/arch/x86/include/asm/kvm_host.h
>> +++ b/arch/x86/include/asm/kvm_host.h
>> @@ -55,7 +55,6 @@
>>  #define KVM_REQ_TRIPLE_FAULT      10
>>  #define KVM_REQ_MMU_SYNC          11
>>  #define KVM_REQ_CLOCK_UPDATE      12
>> -#define KVM_REQ_DEACTIVATE_FPU    13
>>  #define KVM_REQ_EVENT             14
>>  #define KVM_REQ_APF_HALT          15
>>  #define KVM_REQ_STEAL_UPDATE      16
>> @@ -936,8 +935,6 @@ struct kvm_x86_ops {
>>      unsigned long (*get_rflags)(struct kvm_vcpu *vcpu);
>>      void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags);
>>      u32 (*get_pkru)(struct kvm_vcpu *vcpu);
>> -    void (*fpu_activate)(struct kvm_vcpu *vcpu);
>> -    void (*fpu_deactivate)(struct kvm_vcpu *vcpu);
>>  
>>      void (*tlb_flush)(struct kvm_vcpu *vcpu);
>>  
>> diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
>> index c0e2036217ad..1d155cc56629 100644
>> --- a/arch/x86/kvm/cpuid.c
>> +++ b/arch/x86/kvm/cpuid.c
>> @@ -123,8 +123,6 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu)
>>      if (best && (best->eax & (F(XSAVES) | F(XSAVEC))))
>>              best->ebx = xstate_required_size(vcpu->arch.xcr0, true);
>>  
>> -    kvm_x86_ops->fpu_activate(vcpu);
>> -
>>      /*
>>       * The existing code assumes virtual address is 48-bit in the canonical
>>       * address checks; exit if it is ever changed.
>> diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
>> index 4e5905a1ce70..d1efe2c62b3f 100644
>> --- a/arch/x86/kvm/svm.c
>> +++ b/arch/x86/kvm/svm.c
>> @@ -1157,7 +1157,6 @@ static void init_vmcb(struct vcpu_svm *svm)
>>      struct vmcb_control_area *control = &svm->vmcb->control;
>>      struct vmcb_save_area *save = &svm->vmcb->save;
>>  
>> -    svm->vcpu.fpu_active = 1;
>>      svm->vcpu.arch.hflags = 0;
>>  
>>      set_cr_intercept(svm, INTERCEPT_CR0_READ);
>> @@ -1899,15 +1898,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm)
>>      ulong gcr0 = svm->vcpu.arch.cr0;
>>      u64 *hcr0 = &svm->vmcb->save.cr0;
>>  
>> -    if (!svm->vcpu.fpu_active)
>> -            *hcr0 |= SVM_CR0_SELECTIVE_MASK;
>> -    else
>> -            *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
>> -                    | (gcr0 & SVM_CR0_SELECTIVE_MASK);
>> +    *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK)
>> +            | (gcr0 & SVM_CR0_SELECTIVE_MASK);
>>  
>>      mark_dirty(svm->vmcb, VMCB_CR);
>>  
>> -    if (gcr0 == *hcr0 && svm->vcpu.fpu_active) {
>> +    if (gcr0 == *hcr0) {
>>              clr_cr_intercept(svm, INTERCEPT_CR0_READ);
>>              clr_cr_intercept(svm, INTERCEPT_CR0_WRITE);
>>      } else {
>> @@ -1938,8 +1934,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>      if (!npt_enabled)
>>              cr0 |= X86_CR0_PG | X86_CR0_WP;
>>  
>> -    if (!vcpu->fpu_active)
>> -            cr0 |= X86_CR0_TS;
>>      /*
>>       * re-enable caching here because the QEMU bios
>>       * does not do it - this results in some delay at
>> @@ -2158,22 +2152,6 @@ static int ac_interception(struct vcpu_svm *svm)
>>      return 1;
>>  }
>>  
>> -static void svm_fpu_activate(struct kvm_vcpu *vcpu)
>> -{
>> -    struct vcpu_svm *svm = to_svm(vcpu);
>> -
>> -    clr_exception_intercept(svm, NM_VECTOR);
>> -
>> -    svm->vcpu.fpu_active = 1;
>> -    update_cr0_intercept(svm);
>> -}
>> -
>> -static int nm_interception(struct vcpu_svm *svm)
>> -{
>> -    svm_fpu_activate(&svm->vcpu);
>> -    return 1;
>> -}
>> -
>>  static bool is_erratum_383(void)
>>  {
>>      int err, i;
>> @@ -2571,9 +2549,6 @@ static int nested_svm_exit_special(struct vcpu_svm *svm)
>>              if (!npt_enabled && svm->apf_reason == 0)
>>                      return NESTED_EXIT_HOST;
>>              break;
>> -    case SVM_EXIT_EXCP_BASE + NM_VECTOR:
>> -            nm_interception(svm);
>> -            break;
>>      default:
>>              break;
>>      }
>> @@ -4018,7 +3993,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = {
>>      [SVM_EXIT_EXCP_BASE + BP_VECTOR]        = bp_interception,
>>      [SVM_EXIT_EXCP_BASE + UD_VECTOR]        = ud_interception,
>>      [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
>> -    [SVM_EXIT_EXCP_BASE + NM_VECTOR]        = nm_interception,
>>      [SVM_EXIT_EXCP_BASE + MC_VECTOR]        = mc_interception,
>>      [SVM_EXIT_EXCP_BASE + AC_VECTOR]        = ac_interception,
>>      [SVM_EXIT_INTR]                         = intr_interception,
>> @@ -5072,14 +5046,6 @@ static bool svm_has_wbinvd_exit(void)
>>      return true;
>>  }
>>  
>> -static void svm_fpu_deactivate(struct kvm_vcpu *vcpu)
>> -{
>> -    struct vcpu_svm *svm = to_svm(vcpu);
>> -
>> -    set_exception_intercept(svm, NM_VECTOR);
>> -    update_cr0_intercept(svm);
>> -}
>> -
>>  #define PRE_EX(exit)  { .exit_code = (exit), \
>>                      .stage = X86_ICPT_PRE_EXCEPT, }
>>  #define POST_EX(exit) { .exit_code = (exit), \
>> @@ -5340,9 +5306,6 @@ static inline void avic_post_state_restore(struct kvm_vcpu *vcpu)
>>  
>>      .get_pkru = svm_get_pkru,
>>  
>> -    .fpu_activate = svm_fpu_activate,
>> -    .fpu_deactivate = svm_fpu_deactivate,
>> -
>>      .tlb_flush = svm_flush_tlb,
>>  
>>      .run = svm_vcpu_run,
>> diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
>> index 0e0b5d09597e..9856b73a21ad 100644
>> --- a/arch/x86/kvm/vmx.c
>> +++ b/arch/x86/kvm/vmx.c
>> @@ -1856,7 +1856,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
>>      u32 eb;
>>  
>>      eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) |
>> -         (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR);
>> +         (1u << DB_VECTOR) | (1u << AC_VECTOR);
>>      if ((vcpu->guest_debug &
>>           (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) ==
>>          (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP))
>> @@ -1865,8 +1865,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
>>              eb = ~0;
>>      if (enable_ept)
>>              eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
>> -    if (vcpu->fpu_active)
>> -            eb &= ~(1u << NM_VECTOR);
>>  
>>      /* When we are running a nested L2 guest and L1 specified for it a
>>       * certain exception bitmap, we must trap the same exceptions and pass
>> @@ -2340,25 +2338,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu)
>>      }
>>  }
>>  
>> -static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
>> -{
>> -    ulong cr0;
>> -
>> -    if (vcpu->fpu_active)
>> -            return;
>> -    vcpu->fpu_active = 1;
>> -    cr0 = vmcs_readl(GUEST_CR0);
>> -    cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
>> -    cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
>> -    vmcs_writel(GUEST_CR0, cr0);
>> -    update_exception_bitmap(vcpu);
>> -    vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
>> -    if (is_guest_mode(vcpu))
>> -            vcpu->arch.cr0_guest_owned_bits &=
>> -                    ~get_vmcs12(vcpu)->cr0_guest_host_mask;
>> -    vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
>> -}
>> -
>>  static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu);
>>  
>>  /*
>> @@ -2377,33 +2356,6 @@ static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
>>              (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
>>  }
>>  
>> -static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
>> -{
>> -    /* Note that there is no vcpu->fpu_active = 0 here. The caller must
>> -     * set this *before* calling this function.
>> -     */
>> -    vmx_decache_cr0_guest_bits(vcpu);
>> -    vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
>> -    update_exception_bitmap(vcpu);
>> -    vcpu->arch.cr0_guest_owned_bits = 0;
>> -    vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
>> -    if (is_guest_mode(vcpu)) {
>> -            /*
>> -             * L1's specified read shadow might not contain the TS bit,
>> -             * so now that we turned on shadowing of this bit, we need to
>> -             * set this bit of the shadow. Like in nested_vmx_run we need
>> -             * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet
>> -             * up-to-date here because we just decached cr0.TS (and we'll
>> -             * only update vmcs12->guest_cr0 on nested exit).
>> -             */
>> -            struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
>> -            vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
>> -                    (vcpu->arch.cr0 & X86_CR0_TS);
>> -            vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
>> -    } else
>> -            vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
>> -}
>> -
>>  static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
>>  {
>>      unsigned long rflags, save_rflags;
>> @@ -4232,9 +4184,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0)
>>      if (enable_ept)
>>              ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu);
>>  
>> -    if (!vcpu->fpu_active)
>> -            hw_cr0 |= X86_CR0_TS | X86_CR0_MP;
>> -
>>      vmcs_writel(CR0_READ_SHADOW, cr0);
>>      vmcs_writel(GUEST_CR0, hw_cr0);
>>      vcpu->arch.cr0 = cr0;
>> @@ -5321,7 +5270,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx)
>>      /* 22.2.1, 20.8.1 */
>>      vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl);
>>  
>> -    vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL);
>> +    vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS;
>> +    vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS);
>> +
>>      set_cr4_guest_host_mask(vmx);
>>  
>>      if (vmx_xsaves_supported())
>> @@ -5425,7 +5376,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
>>      vmx_set_cr0(vcpu, cr0); /* enter rmode */
>>      vmx_set_cr4(vcpu, 0);
>>      vmx_set_efer(vcpu, 0);
>> -    vmx_fpu_activate(vcpu);
>> +
>>      update_exception_bitmap(vcpu);
>>  
>>      vpid_sync_context(vmx->vpid);
>> @@ -5698,11 +5649,6 @@ static int handle_exception(struct kvm_vcpu *vcpu)
>>      if (is_nmi(intr_info))
>>              return 1;  /* already handled by vmx_vcpu_run() */
>>  
>> -    if (is_no_device(intr_info)) {
>> -            vmx_fpu_activate(vcpu);
>> -            return 1;
>> -    }
>> -
>>      if (is_invalid_opcode(intr_info)) {
>>              if (is_guest_mode(vcpu)) {
>>                      kvm_queue_exception(vcpu, UD_VECTOR);
>> @@ -5892,22 +5838,6 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val)
>>              return kvm_set_cr4(vcpu, val);
>>  }
>>  
>> -/* called to set cr0 as appropriate for clts instruction exit. */
>> -static void handle_clts(struct kvm_vcpu *vcpu)
>> -{
>> -    if (is_guest_mode(vcpu)) {
>> -            /*
>> -             * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS
>> -             * but we did (!fpu_active). We need to keep GUEST_CR0.TS on,
>> -             * just pretend it's off (also in arch.cr0 for fpu_activate).
>> -             */
>> -            vmcs_writel(CR0_READ_SHADOW,
>> -                    vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS);
>> -            vcpu->arch.cr0 &= ~X86_CR0_TS;
>> -    } else
>> -            vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
>> -}
>> -
>>  static int handle_cr(struct kvm_vcpu *vcpu)
>>  {
>>      unsigned long exit_qualification, val;
>> @@ -5953,9 +5883,9 @@ static int handle_cr(struct kvm_vcpu *vcpu)
>>              }
>>              break;
>>      case 2: /* clts */
>> -            handle_clts(vcpu);
>> +            WARN_ONCE(1, "Guest should always own CR0.TS");
>> +            vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS));
>>              trace_kvm_cr_write(0, kvm_read_cr0(vcpu));
>> -            vmx_fpu_activate(vcpu);
>>              return kvm_skip_emulated_instruction(vcpu);
>>      case 1: /*mov from cr*/
>>              switch (cr) {
>> @@ -10349,8 +10279,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
>>      }
>>  
>>      /*
>> -     * This sets GUEST_CR0 to vmcs12->guest_cr0, with possibly a modified
>> -     * TS bit (for lazy fpu) and bits which we consider mandatory enabled.
>> +     * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
>> +     * bits which we consider mandatory enabled.
>>       * The CR0_READ_SHADOW is what L2 should have expected to read given
>>       * the specifications by L1; It's not enough to take
>>       * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we
>> @@ -10963,24 +10893,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
>>      vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
>>      /*
>>       * Note that calling vmx_set_cr0 is important, even if cr0 hasn't
>> -     * actually changed, because it depends on the current state of
>> -     * fpu_active (which may have changed).
>> -     * Note that vmx_set_cr0 refers to efer set above.
>> +     * actually changed, because vmx_set_cr0 refers to efer set above.
>> +     *
>> +     * CR0_GUEST_HOST_MASK is already set in the original vmcs01
>> +     * (KVM doesn't change it);
>>       */
>> +    vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
>>      vmx_set_cr0(vcpu, vmcs12->host_cr0);
>> -    /*
>> -     * If we did fpu_activate()/fpu_deactivate() during L2's run, we need
>> -     * to apply the same changes to L1's vmcs. We just set cr0 correctly,
>> -     * but we also need to update cr0_guest_host_mask and exception_bitmap.
>> -     */
>> -    update_exception_bitmap(vcpu);
>> -    vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0);
>> -    vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
>>  
>> -    /*
>> -     * Note that CR4_GUEST_HOST_MASK is already set in the original vmcs01
>> -     * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask();
>> -     */
>> +    /* Same as above - no reason to call set_cr4_guest_host_mask().  */
>>      vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
>>      kvm_set_cr4(vcpu, vmcs12->host_cr4);
>>  
>> @@ -11609,9 +11530,6 @@ static void vmx_setup_mce(struct kvm_vcpu *vcpu)
>>  
>>      .get_pkru = vmx_get_pkru,
>>  
>> -    .fpu_activate = vmx_fpu_activate,
>> -    .fpu_deactivate = vmx_fpu_deactivate,
>> -
>>      .tlb_flush = vmx_flush_tlb,
>>  
>>      .run = vmx_vcpu_run,
>> diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
>> index 8d3047c8cce7..c48404017e4f 100644
>> --- a/arch/x86/kvm/x86.c
>> +++ b/arch/x86/kvm/x86.c
>> @@ -6751,10 +6751,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>                      r = 0;
>>                      goto out;
>>              }
>> -            if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) {
>> -                    vcpu->fpu_active = 0;
>> -                    kvm_x86_ops->fpu_deactivate(vcpu);
>> -            }
>>              if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
>>                      /* Page is swapped out. Do synthetic halt */
>>                      vcpu->arch.apf.halted = true;
>> @@ -6856,8 +6852,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
>>      preempt_disable();
>>  
>>      kvm_x86_ops->prepare_guest_switch(vcpu);
>> -    if (vcpu->fpu_active)
>> -            kvm_load_guest_fpu(vcpu);
>> +    kvm_load_guest_fpu(vcpu);
>>  
>>      /*
>>       * Disable IRQs before setting IN_GUEST_MODE.  Posted interrupt
>> diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h
>> index 2db458ee94b0..8d69d5150748 100644
>> --- a/include/linux/kvm_host.h
>> +++ b/include/linux/kvm_host.h
>> @@ -221,7 +221,6 @@ struct kvm_vcpu {
>>      struct mutex mutex;
>>      struct kvm_run *run;
>>  
>> -    int fpu_active;
>>      int guest_fpu_loaded, guest_xcr0_loaded;
>>      struct swait_queue_head wq;
>>      struct pid *pid;
> 
