Anthony Liguori wrote:
> Attached patch implements lazy FPU save/restore for SVM.  It's much 
> more conservative than my previous patch.  We can now mark the guest 
> FPU as inactive whenever we want which will trigger CR0.TS to be set 
> in the guest's shadowed CR0.
>
> Right now, we only mark the FPU as inactive when the guest does (via a 
> mov %cr0, clts, etc.) or during a mov %cr3.  There may be a better 
> heuristic out there but this seemed like the obvious one.
>
> I'm still playing around with the VT version of this patch so I'll 
> post that tomorrow.
>
> I've tested on a 32bit and 64bit SVM host using Avi's previously 
> posted fpu-test and some others.  Everything seems to be fine.
>
> Regards,
>
> Anthony Liguori
> ------------------------------------------------------------------------
>
> Author: Anthony Liguori <[EMAIL PROTECTED]>
> Date:   Sun Apr 22 20:34:03 2007 -0500
>
>     Lazy FPU support for SVM.
>     
>     Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]>
>
> diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h
> index d1a90c5..4859c32 100644
> --- a/drivers/kvm/kvm.h
> +++ b/drivers/kvm/kvm.h
> @@ -63,6 +63,9 @@
>  #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN)
>  
>  #define DE_VECTOR 0
> +#define DB_VECTOR 2
> +#define UD_VECTOR 6
> +#define NM_VECTOR 7
>   

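This, while a nice cleanup, is unrelated to the lazy FPU change and
belongs in a separate patch.  Note also that DB_VECTOR changes value in
the move: svm.c defined it as 1, but it is added here as 2.  The debug
exception (#DB) is vector 1, so this looks like a typo.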

>  #define DF_VECTOR 8
>  #define TS_VECTOR 10
>  #define NP_VECTOR 11
> @@ -301,6 +304,7 @@ struct kvm_vcpu {
>       char fx_buf[FX_BUF_SIZE];
>       char *host_fx_image;
>       char *guest_fx_image;
> +     int fpu_active;
>  
>       int mmio_needed;
>       int mmio_read_completed;
> diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c
> index 644efc5..bbde031 100644
> --- a/drivers/kvm/svm.c
> +++ b/drivers/kvm/svm.c
> @@ -30,10 +30,6 @@ MODULE_LICENSE("GPL");
>  #define IOPM_ALLOC_ORDER 2
>  #define MSRPM_ALLOC_ORDER 1
>  
> -#define DB_VECTOR 1
> -#define UD_VECTOR 6
> -#define GP_VECTOR 13
> -
>  #define DR7_GD_MASK (1 << 13)
>  #define DR6_BD_MASK (1 << 13)
>  #define CR4_DE_MASK (1UL << 3)
> @@ -587,6 +583,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu)
>       init_vmcb(vcpu->svm->vmcb);
>  
>       fx_init(vcpu);
> +     vcpu->fpu_active = 1;
>       vcpu->apic_base = 0xfee00000 |
>                       /*for vcpu 0*/ MSR_IA32_APICBASE_BSP |
>                       MSR_IA32_APICBASE_ENABLE;
> @@ -756,6 +753,11 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned 
> long cr0)
>               }
>       }
>  #endif
> +     if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) {
> +             vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << 
> NM_VECTOR);
> +             vcpu->fpu_active = 1;
> +     }
> + 
>       vcpu->cr0 = cr0;
>       cr0 |= CR0_PG_MASK | CR0_WP_MASK;
>       cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK);
> @@ -928,6 +930,20 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct 
> kvm_run *kvm_run)
>       return 0;
>  }
>  
> +static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run)
> +{
> +       spin_lock(&vcpu->kvm->lock);
>   

Why is the lock needed?  Everything below is vcpu-local, as far as I can see.

> +
> +       vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR);
> +       if (!(vcpu->cr0 & CR0_TS_MASK))
> +               vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK;
> +       vcpu->fpu_active = 1;
> +
> +       spin_unlock(&vcpu->kvm->lock);
> +
> +       return 1;
> +}
> +
>  static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run 
> *kvm_run)
>  {
>       /*
> @@ -1292,6 +1308,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu,
>       [SVM_EXIT_WRITE_DR5]                    = emulate_on_interception,
>       [SVM_EXIT_WRITE_DR7]                    = emulate_on_interception,
>       [SVM_EXIT_EXCP_BASE + PF_VECTOR]        = pf_interception,
> +     [SVM_EXIT_EXCP_BASE + NM_VECTOR]        = nm_interception,
>       [SVM_EXIT_INTR]                         = nop_on_interception,
>       [SVM_EXIT_NMI]                          = nop_on_interception,
>       [SVM_EXIT_SMI]                          = nop_on_interception,
> @@ -1481,8 +1498,10 @@ again:
>               load_db_regs(vcpu->svm->db_regs);
>       }
>  
> -     fx_save(vcpu->host_fx_image);
> -     fx_restore(vcpu->guest_fx_image);
> +     if (vcpu->fpu_active) {
> +             fx_save(vcpu->host_fx_image);
> +             fx_restore(vcpu->guest_fx_image);
> +     }
>  
>       asm volatile (
>  #ifdef CONFIG_X86_64
> @@ -1593,8 +1612,10 @@ again:
>  #endif
>               : "cc", "memory" );
>  
> -     fx_save(vcpu->guest_fx_image);
> -     fx_restore(vcpu->host_fx_image);
> +     if (vcpu->fpu_active) {
> +             fx_save(vcpu->guest_fx_image);
> +             fx_restore(vcpu->host_fx_image);
> +     }
>  
>       if ((vcpu->svm->vmcb->save.dr7 & 0xff))
>               load_db_regs(vcpu->svm->host_db_regs);
> @@ -1664,6 +1685,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, 
> unsigned long root)
>  {
>       vcpu->svm->vmcb->save.cr3 = root;
>       force_new_asid(vcpu);
> + 
> +     if (vcpu->fpu_active) {
> +             vcpu->svm->vmcb->control.intercept_exceptions |= (1 << 
> NM_VECTOR);
> +             vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK;
> +             vcpu->fpu_active = 0;
> +     }
>  }
>  
>  static void svm_inject_page_fault(struct kvm_vcpu *vcpu,
>   

Do you have any performance numbers?  I use user/test/vmexit.c to test, as
it gives the highest relative speedup.


-- 
error compiling committee.c: too many arguments to function


-------------------------------------------------------------------------
This SF.net email is sponsored by DB2 Express
Download DB2 Express C - the FREE version of DB2 express and take
control of your XML. No limits. Just data. Click to get it now.
http://sourceforge.net/powerbar/db2/
_______________________________________________
kvm-devel mailing list
kvm-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/kvm-devel

Reply via email to