Anthony Liguori wrote: > Attached patch implements lazy FPU save/restore for SVM. It's much > more conservative than my previous patch. We can now mark the guest > FPU as inactive whenever we want which will trigger CR0.TS to be set > in the guest's shadowed CR0. > > Right now, we only mark the FPU as inactive when the guest does (via a > mov %cr0, clts, etc.) or during a mov %cr3. There may be a better > heuristic out there but this seemed like the obvious one. > > I'm still playing around with the VT version of this patch so I'll > post that tomorrow. > > I've tested on a 32bit and 64bit SVM host using Avi's previously > posted fpu-test and some others. Everything seems to be fine. > > Regards, > > Anthony Liguori > ------------------------------------------------------------------------ > > Author: Anthony Liguori <[EMAIL PROTECTED]> > Date: Sun Apr 22 20:34:03 2007 -0500 > > Lazy FPU support for SVM. > > Signed-off-by: Anthony Liguori <[EMAIL PROTECTED]> > > diff --git a/drivers/kvm/kvm.h b/drivers/kvm/kvm.h > index d1a90c5..4859c32 100644 > --- a/drivers/kvm/kvm.h > +++ b/drivers/kvm/kvm.h > @@ -63,6 +63,9 @@ > #define FX_BUF_SIZE (2 * FX_IMAGE_SIZE + FX_IMAGE_ALIGN) > > #define DE_VECTOR 0 > +#define DB_VECTOR 2 > +#define UD_VECTOR 6 > +#define NM_VECTOR 7 >
This, while a nice cleanup, is unrelated. > #define DF_VECTOR 8 > #define TS_VECTOR 10 > #define NP_VECTOR 11 > @@ -301,6 +304,7 @@ struct kvm_vcpu { > char fx_buf[FX_BUF_SIZE]; > char *host_fx_image; > char *guest_fx_image; > + int fpu_active; > > int mmio_needed; > int mmio_read_completed; > diff --git a/drivers/kvm/svm.c b/drivers/kvm/svm.c > index 644efc5..bbde031 100644 > --- a/drivers/kvm/svm.c > +++ b/drivers/kvm/svm.c > @@ -30,10 +30,6 @@ MODULE_LICENSE("GPL"); > #define IOPM_ALLOC_ORDER 2 > #define MSRPM_ALLOC_ORDER 1 > > -#define DB_VECTOR 1 > -#define UD_VECTOR 6 > -#define GP_VECTOR 13 > - > #define DR7_GD_MASK (1 << 13) > #define DR6_BD_MASK (1 << 13) > #define CR4_DE_MASK (1UL << 3) > @@ -587,6 +583,7 @@ static int svm_create_vcpu(struct kvm_vcpu *vcpu) > init_vmcb(vcpu->svm->vmcb); > > fx_init(vcpu); > + vcpu->fpu_active = 1; > vcpu->apic_base = 0xfee00000 | > /*for vcpu 0*/ MSR_IA32_APICBASE_BSP | > MSR_IA32_APICBASE_ENABLE; > @@ -756,6 +753,11 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned > long cr0) > } > } > #endif > + if ((vcpu->cr0 & CR0_TS_MASK) && !(cr0 & CR0_TS_MASK)) { > + vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << > NM_VECTOR); > + vcpu->fpu_active = 1; > + } > + > vcpu->cr0 = cr0; > cr0 |= CR0_PG_MASK | CR0_WP_MASK; > cr0 &= ~(CR0_CD_MASK | CR0_NW_MASK); > @@ -928,6 +930,20 @@ static int pf_interception(struct kvm_vcpu *vcpu, struct > kvm_run *kvm_run) > return 0; > } > > +static int nm_interception(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) > +{ > + spin_lock(&vcpu->kvm->lock); > Why is the lock needed? Everything below is vcpu-local, AFAICS. 
> + > + vcpu->svm->vmcb->control.intercept_exceptions &= ~(1 << NM_VECTOR); > + if (!(vcpu->cr0 & CR0_TS_MASK)) > + vcpu->svm->vmcb->save.cr0 &= ~CR0_TS_MASK; > + vcpu->fpu_active = 1; > + > + spin_unlock(&vcpu->kvm->lock); > + > + return 1; > +} > + > static int shutdown_interception(struct kvm_vcpu *vcpu, struct kvm_run > *kvm_run) > { > /* > @@ -1292,6 +1308,7 @@ static int (*svm_exit_handlers[])(struct kvm_vcpu *vcpu, > [SVM_EXIT_WRITE_DR5] = emulate_on_interception, > [SVM_EXIT_WRITE_DR7] = emulate_on_interception, > [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, > + [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, > [SVM_EXIT_INTR] = nop_on_interception, > [SVM_EXIT_NMI] = nop_on_interception, > [SVM_EXIT_SMI] = nop_on_interception, > @@ -1481,8 +1498,10 @@ again: > load_db_regs(vcpu->svm->db_regs); > } > > - fx_save(vcpu->host_fx_image); > - fx_restore(vcpu->guest_fx_image); > + if (vcpu->fpu_active) { > + fx_save(vcpu->host_fx_image); > + fx_restore(vcpu->guest_fx_image); > + } > > asm volatile ( > #ifdef CONFIG_X86_64 > @@ -1593,8 +1612,10 @@ again: > #endif > : "cc", "memory" ); > > - fx_save(vcpu->guest_fx_image); > - fx_restore(vcpu->host_fx_image); > + if (vcpu->fpu_active) { > + fx_save(vcpu->guest_fx_image); > + fx_restore(vcpu->host_fx_image); > + } > > if ((vcpu->svm->vmcb->save.dr7 & 0xff)) > load_db_regs(vcpu->svm->host_db_regs); > @@ -1664,6 +1685,12 @@ static void svm_set_cr3(struct kvm_vcpu *vcpu, > unsigned long root) > { > vcpu->svm->vmcb->save.cr3 = root; > force_new_asid(vcpu); > + > + if (vcpu->fpu_active) { > + vcpu->svm->vmcb->control.intercept_exceptions |= (1 << > NM_VECTOR); > + vcpu->svm->vmcb->save.cr0 |= CR0_TS_MASK; > + vcpu->fpu_active = 0; > + } > } > > static void svm_inject_page_fault(struct kvm_vcpu *vcpu, > Any numbers? I use user/test/vmexit.c to test as it gives the highest relative speedup. 
-- error compiling committee.c: too many arguments to function ------------------------------------------------------------------------- This SF.net email is sponsored by DB2 Express Download DB2 Express C - the FREE version of DB2 express and take control of your XML. No limits. Just data. Click to get it now. http://sourceforge.net/powerbar/db2/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel