[PATCH 25/28] nVMX: Further fixes for lazy FPU loading

2010-12-08 Thread Nadav Har'El
KVM's "Lazy FPU loading" means that sometimes L0 needs to set CR0.TS, even
if a guest didn't set it. Moreover, L0 must also trap CR0.TS changes and
NM exceptions, even if we have a guest hypervisor (L1) who didn't want these
traps. And of course, conversely: If L1 wanted to trap these events, we
must let it, even if L0 is not interested in them.

This patch fixes some existing KVM code (in update_exception_bitmap(),
vmx_fpu_activate(), vmx_fpu_deactivate()) to do the correct merging of L0's
and L1's needs. Note that handle_cr() was already fixed in the above patch,
and that new code introduced in previous patches already handles CR0
correctly (see prepare_vmcs02(), prepare_vmcs12(), and nested_vmx_vmexit()).

Signed-off-by: Nadav Har'El 
---
 arch/x86/kvm/vmx.c |   38 +++---
 1 file changed, 35 insertions(+), 3 deletions(-)

--- .before/arch/x86/kvm/vmx.c  2010-12-08 18:56:52.0 +0200
+++ .after/arch/x86/kvm/vmx.c   2010-12-08 18:56:52.0 +0200
@@ -1098,6 +1098,15 @@ static void update_exception_bitmap(stru
eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */
if (vcpu->fpu_active)
eb &= ~(1u << NM_VECTOR);
+
+   /* When we are running a nested L2 guest and L1 specified for it a
+* certain exception bitmap, we must trap the same exceptions and pass
+* them to L1. When running L2, we will only handle the exceptions
+* specified above if L1 did not want them.
+*/
+   if (is_guest_mode(vcpu))
+   eb |= get_vmcs12_fields(vcpu)->exception_bitmap;
+
vmcs_write32(EXCEPTION_BITMAP, eb);
 }
 
@@ -1415,8 +1424,19 @@ static void vmx_fpu_activate(struct kvm_
cr0 &= ~(X86_CR0_TS | X86_CR0_MP);
cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
vmcs_writel(GUEST_CR0, cr0);
-   update_exception_bitmap(vcpu);
vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
+   if (is_guest_mode(vcpu)) {
+   /* While we (L0) no longer care about NM exceptions or cr0.TS
+* changes, our guest hypervisor (L1) might care in which case
+* we must trap them for it.
+*/
+   u32 eb = vmcs_read32(EXCEPTION_BITMAP) & ~(1u << NM_VECTOR);
+   struct vmcs_fields *vmcs12 = get_vmcs12_fields(vcpu);
+   eb |= vmcs12->exception_bitmap;
+   vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
+   vmcs_write32(EXCEPTION_BITMAP, eb);
+   } else
+   update_exception_bitmap(vcpu);
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 }
 
@@ -1424,12 +1444,24 @@ static void vmx_decache_cr0_guest_bits(s
 
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 {
+   /* Note that there is no vcpu->fpu_active = 0 here. The caller must
+* set this *before* calling this function.
+*/
vmx_decache_cr0_guest_bits(vcpu);
vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
-   update_exception_bitmap(vcpu);
+   vmcs_write32(EXCEPTION_BITMAP,
+   vmcs_read32(EXCEPTION_BITMAP) | (1u << NM_VECTOR));
vcpu->arch.cr0_guest_owned_bits = 0;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
-   vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
+   if (is_guest_mode(vcpu))
+   /* Unfortunately in nested mode we play with arch.cr0's PG
+* bit, so we mustn't copy it all, just the relevant TS bit
+*/
+   vmcs_writel(CR0_READ_SHADOW,
+   (vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS) |
+   (vcpu->arch.cr0 & X86_CR0_TS));
+   else
+   vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
 }
 
 static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 25/28] nVMX: Further fixes for lazy FPU loading

2010-12-09 Thread Avi Kivity

On 12/08/2010 07:12 PM, Nadav Har'El wrote:

KVM's "Lazy FPU loading" means that sometimes L0 needs to set CR0.TS, even
if a guest didn't set it. Moreover, L0 must also trap CR0.TS changes and
NM exceptions, even if we have a guest hypervisor (L1) who didn't want these
traps. And of course, conversely: If L1 wanted to trap these events, we
must let it, even if L0 is not interested in them.

This patch fixes some existing KVM code (in update_exception_bitmap(),
vmx_fpu_activate(), vmx_fpu_deactivate()) to do the correct merging of L0's
and L1's needs. Note that handle_cr() was already fixed in the above patch,
and that new code introduced in previous patches already handles CR0
correctly (see prepare_vmcs02(), prepare_vmcs12(), and nested_vmx_vmexit()).


@@ -1415,8 +1424,19 @@ static void vmx_fpu_activate(struct kvm_
cr0&= ~(X86_CR0_TS | X86_CR0_MP);
cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP);
vmcs_writel(GUEST_CR0, cr0);
-   update_exception_bitmap(vcpu);
vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
+   if (is_guest_mode(vcpu)) {
+   /* While we (L0) no longer care about NM exceptions or cr0.TS
+* changes, our guest hypervisor (L1) might care in which case
+* we must trap them for it.
+*/
+   u32 eb = vmcs_read32(EXCEPTION_BITMAP)&  ~(1u<<  NM_VECTOR);
+   struct vmcs_fields *vmcs12 = get_vmcs12_fields(vcpu);
+   eb |= vmcs12->exception_bitmap;
+   vcpu->arch.cr0_guest_owned_bits&= ~vmcs12->cr0_guest_host_mask;
+   vmcs_write32(EXCEPTION_BITMAP, eb);
+   } else
+   update_exception_bitmap(vcpu);


Isn't update_exception_bitmap() sufficient for both cases?


vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
  }

@@ -1424,12 +1444,24 @@ static void vmx_decache_cr0_guest_bits(s

  static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
  {
+   /* Note that there is no vcpu->fpu_active = 0 here. The caller must
+* set this *before* calling this function.
+*/
vmx_decache_cr0_guest_bits(vcpu);
vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP);
-   update_exception_bitmap(vcpu);
+   vmcs_write32(EXCEPTION_BITMAP,
+   vmcs_read32(EXCEPTION_BITMAP) | (1u<<  NM_VECTOR));


Why not fold the logic into update_exception_bitmap()?


vcpu->arch.cr0_guest_owned_bits = 0;
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
-   vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);
+   if (is_guest_mode(vcpu))
+   /* Unfortunately in nested mode we play with arch.cr0's PG
+* bit, so we mustn't copy it all, just the relevant TS bit
+*/
+   vmcs_writel(CR0_READ_SHADOW,
+   (vmcs_readl(CR0_READ_SHADOW)&  ~X86_CR0_TS) |
+   (vcpu->arch.cr0&  X86_CR0_TS));
+   else
+   vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0);


Didn't you have a nice guest_readable_cr0() function that did this?


  }

  static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu)



--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html