Re: [PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff

2009-10-22 Thread Orit Wasserman


Avi Kivity <a...@redhat.com> wrote on 20/10/2009 06:00:50:

 From: Avi Kivity <a...@redhat.com>
 To: Orit Wasserman/Haifa/i...@ibmil
 Cc: kvm@vger.kernel.org, Ben-Ami Yassour1/Haifa/i...@ibmil,
  Abel Gordon/Haifa/i...@ibmil, Muli Ben-Yehuda/Haifa/i...@ibmil,
  aligu...@us.ibm.com, md...@us.ibm.com
 Date: 20/10/2009 06:02
 Subject: Re: [PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff

 On 10/15/2009 11:41 PM, or...@il.ibm.com wrote:
 
/*
  + * Handles msr read for nested virtualization
  + */
  +static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
  +   u64 *pdata)
  +{
  +   u64 vmx_msr = 0;
  +
  +   switch (msr_index) {
  +   case MSR_IA32_FEATURE_CONTROL:
  +  *pdata = 0;
  +  break;
  +   case MSR_IA32_VMX_BASIC:
  +  *pdata = 0;
  +  rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
  +  *pdata = (vmx_msr & 0x00cf);
  +  break;
  +
 

 This (and the rest of the msrs) must be controllable from userspace.
 Otherwise a live migration from a newer host to an older host would break.
OK.
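
One way to do that (a rough sketch only, not part of the posted patch; the
field names below are invented for illustration) is to keep the reported
values per vcpu in struct nested_vmx and let nested_vmx_set_msr() overwrite
them, instead of reading the host MSRs with rdmsrl() at query time:

struct nested_vmx {
	/* Has the level1 guest done vmxon? */
	bool vmxon;
	/*
	 * Guest-visible VMX capability MSRs.  Filled with the values the
	 * current code computes, but writable from userspace so a migration
	 * source can impose an older host's (smaller) feature set.
	 */
	u64 msr_vmx_basic;
	u64 msr_vmx_pinbased_ctls;
	u64 msr_vmx_procbased_ctls;
};

static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 *pdata)
{
	struct nested_vmx *nested = &to_vmx(vcpu)->nested;

	switch (msr_index) {
	case MSR_IA32_VMX_BASIC:
		/* report the cached, possibly userspace-overridden value */
		*pdata = nested->msr_vmx_basic;
		break;
	case MSR_IA32_VMX_PINBASED_CTLS:
		*pdata = nested->msr_vmx_pinbased_ctls;
		break;
	default:
		return 1;
	}
	return 0;
}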

 
/*
  + * Writes msr value for nested virtualization
  + * Returns 0 on success, non-0 otherwise.
  + */
  +static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32
 msr_index, u64 data)
  +{
  +   switch (msr_index) {
  +   case MSR_IA32_FEATURE_CONTROL:
  +  if ((data & (FEATURE_CONTROL_LOCKED |
  +  FEATURE_CONTROL_VMXON_ENABLED))
  +  != (FEATURE_CONTROL_LOCKED |
  + FEATURE_CONTROL_VMXON_ENABLED))
  + return 1;
  +  break;
  +   default:
  +  return 1;
  +   }
  +
  +   return 0;
  +}
  +
 

 Need to export this msr to userspace for live migration.  See
 msrs_to_save[].
OK.
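
For the record, a sketch of what that could look like in arch/x86/kvm/x86.c
(which VMX capability MSRs to list is my assumption, and the existing
entries are elided):

static u32 msrs_to_save[] = {
	MSR_IA32_SYSENTER_CS, MSR_IA32_SYSENTER_ESP, MSR_IA32_SYSENTER_EIP,
	/* ... existing entries unchanged ... */
	/* nested VMX state that must survive KVM_GET_MSRS/KVM_SET_MSRS */
	MSR_IA32_FEATURE_CONTROL, MSR_IA32_VMX_BASIC,
	MSR_IA32_VMX_PINBASED_CTLS, MSR_IA32_VMX_PROCBASED_CTLS,
};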

 
  +/*
  + * Check to see if vcpu can execute vmx command
  + * Inject the corresponding exception
  + */
  +static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
  +{
  +   struct kvm_segment cs;
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +   struct kvm_msr_entry *msr;
  +
  +   vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
  +
  +   if (!vmx->nested.vmxon) {
  +  printk(KERN_DEBUG "%s: vmx not on\n", __func__);
 

 pr_debug
I will change it.
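
i.e. simply:

	pr_debug("%s: vmx not on\n", __func__);

(pr_debug() is compiled out unless DEBUG or dynamic debug is enabled, so the
message stays out of production builds by default.)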

  +  kvm_queue_exception(vcpu, UD_VECTOR);
  +  return 0;
  +   }
  +
  +   msr = find_msr_entry(vmx, MSR_EFER);
  +
  +   if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
  +   ((msr->data & EFER_LMA) && !cs.l)) {
 

 is_long_mode()
I will change it.
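
The check would then read roughly like this (sketch, not compile-tested),
and the find_msr_entry(vmx, MSR_EFER) lookup above can go away with it:

	if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
	    (is_long_mode(vcpu) && !cs.l)) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 0;
	}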

static int handle_vmx_insn(struct kvm_vcpu *vcpu)
{
   kvm_queue_exception(vcpu, UD_VECTOR);
   return 1;
}
 
  +static int handle_vmoff(struct kvm_vcpu *vcpu)
  +{
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +
  +   if (!nested_vmx_check_permission(vcpu))
  +  return 1;
  +
  +   vmx->nested.vmxon = 0;
  +
  +   skip_emulated_instruction(vcpu);
  +   return 1;
  +}
  +
  +static int handle_vmon(struct kvm_vcpu *vcpu)
  +{
  +   struct kvm_segment cs;
  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +
  +   if (!nested) {
  +  printk(KERN_DEBUG "%s: nested vmx not enabled\n", __func__);
  +  kvm_queue_exception(vcpu, UD_VECTOR);
  +  return 1;
  +   }
  +
  +   vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
  +
  +   if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
  +   !(vcpu->arch.cr0 & X86_CR0_PE) ||
  +   (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
  +  kvm_queue_exception(vcpu, UD_VECTOR);
  +  printk(KERN_INFO "%s invalid register state\n", __func__);
  +  return 1;
  +   }
  +#ifdef CONFIG_X86_64
  +   if (((find_msr_entry(to_vmx(vcpu),
  +  MSR_EFER)->data & EFER_LMA) && !cs.l)) {
 

 is_long_mode(), and you can avoid the #ifdef.
I will change it.
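
And since is_long_mode() should already hide the CONFIG_X86_64 distinction
internally (evaluating to false on 32-bit builds), the #ifdef in
handle_vmon() can go away too, roughly:

	if (is_long_mode(vcpu) && !cs.l) {
		kvm_queue_exception(vcpu, UD_VECTOR);
		return 1;
	}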


 VMXON is supposed to block INIT, please add that (in a separate patch).
I will add it.
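
For that follow-up, the core of it is presumably just consulting the vmxon
flag from wherever INIT gets delivered; a purely hypothetical sketch (the
helper name is invented for illustration):

/* Hypothetical helper, name invented for illustration only. */
static bool nested_vmx_init_blocked(struct kvm_vcpu *vcpu)
{
	/* INIT must be blocked while the L1 guest is in VMX operation. */
	return to_vmx(vcpu)->nested.vmxon;
}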

 --
 I have a truly marvellous patch that fixes the bug which this
 signature is too narrow to contain.




Re: [PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff

2009-10-19 Thread Avi Kivity

On 10/15/2009 11:41 PM, or...@il.ibm.com wrote:


  /*
+ * Handles msr read for nested virtualization
+ */
+static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
+ u64 *pdata)
+{
+   u64 vmx_msr = 0;
+
+   switch (msr_index) {
+   case MSR_IA32_FEATURE_CONTROL:
+   *pdata = 0;
+   break;
+   case MSR_IA32_VMX_BASIC:
+   *pdata = 0;
+   rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
+   *pdata = (vmx_msr & 0x00cf);
+   break;
+
   


This (and the rest of the msrs) must be controllable from userspace.  
Otherwise a live migration from a newer host to an older host would break.




  /*
+ * Writes msr value for nested virtualization
+ * Returns 0 on success, non-0 otherwise.
+ */
+static int nested_vmx_set_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
+{
+   switch (msr_index) {
+   case MSR_IA32_FEATURE_CONTROL:
+   if ((data & (FEATURE_CONTROL_LOCKED |
+FEATURE_CONTROL_VMXON_ENABLED))
+   != (FEATURE_CONTROL_LOCKED |
+   FEATURE_CONTROL_VMXON_ENABLED))
+   return 1;
+   break;
+   default:
+   return 1;
+   }
+
+   return 0;
+}
+
   


Need to export this msr to userspace for live migration.  See 
msrs_to_save[].




+/*
+ * Check to see if vcpu can execute vmx command
+ * Inject the corresponding exception
+ */
+static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
+{
+   struct kvm_segment cs;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   struct kvm_msr_entry *msr;
+
+   vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+   if (!vmx->nested.vmxon) {
+   printk(KERN_DEBUG "%s: vmx not on\n", __func__);
   


pr_debug


+   kvm_queue_exception(vcpu, UD_VECTOR);
+   return 0;
+   }
+
+   msr = find_msr_entry(vmx, MSR_EFER);
+
+   if ((vmx_get_rflags(vcpu) & X86_EFLAGS_VM) ||
+((msr->data & EFER_LMA) && !cs.l)) {
   


is_long_mode()


  static int handle_vmx_insn(struct kvm_vcpu *vcpu)
  {
kvm_queue_exception(vcpu, UD_VECTOR);
return 1;
  }

+static int handle_vmoff(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   if (!nested_vmx_check_permission(vcpu))
+   return 1;
+
+   vmx->nested.vmxon = 0;
+
+   skip_emulated_instruction(vcpu);
+   return 1;
+}
+
+static int handle_vmon(struct kvm_vcpu *vcpu)
+{
+   struct kvm_segment cs;
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+
+   if (!nested) {
+   printk(KERN_DEBUG "%s: nested vmx not enabled\n", __func__);
+   kvm_queue_exception(vcpu, UD_VECTOR);
+   return 1;
+   }
+
+   vmx_get_segment(vcpu, &cs, VCPU_SREG_CS);
+
+   if (!(vcpu->arch.cr4 & X86_CR4_VMXE) ||
+   !(vcpu->arch.cr0 & X86_CR0_PE) ||
+   (vmx_get_rflags(vcpu) & X86_EFLAGS_VM)) {
+   kvm_queue_exception(vcpu, UD_VECTOR);
+   printk(KERN_INFO "%s invalid register state\n", __func__);
+   return 1;
+   }
+#ifdef CONFIG_X86_64
+   if (((find_msr_entry(to_vmx(vcpu),
+MSR_EFER)->data & EFER_LMA) && !cs.l)) {
   


is_long_mode(), and you can avoid the #ifdef.


VMXON is supposed to block INIT, please add that (in a separate patch).

--
I have a truly marvellous patch that fixes the bug which this
signature is too narrow to contain.



[PATCH 1/5] Nested VMX patch 1 implements vmon and vmoff

2009-10-15 Thread oritw
From: Orit Wasserman <or...@il.ibm.com>

---
 arch/x86/kvm/svm.c |3 -
 arch/x86/kvm/vmx.c |  217 +++-
 arch/x86/kvm/x86.c |6 +-
 arch/x86/kvm/x86.h |2 +
 4 files changed, 222 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 2df9b45..3c1f22a 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -124,9 +124,6 @@ static int npt = 1;
 
 module_param(npt, int, S_IRUGO);
 
-static int nested = 1;
-module_param(nested, int, S_IRUGO);
-
 static void svm_flush_tlb(struct kvm_vcpu *vcpu);
 static void svm_complete_interrupts(struct vcpu_svm *svm);
 
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 78101dd..71bd91a 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -67,6 +67,11 @@ struct vmcs {
char data[0];
 };
 
+struct nested_vmx {
+   /* Has the level1 guest done vmxon? */
+   bool vmxon;
+};
+
 struct vcpu_vmx {
struct kvm_vcpu   vcpu;
struct list_head  local_vcpus_link;
@@ -114,6 +119,9 @@ struct vcpu_vmx {
ktime_t entry_time;
s64 vnmi_blocked_time;
u32 exit_reason;
+
+   /* Nested vmx */
+   struct nested_vmx nested;
 };
 
 static inline struct vcpu_vmx *to_vmx(struct kvm_vcpu *vcpu)
@@ -967,6 +975,95 @@ static void guest_write_tsc(u64 guest_tsc, u64 host_tsc)
 }
 
 /*
+ * Handles msr read for nested virtualization
+ */
+static int nested_vmx_get_msr(struct kvm_vcpu *vcpu, u32 msr_index,
+ u64 *pdata)
+{
+   u64 vmx_msr = 0;
+
+   switch (msr_index) {
+   case MSR_IA32_FEATURE_CONTROL:
+   *pdata = 0;
+   break;
+   case MSR_IA32_VMX_BASIC:
+   *pdata = 0;
+   rdmsrl(MSR_IA32_VMX_BASIC, vmx_msr);
+   *pdata = (vmx_msr & 0x00cf);
+   break;
+   case MSR_IA32_VMX_PINBASED_CTLS:
+   rdmsrl(MSR_IA32_VMX_PINBASED_CTLS, vmx_msr);
+   *pdata = (PIN_BASED_EXT_INTR_MASK & vmcs_config.pin_based_exec_ctrl) |
+   (PIN_BASED_NMI_EXITING & vmcs_config.pin_based_exec_ctrl) |
+   (PIN_BASED_VIRTUAL_NMIS & vmcs_config.pin_based_exec_ctrl);
+   break;
+   case MSR_IA32_VMX_PROCBASED_CTLS:
+   {
+   u32 vmx_msr_high, vmx_msr_low;
+   u32 control = CPU_BASED_HLT_EXITING |
+#ifdef CONFIG_X86_64
+   CPU_BASED_CR8_LOAD_EXITING |
+   CPU_BASED_CR8_STORE_EXITING |
+#endif
+   CPU_BASED_CR3_LOAD_EXITING |
+   CPU_BASED_CR3_STORE_EXITING |
+   CPU_BASED_USE_IO_BITMAPS |
+   CPU_BASED_MOV_DR_EXITING |
+   CPU_BASED_USE_TSC_OFFSETING |
+   CPU_BASED_INVLPG_EXITING |
+   CPU_BASED_TPR_SHADOW |
+   CPU_BASED_USE_MSR_BITMAPS |
+   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
+   rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+
+   control &= vmx_msr_high; /* bit == 0 in high word == must be zero */
+   control |= vmx_msr_low;  /* bit == 1 in low word  == must be one  */
+
+   *pdata = (CPU_BASED_HLT_EXITING & control) |
+#ifdef CONFIG_X86_64
+   (CPU_BASED_CR8_LOAD_EXITING & control) |
+   (CPU_BASED_CR8_STORE_EXITING & control) |
+#endif
+   (CPU_BASED_CR3_LOAD_EXITING & control) |
+   (CPU_BASED_CR3_STORE_EXITING & control) |
+   (CPU_BASED_USE_IO_BITMAPS & control) |
+   (CPU_BASED_MOV_DR_EXITING & control) |
+   (CPU_BASED_USE_TSC_OFFSETING & control) |
+   (CPU_BASED_INVLPG_EXITING & control);
+
+   if (cpu_has_secondary_exec_ctrls())
+   *pdata |= CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+
+   if (vm_need_tpr_shadow(vcpu->kvm))
+   *pdata |= CPU_BASED_TPR_SHADOW;
+   break;
+   }
+   case MSR_IA32_VMX_EXIT_CTLS:
+   *pdata = 0;
+#ifdef CONFIG_X86_64
+   *pdata |= VM_EXIT_HOST_ADDR_SPACE_SIZE;
+#endif
+   break;
+   case MSR_IA32_VMX_ENTRY_CTLS:
+   *pdata = 0;
+   break;
+   case MSR_IA32_VMX_PROCBASED_CTLS2:
+   *pdata = 0;
+   if (vm_need_virtualize_apic_accesses(vcpu->kvm))
+   *pdata |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
+   break;
+   case MSR_IA32_VMX_EPT_VPID_CAP:
+   *pdata = 0;
+   break;
+   default:
+   return 1;
+   }
+
+   return 0;
+}
+
+/*
  * Reads an msr value (of 'msr_index') into 'pdata'.
  * Returns 0 on success, non-0 otherwise.
  * Assumes vcpu_load() was already called.
@@ -1005,6 +1102,9 @@ static int 
