Re: [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume

2009-09-06 Thread Avi Kivity

On 09/03/2009 05:53 PM, Orit Wasserman wrote:


Need to auto-ack the interrupt if requested by the guest.
 

The is_interrupt means L1 has interrupts, kvm regular code will handle it.
   


If the VM-Exit Controls bit 15 (Acknowledge interrupts on exit) is set, 
when the nested guest exits you need to run kvm_cpu_get_interrupt() and 
put the vector number in the VM-Exit interruption-information field.  
kvm doesn't set this bit but I think Xen does.


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume

2009-09-06 Thread Orit Wasserman


Avi Kivity a...@redhat.com wrote on 06/09/2009 12:29:58:

 From:

 Avi Kivity a...@redhat.com

 To:

 Orit Wasserman/Haifa/i...@ibmil

 Cc:

 Abel Gordon/Haifa/i...@ibmil, aligu...@us.ibm.com, Ben-Ami Yassour1/
 Haifa/i...@ibmil, kvm@vger.kernel.org, mm...@us.ibm.com, Muli Ben-
 Yehuda/Haifa/i...@ibmil

 Date:

 06/09/2009 12:30

 Subject:

 Re: [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume

 On 09/03/2009 05:53 PM, Orit Wasserman wrote:
 
  Need to auto-ack the interrupt if requested by the guest.
 
  The is_interrupt means L1 has interrupts, kvm regular code will handle
it.
 

 If the VM-Exit Controls bit 15 (Acknowledge interrupts on exit) is set,
 when the nested guest exits you need to run kvm_cpu_get_interrupt() and
 put the vector number in the VM-Exit interruption-information field.
 kvm doesn't set this bit but I think Xen does.
VMware doesn't set it either.
We have to run L2 with the bit off even if the L1 hypervisor set it, and
emulate the L2 exit to L1 hypervisor correctly.
I will look at it.

 --
 error compiling committee.c: too many arguments to function


--
To unsubscribe from this list: send the line unsubscribe kvm in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume

2009-09-03 Thread Orit Wasserman


Avi Kivity a...@redhat.com wrote on 03/09/2009 00:38:16:

 From:

 Avi Kivity a...@redhat.com

 To:

 Orit Wasserman/Haifa/i...@ibmil

 Cc:

 kvm@vger.kernel.org, Ben-Ami Yassour1/Haifa/i...@ibmil, Muli Ben-
 Yehuda/Haifa/i...@ibmil, Abel Gordon/Haifa/i...@ibmil,
 aligu...@us.ibm.com, mm...@us.ibm.com

 Date:

 03/09/2009 00:38

 Subject:

 Re: [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume

 On 09/02/2009 06:38 PM, or...@il.ibm.com wrote:
  -struct nested_vmx {
  -   /* Has the level1 guest done vmon? */
  +struct nested_vmx {   /* Has the level1 guest done vmon? */
 

 A \n died here.
I will fix it.

   bool vmon;
   /* Has the level1 guest done vmclear? */
   bool vmclear;
  +
  +   /* Are we running nested guest */
  +   bool nested_mode;
  +
  +   /* L1 requested VMLAUNCH or VMRESUME but we didn't run L2 yet */
  +   bool nested_run_pending;
  +
  +   /* flag indicating if there was a valid IDT after exiting from l2
*/
  +   bool nested_pending_valid_idt;
 

 What does this mean?  pending event?
I will rename it.
 
  +
  +static inline int nested_cpu_has_vmx_tpr_shadow(struct  kvm_vcpu
*vcpu)
  +{
  +   return to_vmx(vcpu)-nested.l2_state-shadow_vmcs-
  +  cpu_based_vm_exec_control  CPU_BASED_TPR_SHADOW;
  +}
 

 Don't we need to check if the host supports it too?
We check it separately but I can add it here

  +static inline bool nested_vm_need_virtualize_apic_accesses(struct
kvm_vcpu
  +*vcpu)
  +{
  +   struct shadow_vmcs *shadow = to_vmx(vcpu)-nested.l2_state-
shadow_vmcs;
  +
  +   return (shadow-secondary_vm_exec_control
  +  SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
  +  to_vmx(vcpu)-nested.l2_state-shadow_vmcs-apic_access_addr !=
0;
  +}
 

 Why check apic_access_addr?
I will remove it.

  +
  +static inline int nested_cpu_has_vmx_ept(struct kvm_vcpu *vcpu)
  +{
  +   return to_vmx(vcpu)-nested.l2_state-shadow_vmcs-
  +  secondary_vm_exec_control  SECONDARY_EXEC_ENABLE_EPT;
  +}
 

 Need to check if secondary controls enabled?
If the secondary controls are not enabled this field is zero.

  +static void vmx_set_irq(struct kvm_vcpu *vcpu)
  +{
  +   if (to_vmx(vcpu)-nested.nested_mode)
  +  return;
 

 Why?

 Note if the guest didn't enable external interrupt exiting, we need to
 inject as usual.
I will look into it.

 
  +static int nested_handle_pending_idt(struct kvm_vcpu *vcpu)
  +{
 

 Again the name is confusing.  pending_event_injection?
I will rename it.

  +   struct vcpu_vmx *vmx = to_vmx(vcpu);
  +   int irq;
  +   int type;
  +   int errCodeValid;
  +   u32 idt_vectoring_info;
  +   u32 guest_intr;
  +   bool nmi_window_open;
  +   bool interrupt_window_open;
  +
  +   if (vmx-nested.nested_mode  vmx-
nested.nested_pending_valid_idt) {
  +  idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
  +  irq  = idt_vectoring_info  VECTORING_INFO_VECTOR_MASK;
  +  type = idt_vectoring_info  VECTORING_INFO_TYPE_MASK;
  +  errCodeValid = idt_vectoring_info
  + VECTORING_INFO_DELIVER_CODE_MASK;
  +
  +  guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
  +  nmi_window_open =
  + !(guest_intr  (GUEST_INTR_STATE_STI |
  +   GUEST_INTR_STATE_MOV_SS |
  +   GUEST_INTR_STATE_NMI));
  +
  +  interrupt_window_open =
  + ((vmcs_readl(GUEST_RFLAGS)  X86_EFLAGS_IF)
  +  !(guest_intr  (GUEST_INTR_STATE_STI |
  +GUEST_INTR_STATE_MOV_SS)));
  +
  +  if (type == INTR_TYPE_EXT_INTR  !interrupt_window_open) {
  + printk(KERN_INFO IDT ignored, l2 interrupt window
closed!\n);
  + return 0;
  +  }
 

 How can this happen?  Unless it's on nested entry, in which case we need
 to abort the entry.
OK, I will fix it. In truth, I never saw it happen.

  +
#ifdef CONFIG_X86_64
#define R r
#define Q q
  @@ -4646,6 +4842,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
{
   struct vcpu_vmx *vmx = to_vmx(vcpu);
 
  +   nested_handle_pending_idt(vcpu);
 

 You're not checking the return code (need to do that on entry).
I will fix it.

  +
  +   if (vmx-nested.nested_mode) {
  +  vmcs_writel(GUEST_CR0, vmx-nested.l2_state-shadow_vmcs-
guest_cr0);
 

 Might not be legal.  We may also want to force-enable caching.  Lastly,
 don't we need to handle cr0.ts and ct0.mp specially to manage the fpu
state?
We are working on implementing this correctly. kvm seems to handle it fine
but vmware doesn't like it.

 
  +   if (vmx-nested.nested_mode)
  +  vmx-nested.vmclear = 0;
  +
 

 Why?
I will check it.

free_vmcs:
  @@ -5122,6 +5339,228 @@ static int shadow_vmcs_load(struct kvm_vcpu
*vcpu)
   return 0;
}
 
  +void prepare_vmcs_12(struct kvm_vcpu *vcpu)
  +{
  +   struct shadow_vmcs *l2_shadow_vmcs =
  +  to_vmx(vcpu)-nested.l2_state-shadow_vmcs;
  +   struct shadow_vmcs *l1_shadow_vmcs =
  +  to_vmx(vcpu)-nested.l1_state-shadow_vmcs;
  +
  +   l2_shadow_vmcs-guest_es_selector

[PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume

2009-09-02 Thread oritw
From: Orit Wasserman or...@il.ibm.com

---
 arch/x86/kvm/vmx.c | 1142 +++-
 1 files changed, 1130 insertions(+), 12 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 62139b5..a7a62df 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -221,17 +221,30 @@ static inline int vmcs_field_length(unsigned long field)
	return (VMCS_FIELD_LENGTH_MASK & field) >> 13;
 }
 
+#define NESTED_VM_EXIT_CONTROLS_MASK (~(VM_EXIT_LOAD_IA32_PAT | \
+   VM_EXIT_SAVE_IA32_PAT))
+#define NESTED_VM_ENTRY_CONTROLS_MASK (~(VM_ENTRY_LOAD_IA32_PAT | \
+VM_ENTRY_IA32E_MODE))
 struct vmcs {
u32 revision_id;
u32 abort;
char data[0];
 };
 
-struct nested_vmx {
-   /* Has the level1 guest done vmon? */
+struct nested_vmx {/* Has the level1 guest done vmon? */
bool vmon;
/* Has the level1 guest done vmclear? */
bool vmclear;
+
+   /* Are we running nested guest */
+   bool nested_mode;
+
+   /* L1 requested VMLAUNCH or VMRESUME but we didn't run L2 yet */
+   bool nested_run_pending;
+
+   /* flag indicating if there was a valid IDT after exiting from l2 */
+   bool nested_pending_valid_idt;
+
/* What is the location of the  vmcs l1 keeps for l2? (in level1 gpa) */
u64 l1_cur_vmcs;
/*
@@ -704,6 +717,53 @@ static inline void nested_vmcs_write64(struct kvm_vcpu 
*vcpu,
 #endif
 }
 
+
+static inline int nested_cpu_has_vmx_tpr_shadow(struct  kvm_vcpu *vcpu)
+{
+   return to_vmx(vcpu)->nested.l2_state->shadow_vmcs->
+   cpu_based_vm_exec_control & CPU_BASED_TPR_SHADOW;
+}
+
+static inline int nested_cpu_has_secondary_exec_ctrls(struct kvm_vcpu *vcpu)
+{
+   return to_vmx(vcpu)->nested.l2_state->shadow_vmcs->cpu_based_vm_exec_control &
+   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
+}
+
+static inline bool nested_vm_need_virtualize_apic_accesses(struct kvm_vcpu
+  *vcpu)
+{
+   struct shadow_vmcs *shadow = to_vmx(vcpu)->nested.l2_state->shadow_vmcs;
+
+   return (shadow->secondary_vm_exec_control &
+   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
+   to_vmx(vcpu)->nested.l2_state->shadow_vmcs->apic_access_addr != 0;
+}
+
+static inline int nested_cpu_has_vmx_ept(struct kvm_vcpu *vcpu)
+{
+   return to_vmx(vcpu)->nested.l2_state->shadow_vmcs->
+   secondary_vm_exec_control & SECONDARY_EXEC_ENABLE_EPT;
+}
+
+static inline int nested_cpu_has_vmx_vpid(struct kvm_vcpu *vcpu)
+{
+   return to_vmx(vcpu)->nested.l2_state->shadow_vmcs->secondary_vm_exec_control &
+   SECONDARY_EXEC_ENABLE_VPID;
+}
+
+static inline int nested_cpu_has_vmx_pat(struct kvm_vcpu *vcpu)
+{
+   return to_vmx(vcpu)->nested.l2_state->shadow_vmcs->vm_entry_controls &
+   VM_ENTRY_LOAD_IA32_PAT;
+}
+
+static inline int nested_cpu_has_vmx_msr_bitmap(struct kvm_vcpu *vcpu)
+{
+   return to_vmx(vcpu)->nested.l2_state->shadow_vmcs->cpu_based_vm_exec_control &
+   CPU_BASED_USE_MSR_BITMAPS;
+}
+
 static struct page *nested_get_page(struct kvm_vcpu *vcpu,
u64 vmcs_addr)
 {
@@ -779,9 +839,16 @@ static struct kvm_vmx_segment_field {
 };
 
 static void ept_save_pdptrs(struct kvm_vcpu *vcpu);
-
+static int nested_vmx_check_exception(struct vcpu_vmx *vmx, unsigned nr,
+ bool has_error_code, u32 error_code);
+static int nested_vmx_intr(struct kvm_vcpu *vcpu);
 static int create_l1_state(struct kvm_vcpu *vcpu);
 static int create_l2_state(struct kvm_vcpu *vcpu);
+static int launch_guest(struct kvm_vcpu *vcpu);
+static int nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu);
+static int nested_vmx_exit_handled(struct kvm_vcpu *vcpu, bool kvm_override);
+static int nested_vmx_vmexit(struct kvm_vcpu *vcpu,
+bool is_interrupt);
 static int shadow_vmcs_load(struct kvm_vcpu *vcpu);
 
 /*
@@ -899,6 +966,18 @@ static inline bool cpu_has_vmx_ept_2m_page(void)
return !!(vmx_capability.ept  VMX_EPT_2MB_PAGE_BIT);
 }
 
+static inline int is_exception(u32 intr_info)
+{
+   return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+   == (INTR_TYPE_HARD_EXCEPTION | INTR_INFO_VALID_MASK);
+}
+
+static inline int is_nmi(u32 intr_info)
+{
+   return (intr_info & (INTR_INFO_INTR_TYPE_MASK | INTR_INFO_VALID_MASK))
+   == (INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK);
+}
+
 static inline int cpu_has_vmx_invept_individual_addr(void)
 {
return !!(vmx_capability.ept  VMX_EPT_EXTENT_INDIVIDUAL_BIT);
@@ -1460,6 +1539,9 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu, 
unsigned nr,
struct vcpu_vmx *vmx = to_vmx(vcpu);
u32 intr_info = nr | INTR_INFO_VALID_MASK;
 
+   if (nested_vmx_check_exception(vmx, nr, has_error_code, error_code))
+ 

Re: [PATCH 5/6] Nested VMX patch 5 implements vmlaunch and vmresume

2009-09-02 Thread Avi Kivity

On 09/02/2009 06:38 PM, or...@il.ibm.com wrote:

-struct nested_vmx {
-   /* Has the level1 guest done vmon? */
+struct nested_vmx {/* Has the level1 guest done vmon? */
   


A \n died here.


bool vmon;
/* Has the level1 guest done vmclear? */
bool vmclear;
+
+   /* Are we running nested guest */
+   bool nested_mode;
+
+   /* L1 requested VMLAUNCH or VMRESUME but we didn't run L2 yet */
+   bool nested_run_pending;
+
+   /* flag indicating if there was a valid IDT after exiting from l2 */
+   bool nested_pending_valid_idt;
   


What does this mean?  pending event?



+
+static inline int nested_cpu_has_vmx_tpr_shadow(struct  kvm_vcpu *vcpu)
+{
+   return to_vmx(vcpu)-nested.l2_state-shadow_vmcs-
+   cpu_based_vm_exec_control  CPU_BASED_TPR_SHADOW;
+}
   


Don't we need to check if the host supports it too?


+static inline bool nested_vm_need_virtualize_apic_accesses(struct kvm_vcpu
+  *vcpu)
+{
+   struct shadow_vmcs *shadow = to_vmx(vcpu)-nested.l2_state-shadow_vmcs;
+
+   return (shadow-secondary_vm_exec_control
+   SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)
+   to_vmx(vcpu)-nested.l2_state-shadow_vmcs-apic_access_addr != 
0;
+}
   


Why check apic_access_addr?


+
+static inline int nested_cpu_has_vmx_ept(struct kvm_vcpu *vcpu)
+{
+   return to_vmx(vcpu)-nested.l2_state-shadow_vmcs-
+   secondary_vm_exec_control  SECONDARY_EXEC_ENABLE_EPT;
+}
   


Need to check if secondary controls enabled?


+static void vmx_set_irq(struct kvm_vcpu *vcpu)
+{
+   if (to_vmx(vcpu)-nested.nested_mode)
+   return;
   


Why?

Note if the guest didn't enable external interrupt exiting, we need to 
inject as usual.




+static int nested_handle_pending_idt(struct kvm_vcpu *vcpu)
+{
   


Again the name is confusing.  pending_event_injection?


+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   int irq;
+   int type;
+   int errCodeValid;
+   u32 idt_vectoring_info;
+   u32 guest_intr;
+   bool nmi_window_open;
+   bool interrupt_window_open;
+
+   if (vmx->nested.nested_mode && vmx->nested.nested_pending_valid_idt) {
+   idt_vectoring_info = vmcs_read32(IDT_VECTORING_INFO_FIELD);
+   irq  = idt_vectoring_info & VECTORING_INFO_VECTOR_MASK;
+   type = idt_vectoring_info & VECTORING_INFO_TYPE_MASK;
+   errCodeValid = idt_vectoring_info &
+   VECTORING_INFO_DELIVER_CODE_MASK;
+
+   guest_intr = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
+   nmi_window_open =
+   !(guest_intr & (GUEST_INTR_STATE_STI |
+   GUEST_INTR_STATE_MOV_SS |
+   GUEST_INTR_STATE_NMI));
+
+   interrupt_window_open =
+   ((vmcs_readl(GUEST_RFLAGS) & X86_EFLAGS_IF) &&
+!(guest_intr & (GUEST_INTR_STATE_STI |
+GUEST_INTR_STATE_MOV_SS)));
+
+   if (type == INTR_TYPE_EXT_INTR && !interrupt_window_open) {
+   printk(KERN_INFO "IDT ignored, l2 interrupt window 
closed!\n");
+   return 0;
+   }
   


How can this happen?  Unless it's on nested entry, in which case we need 
to abort the entry.



+
  #ifdef CONFIG_X86_64
  #define R r
  #define Q q
@@ -4646,6 +4842,15 @@ static void vmx_vcpu_run(struct kvm_vcpu *vcpu)
  {
struct vcpu_vmx *vmx = to_vmx(vcpu);

+   nested_handle_pending_idt(vcpu);
   


You're not checking the return code (need to do that on entry).


+
+   if (vmx-nested.nested_mode) {
+   vmcs_writel(GUEST_CR0, 
vmx-nested.l2_state-shadow_vmcs-guest_cr0);
   


Might not be legal.  We may also want to force-enable caching.  Lastly, 
don't we need to handle cr0.ts and ct0.mp specially to manage the fpu state?




+   if (vmx-nested.nested_mode)
+   vmx-nested.vmclear = 0;
+
   


Why?


  free_vmcs:
@@ -5122,6 +5339,228 @@ static int shadow_vmcs_load(struct kvm_vcpu *vcpu)
return 0;
  }

+void prepare_vmcs_12(struct kvm_vcpu *vcpu)
+{
+   struct shadow_vmcs *l2_shadow_vmcs =
+   to_vmx(vcpu)->nested.l2_state->shadow_vmcs;
+   struct shadow_vmcs *l1_shadow_vmcs =
+   to_vmx(vcpu)->nested.l1_state->shadow_vmcs;
+
+   l2_shadow_vmcs->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
+   l2_shadow_vmcs->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
+   l2_shadow_vmcs->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
+   l2_shadow_vmcs->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
+   l2_shadow_vmcs->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
+   l2_shadow_vmcs->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
+   l2_shadow_vmcs->guest_ldtr_selector =