[PATCH v2] KVM: VMX: disable SMEP feature when guest is in non-paging mode

2013-02-03 Thread Dongxiao Xu
Changes from v1 to v2:
 - Modify commit message and comments according to Gleb's suggestions.

SMEP is disabled if the CPU is in non-paging mode in hardware.
However, KVM always uses paging mode to emulate guest non-paging
mode with TDP. To emulate this behavior, SMEP needs to be manually
disabled when the guest switches to non-paging mode.

We hit an issue where an SMP Linux guest with a recent kernel (one
that enables SMEP support, for example 3.5.3) would crash with a
triple fault if unrestricted_guest=0 was set. This is because KVM
uses an identity-mapping page table to emulate non-paging mode, and
that page table is set with the USER flag. If SMEP is still enabled
in this case, the guest hits an unhandlable page fault and crashes.
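
In effect, the fix boils down to the following CR4 computation (a minimal
standalone sketch of the logic only; fixup_hw_cr4 and the guest_paging flag
are illustrative names, not functions from vmx.c):

#define X86_CR4_PSE  (1UL << 4)   /* page size extensions */
#define X86_CR4_PAE  (1UL << 5)   /* physical address extension */
#define X86_CR4_SMEP (1UL << 20)  /* supervisor mode execution protection */

/* Compute the CR4 value loaded into hardware while TDP emulates guest
 * non-paging mode with a USER-flagged identity page table. */
static unsigned long fixup_hw_cr4(unsigned long hw_cr4, int guest_paging)
{
	if (!guest_paging) {
		hw_cr4 &= ~X86_CR4_PAE;   /* identity map runs without PAE... */
		hw_cr4 |= X86_CR4_PSE;    /* ...using 4MB pages instead */
		hw_cr4 &= ~X86_CR4_SMEP;  /* SMEP would fault on the USER-flagged
					   * identity mapping, so clear it */
	}
	return hw_cr4;
}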

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
Signed-off-by: Xiantao Zhang xiantao.zh...@intel.com
---
 arch/x86/kvm/vmx.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9120ae1..70393e2 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3155,6 +3155,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
	if (!is_paging(vcpu)) {
		hw_cr4 &= ~X86_CR4_PAE;
		hw_cr4 |= X86_CR4_PSE;
+		/*
+		 * SMEP is disabled if CPU is in non-paging mode in
+		 * hardware. However KVM always uses paging mode to
+		 * emulate guest non-paging mode with TDP.
+		 * To emulate this behavior, SMEP needs to be manually
+		 * disabled when guest switches to non-paging mode.
+		 */
+		hw_cr4 &= ~X86_CR4_SMEP;
	} else if (!(cr4 & X86_CR4_PAE)) {
		hw_cr4 &= ~X86_CR4_PAE;
	}
-- 
1.7.1


[PATCH] KVM: VMX: disable SMEP feature when guest is in non-paging mode

2013-02-01 Thread Dongxiao Xu
SMEP is disabled if the CPU is in non-paging mode in hardware.
However, KVM always uses paging mode to emulate guest non-paging
mode with HAP. To emulate this behavior, SMEP needs to be manually
disabled when the guest switches to non-paging mode.

We hit an issue where an SMP Linux guest with a recent kernel (one
that enables SMEP support, for example 3.5.3) would crash with a
triple fault if unrestricted_guest=0 was set. This is because KVM
uses an identity-mapping page table to emulate non-paging mode, and
that page table is set with the USER flag. If SMEP is still enabled
in this case, the guest hits an unhandlable page fault and crashes.

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
Signed-off-by: Xiantao Zhang xiantao.zh...@intel.com
---
 arch/x86/kvm/vmx.c |8 
 1 files changed, 8 insertions(+), 0 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 9120ae1..e82f20d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -3155,6 +3155,14 @@ static int vmx_set_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
	if (!is_paging(vcpu)) {
		hw_cr4 &= ~X86_CR4_PAE;
		hw_cr4 |= X86_CR4_PSE;
+		/*
+		 * SMEP is disabled if CPU is in non-paging mode in
+		 * hardware. However KVM always uses paging mode to
+		 * emulate guest non-paging mode with HAP.
+		 * To emulate this behavior, SMEP needs to be manually
+		 * disabled when guest switches to non-paging mode.
+		 */
+		hw_cr4 &= ~X86_CR4_SMEP;
	} else if (!(cr4 & X86_CR4_PAE)) {
		hw_cr4 &= ~X86_CR4_PAE;
	}
-- 
1.7.1


[PATCH v2 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM

2012-11-22 Thread Dongxiao Xu
The launch state is not a member of the VMCS area; use a separate
variable (a list) to store it instead.
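
The bookkeeping that replaces launch_state can be modelled in isolation
roughly as below (a userspace sketch of the same idea using a plain
singly-linked list instead of the kernel's list_head; the names here are
illustrative, not the patch's exact code):

#include <stdlib.h>

/* One node per vmcs12 that has been VMLAUNCHed, keyed by its address. */
struct launched_node {
	unsigned long vmcs12_pa;
	struct launched_node *next;
};

int vmcs12_launched(struct launched_node *pool, unsigned long pa)
{
	for (; pool; pool = pool->next)
		if (pool->vmcs12_pa == pa)
			return 1;
	return 0;
}

/* VMLAUNCH path: remember that this vmcs12 has been launched. */
int set_vmcs12_launched(struct launched_node **pool, unsigned long pa)
{
	struct launched_node *n;

	if (vmcs12_launched(*pool, pa))
		return 0;
	n = calloc(1, sizeof(*n));
	if (!n)
		return -1;
	n->vmcs12_pa = pa;
	n->next = *pool;
	*pool = n;
	return 0;
}

/* VMCLEAR path: forget the launched state for this vmcs12. */
void clear_vmcs12_launched(struct launched_node **pool, unsigned long pa)
{
	for (; *pool; pool = &(*pool)->next) {
		if ((*pool)->vmcs12_pa == pa) {
			struct launched_node *victim = *pool;
			*pool = victim->next;
			free(victim);
			return;
		}
	}
}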

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
---
 arch/x86/kvm/vmx.c |   86 +---
 1 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 20de88b..3be9265 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -177,8 +177,7 @@ struct __packed vmcs12 {
u32 revision_id;
u32 abort;
 
-   u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
-   u32 padding[7]; /* room for future expansion */
+   u32 padding[8]; /* room for future expansion */
 
u64 io_bitmap_a;
u64 io_bitmap_b;
@@ -339,6 +338,11 @@ struct vmcs02_list {
struct loaded_vmcs vmcs02;
 };
 
+struct vmcs12_list {
+   unsigned long vmcs12_pa;
+   struct list_head node;
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -364,6 +368,8 @@ struct nested_vmx {
 * we must keep them pinned while L2 runs.
 */
struct page *apic_access_page;
+   /* vmcs12_pool contains the launched vmcs12. */
+   struct list_head vmcs12_pool;
 };
 
 struct vcpu_vmx {
@@ -619,6 +625,58 @@ static void nested_release_page_clean(struct page *page)
kvm_release_page_clean(page);
 }
 
+static int vmcs12_launched(struct list_head *vmcs12_pool,
+  unsigned long vmcs12_pa)
+{
+   struct vmcs12_list *iter;
+   struct list_head *pos;
+   int launched = 0;
+
+   list_for_each(pos, vmcs12_pool) {
+   iter = list_entry(pos, struct vmcs12_list, node);
+   if (vmcs12_pa == iter->vmcs12_pa) {
+   launched = 1;
+   break;
+   }
+   }
+
+   return launched;
+}
+
+static int set_vmcs12_launched(struct list_head *vmcs12_pool,
+  unsigned long vmcs12_pa)
+{
+   struct vmcs12_list *vmcs12;
+
+   if (vmcs12_launched(vmcs12_pool, vmcs12_pa))
+   return 0;
+
+   vmcs12 = kzalloc(sizeof(struct vmcs12_list), GFP_KERNEL);
+   if (!vmcs12)
+   return -ENOMEM;
+
+   vmcs12->vmcs12_pa = vmcs12_pa;
+   list_add(&vmcs12->node, vmcs12_pool);
+
+   return 0;
+}
+
+static void clear_vmcs12_launched(struct list_head *vmcs12_pool,
+  unsigned long vmcs12_pa)
+{
+   struct vmcs12_list *iter;
+   struct list_head *pos;
+
+   list_for_each(pos, vmcs12_pool) {
+   iter = list_entry(pos, struct vmcs12_list, node);
+   if (vmcs12_pa == iter->vmcs12_pa) {
+   list_del(&iter->node);
+   kfree(iter);
+   break;
+   }
+   }
+}
+
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
@@ -5116,6 +5174,18 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx 
*vmx)
 }
 
 /*
+ * Free the vmcs12 list.
+ */
+static void nested_free_vmcs12_list(struct vcpu_vmx *vmx)
+{
+   struct vmcs12_list *item, *n;
+   list_for_each_entry_safe(item, n, &vmx->nested.vmcs12_pool, node) {
+   list_del(&item->node);
+   kfree(item);
+   }
+}
+
+/*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
  * inspect the argument to VMXON (the so-called VMXON pointer) because we
@@ -5212,6 +5282,7 @@ static void free_nested(struct vcpu_vmx *vmx)
}
 
nested_free_all_saved_vmcss(vmx);
+   nested_free_vmcs12_list(vmx);
 }
 
 /* Emulate the VMXOFF instruction */
@@ -5364,7 +5435,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return 1;
}
vmcs12 = kmap(page);
-   vmcs12->launch_state = 0;
+   clear_vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12));
kunmap(page);
nested_release_page(page);
 
@@ -6460,6 +6531,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
 
vmx->nested.current_vmptr = -1ull;
vmx->nested.current_vmcs12 = NULL;
+   INIT_LIST_HEAD(&vmx->nested.vmcs12_pool);
 
return &vmx->vcpu;
 
@@ -6839,6 +6911,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
launch)
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
struct loaded_vmcs *vmcs02;
+   int is_launched;
 
if (!nested_vmx_check_permission(vcpu) ||
!nested_vmx_check_vmcs12(vcpu))
@@ -6857,7 +6930,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
launch)
 * for misconfigurations which will anyway be caught by the processor
 * when using the merged vmcs02.
 */
-   if (vmcs12->launch_state == launch) {
+   is_launched =
+   vmcs12_launched(vmx

[PATCH v2 1/4] nested vmx: clean up for vmcs12 read and write

2012-11-22 Thread Dongxiao Xu
Abstract the vmcs12_read and vmcs12_write functions to perform the vmcs12
read/write operations.
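
The mechanism being abstracted is a field-encoding-to-offset table plus a
per-width switch; in isolation it can be modelled like this (a toy userspace
sketch, not the kernel code; the struct, field ids and values are invented
for illustration):

#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

/* A toy "vmcs12" with two fields of different widths. */
struct toy_vmcs12 {
	uint32_t exec_control;
	uint64_t guest_cr0;
};

enum field_type { TYPE_U32, TYPE_U64 };

/* Table indexed by a small field id, like vmcs_field_to_offset_table. */
static const struct { size_t offset; enum field_type type; } field_table[] = {
	{ offsetof(struct toy_vmcs12, exec_control), TYPE_U32 },
	{ offsetof(struct toy_vmcs12, guest_cr0),    TYPE_U64 },
};

static uint64_t toy_vmcs12_read(struct toy_vmcs12 *v, unsigned int field)
{
	char *p = (char *)v + field_table[field].offset;

	switch (field_table[field].type) {
	case TYPE_U32: return *(uint32_t *)p;
	case TYPE_U64: return *(uint64_t *)p;
	}
	return 0;
}

static void toy_vmcs12_write(struct toy_vmcs12 *v, unsigned int field, uint64_t val)
{
	char *p = (char *)v + field_table[field].offset;

	switch (field_table[field].type) {
	case TYPE_U32: *(uint32_t *)p = (uint32_t)val; break;
	case TYPE_U64: *(uint64_t *)p = val; break;
	}
}

int main(void)
{
	struct toy_vmcs12 v = { 0 };

	toy_vmcs12_write(&v, 1, 0x80000021);	/* field 1 = guest_cr0 */
	printf("%llx\n", (unsigned long long)toy_vmcs12_read(&v, 1));
	return 0;
}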

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
---
 arch/x86/kvm/vmx.c |   85 +---
 1 files changed, 41 insertions(+), 44 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f858159..2f8344f 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -583,10 +583,15 @@ static const unsigned short vmcs_field_to_offset_table[] 
= {
 };
 static const int max_vmcs_field = ARRAY_SIZE(vmcs_field_to_offset_table);
 
-static inline short vmcs_field_to_offset(unsigned long field)
+static inline bool vmcs_field_valid(unsigned long field)
 {
if (field >= max_vmcs_field || vmcs_field_to_offset_table[field] == 0)
-   return -1;
+   return 0;
+   return 1;
+}
+
+static inline short vmcs_field_to_offset(unsigned long field)
+{
return vmcs_field_to_offset_table[field];
 }
 
@@ -5407,32 +5412,45 @@ static inline int vmcs_field_readonly(unsigned long 
field)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-   unsigned long field, u64 *ret)
+static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field)
 {
-   short offset = vmcs_field_to_offset(field);
-   char *p;
+   char *p = ((char *)(get_vmcs12(vcpu))) + vmcs_field_to_offset(field);
 
-   if (offset < 0)
-   return 0;
+   switch (vmcs_field_type(field)) {
+   case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+   return *((natural_width *)p);
+   case VMCS_FIELD_TYPE_U16:
+   return *((u16 *)p);
+   case VMCS_FIELD_TYPE_U32:
+   return *((u32 *)p);
+   case VMCS_FIELD_TYPE_U64:
+   return *((u64 *)p);
+   default:
+   return 0; /* can never happen. */
+   }
+}
 
-   p = ((char *)(get_vmcs12(vcpu))) + offset;
+static inline void vmcs12_write(struct kvm_vcpu *vcpu,
+   unsigned long field,
+   u64 value)
+{
+   char *p = ((char *)(get_vmcs12(vcpu))) + vmcs_field_to_offset(field);
 
switch (vmcs_field_type(field)) {
case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-   *ret = *((natural_width *)p);
-   return 1;
+   *(natural_width *)p = value;
+   break;
case VMCS_FIELD_TYPE_U16:
-   *ret = *((u16 *)p);
-   return 1;
+   *(u16 *)p = value;
+   break;
case VMCS_FIELD_TYPE_U32:
-   *ret = *((u32 *)p);
-   return 1;
+   *(u32 *)p = value;
+   break;
case VMCS_FIELD_TYPE_U64:
-   *ret = *((u64 *)p);
-   return 1;
+   *(u64 *)p = value;
+   break;
default:
-   return 0; /* can never happen. */
+   break; /* can never happen. */
}
 }
 
@@ -5465,12 +5483,13 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
 
/* Decode instruction info and find the field to read */
field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
-   /* Read the field, zero-extended to a u64 field_value */
-   if (!vmcs12_read_any(vcpu, field, field_value)) {
+   if (!vmcs_field_valid(field)) {
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
skip_emulated_instruction(vcpu);
return 1;
}
+   /* Read the field, zero-extended to a u64 field_value */
+   field_value = vmcs12_read(vcpu, field);
/*
 * Now copy part of this value to register or memory, as requested.
 * Note that the number of bits actually copied is 32 or 64 depending
@@ -5500,8 +5519,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
gva_t gva;
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-   char *p;
-   short offset;
/* The value to write might be 32 or 64 bits, depending on L1's long
 * mode, and eventually we need to write that into a field of several
 * possible lengths. The code below first zero-extends the value to 64
@@ -5537,33 +5554,13 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
skip_emulated_instruction(vcpu);
return 1;
}
-
-   offset = vmcs_field_to_offset(field);
-   if (offset < 0) {
+   if (!vmcs_field_valid(field)) {
nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
skip_emulated_instruction(vcpu);
return 1;
}
-   p = ((char *) get_vmcs12(vcpu)) + offset;
 
-   switch (vmcs_field_type(field)) {
-   case

[PATCH v2 3/4] nested vmx: use vmcs12_read/write() to operate VMCS fields

2012-11-22 Thread Dongxiao Xu
When referencing vmcs12 fields, the current approach is to use the
struct.field style. This commit replaces that approach with calls to the
vmcs12_read() and vmcs12_write() functions.

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
---
 arch/x86/kvm/vmx.c |  591 
 1 files changed, 317 insertions(+), 274 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 639cad0..20de88b 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -629,6 +629,11 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
 
+static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field);
+static inline void vmcs12_write(struct kvm_vcpu *vcpu,
+   unsigned long field,
+   u64 value);
+
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
@@ -891,19 +896,19 @@ static inline bool report_flexpriority(void)
 
 static inline bool nested_cpu_has(struct kvm_vcpu *vcpu, u32 bit)
 {
-   return get_vmcs12(vcpu)->cpu_based_vm_exec_control & bit;
+   return vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) & bit;
 }
 
 static inline bool nested_cpu_has2(struct kvm_vcpu *vcpu, u32 bit)
 {
-   return (get_vmcs12(vcpu)->cpu_based_vm_exec_control &
+   return (vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) &
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
-   (get_vmcs12(vcpu)->secondary_vm_exec_control & bit);
+   (vmcs12_read(vcpu, SECONDARY_VM_EXEC_CONTROL) & bit);
 }
 
 static inline bool nested_cpu_has_virtual_nmis(struct kvm_vcpu *vcpu)
 {
-   return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+   return vmcs12_read(vcpu, PIN_BASED_VM_EXEC_CONTROL) &
PIN_BASED_VIRTUAL_NMIS;
 }
 
@@ -915,7 +920,6 @@ static inline bool is_exception(u32 intr_info)
 
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
-   struct vmcs12 *vmcs12,
u32 reason, unsigned long qualification);
 
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
@@ -1220,7 +1224,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 * specified above if L1 did not want them.
 */
if (is_guest_mode(vcpu))
-   eb |= get_vmcs12(vcpu)->exception_bitmap;
+   eb |= vmcs12_read(vcpu, EXCEPTION_BITMAP);
 
vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -1582,7 +1586,7 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
if (is_guest_mode(vcpu))
vcpu->arch.cr0_guest_owned_bits &=
-   ~get_vmcs12(vcpu)->cr0_guest_host_mask;
+   ~vmcs12_read(vcpu, CR0_GUEST_HOST_MASK);
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 }
 
@@ -1593,15 +1597,19 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu 
*vcpu);
  * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
  * its hypervisor (cr0_read_shadow).
  */
-static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
+static inline unsigned long nested_read_cr0(struct kvm_vcpu *vcpu)
 {
-   return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
-   (fields->cr0_read_shadow & fields->cr0_guest_host_mask);
+   return (vmcs12_read(vcpu, GUEST_CR0) &
+   ~vmcs12_read(vcpu, CR0_GUEST_HOST_MASK)) |
+   (vmcs12_read(vcpu, CR0_READ_SHADOW) &
+   vmcs12_read(vcpu, CR0_GUEST_HOST_MASK));
 }
-static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
+static inline unsigned long nested_read_cr4(struct kvm_vcpu *vcpu)
 {
-   return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
-   (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
+   return (vmcs12_read(vcpu, GUEST_CR4) &
+   ~vmcs12_read(vcpu, CR4_GUEST_HOST_MASK)) |
+   (vmcs12_read(vcpu, CR4_READ_SHADOW) &
+   vmcs12_read(vcpu, CR4_GUEST_HOST_MASK));
 }
 
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
@@ -1623,10 +1631,10 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 * up-to-date here because we just decached cr0.TS (and we'll
 * only update vmcs12-guest_cr0 on nested exit).
 */
-   struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-   vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
+   u64 guest_cr0 = (vmcs12_read(vcpu, GUEST_CR0) & ~X86_CR0_TS) |
(vcpu->arch.cr0 & X86_CR0_TS);
-   vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
+   vmcs12_write(vcpu, GUEST_CR0, guest_cr0

[PATCH v2 2/4] nested vmx: clean up for nested_cpu_has_xxx functions

2012-11-22 Thread Dongxiao Xu
This is a preparation for the later change, which uses vmcs12_read()
and vmcs12_write() to replace the current way of accessing vmcs12 fields.

Since the above functions take 'vcpu' as a parameter, we also use
'vcpu' as the parameter in the nested_cpu_has_xxx functions.

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
---
 arch/x86/kvm/vmx.c |   57 +--
 1 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 2f8344f..639cad0 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -889,22 +889,22 @@ static inline bool report_flexpriority(void)
return flexpriority_enabled;
 }
 
-static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
+static inline bool nested_cpu_has(struct kvm_vcpu *vcpu, u32 bit)
 {
-   return vmcs12->cpu_based_vm_exec_control & bit;
+   return get_vmcs12(vcpu)->cpu_based_vm_exec_control & bit;
 }
 
-static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
+static inline bool nested_cpu_has2(struct kvm_vcpu *vcpu, u32 bit)
 {
-   return (vmcs12->cpu_based_vm_exec_control &
+   return (get_vmcs12(vcpu)->cpu_based_vm_exec_control &
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
-   (vmcs12->secondary_vm_exec_control & bit);
+   (get_vmcs12(vcpu)->secondary_vm_exec_control & bit);
 }
 
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
-   struct kvm_vcpu *vcpu)
+static inline bool nested_cpu_has_virtual_nmis(struct kvm_vcpu *vcpu)
 {
-   return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+   return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+   PIN_BASED_VIRTUAL_NMIS;
 }
 
 static inline bool is_exception(u32 intr_info)
@@ -1888,7 +1888,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, 
u64 offset)
/* recalculate vmcs02.TSC_OFFSET: */
vmcs12 = get_vmcs12(vcpu);
vmcs_write64(TSC_OFFSET, offset +
-   (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
+   (nested_cpu_has(vcpu, CPU_BASED_USE_TSC_OFFSETING) ?
 vmcs12->tsc_offset : 0));
} else {
vmcs_write64(TSC_OFFSET, offset);
@@ -5712,7 +5712,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu 
*vcpu,
u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
gpa_t bitmap;
 
-   if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS))
+   if (!nested_cpu_has(vcpu, CPU_BASED_USE_MSR_BITMAPS))
return 1;
 
/*
@@ -5768,7 +5768,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu 
*vcpu,
(vmcs12->cr3_target_count >= 4 &&
vmcs12->cr3_target_value3 == val))
return 0;
-   if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
+   if (nested_cpu_has(vcpu, CPU_BASED_CR3_LOAD_EXITING))
return 1;
break;
case 4:
@@ -5777,7 +5777,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu 
*vcpu,
return 1;
break;
case 8:
-   if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
+   if (nested_cpu_has(vcpu, CPU_BASED_CR8_LOAD_EXITING))
return 1;
break;
}
@@ -5865,15 +5865,15 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu 
*vcpu)
case EXIT_REASON_CPUID:
return 1;
case EXIT_REASON_HLT:
-   return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
+   return nested_cpu_has(vcpu, CPU_BASED_HLT_EXITING);
case EXIT_REASON_INVD:
return 1;
case EXIT_REASON_INVLPG:
-   return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
+   return nested_cpu_has(vcpu, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
-   return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
+   return nested_cpu_has(vcpu, CPU_BASED_RDPMC_EXITING);
case EXIT_REASON_RDTSC:
-   return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
+   return nested_cpu_has(vcpu, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
@@ -5887,7 +5887,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_CR_ACCESS:
return nested_vmx_exit_handled_cr(vcpu, vmcs12);
case EXIT_REASON_DR_ACCESS:
-   return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
+   return nested_cpu_has(vcpu

[PATCH v2 0/4] nested vmx code clean up and restructure

2012-11-22 Thread Dongxiao Xu
This patch series cleans up and restructures part of the nested vmx code.
The main purpose is to abstract out the vmcs12_read() and vmcs12_write() functions.
With this change, we have a unified API to get/set field values from/to vmcs12.
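
For example, patch 3/4 turns direct structure access into calls to the new
accessors; the change in call style (fragments taken from the hunks in this
series, not compilable on their own) looks like:

/* before: direct structure member access */
return get_vmcs12(vcpu)->cpu_based_vm_exec_control & bit;

/* after: unified accessor keyed by the VMCS field encoding */
return vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) & bit;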

Changes from v1 to v2:
Move the VMCS field valid check into handle_vmread() and handle_vmwrite() 
functions.

Thanks,
Dongxiao

Dongxiao Xu (4):
  nested vmx: clean up for vmcs12 read and write
  nested vmx: clean up for nested_cpu_has_xxx functions
  nested vmx: use vmcs12_read/write() to operate VMCS fields
  nested vmx: use a list to store the launched vmcs12 for L1 VMM

 arch/x86/kvm/vmx.c |  811 ++--
 1 files changed, 463 insertions(+), 348 deletions(-)


[PATCH 1/4] nested vmx: clean up for vmcs12 read and write

2012-11-21 Thread Dongxiao Xu
Abstract the vmcs12_read and vmcs12_write functions to perform the vmcs12
read/write operations.

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
---
 arch/x86/kvm/vmx.c |   86 +++-
 1 files changed, 45 insertions(+), 41 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index f858159..d8670e4 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -5407,32 +5407,67 @@ static inline int vmcs_field_readonly(unsigned long 
field)
  * some of the bits we return here (e.g., on 32-bit guests, only 32 bits of
  * 64-bit fields are to be returned).
  */
-static inline bool vmcs12_read_any(struct kvm_vcpu *vcpu,
-   unsigned long field, u64 *ret)
+static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field)
 {
short offset = vmcs_field_to_offset(field);
char *p;
 
-   if (offset < 0)
+   if (offset < 0) {
+   nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+   skip_emulated_instruction(vcpu);
return 0;
+   }
 
p = ((char *)(get_vmcs12(vcpu))) + offset;
 
switch (vmcs_field_type(field)) {
case VMCS_FIELD_TYPE_NATURAL_WIDTH:
-   *ret = *((natural_width *)p);
+   return *((natural_width *)p);
+   case VMCS_FIELD_TYPE_U16:
+   return *((u16 *)p);
+   case VMCS_FIELD_TYPE_U32:
+   return *((u32 *)p);
+   case VMCS_FIELD_TYPE_U64:
+   return *((u64 *)p);
+   default:
+   nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+   skip_emulated_instruction(vcpu);
+   return 0; /* can never happen. */
+   }
+}
+
+static inline int vmcs12_write(struct kvm_vcpu *vcpu,
+   unsigned long field,
+   u64 value)
+{
+   short offset = vmcs_field_to_offset(field);
+   char *p;
+
+   if (offset < 0) {
+   nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+   skip_emulated_instruction(vcpu);
+   return 0;
+   }
+
+   p = ((char *)(get_vmcs12(vcpu))) + offset;
+
+   switch (vmcs_field_type(field)) {
+   case VMCS_FIELD_TYPE_NATURAL_WIDTH:
+   *(natural_width *)p = value;
return 1;
case VMCS_FIELD_TYPE_U16:
-   *ret = *((u16 *)p);
+   *(u16 *)p = value;
return 1;
case VMCS_FIELD_TYPE_U32:
-   *ret = *((u32 *)p);
+   *(u32 *)p = value;
return 1;
case VMCS_FIELD_TYPE_U64:
-   *ret = *((u64 *)p);
+   *(u64 *)p = value;
return 1;
default:
-   return 0; /* can never happen. */
+   nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
+   skip_emulated_instruction(vcpu);
+   return 0;
}
 }
 
@@ -5466,11 +5501,7 @@ static int handle_vmread(struct kvm_vcpu *vcpu)
/* Decode instruction info and find the field to read */
field = kvm_register_read(vcpu, (((vmx_instruction_info) >> 28) & 0xf));
/* Read the field, zero-extended to a u64 field_value */
-   if (!vmcs12_read_any(vcpu, field, field_value)) {
-   nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-   skip_emulated_instruction(vcpu);
-   return 1;
-   }
+   field_value = vmcs12_read(vcpu, field);
/*
 * Now copy part of this value to register or memory, as requested.
 * Note that the number of bits actually copied is 32 or 64 depending
@@ -5500,8 +5531,6 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
gva_t gva;
unsigned long exit_qualification = vmcs_readl(EXIT_QUALIFICATION);
u32 vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
-   char *p;
-   short offset;
/* The value to write might be 32 or 64 bits, depending on L1's long
 * mode, and eventually we need to write that into a field of several
 * possible lengths. The code below first zero-extends the value to 64
@@ -5537,33 +5566,8 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu)
skip_emulated_instruction(vcpu);
return 1;
}
-
-   offset = vmcs_field_to_offset(field);
-   if (offset < 0) {
-   nested_vmx_failValid(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
-   skip_emulated_instruction(vcpu);
-   return 1;
-   }
-   p = ((char *) get_vmcs12(vcpu)) + offset;
-
-   switch (vmcs_field_type(field)) {
-   case VMCS_FIELD_TYPE_U16:
-   *(u16 *)p = field_value;
-   break;
-   case VMCS_FIELD_TYPE_U32:
-   *(u32 *)p = field_value;
-   break;
-   case VMCS_FIELD_TYPE_U64:
-   *(u64 *)p = field_value;
-   break

[PATCH 2/4] nested vmx: clean up for nested_cpu_has_xxx functions

2012-11-21 Thread Dongxiao Xu
This is a preparation for the later change, which uses vmcs12_read()
and vmcs12_write() to replace the current way of accessing vmcs12 fields.

Since the above functions take 'vcpu' as a parameter, we also use
'vcpu' as the parameter in the nested_cpu_has_xxx functions.

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
---
 arch/x86/kvm/vmx.c |   57 +--
 1 files changed, 28 insertions(+), 29 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index d8670e4..b036f9c 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -884,22 +884,22 @@ static inline bool report_flexpriority(void)
return flexpriority_enabled;
 }
 
-static inline bool nested_cpu_has(struct vmcs12 *vmcs12, u32 bit)
+static inline bool nested_cpu_has(struct kvm_vcpu *vcpu, u32 bit)
 {
-   return vmcs12->cpu_based_vm_exec_control & bit;
+   return get_vmcs12(vcpu)->cpu_based_vm_exec_control & bit;
 }
 
-static inline bool nested_cpu_has2(struct vmcs12 *vmcs12, u32 bit)
+static inline bool nested_cpu_has2(struct kvm_vcpu *vcpu, u32 bit)
 {
-   return (vmcs12->cpu_based_vm_exec_control &
+   return (get_vmcs12(vcpu)->cpu_based_vm_exec_control &
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
-   (vmcs12->secondary_vm_exec_control & bit);
+   (get_vmcs12(vcpu)->secondary_vm_exec_control & bit);
 }
 
-static inline bool nested_cpu_has_virtual_nmis(struct vmcs12 *vmcs12,
-   struct kvm_vcpu *vcpu)
+static inline bool nested_cpu_has_virtual_nmis(struct kvm_vcpu *vcpu)
 {
-   return vmcs12->pin_based_vm_exec_control & PIN_BASED_VIRTUAL_NMIS;
+   return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+   PIN_BASED_VIRTUAL_NMIS;
 }
 
 static inline bool is_exception(u32 intr_info)
@@ -1883,7 +1883,7 @@ static void vmx_write_tsc_offset(struct kvm_vcpu *vcpu, 
u64 offset)
/* recalculate vmcs02.TSC_OFFSET: */
vmcs12 = get_vmcs12(vcpu);
vmcs_write64(TSC_OFFSET, offset +
-   (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETING) ?
+   (nested_cpu_has(vcpu, CPU_BASED_USE_TSC_OFFSETING) ?
 vmcs12->tsc_offset : 0));
} else {
vmcs_write64(TSC_OFFSET, offset);
@@ -5719,7 +5719,7 @@ static bool nested_vmx_exit_handled_msr(struct kvm_vcpu 
*vcpu,
u32 msr_index = vcpu->arch.regs[VCPU_REGS_RCX];
gpa_t bitmap;
 
-   if (!nested_cpu_has(get_vmcs12(vcpu), CPU_BASED_USE_MSR_BITMAPS))
+   if (!nested_cpu_has(vcpu, CPU_BASED_USE_MSR_BITMAPS))
return 1;
 
/*
@@ -5775,7 +5775,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu 
*vcpu,
(vmcs12->cr3_target_count >= 4 &&
vmcs12->cr3_target_value3 == val))
return 0;
-   if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
+   if (nested_cpu_has(vcpu, CPU_BASED_CR3_LOAD_EXITING))
return 1;
break;
case 4:
@@ -5784,7 +5784,7 @@ static bool nested_vmx_exit_handled_cr(struct kvm_vcpu 
*vcpu,
return 1;
break;
case 8:
-   if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
+   if (nested_cpu_has(vcpu, CPU_BASED_CR8_LOAD_EXITING))
return 1;
break;
}
@@ -5872,15 +5872,15 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu 
*vcpu)
case EXIT_REASON_CPUID:
return 1;
case EXIT_REASON_HLT:
-   return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
+   return nested_cpu_has(vcpu, CPU_BASED_HLT_EXITING);
case EXIT_REASON_INVD:
return 1;
case EXIT_REASON_INVLPG:
-   return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
+   return nested_cpu_has(vcpu, CPU_BASED_INVLPG_EXITING);
case EXIT_REASON_RDPMC:
-   return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
+   return nested_cpu_has(vcpu, CPU_BASED_RDPMC_EXITING);
case EXIT_REASON_RDTSC:
-   return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
+   return nested_cpu_has(vcpu, CPU_BASED_RDTSC_EXITING);
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMREAD:
@@ -5894,7 +5894,7 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu)
case EXIT_REASON_CR_ACCESS:
return nested_vmx_exit_handled_cr(vcpu, vmcs12);
case EXIT_REASON_DR_ACCESS:
-   return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
+   return nested_cpu_has(vcpu

[PATCH 3/4] nested vmx: use vmcs12_read/write() to operate VMCS fields

2012-11-21 Thread Dongxiao Xu
When referencing vmcs12 fields, the current approach is to use the
struct.field style. This commit replaces that approach with calls to the
vmcs12_read() and vmcs12_write() functions.

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
---
 arch/x86/kvm/vmx.c |  591 
 1 files changed, 317 insertions(+), 274 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index b036f9c..6687fb6 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -624,6 +624,11 @@ static void vmx_set_segment(struct kvm_vcpu *vcpu,
 static void vmx_get_segment(struct kvm_vcpu *vcpu,
struct kvm_segment *var, int seg);
 
+static inline u64 vmcs12_read(struct kvm_vcpu *vcpu, unsigned long field);
+static inline int vmcs12_write(struct kvm_vcpu *vcpu,
+   unsigned long field,
+   u64 value);
+
 static DEFINE_PER_CPU(struct vmcs *, vmxarea);
 static DEFINE_PER_CPU(struct vmcs *, current_vmcs);
 /*
@@ -886,19 +891,19 @@ static inline bool report_flexpriority(void)
 
 static inline bool nested_cpu_has(struct kvm_vcpu *vcpu, u32 bit)
 {
-   return get_vmcs12(vcpu)->cpu_based_vm_exec_control & bit;
+   return vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) & bit;
 }
 
 static inline bool nested_cpu_has2(struct kvm_vcpu *vcpu, u32 bit)
 {
-   return (get_vmcs12(vcpu)->cpu_based_vm_exec_control &
+   return (vmcs12_read(vcpu, CPU_BASED_VM_EXEC_CONTROL) &
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
-   (get_vmcs12(vcpu)->secondary_vm_exec_control & bit);
+   (vmcs12_read(vcpu, SECONDARY_VM_EXEC_CONTROL) & bit);
 }
 
 static inline bool nested_cpu_has_virtual_nmis(struct kvm_vcpu *vcpu)
 {
-   return get_vmcs12(vcpu)->pin_based_vm_exec_control &
+   return vmcs12_read(vcpu, PIN_BASED_VM_EXEC_CONTROL) &
PIN_BASED_VIRTUAL_NMIS;
 }
 
@@ -910,7 +915,6 @@ static inline bool is_exception(u32 intr_info)
 
 static void nested_vmx_vmexit(struct kvm_vcpu *vcpu);
 static void nested_vmx_entry_failure(struct kvm_vcpu *vcpu,
-   struct vmcs12 *vmcs12,
u32 reason, unsigned long qualification);
 
 static int __find_msr_index(struct vcpu_vmx *vmx, u32 msr)
@@ -1215,7 +1219,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu)
 * specified above if L1 did not want them.
 */
if (is_guest_mode(vcpu))
-   eb |= get_vmcs12(vcpu)->exception_bitmap;
+   eb |= vmcs12_read(vcpu, EXCEPTION_BITMAP);
 
vmcs_write32(EXCEPTION_BITMAP, eb);
 }
@@ -1577,7 +1581,7 @@ static void vmx_fpu_activate(struct kvm_vcpu *vcpu)
vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS;
if (is_guest_mode(vcpu))
vcpu->arch.cr0_guest_owned_bits &=
-   ~get_vmcs12(vcpu)->cr0_guest_host_mask;
+   ~vmcs12_read(vcpu, CR0_GUEST_HOST_MASK);
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
 }
 
@@ -1588,15 +1592,19 @@ static void vmx_decache_cr0_guest_bits(struct kvm_vcpu 
*vcpu);
  * of the real cr0 used to run the guest (guest_cr0), and the bits shadowed by
  * its hypervisor (cr0_read_shadow).
  */
-static inline unsigned long nested_read_cr0(struct vmcs12 *fields)
+static inline unsigned long nested_read_cr0(struct kvm_vcpu *vcpu)
 {
-   return (fields->guest_cr0 & ~fields->cr0_guest_host_mask) |
-   (fields->cr0_read_shadow & fields->cr0_guest_host_mask);
+   return (vmcs12_read(vcpu, GUEST_CR0) &
+   ~vmcs12_read(vcpu, CR0_GUEST_HOST_MASK)) |
+   (vmcs12_read(vcpu, CR0_READ_SHADOW) &
+   vmcs12_read(vcpu, CR0_GUEST_HOST_MASK));
 }
-static inline unsigned long nested_read_cr4(struct vmcs12 *fields)
+static inline unsigned long nested_read_cr4(struct kvm_vcpu *vcpu)
 {
-   return (fields->guest_cr4 & ~fields->cr4_guest_host_mask) |
-   (fields->cr4_read_shadow & fields->cr4_guest_host_mask);
+   return (vmcs12_read(vcpu, GUEST_CR4) &
+   ~vmcs12_read(vcpu, CR4_GUEST_HOST_MASK)) |
+   (vmcs12_read(vcpu, CR4_READ_SHADOW) &
+   vmcs12_read(vcpu, CR4_GUEST_HOST_MASK));
 }
 
 static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
@@ -1618,10 +1626,10 @@ static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu)
 * up-to-date here because we just decached cr0.TS (and we'll
 * only update vmcs12-guest_cr0 on nested exit).
 */
-   struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
-   vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) |
+   u64 guest_cr0 = (vmcs12_read(vcpu, GUEST_CR0) & ~X86_CR0_TS) |
(vcpu->arch.cr0 & X86_CR0_TS);
-   vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
+   vmcs12_write(vcpu, GUEST_CR0, guest_cr0);
+   vmcs_writel

[PATCH 4/4] nested vmx: use a list to store the launched vmcs12 for L1 VMM

2012-11-21 Thread Dongxiao Xu
The launch state is not a member of the VMCS area; use a separate
variable (a list) to store it instead.

Signed-off-by: Dongxiao Xu dongxiao...@intel.com
---
 arch/x86/kvm/vmx.c |   86 +---
 1 files changed, 81 insertions(+), 5 deletions(-)

diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 6687fb6..d03ab4e 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -177,8 +177,7 @@ struct __packed vmcs12 {
u32 revision_id;
u32 abort;
 
-   u32 launch_state; /* set to 0 by VMCLEAR, to 1 by VMLAUNCH */
-   u32 padding[7]; /* room for future expansion */
+   u32 padding[8]; /* room for future expansion */
 
u64 io_bitmap_a;
u64 io_bitmap_b;
@@ -339,6 +338,11 @@ struct vmcs02_list {
struct loaded_vmcs vmcs02;
 };
 
+struct vmcs12_list {
+   unsigned long vmcs12_pa;
+   struct list_head node;
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu.
@@ -364,6 +368,8 @@ struct nested_vmx {
 * we must keep them pinned while L2 runs.
 */
struct page *apic_access_page;
+   /* vmcs12_pool contains the launched vmcs12. */
+   struct list_head vmcs12_pool;
 };
 
 struct vcpu_vmx {
@@ -614,6 +620,58 @@ static void nested_release_page_clean(struct page *page)
kvm_release_page_clean(page);
 }
 
+static int vmcs12_launched(struct list_head *vmcs12_pool,
+  unsigned long vmcs12_pa)
+{
+   struct vmcs12_list *iter;
+   struct list_head *pos;
+   int launched = 0;
+
+   list_for_each(pos, vmcs12_pool) {
+   iter = list_entry(pos, struct vmcs12_list, node);
+   if (vmcs12_pa == iter->vmcs12_pa) {
+   launched = 1;
+   break;
+   }
+   }
+
+   return launched;
+}
+
+static int set_vmcs12_launched(struct list_head *vmcs12_pool,
+  unsigned long vmcs12_pa)
+{
+   struct vmcs12_list *vmcs12;
+
+   if (vmcs12_launched(vmcs12_pool, vmcs12_pa))
+   return 0;
+
+   vmcs12 = kzalloc(sizeof(struct vmcs12_list), GFP_KERNEL);
+   if (!vmcs12)
+   return -ENOMEM;
+
+   vmcs12->vmcs12_pa = vmcs12_pa;
+   list_add(&vmcs12->node, vmcs12_pool);
+
+   return 0;
+}
+
+static void clear_vmcs12_launched(struct list_head *vmcs12_pool,
+  unsigned long vmcs12_pa)
+{
+   struct vmcs12_list *iter;
+   struct list_head *pos;
+
+   list_for_each(pos, vmcs12_pool) {
+   iter = list_entry(pos, struct vmcs12_list, node);
+   if (vmcs12_pa == iter->vmcs12_pa) {
+   list_del(&iter->node);
+   kfree(iter);
+   break;
+   }
+   }
+}
+
 static u64 construct_eptp(unsigned long root_hpa);
 static void kvm_cpu_vmxon(u64 addr);
 static void kvm_cpu_vmxoff(void);
@@ -5111,6 +5169,18 @@ static void nested_free_all_saved_vmcss(struct vcpu_vmx 
*vmx)
 }
 
 /*
+ * Free the vmcs12 list.
+ */
+static void nested_free_vmcs12_list(struct vcpu_vmx *vmx)
+{
+   struct vmcs12_list *item, *n;
+   list_for_each_entry_safe(item, n, &vmx->nested.vmcs12_pool, node) {
+   list_del(&item->node);
+   kfree(item);
+   }
+}
+
+/*
  * Emulate the VMXON instruction.
  * Currently, we just remember that VMX is active, and do not save or even
  * inspect the argument to VMXON (the so-called VMXON pointer) because we
@@ -5207,6 +5277,7 @@ static void free_nested(struct vcpu_vmx *vmx)
}
 
nested_free_all_saved_vmcss(vmx);
+   nested_free_vmcs12_list(vmx);
 }
 
 /* Emulate the VMXOFF instruction */
@@ -5359,7 +5430,7 @@ static int handle_vmclear(struct kvm_vcpu *vcpu)
return 1;
}
vmcs12 = kmap(page);
-   vmcs12->launch_state = 0;
+   clear_vmcs12_launched(&vmx->nested.vmcs12_pool, __pa(vmcs12));
kunmap(page);
nested_release_page(page);
 
@@ -6467,6 +6538,7 @@ static struct kvm_vcpu *vmx_create_vcpu(struct kvm *kvm, 
unsigned int id)
 
vmx->nested.current_vmptr = -1ull;
vmx->nested.current_vmcs12 = NULL;
+   INIT_LIST_HEAD(&vmx->nested.vmcs12_pool);
 
return &vmx->vcpu;
 
@@ -6846,6 +6918,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
launch)
struct vcpu_vmx *vmx = to_vmx(vcpu);
int cpu;
struct loaded_vmcs *vmcs02;
+   int is_launched;
 
if (!nested_vmx_check_permission(vcpu) ||
!nested_vmx_check_vmcs12(vcpu))
@@ -6864,7 +6937,9 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool 
launch)
 * for misconfigurations which will anyway be caught by the processor
 * when using the merged vmcs02.
 */
-   if (vmcs12->launch_state == launch) {
+   is_launched =
+   vmcs12_launched(vmx

[PATCH 0/4] nested vmx code clean up and restructure

2012-11-21 Thread Dongxiao Xu
This patch series cleans up and restructures part of the nested vmx code.
The main purpose is to abstract out the vmcs12_read() and vmcs12_write() functions.
With this change, we have a unified API to get/set field values from/to vmcs12.

Thanks,
Dongxiao

Dongxiao Xu (4):
  nested vmx: clean up for vmcs12 read and write
  nested vmx: clean up for nested_cpu_has_xxx functions
  nested vmx: use vmcs12_read/write() to operate VMCS fields
  nested vmx: use a list to store the launched vmcs12 for L1 VMM

 arch/x86/kvm/vmx.c |  812 ++--
 1 files changed, 467 insertions(+), 345 deletions(-)
