Re: [PATCH 08/28] nVMX: Hold a vmcs02 for each vmcs12

2010-12-09 Thread Avi Kivity

On 12/08/2010 07:04 PM, Nadav Har'El wrote:

In this patch we add a list of L0 (hardware) VMCSs, which we'll use to hold a
hardware VMCS for each active vmcs12 (i.e., for each L2 guest).

We call each of these L0 VMCSs a "vmcs02", as it is the VMCS that L0 uses
to run its nested guest L2.

Signed-off-by: Nadav Har'El
---
  arch/x86/kvm/vmx.c |   96 +++
  1 file changed, 96 insertions(+)

--- .before/arch/x86/kvm/vmx.c  2010-12-08 18:56:49.0 +0200
+++ .after/arch/x86/kvm/vmx.c   2010-12-08 18:56:49.0 +0200
@@ -155,6 +155,12 @@ struct __packed vmcs12 {
   */
  #define VMCS12_REVISION 0x11e57ed0

+struct vmcs_list {
+   struct list_head list;
+   gpa_t vmcs12_addr;
+   struct vmcs *vmcs02;
+};
+
  /*
   * The nested_vmx structure is part of vcpu_vmx, and holds information we need
   * for correct emulation of VMX (i.e., nested VMX) on this vcpu. For example,
@@ -170,6 +176,10 @@ struct nested_vmx {
/* The host-usable pointer to the above */
struct page *current_vmcs12_page;
struct vmcs12 *current_vmcs12;
+
+   /* list of real (hardware) VMCS, one for each L2 guest of L1 */
+   struct list_head vmcs02_list; /* a vmcs_list */
+   int vmcs02_num;
  };

  struct vcpu_vmx {
@@ -1736,6 +1746,85 @@ static void free_vmcs(struct vmcs *vmcs)
free_pages((unsigned long)vmcs, vmcs_config.order);
  }

+static struct vmcs *nested_get_current_vmcs(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   struct vmcs_list *list_item, *n;
+
+   list_for_each_entry_safe(list_item, n,&vmx->nested.vmcs02_list, list)
+   if (list_item->vmcs12_addr == vmx->nested.current_vmptr)
+   return list_item->vmcs02;
+
+   return NULL;
+}
+
+/*
+ * Allocate an L0 VMCS (vmcs02) for the current L1 VMCS (vmcs12), if one
+ * does not already exist. The allocation is done in L0 memory, so to avoid
+ * denial-of-service attack by guests, we limit the number of concurrently-
+ * allocated vmcss. A well-behaving L1 will VMCLEAR unused vmcs12s and not
+ * trigger this limit.
+ */
+static const int NESTED_MAX_VMCS = 256;
+static int nested_create_current_vmcs(struct kvm_vcpu *vcpu)
+{
+   struct vmcs_list *new_l2_guest;
+   struct vmcs *vmcs02;
+
+   if (nested_get_current_vmcs(vcpu))
+   return 0; /* nothing to do - we already have a VMCS */
+
+   if (to_vmx(vcpu)->nested.vmcs02_num>= NESTED_MAX_VMCS)
+   return -ENOMEM;


I asked for this to be fixed (say, by freeing one vmcs02 from the list).
As it stands, the guest can easily crash by running a lot of nested guests.


Actually, you don't have to free it; simply reuse it for the new vmcs12.


+
+   new_l2_guest = (struct vmcs_list *)
+   kmalloc(sizeof(struct vmcs_list), GFP_KERNEL);
+   if (!new_l2_guest)
+   return -ENOMEM;
+
+   vmcs02 = alloc_vmcs();
+   if (!vmcs02) {
+   kfree(new_l2_guest);
+   return -ENOMEM;
+   }
+
+   new_l2_guest->vmcs12_addr = to_vmx(vcpu)->nested.current_vmptr;
+   new_l2_guest->vmcs02 = vmcs02;
+   list_add(&(new_l2_guest->list),&(to_vmx(vcpu)->nested.vmcs02_list));
+   to_vmx(vcpu)->nested.vmcs02_num++;
+   return 0;
+}
+


--
error compiling committee.c: too many arguments to function

--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 08/28] nVMX: Hold a vmcs02 for each vmcs12

2010-12-08 Thread Nadav Har'El
In this patch we add a list of L0 (hardware) VMCSs, which we'll use to hold a 
hardware VMCS for each active vmcs12 (i.e., for each L2 guest).

We call each of these L0 VMCSs a "vmcs02", as it is the VMCS that L0 uses
to run its nested guest L2.

Signed-off-by: Nadav Har'El 
---
 arch/x86/kvm/vmx.c |   96 +++
 1 file changed, 96 insertions(+)

--- .before/arch/x86/kvm/vmx.c  2010-12-08 18:56:49.0 +0200
+++ .after/arch/x86/kvm/vmx.c   2010-12-08 18:56:49.0 +0200
@@ -155,6 +155,12 @@ struct __packed vmcs12 {
  */
 #define VMCS12_REVISION 0x11e57ed0
 
+struct vmcs_list {
+   struct list_head list;
+   gpa_t vmcs12_addr;
+   struct vmcs *vmcs02;
+};
+
 /*
  * The nested_vmx structure is part of vcpu_vmx, and holds information we need
  * for correct emulation of VMX (i.e., nested VMX) on this vcpu. For example,
@@ -170,6 +176,10 @@ struct nested_vmx {
/* The host-usable pointer to the above */
struct page *current_vmcs12_page;
struct vmcs12 *current_vmcs12;
+
+   /* list of real (hardware) VMCS, one for each L2 guest of L1 */
+   struct list_head vmcs02_list; /* a vmcs_list */
+   int vmcs02_num;
 };
 
 struct vcpu_vmx {
@@ -1736,6 +1746,85 @@ static void free_vmcs(struct vmcs *vmcs)
free_pages((unsigned long)vmcs, vmcs_config.order);
 }
 
+static struct vmcs *nested_get_current_vmcs(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   struct vmcs_list *list_item, *n;
+
+   list_for_each_entry_safe(list_item, n, &vmx->nested.vmcs02_list, list)
+   if (list_item->vmcs12_addr == vmx->nested.current_vmptr)
+   return list_item->vmcs02;
+
+   return NULL;
+}
+
+/*
+ * Allocate an L0 VMCS (vmcs02) for the current L1 VMCS (vmcs12), if one
+ * does not already exist. The allocation is done in L0 memory, so to avoid
+ * denial-of-service attack by guests, we limit the number of concurrently-
+ * allocated vmcss. A well-behaving L1 will VMCLEAR unused vmcs12s and not
+ * trigger this limit.
+ */
+static const int NESTED_MAX_VMCS = 256;
+static int nested_create_current_vmcs(struct kvm_vcpu *vcpu)
+{
+   struct vmcs_list *new_l2_guest;
+   struct vmcs *vmcs02;
+
+   if (nested_get_current_vmcs(vcpu))
+   return 0; /* nothing to do - we already have a VMCS */
+
+   if (to_vmx(vcpu)->nested.vmcs02_num >= NESTED_MAX_VMCS)
+   return -ENOMEM;
+
+   new_l2_guest = (struct vmcs_list *)
+   kmalloc(sizeof(struct vmcs_list), GFP_KERNEL);
+   if (!new_l2_guest)
+   return -ENOMEM;
+
+   vmcs02 = alloc_vmcs();
+   if (!vmcs02) {
+   kfree(new_l2_guest);
+   return -ENOMEM;
+   }
+
+   new_l2_guest->vmcs12_addr = to_vmx(vcpu)->nested.current_vmptr;
+   new_l2_guest->vmcs02 = vmcs02;
+   list_add(&(new_l2_guest->list), &(to_vmx(vcpu)->nested.vmcs02_list));
+   to_vmx(vcpu)->nested.vmcs02_num++;
+   return 0;
+}
+
+/* Free a vmcs12's associated vmcs02, and remove it from vmcs02_list */
+static void nested_free_vmcs(struct kvm_vcpu *vcpu, gpa_t vmptr)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   struct vmcs_list *list_item, *n;
+
+   list_for_each_entry_safe(list_item, n, &vmx->nested.vmcs02_list, list)
+   if (list_item->vmcs12_addr == vmptr) {
+   free_vmcs(list_item->vmcs02);
+   list_del(&(list_item->list));
+   kfree(list_item);
+   vmx->nested.vmcs02_num--;
+   return;
+   }
+}
+
+static void free_l1_state(struct kvm_vcpu *vcpu)
+{
+   struct vcpu_vmx *vmx = to_vmx(vcpu);
+   struct vmcs_list *list_item, *n;
+
+   list_for_each_entry_safe(list_item, n,
+   &vmx->nested.vmcs02_list, list) {
+   free_vmcs(list_item->vmcs02);
+   list_del(&(list_item->list));
+   kfree(list_item);
+   }
+   vmx->nested.vmcs02_num = 0;
+}
+
 static void free_kvm_area(void)
 {
int cpu;
@@ -3884,6 +3973,9 @@ static int handle_vmon(struct kvm_vcpu *
return 1;
}
 
+   INIT_LIST_HEAD(&(vmx->nested.vmcs02_list));
+   vmx->nested.vmcs02_num = 0;
+
vmx->nested.vmxon = true;
 
skip_emulated_instruction(vcpu);
@@ -3931,6 +4023,8 @@ static int handle_vmoff(struct kvm_vcpu 
if (to_vmx(vcpu)->nested.current_vmptr != -1ull)
nested_release_page(to_vmx(vcpu)->nested.current_vmcs12_page);
 
+   free_l1_state(vcpu);
+
skip_emulated_instruction(vcpu);
return 1;
 }
@@ -4420,6 +4514,8 @@ static void vmx_free_vcpu(struct kvm_vcp
free_vpid(vmx);
if (vmx->nested.vmxon && to_vmx(vcpu)->nested.current_vmptr != -1ull)
nested_release_page(to_vmx(vcpu)->nested.current_vmcs12_page);
+   if (vmx->nested.vmxon)
+   free_l1_