Re: [PATCH v3 10/13] nEPT: Nested INVEPT

2013-05-21 Thread Xiao Guangrong
On 05/19/2013 12:52 PM, Jun Nakajima wrote:
> From: Nadav Har'El n...@il.ibm.com
>
> If we let L1 use EPT, we should probably also support the INVEPT instruction.
>
> In our current nested EPT implementation, when L1 changes its EPT table for
> L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in the course

Hmm?

L0 cannot always intercept L1's changes, due to unsync shadow pages...

> of this modification already calls INVEPT. Therefore, when L1 calls INVEPT,
> we don't really need to do anything. In particular we *don't* need to call

So I cannot understand why we don't need to handle INVEPT.
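To make the concern concrete: if L0 may have missed some EPT12 updates, a
nested INVEPT would have to actually invalidate the shadow (EPT02) state
rather than do nothing. A minimal sketch of that idea follows; the helper
name is hypothetical, reusing kvm_mmu_unload() here is an assumption and not
something this patch does, and whether dropping the roots alone is enough for
the unsync case is exactly the open question:

static void nested_ept_invalidate(struct kvm_vcpu *vcpu)
{
	/*
	 * Drop the current shadow (EPT02) roots; kvm_mmu_reload() rebuilds
	 * them before the next guest entry, so translations derived from
	 * the old EPT12 are no longer reachable through the root.
	 */
	kvm_mmu_unload(vcpu);
}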



Re: [PATCH v3 10/13] nEPT: Nested INVEPT

2013-05-20 Thread Paolo Bonzini
On 19/05/2013 06:52, Jun Nakajima wrote:
> + switch (type) {
> + case VMX_EPT_EXTENT_GLOBAL:
> + if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT))
> + nested_vmx_failValid(vcpu,
> + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
> + else {
> + /*
> +  * Do nothing: when L1 changes EPT12, we already
> +  * update EPT02 (the shadow EPT table) and call INVEPT.
> +  * So when L1 calls INVEPT, there's nothing left to do.
> +  */
> + nested_vmx_succeed(vcpu);
> + }
> + break;

Duplicate code:

switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
	ok = (nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT) != 0;
	break;
...
default:
	ok = false;
	break;
}
if (ok) {
	/* Do nothing: ... */
	nested_vmx_succeed(vcpu);
} else {
	nested_vmx_failValid(vcpu, ...);
}
break;
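Spelled out for the three extent types the patch already handles, that shape
might look roughly like this (just a sketch along the lines above, using a
local bool ok):

bool ok;

switch (type) {
case VMX_EPT_EXTENT_GLOBAL:
	ok = (nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT) != 0;
	break;
case VMX_EPT_EXTENT_CONTEXT:
	ok = (nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT) != 0;
	break;
case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
	ok = (nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT) != 0;
	break;
default:
	ok = false;
	break;
}
if (ok) {
	/*
	 * Do nothing: when L1 changes EPT12, EPT02 (the shadow EPT
	 * table) is already updated and INVEPT already executed, so
	 * there is nothing left to do here.
	 */
	nested_vmx_succeed(vcpu);
} else {
	nested_vmx_failValid(vcpu,
		VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
}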

Paolo

> + case VMX_EPT_EXTENT_CONTEXT:
> + if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT))
> + nested_vmx_failValid(vcpu,
> + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
> + else {
> + /* Do nothing */
> + nested_vmx_succeed(vcpu);
> + }
> + break;
> + case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
> + if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT))
> + nested_vmx_failValid(vcpu,
> + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
> + else {
> + /* Do nothing */
> + nested_vmx_succeed(vcpu);
> + }
> + break;
> + default:
> + nested_vmx_failValid(vcpu,
> + VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);



[PATCH v3 10/13] nEPT: Nested INVEPT

2013-05-18 Thread Jun Nakajima
From: Nadav Har'El n...@il.ibm.com

If we let L1 use EPT, we should probably also support the INVEPT instruction.

In our current nested EPT implementation, when L1 changes its EPT table for
L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in the course
of this modification already calls INVEPT. Therefore, when L1 calls INVEPT,
we don't really need to do anything. In particular we *don't* need to call
the real INVEPT again. All we do in our INVEPT is verify the validity of the
call, and its parameters, and then do nothing.

In KVM Forum 2010, Dong et al. presented "Nested Virtualization Friendly KVM"
and classified our current nested EPT implementation as shadow-like virtual
EPT. They recommended instead a different approach, which they called VTLB-like
virtual EPT. If we had taken that alternative approach, INVEPT would have had
a bigger role: L0 would only rebuild the shadow EPT table when L1 calls INVEPT.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com
Signed-off-by: Xinhao Xu xinhao...@intel.com
---
 arch/x86/include/uapi/asm/vmx.h |  1 +
 arch/x86/kvm/vmx.c  | 83 +
 2 files changed, 84 insertions(+)

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index d651082..7a34e8f 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,7 @@
 #define EXIT_REASON_EOI_INDUCED 45
 #define EXIT_REASON_EPT_VIOLATION   48
 #define EXIT_REASON_EPT_MISCONFIG   49
+#define EXIT_REASON_INVEPT  50
 #define EXIT_REASON_PREEMPTION_TIMER52
 #define EXIT_REASON_WBINVD  54
 #define EXIT_REASON_XSETBV  55
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 1cf8a41..d9d991d 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6251,6 +6251,87 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return 1;
 }
 
+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+   u32 vmx_instruction_info;
+   unsigned long type;
+   gva_t gva;
+   struct x86_exception e;
+   struct {
+   u64 eptp, gpa;
+   } operand;
+
+   if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
+   !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+   kvm_queue_exception(vcpu, UD_VECTOR);
+   return 1;
+   }
+
+   if (!nested_vmx_check_permission(vcpu))
+   return 1;
+
+   if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+   kvm_queue_exception(vcpu, UD_VECTOR);
+   return 1;
+   }
+
+   /* According to the Intel VMX instruction reference, the memory
+* operand is read even if it isn't needed (e.g., for type==global)
+*/
+   vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+   if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+   vmx_instruction_info, &gva))
+   return 1;
+   if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+   sizeof(operand), &e)) {
+   kvm_inject_page_fault(vcpu, &e);
+   return 1;
+   }
+
+   type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+   switch (type) {
+   case VMX_EPT_EXTENT_GLOBAL:
+   if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT))
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   else {
+   /*
+* Do nothing: when L1 changes EPT12, we already
+* update EPT02 (the shadow EPT table) and call INVEPT.
+* So when L1 calls INVEPT, there's nothing left to do.
+*/
+   nested_vmx_succeed(vcpu);
+   }
+   break;
+   case VMX_EPT_EXTENT_CONTEXT:
+   if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT))
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   else {
+   /* Do nothing */
+   nested_vmx_succeed(vcpu);
+   }
+   break;
+   case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
+   if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT))
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   else {
+   /* Do nothing */
+   nested_vmx_succeed(vcpu);
+   }
+   break;
+   default:
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   }
+
+   skip_emulated_instruction(vcpu);
+   return 1;

[PATCH v3 10/13] nEPT: Nested INVEPT

2013-05-08 Thread Jun Nakajima
If we let L1 use EPT, we should probably also support the INVEPT instruction.

In our current nested EPT implementation, when L1 changes its EPT table for
L2 (i.e., EPT12), L0 modifies the shadow EPT table (EPT02), and in the course
of this modification already calls INVEPT. Therefore, when L1 calls INVEPT,
we don't really need to do anything. In particular we *don't* need to call
the real INVEPT again. All we do in our INVEPT is verify the validity of the
call, and its parameters, and then do nothing.

In KVM Forum 2010, Dong et al. presented "Nested Virtualization Friendly KVM"
and classified our current nested EPT implementation as shadow-like virtual
EPT. They recommended instead a different approach, which they called VTLB-like
virtual EPT. If we had taken that alternative approach, INVEPT would have had
a bigger role: L0 would only rebuild the shadow EPT table when L1 calls INVEPT.

Signed-off-by: Nadav Har'El n...@il.ibm.com
Signed-off-by: Jun Nakajima jun.nakaj...@intel.com
Signed-off-by: Xinhao Xu xinhao...@intel.com
---
 arch/x86/include/uapi/asm/vmx.h |  1 +
 arch/x86/kvm/vmx.c  | 83 +
 2 files changed, 84 insertions(+)

diff --git a/arch/x86/include/uapi/asm/vmx.h b/arch/x86/include/uapi/asm/vmx.h
index d651082..7a34e8f 100644
--- a/arch/x86/include/uapi/asm/vmx.h
+++ b/arch/x86/include/uapi/asm/vmx.h
@@ -65,6 +65,7 @@
 #define EXIT_REASON_EOI_INDUCED 45
 #define EXIT_REASON_EPT_VIOLATION   48
 #define EXIT_REASON_EPT_MISCONFIG   49
+#define EXIT_REASON_INVEPT  50
 #define EXIT_REASON_PREEMPTION_TIMER52
 #define EXIT_REASON_WBINVD  54
 #define EXIT_REASON_XSETBV  55
diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c
index 136fc25..9ceab42 100644
--- a/arch/x86/kvm/vmx.c
+++ b/arch/x86/kvm/vmx.c
@@ -6245,6 +6245,87 @@ static int handle_vmptrst(struct kvm_vcpu *vcpu)
return 1;
 }
 
+/* Emulate the INVEPT instruction */
+static int handle_invept(struct kvm_vcpu *vcpu)
+{
+   u32 vmx_instruction_info;
+   unsigned long type;
+   gva_t gva;
+   struct x86_exception e;
+   struct {
+   u64 eptp, gpa;
+   } operand;
+
+   if (!(nested_vmx_secondary_ctls_high & SECONDARY_EXEC_ENABLE_EPT) ||
+   !(nested_vmx_ept_caps & VMX_EPT_INVEPT_BIT)) {
+   kvm_queue_exception(vcpu, UD_VECTOR);
+   return 1;
+   }
+
+   if (!nested_vmx_check_permission(vcpu))
+   return 1;
+
+   if (!kvm_read_cr0_bits(vcpu, X86_CR0_PE)) {
+   kvm_queue_exception(vcpu, UD_VECTOR);
+   return 1;
+   }
+
+   /* According to the Intel VMX instruction reference, the memory
+* operand is read even if it isn't needed (e.g., for type==global)
+*/
+   vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
+   if (get_vmx_mem_address(vcpu, vmcs_readl(EXIT_QUALIFICATION),
+   vmx_instruction_info, &gva))
+   return 1;
+   if (kvm_read_guest_virt(&vcpu->arch.emulate_ctxt, gva, &operand,
+   sizeof(operand), &e)) {
+   kvm_inject_page_fault(vcpu, &e);
+   return 1;
+   }
+
+   type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
+
+   switch (type) {
+   case VMX_EPT_EXTENT_GLOBAL:
+   if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_GLOBAL_BIT))
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   else {
+   /*
+* Do nothing: when L1 changes EPT12, we already
+* update EPT02 (the shadow EPT table) and call INVEPT.
+* So when L1 calls INVEPT, there's nothing left to do.
+*/
+   nested_vmx_succeed(vcpu);
+   }
+   break;
+   case VMX_EPT_EXTENT_CONTEXT:
+   if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_CONTEXT_BIT))
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   else {
+   /* Do nothing */
+   nested_vmx_succeed(vcpu);
+   }
+   break;
+   case VMX_EPT_EXTENT_INDIVIDUAL_ADDR:
+   if (!(nested_vmx_ept_caps & VMX_EPT_EXTENT_INDIVIDUAL_BIT))
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   else {
+   /* Do nothing */
+   nested_vmx_succeed(vcpu);
+   }
+   break;
+   default:
+   nested_vmx_failValid(vcpu,
+   VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
+   }
+
+   skip_emulated_instruction(vcpu);
+   return 1;
+}
+
 /*
  * The exit handlers return