KVM's existing shadow MMU code already supports nested TDP. To use it, we
need to set up a new "MMU context" for nested EPT, and create a few callbacks
for it (nested_ept_*()). We then need to switch back and forth between this
nested context and the regular MMU context when switching between L1 and L2.

Signed-off-by: Nadav Har'El <n...@il.ibm.com>
---
 arch/x86/kvm/vmx.c |   60 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 60 insertions(+)

--- .before/arch/x86/kvm/vmx.c  2011-11-10 11:33:58.000000000 +0200
+++ .after/arch/x86/kvm/vmx.c   2011-11-10 11:33:58.000000000 +0200
@@ -6443,6 +6443,59 @@ static void vmx_set_supported_cpuid(u32 
                entry->ecx |= bit(X86_FEATURE_VMX);
 }
 
+/* Callbacks installed on vcpu->arch.mmu by nested_ept_init_mmu_context(): */
+static unsigned long nested_ept_get_cr3(struct kvm_vcpu *vcpu)
+{
+	/* Root of the to-be-shadowed table: the L1-supplied EPT pointer (EPT12) */
+	return get_vmcs12(vcpu)->ept_pointer;
+}
+
+static u64 nested_ept_get_pdptr(struct kvm_vcpu *vcpu, int index)
+{
+	/*
+	 * mmu.get_pdptr() is called from mmu.c to help read a to-be-shadowed
+	 * page table in PAE (3-level) format. The EPT table we shadow here
+	 * always has 4 levels and is never in PAE format, so this callback
+	 * should never run; treat it as a fatal guest error if it does.
+	 */
+	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
+	return 0;
+}
+
+static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
+	struct x86_exception *fault)
+{
+	struct vmcs12 *vmcs12;
+	nested_vmx_vmexit(vcpu);	/* reflect the fault to L1 as an EPT-violation exit */
+	vmcs12 = get_vmcs12(vcpu);	/* read vmcs12 only after the exit above */
+	/*
+	 * Note no need to set vmcs12->vm_exit_reason as it is already copied
+	 * from vmcs02 in nested_vmx_vmexit() above, i.e., EPT_VIOLATION.
+	 */
+	vmcs12->exit_qualification = fault->error_code;	/* error_code carries the EPT exit qualification bits */
+	vmcs12->guest_physical_address = fault->address;	/* faulting L2 guest-physical address */
+}
+
+static int nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
+{
+	int r = kvm_init_shadow_mmu(vcpu, &vcpu->arch.mmu);
+	/* NOTE(review): callbacks below are installed even if r != 0 — confirm intended */
+	vcpu->arch.mmu.set_cr3           = vmx_set_cr3;
+	vcpu->arch.mmu.get_cr3           = nested_ept_get_cr3;	/* returns EPT12, not a real CR3 */
+	vcpu->arch.mmu.get_pdptr         = nested_ept_get_pdptr;	/* never valid for 4-level EPT */
+	vcpu->arch.mmu.inject_page_fault = nested_ept_inject_page_fault;
+	vcpu->arch.mmu.shadow_root_level = get_ept_level();
+
+	vcpu->arch.walk_mmu              = &vcpu->arch.nested_mmu;	/* guest walks use the nested mmu */
+
+	return r;
+}
+
+static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
+{
+	vcpu->arch.walk_mmu = &vcpu->arch.mmu;	/* restore the regular (non-nested) mmu for guest walks */
+}
+
 /*
  * prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
  * L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
@@ -6652,6 +6705,11 @@ static void prepare_vmcs02(struct kvm_vc
                vmx_flush_tlb(vcpu);
        }
 
+       if (nested_cpu_has_ept(vmcs12)) {
+               kvm_mmu_unload(vcpu);
+               nested_ept_init_mmu_context(vcpu);
+       }
+
        if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)
                vcpu->arch.efer = vmcs12->guest_ia32_efer;
        if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
@@ -6982,6 +7040,8 @@ void load_vmcs12_host_state(struct kvm_v
        vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
        kvm_set_cr4(vcpu, vmcs12->host_cr4);
 
+       if (nested_cpu_has_ept(vmcs12))
+               nested_ept_uninit_mmu_context(vcpu);
        /* shadow page tables on either EPT or shadow page tables */
        kvm_set_cr3(vcpu, vmcs12->host_cr3);
        kvm_mmu_reset_context(vcpu);
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to