Hypercall-based pte updates are faster than taking a page fault for
each write, and they also allow use of lazy MMU mode to batch
operations.
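
As a minimal, hypothetical sketch of the batching effect (the helper
below is not part of this patch; pte_clear() and the
arch_{enter,leave}_lazy_mmu_mode() pair are the existing kernel
interfaces from asm/pgtable.h), a caller tearing down a range of ptes
would batch like this:

	/*
	 * With the pv_mmu_ops hooks from this patch installed, each
	 * pte_clear() below turns into a KVM_HYPERCALL_MMU_WRITE.  The
	 * lazy MMU section lets paravirt coalesce those hypercalls
	 * instead of taking one guest exit per pte.
	 */
	static void example_clear_ptes(struct mm_struct *mm,
				       unsigned long addr,
				       unsigned long end, pte_t *ptep)
	{
		arch_enter_lazy_mmu_mode();
		for (; addr < end; addr += PAGE_SIZE, ptep++)
			pte_clear(mm, addr, ptep);
		arch_leave_lazy_mmu_mode();
	}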

Don't report the feature if two-dimensional paging is enabled: with
TDP the host does not shadow the guest page tables, so there is
nothing to gain from intercepting pte writes.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Cc: Anthony Liguori <[EMAIL PROTECTED]>


Index: kvm.paravirt/arch/x86/kernel/kvm.c
===================================================================
--- kvm.paravirt.orig/arch/x86/kernel/kvm.c
+++ kvm.paravirt/arch/x86/kernel/kvm.c
@@ -33,6 +33,91 @@ static void kvm_io_delay(void)
 {
 }
 
+static void kvm_mmu_write(void *dest, const void *src, size_t size)
+{
+       const u8 *p = src;
+       unsigned long a0 = *(const unsigned long *)p;
+       unsigned long a1 = 0;
+
+       /* The hypercall ABI passes the size in 32-bit words. */
+       size >>= 2;
+#ifdef CONFIG_X86_32
+       /* A 64-bit (PAE) entry is passed as two 32-bit halves. */
+       if (size == 2)
+               a1 = *(const u32 *)&p[4];
+#endif
+       kvm_hypercall4(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
+                      a1);
+}
+
+/*
+ * We only need to hook the operations that are MMU writes.  Hooking them
+ * lets us batch the resulting hypercalls in lazy MMU mode.  The host side
+ * could probably be made faster by using the information here to simplify
+ * processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+       kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+                          pte_t *ptep, pte_t pte)
+{
+       kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+       kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+       kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, pte_t pte)
+{
+       kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+                         unsigned long addr, pte_t *ptep)
+{
+       pte_t pte = __pte(0);
+       kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+       pmd_t pmd = __pmd(0);
+       kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+#endif
+
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+       kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+       kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+       kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+       /* Widen before shifting so the address isn't truncated to 32 bits. */
+       kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT,
+                      (unsigned long)pfn << PAGE_SHIFT);
+}
+
 static void paravirt_ops_setup(void)
 {
        pv_info.name = "KVM";
@@ -41,6 +126,24 @@ static void paravirt_ops_setup(void)
        if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
                pv_cpu_ops.io_delay = kvm_io_delay;
 
+       if (kvm_para_has_feature(KVM_FEATURE_MMU_WRITE)) {
+               pv_mmu_ops.set_pte = kvm_set_pte;
+               pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+               pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+               pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+               pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+               pv_mmu_ops.pte_clear = kvm_pte_clear;
+               pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+               pv_mmu_ops.set_pud = kvm_set_pud;
+               pv_mmu_ops.set_pgd = kvm_set_pgd;
+#endif
+               pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+               pv_mmu_ops.release_pt = kvm_release_pt;
+               pv_mmu_ops.release_pd = kvm_release_pt;
+       }
 }
 
 void __init kvm_guest_init(void)
Index: kvm.paravirt/arch/x86/kvm/mmu.c
===================================================================
--- kvm.paravirt.orig/arch/x86/kvm/mmu.c
+++ kvm.paravirt/arch/x86/kvm/mmu.c
@@ -39,7 +39,7 @@
  * 2. while doing 1. it walks guest-physical to host-physical
  * If the hardware supports that we don't need to do shadow paging.
  */
-static bool tdp_enabled = false;
+bool tdp_enabled = false;
 
 #undef MMU_DEBUG
 
@@ -288,7 +288,7 @@ static void mmu_free_memory_cache_page(s
                free_page((unsigned long)mc->objects[--mc->nobjs]);
 }
 
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 {
        int r;
 
@@ -857,7 +857,7 @@ static int kvm_mmu_unprotect_page(struct
        return r;
 }
 
-static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
        struct kvm_mmu_page *sp;
 
Index: kvm.paravirt/arch/x86/kvm/mmu.h
===================================================================
--- kvm.paravirt.orig/arch/x86/kvm/mmu.h
+++ kvm.paravirt/arch/x86/kvm/mmu.h
@@ -47,4 +47,7 @@ static inline int is_paging(struct kvm_v
        return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn);
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu);
+
 #endif
Index: kvm.paravirt/arch/x86/kvm/x86.c
===================================================================
--- kvm.paravirt.orig/arch/x86/kvm/x86.c
+++ kvm.paravirt/arch/x86/kvm/x86.c
@@ -764,6 +764,8 @@ long kvm_arch_dev_ioctl(struct file *fil
        }
        case KVM_GET_PARA_FEATURES: {
                __u32 para_features = KVM_PARA_FEATURES;
+               if (tdp_enabled)
+                       para_features &= ~(1UL << KVM_FEATURE_MMU_WRITE);
 
                r = -EFAULT;
                if (copy_to_user(argp, &para_features, sizeof para_features))
@@ -2269,6 +2271,52 @@ int kvm_emulate_halt(struct kvm_vcpu *vc
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+static int kvm_hypercall_mmu_write(struct kvm_vcpu *vcpu, gva_t addr,
+                                  unsigned long size, unsigned long a0,
+                                  unsigned long a1)
+{
+       gpa_t gpa;
+       u64 value;
+
+       if (mmu_topup_memory_caches(vcpu))
+               return -KVM_EFAULT;
+
+       down_read(&vcpu->kvm->slots_lock);
+       gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+       up_read(&vcpu->kvm->slots_lock);
+
+       if (gpa == UNMAPPED_GVA)
+               return -KVM_EFAULT;
+       if (size == 1) {
+               u32 val = a0;
+
+               /*
+                * Write a single 32-bit word: using sizeof(a0) here would
+                * clobber the neighbouring pte on a 64-bit host.
+                */
+               if (!emulator_write_phys(vcpu, gpa, &val, sizeof(val)))
+                       return -KVM_EFAULT;
+       } else if (size == 2) {
+               if (!is_long_mode(vcpu) && is_pae(vcpu))
+                       value = (u64)a1 << 32 | a0;
+               else
+                       value = a0;
+               if (!emulator_write_phys(vcpu, gpa, &value, sizeof(value)))
+                       return -KVM_EFAULT;
+       } else {
+               return -KVM_E2BIG;
+       }
+
+       return 0;
+}
+
+static int kvm_hypercall_flush_tlb(struct kvm_vcpu *vcpu)
+{
+       kvm_x86_ops->tlb_flush(vcpu);
+       return 0;
+}
+
+static int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+       spin_lock(&vcpu->kvm->mmu_lock);
+       mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+       spin_unlock(&vcpu->kvm->mmu_lock);
+       return 0;
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
        unsigned long nr, a0, a1, a2, a3, ret;
@@ -2293,6 +2341,15 @@ int kvm_emulate_hypercall(struct kvm_vcp
        case KVM_HC_VAPIC_POLL_IRQ:
                ret = 0;
                break;
+       case KVM_HYPERCALL_MMU_WRITE:
+               ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
+               break;
+       case KVM_HYPERCALL_FLUSH_TLB:
+               ret = kvm_hypercall_flush_tlb(vcpu);
+               break;
+       case KVM_HYPERCALL_RELEASE_PT:
+               ret = kvm_hypercall_release_pt(vcpu, a0);
+               break;
        default:
                ret = -KVM_ENOSYS;
                break;
Index: kvm.paravirt/include/asm-x86/kvm_para.h
===================================================================
--- kvm.paravirt.orig/include/asm-x86/kvm_para.h
+++ kvm.paravirt/include/asm-x86/kvm_para.h
@@ -6,6 +6,7 @@
  */
 #define KVM_CPUID_SIGNATURE    0x40000000
 #define KVM_FEATURE_NOP_IO_DELAY       0
+#define KVM_FEATURE_MMU_WRITE          1
 
 /* This CPUID returns a feature bitmap in eax.  Before enabling a particular
  * paravirtualization, the appropriate feature bit should be checked.
@@ -15,7 +16,8 @@
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
-#define KVM_PARA_FEATURES (1UL << KVM_FEATURE_NOP_IO_DELAY)
+#define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY)   |       \
+                          (1UL << KVM_FEATURE_MMU_WRITE))
 
 /* This instruction is vmcall.  On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
Index: kvm.paravirt/include/linux/kvm_para.h
===================================================================
--- kvm.paravirt.orig/include/linux/kvm_para.h
+++ kvm.paravirt/include/linux/kvm_para.h
@@ -11,8 +11,13 @@
 
 /* Return values for hypercalls */
 #define KVM_ENOSYS             1000
+#define KVM_EFAULT             EFAULT
+#define KVM_E2BIG              E2BIG
 
-#define KVM_HC_VAPIC_POLL_IRQ            1
+#define KVM_HC_VAPIC_POLL_IRQ          1
+#define KVM_HYPERCALL_MMU_WRITE                2
+#define KVM_HYPERCALL_FLUSH_TLB                3
+#define KVM_HYPERCALL_RELEASE_PT       4
 
 /*
  * hypercalls use architecture specific
Index: kvm.paravirt/include/asm-x86/kvm_host.h
===================================================================
--- kvm.paravirt.orig/include/asm-x86/kvm_host.h
+++ kvm.paravirt/include/asm-x86/kvm_host.h
@@ -412,6 +412,8 @@ void kvm_mmu_change_mmu_pages(struct kvm
 
 int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3);
 
+extern bool tdp_enabled;
+
 enum emulation_result {
        EMULATE_DONE,       /* no further processing */
        EMULATE_DO_MMIO,      /* kvm_run filled with mmio request */
