Hypercall-based pte updates are faster than trapping each write with a page fault, and they also allow use of lazy MMU mode to batch operations.
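A note on the mechanism, since the changelog is terse: generic mm code already brackets runs of pte updates with the lazy MMU hooks, and every set_pte_at() in between goes through pv_mmu_ops, which is what makes batching the hypercalls possible. Below is a minimal sketch of that calling pattern; remap_range() is a hypothetical caller for illustration only and is not part of this patch, and the actual queueing of hypercalls between enter/leave is left to future guest/host work:

/*
 * Hypothetical illustration: with the hooks below installed, each
 * set_pte_at() in this window turns into kvm_mmu_write(), and a later
 * implementation could queue them and flush once at leave time.
 */
#include <linux/mm.h>
#include <asm/pgtable.h>

static void remap_range(struct mm_struct *mm, unsigned long addr,
			pte_t *ptep, pte_t *new, int n)
{
	int i;

	arch_enter_lazy_mmu_mode();	/* start batching pv mmu updates */
	for (i = 0; i < n; i++)
		set_pte_at(mm, addr + i * PAGE_SIZE, ptep + i, new[i]);
	arch_leave_lazy_mmu_mode();	/* flush any queued updates */
}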
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/kvm.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
@@ -33,6 +33,104 @@ static void kvm_io_delay(void)
 {
 }
 
+static void kvm_mmu_write(void *dest, const void *src, size_t size)
+{
+	const uint8_t *p = src;
+	unsigned long a0 = *(unsigned long *)p;
+	unsigned long a1 = 0;
+
+	size >>= 2;
+#ifdef CONFIG_X86_32
+	if (size == 2)
+		a1 = *(u32 *)&p[4];
+#endif
+	kvm_hypercall4(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
+		       a1);
+}
+
+/*
+ * We only need to hook operations that are MMU writes. We hook these so that
+ * we can use lazy MMU mode to batch these operations. We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+			   pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+			  unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = __pte(0);
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+	pmd_t pmd = __pmd(0);
+	kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+#endif
+
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_pgd_clear(pgd_t *pgdp)
+{
+	pgd_t pgd = __pgd(0);
+	kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+	kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#if PAGETABLE_LEVELS == 4
+static void kvm_pud_clear(pud_t *pudp)
+{
+	pud_t pud = __pud(0);
+	kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+	kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+	kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT);
+}
+
 static void paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
@@ -41,6 +139,28 @@ static void paravirt_ops_setup(void)
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
 
+	if (kvm_para_has_feature(KVM_FEATURE_MMU_WRITE)) {
+		pv_mmu_ops.set_pte = kvm_set_pte;
+		pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+		pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+		pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+		pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+		pv_mmu_ops.pte_clear = kvm_pte_clear;
+		pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+		pv_mmu_ops.set_pud = kvm_set_pud;
+		pv_mmu_ops.set_pgd = kvm_set_pgd;
+		pv_mmu_ops.pgd_clear = kvm_pgd_clear;
+#if PAGETABLE_LEVELS == 4
+		pv_mmu_ops.pud_clear = kvm_pud_clear;
+#endif
+#endif
+		pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+		pv_mmu_ops.release_pt = kvm_release_pt;
+		pv_mmu_ops.release_pd = kvm_release_pt;
+	}
 }
 
 void __init kvm_guest_init(void)
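An aside on the calling convention (not part of the patch): kvm_mmu_write() passes the write size in 32-bit words, and on a 32-bit PAE guest a 64-bit pte is split across the a0/a1 hypercall arguments, which the host side below reassembles. A self-contained sketch of that round trip, with all names local to the example:

/* Standalone illustration of the KVM_HYPERCALL_MMU_WRITE argument
 * packing; compiles and runs anywhere, nothing here is kernel code. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pte = 0x8000000012345067ULL;	/* sample 64-bit PAE pte */

	/* Guest side (32-bit): size = sizeof(pte) >> 2 == 2 words. */
	uint32_t a0 = (uint32_t)pte;		/* low 32 bits  */
	uint32_t a1 = (uint32_t)(pte >> 32);	/* high 32 bits */

	/* Host side: size == 2 on a PAE vcpu, so rebuild the value. */
	uint64_t value = (uint64_t)a1 << 32 | a0;

	printf("reassembled %s\n", value == pte ? "ok" : "broken");
	return 0;
}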
Index: linux-2.6-x86-kvm/arch/x86/kvm/mmu.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/mmu.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/mmu.c
@@ -287,7 +287,7 @@ static void mmu_free_memory_cache_page(s
 	free_page((unsigned long)mc->objects[--mc->nobjs]);
 }
 
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 {
 	int r;
 
@@ -856,7 +856,7 @@ static int kvm_mmu_unprotect_page(struct
 	return r;
 }
 
-static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 
Index: linux-2.6-x86-kvm/arch/x86/kvm/mmu.h
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/mmu.h
+++ linux-2.6-x86-kvm/arch/x86/kvm/mmu.h
@@ -41,4 +41,7 @@ static inline int is_paging(struct kvm_v
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn);
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu);
+
 #endif
Index: linux-2.6-x86-kvm/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/x86.c
@@ -2250,6 +2250,52 @@ int kvm_emulate_halt(struct kvm_vcpu *vc
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+static int kvm_hypercall_mmu_write(struct kvm_vcpu *vcpu, gva_t addr,
+				   unsigned long size, unsigned long a0,
+				   unsigned long a1)
+{
+	gpa_t gpa;
+	u64 value;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return -KVM_EFAULT;
+
+	down_read(&current->mm->mmap_sem);
+	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+	up_read(&current->mm->mmap_sem);
+
+	if (gpa == UNMAPPED_GVA)
+		return -KVM_EFAULT;
+	if (size == 1) {
+		if (!emulator_write_phys(vcpu, gpa, &a0, sizeof(a0)))
+			return -KVM_EFAULT;
+	} else if (size == 2) {
+		if (!is_long_mode(vcpu) && is_pae(vcpu))
+			value = (u64)a1 << 32 | a0;
+		else
+			value = a0;
+		if (!emulator_write_phys(vcpu, gpa, &value, sizeof(value)))
+			return -KVM_EFAULT;
+	} else
+		return -KVM_E2BIG;
+
+	return 0;
+}
+
+static int kvm_hypercall_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->tlb_flush(vcpu);
+	return 0;
+}
+
+static int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	return 0;
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -2274,6 +2320,15 @@ int kvm_emulate_hypercall(struct kvm_vcp
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
+	case KVM_HYPERCALL_MMU_WRITE:
+		ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
+		break;
+	case KVM_HYPERCALL_FLUSH_TLB:
+		ret = kvm_hypercall_flush_tlb(vcpu);
+		break;
+	case KVM_HYPERCALL_RELEASE_PT:
+		ret = kvm_hypercall_release_pt(vcpu, a0);
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_para.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
@@ -6,6 +6,7 @@
  */
 #define KVM_CPUID_SIGNATURE	0x40000000
 #define KVM_FEATURE_NOP_IO_DELAY	0
+#define KVM_FEATURE_MMU_WRITE	1
 
 /* This CPUID returns a feature bitmap in eax. Before enabling a particular
  * paravirtualization, the appropriate feature bit should be checked.
@@ -15,7 +16,8 @@
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
-#define KVM_PARA_FEATURES (1UL << KVM_FEATURE_NOP_IO_DELAY)
+#define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY) | \
+			   (1UL << KVM_FEATURE_MMU_WRITE))
 
 /* This instruction is vmcall. On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
Index: linux-2.6-x86-kvm/include/linux/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm_para.h
+++ linux-2.6-x86-kvm/include/linux/kvm_para.h
@@ -11,8 +11,13 @@
 
 /* Return values for hypercalls */
 #define KVM_ENOSYS		1000
+#define KVM_EFAULT		EFAULT
+#define KVM_E2BIG		E2BIG
 
-#define KVM_HC_VAPIC_POLL_IRQ	1
+#define KVM_HC_VAPIC_POLL_IRQ	1
+#define KVM_HYPERCALL_MMU_WRITE	2
+#define KVM_HYPERCALL_FLUSH_TLB	3
+#define KVM_HYPERCALL_RELEASE_PT	4
 
 /*
  * hypercalls use architecture specific
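For completeness, here is a sketch of how a guest could probe the new feature bit by hand; the patch itself relies on kvm_para_has_feature(), and the CPUID leaf used below (the one following KVM_CPUID_SIGNATURE) is an assumption based on the feature-bitmap comment in kvm_para.h:

/* Assumed CPUID layout: the leaf after the signature leaf returns the
 * KVM_PARA_FEATURES bitmap in eax. Illustration only. */
static int kvm_has_mmu_write(void)
{
	unsigned int eax, ebx, ecx, edx;

	cpuid(KVM_CPUID_SIGNATURE + 1, &eax, &ebx, &ecx, &edx);
	return !!(eax & (1UL << KVM_FEATURE_MMU_WRITE));
}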