From: Marcelo Tosatti <[EMAIL PROTECTED]>

Hypercall-based pte updates are faster than trapping write-protection faults on the
guest page tables, and they also allow use of lazy MMU mode to batch operations.
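
For reference, the request consumed by kvm_mmu_op() is a small header followed by a
per-op payload. The sketch below shows the intended layout; the exact definitions
(field widths, padding) live in the kvm_para.h side of the guest/host split, so treat
this as illustrative rather than authoritative:

struct kvm_mmu_op_header {
	__u32 op;	/* KVM_MMU_OP_WRITE_PTE, _FLUSH_TLB or _RELEASE_PT */
	__u32 pad;	/* assumed padding to keep the payload 64-bit aligned */
};

struct kvm_mmu_op_write_pte {
	struct kvm_mmu_op_header header;
	__u64 pte_phys;		/* guest-physical address of the pte being written */
	__u64 pte_val;		/* new pte value */
};

struct kvm_mmu_op_flush_tlb {
	struct kvm_mmu_op_header header;
};

struct kvm_mmu_op_release_pt {
	struct kvm_mmu_op_header header;
	__u64 pt_phys;		/* guest-physical address of the page table being freed */
};

The guest passes the physical address of such a buffer through KVM_HC_MMU_OP, so the
host can consume it without walking the guest's virtual address space.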

Don't report the feature (KVM_CAP_PV_MMU) if two-dimensional paging is enabled.
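
The host-side report can be probed from userspace with the ordinary KVM_CHECK_EXTENSION
ioctl on /dev/kvm. A minimal sketch, assuming the KVM_CAP_PV_MMU definition added by
this patch is visible through <linux/kvm.h>:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/kvm.h>

int main(void)
{
	int kvm_fd, r;

	kvm_fd = open("/dev/kvm", O_RDONLY);
	if (kvm_fd < 0) {
		perror("open /dev/kvm");
		return 1;
	}

	/* Returns a positive value when the capability is present; with this
	 * patch the host answers 0 whenever two-dimensional paging is in use. */
	r = ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PV_MMU);
	printf("KVM_CAP_PV_MMU: %s\n", r > 0 ? "available" : "not available");

	close(kvm_fd);
	return 0;
}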

v1->v2:
- the guest passes the physical destination address, which is cheaper than doing a
  virtual-to-physical translation in the host.
- infer the pte size from the guest's paging mode

v2->v3:
- switch to one ioctl per paravirt feature
- move hypercall handling to mmu.c

v3->v4:
- guest/host split
- fix 32-bit truncation issues
- adjust to mmu_op

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kernel/kvm.c |  120 +++++++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/kvm/x86.c    |    2 +-
 include/linux/kvm.h   |    2 +-
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index a8e36da..e28d818 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -33,6 +33,108 @@ static void kvm_io_delay(void)
 {
 }
 
+static void kvm_mmu_op(void *buffer, unsigned len)
+{
+       int r;
+       unsigned long a1, a2;
+
+       do {
+               a1 = __pa(buffer);
+               a2 = 0;   /* on i386 __pa() always returns <4G */
+               r = kvm_hypercall3(KVM_HC_MMU_OP, len, a1, a2);
+               buffer += r;
+               len -= r;
+       } while (len);
+}
+
+static void kvm_mmu_write(void *dest, u64 val)
+{
+       struct kvm_mmu_op_write_pte wpte = {
+               .header.op = KVM_MMU_OP_WRITE_PTE,
+               .pte_phys = (unsigned long)__pa(dest),
+               .pte_val = val,
+       };
+
+       kvm_mmu_op(&wpte, sizeof wpte);
+}
+
+/*
+ * We only need to hook operations that are MMU writes.  We hook these so that
+ * we can use lazy MMU mode to batch these operations.  We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+       kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+                          pte_t *ptep, pte_t pte)
+{
+       kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+       kvm_mmu_write(pmdp, pmd_val(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+       kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+                               pte_t *ptep, pte_t pte)
+{
+       kvm_mmu_write(ptep, pte_val(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+                         unsigned long addr, pte_t *ptep)
+{
+       kvm_mmu_write(ptep, 0);
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+       kvm_mmu_write(pmdp, 0);
+}
+#endif
+
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+       kvm_mmu_write(pgdp, pgd_val(pgd));
+}
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+       kvm_mmu_write(pudp, pud_val(pud));
+}
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+       struct kvm_mmu_op_flush_tlb ftlb = {
+               .header.op = KVM_MMU_OP_FLUSH_TLB,
+       };
+
+       kvm_mmu_op(&ftlb, sizeof ftlb);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+       struct kvm_mmu_op_release_pt rpt = {
+               .header.op = KVM_MMU_OP_RELEASE_PT,
+               .pt_phys = (u64)pfn << PAGE_SHIFT,
+       };
+
+       kvm_mmu_op(&rpt, sizeof rpt);
+}
+
 static void paravirt_ops_setup(void)
 {
        pv_info.name = "KVM";
@@ -41,6 +143,24 @@ static void paravirt_ops_setup(void)
        if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
                pv_cpu_ops.io_delay = kvm_io_delay;
 
+       if (kvm_para_has_feature(KVM_FEATURE_MMU_OP)) {
+               pv_mmu_ops.set_pte = kvm_set_pte;
+               pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+               pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+               pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+               pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+               pv_mmu_ops.pte_clear = kvm_pte_clear;
+               pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+               pv_mmu_ops.set_pud = kvm_set_pud;
+               pv_mmu_ops.set_pgd = kvm_set_pgd;
+#endif
+               pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+               pv_mmu_ops.release_pt = kvm_release_pt;
+               pv_mmu_ops.release_pd = kvm_release_pt;
+       }
 }
 
 void __init kvm_guest_init(void)
diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 29f4f5d..92a51d3 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -817,7 +817,7 @@ int kvm_dev_ioctl_check_extension(long ext)
        case KVM_CAP_NR_MEMSLOTS:
                r = KVM_MEMORY_SLOTS;
                break;
-       case KVM_CAP_MMU_WRITE:
+       case KVM_CAP_PV_MMU:
                r = !tdp_enabled;
                break;
        default:
diff --git a/include/linux/kvm.h b/include/linux/kvm.h
index c4b1c44..074a107 100644
--- a/include/linux/kvm.h
+++ b/include/linux/kvm.h
@@ -237,7 +237,7 @@ struct kvm_vapic_addr {
 #define KVM_CAP_NR_VCPUS 9       /* returns max vcpus per vm */
 #define KVM_CAP_NR_MEMSLOTS 10   /* returns max memory slots per vm */
 #define KVM_CAP_NOP_IO_DELAY 11
-#define KVM_CAP_MMU_WRITE 12
+#define KVM_CAP_PV_MMU 12
 
 /*
  * ioctls for VM fds
-- 
1.5.4.2

