From: Marcelo Tosatti <[EMAIL PROTECTED]>

Add support for the cr3 cache feature on Intel VMX CPUs. This avoids a
VM exit on context switch if the new cr3 value is already cached in one
of the entries (currently 4 are present).

This is especially important for Xenner, where each guest syscall
involves a cr3 switch.
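
For reference, here is a rough sketch of the guest-visible structures this
interface relies on. These are not the authoritative definitions (those are
added to the ABI headers elsewhere in this series); the names and field
layout below are only inferred from how kvm_write_cr3() and kvm_new_cr3()
use them:

    /* Sketch only -- field names inferred from their use in kvm.c below. */
    #define KVM_CR3_CACHE_SIZE  4       /* "currently 4 are present" */

    struct kvm_cr3_cache_entry {
            __u64 guest_cr3;            /* cr3 value the guest wants to load */
            __u64 host_cr3;             /* shadow cr3 the host prepared for it */
    };

    struct kvm_cr3_cache {
            struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
            __u32 max_idx;              /* number of valid entries, set by the host */
    };

    /* Cache-miss path: mmu_op message telling the host about the new cr3. */
    struct kvm_mmu_op_set_cr3 {
            struct kvm_mmu_op_header header;    /* header.op = KVM_MMU_OP_SET_CR3 */
            __u64 cr3;
    };

On a cache hit the guest loads host_cr3 directly and the hardware cr3
target list lets the switch complete without a VM exit; on a miss, or when
the load faults because the host cleared the cache underneath us, the guest
falls back to the KVM_MMU_OP_SET_CR3 hypercall via kvm_new_cr3().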

v1->v2:
- handle the race where the host clears the cache in the middle of
kvm_write_cr3: the host injects a GP, which the guest traps and uses
to fall back to the hypercall variant (suggested by Avi).

v2->v3:
- one ioctl per paravirt feature

v3->v4:
- switch to mmu_op

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kernel/kvm.c |  145 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 144 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8405984..30e3568 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,14 +26,17 @@
 #include <linux/cpu.h>
 #include <linux/mm.h>
 #include <linux/hardirq.h>
+#include <asm/tlbflush.h>
+#include <asm/asm.h>
 
 #define MMU_QUEUE_SIZE 1024
 
 struct kvm_para_state {
+       struct kvm_cr3_cache cr3_cache;
        u8 mmu_queue[MMU_QUEUE_SIZE];
        int mmu_queue_len;
        enum paravirt_lazy_mode mode;
-};
+} __attribute__ ((aligned(PAGE_SIZE)));
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
 
@@ -85,6 +88,121 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
        state->mmu_queue_len += len;
 }
 
+static void kvm_new_cr3(unsigned long cr3)
+{
+       struct kvm_mmu_op_set_cr3 scr3 = {
+               .header.op = KVM_MMU_OP_SET_CR3,
+               .cr3 = cr3,
+       };
+
+       kvm_mmu_op(&scr3, sizeof scr3);
+}
+
+static unsigned long __force_order;
+
+/*
+ * Special, register-to-cr3 instruction based hypercall API
+ * variant to the KVM host. This utilizes the cr3 filter capability
+ * of the hardware: if it works out, no VM exit happens; if a VM
+ * exit does happen, KVM gets the virtual address as well.
+ */
+static void kvm_write_cr3(unsigned long guest_cr3)
+{
+       struct kvm_para_state *para_state = &get_cpu_var(para_state);
+       struct kvm_cr3_cache *cache = &para_state->cr3_cache;
+       int idx;
+
+       /*
+        * Check the cache (maintained by the host) for a matching
+        * guest_cr3 => host_cr3 mapping. Use it if found:
+        */
+       for (idx = 0; idx < cache->max_idx; idx++) {
+               if (cache->entry[idx].guest_cr3 == guest_cr3) {
+                       unsigned long trap;
+
+                       /*
+                        * Cache-hit: we load the cached host-CR3 value.
+                        * Fall back to the hypercall variant if we raced
+                        * with the host clearing the cache after the
+                        * guest_cr3 comparison.
+                        */
+                       __asm__ __volatile__ (
+                               "    mov %2, %0\n"
+                               "0:  mov %3, %%cr3\n"
+                               "1:\n"
+                               ".section .fixup,\"ax\"\n"
+                               "2:  mov %1, %0\n"
+                               "    jmp 1b\n"
+                               ".previous\n"
+                               _ASM_EXTABLE(0b, 2b)
+                               : "=&r" (trap)
+                               : "n" (1UL), "n" (0UL),
+                                 "b" (cache->entry[idx].host_cr3),
+                                 "m" (__force_order));
+                       if (!trap)
+                               goto out;
+                       break;
+               }
+       }
+
+       /*
+        * Cache-miss. Tell the host the new cr3 via hypercall (to avoid
+        * aliasing problems with a cached host_cr3 == guest_cr3).
+        */
+       kvm_new_cr3(guest_cr3);
+out:
+       put_cpu_var(para_state);
+}
+
+/*
+ * Avoid the VM exit upon cr3 load by using the cached
+ * ->active_mm->pgd value:
+ */
+static void kvm_flush_tlb_user(void)
+{
+       kvm_write_cr3(__pa(current->active_mm->pgd));
+}
+
+/*
+ * Disable global pages, do a flush, then enable global pages:
+ */
+static void kvm_flush_tlb_kernel(void)
+{
+       unsigned long orig_cr4 = read_cr4();
+
+       write_cr4(orig_cr4 & ~X86_CR4_PGE);
+       kvm_flush_tlb_user();
+       write_cr4(orig_cr4);
+}
+
+static void register_cr3_cache(void *cache)
+{
+       struct kvm_para_state *state;
+
+       state = &per_cpu(para_state, raw_smp_processor_id());
+       wrmsrl(KVM_MSR_SET_CR3_CACHE, __pa(&state->cr3_cache));
+}
+
+static unsigned __init kvm_patch(u8 type, u16 clobbers, void *ibuf,
+                                unsigned long addr, unsigned len)
+{
+       switch (type) {
+       case PARAVIRT_PATCH(pv_mmu_ops.write_cr3):
+               return paravirt_patch_default(type, clobbers, ibuf, addr, len);
+       default:
+               return native_patch(type, clobbers, ibuf, addr, len);
+       }
+}
+
+static void __init setup_guest_cr3_cache(void)
+{
+       on_each_cpu(register_cr3_cache, NULL, 0, 1);
+
+       pv_mmu_ops.write_cr3 = kvm_write_cr3;
+       pv_mmu_ops.flush_tlb_user = kvm_flush_tlb_user;
+       pv_mmu_ops.flush_tlb_kernel = kvm_flush_tlb_kernel;
+}
+
 static void kvm_mmu_write(void *dest, u64 val)
 {
        struct kvm_mmu_op_write_pte wpte = {
@@ -97,6 +215,28 @@ static void kvm_mmu_write(void *dest, u64 val)
 }
 
 /*
+ * CR3 cache initialization uses on_each_cpu(), so it can't
+ * happen at kvm_guest_init time.
+ */
+int __init kvm_cr3_cache_init(void)
+{
+       unsigned long flags;
+
+       if (!kvm_para_available())
+               return -ENOSYS;
+
+       if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE)) {
+               setup_guest_cr3_cache();
+               local_irq_save(flags);
+               apply_paravirt(__parainstructions, __parainstructions_end);
+               local_irq_restore(flags);
+       }
+
+       return 0;
+}
+module_init(kvm_cr3_cache_init);
+
+/*
  * We only need to hook operations that are MMU writes.  We hook these so that
  * we can use lazy MMU mode to batch these operations.  We could probably
  * improve the performance of the host code if we used some of the information
@@ -219,6 +359,9 @@ static void paravirt_ops_setup(void)
                pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
                pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
        }
+
+       if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE))
+               pv_init_ops.patch = kvm_patch;
 }
 
 void __init kvm_guest_init(void)
-- 
1.5.4.2

