From: Marcelo Tosatti <[EMAIL PROTECTED]>
Add support for the cr3 cache feature on Intel VMX CPUs. This avoids a
VM exit on context switch whenever the new cr3 value is already cached
in one of the entries (currently 4 are present).
This is especially important for Xenner, where each guest syscall
involves a cr3 switch.
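
The per-cpu cache the guest consults is an array of guest_cr3 => host_cr3
pairs plus a host-maintained entry count. A minimal sketch of the layout,
assuming field names as used by the code in this patch (the exact types,
the KVM_CR3_CACHE_SIZE name and the real definition live in the shared
KVM paravirt headers, so treat this as illustrative only):

  #include <linux/types.h>

  #define KVM_CR3_CACHE_SIZE 4          /* "currently 4 are present" */

  struct kvm_cr3_cache_entry {
          __u64 guest_cr3;              /* cr3 value the guest wants to load */
          __u64 host_cr3;               /* shadow cr3 prepared by the host */
  };

  struct kvm_cr3_cache {
          struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_SIZE];
          __u32 max_idx;                /* number of valid entries */
  };
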
v1->v2:
- handle the race where the cache is cleared by the host in the middle
of kvm_write_cr3: a #GP is injected and trapped by the guest, which
then falls back to the hypercall variant (suggested by Avi).
v2->v3:
- one ioctl per paravirt feature
v3->v4:
- switch to mmu_op
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
arch/x86/kernel/kvm.c | 145 ++++++++++++++++++++++++++++++++++++++++++++++++-
1 files changed, 144 insertions(+), 1 deletions(-)
diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8405984..30e3568 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,14 +26,17 @@
#include <linux/cpu.h>
#include <linux/mm.h>
#include <linux/hardirq.h>
+#include <asm/tlbflush.h>
+#include <asm/asm.h>
#define MMU_QUEUE_SIZE 1024
struct kvm_para_state {
+ struct kvm_cr3_cache cr3_cache;
u8 mmu_queue[MMU_QUEUE_SIZE];
int mmu_queue_len;
enum paravirt_lazy_mode mode;
-};
+} __attribute__ ((aligned(PAGE_SIZE)));
static DEFINE_PER_CPU(struct kvm_para_state, para_state);
@@ -85,6 +88,121 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
state->mmu_queue_len += len;
}
+static void kvm_new_cr3(unsigned long cr3)
+{
+ struct kvm_mmu_op_set_cr3 scr3 = {
+ .header.op = KVM_MMU_OP_SET_CR3,
+ .cr3 = cr3,
+ };
+
+ kvm_mmu_op(&scr3, sizeof scr3);
+}
+
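+/*
+ * Dummy variable, used only as an "m" operand of the cr3-load asm
+ * below so the compiler sees a memory dependency and does not
+ * reorder the asm freely.
+ */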
+static unsigned long __force_order;
+
+/*
+ * Special, register-to-cr3 instruction based variant of the hypercall
+ * API to the KVM host. This relies on the cr3 filter capability of
+ * the hardware: if the cached value is still valid then no VM exit
+ * happens; if a VM exit does happen then KVM will get the virtual
+ * address too.
+ */
+static void kvm_write_cr3(unsigned long guest_cr3)
+{
+ struct kvm_para_state *para_state = &get_cpu_var(para_state);
+ struct kvm_cr3_cache *cache = ¶_state->cr3_cache;
+ int idx;
+
+ /*
+ * Check the cache (maintained by the host) for a matching
+ * guest_cr3 => host_cr3 mapping. Use it if found:
+ */
+ for (idx = 0; idx < cache->max_idx; idx++) {
+ if (cache->entry[idx].guest_cr3 == guest_cr3) {
+ unsigned long trap;
+
+ /*
+ * Cache-hit: we load the cached host-CR3 value.
+ * Fall back to the hypercall variant if this raced with
+ * the host clearing the cache after the guest_cr3
+ * comparison.
+ */
+ __asm__ __volatile__ (
+ " mov %2, %0\n"
+ "0: mov %3, %%cr3\n"
+ "1:\n"
+ ".section .fixup,\"ax\"\n"
+ "2: mov %1, %0\n"
+ " jmp 1b\n"
+ ".previous\n"
+ _ASM_EXTABLE(0b, 2b)
+ : "=&r" (trap)
+ : "n" (1UL), "n" (0UL),
+ "b" (cache->entry[idx].host_cr3),
+ "m" (__force_order));
+ if (!trap)
+ goto out;
+ break;
+ }
+ }
+
+ /*
+ * Cache-miss. Tell the host the new cr3 via hypercall (to avoid
+ * aliasing problems with a cached host_cr3 == guest_cr3).
+ */
+ kvm_new_cr3(guest_cr3);
+out:
+ put_cpu_var(para_state);
+}
+
+/*
+ * Avoid the VM exit upon cr3 load by using the cached
+ * ->active_mm->pgd value:
+ */
+static void kvm_flush_tlb_user(void)
+{
+ kvm_write_cr3(__pa(current->active_mm->pgd));
+}
+
+/*
+ * Disable global pages, do a flush, then enable global pages:
+ */
+static void kvm_flush_tlb_kernel(void)
+{
+ unsigned long orig_cr4 = read_cr4();
+
+ write_cr4(orig_cr4 & ~X86_CR4_PGE);
+ kvm_flush_tlb_user();
+ write_cr4(orig_cr4);
+}
+
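+/*
+ * Tell the host where this CPU's cr3 cache lives by writing its
+ * physical address to KVM_MSR_SET_CR3_CACHE. kvm_para_state is
+ * PAGE_SIZE aligned, so the cache starts on a page boundary.
+ */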
+static void register_cr3_cache(void *cache)
+{
+ struct kvm_para_state *state;
+
+ state = &per_cpu(para_state, raw_smp_processor_id());
+ wrmsrl(KVM_MSR_SET_CR3_CACHE, __pa(&state->cr3_cache));
+}
+
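+/*
+ * Paravirt patching hook: keep write_cr3 as the default indirect call
+ * so the cache-aware kvm_write_cr3() above is always used; everything
+ * else is patched natively.
+ */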
+static unsigned __init kvm_patch(u8 type, u16 clobbers, void *ibuf,
+ unsigned long addr, unsigned len)
+{
+ switch (type) {
+ case PARAVIRT_PATCH(pv_mmu_ops.write_cr3):
+ return paravirt_patch_default(type, clobbers, ibuf, addr, len);
+ default:
+ return native_patch(type, clobbers, ibuf, addr, len);
+ }
+}
+
+static void __init setup_guest_cr3_cache(void)
+{
+ on_each_cpu(register_cr3_cache, NULL, 0, 1);
+
+ pv_mmu_ops.write_cr3 = kvm_write_cr3;
+ pv_mmu_ops.flush_tlb_user = kvm_flush_tlb_user;
+ pv_mmu_ops.flush_tlb_kernel = kvm_flush_tlb_kernel;
+}
+
static void kvm_mmu_write(void *dest, u64 val)
{
struct kvm_mmu_op_write_pte wpte = {
@@ -97,6 +215,28 @@ static void kvm_mmu_write(void *dest, u64 val)
}
/*
+ * CR3 cache initialization uses on_each_cpu(), so it can't
+ * happen at kvm_guest_init time.
+ */
+int __init kvm_cr3_cache_init(void)
+{
+ unsigned long flags;
+
+ if (!kvm_para_available())
+ return -ENOSYS;
+
+ if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE)) {
+ setup_guest_cr3_cache();
+ local_irq_save(flags);
+ apply_paravirt(__parainstructions, __parainstructions_end);
+ local_irq_restore(flags);
+ }
+
+ return 0;
+}
+module_init(kvm_cr3_cache_init);
+
+/*
* We only need to hook operations that are MMU writes. We hook these so that
* we can use lazy MMU mode to batch these operations. We could probably
* improve the performance of the host code if we used some of the information
@@ -219,6 +359,9 @@ static void paravirt_ops_setup(void)
pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
}
+
+ if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE))
+ pv_init_ops.patch = kvm_patch;
}
void __init kvm_guest_init(void)
--
1.5.4.2