From: Marcelo Tosatti <[EMAIL PROTECTED]>

Add support for the cr3 cache feature on Intel VMX CPUs. This avoids a
vmexit on context switch if the new cr3 value is already cached in one
of the entries (currently 4 are present).
This is especially important for Xenner, where each guest syscall
involves a cr3 switch.

v1->v2:
- handle the race which happens when the guest has the cache cleared in
  the middle of kvm_write_cr3, by injecting a GP and trapping it to fall
  back to the hypercall variant (suggested by Avi).

v2->v3:
- one ioctl per paravirt feature

v3->v4:
- switch to mmu_op

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>
Signed-off-by: Avi Kivity <[EMAIL PROTECTED]>
---
 arch/x86/kernel/kvm.c |  145 ++++++++++++++++++++++++++++++++++++++++++++++++-
 1 files changed, 144 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c
index 8405984..30e3568 100644
--- a/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@ -26,14 +26,17 @@
 #include <linux/cpu.h>
 #include <linux/mm.h>
 #include <linux/hardirq.h>
+#include <asm/tlbflush.h>
+#include <asm/asm.h>
 
 #define MMU_QUEUE_SIZE 1024
 
 struct kvm_para_state {
+	struct kvm_cr3_cache cr3_cache;
 	u8 mmu_queue[MMU_QUEUE_SIZE];
 	int mmu_queue_len;
 	enum paravirt_lazy_mode mode;
-};
+} __attribute__ ((aligned(PAGE_SIZE)));
 
 static DEFINE_PER_CPU(struct kvm_para_state, para_state);
 
@@ -85,6 +88,121 @@ static void kvm_deferred_mmu_op(void *buffer, int len)
 	state->mmu_queue_len += len;
 }
 
+static void kvm_new_cr3(unsigned long cr3)
+{
+	struct kvm_mmu_op_set_cr3 scr3 = {
+		.header.op = KVM_MMU_OP_SET_CR3,
+		.cr3 = cr3,
+	};
+
+	kvm_mmu_op(&scr3, sizeof scr3);
+}
+
+static unsigned long __force_order;
+
+/*
+ * Special, register-to-cr3 instruction based hypercall API
+ * variant to the KVM host. This utilizes the cr3 filter capability
+ * of the hardware - if this works out then no VM exit happens,
+ * if a VM exit happens then KVM will get the virtual address too.
+ */
+static void kvm_write_cr3(unsigned long guest_cr3)
+{
+	struct kvm_para_state *para_state = &get_cpu_var(para_state);
+	struct kvm_cr3_cache *cache = &para_state->cr3_cache;
+	int idx;
+
+	/*
+	 * Check the cache (maintained by the host) for a matching
+	 * guest_cr3 => host_cr3 mapping. Use it if found:
+	 */
+	for (idx = 0; idx < cache->max_idx; idx++) {
+		if (cache->entry[idx].guest_cr3 == guest_cr3) {
+			unsigned long trap;
+
+			/*
+			 * Cache-hit: we load the cached host-CR3 value.
+			 * Fallback to hypercall variant if it raced with
+			 * the host clearing the cache after guest_cr3
+			 * comparison.
+			 */
+			__asm__ __volatile__ (
+				"	mov %2, %0\n"
+				"0:	mov %3, %%cr3\n"
+				"1:\n"
+				".section .fixup,\"ax\"\n"
+				"2:	mov %1, %0\n"
+				"	jmp 1b\n"
+				".previous\n"
+				_ASM_EXTABLE(0b, 2b)
+				: "=&r" (trap)
+				: "n" (1UL), "n" (0UL),
+				  "b" (cache->entry[idx].host_cr3),
+				  "m" (__force_order));
+			if (!trap)
+				goto out;
+			break;
+		}
+	}
+
+	/*
+	 * Cache-miss. Tell the host the new cr3 via hypercall (to avoid
+	 * aliasing problems with a cached host_cr3 == guest_cr3).
+	 */
+	kvm_new_cr3(guest_cr3);
+out:
+	put_cpu_var(para_state);
+}
+
+/*
+ * Avoid the VM exit upon cr3 load by using the cached
+ * ->active_mm->pgd value:
+ */
+static void kvm_flush_tlb_user(void)
+{
+	kvm_write_cr3(__pa(current->active_mm->pgd));
+}
+
+/*
+ * Disable global pages, do a flush, then enable global pages:
+ */
+static void kvm_flush_tlb_kernel(void)
+{
+	unsigned long orig_cr4 = read_cr4();
+
+	write_cr4(orig_cr4 & ~X86_CR4_PGE);
+	kvm_flush_tlb_user();
+	write_cr4(orig_cr4);
+}
+
+static void register_cr3_cache(void *cache)
+{
+	struct kvm_para_state *state;
+
+	state = &per_cpu(para_state, raw_smp_processor_id());
+	wrmsrl(KVM_MSR_SET_CR3_CACHE, __pa(&state->cr3_cache));
+}
+
+static unsigned __init kvm_patch(u8 type, u16 clobbers, void *ibuf,
+				 unsigned long addr, unsigned len)
+{
+	switch (type) {
+	case PARAVIRT_PATCH(pv_mmu_ops.write_cr3):
+		return paravirt_patch_default(type, clobbers, ibuf, addr, len);
+	default:
+		return native_patch(type, clobbers, ibuf, addr, len);
+	}
+}
+
+static void __init setup_guest_cr3_cache(void)
+{
+	on_each_cpu(register_cr3_cache, NULL, 0, 1);
+
+	pv_mmu_ops.write_cr3 = kvm_write_cr3;
+	pv_mmu_ops.flush_tlb_user = kvm_flush_tlb_user;
+	pv_mmu_ops.flush_tlb_kernel = kvm_flush_tlb_kernel;
+}
+
 static void kvm_mmu_write(void *dest, u64 val)
 {
 	struct kvm_mmu_op_write_pte wpte = {
@@ -97,6 +215,28 @@ static void kvm_mmu_write(void *dest, u64 val)
 }
 
 /*
+ * CR3 cache initialization uses on_each_cpu(), so it can't
+ * happen at kvm_guest_init time.
+ */
+int __init kvm_cr3_cache_init(void)
+{
+	unsigned long flags;
+
+	if (!kvm_para_available())
+		return -ENOSYS;
+
+	if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE)) {
+		setup_guest_cr3_cache();
+		local_irq_save(flags);
+		apply_paravirt(__parainstructions, __parainstructions_end);
+		local_irq_restore(flags);
+	}
+
+	return 0;
+}
+module_init(kvm_cr3_cache_init);
+
+/*
  * We only need to hook operations that are MMU writes. We hook these so that
  * we can use lazy MMU mode to batch these operations. We could probably
  * improve the performance of the host code if we used some of the information
@@ -219,6 +359,9 @@ static void paravirt_ops_setup(void)
 		pv_mmu_ops.lazy_mode.enter = kvm_enter_lazy_mmu;
 		pv_mmu_ops.lazy_mode.leave = kvm_leave_lazy_mmu;
 	}
+
+	if (kvm_para_has_feature(KVM_FEATURE_CR3_CACHE))
+		pv_init_ops.patch = kvm_patch;
 }
 
 void __init kvm_guest_init(void)
-- 
1.5.4.2
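
A note for readers of this guest-side patch in isolation: kvm_write_cr3()
above indexes a guest/host-shared struct kvm_cr3_cache whose definition
lives elsewhere in the series (presumably the kvm_para.h changes) and is
not visible in this hunk. The sketch below is only a reconstruction from
the fields the code touches (max_idx, entry[].guest_cr3,
entry[].host_cr3); the KVM_CR3_CACHE_ENTRIES constant, the integer types
and the field ordering are assumptions, not taken from this patch.

/*
 * Sketch only -- reconstructed from how kvm_write_cr3() uses the cache.
 * The real definition ships in another patch of this series; the
 * constant name, types and field order below are assumptions.
 */
#include <linux/types.h>

#define KVM_CR3_CACHE_ENTRIES	4	/* "currently 4 are present" */

struct kvm_cr3_cache_entry {
	u64 guest_cr3;		/* cr3 value the guest tried to load */
	u64 host_cr3;		/* shadow cr3 the CPU may load without a VM exit */
};

struct kvm_cr3_cache {
	struct kvm_cr3_cache_entry entry[KVM_CR3_CACHE_ENTRIES];
	u32 max_idx;		/* number of valid entries, maintained by the host */
};

register_cr3_cache() hands the physical address of the per-cpu copy of
this structure to the host via the KVM_MSR_SET_CR3_CACHE MSR; the host
fills entry[] and may invalidate the cache at any point, which is the
race that the .fixup path in kvm_write_cr3() falls back from.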