Hypercall-based pte updates are faster than trapping each write with a page fault, and they also allow use of lazy MMU mode to batch operations.
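A note on the mechanism, since the changelog is terse: generic mm code already brackets runs of pte updates with the lazy MMU hooks, and every set_pte_at() in between goes through pv_mmu_ops, which is what makes batching the hypercalls possible. Below is a minimal sketch of that calling pattern; remap_range() is a hypothetical caller for illustration only and is not part of this patch, and the actual queueing of hypercalls between enter/leave is left to future guest/host work:

/*
 * Hypothetical illustration: with the hooks below installed, each
 * set_pte_at() in this window turns into kvm_mmu_write(), and a later
 * implementation could queue them and flush once at leave time.
 */
#include <linux/mm.h>
#include <asm/pgtable.h>

static void remap_range(struct mm_struct *mm, unsigned long addr,
			pte_t *ptep, pte_t *new, int n)
{
	int i;

	arch_enter_lazy_mmu_mode();	/* start batching pv mmu updates */
	for (i = 0; i < n; i++)
		set_pte_at(mm, addr + i * PAGE_SIZE, ptep + i, new[i]);
	arch_leave_lazy_mmu_mode();	/* flush any queued updates */
}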
Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kernel/kvm.c
+++ linux-2.6-x86-kvm/arch/x86/kernel/kvm.c
@@ -33,6 +33,104 @@ static void kvm_io_delay(void)
 {
 }
 
+static void kvm_mmu_write(void *dest, const void *src, size_t size)
+{
+	const uint8_t *p = src;
+	unsigned long a0 = *(unsigned long *)p;
+	unsigned long a1 = 0;
+
+	size >>= 2;
+#ifdef CONFIG_X86_32
+	if (size == 2)
+		a1 = *(u32 *)&p[4];
+#endif
+	kvm_hypercall4(KVM_HYPERCALL_MMU_WRITE, (unsigned long)dest, size, a0,
+		       a1);
+}
+
+/*
+ * We only need to hook operations that are MMU writes. We hook these so that
+ * we can use lazy MMU mode to batch these operations. We could probably
+ * improve the performance of the host code if we used some of the information
+ * here to simplify processing of batched writes.
+ */
+static void kvm_set_pte(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr,
+			   pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd)
+{
+	kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr,
+				pte_t *ptep, pte_t pte)
+{
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pte_clear(struct mm_struct *mm,
+			  unsigned long addr, pte_t *ptep)
+{
+	pte_t pte = __pte(0);
+	kvm_mmu_write(ptep, &pte, sizeof(pte));
+}
+
+static void kvm_pmd_clear(pmd_t *pmdp)
+{
+	pmd_t pmd = __pmd(0);
+	kvm_mmu_write(pmdp, &pmd, sizeof(pmd));
+}
+#endif
+
+static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd)
+{
+	kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_pgd_clear(pgd_t *pgdp)
+{
+	pgd_t pgd = __pgd(0);
+	kvm_mmu_write(pgdp, &pgd, sizeof(pgd));
+}
+
+static void kvm_set_pud(pud_t *pudp, pud_t pud)
+{
+	kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#if PAGETABLE_LEVELS == 4
+static void kvm_pud_clear(pud_t *pudp)
+{
+	pud_t pud = __pud(0);
+	kvm_mmu_write(pudp, &pud, sizeof(pud));
+}
+#endif
+#endif /* PAGETABLE_LEVELS >= 3 */
+
+static void kvm_flush_tlb(void)
+{
+	kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB);
+}
+
+static void kvm_release_pt(u32 pfn)
+{
+	kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT);
+}
+
 static void paravirt_ops_setup(void)
 {
 	pv_info.name = "KVM";
@@ -41,6 +139,28 @@ static void paravirt_ops_setup(void)
 	if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY))
 		pv_cpu_ops.io_delay = kvm_io_delay;
 
+	if (kvm_para_has_feature(KVM_FEATURE_MMU_WRITE)) {
+		pv_mmu_ops.set_pte = kvm_set_pte;
+		pv_mmu_ops.set_pte_at = kvm_set_pte_at;
+		pv_mmu_ops.set_pmd = kvm_set_pmd;
+#if PAGETABLE_LEVELS >= 3
+#ifdef CONFIG_X86_PAE
+		pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic;
+		pv_mmu_ops.set_pte_present = kvm_set_pte_present;
+		pv_mmu_ops.pte_clear = kvm_pte_clear;
+		pv_mmu_ops.pmd_clear = kvm_pmd_clear;
+#endif
+		pv_mmu_ops.set_pud = kvm_set_pud;
+		pv_mmu_ops.set_pgd = kvm_set_pgd;
+		pv_mmu_ops.pgd_clear = kvm_pgd_clear;
+#if PAGETABLE_LEVELS == 4
+		pv_mmu_ops.pud_clear = kvm_pud_clear;
+#endif
+#endif
+		pv_mmu_ops.flush_tlb_user = kvm_flush_tlb;
+		pv_mmu_ops.release_pt = kvm_release_pt;
+		pv_mmu_ops.release_pd = kvm_release_pt;
+	}
 }
 
 void __init kvm_guest_init(void)
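An aside on the calling convention (not part of the patch): kvm_mmu_write() passes the write size in 32-bit words, and on a 32-bit PAE guest a 64-bit pte is split across the a0/a1 hypercall arguments, which the host side below reassembles. A self-contained sketch of that round trip, with all names local to the example:

/* Standalone illustration of the KVM_HYPERCALL_MMU_WRITE argument
 * packing; compiles and runs anywhere, nothing here is kernel code. */
#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint64_t pte = 0x8000000012345067ULL;	/* sample 64-bit PAE pte */

	/* Guest side (32-bit): size = sizeof(pte) >> 2 == 2 words. */
	uint32_t a0 = (uint32_t)pte;		/* low 32 bits  */
	uint32_t a1 = (uint32_t)(pte >> 32);	/* high 32 bits */

	/* Host side: size == 2 on a PAE vcpu, so rebuild the value. */
	uint64_t value = (uint64_t)a1 << 32 | a0;

	printf("reassembled %s\n", value == pte ? "ok" : "broken");
	return 0;
}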
Index: linux-2.6-x86-kvm/arch/x86/kvm/mmu.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/mmu.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/mmu.c
@@ -287,7 +287,7 @@ static void mmu_free_memory_cache_page(s
 	free_page((unsigned long)mc->objects[--mc->nobjs]);
 }
 
-static int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu)
 {
 	int r;
 
@@ -856,7 +856,7 @@ static int kvm_mmu_unprotect_page(struct
 	return r;
 }
 
-static void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn)
 {
 	struct kvm_mmu_page *sp;
 
Index: linux-2.6-x86-kvm/arch/x86/kvm/mmu.h
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/mmu.h
+++ linux-2.6-x86-kvm/arch/x86/kvm/mmu.h
@@ -41,4 +41,7 @@ static inline int is_paging(struct kvm_v
 	return vcpu->arch.cr0 & X86_CR0_PG;
 }
 
+void mmu_unshadow(struct kvm *kvm, gfn_t gfn);
+int mmu_topup_memory_caches(struct kvm_vcpu *vcpu);
+
 #endif
Index: linux-2.6-x86-kvm/arch/x86/kvm/x86.c
===================================================================
--- linux-2.6-x86-kvm.orig/arch/x86/kvm/x86.c
+++ linux-2.6-x86-kvm/arch/x86/kvm/x86.c
@@ -2250,6 +2250,52 @@ int kvm_emulate_halt(struct kvm_vcpu *vc
 }
 EXPORT_SYMBOL_GPL(kvm_emulate_halt);
 
+static int kvm_hypercall_mmu_write(struct kvm_vcpu *vcpu, gva_t addr,
+				   unsigned long size, unsigned long a0,
+				   unsigned long a1)
+{
+	gpa_t gpa;
+	u64 value;
+
+	if (mmu_topup_memory_caches(vcpu))
+		return -KVM_EFAULT;
+
+	down_read(&current->mm->mmap_sem);
+	gpa = vcpu->arch.mmu.gva_to_gpa(vcpu, addr);
+	up_read(&current->mm->mmap_sem);
+
+	if (gpa == UNMAPPED_GVA)
+		return -KVM_EFAULT;
+	if (size == 1) {
+		if (!emulator_write_phys(vcpu, gpa, &a0, sizeof(a0)))
+			return -KVM_EFAULT;
+	} else if (size == 2) {
+		if (!is_long_mode(vcpu) && is_pae(vcpu))
+			value = (u64)a1 << 32 | a0;
+		else
+			value = a0;
+		if (!emulator_write_phys(vcpu, gpa, &value, sizeof(value)))
+			return -KVM_EFAULT;
+	} else
+		return -KVM_E2BIG;
+
+	return 0;
+}
+
+static int kvm_hypercall_flush_tlb(struct kvm_vcpu *vcpu)
+{
+	kvm_x86_ops->tlb_flush(vcpu);
+	return 0;
+}
+
+static int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr)
+{
+	spin_lock(&vcpu->kvm->mmu_lock);
+	mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT);
+	spin_unlock(&vcpu->kvm->mmu_lock);
+	return 0;
+}
+
 int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
 {
 	unsigned long nr, a0, a1, a2, a3, ret;
@@ -2274,6 +2320,15 @@ int kvm_emulate_hypercall(struct kvm_vcp
 	case KVM_HC_VAPIC_POLL_IRQ:
 		ret = 0;
 		break;
+	case KVM_HYPERCALL_MMU_WRITE:
+		ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2, a3);
+		break;
+	case KVM_HYPERCALL_FLUSH_TLB:
+		ret = kvm_hypercall_flush_tlb(vcpu);
+		break;
+	case KVM_HYPERCALL_RELEASE_PT:
+		ret = kvm_hypercall_release_pt(vcpu, a0);
+		break;
 	default:
 		ret = -KVM_ENOSYS;
 		break;
Index: linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/asm-x86/kvm_para.h
+++ linux-2.6-x86-kvm/include/asm-x86/kvm_para.h
@@ -6,6 +6,7 @@
  */
 #define KVM_CPUID_SIGNATURE	0x40000000
 #define KVM_FEATURE_NOP_IO_DELAY	0
+#define KVM_FEATURE_MMU_WRITE	1
 
 /* This CPUID returns a feature bitmap in eax. Before enabling a particular
  * paravirtualization, the appropriate feature bit should be checked.
@@ -15,7 +16,8 @@
 #ifdef __KERNEL__
 #include <asm/processor.h>
 
-#define KVM_PARA_FEATURES (1UL << KVM_FEATURE_NOP_IO_DELAY)
+#define KVM_PARA_FEATURES ((1UL << KVM_FEATURE_NOP_IO_DELAY) | \
+			   (1UL << KVM_FEATURE_MMU_WRITE))
 
 /* This instruction is vmcall. On non-VT architectures, it will generate a
  * trap that we will then rewrite to the appropriate instruction.
Index: linux-2.6-x86-kvm/include/linux/kvm_para.h
===================================================================
--- linux-2.6-x86-kvm.orig/include/linux/kvm_para.h
+++ linux-2.6-x86-kvm/include/linux/kvm_para.h
@@ -11,8 +11,13 @@
 
 /* Return values for hypercalls */
 #define KVM_ENOSYS		1000
+#define KVM_EFAULT		EFAULT
+#define KVM_E2BIG		E2BIG
 
-#define KVM_HC_VAPIC_POLL_IRQ	1
+#define KVM_HC_VAPIC_POLL_IRQ	1
+#define KVM_HYPERCALL_MMU_WRITE	2
+#define KVM_HYPERCALL_FLUSH_TLB	3
+#define KVM_HYPERCALL_RELEASE_PT	4
 
 /*
  * hypercalls use architecture specific
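For completeness, here is a sketch of how a guest could probe the new feature bit by hand; the patch itself relies on kvm_para_has_feature(), and the CPUID leaf used below (the one following KVM_CPUID_SIGNATURE) is an assumption based on the feature-bitmap comment in kvm_para.h:

/* Assumed CPUID layout: the leaf after the signature leaf returns the
 * KVM_PARA_FEATURES bitmap in eax. Illustration only. */
static int kvm_has_mmu_write(void)
{
	unsigned int eax, ebx, ecx, edx;

	cpuid(KVM_CPUID_SIGNATURE + 1, &eax, &ebx, &ecx, &edx);
	return !!(eax & (1UL << KVM_FEATURE_MMU_WRITE));
}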