Hypercall-based pte updates are faster than faults, and also allow use of lazy MMU mode to batch operations.
Don't report the feature if two dimensional paging is enabled. v1->v2: - guest passes physical destination addr, which is cheaper than doing v->p translation in the host. - infer size of pte from guest mode v2->v3: - switch to one ioctl per paravirt feature - move hypercall handling to mmu.c Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]> Index: kvm.paravirt3/arch/x86/kernel/kvm.c =================================================================== --- kvm.paravirt3.orig/arch/x86/kernel/kvm.c +++ kvm.paravirt3/arch/x86/kernel/kvm.c @@ -33,6 +33,91 @@ static void kvm_io_delay(void) { } +static void kvm_mmu_write(void *dest, const void *src, size_t size) +{ + const uint8_t *p = src; + unsigned long a0 = *(unsigned long *)p; + unsigned long a1 = 0; + +#ifdef CONFIG_X86_32 + size >>= 2; + if (size == 2) + a1 = *(u32 *)&p[4]; +#endif + kvm_hypercall3(KVM_HYPERCALL_MMU_WRITE, (unsigned long)__pa(dest), a0, + a1); +} + +/* + * We only need to hook operations that are MMU writes. We hook these so that + * we can use lazy MMU mode to batch these operations. We could probably + * improve the performance of the host code if we used some of the information + * here to simplify processing of batched writes. 
+ */ +static void kvm_set_pte(pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, &pte, sizeof(pte)); +} + +static void kvm_set_pte_at(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, &pte, sizeof(pte)); +} + +static void kvm_set_pmd(pmd_t *pmdp, pmd_t pmd) +{ + kvm_mmu_write(pmdp, &pmd, sizeof(pmd)); +} + +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE +static void kvm_set_pte_atomic(pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, &pte, sizeof(pte)); +} + +static void kvm_set_pte_present(struct mm_struct *mm, unsigned long addr, + pte_t *ptep, pte_t pte) +{ + kvm_mmu_write(ptep, &pte, sizeof(pte)); +} + +static void kvm_pte_clear(struct mm_struct *mm, + unsigned long addr, pte_t *ptep) +{ + pte_t pte = __pte(0); + kvm_mmu_write(ptep, &pte, sizeof(pte)); +} + +static void kvm_pmd_clear(pmd_t *pmdp) +{ + pmd_t pmd = __pmd(0); + kvm_mmu_write(pmdp, &pmd, sizeof(pmd)); +} +#endif + +static void kvm_set_pgd(pgd_t *pgdp, pgd_t pgd) +{ + kvm_mmu_write(pgdp, &pgd, sizeof(pgd)); +} + +static void kvm_set_pud(pud_t *pudp, pud_t pud) +{ + kvm_mmu_write(pudp, &pud, sizeof(pud)); +} +#endif /* PAGETABLE_LEVELS >= 3 */ + +static void kvm_flush_tlb(void) +{ + kvm_hypercall0(KVM_HYPERCALL_FLUSH_TLB); +} + +static void kvm_release_pt(u32 pfn) +{ + kvm_hypercall1(KVM_HYPERCALL_RELEASE_PT, pfn << PAGE_SHIFT); +} + static void paravirt_ops_setup(void) { pv_info.name = "KVM"; @@ -41,6 +126,24 @@ static void paravirt_ops_setup(void) if (kvm_para_has_feature(KVM_FEATURE_NOP_IO_DELAY)) pv_cpu_ops.io_delay = kvm_io_delay; + if (kvm_para_has_feature(KVM_FEATURE_MMU_WRITE)) { + pv_mmu_ops.set_pte = kvm_set_pte; + pv_mmu_ops.set_pte_at = kvm_set_pte_at; + pv_mmu_ops.set_pmd = kvm_set_pmd; +#if PAGETABLE_LEVELS >= 3 +#ifdef CONFIG_X86_PAE + pv_mmu_ops.set_pte_atomic = kvm_set_pte_atomic; + pv_mmu_ops.set_pte_present = kvm_set_pte_present; + pv_mmu_ops.pte_clear = kvm_pte_clear; + pv_mmu_ops.pmd_clear = kvm_pmd_clear; +#endif + pv_mmu_ops.set_pud = 
kvm_set_pud; + pv_mmu_ops.set_pgd = kvm_set_pgd; +#endif + pv_mmu_ops.flush_tlb_user = kvm_flush_tlb; + pv_mmu_ops.release_pt = kvm_release_pt; + pv_mmu_ops.release_pd = kvm_release_pt; + } } void __init kvm_guest_init(void) Index: kvm.paravirt3/arch/x86/kvm/mmu.c =================================================================== --- kvm.paravirt3.orig/arch/x86/kvm/mmu.c +++ kvm.paravirt3/arch/x86/kvm/mmu.c @@ -39,7 +39,7 @@ * 2. while doing 1. it walks guest-physical to host-physical * If the hardware supports that we don't need to do shadow paging. */ -static bool tdp_enabled = false; +bool tdp_enabled = false; #undef MMU_DEBUG @@ -1826,6 +1826,46 @@ unsigned int kvm_mmu_calculate_mmu_pages return nr_mmu_pages; } + +int kvm_hypercall_mmu_write(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned long a1, unsigned long a2) +{ + u64 value; + int bytes; + + if (mmu_topup_memory_caches(vcpu)) + return -KVM_EFAULT; + + bytes = 8; + value = a1; + + if (!is_long_mode(vcpu)) { + if (is_pae(vcpu)) + value = (u64)a2 << 32 | a1; + else + bytes = 4; + } + + if (!emulator_write_phys(vcpu, addr, &value, bytes)) + return -KVM_EFAULT; + + return 0; +} + +int kvm_hypercall_flush_tlb(struct kvm_vcpu *vcpu) +{ + kvm_x86_ops->tlb_flush(vcpu); + return 0; +} + +int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr) +{ + spin_lock(&vcpu->kvm->mmu_lock); + mmu_unshadow(vcpu->kvm, addr >> PAGE_SHIFT); + spin_unlock(&vcpu->kvm->mmu_lock); + return 0; +} + #ifdef AUDIT static const char *audit_msg; Index: kvm.paravirt3/arch/x86/kvm/x86.c =================================================================== --- kvm.paravirt3.orig/arch/x86/kvm/x86.c +++ kvm.paravirt3/arch/x86/kvm/x86.c @@ -809,6 +809,9 @@ int kvm_dev_ioctl_check_extension(long e case KVM_CAP_NR_MEMSLOTS: r = KVM_MEMORY_SLOTS; break; + case KVM_CAP_MMU_WRITE: + r = !tdp_enabled; + break; default: r = 0; break; @@ -1763,8 +1766,8 @@ mmio: return X86EMUL_UNHANDLEABLE; } -static int emulator_write_phys(struct kvm_vcpu *vcpu, 
gpa_t gpa, - const void *val, int bytes) +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + const void *val, int bytes) { int ret; @@ -2392,6 +2395,15 @@ int kvm_emulate_hypercall(struct kvm_vcp case KVM_HC_VAPIC_POLL_IRQ: ret = 0; break; + case KVM_HYPERCALL_MMU_WRITE: + ret = kvm_hypercall_mmu_write(vcpu, a0, a1, a2); + break; + case KVM_HYPERCALL_FLUSH_TLB: + ret = kvm_hypercall_flush_tlb(vcpu); + break; + case KVM_HYPERCALL_RELEASE_PT: + ret = kvm_hypercall_release_pt(vcpu, a0); + break; default: ret = -KVM_ENOSYS; break; Index: kvm.paravirt3/include/asm-x86/kvm_host.h =================================================================== --- kvm.paravirt3.orig/include/asm-x86/kvm_host.h +++ kvm.paravirt3/include/asm-x86/kvm_host.h @@ -421,6 +421,16 @@ void kvm_mmu_change_mmu_pages(struct kvm int load_pdptrs(struct kvm_vcpu *vcpu, unsigned long cr3); +int kvm_hypercall_mmu_write(struct kvm_vcpu *vcpu, gpa_t addr, + unsigned long a1, unsigned long a2); +int kvm_hypercall_flush_tlb(struct kvm_vcpu *vcpu); +int kvm_hypercall_release_pt(struct kvm_vcpu *vcpu, gpa_t addr); + +int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, + const void *val, int bytes); + +extern bool tdp_enabled; + enum emulation_result { EMULATE_DONE, /* no further processing */ EMULATE_DO_MMIO, /* kvm_run filled with mmio request */ Index: kvm.paravirt3/include/asm-x86/kvm_para.h =================================================================== --- kvm.paravirt3.orig/include/asm-x86/kvm_para.h +++ kvm.paravirt3/include/asm-x86/kvm_para.h @@ -12,6 +12,7 @@ #define KVM_CPUID_FEATURES 0x40000001 #define KVM_FEATURE_CLOCKSOURCE 0 #define KVM_FEATURE_NOP_IO_DELAY 1 +#define KVM_FEATURE_MMU_WRITE 2 #define MSR_KVM_WALL_CLOCK 0x11 #define MSR_KVM_SYSTEM_TIME 0x12 Index: kvm.paravirt3/include/linux/kvm.h =================================================================== --- kvm.paravirt3.orig/include/linux/kvm.h +++ kvm.paravirt3/include/linux/kvm.h @@ -237,6 +237,7 @@ struct 
kvm_vapic_addr { #define KVM_CAP_NR_VCPUS 9 /* returns max vcpus per vm */ #define KVM_CAP_NR_MEMSLOTS 10 /* returns max memory slots per vm */ #define KVM_CAP_NOP_IO_DELAY 11 +#define KVM_CAP_MMU_WRITE 12 /* * ioctls for VM fds Index: kvm.paravirt3/include/linux/kvm_para.h =================================================================== --- kvm.paravirt3.orig/include/linux/kvm_para.h +++ kvm.paravirt3/include/linux/kvm_para.h @@ -11,8 +11,13 @@ /* Return values for hypercalls */ #define KVM_ENOSYS 1000 +#define KVM_EFAULT EFAULT +#define KVM_E2BIG E2BIG -#define KVM_HC_VAPIC_POLL_IRQ 1 +#define KVM_HC_VAPIC_POLL_IRQ 1 +#define KVM_HYPERCALL_MMU_WRITE 2 +#define KVM_HYPERCALL_FLUSH_TLB 3 +#define KVM_HYPERCALL_RELEASE_PT 4 /* * hypercalls use architecture specific -- ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2008. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ kvm-devel mailing list kvm-devel@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/kvm-devel