The write protect verification in set_spte is unnecessary for page sync.

It is guaranteed that, if the unsync spte was writable, the target page
does not have a write-protected shadow (if it did, the spte would have
been write protected under mmu_lock by rmap_write_protect beforehand).

The same reasoning applies to mark_page_dirty: the gfn has already been
marked dirty via the page fault path.
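
To make the semantics of the renamed flag concrete, here is a small
standalone sketch (not kernel code; need_slow_path() and
spte_was_writable() are hypothetical stand-ins for the logic the diff
below adds to set_spte()):

#include <stdbool.h>
#include <stdio.h>

/* bit 1 is the R/W bit in x86 ptes (PT_WRITABLE_MASK) */
static bool spte_was_writable(unsigned long long old_spte)
{
	return old_spte & (1ULL << 1);
}

/*
 * On the sync_page path, an spte that was already writable has gone
 * through write-protect checking and dirty accounting on the original
 * fault, so the hash table and memslot lookups can be skipped.  The
 * fault path (sync_page == false) always takes the full checks.
 */
static bool need_slow_path(bool sync_page, unsigned long long old_spte)
{
	if (sync_page && spte_was_writable(old_spte))
		return false;
	return true;
}

int main(void)
{
	printf("fault path:          %d\n", need_slow_path(false, 0x63));
	printf("sync, old writable:  %d\n", need_slow_path(true, 0x63));
	printf("sync, old read-only: %d\n", need_slow_path(true, 0x61));
	return 0;
}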

The cost of the hash table and memslot lookups is significant if the
workload is pagetable-write intensive, resulting in increased mmu_lock
contention.

Signed-off-by: Marcelo Tosatti <[EMAIL PROTECTED]>

Index: kvm/arch/x86/kvm/mmu.c
===================================================================
--- kvm.orig/arch/x86/kvm/mmu.c
+++ kvm/arch/x86/kvm/mmu.c
@@ -1529,7 +1529,7 @@ static int kvm_unsync_page(struct kvm_vc
 }
 
 static int mmu_need_write_protect(struct kvm_vcpu *vcpu, gfn_t gfn,
-                                 bool can_unsync)
+                                 bool sync_page)
 {
        struct kvm_mmu_page *shadow;
 
@@ -1539,7 +1539,7 @@ static int mmu_need_write_protect(struct
                        return 1;
                if (shadow->unsync)
                        return 0;
-               if (can_unsync && oos_shadow)
+               if (!sync_page && oos_shadow)
                        return kvm_unsync_page(vcpu, shadow);
                return 1;
        }
@@ -1550,7 +1550,7 @@ static int set_spte(struct kvm_vcpu *vcp
                    unsigned pte_access, int user_fault,
                    int write_fault, int dirty, int largepage,
                    gfn_t gfn, pfn_t pfn, bool speculative,
-                   bool can_unsync)
+                   bool sync_page)
 {
        u64 spte;
        int ret = 0;
@@ -1593,7 +1593,16 @@ static int set_spte(struct kvm_vcpu *vcp
 
                spte |= PT_WRITABLE_MASK;
 
-               if (mmu_need_write_protect(vcpu, gfn, can_unsync)) {
+               /*
+                * Optimization: for pte sync, if the spte was writable the
+                * hash lookup is unnecessary (and expensive). Write protection
+                * is the responsibility of mmu_get_page / kvm_sync_page.
+                * The same reasoning applies to dirty page accounting.
+                */
+               if (sync_page && is_writeble_pte(*shadow_pte))
+                       goto set_pte;
+
+               if (mmu_need_write_protect(vcpu, gfn, sync_page)) {
                        pgprintk("%s: found shadow page for %lx, marking ro\n",
                                 __func__, gfn);
                        ret = 1;
@@ -1648,7 +1657,7 @@ static void mmu_set_spte(struct kvm_vcpu
                }
        }
        if (set_spte(vcpu, shadow_pte, pte_access, user_fault, write_fault,
-                     dirty, largepage, gfn, pfn, speculative, true)) {
+                     dirty, largepage, gfn, pfn, speculative, false)) {
                if (write_fault)
                        *ptwrite = 1;
                kvm_x86_ops->tlb_flush(vcpu);
Index: kvm/arch/x86/kvm/paging_tmpl.h
===================================================================
--- kvm.orig/arch/x86/kvm/paging_tmpl.h
+++ kvm/arch/x86/kvm/paging_tmpl.h
@@ -580,7 +580,7 @@ static int FNAME(sync_page)(struct kvm_v
                pte_access = sp->role.access & FNAME(gpte_access)(vcpu, gpte);
                set_spte(vcpu, &sp->spt[i], pte_access, 0, 0,
                         is_dirty_pte(gpte), 0, gfn,
-                        spte_to_pfn(sp->spt[i]), true, false);
+                        spte_to_pfn(sp->spt[i]), true, true);
        }
 
        return !nr_present;
--