From: Peter Feiner <pfei...@google.com>

Avoid lookups in mmu_page_hash when the shadow page cannot already
exist. When there's a single direct root, a shadow page has at most one
parent SPTE (non-root SPs have exactly one; the root has none). Thus,
if an SPTE is non-present, it can be linked to a newly allocated SP
without first checking whether that SP already exists.

This optimization has proven significant when shattering large SPs in
batches, where the hash lookup accounted for 95% of the overhead.
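
In outline, the change gives kvm_mmu_get_page() the following shape (a
simplified sketch using only names that appear in the hunks below, not
the verbatim diff):

	/*
	 * Walk mmu_page_hash only when an existing SP could actually be
	 * found there: either SPs may have multiple parents, or we are
	 * looking for a root SP (which has no parent SPTE to consult).
	 */
	if (vcpu->kvm->arch.shadow_page_may_have_multiple_parents ||
	    level == vcpu->arch.mmu->root_level) {
		for_each_valid_sp(vcpu->kvm, sp, gfn) {
			...
		}
	}
	/* otherwise fall through and allocate a new SP directly */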

Signed-off-by: Peter Feiner <pfei...@google.com>
Signed-off-by: Jon Cargille <jcarg...@google.com>
Reviewed-by: Jim Mattson <jmatt...@google.com>

---
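Note on locking: the flag only ever transitions false -> true, and both
the write in mmu_alloc_shadow_roots() and the read in
kvm_mmu_get_page() happen under mmu_lock, so no additional barriers are
needed. A minimal sketch of the pairing, assuming kvm_mmu_get_page() is
reached with mmu_lock already held (as it is today):

	/* writer: a shadow (indirect) root is about to be allocated */
	spin_lock(&vcpu->kvm->mmu_lock);
	vcpu->kvm->arch.shadow_page_may_have_multiple_parents = true;
	spin_unlock(&vcpu->kvm->mmu_lock);

	/* reader: already under mmu_lock in kvm_mmu_get_page() */
	if (vcpu->kvm->arch.shadow_page_may_have_multiple_parents ||
	    level == vcpu->arch.mmu->root_level)
		/* an existing SP may be found: do the hash walk */;
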
 arch/x86/include/asm/kvm_host.h | 13 ++++++++
 arch/x86/kvm/mmu/mmu.c          | 55 +++++++++++++++++++--------------
 2 files changed, 45 insertions(+), 23 deletions(-)

diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h
index a239a297be33..9b70d764b626 100644
--- a/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@ -913,6 +913,19 @@ struct kvm_arch {
        struct kvm_page_track_notifier_node mmu_sp_tracker;
        struct kvm_page_track_notifier_head track_notifier_head;
 
+       /*
+        * Optimization for avoiding lookups in mmu_page_hash. When there's a
+        * single direct root, a shadow page has at most one parent SPTE
+        * (non-root SPs have exactly one; the root has none). Thus, if an SPTE
+        * is non-present, it can be linked to a newly allocated SP without
+        * first checking if the SP already exists.
+        *
+        * False initially because there are no indirect roots.
+        *
+        * Guarded by mmu_lock.
+        */
+       bool shadow_page_may_have_multiple_parents;
+
        struct list_head assigned_dev_head;
        struct iommu_domain *iommu_domain;
        bool iommu_noncoherent;
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index e618472c572b..d94552b0ed77 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -2499,35 +2499,40 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu,
                quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1;
                role.quadrant = quadrant;
        }
-       for_each_valid_sp(vcpu->kvm, sp, gfn) {
-               if (sp->gfn != gfn) {
-                       collisions++;
-                       continue;
-               }
 
-               if (!need_sync && sp->unsync)
-                       need_sync = true;
+       if (vcpu->kvm->arch.shadow_page_may_have_multiple_parents ||
+           level == vcpu->arch.mmu->root_level) {
+               for_each_valid_sp(vcpu->kvm, sp, gfn) {
+                       if (sp->gfn != gfn) {
+                               collisions++;
+                               continue;
+                       }
 
-               if (sp->role.word != role.word)
-                       continue;
+                       if (!need_sync && sp->unsync)
+                               need_sync = true;
 
-               if (sp->unsync) {
-                       /* The page is good, but __kvm_sync_page might still end
-                        * up zapping it.  If so, break in order to rebuild it.
-                        */
-                       if (!__kvm_sync_page(vcpu, sp, &invalid_list))
-                               break;
+                       if (sp->role.word != role.word)
+                               continue;
 
-                       WARN_ON(!list_empty(&invalid_list));
-                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
-               }
+                       if (sp->unsync) {
+                               /* The page is good, but __kvm_sync_page might
+                                * still end up zapping it.  If so, break in
+                                * order to rebuild it.
+                                */
+                               if (!__kvm_sync_page(vcpu, sp, &invalid_list))
+                                       break;
 
-               if (sp->unsync_children)
-                       kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+                               WARN_ON(!list_empty(&invalid_list));
+                               kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+                       }
 
-               __clear_sp_write_flooding_count(sp);
-               trace_kvm_mmu_get_page(sp, false);
-               goto out;
+                       if (sp->unsync_children)
+                               kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
+
+                       __clear_sp_write_flooding_count(sp);
+                       trace_kvm_mmu_get_page(sp, false);
+                       goto out;
+               }
        }
 
        ++vcpu->kvm->stat.mmu_cache_miss;
@@ -3735,6 +3740,10 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
        gfn_t root_gfn, root_pgd;
        int i;
 
+       spin_lock(&vcpu->kvm->mmu_lock);
+       vcpu->kvm->arch.shadow_page_may_have_multiple_parents = true;
+       spin_unlock(&vcpu->kvm->mmu_lock);
+
        root_pgd = vcpu->arch.mmu->get_guest_pgd(vcpu);
        root_gfn = root_pgd >> PAGE_SHIFT;
 
-- 
2.26.2.303.gf8c07b1a785-goog
