To reduce lock contention and interference with page fault handlers,
allow the TDP MMU functions which enable and disable dirty logging
to operate under the MMU read lock.

Signed-off-by: Ben Gardon <bgar...@google.com>
---
 arch/x86/kvm/mmu/mmu.c     | 16 +++++++---
 arch/x86/kvm/mmu/tdp_mmu.c | 62 ++++++++++++++++++++++++++++++--------
 2 files changed, 61 insertions(+), 17 deletions(-)

diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index 81967b4e7d76..bf535c9f7ff2 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -5543,10 +5543,14 @@ void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
        write_lock(&kvm->mmu_lock);
        flush = slot_handle_level(kvm, memslot, slot_rmap_write_protect,
                                start_level, KVM_MAX_HUGEPAGE_LEVEL, false);
-       if (is_tdp_mmu_enabled(kvm))
-               flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_4K);
        write_unlock(&kvm->mmu_lock);
 
+       if (is_tdp_mmu_enabled(kvm)) {
+               read_lock(&kvm->mmu_lock);
+               flush |= kvm_tdp_mmu_wrprot_slot(kvm, memslot, PG_LEVEL_4K);
+               read_unlock(&kvm->mmu_lock);
+       }
+
        /*
         * We can flush all the TLBs out of the mmu lock without TLB
         * corruption since we just change the spte from writable to
@@ -5641,10 +5645,14 @@ void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
 
        write_lock(&kvm->mmu_lock);
        flush = slot_handle_leaf(kvm, memslot, __rmap_clear_dirty, false);
-       if (is_tdp_mmu_enabled(kvm))
-               flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
        write_unlock(&kvm->mmu_lock);
 
+       if (is_tdp_mmu_enabled(kvm)) {
+               read_lock(&kvm->mmu_lock);
+               flush |= kvm_tdp_mmu_clear_dirty_slot(kvm, memslot);
+               read_unlock(&kvm->mmu_lock);
+       }
+
        /*
         * It's also safe to flush TLBs out of mmu lock here as currently this
         * function is only used for dirty logging, in which case flushing TLB
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 862acb868abd..0c90dc034819 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -491,8 +491,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, 
gfn_t gfn,
 }
 
 /*
- * tdp_mmu_set_spte_atomic - Set a TDP MMU SPTE atomically and handle the
- * associated bookkeeping
+ * tdp_mmu_set_spte_atomic_no_dirty_log - Set a TDP MMU SPTE atomically
+ * and handle the associated bookkeeping, but do not mark the page dirty
+ * in KVM's dirty bitmaps.
  *
  * @kvm: kvm instance
  * @iter: a tdp_iter instance currently on the SPTE that should be set
@@ -500,9 +501,9 @@ static void handle_changed_spte(struct kvm *kvm, int as_id, 
gfn_t gfn,
  * Returns: true if the SPTE was set, false if it was not. If false is 
returned,
  *         this function will have no side-effects.
  */
-static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
-                                          struct tdp_iter *iter,
-                                          u64 new_spte)
+static inline bool tdp_mmu_set_spte_atomic_no_dirty_log(struct kvm *kvm,
+                                                       struct tdp_iter *iter,
+                                                       u64 new_spte)
 {
        lockdep_assert_held_read(&kvm->mmu_lock);
 
@@ -517,9 +518,22 @@ static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
                      new_spte) != iter->old_spte)
                return false;
 
-       handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
-                           new_spte, iter->level, true);
+       __handle_changed_spte(kvm, iter->as_id, iter->gfn, iter->old_spte,
+                             new_spte, iter->level, true);
+       handle_changed_spte_acc_track(iter->old_spte, new_spte, iter->level);
+
+       return true;
+}
+
+static inline bool tdp_mmu_set_spte_atomic(struct kvm *kvm,
+                                          struct tdp_iter *iter,
+                                          u64 new_spte)
+{
+       if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, iter, new_spte))
+               return false;
 
+       handle_changed_spte_dirty_log(kvm, iter->as_id, iter->gfn,
+                                     iter->old_spte, new_spte, iter->level);
        return true;
 }
 
@@ -1142,7 +1156,8 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct 
kvm_mmu_page *root,
 
        for_each_tdp_pte_min_level(iter, root->spt, root->role.level,
                                   min_level, start, end) {
-               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+retry:
+               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
                        continue;
 
                if (!is_shadow_present_pte(iter.old_spte) ||
@@ -1152,7 +1167,15 @@ static bool wrprot_gfn_range(struct kvm *kvm, struct 
kvm_mmu_page *root,
 
                new_spte = iter.old_spte & ~PT_WRITABLE_MASK;
 
-               tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+               if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
+                                                         new_spte)) {
+                       /*
+                        * The iter must explicitly re-read the SPTE because
+                        * the atomic cmpxchg failed.
+                        */
+                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+                       goto retry;
+               }
                spte_set = true;
        }
 
@@ -1172,7 +1195,9 @@ bool kvm_tdp_mmu_wrprot_slot(struct kvm *kvm, struct 
kvm_memory_slot *slot,
        int root_as_id;
        bool spte_set = false;
 
-       for_each_tdp_mmu_root_yield_safe(kvm, root, false) {
+       lockdep_assert_held_read(&kvm->mmu_lock);
+
+       for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
                root_as_id = kvm_mmu_page_as_id(root);
                if (root_as_id != slot->as_id)
                        continue;
@@ -1201,7 +1226,8 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, struct 
kvm_mmu_page *root,
        rcu_read_lock();
 
        tdp_root_for_each_leaf_pte(iter, root, start, end) {
-               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, false))
+retry:
+               if (tdp_mmu_iter_cond_resched(kvm, &iter, false, true))
                        continue;
 
                if (spte_ad_need_write_protect(iter.old_spte)) {
@@ -1216,7 +1242,15 @@ static bool clear_dirty_gfn_range(struct kvm *kvm, 
struct kvm_mmu_page *root,
                                continue;
                }
 
-               tdp_mmu_set_spte_no_dirty_log(kvm, &iter, new_spte);
+               if (!tdp_mmu_set_spte_atomic_no_dirty_log(kvm, &iter,
+                                                         new_spte)) {
+                       /*
+                        * The iter must explicitly re-read the SPTE because
+                        * the atomic cmpxchg failed.
+                        */
+                       iter.old_spte = READ_ONCE(*rcu_dereference(iter.sptep));
+                       goto retry;
+               }
                spte_set = true;
        }
 
@@ -1237,7 +1271,9 @@ bool kvm_tdp_mmu_clear_dirty_slot(struct kvm *kvm, struct 
kvm_memory_slot *slot)
        int root_as_id;
        bool spte_set = false;
 
-       for_each_tdp_mmu_root_yield_safe(kvm, root, false) {
+       lockdep_assert_held_read(&kvm->mmu_lock);
+
+       for_each_tdp_mmu_root_yield_safe(kvm, root, true) {
                root_as_id = kvm_mmu_page_as_id(root);
                if (root_as_id != slot->as_id)
                        continue;
-- 
2.31.0.291.g576ba9dcdaf-goog

Reply via email to