Re: [RFC PATCH 11/11] KVM: PPC: Eliminate global spinlock in kvmppc_h_enter

2011-11-23 Thread Marcelo Tosatti
On Thu, Nov 17, 2011 at 10:55:49AM +1100, Paul Mackerras wrote:
> From dfd5bcfac841f8a36593edf60d9fb15e0d633287 Mon Sep 17 00:00:00 2001
> From: Paul Mackerras 
> Date: Mon, 14 Nov 2011 13:30:38 +1100
> Subject: 
> 
> Currently, kvmppc_h_enter takes a spinlock that is global to the guest,
> kvm->mmu_lock, in order to check for pending PTE invalidations safely.
> On some workloads, kvmppc_h_enter is called heavily and the use of a
> global spinlock could compromise scalability.  We already use a per-
> guest page spinlock in the form of the bit spinlock on the rmap chain,
> and this gives us synchronization with the PTE invalidation side, which
> also takes the bit spinlock on the rmap chain for each page being
> invalidated.  Thus it is sufficient to check for pending invalidations
> while the rmap chain bit spinlock is held.  However, now we require
> barriers in mmu_notifier_retry() and in the places where
> mmu_notifier_count and mmu_notifier_seq are updated, since we can now
> call mmu_notifier_retry() concurrently with updates to those fields.
> 
> Signed-off-by: Paul Mackerras 
> ---
> Cc'd to kvm@vger.kernel.org for review of the generic kvm changes.

Looks good to me.


[RFC PATCH 11/11] KVM: PPC: Eliminate global spinlock in kvmppc_h_enter

2011-11-16 Thread Paul Mackerras
From dfd5bcfac841f8a36593edf60d9fb15e0d633287 Mon Sep 17 00:00:00 2001
From: Paul Mackerras 
Date: Mon, 14 Nov 2011 13:30:38 +1100
Subject: 

Currently, kvmppc_h_enter takes a spinlock that is global to the guest,
kvm->mmu_lock, in order to check for pending PTE invalidations safely.
On some workloads, kvmppc_h_enter is called heavily and the use of a
global spinlock could compromise scalability.  We already use a per-
guest page spinlock in the form of the bit spinlock on the rmap chain,
and this gives us synchronization with the PTE invalidation side, which
also takes the bit spinlock on the rmap chain for each page being
invalidated.  Thus it is sufficient to check for pending invalidations
while the rmap chain bit spinlock is held.  However, now we require
barriers in mmu_notifier_retry() and in the places where
mmu_notifier_count and mmu_notifier_seq are updated, since we can now
call mmu_notifier_retry() concurrently with updates to those fields.

Signed-off-by: Paul Mackerras 
---
Cc'd to kvm@vger.kernel.org for review of the generic kvm changes.
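
The generic change referred to in the last paragraph is a barrier pairing: mmu_notifier_retry() can now be called without kvm->mmu_lock, so its read of mmu_notifier_count must be ordered before its read of mmu_notifier_seq, and the notifier callbacks must publish their updates in the opposite order. The following is only a sketch of that intent, not the literal include/linux/kvm_host.h and virt/kvm/kvm_main.c hunks (which are not quoted here):

/* Reader side, e.g. the H_ENTER / page fault path, now lock-free: */
static inline int mmu_notifier_retry(struct kvm_vcpu *vcpu, unsigned long mmu_seq)
{
	if (unlikely(vcpu->kvm->mmu_notifier_count))
		return 1;
	/*
	 * Order the read of mmu_notifier_count above against the read of
	 * mmu_notifier_seq below; pairs with the write ordering on the
	 * invalidation side.
	 */
	smp_rmb();
	if (vcpu->kvm->mmu_notifier_seq != mmu_seq)
		return 1;
	return 0;
}

/*
 * Writer side (invalidate_range_start ... invalidate_range_end), sketch:
 * make the count visibly non-zero before any PTE is torn down, and make
 * the bumped seq visible before the count drops back to zero.
 */
	kvm->mmu_notifier_count++;
	smp_wmb();
	/* ... host PTEs for the range are invalidated here ... */
	kvm->mmu_notifier_seq++;
	smp_wmb();
	kvm->mmu_notifier_count--;

With that pairing, a racing reader either sees mmu_notifier_count non-zero or sees the bumped mmu_notifier_seq, and retries in both cases.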

 arch/powerpc/include/asm/kvm_book3s_64.h |   13 +
 arch/powerpc/kvm/book3s_64_mmu_hv.c  |   19 
 arch/powerpc/kvm/book3s_hv_rm_mmu.c  |   75 -
 include/linux/kvm_host.h |   13 +++--
 virt/kvm/kvm_main.c  |4 ++
 5 files changed, 66 insertions(+), 58 deletions(-)

diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h
index 3745337..db6cbd5 100644
--- a/arch/powerpc/include/asm/kvm_book3s_64.h
+++ b/arch/powerpc/include/asm/kvm_book3s_64.h
@@ -161,4 +161,17 @@ static inline unsigned long kvmppc_read_update_linux_pte(pte_t *p)
return pfn;
 }
 
+static inline void lock_rmap(unsigned long *rmap)
+{
+   do {
+   while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
+   cpu_relax();
+   } while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
+}
+
+static inline void unlock_rmap(unsigned long *rmap)
+{
+   __clear_bit_unlock(KVMPPC_RMAP_LOCK_BIT, rmap);
+}
+
 #endif /* __ASM_KVM_BOOK3S_64_H__ */
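
The pair added above is in effect a spinlock confined to one bit of the rmap word: test_and_set_bit_lock() provides acquire semantics and __clear_bit_unlock() provides release semantics, so stores made while the bit is held are ordered against the next holder. A purely illustrative caller (hypothetical, not part of this patch) would look like:

/* Hypothetical example only: update per-page flags under the rmap bit lock. */
static void example_set_rmap_flags(unsigned long *rmap, unsigned long flags)
{
	lock_rmap(rmap);	/* spin until KVMPPC_RMAP_LOCK_BIT is clear, then take it (acquire) */
	*rmap |= flags;		/* protected update, ordered against other holders of the bit */
	unlock_rmap(rmap);	/* clear the bit with release semantics, publishing the update */
}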
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c
index 8c497b8..bb75bfb 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_hv.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c
@@ -611,12 +611,6 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
goto out_put;
pfn = page_to_pfn(page);
 
-   /* Check if we might have been invalidated; let the guest retry if so */
-   ret = RESUME_GUEST;
-   spin_lock(&kvm->mmu_lock);
-   if (mmu_notifier_retry(vcpu, mmu_seq))
-   goto out_unlock;
-
/* Set the HPTE to point to pfn */
ret = RESUME_GUEST;
hptep = (unsigned long *)(kvm->arch.hpt_virt + (index << 4));
@@ -627,19 +621,26 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu,
rev->guest_rpte != hpte[2]) {
/* HPTE has been changed under us; let the guest retry */
hptep[0] &= ~HPTE_V_HVLOCK;
-   goto out_unlock;
+   goto out_put;
}
hpte[0] = (hpte[0] & ~HPTE_V_ABSENT) | HPTE_V_VALID;
hpte[1] = (rev->guest_rpte & ~(HPTE_R_PP0 - pte_size)) |
(pfn << PAGE_SHIFT);
rmap = &memslot->rmap[gfn - memslot->base_gfn];
+   lock_rmap(rmap);
+
+   /* Check if we might have been invalidated; let the guest retry if so */
+   ret = RESUME_GUEST;
+   if (mmu_notifier_retry(vcpu, mmu_seq)) {
+   unlock_rmap(rmap);
+   hptep[0] &= ~HPTE_V_HVLOCK;
+   goto out_put;
+   }
kvmppc_add_revmap_chain(kvm, rev, rmap, index, 0);
kvmppc_modify_hpte(kvm, hptep, hpte, index);
if (page)
SetPageDirty(page);
 
- out_unlock:
-   spin_unlock(&kvm->mmu_lock);
  out_put:
if (page)
put_page(page);
diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
index 2cadd06..4070920 100644
--- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c
+++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c
@@ -57,22 +57,16 @@ static struct kvm_memory_slot *builtin_gfn_to_memslot(struct kvm *kvm,
return NULL;
 }
 
-static void lock_rmap(unsigned long *rmap)
-{
-   do {
-   while (test_bit(KVMPPC_RMAP_LOCK_BIT, rmap))
-   cpu_relax();
-   } while (test_and_set_bit_lock(KVMPPC_RMAP_LOCK_BIT, rmap));
-}
-
-/* Add this HPTE into the chain for the real page */
+/*
+ * Add this HPTE into the chain for the real page.
+ * Must be called with the chain locked; it unlocks the chain.
+ */
 void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev,
 unsigned long *rmap, long pte_index, int realmode)
 {
struct revmap_entry *head, *tail;
unsigned long i;
 
-   lock_rmap(rmap