> -----Original Message-----
> From: kvm-ow...@vger.kernel.org [mailto:kvm-ow...@vger.kernel.org] On Behalf Of
> Paul Mackerras
> Sent: Tuesday, August 06, 2013 9:58 AM
> To: Alexander Graf; Benjamin Herrenschmidt
> Cc: kvm-ppc@vger.kernel.org; k...@vger.kernel.org
> Subject: [PATCH 21/23] KVM: PPC: Book3S PR: Use mmu_notifier_retry() in
> kvmppc_mmu_map_page()
> 
> When the MM code is invalidating a range of pages, it calls the KVM
> kvm_mmu_notifier_invalidate_range_start() notifier function, which calls
> kvm_unmap_hva_range(), which arranges to flush all the existing host
> HPTEs for guest pages.  However, the Linux PTEs for the range being
> flushed are still valid at that point.  We are not supposed to establish
> any new references to pages in the range until the ...range_end()
> notifier gets called.  The PPC-specific KVM code doesn't get any
> explicit notification of that; instead, we are supposed to use
> mmu_notifier_retry() to test whether we are or have been inside a
> range flush notifier pair while we have been getting a page and
> instantiating a host HPTE for the page.
> 
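For anyone following along: the protocol described above is the generic KVM one rather
than anything PPC-specific.  A condensed sketch using the generic KVM names from this
kernel generation (the pfn handling and the -EAGAIN return here are illustrative only,
not the exact PPC hunk below):

static int sketch_map_guest_page(struct kvm_vcpu *vcpu, gfn_t gfn)
{
        struct kvm *kvm = vcpu->kvm;
        unsigned long mmu_seq;
        pfn_t pfn;

        /* Sample the notifier sequence number *before* looking up the pfn. */
        mmu_seq = kvm->mmu_notifier_seq;
        smp_rmb();

        /* May sleep and fault the page in; the Linux PTE is still valid here. */
        pfn = gfn_to_pfn(kvm, gfn);

        spin_lock(&kvm->mmu_lock);
        if (mmu_notifier_retry(kvm, mmu_seq)) {
                /*
                 * An invalidate_range_start()/..._end() pair has run (or is
                 * still running) since mmu_seq was sampled: install nothing
                 * and let the fault be taken again.
                 */
                spin_unlock(&kvm->mmu_lock);
                kvm_release_pfn_clean(pfn);
                return -EAGAIN;
        }
        /* ... safe to install the host translation while holding mmu_lock ... */
        spin_unlock(&kvm->mmu_lock);
        kvm_release_pfn_clean(pfn);
        return 0;
}
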
> This therefore adds a call to mmu_notifier_retry inside
> kvmppc_mmu_map_page().  This call is inside a region locked with
> kvm->mmu_lock, which is the same lock that is taken by the KVM
> MMU notifier functions, thus ensuring that no new notification can
> proceed while we are in the locked region.  Inside this region we
> also create the host HPTE and link the corresponding hpte_cache
> structure into the lists used to find it later.  We cannot allocate
> the hpte_cache structure inside this locked region because that can
> lead to deadlock, so we allocate it outside the region and free it
> if we end up not using it.
> 
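If I read the deadlock concern correctly, it is the usual one: the hpte_cache entry
comes from kmem_cache_zalloc(..., GFP_KERNEL), which may sleep (not allowed under a
spinlock), and direct reclaim can itself end up in the MMU notifiers, which take
kvm->mmu_lock.  So the resulting shape is roughly (using the helpers this patch adds):

        cpte = kvmppc_mmu_hpte_cache_next(vcpu);   /* GFP_KERNEL: outside mmu_lock */

        spin_lock(&kvm->mmu_lock);
        if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
                r = -EAGAIN;              /* no cache entry, or a flush intervened */
                goto out_unlock;
        }
        /* ... insert the HPTE, then kvmppc_mmu_hpte_cache_map(vcpu, cpte); cpte = NULL ... */
out_unlock:
        spin_unlock(&kvm->mmu_lock);
        if (cpte)
                kvmppc_mmu_hpte_cache_free(cpte); /* allocated but never linked in */
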
> This also moves the updates of vcpu3s->hpte_cache_count inside the
> regions locked with vcpu3s->mmu_lock, and does the increment in
> kvmppc_mmu_hpte_cache_map() when the pte is added to the cache
> rather than when it is allocated, in order that the hpte_cache_count
> is accurate.
> 
> Signed-off-by: Paul Mackerras <pau...@samba.org>
> ---
>  arch/powerpc/include/asm/kvm_book3s.h |  1 +
>  arch/powerpc/kvm/book3s_64_mmu_host.c | 37 ++++++++++++++++++++++++++---------
>  arch/powerpc/kvm/book3s_mmu_hpte.c    | 14 +++++++++----
>  3 files changed, 39 insertions(+), 13 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h
> index 4fe6864..e711e77 100644
> --- a/arch/powerpc/include/asm/kvm_book3s.h
> +++ b/arch/powerpc/include/asm/kvm_book3s.h
> @@ -143,6 +143,7 @@ extern long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr,
> 
>  extern void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
>  extern struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu);
> +extern void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte);
>  extern void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu);
>  extern int kvmppc_mmu_hpte_init(struct kvm_vcpu *vcpu);
>  extern void kvmppc_mmu_invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte);
> diff --git a/arch/powerpc/kvm/book3s_64_mmu_host.c b/arch/powerpc/kvm/book3s_64_mmu_host.c
> index 7fcf38f..b7e9504 100644
> --- a/arch/powerpc/kvm/book3s_64_mmu_host.c
> +++ b/arch/powerpc/kvm/book3s_64_mmu_host.c
> @@ -93,6 +93,13 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
>       int r = 0;
>       int hpsize = MMU_PAGE_4K;
>       bool writable;
> +     unsigned long mmu_seq;
> +     struct kvm *kvm = vcpu->kvm;
> +     struct hpte_cache *cpte;
> +
> +     /* used to check for invalidations in progress */
> +     mmu_seq = kvm->mmu_notifier_seq;
> +     smp_rmb();
> 
>       /* Get host physical address for gpa */
>       hpaddr = kvmppc_gfn_to_pfn(vcpu, orig_pte->raddr >> PAGE_SHIFT,
> @@ -143,6 +150,14 @@ int kvmppc_mmu_map_page(struct kvm_vcpu *vcpu, struct kvmppc_pte *orig_pte,
> 
>       hash = hpt_hash(vpn, mmu_psize_defs[hpsize].shift, MMU_SEGSIZE_256M);
> 
> +     cpte = kvmppc_mmu_hpte_cache_next(vcpu);
> +
> +     spin_lock(&kvm->mmu_lock);
> +     if (!cpte || mmu_notifier_retry(kvm, mmu_seq)) {
> +             r = -EAGAIN;

Paul, I am trying to understand the flow: does "retry" here mean that we do not 
create the mapping and instead return to the guest, which will fault again, at 
which point we retry the whole mapping?

Thanks
-Bharat

> +             goto out_unlock;
> +     }
> +
>  map_again:
>       hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
> 
> @@ -150,7 +165,7 @@ map_again:
>       if (attempt > 1)
>               if (ppc_md.hpte_remove(hpteg) < 0) {
>                       r = -1;
> -                     goto out;
> +                     goto out_unlock;
>               }
> 
>       ret = ppc_md.hpte_insert(hpteg, vpn, hpaddr, rflags, vflags,
> @@ -163,8 +178,6 @@ map_again:
>               attempt++;
>               goto map_again;
>       } else {
> -             struct hpte_cache *pte = kvmppc_mmu_hpte_cache_next(vcpu);
> -
>               trace_kvm_book3s_64_mmu_map(rflags, hpteg,
>                                           vpn, hpaddr, orig_pte);
> 
> @@ -175,15 +188,21 @@ map_again:
>                       hpteg = ((hash & htab_hash_mask) * HPTES_PER_GROUP);
>               }
> 
> -             pte->slot = hpteg + (ret & 7);
> -             pte->host_vpn = vpn;
> -             pte->pte = *orig_pte;
> -             pte->pfn = hpaddr >> PAGE_SHIFT;
> -             pte->pagesize = hpsize;
> +             cpte->slot = hpteg + (ret & 7);
> +             cpte->host_vpn = vpn;
> +             cpte->pte = *orig_pte;
> +             cpte->pfn = hpaddr >> PAGE_SHIFT;
> +             cpte->pagesize = hpsize;
> 
> -             kvmppc_mmu_hpte_cache_map(vcpu, pte);
> +             kvmppc_mmu_hpte_cache_map(vcpu, cpte);
> +             cpte = NULL;
>       }
> +
> +out_unlock:
> +     spin_unlock(&kvm->mmu_lock);
>       kvm_release_pfn_clean(hpaddr >> PAGE_SHIFT);
> +     if (cpte)
> +             kvmppc_mmu_hpte_cache_free(cpte);
> 
>  out:
>       return r;
> diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c
> index d2d280b..6b79bfc 100644
> --- a/arch/powerpc/kvm/book3s_mmu_hpte.c
> +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c
> @@ -98,6 +98,8 @@ void kvmppc_mmu_hpte_cache_map(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
>                          &vcpu3s->hpte_hash_vpte_64k[index]);
>  #endif
> 
> +     vcpu3s->hpte_cache_count++;
> +
>       spin_unlock(&vcpu3s->mmu_lock);
>  }
> 
> @@ -131,10 +133,10 @@ static void invalidate_pte(struct kvm_vcpu *vcpu, struct hpte_cache *pte)
>  #ifdef CONFIG_PPC_BOOK3S_64
>       hlist_del_init_rcu(&pte->list_vpte_64k);
>  #endif
> +     vcpu3s->hpte_cache_count--;
> 
>       spin_unlock(&vcpu3s->mmu_lock);
> 
> -     vcpu3s->hpte_cache_count--;
>       call_rcu(&pte->rcu_head, free_pte_rcu);
>  }
> 
> @@ -331,15 +333,19 @@ struct hpte_cache *kvmppc_mmu_hpte_cache_next(struct kvm_vcpu *vcpu)
>       struct kvmppc_vcpu_book3s *vcpu3s = to_book3s(vcpu);
>       struct hpte_cache *pte;
> 
> -     pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
> -     vcpu3s->hpte_cache_count++;
> -
>       if (vcpu3s->hpte_cache_count == HPTEG_CACHE_NUM)
>               kvmppc_mmu_pte_flush_all(vcpu);
> 
> +     pte = kmem_cache_zalloc(hpte_cache, GFP_KERNEL);
> +
>       return pte;
>  }
> 
> +void kvmppc_mmu_hpte_cache_free(struct hpte_cache *pte)
> +{
> +     kmem_cache_free(hpte_cache, pte);
> +}
> +
>  void kvmppc_mmu_hpte_destroy(struct kvm_vcpu *vcpu)
>  {
>       kvmppc_mmu_pte_flush(vcpu, 0, 0);
> --
> 1.8.3.1
> 