On Thu, 2017-02-09 at 08:28 +0530, Aneesh Kumar K.V wrote:
> This helps us to do some optimization for application exit case, where we can
> skip the DD1 style pte update sequence.
> 
> Signed-off-by: Aneesh Kumar K.V <aneesh.ku...@linux.vnet.ibm.com>
Tested-by: Michael Neuling <mi...@neuling.org>

> ---
>  arch/powerpc/include/asm/book3s/64/pgtable.h | 17 +++++++++++++++++
>  arch/powerpc/include/asm/book3s/64/radix.h   | 23 ++++++++++++++++++++++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 6f15bde94da2..e91ada786d48 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -373,6 +373,23 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
>  	return __pte(old);
>  }
> 
> +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
> +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
> +					    unsigned long addr,
> +					    pte_t *ptep, int full)
> +{
> +	if (full && radix_enabled()) {
> +		/*
> +		 * Let's skip the DD1 style pte update here. We know that
> +		 * this is a full mm pte clear and hence can be sure there is
> +		 * no parallel set_pte.
> +		 */
> +		return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
> +	}
> +	return ptep_get_and_clear(mm, addr, ptep);
> +}
> +
> +
>  static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
>  			     pte_t * ptep)
>  {
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index 70a3cdcdbe47..fcf822d6c204 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -139,7 +139,7 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
> 
>  	unsigned long new_pte;
> 
> -	old_pte = __radix_pte_update(ptep, ~0, 0);
> +	old_pte = __radix_pte_update(ptep, ~0ul, 0);
>  	/*
>  	 * new value of pte
>  	 */
> @@ -157,6 +157,27 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
>  	return old_pte;
>  }
> 
> +static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
> +						   unsigned long addr,
> +						   pte_t *ptep, int full)
> +{
> +	unsigned long old_pte;
> +
> +	if (full) {
> +		/*
> +		 * If we are trying to clear the pte, we can skip
> +		 * the DD1 pte update sequence and batch the tlb flush. The
> +		 * tlb flush batching is done by mmu gather code. We
> +		 * still keep the cmp_xchg update to make sure we get
> +		 * correct R/C bit which might be updated via Nest MMU.
> +		 */
> +		old_pte = __radix_pte_update(ptep, ~0ul, 0);
> +	} else
> +		old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
> +
> +	return __pte(old_pte);
> +}
> +
>  /*
>   * Set the dirty and/or accessed bits atomically in a linux PTE, this
>   * function doesn't need to invalidate tlb.