Re: [PATCH 2/3] powerpc/mm/radix: Use ptep_get_and_clear_full when clearing pte for full mm

2017-02-13 Thread Michael Neuling
On Thu, 2017-02-09 at 08:28 +0530, Aneesh Kumar K.V wrote:
> This helps us to do some optimization for the application exit case, where we
> can skip the DD1-style pte update sequence.
> 
> Signed-off-by: Aneesh Kumar K.V 

Tested-by: Michael Neuling 

> ---
>  arch/powerpc/include/asm/book3s/64/pgtable.h | 17 +
>  arch/powerpc/include/asm/book3s/64/radix.h   | 23 ++-
>  2 files changed, 39 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h
> b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 6f15bde94da2..e91ada786d48 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -373,6 +373,23 @@ static inline pte_t ptep_get_and_clear(struct mm_struct
> *mm,
>   return __pte(old);
>  }
>  
> +#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
> +static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
> + unsigned long addr,
> + pte_t *ptep, int full)
> +{
> + if (full && radix_enabled()) {
> + /*
> +  * Let's skip the DD1 style pte update here. We know that
> +  * this is a full mm pte clear and hence can be sure there is
> +  * no parallel set_pte.
> +  */
> + return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
> + }
> + return ptep_get_and_clear(mm, addr, ptep);
> +}
> +
> +
>  static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
>    pte_t * ptep)
>  {
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h
> b/arch/powerpc/include/asm/book3s/64/radix.h
> index 70a3cdcdbe47..fcf822d6c204 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -139,7 +139,7 @@ static inline unsigned long radix__pte_update(struct
> mm_struct *mm,
>  
>   unsigned long new_pte;
>  
> - old_pte = __radix_pte_update(ptep, ~0, 0);
> + old_pte = __radix_pte_update(ptep, ~0ul, 0);
>   /*
>    * new value of pte
>    */
> @@ -157,6 +157,27 @@ static inline unsigned long radix__pte_update(struct
> mm_struct *mm,
>   return old_pte;
>  }
>  
> +static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
> +    unsigned long addr,
> +    pte_t *ptep, int full)
> +{
> + unsigned long old_pte;
> +
> + if (full) {
> + /*
> +  * If we are trying to clear the pte, we can skip
> +  * the DD1 pte update sequence and batch the tlb flush. The
> +  * tlb flush batching is done by mmu gather code. We
> +  * still keep the cmp_xchg update to make sure we get
> +  * correct R/C bit which might be updated via Nest MMU.
> +  */
> + old_pte = __radix_pte_update(ptep, ~0ul, 0);
> + } else
> + old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
> +
> + return __pte(old_pte);
> +}
> +
>  /*
>   * Set the dirty and/or accessed bits atomically in a linux PTE, this
>   * function doesn't need to invalidate tlb.


[PATCH 2/3] powerpc/mm/radix: Use ptep_get_and_clear_full when clearing pte for full mm

2017-02-08 Thread Aneesh Kumar K.V
This helps us to do some optimization for the application exit case, where we
can skip the DD1-style pte update sequence.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 17 +
 arch/powerpc/include/asm/book3s/64/radix.h   | 23 ++-
 2 files changed, 39 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 6f15bde94da2..e91ada786d48 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -373,6 +373,23 @@ static inline pte_t ptep_get_and_clear(struct mm_struct 
*mm,
return __pte(old);
 }
 
+#define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
+static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
+   unsigned long addr,
+   pte_t *ptep, int full)
+{
+   if (full && radix_enabled()) {
+   /*
+* Let's skip the DD1 style pte update here. We know that
+* this is a full mm pte clear and hence can be sure there is
+* no parallel set_pte.
+*/
+   return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   }
+   return ptep_get_and_clear(mm, addr, ptep);
+}
+
+
 static inline void pte_clear(struct mm_struct *mm, unsigned long addr,
 pte_t * ptep)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
b/arch/powerpc/include/asm/book3s/64/radix.h
index 70a3cdcdbe47..fcf822d6c204 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -139,7 +139,7 @@ static inline unsigned long radix__pte_update(struct 
mm_struct *mm,
 
unsigned long new_pte;
 
-   old_pte = __radix_pte_update(ptep, ~0, 0);
+   old_pte = __radix_pte_update(ptep, ~0ul, 0);
/*
 * new value of pte
 */
@@ -157,6 +157,27 @@ static inline unsigned long radix__pte_update(struct 
mm_struct *mm,
return old_pte;
 }
 
+static inline pte_t radix__ptep_get_and_clear_full(struct mm_struct *mm,
+  unsigned long addr,
+  pte_t *ptep, int full)
+{
+   unsigned long old_pte;
+
+   if (full) {
+   /*
+* If we are trying to clear the pte, we can skip
+* the DD1 pte update sequence and batch the tlb flush. The
+* tlb flush batching is done by mmu gather code. We
+* still keep the cmp_xchg update to make sure we get
+* correct R/C bit which might be updated via Nest MMU.
+*/
+   old_pte = __radix_pte_update(ptep, ~0ul, 0);
+   } else
+   old_pte = radix__pte_update(mm, addr, ptep, ~0ul, 0, 0);
+
+   return __pte(old_pte);
+}
+
 /*
  * Set the dirty and/or accessed bits atomically in a linux PTE, this
  * function doesn't need to invalidate tlb.
-- 
2.7.4