Re: [PATCH 3/4] powerpc/mm/radix: Use different pte update sequence for different POWER9 revs

2016-09-05 Thread Michael Neuling
On Wed, 2016-08-24 at 15:03 +0530, Aneesh Kumar K.V wrote:
> POWER9 DD1 requires the pte to be marked invalid (V=0) before it is
> updated with the new value. This patch makes that distinction between
> the different revisions.
> 
> Signed-off-by: Aneesh Kumar K.V 

Acked-by: Michael Neuling 

> ---
>  arch/powerpc/include/asm/book3s/32/pgtable.h |  3 +-
>  arch/powerpc/include/asm/book3s/64/pgtable.h |  5 +-
>  arch/powerpc/include/asm/book3s/64/radix.h   | 75 ++--
>  arch/powerpc/include/asm/nohash/32/pgtable.h |  3 +-
>  arch/powerpc/include/asm/nohash/64/pgtable.h |  3 +-
>  arch/powerpc/mm/pgtable-book3s64.c   |  2 +-
>  arch/powerpc/mm/pgtable.c|  2 +-
>  7 files changed, 71 insertions(+), 22 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 38b33dcfcc9d..6b8b2d57fdc8 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -223,7 +223,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
>  }
>  
>  
> -static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
> +static inline void __ptep_set_access_flags(struct mm_struct *mm,
> +    pte_t *ptep, pte_t entry)
>  {
>   unsigned long set = pte_val(entry) &
>   (_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 263bf39ced40..8ec8be9495ba 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -565,10 +565,11 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
>   * Generic functions with hash/radix callbacks
>   */
>  
> -static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
> +static inline void __ptep_set_access_flags(struct mm_struct *mm,
> +    pte_t *ptep, pte_t entry)
>  {
>   if (radix_enabled())
> - return radix__ptep_set_access_flags(ptep, entry);
> + return radix__ptep_set_access_flags(mm, ptep, entry);
>   return hash__ptep_set_access_flags(ptep, entry);
>  }
>  
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
> index a2fe8fbfbd3d..2a46dea8e1b1 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -11,6 +11,11 @@
>  #include 
>  #endif
>  
> +#ifndef __ASSEMBLY__
> +#include 
> +#include 
> +#endif
> +
>  /* An empty PTE can still have a R or C writeback */
>  #define RADIX_PTE_NONE_MASK  (_PAGE_DIRTY | _PAGE_ACCESSED)
>  
> @@ -105,11 +110,8 @@
>  #define RADIX_PUD_TABLE_SIZE (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
>  #define RADIX_PGD_TABLE_SIZE (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
>  
> -static inline unsigned long radix__pte_update(struct mm_struct *mm,
> - unsigned long addr,
> - pte_t *ptep, unsigned long clr,
> - unsigned long set,
> - int huge)
> +static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
> +    unsigned long set)
>  {
>   pte_t pte;
>   unsigned long old_pte, new_pte;
> @@ -121,9 +123,39 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
>  
>   } while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
>  
> - /* We already do a sync in cmpxchg, is ptesync needed ?*/
> + return old_pte;
> +}
> +
> +
> +static inline unsigned long radix__pte_update(struct mm_struct *mm,
> + unsigned long addr,
> + pte_t *ptep, unsigned long clr,
> + unsigned long set,
> + int huge)
> +{
> + unsigned long old_pte;
> +
> + if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
> +
> + unsigned long new_pte;
> +
> + old_pte = __radix_pte_update(ptep, ~0, 0);
> + asm volatile("ptesync" : : : "memory");
> + /*
> +  * new value of pte
> +  */
> + new_pte = (old_pte | set) & ~clr;
> +
> + /*
> +  * For now let's do heavy pid flush
> +  * radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
> +  */
> + radix__flush_tlb_mm(mm);
> +
> + __radix_pte_update(ptep, 0, new_pte);
> + } else
> + old_pte = __radix_pte_update(ptep, clr, set);
>   asm volatile("ptesync" : : : "memory");
> - /* huge pages use the old page table lock */
>   if (!huge)
>   assert_pte_locked(mm, addr);
>  
> @@ 

[PATCH 3/4] powerpc/mm/radix: Use different pte update sequence for different POWER9 revs

2016-08-24 Thread Aneesh Kumar K.V
POWER9 DD1 requires the pte to be marked invalid (V=0) before it is
updated with the new value. This patch makes that distinction between
the different revisions.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  3 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h |  5 +-
 arch/powerpc/include/asm/book3s/64/radix.h   | 75 ++--
 arch/powerpc/include/asm/nohash/32/pgtable.h |  3 +-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  3 +-
 arch/powerpc/mm/pgtable-book3s64.c   |  2 +-
 arch/powerpc/mm/pgtable.c|  2 +-
 7 files changed, 71 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 38b33dcfcc9d..6b8b2d57fdc8 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -223,7 +223,8 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 }
 
 
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void __ptep_set_access_flags(struct mm_struct *mm,
+  pte_t *ptep, pte_t entry)
 {
unsigned long set = pte_val(entry) &
(_PAGE_DIRTY | _PAGE_ACCESSED | _PAGE_RW | _PAGE_EXEC);
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 263bf39ced40..8ec8be9495ba 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -565,10 +565,11 @@ static inline bool check_pte_access(unsigned long access, unsigned long ptev)
  * Generic functions with hash/radix callbacks
  */
 
-static inline void __ptep_set_access_flags(pte_t *ptep, pte_t entry)
+static inline void __ptep_set_access_flags(struct mm_struct *mm,
+  pte_t *ptep, pte_t entry)
 {
if (radix_enabled())
-   return radix__ptep_set_access_flags(ptep, entry);
+   return radix__ptep_set_access_flags(mm, ptep, entry);
return hash__ptep_set_access_flags(ptep, entry);
 }
 
diff --git a/arch/powerpc/include/asm/book3s/64/radix.h b/arch/powerpc/include/asm/book3s/64/radix.h
index a2fe8fbfbd3d..2a46dea8e1b1 100644
--- a/arch/powerpc/include/asm/book3s/64/radix.h
+++ b/arch/powerpc/include/asm/book3s/64/radix.h
@@ -11,6 +11,11 @@
 #include 
 #endif
 
+#ifndef __ASSEMBLY__
+#include 
+#include 
+#endif
+
 /* An empty PTE can still have a R or C writeback */
 #define RADIX_PTE_NONE_MASK(_PAGE_DIRTY | _PAGE_ACCESSED)
 
@@ -105,11 +110,8 @@
 #define RADIX_PUD_TABLE_SIZE   (sizeof(pud_t) << RADIX_PUD_INDEX_SIZE)
 #define RADIX_PGD_TABLE_SIZE   (sizeof(pgd_t) << RADIX_PGD_INDEX_SIZE)
 
-static inline unsigned long radix__pte_update(struct mm_struct *mm,
-   unsigned long addr,
-   pte_t *ptep, unsigned long clr,
-   unsigned long set,
-   int huge)
+static inline unsigned long __radix_pte_update(pte_t *ptep, unsigned long clr,
+  unsigned long set)
 {
pte_t pte;
unsigned long old_pte, new_pte;
@@ -121,9 +123,39 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
 
} while (!pte_xchg(ptep, __pte(old_pte), __pte(new_pte)));
 
-   /* We already do a sync in cmpxchg, is ptesync needed ?*/
+   return old_pte;
+}
+
+
+static inline unsigned long radix__pte_update(struct mm_struct *mm,
+   unsigned long addr,
+   pte_t *ptep, unsigned long clr,
+   unsigned long set,
+   int huge)
+{
+   unsigned long old_pte;
+
+   if (cpu_has_feature(CPU_FTR_POWER9_DD1)) {
+
+   unsigned long new_pte;
+
+   old_pte = __radix_pte_update(ptep, ~0, 0);
+   asm volatile("ptesync" : : : "memory");
+   /*
+* new value of pte
+*/
+   new_pte = (old_pte | set) & ~clr;
+
+   /*
+* For now let's do heavy pid flush
+* radix__flush_tlb_page_psize(mm, addr, mmu_virtual_psize);
+*/
+   radix__flush_tlb_mm(mm);
+
+   __radix_pte_update(ptep, 0, new_pte);
+   } else
+   old_pte = __radix_pte_update(ptep, clr, set);
asm volatile("ptesync" : : : "memory");
-   /* huge pages use the old page table lock */
if (!huge)
assert_pte_locked(mm, addr);
 
@@ -134,20 +166,33 @@ static inline unsigned long radix__pte_update(struct mm_struct *mm,
  * Set the dirty and/or accessed bits atomically in a linux PTE, this
  * function doesn't need to invalidate tlb.
  */
-static inline void radix__ptep_set_access_flags(pte_t *ptep, pte_t entry)
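
To make the DD1 ordering easier to experiment with outside the kernel, below is a
minimal user-space C sketch of the sequence the patch implements: atomically clear
the PTE to an invalid (V=0) value, order the store and flush, and only then install
the new value. All names here (toy_pte_t, toy_pte_update, toy_pte_update_dd1,
toy_ptesync, toy_flush_tlb_mm, TOY_PTE_*) are made-up stand-ins for pte_xchg,
ptesync and radix__flush_tlb_mm, using C11 atomics and no-ops in place of the real
barriers and TLB flush. It illustrates the ordering only and is not the kernel
implementation.

/*
 * User-space illustration of the POWER9 DD1 pte update ordering:
 * never move a valid PTE directly to another valid value; clear it
 * to V=0 first, sync and flush, then install the new value.
 */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

typedef _Atomic uint64_t toy_pte_t;	/* stand-in for pte_t */

#define TOY_PTE_VALID		0x1ull	/* stand-in for the V bit */
#define TOY_PTE_ACCESSED	0x2ull
#define TOY_PTE_DIRTY		0x4ull

/* cmpxchg loop applying clr/set, in the spirit of __radix_pte_update(); returns old. */
static uint64_t toy_pte_update(toy_pte_t *ptep, uint64_t clr, uint64_t set)
{
	uint64_t old_pte = atomic_load(ptep);
	uint64_t new_pte;

	do {
		new_pte = (old_pte | set) & ~clr;
	} while (!atomic_compare_exchange_weak(ptep, &old_pte, new_pte));

	return old_pte;
}

/* Stand-ins for ptesync and radix__flush_tlb_mm(); no real hardware to order here. */
static void toy_ptesync(void)
{
	atomic_thread_fence(memory_order_seq_cst);
}

static void toy_flush_tlb_mm(void)
{
	/* nothing to flush in user space */
}

/* DD1-style two-step update: invalidate, sync + flush, then write the new value. */
static uint64_t toy_pte_update_dd1(toy_pte_t *ptep, uint64_t clr, uint64_t set)
{
	uint64_t old_pte = toy_pte_update(ptep, ~0ull, 0);	/* clear to V=0 */
	uint64_t new_pte = (old_pte | set) & ~clr;

	toy_ptesync();
	toy_flush_tlb_mm();
	toy_pte_update(ptep, 0, new_pte);			/* install new value */
	return old_pte;
}

int main(void)
{
	toy_pte_t pte = TOY_PTE_VALID;

	toy_pte_update_dd1(&pte, 0, TOY_PTE_DIRTY | TOY_PTE_ACCESSED);
	printf("pte is now %#llx\n", (unsigned long long)atomic_load(&pte));
	return 0;
}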