[PATCH v7 6/7] powerpc: mm: Add p{te,md,ud}_user_accessible_page helpers

2023-02-14 Thread Rohan McLure
Add the following helpers for detecting whether a page table entry
is a leaf and is accessible to user space.

 * pte_user_accessible_page
 * pmd_user_accessible_page
 * pud_user_accessible_page

Also implement missing pmd_user and pud_user definitions for both
Book3S/nohash 64-bit systems, and pmd_user stubs for Book3S/nohash
32-bit systems.
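
For reference, the generic sanitiser gates its per-pfn accounting on
these helpers, roughly as in the following paraphrase of
mm/page_table_check.c (illustrative sketch only, not part of this
patch):

	void __page_table_check_pte_set(struct mm_struct *mm, unsigned long addr,
					pte_t *ptep, pte_t pte)
	{
		if (&init_mm == mm)
			return;

		/* only user-accessible leaf entries are tracked */
		if (pte_user_accessible_page(pte))
			page_table_check_set(mm, addr, pte_pfn(pte), 1,
					     pte_write(pte));
	}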

Signed-off-by: Rohan McLure 
---
V2: Provide missing pud_user implementations, use p{u,m}d_is_leaf.
V3: Provide missing pmd_user implementations as stubs in 32-bit.
V4: Use pmd_leaf, pud_leaf, and define pmd_user for 32 Book3E with
static inline method rather than macro.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  4 ++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++++++++
 arch/powerpc/include/asm/nohash/32/pgtable.h |  5 +++++
 arch/powerpc/include/asm/nohash/64/pgtable.h | 10 ++++++++++
 arch/powerpc/include/asm/pgtable.h           | 15 +++++++++++++++
 5 files changed, 44 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index a090cb13a4a0..afd672e84791 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -516,6 +516,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return 0;
+}
 
 
 /* This low level function performs the actual PTE insertion
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index df5ee856444d..a6ed93d01da1 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -538,6 +538,16 @@ static inline bool pte_user(pte_t pte)
return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return !(pmd_raw(pmd) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline bool pud_user(pud_t pud)
+{
+   return !(pud_raw(pud) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
 #define pte_access_permitted pte_access_permitted
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 70edad44dff6..d953533c56ff 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -209,6 +209,11 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return false;
+}
+
 /*
  * PTE updates. This function is called whenever an existing
  * valid PTE is updated. This does -not- include set_pte_at()
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index d391a45e0f11..14e69ebad31f 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -123,6 +123,11 @@ static inline pte_t pmd_pte(pmd_t pmd)
return __pte(pmd_val(pmd));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return (pmd_val(pmd) & _PAGE_USER) == _PAGE_USER;
+}
+
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #define pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -164,6 +169,11 @@ static inline pte_t pud_pte(pud_t pud)
return __pte(pud_val(pud));
 }
 
+static inline bool pud_user(pud_t pud)
+{
+   return (pud_val(pud) & _PAGE_USER) == _PAGE_USER;
+}
+
 static inline pud_t pte_pud(pte_t pte)
 {
return __pud(pte_val(pte));
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index ad0829f816e9..b76fdb80b6c9 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -167,6 +167,21 @@ static inline int pud_pfn(pud_t pud)
 }
 #endif
 
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+   return pte_present(pte) && pte_user(pte);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+   return pmd_leaf(pmd) && pmd_present(pmd) && pmd_user(pmd);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+   return pud_leaf(pud) && pud_present(pud) && pud_user(pud);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.37.2



[PATCH v7 7/7] powerpc: mm: Support page table check

2023-02-14 Thread Rohan McLure
On creation and clearing of a page table mapping, instrument such calls
by invoking page_table_check_pte_set and page_table_check_pte_clear
respectively. These calls serve as a sanity check against illegal
mappings.

Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK for all platforms.

Use set_pte internally; this function reassigns a page table
entry without instrumentation. Generic code remains instrumented, as it
references set_pte_at.
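
The intended shape of the split is roughly as follows (simplified
sketch; the actual inline function in this patch may differ in detail):

	/* instrumented entry point referenced by generic mm code */
	static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
				      pte_t *ptep, pte_t pte)
	{
		page_table_check_pte_set(mm, addr, ptep, pte);
		set_pte(mm, addr, ptep, pte);
	}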

See also:

riscv support in commit 3fee229a8eb9 ("riscv/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
arm64 in commit 42b2547137f5 ("arm64/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
x86_64 in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check")

Signed-off-by: Rohan McLure 
---
V2: Update spacing and types assigned to pte_update calls.
V3: Update one last pte_update call to remove __pte invocation.
V5: Fix 32-bit nohash double set
V6: Omit __set_pte_at instrumentation - should be instrumented by
set_pte_at, with set_pte in between, performing all prior checks.
Instrument pmds. Use set_pte where needed.
V7: Make set_pte_at an inline function. Fix commit message.
Detail changes of internal references to set_pte_at, and its semantics.
---
 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h |  8 +++-
 arch/powerpc/include/asm/book3s/64/pgtable.h | 44 
 arch/powerpc/include/asm/nohash/32/pgtable.h |  7 +++-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  8 +++-
 arch/powerpc/include/asm/pgtable.h   | 10 -
 arch/powerpc/mm/book3s64/hash_pgtable.c  |  2 +-
 arch/powerpc/mm/book3s64/pgtable.c   | 16 ---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 10 ++---
 arch/powerpc/mm/nohash/book3e_pgtable.c  |  2 +-
 arch/powerpc/mm/pgtable_32.c |  2 +-
 11 files changed, 83 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2c9cdf1d8761..2474e2699037 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -154,6 +154,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
+   select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index afd672e84791..8850b4fb22a4 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -53,6 +53,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/page_table_check.h>
+
 static inline bool pte_user(pte_t pte)
 {
return pte_val(pte) & _PAGE_USER;
@@ -338,7 +340,11 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
   pte_t *ptep)
 {
-   return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index a6ed93d01da1..0c6838875720 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -162,6 +162,8 @@
 #define PAGE_KERNEL_ROX__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
 
 #ifndef __ASSEMBLY__
+#include <linux/page_table_check.h>
+
 /*
  * page table defines
  */
@@ -431,8 +433,11 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
 {
-   unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-   return __pte(old);
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
@@ -441,11 +446,16 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
pte_t *ptep, int full)
 {
if (full && radix_enabled()) {
+   pte_t old_pte;
+
/*
 * We know that this is a full mm pte clear and
 * hence can be sure there is no parallel set_pte.
 */
-   return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   old_pte = radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
}
	return ptep_get_and_clear(mm, addr, ptep);
 }

[PATCH v7 5/7] powerpc: mm: Add common pud_pfn stub for all platforms

2023-02-14 Thread Rohan McLure
Prior to this commit, pud_pfn was implemented with BUILD_BUG as the inline
function for 64-bit Book3S systems, but the call is never emitted, as its
invocations in generic code are guarded by calls to pud_devmap, which return
zero on such systems. A future patch will provide support for page table
checks, the generic code for which depends on a pud_pfn stub being
implemented, even though the patch does not interact with puds directly.

Remove the 64-bit Book3S stub and define pud_pfn to warn on all
platforms. pud_pfn may be defined properly on a per-platform basis
should it grow real users in future.
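
For context, the generic callers follow roughly this pattern
(paraphrased from the generic huge-page paths, not part of this patch):

	if (pud_devmap(pud))
		page = pfn_to_page(pud_pfn(pud) +
				   ((addr & ~PUD_MASK) >> PAGE_SHIFT));

so on platforms where pud_devmap() always returns zero, the WARN_ONCE
stub is never reached at runtime.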

Signed-off-by: Rohan McLure 
---
V2: Remove conditional BUILD_BUG and BUG. Instead warn on usage.
V3: Replace WARN with WARN_ONCE, which should suffice to demonstrate
misuse of puds.
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ----------
 arch/powerpc/include/asm/pgtable.h           | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 589d2dbe3873..df5ee856444d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1327,16 +1327,6 @@ static inline int pgd_devmap(pgd_t pgd)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline int pud_pfn(pud_t pud)
-{
-   /*
-* Currently all calls to pud_pfn() are gated around a pud_devmap()
-* check so this should never be used. If it grows another user we
-* want to know about it.
-*/
-   BUILD_BUG();
-   return 0;
-}
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
 void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 284408829fa3..ad0829f816e9 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -153,6 +153,20 @@ struct seq_file;
 void arch_report_meminfo(struct seq_file *m);
 #endif /* CONFIG_PPC64 */
 
+/*
+ * Currently only consumed by page_table_check_pud_{set,clear}. Since clears
+ * and sets to page table entries at any level are done through
+ * page_table_check_pte_{set,clear}, provide stub implementation.
+ */
+#ifndef pud_pfn
+#define pud_pfn pud_pfn
+static inline int pud_pfn(pud_t pud)
+{
+   WARN_ONCE(1, "pud: platform does not use pud entries directly");
+   return 0;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.37.2



[PATCH v7 0/7] Support page table check

2023-02-14 Thread Rohan McLure
Support the page table check sanitiser on all PowerPC platforms. This
sanitiser works by serialising assignments, reassignments and clears of
page table entries at each level in order to ensure that anonymous
mappings have at most one writable consumer, and likewise that
file-backed mappings are not simultaneously also anonymous mappings.

In order to support this infrastructure, a number of stubs must be
defined for all powerpc platforms. Additionally, separate set_pte_at
and set_pte, to allow for internal, uninstrumented mappings.
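
As an illustration of the invariant being enforced, a hypothetical
sequence like the following (not taken from this series) would be
flagged, as it gives one anonymous page two writable mappings:

	set_pte_at(mm, addr1, ptep1, pte_mkwrite(pte)); /* one writable owner: ok */
	set_pte_at(mm, addr2, ptep2, pte_mkwrite(pte)); /* sanitiser reports this */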

v7:
 * Remove use of extern in set_pte prototypes
 * Clean up pmdp_collapse_flush macro
 * Replace set_pte_at with static inline function
 * Fix commit message for patch 7

v6:
 * Support huge pages and p{m,u}d accounting.
 * Remove instrumentation from set_pte from kernel internal pages.
 * 64s: Implement pmdp_collapse_flush in terms of __pmdp_collapse_flush
   as access to the mm_struct * is required.
Link: https://lore.kernel.org/linuxppc-dev/20230214015939.1853438-1-rmcl...@linux.ibm.com/

v5:
Link: https://lore.kernel.org/linuxppc-dev/20221118002146.25979-1-rmcl...@linux.ibm.com/

Rohan McLure (7):
  powerpc: mm: Separate set_pte, set_pte_at for internal, external use
  powerpc/64s: mm: Introduce __pmdp_collapse_flush with mm_struct
argument
  powerpc: mm: Replace p{u,m,4}d_is_leaf with p{u,m,4}d_leaf
  powerpc: mm: Implement p{m,u,4}d_leaf on all platforms
  powerpc: mm: Add common pud_pfn stub for all platforms
  powerpc: mm: Add p{te,md,ud}_user_accessible_page helpers
  powerpc: mm: Support page table check

 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h | 17 +++-
 arch/powerpc/include/asm/book3s/64/pgtable.h | 85 +---
 arch/powerpc/include/asm/book3s/pgtable.h|  3 +-
 arch/powerpc/include/asm/nohash/32/pgtable.h | 12 ++-
 arch/powerpc/include/asm/nohash/64/pgtable.h | 24 +-
 arch/powerpc/include/asm/nohash/pgtable.h|  9 ++-
 arch/powerpc/include/asm/pgtable.h   | 60 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 +--
 arch/powerpc/mm/book3s64/hash_pgtable.c  |  2 +-
 arch/powerpc/mm/book3s64/pgtable.c   | 16 ++--
 arch/powerpc/mm/book3s64/radix_pgtable.c | 24 +++---
 arch/powerpc/mm/nohash/book3e_pgtable.c  |  2 +-
 arch/powerpc/mm/pgtable.c|  9 +--
 arch/powerpc/mm/pgtable_32.c |  2 +-
 arch/powerpc/mm/pgtable_64.c |  6 +-
 arch/powerpc/xmon/xmon.c |  6 +-
 17 files changed, 197 insertions(+), 93 deletions(-)

-- 
2.37.2



[PATCH v7 4/7] powerpc: mm: Implement p{m,u,4}d_leaf on all platforms

2023-02-14 Thread Rohan McLure
The check that a higher-level entry in a multi-level page table contains
a page translation entry (pte) is performed by p{m,u,4}d_leaf stubs, which
may be specialised for each choice of mmu. In a prior commit, we replaced
uses of the catch-all stubs p{m,u,4}d_is_leaf with p{m,u,4}d_leaf.

Replace the catch-all stub definitions for p{m,u,4}d_is_leaf with
definitions for p{m,u,4}d_leaf. A future patch will assume that
p{m,u,4}d_leaf is defined on all platforms.

In particular, implement pud_leaf for Book3E-64, pmd_leaf for all Book3E
and Book3S-32 platforms, with a catch-all definition for p4d_leaf.
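
A typical consumer is a page table walker that stops descending once it
reaches a leaf entry, along these lines (generic pattern, not taken
from this patch):

	if (pmd_leaf(*pmd)) {
		/* huge mapping: no pte level exists below this entry */
		pfn = pmd_pfn(*pmd);
	} else {
		ptep = pte_offset_kernel(pmd, addr);
	}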

Signed-off-by: Rohan McLure 
---
v5: Split patch that replaces p{m,u,4}d_is_leaf into two patches, first
replacing callsites and afterward providing generic definition.
Remove ifndef-defines implementing p{m,u}d_leaf in favour of
implementing stubs in headers belonging to the particular platforms
needing them.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  5 +++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++------
 arch/powerpc/include/asm/nohash/64/pgtable.h |  6 ++++++
 arch/powerpc/include/asm/nohash/pgtable.h    |  6 ++++++
 arch/powerpc/include/asm/pgtable.h           | 22 ++--------------------
 5 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 75823f39e042..a090cb13a4a0 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -242,6 +242,11 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
+{
+   return false;
+}
 
 /*
  * When flushing the tlb entry for a page, we also need to flush the hash
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 7e0d546f4b3c..589d2dbe3873 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1359,16 +1359,14 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va
 /*
  * Like pmd_huge() and pmd_large(), but works regardless of config options
  */
-#define pmd_is_leaf pmd_is_leaf
-#define pmd_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
 {
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
 }
 
-#define pud_is_leaf pud_is_leaf
-#define pud_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
 }
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 879e9a6e5a87..d391a45e0f11 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -141,6 +141,12 @@ static inline void pud_clear(pud_t *pudp)
*pudp = __pud(0);
 }
 
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
+{
+   return false;
+}
+
 #define pud_none(pud)  (!pud_val(pud))
 #define pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
 || (pud_val(pud) & PUD_BAD_BITS))
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index f36dd2e2d591..43b50fd8d236 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -60,6 +60,12 @@ static inline bool pte_hw_valid(pte_t pte)
return pte_val(pte) & _PAGE_PRESENT;
 }
 
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
+{
+   return false;
+}
+
 /*
  * Don't just check for any non zero bits in __PAGE_USER, since for book3e
  * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 17d30359d1f4..284408829fa3 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -128,29 +128,11 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p)
 }
 #endif
 
-#ifndef pmd_is_leaf
-#define pmd_is_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+#define p4d_leaf p4d_leaf
+static inline bool p4d_leaf(p4d_t p4d)
 {
return false;
 }
-#endif
-
-#ifndef pud_is_leaf
-#define pud_is_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
-{
-   return false;
-}
-#endif
-
-#ifndef p4d_is_leaf
-#define p4d_is_leaf p4d_is_leaf
-static inline bool p4d_is_leaf(p4d_t p4d)
-{
-   return false;
-}
-#endif
 
 #define pmd_pgtable pmd_pgtable
 static inline pgtable_t pmd_pgtable(pmd_t pmd)
-- 
2.37.2



[PATCH v7 2/7] powerpc/64s: mm: Introduce __pmdp_collapse_flush with mm_struct argument

2023-02-14 Thread Rohan McLure
pmdp_collapse_flush is called from generic code with just three
parameters, as the mm context is implied by the vm_area_struct
parameter.

Define __pmdp_collapse_flush to accept an additional mm_struct *
parameter, with pmdp_collapse_flush a macro that unpacks
the vma and calls __pmdp_collapse_flush. The mm_struct * parameter
is needed in a future patch providing Page Table Check support,
which is defined in terms of mm context objects.
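
Generic callers such as the khugepaged collapse path are unchanged and
keep the three-parameter form, which now expands through the macro
(sketch of the resulting call):

	pmd = pmdp_collapse_flush(vma, address, pmdp);
	/* expands to __pmdp_collapse_flush(vma, vma->vm_mm, address, pmdp) */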

Signed-off-by: Rohan McLure 
---
v6: New patch
v7: Remove explicit `return' in macro. Prefix macro args with __,
as done with ptep_test_and_clear_young.
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index cb4c67bf45d7..7e0d546f4b3c 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1244,14 +1244,19 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
 }
 
-static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
-   unsigned long address, pmd_t *pmdp)
+static inline pmd_t __pmdp_collapse_flush(struct vm_area_struct *vma, struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
 {
if (radix_enabled())
return radix__pmdp_collapse_flush(vma, address, pmdp);
return hash__pmdp_collapse_flush(vma, address, pmdp);
 }
-#define pmdp_collapse_flush pmdp_collapse_flush
+#define pmdp_collapse_flush(__vma, __addr, __pmdp) \
+({ \
+   struct vm_area_struct *_vma = (__vma);  \
+   \
+   __pmdp_collapse_flush(_vma, _vma->vm_mm, (__addr), (__pmdp));   \
+})
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
-- 
2.37.2



[PATCH v7 1/7] powerpc: mm: Separate set_pte, set_pte_at for internal, external use

2023-02-14 Thread Rohan McLure
Produce separate symbols for set_pte, which is to be used in
arch/powerpc for reassignment of PTEs, and set_pte_at, used in generic
code.

The reason for this distinction is to support the Page Table Check
sanitiser. Having this distinction allows set_pte_at to be
instrumented, but set_pte not to be, permitting uninstrumented
internal mappings. This distinction in names is also present in x86.
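
In practice the split looks like this (sketch; instrumentation of
set_pte_at itself only arrives with the page table check patch later
in the series):

	/* arch-internal use: re-establish a kernel mapping, uninstrumented */
	set_pte(&init_mm, addr, ptep, pte);

	/* generic mm code keeps using the to-be-instrumented entry point */
	set_pte_at(mm, addr, ptep, pte);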

Signed-off-by: Rohan McLure 
---
v6: new patch
v7: Remove extern, move set_pte args to be in a single line.
---
 arch/powerpc/include/asm/book3s/pgtable.h | 3 +--
 arch/powerpc/include/asm/nohash/pgtable.h | 3 +--
 arch/powerpc/include/asm/pgtable.h| 1 +
 arch/powerpc/mm/pgtable.c | 3 +--
 4 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index d18b748ea3ae..1386ed705e66 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -12,8 +12,7 @@
 /* Insert a PTE, top-level function is out of line. It uses an inline
  * low level function in the respective pgtable-* files
  */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-  pte_t pte);
+void set_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
 
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 69c3a050a3d8..f36dd2e2d591 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -154,8 +154,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 /* Insert a PTE, top-level function is out of line. It uses an inline
  * low level function in the respective pgtable-* files
  */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-  pte_t pte);
+void set_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
 
 /* This low level function performs the actual PTE insertion
  * Setting the PTE depends on the MMU type and other factors. It's
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 9972626ddaf6..17d30359d1f4 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -48,6 +48,7 @@ struct mm_struct;
 /* Keep these as a macros to avoid include dependency mess */
 #define pte_page(x)		pfn_to_page(pte_pfn(x))
 #define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
+#define set_pte_at set_pte
 /*
  * Select all bits except the pfn
  */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cb2dcdb18f8e..d7cce317cef8 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -187,8 +187,7 @@ static pte_t set_access_flags_filter(pte_t pte, struct vm_area_struct *vma,
 /*
  * set_pte stores a linux PTE into the linux page table.
  */
-void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-   pte_t pte)
+void set_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
 {
/*
 * Make sure hardware valid bit is not set. We don't do
-- 
2.37.2



[PATCH v7 3/7] powerpc: mm: Replace p{u,m,4}d_is_leaf with p{u,m,4}d_leaf

2023-02-14 Thread Rohan McLure
Replace occurrences of p{u,m,4}d_is_leaf with p{u,m,4}d_leaf, as the
latter is the name given to checking that a higher-level entry in
multi-level paging contains a page translation entry (pte) throughout
all other archs.

A future patch will implement p{u,m,4}d_leaf stubs on all platforms so
that they may be referenced in generic code.

Signed-off-by: Rohan McLure 
---
V4: New patch
V5: Previously replaced stub definition for *_is_leaf with *_leaf. Do
that in a later patch
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 ++++++------
 arch/powerpc/mm/book3s64/radix_pgtable.c | 14 +++++++-------
 arch/powerpc/mm/pgtable.c                |  6 +++---
 arch/powerpc/mm/pgtable_64.c             |  6 +++---
 arch/powerpc/xmon/xmon.c                 |  6 +++---
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 9d3743ca16d5..0d24fd984d16 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -497,7 +497,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
if (!pmd_present(*p))
continue;
-   if (pmd_is_leaf(*p)) {
+   if (pmd_leaf(*p)) {
if (full) {
pmd_clear(p);
} else {
@@ -526,7 +526,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
if (!pud_present(*p))
continue;
-   if (pud_is_leaf(*p)) {
+   if (pud_leaf(*p)) {
pud_clear(p);
} else {
pmd_t *pmd;
@@ -629,12 +629,12 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
new_pud = pud_alloc_one(kvm->mm, gpa);
 
pmd = NULL;
-   if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
+   if (pud && pud_present(*pud) && !pud_leaf(*pud))
pmd = pmd_offset(pud, gpa);
else if (level <= 1)
new_pmd = kvmppc_pmd_alloc();
 
-   if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+   if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_leaf(*pmd)))
new_ptep = kvmppc_pte_alloc();
 
/* Check if we might have been invalidated; let the guest retry if so */
@@ -652,7 +652,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
new_pud = NULL;
}
pud = pud_offset(p4d, gpa);
-   if (pud_is_leaf(*pud)) {
+   if (pud_leaf(*pud)) {
unsigned long hgpa = gpa & PUD_MASK;
 
/* Check if we raced and someone else has set the same thing */
@@ -703,7 +703,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
new_pmd = NULL;
}
pmd = pmd_offset(pud, gpa);
-   if (pmd_is_leaf(*pmd)) {
+   if (pmd_leaf(*pmd)) {
unsigned long lgpa = gpa & PMD_MASK;
 
/* Check if we raced and someone else has set the same thing */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 26245aaf12b8..4e46e001c3c3 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -205,14 +205,14 @@ static void radix__change_memory_range(unsigned long start, unsigned long end,
pudp = pud_alloc(_mm, p4dp, idx);
if (!pudp)
continue;
-   if (pud_is_leaf(*pudp)) {
+   if (pud_leaf(*pudp)) {
ptep = (pte_t *)pudp;
goto update_the_pte;
}
pmdp = pmd_alloc(_mm, pudp, idx);
if (!pmdp)
continue;
-   if (pmd_is_leaf(*pmdp)) {
+   if (pmd_leaf(*pmdp)) {
ptep = pmdp_ptep(pmdp);
goto update_the_pte;
}
@@ -786,7 +786,7 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
if (!pmd_present(*pmd))
continue;
 
-   if (pmd_is_leaf(*pmd)) {
+   if (pmd_leaf(*pmd)) {
if (!IS_ALIGNED(addr, PMD_SIZE) ||
!IS_ALIGNED(next, PMD_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
@@ -816,7 +816,7 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
if (!pud_present(*pud))
continue;
 
-   if (pud_is_leaf(*pud)) {
+   if (pud_leaf(*pud)) {

Re: [PATCH v6 2/7] powerpc/64s: mm: Introduce __pmdp_collapse_flush with mm_struct argument

2023-02-14 Thread Rohan McLure
> On 15 Feb 2023, at 11:17 am, Rohan McLure  wrote:
> 
>> On 14 Feb 2023, at 5:02 pm, Christophe Leroy  wrote:
>> 
>> 
>> 
>> Le 14/02/2023 à 02:59, Rohan McLure a écrit :
>>> pmdp_collapse_flush is called from generic code with just three
>>> parameters, as the mm context is implied by the vm_area_struct
>>> parameter.
>>> 
>>> Define __pmdp_collapse_flush to accept an additional mm_struct *
>>> parameter, with pmdp_collapse_flush a macro that unpacks the vma and
>>> calls __pmdp_collapse_flush. The mm_struct * parameter is needed in a
>>> future patch providing Page Table Check support, which is defined in
>>> terms of mm context objects.
>>> 
>>> Signed-off-by: Rohan McLure 
>>> ---
>>> v6: New patch
>>> ---
>>> arch/powerpc/include/asm/book3s/64/pgtable.h | 14 +++---
>>> 1 file changed, 11 insertions(+), 3 deletions(-)
>>> 
>>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>>> index cb4c67bf45d7..9d8b4e25f5ed 100644
>>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>>> @@ -1244,14 +1244,22 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
>>>  return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
>>> }
>>> 
>>> -static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
>>> - unsigned long address, pmd_t *pmdp)
>>> +static inline pmd_t __pmdp_collapse_flush(struct vm_area_struct *vma, struct mm_struct *mm,
>>> +  unsigned long address, pmd_t *pmdp)
>>> {
>>>  if (radix_enabled())
>>>  return radix__pmdp_collapse_flush(vma, address, pmdp);
>>>  return hash__pmdp_collapse_flush(vma, address, pmdp);
>>> }
>>> -#define pmdp_collapse_flush pmdp_collapse_flush
>>> +#define pmdp_collapse_flush(vma, addr, pmdp) \
>>> +({ \
>>> + struct vm_area_struct *_vma = (vma); \
>>> + pmd_t _r; \
>>> + \
>>> + _r = __pmdp_collapse_flush(_vma, _vma->vm_mm, (addr), (pmdp)); \
>>> + \
>>> + _r; \
>>> +})
>> 
>> Can you make it a static inline function instead of an ugly macro ?
> 
> Due to some header hell, it’s looking like this location only has access to
> a prototype for struct vm_area_struct. Might have to remain a macro then.
> 
> Probably don’t need to explicitly declare a variable for the macro ‘return’
> though.

It’s the same solution opted for by ptep_test_and_clear_young.

#define __HAVE_ARCH_PTEP_TEST_AND_CLEAR_YOUNG
#define ptep_test_and_clear_young(__vma, __addr, __ptep)\
({  \
__ptep_test_and_clear_young((__vma)->vm_mm, __addr, __ptep); \
})

> 
>> 
>>> 
>>> #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
>>> pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,




Re: [PATCH v6 2/7] powerpc/64s: mm: Introduce __pmdp_collapse_flush with mm_struct argument

2023-02-14 Thread Rohan McLure
> On 14 Feb 2023, at 5:02 pm, Christophe Leroy  wrote:
> 
> 
> 
> Le 14/02/2023 à 02:59, Rohan McLure a écrit :
>> pmdp_collapse_flush is called from generic code with just three
>> parameters, as the mm context is implied by the vm_area_struct
>> parameter.
>> 
>> Define __pmdp_collapse_flush to accept an additional mm_struct *
>> parameter, with pmdp_collapse_flush a macro that unpacks the vma and
>> calls __pmdp_collapse_flush. The mm_struct * parameter is needed in a
>> future patch providing Page Table Check support, which is defined in
>> terms of mm context objects.
>> 
>> Signed-off-by: Rohan McLure 
>> ---
>> v6: New patch
>> ---
>>  arch/powerpc/include/asm/book3s/64/pgtable.h | 14 +++---
>>  1 file changed, 11 insertions(+), 3 deletions(-)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index cb4c67bf45d7..9d8b4e25f5ed 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -1244,14 +1244,22 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
>>   return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
>>  }
>> 
>> -static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
>> - unsigned long address, pmd_t *pmdp)
>> +static inline pmd_t __pmdp_collapse_flush(struct vm_area_struct *vma, struct mm_struct *mm,
>> +  unsigned long address, pmd_t *pmdp)
>>  {
>>   if (radix_enabled())
>>   return radix__pmdp_collapse_flush(vma, address, pmdp);
>>   return hash__pmdp_collapse_flush(vma, address, pmdp);
>>  }
>> -#define pmdp_collapse_flush pmdp_collapse_flush
>> +#define pmdp_collapse_flush(vma, addr, pmdp) \
>> +({ \
>> + struct vm_area_struct *_vma = (vma); \
>> + pmd_t _r; \
>> + \
>> + _r = __pmdp_collapse_flush(_vma, _vma->vm_mm, (addr), (pmdp)); \
>> + \
>> + _r; \
>> +})
> 
> Can you make it a static inline function instead of an ugly macro ?

Due to some header hell, it’s looking like this location only has access to
a prototype for struct vm_area_struct. Might have to remain a macro then.

Probably don’t need to explicitly declare a variable for the macro ‘return’
though.

> 
>> 
>>  #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
>>  pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,



[PATCH v6 7/7] powerpc: mm: Support page table check

2023-02-13 Thread Rohan McLure
On creation and clearing of a page table mapping, instrument such calls
by invoking page_table_check_pte_set and page_table_check_pte_clear
respectively. These calls serve as a sanity check against illegal
mappings.

Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK for all ppc64 platforms, and for
32-bit platforms implementing Book3S.

Change pud_pfn to be a runtime bug rather than a build bug, as it is
consumed by page_table_check_pud_{clear,set}, which are not called.

See also:

riscv support in commit 3fee229a8eb9 ("riscv/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
arm64 in commit 42b2547137f5 ("arm64/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
x86_64 in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check")

Signed-off-by: Rohan McLure 
---
V2: Update spacing and types assigned to pte_update calls.
V3: Update one last pte_update call to remove __pte invocation.
V5: Fix 32-bit nohash double set
V6: Omit __set_pte_at instrumentation - should be instrumented by
set_pte_at, with set_pte in between, performing all prior checks.
Instrument pmds. Use set_pte where needed.
---
 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h |  8 +++-
 arch/powerpc/include/asm/book3s/64/pgtable.h | 44 
 arch/powerpc/include/asm/nohash/32/pgtable.h |  7 +++-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  8 +++-
 arch/powerpc/include/asm/pgtable.h   | 11 -
 arch/powerpc/mm/book3s64/hash_pgtable.c  |  2 +-
 arch/powerpc/mm/book3s64/pgtable.c   | 16 ---
 arch/powerpc/mm/book3s64/radix_pgtable.c | 10 ++---
 arch/powerpc/mm/nohash/book3e_pgtable.c  |  2 +-
 arch/powerpc/mm/pgtable_32.c |  2 +-
 11 files changed, 84 insertions(+), 27 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2c9cdf1d8761..2474e2699037 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -154,6 +154,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
	select ARCH_SUPPORTS_DEBUG_PAGEALLOC	if PPC_BOOK3S || PPC_8xx || 40x
+   select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index afd672e84791..8850b4fb22a4 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -53,6 +53,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/page_table_check.h>
+
 static inline bool pte_user(pte_t pte)
 {
return pte_val(pte) & _PAGE_USER;
@@ -338,7 +340,11 @@ static inline int __ptep_test_and_clear_young(struct mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long addr,
   pte_t *ptep)
 {
-   return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 93601ef4c665..1835ba2c2309 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -162,6 +162,8 @@
 #define PAGE_KERNEL_ROX__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
 
 #ifndef __ASSEMBLY__
+#include <linux/page_table_check.h>
+
 /*
  * page table defines
  */
@@ -431,8 +433,11 @@ static inline void huge_ptep_set_wrprotect(struct mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
 {
-   unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-   return __pte(old);
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
@@ -441,11 +446,16 @@ static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm,
pte_t *ptep, int full)
 {
if (full && radix_enabled()) {
+   pte_t old_pte;
+
/*
 * We know that this is a full mm pte clear and
 * hence can be sure there is no parallel set_pte.
 */
-   return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   old_pte = radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
}
return ptep_get_and_clear(mm, addr, ptep);
 }
@@ -1249,17 +1259,33 @@ extern int pmdp_test_and_clear_young(struct vm_area_struct *vma,
 static inline pmd_t pm

[PATCH v6 5/7] powerpc: mm: Add common pud_pfn stub for all platforms

2023-02-13 Thread Rohan McLure
Prior to this commit, pud_pfn was implemented with BUILD_BUG as the inline
function for 64-bit Book3S systems, but the call is never emitted, as its
invocations in generic code are guarded by calls to pud_devmap, which return
zero on such systems. A future patch will provide support for page table
checks, the generic code for which depends on a pud_pfn stub being
implemented, even though the patch does not interact with puds directly.

Remove the 64-bit Book3S stub and define pud_pfn to warn on all
platforms. pud_pfn may be defined properly on a per-platform basis
should it grow real users in future.

Signed-off-by: Rohan McLure 
---
V2: Remove conditional BUILD_BUG and BUG. Instead warn on usage.
V3: Replace WARN with WARN_ONCE, which should suffice to demonstrate
misuse of puds.
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ----------
 arch/powerpc/include/asm/pgtable.h           | 14 ++++++++++++++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 5be0a4c8bf32..8bbd3e1df93e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1330,16 +1330,6 @@ static inline int pgd_devmap(pgd_t pgd)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline int pud_pfn(pud_t pud)
-{
-   /*
-* Currently all calls to pud_pfn() are gated around a pud_devmap()
-* check so this should never be used. If it grows another user we
-* want to know about it.
-*/
-   BUILD_BUG();
-   return 0;
-}
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
 void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 284408829fa3..ad0829f816e9 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -153,6 +153,20 @@ struct seq_file;
 void arch_report_meminfo(struct seq_file *m);
 #endif /* CONFIG_PPC64 */
 
+/*
+ * Currently only consumed by page_table_check_pud_{set,clear}. Since clears
+ * and sets to page table entries at any level are done through
+ * page_table_check_pte_{set,clear}, provide stub implementation.
+ */
+#ifndef pud_pfn
+#define pud_pfn pud_pfn
+static inline int pud_pfn(pud_t pud)
+{
+   WARN_ONCE(1, "pud: platform does not use pud entries directly");
+   return 0;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.37.2



[PATCH v6 6/7] powerpc: mm: Add p{te,md,ud}_user_accessible_page helpers

2023-02-13 Thread Rohan McLure
Add the following helpers for detecting whether a page table entry
is a leaf and is accessible to user space.

 * pte_user_accessible_page
 * pmd_user_accessible_page
 * pud_user_accessible_page

Also implement missing pmd_user and pud_user definitions for both
Book3S/nohash 64-bit systems, and pmd_user stubs for Book3S/nohash
32-bit systems.

Signed-off-by: Rohan McLure 
---
V2: Provide missing pud_user implementations, use p{u,m}d_is_leaf.
V3: Provide missing pmd_user implementations as stubs in 32-bit.
V4: Use pmd_leaf, pud_leaf, and define pmd_user for 32 Book3E with
static inline method rather than macro.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  4 ++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++++++++
 arch/powerpc/include/asm/nohash/32/pgtable.h |  5 +++++
 arch/powerpc/include/asm/nohash/64/pgtable.h | 10 ++++++++++
 arch/powerpc/include/asm/pgtable.h           | 15 +++++++++++++++
 5 files changed, 44 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index a090cb13a4a0..afd672e84791 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -516,6 +516,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return 0;
+}
 
 
 /* This low level function performs the actual PTE insertion
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 8bbd3e1df93e..93601ef4c665 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -538,6 +538,16 @@ static inline bool pte_user(pte_t pte)
return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return !(pmd_raw(pmd) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline bool pud_user(pud_t pud)
+{
+   return !(pud_raw(pud) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
 #define pte_access_permitted pte_access_permitted
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 70edad44dff6..d953533c56ff 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -209,6 +209,11 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return false;
+}
+
 /*
  * PTE updates. This function is called whenever an existing
  * valid PTE is updated. This does -not- include set_pte_at()
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index d391a45e0f11..14e69ebad31f 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -123,6 +123,11 @@ static inline pte_t pmd_pte(pmd_t pmd)
return __pte(pmd_val(pmd));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return (pmd_val(pmd) & _PAGE_USER) == _PAGE_USER;
+}
+
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #define pmd_bad(pmd)		(!is_kernel_addr(pmd_val(pmd)) \
 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -164,6 +169,11 @@ static inline pte_t pud_pte(pud_t pud)
return __pte(pud_val(pud));
 }
 
+static inline bool pud_user(pud_t pud)
+{
+   return (pud_val(pud) & _PAGE_USER) == _PAGE_USER;
+}
+
 static inline pud_t pte_pud(pte_t pte)
 {
return __pud(pte_val(pte));
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index ad0829f816e9..b76fdb80b6c9 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -167,6 +167,21 @@ static inline int pud_pfn(pud_t pud)
 }
 #endif
 
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+   return pte_present(pte) && pte_user(pte);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+   return pmd_leaf(pmd) && pmd_present(pmd) && pmd_user(pmd);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+   return pud_leaf(pud) && pud_present(pud) && pud_user(pud);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.37.2



[PATCH v6 4/7] powerpc: mm: Implement p{m,u,4}d_leaf on all platforms

2023-02-13 Thread Rohan McLure
The check that a higher-level entry in a multi-level page table contains
a page translation entry (pte) is performed by p{m,u,4}d_leaf stubs, which
may be specialised for each choice of mmu. In a prior commit, we replaced
uses of the catch-all stubs p{m,u,4}d_is_leaf with p{m,u,4}d_leaf.

Replace the catch-all stub definitions for p{m,u,4}d_is_leaf with
definitions for p{m,u,4}d_leaf. A future patch will assume that
p{m,u,4}d_leaf is defined on all platforms.

In particular, implement pud_leaf for Book3E-64, pmd_leaf for all Book3E
and Book3S-32 platforms, with a catch-all definition for p4d_leaf.

Signed-off-by: Rohan McLure 
---
v5: Split patch that replaces p{m,u,4}d_is_leaf into two patches, first
replacing callsites and afterward providing generic definition.
Remove ifndef-defines implementing p{m,u}d_leaf in favour of
implementing stubs in headers belonging to the particular platforms
needing them.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  5 +++++
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++++------
 arch/powerpc/include/asm/nohash/64/pgtable.h |  6 ++++++
 arch/powerpc/include/asm/nohash/pgtable.h    |  6 ++++++
 arch/powerpc/include/asm/pgtable.h           | 22 ++--------------------
 5 files changed, 23 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 75823f39e042..a090cb13a4a0 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -242,6 +242,11 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
+{
+   return false;
+}
 
 /*
  * When flushing the tlb entry for a page, we also need to flush the hash
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9d8b4e25f5ed..5be0a4c8bf32 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1362,16 +1362,14 @@ static inline bool is_pte_rw_upgrade(unsigned long old_val, unsigned long new_va
 /*
  * Like pmd_huge() and pmd_large(), but works regardless of config options
  */
-#define pmd_is_leaf pmd_is_leaf
-#define pmd_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
 {
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
 }
 
-#define pud_is_leaf pud_is_leaf
-#define pud_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
 }
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 879e9a6e5a87..d391a45e0f11 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -141,6 +141,12 @@ static inline void pud_clear(pud_t *pudp)
*pudp = __pud(0);
 }
 
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
+{
+   return false;
+}
+
 #define pud_none(pud)  (!pud_val(pud))
 #define pud_bad(pud)		(!is_kernel_addr(pud_val(pud)) \
 || (pud_val(pud) & PUD_BAD_BITS))
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index ac3e69a18253..cc3941a4790f 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -60,6 +60,12 @@ static inline bool pte_hw_valid(pte_t pte)
return pte_val(pte) & _PAGE_PRESENT;
 }
 
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
+{
+   return false;
+}
+
 /*
  * Don't just check for any non zero bits in __PAGE_USER, since for book3e
  * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
diff --git a/arch/powerpc/include/asm/pgtable.h b/arch/powerpc/include/asm/pgtable.h
index 17d30359d1f4..284408829fa3 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -128,29 +128,11 @@ static inline void pte_frag_set(mm_context_t *ctx, void *p)
 }
 #endif
 
-#ifndef pmd_is_leaf
-#define pmd_is_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+#define p4d_leaf p4d_leaf
+static inline bool p4d_leaf(p4d_t p4d)
 {
return false;
 }
-#endif
-
-#ifndef pud_is_leaf
-#define pud_is_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
-{
-   return false;
-}
-#endif
-
-#ifndef p4d_is_leaf
-#define p4d_is_leaf p4d_is_leaf
-static inline bool p4d_is_leaf(p4d_t p4d)
-{
-   return false;
-}
-#endif
 
 #define pmd_pgtable pmd_pgtable
 static inline pgtable_t pmd_pgtable(pmd_t pmd)
-- 
2.37.2



[PATCH v6 3/7] powerpc: mm: Replace p{u,m,4}d_is_leaf with p{u,m,4}d_leaf

2023-02-13 Thread Rohan McLure
Replace occurrences of p{u,m,4}d_is_leaf with p{u,m,4}d_leaf, as the
latter is the name given to checking that a higher-level entry in
multi-level paging contains a page translation entry (pte) throughout
all other archs.

A future patch will implement p{u,m,4}d_leaf stubs on all platforms so
that they may be referenced in generic code.

Signed-off-by: Rohan McLure 
---
V4: New patch
V5: Previously replaced stub definition for *_is_leaf with *_leaf. Do
that in a later patch
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 ++++++------
 arch/powerpc/mm/book3s64/radix_pgtable.c | 14 +++++++-------
 arch/powerpc/mm/pgtable.c                |  6 +++---
 arch/powerpc/mm/pgtable_64.c             |  6 +++---
 arch/powerpc/xmon/xmon.c                 |  6 +++---
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 9d3743ca16d5..0d24fd984d16 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -497,7 +497,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t *pmd, bool full,
for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
if (!pmd_present(*p))
continue;
-   if (pmd_is_leaf(*p)) {
+   if (pmd_leaf(*p)) {
if (full) {
pmd_clear(p);
} else {
@@ -526,7 +526,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t *pud,
for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
if (!pud_present(*p))
continue;
-   if (pud_is_leaf(*p)) {
+   if (pud_leaf(*p)) {
pud_clear(p);
} else {
pmd_t *pmd;
@@ -629,12 +629,12 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
new_pud = pud_alloc_one(kvm->mm, gpa);
 
pmd = NULL;
-   if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
+   if (pud && pud_present(*pud) && !pud_leaf(*pud))
pmd = pmd_offset(pud, gpa);
else if (level <= 1)
new_pmd = kvmppc_pmd_alloc();
 
-   if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+   if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_leaf(*pmd)))
new_ptep = kvmppc_pte_alloc();
 
/* Check if we might have been invalidated; let the guest retry if so */
@@ -652,7 +652,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
new_pud = NULL;
}
pud = pud_offset(p4d, gpa);
-   if (pud_is_leaf(*pud)) {
+   if (pud_leaf(*pud)) {
unsigned long hgpa = gpa & PUD_MASK;
 
/* Check if we raced and someone else has set the same thing */
@@ -703,7 +703,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte,
new_pmd = NULL;
}
pmd = pmd_offset(pud, gpa);
-   if (pmd_is_leaf(*pmd)) {
+   if (pmd_leaf(*pmd)) {
unsigned long lgpa = gpa & PMD_MASK;
 
/* Check if we raced and someone else has set the same thing */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c
index 26245aaf12b8..4e46e001c3c3 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -205,14 +205,14 @@ static void radix__change_memory_range(unsigned long start, unsigned long end,
pudp = pud_alloc(_mm, p4dp, idx);
if (!pudp)
continue;
-   if (pud_is_leaf(*pudp)) {
+   if (pud_leaf(*pudp)) {
ptep = (pte_t *)pudp;
goto update_the_pte;
}
pmdp = pmd_alloc(_mm, pudp, idx);
if (!pmdp)
continue;
-   if (pmd_is_leaf(*pmdp)) {
+   if (pmd_leaf(*pmdp)) {
ptep = pmdp_ptep(pmdp);
goto update_the_pte;
}
@@ -786,7 +786,7 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, unsigned long addr,
if (!pmd_present(*pmd))
continue;
 
-   if (pmd_is_leaf(*pmd)) {
+   if (pmd_leaf(*pmd)) {
if (!IS_ALIGNED(addr, PMD_SIZE) ||
!IS_ALIGNED(next, PMD_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
@@ -816,7 +816,7 @@ static void __meminit remove_pud_table(pud_t *pud_start, unsigned long addr,
if (!pud_present(*pud))
continue;
 
-   if (pud_is_leaf(*pud)) {
+   if (pud_leaf(*pud)) {

[PATCH v6 2/7] powerpc/64s: mm: Introduce __pmdp_collapse_flush with mm_struct argument

2023-02-13 Thread Rohan McLure
pmdp_collapse_flush is called from generic code with just three
parameters, as the mm context is implied by the vm_area_struct
parameter.

Define __pmdp_collapse_flush to accept an additional mm_struct *
parameter, with pmdp_collapse_flush a macro that unpacks the vma and
calls __pmdp_collapse_flush. The mm_struct * parameter is needed in a
future patch providing Page Table Check support, which is defined in
terms of mm context objects.

Signed-off-by: Rohan McLure 
---
v6: New patch
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h
index cb4c67bf45d7..9d8b4e25f5ed 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1244,14 +1244,22 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
return hash__pmdp_huge_get_and_clear(mm, addr, pmdp);
 }
 
-static inline pmd_t pmdp_collapse_flush(struct vm_area_struct *vma,
-   unsigned long address, pmd_t *pmdp)
+static inline pmd_t __pmdp_collapse_flush(struct vm_area_struct *vma, struct mm_struct *mm,
+ unsigned long address, pmd_t *pmdp)
 {
if (radix_enabled())
return radix__pmdp_collapse_flush(vma, address, pmdp);
return hash__pmdp_collapse_flush(vma, address, pmdp);
 }
-#define pmdp_collapse_flush pmdp_collapse_flush
+#define pmdp_collapse_flush(vma, addr, pmdp)   \
+({ \
+   struct vm_area_struct *_vma = (vma);\
+   pmd_t _r;   \
+   \
+   _r = __pmdp_collapse_flush(_vma, _vma->vm_mm, (addr), (pmdp));  \
+   \
+   _r; \
+})
 
 #define __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL
 pmd_t pmdp_huge_get_and_clear_full(struct vm_area_struct *vma,
-- 
2.37.2



[PATCH v6 1/7] powerpc: mm: Separate set_pte, set_pte_at for internal, external use

2023-02-13 Thread Rohan McLure
Produce separate symbols for set_pte, which is to be used in
arch/powerpc for reassignment of PTEs, and set_pte_at, used in generic
code.

The reason for this distinction is to support the Page Table Check
sanitiser. Having this distinction allows set_pte_at to be
instrumented, but set_pte not to be, permitting uninstrumented
internal mappings. This distinction in names is also present in x86.

Signed-off-by: Rohan McLure 
---
v6: new patch
---
 arch/powerpc/include/asm/book3s/pgtable.h | 4 ++--
 arch/powerpc/include/asm/nohash/pgtable.h | 4 ++--
 arch/powerpc/include/asm/pgtable.h| 1 +
 arch/powerpc/mm/pgtable.c | 4 ++--
 4 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/pgtable.h b/arch/powerpc/include/asm/book3s/pgtable.h
index d18b748ea3ae..dbcdc2103c59 100644
--- a/arch/powerpc/include/asm/book3s/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/pgtable.h
@@ -12,8 +12,8 @@
 /* Insert a PTE, top-level function is out of line. It uses an inline
  * low level function in the respective pgtable-* files
  */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-  pte_t pte);
+extern void set_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+   pte_t pte);
 
 
 #define __HAVE_ARCH_PTEP_SET_ACCESS_FLAGS
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h
index 69c3a050a3d8..ac3e69a18253 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -154,8 +154,8 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 /* Insert a PTE, top-level function is out of line. It uses an inline
  * low level function in the respective pgtable-* files
  */
-extern void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-  pte_t pte);
+extern void set_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+   pte_t pte);
 
 /* This low level function performs the actual PTE insertion
  * Setting the PTE depends on the MMU type and other factors. It's
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 9972626ddaf6..17d30359d1f4 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -48,6 +48,7 @@ struct mm_struct;
 /* Keep these as a macros to avoid include dependency mess */
 #define pte_page(x)pfn_to_page(pte_pfn(x))
 #define mk_pte(page, pgprot)   pfn_pte(page_to_pfn(page), (pgprot))
+#define set_pte_at set_pte
 /*
  * Select all bits except the pfn
  */
diff --git a/arch/powerpc/mm/pgtable.c b/arch/powerpc/mm/pgtable.c
index cb2dcdb18f8e..e9a464e0d081 100644
--- a/arch/powerpc/mm/pgtable.c
+++ b/arch/powerpc/mm/pgtable.c
@@ -187,8 +187,8 @@ static pte_t set_access_flags_filter(pte_t pte, struct 
vm_area_struct *vma,
 /*
  * set_pte stores a linux PTE into the linux page table.
  */
-void set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
-   pte_t pte)
+void set_pte(struct mm_struct *mm, unsigned long addr, pte_t *ptep,
+pte_t pte)
 {
/*
 * Make sure hardware valid bit is not set. We don't do
-- 
2.37.2
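
A sketch of where the distinction pays off (an assumed shape for the
later Page Table Check patch, not part of this one): set_pte_at() can
gain instrumentation while set_pte() stays bare for kernel-internal
mappings. page_table_check_pte_set() is the generic hook.

  #define set_pte_at set_pte_at
  static inline void set_pte_at(struct mm_struct *mm, unsigned long addr,
                                pte_t *ptep, pte_t pte)
  {
          page_table_check_pte_set(mm, addr, ptep, pte); /* instrumented */
          set_pte(mm, addr, ptep, pte);                  /* raw insert */
  }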



[PATCH v6 0/7] Support page table check

2023-02-13 Thread Rohan McLure
Support the page table check sanitiser on all PowerPC platforms. This
sanitiser works by serialising assignments, reassignments and clears of
page table entries at each level in order to ensure that anonymous
mappings have at most one writable consumer, and likewise that
file-backed mappings are not simultaneously also anonymous mappings.

In order to support this infrastructure, a number of stubs must be
defined for all powerpc platforms. Additionally, separate set_pte_at
from set_pte to allow for internal, uninstrumented mappings.

v6:
 * Support huge pages and p{m,u}d accounting.
 * Remove instrumentation from set_pte from kernel internal pages.
 * 64s: Implement pmdp_collapse_flush in terms of __pmdp_collapse_flush
   as access to the mm_struct * is required.

v5: 
Link:

Rohan McLure (7):
  powerpc: mm: Separate set_pte, set_pte_at for internal, external use
  powerpc/64s: mm: Introduce __pmdp_collapse_flush with mm_struct
argument
  powerpc: mm: Replace p{u,m,4}d_is_leaf with p{u,m,4}d_leaf
  powerpc: mm: Implement p{m,u,4}d_leaf on all platforms
  powerpc: mm: Add common pud_pfn stub for all platforms
  powerpc: mm: Add p{te,md,ud}_user_accessible_page helpers
  powerpc: mm: Support page table check

 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h | 17 +++-
 arch/powerpc/include/asm/book3s/64/pgtable.h | 88 +---
 arch/powerpc/include/asm/book3s/pgtable.h|  4 +-
 arch/powerpc/include/asm/nohash/32/pgtable.h | 12 ++-
 arch/powerpc/include/asm/nohash/64/pgtable.h | 24 +-
 arch/powerpc/include/asm/nohash/pgtable.h| 10 ++-
 arch/powerpc/include/asm/pgtable.h   | 61 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 +--
 arch/powerpc/mm/book3s64/hash_pgtable.c  |  2 +-
 arch/powerpc/mm/book3s64/pgtable.c   | 16 ++--
 arch/powerpc/mm/book3s64/radix_pgtable.c | 24 +++---
 arch/powerpc/mm/nohash/book3e_pgtable.c  |  2 +-
 arch/powerpc/mm/pgtable.c| 10 +--
 arch/powerpc/mm/pgtable_32.c |  2 +-
 arch/powerpc/mm/pgtable_64.c |  6 +-
 arch/powerpc/xmon/xmon.c |  6 +-
 17 files changed, 204 insertions(+), 93 deletions(-)

-- 
2.37.2
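
A conceptual sketch of the invariant enforced (paraphrasing the generic
mm/page_table_check.c accounting; the helper name is illustrative):
each physical page carries counters of anonymous and file mappers, and
the entry set/clear hooks BUG() on violations.

  struct page_table_check {
          atomic_t anon_map_count;
          atomic_t file_map_count;
  };

  static void ptc_set_sketch(struct page_table_check *ptc, bool anon, bool rw)
  {
          if (anon) {
                  /* an anon page must not also be file-mapped ... */
                  BUG_ON(atomic_read(&ptc->file_map_count));
                  /* ... and may have at most one writable consumer */
                  BUG_ON(atomic_inc_return(&ptc->anon_map_count) > 1 && rw);
          } else {
                  /* a file page must not also be anon-mapped */
                  BUG_ON(atomic_read(&ptc->anon_map_count));
                  atomic_inc(&ptc->file_map_count);
          }
  }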



Re: [PATCH v4 1/7] kcsan: Add atomic builtin stubs for 32-bit systems

2023-02-09 Thread Rohan McLure



> On 9 Feb 2023, at 10:14 am, Rohan McLure  wrote:
> 
> 
> 
>> On 8 Feb 2023, at 11:23 pm, Christophe Leroy  
>> wrote:
>> 
>> 
>> 
>> Le 08/02/2023 à 04:21, Rohan McLure a écrit :
>>> KCSAN instruments calls to atomic builtins, and will in turn call these
>>> builtins itself. As such, architectures supporting KCSAN must have
>>> compiler support for these atomic primitives.
>>> 
>>> Since 32-bit systems are unlikely to have 64-bit compiler builtins,
>>> provide a stub for each missing builtin, and use BUG() to assert
>>> unreachability.
>>> 
>>> In commit 725aea873261 ("xtensa: enable KCSAN"), xtensa implements these
>>> locally, but does not advertise the fact with preprocessor macros. To
>>> avoid production of duplicate symbols, do not build the stubs on xtensa.
>>> A future patch will remove the xtensa implementation of these stubs.
>>> 
>>> Signed-off-by: Rohan McLure 
>>> ---
>>> v4: New patch
>>> ---
>>> kernel/kcsan/Makefile |  3 ++
>>> kernel/kcsan/stubs.c  | 78 +++
>>> 2 files changed, 81 insertions(+)
>>> create mode 100644 kernel/kcsan/stubs.c
>> 
>> I think it would be better to merge patch 1 and patch 2, that way we 
>> would keep the history and see that stubs.c almost comes from xtensa.
>> 
>> The summary would then be:
>> 
>> arch/xtensa/lib/Makefile  |  1 -
>> kernel/kcsan/Makefile |  2 +-
>> arch/xtensa/lib/kcsan-stubs.c => kernel/kcsan/stubs.c | 26 
>> +-
>> 3 files changed, 26 insertions(+), 3 deletions(-)
>> 
>> 
>>> 
>>> diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
>>> index 8cf70f068d92..5dfc5825aae9 100644
>>> --- a/kernel/kcsan/Makefile
>>> +++ b/kernel/kcsan/Makefile
>>> @@ -12,6 +12,9 @@ CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \
>>>  -fno-stack-protector -DDISABLE_BRANCH_PROFILING
>>> 
>>> obj-y := core.o debugfs.o report.o
>>> +ifndef XTENSA
>>> + obj-y += stubs.o
>> 
>> obj-$(CONFIG_32BIT) += stubs.o
>> 
>> That would avoid the #if CONFIG_32BIT in stubs.c
> 
> Thanks. Yes happy to do this.
> 
>> 
>>> +endif
>>> 
>>> KCSAN_INSTRUMENT_BARRIERS_selftest.o := y
>>> obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
>>> diff --git a/kernel/kcsan/stubs.c b/kernel/kcsan/stubs.c
>>> new file mode 100644
>>> index ..ec5cd99be422
>>> --- /dev/null
>>> +++ b/kernel/kcsan/stubs.c
>>> @@ -0,0 +1,78 @@
>>> +// SPDX-License Identifier: GPL-2.0
>> 
>> Missing - between License and Identifier ?
>> 
>>> +
>>> +#include 
>>> +#include 
>>> +
>>> +#ifdef CONFIG_32BIT
>> 
>> Should be handled in Makefile
>> 
>>> +
>>> +#if !__has_builtin(__atomic_store_8)
>> 
>> Does any 32 bit ARCH support that ? Is that #if required ?
>> 
>> If yes, do we really need the #if for each and every function, can't we 
>> just check for one and assume that if we don't have __atomic_store_8 we 
>> don't have any of the functions ?
> 
> Testing shows that gcc provides 8-byte atomic builtins on 32-bit x86
> and arm. However, I believe it should suffice to check for
> __atomic_store_8 or any other such builtin, i.e. if an arch implements
> one it likely implements them all, from what I’ve seen.

In reality, __has_builtin only specifies that GCC is aware of the existence of
the builtin, but linking against libatomic may still be required. Let’s
remove this check, and have ppc32 and xtensa opt into compiling the stubs file.

Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=108734
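
For illustration (an assumed example, not from the thread): on a 32-bit
target, __has_builtin() can report the 8-byte builtins as known while
the compiler still lowers them to out-of-line libatomic calls, so the
link fails without stubs or libatomic.

  #include <stdint.h>

  uint64_t load64(const volatile uint64_t *p)
  {
  #if __has_builtin(__atomic_load_8)
          /* compiles, but may emit a call to libatomic's __atomic_load_8 */
          return __atomic_load_n(p, __ATOMIC_RELAXED);
  #else
          return 0;
  #endif
  }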

> 
>> 
>> 
>>> +void __atomic_store_8(volatile void *p, u64 v, int i)
>>> +{
>>> + BUG();
>>> +}
>>> +#endif
>>> +
>>> +#if !__has_builtin(__atomic_load_8)
>>> +u64 __atomic_load_8(const volatile void *p, int i)
>>> +{
>>> + BUG();
>>> +}
>>> +#endif
>>> +
>>> +#if !__has_builtin(__atomic_exchange_8)
>>> +u64 __atomic_exchange_8(volatile void *p, u64 v, int i)
>>> +{
>>> + BUG();
>>> +}
>>> +#endif
>>> +
>>> +#if !__has_builtin(__atomic_compare_exchange_8)
>>> +bool __atomic_compare_exchange_8(volatile void *p1, void *p2, u64 v, bool 
>>> b, int i1, int i2)

Re: [PATCH v4 1/7] kcsan: Add atomic builtin stubs for 32-bit systems

2023-02-08 Thread Rohan McLure



> On 8 Feb 2023, at 11:23 pm, Christophe Leroy  
> wrote:
> 
> 
> 
> Le 08/02/2023 à 04:21, Rohan McLure a écrit :
>> KCSAN instruments calls to atomic builtins, and will in turn call these
>> builtins itself. As such, architectures supporting KCSAN must have
>> compiler support for these atomic primitives.
>> 
>> Since 32-bit systems are unlikely to have 64-bit compiler builtins,
>> provide a stub for each missing builtin, and use BUG() to assert
>> unreachability.
>> 
>> In commit 725aea873261 ("xtensa: enable KCSAN"), xtensa implements these
>> locally, but does not advertise the fact with preprocessor macros. To
>> avoid production of duplicate symbols, do not build the stubs on xtensa.
>> A future patch will remove the xtensa implementation of these stubs.
>> 
>> Signed-off-by: Rohan McLure 
>> ---
>> v4: New patch
>> ---
>>  kernel/kcsan/Makefile |  3 ++
>>  kernel/kcsan/stubs.c  | 78 +++
>>  2 files changed, 81 insertions(+)
>>  create mode 100644 kernel/kcsan/stubs.c
> 
> I think it would be better to merge patch 1 and patch 2, that way we 
> would keep the history and see that stubs.c almost comes from xtensa.
> 
> The summary would then be:
> 
>  arch/xtensa/lib/Makefile  |  1 -
>  kernel/kcsan/Makefile |  2 +-
>  arch/xtensa/lib/kcsan-stubs.c => kernel/kcsan/stubs.c | 26 
> +-
>  3 files changed, 26 insertions(+), 3 deletions(-)
> 
> 
>> 
>> diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
>> index 8cf70f068d92..5dfc5825aae9 100644
>> --- a/kernel/kcsan/Makefile
>> +++ b/kernel/kcsan/Makefile
>> @@ -12,6 +12,9 @@ CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \
>>   -fno-stack-protector -DDISABLE_BRANCH_PROFILING
>> 
>>  obj-y := core.o debugfs.o report.o
>> +ifndef XTENSA
>> + obj-y += stubs.o
> 
> obj-$(CONFIG_32BIT) += stubs.o
> 
> That would avoid the #if CONFIG_32BIT in stubs.c

Thanks. Yes happy to do this.

> 
>> +endif
>> 
>>  KCSAN_INSTRUMENT_BARRIERS_selftest.o := y
>>  obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
>> diff --git a/kernel/kcsan/stubs.c b/kernel/kcsan/stubs.c
>> new file mode 100644
>> index ..ec5cd99be422
>> --- /dev/null
>> +++ b/kernel/kcsan/stubs.c
>> @@ -0,0 +1,78 @@
>> +// SPDX-License Identifier: GPL-2.0
> 
> Missing - between License and Identifier ?
> 
>> +
>> +#include 
>> +#include 
>> +
>> +#ifdef CONFIG_32BIT
> 
> Should be handled in Makefile
> 
>> +
>> +#if !__has_builtin(__atomic_store_8)
> 
> Does any 32 bit ARCH support that ? Is that #if required ?
> 
> If yes, do we really need the #if for each and every function, can't we 
> just check for one and assume that if we don't have __atomic_store_8 we 
> don't have any of the functions ?

Testing shows that gcc provides 8-byte atomic builtins on 32-bit x86
and arm. However, I believe it should suffice to check for
__atomic_store_8 or any other such builtin, i.e. if an arch implements
one it likely implements them all, from what I’ve seen.

> 
> 
>> +void __atomic_store_8(volatile void *p, u64 v, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_load_8)
>> +u64 __atomic_load_8(const volatile void *p, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_exchange_8)
>> +u64 __atomic_exchange_8(volatile void *p, u64 v, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_compare_exchange_8)
>> +bool __atomic_compare_exchange_8(volatile void *p1, void *p2, u64 v, bool 
>> b, int i1, int i2)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_fetch_add_8)
>> +u64 __atomic_fetch_add_8(volatile void *p, u64 v, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_fetch_sub_8)
>> +u64 __atomic_fetch_sub_8(volatile void *p, u64 v, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_fetch_and_8)
>> +u64 __atomic_fetch_and_8(volatile void *p, u64 v, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_fetch_or_8)
>> +u64 __atomic_fetch_or_8(volatile void *p, u64 v, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_fetch_xor_8)
>> +u64 __atomic_fetch_xor_8(volatile void *p, u64 v, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#if !__has_builtin(__atomic_fetch_nand_8)
>> +u64 __atomic_fetch_nand_8(volatile void *p, u64 v, int i)
>> +{
>> + BUG();
>> +}
>> +#endif
>> +
>> +#endif /* CONFIG_32BIT */




[PATCH v4 7/7] powerpc: kcsan: Add KCSAN Support

2023-02-07 Thread Rohan McLure
Enable HAVE_ARCH_KCSAN on all powerpc platforms, permitting use of the
kernel concurrency sanitiser through the CONFIG_KCSAN_* kconfig options.
KCSAN requires the __atomic_* compiler builtins for 64-bit values, and
so support is only reported on PPC64.

See documentation in Documentation/dev-tools/kcsan.rst for more
information.

Signed-off-by: Rohan McLure 
---
v3: Restrict support to 64-bit, as TSAN expects 64-bit __atomic_* compiler
built-ins.
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b8c4ac56bddc..55bc2d724c73 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -198,6 +198,7 @@ config PPC
select HAVE_ARCH_KASAN  if PPC_RADIX_MMU
select HAVE_ARCH_KASAN  if PPC_BOOK3E_64
select HAVE_ARCH_KASAN_VMALLOC  if HAVE_ARCH_KASAN
+   select HAVE_ARCH_KCSANif PPC64
select HAVE_ARCH_KFENCE if ARCH_SUPPORTS_DEBUG_PAGEALLOC
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_KGDB
-- 
2.37.2
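
With this selected, a PPC64 build can then turn the sanitiser on via
the generic options, for example (option names from lib/Kconfig.kcsan;
values illustrative):

  CONFIG_KCSAN=y
  CONFIG_KCSAN_EARLY_ENABLE=y
  CONFIG_KCSAN_REPORT_ONCE_IN_MS=3000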



[PATCH v4 6/7] powerpc: kcsan: Prevent recursive instrumentation with IRQ save/restores

2023-02-07 Thread Rohan McLure
Instrumented memory accesses provided by KCSAN will access core-local
memories (which save and restore IRQs) as well as restore IRQs
directly. Avoid recursive instrumentation by applying the __no_kcsan
annotation to IRQ restore routines.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/irq_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index eb2b380e52a0..3a1e0bffe9e0 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -97,7 +97,7 @@ static inline bool irq_happened_test_and_clear(u8 irq)
return false;
 }
 
-void replay_soft_interrupts(void)
+__no_kcsan void replay_soft_interrupts(void)
 {
struct pt_regs regs;
 
@@ -185,7 +185,7 @@ void replay_soft_interrupts(void)
 }
 
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
-static inline void replay_soft_interrupts_irqrestore(void)
+__no_kcsan static inline void replay_soft_interrupts_irqrestore(void)
 {
unsigned long kuap_state = get_kuap();
 
@@ -209,7 +209,7 @@ static inline void replay_soft_interrupts_irqrestore(void)
 #define replay_soft_interrupts_irqrestore() replay_soft_interrupts()
 #endif
 
-notrace void arch_local_irq_restore(unsigned long mask)
+notrace __no_kcsan void arch_local_irq_restore(unsigned long mask)
 {
unsigned char irq_happened;
 
-- 
2.37.2
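
The cycle being broken, sketched (KCSAN internals paraphrased):

  instrumented access
    -> KCSAN runtime (report/stall path)
      -> per-CPU state, saving and restoring IRQs
        -> arch_local_irq_restore()
          -> instrumented accesses re-enter the KCSAN runtime

Marking the restore routines __no_kcsan terminates the recursion at the
last step.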



[PATCH v4 5/7] powerpc: kcsan: Memory barriers semantics

2023-02-07 Thread Rohan McLure
Annotate memory barriers *mb() with calls to kcsan_mb(), signaling to
compilers supporting KCSAN that the respective memory barrier has been
issued. Rename memory barrier *mb() to __*mb() to opt in to
asm-generic/barrier.h generating the respective *mb() macro.

Signed-off-by: Rohan McLure 
---
v2: Implement __smp_mb() in terms of __mb() to avoid duplicate calls to
kcsan_mb()
---
 arch/powerpc/include/asm/barrier.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/barrier.h 
b/arch/powerpc/include/asm/barrier.h
index e80b2c0e9315..b95b666f0374 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -35,9 +35,9 @@
  * However, on CPUs that don't support lwsync, lwsync actually maps to a
  * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
  */
-#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
-#define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
-#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __mb()   __asm__ __volatile__ ("sync" : : : "memory")
+#define __rmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
 /* The sub-arch has lwsync */
 #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC)
@@ -51,12 +51,12 @@
 /* clang defines this macro for a builtin, which will not work with runtime 
patching */
 #undef __lwsync
 #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : 
:"memory")
-#define dma_rmb()  __lwsync()
-#define dma_wmb()  __asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
+#define __dma_rmb()__lwsync()
+#define __dma_wmb()__asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
 
 #define __smp_lwsync() __lwsync()
 
-#define __smp_mb() mb()
+#define __smp_mb() __mb()
 #define __smp_rmb()__lwsync()
 #define __smp_wmb()__asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
 
-- 
2.37.2
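
For reference, this is how asm-generic/barrier.h consumes the renamed
macros (paraphrased from include/asm-generic/barrier.h, for
illustration; not part of this patch):

  #ifdef __mb
  #define mb()    do { kcsan_mb(); __mb(); } while (0)
  #endif

  #ifdef __rmb
  #define rmb()   do { kcsan_rmb(); __rmb(); } while (0)
  #endif

Defining __mb()/__rmb()/__wmb() thus opts powerpc in to the
instrumented *mb() wrappers without duplicating kcsan_*() calls.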



[PATCH v4 4/7] powerpc: kcsan: Exclude udelay to prevent recursive instrumentation

2023-02-07 Thread Rohan McLure
In order for KCSAN to increase its likelihood of observing a data race,
it sets a watchpoint on memory accesses and stalls, allowing for
detection of conflicting accesses by other kernel threads or interrupts.

Stalls are implemented by injecting a call to udelay in instrumented code.
To prevent recursive instrumentation, exclude udelay from being instrumented.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/time.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d68de3618741..b894029f53db 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -356,7 +356,7 @@ void vtime_flush(struct task_struct *tsk)
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
-void __delay(unsigned long loops)
+void __no_kcsan __delay(unsigned long loops)
 {
unsigned long start;
 
@@ -377,7 +377,7 @@ void __delay(unsigned long loops)
 }
 EXPORT_SYMBOL(__delay);
 
-void udelay(unsigned long usecs)
+void __no_kcsan udelay(unsigned long usecs)
 {
__delay(tb_ticks_per_usec * usecs);
 }
-- 
2.37.2
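
Where the stall comes from, sketched (paraphrasing KCSAN's watchpoint
setup in kernel/kcsan/core.c; details illustrative): the runtime delays
via udelay() while a watchpoint is armed, so an instrumented udelay()
would recurse straight back into KCSAN.

  static void kcsan_stall_sketch(unsigned long delay)
  {
          unsigned long irq_flags;

          if (!kcsan_interrupt_watcher)
                  local_irq_save(irq_flags);
          udelay(delay);  /* must stay uninstrumented on powerpc */
          if (!kcsan_interrupt_watcher)
                  local_irq_restore(irq_flags);
  }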



[PATCH v4 3/7] powerpc: kcsan: Add exclusions from instrumentation

2023-02-07 Thread Rohan McLure
Exclude various incompatible compilation units from KCSAN
instrumentation.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/Makefile   | 10 ++
 arch/powerpc/kernel/trace/Makefile |  1 +
 arch/powerpc/kernel/vdso/Makefile  |  1 +
 arch/powerpc/lib/Makefile  |  2 ++
 arch/powerpc/purgatory/Makefile|  1 +
 arch/powerpc/xmon/Makefile |  1 +
 6 files changed, 16 insertions(+)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9b6146056e48..9bf2be123093 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
 endif
 
+KCSAN_SANITIZE_early_32.o := n
+KCSAN_SANITIZE_early_64.o := n
+KCSAN_SANITIZE_cputable.o := n
+KCSAN_SANITIZE_btext.o := n
+KCSAN_SANITIZE_paca.o := n
+KCSAN_SANITIZE_setup_64.o := n
+
 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 # Remove stack protector to avoid triggering unneeded stack canary
 # checks due to randomize_kstack_offset.
@@ -177,12 +184,15 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS)+= secvar-sysfs.o
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
 KCOV_INSTRUMENT_prom_init.o := n
+KCSAN_SANITIZE_prom_init.o := n
 UBSAN_SANITIZE_prom_init.o := n
 GCOV_PROFILE_kprobes.o := n
 KCOV_INSTRUMENT_kprobes.o := n
+KCSAN_SANITIZE_kprobes.o := n
 UBSAN_SANITIZE_kprobes.o := n
 GCOV_PROFILE_kprobes-ftrace.o := n
 KCOV_INSTRUMENT_kprobes-ftrace.o := n
+KCSAN_SANITIZE_kprobes-ftrace.o := n
 UBSAN_SANITIZE_kprobes-ftrace.o := n
 GCOV_PROFILE_syscall_64.o := n
 KCOV_INSTRUMENT_syscall_64.o := n
diff --git a/arch/powerpc/kernel/trace/Makefile 
b/arch/powerpc/kernel/trace/Makefile
index af8527538fe4..b16a9f9c0b35 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -23,4 +23,5 @@ obj-$(CONFIG_PPC32)   += $(obj32-y)
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_ftrace.o := n
 KCOV_INSTRUMENT_ftrace.o := n
+KCSAN_SANITIZE_ftrace.o := n
 UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/vdso/Makefile 
b/arch/powerpc/kernel/vdso/Makefile
index 6a977b0d8ffc..3a2f32929fcf 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -46,6 +46,7 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both
 ccflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 4de71cbf6e8e..c4db459d304a 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,6 +16,8 @@ KASAN_SANITIZE_feature-fixups.o := n
 # restart_table.o contains functions called in the NMI interrupt path
 # which can be in real mode. Disable KASAN.
 KASAN_SANITIZE_restart_table.o := n
+KCSAN_SANITIZE_code-patching.o := n
+KCSAN_SANITIZE_feature-fixups.o := n
 
 ifdef CONFIG_KASAN
 CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index a81d155b89ae..6f5e2727963c 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 targets += trampoline_$(BITS).o purgatory.ro
 
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index eb25d7554ffd..d334de392e6c 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -5,6 +5,7 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 # Disable ftrace for the entire directory
 ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
-- 
2.37.2



[PATCH v4 2/7] xtensa: kcsan: Remove kcsan stubs for atomic builtins

2023-02-07 Thread Rohan McLure
A prior patch implemented stubs in place of the __atomic_* builtins in
generic code, as these are useful for other 32-bit architectures such as
32-bit powerpc.

Remove the kcsan-stubs.c translation unit and instead use
the generic implementation.

Signed-off-by: Rohan McLure 
---
V4: New patch
---
 arch/xtensa/lib/Makefile  |  1 -
 arch/xtensa/lib/kcsan-stubs.c | 54 ---
 kernel/kcsan/Makefile |  5 +---
 3 files changed, 1 insertion(+), 59 deletions(-)
 delete mode 100644 arch/xtensa/lib/kcsan-stubs.c

diff --git a/arch/xtensa/lib/Makefile b/arch/xtensa/lib/Makefile
index 7ecef0519a27..d69356dc97df 100644
--- a/arch/xtensa/lib/Makefile
+++ b/arch/xtensa/lib/Makefile
@@ -8,5 +8,4 @@ lib-y   += memcopy.o memset.o checksum.o \
   divsi3.o udivsi3.o modsi3.o umodsi3.o mulsi3.o umulsidi3.o \
   usercopy.o strncpy_user.o strnlen_user.o
 lib-$(CONFIG_PCI) += pci-auto.o
-lib-$(CONFIG_KCSAN) += kcsan-stubs.o
 KCSAN_SANITIZE_kcsan-stubs.o := n
diff --git a/arch/xtensa/lib/kcsan-stubs.c b/arch/xtensa/lib/kcsan-stubs.c
deleted file mode 100644
index 2b08faa62b86..
--- a/arch/xtensa/lib/kcsan-stubs.c
+++ /dev/null
@@ -1,54 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-
-#include 
-#include 
-
-void __atomic_store_8(volatile void *p, u64 v, int i)
-{
-   BUG();
-}
-
-u64 __atomic_load_8(const volatile void *p, int i)
-{
-   BUG();
-}
-
-u64 __atomic_exchange_8(volatile void *p, u64 v, int i)
-{
-   BUG();
-}
-
-bool __atomic_compare_exchange_8(volatile void *p1, void *p2, u64 v, bool b, 
int i1, int i2)
-{
-   BUG();
-}
-
-u64 __atomic_fetch_add_8(volatile void *p, u64 v, int i)
-{
-   BUG();
-}
-
-u64 __atomic_fetch_sub_8(volatile void *p, u64 v, int i)
-{
-   BUG();
-}
-
-u64 __atomic_fetch_and_8(volatile void *p, u64 v, int i)
-{
-   BUG();
-}
-
-u64 __atomic_fetch_or_8(volatile void *p, u64 v, int i)
-{
-   BUG();
-}
-
-u64 __atomic_fetch_xor_8(volatile void *p, u64 v, int i)
-{
-   BUG();
-}
-
-u64 __atomic_fetch_nand_8(volatile void *p, u64 v, int i)
-{
-   BUG();
-}
diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
index 5dfc5825aae9..377b81be94fa 100644
--- a/kernel/kcsan/Makefile
+++ b/kernel/kcsan/Makefile
@@ -11,10 +11,7 @@ CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \
$(call cc-option,-mno-outline-atomics) \
-fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
-obj-y := core.o debugfs.o report.o
-ifndef XTENSA
-   obj-y += stubs.o
-endif
+obj-y := core.o debugfs.o report.o stubs.o
 
 KCSAN_INSTRUMENT_BARRIERS_selftest.o := y
 obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
-- 
2.37.2



[PATCH v4 1/7] kcsan: Add atomic builtin stubs for 32-bit systems

2023-02-07 Thread Rohan McLure
KCSAN instruments calls to atomic builtins, and will in turn call these
builtins itself. As such, architectures supporting KCSAN must have
compiler support for these atomic primitives.

Since 32-bit systems are unlikely to have 64-bit compiler builtins,
provide a stub for each missing builtin, and use BUG() to assert
unreachability.

In commit 725aea873261 ("xtensa: enable KCSAN"), xtensa implements these
locally, but does not advertise the fact with preprocessor macros. To
avoid production of duplicate symbols, do not build the stubs on xtensa.
A future patch will remove the xtensa implementation of these stubs.

Signed-off-by: Rohan McLure 
---
v4: New patch
---
 kernel/kcsan/Makefile |  3 ++
 kernel/kcsan/stubs.c  | 78 +++
 2 files changed, 81 insertions(+)
 create mode 100644 kernel/kcsan/stubs.c

diff --git a/kernel/kcsan/Makefile b/kernel/kcsan/Makefile
index 8cf70f068d92..5dfc5825aae9 100644
--- a/kernel/kcsan/Makefile
+++ b/kernel/kcsan/Makefile
@@ -12,6 +12,9 @@ CFLAGS_core.o := $(call cc-option,-fno-conserve-stack) \
-fno-stack-protector -DDISABLE_BRANCH_PROFILING
 
 obj-y := core.o debugfs.o report.o
+ifndef XTENSA
+   obj-y += stubs.o
+endif
 
 KCSAN_INSTRUMENT_BARRIERS_selftest.o := y
 obj-$(CONFIG_KCSAN_SELFTEST) += selftest.o
diff --git a/kernel/kcsan/stubs.c b/kernel/kcsan/stubs.c
new file mode 100644
index ..ec5cd99be422
--- /dev/null
+++ b/kernel/kcsan/stubs.c
@@ -0,0 +1,78 @@
+// SPDX-License Identifier: GPL-2.0
+
+#include 
+#include 
+
+#ifdef CONFIG_32BIT
+
+#if !__has_builtin(__atomic_store_8)
+void __atomic_store_8(volatile void *p, u64 v, int i)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_load_8)
+u64 __atomic_load_8(const volatile void *p, int i)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_exchange_8)
+u64 __atomic_exchange_8(volatile void *p, u64 v, int i)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_compare_exchange_8)
+bool __atomic_compare_exchange_8(volatile void *p1, void *p2, u64 v, bool b, 
int i1, int i2)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_fetch_add_8)
+u64 __atomic_fetch_add_8(volatile void *p, u64 v, int i)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_fetch_sub_8)
+u64 __atomic_fetch_sub_8(volatile void *p, u64 v, int i)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_fetch_and_8)
+u64 __atomic_fetch_and_8(volatile void *p, u64 v, int i)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_fetch_or_8)
+u64 __atomic_fetch_or_8(volatile void *p, u64 v, int i)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_fetch_xor_8)
+u64 __atomic_fetch_xor_8(volatile void *p, u64 v, int i)
+{
+   BUG();
+}
+#endif
+
+#if !__has_builtin(__atomic_fetch_nand_8)
+u64 __atomic_fetch_nand_8(volatile void *p, u64 v, int i)
+{
+   BUG();
+}
+#endif
+
+#endif /* CONFIG_32BIT */
-- 
2.37.2
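
For context on why the stubs may be linked yet must be unreachable: the
KCSAN runtime intercepts the atomic builtins and then performs the real
operation, which on 32-bit can lower to an out-of-line libatomic symbol
the kernel does not provide. Interceptor shape, sketched (paraphrasing
kernel/kcsan/core.c; not exact):

  u64 __tsan_atomic64_load(const u64 *ptr, int memorder)
  {
          /* may lower to a call to __atomic_load_8 on 32-bit targets */
          return __atomic_load_n(ptr, memorder);
  }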



[PATCH v4 0/7] powerpc: Add KCSAN Support

2023-02-07 Thread Rohan McLure
Add Kernel Concurrency Sanitiser support for PPC64. Doing so involves
exclusion of a number of compilation units from instrumentation, as was
done with KASAN.

KCSAN uses watchpoints on memory accesses to enforce the semantics of
the Linux kernel memory model, notifying the user of observed data races
which have not been declared to be intended in source through the
data_race() macro, in order to remove false positives.

A number of such race conditions are identified. This patch series
provides support for the instrumentation, with bug fixes as well as
removal of false positives to be issued in future patches.

As of v4, provide stubs for the 8-byte __atomic_* builtins on 32-bit
systems without toolchain support. This generalises xtensa's stubs, and
causes xtensa to use the generic implementation.

v3: Restrict support to PPC64 as kcsan code expects support for
__atomic* builtins for 64-bit atomic types.
Link: 
https://lore.kernel.org/linuxppc-dev/449b9d60-18f6-ebf3-9878-ae54a61d1...@csgroup.eu/

v2: Implement __smp_mb() in terms of __mb() to avoid multiple calls to
kcsan_mb().
Link: 
https://lore.kernel.org/linuxppc-dev/20230201043438.1301212-4-rmcl...@linux.ibm.com/

v1: 
https://lore.kernel.org/linuxppc-dev/20230131234859.1275125-1-rmcl...@linux.ibm.com/

Rohan McLure (7):
  kcsan: Add atomic builtin stubs for 32-bit systems
  xtensa: kcsan: Remove kcsan stubs for atomic builtins
  powerpc: kcsan: Add exclusions from instrumentation
  powerpc: kcsan: Exclude udelay to prevent recursive instrumentation
  powerpc: kcsan: Memory barriers semantics
  powerpc: kcsan: Prevent recursive instrumentation with IRQ
save/restores
  powerpc: kcsan: Add KCSAN Support

 arch/powerpc/Kconfig  |  1 +
 arch/powerpc/include/asm/barrier.h| 12 -
 arch/powerpc/kernel/Makefile  | 10 +++
 arch/powerpc/kernel/irq_64.c  |  6 ++---
 arch/powerpc/kernel/time.c|  4 +--
 arch/powerpc/kernel/trace/Makefile|  1 +
 arch/powerpc/kernel/vdso/Makefile |  1 +
 arch/powerpc/lib/Makefile |  2 ++
 arch/powerpc/purgatory/Makefile   |  1 +
 arch/powerpc/xmon/Makefile|  1 +
 arch/xtensa/lib/Makefile  |  1 -
 kernel/kcsan/Makefile |  2 +-
 .../lib/kcsan-stubs.c => kernel/kcsan/stubs.c | 26 ++-
 13 files changed, 54 insertions(+), 14 deletions(-)
 rename arch/xtensa/lib/kcsan-stubs.c => kernel/kcsan/stubs.c (58%)

-- 
2.37.2
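
For context, the annotation referred to above is data_race() from
linux/compiler.h: it marks an access whose raciness is intended, so
KCSAN will not report it. A minimal example (field name hypothetical):

  /* diagnostic read: a stale or torn value is acceptable here */
  pr_info("pending=%d\n", data_race(shared->pending));

Unannotated conflicting accesses to the same location are reported as
data races.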



[PATCH v3 5/5] powerpc: kcsan: Add KCSAN Support

2023-02-05 Thread Rohan McLure
Enable HAVE_ARCH_KCSAN on all powerpc platforms, permitting use of the
kernel concurrency sanitiser through the CONFIG_KCSAN_* kconfig options.
KCSAN requires the __atomic_* compiler builtins for 64-bit values, and
so support is only reported on PPC64.

See documentation in Documentation/dev-tools/kcsan.rst for more
information.

Signed-off-by: Rohan McLure 
---
v3: Restrict support to 64-bit, as TSAN expects 64-bit __atomic_* compiler
built-ins.
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b8c4ac56bddc..55bc2d724c73 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -198,6 +198,7 @@ config PPC
select HAVE_ARCH_KASAN  if PPC_RADIX_MMU
select HAVE_ARCH_KASAN  if PPC_BOOK3E_64
select HAVE_ARCH_KASAN_VMALLOC  if HAVE_ARCH_KASAN
+   select HAVE_ARCH_KCSANif PPC64
select HAVE_ARCH_KFENCE if ARCH_SUPPORTS_DEBUG_PAGEALLOC
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_KGDB
-- 
2.37.2



[PATCH v3 2/5] powerpc: kcsan: Exclude udelay to prevent recursive instrumentation

2023-02-05 Thread Rohan McLure
In order for KCSAN to increase its likelihood of observing a data race,
it sets a watchpoint on memory accesses and stalls, allowing for
detection of conflicting accesses by other kernel threads or interrupts.

Stalls are implemented by injecting a call to udelay in instrumented code.
To prevent recursive instrumentation, exclude udelay from being instrumented.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/time.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d68de3618741..b894029f53db 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -356,7 +356,7 @@ void vtime_flush(struct task_struct *tsk)
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
-void __delay(unsigned long loops)
+void __no_kcsan __delay(unsigned long loops)
 {
unsigned long start;
 
@@ -377,7 +377,7 @@ void __delay(unsigned long loops)
 }
 EXPORT_SYMBOL(__delay);
 
-void udelay(unsigned long usecs)
+void __no_kcsan udelay(unsigned long usecs)
 {
__delay(tb_ticks_per_usec * usecs);
 }
-- 
2.37.2



[PATCH v3 4/5] powerpc: kcsan: Prevent recursive instrumentation with IRQ save/restores

2023-02-05 Thread Rohan McLure
Instrumented memory accesses provided by KCSAN will access core-local
memories (which save and restore IRQs) as well as restore IRQs
directly. Avoid recursive instrumentation by applying the __no_kcsan
annotation to IRQ restore routines.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/irq_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index eb2b380e52a0..3a1e0bffe9e0 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -97,7 +97,7 @@ static inline bool irq_happened_test_and_clear(u8 irq)
return false;
 }
 
-void replay_soft_interrupts(void)
+__no_kcsan void replay_soft_interrupts(void)
 {
struct pt_regs regs;
 
@@ -185,7 +185,7 @@ void replay_soft_interrupts(void)
 }
 
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
-static inline void replay_soft_interrupts_irqrestore(void)
+__no_kcsan static inline void replay_soft_interrupts_irqrestore(void)
 {
unsigned long kuap_state = get_kuap();
 
@@ -209,7 +209,7 @@ static inline void replay_soft_interrupts_irqrestore(void)
 #define replay_soft_interrupts_irqrestore() replay_soft_interrupts()
 #endif
 
-notrace void arch_local_irq_restore(unsigned long mask)
+notrace __no_kcsan void arch_local_irq_restore(unsigned long mask)
 {
unsigned char irq_happened;
 
-- 
2.37.2



[PATCH v3 0/5] powerpc: Add KCSAN support

2023-02-05 Thread Rohan McLure
Add Kernel Concurrency Sanitiser support for PPC64. Doing so involves
exclusion of a number of compilation units from instrumentation, as was
done with KASAN.

KCSAN uses watchpoints on memory accesses to enforce the semantics of
the Linux kernel memory model, notifying the user of observed data races
which have not been declared to be intended in source through the
data_race() macro, in order to remove false positives.

A number of such race conditions are identified. This patch series
provides support for the instrumentation, with bug fixes as well as
removal of false positives to be issued in future patches.

v3: Restrict support to PPC64 as kcsan code expects support for
__atomic* builtins for 64-bit atomic types.

v2: Implement __smp_mb() in terms of __mb() to avoid multiple calls to
kcsan_mb().
Link: 
https://lore.kernel.org/linuxppc-dev/20230201043438.1301212-4-rmcl...@linux.ibm.com/

v1: 
https://lore.kernel.org/linuxppc-dev/20230131234859.1275125-1-rmcl...@linux.ibm.com/

Rohan McLure (5):
  powerpc: kcsan: Add exclusions from instrumentation
  powerpc: kcsan: Exclude udelay to prevent recursive instrumentation
  powerpc: kcsan: Memory barriers semantics
  powerpc: kcsan: Prevent recursive instrumentation with IRQ
save/restores
  powerpc: kcsan: Add KCSAN Support

 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/barrier.h | 12 ++--
 arch/powerpc/kernel/Makefile   | 10 ++
 arch/powerpc/kernel/irq_64.c   |  6 +++---
 arch/powerpc/kernel/time.c |  4 ++--
 arch/powerpc/kernel/trace/Makefile |  1 +
 arch/powerpc/kernel/vdso/Makefile  |  1 +
 arch/powerpc/lib/Makefile  |  2 ++
 arch/powerpc/purgatory/Makefile|  1 +
 arch/powerpc/xmon/Makefile |  1 +
 10 files changed, 28 insertions(+), 11 deletions(-)

-- 
2.37.2



[PATCH v3 3/5] powerpc: kcsan: Memory barriers semantics

2023-02-05 Thread Rohan McLure
Annotate memory barriers *mb() with calls to kcsan_mb(), signaling to
compilers supporting KCSAN that the respective memory barrier has been
issued. Rename memory barrier *mb() to __*mb() to opt in to
asm-generic/barrier.h generating the respective *mb() macro.

Signed-off-by: Rohan McLure 
---
v2: Implement __smp_mb() in terms of __mb() to avoid duplicate calls to
kcsan_mb()
---
 arch/powerpc/include/asm/barrier.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/barrier.h 
b/arch/powerpc/include/asm/barrier.h
index e80b2c0e9315..b95b666f0374 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -35,9 +35,9 @@
  * However, on CPUs that don't support lwsync, lwsync actually maps to a
  * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
  */
-#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
-#define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
-#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __mb()   __asm__ __volatile__ ("sync" : : : "memory")
+#define __rmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
 /* The sub-arch has lwsync */
 #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC)
@@ -51,12 +51,12 @@
 /* clang defines this macro for a builtin, which will not work with runtime 
patching */
 #undef __lwsync
 #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : 
:"memory")
-#define dma_rmb()  __lwsync()
-#define dma_wmb()  __asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
+#define __dma_rmb()__lwsync()
+#define __dma_wmb()__asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
 
 #define __smp_lwsync() __lwsync()
 
-#define __smp_mb() mb()
+#define __smp_mb() __mb()
 #define __smp_rmb()__lwsync()
 #define __smp_wmb()__asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
 
-- 
2.37.2



[PATCH v3 1/5] powerpc: kcsan: Add exclusions from instrumentation

2023-02-05 Thread Rohan McLure
Exclude various incompatible compilation units from KCSAN
instrumentation.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/Makefile   | 10 ++
 arch/powerpc/kernel/trace/Makefile |  1 +
 arch/powerpc/kernel/vdso/Makefile  |  1 +
 arch/powerpc/lib/Makefile  |  2 ++
 arch/powerpc/purgatory/Makefile|  1 +
 arch/powerpc/xmon/Makefile |  1 +
 6 files changed, 16 insertions(+)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9b6146056e48..9bf2be123093 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
 endif
 
+KCSAN_SANITIZE_early_32.o := n
+KCSAN_SANITIZE_early_64.o := n
+KCSAN_SANITIZE_cputable.o := n
+KCSAN_SANITIZE_btext.o := n
+KCSAN_SANITIZE_paca.o := n
+KCSAN_SANITIZE_setup_64.o := n
+
 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 # Remove stack protector to avoid triggering unneeded stack canary
 # checks due to randomize_kstack_offset.
@@ -177,12 +184,15 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS)+= secvar-sysfs.o
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
 KCOV_INSTRUMENT_prom_init.o := n
+KCSAN_SANITIZE_prom_init.o := n
 UBSAN_SANITIZE_prom_init.o := n
 GCOV_PROFILE_kprobes.o := n
 KCOV_INSTRUMENT_kprobes.o := n
+KCSAN_SANITIZE_kprobes.o := n
 UBSAN_SANITIZE_kprobes.o := n
 GCOV_PROFILE_kprobes-ftrace.o := n
 KCOV_INSTRUMENT_kprobes-ftrace.o := n
+KCSAN_SANITIZE_kprobes-ftrace.o := n
 UBSAN_SANITIZE_kprobes-ftrace.o := n
 GCOV_PROFILE_syscall_64.o := n
 KCOV_INSTRUMENT_syscall_64.o := n
diff --git a/arch/powerpc/kernel/trace/Makefile 
b/arch/powerpc/kernel/trace/Makefile
index af8527538fe4..b16a9f9c0b35 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -23,4 +23,5 @@ obj-$(CONFIG_PPC32)   += $(obj32-y)
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_ftrace.o := n
 KCOV_INSTRUMENT_ftrace.o := n
+KCSAN_SANITIZE_ftrace.o := n
 UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/vdso/Makefile 
b/arch/powerpc/kernel/vdso/Makefile
index 6a977b0d8ffc..3a2f32929fcf 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -46,6 +46,7 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both
 ccflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 4de71cbf6e8e..c4db459d304a 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,6 +16,8 @@ KASAN_SANITIZE_feature-fixups.o := n
 # restart_table.o contains functions called in the NMI interrupt path
 # which can be in real mode. Disable KASAN.
 KASAN_SANITIZE_restart_table.o := n
+KCSAN_SANITIZE_code-patching.o := n
+KCSAN_SANITIZE_feature-fixups.o := n
 
 ifdef CONFIG_KASAN
 CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index a81d155b89ae..6f5e2727963c 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 targets += trampoline_$(BITS).o purgatory.ro
 
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index eb25d7554ffd..d334de392e6c 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -5,6 +5,7 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 # Disable ftrace for the entire directory
 ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
-- 
2.37.2



[PATCH v2 5/5] powerpc: kcsan: Add KCSAN Support

2023-01-31 Thread Rohan McLure
Enable HAVE_ARCH_KCSAN on all powerpc platforms, permitting use of the
kernel concurrency sanitiser through the CONFIG_KCSAN_* kconfig options.

See documentation in Documentation/dev-tools/kcsan.rst for more
information.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b8c4ac56bddc..66c777c78677 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -198,6 +198,7 @@ config PPC
select HAVE_ARCH_KASAN  if PPC_RADIX_MMU
select HAVE_ARCH_KASAN  if PPC_BOOK3E_64
select HAVE_ARCH_KASAN_VMALLOC  if HAVE_ARCH_KASAN
+   select HAVE_ARCH_KCSAN
select HAVE_ARCH_KFENCE if ARCH_SUPPORTS_DEBUG_PAGEALLOC
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_KGDB
-- 
2.37.2



[PATCH v2 4/5] powerpc: kcsan: Prevent recursive instrumentation with IRQ save/restores

2023-01-31 Thread Rohan McLure
Instrumented memory accesses provided by KCSAN will access core-local
memories (which save and restore IRQs) as well as restore IRQs
directly. Avoid recursive instrumentation by applying the __no_kcsan
annotation to IRQ restore routines.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/irq_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index eb2b380e52a0..3a1e0bffe9e0 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -97,7 +97,7 @@ static inline bool irq_happened_test_and_clear(u8 irq)
return false;
 }
 
-void replay_soft_interrupts(void)
+__no_kcsan void replay_soft_interrupts(void)
 {
struct pt_regs regs;
 
@@ -185,7 +185,7 @@ void replay_soft_interrupts(void)
 }
 
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
-static inline void replay_soft_interrupts_irqrestore(void)
+__no_kcsan static inline void replay_soft_interrupts_irqrestore(void)
 {
unsigned long kuap_state = get_kuap();
 
@@ -209,7 +209,7 @@ static inline void replay_soft_interrupts_irqrestore(void)
 #define replay_soft_interrupts_irqrestore() replay_soft_interrupts()
 #endif
 
-notrace void arch_local_irq_restore(unsigned long mask)
+notrace __no_kcsan void arch_local_irq_restore(unsigned long mask)
 {
unsigned char irq_happened;
 
-- 
2.37.2



[PATCH v2 3/5] powerpc: kcsan: Memory barriers semantics

2023-01-31 Thread Rohan McLure
Annotate memory barriers *mb() with calls to kcsan_mb(), signaling to
compilers supporting KCSAN that the respective memory barrier has been
issued. Rename memory barrier *mb() to __*mb() to opt in to
asm-generic/barrier.h generating the respective *mb() macro.

Signed-off-by: Rohan McLure 
---
v2: Implement __smp_mb() in terms of __mb() to avoid duplicate calls to
kcsan_mb()
---
 arch/powerpc/include/asm/barrier.h | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/barrier.h 
b/arch/powerpc/include/asm/barrier.h
index e80b2c0e9315..b95b666f0374 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -35,9 +35,9 @@
  * However, on CPUs that don't support lwsync, lwsync actually maps to a
  * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
  */
-#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
-#define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
-#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __mb()   __asm__ __volatile__ ("sync" : : : "memory")
+#define __rmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
 /* The sub-arch has lwsync */
 #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC)
@@ -51,12 +51,12 @@
 /* clang defines this macro for a builtin, which will not work with runtime 
patching */
 #undef __lwsync
 #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : 
:"memory")
-#define dma_rmb()  __lwsync()
-#define dma_wmb()  __asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
+#define __dma_rmb()__lwsync()
+#define __dma_wmb()__asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
 
 #define __smp_lwsync() __lwsync()
 
-#define __smp_mb() mb()
+#define __smp_mb() __mb()
 #define __smp_rmb()__lwsync()
 #define __smp_wmb()__asm__ __volatile__ (stringify_in_c(SMPWMB) : : 
:"memory")
 
-- 
2.37.2



[PATCH v2 2/5] powerpc: kcsan: Exclude udelay to prevent recursive instrumentation

2023-01-31 Thread Rohan McLure
In order for KCSAN to increase its likelihood of observing a data race,
it sets a watchpoint on memory accesses and stalls, allowing for
detection of conflicting accesses by other kernel threads or interrupts.

Stalls are implemented by injecting a call to udelay in instrumented code.
To prevent recursive instrumentation, exclude udelay from being instrumented.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/time.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d68de3618741..b894029f53db 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -356,7 +356,7 @@ void vtime_flush(struct task_struct *tsk)
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
-void __delay(unsigned long loops)
+void __no_kcsan __delay(unsigned long loops)
 {
unsigned long start;
 
@@ -377,7 +377,7 @@ void __delay(unsigned long loops)
 }
 EXPORT_SYMBOL(__delay);
 
-void udelay(unsigned long usecs)
+void __no_kcsan udelay(unsigned long usecs)
 {
__delay(tb_ticks_per_usec * usecs);
 }
-- 
2.37.2



[PATCH v2 1/5] powerpc: kcsan: Add exclusions from instrumentation

2023-01-31 Thread Rohan McLure
Exclude various incompatible compilation units from KCSAN
instrumentation.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/Makefile   | 10 ++
 arch/powerpc/kernel/trace/Makefile |  1 +
 arch/powerpc/kernel/vdso/Makefile  |  1 +
 arch/powerpc/lib/Makefile  |  2 ++
 arch/powerpc/purgatory/Makefile|  1 +
 arch/powerpc/xmon/Makefile |  1 +
 6 files changed, 16 insertions(+)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9b6146056e48..9bf2be123093 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
 endif
 
+KCSAN_SANITIZE_early_32.o := n
+KCSAN_SANITIZE_early_64.o := n
+KCSAN_SANITIZE_cputable.o := n
+KCSAN_SANITIZE_btext.o := n
+KCSAN_SANITIZE_paca.o := n
+KCSAN_SANITIZE_setup_64.o := n
+
 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 # Remove stack protector to avoid triggering unneeded stack canary
 # checks due to randomize_kstack_offset.
@@ -177,12 +184,15 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS)+= secvar-sysfs.o
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
 KCOV_INSTRUMENT_prom_init.o := n
+KCSAN_SANITIZE_prom_init.o := n
 UBSAN_SANITIZE_prom_init.o := n
 GCOV_PROFILE_kprobes.o := n
 KCOV_INSTRUMENT_kprobes.o := n
+KCSAN_SANITIZE_kprobes.o := n
 UBSAN_SANITIZE_kprobes.o := n
 GCOV_PROFILE_kprobes-ftrace.o := n
 KCOV_INSTRUMENT_kprobes-ftrace.o := n
+KCSAN_SANITIZE_kprobes-ftrace.o := n
 UBSAN_SANITIZE_kprobes-ftrace.o := n
 GCOV_PROFILE_syscall_64.o := n
 KCOV_INSTRUMENT_syscall_64.o := n
diff --git a/arch/powerpc/kernel/trace/Makefile 
b/arch/powerpc/kernel/trace/Makefile
index af8527538fe4..b16a9f9c0b35 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -23,4 +23,5 @@ obj-$(CONFIG_PPC32)   += $(obj32-y)
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_ftrace.o := n
 KCOV_INSTRUMENT_ftrace.o := n
+KCSAN_SANITIZE_ftrace.o := n
 UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/vdso/Makefile 
b/arch/powerpc/kernel/vdso/Makefile
index 6a977b0d8ffc..3a2f32929fcf 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -46,6 +46,7 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both
 ccflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 4de71cbf6e8e..c4db459d304a 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,6 +16,8 @@ KASAN_SANITIZE_feature-fixups.o := n
 # restart_table.o contains functions called in the NMI interrupt path
 # which can be in real mode. Disable KASAN.
 KASAN_SANITIZE_restart_table.o := n
+KCSAN_SANITIZE_code-patching.o := n
+KCSAN_SANITIZE_feature-fixups.o := n
 
 ifdef CONFIG_KASAN
 CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index a81d155b89ae..6f5e2727963c 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 targets += trampoline_$(BITS).o purgatory.ro
 
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index eb25d7554ffd..d334de392e6c 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -5,6 +5,7 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 # Disable ftrace for the entire directory
 ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
-- 
2.37.2



[PATCH v2 0/5] powerpc: Add KCSAN support

2023-01-31 Thread Rohan McLure
Add Kernel Concurrency Sanitiser support for powerpc. Doing so involves
exclusion of a number of compilation units from instrumentation, as was
done with KASAN.

KCSAN uses watchpoints on memory accesses to enforce the semantics of
the Linux kernel memory model, notifying the user of observed data races
which have not been declared to be intended in source through the
data_race() macro, in order to remove false positives.

A number of such race conditions are identified. This patch series
provides support for the instrumentation, with bug fixes as well as
removal of false positives to be issued in future patches.

v2: Implement __smp_mb() in terms of __mb() to avoid multiple calls to
kcsan_mb().

v1: 
https://lore.kernel.org/linuxppc-dev/20230131234859.1275125-1-rmcl...@linux.ibm.com/

Rohan McLure (5):
  powerpc: kcsan: Add exclusions from instrumentation
  powerpc: kcsan: Exclude udelay to prevent recursive instrumentation
  powerpc: kcsan: Memory barriers semantics
  powerpc: kcsan: Prevent recursive instrumentation with IRQ
save/restores
  powerpc: kcsan: Add KCSAN Support

 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/barrier.h | 12 ++--
 arch/powerpc/kernel/Makefile   | 10 ++
 arch/powerpc/kernel/irq_64.c   |  6 +++---
 arch/powerpc/kernel/time.c |  4 ++--
 arch/powerpc/kernel/trace/Makefile |  1 +
 arch/powerpc/kernel/vdso/Makefile  |  1 +
 arch/powerpc/lib/Makefile  |  2 ++
 arch/powerpc/purgatory/Makefile|  1 +
 arch/powerpc/xmon/Makefile |  1 +
 10 files changed, 28 insertions(+), 11 deletions(-)

-- 
2.37.2



[PATCH 0/5] powerpc: Add KCSAN support

2023-01-31 Thread Rohan McLure
Add Kernel Concurrency Sanitiser support for powerpc. Doing so involves
exclusion of a number of compilation units from instrumentation, as was
done with KASAN.

KCSAN uses watchpoints on memory accesses to enforce the semantics of
the Linux kernel memory model, notifying the user of observed data races
which have not been declared to be intended in source through the
data_race() macro, in order to remove false positives.

A number of such race conditions are identified. This patch series
provides support for the instrumentation, with bug fixes as well as
removal of false positives to be issued in future patches.

Rohan McLure (5):
  powerpc: kcsan: Add exclusions from instrumentation
  powerpc: kcsan: Exclude udelay to prevent recursive instrumentation
  powerpc: kcsan: Memory barriers semantics
  powerpc: kcsan: Prevent recursive instrumentation with IRQ
save/restores
  powerpc: kcsan: Add KCSAN Support

 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/barrier.h | 10 +-
 arch/powerpc/kernel/Makefile   | 10 ++
 arch/powerpc/kernel/irq_64.c   |  6 +++---
 arch/powerpc/kernel/time.c |  4 ++--
 arch/powerpc/kernel/trace/Makefile |  1 +
 arch/powerpc/kernel/vdso/Makefile  |  1 +
 arch/powerpc/lib/Makefile  |  2 ++
 arch/powerpc/purgatory/Makefile|  1 +
 arch/powerpc/xmon/Makefile |  1 +
 10 files changed, 27 insertions(+), 10 deletions(-)

-- 
2.37.2



[PATCH 5/5] powerpc: kcsan: Add KCSAN Support

2023-01-31 Thread Rohan McLure
Enable HAVE_ARCH_KCSAN on all powerpc platforms, permitting use of the
kernel concurrency sanitiser through the CONFIG_KCSAN_* kconfig options.

See documentation in Documentation/dev-tools/kcsan.rst for more
information.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index b8c4ac56bddc..66c777c78677 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -198,6 +198,7 @@ config PPC
select HAVE_ARCH_KASAN  if PPC_RADIX_MMU
select HAVE_ARCH_KASAN  if PPC_BOOK3E_64
select HAVE_ARCH_KASAN_VMALLOC  if HAVE_ARCH_KASAN
+   select HAVE_ARCH_KCSAN
select HAVE_ARCH_KFENCE if ARCH_SUPPORTS_DEBUG_PAGEALLOC
select HAVE_ARCH_RANDOMIZE_KSTACK_OFFSET
select HAVE_ARCH_KGDB
-- 
2.37.2



[PATCH 4/5] powerpc: kcsan: Prevent recursive instrumentation with IRQ save/restores

2023-01-31 Thread Rohan McLure
Instrumented memory accesses provided by KCSAN will access core-local
memories (which save and restore IRQs) as well as restore IRQs
directly. Avoid recursive instrumentation by applying the __no_kcsan
annotation to IRQ restore routines.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/irq_64.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/irq_64.c b/arch/powerpc/kernel/irq_64.c
index eb2b380e52a0..3a1e0bffe9e0 100644
--- a/arch/powerpc/kernel/irq_64.c
+++ b/arch/powerpc/kernel/irq_64.c
@@ -97,7 +97,7 @@ static inline bool irq_happened_test_and_clear(u8 irq)
return false;
 }
 
-void replay_soft_interrupts(void)
+__no_kcsan void replay_soft_interrupts(void)
 {
struct pt_regs regs;
 
@@ -185,7 +185,7 @@ void replay_soft_interrupts(void)
 }
 
 #if defined(CONFIG_PPC_BOOK3S_64) && defined(CONFIG_PPC_KUAP)
-static inline void replay_soft_interrupts_irqrestore(void)
+__no_kcsan static inline void replay_soft_interrupts_irqrestore(void)
 {
unsigned long kuap_state = get_kuap();
 
@@ -209,7 +209,7 @@ static inline void replay_soft_interrupts_irqrestore(void)
 #define replay_soft_interrupts_irqrestore() replay_soft_interrupts()
 #endif
 
-notrace void arch_local_irq_restore(unsigned long mask)
+notrace __no_kcsan void arch_local_irq_restore(unsigned long mask)
 {
unsigned char irq_happened;
 
-- 
2.37.2



[PATCH 3/5] powerpc: kcsan: Memory barriers semantics

2023-01-31 Thread Rohan McLure
Annotate memory barriers *mb() with calls to kcsan_mb(), signaling to
compilers supporting KCSAN that the respective memory barrier has been
issued. Rename each memory barrier *mb() to __*mb(), opting in to
asm-generic/barrier.h generating the respective *mb() macro.
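
The opt-in mechanism looks roughly like this (simplified from
include/asm-generic/barrier.h):

/* Simplified sketch of the asm-generic fallback: when the arch
 * provides __mb(), the generic header emits an mb() that pairs the
 * KCSAN annotation with the real barrier.
 */
#ifdef __mb
#define mb()	do { kcsan_mb(); __mb(); } while (0)
#endif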

Signed-off-by: Rohan McLure 
---
 arch/powerpc/include/asm/barrier.h | 10 +-
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/barrier.h 
b/arch/powerpc/include/asm/barrier.h
index e80b2c0e9315..f51f4be5fa4e 100644
--- a/arch/powerpc/include/asm/barrier.h
+++ b/arch/powerpc/include/asm/barrier.h
@@ -35,9 +35,9 @@
  * However, on CPUs that don't support lwsync, lwsync actually maps to a
  * heavy-weight sync, so smp_wmb() can be a lighter-weight eieio.
  */
-#define mb()   __asm__ __volatile__ ("sync" : : : "memory")
-#define rmb()  __asm__ __volatile__ ("sync" : : : "memory")
-#define wmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __mb()   __asm__ __volatile__ ("sync" : : : "memory")
+#define __rmb()  __asm__ __volatile__ ("sync" : : : "memory")
+#define __wmb()  __asm__ __volatile__ ("sync" : : : "memory")
 
 /* The sub-arch has lwsync */
 #if defined(CONFIG_PPC64) || defined(CONFIG_PPC_E500MC)
@@ -51,8 +51,8 @@
 /* clang defines this macro for a builtin, which will not work with runtime patching */
 #undef __lwsync
 #define __lwsync() __asm__ __volatile__ (stringify_in_c(LWSYNC) : : :"memory")
-#define dma_rmb()	__lwsync()
-#define dma_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
+#define __dma_rmb()	__lwsync()
+#define __dma_wmb()	__asm__ __volatile__ (stringify_in_c(SMPWMB) : : :"memory")
 
 #define __smp_lwsync() __lwsync()
 
-- 
2.37.2



[PATCH 2/5] powerpc: kcsan: Exclude udelay to prevent recursive instrumentation

2023-01-31 Thread Rohan McLure
In order for KCSAN to increase its likelihood of observing a data race,
it sets a watchpoint on memory accesses and stalls, allowing for
detection of conflicting accesses by other kernel threads or interrupts.

Stalls are implemented by injecting a call to udelay in instrumented code.
To prevent recursive instrumentation, exclude udelay from being instrumented.
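
Conceptually, the detection step looks something like the following
(heavily simplified pseudo-C; set_watchpoint(), watchpoint_consumed()
and KCSAN_UDELAY are illustrative names, not the real kernel/kcsan
API):

/* Heavily simplified sketch of KCSAN's handling of one access. */
watchpoint = set_watchpoint(ptr, size);	/* publish the watched range */
udelay(KCSAN_UDELAY);			/* stall within instrumented code */
if (watchpoint_consumed(watchpoint))	/* did a conflicting access hit? */
	report_data_race(ptr);

Because the stall itself is a call to udelay(), an instrumented
udelay() would recurse into its own stall, hence the exclusion below.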

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/time.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index d68de3618741..b894029f53db 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -356,7 +356,7 @@ void vtime_flush(struct task_struct *tsk)
 }
 #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
-void __delay(unsigned long loops)
+void __no_kcsan __delay(unsigned long loops)
 {
unsigned long start;
 
@@ -377,7 +377,7 @@ void __delay(unsigned long loops)
 }
 EXPORT_SYMBOL(__delay);
 
-void udelay(unsigned long usecs)
+void __no_kcsan udelay(unsigned long usecs)
 {
__delay(tb_ticks_per_usec * usecs);
 }
-- 
2.37.2



[PATCH 1/5] powerpc: kcsan: Add exclusions from instrumentation

2023-01-31 Thread Rohan McLure
Exclude from KCSAN instrumentation those compilation units that cannot
be safely instrumented: early boot and setup code, code patching and
feature fixups, prom_init, kprobes, ftrace, the vDSO, purgatory, and
xmon.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/kernel/Makefile   | 10 ++
 arch/powerpc/kernel/trace/Makefile |  1 +
 arch/powerpc/kernel/vdso/Makefile  |  1 +
 arch/powerpc/lib/Makefile  |  2 ++
 arch/powerpc/purgatory/Makefile|  1 +
 arch/powerpc/xmon/Makefile |  1 +
 6 files changed, 16 insertions(+)

diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 9b6146056e48..9bf2be123093 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -54,6 +54,13 @@ CFLAGS_cputable.o += -DDISABLE_BRANCH_PROFILING
 CFLAGS_btext.o += -DDISABLE_BRANCH_PROFILING
 endif
 
+KCSAN_SANITIZE_early_32.o := n
+KCSAN_SANITIZE_early_64.o := n
+KCSAN_SANITIZE_cputable.o := n
+KCSAN_SANITIZE_btext.o := n
+KCSAN_SANITIZE_paca.o := n
+KCSAN_SANITIZE_setup_64.o := n
+
 #ifdef CONFIG_RANDOMIZE_KSTACK_OFFSET
 # Remove stack protector to avoid triggering unneeded stack canary
 # checks due to randomize_kstack_offset.
@@ -177,12 +184,15 @@ obj-$(CONFIG_PPC_SECVAR_SYSFS)+= secvar-sysfs.o
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_prom_init.o := n
 KCOV_INSTRUMENT_prom_init.o := n
+KCSAN_SANITIZE_prom_init.o := n
 UBSAN_SANITIZE_prom_init.o := n
 GCOV_PROFILE_kprobes.o := n
 KCOV_INSTRUMENT_kprobes.o := n
+KCSAN_SANITIZE_kprobes.o := n
 UBSAN_SANITIZE_kprobes.o := n
 GCOV_PROFILE_kprobes-ftrace.o := n
 KCOV_INSTRUMENT_kprobes-ftrace.o := n
+KCSAN_SANITIZE_kprobes-ftrace.o := n
 UBSAN_SANITIZE_kprobes-ftrace.o := n
 GCOV_PROFILE_syscall_64.o := n
 KCOV_INSTRUMENT_syscall_64.o := n
diff --git a/arch/powerpc/kernel/trace/Makefile 
b/arch/powerpc/kernel/trace/Makefile
index af8527538fe4..b16a9f9c0b35 100644
--- a/arch/powerpc/kernel/trace/Makefile
+++ b/arch/powerpc/kernel/trace/Makefile
@@ -23,4 +23,5 @@ obj-$(CONFIG_PPC32)   += $(obj32-y)
 # Disable GCOV, KCOV & sanitizers in odd or sensitive code
 GCOV_PROFILE_ftrace.o := n
 KCOV_INSTRUMENT_ftrace.o := n
+KCSAN_SANITIZE_ftrace.o := n
 UBSAN_SANITIZE_ftrace.o := n
diff --git a/arch/powerpc/kernel/vdso/Makefile 
b/arch/powerpc/kernel/vdso/Makefile
index 6a977b0d8ffc..3a2f32929fcf 100644
--- a/arch/powerpc/kernel/vdso/Makefile
+++ b/arch/powerpc/kernel/vdso/Makefile
@@ -46,6 +46,7 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 ccflags-y := -shared -fno-common -fno-builtin -nostdlib -Wl,--hash-style=both
 ccflags-$(CONFIG_LD_IS_LLD) += $(call cc-option,--ld-path=$(LD),-fuse-ld=lld)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index 4de71cbf6e8e..c4db459d304a 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -16,6 +16,8 @@ KASAN_SANITIZE_feature-fixups.o := n
 # restart_table.o contains functions called in the NMI interrupt path
 # which can be in real mode. Disable KASAN.
 KASAN_SANITIZE_restart_table.o := n
+KCSAN_SANITIZE_code-patching.o := n
+KCSAN_SANITIZE_feature-fixups.o := n
 
 ifdef CONFIG_KASAN
 CFLAGS_code-patching.o += -DDISABLE_BRANCH_PROFILING
diff --git a/arch/powerpc/purgatory/Makefile b/arch/powerpc/purgatory/Makefile
index a81d155b89ae..6f5e2727963c 100644
--- a/arch/powerpc/purgatory/Makefile
+++ b/arch/powerpc/purgatory/Makefile
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0
 
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 targets += trampoline_$(BITS).o purgatory.ro
 
diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile
index eb25d7554ffd..d334de392e6c 100644
--- a/arch/powerpc/xmon/Makefile
+++ b/arch/powerpc/xmon/Makefile
@@ -5,6 +5,7 @@ GCOV_PROFILE := n
 KCOV_INSTRUMENT := n
 UBSAN_SANITIZE := n
 KASAN_SANITIZE := n
+KCSAN_SANITIZE := n
 
 # Disable ftrace for the entire directory
 ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
-- 
2.37.2



Re: [PATCH v5 2/5] powerpc: mm: Implement p{m,u,4}d_leaf on all platforms

2022-12-06 Thread Rohan McLure
Great job spotting this. Somehow lost these throughout the revisions. Thanks.

> On 7 Dec 2022, at 9:24 am, Michael Ellerman  wrote:
> 
> Rohan McLure  writes:
>> The check that a higher-level entry in multi-level pages contains a page
>> translation entry (pte) is performed by p{m,u,4}d_leaf stubs, which may
>> be specialised for each choice of mmu. In a prior commit, we replace
>> uses to the catch-all stubs, p{m,u,4}d_is_leaf with p{m,u,4}d_leaf.
>> 
>> Replace the catch-all stub definitions for p{m,u,4}d_is_leaf with
>> definitions for p{m,u,4}d_leaf. A future patch will assume that
>> p{m,u,4}d_leaf is defined on all platforms.
>> 
>> In particular, implement pud_leaf for Book3E-64, pmd_leaf for all Book3E
>> and Book3S-64 platforms, with a catch-all definition for p4d_leaf.
>> 
>> Signed-off-by: Rohan McLure 
>> ---
>> v5: Split patch that replaces p{m,u,4}d_is_leaf into two patches, first
>> replacing callsites and afterward providing generic definition.
>> Remove ifndef-defines implementing p{m,u}d_leaf in favour of
>> implementing stubs in headers belonging to the particular platforms
>> needing them.
>> ---
>> arch/powerpc/include/asm/book3s/32/pgtable.h |  4 
>> arch/powerpc/include/asm/book3s/64/pgtable.h |  8 ++-
>> arch/powerpc/include/asm/nohash/64/pgtable.h |  5 +
>> arch/powerpc/include/asm/nohash/pgtable.h|  5 +
>> arch/powerpc/include/asm/pgtable.h   | 22 ++--
>> 5 files changed, 18 insertions(+), 26 deletions(-)
> 
> I needed the delta below to prevent the generic versions being defined
> and overriding our versions.
> 
> cheers
> 
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
> b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 44703c8c590c..117135be8cc2 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -244,6 +244,7 @@ static inline void pmd_clear(pmd_t *pmdp)
> *pmdp = __pmd(0);
> }
> 
> +#define pmd_leaf pmd_leaf
> static inline bool pmd_leaf(pmd_t pmd)
> {
> return false;
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
> b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index 436632d04304..f00aa2d203c2 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -1438,11 +1438,13 @@ static inline bool is_pte_rw_upgrade(unsigned long 
> old_val, unsigned long new_va
> /*
>  * Like pmd_huge() and pmd_large(), but works regardless of config options
>  */
> +#define pmd_leaf pmd_leaf
> static inline bool pmd_leaf(pmd_t pmd)
> {
> return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
> }
> 
> +#define pud_leaf pud_leaf
> static inline bool pud_leaf(pud_t pud)
> {
> return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
> diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
> b/arch/powerpc/include/asm/nohash/64/pgtable.h
> index 2488da8f0deb..d88b22c753d3 100644
> --- a/arch/powerpc/include/asm/nohash/64/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
> @@ -147,6 +147,7 @@ static inline void pud_clear(pud_t *pudp)
> *pudp = __pud(0);
> }
> 
> +#define pud_leaf pud_leaf
> static inline bool pud_leaf(pud_t pud)
> {
> return false;
> diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
> b/arch/powerpc/include/asm/nohash/pgtable.h
> index 487804f5b1d1..dfae1dbb9c3b 100644
> --- a/arch/powerpc/include/asm/nohash/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/pgtable.h
> @@ -60,6 +60,7 @@ static inline bool pte_hw_valid(pte_t pte)
> return pte_val(pte) & _PAGE_PRESENT;
> }
> 
> +#define pmd_leaf pmd_leaf
> static inline bool pmd_leaf(pmd_t pmd)
> {
> return false;
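
The bare #define markers matter because of the guard pattern the
generic headers use; roughly (simplified from include/linux/pgtable.h):

#ifndef pmd_leaf
#define pmd_leaf(x)	0	/* fallback emitted unless the arch marks
				 * its version with a same-name #define */
#endif

Without `#define pmd_leaf pmd_leaf` in the arch header, this generic
fallback is defined and shadows the arch's static inline.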




[PATCH] powerpc: qspinlock: Use asm-generic definition for queued_spin_lock

2022-12-05 Thread Rohan McLure
asm-generic/qspinlock.h provides an identical implementation of
queued_spin_lock. Remove the variant in asm/qspinlock.h.

Signed-off-by: Rohan McLure 
---
 arch/powerpc/include/asm/qspinlock.h | 11 ---
 1 file changed, 11 deletions(-)

diff --git a/arch/powerpc/include/asm/qspinlock.h 
b/arch/powerpc/include/asm/qspinlock.h
index b676c4fb90fd..bf5ba0f00258 100644
--- a/arch/powerpc/include/asm/qspinlock.h
+++ b/arch/powerpc/include/asm/qspinlock.h
@@ -33,17 +33,6 @@ static inline void queued_spin_unlock(struct qspinlock *lock)
 extern void queued_spin_lock_slowpath(struct qspinlock *lock, u32 val);
 #endif
 
-static __always_inline void queued_spin_lock(struct qspinlock *lock)
-{
-   u32 val = 0;
-
-	if (likely(arch_atomic_try_cmpxchg_lock(&lock->val, &val, _Q_LOCKED_VAL)))
-   return;
-
-   queued_spin_lock_slowpath(lock, val);
-}
-#define queued_spin_lock queued_spin_lock
-
 #ifdef CONFIG_PARAVIRT_SPINLOCKS
 #define SPIN_THRESHOLD (1<<15) /* not tuned */
 
-- 
2.37.2



[PATCH v5 7/7] powerpc/64: Sanitise user registers on interrupt in pseries, POWERNV

2022-11-30 Thread Rohan McLure
Cause pseries and POWERNV platforms to default to zeroising all potentially
user-defined registers when entering the kernel by means of any interrupt
source, reducing user influence on the kernel and the likelihood of
producing speculation gadgets.

Acked-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
v4: Default on POWERNV as well.
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 280c797e0f30..2ab114a02f62 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -534,7 +534,7 @@ config HOTPLUG_CPU
 config INTERRUPT_SANITIZE_REGISTERS
bool "Clear gprs on interrupt arrival"
depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
-   default PPC_BOOK3E_64
+   default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV
help
  Reduce the influence of user register state on interrupt handlers and
  syscalls through clearing user state from registers before handling
-- 
2.37.2



[PATCH v5 6/7] powerpc/64e: Clear gprs on interrupt routine entry on Book3E

2022-11-30 Thread Rohan McLure
Zero GPRS r14-r31 on entry into the kernel for interrupt sources to
limit influence of user-space values in potential speculation gadgets.
Prior to this commit, all other GPRS are reassigned during the common
prologue to interrupt handlers and so need not be zeroised explicitly.

This may be done safely, without loss of register state prior to the
interrupt, as the common prologue saves the initial values of
non-volatiles, which are unconditionally restored in interrupt_64.S.
Mitigation defaults to enabled by INTERRUPT_SANITIZE_REGISTERS.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
---
 arch/powerpc/kernel/exceptions-64e.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 930e36099015..52431cb0c083 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -358,7 +358,6 @@ ret_from_mc_except:
std r14,PACA_EXMC+EX_R14(r13);  \
std r15,PACA_EXMC+EX_R15(r13)
 
-
 /* Core exception code for all exceptions except TLB misses. */
 #define EXCEPTION_COMMON_LVL(n, scratch, excf) \
 exc_##n##_common:  \
@@ -394,7 +393,8 @@ exc_##n##_common:						\
std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */   \
std r3,_TRAP(r1);   /* set trap number  */  \
std r0,RESULT(r1);  /* clear regs->result */\
-   SAVE_NVGPRS(r1);
+   SAVE_NVGPRS(r1);\
+   SANITIZE_NVGPRS();  /* minimise speculation influence */
 
 #define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
-- 
2.37.2



[PATCH v5 5/7] powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S

2022-11-30 Thread Rohan McLure
Zeroise user state in gprs (assign to zero) to reduce the influence of user
registers on speculation within kernel syscall handlers. Clears occur
at the very beginning of the sc and scv 0 interrupt handlers, with
restores occurring following the execution of the syscall handler.

Zeroise GPRs r0, r2-r11, and r14-r31 on entry into the kernel for all
other interrupt sources. The remaining gprs are overwritten by
entry macros to interrupt handlers, irrespective of whether or not a
given handler consumes these register values. If an interrupt does not
select the IMSR_R12 IOption, zeroise r12.

Prior to this commit, r14-r31 are restored on a per-interrupt basis at
exit, but now they are always restored on 64-bit Book3S. Remove explicit
REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear
user registers on interrupt, and continue to depend on the return value
of interrupt_exit_user_prepare to determine whether or not to restore
non-volatiles.

The mmap_bench benchmark in selftests rapidly invokes page faults, and
shows a ~0.8% performance regression with this mitigation. This
indicates the worst-case overhead, as page faults exercise the
heavier-weight interrupt handlers. The mitigation can be enabled or
disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
v2: REST_NVGPRS should be conditional on mitigation in scv handler. Fix
improper multi-line preprocessor macro in interrupt_64.S
v4: Split off IMSR_R12 definition into its own patch. Move macro
definitions for register sanitisation into asm/ppc_asm.h
v5: Replace unconditional ZEROIZE_... with conditional SANITIZE_...
counterparts
---
 arch/powerpc/kernel/exceptions-64s.S | 27 ++-
 arch/powerpc/kernel/interrupt_64.S   | 16 ++--
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 58d72db1d484..68de42e42268 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -506,6 +506,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
std r10,0(r1)   /* make stack chain pointer */
std r0,GPR0(r1) /* save r0 in stackframe*/
std r10,GPR1(r1)/* save r1 in stackframe*/
+   SANITIZE_GPR(0)
 
/* Mark our [H]SRRs valid for return */
li  r10,1
@@ -548,8 +549,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
+   .if !IMSR_R12
+   SANITIZE_GPRS(9, 12)
+   .else
+   SANITIZE_GPRS(9, 11)
+   .endif
 
SAVE_NVGPRS(r1)
+   SANITIZE_NVGPRS()
 
.if IDAR
.if IISIDE
@@ -581,8 +588,8 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld  r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
-   std r2,GPR2(r1) /* save r2 in stackframe*/
-   SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe   */
+   SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe   */
+   SANITIZE_GPRS(2, 8)
mflrr9  /* Get LR, later save to stack  */
LOAD_PACA_TOC() /* get kernel TOC into r2   */
std r9,_LINK(r1)
@@ -700,6 +707,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlrr9
ld  r9,_CCR(r1)
mtcrr9
+   SANITIZE_RESTORE_NVGPRS()
REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
@@ -1445,7 +1453,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 * do_break() may have changed the NV GPRS while handling a breakpoint.
 * If so, we need to restore them with their updated values.
 */
-   REST_NVGPRS(r1)
+   HANDLER_RESTORE_NVGPRS()
b   interrupt_return_srr
 
 
@@ -1671,7 +1679,7 @@ EXC_COMMON_BEGIN(alignment_common)
GEN_COMMON alignment
addir3,r1,STACK_FRAME_OVERHEAD
bl  alignment_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+   HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b   interrupt_return_srr
 
 
@@ -1737,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common)
 .Ldo_program_check:
addir3,r1,STACK_FRAME_OVERHEAD
bl  program_check_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+   HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b   interrupt_return_srr
 
 
@@ -2169,7 +2177,7 @@ EXC_COMMON_BEGIN(emulation_assist_common)
GEN_COMMON emulation_assist
addir3,r1,STACK_FRAME_OVERHEAD
bl  emulation_assist_interrupt
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+   HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs

[PATCH v5 4/7] powerpc/64s: IOption for MSR stored in r12

2022-11-30 Thread Rohan McLure
Interrupt handlers in asm/exceptions-64s.S contain a great deal of common
code produced by the GEN_COMMON macros. Currently, at the exit point of
the macro, r12 will contain the contents of the MSR. A future patch will
cause these macros to zeroise architected registers to avoid potential
speculation influence of user data.

Provide an IOption that signals that r12 must be retained, as the
interrupt handler assumes it to hold the contents of the MSR.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
v4: Split 64s register sanitisation commit to establish this IOption
---
 arch/powerpc/kernel/exceptions-64s.S | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 5381a43e50fe..58d72db1d484 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -111,6 +111,7 @@ name:
 #define ISTACK .L_ISTACK_\name\()  /* Set regular kernel stack */
 #define __ISTACK(name) .L_ISTACK_ ## name
 #define IKUAP  .L_IKUAP_\name\()   /* Do KUAP lock */
+#define IMSR_R12   .L_IMSR_R12_\name\()/* Assumes MSR saved to r12 */
 
 #define INT_DEFINE_BEGIN(n)\
 .macro int_define_ ## n name
@@ -176,6 +177,9 @@ do_define_int n
.ifndef IKUAP
IKUAP=1
.endif
+   .ifndef IMSR_R12
+   IMSR_R12=0
+   .endif
 .endm
 
 /*
@@ -1751,6 +1755,7 @@ INT_DEFINE_BEGIN(fp_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
 #endif
+   IMSR_R12=1
 INT_DEFINE_END(fp_unavailable)
 
 EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
@@ -2372,6 +2377,7 @@ INT_DEFINE_BEGIN(altivec_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
 #endif
+   IMSR_R12=1
 INT_DEFINE_END(altivec_unavailable)
 
 EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
@@ -2421,6 +2427,7 @@ INT_DEFINE_BEGIN(vsx_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
 #endif
+   IMSR_R12=1
 INT_DEFINE_END(vsx_unavailable)
 
 EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
-- 
2.37.2



[PATCH v5 1/7] powerpc/64: Add INTERRUPT_SANITIZE_REGISTERS Kconfig

2022-11-30 Thread Rohan McLure
Add Kconfig option for enabling clearing of registers on arrival in an
interrupt handler. This reduces the speculation influence of registers
on kernel internals. The option will be consumed by 64-bit systems that
feature speculation and wish to implement this mitigation.

This patch only introduces the Kconfig option, no actual mitigations.

The primary overhead of this mitigation lies in an increased number of
registers that must be saved and restored by interrupt handlers on
Book3S systems. Enable by default on Book3E systems, which prior to
this patch eagerly save and restore register state, meaning that the
mitigation when implemented will have minimal overhead.

Acked-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
---
 arch/powerpc/Kconfig | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4fd4924f6d50..280c797e0f30 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -531,6 +531,15 @@ config HOTPLUG_CPU
 
  Say N if you are unsure.
 
+config INTERRUPT_SANITIZE_REGISTERS
+   bool "Clear gprs on interrupt arrival"
+   depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+   default PPC_BOOK3E_64
+   help
+ Reduce the influence of user register state on interrupt handlers and
+ syscalls through clearing user state from registers before handling
+ the exception.
+
 config PPC_QUEUED_SPINLOCKS
bool "Queued spinlocks" if EXPERT
depends on SMP
-- 
2.37.2



[PATCH v5 2/7] powerpc/64: Add interrupt register sanitisation macros

2022-11-30 Thread Rohan McLure
Include in asm/ppc_asm.h macros to be used in multiple successive
patches to implement zeroising architected registers in interrupt
handlers. Registers will be sanitised in this fashion in future patches
to reduce the speculation influence of user-controlled register values.
These mitigations will be configurable through the
CONFIG_INTERRUPT_SANITIZE_REGISTERS Kconfig option.

Included are macros for conditionally zeroising registers and restoring
as required with the mitigation enabled. With the mitigation disabled,
non-volatiles must be restored on demand at separate locations to
those required by the mitigation.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
v4: New patch
v5: Remove unnecessary _ZEROIZE_ parts of macro titles, as the
implementation of how registers are sanitised doesn't need to be
immediately accessible, only that values will be clobbered. Introduce
arbitrary sanitize gpr(s) macros.
---
 arch/powerpc/include/asm/ppc_asm.h | 19 +++
 1 file changed, 19 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 753a2757bcd4..d2f44612f4b0 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -74,6 +74,25 @@
 #define SAVE_GPR(n, base)  SAVE_GPRS(n, n, base)
 #define REST_GPR(n, base)  REST_GPRS(n, n, base)
 
+/* macros for handling user register sanitisation */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_SYSCALL_GPRS()		ZEROIZE_GPR(0);		\
+					ZEROIZE_GPRS(5, 12);	\
+					ZEROIZE_NVGPRS()
+#define SANITIZE_GPR(n)ZEROIZE_GPR(n)
+#define SANITIZE_GPRS(start, end)  ZEROIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS()  ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()  REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_SYSCALL_GPRS()
+#define SANITIZE_GPR(n)
+#define SANITIZE_GPRS(start, end)
+#define SANITIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS()   REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
+
 #define SAVE_FPR(n, base)  stfdn,8*TS_FPRWIDTH*(n)(base)
 #define SAVE_2FPRS(n, base)SAVE_FPR(n, base); SAVE_FPR(n+1, base)
 #define SAVE_4FPRS(n, base)SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
-- 
2.37.2



[PATCH v5 3/7] powerpc/64: Sanitise common exit code for interrupts

2022-11-30 Thread Rohan McLure
Interrupt code is shared between Book3E/S 64-bit systems for interrupt
handlers. Ensure that exit code correctly restores non-volatile gprs on
each system when CONFIG_INTERRUPT_SANITIZE_REGISTERS is enabled.

Also introduce macros for clearing/restoring registers on interrupt
entry for when this configuration option is either disabled or enabled.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
v4: New patch
---
 arch/powerpc/kernel/interrupt_64.S | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S
index 978a173eb339..1ef4fdef74fb 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -408,9 +408,11 @@ interrupt_return_\srr\()_user: /* make backtraces match 
the _kernel variant */
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
addir3,r1,STACK_FRAME_OVERHEAD
bl  interrupt_exit_user_prepare
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
cmpdi   r3,0
bne-.Lrestore_nvgprs_\srr
 .Lrestore_nvgprs_\srr\()_cont:
+#endif
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
 #ifdef CONFIG_PPC_BOOK3S
 .Linterrupt_return_\srr\()_user_rst_start:
@@ -424,6 +426,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
 
 .Lfast_user_interrupt_return_\srr\():
+   SANITIZE_RESTORE_NVGPRS()
 #ifdef CONFIG_PPC_BOOK3S
.ifc \srr,srr
lbz r4,PACASRR_VALID(r13)
@@ -493,9 +496,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
b   .   /* prevent speculative execution */
 .Linterrupt_return_\srr\()_user_rst_end:
 
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
 .Lrestore_nvgprs_\srr\():
REST_NVGPRS(r1)
b   .Lrestore_nvgprs_\srr\()_cont
+#endif
 
 #ifdef CONFIG_PPC_BOOK3S
 interrupt_return_\srr\()_user_restart:
@@ -576,6 +581,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
 
 .Lfast_kernel_interrupt_return_\srr\():
+   SANITIZE_RESTORE_NVGPRS()
cmpdi   cr1,r3,0
 #ifdef CONFIG_PPC_BOOK3S
.ifc \srr,srr
-- 
2.37.2



[PATCH v4 7/7] powerpc/64: Sanitise user registers on interrupt in pseries, POWERNV

2022-11-28 Thread Rohan McLure
Cause pseries and POWERNV platforms to default to zeroising all potentially
user-defined registers when entering the kernel by means of any interrupt
source, reducing user influence on the kernel and the likelihood of
producing speculation gadgets.

Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
v4: Default on POWERNV as well.
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 280c797e0f30..2ab114a02f62 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -534,7 +534,7 @@ config HOTPLUG_CPU
 config INTERRUPT_SANITIZE_REGISTERS
bool "Clear gprs on interrupt arrival"
depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
-   default PPC_BOOK3E_64
+   default PPC_BOOK3E_64 || PPC_PSERIES || PPC_POWERNV
help
  Reduce the influence of user register state on interrupt handlers and
  syscalls through clearing user state from registers before handling
-- 
2.37.2



[PATCH v4 6/7] powerpc/64e: Clear gprs on interrupt routine entry on Book3E

2022-11-28 Thread Rohan McLure
Zero GPRS r14-r31 on entry into the kernel for interrupt sources to
limit influence of user-space values in potential speculation gadgets.
Prior to this commit, all other GPRS are reassigned during the common
prologue to interrupt handlers and so need not be zeroised explicitly.

This may be done safely, without loss of register state prior to the
interrupt, as the common prologue saves the initial values of
non-volatiles, which are unconditionally restored in interrupt_64.S.
Mitigation defaults to enabled by INTERRUPT_SANITIZE_REGISTERS.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
---
 arch/powerpc/kernel/exceptions-64e.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 930e36099015..6b842d5ea4e1 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -358,7 +358,6 @@ ret_from_mc_except:
std r14,PACA_EXMC+EX_R14(r13);  \
std r15,PACA_EXMC+EX_R15(r13)
 
-
 /* Core exception code for all exceptions except TLB misses. */
 #define EXCEPTION_COMMON_LVL(n, scratch, excf) \
 exc_##n##_common:  \
@@ -394,7 +393,8 @@ exc_##n##_common:						\
std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */   \
std r3,_TRAP(r1);   /* set trap number  */  \
std r0,RESULT(r1);  /* clear regs->result */\
-   SAVE_NVGPRS(r1);
+   SAVE_NVGPRS(r1);\
+   SANITIZE_ZEROIZE_NVGPRS();  /* minimise speculation influence */
 
 #define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
-- 
2.37.2



[PATCH v4 5/7] powerpc/64s: Zeroise gprs on interrupt routine entry on Book3S

2022-11-28 Thread Rohan McLure
Zeroise user state in gprs (assign to zero) to reduce the influence of user
registers on speculation within kernel syscall handlers. Clears occur
at the very beginning of the sc and scv 0 interrupt handlers, with
restores occurring following the execution of the syscall handler.

Zeroise GPRs r0, r2-r11, and r14-r31 on entry into the kernel for all
other interrupt sources. The remaining gprs are overwritten by
entry macros to interrupt handlers, irrespective of whether or not a
given handler consumes these register values. If an interrupt does not
select the IMSR_R12 IOption, zeroise r12.

Prior to this commit, r14-r31 are restored on a per-interrupt basis at
exit, but now they are always restored on 64-bit Book3S. Remove explicit
REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear
user registers on interrupt, and continue to depend on the return value
of interrupt_exit_user_prepare to determine whether or not to restore
non-volatiles.

The mmap_bench benchmark in selftests rapidly invokes page faults, and
shows a ~0.8% performance regression with this mitigation. This
indicates the worst-case overhead, as page faults exercise the
heavier-weight interrupt handlers. The mitigation can be enabled or
disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS.

Reviewed-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
v2: REST_NVGPRS should be conditional on mitigation in scv handler. Fix
improper multi-line preprocessor macro in interrupt_64.S
v4: Split off IMSR_R12 definition into its own patch. Move macro
definitions for register sanitisation into asm/ppc_asm.h
---
 arch/powerpc/kernel/exceptions-64s.S | 27 ++-
 arch/powerpc/kernel/interrupt_64.S   | 16 ++--
 2 files changed, 32 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 58d72db1d484..8ee3db3b6595 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -506,6 +506,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
std r10,0(r1)   /* make stack chain pointer */
std r0,GPR0(r1) /* save r0 in stackframe*/
std r10,GPR1(r1)/* save r1 in stackframe*/
+   ZEROIZE_GPR(0)
 
/* Mark our [H]SRRs valid for return */
li  r10,1
@@ -548,8 +549,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
+   .if !IMSR_R12
+   ZEROIZE_GPRS(9, 12)
+   .else
+   ZEROIZE_GPRS(9, 11)
+   .endif
 
SAVE_NVGPRS(r1)
+   SANITIZE_ZEROIZE_NVGPRS()
 
.if IDAR
.if IISIDE
@@ -581,8 +588,8 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld  r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
-   std r2,GPR2(r1) /* save r2 in stackframe*/
-   SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe   */
+   SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe   */
+   ZEROIZE_GPRS(2, 8)
mflrr9  /* Get LR, later save to stack  */
LOAD_PACA_TOC() /* get kernel TOC into r2   */
std r9,_LINK(r1)
@@ -700,6 +707,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlrr9
ld  r9,_CCR(r1)
mtcrr9
+   SANITIZE_RESTORE_NVGPRS()
REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
@@ -1445,7 +1453,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 * do_break() may have changed the NV GPRS while handling a breakpoint.
 * If so, we need to restore them with their updated values.
 */
-   REST_NVGPRS(r1)
+   HANDLER_RESTORE_NVGPRS()
b   interrupt_return_srr
 
 
@@ -1671,7 +1679,7 @@ EXC_COMMON_BEGIN(alignment_common)
GEN_COMMON alignment
addir3,r1,STACK_FRAME_OVERHEAD
bl  alignment_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+   HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b   interrupt_return_srr
 
 
@@ -1737,7 +1745,7 @@ EXC_COMMON_BEGIN(program_check_common)
 .Ldo_program_check:
addir3,r1,STACK_FRAME_OVERHEAD
bl  program_check_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+   HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b   interrupt_return_srr
 
 
@@ -2169,7 +2177,7 @@ EXC_COMMON_BEGIN(emulation_assist_common)
GEN_COMMON emulation_assist
addir3,r1,STACK_FRAME_OVERHEAD
bl  emulation_assist_interrupt
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs */
+   HANDLER_RESTORE_NVGPRS() /* instruction emulation may change GPRs */
b   interrupt_return_hsrr
 
 
@@ -2489,7 +2497,7 @@ EXC_COMMON_BEGIN

[PATCH v4 4/7] powerpc/64s: IOption for MSR stored in r12

2022-11-28 Thread Rohan McLure
Interrupt handlers in asm/exceptions-64s.S contain a great deal of common
code produced by the GEN_COMMON macros. Currently, at the exit point of
the macro, r12 will contain the contents of the MSR. A future patch will
cause these macros to zeroise architected registers to avoid potential
speculation influence of user data.

Provide an IOption that signals that r12 must be retained, as the
interrupt handler assumes it to hold the contents of the MSR.

Signed-off-by: Rohan McLure 
---
v4: Split 64s register sanitisation commit to establish this IOption
---
 arch/powerpc/kernel/exceptions-64s.S | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 5381a43e50fe..58d72db1d484 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -111,6 +111,7 @@ name:
 #define ISTACK .L_ISTACK_\name\()  /* Set regular kernel stack */
 #define __ISTACK(name) .L_ISTACK_ ## name
 #define IKUAP  .L_IKUAP_\name\()   /* Do KUAP lock */
+#define IMSR_R12   .L_IMSR_R12_\name\()/* Assumes MSR saved to r12 */
 
 #define INT_DEFINE_BEGIN(n)\
 .macro int_define_ ## n name
@@ -176,6 +177,9 @@ do_define_int n
.ifndef IKUAP
IKUAP=1
.endif
+   .ifndef IMSR_R12
+   IMSR_R12=0
+   .endif
 .endm
 
 /*
@@ -1751,6 +1755,7 @@ INT_DEFINE_BEGIN(fp_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
 #endif
+   IMSR_R12=1
 INT_DEFINE_END(fp_unavailable)
 
 EXC_REAL_BEGIN(fp_unavailable, 0x800, 0x100)
@@ -2372,6 +2377,7 @@ INT_DEFINE_BEGIN(altivec_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
 #endif
+   IMSR_R12=1
 INT_DEFINE_END(altivec_unavailable)
 
 EXC_REAL_BEGIN(altivec_unavailable, 0xf20, 0x20)
@@ -2421,6 +2427,7 @@ INT_DEFINE_BEGIN(vsx_unavailable)
 #ifdef CONFIG_KVM_BOOK3S_PR_POSSIBLE
IKVM_REAL=1
 #endif
+   IMSR_R12=1
 INT_DEFINE_END(vsx_unavailable)
 
 EXC_REAL_BEGIN(vsx_unavailable, 0xf40, 0x20)
-- 
2.37.2



[PATCH v4 3/7] powerpc/64: Sanitise common exit code for interrupts

2022-11-28 Thread Rohan McLure
Interrupt code is shared between Book3E/S 64-bit systems for interrupt
handlers. Ensure that exit code correctly restores non-volatile gprs on
each system when CONFIG_INTERRUPT_SANITIZE_REGISTERS is enabled.

Also introduce macros for clearing/restoring registers on interrupt
entry for when this configuration option is either disabled or enabled.

Signed-off-by: Rohan McLure 
---
v4: New patch
---
 arch/powerpc/kernel/interrupt_64.S | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S
index 978a173eb339..1ef4fdef74fb 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -408,9 +408,11 @@ interrupt_return_\srr\()_user: /* make backtraces match 
the _kernel variant */
 _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
addir3,r1,STACK_FRAME_OVERHEAD
bl  interrupt_exit_user_prepare
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
cmpdi   r3,0
bne-.Lrestore_nvgprs_\srr
 .Lrestore_nvgprs_\srr\()_cont:
+#endif
std r1,PACA_EXIT_SAVE_R1(r13) /* save r1 for restart */
 #ifdef CONFIG_PPC_BOOK3S
 .Linterrupt_return_\srr\()_user_rst_start:
@@ -424,6 +426,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_user)
stb r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
 
 .Lfast_user_interrupt_return_\srr\():
+   SANITIZE_RESTORE_NVGPRS()
 #ifdef CONFIG_PPC_BOOK3S
.ifc \srr,srr
lbz r4,PACASRR_VALID(r13)
@@ -493,9 +496,11 @@ ALT_FTR_SECTION_END_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
b   .   /* prevent speculative execution */
 .Linterrupt_return_\srr\()_user_rst_end:
 
+#ifndef CONFIG_INTERRUPT_SANITIZE_REGISTERS
 .Lrestore_nvgprs_\srr\():
REST_NVGPRS(r1)
b   .Lrestore_nvgprs_\srr\()_cont
+#endif
 
 #ifdef CONFIG_PPC_BOOK3S
 interrupt_return_\srr\()_user_restart:
@@ -576,6 +581,7 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
stb r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
 
 .Lfast_kernel_interrupt_return_\srr\():
+   SANITIZE_RESTORE_NVGPRS()
cmpdi   cr1,r3,0
 #ifdef CONFIG_PPC_BOOK3S
.ifc \srr,srr
-- 
2.37.2



[PATCH v4 1/7] powerpc/64: Add INTERRUPT_SANITIZE_REGISTERS Kconfig

2022-11-28 Thread Rohan McLure
Add Kconfig option for enabling clearing of registers on arrival in an
interrupt handler. This reduces the speculation influence of registers
on kernel internals. The option will be consumed by 64-bit systems that
feature speculation and wish to implement this mitigation.

This patch only introduces the Kconfig option, no actual mitigations.

The primary overhead of this mitigation lies in an increased number of
registers that must be saved and restored by interrupt handlers on
Book3S systems. Enable by default on Book3E systems, which prior to
this patch eagerly save and restore register state, meaning that the
mitigation when implemented will have minimal overhead.

Acked-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
---
 arch/powerpc/Kconfig | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4fd4924f6d50..280c797e0f30 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -531,6 +531,15 @@ config HOTPLUG_CPU
 
  Say N if you are unsure.
 
+config INTERRUPT_SANITIZE_REGISTERS
+   bool "Clear gprs on interrupt arrival"
+   depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+   default PPC_BOOK3E_64
+   help
+ Reduce the influence of user register state on interrupt handlers and
+ syscalls through clearing user state from registers before handling
+ the exception.
+
 config PPC_QUEUED_SPINLOCKS
bool "Queued spinlocks" if EXPERT
depends on SMP
-- 
2.37.2



[PATCH v4 2/7] powerpc/64: Add interrupt register sanitisation macros

2022-11-28 Thread Rohan McLure
Include in asm/ppc_asm.h macros to be used in multiple successive
patches to implement zeroising architected registers in interrupt
handlers. Registers will be sanitised in this fashion in future patches
to reduce the speculation influence of user-controlled register values.
These mitigations will be configurable through the
CONFIG_INTERRUPT_SANITIZE_REGISTERS Kconfig option.

Included are macros for conditionally zeroising registers and restoring
as required with the mitigation enabled. With the mitigation disabled,
non-volatiles must be restored on demand at separate locations to
those required by the mitigation.

Signed-off-by: Rohan McLure 
---
v4: New patch
---
 arch/powerpc/include/asm/ppc_asm.h | 17 +
 1 file changed, 17 insertions(+)

diff --git a/arch/powerpc/include/asm/ppc_asm.h 
b/arch/powerpc/include/asm/ppc_asm.h
index 753a2757bcd4..272b2795c36a 100644
--- a/arch/powerpc/include/asm/ppc_asm.h
+++ b/arch/powerpc/include/asm/ppc_asm.h
@@ -74,6 +74,23 @@
 #define SAVE_GPR(n, base)  SAVE_GPRS(n, n, base)
 #define REST_GPR(n, base)  REST_GPRS(n, n, base)
 
+/* macros for handling user register sanitisation */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_ZEROIZE_SYSCALL_GPRS()	ZEROIZE_GPR(0);		\
+					ZEROIZE_GPRS(5, 12);	\
+					ZEROIZE_NVGPRS()
+#define SANITIZE_ZEROIZE_INTERRUPT_NVGPRS()ZEROIZE_NVGPRS()
+#define SANITIZE_ZEROIZE_NVGPRS()  ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()  REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_ZEROIZE_INTERRUPT_NVGPRS()
+#define SANITIZE_ZEROIZE_SYSCALL_GPRS()
+#define SANITIZE_ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS()   REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
+
 #define SAVE_FPR(n, base)  stfdn,8*TS_FPRWIDTH*(n)(base)
 #define SAVE_2FPRS(n, base)SAVE_FPR(n, base); SAVE_FPR(n+1, base)
 #define SAVE_4FPRS(n, base)SAVE_2FPRS(n, base); SAVE_2FPRS(n+2, base)
-- 
2.37.2



[PATCH v5 3/5] powerpc: mm: Add common pud_pfn stub for all platforms

2022-11-17 Thread Rohan McLure
Prior to this commit, pud_pfn was implemented with BUILD_BUG as the inline
function for 64-bit Book3S systems but is never invoked, as its call sites
in generic code are guarded by calls to pud_devmap, which return zero on
such systems. A future patch will provide support for page table checks,
the generic code for which depends on a pud_pfn stub being implemented,
even though that patch will not interact with puds directly.

Remove the 64-bit Book3S stub and define pud_pfn to warn on all
platforms. pud_pfn may be defined properly on a per-platform basis
should it grow real usages in future.

Signed-off-by: Rohan McLure 
---
V2: Remove conditional BUILD_BUG and BUG. Instead warn on usage.
V3: Replace WARN with WARN_ONCE, which should suffice to demonstrate
misuse of puds.
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 --
 arch/powerpc/include/asm/pgtable.h   | 14 ++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 9a6db4ad3228..5fb910ab250d 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1394,16 +1394,6 @@ static inline int pgd_devmap(pgd_t pgd)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline int pud_pfn(pud_t pud)
-{
-   /*
-* Currently all calls to pud_pfn() are gated around a pud_devmap()
-* check so this should never be used. If it grows another user we
-* want to know about it.
-*/
-   BUILD_BUG();
-   return 0;
-}
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
 void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 68a3f271aebe..5db9fa157685 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -159,6 +159,20 @@ struct seq_file;
 void arch_report_meminfo(struct seq_file *m);
 #endif /* CONFIG_PPC64 */
 
+/*
+ * Currently only consumed by page_table_check_pud_{set,clear}. Since clears
+ * and sets to page table entries at any level are done through
+ * page_table_check_pte_{set,clear}, provide stub implementation.
+ */
+#ifndef pud_pfn
+#define pud_pfn pud_pfn
+static inline int pud_pfn(pud_t pud)
+{
+   WARN_ONCE(1, "pud: platform does not use pud entries directly");
+   return 0;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.37.2



[PATCH v5 5/5] powerpc: mm: support page table check

2022-11-17 Thread Rohan McLure
On creation and clearing of a page table mapping, instrument such calls
by invoking page_table_check_pte_set and page_table_check_pte_clear
respectively. These calls serve as a sanity check against illegal
mappings.

Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK for all ppc64, and 32-bit
platforms implementing Book3S.
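
As a usage note (option and parameter names per the generic
mm/Kconfig.debug and Documentation/mm/page_table_check.rst, not this
patch), the checker is exercised by building with:

CONFIG_PAGE_TABLE_CHECK=y
# Either enforce unconditionally ...
CONFIG_PAGE_TABLE_CHECK_ENFORCED=y
# ... or leave it off by default and boot with page_table_check=on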

Change pud_pfn to be a runtime bug rather than a build bug as it is
consumed by page_table_check_pud_{clear,set} which are not called.

See also:

riscv support in commit 3fee229a8eb9 ("riscv/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
arm64 in commit 42b2547137f5 ("arm64/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
x86_64 in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check")

Reviewed-by: Russell Currey 
Reviewed-by: Christophe Leroy 
Signed-off-by: Rohan McLure 
---
V2: Update spacing and types assigned to pte_update calls.
V3: Update one last pte_update call to remove __pte invocation.
---
 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h |  9 -
 arch/powerpc/include/asm/book3s/64/pgtable.h | 18 +++---
 arch/powerpc/include/asm/nohash/32/pgtable.h |  7 ++-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  8 ++--
 arch/powerpc/include/asm/nohash/pgtable.h|  1 +
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 699df27b0e2f..1d943a13a204 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -150,6 +150,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOCif PPC_BOOK3S || PPC_8xx || 40x
+   select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index b2fdd3cc81de..44703c8c590c 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -53,6 +53,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include 
+
 static inline bool pte_user(pte_t pte)
 {
return pte_val(pte) & _PAGE_USER;
@@ -337,7 +339,11 @@ static inline int __ptep_test_and_clear_young(struct 
mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long 
addr,
   pte_t *ptep)
 {
-   return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -529,6 +535,7 @@ static inline bool pmd_user(pmd_t pmd)
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int percpu)
 {
+   page_table_check_pte_set(mm, addr, ptep, pte);
 #if defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use 
the
 * helper pte_update() which does an atomic update. We need to do that
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index e04e3cd378a6..436632d04304 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -162,6 +162,8 @@
 #define PAGE_KERNEL_ROX__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
 
 #ifndef __ASSEMBLY__
+#include 
+
 /*
  * page table defines
  */
@@ -465,8 +467,11 @@ static inline void huge_ptep_set_wrprotect(struct 
mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
 {
-   unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-   return __pte(old);
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
@@ -475,11 +480,16 @@ static inline pte_t ptep_get_and_clear_full(struct 
mm_struct *mm,
pte_t *ptep, int full)
 {
if (full && radix_enabled()) {
+   pte_t old_pte;
+
/*
 * We know that this is a full mm pte clear and
 * hence can be sure there is no parallel set_pte.
 */
-   return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   old_pte = radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
}
return ptep_get_and_clear(mm, addr, ptep);
 }
@@ -865,6 +875,8 @@ static inline void __set_pte_at(struct mm_struct

[PATCH v5 4/5] powerpc: mm: add p{te,md,ud}_user_accessible_page helpers

2022-11-17 Thread Rohan McLure
Add the following helpers for detecting whether a page table entry
is a leaf and is accessible to user space.

 * pte_user_accessible_page
 * pmd_user_accessible_page
 * pud_user_accessible_page

Also implement missing pud_user definitions for both Book3S/nohash 64-bit
systems, and pmd_user for Book3S/nohash 32-bit systems.

Signed-off-by: Rohan McLure 
---
V2: Provide missing pud_user implementations, use p{u,m}d_is_leaf.
V3: Provide missing pmd_user implementations as stubs in 32-bit.
V4: Use pmd_leaf, pud_leaf, and define pmd_user for 32 Book3E with
static inline method rather than macro.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  4 
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++
 arch/powerpc/include/asm/nohash/32/pgtable.h |  5 +
 arch/powerpc/include/asm/nohash/64/pgtable.h | 10 ++
 arch/powerpc/include/asm/pgtable.h   | 15 +++
 5 files changed, 44 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 921ae95cd939..b2fdd3cc81de 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -515,6 +515,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return 0;
+}
 
 
 /* This low level function performs the actual PTE insertion
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 5fb910ab250d..e04e3cd378a6 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -602,6 +602,16 @@ static inline bool pte_user(pte_t pte)
return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return !(pmd_raw(pmd) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline bool pud_user(pud_t pud)
+{
+   return !(pud_raw(pud) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
 #define pte_access_permitted pte_access_permitted
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 0d40b33184eb..94b2a53f73d5 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -209,6 +209,11 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return false;
+}
+
 /*
  * PTE updates. This function is called whenever an existing
  * valid PTE is updated. This does -not- include set_pte_at()
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 34e618bb9140..69304159aafc 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -123,6 +123,11 @@ static inline pte_t pmd_pte(pmd_t pmd)
return __pte(pmd_val(pmd));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return (pmd_val(pmd) & _PAGE_USER) == _PAGE_USER;
+}
+
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #definepmd_bad(pmd)(!is_kernel_addr(pmd_val(pmd)) \
 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -163,6 +168,11 @@ static inline pte_t pud_pte(pud_t pud)
return __pte(pud_val(pud));
 }
 
+static inline bool pud_user(pud_t pud)
+{
+   return (pud_val(pud) & _PAGE_USER) == _PAGE_USER;
+}
+
 static inline pud_t pte_pud(pte_t pte)
 {
return __pud(pte_val(pte));
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 5db9fa157685..5227bbaa7acf 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -173,6 +173,21 @@ static inline int pud_pfn(pud_t pud)
 }
 #endif
 
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+   return pte_present(pte) && pte_user(pte);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+   return pmd_leaf(pmd) && pmd_present(pmd) && pmd_user(pmd);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+   return pud_leaf(pud) && pud_present(pud) && pud_user(pud);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.37.2



[PATCH v5 2/5] powerpc: mm: Implement p{m,u,4}d_leaf on all platforms

2022-11-17 Thread Rohan McLure
The check that a higher-level entry in multi-level pages contains a page
translation entry (pte) is performed by p{m,u,4}d_leaf stubs, which may
be specialised for each choice of mmu. In a prior commit, we replace
uses of the catch-all stubs, p{m,u,4}d_is_leaf, with p{m,u,4}d_leaf.

Replace the catch-all stub definitions for p{m,u,4}d_is_leaf with
definitions for p{m,u,4}d_leaf. A future patch will assume that
p{m,u,4}d_leaf is defined on all platforms.

In particular, implement pud_leaf for Book3E-64, pmd_leaf for all Book3E
and Book3S-64 platforms, with a catch-all definition for p4d_leaf.

Signed-off-by: Rohan McLure 
---
v5: Split patch that replaces p{m,u,4}d_is_leaf into two patches, first
replacing callsites and afterward providing generic definition.
Remove ifndef-defines implementing p{m,u}d_leaf in favour of
implementing stubs in headers belonging to the particular platforms
needing them.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  4 
 arch/powerpc/include/asm/book3s/64/pgtable.h |  8 ++-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  5 +
 arch/powerpc/include/asm/nohash/pgtable.h|  5 +
 arch/powerpc/include/asm/pgtable.h   | 22 ++--
 5 files changed, 18 insertions(+), 26 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 75823f39e042..921ae95cd939 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -242,6 +242,10 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
+static inline bool pmd_leaf(pmd_t pmd)
+{
+   return false;
+}
 
 /*
  * When flushing the tlb entry for a page, we also need to flush the hash
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index c436d8422654..9a6db4ad3228 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1426,16 +1426,12 @@ static inline bool is_pte_rw_upgrade(unsigned long 
old_val, unsigned long new_va
 /*
  * Like pmd_huge() and pmd_large(), but works regardless of config options
  */
-#define pmd_is_leaf pmd_is_leaf
-#define pmd_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+static inline bool pmd_leaf(pmd_t pmd)
 {
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
 }
 
-#define pud_is_leaf pud_is_leaf
-#define pud_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
+static inline bool pud_leaf(pud_t pud)
 {
return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
 }
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 879e9a6e5a87..34e618bb9140 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -141,6 +141,11 @@ static inline void pud_clear(pud_t *pudp)
*pudp = __pud(0);
 }
 
+static inline bool pud_leaf(pud_t pud)
+{
+   return false;
+}
+
 #define pud_none(pud)  (!pud_val(pud))
 #definepud_bad(pud)(!is_kernel_addr(pud_val(pud)) \
 || (pud_val(pud) & PUD_BAD_BITS))
diff --git a/arch/powerpc/include/asm/nohash/pgtable.h 
b/arch/powerpc/include/asm/nohash/pgtable.h
index d9067dfc531c..455ae13822ee 100644
--- a/arch/powerpc/include/asm/nohash/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/pgtable.h
@@ -60,6 +60,11 @@ static inline bool pte_hw_valid(pte_t pte)
return pte_val(pte) & _PAGE_PRESENT;
 }
 
+static inline bool pmd_leaf(pmd_t pmd)
+{
+   return false;
+}
+
 /*
  * Don't just check for any non zero bits in __PAGE_USER, since for book3e
  * and PTE_64BIT, PAGE_KERNEL_X contains _PAGE_BAP_SR which is also in
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 283f40d05a4d..68a3f271aebe 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -134,29 +134,11 @@ static inline void pte_frag_set(mm_context_t *ctx, void 
*p)
 }
 #endif
 
-#ifndef pmd_is_leaf
-#define pmd_is_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+#define p4d_leaf p4d_leaf
+static inline bool p4d_leaf(p4d_t p4d)
 {
return false;
 }
-#endif
-
-#ifndef pud_is_leaf
-#define pud_is_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
-{
-   return false;
-}
-#endif
-
-#ifndef p4d_is_leaf
-#define p4d_is_leaf p4d_is_leaf
-static inline bool p4d_is_leaf(p4d_t p4d)
-{
-   return false;
-}
-#endif
 
 #define pmd_pgtable pmd_pgtable
 static inline pgtable_t pmd_pgtable(pmd_t pmd)
-- 
2.37.2



[PATCH v5 1/5] powerpc: mm: Replace p{u,m,4}d_is_leaf with p{u,m,4}d_leaf

2022-11-17 Thread Rohan McLure
Replace occurrences of p{u,m,4}d_is_leaf with p{u,m,4}d_leaf, as the
latter is the name used throughout all other architectures for the
check that a higher-level entry in multi-level paging contains a page
translation entry (pte).

A future patch will implement p{u,m,4}d_leaf stubs on all platforms so
that they may be referenced in generic code.

Signed-off-by: Rohan McLure 
---
V4: New patch
V5: Previously replaced stub definition for *_is_leaf with *_leaf. Do
that in a later patch
---
 arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 ++--
 arch/powerpc/mm/book3s64/radix_pgtable.c | 14 +++---
 arch/powerpc/mm/pgtable.c|  6 +++---
 arch/powerpc/mm/pgtable_64.c |  6 +++---
 arch/powerpc/xmon/xmon.c |  6 +++---
 5 files changed, 22 insertions(+), 22 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 5d5e12f3bf86..d29f8d1d97a6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -497,7 +497,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t 
*pmd, bool full,
for (im = 0; im < PTRS_PER_PMD; ++im, ++p) {
if (!pmd_present(*p))
continue;
-   if (pmd_is_leaf(*p)) {
+   if (pmd_leaf(*p)) {
if (full) {
pmd_clear(p);
} else {
@@ -526,7 +526,7 @@ static void kvmppc_unmap_free_pud(struct kvm *kvm, pud_t 
*pud,
for (iu = 0; iu < PTRS_PER_PUD; ++iu, ++p) {
if (!pud_present(*p))
continue;
-   if (pud_is_leaf(*p)) {
+   if (pud_leaf(*p)) {
pud_clear(p);
} else {
pmd_t *pmd;
@@ -629,12 +629,12 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, 
pte_t pte,
new_pud = pud_alloc_one(kvm->mm, gpa);
 
pmd = NULL;
-   if (pud && pud_present(*pud) && !pud_is_leaf(*pud))
+   if (pud && pud_present(*pud) && !pud_leaf(*pud))
pmd = pmd_offset(pud, gpa);
else if (level <= 1)
new_pmd = kvmppc_pmd_alloc();
 
-   if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_is_leaf(*pmd)))
+   if (level == 0 && !(pmd && pmd_present(*pmd) && !pmd_leaf(*pmd)))
new_ptep = kvmppc_pte_alloc();
 
/* Check if we might have been invalidated; let the guest retry if so */
@@ -652,7 +652,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, 
pte_t pte,
new_pud = NULL;
}
pud = pud_offset(p4d, gpa);
-   if (pud_is_leaf(*pud)) {
+   if (pud_leaf(*pud)) {
unsigned long hgpa = gpa & PUD_MASK;
 
/* Check if we raced and someone else has set the same thing */
@@ -703,7 +703,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, 
pte_t pte,
new_pmd = NULL;
}
pmd = pmd_offset(pud, gpa);
-   if (pmd_is_leaf(*pmd)) {
+   if (pmd_leaf(*pmd)) {
unsigned long lgpa = gpa & PMD_MASK;
 
/* Check if we raced and someone else has set the same thing */
diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index cac727b01799..8ac27e031ff4 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -205,14 +205,14 @@ static void radix__change_memory_range(unsigned long 
start, unsigned long end,
pudp = pud_alloc(_mm, p4dp, idx);
if (!pudp)
continue;
-   if (pud_is_leaf(*pudp)) {
+   if (pud_leaf(*pudp)) {
ptep = (pte_t *)pudp;
goto update_the_pte;
}
pmdp = pmd_alloc(_mm, pudp, idx);
if (!pmdp)
continue;
-   if (pmd_is_leaf(*pmdp)) {
+   if (pmd_leaf(*pmdp)) {
ptep = pmdp_ptep(pmdp);
goto update_the_pte;
}
@@ -762,7 +762,7 @@ static void __meminit remove_pmd_table(pmd_t *pmd_start, 
unsigned long addr,
if (!pmd_present(*pmd))
continue;
 
-   if (pmd_is_leaf(*pmd)) {
+   if (pmd_leaf(*pmd)) {
if (!IS_ALIGNED(addr, PMD_SIZE) ||
!IS_ALIGNED(next, PMD_SIZE)) {
WARN_ONCE(1, "%s: unaligned range\n", __func__);
@@ -792,7 +792,7 @@ static void __meminit remove_pud_table(pud_t *pud_start, 
unsigned long addr,
if (!pud_present(*pud))
continue;
 
-   if (pud_is_leaf(*pud)) {
+   if (pud_leaf(*pud)) {

[PATCH v2 3/4] powerpc/64e: Clear gprs on interrupt routine entry on Book3E

2022-11-06 Thread Rohan McLure
Zero GPRs r14-r31 on entry into the kernel for interrupt sources to
limit the influence of user-space values in potential speculation
gadgets. Prior to this commit, all other GPRs are reassigned during the
common prologue to interrupt handlers and so need not be zeroised
explicitly.

This may be done safely, without loss of register state prior to the
interrupt, as the common prologue saves the initial values of
non-volatiles, which are unconditionally restored in interrupt_64.S.
The mitigation defaults to enabled under INTERRUPT_SANITIZE_REGISTERS.

Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
---
 arch/powerpc/kernel/exceptions-64e.S | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 2f68fb2ee4fc..91d8019123c2 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -358,6 +358,11 @@ ret_from_mc_except:
std r14,PACA_EXMC+EX_R14(r13);  \
std r15,PACA_EXMC+EX_R15(r13)
 
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_ZEROIZE_NVGPRS()  ZEROIZE_NVGPRS()
+#else
+#define SANITIZE_ZEROIZE_NVGPRS()
+#endif
 
 /* Core exception code for all exceptions except TLB misses. */
 #define EXCEPTION_COMMON_LVL(n, scratch, excf) \
@@ -394,7 +399,8 @@ exc_##n##_common:   
\
std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */   \
std r3,_TRAP(r1);   /* set trap number  */  \
std r0,RESULT(r1);  /* clear regs->result */\
-   SAVE_NVGPRS(r1);
+   SAVE_NVGPRS(r1);\
+   SANITIZE_ZEROIZE_NVGPRS();  /* minimise speculation influence */
 
 #define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
-- 
2.34.1



[PATCH v2 4/4] powerpc/64s: Sanitise user registers on interrupt in pseries

2022-11-06 Thread Rohan McLure
Cause pseries platforms to default to zeroising all potentially
user-defined registers when entering the kernel by means of any
interrupt source, reducing user influence on the kernel and the
likelihood of producing speculation gadgets.

Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9d3d20c6f365..2eb328b25e49 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -532,7 +532,7 @@ config HOTPLUG_CPU
 config INTERRUPT_SANITIZE_REGISTERS
bool "Clear gprs on interrupt arrival"
depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
-   default PPC_BOOK3E_64
+   default PPC_BOOK3E_64 || PPC_PSERIES
help
  Reduce the influence of user register state on interrupt handlers and
  syscalls through clearing user state from registers before handling
-- 
2.34.1



[PATCH v2 1/4] powerpc/64: Add INTERRUPT_SANITIZE_REGISTERS Kconfig

2022-11-06 Thread Rohan McLure
Add Kconfig option for enabling clearing of registers on arrival in an
interrupt handler. This reduces the speculation influence of registers
on kernel internals. The option will be consumed by 64-bit systems that
feature speculation and wish to implement this mitigation.

This patch only introduces the Kconfig option, no actual mitigations.

The primary overhead of this mitigation lies in an increased number of
registers that must be saved and restored by interrupt handlers on
Book3S systems. Enable by default on Book3E systems, which prior to
this patch eagerly save and restore register state, meaning that the
mitigation when implemented will have minimal overhead.

Acked-by: Nicholas Piggin 
Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
---
 arch/powerpc/Kconfig | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2ca5418457ed..9d3d20c6f365 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -529,6 +529,15 @@ config HOTPLUG_CPU
 
  Say N if you are unsure.
 
+config INTERRUPT_SANITIZE_REGISTERS
+   bool "Clear gprs on interrupt arrival"
+   depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+   default PPC_BOOK3E_64
+   help
+ Reduce the influence of user register state on interrupt handlers and
+ syscalls through clearing user state from registers before handling
+ the exception.
+
 config PPC_QUEUED_SPINLOCKS
bool "Queued spinlocks" if EXPERT
depends on SMP
-- 
2.34.1



[PATCH v2 2/4] powerpc/64s: Clear gprs on interrupt routine entry on Book3S

2022-11-06 Thread Rohan McLure
Zero user state in gprs (assign to zero) to reduce the influence of user
registers on speculation within kernel syscall handlers. Clears occur
at the very beginning of the sc and scv 0 interrupt handlers, with
restores occurring following the execution of the syscall handler.

Zero GPRs r0, r2-r11, and r14-r31 on entry into the kernel for all
other interrupt sources. The remaining GPRs are overwritten by the
entry macros to interrupt handlers, irrespective of whether or not a
given handler consumes these register values.

Prior to this commit, r14-r31 are restored on a per-interrupt basis at
exit, but now they are always restored on 64-bit Book3S. Remove explicit
REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear
user registers on interrupt, and continue to depend on the return value
of interrupt_exit_user_prepare to determine whether or not to restore
non-volatiles.

The mmap_bench benchmark in selftests should rapidly invoke pagefaults.
We observe a ~0.8% performance regression with this mitigation, but this
indicates the worst case, as pagefault-heavy workloads spend the most
time in the now heavier-weight interrupt handlers. The mitigation can be
enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS.

Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/

v2: REST_NVGPRS should be conditional on mitigation in scv handler. Fix
improper multi-line preprocessor macro in interrupt_64.S
---
 arch/powerpc/kernel/exceptions-64s.S | 47 +-
 arch/powerpc/kernel/interrupt_64.S   | 36 
 2 files changed, 74 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 651c36b056bd..0605018762d1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,6 +21,19 @@
 #include 
 #include 
 
+/*
+ * macros for handling user register sanitisation
+ */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_ZEROIZE_NVGPRS()  ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()  REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS()   REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
+
 /*
  * Following are fixed section helper macros.
  *
@@ -111,6 +124,7 @@ name:
 #define ISTACK .L_ISTACK_\name\()  /* Set regular kernel stack */
 #define __ISTACK(name) .L_ISTACK_ ## name
 #define IKUAP  .L_IKUAP_\name\()   /* Do KUAP lock */
+#define IMSR_R12   .L_IMSR_R12_\name\()/* Assumes MSR saved to r12 */
 
 #define INT_DEFINE_BEGIN(n)\
 .macro int_define_ ## n name
@@ -176,6 +190,9 @@ do_define_int n
.ifndef IKUAP
IKUAP=1
.endif
+   .ifndef IMSR_R12
+   IMSR_R12=0
+   .endif
 .endm
 
 /*
@@ -502,6 +519,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
std r10,0(r1)   /* make stack chain pointer */
std r0,GPR0(r1) /* save r0 in stackframe*/
std r10,GPR1(r1)/* save r1 in stackframe*/
+   ZEROIZE_GPR(0)
 
/* Mark our [H]SRRs valid for return */
li  r10,1
@@ -544,8 +562,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
+   .if !IMSR_R12
+   ZEROIZE_GPRS(9, 12)
+   .else
+   ZEROIZE_GPRS(9, 11)
+   .endif
 
SAVE_NVGPRS(r1)
+   SANITIZE_ZEROIZE_NVGPRS()
 
.if IDAR
.if IISIDE
@@ -577,8 +601,8 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld  r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
-   std r2,GPR2(r1) /* save r2 in stackframe*/
-   SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe   */
+   SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe   */
+   ZEROIZE_GPRS(2, 8)
mflr    r9  /* Get LR, later save to stack  */
LOAD_PACA_TOC() /* get kernel TOC into r2   */
std r9,_LINK(r1)
@@ -696,6 +720,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlr    r9
ld  r9,_CCR(r1)
mtcr    r9
+   SANITIZE_RESTORE_NVGPRS()
REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
@@ -1441,7 +1466,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 * do_break() may have changed the NV GPRS while handling a breakpoint.
 * If so, we need to restore them with their updated values.
 */
-   REST_NVGPRS(r1)
+   HANDLER_RESTORE_NVGPRS()
b   interrupt_return_srr
 
 
@@ -1667,7 +1692,7 @@ EXC_COMMON_BEGIN(alignment_common)

Re: [PATCH 1/4] powerpc: mm: Replace p{u,m,4}d_is_leaf with p{u,m,4}d_leaf

2022-11-06 Thread Rohan McLure
> On 7 Nov 2022, at 9:49 am, Rohan McLure  wrote:
> 
> Replace occurrences of p{u,m,4}d_is_leaf with p{u,m,4}d_leaf, as the
> latter is the name given to checking that a higher-level entry in
> multi-level paging contains a page translation entry (pte). This commit
> allows for p{u,m,4}d_leaf to be used on all powerpc platforms.
> 
> Prior to this commit, the two names have both been present in the
> kernel, with, as far as I can tell, exactly the same purpose. While the
> 'is' in the name may better indicate that the macro/function is a
> boolean-returning check, the latter naming scheme is standard across
> all other architectures.
> 
> 32-bit systems import pgtable-nop4d.h which defines a default pud_leaf.
> Define pud_leaf preprocessor macro on both Book3E/S 32-bit to avoid
> including the default definition in asm/pgtable.h.
> 
> Signed-off-by: Rohan McLure 
> ---
> V4: new patch.
> ---
> arch/powerpc/include/asm/book3s/32/pgtable.h |  2 +-
> arch/powerpc/include/asm/book3s/64/pgtable.h | 10 --
> arch/powerpc/include/asm/nohash/32/pgtable.h |  1 +
> arch/powerpc/include/asm/pgtable.h   | 18 +-
> arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 ++--
> arch/powerpc/mm/book3s64/radix_pgtable.c | 14 +++---
> arch/powerpc/mm/pgtable.c|  6 +++---
> arch/powerpc/mm/pgtable_64.c |  6 +++---
> arch/powerpc/xmon/xmon.c |  6 +++---
> 9 files changed, 37 insertions(+), 38 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
> b/arch/powerpc/include/asm/book3s/32/pgtable.h
> index 75823f39e042..f1b91ad8f3a5 100644
> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
> @@ -234,6 +234,7 @@ void unmap_kernel_page(unsigned long va);
> #define pte_clear(mm, addr, ptep) \
> do { pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0); } while (0)
> 
> +#define pud_leaf pud_leaf
> #define pmd_none(pmd) (!pmd_val(pmd))
> #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
> #define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK)
> @@ -242,7 +243,6 @@ static inline void pmd_clear(pmd_t *pmdp)
> *pmdp = __pmd(0);
> }
> 
> -
> /*
>  * When flushing the tlb entry for a page, we also need to flush the hash
>  * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
> b/arch/powerpc/include/asm/book3s/64/pgtable.h
> index c436d8422654..3f51de24e4fc 100644
> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
> @@ -1426,16 +1426,14 @@ static inline bool is_pte_rw_upgrade(unsigned long 
> old_val, unsigned long new_val
> /*
>  * Like pmd_huge() and pmd_large(), but works regardless of config options
>  */
> -#define pmd_is_leaf pmd_is_leaf
> -#define pmd_leaf pmd_is_leaf
> -static inline bool pmd_is_leaf(pmd_t pmd)
> +#define pmd_leaf pmd_leaf
> +static inline bool pmd_leaf(pmd_t pmd)
> {
> return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
> }
> 
> -#define pud_is_leaf pud_is_leaf
> -#define pud_leaf pud_is_leaf
> -static inline bool pud_is_leaf(pud_t pud)
> +#define pud_leaf pud_leaf
> +static inline bool pud_leaf(pud_t pud)
> {
> return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
> }
> diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
> b/arch/powerpc/include/asm/nohash/32/pgtable.h
> index 0d40b33184eb..04a3b0b128eb 100644
> --- a/arch/powerpc/include/asm/nohash/32/pgtable.h
> +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
> @@ -201,6 +201,7 @@ static inline pte_t pte_mkexec(pte_t pte)
> }
> #endif
> 
> +#define pud_leaf pud_leaf
> #define pmd_none(pmd) (!pmd_val(pmd))
> #define pmd_bad(pmd) (pmd_val(pmd) & _PMD_BAD)
> #define pmd_present(pmd) (pmd_val(pmd) & _PMD_PRESENT_MASK)
> diff --git a/arch/powerpc/include/asm/pgtable.h 
> b/arch/powerpc/include/asm/pgtable.h
> index 283f40d05a4d..8e7625a89922 100644
> --- a/arch/powerpc/include/asm/pgtable.h
> +++ b/arch/powerpc/include/asm/pgtable.h
> @@ -134,25 +134,25 @@ static inline void pte_frag_set(mm_context_t *ctx, void 
> *p)
> }
> #endif
> 
> -#ifndef pmd_is_leaf
> -#define pmd_is_leaf pmd_is_leaf
> -static inline bool pmd_is_leaf(pmd_t pmd)
> +#ifndef pmd_leaf
> +#define pmd_leaf pmd_leaf
> +static inline bool pmd_leaf(pmd_t pmd)
> {
> return false;
> }
> #endif
> 
> -#ifndef pud_is_leaf
> -#define pud_is_leaf pud_is_leaf
> -static inline bool pud_is_leaf(pud_t pud)
> +#ifndef pud_leaf
> +#define pud_leaf pud_leaf
> +static inline bool pud_leaf(pud_t pud)
> {
> return false;

[PATCH 3/4] powerpc: mm: add p{te,md,ud}_user_accessible_page helpers

2022-11-06 Thread Rohan McLure
Add the following helpers for detecting whether a page table entry
is a leaf and is accessible to user space.

 * pte_user_accessible_page
 * pmd_user_accessible_page
 * pud_user_accessible_page

Also implement missing pud_user definitions for both Book3S/nohash 64-bit
systems, and pmd_user for Book3S/nohash 32-bit systems.
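
As a hedged sketch of the intended use (a hypothetical helper, not part
of this series), a checker that only cares about translations reachable
from userspace can now be written generically:

static void note_user_mapping(pmd_t pmd, unsigned long addr)
{
	/* leaf + present + user: a page userspace can actually touch */
	if (pmd_user_accessible_page(pmd))
		pr_debug("user-accessible huge page mapped at %lx\n", addr);
}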

Signed-off-by: Rohan McLure 
---
V2: Provide missing pud_user implementations, use p{u,m}d_is_leaf.
V3: Provide missing pmd_user implementations as stubs in 32-bit.
V4: Use pmd_leaf, pud_leaf, and define pmd_user for 32 Book3E with
static inline method rather than macro.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  4 
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++
 arch/powerpc/include/asm/nohash/32/pgtable.h |  5 +
 arch/powerpc/include/asm/nohash/64/pgtable.h | 10 ++
 arch/powerpc/include/asm/pgtable.h   | 15 +++
 5 files changed, 44 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index f1b91ad8f3a5..06e89bc4418a 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -511,6 +511,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return 0;
+}
 
 
 /* This low level function performs the actual PTE insertion
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 788a9effb03e..acbb37e5f82f 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -602,6 +602,16 @@ static inline bool pte_user(pte_t pte)
return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return !(pmd_raw(pmd) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline bool pud_user(pud_t pud)
+{
+   return !(pud_raw(pud) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
 #define pte_access_permitted pte_access_permitted
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 04a3b0b128eb..1d3cf7fe60ec 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -210,6 +210,11 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return false;
+}
+
 /*
  * PTE updates. This function is called whenever an existing
  * valid PTE is updated. This does -not- include set_pte_at()
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 879e9a6e5a87..2f6b5124e64c 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -123,6 +123,11 @@ static inline pte_t pmd_pte(pmd_t pmd)
return __pte(pmd_val(pmd));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return (pmd_val(pmd) & _PAGE_USER) == _PAGE_USER;
+}
+
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #definepmd_bad(pmd)(!is_kernel_addr(pmd_val(pmd)) \
 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -158,6 +163,11 @@ static inline pte_t pud_pte(pud_t pud)
return __pte(pud_val(pud));
 }
 
+static inline bool pud_user(pud_t pud)
+{
+   return (pud_val(pud) & _PAGE_USER) == _PAGE_USER;
+}
+
 static inline pud_t pte_pud(pte_t pte)
 {
return __pud(pte_val(pte));
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index c9ce5019f8a8..6851a7a90553 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -191,6 +191,21 @@ static inline int pud_pfn(pud_t pud)
 }
 #endif
 
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+   return pte_present(pte) && pte_user(pte);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+   return pmd_leaf(pmd) && pmd_present(pmd) && pmd_user(pmd);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+   return pud_leaf(pud) && pud_present(pud) && pud_user(pud);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.34.1



[PATCH 4/4] powerpc: mm: support page table check

2022-11-06 Thread Rohan McLure
Instrument the creation and clearing of page table mappings by invoking
page_table_check_pte_set and page_table_check_pte_clear respectively.
These calls serve as a sanity check against illegal mappings.

Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK for all ppc64, and 32-bit
platforms implementing Book3S.

Change pud_pfn to be a runtime bug rather than a build bug, as it is
referenced by page_table_check_pud_{clear,set}, which are never actually
called on these platforms.
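
The invariant the checker enforces can be pictured with a small toy
model (hypothetical and userspace-runnable; the real accounting in
mm/page_table_check.c is more involved):

#include <assert.h>

enum owner { FREE, ANON, FILE_BACKED };
static enum owner track[1024];		/* one slot per pfn */

static void check_set(unsigned long pfn, enum owner who)
{
	/* a pfn must never be user-mapped with conflicting ownership */
	assert(track[pfn] == FREE || track[pfn] == who);
	track[pfn] = who;
}

static void check_clear(unsigned long pfn)
{
	assert(track[pfn] != FREE);	/* clearing an unset entry is a bug */
	track[pfn] = FREE;
}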

See also:

riscv support in commit 3fee229a8eb9 ("riscv/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
arm64 in commit 42b2547137f5 ("arm64/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
x86_64 in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check")

Signed-off-by: Rohan McLure 
Reviewed-by: Russell Currey 
Reviewed-by: Christophe Leroy 
---
V2: Update spacing and types assigned to pte_update calls.
V3: Update one last pte_update call to remove __pte invocation.
---
 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h |  9 -
 arch/powerpc/include/asm/book3s/64/pgtable.h | 18 +++---
 arch/powerpc/include/asm/nohash/32/pgtable.h |  7 ++-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  8 ++--
 arch/powerpc/include/asm/nohash/pgtable.h|  1 +
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 699df27b0e2f..1d943a13a204 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -150,6 +150,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOCif PPC_BOOK3S || PPC_8xx || 40x
+   select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 06e89bc4418a..6614170b6c17 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -53,6 +53,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include 
+
 static inline bool pte_user(pte_t pte)
 {
return pte_val(pte) & _PAGE_USER;
@@ -333,7 +335,11 @@ static inline int __ptep_test_and_clear_young(struct 
mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long 
addr,
   pte_t *ptep)
 {
-   return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -525,6 +531,7 @@ static inline bool pmd_user(pmd_t pmd)
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int percpu)
 {
+   page_table_check_pte_set(mm, addr, ptep, pte);
 #if defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use 
the
 * helper pte_update() which does an atomic update. We need to do that
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index acbb37e5f82f..f00aa2d203c2 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -162,6 +162,8 @@
 #define PAGE_KERNEL_ROX__pgprot(_PAGE_BASE | _PAGE_KERNEL_ROX)
 
 #ifndef __ASSEMBLY__
+#include 
+
 /*
  * page table defines
  */
@@ -465,8 +467,11 @@ static inline void huge_ptep_set_wrprotect(struct 
mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
 {
-   unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-   return __pte(old);
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
@@ -475,11 +480,16 @@ static inline pte_t ptep_get_and_clear_full(struct 
mm_struct *mm,
pte_t *ptep, int full)
 {
if (full && radix_enabled()) {
+   pte_t old_pte;
+
/*
 * We know that this is a full mm pte clear and
 * hence can be sure there is no parallel set_pte.
 */
-   return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   old_pte = radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
}
return ptep_get_and_clear(mm, addr, ptep);
 }
@@ -865,6 +875,8 @@ static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,

[PATCH 1/4] powerpc: mm: Replace p{u,m,4}d_is_leaf with p{u,m,4}d_leaf

2022-11-06 Thread Rohan McLure
Replace occurrences of p{u,m,4}d_is_leaf with p{u,m,4}d_leaf, as the
latter is the name given to checking that a higher-level entry in
multi-level paging contains a page translation entry (pte). This commit
allows for p{u,m,4}d_leaf to be used on all powerpc platforms.

Prior to this commit, the two names have both been present in the
kernel, with, as far as I can tell, exactly the same purpose. While the
'is' in the name may better indicate that the macro/function is a
boolean-returning check, the latter naming scheme is standard across
all other architectures.

32-bit systems import pgtable-nop4d.h which defines a default pud_leaf.
Define pud_leaf preprocessor macro on both Book3E/S 32-bit to avoid
including the default definition in asm/pgtable.h.
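
The self-referential define used here is the usual kernel idiom for
advertising that a platform supplies its own version of a helper, so
that a generic header can guard its fallback. A minimal sketch of the
mechanism (illustrative bodies only):

/* platform header: provide the helper and mark it as present */
#define pud_leaf pud_leaf
static inline bool pud_leaf(pud_t pud)
{
	return false;	/* no huge pages at the pud level here */
}

/* generic header: the fallback compiles only if no platform
 * version was defined first */
#ifndef pud_leaf
#define pud_leaf pud_leaf
static inline bool pud_leaf(pud_t pud)
{
	return false;
}
#endif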

Signed-off-by: Rohan McLure 
---
V4: new patch.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  2 +-
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 --
 arch/powerpc/include/asm/nohash/32/pgtable.h |  1 +
 arch/powerpc/include/asm/pgtable.h   | 18 +-
 arch/powerpc/kvm/book3s_64_mmu_radix.c   | 12 ++--
 arch/powerpc/mm/book3s64/radix_pgtable.c | 14 +++---
 arch/powerpc/mm/pgtable.c|  6 +++---
 arch/powerpc/mm/pgtable_64.c |  6 +++---
 arch/powerpc/xmon/xmon.c |  6 +++---
 9 files changed, 37 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 75823f39e042..f1b91ad8f3a5 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -234,6 +234,7 @@ void unmap_kernel_page(unsigned long va);
 #define pte_clear(mm, addr, ptep) \
do { pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0); } while (0)
 
+#define pud_leaf   pud_leaf
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #definepmd_bad(pmd)(pmd_val(pmd) & _PMD_BAD)
 #definepmd_present(pmd)(pmd_val(pmd) & _PMD_PRESENT_MASK)
@@ -242,7 +243,6 @@ static inline void pmd_clear(pmd_t *pmdp)
*pmdp = __pmd(0);
 }
 
-
 /*
  * When flushing the tlb entry for a page, we also need to flush the hash
  * table entry.  flush_hash_pages is assembler (for speed) in hashtable.S.
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index c436d8422654..3f51de24e4fc 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1426,16 +1426,14 @@ static inline bool is_pte_rw_upgrade(unsigned long 
old_val, unsigned long new_val
 /*
  * Like pmd_huge() and pmd_large(), but works regardless of config options
  */
-#define pmd_is_leaf pmd_is_leaf
-#define pmd_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
 {
return !!(pmd_raw(pmd) & cpu_to_be64(_PAGE_PTE));
 }
 
-#define pud_is_leaf pud_is_leaf
-#define pud_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
return !!(pud_raw(pud) & cpu_to_be64(_PAGE_PTE));
 }
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 0d40b33184eb..04a3b0b128eb 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -201,6 +201,7 @@ static inline pte_t pte_mkexec(pte_t pte)
 }
 #endif
 
+#define pud_leaf   pud_leaf
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #definepmd_bad(pmd)(pmd_val(pmd) & _PMD_BAD)
 #definepmd_present(pmd)(pmd_val(pmd) & _PMD_PRESENT_MASK)
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 283f40d05a4d..8e7625a89922 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -134,25 +134,25 @@ static inline void pte_frag_set(mm_context_t *ctx, void 
*p)
 }
 #endif
 
-#ifndef pmd_is_leaf
-#define pmd_is_leaf pmd_is_leaf
-static inline bool pmd_is_leaf(pmd_t pmd)
+#ifndef pmd_leaf
+#define pmd_leaf pmd_leaf
+static inline bool pmd_leaf(pmd_t pmd)
 {
return false;
 }
 #endif
 
-#ifndef pud_is_leaf
-#define pud_is_leaf pud_is_leaf
-static inline bool pud_is_leaf(pud_t pud)
+#ifndef pud_leaf
+#define pud_leaf pud_leaf
+static inline bool pud_leaf(pud_t pud)
 {
return false;
 }
 #endif
 
-#ifndef p4d_is_leaf
-#define p4d_is_leaf p4d_is_leaf
-static inline bool p4d_is_leaf(p4d_t p4d)
+#ifndef p4d_leaf
+#define p4d_leaf p4d_leaf
+static inline bool p4d_leaf(p4d_t p4d)
 {
return false;
 }
diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c 
b/arch/powerpc/kvm/book3s_64_mmu_radix.c
index 5d5e12f3bf86..d29f8d1d97a6 100644
--- a/arch/powerpc/kvm/book3s_64_mmu_radix.c
+++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c
@@ -497,7 +497,7 @@ static void kvmppc_unmap_free_pmd(struct kvm *kvm, pmd_t

[PATCH 2/4] powerpc: mm: Add common pud_pfn stub for all platforms

2022-11-06 Thread Rohan McLure
Prior to this commit, pud_pfn was implemented with BUILD_BUG as the
inline function for 64-bit Book3S systems, but the function body is
never emitted, as its invocations in generic code are guarded by calls
to pud_devmap, which return zero on such systems. A future patch will
provide support for page table checks, the generic code for which
depends on a pud_pfn stub being implemented, even though the patch will
not interact with puds directly.

Remove the 64-bit Book3S stub and define pud_pfn to warn on all
platforms. pud_pfn may be defined properly on a per-platform basis
should it grow real usages in future.
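
For context, the generic consumer only evaluates pud_pfn behind a
user-accessibility check, which is why a warning stub suffices on
platforms whose puds are never leaves. Roughly (an abbreviated sketch
of the mm/page_table_check.c caller, not verbatim):

void __page_table_check_pud_set(struct mm_struct *mm, unsigned long addr,
				pud_t *pudp, pud_t pud)
{
	if (pud_user_accessible_page(pud))
		page_table_check_set(mm, addr, pud_pfn(pud),
				     PUD_SIZE >> PAGE_SHIFT, pud_write(pud));
}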

Signed-off-by: Rohan McLure 
---
V2: Remove conditional BUILD_BUG and BUG. Instead warn on usage.
V3: Replace WARN with WARN_ONCE, which should suffice to demonstrate
misuse of puds.
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 --
 arch/powerpc/include/asm/pgtable.h   | 14 ++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 3f51de24e4fc..788a9effb03e 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1394,16 +1394,6 @@ static inline int pgd_devmap(pgd_t pgd)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline int pud_pfn(pud_t pud)
-{
-   /*
-* Currently all calls to pud_pfn() are gated around a pud_devmap()
-* check so this should never be used. If it grows another user we
-* want to know about it.
-*/
-   BUILD_BUG();
-   return 0;
-}
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
 void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 8e7625a89922..c9ce5019f8a8 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -177,6 +177,20 @@ struct seq_file;
 void arch_report_meminfo(struct seq_file *m);
 #endif /* CONFIG_PPC64 */
 
+/*
+ * Currently only consumed by page_table_check_pud_{set,clear}. Since clears
+ * and sets to page table entries at any level are done through
+ * page_table_check_pte_{set,clear}, provide stub implementation.
+ */
+#ifndef pud_pfn
+#define pud_pfn pud_pfn
+static inline int pud_pfn(pud_t pud)
+{
+   WARN_ONCE(1, "pud: platform does not use pud entries directly");
+   return 0;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.34.1



Re: [PATCH v3 2/3] powerpc: mm: add p{te,md,ud}_user_accessible_page helpers

2022-11-06 Thread Rohan McLure
> On 3 Nov 2022, at 7:02 pm, Christophe Leroy  
> wrote:
> 
> 
> 
> On 24/10/2022 at 02:35, Rohan McLure wrote:
>> Add the following helpers for detecting whether a page table entry
>> is a leaf and is accessible to user space.
>> 
>>  * pte_user_accessible_page
>>  * pmd_user_accessible_page
>>  * pud_user_accessible_page
>> 
>> Also implement missing pud_user definitions for both Book3S/nohash 64-bit
>> systems, and pmd_user for Book3S/nohash 32-bit systems.
>> 
>> Signed-off-by: Rohan McLure 
>> ---
>> V2: Provide missing pud_user implementations, use p{u,m}d_is_leaf.
>> V3: Provide missing pmd_user implementations as stubs in 32-bit.
>> ---
>>  arch/powerpc/include/asm/book3s/32/pgtable.h |  4 
>>  arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++
>>  arch/powerpc/include/asm/nohash/32/pgtable.h |  1 +
>>  arch/powerpc/include/asm/nohash/64/pgtable.h | 10 ++
>>  arch/powerpc/include/asm/pgtable.h   | 15 +++
>>  5 files changed, 40 insertions(+)
>> 
>> diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
>> b/arch/powerpc/include/asm/book3s/32/pgtable.h
>> index 40041ac713d9..8bf1c538839a 100644
>> --- a/arch/powerpc/include/asm/book3s/32/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
>> @@ -531,6 +531,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t 
>> newprot)
>>   return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
>>  }
>> 
>> +static inline bool pmd_user(pmd_t pmd)
>> +{
>> + return 0;
>> +}
>> 
>> 
>>  /* This low level function performs the actual PTE insertion
>> diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
>> b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> index f9aefa492df0..3083111f9d0a 100644
>> --- a/arch/powerpc/include/asm/book3s/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
>> @@ -621,6 +621,16 @@ static inline bool pte_user(pte_t pte)
>>   return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
>>  }
>> 
>> +static inline bool pmd_user(pmd_t pmd)
>> +{
>> + return !(pmd_raw(pmd) & cpu_to_be64(_PAGE_PRIVILEGED));
>> +}
>> +
>> +static inline bool pud_user(pud_t pud)
>> +{
>> + return !(pud_raw(pud) & cpu_to_be64(_PAGE_PRIVILEGED));
>> +}
>> +
>>  #define pte_access_permitted pte_access_permitted
>>  static inline bool pte_access_permitted(pte_t pte, bool write)
>>  {
>> diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
>> b/arch/powerpc/include/asm/nohash/32/pgtable.h
>> index 9091e4904a6b..b92044d9d778 100644
>> --- a/arch/powerpc/include/asm/nohash/32/pgtable.h
>> +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
>> @@ -354,6 +354,7 @@ static inline int pte_young(pte_t pte)
>>  #endif
>> 
>>  #define pmd_page(pmd) pfn_to_page(pmd_pfn(pmd))
>> +#define pmd_user(pmd) 0
>>  /*
>>   * Encode and decode a swap entry.
>>   * Note that the bits we use in a PTE for representing a swap entry
>> diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
>> b/arch/powerpc/include/asm/nohash/64/pgtable.h
>> index 599921cc257e..23c5135178d1 100644
>> --- a/arch/powerpc/include/asm/nohash/64/pgtable.h
>> +++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
>> @@ -123,6 +123,11 @@ static inline pte_t pmd_pte(pmd_t pmd)
>>   return __pte(pmd_val(pmd));
>>  }
>> 
>> +static inline bool pmd_user(pmd_t pmd)
>> +{
>> + return (pmd_val(pmd) & _PAGE_USER) == _PAGE_USER;
>> +}
>> +
>>  #define pmd_none(pmd) (!pmd_val(pmd))
>>  #define pmd_bad(pmd) (!is_kernel_addr(pmd_val(pmd)) \
>>|| (pmd_val(pmd) & PMD_BAD_BITS))
>> @@ -158,6 +163,11 @@ static inline pte_t pud_pte(pud_t pud)
>>   return __pte(pud_val(pud));
>>  }
>> 
>> +static inline bool pud_user(pud_t pud)
>> +{
>> + return (pud_val(pud) & _PAGE_USER) == _PAGE_USER;
>> +}
>> +
>>  static inline pud_t pte_pud(pte_t pte)
>>  {
>>   return __pud(pte_val(pte));
>> diff --git a/arch/powerpc/include/asm/pgtable.h 
>> b/arch/powerpc/include/asm/pgtable.h
>> index 36956fb440e1..3cb5de9f1aa4 100644
>> --- a/arch/powerpc/include/asm/pgtable.h
>> +++ b/arch/powerpc/include/asm/pgtable.h
>> @@ -172,6 +172,21 @@ static inline int pud_pfn(pud_t pud)
>>  }
>>  #endif
>> 
>> +static inline bool pte_user_accessible_page(pte_t pte)
>> +{
>> + return pte_present(pte) && pte_user(pte);
>> +}
>> +
>> +static inline bool pmd_user_accessible_page(pmd_t pmd)
>> +{
>> + return pmd_is_leaf(pmd) && pmd_present(pmd) && pmd_user(pmd);
> 
> pmd_is_leaf() is specific to powerpc and we may want to get rid of it.
> 
> Can you use pmd_leaf() instead ?
> 
>> +}
>> +
>> +static inline bool pud_user_accessible_page(pud_t pud)
>> +{
>> + return pud_is_leaf(pud) && pud_present(pud) && pud_user(pud);
> 
> pud_is_leaf() is specific to powerpc and we may want to get rid of it.
> 
> Can you use pud_leaf() instead ?

Going to resend, replacing all usages/definitions of p{m,u,4}d_is_leaf()
with p{m,u,4}d_leaf() in arch/powerpc prior to this patch.

> 
>> +}
>> +
>>  #endif /* __ASSEMBLY__ */
>> 
>>  #endif /* _ASM_POWERPC_PGTABLE_H */




[PATCH 4/4] powerpc/64s: Sanitise user registers on interrupt in pseries

2022-11-02 Thread Rohan McLure
Cause pseries platforms to default to zeroising all potentially
user-defined registers when entering the kernel by means of any
interrupt source, reducing user influence on the kernel and the
likelihood of producing speculation gadgets. Interrupt sources include
syscalls.

Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
Standalone series: new patch
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 9d3d20c6f365..2eb328b25e49 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -532,7 +532,7 @@ config HOTPLUG_CPU
 config INTERRUPT_SANITIZE_REGISTERS
bool "Clear gprs on interrupt arrival"
depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
-   default PPC_BOOK3E_64
+   default PPC_BOOK3E_64 || PPC_PSERIES
help
  Reduce the influence of user register state on interrupt handlers and
  syscalls through clearing user state from registers before handling
-- 
2.34.1



[PATCH 3/4] powerpc/64e: Clear gprs on interrupt routine entry on Book3E

2022-11-02 Thread Rohan McLure
Zero GPRs r14-r31 on entry into the kernel for interrupt sources to
limit the influence of user-space values in potential speculation
gadgets. Prior to this commit, all other GPRs are reassigned during the
common prologue to interrupt handlers and so need not be zeroised
explicitly.

This may be done safely, without loss of register state prior to the
interrupt, as the common prologue saves the initial values of
non-volatiles, which are unconditionally restored in interrupt_64.S.
The mitigation defaults to enabled under INTERRUPT_SANITIZE_REGISTERS.

Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/

V4: New patch.
V5: Depend on Kconfig option. Remove ZEROIZE_NVGPRS on bad kernel
stack handler.
V6: Change name of zeroize macro to be consistent with Book3S.
---
 arch/powerpc/kernel/exceptions-64e.S | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index 2f68fb2ee4fc..91d8019123c2 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -358,6 +358,11 @@ ret_from_mc_except:
std r14,PACA_EXMC+EX_R14(r13);  \
std r15,PACA_EXMC+EX_R15(r13)
 
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_ZEROIZE_NVGPRS()  ZEROIZE_NVGPRS()
+#else
+#define SANITIZE_ZEROIZE_NVGPRS()
+#endif
 
 /* Core exception code for all exceptions except TLB misses. */
 #define EXCEPTION_COMMON_LVL(n, scratch, excf) \
@@ -394,7 +399,8 @@ exc_##n##_common:   
\
std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */   \
std r3,_TRAP(r1);   /* set trap number  */  \
std r0,RESULT(r1);  /* clear regs->result */\
-   SAVE_NVGPRS(r1);
+   SAVE_NVGPRS(r1);\
+   SANITIZE_ZEROIZE_NVGPRS();  /* minimise speculation influence */
 
 #define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
-- 
2.34.1



[PATCH 1/4] powerpc/64: Add INTERRUPT_SANITIZE_REGISTERS Kconfig

2022-11-02 Thread Rohan McLure
Add Kconfig option for enabling clearing of registers on arrival in an
interrupt handler. This reduces the speculation influence of registers
on kernel internals. The option will be consumed by 64-bit systems that
feature speculation and wish to implement this mitigation.

This patch only introduces the Kconfig option, no actual mitigations.

The primary overhead of this mitigation lies in an increased number of
registers that must be saved and restored by interrupt handlers on
Book3S systems. Enable by default on Book3E systems, which prior to
this patch eagerly save and restore register state, meaning that the
mitigation when implemented will have minimal overhead.

Signed-off-by: Rohan McLure 
Acked-by: Nicholas Piggin 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/
V5: New patch
---
 arch/powerpc/Kconfig | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 2ca5418457ed..9d3d20c6f365 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -529,6 +529,15 @@ config HOTPLUG_CPU
 
  Say N if you are unsure.
 
+config INTERRUPT_SANITIZE_REGISTERS
+   bool "Clear gprs on interrupt arrival"
+   depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+   default PPC_BOOK3E_64
+   help
+ Reduce the influence of user register state on interrupt handlers and
+ syscalls through clearing user state from registers before handling
+ the exception.
+
 config PPC_QUEUED_SPINLOCKS
bool "Queued spinlocks" if EXPERT
depends on SMP
-- 
2.34.1



[PATCH 2/4] powerpc/64s: Clear gprs on interrupt routine entry on Book3S

2022-11-02 Thread Rohan McLure
Zero user state in gprs (assign to zero) to reduce the influence of user
registers on speculation within kernel syscall handlers. Clears occur
at the very beginning of the sc and scv 0 interrupt handlers, with
restores occurring following the execution of the syscall handler.

Zero GPRs r0, r2-r11, and r14-r31 on entry into the kernel for all
other interrupt sources. The remaining GPRs are overwritten by the
entry macros to interrupt handlers, irrespective of whether or not a
given handler consumes these register values.

Prior to this commit, r14-r31 are restored on a per-interrupt basis at
exit, but now they are always restored on 64-bit Book3S. Remove explicit
REST_NVGPRS invocations on 64-bit Book3S. 32-bit systems do not clear
user registers on interrupt, and continue to depend on the return value
of interrupt_exit_user_prepare to determine whether or not to restore
non-volatiles.

The mmap_bench benchmark in selftests should rapidly invoke pagefaults.
We observe a ~0.8% performance regression with this mitigation, but this
indicates the worst case, as pagefault-heavy workloads spend the most
time in the now heavier-weight interrupt handlers. The mitigation can be
enabled/disabled through CONFIG_INTERRUPT_SANITIZE_REGISTERS.

Signed-off-by: Rohan McLure 
---
Resubmitting patches as their own series after v6 partially merged:
Link: 
https://lore.kernel.org/all/166488988686.779920.13794870102696416283.b4...@ellerman.id.au/t/

Standalone series: Now syscall register clearing is included
under the same configuration option. This now matches the
description given for CONFIG_INTERRUPT_SANITIZE_REGISTERS.
---
 arch/powerpc/kernel/exceptions-64s.S | 47 +-
 arch/powerpc/kernel/interrupt_64.S   | 34 +++
 2 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 651c36b056bd..0605018762d1 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,6 +21,19 @@
 #include 
 #include 
 
+/*
+ * macros for handling user register sanitisation
+ */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_ZEROIZE_NVGPRS()  ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()  REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS()   REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
+
 /*
  * Following are fixed section helper macros.
  *
@@ -111,6 +124,7 @@ name:
 #define ISTACK .L_ISTACK_\name\()  /* Set regular kernel stack */
 #define __ISTACK(name) .L_ISTACK_ ## name
 #define IKUAP  .L_IKUAP_\name\()   /* Do KUAP lock */
+#define IMSR_R12   .L_IMSR_R12_\name\()/* Assumes MSR saved to r12 */
 
 #define INT_DEFINE_BEGIN(n)\
 .macro int_define_ ## n name
@@ -176,6 +190,9 @@ do_define_int n
.ifndef IKUAP
IKUAP=1
.endif
+   .ifndef IMSR_R12
+   IMSR_R12=0
+   .endif
 .endm
 
 /*
@@ -502,6 +519,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
std r10,0(r1)   /* make stack chain pointer */
std r0,GPR0(r1) /* save r0 in stackframe*/
std r10,GPR1(r1)/* save r1 in stackframe*/
+   ZEROIZE_GPR(0)
 
/* Mark our [H]SRRs valid for return */
li  r10,1
@@ -544,8 +562,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
+   .if !IMSR_R12
+   ZEROIZE_GPRS(9, 12)
+   .else
+   ZEROIZE_GPRS(9, 11)
+   .endif
 
SAVE_NVGPRS(r1)
+   SANITIZE_ZEROIZE_NVGPRS()
 
.if IDAR
.if IISIDE
@@ -577,8 +601,8 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld  r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
-   std r2,GPR2(r1) /* save r2 in stackframe*/
-   SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe   */
+   SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe   */
+   ZEROIZE_GPRS(2, 8)
mflr    r9  /* Get LR, later save to stack  */
LOAD_PACA_TOC() /* get kernel TOC into r2   */
std r9,_LINK(r1)
@@ -696,6 +720,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlr    r9
ld  r9,_CCR(r1)
mtcr    r9
+   SANITIZE_RESTORE_NVGPRS()
REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
@@ -1441,7 +1466,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 * do_break() may have changed the NV GPRS while handling a breakpoint.
 * If so, we need to restore them with their updated values.
 */
-   REST_NVGPRS(r1)
+   HANDLER_RESTORE_NVGPRS()
b   interrupt_return_srr
 
 
@@ -1667,7 +1692,7 @@ EXC_COMMON_BEGIN(alignment_common)

Re: [PATCH v6 22/25] powerpc/64s: Clear user GPRs in syscall interrupt entry

2022-10-31 Thread Rohan McLure
> On 23 Sep 2022, at 6:02 pm, Nicholas Piggin  wrote:
> 
> On Wed Sep 21, 2022 at 4:56 PM AEST, Rohan McLure wrote:
>> Clear user state in gprs (assign to zero) to reduce the influence of user
>> registers on speculation within kernel syscall handlers. Clears occur
>> at the very beginning of the sc and scv 0 interrupt handlers, with
>> restores occurring following the execution of the syscall handler.
>> 
>> Signed-off-by: Rohan McLure 
>> ---
>> V2: Update summary
>> V3: Remove erroneous summary paragraph on syscall_exit_prepare
>> V4: Use ZEROIZE instead of NULLIFY. Clear r0 also.
>> V5: Move to end of patch series.
>> V6: Include clears which were previously in the syscall wrapper patch.
>> Move comment on r3-r8 register save to when we alter the calling
>> convention for system_call_exception.
> 
> The series looks good to here, I just need to find a bit more time to
> look at the code and do some tests with the next few patches. I don't
> see much problem with them, looks a lot better now with fewer ifdefs
> so that's good. Possibly you could share some of those new sanitize
> macros in a header file but that's a minor nit.
> 
> Coud we have this zeroize also under the same config option as the
> next? I figure if we care about speculative security we want both,
> and if we don't we need neither.

Thanks for this. I’ll resubmit the last four patches with a common
configuration option for register clearing. Just pinging to hear your
thoughts on whether we are willing to wear the small performance
regression in general interrupts, and whether this mitigation should
be a default on pseries.

Rohan

> 
> Thanks,
> Nick
> 
>> ---
>> arch/powerpc/kernel/interrupt_64.S | 17 -
>> 1 file changed, 16 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/powerpc/kernel/interrupt_64.S 
>> b/arch/powerpc/kernel/interrupt_64.S
>> index a5dd78bdbe6d..40147558e1a6 100644
>> --- a/arch/powerpc/kernel/interrupt_64.S
>> +++ b/arch/powerpc/kernel/interrupt_64.S
>> @@ -106,6 +106,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>>  * but this is the best we can do.
>>  */
>> 
>> + /*
>> +  * Zero user registers to prevent influencing speculative execution
>> +  * state of kernel code.
>> +  */
>> + ZEROIZE_GPR(0)
>> + ZEROIZE_GPRS(5, 12)
>> + ZEROIZE_NVGPRS()
>> bl system_call_exception
>> 
>> .Lsyscall_vectored_\name\()_exit:
>> @@ -134,6 +141,7 @@ BEGIN_FTR_SECTION
>> HMT_MEDIUM_LOW
>> END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>> 
>> + REST_NVGPRS(r1)
>> cmpdi r3,0
>> bne .Lsyscall_vectored_\name\()_restore_regs
>> 
>> @@ -285,6 +293,13 @@ END_BTB_FLUSH_SECTION
>> wrteei 1
>> #endif
>> 
>> + /*
>> +  * Zero user registers to prevent influencing speculative execution
>> +  * state of kernel code.
>> +  */
>> + ZEROIZE_GPR(0)
>> + ZEROIZE_GPRS(5, 12)
>> + ZEROIZE_NVGPRS()
>> bl system_call_exception
>> 
>> .Lsyscall_exit:
>> @@ -325,6 +340,7 @@ BEGIN_FTR_SECTION
>> stdcx. r0,0,r1 /* to clear the reservation */
>> END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
>> 
>> + REST_NVGPRS(r1)
>> cmpdi r3,0
>> bne .Lsyscall_restore_regs
>> /* Zero volatile regs that may contain sensitive kernel data */
>> @@ -352,7 +368,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
>> .Lsyscall_restore_regs:
>> ld r3,_CTR(r1)
>> ld r4,_XER(r1)
>> - REST_NVGPRS(r1)
>> mtctr r3
>> mtspr SPRN_XER,r4
>> REST_GPR(0, r1)
>> -- 
>> 2.34.1




[PATCH] powerpc: Interrupt handler stack randomisation

2022-10-24 Thread Rohan McLure
Stack frames used by syscall handlers support random offsets as of
commit f4a0318f278d (powerpc: add support for syscall stack randomization).
Implement the same for general interrupt handlers by applying the
random stack offset and then updating this offset from within the
DEFINE_INTERRUPT_HANDLER macros.

Applying this offset perturbs the layout of interrupt handler stack
frames, rendering the kernel stack more difficult to control by means
of user-invoked interrupts.
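
For intuition, the generic helpers behave roughly like the following
simplified sketch (the real definitions live in
include/linux/randomize_kstack.h and additionally use a static branch
and a tighter offset mask; treat this as an approximation):

static DEFINE_PER_CPU(u32, kstack_offset);

/* Consume the previously banked entropy: nudge the stack pointer down
 * by a small, bounded amount before calling into the real handler. */
#define add_random_kstack_offset() do {				\
	u32 offset = raw_cpu_read(kstack_offset);		\
	u8 *ptr = __builtin_alloca(offset & 0x3ff);		\
	asm volatile("" : : "r"(ptr) : "memory");		\
} while (0)

/* Bank fresh entropy (here the timebase, mftb()) for the next entry. */
#define choose_random_kstack_offset(rand)			\
	raw_cpu_write(kstack_offset, (u32)(rand))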

Link: https://lists.ozlabs.org/pipermail/linuxppc-dev/2022-May/243238.html

Signed-off-by: Rohan McLure 
---
 arch/powerpc/include/asm/interrupt.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index 4745bb9998bd..b7f7beff4e13 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -68,6 +68,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -448,9 +449,12 @@ interrupt_handler long func(struct pt_regs *regs)  
\
long ret;   \
\
__hard_RI_enable(); \
+   add_random_kstack_offset(); \
\
ret = ____##func (regs);\
\
+   choose_random_kstack_offset(mftb());\
+   \
return ret; \
 }  \
 NOKPROBE_SYMBOL(func); \
@@ -480,9 +484,11 @@ static __always_inline void ____##func(struct pt_regs 
*regs);  \
 interrupt_handler void func(struct pt_regs *regs)  \
 {  \
interrupt_enter_prepare(regs);  \
+   add_random_kstack_offset(); \
\
____##func (regs);  \
\
+   choose_random_kstack_offset(mftb());\
interrupt_exit_prepare(regs);   \
 }  \
 NOKPROBE_SYMBOL(func); \
@@ -515,9 +521,11 @@ interrupt_handler long func(struct pt_regs *regs)  
\
long ret;   \
\
interrupt_enter_prepare(regs);  \
+   add_random_kstack_offset(); \
\
ret = ____##func (regs);\
\
+   choose_random_kstack_offset(mftb());\
interrupt_exit_prepare(regs);   \
\
return ret; \
@@ -548,9 +556,11 @@ static __always_inline void ____##func(struct pt_regs 
*regs);  \
 interrupt_handler void func(struct pt_regs *regs)  \
 {  \
interrupt_async_enter_prepare(regs);\
+   add_random_kstack_offset(); \
\
____##func (regs);  \
\
+   choose_random_kstack_offset(mftb());\
interrupt_async_exit_prepare(regs); \
 }  \
 NOKPROBE_SYMBOL(func); \
@@ -585,9 +595,11 @@ interrupt_handler long func(struct pt_regs *regs)  
\
long ret;   \
\
interrupt_nmi_enter_prepare(regs

[PATCH v3 1/3] powerpc: Add common pud_pfn stub for all platforms

2022-10-23 Thread Rohan McLure
Prior to this commit, pud_pfn was implemented with BUILD_BUG as the
inline function for 64-bit Book3S systems, but the function body is
never emitted, as its invocations in generic code are guarded by calls
to pud_devmap, which return zero on such systems. A future patch will
provide support for page table checks, the generic code for which
depends on a pud_pfn stub being implemented, even though the patch will
not interact with puds directly.

Remove the 64-bit Book3S stub and define pud_pfn to warn on all
platforms. pud_pfn may be defined properly on a per-platform basis
should it grow real usages in future.
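
As a stand-alone sketch of why a warning stub suffices, the model below
mirrors the call pattern described above: every pud_pfn() use in generic
code sits behind a pud_devmap() check that returns zero on these
platforms, so the stub compiles but never fires. Types and values here
are illustrative only.

#include <stdio.h>

typedef struct { unsigned long val; } pud_t;

/* platforms without device-mapped puds always report none present */
static int pud_devmap(pud_t pud) { return 0; }

/* the stub must exist for the translation unit to compile */
static int pud_pfn(pud_t pud)
{
	fprintf(stderr, "pud: platform does not use pud entries directly\n");
	return 0;
}

int main(void)
{
	pud_t pud = { 0 };

	/* generic code gates pud_pfn() behind pud_devmap(), so the stub
	 * is never reached at runtime on such platforms */
	if (pud_devmap(pud))
		printf("pfn=%d\n", pud_pfn(pud));
	return 0;
}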

Signed-off-by: Rohan McLure 
---
V2: Remove conditional BUILD_BUG and BUG. Instead warn on usage.
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 --
 arch/powerpc/include/asm/pgtable.h   | 14 ++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 486902aff040..f9aefa492df0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1413,16 +1413,6 @@ static inline int pgd_devmap(pgd_t pgd)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline int pud_pfn(pud_t pud)
-{
-   /*
-* Currently all calls to pud_pfn() are gated around a pud_devmap()
-* check so this should never be used. If it grows another user we
-* want to know about it.
-*/
-   BUILD_BUG();
-   return 0;
-}
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
 void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 33f4bf8d22b0..36956fb440e1 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -158,6 +158,20 @@ struct seq_file;
 void arch_report_meminfo(struct seq_file *m);
 #endif /* CONFIG_PPC64 */
 
+/*
+ * Currently only consumed by page_table_check_pud_{set,clear}. Since clears
+ * and sets to page table entries at any level are done through
+ * page_table_check_pte_{set,clear}, provide stub implementation.
+ */
+#ifndef pud_pfn
+#define pud_pfn pud_pfn
+static inline int pud_pfn(pud_t pud)
+{
+   WARN(1, "pud: platform does not use pud entries directly");
+   return 0;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.34.1



[PATCH v3 2/3] powerpc: mm: add p{te,md,ud}_user_accessible_page helpers

2022-10-23 Thread Rohan McLure
Add the following helpers for detecting whether a page table entry
is a leaf and is accessible to user space.

 * pte_user_accessible_page
 * pmd_user_accessible_page
 * pud_user_accessible_page

Also implement missing pud_user definitions for both Book3S/nohash 64-bit
systems, and pmd_user for Book3S/nohash 32-bit systems.
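
A minimal user-space model of the pte-level composition, using invented
flag bits rather than the real powerpc layouts:

#include <stdbool.h>
#include <stdio.h>

#define _PAGE_PRESENT	0x1UL	/* illustrative bit positions only */
#define _PAGE_USER	0x4UL

typedef struct { unsigned long val; } pte_t;

static bool pte_present(pte_t pte) { return pte.val & _PAGE_PRESENT; }
static bool pte_user(pte_t pte)    { return pte.val & _PAGE_USER; }

/* an entry only counts as a user-accessible page when it is both
 * present and user-permitted, as in the helper added here */
static bool pte_user_accessible_page(pte_t pte)
{
	return pte_present(pte) && pte_user(pte);
}

int main(void)
{
	pte_t kernel_text = { _PAGE_PRESENT };
	pte_t user_heap   = { _PAGE_PRESENT | _PAGE_USER };

	printf("kernel text: %d\n", pte_user_accessible_page(kernel_text));
	printf("user heap:   %d\n", pte_user_accessible_page(user_heap));
	return 0;
}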

Signed-off-by: Rohan McLure 
---
V2: Provide missing pud_user implementations, use p{u,m}d_is_leaf.
V3: Provide missing pmd_user implementations as stubs in 32-bit.
---
 arch/powerpc/include/asm/book3s/32/pgtable.h |  4 
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++
 arch/powerpc/include/asm/nohash/32/pgtable.h |  1 +
 arch/powerpc/include/asm/nohash/64/pgtable.h | 10 ++
 arch/powerpc/include/asm/pgtable.h   | 15 +++
 5 files changed, 40 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 40041ac713d9..8bf1c538839a 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -531,6 +531,10 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
return __pte((pte_val(pte) & _PAGE_CHG_MASK) | pgprot_val(newprot));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return 0;
+}
 
 
 /* This low level function performs the actual PTE insertion
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f9aefa492df0..3083111f9d0a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -621,6 +621,16 @@ static inline bool pte_user(pte_t pte)
return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return !(pmd_raw(pmd) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline bool pud_user(pud_t pud)
+{
+   return !(pud_raw(pud) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
 #define pte_access_permitted pte_access_permitted
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h 
b/arch/powerpc/include/asm/nohash/32/pgtable.h
index 9091e4904a6b..b92044d9d778 100644
--- a/arch/powerpc/include/asm/nohash/32/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/32/pgtable.h
@@ -354,6 +354,7 @@ static inline int pte_young(pte_t pte)
 #endif
 
 #define pmd_page(pmd)  pfn_to_page(pmd_pfn(pmd))
+#define pmd_user(pmd)  0
 /*
  * Encode and decode a swap entry.
  * Note that the bits we use in a PTE for representing a swap entry
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 599921cc257e..23c5135178d1 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -123,6 +123,11 @@ static inline pte_t pmd_pte(pmd_t pmd)
return __pte(pmd_val(pmd));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return (pmd_val(pmd) & _PAGE_USER) == _PAGE_USER;
+}
+
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #definepmd_bad(pmd)(!is_kernel_addr(pmd_val(pmd)) \
 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -158,6 +163,11 @@ static inline pte_t pud_pte(pud_t pud)
return __pte(pud_val(pud));
 }
 
+static inline bool pud_user(pud_t pud)
+{
+   return (pud_val(pud) & _PAGE_USER) == _PAGE_USER;
+}
+
 static inline pud_t pte_pud(pte_t pte)
 {
return __pud(pte_val(pte));
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 36956fb440e1..3cb5de9f1aa4 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -172,6 +172,21 @@ static inline int pud_pfn(pud_t pud)
 }
 #endif
 
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+   return pte_present(pte) && pte_user(pte);
+}
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+   return pmd_is_leaf(pmd) && pmd_present(pmd) && pmd_user(pmd);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+   return pud_is_leaf(pud) && pud_present(pud) && pud_user(pud);
+}
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.34.1



[PATCH v3 3/3] powerpc: mm: support page table check

2022-10-23 Thread Rohan McLure
On creation and clearing of a page table mapping, instrument such calls
by invoking page_table_check_pte_set and page_table_check_pte_clear
respectively. These calls serve as a sanity check against illegal
mappings.
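
To make "sanity check against illegal mappings" concrete, here is a toy
model of one invariant the generic checker enforces, namely that a
physical page is never mapped writable into user space more than once.
The bookkeeping below is invented for illustration and far simpler than
the real checker's.

#include <assert.h>
#include <stdio.h>

#define MAX_PFN 16

static int writable_refs[MAX_PFN];

/* page_table_check_pte_set() analogue: account a new mapping */
static void check_pte_set(unsigned long pfn, int writable)
{
	if (writable) {
		writable_refs[pfn]++;
		assert(writable_refs[pfn] <= 1); /* illegal double map */
	}
}

/* page_table_check_pte_clear() analogue: release the mapping */
static void check_pte_clear(unsigned long pfn, int writable)
{
	if (writable)
		writable_refs[pfn]--;
}

int main(void)
{
	check_pte_set(3, 1);
	check_pte_clear(3, 1);
	check_pte_set(3, 1);	/* fine again after the clear */
	printf("invariant held\n");
	return 0;
}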

Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK for all ppc64, and 32-bit
platforms implementing Book3S.

Change pud_pfn to raise a runtime warning rather than a build bug, as it
is consumed by page_table_check_pud_{clear,set}, which are never actually
called.

See also:

riscv support in commit 3fee229a8eb9 ("riscv/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
arm64 in commit 42b2547137f5 ("arm64/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
x86_64 in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check")

Signed-off-by: Rohan McLure 
---
V2: Update spacing and types assigned to pte_update calls.
V3: Update one last pte_update call to remove __pte invocation.
---
 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h |  9 -
 arch/powerpc/include/asm/book3s/64/pgtable.h | 18 +++---
 arch/powerpc/include/asm/nohash/32/pgtable.h |  7 ++-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  8 ++--
 arch/powerpc/include/asm/nohash/pgtable.h|  1 +
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4c466acdc70d..6c213ac46a92 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -149,6 +149,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOCif PPC_BOOK3S || PPC_8xx || 40x
+   select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 8bf1c538839a..6a592426b935 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -53,6 +53,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/page_table_check.h>
+
 static inline bool pte_user(pte_t pte)
 {
return pte_val(pte) & _PAGE_USER;
@@ -353,7 +355,11 @@ static inline int __ptep_test_and_clear_young(struct 
mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long 
addr,
   pte_t *ptep)
 {
-   return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -545,6 +551,7 @@ static inline bool pmd_user(pmd_t pmd)
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int percpu)
 {
+   page_table_check_pte_set(mm, addr, ptep, pte);
 #if defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use 
the
 * helper pte_update() which does an atomic update. We need to do that
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 3083111f9d0a..b5c5718d9b90 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -181,6 +181,8 @@
 #define PAGE_AGP   (PAGE_KERNEL_NC)
 
 #ifndef __ASSEMBLY__
+#include <linux/page_table_check.h>
+
 /*
  * page table defines
  */
@@ -484,8 +486,11 @@ static inline void huge_ptep_set_wrprotect(struct 
mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
 {
-   unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-   return __pte(old);
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
@@ -494,11 +499,16 @@ static inline pte_t ptep_get_and_clear_full(struct 
mm_struct *mm,
pte_t *ptep, int full)
 {
if (full && radix_enabled()) {
+   pte_t old_pte;
+
/*
 * We know that this is a full mm pte clear and
 * hence can be sure there is no parallel set_pte.
 */
-   return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   old_pte = radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
}
return ptep_get_and_clear(mm, addr, ptep);
 }
@@ -884,6 +894,8 @@ static inline void __set_pte_at(struct mm_struct *mm, 
unsigned long addr,
 */
pte = __pte_raw(pte_raw(pte) | cpu_to_be

[PATCH v2 1/3] powerpc: Add common pud_pfn stub for all platforms

2022-09-23 Thread Rohan McLure
Prior to this commit, pud_pfn was implemented with BUILD_BUG as the inline
function for 64-bit Book3S systems, but was never emitted, as its
invocations in generic code are guarded by calls to pud_devmap, which
returns zero on such systems. A future patch will provide support for page
table checks, the generic code for which depends on a pud_pfn stub being
implemented, even though that patch will not interact with puds directly.

Remove the 64-bit Book3S stub and define pud_pfn to warn on all
platforms. pud_pfn may be defined properly on a per-platform basis
should it grow real usages in future.

Signed-off-by: Rohan McLure 
---
V2: Remove conditional BUILD_BUG and BUG. Instead warn on usage.
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 --
 arch/powerpc/include/asm/pgtable.h   | 14 ++
 2 files changed, 14 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 486902aff040..f9aefa492df0 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -1413,16 +1413,6 @@ static inline int pgd_devmap(pgd_t pgd)
 }
 #endif /* CONFIG_TRANSPARENT_HUGEPAGE */
 
-static inline int pud_pfn(pud_t pud)
-{
-   /*
-* Currently all calls to pud_pfn() are gated around a pud_devmap()
-* check so this should never be used. If it grows another user we
-* want to know about it.
-*/
-   BUILD_BUG();
-   return 0;
-}
 #define __HAVE_ARCH_PTEP_MODIFY_PROT_TRANSACTION
 pte_t ptep_modify_prot_start(struct vm_area_struct *, unsigned long, pte_t *);
 void ptep_modify_prot_commit(struct vm_area_struct *, unsigned long,
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 33f4bf8d22b0..36956fb440e1 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -158,6 +158,20 @@ struct seq_file;
 void arch_report_meminfo(struct seq_file *m);
 #endif /* CONFIG_PPC64 */
 
+/*
+ * Currently only consumed by page_table_check_pud_{set,clear}. Since clears
+ * and sets to page table entries at any level are done through
+ * page_table_check_pte_{set,clear}, provide stub implementation.
+ */
+#ifndef pud_pfn
+#define pud_pfn pud_pfn
+static inline int pud_pfn(pud_t pud)
+{
+   WARN(1, "pud: platform does not use pud entries directly");
+   return 0;
+}
+#endif
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.34.1



[PATCH v2 2/3] powerpc: mm: add p{te,md,ud}_user_accessible_page helpers

2022-09-23 Thread Rohan McLure
Add the following helpers for detecting whether a page table entry
is a leaf and is accessible to user space.

 * pte_user_accessible_page
 * pmd_user_accessible_page
 * pud_user_accessible_page

Also implement missing pud_user definitions for both Book3S and Book3E
64-bit systems.

Signed-off-by: Rohan McLure 
---
V2: Provide missing pud_user implementations, use p{u,m}d_is_leaf.
---
 arch/powerpc/include/asm/book3s/64/pgtable.h | 10 ++
 arch/powerpc/include/asm/nohash/64/pgtable.h | 10 ++
 arch/powerpc/include/asm/pgtable.h   | 33 ++
 3 files changed, 53 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index f9aefa492df0..3083111f9d0a 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -621,6 +621,16 @@ static inline bool pte_user(pte_t pte)
return !(pte_raw(pte) & cpu_to_be64(_PAGE_PRIVILEGED));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return !(pmd_raw(pmd) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
+static inline bool pud_user(pud_t pud)
+{
+   return !(pud_raw(pud) & cpu_to_be64(_PAGE_PRIVILEGED));
+}
+
 #define pte_access_permitted pte_access_permitted
 static inline bool pte_access_permitted(pte_t pte, bool write)
 {
diff --git a/arch/powerpc/include/asm/nohash/64/pgtable.h 
b/arch/powerpc/include/asm/nohash/64/pgtable.h
index 599921cc257e..23c5135178d1 100644
--- a/arch/powerpc/include/asm/nohash/64/pgtable.h
+++ b/arch/powerpc/include/asm/nohash/64/pgtable.h
@@ -123,6 +123,11 @@ static inline pte_t pmd_pte(pmd_t pmd)
return __pte(pmd_val(pmd));
 }
 
+static inline bool pmd_user(pmd_t pmd)
+{
+   return (pmd_val(pmd) & _PAGE_USER) == _PAGE_USER;
+}
+
 #define pmd_none(pmd)  (!pmd_val(pmd))
 #definepmd_bad(pmd)(!is_kernel_addr(pmd_val(pmd)) \
 || (pmd_val(pmd) & PMD_BAD_BITS))
@@ -158,6 +163,11 @@ static inline pte_t pud_pte(pud_t pud)
return __pte(pud_val(pud));
 }
 
+static inline bool pud_user(pud_t pud)
+{
+   return (pud_val(pud) & _PAGE_USER) == _PAGE_USER;
+}
+
 static inline pud_t pte_pud(pte_t pte)
 {
return __pud(pte_val(pte));
diff --git a/arch/powerpc/include/asm/pgtable.h 
b/arch/powerpc/include/asm/pgtable.h
index 36956fb440e1..69eed4e03da0 100644
--- a/arch/powerpc/include/asm/pgtable.h
+++ b/arch/powerpc/include/asm/pgtable.h
@@ -172,6 +172,39 @@ static inline int pud_pfn(pud_t pud)
 }
 #endif
 
+static inline bool pte_user_accessible_page(pte_t pte)
+{
+   return pte_present(pte) && pte_user(pte);
+}
+
+#ifdef CONFIG_PPC64
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+   return pmd_is_leaf(pmd) && pmd_present(pmd) && pmd_user(pmd);
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+   return pud_is_leaf(pud) && pud_present(pud) && pud_user(pud);
+}
+
+#else
+
+static inline bool pmd_user_accessible_page(pmd_t pmd)
+{
+   WARN(1, "pmd: multi-level paging unsupported on ppc32");
+   return false;
+}
+
+static inline bool pud_user_accessible_page(pud_t pud)
+{
+   WARN(1, "pud: multi-level paging unsupported on ppc32");
+   return false;
+}
+
+#endif /* CONFIG_PPC64 */
+
 #endif /* __ASSEMBLY__ */
 
 #endif /* _ASM_POWERPC_PGTABLE_H */
-- 
2.34.1



[PATCH v2 3/3] powerpc: mm: support page table check

2022-09-23 Thread Rohan McLure
On creation and clearing of a page table mapping, instrument such calls
by invoking page_table_check_pte_set and page_table_check_pte_clear
respectively. These calls serve as a sanity check against illegal
mappings.

Enable ARCH_SUPPORTS_PAGE_TABLE_CHECK for all ppc64, and 32-bit
platforms implementing Book3S.

Change pud_pfn to raise a runtime warning rather than a build bug, as it
is consumed by page_table_check_pud_{clear,set}, which are never actually
called.

See also:

riscv support in commit 3fee229a8eb9 ("riscv/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
arm64 in commit 42b2547137f5 ("arm64/mm: enable
ARCH_SUPPORTS_PAGE_TABLE_CHECK")
x86_64 in commit d283d422c6c4 ("x86: mm: add x86_64 support for page table
check")

Signed-off-by: Rohan McLure 
---
V2: Update spacing and types assigned to pte_update calls.
---
 arch/powerpc/Kconfig |  1 +
 arch/powerpc/include/asm/book3s/32/pgtable.h |  9 -
 arch/powerpc/include/asm/book3s/64/pgtable.h | 18 +++---
 arch/powerpc/include/asm/nohash/32/pgtable.h |  7 ++-
 arch/powerpc/include/asm/nohash/64/pgtable.h |  8 ++--
 arch/powerpc/include/asm/nohash/pgtable.h|  1 +
 6 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4c466acdc70d..6c213ac46a92 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -149,6 +149,7 @@ config PPC
select ARCH_STACKWALK
select ARCH_SUPPORTS_ATOMIC_RMW
select ARCH_SUPPORTS_DEBUG_PAGEALLOCif PPC_BOOK3S || PPC_8xx || 40x
+   select ARCH_SUPPORTS_PAGE_TABLE_CHECK
select ARCH_USE_BUILTIN_BSWAP
select ARCH_USE_CMPXCHG_LOCKREF if PPC64
select ARCH_USE_MEMTEST
diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h 
b/arch/powerpc/include/asm/book3s/32/pgtable.h
index 40041ac713d9..e76aca557d48 100644
--- a/arch/powerpc/include/asm/book3s/32/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/32/pgtable.h
@@ -53,6 +53,8 @@
 
 #ifndef __ASSEMBLY__
 
+#include <linux/page_table_check.h>
+
 static inline bool pte_user(pte_t pte)
 {
return pte_val(pte) & _PAGE_USER;
@@ -353,7 +355,11 @@ static inline int __ptep_test_and_clear_young(struct 
mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm, unsigned long 
addr,
   pte_t *ptep)
 {
-   return __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~_PAGE_HASHPTE, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_SET_WRPROTECT
@@ -541,6 +547,7 @@ static inline pte_t pte_modify(pte_t pte, pgprot_t newprot)
 static inline void __set_pte_at(struct mm_struct *mm, unsigned long addr,
pte_t *ptep, pte_t pte, int percpu)
 {
+   page_table_check_pte_set(mm, addr, ptep, pte);
 #if defined(CONFIG_SMP) && !defined(CONFIG_PTE_64BIT)
/* First case is 32-bit Hash MMU in SMP mode with 32-bit PTEs. We use 
the
 * helper pte_update() which does an atomic update. We need to do that
diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h 
b/arch/powerpc/include/asm/book3s/64/pgtable.h
index 3083111f9d0a..b5c5718d9b90 100644
--- a/arch/powerpc/include/asm/book3s/64/pgtable.h
+++ b/arch/powerpc/include/asm/book3s/64/pgtable.h
@@ -181,6 +181,8 @@
 #define PAGE_AGP   (PAGE_KERNEL_NC)
 
 #ifndef __ASSEMBLY__
+#include <linux/page_table_check.h>
+
 /*
  * page table defines
  */
@@ -484,8 +486,11 @@ static inline void huge_ptep_set_wrprotect(struct 
mm_struct *mm,
 static inline pte_t ptep_get_and_clear(struct mm_struct *mm,
   unsigned long addr, pte_t *ptep)
 {
-   unsigned long old = pte_update(mm, addr, ptep, ~0UL, 0, 0);
-   return __pte(old);
+   pte_t old_pte = __pte(pte_update(mm, addr, ptep, ~0UL, 0, 0));
+
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
 }
 
 #define __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL
@@ -494,11 +499,16 @@ static inline pte_t ptep_get_and_clear_full(struct 
mm_struct *mm,
pte_t *ptep, int full)
 {
if (full && radix_enabled()) {
+   pte_t old_pte;
+
/*
 * We know that this is a full mm pte clear and
 * hence can be sure there is no parallel set_pte.
 */
-   return radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   old_pte = radix__ptep_get_and_clear_full(mm, addr, ptep, full);
+   page_table_check_pte_clear(mm, addr, old_pte);
+
+   return old_pte;
}
return ptep_get_and_clear(mm, addr, ptep);
 }
@@ -884,6 +894,8 @@ static inline void __set_pte_at(struct mm_struct *mm, 
unsigned long addr,
 */
pte = __pte_raw(pte_raw(pte) | cpu_to_be64(_PAGE_PTE));
 
+ 

[PATCH v6 25/25] powerpc/64e: Clear gprs on interrupt routine entry on Book3E

2022-09-21 Thread Rohan McLure
Zero GPRs r14-r31 on entry into the kernel for interrupt sources to
limit the influence of user-space values on potential speculation
gadgets. Prior to this commit, all other GPRs are reassigned during the
common prologue to interrupt handlers and so need not be zeroised
explicitly.

This may be done safely, without loss of register state prior to the
interrupt, as the common prologue saves the initial values of
non-volatiles, which are unconditionally restored in interrupt_64.S.
The mitigation is enabled by default through INTERRUPT_SANITIZE_REGISTERS.

Signed-off-by: Rohan McLure 
---
V4: New patch.
V5: Depend on Kconfig option. Remove ZEROIZE_NVGPRS on bad kernel
stack handler.
V6: Change name of zeroize macro to be consistent with Book3S.
---
 arch/powerpc/kernel/exceptions-64e.S | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64e.S 
b/arch/powerpc/kernel/exceptions-64e.S
index f1d714acc945..d55c0a368dcb 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -366,6 +366,11 @@ ret_from_mc_except:
std r14,PACA_EXMC+EX_R14(r13);  \
std r15,PACA_EXMC+EX_R15(r13)
 
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_ZEROIZE_NVGPRS()  ZEROIZE_NVGPRS()
+#else
+#define SANITIZE_ZEROIZE_NVGPRS()
+#endif
 
 /* Core exception code for all exceptions except TLB misses. */
 #define EXCEPTION_COMMON_LVL(n, scratch, excf) \
@@ -402,7 +407,8 @@ exc_##n##_common:   
\
std r12,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */   \
std r3,_TRAP(r1);   /* set trap number  */  \
std r0,RESULT(r1);  /* clear regs->result */\
-   SAVE_NVGPRS(r1);
+   SAVE_NVGPRS(r1);\
+   SANITIZE_ZEROIZE_NVGPRS();  /* minimise speculation influence */
 
 #define EXCEPTION_COMMON(n) \
EXCEPTION_COMMON_LVL(n, SPRN_SPRG_GEN_SCRATCH, PACA_EXGEN)
-- 
2.34.1



[PATCH v6 24/25] powerpc/64s: Clear gprs on interrupt routine entry in Book3S

2022-09-21 Thread Rohan McLure
Zero GPRs r0, r2-r11 and r14-r31 on entry into the kernel for all
other interrupt sources to limit the influence of user-space values
on potential speculation gadgets. The remaining GPRs are overwritten by
entry macros to interrupt handlers, irrespective of whether or not a
given handler consumes these register values.

Prior to this commit, r14-r31 are restored on a per-interrupt basis at
exit, but now they are always restored. Remove explicit REST_NVGPRS
invocations as non-volatiles must now always be restored. 32-bit systems
do not clear user registers on interrupt, and continue to depend on the
return value of interrupt_exit_user_prepare to determine whether or not
to restore non-volatiles.

The mmap_bench benchmark in selftests rapidly invokes page faults, and
shows a ~0.8% performance regression with this mitigation. This
indicates worst-case overhead, as page faults exercise comparatively
heavyweight interrupt handlers. The mitigation is disabled by default,
but can be enabled with CONFIG_INTERRUPT_SANITIZE_REGISTERS.

Signed-off-by: Rohan McLure 
---
V2: Add benchmark data
V3: Use ZEROIZE_GPR{,S} macro renames, clarify
interrupt_exit_user_prepare changes in summary.
V5: Configurable now with INTERRUPT_SANITIZE_REGISTERS. Zero r12
(containing MSR) from common macro on per-interrupt basis with IOPTION.
V6: Replace ifdefs with invocations of conditionally defined macros.
---
 arch/powerpc/kernel/exceptions-64s.S | 47 +-
 arch/powerpc/kernel/interrupt_64.S   | 15 
 2 files changed, 53 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index a3b51441b039..b3f5ef1c712f 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,6 +21,19 @@
 #include 
 #include 
 
+/*
+ * macros for handling user register sanitisation
+ */
+#ifdef CONFIG_INTERRUPT_SANITIZE_REGISTERS
+#define SANITIZE_ZEROIZE_NVGPRS()  ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()  REST_NVGPRS(r1)
+#define HANDLER_RESTORE_NVGPRS()
+#else
+#define SANITIZE_ZEROIZE_NVGPRS()
+#define SANITIZE_RESTORE_NVGPRS()
+#define HANDLER_RESTORE_NVGPRS()   REST_NVGPRS(r1)
+#endif /* CONFIG_INTERRUPT_SANITIZE_REGISTERS */
+
 /*
  * Following are fixed section helper macros.
  *
@@ -111,6 +124,7 @@ name:
 #define ISTACK .L_ISTACK_\name\()  /* Set regular kernel stack */
 #define __ISTACK(name) .L_ISTACK_ ## name
 #define IKUAP  .L_IKUAP_\name\()   /* Do KUAP lock */
+#define IMSR_R12   .L_IMSR_R12_\name\()/* Assumes MSR saved to r12 */
 
 #define INT_DEFINE_BEGIN(n)\
 .macro int_define_ ## n name
@@ -176,6 +190,9 @@ do_define_int n
.ifndef IKUAP
IKUAP=1
.endif
+   .ifndef IMSR_R12
+   IMSR_R12=0
+   .endif
 .endm
 
 /*
@@ -502,6 +519,7 @@ DEFINE_FIXED_SYMBOL(\name\()_common_real, text)
std r10,0(r1)   /* make stack chain pointer */
std r0,GPR0(r1) /* save r0 in stackframe*/
std r10,GPR1(r1)/* save r1 in stackframe*/
+   ZEROIZE_GPR(0)
 
/* Mark our [H]SRRs valid for return */
li  r10,1
@@ -544,8 +562,14 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
std r9,GPR11(r1)
std r10,GPR12(r1)
std r11,GPR13(r1)
+   .if !IMSR_R12
+   ZEROIZE_GPRS(9, 12)
+   .else
+   ZEROIZE_GPRS(9, 11)
+   .endif
 
SAVE_NVGPRS(r1)
+   SANITIZE_ZEROIZE_NVGPRS()
 
.if IDAR
.if IISIDE
@@ -577,8 +601,8 @@ BEGIN_FTR_SECTION
 END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
ld  r10,IAREA+EX_CTR(r13)
std r10,_CTR(r1)
-   std r2,GPR2(r1) /* save r2 in stackframe*/
-   SAVE_GPRS(3, 8, r1) /* save r3 - r8 in stackframe   */
+   SAVE_GPRS(2, 8, r1) /* save r2 - r8 in stackframe   */
+   ZEROIZE_GPRS(2, 8)
mflrr9  /* Get LR, later save to stack  */
ld  r2,PACATOC(r13) /* get kernel TOC into r2   */
std r9,_LINK(r1)
@@ -696,6 +720,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
mtlrr9
ld  r9,_CCR(r1)
mtcrr9
+   SANITIZE_RESTORE_NVGPRS()
REST_GPRS(2, 13, r1)
REST_GPR(0, r1)
/* restore original r1. */
@@ -1372,7 +1397,7 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 * do_break() may have changed the NV GPRS while handling a breakpoint.
 * If so, we need to restore them with their updated values.
 */
-   REST_NVGPRS(r1)
+   HANDLER_RESTORE_NVGPRS()
b   interrupt_return_srr
 
 
@@ -1598,7 +1623,7 @@ EXC_COMMON_BEGIN(alignment_common)
GEN_COMMON alignment
addir3,r1,STACK_FRAME_OVERHEAD
bl  alignment_exception
-   REST_NVGPRS(r1) /* instruction emulation may change GPRs

[PATCH v6 23/25] powerpc/64: Add INTERRUPT_SANITIZE_REGISTERS Kconfig

2022-09-21 Thread Rohan McLure
Add a Kconfig option for enabling the clearing of registers on arrival
in an interrupt handler. This reduces the influence of user register
state on speculation within kernel internals. The option will be
consumed by 64-bit systems that feature speculation and wish to
implement this mitigation.

This patch only introduces the Kconfig option, no actual mitigations.

The primary overhead of this mitigation lies in an increased number of
registers that must be saved and restored by interrupt handlers on
Book3S systems. Enable by default on Book3E systems, which prior to
this patch already eagerly save and restore register state, meaning the
mitigation will have minimal overhead there when implemented.

Signed-off-by: Rohan McLure 
Acked-by: Nicholas Piggin 
---
V5: New patch
---
 arch/powerpc/Kconfig | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index ef6c83e79c9b..a643ebd83349 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -528,6 +528,15 @@ config HOTPLUG_CPU
 
  Say N if you are unsure.
 
+config INTERRUPT_SANITIZE_REGISTERS
+   bool "Clear gprs on interrupt arrival"
+   depends on PPC64 && ARCH_HAS_SYSCALL_WRAPPER
+   default PPC_BOOK3E_64
+   help
+ Reduce the influence of user register state on interrupt handlers and
+ syscalls through clearing user state from registers before handling
+ the exception.
+
 config PPC_QUEUED_SPINLOCKS
bool "Queued spinlocks" if EXPERT
depends on SMP
-- 
2.34.1



[PATCH v6 22/25] powerpc/64s: Clear user GPRs in syscall interrupt entry

2022-09-21 Thread Rohan McLure
Clear user state in GPRs (assign to zero) to reduce the influence of
user registers on speculation within kernel syscall handlers. Clears
occur at the very beginning of the sc and scv 0 interrupt handlers, with
restores occurring after the syscall handler has executed.
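
A stand-alone C model of the clear-then-restore sequence, assuming only
the macro semantics above (ZEROIZE_* clears at entry, SAVE_NVGPRS and
REST_NVGPRS save and restore non-volatiles); the register ranges mirror
the patch, everything else is invented.

#include <stdio.h>
#include <string.h>

struct regs { unsigned long gpr[32]; };

static void syscall_body(const struct regs *r)
{
	/* user non-volatile contents are no longer visible here */
	printf("r14 as seen by the handler: %#lx\n", r->gpr[14]);
}

static void syscall_entry(struct regs *r)
{
	struct regs saved = *r;	/* SAVE_NVGPRS: stash user state */

	r->gpr[0] = 0;					/* ZEROIZE_GPR(0) */
	memset(&r->gpr[5], 0, 8 * sizeof(r->gpr[0]));	/* ZEROIZE_GPRS(5, 12) */
	memset(&r->gpr[14], 0, 18 * sizeof(r->gpr[0]));	/* ZEROIZE_NVGPRS() */

	syscall_body(r);

	/* REST_NVGPRS: non-volatiles restored on the way out */
	memcpy(&r->gpr[14], &saved.gpr[14], 18 * sizeof(r->gpr[0]));
}

int main(void)
{
	struct regs r;

	memset(&r, 0xaa, sizeof(r));	/* pretend user data everywhere */
	syscall_entry(&r);
	printf("r14 after return: %#lx\n", r.gpr[14]);
	return 0;
}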

Signed-off-by: Rohan McLure 
---
V2: Update summary
V3: Remove erroneous summary paragraph on syscall_exit_prepare
V4: Use ZEROIZE instead of NULLIFY. Clear r0 also.
V5: Move to end of patch series.
V6: Include clears which were previously in the syscall wrapper patch.
Move comment on r3-r8 register save to when we alter the calling
convention for system_call_exception.
---
 arch/powerpc/kernel/interrupt_64.S | 17 -
 1 file changed, 16 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S
index a5dd78bdbe6d..40147558e1a6 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -106,6 +106,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 * but this is the best we can do.
 */
 
+   /*
+* Zero user registers to prevent influencing speculative execution
+* state of kernel code.
+*/
+   ZEROIZE_GPR(0)
+   ZEROIZE_GPRS(5, 12)
+   ZEROIZE_NVGPRS()
bl  system_call_exception
 
 .Lsyscall_vectored_\name\()_exit:
@@ -134,6 +141,7 @@ BEGIN_FTR_SECTION
HMT_MEDIUM_LOW
 END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 
+   REST_NVGPRS(r1)
cmpdi   r3,0
bne .Lsyscall_vectored_\name\()_restore_regs
 
@@ -285,6 +293,13 @@ END_BTB_FLUSH_SECTION
wrteei  1
 #endif
 
+   /*
+* Zero user registers to prevent influencing speculative execution
+* state of kernel code.
+*/
+   ZEROIZE_GPR(0)
+   ZEROIZE_GPRS(5, 12)
+   ZEROIZE_NVGPRS()
bl  system_call_exception
 
 .Lsyscall_exit:
@@ -325,6 +340,7 @@ BEGIN_FTR_SECTION
stdcx.  r0,0,r1 /* to clear the reservation */
 END_FTR_SECTION_IFCLR(CPU_FTR_STCX_CHECKS_ADDRESS)
 
+   REST_NVGPRS(r1)
cmpdi   r3,0
bne .Lsyscall_restore_regs
/* Zero volatile regs that may contain sensitive kernel data */
@@ -352,7 +368,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 .Lsyscall_restore_regs:
ld  r3,_CTR(r1)
ld  r4,_XER(r1)
-   REST_NVGPRS(r1)
mtctr   r3
mtspr   SPRN_XER,r4
REST_GPR(0, r1)
-- 
2.34.1



[PATCH v6 21/25] powerpc: Provide syscall wrapper

2022-09-21 Thread Rohan McLure
Implement a syscall wrapper as per s390, x86 and arm64. When enabled,
it causes handlers to accept parameters from a stack frame rather than
from user scratch register state. This allows user registers to be
safely cleared in order to reduce caller influence on speculation
within the syscall routine. The wrapper is a macro that emits syscall
handler symbols that call into the target handler, obtaining its
parameters from a struct pt_regs on the stack.

As registers are already saved to the stack prior to calling
system_call_exception, it appears that this function executes more
efficiently with the new stack-pointer convention than with parameters
passed by registers, avoiding the allocation of a stack frame for this
method. On a 32-bit system, we see >20% performance increases on the
null_syscall microbenchmark, and on a Power 8 the performance gains
amortise the cost of clearing and restoring registers, which is
implemented at the end of this series, for a final result of ~5.6%
performance improvement on null_syscall.

Syscalls are wrapped in this fashion on all platforms except for the
Cell processor, as this commit does not provide SPU support. This can be
fixed in a subsequent patch, but requires spu_sys_callback to
allocate a pt_regs structure to satisfy the wrapped calling convention.
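
The sketch below models what the wrapper conceptually emits, assuming
the gpr[3..8] argument mapping shown in the patch; the two-argument
handler and its names are invented for illustration.

#include <stdio.h>

struct pt_regs { unsigned long gpr[32]; };

/* the real handler body, only reachable through the wrapper */
static long do_sys_add(long a, long b)
{
	return a + b;
}

/* what SYSCALL_DEFINE2(add, ...) would conceptually emit: one symbol
 * taking the saved register file and unpacking its own parameters */
static long sys_add(const struct pt_regs *regs)
{
	return do_sys_add((long)regs->gpr[3], (long)regs->gpr[4]);
}

int main(void)
{
	struct pt_regs regs = { { 0 } };

	regs.gpr[3] = 40;
	regs.gpr[4] = 2;
	printf("sys_add -> %ld\n", sys_add(&regs));
	return 0;
}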

Co-developed-by: Andrew Donnellan 
Signed-off-by: Andrew Donnellan 
Signed-off-by: Rohan McLure 
---
V2: Generate prototypes for symbols produced by the wrapper.
V3: Rebased to remove conflict with 1547db7d1f44
("powerpc: Move system_call_exception() to syscall.c"). Also remove copy
from gpr3 save slot on stackframe to orig_r3's slot. Fix whitespace with
preprocessor defines in system_call_exception.
V5: Move systbl.c syscall wrapper support to this patch. Swap
calling convention for system_call_exception to be (, r0)
V6: Change calling convention for system_call_exception in another
patch.
---
 arch/powerpc/Kconfig   |  1 +
 arch/powerpc/include/asm/syscall.h |  4 +
 arch/powerpc/include/asm/syscall_wrapper.h | 84 
 arch/powerpc/include/asm/syscalls.h| 30 ++-
 arch/powerpc/kernel/syscall.c  |  7 +-
 arch/powerpc/kernel/systbl.c   |  7 ++
 arch/powerpc/kernel/vdso.c |  2 +
 7 files changed, 132 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4c466acdc70d..ef6c83e79c9b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -137,6 +137,7 @@ config PPC
select ARCH_HAS_STRICT_KERNEL_RWX   if (PPC_BOOK3S || PPC_8xx || 
40x) && !HIBERNATION
select ARCH_HAS_STRICT_KERNEL_RWX   if FSL_BOOKE && !HIBERNATION && 
!RANDOMIZE_BASE
select ARCH_HAS_STRICT_MODULE_RWX   if ARCH_HAS_STRICT_KERNEL_RWX
+   select ARCH_HAS_SYSCALL_WRAPPER if !SPU_BASE
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
select ARCH_HAS_UBSAN_SANITIZE_ALL
diff --git a/arch/powerpc/include/asm/syscall.h 
b/arch/powerpc/include/asm/syscall.h
index d2a8dfd5de33..3dd36c5e334a 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -14,8 +14,12 @@
 #include 
 #include 
 
+#ifdef CONFIG_ARCH_HAS_SYSCALL_WRAPPER
+typedef long (*syscall_fn)(const struct pt_regs *);
+#else
 typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long,
   unsigned long, unsigned long, unsigned long);
+#endif
 
 /* ftrace syscalls requires exporting the sys_call_table */
 extern const syscall_fn sys_call_table[];
diff --git a/arch/powerpc/include/asm/syscall_wrapper.h 
b/arch/powerpc/include/asm/syscall_wrapper.h
new file mode 100644
index ..91bcfa40f740
--- /dev/null
+++ b/arch/powerpc/include/asm/syscall_wrapper.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * syscall_wrapper.h - powerpc specific wrappers to syscall definitions
+ *
+ * Based on arch/{x86,arm64}/include/asm/syscall_wrapper.h
+ */
+
+#ifndef __ASM_SYSCALL_WRAPPER_H
+#define __ASM_SYSCALL_WRAPPER_H
+
+struct pt_regs;
+
+#define SC_POWERPC_REGS_TO_ARGS(x, ...)\
+   __MAP(x,__SC_ARGS   \
+ ,,regs->gpr[3],,regs->gpr[4],,regs->gpr[5]\
+ ,,regs->gpr[6],,regs->gpr[7],,regs->gpr[8])
+
+#ifdef CONFIG_COMPAT
+
+#define COMPAT_SYSCALL_DEFINEx(x, name, ...)   
\
+   long __powerpc_compat_sys##name(const struct pt_regs *regs);
\
+   ALLOW_ERROR_INJECTION(__powerpc_compat_sys##name, ERRNO);   
\
+   static long __se_compat_sys##name(__MAP(x,__SC_LONG,__VA_ARGS__));  
\
+   static inline long 
__do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__));   \
+   lon

[PATCH v6 20/25] powerpc: Change system_call_exception calling convention

2022-09-21 Thread Rohan McLure
Change the system_call_exception arguments to pass a pointer to a stack
frame containing caller state, as well as the original r0, which
determines the number of the syscall. This has been observed to yield
improved performance over passing the arguments by registers, as it
circumvents the need to allocate a stack frame.

Signed-off-by: Rohan McLure 
---
V6: Split off from syscall wrapper patch.
---
 arch/powerpc/include/asm/interrupt.h |  3 +--
 arch/powerpc/kernel/entry_32.S   |  6 +++---
 arch/powerpc/kernel/interrupt_64.S   | 20 ++--
 arch/powerpc/kernel/syscall.c| 10 +-
 4 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index 8069dbc4b8d1..48eec9cd1429 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -665,8 +665,7 @@ static inline void interrupt_cond_local_irq_enable(struct 
pt_regs *regs)
local_irq_enable();
 }
 
-long system_call_exception(long r3, long r4, long r5, long r6, long r7, long 
r8,
-  unsigned long r0, struct pt_regs *regs);
+long system_call_exception(struct pt_regs *regs, unsigned long r0);
 notrace unsigned long syscall_exit_prepare(unsigned long r3, struct pt_regs 
*regs, long scv);
 notrace unsigned long interrupt_exit_user_prepare(struct pt_regs *regs);
 notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs);
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index e4b694cebc44..96782aa72083 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -122,9 +122,9 @@ transfer_to_syscall:
SAVE_NVGPRS(r1)
kuep_lock
 
-   /* Calling convention has r9 = orig r0, r10 = regs */
-   addir10,r1,STACK_FRAME_OVERHEAD
-   mr  r9,r0
+   /* Calling convention has r3 = regs, r4 = orig r0 */
+   addir3,r1,STACK_FRAME_OVERHEAD
+   mr  r4,r0
bl  system_call_exception
 
 ret_from_syscall:
diff --git a/arch/powerpc/kernel/interrupt_64.S 
b/arch/powerpc/kernel/interrupt_64.S
index 7d92a7a54727..a5dd78bdbe6d 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -70,7 +70,7 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
ld  r2,PACATOC(r13)
mfcrr12
li  r11,0
-   /* Can we avoid saving r3-r8 in common case? */
+   /* Save syscall parameters in r3-r8 */
SAVE_GPRS(3, 8, r1)
/* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
@@ -87,9 +87,11 @@ _ASM_NOKPROBE_SYMBOL(system_call_vectored_\name)
std r11,_TRAP(r1)
std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
-   addir10,r1,STACK_FRAME_OVERHEAD
+   /* Calling convention has r3 = regs, r4 = orig r0 */
+   addir3,r1,STACK_FRAME_OVERHEAD
+   mr  r4,r0
ld  r11,exception_marker@toc(r2)
-   std r11,-16(r10)/* "regshere" marker */
+   std r11,-16(r3) /* "regshere" marker */
 
 BEGIN_FTR_SECTION
HMT_MEDIUM
@@ -104,8 +106,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_HAS_PPR)
 * but this is the best we can do.
 */
 
-   /* Calling convention has r9 = orig r0, r10 = regs */
-   mr  r9,r0
bl  system_call_exception
 
 .Lsyscall_vectored_\name\()_exit:
@@ -237,7 +237,7 @@ END_BTB_FLUSH_SECTION
ld  r2,PACATOC(r13)
mfcrr12
li  r11,0
-   /* Can we avoid saving r3-r8 in common case? */
+   /* Save syscall parameters in r3-r8 */
SAVE_GPRS(3, 8, r1)
/* Zero r9-r12, this should only be required when restoring all GPRs */
std r11,GPR9(r1)
@@ -260,9 +260,11 @@ END_BTB_FLUSH_SECTION
std r11,_TRAP(r1)
std r12,_CCR(r1)
std r3,ORIG_GPR3(r1)
-   addir10,r1,STACK_FRAME_OVERHEAD
+   /* Calling convention has r3 = regs, r4 = orig r0 */
+   addir3,r1,STACK_FRAME_OVERHEAD
+   mr  r4,r0
ld  r11,exception_marker@toc(r2)
-   std r11,-16(r10)/* "regshere" marker */
+   std r11,-16(r3) /* "regshere" marker */
 
 #ifdef CONFIG_PPC_BOOK3S
li  r11,1
@@ -283,8 +285,6 @@ END_BTB_FLUSH_SECTION
wrteei  1
 #endif
 
-   /* Calling convention has r9 = orig r0, r10 = regs */
-   mr  r9,r0
bl  system_call_exception
 
 .Lsyscall_exit:
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 15af0ed019a7..0e9ba3efee94 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -13,9 +13,7 @@
 
 
 /* Has to run notrace because it is entered not completely "reconciled" */
-notrace long system_call_exception(long r3, long r4, long r5,
- 

[PATCH v6 19/25] powerpc: Remove high-order word clearing on compat syscall entry

2022-09-21 Thread Rohan McLure
Remove the explicit clearing of the high-order word of user parameters
when handling compatibility syscalls in system_call_exception. The
COMPAT_SYSCALL_DEFINEx macros handle this clearing through an
explicit cast to the signature type of the target handler.
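
A small demonstration, on a 64-bit build, of the zero-extension the
COMPAT_SYSCALL_DEFINEx cast provides, which is what makes the removed
masking redundant; u32 stands in for the compat parameter type.

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	/* a 64-bit register with stale high-word bits from user space */
	unsigned long r3 = 0xdeadbeef00000004UL;

	/* casting through the compat parameter type keeps the low 32 bits,
	 * exactly what the removed r3 &= 0xffffffffULL used to do */
	uint32_t fd = (uint32_t)r3;

	printf("raw=%#lx compat=%#x\n", r3, fd);
	return 0;
}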

Signed-off-by: Rohan McLure 
Reported-by: Nicholas Piggin 
---
V6: New patch
---
 arch/powerpc/kernel/syscall.c | 8 
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 9875486f6168..15af0ed019a7 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -157,14 +157,6 @@ notrace long system_call_exception(long r3, long r4, long 
r5,
 
if (unlikely(is_compat_task())) {
f = (void *)compat_sys_call_table[r0];
-
-   r3 &= 0xULL;
-   r4 &= 0xULL;
-   r5 &= 0xULL;
-   r6 &= 0xULL;
-   r7 &= 0xULL;
-   r8 &= 0xULL;
-
} else {
f = (void *)sys_call_table[r0];
}
-- 
2.34.1



[PATCH v6 18/25] powerpc: Use common syscall handler type

2022-09-21 Thread Rohan McLure
Cause syscall handlers to be typed as follows when called indirectly
throughout the kernel. This is to allow for better type checking.

typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long,
   unsigned long, unsigned long, unsigned long);

Since both the 32 and 64-bit ABIs allow for at least the first six
machine-word length parameters to a function to be passed by registers,
even handlers which admit fewer than six parameters may be viewed as
having the above type.

Coercing syscalls to syscall_fn requires a cast to void* to avoid
-Wcast-function-type.

Fix up comparisons in the VDSO to avoid pointer-integer comparison.
Introduce an explicit cast on systems with SPUs.
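
The stand-alone model below shows the idea, assuming, as above, an ABI
that passes the first six machine-word arguments in registers so that
calling a shorter handler through the six-argument type is benign; the
handler names are invented.

#include <stdio.h>

typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long,
			   unsigned long, unsigned long, unsigned long);

static long sys_getpid_model(void) { return 1234; }
static long sys_dup_model(unsigned long fd) { return (long)fd; }

/* the void * detour avoids -Wcast-function-type, as in systbl.c; calls
 * through the wider type rely on the register-argument ABI property */
static const syscall_fn table[] = {
	(syscall_fn)(void *)sys_getpid_model,
	(syscall_fn)(void *)sys_dup_model,
};

int main(void)
{
	printf("%ld\n", table[0](0, 0, 0, 0, 0, 0));
	printf("%ld\n", table[1](7, 0, 0, 0, 0, 0));
	return 0;
}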

Signed-off-by: Rohan McLure 
Reviewed-by: Nicholas Piggin 
---
V2: New patch.
V3: Remove unnecessary cast from const syscall_fn to syscall_fn
V5: Update patch description.
V6: Remove syscall_fn typedef in syscall.c. Comment void* cast.
---
 arch/powerpc/include/asm/syscall.h  |  7 +--
 arch/powerpc/include/asm/syscalls.h |  1 +
 arch/powerpc/kernel/syscall.c   |  2 --
 arch/powerpc/kernel/systbl.c| 11 ---
 arch/powerpc/kernel/vdso.c  |  4 ++--
 arch/powerpc/platforms/cell/spu_callbacks.c |  6 +++---
 6 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/include/asm/syscall.h 
b/arch/powerpc/include/asm/syscall.h
index 25fc8ad9a27a..d2a8dfd5de33 100644
--- a/arch/powerpc/include/asm/syscall.h
+++ b/arch/powerpc/include/asm/syscall.h
@@ -14,9 +14,12 @@
 #include 
 #include 
 
+typedef long (*syscall_fn)(unsigned long, unsigned long, unsigned long,
+  unsigned long, unsigned long, unsigned long);
+
 /* ftrace syscalls requires exporting the sys_call_table */
-extern const unsigned long sys_call_table[];
-extern const unsigned long compat_sys_call_table[];
+extern const syscall_fn sys_call_table[];
+extern const syscall_fn compat_sys_call_table[];
 
 static inline int syscall_get_nr(struct task_struct *task, struct pt_regs 
*regs)
 {
diff --git a/arch/powerpc/include/asm/syscalls.h 
b/arch/powerpc/include/asm/syscalls.h
index 5d106acf7906..cc87168d6ecb 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -8,6 +8,7 @@
 #include 
 #include 
 
+#include 
 #ifdef CONFIG_PPC64
 #include 
 #endif
diff --git a/arch/powerpc/kernel/syscall.c b/arch/powerpc/kernel/syscall.c
index 64102a64fd84..9875486f6168 100644
--- a/arch/powerpc/kernel/syscall.c
+++ b/arch/powerpc/kernel/syscall.c
@@ -12,8 +12,6 @@
 #include 
 
 
-typedef long (*syscall_fn)(long, long, long, long, long, long);
-
 /* Has to run notrace because it is entered not completely "reconciled" */
 notrace long system_call_exception(long r3, long r4, long r5,
   long r6, long r7, long r8,
diff --git a/arch/powerpc/kernel/systbl.c b/arch/powerpc/kernel/systbl.c
index ce52bd2ec292..d64dfafc2e5e 100644
--- a/arch/powerpc/kernel/systbl.c
+++ b/arch/powerpc/kernel/systbl.c
@@ -16,9 +16,14 @@
 #include 
 
 #define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
-#define __SYSCALL(nr, entry) [nr] = (unsigned long) &entry,
 
-const unsigned long sys_call_table[] = {
+/*
+ * Coercing syscall handlers with arbitrary parameters to the common type
+ * requires a cast to void* to avoid -Wcast-function-type.
+ */
+#define __SYSCALL(nr, entry) [nr] = (void *) entry,
+
+const syscall_fn sys_call_table[] = {
 #ifdef CONFIG_PPC64
 #include 
 #else
@@ -29,7 +34,7 @@ const unsigned long sys_call_table[] = {
 #ifdef CONFIG_COMPAT
 #undef __SYSCALL_WITH_COMPAT
 #define __SYSCALL_WITH_COMPAT(nr, native, compat)  __SYSCALL(nr, compat)
-const unsigned long compat_sys_call_table[] = {
+const syscall_fn compat_sys_call_table[] = {
 #include 
 };
 #endif /* CONFIG_COMPAT */
diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c
index bf9574ec26ce..fcca06d200d3 100644
--- a/arch/powerpc/kernel/vdso.c
+++ b/arch/powerpc/kernel/vdso.c
@@ -304,10 +304,10 @@ static void __init vdso_setup_syscall_map(void)
unsigned int i;
 
for (i = 0; i < NR_syscalls; i++) {
-   if (sys_call_table[i] != (unsigned long)&sys_ni_syscall)
+   if (sys_call_table[i] != (void *)&sys_ni_syscall)
vdso_data->syscall_map[i >> 5] |= 0x8000UL >> (i & 
0x1f);
if (IS_ENABLED(CONFIG_COMPAT) &&
-   compat_sys_call_table[i] != (unsigned long)&sys_ni_syscall)
+   compat_sys_call_table[i] != (void *)&sys_ni_syscall)
vdso_data->compat_syscall_map[i >> 5] |= 0x8000UL 
>> (i & 0x1f);
}
 }
diff --git a/arch/powerpc/platforms/cell/spu_callbacks.c 
b/arch/powerpc/platforms/cell/spu_callbacks.c
index fe0d8797a00a..e780c14c5733 100644
--- a/arch/powerpc/platforms/cell/spu_callbacks.c
+++ b/arch/powerpc/platforms/cell/spu_callbacks.c
@@ -34,1

[PATCH v6 17/25] powerpc: Enable compile-time check for syscall handlers

2022-09-21 Thread Rohan McLure
The table of syscall handlers and registered compatibility syscall
handlers has in the past been produced using assembly, with function
references resolved at link time. This moves link-time errors to
compile time, by rewriting systbl.S in C and including the
linux/syscalls.h, linux/compat.h and asm/syscalls.h headers for
prototypes.

Reported-by: Arnd Bergmann 
Signed-off-by: Rohan McLure 
Reviewed-by: Nicholas Piggin 
---
V2: New patch.
V5: For this patch only, represent handler function pointers as
unsigned long. Remove reference to syscall wrappers. Use asm/syscalls.h
which implies asm/syscall.h
---
 arch/powerpc/kernel/{systbl.S => systbl.c} | 28 
 1 file changed, 11 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/kernel/systbl.S b/arch/powerpc/kernel/systbl.c
similarity index 61%
rename from arch/powerpc/kernel/systbl.S
rename to arch/powerpc/kernel/systbl.c
index 6c1db3b6de2d..ce52bd2ec292 100644
--- a/arch/powerpc/kernel/systbl.S
+++ b/arch/powerpc/kernel/systbl.c
@@ -10,32 +10,26 @@
  * PPC64 updates by Dave Engebretsen (engeb...@us.ibm.com) 
  */
 
-#include 
+#include 
+#include 
+#include 
+#include 
 
-.section .rodata,"a"
+#define __SYSCALL_WITH_COMPAT(nr, entry, compat) __SYSCALL(nr, entry)
+#define __SYSCALL(nr, entry) [nr] = (unsigned long) &entry,
 
-#ifdef CONFIG_PPC64
-   .p2align3
-#define __SYSCALL(nr, entry)   .8byte entry
-#else
-   .p2align2
-#define __SYSCALL(nr, entry)   .long entry
-#endif
-
-#define __SYSCALL_WITH_COMPAT(nr, native, compat)  __SYSCALL(nr, native)
-.globl sys_call_table
-sys_call_table:
+const unsigned long sys_call_table[] = {
 #ifdef CONFIG_PPC64
 #include 
 #else
 #include 
 #endif
+};
 
 #ifdef CONFIG_COMPAT
 #undef __SYSCALL_WITH_COMPAT
 #define __SYSCALL_WITH_COMPAT(nr, native, compat)  __SYSCALL(nr, compat)
-.globl compat_sys_call_table
-compat_sys_call_table:
-#define compat_sys_sigsuspend  sys_sigsuspend
+const unsigned long compat_sys_call_table[] = {
 #include 
-#endif
+};
+#endif /* CONFIG_COMPAT */
-- 
2.34.1



[PATCH v6 16/25] powerpc: Include all arch-specific syscall prototypes

2022-09-21 Thread Rohan McLure
Forward declare in asm/syscalls.h all syscall handler prototypes for
which a generic prototype is not provided in either linux/syscalls.h or
linux/compat.h. This is required for compile-time type-checking of
syscall handlers, which is implemented later in this series.

32-bit compatibility syscall handlers are expressed in terms of types in
ppc32.h. Expose this header globally.

Signed-off-by: Rohan McLure 
Acked-by: Nicholas Piggin 
---
V2: Explicitly include prototypes.
V3: Remove extraneous #include  and ppc_fallocate
prototype. Rename header.
V5: Clean. Elaborate comment on long long munging. Remove
prototype hiding conditional on SYSCALL_WRAPPER.
---
 arch/powerpc/include/asm/syscalls.h  | 97 ++
 .../ppc32.h => include/asm/syscalls_32.h}|  0
 arch/powerpc/kernel/signal_32.c  |  2 +-
 arch/powerpc/perf/callchain_32.c |  2 +-
 4 files changed, 77 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/syscalls.h 
b/arch/powerpc/include/asm/syscalls.h
index 525d2aa0c8ca..5d106acf7906 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -8,6 +8,14 @@
 #include 
 #include 
 
+#ifdef CONFIG_PPC64
+#include 
+#endif
+#include 
+#include 
+
+struct rtas_args;
+
 /*
  * long long munging:
  * The 32 bit ABI passes long longs in an odd even register pair.
@@ -20,44 +28,89 @@
 #define merge_64(high, low) ((u64)high << 32) | low
 #endif
 
-struct rtas_args;
+long sys_ni_syscall(void);
+
+/*
+ * PowerPC architecture-specific syscalls
+ */
+
+long sys_rtas(struct rtas_args __user *uargs);
+
+#ifdef CONFIG_PPC64
+long sys_ppc64_personality(unsigned long personality);
+#ifdef CONFIG_COMPAT
+long compat_sys_ppc64_personality(unsigned long personality);
+#endif /* CONFIG_COMPAT */
+#endif /* CONFIG_PPC64 */
 
+long sys_swapcontext(struct ucontext __user *old_ctx,
+struct ucontext __user *new_ctx, long ctx_size);
 long sys_mmap(unsigned long addr, size_t len,
  unsigned long prot, unsigned long flags,
  unsigned long fd, off_t offset);
 long sys_mmap2(unsigned long addr, size_t len,
   unsigned long prot, unsigned long flags,
   unsigned long fd, unsigned long pgoff);
-long sys_ppc64_personality(unsigned long personality);
-long sys_rtas(struct rtas_args __user *uargs);
-long sys_ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
- u32 len_high, u32 len_low);
+long sys_switch_endian(void);
 
-#ifdef CONFIG_COMPAT
-unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
-  unsigned long prot, unsigned long flags,
-  unsigned long fd, unsigned long pgoff);
-
-compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, 
compat_size_t count,
- u32 reg6, u32 pos1, u32 pos2);
+#ifdef CONFIG_PPC32
+long sys_sigreturn(void);
+long sys_debug_setcontext(struct ucontext __user *ctx, int ndbg,
+ struct sig_dbg_op __user *dbg);
+#endif
 
-compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, 
compat_size_t count,
-  u32 reg6, u32 pos1, u32 pos2);
+long sys_rt_sigreturn(void);
 
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, 
u32 count);
+long sys_subpage_prot(unsigned long addr,
+ unsigned long len, u32 __user *map);
 
-int compat_sys_truncate64(const char __user *path, u32 reg4,
- unsigned long len1, unsigned long len2);
+#ifdef CONFIG_COMPAT
+long compat_sys_swapcontext(struct ucontext32 __user *old_ctx,
+   struct ucontext32 __user *new_ctx,
+   int ctx_size);
+long compat_sys_old_getrlimit(unsigned int resource,
+ struct compat_rlimit __user *rlim);
+long compat_sys_sigreturn(void);
+long compat_sys_rt_sigreturn(void);
+#endif /* CONFIG_COMPAT */
 
-int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
-  unsigned long len2);
+/*
+ * Architecture specific signatures required by long long munging:
+ * The 32 bit ABI passes long longs in an odd even register pair.
+ * The following signatures provide a machine long parameter for
+ * each register that will be supplied. The implementation is
+ * responsible for combining parameter pairs.
+ */
 
+#ifdef CONFIG_COMPAT
+long compat_sys_mmap2(unsigned long addr, size_t len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff);
+long compat_sys_ppc_pread64(unsigned int fd,
+   char __user *ubuf, compat_size_t count,
+   u32 reg6, u32 pos1, u32 pos2);
+long compat_sys_ppc_pwrite64(unsigned int fd,
+const char __user *ubuf, compat_siz

[PATCH v6 15/25] powerpc: Adopt SYSCALL_DEFINE for arch-specific syscall handlers

2022-09-21 Thread Rohan McLure
Arch-specific implementations of syscall handlers are currently used
over generic implementations for the following reasons:

1. Semantics unique to powerpc
2. Compatibility syscalls require 'argument padding' to comply with the
   64-bit argument convention in the ELF32 ABI (see the sketch after
   this list).
3. Parameter types or order is different in other architectures.
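
A sketch of reason 2, reusing the series' merge_64 convention to
recombine a 64-bit length passed as a pair of 32-bit halves; the handler
name is invented.

#include <stdint.h>
#include <stdio.h>

#define merge_64(high, low) (((uint64_t)(high) << 32) | (low))

/* ELF32 passes a 64-bit argument in an odd/even register pair, so the
 * compat handler takes two u32 halves and recombines them itself */
static long compat_ppc_truncate64_model(const char *path, uint32_t reg4,
					uint32_t len1, uint32_t len2)
{
	uint64_t len = merge_64(len1, len2);

	(void)reg4;	/* pad slot required by the pairing, unused */
	printf("truncate %s to %llu bytes\n", path, (unsigned long long)len);
	return 0;
}

int main(void)
{
	/* 4 GiB arrives split across two 32-bit registers */
	return (int)compat_ppc_truncate64_model("/tmp/f", 0, 0x1, 0x0);
}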

These syscall handlers have been defined prior to this patch series
without invoking the SYSCALL_DEFINE or COMPAT_SYSCALL_DEFINE macros with
custom input and output types. We remove every such direct definition in
favour of the aforementioned macros.

Also update syscalls.tbl in order to refer to the symbol names generated
by each of these macros. Since ppc64_personality can be called by both
64-bit and 32-bit binaries through compatibility, we must generate
both compat_sys_ and sys_ symbols for this handler.

As an aside:
A number of architectures including arm and powerpc agree on an
alternative argument order and numbering for most of these arch-specific
handlers. A future patch series may allow for asm/unistd.h to signal
through its defines that a generic implementation of these syscall
handlers with the correct calling convention be emitted, through the
__ARCH_WANT_COMPAT_SYS_... convention.

Signed-off-by: Rohan McLure 
Reviewed-by: Nicholas Piggin 
---
V2: All syscall handlers wrapped by this macro.
V3: Move creation of do_ppc64_personality helper to prior patch.
V4: Fix parenthesis alignment. Don't emit sys_*** symbols.
V5: Use 'aside' in the asm-generic rant in commit message.
---
 arch/powerpc/include/asm/syscalls.h  | 10 ++---
 arch/powerpc/kernel/sys_ppc32.c  | 38 +++---
 arch/powerpc/kernel/syscalls.c   | 17 ++--
 arch/powerpc/kernel/syscalls/syscall.tbl | 22 +-
 .../arch/powerpc/entry/syscalls/syscall.tbl  | 22 +-
 5 files changed, 64 insertions(+), 45 deletions(-)

diff --git a/arch/powerpc/include/asm/syscalls.h 
b/arch/powerpc/include/asm/syscalls.h
index 20cbd29b1228..525d2aa0c8ca 100644
--- a/arch/powerpc/include/asm/syscalls.h
+++ b/arch/powerpc/include/asm/syscalls.h
@@ -28,10 +28,10 @@ long sys_mmap(unsigned long addr, size_t len,
 long sys_mmap2(unsigned long addr, size_t len,
   unsigned long prot, unsigned long flags,
   unsigned long fd, unsigned long pgoff);
-long ppc64_personality(unsigned long personality);
+long sys_ppc64_personality(unsigned long personality);
 long sys_rtas(struct rtas_args __user *uargs);
-long ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
- u32 len_high, u32 len_low);
+long sys_ppc_fadvise64_64(int fd, int advice, u32 offset_high, u32 offset_low,
+ u32 len_high, u32 len_low);
 
 #ifdef CONFIG_COMPAT
 unsigned long compat_sys_mmap2(unsigned long addr, size_t len,
@@ -52,8 +52,8 @@ int compat_sys_truncate64(const char __user *path, u32 reg4,
 int compat_sys_ftruncate64(unsigned int fd, u32 reg4, unsigned long len1,
   unsigned long len2);
 
-long ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
-size_t len, int advice);
+long compat_sys_ppc32_fadvise64(int fd, u32 unused, u32 offset1, u32 offset2,
+   size_t len, int advice);
 
 long compat_sys_sync_file_range2(int fd, unsigned int flags,
 unsigned int offset1, unsigned int offset2,
diff --git a/arch/powerpc/kernel/sys_ppc32.c b/arch/powerpc/kernel/sys_ppc32.c
index 776ae7565fc5..dcc3c9fd4cfd 100644
--- a/arch/powerpc/kernel/sys_ppc32.c
+++ b/arch/powerpc/kernel/sys_ppc32.c
@@ -47,45 +47,55 @@
 #include 
 #include 
 
-compat_ssize_t compat_sys_pread64(unsigned int fd, char __user *ubuf, 
compat_size_t count,
-u32 reg6, u32 pos1, u32 pos2)
+COMPAT_SYSCALL_DEFINE6(ppc_pread64,
+  unsigned int, fd,
+  char __user *, ubuf, compat_size_t, count,
+  u32, reg6, u32, pos1, u32, pos2)
 {
return ksys_pread64(fd, ubuf, count, merge_64(pos1, pos2));
 }
 
-compat_ssize_t compat_sys_pwrite64(unsigned int fd, const char __user *ubuf, 
compat_size_t count,
- u32 reg6, u32 pos1, u32 pos2)
+COMPAT_SYSCALL_DEFINE6(ppc_pwrite64,
+  unsigned int, fd,
+  const char __user *, ubuf, compat_size_t, count,
+  u32, reg6, u32, pos1, u32, pos2)
 {
return ksys_pwrite64(fd, ubuf, count, merge_64(pos1, pos2));
 }
 
-compat_ssize_t compat_sys_readahead(int fd, u32 r4, u32 offset1, u32 offset2, 
u32 count)
+COMPAT_SYSCALL_DEFINE5(ppc_readahead,
+  int, fd, u32, r4,
+  u32, offset1, u32, offset2, u32, count)
 {
return ksys_readahead(fd, merge_64(offset1, offset2), count);
 }
 
-int compat_sys_truncate64(const char __user * path, u32 reg4,
-   unsigned long len1, unsigned
