Re: [PATCH v8 5/6] powerpc/code-patching: Use temporary mm for Radix MMU

2022-10-24 Thread kernel test robot
Hi Benjamin,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on 8636df94ec917019c4cb744ba0a1f94cf9057790]

url:
https://github.com/intel-lab-lkp/linux/commits/Benjamin-Gray/Use-per-CPU-temporary-mappings-for-patching/20221021-133129
base:   8636df94ec917019c4cb744ba0a1f94cf9057790
patch link:
https://lore.kernel.org/r/20221021052238.580986-6-bgray%40linux.ibm.com
patch subject: [PATCH v8 5/6] powerpc/code-patching: Use temporary mm for Radix MMU
config: powerpc-tqm8xx_defconfig
compiler: powerpc-linux-gcc (GCC) 12.1.0
reproduce (this is a W=1 build):
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # https://github.com/intel-lab-lkp/linux/commit/13c3eee268f72a6f6b47b978b3c472b8bed253d9
        git remote add linux-review https://github.com/intel-lab-lkp/linux
        git fetch --no-tags linux-review Benjamin-Gray/Use-per-CPU-temporary-mappings-for-patching/20221021-133129
        git checkout 13c3eee268f72a6f6b47b978b3c472b8bed253d9
        # save the config file
        mkdir build_dir && cp config build_dir/.config
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash

If you fix the issue, kindly add the following tag where applicable:
| Reported-by: kernel test robot

All errors (new ones prefixed by >>):

   arch/powerpc/lib/code-patching.c: In function '__do_patch_instruction_mm':
>> arch/powerpc/lib/code-patching.c:355:9: error: implicit declaration of function 'local_flush_tlb_page_psize'; did you mean 'local_flush_tlb_page'? [-Werror=implicit-function-declaration]
     355 |         local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
         |         ^~~~~~~~~~~~~~~~~~~~~~~~~~
         |         local_flush_tlb_page
   cc1: all warnings being treated as errors
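
The failing config (tqm8xx, a nohash 8xx platform) has no
local_flush_tlb_page_psize() at this point in the series: the helper is
only introduced for some MMU families. A minimal sketch of the kind of
fallback that resolves this class of build error (the function name
follows the series; the stub body is purely illustrative, a real
implementation must flush the given VA at the given page size on the
local CPU):

	static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
						      unsigned long vmaddr, int psize)
	{
		/* Conservative illustrative fallback: flush the whole mm locally. */
		local_flush_tlb_mm(mm);
	}

The v9 series later in this digest adds proper per-MMU-family
implementations (patch 5/7).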


vim +355 arch/powerpc/lib/code-patching.c

   312  
   313  static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
   314  {
   315  int err;
   316  u32 *patch_addr;
   317  unsigned long text_poke_addr;
   318  pte_t *pte;
   319  unsigned long pfn = get_patch_pfn(addr);
   320  struct mm_struct *patching_mm;
   321  struct temp_mm_state prev;
   322  
   323  patching_mm = __this_cpu_read(cpu_patching_mm);
   324  pte = __this_cpu_read(cpu_patching_pte);
   325  text_poke_addr = __this_cpu_read(cpu_patching_addr);
   326  patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
   327  
   328  if (unlikely(!patching_mm))
   329  return -ENOMEM;
   330  
   331  set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL));
   332  
   333  /* order PTE update before use, also serves as the hwsync */
   334  asm volatile("ptesync": : :"memory");
   335  
   336  /* order context switch after arbitrary prior code */
   337  isync();
   338  
   339  prev = start_using_temp_mm(patching_mm);
   340  
   341  err = __patch_instruction(addr, instr, patch_addr);
   342  
   343  /* hwsync performed by __patch_instruction (sync) if successful */
   344  if (err)
   345  mb();  /* sync */
   346  
   347  /* context synchronisation performed by __patch_instruction (isync or exception) */
   348  stop_using_temp_mm(patching_mm, prev);
   349  
   350  pte_clear(patching_mm, text_poke_addr, pte);
   351  /*
   352   * ptesync to order PTE update before TLB invalidation done
   353   * by radix__local_flush_tlb_page_psize (in _tlbiel_va)
   354   */
  > 355  local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);
   356  
   357  return err;
   358  }
   359  

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp

[PATCH] soc: fsl: qe: Avoid using gpio_to_desc()

2022-10-24 Thread Linus Walleij
We want to get rid of the old GPIO numberspace, so instead of
calling gpio_to_desc() we get the gpio descriptor for the requested
line from the device tree directly without passing through the
GPIO numberspace, and then we get the gpiochip from the descriptor.

Cc: Bartosz Golaszewski 
Cc: linux-g...@vger.kernel.org
Signed-off-by: Linus Walleij 
---
 drivers/soc/fsl/qe/gpio.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/soc/fsl/qe/gpio.c b/drivers/soc/fsl/qe/gpio.c
index 99f7de43c3c6..cc5602b679fe 100644
--- a/drivers/soc/fsl/qe/gpio.c
+++ b/drivers/soc/fsl/qe/gpio.c
@@ -13,10 +13,8 @@
 #include 
 #include 
 #include 
-#include 
+#include 
 #include 
-/* FIXME: needed for gpio_to_chip() get rid of this */
-#include 
 #include 
 #include 
 #include 
@@ -161,6 +159,7 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index)
struct qe_pin *qe_pin;
struct gpio_chip *gc;
struct qe_gpio_chip *qe_gc;
+   struct gpio_desc *gpiod;
int err;
unsigned long flags;
 
@@ -170,14 +169,21 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index)
return ERR_PTR(-ENOMEM);
}
 
-   err = of_get_gpio(np, index);
-   if (err < 0)
+   gpiod = fwnode_gpiod_get_index(of_fwnode_handle(np), NULL, index, GPIOD_ASIS, "qe");
+   if (IS_ERR(gpiod)) {
+   err = PTR_ERR(gpiod);
goto err0;
-   gc = gpio_to_chip(err);
+   }
+   if (!gpiod) {
+   err = -EINVAL;
+   goto err0;
+   }
+   gc = gpiod_to_chip(gpiod);
if (WARN_ON(!gc)) {
err = -ENODEV;
goto err0;
}
+   gpiod_put(gpiod);
 
	if (!of_device_is_compatible(gc->of_node, "fsl,mpc8323-qe-pario-bank")) {
pr_debug("%s: tried to get a non-qe pin\n", __func__);
-- 
2.34.1
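
The conversion above is the standard gpiolib migration away from the
integer GPIO numberspace; schematically (simplified from the diff,
error handling omitted):

	/* Old: look up an integer GPIO, then its chip */
	int gpio = of_get_gpio(np, index);
	struct gpio_chip *gc = gpio_to_chip(gpio);

	/* New: get a descriptor straight from the firmware node */
	struct gpio_desc *gpiod = fwnode_gpiod_get_index(of_fwnode_handle(np),
							 NULL, index, GPIOD_ASIS, "qe");
	struct gpio_chip *gc = gpiod_to_chip(gpiod);
	gpiod_put(gpiod);	/* only the chip is needed past this point */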



Re: [PATCH v1 0/5] convert tree to get_random_u32_{below,above,between}()

2022-10-24 Thread Jason Gunthorpe
On Fri, Oct 21, 2022 at 09:43:58PM -0400, Jason A. Donenfeld wrote:
> Hey everyone,
> 
> Here's the second and final tranche of tree-wide conversions to get
> random integer handling a bit tamer. It's predominantly another
> Coccinelle-based patchset.
> 
> First we s/prandom_u32_max/get_random_u32_below/, since the former is
> just a deprecated alias for the latter. Then in the next commit we can
> remove prandom_u32_max all together. I'm quite happy about finally being
> able to do that. It means that prandom.h is now only for deterministic and 
> repeatable randomness, not non-deterministic/cryptographic randomness.
> That line is no longer blurred.
> 
> Then, in order to clean up a bunch of inefficient patterns, we introduce
> two trivial static inline helper functions built on top of
> get_random_u32_below: get_random_u32_above and get_random_u32_between.
> These are pretty straight forward to use and understand. Then the final
> two patches convert some gnarly open-coded number juggling to use these
> helpers.
> 
> I've used Coccinelle for all the treewide patches, so hopefully review
> is rather uneventful. I didn't accept all of the changes that Coccinelle
> proposed, though, as these tend to be somewhat context-specific. I erred
> on the side of just going with the most obvious cases, at least this
> time through. And then we can address more complicated cases through
> actual maintainer trees.
> 
> Since get_random_u32_below() sits in my random.git tree, these patches
> too will flow through that same tree.
> 
For drivers/infiniband

Reviewed-by: Jason Gunthorpe 

Jason
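
For context, the two helpers described in the cover letter are thin
wrappers over get_random_u32_below(). A plausible sketch, assuming
'above' excludes the floor and 'between' is the half-open range
[floor, ceiling); the authoritative semantics are whatever the series
itself defines:

	static inline u32 get_random_u32_above(u32 floor)
	{
		/* Uniform over (floor, U32_MAX], assuming floor < U32_MAX. */
		return floor + 1 + get_random_u32_below(U32_MAX - floor);
	}

	static inline u32 get_random_u32_between(u32 floor, u32 ceiling)
	{
		/* Uniform over [floor, ceiling), assuming floor < ceiling. */
		return floor + get_random_u32_below(ceiling - floor);
	}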


Re: [PATCH v1 0/5] convert tree to get_random_u32_{below,above,between}()

2022-10-24 Thread Jason A. Donenfeld
On Sun, Oct 23, 2022 at 05:07:13PM -0400, Theodore Ts'o wrote:
> On Fri, Oct 21, 2022 at 11:03:22PM -0700, Jakub Kicinski wrote:
> > On Sat, 22 Oct 2022 07:47:06 +0200 Jason A. Donenfeld wrote:
> > > On Fri, Oct 21, 2022 at 10:32:42PM -0700, Jakub Kicinski wrote:
> > > > But whatever. I mean - hopefully there aren't any conflicts in the ~50
> > > > networking files you touch. I just wish that people didn't pipe up with
> > > > the tree wide changes right after the merge window. Feels like the
> > > > worst possible timing.  
> > > 
> > > Oh, if the timing is what makes this especially worrisome, I have
> > > no qualms about rebasing much later, and reposting this series then.
> > > I'll do that.
> > 
> > Cool, thanks! I promise to not be grumpy if you repost around rc6 :)
> 
> One way of making things less painful for the stable branch and for
> the upstream branch is to *add* new helpers instead of playing
> replacement games like s/prandom_u32_max/get_random_u32_below/.  This
> is what causes the patch conflict problems.
> 
> One advantage of at least adding the new functions to the stable
> branches, even if we don't do the wholesale replacement, is that it

That's a good idea. I'll also save the removal commit, anyhow, for a
separate thing at the end of 6.2 rc1, so that -next doesn't have issues
either.  That's how things wound up going down for the first tranche of
these, and that worked well.

Jason


Re: [PATCH v1 1/2] stackprotector: move CANARY_MASK and get_random_canary() into stackprotector.h

2022-10-24 Thread Philippe Mathieu-Daudé

On 23/10/22 22:32, Jason A. Donenfeld wrote:

This has nothing to do with random.c and everything to do with stack
protectors. Yes, it uses randomness. But many things use randomness.
random.h and random.c are concerned with the generation of randomness,
not with each and every use. So move this function into the more
specific stackprotector.h file where it belongs.

Signed-off-by: Jason A. Donenfeld 
---
  arch/x86/kernel/cpu/common.c   |  2 +-
  arch/x86/kernel/setup_percpu.c |  2 +-
  arch/x86/kernel/smpboot.c  |  2 +-
  arch/x86/xen/enlighten_pv.c|  2 +-
   include/linux/random.h | 19 -------------------
   include/linux/stackprotector.h | 19 +++++++++++++++++++
  kernel/fork.c  |  2 +-
  7 files changed, 24 insertions(+), 24 deletions(-)


Reviewed-by: Philippe Mathieu-Daudé 
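
For reference, the function being moved is a one-liner; roughly (as it
reads after the move, modulo details):

	/* include/linux/stackprotector.h */
	static inline unsigned long get_random_canary(void)
	{
		return get_random_long() & CANARY_MASK;
	}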



Re: [PATCH v8 5/6] powerpc/code-patching: Use temporary mm for Radix MMU

2022-10-24 Thread Christopher M. Riedl
On Mon Oct 24, 2022 at 12:17 AM CDT, Benjamin Gray wrote:
> On Mon, 2022-10-24 at 14:45 +1100, Russell Currey wrote:
> > On Fri, 2022-10-21 at 16:22 +1100, Benjamin Gray wrote:
> > > From: "Christopher M. Riedl" 
> > >

-%<--

> > >
> > > ---
> >
> > Is the section following the --- your addendum to Chris' patch?  That
> > cuts it off from git, including your signoff.  It'd be better to have
> > it together as one commit message and note the bits you contributed
> > below the --- after your signoff.
> >
> > Commits where you're modifying someone else's previous work should
> > include their signoff above yours, as well.
>
> Addendum to his wording, to break it off from the "From..." section
> (which is me splicing together his comments from previous patches with
> some minor changes to account for the patch changes). I found out
> earlier today that Git will treat it as a comment :(
>
> I'll add the signed off by back, I wasn't sure whether to leave it
> there after making changes (same in patch 2).
>  

This commit has lots of my words so should probably keep the sign-off - if only
to guarantee that blame is properly directed at me for any nonsense therein ^^.

Patch 2 probably doesn't need my sign-off any more - iirc, I actually defended
the BUG_ON()s (which are WARN_ON()s now) at some point.


Re: [PATCH v4 2/6] powerpc/module: Handle caller-saved TOC in module linker

2022-10-24 Thread Andrew Donnellan
On Mon, 2022-10-10 at 11:29 +1100, Benjamin Gray wrote:
> > A function symbol may set a value in the st_other field to indicate
> > the TOC should be treated as caller-saved. The linker should ensure
> > the current TOC is saved before calling it and restore the TOC
> > afterwards, much like external calls.

As I suggested on the last revision, it's worth mentioning that this is
the '.localentry , 1' directive we're talking about.

> > 
> > This is necessary for supporting V2 ABI static calls that do not
> > preserve the TOC.
> > 
> > Signed-off-by: Benjamin Gray 
> > ---
> >  arch/powerpc/kernel/module_64.c | 14 +-
> >  1 file changed, 13 insertions(+), 1 deletion(-)
> > 
> > diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
> > index 7e45dc98df8a..83a6f6e22e3b 100644
> > --- a/arch/powerpc/kernel/module_64.c
> > +++ b/arch/powerpc/kernel/module_64.c
> > @@ -55,6 +55,12 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
> >  * of function and try to derive r2 from it). */
> > return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
> >  }
> > +
> > +static bool need_r2save_stub(unsigned char st_other)
> > +{
> > +   return (st_other & STO_PPC64_LOCAL_MASK) == (1 << STO_PPC64_LOCAL_BIT);
> > +}
> > +
> >  #else
> >  
> >  static func_desc_t func_desc(unsigned long addr)
> > @@ -66,6 +72,11 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
> > return 0;
> >  }
> >  
> > +static bool need_r2save_stub(unsigned char st_other)
> > +{
> > +   return false;
> > +}
> > +
> >  void *dereference_module_function_descriptor(struct module *mod, void *ptr)
> >  {
> > if (ptr < (void *)mod->arch.start_opd ||
> > @@ -632,7 +643,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
> > case R_PPC_REL24:
> > /* FIXME: Handle weak symbols here --RR */
> > if (sym->st_shndx == SHN_UNDEF ||
> > -   sym->st_shndx == SHN_LIVEPATCH) {
> > +   sym->st_shndx == SHN_LIVEPATCH ||
> > +   need_r2save_stub(sym->st_other)) {
> > /* External: go via stub */

Perhaps this comment should be updated to mention that there are non-
external but external-like calls?

Otherwise

Reviewed-by: Andrew Donnellan 

> > value = stub_for_addr(sechdrs, value, me,
> >   strtab + sym->st_name);

-- 
Andrew Donnellan  OzLabs, ADL Canberra
a...@linux.ibm.com   IBM Australia Limited
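
For reference on the st_other check above: the ELFv2 '.localentry sym, 1'
directive (sym here standing in for the function symbol) sets the
three-bit STO_PPC64_LOCAL field of sym's st_other to 1, meaning the
function has a single entry point and treats r2 (the TOC pointer) as
caller-saved. A worked illustration of the predicate, with the elf.h
constant values spelled out:

	#define STO_PPC64_LOCAL_BIT	5
	#define STO_PPC64_LOCAL_MASK	(7 << STO_PPC64_LOCAL_BIT)	/* 0xe0 */

	/* '.localentry sym, 1' => local field == 1 => TOC is caller-saved */
	static bool need_r2save_stub(unsigned char st_other)
	{
		return (st_other & STO_PPC64_LOCAL_MASK) == (1 << STO_PPC64_LOCAL_BIT);
	}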


Re: [PATCH v8 3/6] powerpc/code-patching: Verify instruction patch succeeded

2022-10-24 Thread Benjamin Gray
On Mon, 2022-10-24 at 14:20 +1100, Russell Currey wrote:
> On Fri, 2022-10-21 at 16:22 +1100, Benjamin Gray wrote:
> > diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
> > index 34fc7ac34d91..9b9eba574d7e 100644
> > --- a/arch/powerpc/lib/code-patching.c
> > +++ b/arch/powerpc/lib/code-patching.c
> > @@ -186,6 +186,8 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
> > err = __do_patch_instruction(addr, instr);
> > local_irq_restore(flags);
> >  
> > +   WARN_ON(!err && !ppc_inst_equal(instr, ppc_inst_read(addr)));
> > +
> 
> As a side note, I had a look at test-code-patching.c and it doesn't
> look like we have a test for ppc_inst_equal() with prefixed
> instructions.  We should fix that.

Yeah, for a different series though I assume. And I think it would be
better suited in a suite dedicated to testing asm/inst.h functions.


[PATCH] powerpc: Interrupt handler stack randomisation

2022-10-24 Thread Rohan McLure
Stack frames used by syscall handlers support random offsets as of
commit f4a0318f278d ("powerpc: add support for syscall stack randomization").
Implement the same for general interrupt handlers, by applying the
random stack offset and then updating this offset from within the
DEFINE_INTERRUPT_HANDLER macros.

Applying this offset perturbs the layout of interrupt handler stack
frames, rendering the kernel stack more difficult to control by means
of user-invoked interrupts.

Link: https://lists.ozlabs.org/pipermail/linuxppc-dev/2022-May/243238.html

Signed-off-by: Rohan McLure 
---
 arch/powerpc/include/asm/interrupt.h | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h
index 4745bb9998bd..b7f7beff4e13 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -68,6 +68,7 @@
 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -448,9 +449,12 @@ interrupt_handler long func(struct pt_regs *regs)\
long ret;   \
\
__hard_RI_enable(); \
+   add_random_kstack_offset(); \
\
ret = ____##func (regs);\
\
+   choose_random_kstack_offset(mftb());\
+   \
return ret; \
 }  \
 NOKPROBE_SYMBOL(func); \
@@ -480,9 +484,11 @@ static __always_inline void ____##func(struct pt_regs *regs);  \
 interrupt_handler void func(struct pt_regs *regs)  \
 {  \
interrupt_enter_prepare(regs);  \
+   add_random_kstack_offset(); \
\
____##func (regs);  \
\
+   choose_random_kstack_offset(mftb());\
interrupt_exit_prepare(regs);   \
 }  \
 NOKPROBE_SYMBOL(func); \
@@ -515,9 +521,11 @@ interrupt_handler long func(struct pt_regs *regs)\
long ret;   \
\
interrupt_enter_prepare(regs);  \
+   add_random_kstack_offset(); \
\
ret = ____##func (regs);\
\
+   choose_random_kstack_offset(mftb());\
interrupt_exit_prepare(regs);   \
\
return ret; \
@@ -548,9 +556,11 @@ static __always_inline void ____##func(struct pt_regs *regs);  \
 interrupt_handler void func(struct pt_regs *regs)  \
 {  \
interrupt_async_enter_prepare(regs);\
+   add_random_kstack_offset(); \
\
____##func (regs);  \
\
+   choose_random_kstack_offset(mftb());\
interrupt_async_exit_prepare(regs); \
 }  \
 NOKPROBE_SYMBOL(func); \
@@ -585,9 +595,11 @@ interrupt_handler long func(struct pt_regs *regs)\
long ret;   \
\
interrupt_nmi_enter_prepare(regs, &state);  
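
The offset helpers used in these macros come from the generic kernel
stack randomization support; roughly, and simplified (the real
definitions live in include/linux/randomize_kstack.h and may differ in
detail):

	#define add_random_kstack_offset() do {				\
		if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
					&randomize_kstack_offset)) {	\
			u32 offset = raw_cpu_read(kstack_offset);	\
			/* Eat a random amount of stack before the handler runs. */ \
			u8 *ptr = __kstack_alloca(KSTACK_OFFSET_MAX(offset)); \
			asm volatile("" :: "r"(ptr) : "memory");	\
		}							\
	} while (0)

	#define choose_random_kstack_offset(rand) do {			\
		if (static_branch_maybe(CONFIG_RANDOMIZE_KSTACK_OFFSET_DEFAULT,	\
					&randomize_kstack_offset)) {	\
			/* Mix in fresh entropy (here: the timebase) for next entry. */ \
			u32 offset = raw_cpu_read(kstack_offset);	\
			offset = ror32(offset, 5) ^ (rand);		\
			raw_cpu_write(kstack_offset, offset);		\
		}							\
	} while (0)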

[PATCH v9 3/7] powerpc/code-patching: Use WARN_ON and fix check in poking_init

2022-10-24 Thread Benjamin Gray
BUG_ON() when failing to initialise the code patching window is
excessive, as most critical patching happens during boot before strict
RWX control is enabled. Failure to patch after boot is not inherently
fatal, so whether to abort is better decided by the caller.

The return value of cpuhp_setup_state() is also >= 0 on success,
so check for < 0.

Signed-off-by: Benjamin Gray 
Reviewed-by: Russell Currey 
---
v9: * Reword commit message to explain why init failure is not fatal
---
 arch/powerpc/lib/code-patching.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 54e145247643..3b3b09d5d2e1 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -82,16 +82,13 @@ static int text_area_cpu_down(unsigned int cpu)
 
 static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done);
 
-/*
- * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and
- * we judge it as being preferable to a kernel that will crash later when
- * someone tries to use patch_instruction().
- */
 void __init poking_init(void)
 {
-   BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
-   "powerpc/text_poke:online", text_area_cpu_up,
-   text_area_cpu_down));
+   WARN_ON(cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
+ "powerpc/text_poke:online",
+ text_area_cpu_up,
+ text_area_cpu_down) < 0);
+
static_branch_enable(&poking_init_done);
 }
 
-- 
2.37.3
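
For reference, cpuhp_setup_state() with CPUHP_AP_ONLINE_DYN returns the
dynamically allocated hotplug state number on success, so a truthiness
check inverts the intent; schematically:

	int ret = cpuhp_setup_state(CPUHP_AP_ONLINE_DYN,
				    "powerpc/text_poke:online",
				    text_area_cpu_up, text_area_cpu_down);
	/* ret > 0:  success (the allocated dynamic state number)
	 * ret == 0: success (only possible for fixed, non-dynamic states)
	 * ret < 0:  error
	 */
	WARN_ON(ret < 0);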



[PATCH v9 4/7] powerpc/code-patching: Verify instruction patch succeeded

2022-10-24 Thread Benjamin Gray
Verify that, if the instruction patching did not return an error, the
value stored at the given address now matches the instruction that was
written.

Signed-off-by: Benjamin Gray 
---
 arch/powerpc/lib/code-patching.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 3b3b09d5d2e1..b0a12b2d5a9b 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -192,6 +192,8 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr)
err = __do_patch_instruction(addr, instr);
local_irq_restore(flags);
 
+   WARN_ON(!err && !ppc_inst_equal(instr, ppc_inst_read(addr)));
+
return err;
 }
 #else /* !CONFIG_STRICT_KERNEL_RWX */
-- 
2.37.3



[PATCH v9 2/7] powerpc/code-patching: Handle RWX patching initialisation error

2022-10-24 Thread Benjamin Gray
Detect and abort __do_patch_instruction() when there is no text_poke_area,
which implies there is no patching address. This allows patch_instruction()
to fail gracefully and let the caller decide what to do, as opposed to
the current behaviour of the kernel panicking when the NULL pointer is
dereferenced.

Signed-off-by: Benjamin Gray 
---
v9: * New in v9
---
 arch/powerpc/lib/code-patching.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index ad0cf3108dd0..54e145247643 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -76,6 +76,7 @@ static int text_area_cpu_up(unsigned int cpu)
 static int text_area_cpu_down(unsigned int cpu)
 {
free_vm_area(this_cpu_read(text_poke_area));
+   this_cpu_write(text_poke_area, NULL);
return 0;
 }
 
@@ -151,11 +152,16 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 {
int err;
u32 *patch_addr;
+   struct vm_struct *area;
unsigned long text_poke_addr;
pte_t *pte;
unsigned long pfn = get_patch_pfn(addr);
 
-   text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK;
+   area = __this_cpu_read(text_poke_area);
+   if (unlikely(!area))
+   return -ENOMEM;
+
+   text_poke_addr = (unsigned long)area->addr & PAGE_MASK;
patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
 
pte = virt_to_kpte(text_poke_addr);
-- 
2.37.3
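
With this change a missing patching area propagates as an error code
rather than an oops, so a caller can decide policy itself; an
illustrative (hypothetical) call site:

	int err = patch_instruction(addr, ppc_inst(PPC_RAW_NOP()));
	if (err)
		pr_warn("code patching failed (%d), leaving site unpatched\n", err);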



[PATCH v9 5/7] powerpc/tlb: Add local flush for page given mm_struct and psize

2022-10-24 Thread Benjamin Gray
Adds a local TLB flush operation that works given an mm_struct, VA to
flush, and page size representation. Most implementations mirror the
surrounding code. The book3s/32/tlbflush.h implementation is left as
a WARN_ONCE because it is more complicated and not required for
anything as yet.

This removes the need to create a vm_area_struct, which the temporary
patching mm work does not need.

Signed-off-by: Benjamin Gray 
---
v9: * Replace book3s/32/tlbflush.h implementation with warning
---
 arch/powerpc/include/asm/book3s/32/tlbflush.h      | 9 +++++++++
 .../include/asm/book3s/64/tlbflush-hash.h          | 5 +++++
 arch/powerpc/include/asm/book3s/64/tlbflush.h      | 8 ++++++++
 arch/powerpc/include/asm/nohash/tlbflush.h         | 8 ++++++++
 4 files changed, 30 insertions(+)

diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h
index ba1743c52b56..14d989d41f75 100644
--- a/arch/powerpc/include/asm/book3s/32/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
 #define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H
 
+#include 
+
 #define MMU_NO_CONTEXT  (0)
 /*
  * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx
@@ -74,6 +76,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
 {
flush_tlb_page(vma, vmaddr);
 }
+
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+   WARN_ONCE(true, "local TLB flush not implemented");
+}
+
 static inline void local_flush_tlb_mm(struct mm_struct *mm)
 {
flush_tlb_mm(mm);
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
index fab8332fe1ad..8fd9dc49b2a1 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h
@@ -94,6 +94,11 @@ static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma,
 {
 }
 
+static inline void hash__local_flush_tlb_page_psize(struct mm_struct *mm,
+   unsigned long vmaddr, int psize)
+{
+}
+
 static inline void hash__flush_tlb_page(struct vm_area_struct *vma,
unsigned long vmaddr)
 {
diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h
index 67655cd60545..2d839dd5c08c 100644
--- a/arch/powerpc/include/asm/book3s/64/tlbflush.h
+++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h
@@ -92,6 +92,14 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma,
return hash__local_flush_tlb_page(vma, vmaddr);
 }
 
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+   if (radix_enabled())
+   return radix__local_flush_tlb_page_psize(mm, vmaddr, psize);
+   return hash__local_flush_tlb_page_psize(mm, vmaddr, psize);
+}
+
 static inline void local_flush_all_mm(struct mm_struct *mm)
 {
if (radix_enabled())
diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h
index bdaf34ad41ea..432bca4cac62 100644
--- a/arch/powerpc/include/asm/nohash/tlbflush.h
+++ b/arch/powerpc/include/asm/nohash/tlbflush.h
@@ -45,6 +45,12 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr)
asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
 }
 
+static inline void local_flush_tlb_page_psize(struct mm_struct *mm,
+ unsigned long vmaddr, int psize)
+{
+   asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory");
+}
+
 static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
start &= PAGE_MASK;
@@ -58,6 +64,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end)
 extern void flush_tlb_kernel_range(unsigned long start, unsigned long end);
 extern void local_flush_tlb_mm(struct mm_struct *mm);
 extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
+extern void local_flush_tlb_page_psize(struct mm_struct *mm,
+  unsigned long vmaddr, int psize);
 
 extern void __local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr,
   int tsize, int ind);
-- 
2.37.3
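
The practical effect for the temporary-mm patch later in this series:
previously a throwaway vm_area_struct was needed purely to satisfy
local_flush_tlb_page()'s signature, whereas the new helper takes the mm
directly. Schematically (illustrative):

	/* Before: a fake vma exists only to carry the mm pointer */
	struct vm_area_struct vma = { .vm_mm = patching_mm };
	local_flush_tlb_page(&vma, text_poke_addr);

	/* After: pass the mm and page size directly */
	local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize);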



[PATCH v9 1/7] powerpc: Allow clearing and restoring registers independent of saved breakpoint state

2022-10-24 Thread Benjamin Gray
From: Jordan Niethe 

For the coming temporary mm used for instruction patching, the
breakpoint registers need to be cleared to prevent them from
accidentally being triggered. As soon as the patching is done, the
breakpoints will be restored.

The breakpoint state is stored in the per-cpu variable current_brk[].
Add a suspend_breakpoints() function which will clear the breakpoint
registers without touching the state in current_brk[]. Add a pair
function restore_breakpoints() which will move the state in
current_brk[] back to the registers.

Signed-off-by: Jordan Niethe 
Signed-off-by: Benjamin Gray 
---
v9: * Renamed set_breakpoint to set_hw_breakpoint
* Renamed pause/unpause to suspend/restore
* Removed unrelated whitespace change
---
 arch/powerpc/include/asm/debug.h |  2 ++
 arch/powerpc/kernel/process.c    | 38 +++++++++++++++++++++++++++++++++++---
 2 files changed, 37 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 86a14736c76c..51c744608f37 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -46,6 +46,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
 void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
+void suspend_breakpoints(void);
+void restore_breakpoints(void);
 bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 67da147fe34d..5d5109ed01a5 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -862,10 +862,8 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
return 0;
 }
 
-void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk)
 {
-   memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
-
if (dawr_enabled())
// Power8 or later
set_dawr(nr, brk);
@@ -879,6 +877,12 @@ void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
WARN_ON_ONCE(1);
 }
 
+void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
+   memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
+   set_hw_breakpoint(nr, brk);
+}
+
 /* Check if we have DAWR or DABR hardware */
 bool ppc_breakpoint_available(void)
 {
@@ -891,6 +895,34 @@ bool ppc_breakpoint_available(void)
 }
 EXPORT_SYMBOL_GPL(ppc_breakpoint_available);
 
+/* Disable the breakpoint in hardware without touching current_brk[] */
+void suspend_breakpoints(void)
+{
+   struct arch_hw_breakpoint brk = {0};
+   int i;
+
+   if (!ppc_breakpoint_available())
+   return;
+
+   for (i = 0; i < nr_wp_slots(); i++)
+   set_hw_breakpoint(i, &brk);
+}
+
+/*
+ * Re-enable breakpoints suspended by suspend_breakpoints() in hardware
+ * from current_brk[]
+ */
+void restore_breakpoints(void)
+{
+   int i;
+
+   if (!ppc_breakpoint_available())
+   return;
+
+   for (i = 0; i < nr_wp_slots(); i++)
+   set_hw_breakpoint(i, this_cpu_ptr(&current_brk[i]));
+}
+
 #ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 
 static inline bool tm_enabled(struct task_struct *tsk)
-- 
2.37.3
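
The intended pairing, as used by the temporary-mm patch later in this
series (sketch only; helper names per this series):

	suspend_breakpoints();	/* HW registers cleared, current_brk[] untouched */
	/* ... switch to the patching mm and write the instruction ... */
	restore_breakpoints();	/* HW registers reloaded from current_brk[] */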



[PATCH v9 0/7] Use per-CPU temporary mappings for patching on Radix MMU

2022-10-24 Thread Benjamin Gray
This is a revision of Chris and Jordan's series to introduce a per-cpu temporary
mm to be used for patching with strict RWX on Radix MMUs.

v9: * Fixed patch series name to include "on Radix MMU" again
* Renamed breakpoint functions
* Introduce patch to gracefully return when patching not possible
* Make book3s/32/tlbflush.h TLB page flush implementation a warning
* Removed temp_mm_state
* Consolidate patching context into single struct shared by both paths

Previous versions:
v8: https://lore.kernel.org/all/20221021052238.580986-1-bg...@linux.ibm.com/
v7: https://lore.kernel.org/all/2020003717.1150965-1-jniet...@gmail.com/
v6: https://lore.kernel.org/all/20210911022904.30962-1-...@bluescreens.de/
v5: https://lore.kernel.org/all/20210713053113.4632-1-...@linux.ibm.com/
v4: https://lore.kernel.org/all/20210429072057.8870-1-...@bluescreens.de/
v3: https://lore.kernel.org/all/20200827052659.24922-1-...@codefail.de/
v2: https://lore.kernel.org/all/20200709040316.12789-1-...@informatik.wtf/
v1: https://lore.kernel.org/all/20200603051912.23296-1-...@informatik.wtf/
RFC: https://lore.kernel.org/all/20200323045205.20314-1-...@informatik.wtf/
x86: 
https://lore.kernel.org/kernel-hardening/20190426232303.28381-1-nadav.a...@gmail.com/


Benjamin Gray (7):
  powerpc: Allow clearing and restoring registers independent of saved
breakpoint state
  powerpc/code-patching: Handle RWX patching initialisation error
  powerpc/code-patching: Use WARN_ON and fix check in poking_init
  powerpc/code-patching: Verify instruction patch succeeded
  powerpc/tlb: Add local flush for page given mm_struct and psize
  powerpc/code-patching: Use temporary mm for Radix MMU
  powerpc/code-patching: Consolidate and cache per-cpu patching context

 arch/powerpc/include/asm/book3s/32/tlbflush.h |   9 +
 .../include/asm/book3s/64/tlbflush-hash.h |   5 +
 arch/powerpc/include/asm/book3s/64/tlbflush.h |   8 +
 arch/powerpc/include/asm/debug.h  |   2 +
 arch/powerpc/include/asm/nohash/tlbflush.h|   8 +
 arch/powerpc/kernel/process.c |  38 ++-
 arch/powerpc/lib/code-patching.c  | 252 +-
 7 files changed, 305 insertions(+), 17 deletions(-)


base-commit: 9abf2313adc1ca1b6180c508c25f22f9395cc780
--
2.37.3


[PATCH v9 7/7] powerpc/code-patching: Consolidate and cache per-cpu patching context

2022-10-24 Thread Benjamin Gray
With the temp mm context support, there are CPU local variables to hold
the patch address and pte. Use these in the non-temp mm path as well
instead of adding a level of indirection through the text_poke_area
vm_struct and pointer chasing the pte.

As both paths use these fields now, there is no need to let unreferenced
variables be dropped by the compiler, so it is cleaner to merge them into
a single context struct. This has the additional benefit of removing a
redundant CPU local pointer, as only one of cpu_patching_mm /
text_poke_area is ever used, while remaining well-typed.

Signed-off-by: Benjamin Gray 
---
v9: * Consolidate patching context into single struct
---
 arch/powerpc/lib/code-patching.c | 58 +++++++++++++++++++++++++++++-------------------------
 1 file changed, 33 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 3fe99d0086fc..cefb938f7217 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -48,10 +48,16 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 
-static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
-static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm);
-static DEFINE_PER_CPU(unsigned long, cpu_patching_addr);
-static DEFINE_PER_CPU(pte_t *, cpu_patching_pte);
+struct patch_context {
+   union {
+   struct vm_struct *text_poke_area;
+   struct mm_struct *mm;
+   };
+   unsigned long addr;
+   pte_t * pte;
+};
+
+static DEFINE_PER_CPU(struct patch_context, cpu_patching_context);
 
 static int map_patch_area(void *addr, unsigned long text_poke_addr);
 static void unmap_patch_area(unsigned long addr);
@@ -116,15 +122,19 @@ static int text_area_cpu_up(unsigned int cpu)
 
unmap_patch_area(addr);
 
-   this_cpu_write(text_poke_area, area);
+   this_cpu_write(cpu_patching_context.text_poke_area, area);
+   this_cpu_write(cpu_patching_context.addr, addr);
+   this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr));
 
return 0;
 }
 
 static int text_area_cpu_down(unsigned int cpu)
 {
-   free_vm_area(this_cpu_read(text_poke_area));
-   this_cpu_write(text_poke_area, NULL);
+   free_vm_area(this_cpu_read(cpu_patching_context.text_poke_area));
+   this_cpu_write(cpu_patching_context.text_poke_area, NULL);
+   this_cpu_write(cpu_patching_context.addr, 0);
+   this_cpu_write(cpu_patching_context.pte, NULL);
return 0;
 }
 
@@ -172,9 +182,9 @@ static int text_area_cpu_up_mm(unsigned int cpu)
if (WARN_ON(!ptep))
goto fail_no_pte;
 
-   this_cpu_write(cpu_patching_mm, mm);
-   this_cpu_write(cpu_patching_addr, addr);
-   this_cpu_write(cpu_patching_pte, ptep);
+   this_cpu_write(cpu_patching_context.mm, mm);
+   this_cpu_write(cpu_patching_context.addr, addr);
+   this_cpu_write(cpu_patching_context.pte, ptep);
 
return 0;
 
@@ -202,8 +212,8 @@ static int text_area_cpu_down_mm(unsigned int cpu)
p4d_t *p4dp;
pgd_t *pgdp;
 
-   mm = this_cpu_read(cpu_patching_mm);
-   addr = this_cpu_read(cpu_patching_addr);
+   mm = this_cpu_read(cpu_patching_context.mm);
+   addr = this_cpu_read(cpu_patching_context.addr);
 
pgdp = pgd_offset(mm, addr);
p4dp = p4d_offset(pgdp, addr);
@@ -223,9 +233,9 @@ static int text_area_cpu_down_mm(unsigned int cpu)
 
mmput(mm);
 
-   this_cpu_write(cpu_patching_mm, NULL);
-   this_cpu_write(cpu_patching_addr, 0);
-   this_cpu_write(cpu_patching_pte, NULL);
+   this_cpu_write(cpu_patching_context.mm, NULL);
+   this_cpu_write(cpu_patching_context.addr, 0);
+   this_cpu_write(cpu_patching_context.pte, NULL);
 
return 0;
 }
@@ -316,9 +326,9 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
struct mm_struct *patching_mm;
struct mm_struct *orig_mm;
 
-   patching_mm = __this_cpu_read(cpu_patching_mm);
-   pte = __this_cpu_read(cpu_patching_pte);
-   text_poke_addr = __this_cpu_read(cpu_patching_addr);
+   patching_mm = __this_cpu_read(cpu_patching_context.mm);
+   pte = __this_cpu_read(cpu_patching_context.pte);
+   text_poke_addr = __this_cpu_read(cpu_patching_context.addr);
patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));
 
if (unlikely(!patching_mm))
@@ -357,19 +367,17 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
 {
int err;
u32 *patch_addr;
-   struct vm_struct *area;
unsigned long text_poke_addr;
pte_t *pte;
unsigned long pfn = get_patch_pfn(addr);
 
-   area = __this_cpu_read(text_poke_area);
-   if (unlikely(!area))
-   return -ENOMEM;
-
-   text_poke_addr = (unsigned long)area->addr & PAGE_MASK;
+   text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK;
patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr));

[PATCH v9 6/7] powerpc/code-patching: Use temporary mm for Radix MMU

2022-10-24 Thread Benjamin Gray
From: "Christopher M. Riedl" 

x86 supports the notion of a temporary mm which restricts access to
temporary PTEs to a single CPU. A temporary mm is useful for situations
where a CPU needs to perform sensitive operations (such as patching a
STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
said mappings to other CPUs. Another benefit is that other CPU TLBs do
not need to be flushed when the temporary mm is torn down.

Mappings in the temporary mm can be set in the userspace portion of the
address-space.

Interrupts must be disabled while the temporary mm is in use. HW
breakpoints, which may have been set by userspace as watchpoints on
addresses now within the temporary mm, are saved and disabled when
loading the temporary mm. The HW breakpoints are restored when unloading
the temporary mm. All HW breakpoints are indiscriminately disabled while
the temporary mm is in use - this may include breakpoints set by perf.

Use the `poking_init` init hook to prepare a temporary mm and patching
address. Initialize the temporary mm by copying the init mm. Choose a
randomized patching address inside the temporary mm userspace address
space. The patching address is randomized between PAGE_SIZE and
DEFAULT_MAP_WINDOW-PAGE_SIZE.

Bits of entropy with 64K page size on BOOK3S_64:

bits of entropy = log2(DEFAULT_MAP_WINDOW_USER64 / PAGE_SIZE)

PAGE_SIZE=64K, DEFAULT_MAP_WINDOW_USER64=128TB
bits of entropy = log2(128TB / 64K)
bits of entropy = 31

The upper limit is DEFAULT_MAP_WINDOW due to how the Book3s64 Hash MMU
operates - by default the space above DEFAULT_MAP_WINDOW is not
available. Currently the Hash MMU does not use a temporary mm so
technically this upper limit isn't necessary; however, a larger
randomization range does not further "harden" this overall approach and
future work may introduce patching with a temporary mm on Hash as well.

Randomization occurs only once during initialization for each CPU as it
comes online.

The patching page is mapped with PAGE_KERNEL to set EAA[0] for the PTE
which ignores the AMR (so no need to unlock/lock KUAP) according to
PowerISA v3.0b Figure 35 on Radix.

Based on x86 implementation:

commit 4fc19708b165
("x86/alternatives: Initialize temporary mm for patching")

and:

commit b3fd8e83ada0
("x86/alternatives: Use temporary mm for text poking")

From: Benjamin Gray 

Synchronisation is done according to ISA 3.1B Book 3 Chapter 13
"Synchronization Requirements for Context Alterations". Switching the mm
is a change to the PID, which requires a CSI before and after the change,
and a hwsync between the last instruction that performs address
translation for an associated storage access.

Instruction fetch is an associated storage access, but the instruction
address mappings are not being changed, so it should not matter which
context they use. We must still perform a hwsync to guard arbitrary
prior code that may have accessed a userspace address.

TLB invalidation is local and VA specific. Local because only this core
used the patching mm, and VA specific because we only care that the
writable mapping is purged. Leaving the other mappings intact is more
efficient, especially when performing many code patches in a row (e.g.,
as ftrace would).

Signed-off-by: Christopher M. Riedl 
Signed-off-by: Benjamin Gray 
---
v9: * Add back Christopher M. Riedl signed-off-by
* Remove temp_mm_state
---
 arch/powerpc/lib/code-patching.c | 221 ++-
 1 file changed, 216 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index b0a12b2d5a9b..3fe99d0086fc 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -4,12 +4,17 @@
  */
 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
 #include 
 #include 
 
+#include 
+#include 
+#include 
 #include 
 #include 
 #include 
@@ -42,11 +47,54 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
+
 static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm);
+static DEFINE_PER_CPU(unsigned long, cpu_patching_addr);
+static DEFINE_PER_CPU(pte_t *, cpu_patching_pte);
 
 static int map_patch_area(void *addr, unsigned long text_poke_addr);
 static void unmap_patch_area(unsigned long addr);
 
+static bool mm_patch_enabled(void)
+{
+   return IS_ENABLED(CONFIG_SMP) && radix_enabled();
+}
+
+/*
+ * The following applies for Radix MMU. Hash MMU has different requirements,
+ * and so is not supported.
+ *
+ * Changing mm requires context synchronising instructions on both sides of
+ * the context switch, as well as a hwsync between the last instruction for
+ * which the address of an associated storage access was translated using
+ * the current context.
+ *
+ * switch_mm_irqs_off() performs an isync after the context switch. It is
+ * the responsibility of the caller