Re: [PATCH v8 5/6] powerpc/code-patching: Use temporary mm for Radix MMU
Hi Benjamin, Thank you for the patch! Yet something to improve: [auto build test ERROR on 8636df94ec917019c4cb744ba0a1f94cf9057790] url: https://github.com/intel-lab-lkp/linux/commits/Benjamin-Gray/Use-per-CPU-temporary-mappings-for-patching/20221021-133129 base: 8636df94ec917019c4cb744ba0a1f94cf9057790 patch link: https://lore.kernel.org/r/20221021052238.580986-6-bgray%40linux.ibm.com patch subject: [PATCH v8 5/6] powerpc/code-patching: Use temporary mm for Radix MMU config: powerpc-tqm8xx_defconfig compiler: powerpc-linux-gcc (GCC) 12.1.0 reproduce (this is a W=1 build): wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # https://github.com/intel-lab-lkp/linux/commit/13c3eee268f72a6f6b47b978b3c472b8bed253d9 git remote add linux-review https://github.com/intel-lab-lkp/linux git fetch --no-tags linux-review Benjamin-Gray/Use-per-CPU-temporary-mappings-for-patching/20221021-133129 git checkout 13c3eee268f72a6f6b47b978b3c472b8bed253d9 # save the config file mkdir build_dir && cp config build_dir/.config COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-12.1.0 make.cross W=1 O=build_dir ARCH=powerpc SHELL=/bin/bash If you fix the issue, kindly add following tag where applicable | Reported-by: kernel test robot All errors (new ones prefixed by >>): arch/powerpc/lib/code-patching.c: In function '__do_patch_instruction_mm': >> arch/powerpc/lib/code-patching.c:355:9: error: implicit declaration of >> function 'local_flush_tlb_page_psize'; did you mean 'local_flush_tlb_page'? 
>> [-Werror=implicit-function-declaration] 355 | local_flush_tlb_page_psize(patching_mm, text_poke_addr, mmu_virtual_psize); | ^~ | local_flush_tlb_page cc1: all warnings being treated as errors vim +355 arch/powerpc/lib/code-patching.c 312 313 static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) 314 { 315 int err; 316 u32 *patch_addr; 317 unsigned long text_poke_addr; 318 pte_t *pte; 319 unsigned long pfn = get_patch_pfn(addr); 320 struct mm_struct *patching_mm; 321 struct temp_mm_state prev; 322 323 patching_mm = __this_cpu_read(cpu_patching_mm); 324 pte = __this_cpu_read(cpu_patching_pte); 325 text_poke_addr = __this_cpu_read(cpu_patching_addr); 326 patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); 327 328 if (unlikely(!patching_mm)) 329 return -ENOMEM; 330 331 set_pte_at(patching_mm, text_poke_addr, pte, pfn_pte(pfn, PAGE_KERNEL)); 332 333 /* order PTE update before use, also serves as the hwsync */ 334 asm volatile("ptesync": : :"memory"); 335 336 /* order context switch after arbitrary prior code */ 337 isync(); 338 339 prev = start_using_temp_mm(patching_mm); 340 341 err = __patch_instruction(addr, instr, patch_addr); 342 343 /* hwsync performed by __patch_instruction (sync) if successful */ 344 if (err) 345 mb(); /* sync */ 346 347 /* context synchronisation performed by __patch_instruction (isync or exception) */ 348 stop_using_temp_mm(patching_mm, prev); 349 350 pte_clear(patching_mm, text_poke_addr, pte); 351 /* 352 * ptesync to order PTE update before TLB invalidation done 353 * by radix__local_flush_tlb_page_psize (in _tlbiel_va) 354 */ > 355 local_flush_tlb_page_psize(patching_mm, text_poke_addr, > mmu_virtual_psize); 356 357 return err; 358 } 359 -- 0-DAY CI Kernel Test Service https://01.org/lkp # # Automatically generated file; DO NOT EDIT. 
# Linux/powerpc 6.1.0-rc1 Kernel Configuration # CONFIG_CC_VERSION_TEXT="powerpc-linux-gcc (GCC) 12.1.0" CONFIG_CC_IS_GCC=y CONFIG_GCC_VERSION=120100 CONFIG_CLANG_VERSION=0 CONFIG_AS_IS_GNU=y CONFIG_AS_VERSION=23800 CONFIG_LD_IS_BFD=y CONFIG_LD_VERSION=23800 CONFIG_LLD_VERSION=0 CONFIG_CC_HAS_ASM_GOTO_OUTPUT=y CONFIG_CC_HAS_ASM_INLINE=y CONFIG_CC_HAS_NO_PROFILE_FN_ATTR=y CONFIG_PAHOLE_VERSION=123 CONFIG_IRQ_WORK=y CONFIG_BUILDTIME_TABLE_SORT=y CONFIG_THREAD_INFO_IN_TASK=y # # General setup # CONFIG_BROKEN_ON_SMP=y CONFIG_INIT_ENV_ARG_LIMIT=32 # CONFIG_COMPILE_TEST is not set # CONFIG_WERROR is not set CONFIG_LOCALVERSION="" CONFIG_LOCALVERSION_AUTO=y CONFIG_BUILD_SALT="" CONFIG_HAVE_KERNEL_GZIP=y CONFIG_KERNEL_GZIP=y CONFIG_DEFAULT_INIT="" CONFIG_DEFAULT_HOSTNAME="(none)" CONFIG_SYSVIPC=y CONFIG_SYSVIPC_SYSCTL=y # CONFIG_POSIX_MQUEUE is not set # CONFIG_WATCH_QUEUE is not set CONFIG_CROSS_MEMORY_ATTACH=y # CONFIG_USELIB is not set # CONFIG_AUDIT
[PATCH] soc: fsl: qe: Avoid using gpio_to_desc()
We want to get rid of the old GPIO numberspace, so instead of calling gpio_to_desc() we get the gpio descriptor for the requested line from the device tree directly without passing through the GPIO numberspace, and then we get the gpiochip from the descriptor. Cc: Bartosz Golaszewski Cc: linux-g...@vger.kernel.org Signed-off-by: Linus Walleij --- drivers/soc/fsl/qe/gpio.c | 18 -- 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/drivers/soc/fsl/qe/gpio.c b/drivers/soc/fsl/qe/gpio.c index 99f7de43c3c6..cc5602b679fe 100644 --- a/drivers/soc/fsl/qe/gpio.c +++ b/drivers/soc/fsl/qe/gpio.c @@ -13,10 +13,8 @@ #include #include #include -#include +#include #include -/* FIXME: needed for gpio_to_chip() get rid of this */ -#include #include #include #include @@ -161,6 +159,7 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index) struct qe_pin *qe_pin; struct gpio_chip *gc; struct qe_gpio_chip *qe_gc; + struct gpio_desc *gpiod; int err; unsigned long flags; @@ -170,14 +169,21 @@ struct qe_pin *qe_pin_request(struct device_node *np, int index) return ERR_PTR(-ENOMEM); } - err = of_get_gpio(np, index); - if (err < 0) + gpiod = fwnode_gpiod_get_index(of_fwnode_handle(np), NULL, index, GPIOD_ASIS, "qe"); + if (IS_ERR(gpiod)) { + err = PTR_ERR(gpiod); goto err0; - gc = gpio_to_chip(err); + } + if (!gpiod) { + err = -EINVAL; + goto err0; + } + gc = gpiod_to_chip(gpiod); if (WARN_ON(!gc)) { err = -ENODEV; goto err0; } + gpiod_put(gpiod); if (!of_device_is_compatible(gc->of_node, "fsl,mpc8323-qe-pario-bank")) { pr_debug("%s: tried to get a non-qe pin\n", __func__); -- 2.34.1
Re: [PATCH v1 0/5] convert tree to get_random_u32_{below,above,between}()
On Fri, Oct 21, 2022 at 09:43:58PM -0400, Jason A. Donenfeld wrote: > Hey everyone, > > Here's the second and final tranche of tree-wide conversions to get > random integer handling a bit tamer. It's predominantly another > Coccinelle-based patchset. > > First we s/prandom_u32_max/get_random_u32_below/, since the former is > just a deprecated alias for the latter. Then in the next commit we can > remove prandom_u32_max all together. I'm quite happy about finally being > able to do that. It means that prandom.h is now only for deterministic and > repeatable randomness, not non-deterministic/cryptographic randomness. > That line is no longer blurred. > > Then, in order to clean up a bunch of inefficient patterns, we introduce > two trivial static inline helper functions built on top of > get_random_u32_below: get_random_u32_above and get_random_u32_between. > These are pretty straight forward to use and understand. Then the final > two patches convert some gnarly open-coded number juggling to use these > helpers. > > I've used Coccinelle for all the treewide patches, so hopefully review > is rather uneventful. I didn't accept all of the changes that Coccinelle > proposed, though, as these tend to be somewhat context-specific. I erred > on the side of just going with the most obvious cases, at least this > time through. And then we can address more complicated cases through > actual maintainer trees. > > Since get_random_u32_below() sits in my random.git tree, these patches > too will flow through that same tree. > For drivers/infiniband Reviewed-by: Jason Gunthorpe Jason
Re: [PATCH v1 0/5] convert tree to get_random_u32_{below,above,between}()
On Sun, Oct 23, 2022 at 05:07:13PM -0400, Theodore Ts'o wrote: > On Fri, Oct 21, 2022 at 11:03:22PM -0700, Jakub Kicinski wrote: > > On Sat, 22 Oct 2022 07:47:06 +0200 Jason A. Donenfeld wrote: > > > On Fri, Oct 21, 2022 at 10:32:42PM -0700, Jakub Kicinski wrote: > > > > But whatever. I mean - hopefully there aren't any conflicts in the ~50 > > > > networking files you touch. I just wish that people didn't pipe up with > > > > the tree wide changes right after the merge window. Feels like the > > > > worst possible timing. > > > > > > Oh, if the timing is what makes this especially worrisome, I have > > > no qualms about rebasing much later, and reposting this series then. > > > I'll do that. > > > > Cool, thanks! I promise to not be grumpy if you repost around rc6 :) > > One way of making things less painful for the stable branch and for > the upstream branch is to *add* new helpers instead of playing > replacement games like s/prandom_u32_max/get_random_u32_below/. This > is what causes the patch conflict problems. > > One advantage of at least adding the new functions to the stable > branches, even if we don't do the wholesale replacement, is that it That's a good idea. I'll also save the removal commit, anyhow, for a separate thing at the end of 6.2 rc1, so that -next doesn't have issues either. That's how things wound up going down for the first tranche of these, and that worked well. Jason
Re: [PATCH v1 1/2] stackprotector: move CANARY_MASK and get_random_canary() into stackprotector.h
On 23/10/22 22:32, Jason A. Donenfeld wrote: This has nothing to do with random.c and everything to do with stack protectors. Yes, it uses randomness. But many things use randomness. random.h and random.c are concerned with the generation of randomness, not with each and every use. So move this function into the more specific stackprotector.h file where it belongs. Signed-off-by: Jason A. Donenfeld --- arch/x86/kernel/cpu/common.c | 2 +- arch/x86/kernel/setup_percpu.c | 2 +- arch/x86/kernel/smpboot.c | 2 +- arch/x86/xen/enlighten_pv.c| 2 +- include/linux/random.h | 19 --- include/linux/stackprotector.h | 19 +++ kernel/fork.c | 2 +- 7 files changed, 24 insertions(+), 24 deletions(-) Reviewed-by: Philippe Mathieu-Daudé
Re: [PATCH v8 5/6] powerpc/code-patching: Use temporary mm for Radix MMU
On Mon Oct 24, 2022 at 12:17 AM CDT, Benjamin Gray wrote: > On Mon, 2022-10-24 at 14:45 +1100, Russell Currey wrote: > > On Fri, 2022-10-21 at 16:22 +1100, Benjamin Gray wrote: > > > From: "Christopher M. Riedl" > > > -%<-- > > > > > > --- > > > > Is the section following the --- your addendum to Chris' patch? That > > cuts it off from git, including your signoff. It'd be better to have > > it together as one commit message and note the bits you contributed > > below the --- after your signoff. > > > > Commits where you're modifying someone else's previous work should > > include their signoff above yours, as well. > > Addendum to his wording, to break it off from the "From..." section > (which is me splicing together his comments from previous patches with > some minor changes to account for the patch changes). I found out > earlier today that Git will treat it as a comment :( > > I'll add the signed off by back, I wasn't sure whether to leave it > there after making changes (same in patch 2). > This commit has lots of my words so should probably keep the sign-off - if only to guarantee that blame is properly directed at me for any nonsense therein ^^. Patch 2 probably doesn't need my sign-off any more - iirc, I actually defended the BUG_ON()s (which are WARN_ON()s now) at some point.
Re: [PATCH v4 2/6] powerpc/module: Handle caller-saved TOC in module linker
On Mon, 2022-10-10 at 11:29 +1100, Benjamin Gray wrote: > > A function symbol may set a value in the st_other field to indicate > > the TOC should be treated as caller-saved. The linker should > > ensure> the > > current TOC is saved before calling it and restore the TOC> > > afterwards, > > much like external calls. As I suggested on the last revision, worth mentioning here that it's the '.localentry , 1' directive we're talking about here. > > > > This is necessary for supporting V2 ABI static calls that do not > > preserve the TOC. > > > > Signed-off-by: Benjamin Gray > > --- > > arch/powerpc/kernel/module_64.c | 14 +- > > 1 file changed, 13 insertions(+), 1 deletion(-) > > > > diff --git a/arch/powerpc/kernel/module_64.c> > > b/arch/powerpc/kernel/module_64.c > > index 7e45dc98df8a..83a6f6e22e3b 100644 > > --- a/arch/powerpc/kernel/module_64.c > > +++ b/arch/powerpc/kernel/module_64.c > > @@ -55,6 +55,12 @@ static unsigned int local_entry_offset(const> > > Elf64_Sym *sym) > > * of function and try to derive r2 from it). 
*/ > > return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); > > } > > + > > +static bool need_r2save_stub(unsigned char st_other) > > +{ > > + return (st_other & STO_PPC64_LOCAL_MASK) == (1 <<> > > STO_PPC64_LOCAL_BIT); > > +} > > + > > #else > > > > static func_desc_t func_desc(unsigned long addr) > > @@ -66,6 +72,11 @@ static unsigned int local_entry_offset(const> > > Elf64_Sym *sym) > > return 0; > > } > > > > +static bool need_r2save_stub(unsigned char st_other) > > +{ > > + return false; > > +} > > + > > void *dereference_module_function_descriptor(struct module *mod,> > > void *ptr) > > { > > if (ptr < (void *)mod->arch.start_opd || > > @@ -632,7 +643,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs, > > case R_PPC_REL24: > > /* FIXME: Handle weak symbols here --RR */ > > if (sym->st_shndx == SHN_UNDEF || > > - sym->st_shndx == SHN_LIVEPATCH) { > > + sym->st_shndx == SHN_LIVEPATCH || > > + need_r2save_stub(sym->st_other)) { > > /* External: go via stub */ Perhaps this comment should be updated to mention that there are non- external but external-like calls? Otherwise Reviewed-by: Andrew Donnellan > > value = stub_for_addr(sechdrs, > > value,> me, > > strtab +> sym- > > >st_name); -- Andrew DonnellanOzLabs, ADL Canberra a...@linux.ibm.com IBM Australia Limited
Re: [PATCH v8 3/6] powerpc/code-patching: Verify instruction patch succeeded
On Mon, 2022-10-24 at 14:20 +1100, Russell Currey wrote: > On Fri, 2022-10-21 at 16:22 +1100, Benjamin Gray wrote: > > diff --git a/arch/powerpc/lib/code-patching.c > > b/arch/powerpc/lib/code-patching.c > > index 34fc7ac34d91..9b9eba574d7e 100644 > > --- a/arch/powerpc/lib/code-patching.c > > +++ b/arch/powerpc/lib/code-patching.c > > @@ -186,6 +186,8 @@ static int do_patch_instruction(u32 *addr, > > ppc_inst_t instr) > > err = __do_patch_instruction(addr, instr); > > local_irq_restore(flags); > > > > + WARN_ON(!err && !ppc_inst_equal(instr, > > ppc_inst_read(addr))); > > + > > As a side note, I had a look at test-code-patching.c and it doesn't > look like we have a test for ppc_inst_equal() with prefixed > instructions. We should fix that. Yeah, for a different series though I assume. And I think it would be better suited in a suite dedicated to testing asm/inst.h functions.
[PATCH] powerpc: Interrupt handler stack randomisation
Stack frames used by syscall handlers support random offsets as of commit f4a0318f278d (powerpc: add support for syscall stack randomization). Implement the same for general interrupt handlers, by applying the random stack offset and then updating this offset from within the DEFINE_INTERRUPT_HANDLER macros. Applying this offset perturbs the layout of interrupt handler stack frames, rendering to the kernel stack more difficult to control by means of user invoked interrupts. Link: https://lists.ozlabs.org/pipermail/linuxppc-dev/2022-May/243238.html Signed-off-by: Rohan McLure --- arch/powerpc/include/asm/interrupt.h | 12 1 file changed, 12 insertions(+) diff --git a/arch/powerpc/include/asm/interrupt.h b/arch/powerpc/include/asm/interrupt.h index 4745bb9998bd..b7f7beff4e13 100644 --- a/arch/powerpc/include/asm/interrupt.h +++ b/arch/powerpc/include/asm/interrupt.h @@ -68,6 +68,7 @@ #include #include +#include #include #include #include @@ -448,9 +449,12 @@ interrupt_handler long func(struct pt_regs *regs) \ long ret; \ \ __hard_RI_enable(); \ + add_random_kstack_offset(); \ \ ret = ##func (regs);\ \ + choose_random_kstack_offset(mftb());\ + \ return ret; \ } \ NOKPROBE_SYMBOL(func); \ @@ -480,9 +484,11 @@ static __always_inline void ##func(struct pt_regs *regs); \ interrupt_handler void func(struct pt_regs *regs) \ { \ interrupt_enter_prepare(regs); \ + add_random_kstack_offset(); \ \ ##func (regs); \ \ + choose_random_kstack_offset(mftb());\ interrupt_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ @@ -515,9 +521,11 @@ interrupt_handler long func(struct pt_regs *regs) \ long ret; \ \ interrupt_enter_prepare(regs); \ + add_random_kstack_offset(); \ \ ret = ##func (regs);\ \ + choose_random_kstack_offset(mftb());\ interrupt_exit_prepare(regs); \ \ return ret; \ @@ -548,9 +556,11 @@ static __always_inline void ##func(struct pt_regs *regs); \ interrupt_handler void func(struct pt_regs *regs) \ { \ interrupt_async_enter_prepare(regs);\ + add_random_kstack_offset(); \ 
\ ##func (regs); \ \ + choose_random_kstack_offset(mftb());\ interrupt_async_exit_prepare(regs); \ } \ NOKPROBE_SYMBOL(func); \ @@ -585,9 +595,11 @@ interrupt_handler long func(struct pt_regs *regs) \ long ret; \ \ interrupt_nmi_enter_prepare(regs, &state);
[PATCH v9 3/7] powerpc/code-patching: Use WARN_ON and fix check in poking_init
BUG_ON() when failing to initialise the code patching window is excessive, as most critical patching happens during boot before strict RWX control is enabled. Failure to patch after boot is not inherently fatal, so aborting the kernel is better determined by the caller. The return value of cpuhp_setup_state() is also >= 0 on success, so check for < 0. Signed-off-by: Benjamin Gray Reviewed-by: Russell Currey --- v9: * Reword commit message to explain why init failure is not fatal --- arch/powerpc/lib/code-patching.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 54e145247643..3b3b09d5d2e1 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -82,16 +82,13 @@ static int text_area_cpu_down(unsigned int cpu) static __ro_after_init DEFINE_STATIC_KEY_FALSE(poking_init_done); -/* - * Although BUG_ON() is rude, in this case it should only happen if ENOMEM, and - * we judge it as being preferable to a kernel that will crash later when - * someone tries to use patch_instruction(). - */ void __init poking_init(void) { - BUG_ON(!cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, - "powerpc/text_poke:online", text_area_cpu_up, - text_area_cpu_down)); + WARN_ON(cpuhp_setup_state(CPUHP_AP_ONLINE_DYN, + "powerpc/text_poke:online", + text_area_cpu_up, + text_area_cpu_down) < 0); + static_branch_enable(&poking_init_done); } -- 2.37.3
[PATCH v9 4/7] powerpc/code-patching: Verify instruction patch succeeded
Verifies that if the instruction patching did not return an error then the value stored at the given address to patch is now equal to the instruction we patched it to. Signed-off-by: Benjamin Gray --- arch/powerpc/lib/code-patching.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3b3b09d5d2e1..b0a12b2d5a9b 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -192,6 +192,8 @@ static int do_patch_instruction(u32 *addr, ppc_inst_t instr) err = __do_patch_instruction(addr, instr); local_irq_restore(flags); + WARN_ON(!err && !ppc_inst_equal(instr, ppc_inst_read(addr))); + return err; } #else /* !CONFIG_STRICT_KERNEL_RWX */ -- 2.37.3
[PATCH v9 2/7] powerpc/code-patching: Handle RWX patching initialisation error
Detect and abort __do_patch_instruction() when there is no text_poke_area, which implies there is no patching address. This allows patch_instruction() to fail gracefully and let the caller decide what to do, as opposed to the current behaviour of kernel panicking when the null pointer is dereferenced. Signed-off-by: Benjamin Gray --- v9: * New in v9 --- arch/powerpc/lib/code-patching.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index ad0cf3108dd0..54e145247643 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -76,6 +76,7 @@ static int text_area_cpu_up(unsigned int cpu) static int text_area_cpu_down(unsigned int cpu) { free_vm_area(this_cpu_read(text_poke_area)); + this_cpu_write(text_poke_area, NULL); return 0; } @@ -151,11 +152,16 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) { int err; u32 *patch_addr; + struct vm_struct *area; unsigned long text_poke_addr; pte_t *pte; unsigned long pfn = get_patch_pfn(addr); - text_poke_addr = (unsigned long)__this_cpu_read(text_poke_area)->addr & PAGE_MASK; + area = __this_cpu_read(text_poke_area); + if (unlikely(!area)) + return -ENOMEM; + + text_poke_addr = (unsigned long)area->addr & PAGE_MASK; patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); pte = virt_to_kpte(text_poke_addr); -- 2.37.3
[PATCH v9 5/7] powerpc/tlb: Add local flush for page given mm_struct and psize
Adds a local TLB flush operation that works given an mm_struct, VA to flush, and page size representation. Most implementations mirror the surrounding code. The book3s/32/tlbflush.h implementation is left as a WARN_ONCE_ON because it is more complicated and not required for anything as yet. This removes the need to create a vm_area_struct, which the temporary patching mm work does not need. Signed-off-by: Benjamin Gray --- v9: * Replace book3s/32/tlbflush.h implementation with warning --- arch/powerpc/include/asm/book3s/32/tlbflush.h | 9 + arch/powerpc/include/asm/book3s/64/tlbflush-hash.h | 5 + arch/powerpc/include/asm/book3s/64/tlbflush.h | 8 arch/powerpc/include/asm/nohash/tlbflush.h | 8 4 files changed, 30 insertions(+) diff --git a/arch/powerpc/include/asm/book3s/32/tlbflush.h b/arch/powerpc/include/asm/book3s/32/tlbflush.h index ba1743c52b56..14d989d41f75 100644 --- a/arch/powerpc/include/asm/book3s/32/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/32/tlbflush.h @@ -2,6 +2,8 @@ #ifndef _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H #define _ASM_POWERPC_BOOK3S_32_TLBFLUSH_H +#include + #define MMU_NO_CONTEXT (0) /* * TLB flushing for "classic" hash-MMU 32-bit CPUs, 6xx, 7xx, 7xxx @@ -74,6 +76,13 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, { flush_tlb_page(vma, vmaddr); } + +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + WARN_ONCE(true, "local TLB flush not implemented"); +} + static inline void local_flush_tlb_mm(struct mm_struct *mm) { flush_tlb_mm(mm); diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h index fab8332fe1ad..8fd9dc49b2a1 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush-hash.h @@ -94,6 +94,11 @@ static inline void hash__local_flush_tlb_page(struct vm_area_struct *vma, { } +static inline void hash__local_flush_tlb_page_psize(struct mm_struct 
*mm, + unsigned long vmaddr, int psize) +{ +} + static inline void hash__flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr) { diff --git a/arch/powerpc/include/asm/book3s/64/tlbflush.h b/arch/powerpc/include/asm/book3s/64/tlbflush.h index 67655cd60545..2d839dd5c08c 100644 --- a/arch/powerpc/include/asm/book3s/64/tlbflush.h +++ b/arch/powerpc/include/asm/book3s/64/tlbflush.h @@ -92,6 +92,14 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, return hash__local_flush_tlb_page(vma, vmaddr); } +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + if (radix_enabled()) + return radix__local_flush_tlb_page_psize(mm, vmaddr, psize); + return hash__local_flush_tlb_page_psize(mm, vmaddr, psize); +} + static inline void local_flush_all_mm(struct mm_struct *mm) { if (radix_enabled()) diff --git a/arch/powerpc/include/asm/nohash/tlbflush.h b/arch/powerpc/include/asm/nohash/tlbflush.h index bdaf34ad41ea..432bca4cac62 100644 --- a/arch/powerpc/include/asm/nohash/tlbflush.h +++ b/arch/powerpc/include/asm/nohash/tlbflush.h @@ -45,6 +45,12 @@ static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned lon asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); } +static inline void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize) +{ + asm volatile ("tlbie %0; sync" : : "r" (vmaddr) : "memory"); +} + static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end) { start &= PAGE_MASK; @@ -58,6 +64,8 @@ static inline void flush_tlb_kernel_range(unsigned long start, unsigned long end extern void flush_tlb_kernel_range(unsigned long start, unsigned long end); extern void local_flush_tlb_mm(struct mm_struct *mm); extern void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long vmaddr); +extern void local_flush_tlb_page_psize(struct mm_struct *mm, + unsigned long vmaddr, int psize); extern void 
__local_flush_tlb_page(struct mm_struct *mm, unsigned long vmaddr, int tsize, int ind); -- 2.37.3
[PATCH v9 1/7] powerpc: Allow clearing and restoring registers independent of saved breakpoint state
From: Jordan Niethe For the coming temporary mm used for instruction patching, the breakpoint registers need to be cleared to prevent them from accidentally being triggered. As soon as the patching is done, the breakpoints will be restored. The breakpoint state is stored in the per-cpu variable current_brk[]. Add a suspend_breakpoints() function which will clear the breakpoint registers without touching the state in current_brk[]. Add a pair function restore_breakpoints() which will move the state in current_brk[] back to the registers. Signed-off-by: Jordan Niethe Signed-off-by: Benjamin Gray --- v9: * Renamed set_breakpoint to set_hw_breakpoint * Renamed pause/unpause to suspend/restore * Removed unrelated whitespace change --- arch/powerpc/include/asm/debug.h | 2 ++ arch/powerpc/kernel/process.c| 38 +--- 2 files changed, 37 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h index 86a14736c76c..51c744608f37 100644 --- a/arch/powerpc/include/asm/debug.h +++ b/arch/powerpc/include/asm/debug.h @@ -46,6 +46,8 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; } #endif void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk); +void suspend_breakpoints(void); +void restore_breakpoints(void); bool ppc_breakpoint_available(void); #ifdef CONFIG_PPC_ADV_DEBUG_REGS extern void do_send_trap(struct pt_regs *regs, unsigned long address, diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 67da147fe34d..5d5109ed01a5 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -862,10 +862,8 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk) return 0; } -void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +static void set_hw_breakpoint(int nr, struct arch_hw_breakpoint *brk) { - memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); - if (dawr_enabled()) // Power8 or later set_dawr(nr, brk); @@ -879,6 +877,12 @@ 
void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) WARN_ON_ONCE(1); } +void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk) +{ + memcpy(this_cpu_ptr(¤t_brk[nr]), brk, sizeof(*brk)); + set_hw_breakpoint(nr, brk); +} + /* Check if we have DAWR or DABR hardware */ bool ppc_breakpoint_available(void) { @@ -891,6 +895,34 @@ bool ppc_breakpoint_available(void) } EXPORT_SYMBOL_GPL(ppc_breakpoint_available); +/* Disable the breakpoint in hardware without touching current_brk[] */ +void suspend_breakpoints(void) +{ + struct arch_hw_breakpoint brk = {0}; + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, &brk); +} + +/* + * Re-enable breakpoints suspended by suspend_breakpoints() in hardware + * from current_brk[] + */ +void restore_breakpoints(void) +{ + int i; + + if (!ppc_breakpoint_available()) + return; + + for (i = 0; i < nr_wp_slots(); i++) + set_hw_breakpoint(i, this_cpu_ptr(¤t_brk[i])); +} + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM static inline bool tm_enabled(struct task_struct *tsk) -- 2.37.3
[PATCH v9 0/7] Use per-CPU temporary mappings for patching on Radix MMU
This is a revision of Chris and Jordan's series to introduce a per-cpu temporary mm to be used for patching with strict rwx on radix mmus. v9: * Fixed patch series name to include "on Radix MMU" again * Renamed breakpoint functions * Introduce patch to gracefully return when patching not possible * Make book3s/32/tlbflush.h TLB page flush implementation a warning * Removed temp_mm_state * Consolidate patching context into single struct shared by both paths Previous versions: v8: https://lore.kernel.org/all/20221021052238.580986-1-bg...@linux.ibm.com/ v7: https://lore.kernel.org/all/2020003717.1150965-1-jniet...@gmail.com/ v6: https://lore.kernel.org/all/20210911022904.30962-1-...@bluescreens.de/ v5: https://lore.kernel.org/all/20210713053113.4632-1-...@linux.ibm.com/ v4: https://lore.kernel.org/all/20210429072057.8870-1-...@bluescreens.de/ v3: https://lore.kernel.org/all/20200827052659.24922-1-...@codefail.de/ v2: https://lore.kernel.org/all/20200709040316.12789-1-...@informatik.wtf/ v1: https://lore.kernel.org/all/20200603051912.23296-1-...@informatik.wtf/ RFC: https://lore.kernel.org/all/20200323045205.20314-1-...@informatik.wtf/ x86: https://lore.kernel.org/kernel-hardening/20190426232303.28381-1-nadav.a...@gmail.com/ Benjamin Gray (7): powerpc: Allow clearing and restoring registers independent of saved breakpoint state powerpc/code-patching: Handle RWX patching initialisation error powerpc/code-patching: Use WARN_ON and fix check in poking_init powerpc/code-patching: Verify instruction patch succeeded powerpc/tlb: Add local flush for page given mm_struct and psize powerpc/code-patching: Use temporary mm for Radix MMU powerpc/code-patching: Consolidate and cache per-cpu patching context arch/powerpc/include/asm/book3s/32/tlbflush.h | 9 + .../include/asm/book3s/64/tlbflush-hash.h | 5 + arch/powerpc/include/asm/book3s/64/tlbflush.h | 8 + arch/powerpc/include/asm/debug.h | 2 + arch/powerpc/include/asm/nohash/tlbflush.h| 8 + arch/powerpc/kernel/process.c | 38 ++- 
arch/powerpc/lib/code-patching.c | 252 +- 7 files changed, 305 insertions(+), 17 deletions(-) base-commit: 9abf2313adc1ca1b6180c508c25f22f9395cc780 -- 2.37.3
[PATCH v9 7/7] powerpc/code-patching: Consolidate and cache per-cpu patching context
With the temp mm context support, there are CPU local variables to hold the patch address and pte. Use these in the non-temp mm path as well instead of adding a level of indirection through the text_poke_area vm_struct and pointer chasing the pte. As both paths use these fields now, there is no need to let unreferenced variables be dropped by the compiler, so it is cleaner to merge them into a single context struct. This has the additional benefit of removing a redundant CPU local pointer, as only one of cpu_patching_mm / text_poke_area is ever used, while remaining well-typed. Signed-off-by: Benjamin Gray --- v9: * Consolidate patching context into single struct --- arch/powerpc/lib/code-patching.c | 58 ++-- 1 file changed, 33 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 3fe99d0086fc..cefb938f7217 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -48,10 +48,16 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) #ifdef CONFIG_STRICT_KERNEL_RWX -static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); -static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm); -static DEFINE_PER_CPU(unsigned long, cpu_patching_addr); -static DEFINE_PER_CPU(pte_t *, cpu_patching_pte); +struct patch_context { + union { + struct vm_struct *text_poke_area; + struct mm_struct *mm; + }; + unsigned long addr; + pte_t * pte; +}; + +static DEFINE_PER_CPU(struct patch_context, cpu_patching_context); static int map_patch_area(void *addr, unsigned long text_poke_addr); static void unmap_patch_area(unsigned long addr); @@ -116,15 +122,19 @@ static int text_area_cpu_up(unsigned int cpu) unmap_patch_area(addr); - this_cpu_write(text_poke_area, area); + this_cpu_write(cpu_patching_context.text_poke_area, area); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, virt_to_kpte(addr)); return 0; } static int text_area_cpu_down(unsigned int 
cpu) { - free_vm_area(this_cpu_read(text_poke_area)); - this_cpu_write(text_poke_area, NULL); + free_vm_area(this_cpu_read(cpu_patching_context.text_poke_area)); + this_cpu_write(cpu_patching_context.text_poke_area, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); return 0; } @@ -172,9 +182,9 @@ static int text_area_cpu_up_mm(unsigned int cpu) if (WARN_ON(!ptep)) goto fail_no_pte; - this_cpu_write(cpu_patching_mm, mm); - this_cpu_write(cpu_patching_addr, addr); - this_cpu_write(cpu_patching_pte, ptep); + this_cpu_write(cpu_patching_context.mm, mm); + this_cpu_write(cpu_patching_context.addr, addr); + this_cpu_write(cpu_patching_context.pte, ptep); return 0; @@ -202,8 +212,8 @@ static int text_area_cpu_down_mm(unsigned int cpu) p4d_t *p4dp; pgd_t *pgdp; - mm = this_cpu_read(cpu_patching_mm); - addr = this_cpu_read(cpu_patching_addr); + mm = this_cpu_read(cpu_patching_context.mm); + addr = this_cpu_read(cpu_patching_context.addr); pgdp = pgd_offset(mm, addr); p4dp = p4d_offset(pgdp, addr); @@ -223,9 +233,9 @@ static int text_area_cpu_down_mm(unsigned int cpu) mmput(mm); - this_cpu_write(cpu_patching_mm, NULL); - this_cpu_write(cpu_patching_addr, 0); - this_cpu_write(cpu_patching_pte, NULL); + this_cpu_write(cpu_patching_context.mm, NULL); + this_cpu_write(cpu_patching_context.addr, 0); + this_cpu_write(cpu_patching_context.pte, NULL); return 0; } @@ -316,9 +326,9 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr) struct mm_struct *patching_mm; struct mm_struct *orig_mm; - patching_mm = __this_cpu_read(cpu_patching_mm); - pte = __this_cpu_read(cpu_patching_pte); - text_poke_addr = __this_cpu_read(cpu_patching_addr); + patching_mm = __this_cpu_read(cpu_patching_context.mm); + pte = __this_cpu_read(cpu_patching_context.pte); + text_poke_addr = __this_cpu_read(cpu_patching_context.addr); patch_addr = (u32 *)(text_poke_addr + offset_in_page(addr)); if (unlikely(!patching_mm)) @@ -357,19 +367,17 @@ 
static int __do_patch_instruction(u32 *addr, ppc_inst_t instr) { int err; u32 *patch_addr; - struct vm_struct *area; unsigned long text_poke_addr; pte_t *pte; unsigned long pfn = get_patch_pfn(addr); - area = __this_cpu_read(text_poke_area); - if (unlikely(!area)) - return -ENOMEM; - - text_poke_addr = (unsigned long)area->addr & PAGE_MASK; + text_poke_addr = (unsigned long)__this_cpu_read(cpu_patching_context.addr) & PAGE_MASK; patch_addr
[PATCH v9 6/7] powerpc/code-patching: Use temporary mm for Radix MMU
From: "Christopher M. Riedl" x86 supports the notion of a temporary mm which restricts access to temporary PTEs to a single CPU. A temporary mm is useful for situations where a CPU needs to perform sensitive operations (such as patching a STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing said mappings to other CPUs. Another benefit is that other CPU TLBs do not need to be flushed when the temporary mm is torn down. Mappings in the temporary mm can be set in the userspace portion of the address-space. Interrupts must be disabled while the temporary mm is in use. HW breakpoints, which may have been set by userspace as watchpoints on addresses now within the temporary mm, are saved and disabled when loading the temporary mm. The HW breakpoints are restored when unloading the temporary mm. All HW breakpoints are indiscriminately disabled while the temporary mm is in use - this may include breakpoints set by perf. Use the `poking_init` init hook to prepare a temporary mm and patching address. Initialize the temporary mm by copying the init mm. Choose a randomized patching address inside the temporary mm userspace address space. The patching address is randomized between PAGE_SIZE and DEFAULT_MAP_WINDOW-PAGE_SIZE. Bits of entropy with 64K page size on BOOK3S_64: bits of entropy = log2(DEFAULT_MAP_WINDOW_USER64 / PAGE_SIZE) PAGE_SIZE=64K, DEFAULT_MAP_WINDOW_USER64=128TB bits of entropy = log2(128TB / 64K) bits of entropy = 31 The upper limit is DEFAULT_MAP_WINDOW due to how the Book3s64 Hash MMU operates - by default the space above DEFAULT_MAP_WINDOW is not available. Currently the Hash MMU does not use a temporary mm so technically this upper limit isn't necessary; however, a larger randomization range does not further "harden" this overall approach and future work may introduce patching with a temporary mm on Hash as well. Randomization occurs only once during initialization for each CPU as it comes online. 
The patching page is mapped with PAGE_KERNEL to set EAA[0] for the PTE which ignores the AMR (so no need to unlock/lock KUAP) according to PowerISA v3.0b Figure 35 on Radix. Based on x86 implementation: commit 4fc19708b165 ("x86/alternatives: Initialize temporary mm for patching") and: commit b3fd8e83ada0 ("x86/alternatives: Use temporary mm for text poking") From: Benjamin Gray Synchronisation is done according to ISA 3.1B Book 3 Chapter 13 "Synchronization Requirements for Context Alterations". Switching the mm is a change to the PID, which requires a CSI before and after the change, and a hwsync between the last instruction that performs address translation for an associated storage access and the context-altering instruction (the PID change itself). Instruction fetch is an associated storage access, but the instruction address mappings are not being changed, so it should not matter which context they use. We must still perform a hwsync to guard arbitrary prior code that may have accessed a userspace address. TLB invalidation is local and VA specific. Local because only this core used the patching mm, and VA specific because we only care that the writable mapping is purged. Leaving the other mappings intact is more efficient, especially when performing many code patches in a row (e.g., as ftrace would). Signed-off-by: Christopher M. Riedl Signed-off-by: Benjamin Gray --- v9: * Add back Christopher M.
Riedl signed-off-by * Remove temp_mm_state --- arch/powerpc/lib/code-patching.c | 221 ++- 1 file changed, 216 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index b0a12b2d5a9b..3fe99d0086fc 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -4,12 +4,17 @@ */ #include +#include +#include #include #include #include #include #include +#include +#include +#include #include #include #include @@ -42,11 +47,54 @@ int raw_patch_instruction(u32 *addr, ppc_inst_t instr) } #ifdef CONFIG_STRICT_KERNEL_RWX + static DEFINE_PER_CPU(struct vm_struct *, text_poke_area); +static DEFINE_PER_CPU(struct mm_struct *, cpu_patching_mm); +static DEFINE_PER_CPU(unsigned long, cpu_patching_addr); +static DEFINE_PER_CPU(pte_t *, cpu_patching_pte); static int map_patch_area(void *addr, unsigned long text_poke_addr); static void unmap_patch_area(unsigned long addr); +static bool mm_patch_enabled(void) +{ + return IS_ENABLED(CONFIG_SMP) && radix_enabled(); +} + +/* + * The following applies for Radix MMU. Hash MMU has different requirements, + * and so is not supported. + * + * Changing mm requires context synchronising instructions on both sides of + * the context switch, as well as a hwsync between the last instruction for + * which the address of an associated storage access was translated using + * the current context. + * + * switch_mm_irqs_off() performs an isync after the context switch. It is + * the responsibility of the caller