[PATCH v4 1/4] powerpc/mm: Implement set_memory() routines
The set_memory_{ro/rw/nx/x}() functions are required for STRICT_MODULE_RWX, and are generally useful primitives to have. This implementation is designed to be completely generic across powerpc's many MMUs. It's possible that this could be optimised to be faster for specific MMUs, but the focus is on having a generic and safe implementation for now. Signed-off-by: Russell Currey --- arch/powerpc/Kconfig | 1 + arch/powerpc/include/asm/set_memory.h | 32 ++ arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/pageattr.c| 60 +++ 4 files changed, 94 insertions(+) create mode 100644 arch/powerpc/include/asm/set_memory.h create mode 100644 arch/powerpc/mm/pageattr.c diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 3e56c9c2f16e..8f7005f0d097 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -133,6 +133,7 @@ config PPC select ARCH_HAS_PTE_SPECIAL select ARCH_HAS_MEMBARRIER_CALLBACKS select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 + select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h new file mode 100644 index ..5230ddb2fefd --- /dev/null +++ b/arch/powerpc/include/asm/set_memory.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _ASM_POWERPC_SET_MEMORY_H +#define _ASM_POWERPC_SET_MEMORY_H + +#define SET_MEMORY_RO 1 +#define SET_MEMORY_RW 2 +#define SET_MEMORY_NX 3 +#define SET_MEMORY_X 4 + +int change_memory_attr(unsigned long addr, int numpages, int action); + +static inline int set_memory_ro(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_RO); +} + +static inline int set_memory_rw(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_RW); +} + +static inline int 
set_memory_nx(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_NX); +} + +static inline int set_memory_x(unsigned long addr, int numpages) +{ + return change_memory_attr(addr, numpages, SET_MEMORY_X); +} + +#endif diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile index 5e147986400d..d0a0bcbc9289 100644 --- a/arch/powerpc/mm/Makefile +++ b/arch/powerpc/mm/Makefile @@ -20,3 +20,4 @@ obj-$(CONFIG_HIGHMEM) += highmem.o obj-$(CONFIG_PPC_COPRO_BASE) += copro_fault.o obj-$(CONFIG_PPC_PTDUMP) += ptdump/ obj-$(CONFIG_KASAN)+= kasan/ +obj-$(CONFIG_ARCH_HAS_SET_MEMORY) += pageattr.o diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c new file mode 100644 index ..fe3ecbfb8e10 --- /dev/null +++ b/arch/powerpc/mm/pageattr.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 + +/* + * MMU-generic set_memory implementation for powerpc + * + * Author: Russell Currey + * + * Copyright 2019, IBM Corporation. + */ + +#include +#include + +#include +#include +#include + +static int change_page_attr(pte_t *ptep, unsigned long addr, void *data) +{ + int action = *((int *)data); + pte_t pte_val; + + // invalidate the PTE so it's safe to modify + pte_val = ptep_get_and_clear(&init_mm, addr, ptep); + flush_tlb_kernel_range(addr, addr + PAGE_SIZE); + + // modify the PTE bits as desired, then apply + switch (action) { + case SET_MEMORY_RO: + pte_val = pte_wrprotect(pte_val); + break; + case SET_MEMORY_RW: + pte_val = pte_mkwrite(pte_val); + break; + case SET_MEMORY_NX: + pte_val = pte_exprotect(pte_val); + break; + case SET_MEMORY_X: + pte_val = pte_mkexec(pte_val); + break; + default: + WARN_ON(true); + return -EINVAL; + } + + set_pte_at(&init_mm, addr, ptep, pte_val); + + return 0; +} + +int change_memory_attr(unsigned long addr, int numpages, int action) +{ + unsigned long start = ALIGN_DOWN(addr, PAGE_SIZE); + unsigned long size = numpages * PAGE_SIZE; + + if (!numpages) + return 0; + + return 
apply_to_page_range(&init_mm, start, size, change_page_attr, &action); +} -- 2.23.0
[PATCH v4 0/4] Implement STRICT_MODULE_RWX for powerpc
v3 cover letter here: https://lists.ozlabs.org/pipermail/linuxppc-dev/2019-October/198023.html Only minimal changes since then: - patch 2/4 commit message update thanks to Andrew Donnellan - patch 3/4 made neater thanks to Christophe Leroy - patch 3/4 updated Kconfig description thanks to Daniel Axtens Russell Currey (4): powerpc/mm: Implement set_memory() routines powerpc/kprobes: Mark newly allocated probes as RO powerpc/mm/ptdump: debugfs handler for W+X checks at runtime powerpc: Enable STRICT_MODULE_RWX arch/powerpc/Kconfig | 2 + arch/powerpc/Kconfig.debug | 6 ++- arch/powerpc/configs/skiroot_defconfig | 1 + arch/powerpc/include/asm/set_memory.h | 32 ++ arch/powerpc/kernel/kprobes.c | 3 ++ arch/powerpc/mm/Makefile | 1 + arch/powerpc/mm/pageattr.c | 60 ++ arch/powerpc/mm/ptdump/ptdump.c| 21 - 8 files changed, 123 insertions(+), 3 deletions(-) create mode 100644 arch/powerpc/include/asm/set_memory.h create mode 100644 arch/powerpc/mm/pageattr.c -- 2.23.0
[PATCH v4 3/4] powerpc/mm/ptdump: debugfs handler for W+X checks at runtime
Very rudimentary, just echo 1 > [debugfs]/check_wx_pages and check the kernel log. Useful for testing strict module RWX. Updated the Kconfig entry to reflect this. Also fixed a typo. Signed-off-by: Russell Currey --- arch/powerpc/Kconfig.debug | 6 -- arch/powerpc/mm/ptdump/ptdump.c | 21 - 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/Kconfig.debug b/arch/powerpc/Kconfig.debug index c59920920ddc..dcfe83d4c211 100644 --- a/arch/powerpc/Kconfig.debug +++ b/arch/powerpc/Kconfig.debug @@ -370,7 +370,7 @@ config PPC_PTDUMP If you are unsure, say N. config PPC_DEBUG_WX - bool "Warn on W+X mappings at boot" + bool "Warn on W+X mappings at boot & enable manual checks at runtime" depends on PPC_PTDUMP help Generate a warning if any W+X mappings are found at boot. @@ -384,7 +384,9 @@ config PPC_DEBUG_WX of other unfixed kernel bugs easier. There is no runtime or memory usage effect of this option - once the kernel has booted up - it's a one time check. + once the kernel has booted up, it only automatically checks once. + + Enables the "check_wx_pages" debugfs entry for checking at runtime. If in doubt, say "Y". diff --git a/arch/powerpc/mm/ptdump/ptdump.c b/arch/powerpc/mm/ptdump/ptdump.c index 2f9ddc29c535..b6cba29ae4a0 100644 --- a/arch/powerpc/mm/ptdump/ptdump.c +++ b/arch/powerpc/mm/ptdump/ptdump.c @@ -4,7 +4,7 @@ * * This traverses the kernel pagetables and dumps the * information about the used sections of memory to - * /sys/kernel/debug/kernel_pagetables. + * /sys/kernel/debug/kernel_page_tables. * * Derived from the arm64 implementation: * Copyright (c) 2014, The Linux Foundation, Laura Abbott. 
@@ -409,6 +409,25 @@ void ptdump_check_wx(void) else pr_info("Checked W+X mappings: passed, no W+X pages found\n"); } + +static int check_wx_debugfs_set(void *data, u64 val) +{ + if (val != 1ULL) + return -EINVAL; + + ptdump_check_wx(); + + return 0; +} + +DEFINE_SIMPLE_ATTRIBUTE(check_wx_fops, NULL, check_wx_debugfs_set, "%llu\n"); + +static int ptdump_check_wx_init(void) +{ + return debugfs_create_file("check_wx_pages", 0200, NULL, + NULL, &check_wx_fops) ? 0 : -ENOMEM; +} +device_initcall(ptdump_check_wx_init); #endif static int ptdump_init(void) -- 2.23.0
[PATCH v4 4/4] powerpc: Enable STRICT_MODULE_RWX
Whether STRICT_MODULE_RWX is enabled by default depends on powerpc platform - in arch/Kconfig, STRICT_MODULE_RWX depends on ARCH_OPTIONAL_KERNEL_RWX, which in arch/powerpc/Kconfig is selected if ARCH_HAS_STRICT_KERNEL_RWX is selected, which is only true with CONFIG_RELOCATABLE *disabled*. defconfigs like skiroot_defconfig which turn STRICT_KERNEL_RWX on when it is not already on by default also do NOT enable STRICT_MODULE_RWX automatically, so it is explicitly enabled there in this patch. Thus, on by default for ppc32 only. Module RWX doesn't provide a whole lot of value with Kernel RWX off, but it doesn't hurt, either. The next step is to make STRICT_KERNEL_RWX compatible with RELOCATABLE so it can be on by default. Tested-by: Daniel Axtens # e6500 Signed-off-by: Russell Currey --- arch/powerpc/Kconfig | 1 + arch/powerpc/configs/skiroot_defconfig | 1 + 2 files changed, 2 insertions(+) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 8f7005f0d097..212c4d02be40 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -135,6 +135,7 @@ config PPC select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64 select ARCH_HAS_SET_MEMORY select ARCH_HAS_STRICT_KERNEL_RWX if ((PPC_BOOK3S_64 || PPC32) && !RELOCATABLE && !HIBERNATION) + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAS_UACCESS_FLUSHCACHE select ARCH_HAS_UACCESS_MCSAFE if PPC64 diff --git a/arch/powerpc/configs/skiroot_defconfig b/arch/powerpc/configs/skiroot_defconfig index 1253482a67c0..719d899081b3 100644 --- a/arch/powerpc/configs/skiroot_defconfig +++ b/arch/powerpc/configs/skiroot_defconfig @@ -31,6 +31,7 @@ CONFIG_PERF_EVENTS=y CONFIG_SLAB_FREELIST_HARDENED=y CONFIG_JUMP_LABEL=y CONFIG_STRICT_KERNEL_RWX=y +CONFIG_STRICT_MODULE_RWX=y CONFIG_MODULES=y CONFIG_MODULE_UNLOAD=y CONFIG_MODULE_SIG=y -- 2.23.0
[PATCH v4 2/4] powerpc/kprobes: Mark newly allocated probes as RO
With CONFIG_STRICT_KERNEL_RWX=y and CONFIG_KPROBES=y, there will be one W+X page at boot by default. This can be tested with CONFIG_PPC_PTDUMP=y and CONFIG_PPC_DEBUG_WX=y set, and checking the kernel log during boot. powerpc doesn't implement its own alloc() for kprobes like other architectures do, but we couldn't immediately mark RO anyway since we do a memcpy to the page we allocate later. After that, nothing should be allowed to modify the page, and write permissions are removed well before the kprobe is armed. Thus mark newly allocated probes as read-only once it's safe to do so. Signed-off-by: Russell Currey --- arch/powerpc/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2d27ec4feee4..2610496de7c7 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -24,6 +24,7 @@ #include #include #include +#include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -131,6 +132,8 @@ int arch_prepare_kprobe(struct kprobe *p) (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); } + set_memory_ro((unsigned long)p->ainsn.insn, 1); + p->ainsn.boostable = 0; return ret; } -- 2.23.0
Re: [PATCH 2/2] virtio_ring: Use DMA API if memory is encrypted
On Fri, Oct 11, 2019 at 06:25:19PM -0700, Ram Pai wrote: > From: Thiago Jung Bauermann > > Normally, virtio enables DMA API with VIRTIO_F_IOMMU_PLATFORM, which must > be set by both device and guest driver. However, as a hack, when DMA API > returns physical addresses, guest driver can use the DMA API; even though > device does not set VIRTIO_F_IOMMU_PLATFORM and just uses physical > addresses. > > Doing this works-around POWER secure guests for which only the bounce > buffer is accessible to the device, but which don't set > VIRTIO_F_IOMMU_PLATFORM due to a set of hypervisor and architectural bugs. > To guard against platform changes, breaking any of these assumptions down > the road, we check at probe time and fail if that's not the case. > > cc: Benjamin Herrenschmidt > cc: David Gibson > cc: Michael Ellerman > cc: Paul Mackerras > cc: Michael Roth > cc: Alexey Kardashevskiy > cc: Jason Wang > cc: Christoph Hellwig > Suggested-by: Michael S. Tsirkin > Signed-off-by: Ram Pai > Signed-off-by: Thiago Jung Bauermann Reviewed-by: David Gibson I don't know that this is the most elegant solution possible. But it's simple, gets the job done and pretty unlikely to cause mysterious breakage down the road. -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson signature.asc Description: PGP signature
Re: [PATCH 1/2] dma-mapping: Add dma_addr_is_phys_addr()
On Fri, Oct 11, 2019 at 06:25:18PM -0700, Ram Pai wrote: > From: Thiago Jung Bauermann > > In order to safely use the DMA API, virtio needs to know whether DMA > addresses are in fact physical addresses and for that purpose, > dma_addr_is_phys_addr() is introduced. > > cc: Benjamin Herrenschmidt > cc: David Gibson > cc: Michael Ellerman > cc: Paul Mackerras > cc: Michael Roth > cc: Alexey Kardashevskiy > cc: Paul Burton > cc: Robin Murphy > cc: Bartlomiej Zolnierkiewicz > cc: Marek Szyprowski > cc: Christoph Hellwig > Suggested-by: Michael S. Tsirkin > Signed-off-by: Ram Pai > Signed-off-by: Thiago Jung Bauermann The change itself looks ok, so Reviewed-by: David Gibson However, I would like to see the commit message (and maybe the inline comments) expanded a bit on what the distinction here is about. Some of the text from the next patch would be suitable, about DMA addresses usually being in a different address space but not in the case of bounce buffering. > --- > arch/powerpc/include/asm/dma-mapping.h | 21 + > arch/powerpc/platforms/pseries/Kconfig | 1 + > include/linux/dma-mapping.h| 20 > kernel/dma/Kconfig | 3 +++ > 4 files changed, 45 insertions(+) > > diff --git a/arch/powerpc/include/asm/dma-mapping.h > b/arch/powerpc/include/asm/dma-mapping.h > index 565d6f7..f92c0a4b 100644 > --- a/arch/powerpc/include/asm/dma-mapping.h > +++ b/arch/powerpc/include/asm/dma-mapping.h > @@ -5,6 +5,8 @@ > #ifndef _ASM_DMA_MAPPING_H > #define _ASM_DMA_MAPPING_H > > +#include > + > static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type > *bus) > { > /* We don't handle the NULL dev case for ISA for now. 
We could > @@ -15,4 +17,23 @@ static inline const struct dma_map_ops > *get_arch_dma_ops(struct bus_type *bus) > return NULL; > } > > +#ifdef CONFIG_ARCH_HAS_DMA_ADDR_IS_PHYS_ADDR > +/** > + * dma_addr_is_phys_addr - check whether a device DMA address is a physical > + * address > + * @dev: device to check > + * > + * Returns %true if any DMA address for this device happens to also be a > valid > + * physical address (not necessarily of the same page). > + */ > +static inline bool dma_addr_is_phys_addr(struct device *dev) > +{ > + /* > + * Secure guests always use the SWIOTLB, therefore DMA addresses are > + * actually the physical address of the bounce buffer. > + */ > + return is_secure_guest(); > +} > +#endif > + > #endif /* _ASM_DMA_MAPPING_H */ > diff --git a/arch/powerpc/platforms/pseries/Kconfig > b/arch/powerpc/platforms/pseries/Kconfig > index 9e35cdd..0108150 100644 > --- a/arch/powerpc/platforms/pseries/Kconfig > +++ b/arch/powerpc/platforms/pseries/Kconfig > @@ -152,6 +152,7 @@ config PPC_SVM > select SWIOTLB > select ARCH_HAS_MEM_ENCRYPT > select ARCH_HAS_FORCE_DMA_UNENCRYPTED > + select ARCH_HAS_DMA_ADDR_IS_PHYS_ADDR > help >There are certain POWER platforms which support secure guests using >the Protected Execution Facility, with the help of an Ultravisor > diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h > index f7d1eea..6df5664 100644 > --- a/include/linux/dma-mapping.h > +++ b/include/linux/dma-mapping.h > @@ -693,6 +693,26 @@ static inline bool dma_addressing_limited(struct device > *dev) > dma_get_required_mask(dev); > } > > +#ifndef CONFIG_ARCH_HAS_DMA_ADDR_IS_PHYS_ADDR > +/** > + * dma_addr_is_phys_addr - check whether a device DMA address is a physical > + * address > + * @dev: device to check > + * > + * Returns %true if any DMA address for this device happens to also be a > valid > + * physical address (not necessarily of the same page). 
> + */ > +static inline bool dma_addr_is_phys_addr(struct device *dev) > +{ > + /* > + * Except in very specific setups, DMA addresses exist in a different > + * address space from CPU physical addresses and cannot be directly used > + * to reference system memory. > + */ > + return false; > +} > +#endif > + > #ifdef CONFIG_ARCH_HAS_SETUP_DMA_OPS > void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, > const struct iommu_ops *iommu, bool coherent); > diff --git a/kernel/dma/Kconfig b/kernel/dma/Kconfig > index 9decbba..6209b46 100644 > --- a/kernel/dma/Kconfig > +++ b/kernel/dma/Kconfig > @@ -51,6 +51,9 @@ config ARCH_HAS_DMA_MMAP_PGPROT > config ARCH_HAS_FORCE_DMA_UNENCRYPTED > bool > > +config ARCH_HAS_DMA_ADDR_IS_PHYS_ADDR > + bool > + > config DMA_NONCOHERENT_CACHE_SYNC > bool > -- David Gibson| I'll have my music baroque, and my code david AT gibson.dropbear.id.au | minimalist, thank you. NOT _the_ _other_ | _way_ _around_! http://www.ozlabs.org/~dgibson signature.asc Description: PGP signature
Re: [PATCH v4 0/5] Powerpc/Watchpoint: Few important fixes
On 10/12/19 2:21 PM, Christophe Leroy wrote: Le 10/10/2019 à 06:44, Ravi Bangoria a écrit : @Christophe, Is patch5 works for you on 8xx? Getting the following : root@vgoip:~# ./ptrace-hwbreak test: ptrace-hwbreak tags: git_version:v5.4-rc2-710-gf0082e173fe4-dirty PTRACE_SET_DEBUGREG, WO, len: 1: Ok PTRACE_SET_DEBUGREG, WO, len: 2: Ok PTRACE_SET_DEBUGREG, WO, len: 4: Ok PTRACE_SET_DEBUGREG, WO, len: 8: Ok PTRACE_SET_DEBUGREG, RO, len: 1: Ok PTRACE_SET_DEBUGREG, RO, len: 2: Ok PTRACE_SET_DEBUGREG, RO, len: 4: Ok PTRACE_SET_DEBUGREG, RO, len: 8: Ok PTRACE_SET_DEBUGREG, RW, len: 1: Ok PTRACE_SET_DEBUGREG, RW, len: 2: Ok PTRACE_SET_DEBUGREG, RW, len: 4: Ok PTRACE_SET_DEBUGREG, RW, len: 8: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO, len: 6: Fail failure: ptrace-hwbreak I also tried on a 83xx (book3s/32). This one has a regular DABR : root@vgoippro:~# ./ptrace-hwbreak test: ptrace-hwbreak tags: git_version:v5.4-rc2-710-gf0082e173fe4-dirty PTRACE_SET_DEBUGREG, WO, len: 1: Ok PTRACE_SET_DEBUGREG, WO, len: 2: Ok PTRACE_SET_DEBUGREG, WO, len: 4: Ok PTRACE_SET_DEBUGREG, WO, len: 8: Ok PTRACE_SET_DEBUGREG, RO, len: 1: Ok PTRACE_SET_DEBUGREG, RO, len: 2: Ok PTRACE_SET_DEBUGREG, RO, len: 4: Ok PTRACE_SET_DEBUGREG, RO, len: 8: Ok PTRACE_SET_DEBUGREG, RW, len: 1: Ok PTRACE_SET_DEBUGREG, RW, len: 2: Ok PTRACE_SET_DEBUGREG, RW, len: 4: Ok PTRACE_SET_DEBUGREG, RW, len: 8: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RW, len: 6: Ok PPC_PTRACE_SETHWDEBUG failed: Invalid argument Ok. 
If it has the DABR _only_, creation of a MODE RANGE UNALIGNED breakpoint will fail because DABR supports only 8-byte-wide breakpoints and they must be doubleword aligned. So this means I've to add all unaligned tests inside if (dawr) condition. Ravi
Re: [PATCH v4 0/5] Powerpc/Watchpoint: Few important fixes
On 10/12/19 1:01 PM, Christophe Leroy wrote: Le 10/10/2019 à 08:25, Ravi Bangoria a écrit : On 10/10/19 10:14 AM, Ravi Bangoria wrote: @Christophe, Is patch5 works for you on 8xx? Getting the following : root@vgoip:~# ./ptrace-hwbreak test: ptrace-hwbreak tags: git_version:v5.4-rc2-710-gf0082e173fe4-dirty PTRACE_SET_DEBUGREG, WO, len: 1: Ok PTRACE_SET_DEBUGREG, WO, len: 2: Ok PTRACE_SET_DEBUGREG, WO, len: 4: Ok PTRACE_SET_DEBUGREG, WO, len: 8: Ok PTRACE_SET_DEBUGREG, RO, len: 1: Ok PTRACE_SET_DEBUGREG, RO, len: 2: Ok PTRACE_SET_DEBUGREG, RO, len: 4: Ok PTRACE_SET_DEBUGREG, RO, len: 8: Ok PTRACE_SET_DEBUGREG, RW, len: 1: Ok PTRACE_SET_DEBUGREG, RW, len: 2: Ok PTRACE_SET_DEBUGREG, RW, len: 4: Ok PTRACE_SET_DEBUGREG, RW, len: 8: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, WO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RO, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_EXACT, RW, len: 1: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, WO, len: 6: Ok PPC_PTRACE_SETHWDEBUG, MODE_RANGE, DW ALIGNED, RO, len: 6: Fail failure: ptrace-hwbreak Thanks Christophe. I don't have any 8xx box. I checked qemu and it seems qemu emulation for 8xx is not yet supported. So I can't debug this. Can you please check why it's failing? PPC_PTRACE_SETHWDEBUG internally uses DAWR register and probably 8xx does not emulate DAWR logic, it only uses DABR to emulate double-word watchpoint. In that case, all testcases that uses PPC_PTRACE_SETHWDEBUG should be disabled for 8xx. I'll change [PATCH 5] accordingly and resend. I think the MODE_EXACT ones are OK with the 8xx at the time being. Ok. I'll disable other tests for 8xx. Also, I was bit wrong in above point. Actually, PPC_PTRACE_SETHWDEBUG with RANGE breakpoint also support DABR but the length will be 8 only. So I've to change my patch 1 also a bit (ptrace stuff). I'll resend the series with these changes. Also, do you think I should fix hw_breakpoint_validate_len() from [PARCH 1] for 8xx? 
I re-checked your recent patch* to allow any address range size for 8xx. With that patch, hw_breakpoint_validate_len() won't get called at all for 8xx. For the time being, the 8xx emulates DABR so it has the same limitations as BOOK3S. My patch needs to be rebased on top of your series and I think it needs some modifications, as it seems it doesn't properly handle size 1 and size 2 breakpoints at least. So I think that you should leave your Patch1 as is, and I'll modify the validate_len() logic while rebasing my patch. Sure. Thanks for helping! Ravi
[PATCH v4 3/3] powerpc/prom_init: Use -ffreestanding to avoid a reference to bcmp
r374662 gives LLVM the ability to convert certain loops into a reference to bcmp as an optimization; this breaks prom_init_check.sh: CALL    arch/powerpc/kernel/prom_init_check.sh Error: External symbol 'bcmp' referenced from prom_init.c make[2]: *** [arch/powerpc/kernel/Makefile:196: prom_init_check] Error 1 bcmp is defined in lib/string.c as a wrapper for memcmp so this could be added to the whitelist. However, commit 450e7dd4001f ("powerpc/prom_init: don't use string functions from lib/") copied memcmp as prom_memcmp to avoid KASAN instrumentation so having bcmp be resolved to regular memcmp would break that assumption. Furthermore, because the compiler is the one that inserted bcmp, we cannot provide something like prom_bcmp. To prevent LLVM from being clever with optimizations like this, use -ffreestanding to tell LLVM we are not hosted so it is not free to make transformations like this. Link: https://github.com/ClangBuiltLinux/linux/issues/647 Link: https://github.com/llvm/llvm-project/commit/76cdcf25b883751d83402baea6316772aa73865c Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor --- v1 -> v3: * New patch in the series v3 -> v4: * Rebase on v5.4-rc3. * Add Nick's reviewed-by tag. * Update the LLVM commit reference to the latest applied version (r374662) as it was originally committed as r370454, reverted in r370788, and reapplied as r374662. 
arch/powerpc/kernel/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index f1f362146135..7f0ee465dfb6 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -21,7 +21,7 @@ CFLAGS_prom_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN) -CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) +CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector) -ffreestanding ifdef CONFIG_FUNCTION_TRACER # Do not trace early boot code -- 2.23.0
[PATCH v4 2/3] powerpc: Avoid clang warnings around setjmp and longjmp
Commit aea447141c7e ("powerpc: Disable -Wbuiltin-requires-header when setjmp is used") disabled -Wbuiltin-requires-header because of a warning about the setjmp and longjmp declarations. r367387 in clang added another diagnostic around this, complaining that there is no jmp_buf declaration. In file included from ../arch/powerpc/xmon/xmon.c:47: ../arch/powerpc/include/asm/setjmp.h:10:13: error: declaration of built-in function 'setjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header . [-Werror,-Wincomplete-setjmp-declaration] extern long setjmp(long *); ^ ../arch/powerpc/include/asm/setjmp.h:11:13: error: declaration of built-in function 'longjmp' requires the declaration of the 'jmp_buf' type, commonly provided in the header . [-Werror,-Wincomplete-setjmp-declaration] extern void longjmp(long *, long); ^ 2 errors generated. We are not using the standard library's longjmp/setjmp implementations for obvious reasons; make this clear to clang by using -ffreestanding on these files. Cc: sta...@vger.kernel.org # 4.14+ Link: https://github.com/ClangBuiltLinux/linux/issues/625 Link: https://github.com/llvm/llvm-project/commit/3be25e79477db2d31ac46493d97eca8c20592b07 Link: https://godbolt.org/z/B2oQnl Suggested-by: Segher Boessenkool Reviewed-by: Nick Desaulniers Signed-off-by: Nathan Chancellor --- v1 -> v3 (I skipped v2 because the first patch in the series already had a v2): * Use -ffreestanding instead of outright disabling the warning because it is legitimate. v3 -> v4: * Rebase on v5.4-rc3 * Add Nick's reviewed-by and Compiler Explorer link. 
arch/powerpc/kernel/Makefile | 4 ++-- arch/powerpc/xmon/Makefile | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index a7ca8fe62368..f1f362146135 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -5,8 +5,8 @@ CFLAGS_ptrace.o+= -DUTS_MACHINE='"$(UTS_MACHINE)"' -# Disable clang warning for using setjmp without setjmp.h header -CFLAGS_crash.o += $(call cc-disable-warning, builtin-requires-header) +# Avoid clang warnings around longjmp/setjmp declarations +CFLAGS_crash.o += -ffreestanding ifdef CONFIG_PPC64 CFLAGS_prom_init.o += $(NO_MINIMAL_TOC) diff --git a/arch/powerpc/xmon/Makefile b/arch/powerpc/xmon/Makefile index f142570ad860..c3842dbeb1b7 100644 --- a/arch/powerpc/xmon/Makefile +++ b/arch/powerpc/xmon/Makefile @@ -1,8 +1,8 @@ # SPDX-License-Identifier: GPL-2.0 # Makefile for xmon -# Disable clang warning for using setjmp without setjmp.h header -subdir-ccflags-y := $(call cc-disable-warning, builtin-requires-header) +# Avoid clang warnings around longjmp/setjmp declarations +subdir-ccflags-y := -ffreestanding GCOV_PROFILE := n KCOV_INSTRUMENT := n -- 2.23.0
[PATCH v4 0/3] LLVM/Clang fixes for pseries_defconfig
Hi all, This series includes a set of fixes for LLVM/Clang when building pseries_defconfig. These have been floating around as standalone patches so I decided to gather them up as a series so it was easier to review/apply them. This has been broken for a bit now, it would be nice to get these reviewed and applied. Please let me know if I need to do anything to move these along. Previous versions: https://lore.kernel.org/lkml/20190911182049.77853-1-natechancel...@gmail.com/ Cheers, Nathan
[PATCH v4 1/3] powerpc: Don't add -mabi= flags when building with Clang
When building pseries_defconfig, building vdso32 errors out: error: unknown target ABI 'elfv1' This happens because -m32 in clang changes the target to 32-bit, which does not allow the ABI to be changed, as the setABI virtual function is not overridden: https://github.com/llvm/llvm-project/blob/llvmorg-9.0.0/clang/include/clang/Basic/TargetInfo.h#L1073-L1078 https://github.com/llvm/llvm-project/blob/llvmorg-9.0.0/clang/lib/Basic/Targets/PPC.h#L327-L365 Commit 4dc831aa8813 ("powerpc: Fix compiling a BE kernel with a powerpc64le toolchain") added these flags to fix building big endian kernels with a little endian GCC. Clang doesn't need -mabi because the target triple controls the default value. -mlittle-endian and -mbig-endian manipulate the triple into either powerpc64-* or powerpc64le-*, which properly sets the default ABI: https://github.com/llvm/llvm-project/blob/llvmorg-9.0.0/clang/lib/Driver/Driver.cpp#L450-L463 https://github.com/llvm/llvm-project/blob/llvmorg-9.0.0/llvm/lib/Support/Triple.cpp#L1432-L1516 https://github.com/llvm/llvm-project/blob/llvmorg-9.0.0/clang/lib/Basic/Targets/PPC.h#L377-L383 Adding a debug print out in the PPC64TargetInfo constructor after line 383 above shows this: $ echo | ./clang -E --target=powerpc64-linux -mbig-endian -o /dev/null - Default ABI: elfv1 $ echo | ./clang -E --target=powerpc64-linux -mlittle-endian -o /dev/null - Default ABI: elfv2 $ echo | ./clang -E --target=powerpc64le-linux -mbig-endian -o /dev/null - Default ABI: elfv1 $ echo | ./clang -E --target=powerpc64le-linux -mlittle-endian -o /dev/null - Default ABI: elfv2 Don't specify -mabi when building with clang to avoid the build error with -m32 and not change any code generation. -mcall-aixdesc is not an implemented flag in clang so it can be safely excluded as well, see commit 238abecde8ad ("powerpc: Don't use gcc specific options on clang"). pseries_defconfig successfully builds after this patch and powernv_defconfig and ppc44x_defconfig don't regress. 
Link: https://github.com/ClangBuiltLinux/linux/issues/240 Reviewed-by: Daniel Axtens Signed-off-by: Nathan Chancellor --- v1 -> v2: * Improve commit message v2 -> v3: * Rebase and merge into a single series. v3 -> v4: * Rebase on v5.4-rc3. * Update links to point to llvmorg-9.0.0 instead of llvmorg-9.0.0-rc2. arch/powerpc/Makefile | 4 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 83522c9fc7b6..37ac731a556b 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -91,11 +91,13 @@ MULTIPLEWORD:= -mmultiple endif ifdef CONFIG_PPC64 +ifndef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_BIG_ENDIAN)+= $(call cc-option,-mabi=elfv1) cflags-$(CONFIG_CPU_BIG_ENDIAN)+= $(call cc-option,-mcall-aixdesc) aflags-$(CONFIG_CPU_BIG_ENDIAN)+= $(call cc-option,-mabi=elfv1) aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 endif +endif ifndef CONFIG_CC_IS_CLANG cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align @@ -141,6 +143,7 @@ endif endif CFLAGS-$(CONFIG_PPC64) := $(call cc-option,-mtraceback=no) +ifndef CONFIG_CC_IS_CLANG ifdef CONFIG_CPU_LITTLE_ENDIAN CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) @@ -149,6 +152,7 @@ CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif +endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) -- 2.23.0
Re: [PATCH v3 3/4] powerpc/mm/ptdump: debugfs handler for W+X checks at runtime
On Tue, 2019-10-08 at 08:21 +0200, Christophe Leroy wrote: > > Le 04/10/2019 à 09:50, Russell Currey a écrit : > > Very rudimentary, just > > > > echo 1 > [debugfs]/check_wx_pages > > > > and check the kernel log. Useful for testing strict module RWX. > > > > Also fixed a typo. > > > > Signed-off-by: Russell Currey > > --- > > arch/powerpc/mm/ptdump/ptdump.c | 31 +--- > > --- > > 1 file changed, 25 insertions(+), 6 deletions(-) > > > > diff --git a/arch/powerpc/mm/ptdump/ptdump.c > > b/arch/powerpc/mm/ptdump/ptdump.c > > index 2f9ddc29c535..0547cd9f264e 100644 > > --- a/arch/powerpc/mm/ptdump/ptdump.c > > +++ b/arch/powerpc/mm/ptdump/ptdump.c > > @@ -4,7 +4,7 @@ > >* > >* This traverses the kernel pagetables and dumps the > >* information about the used sections of memory to > > - * /sys/kernel/debug/kernel_pagetables. > > + * /sys/kernel/debug/kernel_page_tables. > >* > >* Derived from the arm64 implementation: > >* Copyright (c) 2014, The Linux Foundation, Laura Abbott. > > @@ -409,16 +409,35 @@ void ptdump_check_wx(void) > > else > > pr_info("Checked W+X mappings: passed, no W+X pages > > found\n"); > > } > > + > > +static int check_wx_debugfs_set(void *data, u64 val) > > +{ > > + if (val != 1ULL) > > + return -EINVAL; > > + > > + ptdump_check_wx(); > > + > > + return 0; > > +} > > + > > +DEFINE_SIMPLE_ATTRIBUTE(check_wx_fops, NULL, check_wx_debugfs_set, > > "%llu\n"); > > #endif > > > > static int ptdump_init(void) > > { > > - struct dentry *debugfs_file; > > - > > populate_markers(); > > build_pgtable_complete_mask(); > > - debugfs_file = debugfs_create_file("kernel_page_tables", 0400, > > NULL, > > - NULL, &ptdump_fops); > > - return debugfs_file ? 
0 : -ENOMEM; > > + > > + if (!debugfs_create_file("kernel_page_tables", 0400, NULL, > > +NULL, &ptdump_fops)) > > + return -ENOMEM; > > + > > +#ifdef CONFIG_PPC_DEBUG_WX > > + if (!debugfs_create_file("check_wx_pages", 0200, NULL, > > +NULL, &check_wx_fops)) > > + return -ENOMEM; > > +#endif > > The above seems to be completely independent of everything else in > ptdump_init(). > > Could we avoid this #ifdef block inside ptdump_init() by creating a > standalone device_initcall() for that through a function called > ptdump_check_wx_init() defined inside the same #ifdef block as > ptdump_check_wx()? Yes that would be nicer, thanks > > Christophe > > > + > > + return 0; > > } > > device_initcall(ptdump_init); > >
[PATCH] selftests/powerpc: Don't list r1 in clobbers for TM tests
Some of our TM (Transactional Memory) tests list "r1" (the stack pointer) as a clobbered register. GCC >= 9 doesn't accept this, and the build breaks: ptrace-tm-spd-tar.c: In function 'tm_spd_tar': ptrace-tm-spd-tar.c:31:2: error: listing the stack pointer register 'r1' in a clobber list is deprecated [-Werror=deprecated] 31 | asm __volatile__( | ^~~ ptrace-tm-spd-tar.c:31:2: note: the value of the stack pointer after an 'asm' statement must be the same as it was before the statement We do have some fairly large inline asm blocks in these tests, and some of them do change the value of r1. However, they should all return to C with the value in r1 restored, so I think it's legitimate to say r1 is not clobbered. We should probably rewrite some of these in real assembler one day. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c | 2 +- tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c index 25e23e73c72e..7b835ef4f8a6 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-tar.c @@ -73,7 +73,7 @@ void tm_spd_tar(void) [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2), [tar_3]"i"(TAR_3), [dscr_3]"i"(DSCR_3) - : "memory", "r0", "r1", "r3", "r4", "r5", "r6" + : "memory", "r0", "r3", "r4", "r5", "r6" ); /* TM failed, analyse */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c index f603fe5a445b..f497c2cbbdc3 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c +++ 
b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-spd-vsx.c @@ -74,7 +74,7 @@ void tm_spd_vsx(void) "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [sprn_texasr] "i" (SPRN_TEXASR) - : "memory", "r0", "r1", "r3", "r4", + : "memory", "r0", "r3", "r4", "r7", "r8", "r9", "r10", "r11" ); diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c index e0d37f07bdeb..46ef378a15ec 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-tar.c @@ -62,7 +62,7 @@ void tm_tar(void) [sprn_ppr]"i"(SPRN_PPR), [sprn_texasr]"i"(SPRN_TEXASR), [tar_1]"i"(TAR_1), [dscr_1]"i"(DSCR_1), [tar_2]"i"(TAR_2), [dscr_2]"i"(DSCR_2), [cptr1] "b" (&cptr[1]) - : "memory", "r0", "r1", "r3", "r4", "r5", "r6" + : "memory", "r0", "r3", "r4", "r5", "r6" ); /* TM failed, analyse */ diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c index 8027457b97b7..a72fcea16876 100644 --- a/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c +++ b/tools/testing/selftests/powerpc/ptrace/ptrace-tm-vsx.c @@ -62,7 +62,7 @@ void tm_vsx(void) "3: ;" : [res] "=r" (result), [texasr] "=r" (texasr) : [sprn_texasr] "i" (SPRN_TEXASR), [cptr1] "b" (&cptr[1]) - : "memory", "r0", "r1", "r3", "r4", + : "memory", "r0", "r3", "r4", "r7", "r8", "r9", "r10", "r11" ); -- 2.21.0
Re: [PATCH v3 2/4] powerpc/kprobes: Mark newly allocated probes as RO
On 4/10/19 5:50 pm, Russell Currey wrote: With CONFIG_STRICT_KERNEL_RWX=y and CONFIG_KPROBES=y, there will be one W+X page at boot by default. This can be tested with CONFIG_PPC_PTDUMP=y and CONFIG_PPC_DEBUG_WX=y set, and checking the kernel log during boot. powerpc doesn't implement its own alloc() for kprobes like other architectures do, but we couldn't immediately mark RO anyway since we do a memcpy to the page we allocate later. After that, nothing should be allowed to modify the page, and write permissions are removed well before the kprobe is armed. Signed-off-by: Russell Currey Commit message nit: if there's an important detail in the summary line, repeat that in the body of the commit message, those two paragraphs don't tell you what the commit actually _does_, that's in the summary line --- arch/powerpc/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c index 2d27ec4feee4..2610496de7c7 100644 --- a/arch/powerpc/kernel/kprobes.c +++ b/arch/powerpc/kernel/kprobes.c @@ -24,6 +24,7 @@ #include #include #include +#include DEFINE_PER_CPU(struct kprobe *, current_kprobe) = NULL; DEFINE_PER_CPU(struct kprobe_ctlblk, kprobe_ctlblk); @@ -131,6 +132,8 @@ int arch_prepare_kprobe(struct kprobe *p) (unsigned long)p->ainsn.insn + sizeof(kprobe_opcode_t)); } + set_memory_ro((unsigned long)p->ainsn.insn, 1); + p->ainsn.boostable = 0; return ret; } -- Andrew Donnellan OzLabs, ADL Canberra a...@linux.ibm.com IBM Australia Limited
[PATCH] selftests/powerpc: Reduce sigfuz runtime to ~60s
The default for the sigfuz test is to run for 4000 iterations, but that can take quite a while and the test harness may kill the test. Reduce the number of iterations to 600, which gives a runtime of roughly 1 minute on a Power8 system. Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/signal/sigfuz.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/powerpc/signal/sigfuz.c b/tools/testing/selftests/powerpc/signal/sigfuz.c index dade00c698c2..08f9afe3b95c 100644 --- a/tools/testing/selftests/powerpc/signal/sigfuz.c +++ b/tools/testing/selftests/powerpc/signal/sigfuz.c @@ -42,7 +42,7 @@ #include "utils.h" /* Selftest defaults */ -#define COUNT_MAX 4000/* Number of interactions */ +#define COUNT_MAX 600 /* Number of interactions */ #define THREADS16 /* Number of threads */ /* Arguments options */ -- 2.21.0
Re: [PATCH] xfs: introduce "metasync" api to sync metadata to fsblock
On Sun, Oct 13, 2019 at 10:37:00PM +0800, Pingfan Liu wrote: > When using fadump (fireware assist dump) mode on powerpc, a mismatch > between grub xfs driver and kernel xfs driver has been obsevered. Note: > fadump boots up in the following sequence: fireware -> grub reads kernel > and initramfs -> kernel boots. > > The process to reproduce this mismatch: > - On powerpc, boot kernel with fadump=on and edit /etc/kdump.conf. > - Replacing "path /var/crash" with "path /var/crashnew", then, "kdumpctl > restart" to rebuild the initramfs. Detail about the rebuilding looks > like: mkdumprd /boot/initramfs-`uname -r`.img.tmp; > mv /boot/initramfs-`uname -r`.img.tmp /boot/initramfs-`uname -r`.img > sync > - "echo c >/proc/sysrq-trigger". > > The result: > The dump image will not be saved under /var/crashnew/* as expected, but > still saved under /var/crash. > > The root cause: > As Eric pointed out that on xfs, 'sync' ensures the consistency by writing > back metadata to xlog, but not necessary to fsblock. This raises issue if > grub can not replay the xlog before accessing the xfs files. Since the > above dir entry of initramfs should be saved as inline data with xfs_inode, > so xfs_fs_sync_fs() does not guarantee it written to fsblock. > > umount can be used to write metadata fsblock, but the filesystem can not be > umounted if still in use. > > There are two ways to fix this mismatch, either grub or xfs. It may be > easier to do this in xfs side by introducing an interface to flush metadata > to fsblock explicitly. > > With this patch, metadata can be written to fsblock by: > # update AIL > sync > # new introduced interface to flush metadata to fsblock > mount -o remount,metasync mountpoint I think this ought to be an ioctl or some sort of generic call since the jbd2 filesystems (ext3, ext4, ocfs2) suffer from the same "$BOOTLOADER is too dumb to recover logs but still wants to write to the fs" checkpointing problem. 
(Or maybe we should just put all that stuff in a vfat filesystem, I don't know...) --D > Signed-off-by: Pingfan Liu > Cc: "Darrick J. Wong" > Cc: Dave Chinner > Cc: Eric Sandeen > Cc: Hari Bathini > Cc: linuxppc-dev@lists.ozlabs.org > To: linux-...@vger.kernel.org > --- > fs/xfs/xfs_mount.h | 1 + > fs/xfs/xfs_super.c | 15 ++- > fs/xfs/xfs_trans.h | 2 ++ > fs/xfs/xfs_trans_ail.c | 26 +- > fs/xfs/xfs_trans_priv.h | 1 + > 5 files changed, 43 insertions(+), 2 deletions(-) > > diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h > index fdb60e0..85f32e6 100644 > --- a/fs/xfs/xfs_mount.h > +++ b/fs/xfs/xfs_mount.h > @@ -243,6 +243,7 @@ typedef struct xfs_mount { > #define XFS_MOUNT_FILESTREAMS(1ULL << 24)/* enable the > filestreams > allocator */ > #define XFS_MOUNT_NOATTR2(1ULL << 25)/* disable use of attr2 format > */ > +#define XFS_MOUNT_METASYNC (1ull << 26)/* write meta to fsblock */ > > #define XFS_MOUNT_DAX(1ULL << 62)/* TEST ONLY! */ > > diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c > index 8d1df9f..41df810 100644 > --- a/fs/xfs/xfs_super.c > +++ b/fs/xfs/xfs_super.c > @@ -59,7 +59,7 @@ enum { > Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, > Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, > Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, > - Opt_discard, Opt_nodiscard, Opt_dax, Opt_err, > + Opt_discard, Opt_nodiscard, Opt_dax, Opt_metasync, Opt_err > }; > > static const match_table_t tokens = { > @@ -106,6 +106,7 @@ static const match_table_t tokens = { > {Opt_discard, "discard"}, /* Discard unused blocks */ > {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */ > {Opt_dax, "dax"}, /* Enable direct access to bdev pages */ > + {Opt_metasync, "metasync"},/* one shot to write meta to fsblock */ > {Opt_err, NULL}, > }; > > @@ -338,6 +339,9 @@ xfs_parseargs( > mp->m_flags |= XFS_MOUNT_DAX; > break; > #endif > + case Opt_metasync: > + mp->m_flags |= XFS_MOUNT_METASYNC; > + break; > default: > xfs_warn(mp, 
"unknown mount option [%s].", p); > return -EINVAL; > @@ -1259,6 +1263,9 @@ xfs_fs_remount( > mp->m_flags |= XFS_MOUNT_SMALL_INUMS; > mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); > break; > + case Opt_metasync: > + mp->m_flags |= XFS_MOUNT_METASYNC; > + break; > default: > /* >* Logically we would return an error here to prevent > @@ -1286,6 +1293,12 @@ xfs_fs_remount( > } >
Re: [PATCH 0/4] crypto: nx - convert to skcipher API
On Sun, 13 Oct 2019 at 20:56, Eric Biggers wrote: > > On Sun, Oct 13, 2019 at 05:31:31PM +0200, Ard Biesheuvel wrote: > > On Sun, 13 Oct 2019 at 08:29, Ard Biesheuvel > > wrote: > > > > > > On Sun, 13 Oct 2019 at 06:40, Eric Biggers wrote: > > > > > > > > This series converts the PowerPC Nest (NX) implementations of AES modes > > > > from the deprecated "blkcipher" API to the "skcipher" API. This is > > > > needed in order for the blkcipher API to be removed. > > > > > > > > This patchset is compile-tested only, as I don't have this hardware. > > > > If anyone has this hardware, please test this patchset with > > > > CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y. > > > > > > > > Eric Biggers (4): > > > > crypto: nx - don't abuse blkcipher_desc to pass iv around > > > > crypto: nx - convert AES-ECB to skcipher API > > > > crypto: nx - convert AES-CBC to skcipher API > > > > crypto: nx - convert AES-CTR to skcipher API > > > > > > > > drivers/crypto/nx/nx-aes-cbc.c | 81 ++- > > > > drivers/crypto/nx/nx-aes-ccm.c | 40 ++-- > > > > drivers/crypto/nx/nx-aes-ctr.c | 87 +++--- > > > > drivers/crypto/nx/nx-aes-ecb.c | 76 + > > > > drivers/crypto/nx/nx-aes-gcm.c | 24 -- > > > > drivers/crypto/nx/nx.c | 64 ++--- > > > > drivers/crypto/nx/nx.h | 19 > > > > 7 files changed, 176 insertions(+), 215 deletions(-) > > > > > > > > > > Hi Eric, > > > > > > Thanks for taking this on. I'll look in more detail at these patches > > > during the week. In the meantime, I may have a stab at converting ccp, > > > virtio-crypto and omap aes/des myself, since i have the hardware to > > > test those. 
> > > > > > > OK, I got a bit carried away, and converted a bunch of platforms in > > drivers/crypto (build tested only, except for the virtio driver) > > > > crypto: qce - switch to skcipher API > > crypto: rockchip - switch to skcipher API > > crypto: stm32 - switch to skcipher API > > crypto: sahara - switch to skcipher API > > crypto: picoxcell - switch to skcipher API > > crypto: mediatek - switch to skcipher API > > crypto: mxs - switch to skcipher API > > crypto: ixp4xx - switch to skcipher API > > crypto: hifn - switch to skcipher API > > crypto: chelsio - switch to skcipher API > > crypto: cavium/cpt - switch to skcipher API > > crypto: nitrox - remove cra_type reference to ablkcipher > > crypto: bcm-spu - switch to skcipher API > > crypto: atmel-tdes - switch to skcipher API > > crypto: atmel-aes - switch to skcipher API > > crypto: s5p - switch to skcipher API > > crypto: ux500 - switch to skcipher API > > crypto: omap - switch to skcipher API > > crypto: virtio - switch to skcipher API > > crypto: virtio - deal with unsupported input sizes > > crypto: virtio - implement missing support for output IVs > > crypto: ccp - switch from ablkcipher to skcipher > > > > https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/log/?h=ablkcipher-removal > > > > I pushed the branch to kernelci, so hopefully we'll get some automated > > results, but I think only a small subset of these are boot tested atm. > > Awesome, thanks for doing this! I was just planning to do "blkcipher" for > now, > but your patches will take care of almost all of "ablkcipher" too. > Yeah. I thought it was about time we take care of this, and ablkcipher is much more straightforward anyway. > A few things I noticed from quickly skimming through your patches: > Thanks. These are all mistakes on my part - I went through these at high speed with lots of mechanical search-replace. 
> "ecb-des3-omap", "cbc-des3-omap", "atmel-ecb-tdes", "atmel-cbc-tdes", and > "atmel-ofb-tdes" had their min and/or max key size incorrectly changed to 8 > (DES_BLOCK_SIZE or DES3_EDE_BLOCK_SIZE) rather than left as 24 > (DES3_EDE_KEY_SIZE or 3*DES_KEY_SIZE). > > cra_blocksize for "atmel-cfb64-aes" was changed from CFB64_BLOCK_SIZE to > AES_BLOCKSIZE. Intentional? > > cra_blocksize for "stm32-ctr-aes" and for "cfb-aes-mtk" was changed from 1 to > AES_BLOCK_SIZE. Intentional? > > CRYPTO_ALG_NEED_FALLBACK was added to "cbc-des-picoxcell" and > "ecb-des-picoxcell". > Intentional? > > In drivers/crypto/ixp4xx_crypto.c, .walksize was set on "rfc3686(ctr(aes))" > rather than .chunksize. Intentional? > > In drivers/crypto/qce/, CRYPTO_ALG_TYPE_ABLKCIPHER should be replaced with > CRYPTO_ALG_TYPE_SKCIPHER. > > In drivers/crypto/stm32/, could rename crypto_algs[] to skcipher_algs[]. > > Thanks! > > - Eric
Re: [PATCH 0/4] crypto: nx - convert to skcipher API
On Sun, Oct 13, 2019 at 05:31:31PM +0200, Ard Biesheuvel wrote: > On Sun, 13 Oct 2019 at 08:29, Ard Biesheuvel > wrote: > > > > On Sun, 13 Oct 2019 at 06:40, Eric Biggers wrote: > > > > > > This series converts the PowerPC Nest (NX) implementations of AES modes > > > from the deprecated "blkcipher" API to the "skcipher" API. This is > > > needed in order for the blkcipher API to be removed. > > > > > > This patchset is compile-tested only, as I don't have this hardware. > > > If anyone has this hardware, please test this patchset with > > > CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y. > > > > > > Eric Biggers (4): > > > crypto: nx - don't abuse blkcipher_desc to pass iv around > > > crypto: nx - convert AES-ECB to skcipher API > > > crypto: nx - convert AES-CBC to skcipher API > > > crypto: nx - convert AES-CTR to skcipher API > > > > > > drivers/crypto/nx/nx-aes-cbc.c | 81 ++- > > > drivers/crypto/nx/nx-aes-ccm.c | 40 ++-- > > > drivers/crypto/nx/nx-aes-ctr.c | 87 +++--- > > > drivers/crypto/nx/nx-aes-ecb.c | 76 + > > > drivers/crypto/nx/nx-aes-gcm.c | 24 -- > > > drivers/crypto/nx/nx.c | 64 ++--- > > > drivers/crypto/nx/nx.h | 19 > > > 7 files changed, 176 insertions(+), 215 deletions(-) > > > > > > > Hi Eric, > > > > Thanks for taking this on. I'll look in more detail at these patches > > during the week. In the meantime, I may have a stab at converting ccp, > > virtio-crypto and omap aes/des myself, since i have the hardware to > > test those. 
> > > > OK, I got a bit carried away, and converted a bunch of platforms in > drivers/crypto (build tested only, except for the virtio driver) > > crypto: qce - switch to skcipher API > crypto: rockchip - switch to skcipher API > crypto: stm32 - switch to skcipher API > crypto: sahara - switch to skcipher API > crypto: picoxcell - switch to skcipher API > crypto: mediatek - switch to skcipher API > crypto: mxs - switch to skcipher API > crypto: ixp4xx - switch to skcipher API > crypto: hifn - switch to skcipher API > crypto: chelsio - switch to skcipher API > crypto: cavium/cpt - switch to skcipher API > crypto: nitrox - remove cra_type reference to ablkcipher > crypto: bcm-spu - switch to skcipher API > crypto: atmel-tdes - switch to skcipher API > crypto: atmel-aes - switch to skcipher API > crypto: s5p - switch to skcipher API > crypto: ux500 - switch to skcipher API > crypto: omap - switch to skcipher API > crypto: virtio - switch to skcipher API > crypto: virtio - deal with unsupported input sizes > crypto: virtio - implement missing support for output IVs > crypto: ccp - switch from ablkcipher to skcipher > > https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/log/?h=ablkcipher-removal > > I pushed the branch to kernelci, so hopefully we'll get some automated > results, but I think only a small subset of these are boot tested atm. Awesome, thanks for doing this! I was just planning to do "blkcipher" for now, but your patches will take care of almost all of "ablkcipher" too. A few things I noticed from quickly skimming through your patches: "ecb-des3-omap", "cbc-des3-omap", "atmel-ecb-tdes", "atmel-cbc-tdes", and "atmel-ofb-tdes" had their min and/or max key size incorrectly changed to 8 (DES_BLOCK_SIZE or DES3_EDE_BLOCK_SIZE) rather than left as 24 (DES3_EDE_KEY_SIZE or 3*DES_KEY_SIZE). cra_blocksize for "atmel-cfb64-aes" was changed from CFB64_BLOCK_SIZE to AES_BLOCKSIZE. Intentional? 
cra_blocksize for "stm32-ctr-aes" and for "cfb-aes-mtk" was changed from 1 to AES_BLOCK_SIZE. Intentional? CRYPTO_ALG_NEED_FALLBACK was added to "cbc-des-picoxcell" and "ecb-des-picoxcell". Intentional? In drivers/crypto/ixp4xx_crypto.c, .walksize was set on "rfc3686(ctr(aes))" rather than .chunksize. Intentional? In drivers/crypto/qce/, CRYPTO_ALG_TYPE_ABLKCIPHER should be replaced with CRYPTO_ALG_TYPE_SKCIPHER. In drivers/crypto/stm32/, could rename crypto_algs[] to skcipher_algs[]. Thanks! - Eric
Re: [PATCH] net/ibmvnic: Fix EOI when running in XIVE mode.
From: Cédric Le Goater Date: Fri, 11 Oct 2019 07:52:54 +0200 > pSeries machines on POWER9 processors can run with the XICS (legacy) > interrupt mode or with the XIVE exploitation interrupt mode. These > interrupt controllers have different interfaces for interrupt > management : XICS uses hcalls and XIVE loads and stores on a page. > H_EOI being a XICS interface the enable_scrq_irq() routine can fail > when the machine runs in XIVE mode. > > Fix that by calling the EOI handler of the interrupt chip. > > Fixes: f23e0643cd0b ("ibmvnic: Clear pending interrupt after device reset") > Signed-off-by: Cédric Le Goater Applied and queued up for -stable, thanks.
[Bug 205183] New: PPC64: Signal delivery fails with SIGSEGV if between about 1KB and 4KB bytes of stack remain
https://bugzilla.kernel.org/show_bug.cgi?id=205183 Bug ID: 205183 Summary: PPC64: Signal delivery fails with SIGSEGV if between about 1KB and 4KB bytes of stack remain Product: Platform Specific/Hardware Version: 2.5 Kernel Version: 4.19.15 and others Hardware: PPC-64 OS: Linux Tree: Mainline Status: NEW Severity: normal Priority: P1 Component: PPC-64 Assignee: platform_ppc...@kernel-bugs.osdl.org Reporter: t...@sss.pgh.pa.us Regression: No Created attachment 285487 --> https://bugzilla.kernel.org/attachment.cgi?id=285487&action=edit stacktest.c If there are between about 1K and 4K bytes remaining in a process' existing stack segment, an attempt to deliver a signal that the process has a signal handler for will result in SIGSEGV instead. This situation should result in extending the process' stack to allow handling the signal, but it does not. The attached test program illustrates this. It requires a parameter specifying the amount of stack to consume before sleeping. Waken the process with a manual kill -USR1. An example of a successful case is [tgl@postgresql-fedora ~]$ gcc -g -Wall -O stacktest.c [tgl@postgresql-fedora ~]$ ./a.out 124 & [1] 7922 [tgl@postgresql-fedora ~]$ cat /proc/7922/maps | grep stack 7fffc997-7fffc9aa rw-p 00:00 0 [stack] [tgl@postgresql-fedora ~]$ kill -USR1 7922 [tgl@postgresql-fedora ~]$ signal delivered, stack base 0x7fffc9aa top 0x7fffc9971420 (1240032 used) [1]+ Done./a.out 124 The above example shows that 0x7fffc9971420 - 0x7fffc997 = 5152 bytes are enough to deliver the signal. But with a slightly larger parameter, [tgl@postgresql-fedora ~]$ ./a.out 1241000 & [1] 7941 [tgl@postgresql-fedora ~]$ kill -USR1 7941 [tgl@postgresql-fedora ~]$ [1]+ Segmentation fault (core dumped) ./a.out 1241000 With a still larger parameter, corresponding to just a few hundred bytes left, it works again, showing that the kernel does know how to enlarge the stack in such cases --- it's just got a boundary condition wrong somewhere. 
On the particular userland toolchain I'm using here, parameters between about 1241000 and 1244000 (free space between about 1200 and 4200 bytes) will show the error, but you might need to tweak it a bit with a different system. The Postgres project has been chasing errors caused by this bug for months, and we've seen it happen on a range of PPC64 kernels from 4.4.0 up to 4.19.15, but not on other architectures, nor on non-Linux PPC64. My colleague Thomas Munro found a possible explanation in https://github.com/torvalds/linux/blob/master/arch/powerpc/mm/fault.c#L251 which claims that * The kernel signal delivery code writes up to about 1.5kB * below the stack pointer (r1) before decrementing it. and that seems to be the justification for the "2048" magic number at line 276. Perhaps that number applies only to PPC32, and PPC64 requires more space? At the very least, this function's other magic number of 0x10 seems highly suspicious in view of the fact that we don't see the bug until the process has consumed at least 1MB of stack space. (Hence, please use values > 1MB with the test program.) -- You are receiving this mail because: You are watching the assignee of the bug.
Re: [PATCH 0/4] crypto: nx - convert to skcipher API
On Sun, 13 Oct 2019 at 08:29, Ard Biesheuvel wrote: > > On Sun, 13 Oct 2019 at 06:40, Eric Biggers wrote: > > > > This series converts the PowerPC Nest (NX) implementations of AES modes > > from the deprecated "blkcipher" API to the "skcipher" API. This is > > needed in order for the blkcipher API to be removed. > > > > This patchset is compile-tested only, as I don't have this hardware. > > If anyone has this hardware, please test this patchset with > > CONFIG_CRYPTO_MANAGER_EXTRA_TESTS=y. > > > > Eric Biggers (4): > > crypto: nx - don't abuse blkcipher_desc to pass iv around > > crypto: nx - convert AES-ECB to skcipher API > > crypto: nx - convert AES-CBC to skcipher API > > crypto: nx - convert AES-CTR to skcipher API > > > > drivers/crypto/nx/nx-aes-cbc.c | 81 ++- > > drivers/crypto/nx/nx-aes-ccm.c | 40 ++-- > > drivers/crypto/nx/nx-aes-ctr.c | 87 +++--- > > drivers/crypto/nx/nx-aes-ecb.c | 76 + > > drivers/crypto/nx/nx-aes-gcm.c | 24 -- > > drivers/crypto/nx/nx.c | 64 ++--- > > drivers/crypto/nx/nx.h | 19 > > 7 files changed, 176 insertions(+), 215 deletions(-) > > > > Hi Eric, > > Thanks for taking this on. I'll look in more detail at these patches > during the week. In the meantime, I may have a stab at converting ccp, > virtio-crypto and omap aes/des myself, since i have the hardware to > test those. 
> OK, I got a bit carried away, and converted a bunch of platforms in drivers/crypto (build tested only, except for the virtio driver) crypto: qce - switch to skcipher API crypto: rockchip - switch to skcipher API crypto: stm32 - switch to skcipher API crypto: sahara - switch to skcipher API crypto: picoxcell - switch to skcipher API crypto: mediatek - switch to skcipher API crypto: mxs - switch to skcipher API crypto: ixp4xx - switch to skcipher API crypto: hifn - switch to skcipher API crypto: chelsio - switch to skcipher API crypto: cavium/cpt - switch to skcipher API crypto: nitrox - remove cra_type reference to ablkcipher crypto: bcm-spu - switch to skcipher API crypto: atmel-tdes - switch to skcipher API crypto: atmel-aes - switch to skcipher API crypto: s5p - switch to skcipher API crypto: ux500 - switch to skcipher API crypto: omap - switch to skcipher API crypto: virtio - switch to skcipher API crypto: virtio - deal with unsupported input sizes crypto: virtio - implement missing support for output IVs crypto: ccp - switch from ablkcipher to skcipher https://git.kernel.org/pub/scm/linux/kernel/git/ardb/linux.git/log/?h=ablkcipher-removal I pushed the branch to kernelci, so hopefully we'll get some automated results, but I think only a small subset of these are boot tested atm.
[PATCH] xfs: introduce "metasync" api to sync metadata to fsblock
When using fadump (firmware-assisted dump) mode on powerpc, a mismatch between the grub xfs driver and the kernel xfs driver has been observed. Note: fadump boots up in the following sequence: firmware -> grub reads kernel and initramfs -> kernel boots. The process to reproduce this mismatch: - On powerpc, boot kernel with fadump=on and edit /etc/kdump.conf. - Replace "path /var/crash" with "path /var/crashnew", then run "kdumpctl restart" to rebuild the initramfs. Detail about the rebuilding looks like: mkdumprd /boot/initramfs-`uname -r`.img.tmp; mv /boot/initramfs-`uname -r`.img.tmp /boot/initramfs-`uname -r`.img sync - "echo c >/proc/sysrq-trigger". The result: The dump image will not be saved under /var/crashnew/* as expected, but still saved under /var/crash. The root cause: As Eric pointed out, on xfs, 'sync' ensures consistency by writing back metadata to xlog, but not necessarily to fsblock. This raises an issue if grub can not replay the xlog before accessing the xfs files. Since the above dir entry of initramfs should be saved as inline data with xfs_inode, xfs_fs_sync_fs() does not guarantee that it is written to fsblock. umount can be used to write metadata to fsblock, but the filesystem can not be umounted if still in use. There are two ways to fix this mismatch: in either grub or xfs. It may be easier to do this on the xfs side by introducing an interface to flush metadata to fsblock explicitly. With this patch, metadata can be written to fsblock by: # update AIL sync # newly introduced interface to flush metadata to fsblock mount -o remount,metasync mountpoint Signed-off-by: Pingfan Liu Cc: "Darrick J. 
Wong" Cc: Dave Chinner Cc: Eric Sandeen Cc: Hari Bathini Cc: linuxppc-dev@lists.ozlabs.org To: linux-...@vger.kernel.org --- fs/xfs/xfs_mount.h | 1 + fs/xfs/xfs_super.c | 15 ++- fs/xfs/xfs_trans.h | 2 ++ fs/xfs/xfs_trans_ail.c | 26 +- fs/xfs/xfs_trans_priv.h | 1 + 5 files changed, 43 insertions(+), 2 deletions(-) diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h index fdb60e0..85f32e6 100644 --- a/fs/xfs/xfs_mount.h +++ b/fs/xfs/xfs_mount.h @@ -243,6 +243,7 @@ typedef struct xfs_mount { #define XFS_MOUNT_FILESTREAMS (1ULL << 24)/* enable the filestreams allocator */ #define XFS_MOUNT_NOATTR2 (1ULL << 25)/* disable use of attr2 format */ +#define XFS_MOUNT_METASYNC (1ull << 26)/* write meta to fsblock */ #define XFS_MOUNT_DAX (1ULL << 62)/* TEST ONLY! */ diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c index 8d1df9f..41df810 100644 --- a/fs/xfs/xfs_super.c +++ b/fs/xfs/xfs_super.c @@ -59,7 +59,7 @@ enum { Opt_filestreams, Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota, Opt_uquota, Opt_gquota, Opt_pquota, Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce, - Opt_discard, Opt_nodiscard, Opt_dax, Opt_err, + Opt_discard, Opt_nodiscard, Opt_dax, Opt_metasync, Opt_err }; static const match_table_t tokens = { @@ -106,6 +106,7 @@ static const match_table_t tokens = { {Opt_discard, "discard"}, /* Discard unused blocks */ {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */ {Opt_dax, "dax"}, /* Enable direct access to bdev pages */ + {Opt_metasync, "metasync"},/* one shot to write meta to fsblock */ {Opt_err, NULL}, }; @@ -338,6 +339,9 @@ xfs_parseargs( mp->m_flags |= XFS_MOUNT_DAX; break; #endif + case Opt_metasync: + mp->m_flags |= XFS_MOUNT_METASYNC; + break; default: xfs_warn(mp, "unknown mount option [%s].", p); return -EINVAL; @@ -1259,6 +1263,9 @@ xfs_fs_remount( mp->m_flags |= XFS_MOUNT_SMALL_INUMS; mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount); break; + case Opt_metasync: + mp->m_flags |= 
XFS_MOUNT_METASYNC; + break; default: /* * Logically we would return an error here to prevent @@ -1286,6 +1293,12 @@ xfs_fs_remount( } } + if (mp->m_flags & XFS_MOUNT_METASYNC) { + xfs_ail_push_sync(mp->m_ail); + /* one shot flag */ + mp->m_flags &= ~XFS_MOUNT_METASYNC; + } + /* ro -> rw */ if ((mp->m_flags & XFS_MOUNT_RDONLY) && !(*flags & SB_RDONLY)) { if (mp->m_flags & XFS_MOUNT_NORECOVERY) { diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h index 64d7f17..fcdb902 100644 --- a/fs/xfs/xfs_trans.h +++ b/fs/xfs/xfs_trans.h @@ -242,6 +242,8 @@ voidxfs_trans_buf_set_ty