Re: [PATCH v4] mm: Avoid unnecessary page fault retries on shared memory types
Am 30.05.22 um 18:00 schrieb Peter Xu: On Mon, May 30, 2022 at 11:52:54AM -0400, Peter Xu wrote: On Mon, May 30, 2022 at 11:35:10AM +0200, Christian Borntraeger wrote: Am 29.05.22 um 22:33 schrieb Heiko Carstens: [...] Guess the patch below on top of your patch is what we want. Just for clarification: if gmap is not NULL then the process is a kvm process. So, depending on the workload, this optimization makes sense. diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 4608cc962ecf..e1d40ca341b7 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -436,12 +436,11 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) /* The fault is fully completed (including releasing mmap lock) */ if (fault & VM_FAULT_COMPLETED) { - /* -* Gmap will need the mmap lock again, so retake it. TODO: -* only conditionally take the lock when CONFIG_PGSTE set. -*/ - mmap_read_lock(mm); - goto out_gmap; + if (gmap) { + mmap_read_lock(mm); + goto out_gmap; + } + goto out; Hmm, right after I replied I found "goto out" could be problematic, since all s390 callers of do_exception() will assume it an error condition (side note: "goto out_gmap" contains one step to clear "fault" to 0). I'll replace this with "return 0" instead if it looks good to both of you. I'll wait for a confirmation before reposting. Thanks, Yes, that sounds good and thank you for double checking. ___ linux-snps-arc mailing list linux-snps-arc@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-snps-arc
Re: [PATCH v4] mm: Avoid unnecessary page fault retries on shared memory types
Am 29.05.22 um 22:33 schrieb Heiko Carstens: [...] Guess the patch below on top of your patch is what we want. Just for clarification: if gmap is not NULL then the process is a kvm process. So, depending on the workload, this optimization makes sense. diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index 4608cc962ecf..e1d40ca341b7 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -436,12 +436,11 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access) /* The fault is fully completed (including releasing mmap lock) */ if (fault & VM_FAULT_COMPLETED) { - /* -* Gmap will need the mmap lock again, so retake it. TODO: -* only conditionally take the lock when CONFIG_PGSTE set. -*/ - mmap_read_lock(mm); - goto out_gmap; + if (gmap) { + mmap_read_lock(mm); + goto out_gmap; + } + goto out; Yes, that makes sense. With that Acked-by: Christian Borntraeger ___ linux-snps-arc mailing list linux-snps-arc@lists.infradead.org http://lists.infradead.org/mailman/listinfo/linux-snps-arc
Re: [PATCH 2/2] futex: remove futex_cmpxchg detection
Am 26.10.21 um 12:03 schrieb Arnd Bergmann: From: Arnd Bergmann Now that all architectures have a working futex implementation in any configuration, remove the runtime detection code. Signed-off-by: Arnd Bergmann s390 part Acked-by: Christian Borntraeger --- arch/arc/Kconfig | 1 - arch/arm/Kconfig | 1 - arch/arm64/Kconfig| 1 - arch/csky/Kconfig | 1 - arch/m68k/Kconfig | 1 - arch/riscv/Kconfig| 1 - arch/s390/Kconfig | 1 - arch/sh/Kconfig | 1 - arch/um/Kconfig | 1 - arch/um/kernel/skas/uaccess.c | 1 - arch/xtensa/Kconfig | 1 - init/Kconfig | 8 kernel/futex/core.c | 35 --- kernel/futex/futex.h | 6 -- kernel/futex/syscalls.c | 22 -- 15 files changed, 82 deletions(-) diff --git a/arch/arc/Kconfig b/arch/arc/Kconfig index 248389278e8f..f9413041686f 100644 --- a/arch/arc/Kconfig +++ b/arch/arc/Kconfig @@ -31,7 +31,6 @@ config ARC select HAVE_ARCH_TRANSPARENT_HUGEPAGE if ARC_MMU_V4 select HAVE_DEBUG_STACKOVERFLOW select HAVE_DEBUG_KMEMLEAK - select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_IOREMAP_PROT select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZMA diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index bb5d2c45477b..6448d311635d 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -93,7 +93,6 @@ config ARM select HAVE_FTRACE_MCOUNT_RECORD if !XIP_KERNEL select HAVE_FUNCTION_GRAPH_TRACER if !THUMB2_KERNEL && !CC_IS_CLANG select HAVE_FUNCTION_TRACER if !XIP_KERNEL && !(THUMB2_KERNEL && CC_IS_CLANG) - select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_HW_BREAKPOINT if PERF_EVENTS && (CPU_V6 || CPU_V6K || CPU_V7) select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 0efc501f77aa..6c3c2ff5cef8 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -193,7 +193,6 @@ config ARM64 select HAVE_PERF_USER_STACK_DUMP select HAVE_REGS_AND_STACK_ACCESS_API select HAVE_FUNCTION_ARG_ACCESS_API - select HAVE_FUTEX_CMPXCHG if FUTEX select MMU_GATHER_RCU_TABLE_FREE select HAVE_RSEQ select HAVE_STACKPROTECTOR diff --git 
a/arch/csky/Kconfig b/arch/csky/Kconfig index 823d3d5a9e11..efd7c5feac8b 100644 --- a/arch/csky/Kconfig +++ b/arch/csky/Kconfig @@ -53,7 +53,6 @@ config CSKY select HAVE_FUNCTION_TRACER select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_ERROR_INJECTION - select HAVE_FUTEX_CMPXCHG if FUTEX && SMP select HAVE_FTRACE_MCOUNT_RECORD select HAVE_KERNEL_GZIP select HAVE_KERNEL_LZO diff --git a/arch/m68k/Kconfig b/arch/m68k/Kconfig index 0b50da08a9c5..15a793c5b2dc 100644 --- a/arch/m68k/Kconfig +++ b/arch/m68k/Kconfig @@ -20,7 +20,6 @@ config M68K select HAVE_ASM_MODVERSIONS select HAVE_DEBUG_BUGVERBOSE select HAVE_EFFICIENT_UNALIGNED_ACCESS if !CPU_HAS_NO_UNALIGNED - select HAVE_FUTEX_CMPXCHG if MMU && FUTEX select HAVE_MOD_ARCH_SPECIFIC select HAVE_UID16 select MMU_GATHER_NO_RANGE if MMU diff --git a/arch/riscv/Kconfig b/arch/riscv/Kconfig index 77a088d0a7e9..037fea9fac14 100644 --- a/arch/riscv/Kconfig +++ b/arch/riscv/Kconfig @@ -84,7 +84,6 @@ config RISCV select HAVE_DMA_CONTIGUOUS if MMU select HAVE_EBPF_JIT if MMU select HAVE_FUNCTION_ERROR_INJECTION - select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO if MMU && 64BIT select HAVE_IRQ_TIME_ACCOUNTING diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index f615c3f65f5a..1c9ecf619e04 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -164,7 +164,6 @@ config S390 select HAVE_FUNCTION_ERROR_INJECTION select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_GCC_PLUGINS select HAVE_GENERIC_VDSO select HAVE_IOREMAP_PROT if PCI diff --git a/arch/sh/Kconfig b/arch/sh/Kconfig index 6904f4bdbf00..93195d3368c0 100644 --- a/arch/sh/Kconfig +++ b/arch/sh/Kconfig @@ -34,7 +34,6 @@ config SUPERH select HAVE_FAST_GUP if MMU select HAVE_FUNCTION_GRAPH_TRACER select HAVE_FUNCTION_TRACER - select HAVE_FUTEX_CMPXCHG if FUTEX select HAVE_FTRACE_MCOUNT_RECORD select HAVE_HW_BREAKPOINT select HAVE_IOREMAP_PROT if MMU && !X2TLB diff --git 
a/arch/um/Kconfig b/arch/um/Kconfig index c18b45f75d41..c906250d4970 100644 --- a/arch/um/Kconfig +++ b/arch/um/Kconfig @@ -14,7 +14,6 @@ config UML select HAVE_ARCH_SECCOMP_FILTER select HAVE_ASM_MODVERSIONS
Re: [PATCH v2 2/2] mm: speed up mremap by 500x on large regions
On 10/12/2018 03:37 AM, Joel Fernandes (Google) wrote: > Android needs to mremap large regions of memory during memory management > related operations. The mremap system call can be really slow if THP is > not enabled. The bottleneck is move_page_tables, which is copying one > pte at a time, and can be really slow across a large map. Turning on THP > may not be a viable option, and is not for us. This patch speeds up the > performance for non-THP systems by copying at the PMD level when possible. > > The speed up is three orders of magnitude. On a 1GB mremap, the mremap > completion time drops from 160-250 milliseconds to 380-400 microseconds. > > Before: > Total mremap time for 1GB data: 242321014 nanoseconds. > Total mremap time for 1GB data: 196842467 nanoseconds. > Total mremap time for 1GB data: 167051162 nanoseconds. > > After: > Total mremap time for 1GB data: 385781 nanoseconds. > Total mremap time for 1GB data: 388959 nanoseconds. > Total mremap time for 1GB data: 402813 nanoseconds. > > In case THP is enabled, the optimization is skipped. I also flush the > tlb every time we do this optimization since I couldn't find a way to > determine if the low-level PTEs are dirty. It is seen that the cost of > doing so is not much compared to the improvement, on both x86-64 and arm64. 
> > Cc: minc...@kernel.org > Cc: pan...@google.com > Cc: hu...@google.com > Cc: lokeshgi...@google.com > Cc: dan...@google.com > Cc: mho...@kernel.org > Cc: kir...@shutemov.name > Cc: a...@linux-foundation.org > Signed-off-by: Joel Fernandes (Google) > --- > mm/mremap.c | 62 + > 1 file changed, 62 insertions(+) > > diff --git a/mm/mremap.c b/mm/mremap.c > index 9e68a02a52b1..d82c485822ef 100644 > --- a/mm/mremap.c > +++ b/mm/mremap.c > @@ -191,6 +191,54 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t > *old_pmd, > drop_rmap_locks(vma); > } > > +static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long > old_addr, > + unsigned long new_addr, unsigned long old_end, > + pmd_t *old_pmd, pmd_t *new_pmd, bool *need_flush) > +{ > + spinlock_t *old_ptl, *new_ptl; > + struct mm_struct *mm = vma->vm_mm; > + > + if ((old_addr & ~PMD_MASK) || (new_addr & ~PMD_MASK) > + || old_end - old_addr < PMD_SIZE) > + return false; > + > + /* > + * The destination pmd shouldn't be established, free_pgtables() > + * should have release it. > + */ > + if (WARN_ON(!pmd_none(*new_pmd))) > + return false; > + > + /* > + * We don't have to worry about the ordering of src and dst > + * ptlocks because exclusive mmap_sem prevents deadlock. > + */ > + old_ptl = pmd_lock(vma->vm_mm, old_pmd); > + if (old_ptl) { > + pmd_t pmd; > + > + new_ptl = pmd_lockptr(mm, new_pmd); > + if (new_ptl != old_ptl) > + spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING); > + > + /* Clear the pmd */ > + pmd = *old_pmd; > + pmd_clear(old_pmd); Adding Martin Schwidefsky. Is this mapping maybe still in use on other CPUs? If yes, I think for s390 we need to flush here as well (in other words, we might need to introduce pmd_clear_flush). On s390 you have to use instructions like CRDTE, IPTE or IDTE to modify page table entries that are still in use. Otherwise you can get a delayed access exception which is - in contrast to page faults - not recoverable. 
> + > + VM_BUG_ON(!pmd_none(*new_pmd)); > + > + /* Set the new pmd */ > + set_pmd_at(mm, new_addr, new_pmd, pmd); > + if (new_ptl != old_ptl) > + spin_unlock(new_ptl); > + spin_unlock(old_ptl); > + > + *need_flush = true; > + return true; > + } > + return false; > +} > + > unsigned long move_page_tables(struct vm_area_struct *vma, > unsigned long old_addr, struct vm_area_struct *new_vma, > unsigned long new_addr, unsigned long len, > @@ -239,7 +287,21 @@ unsigned long move_page_tables(struct vm_area_struct > *vma, > split_huge_pmd(vma, old_pmd, old_addr); > if (pmd_trans_unstable(old_pmd)) > continue; > + } else if (extent == PMD_SIZE) { > + bool moved; > + > + /* See comment in move_ptes() */ > + if (need_rmap_locks) > + take_rmap_locks(vma); > + moved = move_normal_pmd(vma, old_addr, new_addr, > + old_end, old_pmd, new_pmd, > + &need_flush); > + if (need_rmap_locks) > + drop_rmap_locks(vma); > + if (moved) > + continue; >