[PATCH v10 07/13] powerpc/mm: Remove CONFIG_PPC_MM_SLICES

2022-04-09 Thread Christophe Leroy
CONFIG_PPC_MM_SLICES is always selected by hash book3s/64.
CONFIG_PPC_MM_SLICES is never selected by other platforms.

Remove it.

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/hugetlb.h |  2 +-
 arch/powerpc/include/asm/paca.h|  7 ---
 arch/powerpc/kernel/paca.c |  5 -
 arch/powerpc/mm/book3s64/Makefile  |  3 +--
 arch/powerpc/mm/book3s64/hash_utils.c  | 14 --
 arch/powerpc/mm/hugetlbpage.c  |  2 +-
 arch/powerpc/mm/mmap.c |  4 ++--
 arch/powerpc/platforms/Kconfig.cputype |  4 
 8 files changed, 5 insertions(+), 36 deletions(-)

diff --git a/arch/powerpc/include/asm/hugetlb.h b/arch/powerpc/include/asm/hugetlb.h
index 6a1a1ac5743b..ef86197d1c0a 100644
--- a/arch/powerpc/include/asm/hugetlb.h
+++ b/arch/powerpc/include/asm/hugetlb.h
@@ -24,7 +24,7 @@ static inline int is_hugepage_only_range(struct mm_struct *mm,
 unsigned long addr,
 unsigned long len)
 {
-   if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled())
+   if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU) && !radix_enabled())
return slice_is_hugepage_only_range(mm, addr, len);
return 0;
 }
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 8330968ca346..03330b7d835f 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -152,16 +152,9 @@ struct paca_struct {
struct tlb_core_data tcd;
 #endif /* CONFIG_PPC_BOOK3E */
 
-#ifdef CONFIG_PPC_BOOK3S
 #ifdef CONFIG_PPC_64S_HASH_MMU
-#ifdef CONFIG_PPC_MM_SLICES
unsigned char mm_ctx_low_slices_psize[BITS_PER_LONG / BITS_PER_BYTE];
unsigned char mm_ctx_high_slices_psize[SLICE_ARRAY_SIZE];
-#else
-   u16 mm_ctx_user_psize;
-   u16 mm_ctx_sllp;
-#endif
-#endif
 #endif
 
/*
diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c
index 39da688a9455..ba593fd60124 100644
--- a/arch/powerpc/kernel/paca.c
+++ b/arch/powerpc/kernel/paca.c
@@ -344,15 +344,10 @@ void copy_mm_to_paca(struct mm_struct *mm)
 {
	mm_context_t *context = &mm->context;
 
-#ifdef CONFIG_PPC_MM_SLICES
VM_BUG_ON(!mm_ctx_slb_addr_limit(context));
	memcpy(&get_paca()->mm_ctx_low_slices_psize, mm_ctx_low_slices(context),
	       LOW_SLICE_ARRAY_SZ);
	memcpy(&get_paca()->mm_ctx_high_slices_psize, mm_ctx_high_slices(context),
	       TASK_SLICE_ARRAY_SZ(context));
-#else /* CONFIG_PPC_MM_SLICES */
-   get_paca()->mm_ctx_user_psize = context->user_psize;
-   get_paca()->mm_ctx_sllp = context->sllp;
-#endif
 }
 #endif /* CONFIG_PPC_64S_HASH_MMU */
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index af2f3e75d458..d527dc8e30a8 100644
--- a/arch/powerpc/mm/book3s64/Makefile
+++ b/arch/powerpc/mm/book3s64/Makefile
@@ -5,7 +5,7 @@ ccflags-y   := $(NO_MINIMAL_TOC)
 obj-y  += mmu_context.o pgtable.o trace.o
 ifdef CONFIG_PPC_64S_HASH_MMU
 CFLAGS_REMOVE_slb.o = $(CC_FLAGS_FTRACE)
-obj-y  += hash_pgtable.o hash_utils.o hash_tlb.o slb.o
+obj-y  += hash_pgtable.o hash_utils.o hash_tlb.o slb.o slice.o
 obj-$(CONFIG_PPC_HASH_MMU_NATIVE)  += hash_native.o
 obj-$(CONFIG_PPC_4K_PAGES) += hash_4k.o
 obj-$(CONFIG_PPC_64K_PAGES)+= hash_64k.o
@@ -21,7 +21,6 @@ obj-$(CONFIG_PPC_RADIX_MMU)   += radix_hugetlbpage.o
 endif
 obj-$(CONFIG_SPAPR_TCE_IOMMU)  += iommu_api.o
 obj-$(CONFIG_PPC_PKEY) += pkeys.o
-obj-$(CONFIG_PPC_MM_SLICES)+= slice.o
 
 # Instrumenting the SLB fault path can lead to duplicate SLB entries
 KCOV_INSTRUMENT_slb.o := n
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 985cabdd7f67..43ecad8f9564 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -1264,7 +1264,6 @@ unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap)
return pp;
 }
 
-#ifdef CONFIG_PPC_MM_SLICES
 static unsigned int get_paca_psize(unsigned long addr)
 {
unsigned char *psizes;
@@ -1281,12 +1280,6 @@ static unsigned int get_paca_psize(unsigned long addr)
return (psizes[index >> 1] >> (mask_index * 4)) & 0xF;
 }
 
-#else
-unsigned int get_paca_psize(unsigned long addr)
-{
-   return get_paca()->mm_ctx_user_psize;
-}
-#endif
 
 /*
  * Demote a segment to using 4k pages.
@@ -1680,7 +1673,6 @@ DEFINE_INTERRUPT_HANDLER(do_hash_fault)
}
 }
 
-#ifdef CONFIG_PPC_MM_SLICES
 static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
 {
int psize = get_slice_psize(mm, ea);
@@ -1697,12 +1689,6 @@ static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
 
return true;
 }
-#else
-static bool should_hash_preload(struct mm_struct *mm, unsigned long ea)
-{
-   return true;
-}
-#endif
 
 static void 

[PATCH v10 13/13] powerpc: Simplify and move arch_randomize_brk()

2022-04-09 Thread Christophe Leroy
arch_randomize_brk() is only needed for hash on book3s/64; for other
platforms the one provided by the default mmap layout is good enough.

Move it to hash_utils.c and use randomize_page() like the generic one.

And properly opt out the radix case instead of making an assumption
on mmu_highuser_ssize.

Also change to a 32M range like most other architectures instead of 8M.
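
For reference, randomize_page() is the generic helper from mm/util.c;
a rough sketch of its behaviour (quoted from memory, not part of this
patch) is:

	/* sketch: return a page aligned address in [start, start + range) */
	unsigned long randomize_page(unsigned long start, unsigned long range)
	{
		if (!PAGE_ALIGNED(start)) {
			range -= PAGE_ALIGN(start) - start;
			start = PAGE_ALIGN(start);
		}

		if (start > ULONG_MAX - range)	/* clamp overflow */
			range = ULONG_MAX - start;

		range >>= PAGE_SHIFT;
		if (range == 0)
			return start;

		return start + ((get_random_long() % range) << PAGE_SHIFT);
	}

Because the helper page-aligns and bounds the result itself, the
explicit PAGE_ALIGN() and the "ret < mm->brk" check of the old powerpc
implementation are no longer needed.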

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/kernel/process.c | 41 ---
 arch/powerpc/mm/book3s64/hash_utils.c | 19 +
 2 files changed, 19 insertions(+), 41 deletions(-)

diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 984813a4d5dc..e7f809bdd433 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -34,10 +34,8 @@
 #include 
 #include 
 #include 
-#include 
 #include 
 #include 
-#include 
 #include 
 #include 
 
@@ -2313,42 +2311,3 @@ unsigned long arch_align_stack(unsigned long sp)
sp -= get_random_int() & ~PAGE_MASK;
return sp & ~0xf;
 }
-
-static inline unsigned long brk_rnd(void)
-{
-unsigned long rnd = 0;
-
-   /* 8MB for 32bit, 1GB for 64bit */
-   if (is_32bit_task())
-   rnd = (get_random_long() % (1UL<<(23-PAGE_SHIFT)));
-   else
-   rnd = (get_random_long() % (1UL<<(30-PAGE_SHIFT)));
-
-   return rnd << PAGE_SHIFT;
-}
-
-unsigned long arch_randomize_brk(struct mm_struct *mm)
-{
-   unsigned long base = mm->brk;
-   unsigned long ret;
-
-#ifdef CONFIG_PPC_BOOK3S_64
-   /*
-* If we are using 1TB segments and we are allowed to randomise
-* the heap, we can put it above 1TB so it is backed by a 1TB
-* segment. Otherwise the heap will be in the bottom 1TB
-* which always uses 256MB segments and this may result in a
-* performance penalty.
-*/
-	if (!radix_enabled() && !is_32bit_task() && (mmu_highuser_ssize == MMU_SEGSIZE_1T))
-   base = max_t(unsigned long, mm->brk, 1UL << SID_SHIFT_1T);
-#endif
-
-   ret = PAGE_ALIGN(base + brk_rnd());
-
-   if (ret < mm->brk)
-   return mm->brk;
-
-   return ret;
-}
-
diff --git a/arch/powerpc/mm/book3s64/hash_utils.c b/arch/powerpc/mm/book3s64/hash_utils.c
index 43ecad8f9564..72a3eca02449 100644
--- a/arch/powerpc/mm/book3s64/hash_utils.c
+++ b/arch/powerpc/mm/book3s64/hash_utils.c
@@ -37,6 +37,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -2133,3 +2135,20 @@ void __init print_system_hash_info(void)
if (htab_hash_mask)
pr_info("htab_hash_mask= 0x%lx\n", htab_hash_mask);
 }
+
+unsigned long arch_randomize_brk(struct mm_struct *mm)
+{
+   /*
+* If we are using 1TB segments and we are allowed to randomise
+* the heap, we can put it above 1TB so it is backed by a 1TB
+* segment. Otherwise the heap will be in the bottom 1TB
+* which always uses 256MB segments and this may result in a
+* performance penalty.
+*/
+   if (is_32bit_task())
+   return randomize_page(mm->brk, SZ_32M);
+   else if (!radix_enabled() && mmu_highuser_ssize == MMU_SEGSIZE_1T)
+		return randomize_page(max_t(unsigned long, mm->brk, SZ_1T), SZ_1G);
+   else
+   return randomize_page(mm->brk, SZ_1G);
+}
-- 
2.35.1



[PATCH v10 03/13] mm, hugetlbfs: Allow an arch to always use generic versions of get_unmapped_area functions

2022-04-09 Thread Christophe Leroy
Unlike most architectures, powerpc can only define at runtime
if it is going to use the generic arch_get_unmapped_area() or not.

Today, powerpc has a copy of the generic arch_get_unmapped_area()
because when selecting HAVE_ARCH_UNMAPPED_AREA the generic
arch_get_unmapped_area() is not available.

Rename it generic_get_unmapped_area() and make it independent of
HAVE_ARCH_UNMAPPED_AREA.

Do the same for arch_get_unmapped_area_topdown() versus
HAVE_ARCH_UNMAPPED_AREA_TOPDOWN.

Do the same for hugetlb_get_unmapped_area() versus
HAVE_ARCH_HUGETLB_UNMAPPED_AREA.
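
The resulting pattern, sketched below, keeps the generic body always
built under its new name and turns the arch-overridable symbol into a
trivial wrapper when the architecture does not provide its own (the
wrapper body shown here is illustrative; mm/mmap.c does the same for
the topdown variant):

	unsigned long
	generic_get_unmapped_area(struct file *filp, unsigned long addr,
				  unsigned long len, unsigned long pgoff,
				  unsigned long flags)
	{
		/* ... unchanged body of the former arch_get_unmapped_area() ... */
	}

	#ifndef HAVE_ARCH_UNMAPPED_AREA
	unsigned long
	arch_get_unmapped_area(struct file *filp, unsigned long addr,
			       unsigned long len, unsigned long pgoff,
			       unsigned long flags)
	{
		return generic_get_unmapped_area(filp, addr, len, pgoff, flags);
	}
	#endif

An architecture that defines HAVE_ARCH_UNMAPPED_AREA can then still
call generic_get_unmapped_area() at runtime from its own version.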

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
Acked-by: Andrew Morton 
---
 fs/hugetlbfs/inode.c | 17 +
 include/linux/hugetlb.h  |  5 +
 include/linux/sched/mm.h |  9 +
 mm/mmap.c| 31 ---
 4 files changed, 51 insertions(+), 11 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index dd3a088db11d..ec611c7de9e3 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -195,7 +195,6 @@ static int hugetlbfs_file_mmap(struct file *file, struct vm_area_struct *vma)
  * Called under mmap_write_lock(mm).
  */
 
-#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
 static unsigned long
 hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags)
@@ -244,9 +243,10 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
return addr;
 }
 
-static unsigned long
-hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff, unsigned long flags)
+unsigned long
+generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
@@ -283,6 +283,15 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
return hugetlb_get_unmapped_area_bottomup(file, addr, len,
pgoff, flags);
 }
+
+#ifndef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+static unsigned long
+hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
+{
+   return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
+}
 #endif
 
 static size_t
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 53c1b6082a4c..10895adc7635 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -513,6 +513,11 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long flags);
 #endif /* HAVE_ARCH_HUGETLB_UNMAPPED_AREA */
 
+unsigned long
+generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
+
 /*
  * huegtlb page specific state flags.  These flags are located in page.private
  * of the hugetlb head page.  Functions created via the below macros should be
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index 1ad1f4bfa025..da3a32d05b34 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -153,6 +153,15 @@ extern unsigned long
 arch_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
  unsigned long len, unsigned long pgoff,
  unsigned long flags);
+
+unsigned long
+generic_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
+unsigned long
+generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags);
 #else
 static inline void arch_pick_mmap_layout(struct mm_struct *mm,
 struct rlimit *rlim_stack) {}
diff --git a/mm/mmap.c b/mm/mmap.c
index 313b57d55a63..d2b6c3fbfb8c 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2128,10 +2128,10 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
  *
  * This function "knows" that -ENOMEM has the bits set.
  */
-#ifndef HAVE_ARCH_UNMAPPED_AREA
 unsigned long
-arch_get_unmapped_area(struct file *filp, unsigned long addr,
-   unsigned long len, unsigned long pgoff, unsigned long flags)
+generic_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff,
+ unsigned long flags)
 {
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
@@ -2161,17 

[PATCH v10 01/13] mm, hugetlbfs: Allow for "high" userspace addresses

2022-04-09 Thread Christophe Leroy
This is a complement of f6795053dac8 ("mm: mmap: Allow for "high"
userspace addresses") for hugetlb.

This patch adds support for "high" userspace addresses that are
optionally supported on the system and have to be requested via a hint
mechanism ("high" addr parameter to mmap).

Architectures such as powerpc and x86 achieve this by making changes to
their architectural versions of hugetlb_get_unmapped_area() function.
However, arm64 uses the generic version of that function.

So take into account arch_get_mmap_base() and arch_get_mmap_end() in
hugetlb_get_unmapped_area(). To allow that, move those two macros
out of mm/mmap.c into include/linux/sched/mm.h

If these macros are not defined in architectural code then they default
to (TASK_SIZE) and (base) so should not introduce any behavioural
changes to architectures that do not define them.

For the time being, only ARM64 is affected by this change.

From Catalin (ARM64):
  We should have fixed hugetlb_get_unmapped_area() as well when we
added support for 52-bit VA. The reason for commit f6795053dac8 was to
prevent normal mmap() from returning addresses above 48-bit by default
as some user-space had hard assumptions about this.

It's a slight ABI change if you do this for hugetlb_get_unmapped_area()
but I doubt anyone would notice. It's more likely that the current
behaviour would cause issues, so I'd rather have them consistent.
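
For context, the two macros and their defaults, together with the
pre-existing arm64 override (quoted from arch/arm64/include/asm/processor.h,
in the !CONFIG_ARM64_FORCE_52BIT case):

	/* include/linux/sched/mm.h - defaults added by this patch */
	#ifndef arch_get_mmap_end
	#define arch_get_mmap_end(addr)		(TASK_SIZE)
	#endif

	#ifndef arch_get_mmap_base
	#define arch_get_mmap_base(addr, base)	(base)
	#endif

	/* arm64: a hint above the default 48-bit window opts in to the
	 * full 52-bit address space */
	#define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
					DEFAULT_MAP_WINDOW)

	#define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
					base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
					base)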

Signed-off-by: Christophe Leroy 
Cc: Steve Capper 
Cc: Will Deacon 
Cc: Catalin Marinas 
Fixes: f6795053dac8 ("mm: mmap: Allow for "high" userspace addresses")
Cc:  # 5.0.x
Reviewed-by: Catalin Marinas 
Acked-by: Andrew Morton 
---
v10:
- Moved as first patch of the series so that it can be applied
separately as a fix and be easily backported to stable.
- Added text from Catalin explaining why it is a fixup.
---
 fs/hugetlbfs/inode.c | 9 +
 include/linux/sched/mm.h | 8 
 mm/mmap.c| 8 
 3 files changed, 13 insertions(+), 12 deletions(-)

diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index 99c7477cee5c..dd3a088db11d 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -206,7 +206,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
-   info.high_limit = TASK_SIZE;
+   info.high_limit = arch_get_mmap_end(addr);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
	return vm_unmapped_area(&info);
@@ -222,7 +222,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
info.flags = VM_UNMAPPED_AREA_TOPDOWN;
info.length = len;
info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-   info.high_limit = current->mm->mmap_base;
+   info.high_limit = arch_get_mmap_base(addr, current->mm->mmap_base);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
	addr = vm_unmapped_area(&info);
@@ -237,7 +237,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = current->mm->mmap_base;
-   info.high_limit = TASK_SIZE;
+   info.high_limit = arch_get_mmap_end(addr);
		addr = vm_unmapped_area(&info);
}
 
@@ -251,6 +251,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
+   const unsigned long mmap_end = arch_get_mmap_end(addr);
 
if (len & ~huge_page_mask(h))
return -EINVAL;
@@ -266,7 +267,7 @@ hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
if (addr) {
addr = ALIGN(addr, huge_page_size(h));
vma = find_vma(mm, addr);
-   if (TASK_SIZE - len >= addr &&
+   if (mmap_end - len >= addr &&
(!vma || addr + len <= vm_start_gap(vma)))
return addr;
}
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index a80356e9dc69..1ad1f4bfa025 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -136,6 +136,14 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 #endif /* CONFIG_MEMCG */
 
 #ifdef CONFIG_MMU
+#ifndef arch_get_mmap_end
+#define arch_get_mmap_end(addr)	(TASK_SIZE)
+#endif
+
+#ifndef arch_get_mmap_base
+#define arch_get_mmap_base(addr, base) (base)
+#endif
+
 extern void arch_pick_mmap_layout(struct mm_struct *mm,
  struct rlimit *rlim_stack);
 extern unsigned long
diff --git a/mm/mmap.c b/mm/mmap.c
index 3aa839f81e63..313b57d55a63 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2117,14 +2117,6 @@ unsigned long vm_unmapped_area(struct vm_unmapped_area_info *info)
return 

[PATCH v10 00/13] Convert powerpc to default topdown mmap layout (v10)

2022-04-09 Thread Christophe Leroy
Rebased on top of v5.18-rc1

This series converts powerpc to default topdown mmap layout.

First patch is a mm fix that should go into v5.18 and stable.

Andrew, can you take it?

It is a complement/fix of f6795053dac8 ("mm: mmap: Allow for
"high" userspace addresses") for hugetlb. It adds support for "high"
userspace addresses that are optionally supported on the system and
have to be requested via a hint mechanism ("high" addr parameter to mmap).

powerpc requires its own arch_get_unmapped_area() only when
slices are needed, which is only for book3s/64. First part of
the series moves slices into book3s/64 specific directories
and cleans up other subarchitectures.

Last part converts to default topdown mmap layout.

A small modification is done to core mm to allow
powerpc to still provide its own arch_randomize_brk().

Another modification is done to core mm to allow powerpc
to use generic versions of get_unmapped_area functions for Radix
while still providing its own implementation for Hash, the
selection between Radix and Hash being done at runtime.

Last modification to core mm is to give len and flags to
arch_get_mmap_end().

Changes in v10:
- Moved patch 4 to the first position in the series, to allow it to be merged
independently as a stable backport

Changes in v9:
- v9 was just a split of v8, to tentatively get the mm part merged through the
mm tree and the rest through the powerpc tree

Changes in v8:
- Moved patch "sizes.h: Add SZ_1T macro" up from which is already in linux-next 
but not in Linus tree yet.
- Rebased on today's powerpc/next

Changes in v7:
- Taken into account comments from Catalin (patches 3 and 4)

Changes in v6:
- New patch (patch 4) to take arch_get_mmap_base() and arch_get_mmap_end() into 
account in generic hugetlb_get_unmapped_area()
- Get back arch_randomize_brk() simplification as it relies on default topdown 
mmap layout.
- Fixed precedence between || and && in powerpc's arch_get_mmap_end() (patch 9)

Changes in v5:
- Added patch 3
- Added arch_get_mmap_base() and arch_get_mmap_end() to patch 7 to better match 
original powerpc behaviour
- Switched patches 10 and 11 and performed full randomisation in patch 10 just
before switching to the default implementation, as suggested by Nic.

Changes in v4:
- Move arch_randomize_brk() simplification out of this series
- Add a change to core mm to enable using generic implementation
while providing arch specific one at the same time.
- Reworked radix get_unmapped_area to use generic implementation
- Rebase on top of Nic's series v6

Changes in v3:
- Fixed missing  in last patch
- Added a patch to move SZ_1T out of drivers/pci/controller/pci-xgene.c

Changes in v2:
- Moved patch 4 before patch 2
- Make generic arch_randomize_brk() __weak
- Added patch 9

Christophe Leroy (13):
  mm, hugetlbfs: Allow for "high" userspace addresses
  mm: Allow arch specific arch_randomize_brk() with
CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
  mm, hugetlbfs: Allow an arch to always use generic versions of
get_unmapped_area functions
  mm: Add len and flags parameters to arch_get_mmap_end()
  powerpc/mm: Move vma_mmu_pagesize()
  powerpc/mm: Make slice specific to book3s/64
  powerpc/mm: Remove CONFIG_PPC_MM_SLICES
  powerpc/mm: Use generic_get_unmapped_area() and call it from
arch_get_unmapped_area()
  powerpc/mm: Use generic_hugetlb_get_unmapped_area()
  powerpc/mm: Move get_unmapped_area functions to slice.c
  powerpc/mm: Enable full randomisation of memory mappings
  powerpc/mm: Convert to default topdown mmap layout
  powerpc: Simplify and move arch_randomize_brk()

 arch/arm64/include/asm/processor.h|   4 +-
 arch/powerpc/Kconfig  |   2 +-
 arch/powerpc/include/asm/book3s/64/hugetlb.h  |   4 -
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |   1 +
 arch/powerpc/include/asm/book3s/64/mmu.h  |   6 -
 arch/powerpc/include/asm/book3s/64/slice.h|  24 ++
 arch/powerpc/include/asm/hugetlb.h|   2 +-
 arch/powerpc/include/asm/paca.h   |   7 -
 arch/powerpc/include/asm/page.h   |   1 -
 arch/powerpc/include/asm/processor.h  |   2 -
 arch/powerpc/include/asm/slice.h  |  46 
 arch/powerpc/include/asm/task_size_64.h   |   8 +
 arch/powerpc/kernel/paca.c|   5 -
 arch/powerpc/kernel/process.c |  41 ---
 arch/powerpc/mm/Makefile  |   3 +-
 arch/powerpc/mm/book3s64/Makefile |   2 +-
 arch/powerpc/mm/book3s64/hash_utils.c |  33 ++-
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c  |  55 
 arch/powerpc/mm/{ => book3s64}/slice.c|  71 -
 arch/powerpc/mm/hugetlbpage.c |  34 ---
 arch/powerpc/mm/mmap.c| 256 --
 arch/powerpc/mm/nohash/mmu_context.c  |   9 -
 arch/powerpc/mm/nohash/tlb.c  |   4 -
 arch/powerpc/platforms/Kconfig.cputype|   4 -
 fs/hugetlbfs/inode.c  |  26 +-
 

[PATCH v10 02/13] mm: Allow arch specific arch_randomize_brk() with CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT

2022-04-09 Thread Christophe Leroy
Commit e7142bf5d231 ("arm64, mm: make randomization selected by
generic topdown mmap layout") introduced a default version of
arch_randomize_brk() provided when
CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT is selected.

powerpc could select CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
but needs to provide its own arch_randomize_brk().

In order to allow that, define generic version of arch_randomize_brk()
as a __weak symbol.
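
A minimal sketch of the pattern: the weak default and a strong arch
definition may now coexist, and the linker picks the strong one (the
arch body below is hypothetical):

	/* mm/util.c: default, overridable since it is __weak */
	unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
	{
		if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
			return randomize_page(mm->brk, SZ_32M);
		return randomize_page(mm->brk, SZ_1G);
	}

	/* arch/<arch>/: a strong definition like this one replaces the
	 * weak default at link time */
	unsigned long arch_randomize_brk(struct mm_struct *mm)
	{
		return randomize_page(mm->brk, SZ_32M);	/* arch policy */
	}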

Cc: Alexandre Ghiti 
Signed-off-by: Christophe Leroy 
Acked-by: Andrew Morton 
---
 mm/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/mm/util.c b/mm/util.c
index 54e5e761a9a9..0787fe4af4ac 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -344,7 +344,7 @@ unsigned long randomize_stack_top(unsigned long stack_top)
 }
 
 #ifdef CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
-unsigned long arch_randomize_brk(struct mm_struct *mm)
+unsigned long __weak arch_randomize_brk(struct mm_struct *mm)
 {
/* Is the current task 32bit ? */
if (!IS_ENABLED(CONFIG_64BIT) || is_compat_task())
-- 
2.35.1



[PATCH v10 12/13] powerpc/mm: Convert to default topdown mmap layout

2022-04-09 Thread Christophe Leroy
Select CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT and
remove arch/powerpc/mm/mmap.c

This change reuses the generic framework added by
commit 67f3977f805b ("arm64, mm: move generic mmap layout
functions to mm") without any functional change.

Comparison between the powerpc implementation and the generic one (a
sketch of the generic mmap_base() follows the list):
- mmap_is_legacy() is identical.
- arch_mmap_rnd() does exactly the same although it's written
slightly differently.
- MIN_GAP and MAX_GAP are identical.
- mmap_base() does the same but uses STACK_RND_MASK which provides
the same values as stack_maxrandom_size().
- arch_pick_mmap_layout() is identical.
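
For reference, the generic side being switched to looks roughly like
this (a sketch of the mm/util.c version, quoted from memory):

	static unsigned long mmap_base(unsigned long rnd, struct rlimit *rlim_stack)
	{
		unsigned long gap = rlim_stack->rlim_cur;
		unsigned long pad = stack_guard_gap;

		/* account for stack randomization if necessary */
		if (current->flags & PF_RANDOMIZE)
			pad += (STACK_RND_MASK << PAGE_SHIFT);

		/* values close to RLIM_INFINITY can overflow */
		if (gap + pad > gap)
			gap += pad;

		if (gap < MIN_GAP)
			gap = MIN_GAP;
		else if (gap > MAX_GAP)
			gap = MAX_GAP;

		return PAGE_ALIGN(STACK_TOP - gap - rnd);
	}

On powerpc, STACK_RND_MASK << PAGE_SHIFT yields the same values as the
removed stack_maxrandom_size(), which is why the behaviour is unchanged.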

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/Kconfig |   2 +-
 arch/powerpc/include/asm/processor.h |   2 -
 arch/powerpc/mm/Makefile |   2 +-
 arch/powerpc/mm/mmap.c   | 105 ---
 4 files changed, 2 insertions(+), 109 deletions(-)
 delete mode 100644 arch/powerpc/mm/mmap.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 174edabb74fa..145af02df3dc 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -118,7 +118,6 @@ config PPC
select ARCH_HAS_DEBUG_WXif STRICT_KERNEL_RWX
select ARCH_HAS_DEVMEM_IS_ALLOWED
select ARCH_HAS_DMA_MAP_DIRECT  if PPC_PSERIES
-   select ARCH_HAS_ELF_RANDOMIZE
select ARCH_HAS_FORTIFY_SOURCE
select ARCH_HAS_GCOV_PROFILE_ALL
select ARCH_HAS_HUGEPD  if HUGETLB_PAGE
@@ -154,6 +153,7 @@ config PPC
select ARCH_USE_MEMTEST
select ARCH_USE_QUEUED_RWLOCKS  if PPC_QUEUED_SPINLOCKS
select ARCH_USE_QUEUED_SPINLOCKSif PPC_QUEUED_SPINLOCKS
+   select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
select ARCH_WANT_IPC_PARSE_VERSION
select ARCH_WANT_IRQS_OFF_ACTIVATE_MM
select ARCH_WANT_LD_ORPHAN_WARN
diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h
index 39c25021030f..fdfaae194ddd 100644
--- a/arch/powerpc/include/asm/processor.h
+++ b/arch/powerpc/include/asm/processor.h
@@ -392,8 +392,6 @@ static inline void prefetchw(const void *x)
 
 #define spin_lock_prefetch(x)  prefetchw(x)
 
-#define HAVE_ARCH_PICK_MMAP_LAYOUT
-
 /* asm stubs */
 extern unsigned long isa300_idle_stop_noloss(unsigned long psscr_val);
 extern unsigned long isa300_idle_stop_mayloss(unsigned long psscr_val);
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index d4c20484dad9..503a6e249940 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
 
 ccflags-$(CONFIG_PPC64):= $(NO_MINIMAL_TOC)
 
-obj-y  := fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \
+obj-y  := fault.o mem.o pgtable.o maccess.o pageattr.o \
   init_$(BITS).o pgtable_$(BITS).o \
   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
   init-common.o mmu_context.o drmem.o \
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
deleted file mode 100644
index d9eae456558a..
--- a/arch/powerpc/mm/mmap.c
+++ /dev/null
@@ -1,105 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-or-later
-/*
- *  flexible mmap layout support
- *
- * Copyright 2003-2004 Red Hat Inc., Durham, North Carolina.
- * All Rights Reserved.
- *
- * Started by Ingo Molnar 
- */
-
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-#include 
-
-/*
- * Top of mmap area (just below the process stack).
- *
- * Leave at least a ~128 MB hole.
- */
-#define MIN_GAP (128*1024*1024)
-#define MAX_GAP (TASK_SIZE/6*5)
-
-static inline int mmap_is_legacy(struct rlimit *rlim_stack)
-{
-   if (current->personality & ADDR_COMPAT_LAYOUT)
-   return 1;
-
-   if (rlim_stack->rlim_cur == RLIM_INFINITY)
-   return 1;
-
-   return sysctl_legacy_va_layout;
-}
-
-unsigned long arch_mmap_rnd(void)
-{
-   unsigned long shift, rnd;
-
-   shift = mmap_rnd_bits;
-#ifdef CONFIG_COMPAT
-   if (is_32bit_task())
-   shift = mmap_rnd_compat_bits;
-#endif
-   rnd = get_random_long() % (1ul << shift);
-
-   return rnd << PAGE_SHIFT;
-}
-
-static inline unsigned long stack_maxrandom_size(void)
-{
-   if (!(current->flags & PF_RANDOMIZE))
-   return 0;
-
-   /* 8MB for 32bit, 1GB for 64bit */
-   if (is_32bit_task())
-   return (1<<23);
-   else
-   return (1<<30);
-}
-
-static inline unsigned long mmap_base(unsigned long rnd,
- struct rlimit *rlim_stack)
-{
-   unsigned long gap = rlim_stack->rlim_cur;
-   unsigned long pad = stack_maxrandom_size() + stack_guard_gap;
-
-   /* Values close to RLIM_INFINITY can overflow. */
-   if (gap + pad > gap)
-   gap += pad;
-
-   if (gap < MIN_GAP)
-   gap = MIN_GAP;
-   else 

[PATCH v10 09/13] powerpc/mm: Use generic_hugetlb_get_unmapped_area()

2022-04-09 Thread Christophe Leroy
Use the generic version of arch_hugetlb_get_unmapped_area()
which is now available at all times.

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/hugetlb.h |  4 --
 arch/powerpc/mm/book3s64/radix_hugetlbpage.c | 55 
 arch/powerpc/mm/hugetlbpage.c|  4 +-
 3 files changed, 1 insertion(+), 62 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/hugetlb.h b/arch/powerpc/include/asm/book3s/64/hugetlb.h
index 12e150e615b7..b37a28f62cf6 100644
--- a/arch/powerpc/include/asm/book3s/64/hugetlb.h
+++ b/arch/powerpc/include/asm/book3s/64/hugetlb.h
@@ -8,10 +8,6 @@
  */
void radix__flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
void radix__local_flush_hugetlb_page(struct vm_area_struct *vma, unsigned long vmaddr);
-extern unsigned long
-radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff,
-   unsigned long flags);
 
 extern void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
unsigned long addr, pte_t *ptep,
diff --git a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
index 23d3e08911d3..d2fb776febb4 100644
--- a/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
+++ b/arch/powerpc/mm/book3s64/radix_hugetlbpage.c
@@ -41,61 +41,6 @@ void radix__flush_hugetlb_tlb_range(struct vm_area_struct *vma, unsigned long st
radix__flush_tlb_range_psize(vma->vm_mm, start, end, psize);
 }
 
-/*
- * A vairant of hugetlb_get_unmapped_area doing topdown search
- * FIXME!! should we do as x86 does or non hugetlb area does ?
- * ie, use topdown or not based on mmap_is_legacy check ?
- */
-unsigned long
-radix__hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff,
-   unsigned long flags)
-{
-   struct mm_struct *mm = current->mm;
-   struct vm_area_struct *vma;
-   struct hstate *h = hstate_file(file);
-   int fixed = (flags & MAP_FIXED);
-   unsigned long high_limit;
-   struct vm_unmapped_area_info info;
-
-   high_limit = DEFAULT_MAP_WINDOW;
-   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
-   high_limit = TASK_SIZE;
-
-   if (len & ~huge_page_mask(h))
-   return -EINVAL;
-   if (len > high_limit)
-   return -ENOMEM;
-
-   if (fixed) {
-   if (addr > high_limit - len)
-   return -ENOMEM;
-   if (prepare_hugepage_range(file, addr, len))
-   return -EINVAL;
-   return addr;
-   }
-
-   if (addr) {
-   addr = ALIGN(addr, huge_page_size(h));
-   vma = find_vma(mm, addr);
-   if (high_limit - len >= addr && addr >= mmap_min_addr &&
-   (!vma || addr + len <= vm_start_gap(vma)))
-   return addr;
-   }
-   /*
-* We are always doing an topdown search here. Slice code
-* does that too.
-*/
-   info.flags = VM_UNMAPPED_AREA_TOPDOWN;
-   info.length = len;
-   info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-   info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
-   info.align_mask = PAGE_MASK & ~huge_page_mask(h);
-   info.align_offset = 0;
-
-	return vm_unmapped_area(&info);
-}
-
 void radix__huge_ptep_modify_prot_commit(struct vm_area_struct *vma,
 unsigned long addr, pte_t *ptep,
 pte_t old_pte, pte_t pte)
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index f71ac14018e2..a87c886042e9 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -553,11 +553,9 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
unsigned long len, unsigned long pgoff,
unsigned long flags)
 {
-#ifdef CONFIG_PPC_RADIX_MMU
if (radix_enabled())
-   return radix__hugetlb_get_unmapped_area(file, addr, len,
+   return generic_hugetlb_get_unmapped_area(file, addr, len,
   pgoff, flags);
-#endif
 #ifdef CONFIG_PPC_64S_HASH_MMU
	return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
 #endif
-- 
2.35.1



[PATCH v10 11/13] powerpc/mm: Enable full randomisation of memory mappings

2022-04-09 Thread Christophe Leroy
Do like most other architectures and provide randomisation also to
"legacy" memory mappings, by adding the random factor to
mm->mmap_base in arch_pick_mmap_layout().

See commit 8b8addf891de ("x86/mm/32: Enable full randomization on
i386 and X86_32") for all explanations and benefits of that mmap
randomisation.

At the moment, slice_find_area_bottomup() doesn't use mm->mmap_base
but uses the fixed TASK_UNMAPPED_BASE instead.
Because slice_find_area_bottomup() is used as a fallback to
slice_find_area_topdown(), it can't use mm->mmap_base
directly.

Instead of always using TASK_UNMAPPED_BASE as base address, leave
it to the caller. When called from slice_find_area_topdown()
TASK_UNMAPPED_BASE is used. Otherwise mm->mmap_base is used.
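
The net effect on arch_pick_mmap_layout() is that both layouts now get
the random factor (see the mmap.c hunk below):

	if (mmap_is_legacy(rlim_stack)) {
		mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
		mm->get_unmapped_area = arch_get_unmapped_area;
	} else {
		mm->mmap_base = mmap_base(random_factor, rlim_stack);
		mm->get_unmapped_area = arch_get_unmapped_area_topdown;
	}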

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/mm/book3s64/slice.c | 18 +++---
 arch/powerpc/mm/mmap.c   |  2 +-
 2 files changed, 8 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index 03681042b807..c0b58afb9a47 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -276,20 +276,18 @@ static bool slice_scan_available(unsigned long addr,
 }
 
static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
-					      unsigned long len,
+					      unsigned long addr, unsigned long len,
					      const struct slice_mask *available,
					      int psize, unsigned long high_limit)
 {
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
-   unsigned long addr, found, next_end;
+   unsigned long found, next_end;
struct vm_unmapped_area_info info;
 
info.flags = 0;
info.length = len;
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
info.align_offset = 0;
-
-   addr = TASK_UNMAPPED_BASE;
/*
 * Check till the allow max value for this mmap request
 */
@@ -322,12 +320,12 @@ static unsigned long slice_find_area_bottomup(struct mm_struct *mm,
 }
 
static unsigned long slice_find_area_topdown(struct mm_struct *mm,
-					     unsigned long len,
+					     unsigned long addr, unsigned long len,
					     const struct slice_mask *available,
					     int psize, unsigned long high_limit)
 {
int pshift = max_t(int, mmu_psize_defs[psize].shift, PAGE_SHIFT);
-   unsigned long addr, found, prev;
+   unsigned long found, prev;
struct vm_unmapped_area_info info;
unsigned long min_addr = max(PAGE_SIZE, mmap_min_addr);
 
@@ -335,8 +333,6 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
info.length = len;
info.align_mask = PAGE_MASK & ((1ul << pshift) - 1);
info.align_offset = 0;
-
-   addr = mm->mmap_base;
/*
 * If we are trying to allocate above DEFAULT_MAP_WINDOW
 * Add the different to the mmap_base.
@@ -377,7 +373,7 @@ static unsigned long slice_find_area_topdown(struct mm_struct *mm,
 * can happen with large stack limits and large mmap()
 * allocations.
 */
-   return slice_find_area_bottomup(mm, len, available, psize, high_limit);
+	return slice_find_area_bottomup(mm, TASK_UNMAPPED_BASE, len, available, psize, high_limit);
 }
 
 
@@ -386,9 +382,9 @@ static unsigned long slice_find_area(struct mm_struct *mm, unsigned long len,
 int topdown, unsigned long high_limit)
 {
if (topdown)
-		return slice_find_area_topdown(mm, len, mask, psize, high_limit);
+		return slice_find_area_topdown(mm, mm->mmap_base, len, mask, psize, high_limit);
else
-		return slice_find_area_bottomup(mm, len, mask, psize, high_limit);
+		return slice_find_area_bottomup(mm, mm->mmap_base, len, mask, psize, high_limit);
 }
 
 static inline void slice_copy_mask(struct slice_mask *dst,
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 5972d619d274..d9eae456558a 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -96,7 +96,7 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
 * bit is set, or if the expected stack growth is unlimited:
 */
if (mmap_is_legacy(rlim_stack)) {
-   mm->mmap_base = TASK_UNMAPPED_BASE;
+   mm->mmap_base = TASK_UNMAPPED_BASE + random_factor;
mm->get_unmapped_area = arch_get_unmapped_area;
} else {
mm->mmap_base = mmap_base(random_factor, rlim_stack);
-- 
2.35.1



[PATCH v10 04/13] mm: Add len and flags parameters to arch_get_mmap_end()

2022-04-09 Thread Christophe Leroy
Powerpc needs flags and len to make decision on arch_get_mmap_end().

So add them as parameters to arch_get_mmap_end().
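
As an example of the decision powerpc needs to make (taken from a later
patch in this series): a MAP_FIXED request that crosses the default map
window must also extend the limit to the full TASK_SIZE, which requires
both len and flags:

	#define arch_get_mmap_end(addr, len, flags) \
		(((addr) > DEFAULT_MAP_WINDOW) || \
		 (((flags) & MAP_FIXED) && ((addr) + (len) > DEFAULT_MAP_WINDOW)) ? \
			TASK_SIZE : DEFAULT_MAP_WINDOW)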

Signed-off-by: Christophe Leroy 
Cc: Steve Capper 
Cc: Catalin Marinas 
Cc: Will Deacon 
Acked-by: Catalin Marinas 
Acked-by: Andrew Morton 
---
 arch/arm64/include/asm/processor.h | 4 ++--
 fs/hugetlbfs/inode.c   | 6 +++---
 include/linux/sched/mm.h   | 2 +-
 mm/mmap.c  | 4 ++--
 4 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/arm64/include/asm/processor.h b/arch/arm64/include/asm/processor.h
index 73e38d9a540c..bd22d94e844d 100644
--- a/arch/arm64/include/asm/processor.h
+++ b/arch/arm64/include/asm/processor.h
@@ -92,8 +92,8 @@
 #endif /* CONFIG_COMPAT */
 
 #ifndef CONFIG_ARM64_FORCE_52BIT
-#define arch_get_mmap_end(addr) ((addr > DEFAULT_MAP_WINDOW) ? TASK_SIZE :\
-   DEFAULT_MAP_WINDOW)
+#define arch_get_mmap_end(addr, len, flags) \
+   (((addr) > DEFAULT_MAP_WINDOW) ? TASK_SIZE : DEFAULT_MAP_WINDOW)
 
 #define arch_get_mmap_base(addr, base) ((addr > DEFAULT_MAP_WINDOW) ? \
base + TASK_SIZE - DEFAULT_MAP_WINDOW :\
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index ec611c7de9e3..6f863497bd69 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -205,7 +205,7 @@ hugetlb_get_unmapped_area_bottomup(struct file *file, unsigned long addr,
info.flags = 0;
info.length = len;
info.low_limit = current->mm->mmap_base;
-   info.high_limit = arch_get_mmap_end(addr);
+   info.high_limit = arch_get_mmap_end(addr, len, flags);
info.align_mask = PAGE_MASK & ~huge_page_mask(h);
info.align_offset = 0;
	return vm_unmapped_area(&info);
@@ -236,7 +236,7 @@ hugetlb_get_unmapped_area_topdown(struct file *file, unsigned long addr,
VM_BUG_ON(addr != -ENOMEM);
info.flags = 0;
info.low_limit = current->mm->mmap_base;
-   info.high_limit = arch_get_mmap_end(addr);
+   info.high_limit = arch_get_mmap_end(addr, len, flags);
		addr = vm_unmapped_area(&info);
}
 
@@ -251,7 +251,7 @@ generic_hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma;
struct hstate *h = hstate_file(file);
-   const unsigned long mmap_end = arch_get_mmap_end(addr);
+   const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
 
if (len & ~huge_page_mask(h))
return -EINVAL;
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index da3a32d05b34..8cd975a8bfeb 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -137,7 +137,7 @@ static inline void mm_update_next_owner(struct mm_struct *mm)
 
 #ifdef CONFIG_MMU
 #ifndef arch_get_mmap_end
-#define arch_get_mmap_end(addr)	(TASK_SIZE)
+#define arch_get_mmap_end(addr, len, flags)	(TASK_SIZE)
 #endif
 
 #ifndef arch_get_mmap_base
diff --git a/mm/mmap.c b/mm/mmap.c
index d2b6c3fbfb8c..e9b7d74e58bc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -2136,7 +2136,7 @@ generic_get_unmapped_area(struct file *filp, unsigned long addr,
struct mm_struct *mm = current->mm;
struct vm_area_struct *vma, *prev;
struct vm_unmapped_area_info info;
-   const unsigned long mmap_end = arch_get_mmap_end(addr);
+   const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
 
if (len > mmap_end - mmap_min_addr)
return -ENOMEM;
@@ -2184,7 +2184,7 @@ generic_get_unmapped_area_topdown(struct file *filp, unsigned long addr,
struct vm_area_struct *vma, *prev;
struct mm_struct *mm = current->mm;
struct vm_unmapped_area_info info;
-   const unsigned long mmap_end = arch_get_mmap_end(addr);
+   const unsigned long mmap_end = arch_get_mmap_end(addr, len, flags);
 
/* requested length too big for entire address space */
if (len > mmap_end - mmap_min_addr)
-- 
2.35.1



[PATCH v10 05/13] powerpc/mm: Move vma_mmu_pagesize()

2022-04-09 Thread Christophe Leroy
vma_mmu_pagesize() is only required for slices,
otherwise there is a generic weak version doing the
exact same thing.

Move it to slice.c

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/mm/hugetlbpage.c | 11 ---
 arch/powerpc/mm/slice.c   |  9 +
 2 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index b642a5a8668f..7b89f0799d82 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -565,17 +565,6 @@ unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
 }
 #endif
 
-unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
-{
-   /* With radix we don't use slice, so derive it from vma*/
-   if (IS_ENABLED(CONFIG_PPC_MM_SLICES) && !radix_enabled()) {
-   unsigned int psize = get_slice_psize(vma->vm_mm, vma->vm_start);
-
-   return 1UL << mmu_psize_to_shift(psize);
-   }
-   return vma_kernel_pagesize(vma);
-}
-
 bool __init arch_hugetlb_valid_size(unsigned long size)
 {
int shift = __ffs(size);
diff --git a/arch/powerpc/mm/slice.c b/arch/powerpc/mm/slice.c
index f42711f865f3..8a3ac062b71e 100644
--- a/arch/powerpc/mm/slice.c
+++ b/arch/powerpc/mm/slice.c
@@ -759,4 +759,13 @@ int slice_is_hugepage_only_range(struct mm_struct *mm, unsigned long addr,
 
return !slice_check_range_fits(mm, maskp, addr, len);
 }
+
+unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
+{
+   /* With radix we don't use slice, so derive it from vma*/
+   if (radix_enabled())
+   return vma_kernel_pagesize(vma);
+
+	return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
+}
 #endif
-- 
2.35.1



[PATCH v10 08/13] powerpc/mm: Use generic_get_unmapped_area() and call it from arch_get_unmapped_area()

2022-04-09 Thread Christophe Leroy
Use the generic version of arch_get_unmapped_area(), which is
now available at all times, instead of its copy
radix__arch_get_unmapped_area().

To allow that for PPC64, add arch_get_mmap_base() and
arch_get_mmap_end() macros.

Instead of setting mm->get_unmapped_area() to either
arch_get_unmapped_area() or generic_get_unmapped_area(),
always set it to arch_get_unmapped_area() and call
generic_get_unmapped_area() from there when radix is enabled.

Do the same with radix__arch_get_unmapped_area_topdown()
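
The resulting dispatch is a simple runtime check; a sketch of what the
slice.c side ends up with later in the series:

	unsigned long arch_get_unmapped_area(struct file *filp, unsigned long addr,
					     unsigned long len, unsigned long pgoff,
					     unsigned long flags)
	{
		if (radix_enabled())
			return generic_get_unmapped_area(filp, addr, len, pgoff, flags);

		return slice_get_unmapped_area(addr, len, flags,
					       mm_ctx_user_psize(&current->mm->context), 0);
	}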

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/task_size_64.h |   8 ++
 arch/powerpc/mm/mmap.c  | 127 ++--
 2 files changed, 14 insertions(+), 121 deletions(-)

diff --git a/arch/powerpc/include/asm/task_size_64.h b/arch/powerpc/include/asm/task_size_64.h
index 38fdf8041d12..5a709951c901 100644
--- a/arch/powerpc/include/asm/task_size_64.h
+++ b/arch/powerpc/include/asm/task_size_64.h
@@ -72,4 +72,12 @@
 #define STACK_TOP_MAX TASK_SIZE_USER64
 #define STACK_TOP (is_32bit_task() ? STACK_TOP_USER32 : STACK_TOP_USER64)
 
+#define arch_get_mmap_base(addr, base) \
+	(((addr) > DEFAULT_MAP_WINDOW) ? (base) + TASK_SIZE - DEFAULT_MAP_WINDOW : (base))
+
+#define arch_get_mmap_end(addr, len, flags) \
+   (((addr) > DEFAULT_MAP_WINDOW) || \
+	 (((flags) & MAP_FIXED) && ((addr) + (len) > DEFAULT_MAP_WINDOW)) ? TASK_SIZE : \
+							DEFAULT_MAP_WINDOW)
+
 #endif /* _ASM_POWERPC_TASK_SIZE_64_H */
diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index 9b0d6e395bc0..46781d0103d1 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -81,115 +81,15 @@ static inline unsigned long mmap_base(unsigned long rnd,
 }
 
 #ifdef HAVE_ARCH_UNMAPPED_AREA
-#ifdef CONFIG_PPC_RADIX_MMU
-/*
- * Same function as generic code used only for radix, because we don't need to overload
- * the generic one. But we will have to duplicate, because hash select
- * HAVE_ARCH_UNMAPPED_AREA
- */
-static unsigned long
-radix__arch_get_unmapped_area(struct file *filp, unsigned long addr,
-unsigned long len, unsigned long pgoff,
-unsigned long flags)
-{
-   struct mm_struct *mm = current->mm;
-   struct vm_area_struct *vma;
-   int fixed = (flags & MAP_FIXED);
-   unsigned long high_limit;
-   struct vm_unmapped_area_info info;
-
-   high_limit = DEFAULT_MAP_WINDOW;
-   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
-   high_limit = TASK_SIZE;
-
-   if (len > high_limit)
-   return -ENOMEM;
-
-   if (fixed) {
-   if (addr > high_limit - len)
-   return -ENOMEM;
-   return addr;
-   }
-
-   if (addr) {
-   addr = PAGE_ALIGN(addr);
-   vma = find_vma(mm, addr);
-   if (high_limit - len >= addr && addr >= mmap_min_addr &&
-   (!vma || addr + len <= vm_start_gap(vma)))
-   return addr;
-   }
-
-   info.flags = 0;
-   info.length = len;
-   info.low_limit = mm->mmap_base;
-   info.high_limit = high_limit;
-   info.align_mask = 0;
-
-	return vm_unmapped_area(&info);
-}
-
-static unsigned long
-radix__arch_get_unmapped_area_topdown(struct file *filp,
-const unsigned long addr0,
-const unsigned long len,
-const unsigned long pgoff,
-const unsigned long flags)
-{
-   struct vm_area_struct *vma;
-   struct mm_struct *mm = current->mm;
-   unsigned long addr = addr0;
-   int fixed = (flags & MAP_FIXED);
-   unsigned long high_limit;
-   struct vm_unmapped_area_info info;
-
-   high_limit = DEFAULT_MAP_WINDOW;
-   if (addr >= high_limit || (fixed && (addr + len > high_limit)))
-   high_limit = TASK_SIZE;
-
-   if (len > high_limit)
-   return -ENOMEM;
-
-   if (fixed) {
-   if (addr > high_limit - len)
-   return -ENOMEM;
-   return addr;
-   }
-
-   if (addr) {
-   addr = PAGE_ALIGN(addr);
-   vma = find_vma(mm, addr);
-   if (high_limit - len >= addr && addr >= mmap_min_addr &&
-   (!vma || addr + len <= vm_start_gap(vma)))
-   return addr;
-   }
-
-   info.flags = VM_UNMAPPED_AREA_TOPDOWN;
-   info.length = len;
-   info.low_limit = max(PAGE_SIZE, mmap_min_addr);
-   info.high_limit = mm->mmap_base + (high_limit - DEFAULT_MAP_WINDOW);
-   info.align_mask = 0;
-
-	addr = vm_unmapped_area(&info);
-   if (!(addr & ~PAGE_MASK))
-   return addr;
-   VM_BUG_ON(addr != -ENOMEM);
-
-   /*
-* A failed mmap() very likely causes application failure,
-

[PATCH v10 10/13] powerpc/mm: Move get_unmapped_area functions to slice.c

2022-04-09 Thread Christophe Leroy
hugetlb_get_unmapped_area() is now identical to the
generic version when only RADIX is enabled, so move it
to slice.c and let it fall back on the generic one
when HASH MMU is not compiled in.

Do the same with arch_get_unmapped_area() and
arch_get_unmapped_area_topdown().

Signed-off-by: Christophe Leroy 
---
 arch/powerpc/include/asm/book3s/64/mmu.h   |  6 
 arch/powerpc/include/asm/book3s/64/slice.h |  6 
 arch/powerpc/mm/book3s64/slice.c   | 42 ++
 arch/powerpc/mm/hugetlbpage.c  | 21 ---
 arch/powerpc/mm/mmap.c | 36 ---
 5 files changed, 48 insertions(+), 63 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index 006cbec70ffe..570a4960cf17 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -4,12 +4,6 @@
 
 #include 
 
-#ifdef CONFIG_HUGETLB_PAGE
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-
 #ifndef __ASSEMBLY__
 /*
  * Page size definition
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index 5b0f7105bc8b..b8eb4ad271b9 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -4,6 +4,12 @@
 
 #ifndef __ASSEMBLY__
 
+#ifdef CONFIG_HUGETLB_PAGE
+#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
+#endif
+#define HAVE_ARCH_UNMAPPED_AREA
+#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
+
 #define SLICE_LOW_SHIFT28
 #define SLICE_LOW_TOP  (0x1ul)
 #define SLICE_NUM_LOW  (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
diff --git a/arch/powerpc/mm/book3s64/slice.c b/arch/powerpc/mm/book3s64/slice.c
index e4382713746d..03681042b807 100644
--- a/arch/powerpc/mm/book3s64/slice.c
+++ b/arch/powerpc/mm/book3s64/slice.c
@@ -639,6 +639,32 @@ unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
 }
 EXPORT_SYMBOL_GPL(slice_get_unmapped_area);
 
+unsigned long arch_get_unmapped_area(struct file *filp,
+unsigned long addr,
+unsigned long len,
+unsigned long pgoff,
+unsigned long flags)
+{
+   if (radix_enabled())
+   return generic_get_unmapped_area(filp, addr, len, pgoff, flags);
+
+	return slice_get_unmapped_area(addr, len, flags,
+				       mm_ctx_user_psize(&current->mm->context), 0);
+}
+
+unsigned long arch_get_unmapped_area_topdown(struct file *filp,
+const unsigned long addr0,
+const unsigned long len,
+const unsigned long pgoff,
+const unsigned long flags)
+{
+   if (radix_enabled())
+   return generic_get_unmapped_area_topdown(filp, addr0, len, 
pgoff, flags);
+
+	return slice_get_unmapped_area(addr0, len, flags,
+				       mm_ctx_user_psize(&current->mm->context), 1);
+}
+
 unsigned int notrace get_slice_psize(struct mm_struct *mm, unsigned long addr)
 {
unsigned char *psizes;
@@ -766,4 +792,20 @@ unsigned long vma_mmu_pagesize(struct vm_area_struct *vma)
 
	return 1UL << mmu_psize_to_shift(get_slice_psize(vma->vm_mm, vma->vm_start));
 }
+
+static int file_to_psize(struct file *file)
+{
+   struct hstate *hstate = hstate_file(file);
+   return shift_to_mmu_psize(huge_page_shift(hstate));
+}
+
+unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
+   unsigned long len, unsigned long pgoff,
+   unsigned long flags)
+{
+   if (radix_enabled())
+		return generic_hugetlb_get_unmapped_area(file, addr, len, pgoff, flags);
+
+	return slice_get_unmapped_area(addr, len, flags, file_to_psize(file), 1);
+}
 #endif
diff --git a/arch/powerpc/mm/hugetlbpage.c b/arch/powerpc/mm/hugetlbpage.c
index a87c886042e9..b282af39fcf6 100644
--- a/arch/powerpc/mm/hugetlbpage.c
+++ b/arch/powerpc/mm/hugetlbpage.c
@@ -542,27 +542,6 @@ struct page *follow_huge_pd(struct vm_area_struct *vma,
return page;
 }
 
-#ifdef HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-static inline int file_to_psize(struct file *file)
-{
-   struct hstate *hstate = hstate_file(file);
-   return shift_to_mmu_psize(huge_page_shift(hstate));
-}
-
-unsigned long hugetlb_get_unmapped_area(struct file *file, unsigned long addr,
-   unsigned long len, unsigned long pgoff,
-   unsigned long flags)
-{
-   if (radix_enabled())
-   return generic_hugetlb_get_unmapped_area(file, addr, len,
-  

Re: [PATCH v2 0/4] Fix perf bench numa, futex and epoll to work with machines having #CPUs > 1K

2022-04-09 Thread Arnaldo Carvalho de Melo
On Sat, Apr 09, 2022 at 12:28:01PM -0300, Arnaldo Carvalho de Melo wrote:
> On Wed, Apr 06, 2022 at 11:21:09PM +0530, Athira Rajeev wrote:
> > The perf benchmarks for the collections numa, futex and epoll
> > hit failures in system configurations with more than 1024 CPUs.
> > These benchmarks use "sched_getaffinity" and "sched_setaffinity"
> > in the code to work with affinity.
> 
> Applied 1-3; 4 needs some reworking and can wait for v5.19. The first 3
> are fixes, so they can go now.

Now trying to fix this:

  26 7.89 debian:9  : FAIL gcc version 6.3.0 20170516 (Debian 6.3.0-18+deb9u1)
bench/numa.c: In function 'alloc_data':
bench/numa.c:359:6: error: 'orig_mask' may be used uninitialized in this function [-Werror=maybe-uninitialized]
  ret = sched_setaffinity(0, size, mask);
  ^~
bench/numa.c:409:13: note: 'orig_mask' was declared here
  cpu_set_t *orig_mask;
 ^
cc1: all warnings being treated as errors
/git/perf-5.18.0-rc1/tools/build/Makefile.build:139: recipe for target 'bench' failed
make[3]: *** [bench] Error 2


Happened in several distros.
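
The warning is the classic maybe-uninitialized pattern: the pointer is
only assigned on a conditional path before being read. A minimal
illustration of the problem and the usual fix (this is not the actual
bench/numa.c code):

	#include <stdlib.h>

	int do_bind(void *mask);

	int alloc_and_bind(int need_bind)
	{
		void *orig_mask = NULL;	/* the fix: initialize at declaration */

		if (need_bind)
			orig_mask = malloc(64);

		/* without the "= NULL" above, gcc 6 cannot prove this read
		 * is safe on the !need_bind path, and the build fails under
		 * -Werror=maybe-uninitialized */
		return orig_mask ? do_bind(orig_mask) : 0;
	}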

- Arnaldo


[PATCH v10 06/13] powerpc/mm: Make slice specific to book3s/64

2022-04-09 Thread Christophe Leroy
Since commit 555904d07eef ("powerpc/8xx: MM_SLICE is not needed
anymore") only book3s/64 selects CONFIG_PPC_MM_SLICES.

Move slice.c into mm/book3s64/

Move necessary stuff in asm/book3s/64/slice.h and
remove asm/slice.h

Signed-off-by: Christophe Leroy 
Reviewed-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h |  1 +
 arch/powerpc/include/asm/book3s/64/slice.h| 18 
 arch/powerpc/include/asm/page.h   |  1 -
 arch/powerpc/include/asm/slice.h  | 46 ---
 arch/powerpc/mm/Makefile  |  1 -
 arch/powerpc/mm/book3s64/Makefile |  1 +
 arch/powerpc/mm/{ => book3s64}/slice.c|  2 -
 arch/powerpc/mm/nohash/mmu_context.c  |  9 
 arch/powerpc/mm/nohash/tlb.c  |  4 --
 9 files changed, 20 insertions(+), 63 deletions(-)
 delete mode 100644 arch/powerpc/include/asm/slice.h
 rename arch/powerpc/mm/{ => book3s64}/slice.c (99%)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 21f780942911..1c4eebbc69c9 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -18,6 +18,7 @@
  * complete pgtable.h but only a portion of it.
  */
 #include 
+#include 
 #include 
 #include 
 
diff --git a/arch/powerpc/include/asm/book3s/64/slice.h b/arch/powerpc/include/asm/book3s/64/slice.h
index f0d3194ba41b..5b0f7105bc8b 100644
--- a/arch/powerpc/include/asm/book3s/64/slice.h
+++ b/arch/powerpc/include/asm/book3s/64/slice.h
@@ -2,6 +2,8 @@
 #ifndef _ASM_POWERPC_BOOK3S_64_SLICE_H
 #define _ASM_POWERPC_BOOK3S_64_SLICE_H
 
+#ifndef __ASSEMBLY__
+
 #define SLICE_LOW_SHIFT28
 #define SLICE_LOW_TOP  (0x1ul)
 #define SLICE_NUM_LOW  (SLICE_LOW_TOP >> SLICE_LOW_SHIFT)
@@ -13,4 +15,20 @@
 
 #define SLB_ADDR_LIMIT_DEFAULT DEFAULT_MAP_WINDOW_USER64
 
+struct mm_struct;
+
+unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
+ unsigned long flags, unsigned int psize,
+ int topdown);
+
+unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
+
+void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
+  unsigned long len, unsigned int psize);
+
+void slice_init_new_context_exec(struct mm_struct *mm);
+void slice_setup_new_exec(void);
+
+#endif /* __ASSEMBLY__ */
+
 #endif /* _ASM_POWERPC_BOOK3S_64_SLICE_H */
diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
index 254687258f42..62e0c6f12869 100644
--- a/arch/powerpc/include/asm/page.h
+++ b/arch/powerpc/include/asm/page.h
@@ -329,6 +329,5 @@ static inline unsigned long kaslr_offset(void)
 
 #include 
 #endif /* __ASSEMBLY__ */
-#include 
 
 #endif /* _ASM_POWERPC_PAGE_H */
diff --git a/arch/powerpc/include/asm/slice.h b/arch/powerpc/include/asm/slice.h
deleted file mode 100644
index 0bdd9c62eca0..
--- a/arch/powerpc/include/asm/slice.h
+++ /dev/null
@@ -1,46 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _ASM_POWERPC_SLICE_H
-#define _ASM_POWERPC_SLICE_H
-
-#ifdef CONFIG_PPC_BOOK3S_64
-#include 
-#endif
-
-#ifndef __ASSEMBLY__
-
-struct mm_struct;
-
-#ifdef CONFIG_PPC_MM_SLICES
-
-#ifdef CONFIG_HUGETLB_PAGE
-#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
-#endif
-#define HAVE_ARCH_UNMAPPED_AREA
-#define HAVE_ARCH_UNMAPPED_AREA_TOPDOWN
-
-unsigned long slice_get_unmapped_area(unsigned long addr, unsigned long len,
- unsigned long flags, unsigned int psize,
- int topdown);
-
-unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr);
-
-void slice_set_range_psize(struct mm_struct *mm, unsigned long start,
-  unsigned long len, unsigned int psize);
-
-void slice_init_new_context_exec(struct mm_struct *mm);
-void slice_setup_new_exec(void);
-
-#else /* CONFIG_PPC_MM_SLICES */
-
-static inline void slice_init_new_context_exec(struct mm_struct *mm) {}
-
-static inline unsigned int get_slice_psize(struct mm_struct *mm, unsigned long addr)
-{
-   return 0;
-}
-
-#endif /* CONFIG_PPC_MM_SLICES */
-
-#endif /* __ASSEMBLY__ */
-
-#endif /* _ASM_POWERPC_SLICE_H */
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index df8172da2301..d4c20484dad9 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -14,7 +14,6 @@ obj-$(CONFIG_PPC_MMU_NOHASH)  += nohash/
 obj-$(CONFIG_PPC_BOOK3S_32)+= book3s32/
 obj-$(CONFIG_PPC_BOOK3S_64)+= book3s64/
 obj-$(CONFIG_NUMA) += numa.o
-obj-$(CONFIG_PPC_MM_SLICES)+= slice.o
 obj-$(CONFIG_HUGETLB_PAGE) += hugetlbpage.o
 obj-$(CONFIG_NOT_COHERENT_CACHE) += dma-noncoherent.o
 obj-$(CONFIG_PPC_COPRO_BASE)   += copro_fault.o
diff --git a/arch/powerpc/mm/book3s64/Makefile b/arch/powerpc/mm/book3s64/Makefile
index 2d50cac499c5..af2f3e75d458 100644

Re: [PATCH v2 0/4] Fix perf bench numa, futex and epoll to work with machines having #CPUs > 1K

2022-04-09 Thread Arnaldo Carvalho de Melo
On Wed, Apr 06, 2022 at 11:21:09PM +0530, Athira Rajeev wrote:
> The perf benchmarks for the collections numa, futex and epoll
> hit failures in system configurations with more than 1024 CPUs.
> These benchmarks use "sched_getaffinity" and "sched_setaffinity"
> in the code to work with affinity.

Applied 1-3; 4 needs some reworking and can wait for v5.19. The first 3
are fixes, so they can go now.

- Arnaldo


Re: [PATCH v2 4/4] tools/perf: Fix perf bench numa testcase to check if CPU used to bind task is online

2022-04-09 Thread Arnaldo Carvalho de Melo
On Wed, Apr 06, 2022 at 11:21:13PM +0530, Athira Rajeev wrote:
> Perf numa bench test fails with error:
> 
> Testcase:
> ./perf bench numa mem -p 2 -t 1 -P 1024 -C 0,8 -M 1,0 -s 20 -zZq
> --thp  1 --no-data_rand_walk
> 
> Failure snippet:
> <<>>
>  Running 'numa/mem' benchmark:
> 
>  # Running main, "perf bench numa numa-mem -p 2 -t 1 -P 1024 -C 0,8
> -M 1,0 -s 20 -zZq --thp 1 --no-data_rand_walk"
> 
> perf: bench/numa.c:333: bind_to_cpumask: Assertion `!(ret)' failed.
> <<>>
> 
> The testcase uses CPUs 0 and 8. In "parse_setup_cpu_list" there is
> a check that the cpu number is not greater than the maximum number
> of CPUs possible in the system, via "if (bind_cpu_0 >= g->p.nr_cpus ||
> bind_cpu_1 >= g->p.nr_cpus) {". But it can happen that a system has,
> say, 48 CPUs while only CPUs 0-7 are online and the rest are offline.
> Since "g->p.nr_cpus" is 48, the function goes ahead and sets the bit
> for CPU 8 as well in the cpumask (td->bind_cpumask).
> 
> The bind_to_cpumask function is then called to set affinity using
> sched_setaffinity and that cpumask. Since CPU 8 is not present,
> setting affinity fails there with EINVAL. Fix this issue by adding a
> check to make sure that the CPUs provided in the input arguments
> are online before proceeding further, and skip the test otherwise.
> For this, include a new helper function "is_cpu_online" in
> "tools/perf/util/header.c".
> 
> Since "BIT(x)" definition will get included from header.h, remove
> that from bench/numa.c
> 
> Tested-by: Disha Goel 
> Signed-off-by: Athira Rajeev 
> Reported-by: Disha Goel 
> ---
>  tools/perf/bench/numa.c  |  8 ++--
>  tools/perf/util/header.c | 43 
>  tools/perf/util/header.h |  1 +
>  3 files changed, 50 insertions(+), 2 deletions(-)
> 
> diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
> index 29e41e32bd88..7992d79b3e41 100644
> --- a/tools/perf/bench/numa.c
> +++ b/tools/perf/bench/numa.c
> @@ -34,6 +34,7 @@
>  #include <linux/numa.h>
>  #include <linux/zalloc.h>
>  
> +#include "../util/header.h"
> #include <numa.h>
> #include <numaif.h>
>  
> @@ -616,6 +617,11 @@ static int parse_setup_cpu_list(void)
>   return -1;
>   }
>  
> + if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
> + printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
> + return -1;
> + }
> +
>   BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
>   BUG_ON(bind_cpu_0 > bind_cpu_1);
>  
> @@ -786,8 +792,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
>   return parse_node_list(arg);
>  }
>  
> -#define BIT(x) (1ul << x)
> -
>  static inline uint32_t lfsr_32(uint32_t lfsr)
>  {
>   const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
> index 6da12e522edc..3f5fcf5d4b3f 100644
> --- a/tools/perf/util/header.c
> +++ b/tools/perf/util/header.c
> @@ -983,6 +983,49 @@ static int write_dir_format(struct feat_fd *ff,
>   return do_write(ff, &data->dir.version, sizeof(data->dir.version));
>  }
>  
> +#define SYSFS "/sys/devices/system/cpu/"

Please use

int sysfs__read_str(const char *entry, char **buf, size_t *sizep)

See how to use it in the smt_on() function at tools/perf/util/smt.c, for
example.
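
For illustration, a rough, untested sketch of what is_cpu_online() could
look like reworked on top of sysfs__read_str(); the entry path is
relative to the sysfs mount point, which the helper resolves internally.
The simplified error handling and buffer size here are assumptions, not
part of the suggestion:

```c
#include <limits.h>    /* PATH_MAX */
#include <stdio.h>     /* snprintf() */
#include <stdlib.h>    /* atoi(), free() */
#include <api/fs/fs.h> /* perf's sysfs helpers, incl. sysfs__read_str() */

int is_cpu_online(unsigned int cpu)
{
	char entry[PATH_MAX];
	char *str;
	size_t len;
	int status;

	/*
	 * Without CONFIG_HOTPLUG_CPU the "online" file does not exist;
	 * a fuller version would stat() the cpu%u directory first to
	 * distinguish "no such CPU" from "CPU not hotpluggable".
	 */
	snprintf(entry, sizeof(entry), "devices/system/cpu/cpu%u/online", cpu);
	if (sysfs__read_str(entry, &str, &len) < 0)
		return 1;

	status = atoi(str);	/* the file contains "0" or "1" */
	free(str);
	return status;
}
```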

Now looking at the first patches in the series.

- Arnaldo

> +/*
> + * Check whether a CPU is online
> + *
> + * Returns:
> + * 1 -> if CPU is online
> + * 0 -> if CPU is offline
> + * -1 -> error case
> + */
> +int is_cpu_online(unsigned int cpu)
> +{
> + char sysfs_cpu[255];
> + char buf[255];
> + struct stat statbuf;
> + size_t len;
> + int fd;
> +
> + snprintf(sysfs_cpu, sizeof(sysfs_cpu), SYSFS "cpu%u", cpu);
> +
> + if (stat(sysfs_cpu, &statbuf) != 0)
> + return 0;
> +
> + /*
> +  * Check if /sys/devices/system/cpu/cpux/online file
> +  * exists. In kernels without CONFIG_HOTPLUG_CPU, this
> +  * file won't exist.
> +  */
> + snprintf(sysfs_cpu, sizeof(sysfs_cpu), SYSFS "cpu%u/online", cpu);
> + if (stat(sysfs_cpu, &statbuf) != 0)
> + return 1;
> +
> + fd = open(sysfs_cpu, O_RDONLY);
> + if (fd == -1)
> + return -1;
> +
> + len = read(fd, buf, sizeof(buf) - 1);
> + buf[len] = '\0';
> + close(fd);
> +
> + return strtoul(buf, NULL, 16);
> +}
> +
>  #ifdef HAVE_LIBBPF_SUPPORT
>  static int write_bpf_prog_info(struct feat_fd *ff,
>  struct evlist *evlist __maybe_unused)
> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
> index c9e3265832d9..0eb4bc29a5a4 100644
> --- a/tools/perf/util/header.h
> +++ b/tools/perf/util/header.h
> @@ -158,6 +158,7 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size);
>  int write_padded(struct feat_fd *fd, const void *bf,
>size_t count, size_t count_aligned);
>  
> +int is_cpu_online(unsigned int 

Re: False positive kmemleak report for dtb properties names on powerpc

2022-04-09 Thread Ariel Marcovitch

Hi Christophe, did you get the chance to look at this?

On 23/03/2022 21:06, Mike Rapoport wrote:

Hi Catalin,

On Wed, Mar 23, 2022 at 05:22:38PM +, Catalin Marinas wrote:

Hi Ariel,

On Fri, Feb 18, 2022 at 09:45:51PM +0200, Ariel Marcovitch wrote:

I was running a powerpc 32-bit kernel (built using the
qemu_ppc_mpc8544ds_defconfig buildroot config, with
DEBUGFS+KMEMLEAK+HIGHMEM enabled in the kernel config) on qemu and
invoked the kmemleak scan (twice; for some reason the first time
wasn't enough).

[...]

I got 97 leak reports, all similar to the following:

[...]

memblock_alloc lets kmemleak know about the allocated memory using
kmemleak_alloc_phys (in mm/memblock.c:memblock_alloc_range_nid()).

The problem is with the following code (mm/kmemleak.c):

```c

void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
			       gfp_t gfp)
{
	if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
		kmemleak_alloc(__va(phys), size, min_count, gfp);
}

```

When CONFIG_HIGHMEM is enabled, the pfn of the allocated memory is checked
against max_low_pfn, to make sure it is not in the HIGHMEM zone.

However, when called through unflatten_device_tree(), max_low_pfn is not yet
initialized in powerpc.

max_low_pfn is initialized (when NUMA is disabled) in
arch/powerpc/mm/mem.c:mem_topology_setup() which is called only after
unflatten_device_tree() is called in the same function (setup_arch()).

Because max_low_pfn is global, it is 0 before initialization, so as far as
kmemleak_alloc_phys() is concerned all memory is HIGHMEM, and the
allocated memory is not tracked by kmemleak. References to objects
allocated later with kmalloc() are therefore ignored, and those objects
are marked as leaked.

Thanks for digging into this. It looks like the logic doesn't work (not
sure whether it ever worked).


I actually tried to find out whether this happens on other arches as well,
and it seems like arm64 also has this problem when dtb is used instead of
acpi, although I haven't had the chance to confirm this.

arm64 doesn't enable CONFIG_HIGHMEM, so it's not affected.


I don't suppose I can just shuffle the calls in setup_arch() around, so I
wanted to hear your opinions first

I think it's better if we change the logic than shuffling the calls.
IIUC MEMBLOCK_ALLOC_ACCESSIBLE means that __va() works on the phys
address returned by memblock, so something like below (untested):

MEMBLOCK_ALLOC_ACCESSIBLE means "anywhere", see commit e63075a3c937
("memblock: Introduce default allocation limit and use it to replace
explicit ones"), so it won't help to detect high memory.

If I remember correctly, ppc initializes memblock *very* early, so setting
max_low_pfn along with lowmem_end_addr in
arch/powerpc/mm/init_32::MMU_init() makes sense to me.
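
An untested sketch of that idea; the exact expression is an assumption,
since MMU_init() already computes lowmem_end_addr and max_low_pfn could
be derived from it on the spot:

```c
/* arch/powerpc/mm/init_32.c, in MMU_init(): hypothetical placement */
	lowmem_end_addr = memstart_addr + total_lowmem;
	/*
	 * Initialize max_low_pfn here so that kmemleak_alloc_phys()'s
	 * HIGHMEM check works for allocations made while the device
	 * tree is unflattened, before mem_topology_setup() runs.
	 */
	max_low_pfn = lowmem_end_addr >> PAGE_SHIFT;
```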

Maybe ppc folks have other ideas...
I've added Christophe who works on ppc32 these days.
  

-8<--
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index 7580baa76af1..f3599e857c13 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1127,8 +1127,7 @@ EXPORT_SYMBOL(kmemleak_no_scan);
  void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
   gfp_t gfp)
  {
-   if (!IS_ENABLED(CONFIG_HIGHMEM) || PHYS_PFN(phys) < max_low_pfn)
-   kmemleak_alloc(__va(phys), size, min_count, gfp);
+   kmemleak_alloc(__va(phys), size, min_count, gfp);
  }
  EXPORT_SYMBOL(kmemleak_alloc_phys);
  
diff --git a/mm/memblock.c b/mm/memblock.c

index b12a364f2766..2515bd4331e8 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1397,7 +1397,7 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
 * Skip kmemleak for those places like kasan_init() and
 * early_pgtable_alloc() due to high volume.
 */
-   if (end != MEMBLOCK_ALLOC_NOLEAKTRACE)
+   if (end == MEMBLOCK_ALLOC_ACCESSIBLE)

This change would enable kmemleak for KASAN on arm and arm64, which AFAIR
caused OOM in kmemleak, and it will also limit tracking to allocations
that do not specify 'end' explicitly ;-)


/*
 * The min_count is set to 0 so that memblock allocated
 * blocks are never reported as leaks. This is because many
-8<--

But I'm not sure whether we'd now miss some genuine allocations where
the memblock limit is different from MEMBLOCK_ALLOC_ACCESSIBLE but still
within the lowmem limit. If the above works, we can probably get rid of
some other kmemleak callbacks in the kernel.

Adding Mike for any input on memblock.

--
Catalin


Re: [PATCH] powerpc: Mark arch_get_ima_policy() and is_ppc_trustedboot_enabled() as __init

2022-04-09 Thread Michael Ellerman
Mimi Zohar  writes:
> On Fri, 2022-04-08 at 13:31 -0400, Mimi Zohar wrote:
>> On Fri, 2022-04-08 at 12:05 -0400, Mimi Zohar wrote:
>> > On Fri, 2022-04-08 at 00:15 +1000, Michael Ellerman wrote:
>> > > We can mark arch_get_ima_policy() as __init because its only caller
>> > > ima_init_arch_policy() is __init. We can then mark
>> > > is_ppc_trustedboot_enabled() __init because its only caller is
>> > > arch_get_ima_policy().
>> > > 
>> > > Signed-off-by: Michael Ellerman 
>> > 
>> > I assume you want to upstream this via power,
>> > 
>> > Reviewed-by: Mimi Zohar 
>> 
>> Sorry, I just noticed that is_ppc_trustedboot_enabled() is also called
>> by arch_ima_get_secureboot().
>
> Never mind, arch_ima_get_secureboot() calls
> is_ppc_secureboot_enabled(), not is_ppc_trustedboot_enabled().

Yeah despite the long names they are still easy to confuse :)

cheers


Re: [PATCH] powerpc: Mark arch_get_ima_policy() and is_ppc_trustedboot_enabled() as __init

2022-04-09 Thread Michael Ellerman
Mimi Zohar  writes:
> On Fri, 2022-04-08 at 00:15 +1000, Michael Ellerman wrote:
>> We can mark arch_get_ima_policy() as __init because its only caller
>> ima_init_arch_policy() is __init. We can then mark
>> is_ppc_trustedboot_enabled() __init because its only caller is
>> arch_get_ima_policy().
>> 
>> Signed-off-by: Michael Ellerman 
>
> I assume you want to upstream this via power,
>
> Reviewed-by: Mimi Zohar 

Yep, thanks.

cheers


Re: [PATCH v9 0/4] mm: Enable conversion of powerpc to default topdown mmap layout

2022-04-09 Thread Michael Ellerman
Christophe Leroy  writes:
> On 09/04/2022 at 05:25, Andrew Morton wrote:
>> On Fri,  8 Apr 2022 09:24:58 +0200 Christophe Leroy 
>>  wrote:
>> 
>>> Rebased on top of Linux 5.18-rc1
>>>
>>> This is the mm part of the series that converts powerpc to default
>>> topdown mmap layout, for merge into v5.18
>> 
>> We're at 5.18-rc1.  The 5.18 merge window has closed and we're in
>> fixes-only mode.
>
> Umm ... There must have been a misunderstanding then.

That's probably my fault for not getting back to Andrew.

> On 11/03/2022 at 05:26, Michael Ellerman wrote:
>  >
>  > Yeah I didn't pick it up because the mm changes don't have many acks and
>  > I'm always nervous about carrying generic mm changes.
>  >
>  > It would be my preference if Andrew could take 2-5 through mm for v5.18,
>  > but it is quite late, so I'm not sure how he will feel about that.
>  >
>  > Arguably 2, 3, 4 do very little. It's only patch 5 that has much effect,
>  > and it has a reviewed-by from Catalin at least.
>
> Michael, is it now ok for you to merge it via powerpc tree with Andrew's 
> Ack ?

Yes.

>> Also, [4/4] has a cc:stable.  This is a bit odd because -stable
>> candidates should be standalone patches, staged ahead of all
>> for-next-merge-window material, so we can get them merged up quickly.
>> 
>> More oddly, [4/4]'s changelog provides no explanation for why the patch
>> should be considered for backporting.
 
Yeah it's just a bit too politely worded :)

It says it's "a complement of f6795053dac8", but it's actually a fix for
a bug in that commit, that commit should have updated hugetlb behaviour.

> That was a request from Catalin from ARM64:
>
> On 04/01/2022 at 17:21, Catalin Marinas wrote:
>  > I wonder whether we should add a fixes tag (or at least the cc stable):
>  >
>  > Fixes: f6795053dac8 ("mm: mmap: Allow for "high" userspace addresses")
>  > Cc:  # 5.0.x
>  >
>  > I think the original commit should have changed
>  > hugetlb_get_unmapped_area() to have the same behaviour as
>  > arch_get_unmapped_area(). Steve, any thoughts?
>  >
>  > FWIW,
>  >
>  > Reviewed-by: Catalin Marinas 
>
>  From 
> https://patchwork.ozlabs.org/project/linuxppc-dev/patch/db238c1ca2d46e33c57328f8d450f2563e92f8c2.1639736449.git.christophe.le...@csgroup.eu/
>
> I can try and see whether this can be moved in front of the other patches.

Thanks, that would be preferable.

cheers


Re: [PATCH V4 2/7] powerpc/mm: Enable ARCH_HAS_VM_GET_PAGE_PROT

2022-04-09 Thread Christophe Leroy




On 08/04/2022 at 14:53, Christophe Leroy wrote:



On 07/04/2022 at 12:32, Anshuman Khandual wrote:
This defines and exports a platform-specific custom vm_get_page_prot() via
subscribing ARCH_HAS_VM_GET_PAGE_PROT. While here, this also localizes
arch_vm_get_page_prot() as powerpc_vm_get_page_prot() and moves it near
vm_get_page_prot().

Cc: Michael Ellerman 
Cc: Paul Mackerras 
Cc: linuxppc-dev@lists.ozlabs.org
Cc: linux-ker...@vger.kernel.org
Signed-off-by: Anshuman Khandual 
---
  arch/powerpc/Kconfig    |  1 +
  arch/powerpc/include/asm/mman.h | 12 
  arch/powerpc/mm/mmap.c  | 26 ++
  3 files changed, 27 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 174edabb74fa..eb9b6ddbf92f 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -140,6 +140,7 @@ config PPC
  select ARCH_HAS_TICK_BROADCAST    if GENERIC_CLOCKEVENTS_BROADCAST
  select ARCH_HAS_UACCESS_FLUSHCACHE
  select ARCH_HAS_UBSAN_SANITIZE_ALL
+    select ARCH_HAS_VM_GET_PAGE_PROT
  select ARCH_HAVE_NMI_SAFE_CMPXCHG
  select ARCH_KEEP_MEMBLOCK
  select ARCH_MIGHT_HAVE_PC_PARPORT
diff --git a/arch/powerpc/include/asm/mman.h b/arch/powerpc/include/asm/mman.h
index 7cb6d18f5cd6..1b024e64c8ec 100644
--- a/arch/powerpc/include/asm/mman.h
+++ b/arch/powerpc/include/asm/mman.h
@@ -24,18 +24,6 @@ static inline unsigned long arch_calc_vm_prot_bits(unsigned long prot,
  }
  #define arch_calc_vm_prot_bits(prot, pkey) arch_calc_vm_prot_bits(prot, pkey)

-static inline pgprot_t arch_vm_get_page_prot(unsigned long vm_flags)
-{
-#ifdef CONFIG_PPC_MEM_KEYS
-    return (vm_flags & VM_SAO) ?
-    __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
-    __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
-#else
-    return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
-#endif
-}
-#define arch_vm_get_page_prot(vm_flags) arch_vm_get_page_prot(vm_flags)
-
  static inline bool arch_validate_prot(unsigned long prot, unsigned long addr)
  {
  if (prot & ~(PROT_READ | PROT_WRITE | PROT_EXEC | PROT_SEM | PROT_SAO))

diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c
index c475cf810aa8..cd17bd6fa36b 100644
--- a/arch/powerpc/mm/mmap.c
+++ b/arch/powerpc/mm/mmap.c
@@ -254,3 +254,29 @@ void arch_pick_mmap_layout(struct mm_struct *mm, struct rlimit *rlim_stack)
  mm->get_unmapped_area = arch_get_unmapped_area_topdown;
  }
  }
+
+#ifdef CONFIG_PPC64
+static pgprot_t powerpc_vm_get_page_prot(unsigned long vm_flags)
+{
+#ifdef CONFIG_PPC_MEM_KEYS
+    return (vm_flags & VM_SAO) ?
+    __pgprot(_PAGE_SAO | vmflag_to_pte_pkey_bits(vm_flags)) :
+    __pgprot(0 | vmflag_to_pte_pkey_bits(vm_flags));
+#else
+    return (vm_flags & VM_SAO) ? __pgprot(_PAGE_SAO) : __pgprot(0);
+#endif
+}
+#else
+static pgprot_t powerpc_vm_get_page_prot(unsigned long vm_flags)
+{
+    return __pgprot(0);
+}
+#endif /* CONFIG_PPC64 */


Can we reduce this forest of #ifdefs and make it more readable?

mm/mmap.c is going away with patch 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/d6d849621f821af253e777a24eda4c648814a76e.1646847562.git.christophe.le...@csgroup.eu/ 



So it would be better to add two versions of vm_get_page_prot(), for 
instance one in mm/pgtable_64.c and one in mm/pgtable_32.c


Indeed, you don't need anything at all for PPC32. All you need to do is

select ARCH_HAS_VM_GET_PAGE_PROT if PPC64

And in fact it could even be PPC_BOOK3S_64 instead of PPC64 because 
CONFIG_PPC_MEM_KEYS depends on PPC_BOOK3S_64 and _PAGE_SAO is 0 on 
nohash/64.


So you can then put it into arch/powerpc/mm/book3s64/pgtable.c
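
Combining the two suggestions, a minimal, untested sketch of what a
book3s64-only implementation might look like (one possible shape, not
the submitted patch), with "select ARCH_HAS_VM_GET_PAGE_PROT if
PPC_BOOK3S_64" in Kconfig:

```c
/* arch/powerpc/mm/book3s64/pgtable.c: hypothetical placement */
pgprot_t vm_get_page_prot(unsigned long vm_flags)
{
	unsigned long prot = pgprot_val(protection_map[vm_flags &
				(VM_READ | VM_WRITE | VM_EXEC | VM_SHARED)]);

	if (vm_flags & VM_SAO)
		prot |= _PAGE_SAO;

#ifdef CONFIG_PPC_MEM_KEYS
	prot |= vmflag_to_pte_pkey_bits(vm_flags);
#endif

	return __pgprot(prot);
}
EXPORT_SYMBOL(vm_get_page_prot);
```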






+
+pgprot_t vm_get_page_prot(unsigned long vm_flags)
+{
+    return __pgprot(pgprot_val(protection_map[vm_flags &
+    (VM_READ|VM_WRITE|VM_EXEC|VM_SHARED)]) |
+   pgprot_val(powerpc_vm_get_page_prot(vm_flags)));
+}
+EXPORT_SYMBOL(vm_get_page_prot);


By the way, I'm a bit puzzled by the name powerpc_vm_get_page_prot();
the name suggests that it's just a powerpc replacement of
vm_get_page_prot(). I'd prefer it to be named __vm_get_page_prot(): it
would be clearer that it is a complement to vm_get_page_prot() and not
a replacement.


Christophe


Re: [PATCH v9 0/4] mm: Enable conversion of powerpc to default topdown mmap layout

2022-04-09 Thread Christophe Leroy


On 09/04/2022 at 05:25, Andrew Morton wrote:
> On Fri,  8 Apr 2022 09:24:58 +0200 Christophe Leroy 
>  wrote:
> 
>> Rebased on top of Linux 5.18-rc1
>>
>> This is the mm part of the series that converts powerpc to default
>> topdown mmap layout, for merge into v5.18
> 
> We're at 5.18-rc1.  The 5.18 merge window has closed and we're in
> fixes-only mode.

Umm ... There must have been a misunderstanding then.

I contacted you before the merge window, and your answer was:

On 11/03/2022 at 05:49, Andrew Morton wrote:
>
> 5.18 isn't a problem.  Perhaps you meant 5.17, which would be real tough.
>
> Can we take a look after 5.18-rc1?


> 
> If there's a case to be made that these patches are needed by 5.18
> users then please let's make that case.  Otherwise, this is 5.19-rc1 material.

It's not really needed for 5.18. The idea was to merge that common part
in 5.18 in order to minimise the risk of conflicts. As far as I understand,
it often happens that changes of that kind get merged at the very end of
the merge window or between rc1 and rc2. I was therefore not surprised
that you offered to handle it past rc1.

History at:
https://patchwork.ozlabs.org/project/linuxppc-dev/cover/cover.1646847561.git.christophe.le...@csgroup.eu/#2856080


> 
> And if it is indeed all 5.19-rc1 material, then please carry all four
> in the powerpc tree with Acked-by: Andrew Morton
> .

Well, Michael was a bit uncomfortable with doing it that way, see below:


On 11/03/2022 at 05:26, Michael Ellerman wrote:
 >
 > Yeah I didn't pick it up because the mm changes don't have many acks and
 > I'm always nervous about carrying generic mm changes.
 >
 > It would be my preference if Andrew could take 2-5 through mm for v5.18,
 > but it is quite late, so I'm not sure how he will feel about that.
 >
 > Arguably 2, 3, 4 do very little. It's only patch 5 that has much effect,
 > and it has a reviewed-by from Catalin at least.

Michael, is it now ok for you to merge it via powerpc tree with Andrew's 
Ack ?

> 
> Also, [4/4] has a cc:stable.  This is a bit odd because -stable
> candidates should be standalone patches, staged ahead of all
> for-next-merge-window material, so we can get them merged up quickly.
> 
> More oddly, [4/4]'s changelog provides no explanation for why the patch
> should be considered for backporting.
> 

That was a request from Catalin from ARM64:

On 04/01/2022 at 17:21, Catalin Marinas wrote:
 > I wonder whether we should add a fixes tag (or at least the cc stable):
 >
 > Fixes: f6795053dac8 ("mm: mmap: Allow for "high" userspace addresses")
 > Cc:  # 5.0.x
 >
 > I think the original commit should have changed
 > hugetlb_get_unmapped_area() to have the same behaviour as
 > arch_get_unmapped_area(). Steve, any thoughts?
 >
 > FWIW,
 >
 > Reviewed-by: Catalin Marinas 

 From 
https://patchwork.ozlabs.org/project/linuxppc-dev/patch/db238c1ca2d46e33c57328f8d450f2563e92f8c2.1639736449.git.christophe.le...@csgroup.eu/

I can try and see whether this can be moved in front of the other patches.

Thanks
Christophe

[PATCH] powerpc/pseries/vas: sysfs comments with the correct entries

2022-04-09 Thread Haren Myneni


The VAS entry is created as a misc device, so the sysfs comments
should list the proper /sys/devices/virtual/misc/... entries.

Reported-by: Matheus Castanho 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/platforms/pseries/vas-sysfs.c | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/vas-sysfs.c 
b/arch/powerpc/platforms/pseries/vas-sysfs.c
index f3c58c309cff..e05d2bac8824 100644
--- a/arch/powerpc/platforms/pseries/vas-sysfs.c
+++ b/arch/powerpc/platforms/pseries/vas-sysfs.c
@@ -74,26 +74,26 @@ struct vas_sysfs_entry {
 
 /*
  * Create sysfs interface:
- * /sys/devices/vas/vas0/gzip/default_capabilities
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities
  * This directory contains the following VAS GZIP capabilities
  * for the defaule credit type.
- * /sys/devices/vas/vas0/gzip/default_capabilities/nr_total_credits
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_total_credits
  * Total number of default credits assigned to the LPAR which
  * can be changed with DLPAR operation.
- * /sys/devices/vas/vas0/gzip/default_capabilities/nr_used_credits
+ * /sys/devices/virtual/misc/vas/vas0/gzip/default_capabilities/nr_used_credits
  * Number of credits used by the user space. One credit will
  * be assigned for each window open.
  *
- * /sys/devices/vas/vas0/gzip/qos_capabilities
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities
  * This directory contains the following VAS GZIP capabilities
  * for the Quality of Service (QoS) credit type.
- * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_total_credits
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_total_credits
  * Total number of QoS credits assigned to the LPAR. The user
  * has to define this value using HMC interface. It can be
  * changed dynamically by the user.
- * /sys/devices/vas/vas0/gzip/qos_capabilities/nr_used_credits
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/nr_used_credits
  * Number of credits used by the user space.
- * /sys/devices/vas/vas0/gzip/qos_capabilities/update_total_credits
+ * /sys/devices/virtual/misc/vas/vas0/gzip/qos_capabilities/update_total_credits
  * Update total QoS credits dynamically
  */
 
-- 
2.27.0




[PATCH] powerpc/powernv/vas: Assign real address to rx_fifo in vas_rx_win_attr

2022-04-09 Thread Haren Myneni
In init_winctx_regs(), __pa() is called on winctx->rx_fifo; this
function is called to initialize registers for both receive and fault
windows. But the real address is passed in winctx->rx_fifo for
receive windows and the virtual address for fault windows, which
causes errors with DEBUG_VIRTUAL enabled. Fix this issue by
assigning only the real address to rx_fifo in the vas_rx_win_attr
struct for both receive and fault windows.
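
For context on why DEBUG_VIRTUAL catches this: on ppc64, __pa()
sanity-checks that its argument is a kernel virtual address, roughly as
below (paraphrased from arch/powerpc/include/asm/page.h; the exact
definition varies by configuration):

```c
/*
 * With CONFIG_DEBUG_VIRTUAL, VIRTUAL_BUG_ON() is a real BUG_ON(), so
 * calling __pa() on an address that is already real, i.e. below
 * PAGE_OFFSET, trips immediately.
 */
#define __pa(x)							\
({								\
	VIRTUAL_BUG_ON((unsigned long)(x) < PAGE_OFFSET);	\
	(unsigned long)(x) & 0x0fffffffffffffffUL;		\
})
```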

Reported-by: Michael Ellerman 
Signed-off-by: Haren Myneni 
---
 arch/powerpc/include/asm/vas.h  | 2 +-
 arch/powerpc/platforms/powernv/vas-fault.c  | 2 +-
 arch/powerpc/platforms/powernv/vas-window.c | 4 ++--
 arch/powerpc/platforms/powernv/vas.h| 2 +-
 drivers/crypto/nx/nx-common-powernv.c   | 2 +-
 5 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/include/asm/vas.h b/arch/powerpc/include/asm/vas.h
index 83afcb6c194b..c36f71e01c0f 100644
--- a/arch/powerpc/include/asm/vas.h
+++ b/arch/powerpc/include/asm/vas.h
@@ -126,7 +126,7 @@ static inline void vas_user_win_add_mm_context(struct vas_user_win_ref *ref)
  * Receive window attributes specified by the (in-kernel) owner of window.
  */
 struct vas_rx_win_attr {
-   void *rx_fifo;
+   u64 rx_fifo;
int rx_fifo_size;
int wcreds_max;
 
diff --git a/arch/powerpc/platforms/powernv/vas-fault.c b/arch/powerpc/platforms/powernv/vas-fault.c
index a7aabc18039e..c1bfad56447d 100644
--- a/arch/powerpc/platforms/powernv/vas-fault.c
+++ b/arch/powerpc/platforms/powernv/vas-fault.c
@@ -216,7 +216,7 @@ int vas_setup_fault_window(struct vas_instance *vinst)
vas_init_rx_win_attr(, VAS_COP_TYPE_FAULT);
 
attr.rx_fifo_size = vinst->fault_fifo_size;
-   attr.rx_fifo = vinst->fault_fifo;
+   attr.rx_fifo = __pa(vinst->fault_fifo);
 
/*
 * Max creds is based on number of CRBs can fit in the FIFO.
diff --git a/arch/powerpc/platforms/powernv/vas-window.c b/arch/powerpc/platforms/powernv/vas-window.c
index 0f8d39fbf2b2..0072682531d8 100644
--- a/arch/powerpc/platforms/powernv/vas-window.c
+++ b/arch/powerpc/platforms/powernv/vas-window.c
@@ -404,7 +404,7 @@ static void init_winctx_regs(struct pnv_vas_window *window,
 *
 * See also: Design note in function header.
 */
-   val = __pa(winctx->rx_fifo);
+   val = winctx->rx_fifo;
val = SET_FIELD(VAS_PAGE_MIGRATION_SELECT, val, 0);
write_hvwc_reg(window, VREG(LFIFO_BAR), val);
 
@@ -739,7 +739,7 @@ static void init_winctx_for_rxwin(struct pnv_vas_window *rxwin,
 */
winctx->fifo_disable = true;
winctx->intr_disable = true;
-   winctx->rx_fifo = NULL;
+   winctx->rx_fifo = 0;
}
 
winctx->lnotify_lpid = rxattr->lnotify_lpid;
diff --git a/arch/powerpc/platforms/powernv/vas.h b/arch/powerpc/platforms/powernv/vas.h
index 8bb08e395de0..08d9d3d5a22b 100644
--- a/arch/powerpc/platforms/powernv/vas.h
+++ b/arch/powerpc/platforms/powernv/vas.h
@@ -376,7 +376,7 @@ struct pnv_vas_window {
  * is a container for the register fields in the window context.
  */
 struct vas_winctx {
-   void *rx_fifo;
+   u64 rx_fifo;
int rx_fifo_size;
int wcreds_max;
int rsvd_txbuf_count;
diff --git a/drivers/crypto/nx/nx-common-powernv.c b/drivers/crypto/nx/nx-common-powernv.c
index 32a036ada5d0..f418817c0f43 100644
--- a/drivers/crypto/nx/nx-common-powernv.c
+++ b/drivers/crypto/nx/nx-common-powernv.c
@@ -827,7 +827,7 @@ static int __init vas_cfg_coproc_info(struct device_node *dn, int chip_id,
goto err_out;
 
vas_init_rx_win_attr(, coproc->ct);
-   rxattr.rx_fifo = (void *)rx_fifo;
+   rxattr.rx_fifo = rx_fifo;
rxattr.rx_fifo_size = fifo_size;
rxattr.lnotify_lpid = lpid;
rxattr.lnotify_pid = pid;
-- 
2.27.0




Re: [PATCH v2 4/4] tools/perf: Fix perf bench numa testcase to check if CPU used to bind task is online

2022-04-09 Thread Athira Rajeev



> On 08-Apr-2022, at 5:56 PM, Srikar Dronamraju  wrote:
> 
> * Athira Rajeev  [2022-04-06 23:21:13]:
> 
>> Perf numa bench test fails with error:
>> 
>> Testcase:
>> ./perf bench numa mem -p 2 -t 1 -P 1024 -C 0,8 -M 1,0 -s 20 -zZq
>> --thp  1 --no-data_rand_walk
>> 
>> Failure snippet:
>> <<>>
>> Running 'numa/mem' benchmark:
>> 
>> # Running main, "perf bench numa numa-mem -p 2 -t 1 -P 1024 -C 0,8
>> -M 1,0 -s 20 -zZq --thp 1 --no-data_rand_walk"
>> 
>> perf: bench/numa.c:333: bind_to_cpumask: Assertion `!(ret)' failed.
>> <<>>
>> 
>> The testcase uses CPUs 0 and 8. In "parse_setup_cpu_list" there is
>> a check that the cpu number is not greater than the maximum number
>> of CPUs possible in the system, via "if (bind_cpu_0 >= g->p.nr_cpus ||
>> bind_cpu_1 >= g->p.nr_cpus) {". But it can happen that a system has,
>> say, 48 CPUs while only CPUs 0-7 are online and the rest are offline.
>> Since "g->p.nr_cpus" is 48, the function goes ahead and sets the bit
>> for CPU 8 as well in the cpumask (td->bind_cpumask).
>> 
>> The bind_to_cpumask function is then called to set affinity using
>> sched_setaffinity and that cpumask. Since CPU 8 is not present,
>> setting affinity fails there with EINVAL. Fix this issue by adding a
>> check to make sure that the CPUs provided in the input arguments
>> are online before proceeding further, and skip the test otherwise.
>> For this, include a new helper function "is_cpu_online" in
>> "tools/perf/util/header.c".
>> 
>> Since "BIT(x)" definition will get included from header.h, remove
>> that from bench/numa.c
>> 
>> Tested-by: Disha Goel 
>> Signed-off-by: Athira Rajeev 
>> Reported-by: Disha Goel 
> 
> Looks good to me.
> Reviewed-by: Srikar Dronamraju 

Hi Srikar,

Thanks for the review

Athira
> 
>> ---
>> tools/perf/bench/numa.c  |  8 ++--
>> tools/perf/util/header.c | 43 
>> tools/perf/util/header.h |  1 +
>> 3 files changed, 50 insertions(+), 2 deletions(-)
>> 
>> diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c
>> index 29e41e32bd88..7992d79b3e41 100644
>> --- a/tools/perf/bench/numa.c
>> +++ b/tools/perf/bench/numa.c
>> @@ -34,6 +34,7 @@
>> #include <linux/numa.h>
>> #include <linux/zalloc.h>
>> 
>> +#include "../util/header.h"
>> #include <numa.h>
>> #include <numaif.h>
>> 
>> @@ -616,6 +617,11 @@ static int parse_setup_cpu_list(void)
>>  return -1;
>>  }
>> 
>> +if (is_cpu_online(bind_cpu_0) != 1 || is_cpu_online(bind_cpu_1) != 1) {
>> +printf("\nTest not applicable, bind_cpu_0 or bind_cpu_1 is offline\n");
>> +return -1;
>> +}
>> +
>>  BUG_ON(bind_cpu_0 < 0 || bind_cpu_1 < 0);
>>  BUG_ON(bind_cpu_0 > bind_cpu_1);
>> 
>> @@ -786,8 +792,6 @@ static int parse_nodes_opt(const struct option *opt __maybe_unused,
>>  return parse_node_list(arg);
>> }
>> 
>> -#define BIT(x) (1ul << x)
>> -
>> static inline uint32_t lfsr_32(uint32_t lfsr)
>> {
>>  const uint32_t taps = BIT(1) | BIT(5) | BIT(6) | BIT(31);
>> diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c
>> index 6da12e522edc..3f5fcf5d4b3f 100644
>> --- a/tools/perf/util/header.c
>> +++ b/tools/perf/util/header.c
>> @@ -983,6 +983,49 @@ static int write_dir_format(struct feat_fd *ff,
>>  return do_write(ff, &data->dir.version, sizeof(data->dir.version));
>> }
>> 
>> +#define SYSFS "/sys/devices/system/cpu/"
>> +
>> +/*
>> + * Check whether a CPU is online
>> + *
>> + * Returns:
>> + * 1 -> if CPU is online
>> + * 0 -> if CPU is offline
>> + * -1 -> error case
>> + */
>> +int is_cpu_online(unsigned int cpu)
>> +{
>> +char sysfs_cpu[255];
>> +char buf[255];
>> +struct stat statbuf;
>> +size_t len;
>> +int fd;
>> +
>> +snprintf(sysfs_cpu, sizeof(sysfs_cpu), SYSFS "cpu%u", cpu);
>> +
>> +if (stat(sysfs_cpu, &statbuf) != 0)
>> +return 0;
>> +
>> +/*
>> + * Check if /sys/devices/system/cpu/cpux/online file
>> + * exists. In kernels without CONFIG_HOTPLUG_CPU, this
>> + * file won't exist.
>> + */
>> +snprintf(sysfs_cpu, sizeof(sysfs_cpu), SYSFS "cpu%u/online", cpu);
>> +if (stat(sysfs_cpu, &statbuf) != 0)
>> +return 1;
>> +
>> +fd = open(sysfs_cpu, O_RDONLY);
>> +if (fd == -1)
>> +return -1;
>> +
>> +len = read(fd, buf, sizeof(buf) - 1);
>> +buf[len] = '\0';
>> +close(fd);
>> +
>> +return strtoul(buf, NULL, 16);
>> +}
>> +
>> #ifdef HAVE_LIBBPF_SUPPORT
>> static int write_bpf_prog_info(struct feat_fd *ff,
>> struct evlist *evlist __maybe_unused)
>> diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h
>> index c9e3265832d9..0eb4bc29a5a4 100644
>> --- a/tools/perf/util/header.h
>> +++ b/tools/perf/util/header.h
>> @@ -158,6 +158,7 @@ int do_write(struct feat_fd *fd, const void *buf, size_t size);
>> int write_padded(struct feat_fd *fd, const void *bf,
>>   size_t count, size_t count_aligned);
>> 
>> +int 

Re: [PATCH V3] testing/selftests/mqueue: Fix mq_perf_tests to free the allocated cpu set

2022-04-09 Thread Athira Rajeev



> On 09-Apr-2022, at 12:00 AM, Shuah Khan  wrote:
> 
> On 4/8/22 1:24 AM, Athira Rajeev wrote:
>> The selftest "mqueue/mq_perf_tests.c" use CPU_ALLOC to allocate
>> CPU set. This cpu set is used further in pthread_attr_setaffinity_np
>> and by pthread_create in the code. But in current code, allocated
>> cpu set is not freed.
>> Fix this issue by adding CPU_FREE in the "shutdown" function which
>> is called in most of the error/exit path for the cleanup. There are
>> few error paths which exit without using shutdown. Add a common goto
>> error path with CPU_FREE for these cases.
>> Fixes: 7820b0715b6f ("tools/selftests: add mq_perf_tests")
>> Signed-off-by: Athira Rajeev 
>> ---
>> Changelog:
>>  From v2 -> v3:
>>   Addressed review comment from Shuah Khan to add
>>   common "goto" error path with CPU_FREE for few exit
>>   cases.
>>  From v1 -> v2:
>>   Addressed review comment from Shuah Khan to add
>>   CPU_FREE in other exit paths where it is needed
> 
> Thank you. I will queue this up for Linux 5.18-rc3

Thanks Shuah

Athira
> 
> thanks,
> -- Shuah
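
For readers unfamiliar with the dynamic cpu-set API the fix relies on,
the general CPU_ALLOC/CPU_FREE pattern looks roughly like this generic
sketch (not the actual mq_perf_tests code; the max_cpus bound and the
sched_setaffinity call are illustrative):

```c
#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
	int max_cpus = 1024;		/* illustrative upper bound */
	cpu_set_t *cpu_set;
	size_t setsize;
	int ret = EXIT_FAILURE;

	cpu_set = CPU_ALLOC(max_cpus);	/* dynamically sized cpu set */
	if (!cpu_set)
		return EXIT_FAILURE;
	setsize = CPU_ALLOC_SIZE(max_cpus);

	CPU_ZERO_S(setsize, cpu_set);
	CPU_SET_S(0, setsize, cpu_set);

	if (sched_setaffinity(0, setsize, cpu_set) != 0) {
		perror("sched_setaffinity");
		goto out;	/* common error path, as in the fix */
	}
	ret = EXIT_SUCCESS;
out:
	CPU_FREE(cpu_set);	/* the cleanup the selftest was missing */
	return ret;
}
```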