[PATCH v4 09/11] lkdtm/powerpc: Fix code patching hijack test

2021-04-29 Thread Christopher M. Riedl
Code patching on powerpc with STRICT_KERNEL_RWX now uses a userspace
address in a temporary mm. Use __put_user() to avoid write failures due
to KUAP when attempting a "hijack" of the patching address.

Signed-off-by: Christopher M. Riedl 
---
 drivers/misc/lkdtm/perms.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 55c3bec6d3b72..af9bf285fe326 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -268,16 +268,7 @@ static inline u32 lkdtm_read_patch_site(void)
 /* Returns True if the write succeeds */
 static inline bool lkdtm_try_write(u32 data, u32 *addr)
 {
-#ifdef CONFIG_PPC
-   __put_kernel_nofault(addr, &data, u32, err);
-   return true;
-
-err:
-   return false;
-#endif
-#ifdef CONFIG_X86_64
return !__put_user(data, addr);
-#endif
 }
 
 static int lkdtm_patching_cpu(void *data)
-- 
2.26.1



[PATCH v4 01/11] powerpc: Add LKDTM accessor for patching addr

2021-04-29 Thread Christopher M. Riedl
When live patching with STRICT_KERNEL_RWX a mapping is installed at a
"patching address" with temporary write permissions. Provide an
LKDTM-only accessor function for this address in preparation for an
LKDTM test which attempts to "hijack" this mapping by writing to it from
another CPU.
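
As a rough sketch of the intended use (the LKDTM test itself arrives
later in this series; the helper below is illustrative and not part of
this patch):

        /* Try to write through another CPU's temporary patching mapping */
        static bool try_hijack(unsigned int patching_cpu, u32 data)
        {
                u32 *addr = (u32 *)read_cpu_patching_addr(patching_cpu);

                __put_kernel_nofault(addr, &data, u32, err);
                return true;    /* write landed - kernel text was hijacked */
        err:
                return false;   /* faulted - the mapping was not reachable */
        }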

Signed-off-by: Christopher M. Riedl 
---
 arch/powerpc/include/asm/code-patching.h | 4 
 arch/powerpc/lib/code-patching.c | 7 +++
 2 files changed, 11 insertions(+)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index f1d029bf906e5..e51c81e4a9bda 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -188,4 +188,8 @@ static inline unsigned long ppc_kallsyms_lookup_name(const char *name)
 ___PPC_RA(__REG_R1) | PPC_LR_STKOFF)
 #endif /* CONFIG_PPC64 */
 
+#if IS_BUILTIN(CONFIG_LKDTM) && IS_ENABLED(CONFIG_STRICT_KERNEL_RWX)
+unsigned long read_cpu_patching_addr(unsigned int cpu);
+#endif
+
 #endif /* _ASM_POWERPC_CODE_PATCHING_H */
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 870b30d9be2f8..2b1b3e9043ade 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -48,6 +48,13 @@ int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
 #ifdef CONFIG_STRICT_KERNEL_RWX
 static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
 
+#if IS_BUILTIN(CONFIG_LKDTM)
+unsigned long read_cpu_patching_addr(unsigned int cpu)
+{
+   return (unsigned long)(per_cpu(text_poke_area, cpu))->addr;
+}
+#endif
+
 static int text_area_cpu_up(unsigned int cpu)
 {
struct vm_struct *area;
-- 
2.26.1



[PATCH v4 04/11] lkdtm/x86_64: Add test to hijack a patch mapping

2021-04-29 Thread Christopher M. Riedl
A previous commit implemented an LKDTM test on powerpc to exploit the
temporary mapping established when patching code with STRICT_KERNEL_RWX
enabled. Extend the test to work on x86_64 as well.

Signed-off-by: Christopher M. Riedl 
---
 drivers/misc/lkdtm/perms.c | 29 ++++++++++++++++++++++++++---
 1 file changed, 26 insertions(+), 3 deletions(-)

diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index c6f96ebffccfd..55c3bec6d3b72 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -224,7 +224,7 @@ void lkdtm_ACCESS_NULL(void)
 }
 
 #if (IS_BUILTIN(CONFIG_LKDTM) && defined(CONFIG_STRICT_KERNEL_RWX) && \
-   defined(CONFIG_PPC))
+   (defined(CONFIG_PPC) || defined(CONFIG_X86_64)))
 /*
  * This is just a dummy location to patch-over.
  */
@@ -233,28 +233,51 @@ static void patching_target(void)
return;
 }
 
+#ifdef CONFIG_PPC
 #include 
 struct ppc_inst * const patch_site = (struct ppc_inst *)&patching_target;
+#endif
+
+#ifdef CONFIG_X86_64
+#include 
+u32 * const patch_site = (u32 *)&patching_target;
+#endif
 
 static inline int lkdtm_do_patch(u32 data)
 {
+#ifdef CONFIG_PPC
return patch_instruction(patch_site, ppc_inst(data));
+#endif
+#ifdef CONFIG_X86_64
+   text_poke(patch_site, &data, sizeof(u32));
+   return 0;
+#endif
 }
 
 static inline u32 lkdtm_read_patch_site(void)
 {
+#ifdef CONFIG_PPC
struct ppc_inst inst = READ_ONCE(*patch_site);
return ppc_inst_val(ppc_inst_read(&inst));
+#endif
+#ifdef CONFIG_X86_64
+   return READ_ONCE(*patch_site);
+#endif
 }
 
 /* Returns True if the write succeeds */
 static inline bool lkdtm_try_write(u32 data, u32 *addr)
 {
+#ifdef CONFIG_PPC
__put_kernel_nofault(addr, &data, u32, err);
return true;
 
 err:
return false;
+#endif
+#ifdef CONFIG_X86_64
+   return !__put_user(data, addr);
+#endif
 }
 
 static int lkdtm_patching_cpu(void *data)
@@ -347,8 +370,8 @@ void lkdtm_HIJACK_PATCH(void)
 
 void lkdtm_HIJACK_PATCH(void)
 {
-   if (!IS_ENABLED(CONFIG_PPC))
-   pr_err("XFAIL: this test only runs on powerpc\n");
+   if (!IS_ENABLED(CONFIG_PPC) && !IS_ENABLED(CONFIG_X86_64))
+   pr_err("XFAIL: this test only runs on powerpc and x86_64\n");
if (!IS_ENABLED(CONFIG_STRICT_KERNEL_RWX))
pr_err("XFAIL: this test requires CONFIG_STRICT_KERNEL_RWX\n");
if (!IS_BUILTIN(CONFIG_LKDTM))
-- 
2.26.1



[PATCH v4 10/11] powerpc: Protect patching_mm with a lock

2021-04-29 Thread Christopher M. Riedl
Powerpc allows for multiple CPUs to patch concurrently. When patching
with STRICT_KERNEL_RWX a single patching_mm is allocated for use by all
CPUs for the few times that patching occurs. Use a spinlock to protect
the patching_mm from concurrent use.

Modify patch_instruction() to acquire the lock, perform the patch op,
and then release the lock.

Also introduce {lock,unlock}_patching() along with
patch_instruction_unlocked() to avoid per-iteration lock overhead when
patch_instruction() is called in a loop. A follow-up patch converts some
uses of patch_instruction() to use patch_instruction_unlocked() instead.
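
Since the diff below is truncated in this archive, here is a sketch of
what the new helpers presumably reduce to, based on the
DEFINE_SPINLOCK(patching_lock) and lockdep assertions visible in this
patch (an approximation, not the exact code):

        unsigned long lock_patching(void)
        {
                unsigned long flags;

                /*
                 * Serialize all users of patching_mm and disable IRQs,
                 * replacing the local_irq_save() previously done in
                 * do_patch_instruction().
                 */
                spin_lock_irqsave(&patching_lock, flags);
                return flags;
        }

        void unlock_patching(unsigned long flags)
        {
                spin_unlock_irqrestore(&patching_lock, flags);
        }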

Signed-off-by: Christopher M. Riedl 

---

v4:  * New to series.
---
 arch/powerpc/include/asm/code-patching.h |  4 ++
 arch/powerpc/lib/code-patching.c | 85 +---
 2 files changed, 79 insertions(+), 10 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h b/arch/powerpc/include/asm/code-patching.h
index e51c81e4a9bda..2efa11b68cd8f 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -28,8 +28,12 @@ int create_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
 int create_cond_branch(struct ppc_inst *instr, const struct ppc_inst *addr,
   unsigned long target, int flags);
 int patch_branch(struct ppc_inst *addr, unsigned long target, int flags);
+int patch_branch_unlocked(struct ppc_inst *addr, unsigned long target, int flags);
 int patch_instruction(struct ppc_inst *addr, struct ppc_inst instr);
+int patch_instruction_unlocked(struct ppc_inst *addr, struct ppc_inst instr);
 int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr);
+unsigned long lock_patching(void);
+void unlock_patching(unsigned long flags);
 
 static inline unsigned long patch_site_addr(s32 *site)
 {
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 7e15abc09ec04..0a496bb52bbf4 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -52,13 +52,17 @@ int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
 
+static DEFINE_SPINLOCK(patching_lock);
+
 struct temp_mm {
struct mm_struct *temp;
struct mm_struct *prev;
struct arch_hw_breakpoint brk[HBP_NUM_MAX];
+   spinlock_t *lock; /* protect access to the temporary mm */
 };
 
-static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
+static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm,
+   spinlock_t *lock)
 {
/* Do not preload SLB entries from the thread_info struct */
if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
@@ -66,12 +70,14 @@ static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
 
temp_mm->temp = mm;
temp_mm->prev = NULL;
+   temp_mm->lock = lock;
memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
 }
 
 static inline void use_temporary_mm(struct temp_mm *temp_mm)
 {
lockdep_assert_irqs_disabled();
+   lockdep_assert_held(temp_mm->lock);
 
temp_mm->prev = current->active_mm;
switch_mm_irqs_off(temp_mm->prev, temp_mm->temp, current);
@@ -93,11 +99,13 @@ static inline void use_temporary_mm(struct temp_mm *temp_mm)
 static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
 {
lockdep_assert_irqs_disabled();
+   lockdep_assert_held(temp_mm->lock);
 
switch_mm_irqs_off(temp_mm->temp, temp_mm->prev, current);
 
/*
-* On book3s64 the active_cpus counter increments in
+* The temporary mm can only be in use on a single CPU at a time due to
+* the temp_mm->lock. On book3s64 the active_cpus counter increments in
 * switch_mm_irqs_off(). With the Hash MMU this counter affects if TLB
 * flushes are local. We have to manually decrement that counter here
 * along with removing our current CPU from the mm's cpumask so that in
@@ -230,7 +238,7 @@ static int map_patch(const void *addr, struct patch_mapping *patch_mapping)
pte = pte_mkdirty(pte);
set_pte_at(patching_mm, patching_addr, patch_mapping->ptep, pte);
 
-   init_temp_mm(&patch_mapping->temp_mm, patching_mm);
+   init_temp_mm(&patch_mapping->temp_mm, patching_mm, &patching_lock);
use_temporary_mm(&patch_mapping->temp_mm);
 
/*
@@ -258,7 +266,6 @@ static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
 {
int err;
struct ppc_inst *patch_addr = NULL;
-   unsigned long flags;
struct patch_mapping patch_mapping;
 
/*
@@ -269,11 +276,12 @@ static int do_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
if (!patching_mm)
return raw_patch_instruction(addr, instr);
 
-   local_irq_save(flags);
+   lockdep_assert_held(&patching_lock);
+   lockdep_assert_

[PATCH v4 00/11] Use per-CPU temporary mappings for patching

2021-04-29 Thread Christopher M. Riedl
When compiled with CONFIG_STRICT_KERNEL_RWX, the kernel must create
temporary mappings when patching itself. These mappings temporarily
override the strict RWX text protections to permit a write. Currently,
powerpc allocates a per-CPU VM area for patching. Patching occurs as
follows:

1. Map page in per-CPU VM area w/ PAGE_KERNEL protection
2. Patch text
3. Remove the temporary mapping

While the VM area is per-CPU, the mapping is actually inserted into the
kernel page tables. Presumably, this could allow another CPU to access
the normally write-protected text - either maliciously or accidentally -
via this same mapping if the address of the VM area is known. Ideally,
the mapping should be kept local to the CPU doing the patching [0].

x86 introduced "temporary mm" structs which allow the creation of
mappings local to a particular CPU [1]. This series intends to bring the
notion of a temporary mm to powerpc and harden powerpc by using such a
mapping for patching a kernel with strict RWX permissions.

The first four patches implement an LKDTM test "proof-of-concept" which
exploits the potential vulnerability (ie. the temporary mapping during
patching is exposed in the kernel page tables and accessible by other
CPUs) using a simple brute-force approach. This test is implemented for
both powerpc and x86_64. The test passes on powerpc with this new
series, fails on upstream powerpc, passes on upstream x86_64, and fails
on an older (ancient) x86_64 tree without the x86_64 temporary mm
patches. The remaining patches add support for and use a temporary mm
for code patching on powerpc.

Tested boot, ftrace, and repeated LKDTM "hijack":
- QEMU+KVM (host: POWER9 Blackbird): Radix MMU w/ KUAP
- QEMU+KVM (host: POWER9 Blackbird): Hash MMU w/o KUAP
- QEMU+KVM (host: POWER9 Blackbird): Hash MMU w/ KUAP

Tested repeated LKDTM "hijack":
- QEMU+KVM (host: AMD desktop): x86_64 upstream
- QEMU+KVM (host: AMD desktop): x86_64 w/o percpu temp mm to
  verify the LKDTM "hijack" fails

Tested boot and ftrace:
- QEMU+TCG: ppc44x (bamboo)
- QEMU+TCG: g5 (mac99)

I also tested with various extra config options enabled as suggested in
section 12) in Documentation/process/submit-checklist.rst.

v4: * It's time to revisit this series again since @jpn and @mpe fixed
  our known STRICT_*_RWX bugs on powerpc/64s.
* Rebase on linuxppc/next:
  commit ee1bc694fbaec ("powerpc/kvm: Fix build error when PPC_MEM_KEYS/PPC_PSERIES=n")
* Completely rework how map_patch() works on book3s64 Hash MMU
* Split the LKDTM x86_64 and powerpc bits into separate patches
* Annotate commit messages with changes from v3 instead of
  listing them here completely out-of-context...

v3: * Rebase on linuxppc/next: commit 9123e3a74ec7 ("Linux 5.9-rc1")
* Move temporary mm implementation into code-patching.c where it
  belongs
* Implement LKDTM hijacker test on x86_64 (on IBM time oof)
* Do not use address zero for the patching address in the
  temporary mm (thanks @dja for pointing this out!)
* Wrap the LKDTM test w/ CONFIG_SMP as suggested by Christophe
  Leroy
* Comments to clarify PTE pre-allocation and patching addr
  selection

v2: * Rebase on linuxppc/next:
  commit 105fb38124a4 ("powerpc/8xx: Modify ptep_get()")
* Always dirty pte when mapping patch
* Use `ppc_inst_len` instead of `sizeof` on instructions
* Declare LKDTM patching addr accessor in header where it belongs   

v1: * Rebase on linuxppc/next (4336b9337824)
* Save and restore second hw watchpoint
* Use new ppc_inst_* functions for patching check and in LKDTM test

rfc-v2: * Many fixes and improvements mostly based on extensive feedback
  and testing by Christophe Leroy (thanks!).
* Make patching_mm and patching_addr static and move
  '__ro_after_init' to after the variable name (more common in
  other parts of the kernel)
* Use 'asm/debug.h' header instead of 'asm/hw_breakpoint.h' to
  fix PPC64e compile
* Add comment explaining why we use BUG_ON() during the init
  call to setup for patching later
* Move ptep into patch_mapping to avoid walking page tables a
  second time when unmapping the temporary mapping
* Use KUAP under non-radix, also manually dirty the PTE for patch
  mapping on non-BOOK3S_64 platforms
* Properly return any error from __patch_instruction
* Do not use 'memcmp' where a simple comparison is appropriate
* Simplify expression for patch address by removing pointer maths
* Add LKDTM test

[0]: https://github.com/linuxppc/issues/issues/224
[1]: https://lore.kernel.org/kernel-hardening/20190426232303.28381-1-nadav.a...@gmail.com/

Christopher M. Riedl (11):
  powerpc: Add LKDTM

[PATCH v4 11/11] powerpc: Use patch_instruction_unlocked() in loops

2021-04-29 Thread Christopher M. Riedl
Now that patching requires a lock to prevent concurrent access to
patching_mm, every call to patch_instruction() acquires and releases a
spinlock. There are several places where patch_instruction() is called
in a loop. Convert these to acquire the lock once before the loop, call
patch_instruction_unlocked() in the loop body, and then release the lock
again after the loop terminates - as in:

for (i = 0; i < n; ++i)
        patch_instruction(...); <-- lock/unlock every iteration

changes to:

flags = lock_patching(); <-- lock once

for (i = 0; i < n; ++i)
        patch_instruction_unlocked(...);

unlock_patching(flags); <-- unlock once

Signed-off-by: Christopher M. Riedl 

---

v4:  * New to series.
---
 arch/powerpc/kernel/epapr_paravirt.c |   9 ++-
 arch/powerpc/kernel/optprobes.c  |  22 --
 arch/powerpc/lib/feature-fixups.c| 114 +++
 arch/powerpc/xmon/xmon.c |  22 --
 4 files changed, 120 insertions(+), 47 deletions(-)

diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c
index 2ed14d4a47f59..b639e71cf9dec 100644
--- a/arch/powerpc/kernel/epapr_paravirt.c
+++ b/arch/powerpc/kernel/epapr_paravirt.c
@@ -28,6 +28,7 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
const u32 *insts;
int len;
int i;
+   unsigned long flags;
 
insts = of_get_flat_dt_prop(node, "hcall-instructions", &len);
if (!insts)
@@ -36,14 +37,18 @@ static int __init early_init_dt_scan_epapr(unsigned long node,
if (len % 4 || len > (4 * 4))
return -1;
 
+   flags = lock_patching();
+
for (i = 0; i < (len / 4); i++) {
struct ppc_inst inst = ppc_inst(be32_to_cpu(insts[i]));
-   patch_instruction((struct ppc_inst *)(epapr_hypercall_start + i), inst);
+   patch_instruction_unlocked((struct ppc_inst *)(epapr_hypercall_start + i), inst);
 #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
-   patch_instruction((struct ppc_inst *)(epapr_ev_idle_start + i), inst);
+   patch_instruction_unlocked((struct ppc_inst *)(epapr_ev_idle_start + i), inst);
 #endif
}
 
+   unlock_patching(flags);
+
 #if !defined(CONFIG_64BIT) || defined(CONFIG_PPC_BOOK3E_64)
if (of_get_flat_dt_prop(node, "has-idle", NULL))
epapr_has_idle = true;
diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index cdf87086fa33a..deaeb6e8d1a00 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -200,7 +200,7 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
struct ppc_inst branch_op_callback, branch_emulate_step, temp;
kprobe_opcode_t *op_callback_addr, *emulate_step_addr, *buff;
long b_offset;
-   unsigned long nip, size;
+   unsigned long nip, size, flags;
int rc, i;
 
kprobe_ppc_optinsn_slots.insn_size = MAX_OPTINSN_SIZE;
@@ -237,13 +237,20 @@ int arch_prepare_optimized_kprobe(struct optimized_kprobe *op, struct kprobe *p)
/* We can optimize this via patch_instruction_window later */
size = (TMPL_END_IDX * sizeof(kprobe_opcode_t)) / sizeof(int);
pr_devel("Copying template to %p, size %lu\n", buff, size);
+
+   flags = lock_patching();
+
for (i = 0; i < size; i++) {
-   rc = patch_instruction((struct ppc_inst *)(buff + i),
-  ppc_inst(*(optprobe_template_entry + i)));
-   if (rc < 0)
+   rc = patch_instruction_unlocked((struct ppc_inst *)(buff + i),
+   ppc_inst(*(optprobe_template_entry + i)));
+   if (rc < 0) {
+   unlock_patching(flags);
goto error;
+   }
}
 
+   unlock_patching(flags);
+
/*
 * Fixup the template with instructions to:
 * 1. load the address of the actual probepoint
@@ -322,6 +329,9 @@ void arch_optimize_kprobes(struct list_head *oplist)
struct ppc_inst instr;
struct optimized_kprobe *op;
struct optimized_kprobe *tmp;
+   unsigned long flags;
+
+   flags = lock_patching();
 
list_for_each_entry_safe(op, tmp, oplist, list) {
/*
@@ -333,9 +343,11 @@ void arch_optimize_kprobes(struct list_head *oplist)
create_branch(&instr,
  (struct ppc_inst *)op->kp.addr,
  (unsigned long)op->optinsn.insn, 0);
-   patch_instruction((struct ppc_inst *)op->kp.addr, instr);
+   patch_instruction_unlocked((struct ppc_inst *)op->kp.addr, instr);
list_del_init(&op->list);
}
+
+   unlock_patching(flags);
 }
 
 void arch_unoptimize_kprobe(struct optimized_kprobe *op)
diff --git 

[PATCH v4 02/11] lkdtm/powerpc: Add test to hijack a patch mapping

2021-04-29 Thread Christopher M. Riedl
When live patching with STRICT_KERNEL_RWX the CPU doing the patching
must temporarily remap the page(s) containing the patch site with +W
permissions. While this temporary mapping is in use, another CPU could
write to the same mapping and maliciously alter kernel text. Implement an
LKDTM test to attempt to exploit such an opening during code patching.
The test is implemented on powerpc and requires LKDTM built into the
kernel (building LKDTM as a module is insufficient).

The LKDTM "hijack" test works as follows:

  1. A CPU executes an infinite loop to patch an instruction. This is
 the "patching" CPU.
  2. Another CPU attempts to write to the address of the temporary
 mapping used by the "patching" CPU. This other CPU is the
 "hijacker" CPU. The hijack either fails with a fault/error or
 succeeds, in which case some kernel text is now overwritten.

The virtual address of the temporary patch mapping is provided via an
LKDTM-specific accessor to the hijacker CPU. This test assumes a
hypothetical situation where this address was leaked previously.
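
In rough outline, the test body looks like this (a sketch only; apart
from read_cpu_patching_addr(), lkdtm_patching_cpu(), and
lkdtm_try_write() from this patch, the names are illustrative):

        /* "patching" CPU: kthread bound to cpu_a, loops on lkdtm_do_patch() */
        patching_kthrd = kthread_create(lkdtm_patching_cpu, &val, "lkdtm_patching");
        kthread_bind(patching_kthrd, cpu_a);
        wake_up_process(patching_kthrd);

        /* "hijacker" CPU: hammer the (assumed leaked) patching address */
        hijack_addr = (u32 *)read_cpu_patching_addr(cpu_a);
        for (i = 0; i < attempts; i++) {
                if (lkdtm_try_write(0xdeadbeef, hijack_addr)) {
                        pr_err("FAIL: overwrote kernel text from another CPU\n");
                        break;
                }
        }

        kthread_stop(patching_kthrd);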

How to run the test:

mount -t debugfs none /sys/kernel/debug
(echo HIJACK_PATCH > /sys/kernel/debug/provoke-crash/DIRECT)

A passing test indicates that it is not possible to overwrite kernel
text from another CPU by using the temporary mapping established by
a CPU for patching.

Signed-off-by: Christopher M. Riedl 

---

v4:  * Separate the powerpc and x86_64 bits into individual patches.
 * Use __put_kernel_nofault() when attempting to hijack the mapping
 * Use raw_smp_processor_id() to avoid triggering the BUG() when
   calling smp_processor_id() in preemptible code - the only thing
   that matters is that one of the threads is bound to a different
   CPU - we are not using smp_processor_id() to access any per-cpu
   data or similar where preemption should be disabled.
 * Rework the patching_cpu() kthread stop condition to avoid:
   https://lwn.net/Articles/628628/
---
 drivers/misc/lkdtm/core.c  |   1 +
 drivers/misc/lkdtm/lkdtm.h |   1 +
 drivers/misc/lkdtm/perms.c | 135 +
 3 files changed, 137 insertions(+)

diff --git a/drivers/misc/lkdtm/core.c b/drivers/misc/lkdtm/core.c
index b2aff4d87c014..857d218840eb8 100644
--- a/drivers/misc/lkdtm/core.c
+++ b/drivers/misc/lkdtm/core.c
@@ -146,6 +146,7 @@ static const struct crashtype crashtypes[] = {
CRASHTYPE(WRITE_RO),
CRASHTYPE(WRITE_RO_AFTER_INIT),
CRASHTYPE(WRITE_KERN),
+   CRASHTYPE(HIJACK_PATCH),
CRASHTYPE(REFCOUNT_INC_OVERFLOW),
CRASHTYPE(REFCOUNT_ADD_OVERFLOW),
CRASHTYPE(REFCOUNT_INC_NOT_ZERO_OVERFLOW),
diff --git a/drivers/misc/lkdtm/lkdtm.h b/drivers/misc/lkdtm/lkdtm.h
index 5ae48c64df24d..c8de54d189c27 100644
--- a/drivers/misc/lkdtm/lkdtm.h
+++ b/drivers/misc/lkdtm/lkdtm.h
@@ -61,6 +61,7 @@ void lkdtm_EXEC_USERSPACE(void);
 void lkdtm_EXEC_NULL(void);
 void lkdtm_ACCESS_USERSPACE(void);
 void lkdtm_ACCESS_NULL(void);
+void lkdtm_HIJACK_PATCH(void);
 
 /* refcount.c */
 void lkdtm_REFCOUNT_INC_OVERFLOW(void);
diff --git a/drivers/misc/lkdtm/perms.c b/drivers/misc/lkdtm/perms.c
index 2dede2ef658f3..c6f96ebffccfd 100644
--- a/drivers/misc/lkdtm/perms.c
+++ b/drivers/misc/lkdtm/perms.c
@@ -9,6 +9,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 /* Whether or not to fill the target memory area with do_nothing(). */
@@ -222,6 +223,140 @@ void lkdtm_ACCESS_NULL(void)
pr_err("FAIL: survived bad write\n");
 }
 
+#if (IS_BUILTIN(CONFIG_LKDTM) && defined(CONFIG_STRICT_KERNEL_RWX) && \
+   defined(CONFIG_PPC))
+/*
+ * This is just a dummy location to patch-over.
+ */
+static void patching_target(void)
+{
+   return;
+}
+
+#include 
+struct ppc_inst * const patch_site = (struct ppc_inst *)&patching_target;
+
+static inline int lkdtm_do_patch(u32 data)
+{
+   return patch_instruction(patch_site, ppc_inst(data));
+}
+
+static inline u32 lkdtm_read_patch_site(void)
+{
+   struct ppc_inst inst = READ_ONCE(*patch_site);
+   return ppc_inst_val(ppc_inst_read(&inst));
+}
+
+/* Returns True if the write succeeds */
+static inline bool lkdtm_try_write(u32 data, u32 *addr)
+{
+   __put_kernel_nofault(addr, &data, u32, err);
+   return true;
+
+err:
+   return false;
+}
+
+static int lkdtm_patching_cpu(void *data)
+{
+   int err = 0;
+   u32 val = 0xdeadbeef;
+
+   pr_info("starting patching_cpu=%d\n", raw_smp_processor_id());
+
+   do {
+   err = lkdtm_do_patch(val);
+   } while (lkdtm_read_patch_site() == val && !err && !kthread_should_stop());
+
+   if (err)
+   pr_warn("XFAIL: patch_instruction returned error: %d\n", err);
+
+   while (!kthread_should_stop()) {
+   set_current_state(TASK_INTERRUPTIBLE);
+   schedule();
+   }
+
+   return err;
+}
+
+void lkdtm_HIJACK_PATCH(void)
+{
+   struct task_struct *patching_kthrd;
+   int

[PATCH v4 05/11] powerpc/64s: Add ability to skip SLB preload

2021-04-29 Thread Christopher M. Riedl
Switching to a different mm with Hash translation causes SLB entries to
be preloaded from the current thread_info. This reduces SLB faults, for
example when threads share a common mm but operate on different address
ranges.

Preloading entries from the thread_info struct may not always be
appropriate - such as when switching to a temporary mm. Introduce a new
boolean in mm_context_t to skip the SLB preload entirely. Also move the
SLB preload code into a separate function since switch_slb() is already
quite long. The default behavior (preloading SLB entries from the
current thread_info struct) remains unchanged.

Signed-off-by: Christopher M. Riedl 

---

v4:  * New to series.
---
 arch/powerpc/include/asm/book3s/64/mmu.h |  3 ++
 arch/powerpc/include/asm/mmu_context.h   | 13 ++
 arch/powerpc/mm/book3s64/mmu_context.c   |  2 +
 arch/powerpc/mm/book3s64/slb.c   | 56 ++--
 4 files changed, 50 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu.h b/arch/powerpc/include/asm/book3s/64/mmu.h
index eace8c3f7b0a1..b23a9dcdee5af 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu.h
@@ -130,6 +130,9 @@ typedef struct {
u32 pkey_allocation_map;
s16 execute_only_pkey; /* key holding execute-only protection */
 #endif
+
+   /* Do not preload SLB entries from thread_info during switch_slb() */
+   bool skip_slb_preload;
 } mm_context_t;
 
 static inline u16 mm_ctx_user_psize(mm_context_t *ctx)
diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h
index 4bc45d3ed8b0e..264787e90b1a1 100644
--- a/arch/powerpc/include/asm/mmu_context.h
+++ b/arch/powerpc/include/asm/mmu_context.h
@@ -298,6 +298,19 @@ static inline int arch_dup_mmap(struct mm_struct *oldmm,
return 0;
 }
 
+#ifdef CONFIG_PPC_BOOK3S_64
+
+static inline void skip_slb_preload_mm(struct mm_struct *mm)
+{
+   mm->context.skip_slb_preload = true;
+}
+
+#else
+
+static inline void skip_slb_preload_mm(struct mm_struct *mm) {}
+
+#endif /* CONFIG_PPC_BOOK3S_64 */
+
 #include 
 
 #endif /* __KERNEL__ */
diff --git a/arch/powerpc/mm/book3s64/mmu_context.c b/arch/powerpc/mm/book3s64/mmu_context.c
index c10fc8a72fb37..3479910264c59 100644
--- a/arch/powerpc/mm/book3s64/mmu_context.c
+++ b/arch/powerpc/mm/book3s64/mmu_context.c
@@ -202,6 +202,8 @@ int init_new_context(struct task_struct *tsk, struct mm_struct *mm)
atomic_set(&mm->context.active_cpus, 0);
atomic_set(&mm->context.copros, 0);
 
+   mm->context.skip_slb_preload = false;
+
return 0;
 }
 
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index c91bd85eb90e3..da0836cb855af 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -441,10 +441,39 @@ static void slb_cache_slbie_user(unsigned int index)
asm volatile("slbie %0" : : "r" (slbie_data));
 }
 
+static void preload_slb_entries(struct task_struct *tsk, struct mm_struct *mm)
+{
+   struct thread_info *ti = task_thread_info(tsk);
+   unsigned char i;
+
+   /*
+* We gradually age out SLBs after a number of context switches to
+* reduce reload overhead of unused entries (like we do with FP/VEC
+* reload). Each time we wrap 256 switches, take an entry out of the
+* SLB preload cache.
+*/
+   tsk->thread.load_slb++;
+   if (!tsk->thread.load_slb) {
+   unsigned long pc = KSTK_EIP(tsk);
+
+   preload_age(ti);
+   preload_add(ti, pc);
+   }
+
+   for (i = 0; i < ti->slb_preload_nr; i++) {
+   unsigned char idx;
+   unsigned long ea;
+
+   idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
+   ea = (unsigned long)ti->slb_preload_esid[idx] << SID_SHIFT;
+
+   slb_allocate_user(mm, ea);
+   }
+}
+
 /* Flush all user entries from the segment table of the current processor. */
 void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 {
-   struct thread_info *ti = task_thread_info(tsk);
unsigned char i;
 
/*
@@ -502,29 +531,8 @@ void switch_slb(struct task_struct *tsk, struct mm_struct *mm)
 
copy_mm_to_paca(mm);
 
-   /*
-* We gradually age out SLBs after a number of context switches to
-* reduce reload overhead of unused entries (like we do with FP/VEC
-* reload). Each time we wrap 256 switches, take an entry out of the
-* SLB preload cache.
-*/
-   tsk->thread.load_slb++;
-   if (!tsk->thread.load_slb) {
-   unsigned long pc = KSTK_EIP(tsk);
-
-   preload_age(ti);
-   preload_add(ti, pc);
-   }
-
-   for (i = 0; i < ti->slb_preload_nr; i++) {
-   unsigned char idx;
-   unsigned long ea;
-
-   idx = (ti->slb_preload_tail + i) % SLB_PRELOAD_NR;
-   ea = (

[PATCH v4 07/11] powerpc/64s: Make slb_allocate_user() non-static

2021-04-29 Thread Christopher M. Riedl
With Book3s64 Hash translation, manually inserting a PTE requires
updating the Linux PTE, inserting a SLB entry, and inserting the hashed
page. The first is handled via the usual kernel abstractions, the second
requires slb_allocate_user() which is currently 'static', and the third
is available via hash_page_mm() already.

Make slb_allocate_user() non-static and add a prototype so the next
patch can use it during code-patching.

Signed-off-by: Christopher M. Riedl 

---

v4:  * New to series.
---
 arch/powerpc/include/asm/book3s/64/mmu-hash.h | 1 +
 arch/powerpc/mm/book3s64/slb.c| 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/64/mmu-hash.h b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
index 3004f3323144d..189854eebba77 100644
--- a/arch/powerpc/include/asm/book3s/64/mmu-hash.h
+++ b/arch/powerpc/include/asm/book3s/64/mmu-hash.h
@@ -525,6 +525,7 @@ void slb_dump_contents(struct slb_entry *slb_ptr);
 extern void slb_vmalloc_update(void);
 extern void slb_set_size(u16 size);
 void preload_new_slb_context(unsigned long start, unsigned long sp);
+long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
 #endif /* __ASSEMBLY__ */
 
 /*
diff --git a/arch/powerpc/mm/book3s64/slb.c b/arch/powerpc/mm/book3s64/slb.c
index da0836cb855af..532eb51bc5211 100644
--- a/arch/powerpc/mm/book3s64/slb.c
+++ b/arch/powerpc/mm/book3s64/slb.c
@@ -29,8 +29,6 @@
 #include "internal.h"
 
 
-static long slb_allocate_user(struct mm_struct *mm, unsigned long ea);
-
 bool stress_slb_enabled __initdata;
 
 static int __init parse_stress_slb(char *p)
@@ -791,7 +789,7 @@ static long slb_allocate_kernel(unsigned long ea, unsigned long id)
return slb_insert_entry(ea, context, flags, ssize, true);
 }
 
-static long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
+long slb_allocate_user(struct mm_struct *mm, unsigned long ea)
 {
unsigned long context;
unsigned long flags;
-- 
2.26.1



[PATCH v4 03/11] x86_64: Add LKDTM accessor for patching addr

2021-04-29 Thread Christopher M. Riedl
When live patching with STRICT_KERNEL_RWX a mapping is installed at a
"patching address" with temporary write permissions. Provide an
LKDTM-only accessor function for this address in preparation for an
LKDTM test which attempts to "hijack" this mapping by writing to it from
another CPU.

Signed-off-by: Christopher M. Riedl 
---
 arch/x86/include/asm/text-patching.h | 4 
 arch/x86/kernel/alternative.c| 7 +++
 2 files changed, 11 insertions(+)

diff --git a/arch/x86/include/asm/text-patching.h b/arch/x86/include/asm/text-patching.h
index b7421780e4e92..f0caf9ee13bd8 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -167,4 +167,8 @@ void int3_emulate_ret(struct pt_regs *regs)
 }
 #endif /* !CONFIG_UML_X86 */
 
+#if IS_BUILTIN(CONFIG_LKDTM)
+unsigned long read_cpu_patching_addr(unsigned int cpu);
+#endif
+
 #endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c
index 8d778e46725d2..4c95fdd9b1965 100644
--- a/arch/x86/kernel/alternative.c
+++ b/arch/x86/kernel/alternative.c
@@ -852,6 +852,13 @@ static inline void unuse_temporary_mm(temp_mm_state_t prev_state)
 __ro_after_init struct mm_struct *poking_mm;
 __ro_after_init unsigned long poking_addr;
 
+#if IS_BUILTIN(CONFIG_LKDTM)
+unsigned long read_cpu_patching_addr(unsigned int cpu)
+{
+   return poking_addr;
+}
+#endif
+
 static void *__text_poke(void *addr, const void *opcode, size_t len)
 {
bool cross_page_boundary = offset_in_page(addr) + len > PAGE_SIZE;
-- 
2.26.1



[PATCH v4 08/11] powerpc: Initialize and use a temporary mm for patching

2021-04-29 Thread Christopher M. Riedl
When code patching a STRICT_KERNEL_RWX kernel the page containing the
address to be patched is temporarily mapped as writeable. Currently, a
per-cpu vmalloc patch area is used for this purpose. While the patch
area is per-cpu, the temporary page mapping is inserted into the kernel
page tables for the duration of patching. The mapping is exposed to CPUs
other than the patching CPU - this is undesirable from a hardening
perspective. Use a temporary mm instead which keeps the mapping local to
the CPU doing the patching.

Use the `poking_init` init hook to prepare a temporary mm and patching
address. Initialize the temporary mm by copying the init mm. Choose a
randomized patching address inside the temporary mm userspace address
space. The patching address is randomized between PAGE_SIZE and
DEFAULT_MAP_WINDOW-PAGE_SIZE. The upper limit is necessary due to how
the Book3s64 Hash MMU operates - by default the space above
DEFAULT_MAP_WINDOW is not available. For now, the patching address for
all platforms/MMUs is randomized inside this range.  The number of
possible random addresses is dependent on PAGE_SIZE and limited by
DEFAULT_MAP_WINDOW.

Bits of entropy with 64K page size on BOOK3S_64:

bits of entropy = log2(DEFAULT_MAP_WINDOW_USER64 / PAGE_SIZE)

PAGE_SIZE=64K, DEFAULT_MAP_WINDOW_USER64=128TB
bits of entropy = log2(128TB / 64K)
bits of entropy = 31

Randomization occurs only once during initialization at boot.
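
A sketch of that one-time selection (poking_init() itself is truncated
later in this archive, so the exact expression may differ):

        /*
         * Pick one of the page-aligned slots between PAGE_SIZE and
         * DEFAULT_MAP_WINDOW - PAGE_SIZE, once at boot:
         */
        unsigned long nr_slots = (DEFAULT_MAP_WINDOW >> PAGE_SHIFT) - 2;

        patching_addr = PAGE_SIZE +
                        ((get_random_long() % nr_slots) << PAGE_SHIFT);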

Introduce two new functions, map_patch() and unmap_patch(), to
respectively create and remove the temporary mapping with write
permissions at patching_addr. The Hash MMU on Book3s64 requires mapping
the page for patching with PAGE_SHARED since the kernel cannot access
userspace pages with the PAGE_PRIVILEGED (PAGE_KERNEL) bit set.

Also introduce hash_prefault_mapping() to preload the SLB entry and HPTE
for the patching_addr when using the Hash MMU on Book3s64 to avoid
taking an SLB and Hash fault during patching.

Since patching_addr is now a userspace address, lock/unlock KUAP on
non-Book3s64 platforms. On Book3s64 with a Radix MMU, mapping the page
with PAGE_KERNEL sets EAA[0] for the PTE which ignores the AMR (KUAP)
according to PowerISA v3.0b Figure 35. On Book3s64 with a Hash MMU, the
hash PTE for the mapping is inserted with HPTE_USE_KERNEL_KEY which
similarly avoids the need for switching KUAP.

Finally, add a new WARN_ON() to check that the instruction was patched
as intended after the temporary mapping is torn down.

Based on x86 implementation:

commit 4fc19708b165
("x86/alternatives: Initialize temporary mm for patching")

and:

commit b3fd8e83ada0
("x86/alternatives: Use temporary mm for text poking")

Signed-off-by: Christopher M. Riedl 

---

v4:  * In the previous series this was two separate patches: one to init
   the temporary mm in poking_init() (unused in powerpc at the time)
   and the other to use it for patching (which removed all the
   per-cpu vmalloc code). Now that we use poking_init() in the
   existing per-cpu vmalloc approach, that separation doesn't work
   as nicely anymore so I just merged the two patches into one.
 * Preload the SLB entry and hash the page for the patching_addr
   when using Hash on book3s64 to avoid taking an SLB and Hash fault
   during patching. The previous implementation was a hack which
   changed current->mm to allow the SLB and Hash fault handlers to
   work with the temporary mm since both of those code-paths always
   assume mm == current->mm.
 * Also (hmm - seeing a trend here) with the book3s64 Hash MMU we
   have to manage the mm->context.active_cpus counter and mm cpumask
   since they determine (via mm_is_thread_local()) if the TLB flush
   in pte_clear() is local or not - it should always be local when
   we're using the temporary mm. On book3s64's Radix MMU we can
   just call local_flush_tlb_mm().
 * Use HPTE_USE_KERNEL_KEY on Hash to avoid costly lock/unlock of
   KUAP.
---
 arch/powerpc/lib/code-patching.c | 209 ++-
 1 file changed, 121 insertions(+), 88 deletions(-)

diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index cbdfba8a39360..7e15abc09ec04 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -11,6 +11,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
 #include 
 #include 
@@ -19,6 +21,7 @@
 #include 
 #include 
 #include 
+#include 
 
static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr,
   struct ppc_inst *patch_addr)
@@ -113,113 +116,142 @@ static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
}
 }
 
-static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
+static struct mm_struct *patching_mm __ro_after_init;
+static unsigned long patching_addr __ro_after_init;
+
+void __init poking_init(void)
+{
+   spinlock_t *ptl; /* for prote

[PATCH v4 06/11] powerpc: Introduce temporary mm

2021-04-29 Thread Christopher M. Riedl
x86 supports the notion of a temporary mm which restricts access to
temporary PTEs to a single CPU. A temporary mm is useful for situations
where a CPU needs to perform sensitive operations (such as patching a
STRICT_KERNEL_RWX kernel) requiring temporary mappings without exposing
said mappings to other CPUs. A side benefit is that other CPU TLBs do
not need to be flushed when the temporary mm is torn down.

Mappings in the temporary mm can be set in the userspace portion of the
address-space.

Interrupts must be disabled while the temporary mm is in use. HW
breakpoints, which may have been set by userspace as watchpoints on
addresses now within the temporary mm, are saved and disabled when
loading the temporary mm. The HW breakpoints are restored when unloading
the temporary mm. All HW breakpoints are indiscriminately disabled while
the temporary mm is in use.

With the Book3s64 Hash MMU the SLB is preloaded with entries from the
current thread_info struct during switch_slb(). This could cause a
Machine Check (MCE) due to an SLB Multihit when creating arbitrary
userspace mappings in the temporary mm later. Disable SLB preload from
the thread_info struct for any temporary mm to avoid this.

Based on x86 implementation:

commit cefa929c034e
("x86/mm: Introduce temporary mm structs")

Signed-off-by: Christopher M. Riedl 

---

v4:  * Pass the prev mm instead of NULL to switch_mm_irqs_off() when
   using/unusing the temp mm as suggested by Jann Horn to keep
   the context.active counter in-sync on mm/nohash.
 * Disable SLB preload in the temporary mm when initializing the
   temp_mm struct.
 * Include asm/debug.h header to fix build issue with
   ppc44x_defconfig.
---
 arch/powerpc/include/asm/debug.h |  1 +
 arch/powerpc/kernel/process.c|  5 +++
 arch/powerpc/lib/code-patching.c | 67 
 3 files changed, 73 insertions(+)

diff --git a/arch/powerpc/include/asm/debug.h b/arch/powerpc/include/asm/debug.h
index 86a14736c76c3..dfd82635ea8b3 100644
--- a/arch/powerpc/include/asm/debug.h
+++ b/arch/powerpc/include/asm/debug.h
@@ -46,6 +46,7 @@ static inline int debugger_fault_handler(struct pt_regs *regs) { return 0; }
 #endif
 
 void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk);
+void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk);
 bool ppc_breakpoint_available(void);
 #ifdef CONFIG_PPC_ADV_DEBUG_REGS
 extern void do_send_trap(struct pt_regs *regs, unsigned long address,
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 89e34aa273e21..8e94cabaea3c3 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -864,6 +864,11 @@ static inline int set_breakpoint_8xx(struct arch_hw_breakpoint *brk)
return 0;
 }
 
+void __get_breakpoint(int nr, struct arch_hw_breakpoint *brk)
+{
+   memcpy(brk, this_cpu_ptr(&current_brk[nr]), sizeof(*brk));
+}
+
 void __set_breakpoint(int nr, struct arch_hw_breakpoint *brk)
 {
 memcpy(this_cpu_ptr(&current_brk[nr]), brk, sizeof(*brk));
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 2b1b3e9043ade..cbdfba8a39360 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -17,6 +17,8 @@
 #include 
 #include 
 #include 
+#include 
+#include 
 
static int __patch_instruction(struct ppc_inst *exec_addr, struct ppc_inst instr,
   struct ppc_inst *patch_addr)
@@ -46,6 +48,71 @@ int raw_patch_instruction(struct ppc_inst *addr, struct ppc_inst instr)
 }
 
 #ifdef CONFIG_STRICT_KERNEL_RWX
+
+struct temp_mm {
+   struct mm_struct *temp;
+   struct mm_struct *prev;
+   struct arch_hw_breakpoint brk[HBP_NUM_MAX];
+};
+
+static inline void init_temp_mm(struct temp_mm *temp_mm, struct mm_struct *mm)
+{
+   /* Do not preload SLB entries from the thread_info struct */
+   if (IS_ENABLED(CONFIG_PPC_BOOK3S_64) && !radix_enabled())
+   skip_slb_preload_mm(mm);
+
+   temp_mm->temp = mm;
+   temp_mm->prev = NULL;
+   memset(&temp_mm->brk, 0, sizeof(temp_mm->brk));
+}
+
+static inline void use_temporary_mm(struct temp_mm *temp_mm)
+{
+   lockdep_assert_irqs_disabled();
+
+   temp_mm->prev = current->active_mm;
+   switch_mm_irqs_off(temp_mm->prev, temp_mm->temp, current);
+
+   WARN_ON(!mm_is_thread_local(temp_mm->temp));
+
+   if (ppc_breakpoint_available()) {
+   struct arch_hw_breakpoint null_brk = {0};
+   int i = 0;
+
+   for (; i < nr_wp_slots(); ++i) {
+   __get_breakpoint(i, &temp_mm->brk[i]);
+   if (temp_mm->brk[i].type != 0)
+   __set_breakpoint(i, &null_brk);
+   }
+   }
+}
+
+static inline void unuse_temporary_mm(struct temp_mm *temp_mm)
+{
+   lockdep_assert_irqs_disabled();
+
+   switch_mm_irqs_off(temp_mm->temp, temp_mm->prev, current);
+
+   /*
+* On book3s

Re: [PATCH v11 1/9] powerpc/mm: Implement set_memory() routines

2021-04-29 Thread Christophe Leroy




On 29/04/2021 at 05:15, Jordan Niethe wrote:

From: Russell Currey 

The set_memory_{ro/rw/nx/x}() functions are required for
STRICT_MODULE_RWX, and are generally useful primitives to have.  This
implementation is designed to be generic across powerpc's many MMUs.
It's possible that this could be optimised to be faster for specific
MMUs.

This implementation does not handle cases where the caller is attempting
to change the mapping of the page it is executing from, or if another
CPU is concurrently using the page being altered.  These cases likely
shouldn't happen, but a more complex implementation with MMU-specific code
could safely handle them.

On hash, the linear mapping is not kept in the linux pagetable, so this
will not change the protection if used on that range. Currently these
functions are not used on the linear map so just WARN for now.

Reviewed-by: Daniel Axtens 
Signed-off-by: Russell Currey 
Signed-off-by: Christophe Leroy 
[jpn: - Allow set memory functions to be used without Strict RWX
   - Hash: Disallow certain regions
   - Have change_page_attr() take function pointers to manipulate ptes


Did you look at the resulting generated code? I find it awful.

pte manipulation helpers are meant to be inlined. Here you force the compiler to outline them. This
also means that the inputs and outputs go through memory.


And now set_memory_xx are not tiny inlined functions anymore.

What is the reason you abandoned the way it was done up to now, through the use of an 'action'
value? With the previous approach the generated code was a lot lighter.
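
For reference, the 'action' based approach from the earlier revisions
looked roughly like this (a sketch, not the exact earlier code):

        enum { SET_MEMORY_RO, SET_MEMORY_RW, SET_MEMORY_NX, SET_MEMORY_X };

        static int change_page_attr(pte_t *ptep, unsigned long addr, void *data)
        {
                long action = (long)data;
                pte_t pte = ptep_get(ptep);

                /* invalidation/flush steps elided for brevity */
                switch (action) {
                case SET_MEMORY_RO:
                        pte = pte_wrprotect(pte);
                        break;
                case SET_MEMORY_RW:
                        pte = pte_mkwrite(pte);
                        break;
                case SET_MEMORY_NX:
                        pte = pte_exprotect(pte);
                        break;
                case SET_MEMORY_X:
                        pte = pte_mkexec(pte);
                        break;
                }
                set_pte_at(&init_mm, addr, ptep, pte);
                return 0;
        }

With the switch inside a single callback, the pte helpers stay inlined
and nothing is forced through memory.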



   - Radix: Add ptesync after set_pte_at()]
Signed-off-by: Jordan Niethe 
---
v10: WARN if trying to change the hash linear map
v11: - Update copyright dates
  - Allow set memory functions to be used without Strict RWX
  - Hash: Disallow certain regions and add comment explaining why
  - Have change_page_attr() take function pointers to manipulate ptes
  - Clarify change_page_attr()'s comment
  - Radix: Add ptesync after set_pte_at()
---
  arch/powerpc/Kconfig  |   1 +
  arch/powerpc/include/asm/set_memory.h |  10 +++
  arch/powerpc/mm/Makefile  |   2 +-
  arch/powerpc/mm/pageattr.c| 105 ++
  4 files changed, 117 insertions(+), 1 deletion(-)
  create mode 100644 arch/powerpc/include/asm/set_memory.h
  create mode 100644 arch/powerpc/mm/pageattr.c

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index cb2d44ee4e38..94c34932a74b 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -138,6 +138,7 @@ config PPC
select ARCH_HAS_MEMBARRIER_CALLBACKS
select ARCH_HAS_MEMBARRIER_SYNC_CORE
 	select ARCH_HAS_SCALED_CPUTIME  if VIRT_CPU_ACCOUNTING_NATIVE && PPC_BOOK3S_64
+   select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_STRICT_KERNEL_RWX   if ((PPC_BOOK3S_64 || PPC32) && !HIBERNATION)
select ARCH_HAS_TICK_BROADCAST  if GENERIC_CLOCKEVENTS_BROADCAST
select ARCH_HAS_UACCESS_FLUSHCACHE
diff --git a/arch/powerpc/include/asm/set_memory.h b/arch/powerpc/include/asm/set_memory.h
new file mode 100644
index ..d1cd69b1a43a
--- /dev/null
+++ b/arch/powerpc/include/asm/set_memory.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _ASM_POWERPC_SET_MEMORY_H
+#define _ASM_POWERPC_SET_MEMORY_H
+
+int set_memory_ro(unsigned long addr, int numpages);
+int set_memory_rw(unsigned long addr, int numpages);
+int set_memory_nx(unsigned long addr, int numpages);
+int set_memory_x(unsigned long addr, int numpages);
+
+#endif
diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index c3df3a8501d4..9142cf1fb0d5 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
  
  ccflags-$(CONFIG_PPC64)	:= $(NO_MINIMAL_TOC)
  
-obj-y	:= fault.o mem.o pgtable.o mmap.o maccess.o \
+obj-y	:= fault.o mem.o pgtable.o mmap.o maccess.o pageattr.o \
   init_$(BITS).o pgtable_$(BITS).o \
   pgtable-frag.o ioremap.o ioremap_$(BITS).o \
   init-common.o mmu_context.o drmem.o \
diff --git a/arch/powerpc/mm/pageattr.c b/arch/powerpc/mm/pageattr.c
new file mode 100644
index ..3b4aa72e555e
--- /dev/null
+++ b/arch/powerpc/mm/pageattr.c
@@ -0,0 +1,105 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * MMU-generic set_memory implementation for powerpc
+ *
+ * Copyright 2019-2021, IBM Corporation.
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+
+
+/*
+ * Updates the attributes of a page in three steps:
+ *
+ * 1. invalidate the page table entry
+ * 2. flush the TLB
+ * 3. install the new entry with the updated attributes
+ *
+ * Invalidating the pte means there are situations where this will not work
+ * when in theory it should.
+ * For example:
+ * - removing write from page whilst it is being executed

[PATCH] powerpc: mark local variables around longjmp as volatile

2021-04-29 Thread Arnd Bergmann
From: Arnd Bergmann 

gcc-11 points out that modifying local variables next to a
longjmp/setjmp may cause undefined behavior:

arch/powerpc/kexec/crash.c: In function 'crash_kexec_prepare_cpus.constprop':
arch/powerpc/kexec/crash.c:108:22: error: variable 'ncpus' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/kexec/crash.c:109:13: error: variable 'tries' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c: In function 'xmon_print_symbol':
arch/powerpc/xmon/xmon.c:3625:21: error: variable 'name' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c: In function 'stop_spus':
arch/powerpc/xmon/xmon.c:4057:13: error: variable 'i' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c: In function 'restart_spus':
arch/powerpc/xmon/xmon.c:4098:13: error: variable 'i' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c: In function 'dump_opal_msglog':
arch/powerpc/xmon/xmon.c:3008:16: error: variable 'pos' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c: In function 'show_pte':
arch/powerpc/xmon/xmon.c:3207:29: error: variable 'tsk' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c: In function 'show_tasks':
arch/powerpc/xmon/xmon.c:3302:29: error: variable 'tsk' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c: In function 'xmon_core':
arch/powerpc/xmon/xmon.c:494:13: error: variable 'cmd' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c:860:21: error: variable 'bp' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c:860:21: error: variable 'bp' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]
arch/powerpc/xmon/xmon.c:492:48: error: argument 'fromipi' might be clobbered by 'longjmp' or 'vfork' [-Werror=clobbered]

According to the documentation, marking these as 'volatile' is
sufficient to avoid the problem, and it shuts up the warning.
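
The underlying problem is easy to reproduce with a minimal
(illustrative) example; do_risky_thing() is a placeholder for anything
that may longjmp():

        #include <setjmp.h>

        static jmp_buf env;

        int f(void)
        {
                volatile int tries = 0; /* must be volatile */

                if (setjmp(env)) {
                        /*
                         * Re-entered via longjmp(), which restores register
                         * state from the setjmp() call: a register-allocated
                         * (non-volatile) 'tries' could silently lose the
                         * increment below.
                         */
                        if (tries > 3)
                                return -1;
                }
                tries++;                /* modified between setjmp and longjmp */
                do_risky_thing();       /* may call longjmp(env, 1) */
                return 0;
        }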

Signed-off-by: Arnd Bergmann 
---
 arch/powerpc/kexec/crash.c |  4 ++--
 arch/powerpc/xmon/xmon.c   | 22 +++---
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kexec/crash.c b/arch/powerpc/kexec/crash.c
index 0196d0c211ac..10f997e6bb95 100644
--- a/arch/powerpc/kexec/crash.c
+++ b/arch/powerpc/kexec/crash.c
@@ -105,8 +105,8 @@ void crash_ipi_callback(struct pt_regs *regs)
 static void crash_kexec_prepare_cpus(int cpu)
 {
unsigned int msecs;
-   unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
-   int tries = 0;
+   volatile unsigned int ncpus = num_online_cpus() - 1;/* Excluding the panic cpu */
+   volatile int tries = 0;
int (*old_handler)(struct pt_regs *regs);
 
printk(KERN_EMERG "Sending IPI to other CPUs\n");
diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c
index c8173e92f19d..ce0eacf77645 100644
--- a/arch/powerpc/xmon/xmon.c
+++ b/arch/powerpc/xmon/xmon.c
@@ -489,10 +489,10 @@ static void xmon_touch_watchdogs(void)
touch_nmi_watchdog();
 }
 
-static int xmon_core(struct pt_regs *regs, int fromipi)
+static int xmon_core(struct pt_regs *regs, volatile int fromipi)
 {
-   int cmd = 0;
-   struct bpt *bp;
+   volatile int cmd = 0;
+   struct bpt *volatile bp;
long recurse_jmp[JMP_BUF_LEN];
bool locked_down;
unsigned long offset;
@@ -857,7 +857,7 @@ static inline void force_enable_xmon(void)
 static struct bpt *at_breakpoint(unsigned long pc)
 {
int i;
-   struct bpt *bp;
+   struct bpt *volatile bp;
 
bp = bpts;
for (i = 0; i < NBPTS; ++i, ++bp)
@@ -3005,7 +3005,7 @@ static void dump_opal_msglog(void)
 {
unsigned char buf[128];
ssize_t res;
-   loff_t pos = 0;
+   volatile loff_t pos = 0;
 
if (!firmware_has_feature(FW_FEATURE_OPAL)) {
printf("Machine is not running OPAL firmware.\n");
@@ -3160,7 +3160,7 @@ memzcan(void)
printf("%.8lx\n", a - mskip);
 }
 
-static void show_task(struct task_struct *tsk)
+static void show_task(struct task_struct *volatile tsk)
 {
char state;
 
@@ -3204,7 +3204,7 @@ static void format_pte(void *ptep, unsigned long pte)
 static void show_pte(unsigned long addr)
 {
unsigned long tskv = 0;
-   struct task_struct *tsk = NULL;
+   struct task_struct *volatile tsk = NULL;
struct mm_struct *mm;
pgd_t *pgdp;
p4d_t *p4dp;
@@ -3299,7 +3299,7 @@ static void show_pte(unsigned long addr)
 static void show_tasks(void)
 {
unsigned long tskv;
-   struct task_struct *tsk = NULL;
+   struct task_struct *volatile tsk = NULL;
 
printf(" task_struct ->thread.ksp->thread.regsPID   
PPID S  P CMD\n");
 
@@ -3622,7 +3622,7 @@ static v

Re: [PATCH v2] cpuidle/pseries: Fixup CEDE0 latency only for POWER10 onwards

2021-04-29 Thread Michael Ellerman
"Gautham R. Shenoy"  writes:
> From: "Gautham R. Shenoy" 
>
> Commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for
> CEDE(0)") sets the exit latency of CEDE(0) based on the latency values
> of the Extended CEDE states advertised by the platform
>
> On POWER9 LPARs, the firmwares advertise a very low value of 2us for
> CEDE1 exit latency on a Dedicated LPAR. The latency advertized by the
> PHYP hypervisor corresponds to the latency required to wakeup from the
> underlying hardware idle state. However the wakeup latency from the
> LPAR perspective should include
>
> 1. The time taken to transition the CPU from the Hypervisor into the
>LPAR post wakeup from platform idle state
>
> 2. Time taken to send the IPI from the source CPU (waker) to the idle
>target CPU (wakee).
>
> 1. can be measured via timer idle test, where we queue a timer, say
> for 1ms, and enter the CEDE state. When the timer fires, in the timer
> handler we compute how much extra time over the expected 1ms we have
> consumed. On a POWER9 LPAR the numbers are
>
> CEDE latency measured using a timer (numbers in ns)
> N    Min   Median  Avg      90%ile  99%ile  Max   Stddev
> 400  2601  5677    5668.74  5917    6413    9299  455.01
>
> 1. and 2. combined can be determined by an IPI latency test where we
> send an IPI to an idle CPU and in the handler compute the time
> difference between when the IPI was sent and when the handler ran. We
> see the following numbers on POWER9 LPAR.
>
> CEDE latency measured using an IPI (numbers in ns)
> N    Min   Median  Avg      90%ile  99%ile  Max    Stddev
> 400  711   7564    7369.43  8559    9514    9698   1200.01
>
> Suppose we consider the 99th percentile latency value measured using
> the IPI to be the wakeup latency; the value would be 9.5us. This is in
> the ballpark of the default value of 10us.
>
> Hence, use the exit latency of CEDE(0) based on the latency values
> advertized by platform only from POWER10 onwards. The values
   ^^^
> advertized on POWER10 platforms is more realistic and informed by the
> latency measurements. For earlier platforms stick to the default value
> of 10us.

...

> diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
> index a2b5c6f..7207467 100644
> --- a/drivers/cpuidle/cpuidle-pseries.c
> +++ b/drivers/cpuidle/cpuidle-pseries.c
> @@ -419,7 +419,8 @@ static int pseries_idle_probe(void)
>   cpuidle_state_table = shared_states;
>   max_idle_state = ARRAY_SIZE(shared_states);
>   } else {
> - fixup_cede0_latency();
> + if (pvr_version_is(PVR_POWER10))
> + fixup_cede0_latency();

A PVR check like that tests for *only* Power10, not Power10 and onwards
as you say in the change log.

The other question is what should happen on a Power10 LPAR that's
running in Power9 compat mode. I assume in that case we *do* want to use
the firmware provided values, because they're tied to the underlying
CPU, not the compat mode?

In which case a check for !PVR_POWER9 would seem to achieve what we
want?
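
i.e. something like (untested):

	if (!pvr_version_is(PVR_POWER9))
		fixup_cede0_latency();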

cheers


Re: [PATCH v3] pseries/drmem: update LMBs after LPM

2021-04-29 Thread Aneesh Kumar K.V
Laurent Dufour  writes:

> After an LPM, the device tree node ibm,dynamic-reconfiguration-memory may be
> updated by the hypervisor in the case the NUMA topology of the LPAR's
> memory is updated.
>
> This is caught by the kernel, but the memory's node is not updated because
> there is no way to move a memory block between nodes.
>
> If later a memory block is added or removed, drmem_update_dt() is called
> and it is overwriting the DT node to match the added or removed LMB. But
> the LMB's associativity node has not been updated after the DT node update
> and thus the node is overwritten by the Linux's topology instead of the
> hypervisor one.
>
> Introduce a hook called when the ibm,dynamic-reconfiguration-memory node is
> updated to force an update of the LMB's associativity.
>
> Cc: Tyrel Datwyler 
> Signed-off-by: Laurent Dufour 
> ---
>
> V3:
>  - Check rd->dn->name instead of rd->dn->full_name
> V2:
>  - Take Tyrel's idea to rely on OF_RECONFIG_UPDATE_PROPERTY instead of
>  introducing a new hook mechanism.
> ---
>  arch/powerpc/include/asm/drmem.h  |  1 +
>  arch/powerpc/mm/drmem.c   | 35 +++
>  .../platforms/pseries/hotplug-memory.c|  4 +++
>  3 files changed, 40 insertions(+)
>
> diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
> index bf2402fed3e0..4265d5e95c2c 100644
> --- a/arch/powerpc/include/asm/drmem.h
> +++ b/arch/powerpc/include/asm/drmem.h
> @@ -111,6 +111,7 @@ int drmem_update_dt(void);
>  int __init
>  walk_drmem_lmbs_early(unsigned long node, void *data,
> int (*func)(struct drmem_lmb *, const __be32 **, void *));
> +void drmem_update_lmbs(struct property *prop);
>  #endif
>  
>  static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
> index 9af3832c9d8d..f0a6633132af 100644
> --- a/arch/powerpc/mm/drmem.c
> +++ b/arch/powerpc/mm/drmem.c
> @@ -307,6 +307,41 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data,
>   return ret;
>  }
>  
> +/*
> + * Update the LMB associativity index.
> + */
> +static int update_lmb(struct drmem_lmb *updated_lmb,
> +   __maybe_unused const __be32 **usm,
> +   __maybe_unused void *data)
> +{
> + struct drmem_lmb *lmb;
> +
> + /*
> +  * Brute force - there may be a better way to fetch the LMB
> +  */
> + for_each_drmem_lmb(lmb) {
> + if (lmb->drc_index != updated_lmb->drc_index)
> + continue;
> +
> + lmb->aa_index = updated_lmb->aa_index;
> + break;
> + }
> + return 0;
> +}
> +
> +/*
> + * Update the LMB associativity index.
> + *
> + * This needs to be called when the hypervisor is updating the
> + * dynamic-reconfiguration-memory node property.
> + */
> +void drmem_update_lmbs(struct property *prop)
> +{
> + if (!strcmp(prop->name, "ibm,dynamic-memory"))
> + __walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb);
> + else if (!strcmp(prop->name, "ibm,dynamic-memory-v2"))
> + __walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb);
> +}
>  #endif
>  
>  static int init_drmem_lmb_size(struct device_node *dn)
> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
> index 8377f1f7c78e..672ffbee2e78 100644
> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
> @@ -949,6 +949,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,
>   case OF_RECONFIG_DETACH_NODE:
>   err = pseries_remove_mem_node(rd->dn);
>   break;
> + case OF_RECONFIG_UPDATE_PROPERTY:
> + if (!strcmp(rd->dn->name,
> + "ibm,dynamic-reconfiguration-memory"))
> + drmem_update_lmbs(rd->prop);
>   }
>   return notifier_from_errno(err);

How will this interact with DLPAR memory? When we dlpar memory,
ibm,configure-connector is used to fetch the new associativity details
and set drmem_lmb->aa_index correctly there. Once that is done, the
kernel then calls drmem_update_dt(), which will result in the above
notifier callback?

IIUC, the call back then will update drmem_lmb->aa_index again?

-aneesh



Re: [PATCH kernel] powerpc/makefile: Do not redefine $(CPP) for preprocessor

2021-04-29 Thread Michael Ellerman
Daniel Axtens  writes:
> Hi Alexey,
>
>> The $(CPP) (do only preprocessing) macro is already defined in Makefile.
>> However POWERPC redefines it and adds $(KBUILD_CFLAGS) which results
>> in flags duplication. Which is not a big deal by itself except for
>> the flags which depend on other flags and the compiler checks them
>> as it parses the command line.
>>
>> Specifically, scripts/Makefile.build:304 generates ksyms for .S files.
>> If clang+llvm+sanitizer are enabled, this results in
>> -fno-lto -flto -fsanitize=cfi-mfcall   -fno-lto -flto 
>> -fsanitize=cfi-mfcall
>
> Checkpatch doesn't like this line:
> WARNING:COMMIT_LOG_LONG_LINE: Possible unwrapped commit description (prefer a 
> maximum 75 chars per line)
> #14: 
> -fno-lto -flto -fsanitize=cfi-mfcall   -fno-lto -flto 
> -fsanitize=cfi-mfcall
> However, it doesn't make sense to wrap the line so we should just ignore
> checkpatch here.
>
>> in the clang command line and triggers error:
>>
>> clang-13: error: invalid argument '-fsanitize=cfi-mfcall' only allowed with 
>> '-flto'
>>
>> This removes the unnecessary CPP redefinition.
>>
>> Signed-off-by: Alexey Kardashevskiy 
>> ---
>>  arch/powerpc/Makefile | 1 -
>>  1 file changed, 1 deletion(-)
>>
>> diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile
>> index c9d2c7825cd6..3a2f2001c62b 100644
>> --- a/arch/powerpc/Makefile
>> +++ b/arch/powerpc/Makefile
>> @@ -214,7 +214,6 @@ KBUILD_CPPFLAGS  += -I $(srctree)/arch/$(ARCH) $(asinstr)
>>  KBUILD_AFLAGS   += $(AFLAGS-y)
>>  KBUILD_CFLAGS   += $(call cc-option,-msoft-float)
>>  KBUILD_CFLAGS   += -pipe $(CFLAGS-y)
>> -CPP = $(CC) -E $(KBUILD_CFLAGS)
>
> I was trying to check the history to see why powerpc has its own
> definition. It seems to date back at least as far as merging the two
> powerpc platforms into one, maybe it was helpful then. I agree it
> doesn't seem to be needed now.
>
> Snowpatch claims that this breaks the build, but I haven't been able to
> reproduce the issue in either pmac32 or ppc64 defconfig. I have sent it
> off to a fork of mpe's linux-ci repo to see if any of those builds hit
> any issues: https://github.com/daxtens/linux-ci/actions

It does break the build.

cheers


Re: [PATCH v2] cpuidle/pseries: Fixup CEDE0 latency only for POWER10 onwards

2021-04-29 Thread Gautham R Shenoy
Hello Michael,

On Thu, Apr 29, 2021 at 07:56:25PM +1000, Michael Ellerman wrote:
> "Gautham R. Shenoy"  writes:
> > From: "Gautham R. Shenoy" 
> >
> > Commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for
> > CEDE(0)") sets the exit latency of CEDE(0) based on the latency values
> > of the Extended CEDE states advertised by the platform
> >
> > On POWER9 LPARs, the firmwares advertise a very low value of 2us for
> > CEDE1 exit latency on a Dedicated LPAR. The latency advertized by the
> > PHYP hypervisor corresponds to the latency required to wakeup from the
> > underlying hardware idle state. However the wakeup latency from the
> > LPAR perspective should include
> >
> > 1. The time taken to transition the CPU from the Hypervisor into the
> >LPAR post wakeup from platform idle state
> >
> > 2. Time taken to send the IPI from the source CPU (waker) to the idle
> >target CPU (wakee).
> >
> > 1. can be measured via timer idle test, where we queue a timer, say
> > for 1ms, and enter the CEDE state. When the timer fires, in the timer
> > handler we compute how much extra timer over the expected 1ms have we
> > consumed. On a a POWER9 LPAR the numbers are
> >
> > CEDE latency measured using a timer (numbers in ns)
> > N   Min  Median   Avg   90%ile  99%ileMaxStddev
> > 400 2601 5677 5668.7459176413 9299   455.01
> >
> > 1. and 2. combined can be determined by an IPI latency test where we
> > send an IPI to an idle CPU and in the handler compute the time
> > difference between when the IPI was sent and when the handler ran. We
> > see the following numbers on POWER9 LPAR.
> >
> > CEDE latency measured using an IPI (numbers in ns)
> > N   Min  Median   Avg   90%ile  99%ileMaxStddev
> > 400 711  7564 7369.43   85599514  9698   1200.01
> >
> > Suppose, we consider the 99th percentile latency value measured using
> > the IPI to be the wakeup latency, the value would be 9.5us This is in
> > the ballpark of the default value of 10us.
> >
> > Hence, use the exit latency of CEDE(0) based on the latency values
> > advertized by platform only from POWER10 onwards. The values
>^^^
> > advertized on POWER10 platforms is more realistic and informed by the
> > latency measurements. For earlier platforms stick to the default value
> > of 10us.
> 
> ...
> 
> > diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
> > index a2b5c6f..7207467 100644
> > --- a/drivers/cpuidle/cpuidle-pseries.c
> > +++ b/drivers/cpuidle/cpuidle-pseries.c
> > @@ -419,7 +419,8 @@ static int pseries_idle_probe(void)
> > cpuidle_state_table = shared_states;
> > max_idle_state = ARRAY_SIZE(shared_states);
> > } else {
> > -   fixup_cede0_latency();
> > +   if (pvr_version_is(PVR_POWER10))
> > +   fixup_cede0_latency();
> 
> A PVR check like that tests for *only* Power10, not Power10 and onwards
> as you say in the change log.

Right. The accurate thing would be to not do the fixup for

!(PVR_POWER4 || PVR_POWER4p || PVR_POWER5 || PVR_POWER5p || PVR_POWER6 ||
  PVR_POWER7 || PVR_POWER8 || PVR_POWER9)

But that was a bit of a mouthful. I will go with your suggestion (from
private correspondence)

if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10))
fixup_cede0_latency(); 

since it will allow the fixup for processors supporting ISA 3.1
(POWER10 and above) and also on POWER10 CPUs running in compat mode.
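
Annotated, the two halves of that condition cover the two cases (a sketch
only, using the same kernel helpers as above):

	/*
	 * cpu_has_feature(CPU_FTR_ARCH_31): the LPAR runs in ISA v3.1
	 * mode, i.e. POWER10 and onwards in native mode; this is not
	 * set when a POWER10 runs in POWER9 compat mode.
	 *
	 * pvr_version_is(PVR_POWER10): matches the underlying POWER10
	 * processor even in compat mode, but not future processors.
	 */
	if (cpu_has_feature(CPU_FTR_ARCH_31) || pvr_version_is(PVR_POWER10))
		fixup_cede0_latency();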


> 
> The other question is what should happen on a Power10 LPAR that's
> running in Power9 compat mode. I assume in that case we *do* want to use
> the firmware provided values, because they're tied to the underlying
> CPU, not the compat mode?
>

Yes, the firmware provided values are tied to the underlying CPU. Not
the compat mode.


> In which case a check for !PVR_POWER9 would seem to achieve what we
> want?
> 
> cheers

--
Thanks and Regards
gautham.


Re: [PATCH v3] pseries/drmem: update LMBs after LPM

2021-04-29 Thread Laurent Dufour

On 29/04/2021 at 12:27, Aneesh Kumar K.V wrote:

Laurent Dufour  writes:


After a LPM, the device tree node ibm,dynamic-reconfiguration-memory may be
updated by the hypervisor in the case the NUMA topology of the LPAR's
memory is updated.

This is caught by the kernel, but the memory's node is not updated because
there is no way to move a memory block between nodes.

If later a memory block is added or removed, drmem_update_dt() is called
and it is overwriting the DT node to match the added or removed LMB. But
the LMB's associativity node has not been updated after the DT node update
and thus the node is overwritten by the Linux's topology instead of the
hypervisor one.

Introduce a hook called when the ibm,dynamic-reconfiguration-memory node is
updated to force an update of the LMB's associativity.

Cc: Tyrel Datwyler 
Signed-off-by: Laurent Dufour 
---

V3:
  - Check rd->dn->name instead of rd->dn->full_name
V2:
  - Take Tyrel's idea to rely on OF_RECONFIG_UPDATE_PROPERTY instead of
  introducing a new hook mechanism.
---
  arch/powerpc/include/asm/drmem.h  |  1 +
  arch/powerpc/mm/drmem.c   | 35 +++
  .../platforms/pseries/hotplug-memory.c|  4 +++
  3 files changed, 40 insertions(+)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index bf2402fed3e0..4265d5e95c2c 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -111,6 +111,7 @@ int drmem_update_dt(void);
  int __init
  walk_drmem_lmbs_early(unsigned long node, void *data,
  int (*func)(struct drmem_lmb *, const __be32 **, void *));
+void drmem_update_lmbs(struct property *prop);
  #endif
  
  static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)

diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 9af3832c9d8d..f0a6633132af 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -307,6 +307,41 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data,
return ret;
  }
  
+/*

+ * Update the LMB associativity index.
+ */
+static int update_lmb(struct drmem_lmb *updated_lmb,
+ __maybe_unused const __be32 **usm,
+ __maybe_unused void *data)
+{
+   struct drmem_lmb *lmb;
+
+   /*
+* Brute force; there may be a better way to fetch the LMB
+*/
+   for_each_drmem_lmb(lmb) {
+   if (lmb->drc_index != updated_lmb->drc_index)
+   continue;
+
+   lmb->aa_index = updated_lmb->aa_index;
+   break;
+   }
+   return 0;
+}
+
+/*
+ * Update the LMB associativity index.
+ *
+ * This needs to be called when the hypervisor is updating the
+ * dynamic-reconfiguration-memory node property.
+ */
+void drmem_update_lmbs(struct property *prop)
+{
+   if (!strcmp(prop->name, "ibm,dynamic-memory"))
+   __walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb);
+   else if (!strcmp(prop->name, "ibm,dynamic-memory-v2"))
+   __walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb);
+}
  #endif
  
  static int init_drmem_lmb_size(struct device_node *dn)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 8377f1f7c78e..672ffbee2e78 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -949,6 +949,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,
case OF_RECONFIG_DETACH_NODE:
err = pseries_remove_mem_node(rd->dn);
break;
+   case OF_RECONFIG_UPDATE_PROPERTY:
+   if (!strcmp(rd->dn->name,
+   "ibm,dynamic-reconfiguration-memory"))
+   drmem_update_lmbs(rd->prop);
}
return notifier_from_errno(err);


How will this interact with DLPAR memory? When we dlpar memory,
ibm,configure-connector is used to fetch the new associativity details
and set drmem_lmb->aa_index correctly there. Once that is done, the
kernel then calls drmem_update_dt(), which will result in the above
notifier callback?


When a memory DLPAR operation is done, the in-memory DT property
"ibm,dynamic-memory" or "ibm,dynamic-memory-v2" (if existing) has to be updated
to reflect the added/removed memory part. This is done by calling drmem_update_dt().


This patch is addressing the case where the hypervisor has updated the DT
property mentioned above. In that case, the LMB tree should be updated so the
aa_index fields match the DT ones. This way, the next time a memory DLPAR
operation is done, the DT properties "ibm,dynamic-memory" or
"ibm,dynamic-memory-v2" will be rebuilt correctly.



IIUC, the call back then will update drmem_lmb->aa_index again?


drmem_update_dt() is not updating drmem_lmb->aa_index, that's the opposite: it
is rebuilding the in-memory DT property "ibm,dynamic-memory" or
"ibm,dynamic-memory-v2".

Re: [PATCH v3] pseries/drmem: update LMBs after LPM

2021-04-29 Thread Laurent Dufour

On 29/04/2021 at 13:31, Laurent Dufour wrote:

On 29/04/2021 at 12:27, Aneesh Kumar K.V wrote:

Laurent Dufour  writes:


After a LPM, the device tree node ibm,dynamic-reconfiguration-memory may be
updated by the hypervisor in the case the NUMA topology of the LPAR's
memory is updated.

This is caught by the kernel, but the memory's node is not updated because
there is no way to move a memory block between nodes.

If later a memory block is added or removed, drmem_update_dt() is called
and it is overwriting the DT node to match the added or removed LMB. But
the LMB's associativity node has not been updated after the DT node update
and thus the node is overwritten by the Linux's topology instead of the
hypervisor one.

Introduce a hook called when the ibm,dynamic-reconfiguration-memory node is
updated to force an update of the LMB's associativity.

Cc: Tyrel Datwyler 
Signed-off-by: Laurent Dufour 
---

V3:
  - Check rd->dn->name instead of rd->dn->full_name
V2:
  - Take Tyrel's idea to rely on OF_RECONFIG_UPDATE_PROPERTY instead of
  introducing a new hook mechanism.
---
  arch/powerpc/include/asm/drmem.h  |  1 +
  arch/powerpc/mm/drmem.c   | 35 +++
  .../platforms/pseries/hotplug-memory.c    |  4 +++
  3 files changed, 40 insertions(+)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index bf2402fed3e0..4265d5e95c2c 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -111,6 +111,7 @@ int drmem_update_dt(void);
  int __init
  walk_drmem_lmbs_early(unsigned long node, void *data,
    int (*func)(struct drmem_lmb *, const __be32 **, void *));
+void drmem_update_lmbs(struct property *prop);
  #endif
  static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 9af3832c9d8d..f0a6633132af 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -307,6 +307,41 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data,

  return ret;
  }
+/*
+ * Update the LMB associativity index.
+ */
+static int update_lmb(struct drmem_lmb *updated_lmb,
+  __maybe_unused const __be32 **usm,
+  __maybe_unused void *data)
+{
+    struct drmem_lmb *lmb;
+
+    /*
+ * Brute force; there may be a better way to fetch the LMB
+ */
+    for_each_drmem_lmb(lmb) {
+    if (lmb->drc_index != updated_lmb->drc_index)
+    continue;
+
+    lmb->aa_index = updated_lmb->aa_index;
+    break;
+    }
+    return 0;
+}
+
+/*
+ * Update the LMB associativity index.
+ *
+ * This needs to be called when the hypervisor is updating the
+ * dynamic-reconfiguration-memory node property.
+ */
+void drmem_update_lmbs(struct property *prop)
+{
+    if (!strcmp(prop->name, "ibm,dynamic-memory"))
+    __walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb);
+    else if (!strcmp(prop->name, "ibm,dynamic-memory-v2"))
+    __walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb);
+}
  #endif
  static int init_drmem_lmb_size(struct device_node *dn)
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c

index 8377f1f7c78e..672ffbee2e78 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -949,6 +949,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,

  case OF_RECONFIG_DETACH_NODE:
  err = pseries_remove_mem_node(rd->dn);
  break;
+    case OF_RECONFIG_UPDATE_PROPERTY:
+    if (!strcmp(rd->dn->name,
+    "ibm,dynamic-reconfiguration-memory"))
+    drmem_update_lmbs(rd->prop);
  }
  return notifier_from_errno(err);


How will this interact with DLPAR memory? When we dlpar memory,
ibm,configure-connector is used to fetch the new associativity details
and set drmem_lmb->aa_index correctly there. Once that is done, the
kernel then calls drmem_update_dt(), which will result in the above
notifier callback?


When a memory DLPAR operation is done, the in-memory DT property
"ibm,dynamic-memory" or "ibm,dynamic-memory-v2" (if existing) has to be updated
to reflect the added/removed memory part. This is done by calling
drmem_update_dt().


This patch is addressing the case where the hypervisor has updated the DT
property mentioned above. In that case, the LMB tree should be updated so the
aa_index fields match the DT ones. This way, the next time a memory DLPAR
operation is done, the DT properties "ibm,dynamic-memory" or
"ibm,dynamic-memory-v2" will be rebuilt correctly.



IIUC, the call back then will update drmem_lmb->aa_index again?


Oh I missed what you pointed out.
Please ignore my previous answer, I need to double check code.

drmem_update_dt() is not updating drmem_lmb->aa_index, that's the opposite: it
is rebuilding the in-memory DT property "ibm,dynamic-memory" or
"ibm,dynamic-memory-v2".

[PATCH v3] cpuidle/pseries: Fixup CEDE0 latency only for POWER10 onwards

2021-04-29 Thread Gautham R. Shenoy
From: "Gautham R. Shenoy" 

Commit d947fb4c965c ("cpuidle: pseries: Fixup exit latency for
CEDE(0)") sets the exit latency of CEDE(0) based on the latency values
of the Extended CEDE states advertised by the platform.

On POWER9 LPARs, the firmware advertises a very low value of 2us for
CEDE1 exit latency on a Dedicated LPAR. The latency advertised by the
PHYP hypervisor corresponds to the latency required to wake up from the
underlying hardware idle state. However the wakeup latency from the
LPAR perspective should include

1. The time taken to transition the CPU from the Hypervisor into the
   LPAR post wakeup from platform idle state

2. Time taken to send the IPI from the source CPU (waker) to the idle
   target CPU (wakee).

1. can be measured via a timer idle test, where we queue a timer, say
for 1ms, and enter the CEDE state. When the timer fires, in the timer
handler we compute how much extra time over the expected 1ms we have
consumed. On a POWER9 LPAR the numbers are

CEDE latency measured using a timer (numbers in ns)
N   Min  Median   Avg   90%ile  99%ileMaxStddev
400 2601 5677 5668.7459176413 9299   455.01

1. and 2. combined can be determined by an IPI latency test where we
send an IPI to an idle CPU and in the handler compute the time
difference between when the IPI was sent and when the handler ran. We
see the following numbers on POWER9 LPAR.

CEDE latency measured using an IPI (numbers in ns)
N   Min  Median   Avg   90%ile  99%ileMaxStddev
400 711  7564 7369.43   85599514  9698   1200.01

Suppose we consider the 99th percentile latency value measured using
the IPI to be the wakeup latency; the value would be 9.5us. This is in
the ballpark of the default value of 10us.

Hence, use the exit latency of CEDE(0) based on the latency values
advertised by the platform only from POWER10 onwards. The values
advertised on POWER10 platforms are more realistic and informed by the
latency measurements. For earlier platforms, stick to the default value
of 10us. The fix was suggested by Michael Ellerman.

Reported-by: Enrico Joedecke 
Fixes: d947fb4c965c ("cpuidle: pseries: Fixup exit latency for CEDE(0)")
Cc: Michal Suchanek 
Cc: Vaidyanathan Srinivasan 
Signed-off-by: Gautham R. Shenoy 
---
v2-->v3: Modify the condition to preclude only the platforms prior to
 POWER10 from using the firmware provided values.
 
 drivers/cpuidle/cpuidle-pseries.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/drivers/cpuidle/cpuidle-pseries.c b/drivers/cpuidle/cpuidle-pseries.c
index a2b5c6f..694d71e 100644
--- a/drivers/cpuidle/cpuidle-pseries.c
+++ b/drivers/cpuidle/cpuidle-pseries.c
@@ -349,6 +349,15 @@ static void __init fixup_cede0_latency(void)
u64 min_latency_us;
int i;
 
+   /*
+* Use firmware provided latency values on POWER10 onwards and
+* also on POWER10 running in POWER9-compat mode. On platforms
+* prior to POWER10, we cannot rely on the firmware provided
+* values, so we go with the conservative default value.
+*/
+   if (!cpu_has_feature(CPU_FTR_ARCH_31) && !pvr_version_is(PVR_POWER10))
+   return;
+
min_latency_us = dedicated_states[1].exit_latency; // CEDE latency
 
if (parse_cede_parameters())
-- 
1.9.4



Re: [PATCH v3] pseries/drmem: update LMBs after LPM

2021-04-29 Thread Laurent Dufour

On 29/04/2021 at 12:27, Aneesh Kumar K.V wrote:

Laurent Dufour  writes:


After a LPM, the device tree node ibm,dynamic-reconfiguration-memory may be
updated by the hypervisor in the case the NUMA topology of the LPAR's
memory is updated.

This is caught by the kernel, but the memory's node is not updated because
there is no way to move a memory block between nodes.

If later a memory block is added or removed, drmem_update_dt() is called
and it is overwriting the DT node to match the added or removed LMB. But
the LMB's associativity node has not been updated after the DT node update
and thus the node is overwritten by the Linux's topology instead of the
hypervisor one.

Introduce a hook called when the ibm,dynamic-reconfiguration-memory node is
updated to force an update of the LMB's associativity.

Cc: Tyrel Datwyler 
Signed-off-by: Laurent Dufour 
---

V3:
  - Check rd->dn->name instead of rd->dn->full_name
V2:
  - Take Tyrel's idea to rely on OF_RECONFIG_UPDATE_PROPERTY instead of
  introducing a new hook mechanism.
---
  arch/powerpc/include/asm/drmem.h  |  1 +
  arch/powerpc/mm/drmem.c   | 35 +++
  .../platforms/pseries/hotplug-memory.c|  4 +++
  3 files changed, 40 insertions(+)

diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
index bf2402fed3e0..4265d5e95c2c 100644
--- a/arch/powerpc/include/asm/drmem.h
+++ b/arch/powerpc/include/asm/drmem.h
@@ -111,6 +111,7 @@ int drmem_update_dt(void);
  int __init
  walk_drmem_lmbs_early(unsigned long node, void *data,
  int (*func)(struct drmem_lmb *, const __be32 **, void *));
+void drmem_update_lmbs(struct property *prop);
  #endif
  
  static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)

diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index 9af3832c9d8d..f0a6633132af 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -307,6 +307,41 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data,
return ret;
  }
  
+/*

+ * Update the LMB associativity index.
+ */
+static int update_lmb(struct drmem_lmb *updated_lmb,
+ __maybe_unused const __be32 **usm,
+ __maybe_unused void *data)
+{
+   struct drmem_lmb *lmb;
+
+   /*
+* Brute force; there may be a better way to fetch the LMB
+*/
+   for_each_drmem_lmb(lmb) {
+   if (lmb->drc_index != updated_lmb->drc_index)
+   continue;
+
+   lmb->aa_index = updated_lmb->aa_index;
+   break;
+   }
+   return 0;
+}
+
+/*
+ * Update the LMB associativity index.
+ *
+ * This needs to be called when the hypervisor is updating the
+ * dynamic-reconfiguration-memory node property.
+ */
+void drmem_update_lmbs(struct property *prop)
+{
+   if (!strcmp(prop->name, "ibm,dynamic-memory"))
+   __walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb);
+   else if (!strcmp(prop->name, "ibm,dynamic-memory-v2"))
+   __walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb);
+}
  #endif
  
  static int init_drmem_lmb_size(struct device_node *dn)

diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 8377f1f7c78e..672ffbee2e78 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -949,6 +949,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,
case OF_RECONFIG_DETACH_NODE:
err = pseries_remove_mem_node(rd->dn);
break;
+   case OF_RECONFIG_UPDATE_PROPERTY:
+   if (!strcmp(rd->dn->name,
+   "ibm,dynamic-reconfiguration-memory"))
+   drmem_update_lmbs(rd->prop);
}
return notifier_from_errno(err);


How will this interact with DLPAR memory? When we dlpar memory,
ibm,configure-connector is used to fetch the new associativity details
and set drmem_lmb->aa_index correctly there. Once that is done, the
kernel then calls drmem_update_dt(), which will result in the above
notifier callback?

IIUC, the call back then will update drmem_lmb->aa_index again?


Thanks for pointing this out, Aneesh.

You're right, I missed that callback; it was quite invisible during my test
because the value set back in the aa_index was the same.


When drmem_update_dt() is called, there is no need to update the LMB back, and
the DT modify notifier should be ignored.


As DLPAR operations are serialized (by lock_device_hotplug()), I'm proposing to
rely on a static boolean variable to skip this notification, like this:


diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
index f0a6633132af..3c0130720086 100644
--- a/arch/powerpc/mm/drmem.c
+++ b/arch/powerpc/mm/drmem.c
@@ -18,6 +18,7 @@ static int n_root_addr_cells, n_root_size_cells;

 static struct drmem_lmb_info __drmem_i
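
In outline, the guard works like this (a sketch only; the flag name
in_drmem_update and the elided bodies are illustrative, not necessarily what
the final patch uses):

	/* Set while drmem_update_dt() rewrites the property itself, so
	 * the OF_RECONFIG_UPDATE_PROPERTY notifier can tell a
	 * hypervisor-driven update from a kernel-generated one.
	 * Serialized by lock_device_hotplug(), so a plain static
	 * boolean is enough.
	 */
	static bool in_drmem_update;

	int drmem_update_dt(void)
	{
		int rc;

		in_drmem_update = true;
		rc = ...;	/* rebuild ibm,dynamic-memory(-v2) as before */
		in_drmem_update = false;
		return rc;
	}

	void drmem_update_lmbs(struct property *prop)
	{
		/* Ignore updates the kernel itself triggered */
		if (in_drmem_update)
			return;
		...		/* walk the LMBs and refresh aa_index */
	}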

Re: [PATCH kernel v2] powerpc/iommu: Annotate nested lock for lockdep

2021-04-29 Thread Michael Ellerman
On Mon, 1 Mar 2021 17:36:53 +1100, Alexey Kardashevskiy wrote:
> The IOMMU table is divided into pools for concurrent mappings and each
> pool has a separate spinlock. When taking the ownership of an IOMMU group
> to pass through a device to a VM, we lock these spinlocks which triggers
> a false negative warning in lockdep (below).
> 
> This fixes it by annotating the large pool's spinlock as a nest lock
> which makes lockdep not complaining when locking nested locks if
> the nest lock is locked already.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/iommu: Annotate nested lock for lockdep
  https://git.kernel.org/powerpc/c/cc7130bf119add37f36238343a593b71ef6ecc1e

cheers


Re: [PATCH] powerpc/perf: Fix sampled instruction type for larx/stcx

2021-04-29 Thread Michael Ellerman
On Thu, 4 Mar 2021 06:55:37 -0500, Athira Rajeev wrote:
> Sampled Instruction Event Register (SIER) field [46:48]
> identifies the sampled instruction type. ISA v3.1 says value
> of 0b111 for this field as reserved, but in POWER10 it denotes
> LARX/STCX type which will hopefully be fixed in ISA v3.1 update.
> 
> Patch fixes the functions to handle type value 7 for
> CPU_FTR_ARCH_31.

Applied to powerpc/next.

[1/1] powerpc/perf: Fix sampled instruction type for larx/stcx
  https://git.kernel.org/powerpc/c/b4ded42268ee3d703da208278342b9901abe145a

cheers


Re: [PATCH] powerpc/perf: Fix the threshold event selection for memory events in power10

2021-04-29 Thread Michael Ellerman
On Thu, 4 Mar 2021 01:40:15 -0500, Athira Rajeev wrote:
> Memory events (mem-loads and mem-stores) currently use the threshold
> event selection as issue to finish. Power10 supports issue to complete
> as part of thresholding which is more appropriate for mem-loads and
> mem-stores. Hence fix the event code for memory events to use issue
> to complete.

Applied to powerpc/next.

[1/1] powerpc/perf: Fix the threshold event selection for memory events in power10
  https://git.kernel.org/powerpc/c/66d9b7492887d34c711bc05b36c22438acba51b4

cheers


Re: [PATCH kernel 0/2] powerpc/iommu: Stop crashing the host when VM is terminated

2021-04-29 Thread Michael Ellerman
On Tue, 16 Feb 2021 14:33:05 +1100, Alexey Kardashevskiy wrote:
> Killing a VM on a host under memory pressure kills a host which is
> annoying. 1/2 reduces the chances, 2/2 eliminates panic() on
> ioda2.
> 
> 
> This is based on sha1
> f40ddce88593 Linus Torvalds "Linux 5.11".
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc/iommu: Allocate it_map by vmalloc
  https://git.kernel.org/powerpc/c/7f1fa82d79947dfabb4046e1d787da9db2bc1c20
[2/2] powerpc/iommu: Do not immediately panic when failed IOMMU table allocation
  https://git.kernel.org/powerpc/c/4be518d838809e21354f32087aa9c26efc50b410

cheers


Re: [PATCH] powerpc/52xx: Fix an invalid ASM expression ('addi' used instead of 'add')

2021-04-29 Thread Michael Ellerman
On Wed, 21 Apr 2021 17:24:03 + (UTC), Christophe Leroy wrote:
>   AS  arch/powerpc/platforms/52xx/lite5200_sleep.o
> arch/powerpc/platforms/52xx/lite5200_sleep.S: Assembler messages:
> arch/powerpc/platforms/52xx/lite5200_sleep.S:184: Warning: invalid register 
> expression
> 
> In the following code, 'addi' is wrong, has to be 'add'
> 
>   /* local udelay in sram is needed */
>   udelay: /* r11 - tb_ticks_per_usec, r12 - usecs, overwrites r13 */
>   mullw   r12, r12, r11
>   mftb    r13 /* start */
>   addi    r12, r13, r12 /* end */

Applied to powerpc/next.

[1/1] powerpc/52xx: Fix an invalid ASM expression ('addi' used instead of 'add')
  https://git.kernel.org/powerpc/c/8a87a507714386efc39c3ae6fa24d4f79846b522

cheers


Re: [PATCH] powerpc/kasan: Fix shadow start address with modules

2021-04-29 Thread Michael Ellerman
On Sat, 24 Apr 2021 10:34:43 + (UTC), Christophe Leroy wrote:
> Modules are now located before kernel, KASAN area has to
> be extended accordingly.

Applied to powerpc/next.

[1/1] powerpc/kasan: Fix shadow start address with modules
  https://git.kernel.org/powerpc/c/30c400886bad4ac1801516683b71d7714bc2b1b1

cheers


Re: [PATCH] powerpc/signal32: Fix erroneous SIGSEGV on RT signal return

2021-04-29 Thread Michael Ellerman
On Fri, 23 Apr 2021 13:52:10 + (UTC), Christophe Leroy wrote:
> Return of user_read_access_begin() is tested the wrong way,
> leading to a SIGSEGV when the user address is valid and likely
> an Oops when the user address is bad.
> 
> Fix the test.

Applied to powerpc/next.

[1/1] powerpc/signal32: Fix erroneous SIGSEGV on RT signal return
  https://git.kernel.org/powerpc/c/5256426247837feb8703625bda7fcfc824af04cf

cheers


Re: [PATCH v2 1/2] powerpc/64: Fix the definition of the fixmap area

2021-04-29 Thread Michael Ellerman
On Tue, 20 Apr 2021 13:32:48 + (UTC), Christophe Leroy wrote:
> At the time being, the fixmap area is defined at the top of
> the address space or just below KASAN.
> 
> This definition is not valid for PPC64.
> 
> For PPC64, use the top of the I/O space.
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc/64: Fix the definition of the fixmap area
  https://git.kernel.org/powerpc/c/9ccba66d4d2aff9a3909aa77d57ea8b7cc166f3c
[2/2] powerpc/legacy_serial: Use early_ioremap()
  https://git.kernel.org/powerpc/c/0bd3f9e953bd3636e73d296e9bed11a25c09c118

cheers


Re: [PATCH] powerpc/44x: fix spelling mistake in Kconfig "varients" -> "variants"

2021-04-29 Thread Michael Ellerman
On Wed, 16 Dec 2020 11:36:08 +, Colin King wrote:
> There is a spelling mistake in the Kconfig help text. Fix it.

Applied to powerpc/next.

[1/1] powerpc/44x: fix spelling mistake in Kconfig "varients" -> "variants"
  https://git.kernel.org/powerpc/c/ee6b25fa7c037e42cc5f3b5c024b2a779edab6dd

cheers


Re: [PATCH] selftests/powerpc: Add uaccess flush test

2021-04-29 Thread Michael Ellerman
On Thu, 25 Feb 2021 17:19:49 +1100, Daniel Axtens wrote:
> Also based on the RFI and entry flush tests, it counts the L1D misses
> by doing a syscall that does user access: uname, in this case.

Applied to powerpc/next.

[1/1] selftests/powerpc: Add uaccess flush test
  https://git.kernel.org/powerpc/c/da650ada100956b0f00aa4fe9ce33103378ce9ca

cheers


Re: [PATCH 1/1] powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs

2021-04-29 Thread Michael Ellerman
On Thu, 18 Mar 2021 14:44:14 -0300, Leonardo Bras wrote:
> Currently both iommu_alloc_coherent() and iommu_free_coherent() align the
> desired allocation size to PAGE_SIZE, and gets system pages and IOMMU
> mappings (TCEs) for that value.
> 
> When IOMMU_PAGE_SIZE < PAGE_SIZE, this behavior may cause unnecessary
> TCEs to be created for mapping the whole system page.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/kernel/iommu: Align size for IOMMU_PAGE_SIZE() to save TCEs
  https://git.kernel.org/powerpc/c/3c0468d4451eb6b4f6604370639f163f9637a479

cheers


Re: [PATCH 1/1] powerpc/kernel/iommu: Use largepool as a last resort when !largealloc

2021-04-29 Thread Michael Ellerman
On Thu, 18 Mar 2021 14:44:17 -0300, Leonardo Bras wrote:
> As of today, doing iommu_range_alloc() only for !largealloc (npages <= 15)
> will only be able to use 3/4 of the available pages, given pages on
> largepool  not being available for !largealloc.
> 
> This could mean some drivers not being able to fully use all the available
> pages for the DMA window.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/kernel/iommu: Use largepool as a last resort when !largealloc
  https://git.kernel.org/powerpc/c/fc5590fd56c9608f317729b59a56dad2a75d633f

cheers


Re: [PATCH 1/2] powerpc/fadump: Fix sparse warnings

2021-04-29 Thread Michael Ellerman
On Wed, 21 Apr 2021 22:54:01 +1000, Michael Ellerman wrote:
> Sparse says:
>   arch/powerpc/kernel/fadump.c:48:16: warning: symbol 'fadump_kobj' was not 
> declared. Should it be static?
>   arch/powerpc/kernel/fadump.c:55:27: warning: symbol 'crash_mrange_info' was 
> not declared. Should it be static?
>   arch/powerpc/kernel/fadump.c:61:27: warning: symbol 'reserved_mrange_info' 
> was not declared. Should it be static?
>   arch/powerpc/kernel/fadump.c:83:12: warning: symbol 'fadump_cma_init' was 
> not declared. Should it be static?
> 
> And indeed none of them are used outside this file, they can all be made
> static. Also fadump_kobj needs to be moved inside the ifdef where it's
> used.

Applied to powerpc/next.

[1/2] powerpc/fadump: Fix sparse warnings
  https://git.kernel.org/powerpc/c/2e341f56a16a71f240c87ec69711aad0d95a704c
[2/2] powerpc/powernv: Fix type of opal_mpipl_query_tag() addr argument
  https://git.kernel.org/powerpc/c/d936f8182e1bd18f5e9e6c5e8d8b69261200ca96

cheers


Re: [PATCH] powerpc/64s: Add FA_DUMP to defconfig

2021-04-29 Thread Michael Ellerman
On Tue, 20 Apr 2021 14:22:09 +1000, Michael Ellerman wrote:
> FA_DUMP (Firmware Assisted Dump) is a powerpc only feature that should
> be enabled in our defconfig to get some build / test coverage.

Applied to powerpc/next.

[1/1] powerpc/64s: Add FA_DUMP to defconfig
  https://git.kernel.org/powerpc/c/7d946276570755d6b53d29bd100271f18cb8bf95

cheers


Re: [PATCH] powerpc/configs: Add IBMVNIC to some 64-bit configs

2021-04-29 Thread Michael Ellerman
On Tue, 2 Mar 2021 13:09:54 +1100, Michael Ellerman wrote:
> This is an IBM specific driver that we should enable to get some
> build/boot testing.

Applied to powerpc/next.

[1/1] powerpc/configs: Add IBMVNIC to some 64-bit configs
  https://git.kernel.org/powerpc/c/421a7483878cf3f356ebb871effe81997a45dda7

cheers


Re: [PATCH] powerpc/kvm: Fix build error when PPC_MEM_KEYS/PPC_PSERIES=n

2021-04-29 Thread Michael Ellerman
On Sun, 25 Apr 2021 21:58:31 +1000, Michael Ellerman wrote:
> lkp reported a randconfig failure:
> 
>  In file included from arch/powerpc/include/asm/book3s/64/pkeys.h:6,
> from arch/powerpc/kvm/book3s_64_mmu_host.c:15:
>  arch/powerpc/include/asm/book3s/64/hash-pkey.h: In function 
> 'hash__vmflag_to_pte_pkey_bits':
>   >> arch/powerpc/include/asm/book3s/64/hash-pkey.h:10:23: error: 
> 'VM_PKEY_BIT0' undeclared
> 10 |  return (((vm_flags & VM_PKEY_BIT0) ? H_PTE_PKEY_BIT0 : 0x0UL) |
>  |   ^~~~
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/kvm: Fix build error when PPC_MEM_KEYS/PPC_PSERIES=n
  https://git.kernel.org/powerpc/c/ee1bc694fbaec1a662770703fc34a74abf418938

cheers


Re: [PATCH] powerpc/64s: Fix mm_cpumask memory ordering comment

2021-04-29 Thread Michael Ellerman
On Thu, 22 Apr 2021 01:17:32 +1000, Nicholas Piggin wrote:
> The memory ordering comment no longer applies, because mm_ctx_id is
> no longer used anywhere. At best always been difficult to follow.
> 
> It's better to consider the load on which the slbmte depends on, which
> the MMU depends on before it can start loading TLBs, rather than a
> store which may or may not have a subsequent dependency chain to the
> slbmte.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/64s: Fix mm_cpumask memory ordering comment
  https://git.kernel.org/powerpc/c/0f197ddce403af33aa7f15af55644549778a9988

cheers


Re: [PATCH v3] powerpc: make ALTIVEC select PPC_FPU

2021-04-29 Thread Michael Ellerman
On Wed, 21 Apr 2021 14:06:47 -0700, Randy Dunlap wrote:
> On a kernel config with ALTIVEC=y and PPC_FPU not set/enabled,
> there are build errors:
> 
> drivers/cpufreq/pmac32-cpufreq.c:262:2: error: implicit declaration of 
> function 'enable_kernel_fp' [-Werror,-Wimplicit-function-declaration]
>enable_kernel_fp();
> ../arch/powerpc/lib/sstep.c: In function 'do_vec_load':
> ../arch/powerpc/lib/sstep.c:637:3: error: implicit declaration of function 
> 'put_vr' [-Werror=implicit-function-declaration]
>   637 |   put_vr(rn, &u.v);
>   |   ^~
> ../arch/powerpc/lib/sstep.c: In function 'do_vec_store':
> ../arch/powerpc/lib/sstep.c:660:3: error: implicit declaration of function 
> 'get_vr'; did you mean 'get_oc'? [-Werror=implicit-function-declaration]
>   660 |   get_vr(rn, &u.v);
>   |   ^~
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: make ALTIVEC select PPC_FPU
  https://git.kernel.org/powerpc/c/389586333c0229a4fbc5c1a7f89148d141293682

cheers


Re: [PATCH] powerpc: Avoid clang uninitialized warning in __get_user_size_allowed

2021-04-29 Thread Michael Ellerman
On Mon, 26 Apr 2021 13:35:18 -0700, Nathan Chancellor wrote:
> Commit 9975f852ce1b ("powerpc/uaccess: Remove calls to __get_user_bad()
> and __put_user_bad()") switch to BUILD_BUG() in the default case, which
> leaves x uninitialized. This will not be an issue because the build will
> be broken in that case but clang does static analysis before it realizes
> the default case will be done so it warns about x being uninitialized
> (trimmed for brevity):
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Avoid clang uninitialized warning in __get_user_size_allowed
  https://git.kernel.org/powerpc/c/f9cd5f91a897ea0c45d0059ceeb091cee78c6ebe

cheers


Re: [PATCH v3 0/4] powerpc/selftests: Add Power10 2nd DAWR selftests

2021-04-29 Thread Michael Ellerman
On Mon, 12 Apr 2021 16:52:14 +0530, Ravi Bangoria wrote:
> Power10 introduced 2nd watchpoint (DAWR). ISA 3.1, Book 3S, Ch 9 -
> 'Debug Facilities' covers the feature in detail. Kernel patches to
> enable the 2nd DAWR are already in[1], including kvm enablement[2].
> These patches adds selftests for 2nd DAWR.
> 
> [1]: https://git.kernel.org/torvalds/c/deb2bd9bcc8428d4b65b6ba640ba8b57c1b20b17
> [2]: https://git.kernel.org/torvalds/c/bd1de1a0e6eff4bde5ceae969673b85b8446fd6a
> 
> [...]

Applied to powerpc/next.

[1/4] powerpc/selftests/ptrace-hwbreak: Add testcases for 2nd DAWR
  https://git.kernel.org/powerpc/c/dae4ff8031b49af4721101d6298fc14cb9c16a4c
[2/4] powerpc/selftests/perf-hwbreak: Coalesce event creation code
  https://git.kernel.org/powerpc/c/c9cb0afb4eaa03801322f48dad4093979ff45e88
[3/4] powerpc/selftests/perf-hwbreak: Add testcases for 2nd DAWR
  https://git.kernel.org/powerpc/c/c65c64cc7bbd273121edf96a7a5a0269038ab454
[4/4] powerpc/selftests: Add selftest to test concurrent perf/ptrace events
  https://git.kernel.org/powerpc/c/290f7d8ce2b1eea5413bb120e0d9d610675b7fba

cheers


Re: [PATCH] powerpc/papr_scm: Mark nvdimm as unarmed if needed during probe

2021-04-29 Thread Michael Ellerman
On Mon, 29 Mar 2021 17:01:03 +0530, Vaibhav Jain wrote:
> In case an nvdimm is found to be unarmed during probe then set its
> NDD_UNARMED flag before nvdimm_create(). This would enforce a
> read-only access to the nvdimm region. Presently even if an nvdimm is
> unarmed it's not marked as read-only on ppc64 guests.
> 
> The patch updates papr_scm_nvdimm_init() to force query of nvdimm
> health via __drc_pmem_query_health() and if nvdimm is found to be
> unarmed then set the nvdimm flag ND_UNARMED for nvdimm_create().

Applied to powerpc/next.

[1/1] powerpc/papr_scm: Mark nvdimm as unarmed if needed during probe
  https://git.kernel.org/powerpc/c/adb68c38d8d49a3d60805479c558649bb2182473

cheers


Re: [PATCH] powerpc/64s: remove unneeded semicolon

2021-04-29 Thread Michael Ellerman
On Tue, 2 Feb 2021 11:34:36 +0800, Yang Li wrote:
> Eliminate the following coccicheck warning:
> ./arch/powerpc/platforms/powernv/setup.c:160:2-3: Unneeded semicolon

Applied to powerpc/next.

[1/1] powerpc/64s: remove unneeded semicolon
  https://git.kernel.org/powerpc/c/caea7b833d866e0badf4b12dc41bf9fe6a7295f3

cheers


Re: [PATCH] powerpc/eeh: remove unneeded semicolon

2021-04-29 Thread Michael Ellerman
On Tue, 2 Feb 2021 11:21:36 +0800, Yang Li wrote:
> Eliminate the following coccicheck warning:
> ./arch/powerpc/kernel/eeh.c:782:2-3: Unneeded semicolon

Applied to powerpc/next.

[1/1] powerpc/eeh: remove unneeded semicolon
  https://git.kernel.org/powerpc/c/f3d03fc748d4e48f4cd8dea1bfeb173cb3b0c19f

cheers


Re: [PATCH] selftests/powerpc: remove unneeded semicolon

2021-04-29 Thread Michael Ellerman
On Mon, 8 Feb 2021 18:41:10 +0800, Yang Li wrote:
> Eliminate the following coccicheck warning:
> ./tools/testing/selftests/powerpc/nx-gzip/gzfht_test.c:327:4-5: Unneeded
> semicolon

Applied to powerpc/next.

[1/1] selftests/powerpc: remove unneeded semicolon
  https://git.kernel.org/powerpc/c/0db11461677aa5105b9ebbd939aee0ceb77a988b

cheers


Re: PPC32: Boot regression on Nintendo Wii, after create_branch rework in 5.8

2021-04-29 Thread Christophe Leroy

Hi,

On 28/04/2021 at 20:14, Jonathan Neuschäfer wrote:

Hi,

On Wed, Apr 28, 2021 at 11:33:24AM +1000, Jordan Niethe wrote:

On Mon, Apr 26, 2021 at 1:40 AM Jonathan Neuschäfer
 wrote:


Hi,

I recently booted my Wii again, and I noticed a regression at boot time.
Output stops after the "Finalizing device tree... flat tree at 0xXX"
message. I bisected it to this commit in the 5.8 development cycle:

commit 7c95d8893fb55869882c9f68f4c94840dc43f18f
Author: Jordan Niethe 
Date:   Wed May 6 13:40:25 2020 +1000

 powerpc: Change calling convention for create_branch() et. al.

[...]

Do you have any hints on how to debug and/or fix this issue?

Thanks for bisecting and reporting.
The "Finalizing device tree... flat tree at 0xXX" message comes
from the bootwrapper so if that is the last output it must be crashing
pretty early.
Commit 7c95d8893fb5 ("powerpc: Change calling convention for
create_branch() et. al.") made a change to machine_init() in
setup_32.c which seems like it might be a likely culprit for causing
early crashing.
The branch that is created and patched is just for optimization, so to
see if that is in fact the problem it might be worth trying to boot
with a patch like below

diff --git a/arch/powerpc/kernel/setup_32.c b/arch/powerpc/kernel/setup_32.c
--- a/arch/powerpc/kernel/setup_32.c
+++ b/arch/powerpc/kernel/setup_32.c
@@ -87,9 +87,6 @@ notrace void __init machine_init(u64 dt_ptr)

 patch_instruction_site(&patch__memcpy_nocache, ppc_inst(PPC_INST_NOP));

-   create_cond_branch(&insn, addr, branch_target(addr), 0x82);
-   patch_instruction(addr, insn);  /* replace b by bne cr0 */
-
 /* Do some early initialization based on the flat device tree */
 early_init_devtree(__va(dt_ptr));


This brings no improvement, unfortunately. The output is still:

top of MEM2 @ 13F0

zImage starting: loaded at 0x00b0 (sp: 0x01145f90)
Allocating 0xae3970 bytes for kernel...
Decompressing (0x <- 0x00b11000:0x01143576)...
Done! Decompressed 0xa65fdc bytes

Linux/PowerPC load: root=/dev/mmcblk0p2 rootwait console=usbgecko1
Finalizing device tree... flat tree at 0x11467a0


I'll probably dig deeper next weekend.



I think the problem is when calling apply_feature_fixups() from early_init().

At that time, code is not relocated yet and 'current' is not set up yet.

You probably have CONFIG_STACKPROTECTOR=y

Before this patch, do_feature_fixups() was a simple function that was not using
the stack for storing data. But that patch changed it: 'instr' is now passed by
reference, so it can't go in a general register anymore; it goes onto the stack.
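
The effect can be reproduced with a user-space toy (illustrative only, not
kernel code): with -fstack-protector-strong, GCC instruments any function
containing an address-taken local, which is exactly what passing 'instr' by
reference creates. The disassembly below shows the resulting canary sequence.

	/* build with: gcc -O2 -fstack-protector-strong -S demo.c */
	void use_by_value(unsigned int insn);		/* insn may stay in a GPR */
	void use_by_reference(unsigned int *insn);	/* insn must live in memory */

	unsigned int demo(void)
	{
		unsigned int insn = 0x60000000;	/* nop */

		/* Taking the address forces a stack slot for 'insn', and
		 * -fstack-protector-strong then emits the canary store on
		 * entry plus the check and __stack_chk_fail call on exit.
		 */
		use_by_reference(&insn);
		return insn;
	}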


So GCC sets up a stack guard:

 <do_feature_fixups>:
   0:   7c 04 28 40     cmplw   r4,r5
   4:   94 21 ff b0     stwu    r1,-80(r1)
   8:   81 22 02 28     lwz     r9,552(r2)  <= r2 is not set up yet
   c:   91 21 00 1c     stw     r9,28(r1)
...
 180:   81 21 00 1c     lwz     r9,28(r1)
 184:   81 42 02 28     lwz     r10,552(r2)
 188:   7d 29 52 79     xor.    r9,r9,r10
 18c:   39 40 00 00     li      r10,0
 190:   40 82 00 84     bne     214 <do_feature_fixups+0x214>
 194:   38 21 00 50     addi    r1,r1,80
 198:   4e 80 00 20     blr
...
 214:   7c 08 02 a6     mflr    r0
 218:   90 01 00 54     stw     r0,84(r1)
 21c:   92 e1 00 2c     stw     r23,44(r1)
 220:   93 01 00 30     stw     r24,48(r1)
 224:   93 21 00 34     stw     r25,52(r1)
 228:   93 41 00 38     stw     r26,56(r1)
 22c:   93 61 00 3c     stw     r27,60(r1)
 230:   93 81 00 40     stw     r28,64(r1)
 234:   93 a1 00 44     stw     r29,68(r1)
 238:   93 c1 00 48     stw     r30,72(r1)
 23c:   93 e1 00 4c     stw     r31,76(r1)
 240:   48 00 00 01     bl      240 <do_feature_fixups+0x240>
                        240: R_PPC_REL24        __stack_chk_fail

So all the feature-fixup and code-patching code used this early needs to be
built without the stack protector.

By the way, I also see some printk calls in do_feature_fixups(). Those won't
work either: because of the relocation, the format string won't be found. But
that's not the problem you have at the moment.


Christophe


[powerpc:next-test 263/291] arch/powerpc/platforms/powernv/opal-fadump.c:683:60: sparse: sparse: incorrect type in argument 2 (different base types)

2021-04-29 Thread kernel test robot
tree:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next-test
head:   5256426247837feb8703625bda7fcfc824af04cf
commit: d936f8182e1bd18f5e9e6c5e8d8b69261200ca96 [263/291] powerpc/powernv: Fix type of opal_mpipl_query_tag() addr argument
config: powerpc64-randconfig-s031-20210429 (attached as .config)
compiler: powerpc64le-linux-gcc (GCC) 9.3.0
reproduce:
        wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross
        chmod +x ~/bin/make.cross
        # apt-get install sparse
        # sparse version: v0.6.3-341-g8af24329-dirty
        # https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id=d936f8182e1bd18f5e9e6c5e8d8b69261200ca96
        git remote add powerpc https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
        git fetch --no-tags powerpc next-test
        git checkout d936f8182e1bd18f5e9e6c5e8d8b69261200ca96
        # save the attached .config to linux build tree
        COMPILER_INSTALL_PATH=$HOME/0day COMPILER=gcc-9.3.0 make.cross C=1 CF='-fdiagnostic-prefix -D__CHECK_ENDIAN__' W=1 ARCH=powerpc64

If you fix the issue, kindly add following tag as appropriate
Reported-by: kernel test robot 


sparse warnings: (new ones prefixed by >>)
   arch/powerpc/platforms/powernv/opal-fadump.c:104:41: sparse: sparse: 
incorrect type in assignment (different base types) @@ expected unsigned 
long long [usertype] boot_mem_dest_addr @@ got restricted __be64 const 
[usertype] dest @@
   arch/powerpc/platforms/powernv/opal-fadump.c:104:41: sparse: expected 
unsigned long long [usertype] boot_mem_dest_addr
   arch/powerpc/platforms/powernv/opal-fadump.c:104:41: sparse: got 
restricted __be64 const [usertype] dest
   arch/powerpc/platforms/powernv/opal-fadump.c:130:22: sparse: sparse: 
incorrect type in assignment (different base types) @@ expected unsigned 
long base @@ got restricted __be64 const [usertype] src @@
   arch/powerpc/platforms/powernv/opal-fadump.c:130:22: sparse: expected 
unsigned long base
   arch/powerpc/platforms/powernv/opal-fadump.c:130:22: sparse: got 
restricted __be64 const [usertype] src
   arch/powerpc/platforms/powernv/opal-fadump.c:131:22: sparse: sparse: 
incorrect type in assignment (different base types) @@ expected unsigned 
long size @@ got restricted __be64 const [usertype] size @@
   arch/powerpc/platforms/powernv/opal-fadump.c:131:22: sparse: expected 
unsigned long size
   arch/powerpc/platforms/powernv/opal-fadump.c:131:22: sparse: got 
restricted __be64 const [usertype] size
   arch/powerpc/platforms/powernv/opal-fadump.c:146:46: sparse: sparse: 
incorrect type in assignment (different base types) @@ expected unsigned 
long reserve_dump_area_start @@ got restricted __be64 const [usertype] dest 
@@
   arch/powerpc/platforms/powernv/opal-fadump.c:146:46: sparse: expected 
unsigned long reserve_dump_area_start
   arch/powerpc/platforms/powernv/opal-fadump.c:146:46: sparse: got 
restricted __be64 const [usertype] dest
   arch/powerpc/platforms/powernv/opal-fadump.c:196:41: sparse: sparse: 
incorrect type in assignment (different base types) @@ expected restricted 
__be64 [usertype] src @@ got unsigned long long @@
   arch/powerpc/platforms/powernv/opal-fadump.c:196:41: sparse: expected 
restricted __be64 [usertype] src
   arch/powerpc/platforms/powernv/opal-fadump.c:196:41: sparse: got 
unsigned long long
   arch/powerpc/platforms/powernv/opal-fadump.c:197:41: sparse: sparse: 
incorrect type in assignment (different base types) @@ expected restricted 
__be64 [usertype] dest @@ got unsigned long long [usertype] addr @@
   arch/powerpc/platforms/powernv/opal-fadump.c:197:41: sparse: expected 
restricted __be64 [usertype] dest
   arch/powerpc/platforms/powernv/opal-fadump.c:197:41: sparse: got 
unsigned long long [usertype] addr
   arch/powerpc/platforms/powernv/opal-fadump.c:198:41: sparse: sparse: 
incorrect type in assignment (different base types) @@ expected restricted 
__be64 [usertype] size @@ got unsigned long long @@
   arch/powerpc/platforms/powernv/opal-fadump.c:198:41: sparse: expected 
restricted __be64 [usertype] size
   arch/powerpc/platforms/powernv/opal-fadump.c:198:41: sparse: got 
unsigned long long
   arch/powerpc/platforms/powernv/opal-fadump.c:208:53: sparse: sparse: 
restricted __be64 degrades to integer
   arch/powerpc/platforms/powernv/opal-fadump.c:276:56: sparse: sparse: 
incorrect type in argument 2 (different base types) @@ expected unsigned 
long long [usertype] src @@ got restricted __be64 [usertype] src @@
   arch/powerpc/platforms/powernv/opal-fadump.c:276:56: sparse: expected 
unsigned long long [usertype] src
   arch/powerpc/platforms/powernv/opal-fadump.c:276:56: sparse: got 
restricted __be64 [usertype] src
   arch/powerpc/platforms/powernv/opal-fadump.c:277:56: sparse: sparse: 
incorrect 

[PATCH] powerpc/32: Fix boot failure with CONFIG_STACKPROTECTOR

2021-04-29 Thread Christophe Leroy
Commit 7c95d8893fb5 ("powerpc: Change calling convention for
create_branch() et. al.") made the stack frame of do_feature_fixups()
more complex, leading to GCC setting up a stack guard when
CONFIG_STACKPROTECTOR is selected.

The problem is that do_feature_fixups() is called very early
while 'current' in r2 is not set up yet and the code is still
not at the final address used at link time.

So, like other instrumentation, stack protection needs to be
deactivated for feature-fixups.c and code-patching.c

Reported-by: Jonathan Neuschaefer 
Fixes: 7c95d8893fb5 ("powerpc: Change calling convention for create_branch() et. al.")
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/lib/Makefile | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index f2c690ee75d1..cc1a8a0f311e 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -5,6 +5,9 @@
 
 ccflags-$(CONFIG_PPC64):= $(NO_MINIMAL_TOC)
 
+CFLAGS_code-patching.o += -fno-stack-protector
+CFLAGS_feature-fixups.o += -fno-stack-protector
+
 CFLAGS_REMOVE_code-patching.o = $(CC_FLAGS_FTRACE)
 CFLAGS_REMOVE_feature-fixups.o = $(CC_FLAGS_FTRACE)
 
-- 
2.25.0



[PATCH v5] pseries: prevent free CPU ids to be reused on another node

2021-04-29 Thread Laurent Dufour
When a CPU is hot added, the CPU ids are taken from the available mask,
starting from the lowest possible set. If that set of values was previously
used for CPUs attached to a different node, this looks to applications as if
these CPUs have migrated from one node to another, which is not expected in
real life.

To prevent this, the CPU ids used for each node need to be recorded and not
reused on another node. However, to prevent CPU hot plug from failing when a
node runs out of CPU ids, the capability to reuse other nodes' free CPU ids
is kept. A warning is displayed in such a case to inform the user.

A new CPU bit mask (node_recorded_ids_map) is introduced for each possible
node. It is populated with the CPUs onlined at boot time, and then updated
when a CPU is hot plugged to a node. The bits in that mask remain set when
the CPU is hot unplugged, to record that these CPU ids have been used for
this node.

If no id set is found, a retry is made without removing the ids used on
the other nodes, to try reusing them. This is the way ids were allocated
prior to this patch.

The effect of this patch can be seen by removing and adding CPUs using the
Qemu monitor. In the following case, the first CPU from the node 2 is
removed, then the first one from the node 1 is removed too. Later, the
first CPU of the node 2 is added back. Without that patch, the kernel will
numbered these CPUs using the first CPU ids available which are the ones
freed when removing the second CPU of the node 0. This leads to the CPU ids
16-23 to move from the node 1 to the node 2. With the patch applied, the
CPU ids 32-39 are used since they are the lowest free ones which have not
been used on another node.

At boot time:
[root@vm40 ~]# numactl -H | grep cpus
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
node 1 cpus: 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
node 2 cpus: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47

Vanilla kernel, after the CPU hot unplug/plug operations:
[root@vm40 ~]# numactl -H | grep cpus
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
node 1 cpus: 24 25 26 27 28 29 30 31
node 2 cpus: 16 17 18 19 20 21 22 23 40 41 42 43 44 45 46 47

Patched kernel, after the CPU hot unplug/plug operations:
[root@vm40 ~]# numactl -H | grep cpus
node 0 cpus: 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
node 1 cpus: 24 25 26 27 28 29 30 31
node 2 cpus: 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47

Signed-off-by: Laurent Dufour 
---
V5:
 - Rework code structure
 - Reintroduce the capability to reuse other nodes' ids.
V4: addressing Nathan's comment
 - Rename the local variable named 'nid' into 'assigned_node'
V3: addressing Nathan's comments
 - Remove the retry feature
 - Reduce the number of local variables (removing 'i')
 - Add comment about the cpu_add_remove_lock protecting the added CPU mask.
 V2: (no functional changes)
 - update the test's output in the commit's description
 - node_recorded_ids_map should be static
---
 arch/powerpc/platforms/pseries/hotplug-cpu.c | 171 ++-
 1 file changed, 132 insertions(+), 39 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index 7e970f81d8ff..e1f224320102 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -39,6 +39,12 @@
 /* This version can't take the spinlock, because it never returns */
 static int rtas_stop_self_token = RTAS_UNKNOWN_SERVICE;
 
+/*
+ * Record the CPU ids used on each node.
+ * Protected by cpu_add_remove_lock.
+ */
+static cpumask_var_t node_recorded_ids_map[MAX_NUMNODES];
+
 static void rtas_stop_self(void)
 {
static struct rtas_args args;
@@ -139,72 +145,148 @@ static void pseries_cpu_die(unsigned int cpu)
paca_ptrs[cpu]->cpu_start = 0;
 }
 
+/**
+ * find_cpu_id_range - find a linear range of @nthreads free CPU ids.
+ * @nthreads : the number of threads (cpu ids)
+ * @assigned_node : the node it belongs to or NUMA_NO_NODE if free ids from any
+ *  node can be picked.
+ * @cpu_mask: the returned CPU mask.
+ *
+ * Returns 0 on success.
+ */
+static int find_cpu_id_range(unsigned int nthreads, int assigned_node,
+cpumask_var_t *cpu_mask)
+{
+   cpumask_var_t candidate_mask;
+   unsigned int cpu, node;
+   int rc = -ENOSPC;
+
+   if (!zalloc_cpumask_var(&candidate_mask, GFP_KERNEL))
+   return -ENOMEM;
+
+   cpumask_clear(*cpu_mask);
+   for (cpu = 0; cpu < nthreads; cpu++)
+   cpumask_set_cpu(cpu, *cpu_mask);
+
+   BUG_ON(!cpumask_subset(cpu_present_mask, cpu_possible_mask));
+
+   /* Get a bitmap of unoccupied slots. */
+   cpumask_xor(candidate_mask, cpu_possible_mask, cpu_present_mask);
+
+   if (assigned_node != NUMA_NO_NODE) {
+   /*
+* Remove free ids previously assigned on the other nodes. We
+* can walk only online nodes because once a node became online

Re: [PATCH] powerpc/32: Fix boot failure with CONFIG_STACKPROTECTOR

2021-04-29 Thread Jonathan Neuschäfer
On Thu, Apr 29, 2021 at 04:52:09PM +, Christophe Leroy wrote:
> Commit 7c95d8893fb5 ("powerpc: Change calling convention for
> create_branch() et. al.") complexified the frame of function
> do_feature_fixups(), leading to GCC setting up a stack
> guard when CONFIG_STACKPROTECTOR is selected.
> 
> The problem is that do_feature_fixups() is called very early
> while 'current' in r2 is not set up yet and the code is still
> not at the final address used at link time.
> 
> So, like other instrumentation, stack protection needs to be
> deactivated for feature-fixups.c and code-patching.c
> 
> Reported-by: Jonathan Neuschaefer 
> Fixes: 7c95d8893fb5 ("powerpc: Change calling convention for create_branch() 
> et. al.")
> Signed-off-by: Christophe Leroy 
> ---
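
Deactivating stack protection for those two files typically amounts to
something like the following in arch/powerpc/lib/Makefile (a sketch of the
usual kernel convention, not necessarily the literal patch):

	CFLAGS_code-patching.o += -fno-stack-protector
	CFLAGS_feature-fixups.o += -fno-stack-protector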

Thank you for looking into this issue. This patch does indeed fix my
issue.

Tested-by: Jonathan Neuschaefer 


Thanks again,
Jonathan


signature.asc
Description: PGP signature


[PATCH] ppc64/numa: consider the max numa node for migratable LPAR

2021-04-29 Thread Laurent Dufour
When an LPAR is migratable, we should consider the maximum possible NUMA
node count instead of the number of NUMA nodes on the current system.

The DT property 'ibm,current-associativity-domains' defines the maximum
number of nodes the LPAR can see when running on that box. But if the LPAR
is migrated to another box, it may see up to the number of nodes defined by
'ibm,max-associativity-domains'. So if an LPAR is migratable, that value
should be used.

Unfortunately, there is no easy way to know if an LPAR is migratable or
not. The hypervisor exports the property 'ibm,migratable-partition' in the
case it is set to allow partition migration, but that would not mean that
the current partition is migratable.

Without this patch, when an LPAR is started on a 2-node box and then
migrated to a 3-node box, the hypervisor may spread the LPAR's CPUs on the
3rd node. In that case, if a CPU from that 3rd node is added to the LPAR,
it will be assigned to the wrong node because the kernel has been set up to
use at most 2 nodes (the configuration of the departure box). With this
patch applied, the CPU is correctly added to the 3rd node.

Cc: Srikar Dronamraju 
Signed-off-by: Laurent Dufour 
---
 arch/powerpc/mm/numa.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
index f2bf98bdcea2..673fa6e47850 100644
--- a/arch/powerpc/mm/numa.c
+++ b/arch/powerpc/mm/numa.c
@@ -893,7 +893,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
 static void __init find_possible_nodes(void)
 {
struct device_node *rtas;
-   const __be32 *domains;
+   const __be32 *domains = NULL;
int prop_length, max_nodes;
u32 i;
 
@@ -909,9 +909,14 @@ static void __init find_possible_nodes(void)
 * it doesn't exist, then fallback on ibm,max-associativity-domains.
 * Current denotes what the platform can support compared to max
 * which denotes what the Hypervisor can support.
+*
+* If the LPAR is migratable, new nodes might be activated after a LPM,
+* so we should consider the max number in that case.
 */
-   domains = of_get_property(rtas, "ibm,current-associativity-domains",
-   &prop_length);
+   if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
+   domains = of_get_property(rtas,
+ "ibm,current-associativity-domains",
+ &prop_length);
if (!domains) {
domains = of_get_property(rtas, "ibm,max-associativity-domains",
&prop_length);
@@ -920,6 +925,9 @@ static void __init find_possible_nodes(void)
}
 
max_nodes = of_read_number(&domains[min_common_depth], 1);
+   printk(KERN_INFO "Partition configured for %d NUMA nodes.\n",
+  max_nodes);
+
for (i = 0; i < max_nodes; i++) {
if (!node_possible(i))
node_set(i, node_possible_map);
-- 
2.31.1



Re: [PATCH v3] pseries/drmem: update LMBs after LPM

2021-04-29 Thread Tyrel Datwyler
On 4/29/21 3:27 AM, Aneesh Kumar K.V wrote:
> Laurent Dufour  writes:
> 
>> After an LPM, the device tree node ibm,dynamic-reconfiguration-memory may be
>> updated by the hypervisor in the case the NUMA topology of the LPAR's
>> memory is updated.
>>
>> This is caught by the kernel, but the memory's node is not updated because
>> there is no way to move a memory block between nodes.
>>
>> If later a memory block is added or removed, drmem_update_dt() is called
>> and it overwrites the DT node to match the added or removed LMB. But the
>> LMB's associativity has not been updated after the DT node update, and
>> thus the node is overwritten with Linux's topology instead of the
>> hypervisor's one.
>>
>> Introduce a hook called when the ibm,dynamic-reconfiguration-memory node is
>> updated to force an update of the LMB's associativity.
>>
>> Cc: Tyrel Datwyler 
>> Signed-off-by: Laurent Dufour 
>> ---
>>
>> V3:
>>  - Check rd->dn->name instead of rd->dn->full_name
>> V2:
>>  - Take Tyrel's idea to rely on OF_RECONFIG_UPDATE_PROPERTY instead of
>>  introducing a new hook mechanism.
>> ---
>>  arch/powerpc/include/asm/drmem.h  |  1 +
>>  arch/powerpc/mm/drmem.c   | 35 +++
>>  .../platforms/pseries/hotplug-memory.c|  4 +++
>>  3 files changed, 40 insertions(+)
>>
>> diff --git a/arch/powerpc/include/asm/drmem.h b/arch/powerpc/include/asm/drmem.h
>> index bf2402fed3e0..4265d5e95c2c 100644
>> --- a/arch/powerpc/include/asm/drmem.h
>> +++ b/arch/powerpc/include/asm/drmem.h
>> @@ -111,6 +111,7 @@ int drmem_update_dt(void);
>>  int __init
>>  walk_drmem_lmbs_early(unsigned long node, void *data,
>>int (*func)(struct drmem_lmb *, const __be32 **, void *));
>> +void drmem_update_lmbs(struct property *prop);
>>  #endif
>>  
>>  static inline void invalidate_lmb_associativity_index(struct drmem_lmb *lmb)
>> diff --git a/arch/powerpc/mm/drmem.c b/arch/powerpc/mm/drmem.c
>> index 9af3832c9d8d..f0a6633132af 100644
>> --- a/arch/powerpc/mm/drmem.c
>> +++ b/arch/powerpc/mm/drmem.c
>> @@ -307,6 +307,41 @@ int __init walk_drmem_lmbs_early(unsigned long node, void *data,
>>  return ret;
>>  }
>>  
>> +/*
>> + * Update the LMB associativity index.
>> + */
>> +static int update_lmb(struct drmem_lmb *updated_lmb,
>> +  __maybe_unused const __be32 **usm,
>> +  __maybe_unused void *data)
>> +{
>> +struct drmem_lmb *lmb;
>> +
>> +/*
>> + * Brute force; there may be a better way to fetch the LMB
>> + */
>> +for_each_drmem_lmb(lmb) {
>> +if (lmb->drc_index != updated_lmb->drc_index)
>> +continue;
>> +
>> +lmb->aa_index = updated_lmb->aa_index;
>> +break;
>> +}
>> +return 0;
>> +}
>> +
>> +/*
>> + * Update the LMB associativity index.
>> + *
>> + * This needs to be called when the hypervisor is updating the
>> + * dynamic-reconfiguration-memory node property.
>> + */
>> +void drmem_update_lmbs(struct property *prop)
>> +{
>> +if (!strcmp(prop->name, "ibm,dynamic-memory"))
>> +__walk_drmem_v1_lmbs(prop->value, NULL, NULL, update_lmb);
>> +else if (!strcmp(prop->name, "ibm,dynamic-memory-v2"))
>> +__walk_drmem_v2_lmbs(prop->value, NULL, NULL, update_lmb);
>> +}
>>  #endif
>>  
>>  static int init_drmem_lmb_size(struct device_node *dn)
>> diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
>> index 8377f1f7c78e..672ffbee2e78 100644
>> --- a/arch/powerpc/platforms/pseries/hotplug-memory.c
>> +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
>> @@ -949,6 +949,10 @@ static int pseries_memory_notifier(struct notifier_block *nb,
>>  case OF_RECONFIG_DETACH_NODE:
>>  err = pseries_remove_mem_node(rd->dn);
>>  break;
>> +case OF_RECONFIG_UPDATE_PROPERTY:
>> +if (!strcmp(rd->dn->name,
>> +"ibm,dynamic-reconfiguration-memory"))
>> +drmem_update_lmbs(rd->prop);
>>  }
>>  return notifier_from_errno(err);
> 
> How will this interact with DLPAR memory? When we dlpar memory,
> ibm,configure-connector is used to fetch the new associativity details
> and set drmem_lmb->aa_index correctly there. Once that is done, the kernel
> then calls drmem_update_dt(), which will result in the above notifier
> callback?
> 
> IIUC, the call back then will update drmem_lmb->aa_index again?

After digging through some of this code, I'm a bit concerned about all the kernel
device tree manipulation around memory DLPAR, both with the assoc-lookup-array
prop update and the post dynamic-memory prop updating. We build a drmem_info array
of the LMBs from the device tree at boot. I don't really understand why we are
manipulating the device tree property every time we add/remove an LMB. Not sure
whether the reasoning was to write back in particular the aa_index and flags for each
LMB into the device tree wh

Re: [PATCH] ppc64/numa: consider the max numa node for migratable LPAR

2021-04-29 Thread Tyrel Datwyler
On 4/29/21 11:19 AM, Laurent Dufour wrote:
> When an LPAR is migratable, we should consider the maximum possible NUMA
> node count instead of the number of NUMA nodes on the current system.
> 
> The DT property 'ibm,current-associativity-domains' defines the maximum
> number of nodes the LPAR can see when running on that box. But if the LPAR
> is migrated to another box, it may see up to the number of nodes defined by
> 'ibm,max-associativity-domains'. So if an LPAR is migratable, that value
> should be used.
> 
> Unfortunately, there is no easy way to know if an LPAR is migratable or
> not. The hypervisor exports the property 'ibm,migratable-partition' in the
> case it is set to allow partition migration, but that would not mean that
> the current partition is migratable.

The wording is a little hard to follow for me here. From PAPR, the presence
of the 'ibm,migratable-partition' property indicates that the platform
supports the potential migration of the partition. I guess the point is
that all migratable partitions define 'ibm,migratable-partition', but not
all partitions that define 'ibm,migratable-partition' are necessarily
migratable.

-Tyrel

> 
> Without this patch, when an LPAR is started on a 2-node box and then
> migrated to a 3-node box, the hypervisor may spread the LPAR's CPUs on the
> 3rd node. In that case, if a CPU from that 3rd node is added to the LPAR,
> it will be assigned to the wrong node because the kernel has been set up to
> use at most 2 nodes (the configuration of the departure box). With this
> patch applied, the CPU is correctly added to the 3rd node.
> 
> Cc: Srikar Dronamraju 
> Signed-off-by: Laurent Dufour 
> ---
>  arch/powerpc/mm/numa.c | 14 +++---
>  1 file changed, 11 insertions(+), 3 deletions(-)
> 
> diff --git a/arch/powerpc/mm/numa.c b/arch/powerpc/mm/numa.c
> index f2bf98bdcea2..673fa6e47850 100644
> --- a/arch/powerpc/mm/numa.c
> +++ b/arch/powerpc/mm/numa.c
> @@ -893,7 +893,7 @@ static void __init setup_node_data(int nid, u64 start_pfn, u64 end_pfn)
>  static void __init find_possible_nodes(void)
>  {
>   struct device_node *rtas;
> - const __be32 *domains;
> + const __be32 *domains = NULL;
>   int prop_length, max_nodes;
>   u32 i;
> 
> @@ -909,9 +909,14 @@ static void __init find_possible_nodes(void)
>* it doesn't exist, then fallback on ibm,max-associativity-domains.
>* Current denotes what the platform can support compared to max
>* which denotes what the Hypervisor can support.
> +  *
> +  * If the LPAR is migratable, new nodes might be activated after a LPM,
> +  * so we should consider the max number in that case.
>*/
> - domains = of_get_property(rtas, "ibm,current-associativity-domains",
> - &prop_length);
> + if (!of_get_property(of_root, "ibm,migratable-partition", NULL))
> + domains = of_get_property(rtas,
> +   "ibm,current-associativity-domains",
> +   &prop_length);
>   if (!domains) {
>   domains = of_get_property(rtas, "ibm,max-associativity-domains",
>   &prop_length);
> @@ -920,6 +925,9 @@ static void __init find_possible_nodes(void)
>   }
> 
>   max_nodes = of_read_number(&domains[min_common_depth], 1);
> + printk(KERN_INFO "Partition configured for %d NUMA nodes.\n",
> +max_nodes);
> +
>   for (i = 0; i < max_nodes; i++) {
>   if (!node_possible(i))
>   node_set(i, node_possible_map);
> 



Re: [RFC PATCH v1 2/4] mm/hugetlb: Change parameters of arch_make_huge_pte()

2021-04-29 Thread Mike Kravetz
On 4/28/21 9:46 AM, Christophe Leroy wrote:
> At the time being, arch_make_huge_pte() has the following prototype:
> 
>   pte_t arch_make_huge_pte(pte_t entry, struct vm_area_struct *vma,
>struct page *page, int writable);
> 
> vma is used to get the page shift or size.
> vma is also used on Sparc to get vm_flags.
> page is not used.
> writable is not used.
> 
> In order to allow using this function without a vma, replace vma by shift
> and flags. Also remove the unused parameters.
> 
> Signed-off-by: Christophe Leroy 
> ---
>  arch/arm64/include/asm/hugetlb.h | 3 +--
>  arch/arm64/mm/hugetlbpage.c  | 5 ++---
>  arch/powerpc/include/asm/nohash/32/hugetlb-8xx.h | 5 ++---
>  arch/sparc/include/asm/pgtable_64.h  | 3 +--
>  arch/sparc/mm/hugetlbpage.c  | 6 ++
>  include/linux/hugetlb.h  | 4 ++--
>  mm/hugetlb.c | 6 --
>  mm/migrate.c | 4 +++-
>  8 files changed, 17 insertions(+), 19 deletions(-)
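
For reference, the change described above implies a new prototype roughly
like this (a sketch inferred from the quoted description, not copied from
the patch):

	pte_t arch_make_huge_pte(pte_t entry, unsigned int shift,
				 vm_flags_t flags);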

Hi Christophe,

Sorry, no suggestion for how to make a beautiful generic implementation.

This patch is straightforward.
Acked-by: Mike Kravetz 
-- 
Mike Kravetz


[PATCH v2] kbuild: replace LANG=C with LC_ALL=C

2021-04-29 Thread Masahiro Yamada
LANG gives a weak default to each LC_* in case it is not explicitly
defined. LC_ALL, if set, overrides all other LC_* variables.

  LANG  <  LC_CTYPE, LC_COLLATE, LC_MONETARY, LC_NUMERIC, ...  <  LC_ALL

This is why documentation such as [1] suggests setting LC_ALL in build
scripts to get deterministic results.

LANG=C is not strong enough to override LC_* variables that may be set by
end users.

[1]: https://reproducible-builds.org/docs/locales/
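
For illustration, a minimal shell sketch (not part of this patch) showing
that LANG=C loses to an explicitly set LC_* variable while LC_ALL=C wins:

  $ printf 'a\nB\n' | LC_COLLATE=en_US.UTF-8 LANG=C sort   # locale order: a, B
  $ printf 'a\nB\n' | LC_COLLATE=en_US.UTF-8 LC_ALL=C sort # byte order:   B, a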

Signed-off-by: Masahiro Yamada 
Acked-by: Michael Ellerman  (powerpc)
Reviewed-by: Matthias Maennich 
Acked-by: Matthieu Baerts  (mptcp)
---

Changes in v2:
 - rebase

 arch/powerpc/boot/wrapper  | 2 +-
 scripts/nsdeps | 2 +-
 scripts/recordmcount.pl| 2 +-
 scripts/setlocalversion| 2 +-
 scripts/tags.sh| 2 +-
 tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +-
 usr/gen_initramfs.sh   | 2 +-
 7 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/boot/wrapper b/arch/powerpc/boot/wrapper
index 41fa0a8715e3..cdb796b76e2e 100755
--- a/arch/powerpc/boot/wrapper
+++ b/arch/powerpc/boot/wrapper
@@ -191,7 +191,7 @@ if [ -z "$kernel" ]; then
 kernel=vmlinux
 fi
 
-LANG=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
+LC_ALL=C elfformat="`${CROSS}objdump -p "$kernel" | grep 'file format' | awk '{print $4}'`"
 case "$elfformat" in
 elf64-powerpcle)   format=elf64lppc;;
 elf64-powerpc) format=elf32ppc ;;
diff --git a/scripts/nsdeps b/scripts/nsdeps
index e8ce2a4d704a..04c4b96e95ec 100644
--- a/scripts/nsdeps
+++ b/scripts/nsdeps
@@ -44,7 +44,7 @@ generate_deps() {
for source_file in $mod_source_files; do
sed '/MODULE_IMPORT_NS/Q' $source_file > ${source_file}.tmp
offset=$(wc -l ${source_file}.tmp | awk '{print $1;}')
-   cat $source_file | grep MODULE_IMPORT_NS | LANG=C sort -u >> ${source_file}.tmp
+   cat $source_file | grep MODULE_IMPORT_NS | LC_ALL=C sort -u >> ${source_file}.tmp
tail -n +$((offset +1)) ${source_file} | grep -v MODULE_IMPORT_NS >> ${source_file}.tmp
if ! diff -q ${source_file} ${source_file}.tmp; then
mv ${source_file}.tmp ${source_file}
diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl
index 867860ea57da..0a7fc9507d6f 100755
--- a/scripts/recordmcount.pl
+++ b/scripts/recordmcount.pl
@@ -497,7 +497,7 @@ sub update_funcs
 #
 # Step 2: find the sections and mcount call sites
 #
-open(IN, "LANG=C $objdump -hdr $inputfile|") || die "error running $objdump";
+open(IN, "LC_ALL=C $objdump -hdr $inputfile|") || die "error running $objdump";
 
 my $text;
 
diff --git a/scripts/setlocalversion b/scripts/setlocalversion
index bb709eda96cd..db941f6d9591 100755
--- a/scripts/setlocalversion
+++ b/scripts/setlocalversion
@@ -126,7 +126,7 @@ scm_version()
fi
 
# Check for svn and a svn repo.
-   if rev=$(LANG= LC_ALL= LC_MESSAGES=C svn info 2>/dev/null | grep '^Last Changed Rev'); then
+   if rev=$(LC_ALL=C svn info 2>/dev/null | grep '^Last Changed Rev'); then
rev=$(echo $rev | awk '{print $NF}')
printf -- '-svn%s' "$rev"
 
diff --git a/scripts/tags.sh b/scripts/tags.sh
index fd96734deff1..db8ba411860a 100755
--- a/scripts/tags.sh
+++ b/scripts/tags.sh
@@ -326,5 +326,5 @@ esac
 
 # Remove structure forward declarations.
 if [ -n "$remove_structs" ]; then
-LANG=C sed -i -e '/^\([a-zA-Z_][a-zA-Z0-9_]*\)\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1
+LC_ALL=C sed -i -e '/^\([a-zA-Z_][a-zA-Z0-9_]*\)\t.*\t\/\^struct \1;.*\$\/;"\tx$/d' $1
 fi
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index 9236609731b1..3c4cb72ed8a4 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -274,7 +274,7 @@ check_mptcp_disabled()
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
 
local err=0
-   LANG=C ip netns exec ${disabled_ns} ./mptcp_connect -p 1 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
+   LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 1 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
ip netns delete ${disabled_ns}
 
diff --git a/usr/gen_initramfs.sh b/usr/gen_initramfs.sh
index 8ae831657e5d..63476bb70b41 100755
--- a/usr/gen_initramfs.sh
+++ b/usr/gen_initramfs.sh
@@ -147,7 +147,7 @@ dir_filelist() {
header "$1"
 
srcdir=$(echo "$1" | sed -e 's://*:/:g')
-   dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LANG=C sort)
+   dirlist=$(find "${srcdir}" -printf "%p %m %U %G\n" | LC_ALL=C sort)
 

[powerpc:merge] BUILD SUCCESS e3a9b9d6a03f5fbf99b540e863b001d46ba1735c

2021-04-29 Thread kernel test robot
defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a002-20210429
x86_64   randconfig-a001-20210429
x86_64   randconfig-a003-20210429
x86_64   randconfig-a004-20210429
x86_64   randconfig-a005-20210429
x86_64   randconfig-a006-20210429
i386 randconfig-a005-20210429
i386 randconfig-a002-20210429
i386 randconfig-a001-20210429
i386 randconfig-a006-20210429
i386 randconfig-a003-20210429
i386 randconfig-a004-20210429
i386 randconfig-a012-20210429
i386 randconfig-a014-20210429
i386 randconfig-a013-20210429
i386 randconfig-a011-20210429
i386 randconfig-a015-20210429
i386 randconfig-a016-20210429
riscvnommu_k210_defconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
um   allmodconfig
um   allyesconfig
um  defconfig
x86_64rhel-8.3-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  rhel-8.3-kbuiltin
x86_64  kexec

clang tested configs:
x86_64   randconfig-a015-20210429
x86_64   randconfig-a016-20210429
x86_64   randconfig-a011-20210429
x86_64   randconfig-a014-20210429
x86_64   randconfig-a013-20210429
x86_64   randconfig-a012-20210429

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[powerpc:next] BUILD SUCCESS 5256426247837feb8703625bda7fcfc824af04cf

2021-04-29 Thread kernel test robot
  alldefconfig
m68km5272c3_defconfig
openrisc simple_smp_defconfig
arc  axs103_defconfig
mips   gcw0_defconfig
sh  kfr2r09_defconfig
arm  pxa168_defconfig
sh   rts7751r2dplus_defconfig
ia64 allyesconfig
powerpc mpc8272_ads_defconfig
shedosk7760_defconfig
sh   sh7770_generic_defconfig
shhp6xx_defconfig
arm  lpd270_defconfig
openriscor1ksim_defconfig
sh   se7724_defconfig
mips   rs90_defconfig
powerpc  acadia_defconfig
powerpc  ppc44x_defconfig
ia64 allmodconfig
m68k allmodconfig
m68kdefconfig
m68k allyesconfig
arc  allyesconfig
nds32   defconfig
alpha   defconfig
alphaallyesconfig
xtensa   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
s390 allyesconfig
s390 allmodconfig
s390defconfig
sparcallyesconfig
sparc   defconfig
i386defconfig
mips allyesconfig
powerpc  allyesconfig
powerpc  allmodconfig
powerpc   allnoconfig
x86_64   randconfig-a005-20210429
x86_64   randconfig-a006-20210429
x86_64   randconfig-a004-20210429
x86_64   randconfig-a002-20210429
x86_64   randconfig-a001-20210429
x86_64   randconfig-a003-20210429
i386 randconfig-a005-20210429
i386 randconfig-a002-20210429
i386 randconfig-a001-20210429
i386 randconfig-a006-20210429
i386 randconfig-a003-20210429
i386 randconfig-a004-20210429
i386 randconfig-a012-20210429
i386 randconfig-a014-20210429
i386 randconfig-a013-20210429
i386 randconfig-a011-20210429
i386 randconfig-a015-20210429
i386 randconfig-a016-20210429
riscvnommu_k210_defconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
um   allmodconfig
um  defconfig
x86_64rhel-8.3-kselftests
x86_64  defconfig
x86_64   rhel-8.3
x86_64  rhel-8.3-kbuiltin
x86_64  kexec

clang tested configs:
x86_64   randconfig-a015-20210429
x86_64   randconfig-a016-20210429
x86_64   randconfig-a011-20210429
x86_64   randconfig-a014-20210429
x86_64   randconfig-a013-20210429
x86_64   randconfig-a012-20210429

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


[GIT PULL] Please pull powerpc/linux.git powerpc-5.13-1 tag

2021-04-29 Thread Michael Ellerman
-BEGIN PGP SIGNED MESSAGE-
Hash: SHA256

Hi Linus,

Please pull powerpc updates for 5.13.

No conflicts that I'm aware of.

Notable out of area changes:

  51c66ad849a7 powerpc/bpf: Implement extended BPF on PPC32
Documentation/admin-guide/sysctl/net.rst

  808094fcbf41 lib/vdso: Add vdso_data pointer as input to __arch_get_timens_vdso_data()
arch/arm64/include/asm/vdso/compat_gettimeofday.h
arch/arm64/include/asm/vdso/gettimeofday.h
arch/s390/include/asm/vdso/gettimeofday.h
arch/x86/include/asm/vdso/gettimeofday.h
lib/vdso/gettimeofday.c

  fb05121fd6a2 signal: Add unsafe_get_compat_sigset()
include/linux/compat.h
include/linux/uaccess.h

cheers


The following changes since commit 1e28eed17697bcf343c6743f0028cc3b5dd88bf0:

  Linux 5.12-rc3 (2021-03-14 14:41:02 -0700)

are available in the git repository at:

  https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git tags/powerpc-5.13-1

for you to fetch changes up to 5256426247837feb8703625bda7fcfc824af04cf:

  powerpc/signal32: Fix erroneous SIGSEGV on RT signal return (2021-04-28 23:35:11 +1000)

- --
powerpc updates for 5.13

 - Enable KFENCE for 32-bit.

 - Implement EBPF for 32-bit.

 - Convert 32-bit to do interrupt entry/exit in C.

 - Convert 64-bit BookE to do interrupt entry/exit in C.

 - Changes to our signal handling code to use user_access_begin/end() more 
extensively.

 - Add support for time namespaces (CONFIG_TIME_NS)

 - A series of fixes that allow us to reenable STRICT_KERNEL_RWX.

 - Other smaller features, fixes & cleanups.

Thanks to: Alexey Kardashevskiy, Andreas Schwab, Andrew Donnellan,
  Aneesh Kumar K.V, Athira Rajeev, Bhaskar Chowdhury, Bixuan Cui,
  Cédric Le Goater, Chen Huang, Chris Packham, Christophe Leroy,
  Christopher M. Riedl, Colin Ian King, Dan Carpenter, Daniel Axtens,
  Daniel Henrique Barboza, David Gibson, Davidlohr Bueso, Denis Efremov,
  dingsenjie, Dmitry Safonov, Dominic DeMarco, Fabiano Rosas, Ganesh Goudar,
  Geert Uytterhoeven, Geetika Moolchandani, Greg Kurz, Guenter Roeck,
  Haren Myneni, He Ying, Jiapeng Chong, Jordan Niethe, Laurent Dufour,
  Lee Jones, Leonardo Bras, Li Huafei, Madhavan Srinivasan,
  Mahesh Salgaonkar, Masahiro Yamada, Nathan Chancellor, Nathan Lynch,
  Nicholas Piggin, Oliver O'Halloran, Paul Menzel, Pu Lehui, Randy Dunlap,
  Ravi Bangoria, Rosen Penev, Russell Currey, Santosh Sivaraj,
  Sebastian Andrzej Siewior, Segher Boessenkool, Shivaprasad G Bhat,
  Srikar Dronamraju, Stephen Rothwell, Thadeu Lima de Souza Cascardo,
  Thomas Gleixner, Tony Ambardar, Tyrel Datwyler, Vaibhav Jain,
  Vincenzo Frascino, Xiongwei Song, Yang Li, Yu Kuai, Zhang Yunkai.

- --
Alexey Kardashevskiy (3):
  powerpc/iommu: Allocate it_map by vmalloc
  powerpc/iommu: Do not immediately panic when failed IOMMU table allocation
  powerpc/iommu: Annotate nested lock for lockdep

Aneesh Kumar K.V (2):
  powerpc/book3s64/kuap: Move Kconfig varriables to BOOK3S_64
  powerpc/mm: Revert "powerpc/mm: Remove DEBUG_VM_PGTABLE support on powerpc"

Athira Rajeev (4):
  powerpc/perf: Fix PMU constraint check for EBB events
  powerpc/perf: Expose processor pipeline stage cycles using PERF_SAMPLE_WEIGHT_STRUCT
  powerpc/perf: Fix sampled instruction type for larx/stcx
  powerpc/perf: Fix the threshold event selection for memory events in power10

Bhaskar Chowdhury (6):
  powerpc: Fix spelling of "droping" to "dropping" in traps.c
  powerpc/64e: Trivial spelling fixes throughout head_fsl_booke.S
  powerpc/mm/book3s64: Fix a typo in mmu_context.c
  powerpc/kernel: Trivial typo fix in kgdb.c
  cxl: Fix couple of spellings
  powerpc: Spelling/typo fixes

Bixuan Cui (5):
  powerpc/pseries: Make symbol '__pcpu_scope_hcall_stats' static
  powerpc/pseries/pmem: Make symbol 'drc_pmem_match' static
  powerpc/perf: Make symbol 'isa207_pmu_format_attr' static
  powerpc/perf/hv-24x7: Make some symbols static
  powerpc/powernv: make symbol 'mpipl_kobj' static

Chen Huang (1):
  powerpc: Fix HAVE_HARDLOCKUP_DETECTOR_ARCH build configuration

Christophe Leroy (130):
  powerpc: Enable KFENCE for PPC32
  powerpc/uaccess: Also perform 64 bits copies in unsafe_copy_to_user() on ppc32
  powerpc/uaccess: Swap clear_user() and __clear_user()
  powerpc/uaccess: Move copy_mc_xxx() functions down
  powerpc/syscalls: Use sys_old_select() in ppc_select()
  powerpc/lib: Don't use __put_user_asm_goto() outside of uaccess.h
  powerpc/net: Switch csum_and_copy_{to/from}_user to user_access block
  powerpc/futex: Switch to user_access block
  powerpc/ptrace: Convert gpr32_set_common() to user access block
  powerpc/traps: Declare unrecoverable_exception() as __noreturn
  powerpc/40x: Don't use SPRN_SPRG_SCRATCH0/1 in TLB miss handlers
  p

[PATCH 2/3] powerpc: prom_init: switch to early string functions

2021-04-29 Thread Daniel Walker
This converts the prom_init string users to the early string functions,
which don't suffer from KASAN or any other debugging instrumentation.

Cc: xe-linux-exter...@cisco.com
Signed-off-by: Daniel Walker 
---
 arch/powerpc/kernel/prom_init.c| 185 ++---
 arch/powerpc/kernel/prom_init_check.sh |   9 +-
 2 files changed, 51 insertions(+), 143 deletions(-)

diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index ccf77b985c8f..4d4343da1280 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -225,105 +225,6 @@ static bool  __prombss rtas_has_query_cpu_stopped;
 #define PHANDLE_VALID(p)   ((p) != 0 && (p) != PROM_ERROR)
 #define IHANDLE_VALID(i)   ((i) != 0 && (i) != PROM_ERROR)
 
-/* Copied from lib/string.c and lib/kstrtox.c */
-
-static int __init prom_strcmp(const char *cs, const char *ct)
-{
-   unsigned char c1, c2;
-
-   while (1) {
-   c1 = *cs++;
-   c2 = *ct++;
-   if (c1 != c2)
-   return c1 < c2 ? -1 : 1;
-   if (!c1)
-   break;
-   }
-   return 0;
-}
-
-static char __init *prom_strcpy(char *dest, const char *src)
-{
-   char *tmp = dest;
-
-   while ((*dest++ = *src++) != '\0')
-   /* nothing */;
-   return tmp;
-}
-
-static int __init prom_strncmp(const char *cs, const char *ct, size_t count)
-{
-   unsigned char c1, c2;
-
-   while (count) {
-   c1 = *cs++;
-   c2 = *ct++;
-   if (c1 != c2)
-   return c1 < c2 ? -1 : 1;
-   if (!c1)
-   break;
-   count--;
-   }
-   return 0;
-}
-
-static size_t __init prom_strlen(const char *s)
-{
-   const char *sc;
-
-   for (sc = s; *sc != '\0'; ++sc)
-   /* nothing */;
-   return sc - s;
-}
-
-static int __init prom_memcmp(const void *cs, const void *ct, size_t count)
-{
-   const unsigned char *su1, *su2;
-   int res = 0;
-
-   for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
-   if ((res = *su1 - *su2) != 0)
-   break;
-   return res;
-}
-
-static char __init *prom_strstr(const char *s1, const char *s2)
-{
-   size_t l1, l2;
-
-   l2 = prom_strlen(s2);
-   if (!l2)
-   return (char *)s1;
-   l1 = prom_strlen(s1);
-   while (l1 >= l2) {
-   l1--;
-   if (!prom_memcmp(s1, s2, l2))
-   return (char *)s1;
-   s1++;
-   }
-   return NULL;
-}
-
-static size_t __init prom_strlcat(char *dest, const char *src, size_t count)
-{
-   size_t dsize = prom_strlen(dest);
-   size_t len = prom_strlen(src);
-   size_t res = dsize + len;
-
-   /* This would be a bug */
-   if (dsize >= count)
-   return count;
-
-   dest += dsize;
-   count -= dsize;
-   if (len >= count)
-   len = count-1;
-   memcpy(dest, src, len);
-   dest[len] = 0;
-   return res;
-
-}
-
 #ifdef CONFIG_PPC_PSERIES
 static int __init prom_strtobool(const char *s, bool *res)
 {
@@ -694,7 +595,7 @@ static int __init prom_setprop(phandle node, const char *nodename,
add_string(&p, tohex((u32)(unsigned long) value));
add_string(&p, tohex(valuelen));
add_string(&p, tohex(ADDR(pname)));
-   add_string(&p, tohex(prom_strlen(pname)));
+   add_string(&p, tohex(early_strlen(pname)));
add_string(&p, "property");
*p = 0;
return call_prom("interpret", 1, 1, (u32)(unsigned long) cmd);
@@ -779,25 +680,25 @@ static void __init early_cmdline_parse(void)
l = prom_getprop(prom.chosen, "bootargs", p, 
COMMAND_LINE_SIZE-1);
 
if (IS_ENABLED(CONFIG_CMDLINE_EXTEND) || l <= 0 || p[0] == '\0')
-   prom_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
+   early_strlcat(prom_cmd_line, " " CONFIG_CMDLINE,
 sizeof(prom_cmd_line));
 
prom_printf("command line: %s\n", prom_cmd_line);
 
 #ifdef CONFIG_PPC64
-   opt = prom_strstr(prom_cmd_line, "iommu=");
+   opt = early_strstr(prom_cmd_line, "iommu=");
if (opt) {
prom_printf("iommu opt is: %s\n", opt);
opt += 6;
while (*opt && *opt == ' ')
opt++;
-   if (!prom_strncmp(opt, "off", 3))
+   if (!early_strncmp(opt, "off", 3))
prom_iommu_off = 1;
-   else if (!prom_strncmp(opt, "force", 5))
+   else if (!early_strncmp(opt, "force", 5))
prom_iommu_force_on = 1;
}
 #endif
-   opt = prom_strstr(prom_cmd_line, "mem=");
+   opt = early_strstr(prom_cmd_line, "mem=");
if (opt) {
opt += 4;
prom_memory_limit = prom_memparse(opt, (const char **)&opt);
@@ -809,7 +710,7 

[PATCH 3/3] x86: switch amd mem encrypt to early string functions

2021-04-29 Thread Daniel Walker
This switches the x86 early string users to the early string variants
and re-enables KASAN on the general string functions used throughout the
rest of the system.

Cc: xe-linux-exter...@cisco.com
Signed-off-by: Daniel Walker 
---
 arch/x86/mm/mem_encrypt_identity.c | 4 ++--
 lib/Makefile   | 7 ---
 2 files changed, 2 insertions(+), 9 deletions(-)

diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index 6c5eb6f3f14f..212fe90cf5e2 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -575,9 +575,9 @@ void __init sme_enable(struct boot_params *bp)
 
cmdline_find_option(cmdline_ptr, cmdline_arg, buffer, sizeof(buffer));
 
-   if (!strncmp(buffer, cmdline_on, sizeof(buffer)))
+   if (!early_strncmp(buffer, cmdline_on, sizeof(buffer)))
sme_me_mask = me_mask;
-   else if (!strncmp(buffer, cmdline_off, sizeof(buffer)))
+   else if (!early_strncmp(buffer, cmdline_off, sizeof(buffer)))
sme_me_mask = 0;
else
sme_me_mask = active_by_default ? me_mask : 0;
diff --git a/lib/Makefile b/lib/Makefile
index 25cc664f027e..314db12c0e98 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -28,13 +28,6 @@ KASAN_SANITIZE_early_string.nosan.o := n
 
 CFLAGS_early_string.nosan.o += -fno-stack-protector
 
-# Early boot use of cmdline, don't instrument it
-ifdef CONFIG_AMD_MEM_ENCRYPT
-KASAN_SANITIZE_string.o := n
-
-CFLAGS_string.o += -fno-stack-protector
-endif
-
$(obj)/early_string.nosan.o: $(src)/early_string.c $(recordmcount_source) $(objtool_dep) FORCE
$(call if_changed_rule,cc_o_c)
$(call cmd,force_checksrc)
-- 
2.25.1



[PATCH 1/3] lib: early_string: allow early usage of some string functions

2021-04-29 Thread Daniel Walker
This system allows some string functions to be moved into
lib/early_string.c, where they are prefixed with "early_" and compiled
without debugging instrumentation such as KASAN.

Something similar is already done on x86 for "AMD Secure Memory
Encryption (SME) support", in powerpc's prom_init.c, and in EFI's
libstub.

The AMD memory encryption support disables KASAN for all string
functions, while prom_init.c and EFI's libstub implement their own
versions of the functions.

This implementation allows sharing of the string functions without
removing the debugging features for the whole system.
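
As an example of the intended use, an early-boot caller could look roughly
like this (a hypothetical sketch; parse_early_flag() is made up for
illustration, while early_strstr() is one of the accessors added by this
patch):

	#include <linux/init.h>
	#include <linux/string.h>
	#include <linux/types.h>

	static bool __init parse_early_flag(const char *cmdline)
	{
		/* Safe to call before the KASAN shadow is initialized. */
		return early_strstr(cmdline, "myflag=on") != NULL;
	}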

Cc: xe-linux-exter...@cisco.com
Signed-off-by: Daniel Walker 
---
 include/linux/string.h |   6 ++
 lib/Makefile   |  23 +-
 lib/early_string.c | 172 +
 lib/string.c   | 154 
 4 files changed, 200 insertions(+), 155 deletions(-)
 create mode 100644 lib/early_string.c

diff --git a/include/linux/string.h b/include/linux/string.h
index 9521d8cab18e..c0d45b92ba9e 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -20,6 +20,7 @@ extern void *memdup_user_nul(const void __user *, size_t);
  */
 #include 
 
+extern char * early_strcpy(char *,const char *);
 #ifndef __HAVE_ARCH_STRCPY
 extern char * strcpy(char *,const char *);
 #endif
@@ -42,12 +43,15 @@ extern char * strcat(char *, const char *);
 #ifndef __HAVE_ARCH_STRNCAT
 extern char * strncat(char *, const char *, __kernel_size_t);
 #endif
+extern size_t early_strlcat(char *, const char *, __kernel_size_t);
 #ifndef __HAVE_ARCH_STRLCAT
 extern size_t strlcat(char *, const char *, __kernel_size_t);
 #endif
+extern int early_strcmp(const char *,const char *);
 #ifndef __HAVE_ARCH_STRCMP
 extern int strcmp(const char *,const char *);
 #endif
+extern int early_strncmp(const char *,const char *,__kernel_size_t);
 #ifndef __HAVE_ARCH_STRNCMP
 extern int strncmp(const char *,const char *,__kernel_size_t);
 #endif
@@ -79,12 +83,14 @@ static inline __must_check char *strstrip(char *str)
return strim(str);
 }
 
+extern char * early_strstr(const char *, const char *);
 #ifndef __HAVE_ARCH_STRSTR
 extern char * strstr(const char *, const char *);
 #endif
 #ifndef __HAVE_ARCH_STRNSTR
 extern char * strnstr(const char *, const char *, size_t);
 #endif
+extern __kernel_size_t early_strlen(const char *);
 #ifndef __HAVE_ARCH_STRLEN
 extern __kernel_size_t strlen(const char *);
 #endif
diff --git a/lib/Makefile b/lib/Makefile
index b5307d3eec1a..25cc664f027e 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -9,6 +9,8 @@ ccflags-remove-$(CONFIG_FUNCTION_TRACER) += $(CC_FLAGS_FTRACE)
 # flaky coverage that is not a function of syscall inputs. For example,
 # rbtree can be global and individual rotations don't correlate with inputs.
 KCOV_INSTRUMENT_string.o := n
+KCOV_INSTRUMENT_early_string.o := n
+KCOV_INSTRUMENT_early_string.nosan.o := n
 KCOV_INSTRUMENT_rbtree.o := n
 KCOV_INSTRUMENT_list_debug.o := n
 KCOV_INSTRUMENT_debugobjects.o := n
@@ -19,6 +21,12 @@ KCOV_INSTRUMENT_fault-inject.o := n
 # Use -ffreestanding to ensure that the compiler does not try to "optimize"
 # them into calls to themselves.
 CFLAGS_string.o := -ffreestanding
+CFLAGS_early_string.o := -ffreestanding
+CFLAGS_early_string.nosan.o := -ffreestanding -D__EARLY_STRING_ENABLED
+
+KASAN_SANITIZE_early_string.nosan.o := n
+
+CFLAGS_early_string.nosan.o += -fno-stack-protector
 
 # Early boot use of cmdline, don't instrument it
 ifdef CONFIG_AMD_MEM_ENCRYPT
@@ -27,7 +35,20 @@ KASAN_SANITIZE_string.o := n
 CFLAGS_string.o += -fno-stack-protector
 endif
 
-lib-y := ctype.o string.o vsprintf.o cmdline.o \
+$(obj)/early_string.nosan.o: $(src)/early_string.c $(recordmcount_source) $(objtool_dep) FORCE
+   $(call if_changed_rule,cc_o_c)
+   $(call cmd,force_checksrc)
+   $(Q)$(OBJCOPY) \
+   --rename-section .text=.init.text \
+   --redefine-sym strcmp=early_strcmp \
+   --redefine-sym strncmp=early_strncmp \
+   --redefine-sym strcpy=early_strcpy \
+   --redefine-sym strlcat=early_strlcat \
+   --redefine-sym strlen=early_strlen \
+   --redefine-sym strstr=early_strstr \
+   --redefine-sym memcmp=early_memcmp $@
+
+lib-y := ctype.o string.o early_string.o early_string.nosan.o vsprintf.o cmdline.o \
 rbtree.o radix-tree.o timerqueue.o xarray.o \
 idr.o extable.o sha1.o irq_regs.o argv_split.o \
 flex_proportions.o ratelimit.o show_mem.o \
diff --git a/lib/early_string.c b/lib/early_string.c
new file mode 100644
index ..21004e82159c
--- /dev/null
+++ b/lib/early_string.c
@@ -0,0 +1,172 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ *  linux/lib/string.c
+ *
+ *  Copyright (C) 1991, 1992  Linus Torvalds
+ */
+
+#include 
+#include 
+#include 
+
+#ifdef __EARLY_STRING_ENABLED
+#undef EXPORT_SYMBOL
+#define EXPORT_SYMBOL(x)
+#endif
+
+#include 
+
+#if !defined(__HAVE_ARCH_MEMCMP) || defined(__EARLY_STRING_E

Re: [PATCH v2] kbuild: replace LANG=C with LC_ALL=C

2021-04-29 Thread Greg KH
On Fri, Apr 30, 2021 at 10:56:27AM +0900, Masahiro Yamada wrote:
> LANG gives a weak default to each LC_* in case it is not explicitly
> defined. LC_ALL, if set, overrides all other LC_* variables.
> 
>   LANG  <  LC_CTYPE, LC_COLLATE, LC_MONETARY, LC_NUMERIC, ...  <  LC_ALL
> 
> This is why documentation such as [1] suggests setting LC_ALL in build
> scripts to get deterministic results.
> 
> LANG=C is not strong enough to override LC_* variables that may be set by
> end users.
> 
> [1]: https://reproducible-builds.org/docs/locales/
> 
> Signed-off-by: Masahiro Yamada 
> Acked-by: Michael Ellerman  (powerpc)
> Reviewed-by: Matthias Maennich 
> Acked-by: Matthieu Baerts  (mptcp)

Reviewed-by: Greg Kroah-Hartman