[PATCH v3 1/6] powerpc/code-patching: Implement generic text patching function

2022-10-04 Thread Benjamin Gray
Adds a generic text patching mechanism for patches the size of an int or
a long.

The patch_instruction function is reimplemented in terms of this
more generic function. This generic implementation allows patching of
arbitrary long data, such as pointers on 64-bit.

On 32-bit patch_int is marked noinline to prevent a mis-optimisation.
Without noinline, inside patch_branch the compiler may inline all the
way to do_patch_memory, preventing the compiler from inlining
do_patch_memory into patch_int. This would needlessly force patch_int
to be a branch to do_patch_memory.

Signed-off-by: Benjamin Gray 
---
 arch/powerpc/include/asm/code-patching.h | 29 ++
 arch/powerpc/lib/code-patching.c | 73 ++--
 2 files changed, 85 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 3f881548fb61..170bfa848c7c 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -72,7 +72,36 @@ static inline int create_branch(ppc_inst_t *instr, const u32 
*addr,
 int create_cond_branch(ppc_inst_t *instr, const u32 *addr,
   unsigned long target, int flags);
 int patch_branch(u32 *addr, unsigned long target, int flags);
+
+/* patch_uint and patch_ulong must only be called on addresses where the patch
+ * does not cross a cacheline, otherwise it may not be flushed properly and
+ * mixes of new and stale data may be observed.
+ *
+ * patch_instruction and other instruction patchers automatically satisfy this
+ * requirement due to instruction alignment requirements.
+ */
+
+int patch_uint(void *addr, unsigned int val);
+
+#ifdef CONFIG_PPC64
+
+int patch_ulong(void *addr, unsigned long val);
 int patch_instruction(u32 *addr, ppc_inst_t instr);
+
+#else
+
+static inline int patch_ulong(void *addr, unsigned long val)
+{
+   return patch_uint(addr, val);
+}
+
+static inline int patch_instruction(u32 *addr, ppc_inst_t instr)
+{
+   return patch_uint(addr, ppc_inst_val(instr));
+}
+
+#endif
+
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
 
 static inline unsigned long patch_site_addr(s32 *site)
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 125c55e3e148..ecdd2e523d9a 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -15,20 +15,24 @@
 #include 
 #include 
 
-static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 
*patch_addr)
+static int __patch_memory(void *patch_addr, unsigned long val, void *exec_addr,
+  bool is_dword)
 {
-   if (!ppc_inst_prefixed(instr)) {
-   u32 val = ppc_inst_val(instr);
-
-   __put_kernel_nofault(patch_addr, , u32, failed);
-   } else {
-   u64 val = ppc_inst_as_ulong(instr);
+   /* Prefixed instruction may cross cacheline if cacheline smaller than 
64 bytes */
+   BUILD_BUG_ON(IS_ENABLED(CONFIG_PPC64) && L1_CACHE_BYTES < 64);
 
+   if (unlikely(is_dword))
__put_kernel_nofault(patch_addr, , u64, failed);
-   }
+   else
+   __put_kernel_nofault(patch_addr, , u32, failed);
 
-   asm ("dcbst 0, %0; sync; icbi 0,%1; sync; isync" :: "r" (patch_addr),
-   "r" (exec_addr));
+   /* Assume data is inside a single cacheline */
+   dcbst(patch_addr);
+   mb(); /* sync */
+   /* Flush on the EA that may be executed in case of a non-coherent 
icache */
+   icbi(exec_addr);
+   mb(); /* sync */
+   isync();
 
return 0;
 
@@ -38,7 +42,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t 
instr, u32 *patch_addr
 
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 {
-   return __patch_instruction(addr, instr, addr);
+   if (ppc_inst_prefixed(instr))
+   return __patch_memory(addr, ppc_inst_as_ulong(instr), addr, 
true);
+   else
+   return __patch_memory(addr, ppc_inst_val(instr), addr, false);
 }
 
 static DEFINE_PER_CPU(struct vm_struct *, text_poke_area);
@@ -149,7 +156,7 @@ static void unmap_patch_area(unsigned long addr)
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 }
 
-static int __do_patch_instruction(u32 *addr, ppc_inst_t instr)
+static int __do_patch_memory(void *addr, unsigned long val, bool is_dword)
 {
int err;
u32 *patch_addr;
@@ -166,7 +173,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t 
instr)
if (radix_enabled())
asm volatile("ptesync": : :"memory");
 
-   err = __patch_instruction(addr, instr, patch_addr);
+   err = __patch_memory(patch_addr, val, addr, is_dword);
 
pte_clear(_mm, text_poke_addr, pte);
flush_tlb_kernel_range(text_poke_addr, text_poke_addr + PAGE_SIZE);
@@ -174,7 +181,7 @@ static int __do_patch_instruction(u32 *addr, ppc_inst_t 
instr)
return err;
 }
 
-int 

[PATCH v3 4/6] static_call: Move static call selftest to static_call_selftest.c

2022-10-04 Thread Benjamin Gray
These tests are out-of-line only, so moving them to
their own file allows them to be run when an arch does
not implement inline static calls.

Signed-off-by: Benjamin Gray 
Reviewed-by: Andrew Donnellan 
---
 kernel/Makefile   |  1 +
 kernel/static_call_inline.c   | 43 ---
 kernel/static_call_selftest.c | 41 +
 3 files changed, 42 insertions(+), 43 deletions(-)
 create mode 100644 kernel/static_call_selftest.c

diff --git a/kernel/Makefile b/kernel/Makefile
index 318789c728d3..8ce8beaa3cc0 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -113,6 +113,7 @@ obj-$(CONFIG_KCSAN) += kcsan/
 obj-$(CONFIG_SHADOW_CALL_STACK) += scs.o
 obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
 obj-$(CONFIG_HAVE_STATIC_CALL_INLINE) += static_call_inline.o
+obj-$(CONFIG_STATIC_CALL_SELFTEST) += static_call_selftest.o
 obj-$(CONFIG_CFI_CLANG) += cfi.o
 
 obj-$(CONFIG_PERF_EVENTS) += events/
diff --git a/kernel/static_call_inline.c b/kernel/static_call_inline.c
index dc5665b62814..64d04d054698 100644
--- a/kernel/static_call_inline.c
+++ b/kernel/static_call_inline.c
@@ -498,46 +498,3 @@ int __init static_call_init(void)
return 0;
 }
 early_initcall(static_call_init);
-
-#ifdef CONFIG_STATIC_CALL_SELFTEST
-
-static int func_a(int x)
-{
-   return x+1;
-}
-
-static int func_b(int x)
-{
-   return x+2;
-}
-
-DEFINE_STATIC_CALL(sc_selftest, func_a);
-
-static struct static_call_data {
-  int (*func)(int);
-  int val;
-  int expect;
-} static_call_data [] __initdata = {
-  { NULL,   2, 3 },
-  { func_b, 2, 4 },
-  { func_a, 2, 3 }
-};
-
-static int __init test_static_call_init(void)
-{
-  int i;
-
-  for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) {
- struct static_call_data *scd = _call_data[i];
-
-  if (scd->func)
-  static_call_update(sc_selftest, scd->func);
-
-  WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect);
-  }
-
-  return 0;
-}
-early_initcall(test_static_call_init);
-
-#endif /* CONFIG_STATIC_CALL_SELFTEST */
diff --git a/kernel/static_call_selftest.c b/kernel/static_call_selftest.c
new file mode 100644
index ..246ad89f64eb
--- /dev/null
+++ b/kernel/static_call_selftest.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+#include 
+
+static int func_a(int x)
+{
+   return x+1;
+}
+
+static int func_b(int x)
+{
+   return x+2;
+}
+
+DEFINE_STATIC_CALL(sc_selftest, func_a);
+
+static struct static_call_data {
+   int (*func)(int);
+   int val;
+   int expect;
+} static_call_data [] __initdata = {
+   { NULL,   2, 3 },
+   { func_b, 2, 4 },
+   { func_a, 2, 3 }
+};
+
+static int __init test_static_call_init(void)
+{
+   int i;
+
+   for (i = 0; i < ARRAY_SIZE(static_call_data); i++ ) {
+   struct static_call_data *scd = _call_data[i];
+
+   if (scd->func)
+   static_call_update(sc_selftest, scd->func);
+
+   WARN_ON(static_call(sc_selftest)(scd->val) != scd->expect);
+   }
+
+   return 0;
+}
+early_initcall(test_static_call_init);
-- 
2.37.3



[PATCH v3 3/6] powerpc/module: Optimise nearby branches in ELF V2 ABI stub

2022-10-04 Thread Benjamin Gray
Inserts a direct branch to the stub target when possible, replacing the
mtctr/bctr sequence.

The load into r12 could potentially be skipped too, but that change
would need to refactor the arguments to indicate that the address
does not have a separate local entry point.

This helps the static call implementation, where modules calling their
own trampolines are called through this stub and the trampoline is
easily within range of a direct branch.

Signed-off-by: Benjamin Gray 
---
 arch/powerpc/kernel/module_64.c | 12 
 1 file changed, 12 insertions(+)

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 4d816f7785b4..13ce7a4d8a8d 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -141,6 +141,12 @@ static u32 ppc64_stub_insns[] = {
PPC_RAW_BCTR(),
 };
 
+#ifdef CONFIG_PPC64_ELF_ABI_V1
+#define PPC64_STUB_MTCTR_OFFSET 5
+#else
+#define PPC64_STUB_MTCTR_OFFSET 4
+#endif
+
 /* Count how many different 24-bit relocations (different symbol,
different addend) */
 static unsigned int count_relocs(const Elf64_Rela *rela, unsigned int num)
@@ -426,6 +432,7 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
  struct module *me,
  const char *name)
 {
+   int err;
long reladdr;
func_desc_t desc;
int i;
@@ -439,6 +446,11 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
return 0;
}
 
+   /* Replace indirect branch sequence with direct branch where possible */
+   err = patch_branch(>jump[PPC64_STUB_MTCTR_OFFSET], addr, 0);
+   if (err && err != -ERANGE)
+   return 0;
+
/* Stub uses address relative to r2. */
reladdr = (unsigned long)entry - my_r2(sechdrs, me);
if (reladdr > 0x7FFF || reladdr < -(0x8000L)) {
-- 
2.37.3



[PATCH v3 6/6] powerpc: Add tests for out-of-line static calls

2022-10-04 Thread Benjamin Gray
KUnit tests for the various combinations of caller/trampoline/target and
kernel/module. They must be run from a module loaded at runtime to
guarantee they have a different TOC to the kernel (64-bit ELF V2) and
increase the chance of testing the non-direct branch path of the
trampoline.

For 64-bit ELF V2 ABI the tests try to mitigate the chance of panicking
by restoring the TOC after every static call. Not all possible errors
can be caught by this (we can't stop a trampoline from using a bad TOC
itself), but it makes certain errors easier to debug.

Signed-off-by: Benjamin Gray 
---
 arch/powerpc/Kconfig   |  12 ++
 arch/powerpc/kernel/Makefile   |   1 +
 arch/powerpc/kernel/static_call.c  |  53 +
 arch/powerpc/kernel/static_call_test.c | 263 +
 arch/powerpc/kernel/static_call_test.h |  56 ++
 5 files changed, 385 insertions(+)
 create mode 100644 arch/powerpc/kernel/static_call_test.c
 create mode 100644 arch/powerpc/kernel/static_call_test.h

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 962e36ec34ec..5b9d5fa96a9e 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -1035,6 +1035,18 @@ config PPC_RTAS_FILTER
  Say Y unless you know what you are doing and the filter is causing
  problems for you.
 
+config PPC_STATIC_CALL_KUNIT_TEST
+   tristate "KUnit tests for static calls"
+   default KUNIT_ALL_TESTS
+   depends on HAVE_STATIC_CALL && KUNIT && m
+   help
+ Tests for static calls across all combinations of 
caller/trampoline/target
+ being kernel/module. On ELF ABI V2 the tests check the TOC is kept 
consistent.
+
+ Must be built as a module and loaded at runtime to ensure the module 
has
+ a different TOC to the kernel and make it likely that non-direct 
branch
+ path of the trampoline is tested.
+
 endmenu
 
 config ISA_DMA_API
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index a30d0d0f5499..22c07e3d34df 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -131,6 +131,7 @@ obj-$(CONFIG_RELOCATABLE)   += reloc_$(BITS).o
 obj-$(CONFIG_PPC32)+= entry_32.o setup_32.o early_32.o
 obj-$(CONFIG_PPC64)+= dma-iommu.o iommu.o
 obj-$(CONFIG_HAVE_STATIC_CALL) += static_call.o
+obj-$(CONFIG_PPC_STATIC_CALL_KUNIT_TEST)   += static_call_test.o
 obj-$(CONFIG_KGDB) += kgdb.o
 obj-$(CONFIG_BOOTX_TEXT)   += btext.o
 obj-$(CONFIG_SMP)  += smp.o
diff --git a/arch/powerpc/kernel/static_call.c 
b/arch/powerpc/kernel/static_call.c
index 9211b2e189bb..44957ba91e3f 100644
--- a/arch/powerpc/kernel/static_call.c
+++ b/arch/powerpc/kernel/static_call.c
@@ -153,3 +153,56 @@ void arch_static_call_transform(void *site, void *tramp, 
void *func, bool tail)
panic("%s: patching failed %pS at %pS\n", __func__, func, 
tramp);
 }
 EXPORT_SYMBOL_GPL(arch_static_call_transform);
+
+
+#if IS_MODULE(CONFIG_PPC_STATIC_CALL_KUNIT_TEST)
+
+#include "static_call_test.h"
+
+int ppc_sc_kernel_target_1(struct kunit *test)
+{
+   toc_fixup(test);
+   return 1;
+}
+
+int ppc_sc_kernel_target_2(struct kunit *test)
+{
+   toc_fixup(test);
+   return 2;
+}
+
+DEFINE_STATIC_CALL(ppc_sc_kernel, ppc_sc_kernel_target_1);
+
+int ppc_sc_kernel_call(struct kunit *test)
+{
+   return PROTECTED_SC(test, int, static_call(ppc_sc_kernel)(test));
+}
+
+int ppc_sc_kernel_call_indirect(struct kunit *test, int (*fn)(struct kunit 
*test))
+{
+   return PROTECTED_SC(test, int, fn(test));
+}
+
+long ppc_sc_kernel_target_big(struct kunit *test, long a, long b, long c, long 
d,
+ long e, long f, long g, long h, long i)
+{
+   toc_fixup(test);
+   KUNIT_EXPECT_EQ(test, a, b);
+   KUNIT_EXPECT_EQ(test, a, c);
+   KUNIT_EXPECT_EQ(test, a, d);
+   KUNIT_EXPECT_EQ(test, a, e);
+   KUNIT_EXPECT_EQ(test, a, f);
+   KUNIT_EXPECT_EQ(test, a, g);
+   KUNIT_EXPECT_EQ(test, a, h);
+   KUNIT_EXPECT_EQ(test, a, i);
+   return ~a;
+}
+
+EXPORT_SYMBOL_GPL(ppc_sc_kernel_target_1);
+EXPORT_SYMBOL_GPL(ppc_sc_kernel_target_2);
+EXPORT_SYMBOL_GPL(ppc_sc_kernel_target_big);
+EXPORT_STATIC_CALL_GPL(ppc_sc_kernel);
+EXPORT_SYMBOL_GPL(ppc_sc_kernel_call);
+EXPORT_SYMBOL_GPL(ppc_sc_kernel_call_indirect);
+
+#endif /* IS_MODULE(CONFIG_PPC_STATIC_CALL_KUNIT_TEST) */
diff --git a/arch/powerpc/kernel/static_call_test.c 
b/arch/powerpc/kernel/static_call_test.c
new file mode 100644
index ..10a09ef455cf
--- /dev/null
+++ b/arch/powerpc/kernel/static_call_test.c
@@ -0,0 +1,263 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "static_call_test.h"
+
+#include 
+#include 
+#include 
+
+/* Tests to ensure correctness in a variety of cases for static calls.
+ *
+ * The tests focus on ensuring the TOC is kept consistent across the
+ * module-kernel boundary, as compilers can't see that a trampoline
+ * defined locally to a 

[PATCH v3 2/6] powerpc/module: Handle caller-saved TOC in module linker

2022-10-04 Thread Benjamin Gray
The callee may set a field in st_other to 1 to indicate r2 should be
treated as caller-saved. This means a trampoline must be used to save
the current TOC before calling it and restore it afterwards, much like
external calls.

This is necessary for supporting V2 ABI static calls that do not
preserve the TOC.

Signed-off-by: Benjamin Gray 
---
 arch/powerpc/kernel/module_64.c | 14 +-
 1 file changed, 13 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 7e45dc98df8a..4d816f7785b4 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -55,6 +55,12 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
 * of function and try to derive r2 from it). */
return PPC64_LOCAL_ENTRY_OFFSET(sym->st_other);
 }
+
+static bool need_r2save_stub(unsigned char st_other)
+{
+   return ((st_other & STO_PPC64_LOCAL_MASK) >> STO_PPC64_LOCAL_BIT) == 1;
+}
+
 #else
 
 static func_desc_t func_desc(unsigned long addr)
@@ -66,6 +72,11 @@ static unsigned int local_entry_offset(const Elf64_Sym *sym)
return 0;
 }
 
+static bool need_r2save_stub(unsigned char st_other)
+{
+   return false;
+}
+
 void *dereference_module_function_descriptor(struct module *mod, void *ptr)
 {
if (ptr < (void *)mod->arch.start_opd ||
@@ -632,7 +643,8 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
case R_PPC_REL24:
/* FIXME: Handle weak symbols here --RR */
if (sym->st_shndx == SHN_UNDEF ||
-   sym->st_shndx == SHN_LIVEPATCH) {
+   sym->st_shndx == SHN_LIVEPATCH ||
+   need_r2save_stub(sym->st_other)) {
/* External: go via stub */
value = stub_for_addr(sechdrs, value, me,
strtab + sym->st_name);
-- 
2.37.3



[PATCH v3 5/6] powerpc/64: Add support for out-of-line static calls

2022-10-04 Thread Benjamin Gray
Implement static call support for 64 bit V2 ABI. This requires making
sure the TOC is kept correct across kernel-module boundaries. As a
secondary concern, it tries to use the local entry point of a target
wherever possible. It does so by checking if both tramp & target are
kernel code, and falls back to detecting the common global entry point
patterns if modules are involved. Detecting the global entry point is
also required for setting the local entry point as the trampoline
target: if we cannot detect the local entry point, then we need to
conservatively initialise r12 and use the global entry point.

The trampolines are marked with `.localentry NAME, 1` to make the
linker save and restore the TOC on each call to the trampoline. This
allows the trampoline to safely target functions with different TOC
values.

However this directive also implies the TOC is not initialised on entry
to the trampoline. The kernel TOC is easily found in the PACA, but not
an arbitrary module TOC. Therefore the trampoline implementation depends
on whether it's in the kernel or not. If in the kernel, we initialise
the TOC using the PACA. If in a module, we have to initialise the TOC
with zero context, so it's quite expensive.

Signed-off-by: Benjamin Gray 
---
 arch/powerpc/Kconfig |  14 ++-
 arch/powerpc/include/asm/code-patching.h |   1 +
 arch/powerpc/include/asm/static_call.h   |  80 +-
 arch/powerpc/kernel/Makefile |   3 +-
 arch/powerpc/kernel/static_call.c| 130 +--
 5 files changed, 216 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 4c466acdc70d..962e36ec34ec 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -102,6 +102,18 @@ config GENERIC_HWEIGHT
bool
default y
 
+config TOOLCHAIN_SUPPORTS_LOCALENTRY1
+   bool
+   depends on PPC64_ELF_ABI_V2
+   default y if LD_VERSION >= 23200 || LLD_VERSION >= 11
+   help
+ A section of the ELF symbol st_other field can be given the value 1
+ using the directive '.localentry NAME, 1' to mean the local and global
+ entry points are the same, and r2 should be treated as caller-saved.
+
+ Older versions of Clang and binutils do not recognise this form of the
+ directive and will error if it is used.
+
 config PPC
bool
default y
@@ -248,7 +260,7 @@ config PPC
select HAVE_SOFTIRQ_ON_OWN_STACK
select HAVE_STACKPROTECTOR  if PPC32 && 
$(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r2)
select HAVE_STACKPROTECTOR  if PPC64 && 
$(cc-option,-mstack-protector-guard=tls -mstack-protector-guard-reg=r13)
-   select HAVE_STATIC_CALL if PPC32
+   select HAVE_STATIC_CALL if PPC32 || (PPC64_ELF_ABI_V2 
&& TOOLCHAIN_SUPPORTS_LOCALENTRY1)
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_VIRT_CPU_ACCOUNTING
select HUGETLB_PAGE_SIZE_VARIABLE   if PPC_BOOK3S_64 && HUGETLB_PAGE
diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 170bfa848c7c..cb4629e55e57 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -152,6 +152,7 @@ int translate_branch(ppc_inst_t *instr, const u32 *dest, 
const u32 *src);
 bool is_conditional_branch(ppc_inst_t instr);
 
 #define OP_RT_RA_MASK  0xUL
+#define OP_SI_MASK 0xUL
 #define LIS_R2 (PPC_RAW_LIS(_R2, 0))
 #define ADDIS_R2_R12   (PPC_RAW_ADDIS(_R2, _R12, 0))
 #define ADDI_R2_R2 (PPC_RAW_ADDI(_R2, _R2, 0))
diff --git a/arch/powerpc/include/asm/static_call.h 
b/arch/powerpc/include/asm/static_call.h
index de1018cc522b..3d6e82200cb7 100644
--- a/arch/powerpc/include/asm/static_call.h
+++ b/arch/powerpc/include/asm/static_call.h
@@ -2,12 +2,75 @@
 #ifndef _ASM_POWERPC_STATIC_CALL_H
 #define _ASM_POWERPC_STATIC_CALL_H
 
+#ifdef CONFIG_PPC64_ELF_ABI_V2
+
+#ifdef MODULE
+
+#define __PPC_SCT(name, inst)  \
+   asm(".pushsection .text, \"ax\" \n" \
+   ".align 6   \n" \
+   ".globl " STATIC_CALL_TRAMP_STR(name) " \n" \
+   ".localentry " STATIC_CALL_TRAMP_STR(name) ", 1 \n" \
+   STATIC_CALL_TRAMP_STR(name) ":  \n" \
+   "   mflr11  \n" \
+   "   bcl 20, 31, $+4 \n" \
+   "0: mflr12  \n" \
+   "   mtlr11  \n" \
+   "   addi12, 12, (" STATIC_CALL_TRAMP_STR(name) " - 0b)  \n" 
\
+   "   addis 2, 12, (.TOC.-" STATIC_CALL_TRAMP_STR(name) ")@ha \n" 
\
+   "   addi 2, 2, (.TOC.-" 

[PATCH v3 0/6] Out-of-line static calls for powerpc64 ELF V2

2022-10-04 Thread Benjamin Gray
Implementation of out-of-line static calls for PowerPC 64-bit ELF V2 ABI.
Static calls patch an indirect branch into a direct branch at runtime.
Out-of-line specifically has a caller directly call a trampoline, and
the trampoline gets patched to directly call the target.

Previous versions here:
V2: https://lore.kernel.org/all/20220926064316.765967-1-bg...@linux.ibm.com/
V1: https://lore.kernel.org/all/20220916062330.430468-1-bg...@linux.ibm.com/

Changed for V3:

[general]
* Rebased on top of
  
https://lore.kernel.org/all/0df84a2eea551c1d000c34c36d0c1d23cbe26a97.1664289176.git.christophe.le...@csgroup.eu/
  for removing the separate CONFIG_STRICT_KERNEL_RWX cases. Can rebase back 
onto next if necessary.
* Some minor refactoring/style fixes throughout

[1/6]
* Code patching rewritten again
* This time it only adds support for what is needed:
* int or long sized writes only
* assumed within a cacheline (static call pointers are aligned
  for atomic updates, instructions are aligned anyway)
* unconditional instruction syncing because non-instruction
  patching is not used in any performance sensitive paths
* similarly, dword case is marked unlikely. ftrace activation is biggest
  performance concern, and it only uses non-prefixed instructions.
* Should be zero difference on 32-bit, minor differences on 64-bit
* Design doesn't need to be revisited unless specifically 1 or 2 byte
  patching is needed. Most such patches can be emulated by read-update-store
  of 4 bytes. Non-cacheline safe patches can be split similarly (they
  can't have atomicity requirements if they aren't aligned).

[3/6]
* Refactored to use patch_branch (thx Christophe)

[5/6]
* Required .localentry NAME, 1 directive guarded by toolchain version check
* Removed #ifdef's from static call implementation. Added sign_extend_long to
  support this.
* Fixed a bug in ppc_function_toc handling of lis case & made it more verbose
  to make such errors stand out more. New layout splits into calculating 
required
  values, and then applying them in two steps.

[6/6]
* Replaced SAVE_REGS/RESTORE_REGS macros with functions
* Reduced global register usage of tests
* Support running on 32-bit as well


Benjamin Gray (6):
  powerpc/code-patching: Implement generic text patching function
  powerpc/module: Handle caller-saved TOC in module linker
  powerpc/module: Optimise nearby branches in ELF V2 ABI stub
  static_call: Move static call selftest to static_call_selftest.c
  powerpc/64: Add support for out-of-line static calls
  powerpc: Add tests for out-of-line static calls

 arch/powerpc/Kconfig |  26 ++-
 arch/powerpc/include/asm/code-patching.h |  30 +++
 arch/powerpc/include/asm/static_call.h   |  80 ++-
 arch/powerpc/kernel/Makefile |   4 +-
 arch/powerpc/kernel/module_64.c  |  26 ++-
 arch/powerpc/kernel/static_call.c| 183 +++-
 arch/powerpc/kernel/static_call_test.c   | 263 +++
 arch/powerpc/kernel/static_call_test.h   |  56 +
 arch/powerpc/lib/code-patching.c |  73 +--
 kernel/Makefile  |   1 +
 kernel/static_call_inline.c  |  43 
 kernel/static_call_selftest.c|  41 
 12 files changed, 753 insertions(+), 73 deletions(-)
 create mode 100644 arch/powerpc/kernel/static_call_test.c
 create mode 100644 arch/powerpc/kernel/static_call_test.h
 create mode 100644 kernel/static_call_selftest.c


base-commit: 9a5e80596e50f1ab19fecb2d337e7ea3287ee083
--
2.37.3


Re: [PATCH] tools/perf: Fix aggr_printout to display cpu field irrespective of core value

2022-10-04 Thread Athira Rajeev



> On 04-Oct-2022, at 11:44 PM, Arnaldo Carvalho de Melo  wrote:
> 
> Em Tue, Oct 04, 2022 at 03:14:27PM -0300, Arnaldo Carvalho de Melo escreveu:
>> Em Tue, Oct 04, 2022 at 07:49:21AM -0700, Ian Rogers escreveu:
>>> On Tue, Oct 4, 2022, 12:06 AM Athira Rajeev 
 Thanks for helping with testing. Can I add your Tested-by for the patch ?
>> 
>>> Yep.
>> 
>>> Tested-by: Ian Rogers 
> 
> 
> Thanks, applied.
> 
> - Arnaldo

Hi Arnaldo,

Looks like you have taken the change to remove the id.core check:
https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/commit/?h=tmp.perf/core=db83f447b323958cdc5fedcf2134effb2ec9a6fe

But the patch that has to go in is :
"[PATCH] tools/perf: Fix cpu check to use id.cpu.cpu in aggr_printout"
which is tested by Ian and "pasted" by me in same mail thread.

Re-pasting here for reference:

>From 4dd98d953940deb2f85176cb6b4ecbfd18dbdbf9 Mon Sep 17 00:00:00 2001
From: Athira Rajeev 
Date: Mon, 3 Oct 2022 15:47:27 +0530
Subject: [PATCH] tools/perf: Fix cpu check to use id.cpu.cpu in aggr_printout

perf stat has options to aggregate the counts in different
modes like per socket, per core etc. The function "aggr_printout"
in util/stat-display.c which is used to print the aggregates,
has a check for cpu in case of AGGR_NONE. This check was
originally using condition : "if (id.cpu.cpu > -1)". But
this got changed after commit df936cadfb58 ("perf stat: Add
JSON output option"), which added option to output json format
for different aggregation modes. After this commit, the
check in "aggr_printout" is using "if (id.core > -1)".

The old code was using "id.cpu.cpu > -1" while the new code
is using "id.core > -1". But since the value printed is
id.cpu.cpu, fix this check to use cpu and not core.

Signed-off-by: Athira Rajeev 
Suggested-by: James Clark 
Suggested-by: Ian Rogers 
---
tools/perf/util/stat-display.c | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c
index b82844cb0ce7..cf28020798ec 100644
--- a/tools/perf/util/stat-display.c
+++ b/tools/perf/util/stat-display.c
@@ -168,7 +168,7 @@ static void aggr_printout(struct perf_stat_config *config,
id.socket,
id.die,
id.core);
-   } else if (id.core > -1) {
+   } else if (id.cpu.cpu > -1) {
fprintf(config->output, "\"cpu\" : \"%d\", ",
id.cpu.cpu);
}
@@ -179,7 +179,7 @@ static void aggr_printout(struct perf_stat_config *config,
id.die,
config->csv_output ? 0 : -3,
id.core, config->csv_sep);
-   } else if (id.core > -1) {
+   } else if (id.cpu.cpu > -1) {
fprintf(config->output, "CPU%*d%s",
config->csv_output ? 0 : -7,
id.cpu.cpu, config->csv_sep);
-- 
2.31.1

If it is confusing, shall I send it as a separate patch along with Tested-by 
from Ian ?

Please revert 
https://git.kernel.org/pub/scm/linux/kernel/git/acme/linux.git/commit/?h=tmp.perf/core=db83f447b323958cdc5fedcf2134effb2ec9a6fe

Thanks
Athira

> 



[powerpc:next] BUILD SUCCESS 0fa6831811f62cfc10415d731bcf9fde2647ad81

2022-10-04 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
next
branch HEAD: 0fa6831811f62cfc10415d731bcf9fde2647ad81  powerpc/64: Fix 
msr_check_and_set/clear MSR[EE] race

elapsed time: 759m

configs tested: 82
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

gcc tested configs:
um i386_defconfig
um   x86_64_defconfig
arc defconfig
alpha   defconfig
s390 allmodconfig
s390defconfig
x86_64  rhel-8.3-func
s390 allyesconfig
powerpc   allnoconfig
x86_64rhel-8.3-kselftests
x86_64  defconfig
x86_64   rhel-8.3-syz
alphaallyesconfig
x86_64 rhel-8.3-kunit
arc  allyesconfig
x86_64   rhel-8.3-kvm
x86_64   rhel-8.3
m68k allmodconfig
x86_64   allyesconfig
sh   allmodconfig
m68k allyesconfig
i386 randconfig-a011-20221003
i386 randconfig-a015-20221003
i386 randconfig-a012-20221003
i386 randconfig-a016-20221003
i386 randconfig-a013-20221003
i386 randconfig-a014-20221003
mips allyesconfig
powerpc  allmodconfig
i386defconfig
x86_64   randconfig-a015-20221003
x86_64   randconfig-a016-20221003
x86_64   randconfig-a011-20221003
x86_64   randconfig-a014-20221003
x86_64   randconfig-a013-20221003
x86_64   randconfig-a012-20221003
i386 allyesconfig
riscvrandconfig-r042-20221003
arc  randconfig-r043-20221003
arm defconfig
s390 randconfig-r044-20221003
arm64allyesconfig
arm  allyesconfig
arc  randconfig-r043-20221002
ia64 allmodconfig
csky  allnoconfig
alpha allnoconfig
arc   allnoconfig
riscv allnoconfig
shsh7763rdp_defconfig
loongarch loongson3_defconfig
arm cm_x300_defconfig
nios2alldefconfig
mipsgpr_defconfig
riscvnommu_virt_defconfig
riscv  rv32_defconfig
riscvnommu_k210_defconfig
i386   debian-10.3-kselftests
i386  debian-10.3
riscvallyesconfig
mipsvocore2_defconfig

clang tested configs:
i386 randconfig-a001-20221003
i386 randconfig-a006-20221003
i386 randconfig-a002-20221003
i386 randconfig-a003-20221003
i386 randconfig-a005-20221003
i386 randconfig-a004-20221003
x86_64   randconfig-a002-20221003
x86_64   randconfig-a001-20221003
x86_64   randconfig-a006-20221003
x86_64   randconfig-a003-20221003
x86_64   randconfig-a005-20221003
x86_64   randconfig-a004-20221003
hexagon  randconfig-r045-20221003
hexagon  randconfig-r041-20221003
hexagon  randconfig-r041-20221002
hexagon  randconfig-r045-20221002
riscvrandconfig-r042-20221002
s390 randconfig-r044-20221002
x86_64randconfig-k001
mipsmalta_qemu_32r6_defconfig
powerpc mpc836x_mds_defconfig

-- 
0-DAY CI Kernel Test Service
https://01.org/lkp


Re: [PATCH 1/5] KVM: PPC: Book3S HV P9: Clear vcpu cpu fields before enabling host irqs

2022-10-04 Thread Michael Ellerman
On Thu, 8 Sep 2022 23:25:41 +1000, Nicholas Piggin wrote:
> On guest entry, vcpu->cpu and vcpu->arch.thread_cpu are set after
> disabling host irqs. On guest exit there is a window where tick time
> accounting briefly enables irqs before these fields are cleared.
> 
> Move them up to ensure they are cleared before host irqs are run.
> This is possibly not a problem, but is more symmetric and makes the
> fields less surprising.
> 
> [...]

Patches 1-4 applied to powerpc/topic/ppc-kvm.

[1/5] KVM: PPC: Book3S HV P9: Clear vcpu cpu fields before enabling host irqs
  https://git.kernel.org/powerpc/c/bc91c04bfff7cdf676011b97bb21b2861d7b21c9
[2/5] KVM: PPC: Book3S HV P9: Fix irq disabling in tick accounting
  https://git.kernel.org/powerpc/c/c953f7500b65f2b157d1eb468ca8b86328834cce
[3/5] KVM: PPC: Book3S HV: Update guest state entry/exit accounting to new API
  https://git.kernel.org/powerpc/c/b31bc24a49037aad7aa00d2b0354e9704d8134dc
[4/5] KVM: PPC: Book3S HV P9: Restore stolen time logging in dtl
  https://git.kernel.org/powerpc/c/1a5486b3c3517aa1f608a10003ade4da122cb175

cheers


[PATCH] powerpc/kasan/book3s_64: warn when running with hash MMU

2022-10-04 Thread Nathan Lynch
kasan is known to crash at boot on book3s_64 with non-radix MMU. As
noted in commit 41b7a347bf14 ("powerpc: Book3S 64-bit outline-only
KASAN support"):

  A kernel with CONFIG_KASAN=y will crash during boot on a machine
  using HPT translation because not all the entry points to the
  generic KASAN code are protected with a call to kasan_arch_is_ready().

Such crashes look like this:

  BUG: Unable to handle kernel data access at 0xc00e0308b100
  Faulting instruction address: 0xc06d0fcc
  Oops: Kernel access of bad area, sig: 11 [#1]
  LE PAGE_SIZE=64K MMU=Hash SMP NR_CPUS=2048 NUMA pSeries
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted 6.0.0-rc5-02183-g3ab165dea2a2 #13
  [...regs...]
  NIP [c06d0fcc] kasan_byte_accessible+0xc/0x20
  LR [c06cd9cc] __kasan_check_byte+0x2c/0xa0
  Call Trace:
  [c0001688f930] [c0001688f970] 0xc0001688f970 (unreliable)
  [c0001688f970] [c05f6a74] ksize+0x34/0xa0
  [c0001688f9a0] [c24c03a8] __alloc_skb+0xd8/0x2d0
  [c0001688fa00] [c03c48c0] audit_log_start+0x260/0x660
  [c0001688fb30] [c03c50ec] audit_log+0x3c/0x70
  [c0001688fb60] [c404590c] audit_init+0x188/0x1ac
  [c0001688fbe0] [c00127e0] do_one_initcall+0xe0/0x610
  [c0001688fcd0] [c400a1f0] kernel_init_freeable+0x4c0/0x574
  [c0001688fda0] [c0013450] kernel_init+0x30/0x1d0
  [c0001688fe10] [c000cd54] ret_from_kernel_thread+0x5c/0x64

If you look carefully enough at the full kernel output, you might
notice this message, much earlier:

  KASAN not enabled as it requires radix!

But the eventual oops does not carry any indication that the real
problem was detected early on and is a known limitation.

Change init_book3s_64.c::kasan_init() to emit a warning backtrace and
taint the kernel when not running on radix. When the kernel likely
oopses later, the 'W' taint flag in the report should help minimize
developer time spent trying to understand what's gone wrong.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/mm/kasan/init_book3s_64.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/arch/powerpc/mm/kasan/init_book3s_64.c 
b/arch/powerpc/mm/kasan/init_book3s_64.c
index 9300d641cf9a..5d9894d7fb97 100644
--- a/arch/powerpc/mm/kasan/init_book3s_64.c
+++ b/arch/powerpc/mm/kasan/init_book3s_64.c
@@ -56,10 +56,8 @@ void __init kasan_init(void)
u64 i;
pte_t zero_pte = pfn_pte(virt_to_pfn(kasan_early_shadow_page), 
PAGE_KERNEL);
 
-   if (!early_radix_enabled()) {
-   pr_warn("KASAN not enabled as it requires radix!");
+   if (WARN(!early_radix_enabled(), "KASAN known broken on HPT"))
return;
-   }
 
for_each_mem_range(i, &start, &end)
kasan_init_phys_region((void *)start, (void *)end);
-- 
2.37.1



Re: [PATCH v2 33/44] ftrace: WARN on rcuidle

2022-10-04 Thread Mark Rutland
On Mon, Sep 19, 2022 at 12:00:12PM +0200, Peter Zijlstra wrote:
> CONFIG_GENERIC_ENTRY disallows any and all tracing when RCU isn't
> enabled.
> 
> XXX if s390 (the only other GENERIC_ENTRY user as of this writing)
> isn't comfortable with this, we could switch to
> HAVE_NOINSTR_VALIDATION which is x86_64 only atm.
> 
> Signed-off-by: Peter Zijlstra (Intel) 
> ---
>  include/linux/tracepoint.h |   13 -
>  kernel/trace/trace.c   |3 +++
>  2 files changed, 15 insertions(+), 1 deletion(-)
> 
> --- a/include/linux/tracepoint.h
> +++ b/include/linux/tracepoint.h
> @@ -178,6 +178,16 @@ static inline struct tracepoint *tracepo
>  #endif /* CONFIG_HAVE_STATIC_CALL */
>  
>  /*
> + * CONFIG_GENERIC_ENTRY archs are expected to have sanitized entry and idle
> + * code that disallow any/all tracing/instrumentation when RCU isn't 
> watching.
> + */
> +#ifdef CONFIG_GENERIC_ENTRY
> +#define RCUIDLE_COND(rcuidle)(rcuidle)
> +#else
> +#define RCUIDLE_COND(rcuidle)(rcuidle && in_nmi())
> +#endif

Could we make this depend on ARCH_WANTS_NO_INSTR instead?

That'll allow arm64 to check this even though we're not using the generic entry
code (and there's lots of work necessary to make that possible...).

Thanks,
Mark.

> +
> +/*
>   * it_func[0] is never NULL because there is at least one element in the 
> array
>   * when the array itself is non NULL.
>   */
> @@ -189,7 +199,8 @@ static inline struct tracepoint *tracepo
>   return; \
>   \
>   /* srcu can't be used from NMI */   \
> - WARN_ON_ONCE(rcuidle && in_nmi());  \
> + if (WARN_ON_ONCE(RCUIDLE_COND(rcuidle)))\
> + return; \
>   \
>   /* keep srcu and sched-rcu usage consistent */  \
>   preempt_disable_notrace();  \
> --- a/kernel/trace/trace.c
> +++ b/kernel/trace/trace.c
> @@ -3104,6 +3104,9 @@ void __trace_stack(struct trace_array *t
>   return;
>   }
>  
> + if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
> + return;
> +
>   /*
>* When an NMI triggers, RCU is enabled via ct_nmi_enter(),
>* but if the above rcu_is_watching() failed, then the NMI
> 
> 


Re: [PATCH v2 00/44] cpuidle,rcu: Clean up the mess

2022-10-04 Thread Ulf Hansson
On Mon, 19 Sept 2022 at 12:18, Peter Zijlstra  wrote:
>
> Hi All!
>
> At long last, a respin of the cpuidle vs rcu cleanup patches.
>
> v1: https://lkml.kernel.org/r/20220608142723.103523...@infradead.org
>
> These here patches clean up the mess that is cpuidle vs rcuidle.
>
> At the end of the ride there's only one RCU_NONIDLE user left:
>
>   arch/arm64/kernel/suspend.c:RCU_NONIDLE(__cpu_suspend_exit());
>
> and 'one' trace_*_rcuidle() user:
>
>   kernel/trace/trace_preemptirq.c:
> trace_irq_enable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>   kernel/trace/trace_preemptirq.c:
> trace_irq_disable_rcuidle(CALLER_ADDR0, CALLER_ADDR1);
>   kernel/trace/trace_preemptirq.c:
> trace_irq_enable_rcuidle(CALLER_ADDR0, caller_addr);
>   kernel/trace/trace_preemptirq.c:
> trace_irq_disable_rcuidle(CALLER_ADDR0, caller_addr);
>   kernel/trace/trace_preemptirq.c:
> trace_preempt_enable_rcuidle(a0, a1);
>   kernel/trace/trace_preemptirq.c:
> trace_preempt_disable_rcuidle(a0, a1);
>
> However this last is all in deprecated code that should be unused for 
> GENERIC_ENTRY.
>
> I've touched a lot of code that I can't test and I might've broken something 
> by
> accident. In particular the whole ARM cpuidle stuff was quite involved.
>
> Please all; have a look where you haven't already.
>
>
> New since v1:
>
>  - rebase on top of Frederic's rcu-context-tracking rename fest
>  - more omap goodness as per the last discussion (thanks Tony!)
>  - removed one more RCU_NONIDLE() from arm64/risc-v perf code
>  - ubsan/kasan fixes
>  - intel_idle module-param for testing
>  - a bunch of extra __always_inline, because compilers are silly.
>
> ---
>  arch/alpha/kernel/process.c   |  1 -
>  arch/alpha/kernel/vmlinux.lds.S   |  1 -
>  arch/arc/kernel/process.c |  3 ++
>  arch/arc/kernel/vmlinux.lds.S |  1 -
>  arch/arm/include/asm/vmlinux.lds.h|  1 -
>  arch/arm/kernel/process.c |  1 -
>  arch/arm/kernel/smp.c |  6 +--
>  arch/arm/mach-gemini/board-dt.c   |  3 +-
>  arch/arm/mach-imx/cpuidle-imx6q.c |  4 +-
>  arch/arm/mach-imx/cpuidle-imx6sx.c|  5 ++-
>  arch/arm/mach-omap2/common.h  |  6 ++-
>  arch/arm/mach-omap2/cpuidle34xx.c | 16 +++-
>  arch/arm/mach-omap2/cpuidle44xx.c | 29 +++---
>  arch/arm/mach-omap2/omap-mpuss-lowpower.c | 12 +-
>  arch/arm/mach-omap2/pm.h  |  2 +-
>  arch/arm/mach-omap2/pm24xx.c  | 51 +---
>  arch/arm/mach-omap2/pm34xx.c  | 14 +--
>  arch/arm/mach-omap2/pm44xx.c  |  2 +-
>  arch/arm/mach-omap2/powerdomain.c | 10 ++---
>  arch/arm64/kernel/idle.c  |  1 -
>  arch/arm64/kernel/smp.c   |  4 +-
>  arch/arm64/kernel/vmlinux.lds.S   |  1 -
>  arch/csky/kernel/process.c|  1 -
>  arch/csky/kernel/smp.c|  2 +-
>  arch/csky/kernel/vmlinux.lds.S|  1 -
>  arch/hexagon/kernel/process.c |  1 -
>  arch/hexagon/kernel/vmlinux.lds.S |  1 -
>  arch/ia64/kernel/process.c|  1 +
>  arch/ia64/kernel/vmlinux.lds.S|  1 -
>  arch/loongarch/kernel/idle.c  |  1 +
>  arch/loongarch/kernel/vmlinux.lds.S   |  1 -
>  arch/m68k/kernel/vmlinux-nommu.lds|  1 -
>  arch/m68k/kernel/vmlinux-std.lds  |  1 -
>  arch/m68k/kernel/vmlinux-sun3.lds |  1 -
>  arch/microblaze/kernel/process.c  |  1 -
>  arch/microblaze/kernel/vmlinux.lds.S  |  1 -
>  arch/mips/kernel/idle.c   |  8 ++--
>  arch/mips/kernel/vmlinux.lds.S|  1 -
>  arch/nios2/kernel/process.c   |  1 -
>  arch/nios2/kernel/vmlinux.lds.S   |  1 -
>  arch/openrisc/kernel/process.c|  1 +
>  arch/openrisc/kernel/vmlinux.lds.S|  1 -
>  arch/parisc/kernel/process.c  |  2 -
>  arch/parisc/kernel/vmlinux.lds.S  |  1 -
>  arch/powerpc/kernel/idle.c|  5 +--
>  arch/powerpc/kernel/vmlinux.lds.S |  1 -
>  arch/riscv/kernel/process.c   |  1 -
>  arch/riscv/kernel/vmlinux-xip.lds.S   |  1 -
>  arch/riscv/kernel/vmlinux.lds.S   |  1 -
>  arch/s390/kernel/idle.c   |  1 -
>  arch/s390/kernel/vmlinux.lds.S|  1 -
>  arch/sh/kernel/idle.c |  1 +
>  arch/sh/kernel/vmlinux.lds.S  |  1 -
>  arch/sparc/kernel/leon_pmc.c  |  4 ++
>  arch/sparc/kernel/process_32.c|  1 -
>  arch/sparc/kernel/process_64.c|  3 +-
>  arch/sparc/kernel/vmlinux.lds.S   |  1 -
>  arch/um/kernel/dyn.lds.S  |  1 -
>  arch/um/kernel/process.c  |  1 -
>  arch/um/kernel/uml.lds.S  |  1 -
>  

Re: [PATCH] tools/perf: Fix aggr_printout to display cpu field irrespective of core value

2022-10-04 Thread Ian Rogers
On Tue, Oct 4, 2022, 12:06 AM Athira Rajeev 
wrote:

>
>
> > On 04-Oct-2022, at 12:21 AM, Ian Rogers  wrote:
> >
> > On Mon, Oct 3, 2022 at 7:03 AM atrajeev 
> wrote:
> >>
> >> On 2022-10-02 05:17, Ian Rogers wrote:
> >>> On Thu, Sep 29, 2022 at 5:56 AM James Clark 
> >>> wrote:
> 
> 
> 
>  On 29/09/2022 09:49, Athira Rajeev wrote:
> >
> >
> >> On 28-Sep-2022, at 9:05 PM, James Clark 
> wrote:
> >>
> >>
> >>
> >
> > Hi James,
> >
> > Thanks for looking at the patch and sharing review comments.
> >
> >> On 13/09/2022 12:57, Athira Rajeev wrote:
> >>> perf stat includes option to specify aggr_mode to display
> >>> per-socket, per-core, per-die, per-node counter details.
> >>> Also there is option -A ( AGGR_NONE, -no-aggr ), where the
> >>> counter values are displayed for each cpu along with "CPU"
> >>> value in one field of the output.
> >>>
> >>> Each of the aggregate mode uses the information fetched
> >>> from "/sys/devices/system/cpu/cpuX/topology" like core_id,
> >>
> >> I thought that this wouldn't apply to the cpu field because cpu is
> >> basically interchangeable as an index in cpumap, rather than
> anything
> >> being read from the topology file.
> >
> > The cpu value is filled in this function:
> >
> > Function : aggr_cpu_id__cpu
> > Code: util/cpumap.c
> >
> >>
> >>> physical_package_id. Utility functions in "cpumap.c" fetches
> >>> this information and populates the socket id, core id, cpu etc.
> >>> If the platform does not expose the topology information,
> >>> these values will be set to -1. Example, in case of powerpc,
> >>> details like physical_package_id is restricted to be exposed
> >>> in pSeries platform. So id.socket, id.core, id.cpu all will
> >>> be set as -1.
> >>>
> >>> In case of displaying socket or die value, there is no check
> >>> done in the "aggr_printout" function to see if it points to
> >>> valid socket id or die. But for displaying "cpu" value, there
> >>> is a check for "if (id.core > -1)". In case of powerpc pSeries
> >>> where detail like physical_package_id is restricted to be
> >>> exposed, id.core will be set to -1. Hence the column or field
> >>> itself for CPU won't be displayed in the output.
> >>>
> >>> Result for per-socket:
> >>>
> >>> <<>>
> >>> perf stat -e branches --per-socket -a true
> >>>
> >>> Performance counter stats for 'system wide':
> >>>
> >>> S-1  32416,851  branches
> >>> <<>>
> >>>
> >>> Here S has -1 in above result. But with -A option which also
> >>> expects CPU in one column in the result, below is observed.
> >>>
> >>> <<>>
> >>> /bin/perf stat -e instructions -A -a true
> >>>
> >>> Performance counter stats for 'system wide':
> >>>
> >>>   47,146  instructions
> >>>   45,226  instructions
> >>>   43,354  instructions
> >>>   45,184  instructions
> >>> <<>>
> >>>
> >>> If the cpu id value is pointing to -1 also, it makes sense
> >>> to display the column in the output to replicate the behaviour
> >>> or to be in precedence with other aggr options(like per-socket,
> >>> per-core). Remove the check "id.core" so that CPU field gets
> >>> displayed in the output.
> >>
> >> Why would you want to print -1 out? Seems like the if statement was
> a
> >> good one to me, otherwise the output looks a bit broken to users.
> Are
> >> the other aggregation modes even working if -1 is set for socket and
> >> die? Maybe we need to not print -1 in those cases or exit earlier
> with a
> >> failure.
> >>
> >> The -1 value has a specific internal meaning which is "to not
> >> aggregate". It doesn't mean "not set".
> >
> > Currently, this check is done only for printing cpu value.
> > For socket/die/core values, this check is not done. Pasting an
> > example snippet from a powerpc system ( specifically from pseries
> platform where
> > the value is set to -1 )
> >
> > ./perf stat --per-core -a -C 1 true
> >
> > Performance counter stats for 'system wide':
> >
> > S-1-D-1-C-1  1   1.06 msec cpu-clock
> #1.018 CPUs utilized
> > S-1-D-1-C-1  1  2  context-switches
>#1.879 K/sec
> > S-1-D-1-C-1  1  0  cpu-migrations
>#0.000 /sec
> >
> > Here though the value is -1, we are displaying it. Where as in case
> of cpu, the first column will be
> > empty since we do a check before printing.
> >
> > Example:
> >
> > ./perf stat --per-core -A -C 1 true
> >
> > Performance counter stats for 'CPU(s) 1':
> >
> >  0.88 msec 

Re: [PATCH net-next v6 6/9] net: dpaa: Convert to phylink

2022-10-04 Thread Russell King (Oracle)
On Fri, Sep 30, 2022 at 04:09:30PM -0400, Sean Anderson wrote:
> +static void memac_validate(struct phylink_config *config,
> +unsigned long *supported,
> +struct phylink_link_state *state)
> +{
> + __ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
> + struct fman_mac *memac = fman_config_to_mac(config)->fman_mac;
> +
> + phylink_generic_validate(config, supported, state);
> +
> + if (phy_interface_mode_is_rgmii(state->interface) &&
> + memac->rgmii_no_half_duplex) {
> + phylink_caps_to_linkmodes(mask, MAC_10HD | MAC_100HD);
> + linkmode_andnot(supported, supported, mask);
> + linkmode_andnot(state->advertising, state->advertising, mask);
> + }
> +}

Having been through the rest of this with a fine tooth comb, nothing
else stands out with the exception of the above, which I think could
be done better with this patch:

http://git.armlinux.org.uk/cgit/linux-arm.git/commit/?h=net-queue&id=e65a47c4053255bd51715d5550e21c869971258c

Since the above would become:

static void memac_validate(struct phylink_config *config,
   unsigned long *supported,
   struct phylink_link_state *state)
{
struct mac_device *mac_dev = fman_config_to_mac(config);
struct fman_mac *memac = mac_dev->fman_mac;
unsigned long caps;

caps = mac_dev->phylink_config.capabilities;

if (phy_interface_mode_is_rgmii(state->interface) &&
memac->rgmii_no_half_duplex)
caps &= ~(MAC_10HD | MAC_100HD);

phylink_validate_mask_caps(supported, state, caps);
}

If you want to pick up my patch that adds this helper into your series,
please do.

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!


Re: [PATCH] tools/perf: Fix aggr_printout to display cpu field irrespective of core value

2022-10-04 Thread Arnaldo Carvalho de Melo
Em Tue, Oct 04, 2022 at 03:14:27PM -0300, Arnaldo Carvalho de Melo escreveu:
> Em Tue, Oct 04, 2022 at 07:49:21AM -0700, Ian Rogers escreveu:
> > On Tue, Oct 4, 2022, 12:06 AM Athira Rajeev 
> > > Thanks for helping with testing. Can I add your Tested-by for the patch ?
>  
> > Yep.
>  
> > Tested-by: Ian Rogers 


Thanks, applied.

- Arnaldo



Re: [PATCH] tools/perf: Fix aggr_printout to display cpu field irrespective of core value

2022-10-04 Thread Arnaldo Carvalho de Melo
Em Tue, Oct 04, 2022 at 07:49:21AM -0700, Ian Rogers escreveu:
> On Tue, Oct 4, 2022, 12:06 AM Athira Rajeev 
> > Thanks for helping with testing. Can I add your Tested-by for the patch ?
 
> Yep.
 
> Tested-by: Ian Rogers 
 
> Thanks,
> Ian


Re: [PATCH net-next v6 0/9] [RFT] net: dpaa: Convert to phylink

2022-10-04 Thread Sean Anderson



On 10/4/22 12:52 PM, Jakub Kicinski wrote:
> On Tue, 4 Oct 2022 11:28:19 -0400 Sean Anderson wrote:
>> I noticed that this series was marked "RFC" in patchwork.
> 
> Because the cover letter has RFT in the subject, presumably.
> 
>> I consider this series ready to apply. I am requesting *testing*, in
>> particular on 10gec/dtsec boards (P-series). Since no one seems to
>> have tried that over the past 4 months that I've been working on this
>> series, perhaps the best way for it to get tested is to apply it...
> 
> You know the situation the best as the author, you should make 
> a clear call on the nature of the posting. It's either RFC/RFT 
> or a ready-to-go-in posting.

Well, I consider the memac stuff to be well tested, but I don't
have 10gec/dtsec hardware. I was hoping that someone with the hardware
might look at this series if I stuck RFT in the subject. I suspect
there are still some bugs in those drivers.

> Maybe in smaller subsystems you can post an RFC/RFT and then it 
> gets applied after some time without a repost but we don't do that.
> The normal processing time for a patch is 1-3 days while we like
> to give people a week to test. So the patches would have to rot in 
> the review queue for extra half a week. At our patch rate this is
> unsustainable.
> 

Well, I have gotten reviews for the device tree stuff, but the core
changes (what I consider to be the actual content of the series) are
missing Reviewed-bys. I don't anticipate making any major changes to
the series unless I get some feedback one way or another. If having
RFT in the subject is preventing that review, I will remove it.

--Sean


Re: [PATCH net-next v6 0/9] [RFT] net: dpaa: Convert to phylink

2022-10-04 Thread Jakub Kicinski
On Tue, 4 Oct 2022 11:28:19 -0400 Sean Anderson wrote:
> I noticed that this series was marked "RFC" in patchwork.

Because the cover letter has RFT in the subject, presumably.

> I consider this series ready to apply. I am requesting *testing*, in
> particular on 10gec/dtsec boards (P-series). Since no one seems to
> have tried that over the past 4 months that I've been working on this
> series, perhaps the best way for it to get tested is to apply it...

You know the situation the best as the author, you should make 
a clear call on the nature of the posting. It's either RFC/RFT 
or a ready-to-go-in posting.

Maybe in smaller subsystems you can post an RFC/RFT and then it 
gets applied after some time without a repost but we don't do that.
The normal processing time for a patch is 1-3 days while we like
to give people a week to test. So the patches would have to rot in 
the review queue for extra half a week. At our patch rate this is
unsustainable.


Re: [PATCH net-next v6 6/9] net: dpaa: Convert to phylink

2022-10-04 Thread Russell King (Oracle)
On Fri, Sep 30, 2022 at 04:09:30PM -0400, Sean Anderson wrote:
> @@ -1064,43 +1061,50 @@ static struct phylink_pcs *memac_pcs_create(struct 
> device_node *mac_node,
>   return pcs;
>  }
>  
> +static bool memac_supports(struct mac_device *mac_dev, phy_interface_t iface)
> +{
> + /* If there's no serdes device, assume that it's been configured for
> +  * whatever the default interface mode is.
> +  */
> + if (!mac_dev->fman_mac->serdes)
> + return mac_dev->phy_if == iface;
> + /* Otherwise, ask the serdes */
> + return !phy_validate(mac_dev->fman_mac->serdes, PHY_MODE_ETHERNET,
> +  iface, NULL);
> +}
> +
>  int memac_initialization(struct mac_device *mac_dev,
>struct device_node *mac_node,
>struct fman_mac_params *params)
>  {
>   int  err;
> + struct device_node  *fixed;
>   struct phylink_pcs  *pcs;
> - struct fixed_phy_status *fixed_link;
>   struct fman_mac *memac;
> + unsigned longcapabilities;
> + unsigned long   *supported;
>  
> + mac_dev->phylink_ops= &memac_mac_ops;
>   mac_dev->set_promisc= memac_set_promiscuous;
>   mac_dev->change_addr= memac_modify_mac_address;
>   mac_dev->add_hash_mac_addr  = memac_add_hash_mac_address;
>   mac_dev->remove_hash_mac_addr   = memac_del_hash_mac_address;
> - mac_dev->set_tx_pause   = memac_set_tx_pause_frames;
> - mac_dev->set_rx_pause   = memac_accept_rx_pause_frames;
>   mac_dev->set_exception  = memac_set_exception;
>   mac_dev->set_allmulti   = memac_set_allmulti;
>   mac_dev->set_tstamp = memac_set_tstamp;
>   mac_dev->set_multi  = fman_set_multi;
> - mac_dev->adjust_link= adjust_link_memac;
>   mac_dev->enable = memac_enable;
>   mac_dev->disable= memac_disable;
>  
> - if (params->max_speed == SPEED_10000)
> - mac_dev->phy_if = PHY_INTERFACE_MODE_XGMII;
> -
>   mac_dev->fman_mac = memac_config(mac_dev, params);
> - if (!mac_dev->fman_mac) {
> - err = -EINVAL;
> - goto _return;
> - }
> + if (!mac_dev->fman_mac)
> + return -EINVAL;
>  
>   memac = mac_dev->fman_mac;
>   memac->memac_drv_param->max_frame_length = fman_get_max_frm();
>   memac->memac_drv_param->reset_on_init = true;
>  
> - err = of_property_match_string(mac_node, "pcs-names", "xfi");
> + err = of_property_match_string(mac_node, "pcs-handle-names", "xfi");

While reading through the patch, I stumbled upon this - in the previous
patch, you introduce this code with "pcs-names" and then in this patch
you change the name of the property. I don't think this was mentioned in
the commit message (searching it for "pcs" didn't reveal anything) so
I'm wondering whether this name update should've been merged into the
previous patch instead of this one?

-- 
RMK's Patch system: https://www.armlinux.org.uk/developer/patches/
FTTP is here! 40Mbps down 10Mbps up. Decent connectivity at last!


Re: [PATCH net-next v6 6/9] net: dpaa: Convert to phylink

2022-10-04 Thread Sean Anderson



On 10/4/22 12:13 PM, Russell King (Oracle) wrote:
> On Fri, Sep 30, 2022 at 04:09:30PM -0400, Sean Anderson wrote:
>> @@ -1064,43 +1061,50 @@ static struct phylink_pcs *memac_pcs_create(struct 
>> device_node *mac_node,
>>  return pcs;
>>  }
>>  
>> +static bool memac_supports(struct mac_device *mac_dev, phy_interface_t 
>> iface)
>> +{
>> +/* If there's no serdes device, assume that it's been configured for
>> + * whatever the default interface mode is.
>> + */
>> +if (!mac_dev->fman_mac->serdes)
>> +return mac_dev->phy_if == iface;
>> +/* Otherwise, ask the serdes */
>> +return !phy_validate(mac_dev->fman_mac->serdes, PHY_MODE_ETHERNET,
>> + iface, NULL);
>> +}
>> +
>>  int memac_initialization(struct mac_device *mac_dev,
>>   struct device_node *mac_node,
>>   struct fman_mac_params *params)
>>  {
>>  int  err;
>> +struct device_node  *fixed;
>>  struct phylink_pcs  *pcs;
>> -struct fixed_phy_status *fixed_link;
>>  struct fman_mac *memac;
>> +unsigned longcapabilities;
>> +unsigned long   *supported;
>>  
>> +mac_dev->phylink_ops= &memac_mac_ops;
>>  mac_dev->set_promisc= memac_set_promiscuous;
>>  mac_dev->change_addr= memac_modify_mac_address;
>>  mac_dev->add_hash_mac_addr  = memac_add_hash_mac_address;
>>  mac_dev->remove_hash_mac_addr   = memac_del_hash_mac_address;
>> -mac_dev->set_tx_pause   = memac_set_tx_pause_frames;
>> -mac_dev->set_rx_pause   = memac_accept_rx_pause_frames;
>>  mac_dev->set_exception  = memac_set_exception;
>>  mac_dev->set_allmulti   = memac_set_allmulti;
>>  mac_dev->set_tstamp = memac_set_tstamp;
>>  mac_dev->set_multi  = fman_set_multi;
>> -mac_dev->adjust_link= adjust_link_memac;
>>  mac_dev->enable = memac_enable;
>>  mac_dev->disable= memac_disable;
>>  
>> -if (params->max_speed == SPEED_10000)
>> -mac_dev->phy_if = PHY_INTERFACE_MODE_XGMII;
>> -
>>  mac_dev->fman_mac = memac_config(mac_dev, params);
>> -if (!mac_dev->fman_mac) {
>> -err = -EINVAL;
>> -goto _return;
>> -}
>> +if (!mac_dev->fman_mac)
>> +return -EINVAL;
>>  
>>  memac = mac_dev->fman_mac;
>>  memac->memac_drv_param->max_frame_length = fman_get_max_frm();
>>  memac->memac_drv_param->reset_on_init = true;
>>  
>> -err = of_property_match_string(mac_node, "pcs-names", "xfi");
>> +err = of_property_match_string(mac_node, "pcs-handle-names", "xfi");
> 
> While reading through the patch, I stumbled upon this - in the previous
> patch, you introduce this code with "pcs-names" and then in this patch
> you change the name of the property. I don't think this was mentioned in
> the commit message (searching it for "pcs" didn't reveal anything) so
> I'm wondering whether this name update should've been merged into the
> previous patch instead of this one?

Yes, you're right. It looks like I applied this update to the wrong
patch. Will fix for v7.

--Sean


Re: [PATCH net-next v6 0/9] [RFT] net: dpaa: Convert to phylink

2022-10-04 Thread Sean Anderson



On 9/30/22 4:09 PM, Sean Anderson wrote:
> This series converts the DPAA driver to phylink.
> 
> I have tried to maintain backwards compatibility with existing device
> trees wherever possible. However, one area where I was unable to
> achieve this was with QSGMII. Please refer to patch 2 for details.
> 
> All mac drivers have now been converted. I would greatly appreciate if
> anyone has T-series or P-series boards they can test/debug this series
> on. I only have an LS1046ARDB. Everything but QSGMII should work without
> breakage; QSGMII needs patches 7 and 8. For this reason, the last 4
> patches in this series should be applied together (and should not go
> through separate trees).
> 
> Changes in v6:
> - Remove unnecessary $ref from renesas,rzn1-a5psw
> - Remove unnecessary type from pcs-handle-names
> - Add maxItems to pcs-handle
> - Fix 81-character line
> - Fix uninitialized variable in dtsec_mac_config
> 
> Changes in v5:
> - Add Lynx PCS binding
> 
> Changes in v4:
> - Use pcs-handle-names instead of pcs-names, as discussed
> - Don't fail if phy support was not compiled in
> - Split off rate adaptation series
> - Split off DPAA "preparation" series
> - Split off Lynx 10G support
> - t208x: Mark MAC1 and MAC2 as 10G
> - Add XFI PCS for t208x MAC1/MAC2
> 
> Changes in v3:
> - Expand pcs-handle to an array
> - Add vendor prefix 'fsl,' to rgmii and mii properties.
> - Set maxItems for pcs-names
> - Remove phy-* properties from example because dt-schema complains and I
>   can't be bothered to figure out how to make it work.
> - Add pcs-handle as a preferred version of pcsphy-handle
> - Deprecate pcsphy-handle
> - Remove mii/rmii properties
> - Put the PCS mdiodev only after we are done with it (since the PCS
>   does not perform a get itself).
> - Remove _return label from memac_initialization in favor of returning
>   directly
> - Fix grabbing the default PCS not checking for -ENODATA from
>   of_property_match_string
> - Set DTSEC_ECNTRL_R100M in dtsec_link_up instead of dtsec_mac_config
> - Remove rmii/mii properties
> - Replace 1000Base... with 1000BASE... to match IEEE capitalization
> - Add compatibles for QSGMII PCSs
> - Split arm and powerpcs dts updates
> 
> Changes in v2:
> - Better document how we select which PCS to use in the default case
> - Move PCS_LYNX dependency to fman Kconfig
> - Remove unused variable slow_10g_if
> - Restrict valid link modes based on the phy interface. This is easier
>   to set up, and mostly captures what I intended to do the first time.
>   We now have a custom validate which restricts half-duplex for some SoCs
>   for RGMII, but generally just uses the default phylink validate.
> - Configure the SerDes in enable/disable
> - Properly implement all ethtool ops and ioctls. These were mostly
>   stubbed out just enough to compile last time.
> - Convert 10GEC and dTSEC as well
> - Fix capitalization of mEMAC in commit messages
> - Add nodes for QSGMII PCSs
> - Add nodes for QSGMII PCSs
> 
> Sean Anderson (9):
>   dt-bindings: net: Expand pcs-handle to an array
>   dt-bindings: net: Add Lynx PCS binding
>   dt-bindings: net: fman: Add additional interface properties
>   net: fman: memac: Add serdes support
>   net: fman: memac: Use lynx pcs driver
>   net: dpaa: Convert to phylink
>   powerpc: dts: t208x: Mark MAC1 and MAC2 as 10G
>   powerpc: dts: qoriq: Add nodes for QSGMII PCSs
>   arm64: dts: layerscape: Add nodes for QSGMII PCSs
> 
>  .../bindings/net/dsa/renesas,rzn1-a5psw.yaml  |   2 +-
>  .../bindings/net/ethernet-controller.yaml |  11 +-
>  .../bindings/net/fsl,fman-dtsec.yaml  |  53 +-
>  .../bindings/net/fsl,qoriq-mc-dpmac.yaml  |   2 +-
>  .../devicetree/bindings/net/fsl-fman.txt  |   5 +-
>  .../bindings/net/pcs/fsl,lynx-pcs.yaml|  40 +
>  .../boot/dts/freescale/fsl-ls1043-post.dtsi   |  24 +
>  .../boot/dts/freescale/fsl-ls1046-post.dtsi   |  25 +
>  .../fsl/qoriq-fman3-0-10g-0-best-effort.dtsi  |   3 +-
>  .../boot/dts/fsl/qoriq-fman3-0-10g-0.dtsi |  10 +-
>  .../fsl/qoriq-fman3-0-10g-1-best-effort.dtsi  |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-0-10g-1.dtsi |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-0-10g-2.dtsi |  45 ++
>  .../boot/dts/fsl/qoriq-fman3-0-10g-3.dtsi |  45 ++
>  .../boot/dts/fsl/qoriq-fman3-0-1g-0.dtsi  |   3 +-
>  .../boot/dts/fsl/qoriq-fman3-0-1g-1.dtsi  |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-0-1g-2.dtsi  |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-0-1g-3.dtsi  |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-0-1g-4.dtsi  |   3 +-
>  .../boot/dts/fsl/qoriq-fman3-0-1g-5.dtsi  |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-1-10g-0.dtsi |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-1-10g-1.dtsi |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-1-1g-0.dtsi  |   3 +-
>  .../boot/dts/fsl/qoriq-fman3-1-1g-1.dtsi  |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-1-1g-2.dtsi  |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-1-1g-3.dtsi  |  10 +-
>  .../boot/dts/fsl/qoriq-fman3-1-1g-4.dtsi  

Re: [PATCH net-next v6 5/9] net: fman: memac: Use lynx pcs driver

2022-10-04 Thread Sean Anderson



On 9/30/22 4:09 PM, Sean Anderson wrote:
> Although not stated in the datasheet, as far as I can tell PCS for mEMACs
> is a "Lynx." By reusing the existing driver, we can remove the PCS
> management code from the memac driver. This requires calling some PCS
> functions manually which phylink would usually do for us, but we will let
> it do that soon.
> 
> One problem is that we don't actually have a PCS for QSGMII. We pretend
> that each mEMAC's MDIO bus has four QSGMII PCSs, but this is not the case.
> Only the "base" mEMAC's MDIO bus has the four QSGMII PCSs. This is not an
> issue yet, because we never get the PCS state. However, it will be once the
> conversion to phylink is complete, since the links will appear to never
> come up. To get around this, we allow specifying multiple PCSs in pcsphy.
> This breaks backwards compatibility with old device trees, but only for
> QSGMII. IMO this is the only reasonable way to figure out what the actual
> QSGMII PCS is.
> 
> Additionally, we now also support a separate XFI PCS. This can allow the
> SerDes driver to set different addresses for the SGMII and XFI PCSs so they
> can be accessed at the same time.
> 
> Signed-off-by: Sean Anderson 
> ---
> 
> Changes in v6:
> - Fix 81-character line
> 
> Changes in v3:
> - Put the PCS mdiodev only after we are done with it (since the PCS
>   does not perform a get itself).
> 
> Changes in v2:
> - Move PCS_LYNX dependency to fman Kconfig
> 
>  drivers/net/ethernet/freescale/fman/Kconfig   |   3 +
>  .../net/ethernet/freescale/fman/fman_memac.c  | 258 +++---
>  2 files changed, 105 insertions(+), 156 deletions(-)
> 
> diff --git a/drivers/net/ethernet/freescale/fman/Kconfig 
> b/drivers/net/ethernet/freescale/fman/Kconfig
> index 48bf8088795d..8f5637db41dd 100644
> --- a/drivers/net/ethernet/freescale/fman/Kconfig
> +++ b/drivers/net/ethernet/freescale/fman/Kconfig
> @@ -4,6 +4,9 @@ config FSL_FMAN
>   depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST
>   select GENERIC_ALLOCATOR
>   select PHYLIB
> + select PHYLINK
> + select PCS
> + select PCS_LYNX
>   select CRC32
>   default n
>   help
> diff --git a/drivers/net/ethernet/freescale/fman/fman_memac.c 
> b/drivers/net/ethernet/freescale/fman/fman_memac.c
> index 56a29f505590..eeb71352603b 100644
> --- a/drivers/net/ethernet/freescale/fman/fman_memac.c
> +++ b/drivers/net/ethernet/freescale/fman/fman_memac.c
> @@ -11,43 +11,12 @@
>  
>  #include 
>  #include 
> +#include 
>  #include 
>  #include 
>  #include 
>  #include 
>  
> -/* PCS registers */
> -#define MDIO_SGMII_CR0x00
> -#define MDIO_SGMII_DEV_ABIL_SGMII0x04
> -#define MDIO_SGMII_LINK_TMR_L0x12
> -#define MDIO_SGMII_LINK_TMR_H0x13
> -#define MDIO_SGMII_IF_MODE   0x14
> -
> -/* SGMII Control defines */
> -#define SGMII_CR_AN_EN   0x1000
> -#define SGMII_CR_RESTART_AN  0x0200
> -#define SGMII_CR_FD  0x0100
> -#define SGMII_CR_SPEED_SEL1_1G   0x0040
> -#define SGMII_CR_DEF_VAL (SGMII_CR_AN_EN | SGMII_CR_FD | \
> -  SGMII_CR_SPEED_SEL1_1G)
> -
> -/* SGMII Device Ability for SGMII defines */
> -#define MDIO_SGMII_DEV_ABIL_SGMII_MODE   0x4001
> -#define MDIO_SGMII_DEV_ABIL_BASEX_MODE   0x01A0
> -
> -/* Link timer define */
> -#define LINK_TMR_L   0xa120
> -#define LINK_TMR_H   0x0007
> -#define LINK_TMR_L_BASEX 0xaf08
> -#define LINK_TMR_H_BASEX 0x002f
> -
> -/* SGMII IF Mode defines */
> -#define IF_MODE_USE_SGMII_AN 0x0002
> -#define IF_MODE_SGMII_EN 0x0001
> -#define IF_MODE_SGMII_SPEED_100M 0x0004
> -#define IF_MODE_SGMII_SPEED_1G   0x0008
> -#define IF_MODE_SGMII_DUPLEX_HALF0x0010
> -
>  /* Num of additional exact match MAC adr regs */
>  #define MEMAC_NUM_OF_PADDRS 7
>  
> @@ -326,7 +295,9 @@ struct fman_mac {
>   struct fman_rev_info fm_rev_info;
>   bool basex_if;
>   struct phy *serdes;
> - struct phy_device *pcsphy;
> + struct phylink_pcs *sgmii_pcs;
> + struct phylink_pcs *qsgmii_pcs;
> + struct phylink_pcs *xfi_pcs;
>   bool allmulti_enabled;
>  };
>  
> @@ -487,91 +458,22 @@ static u32 get_mac_addr_hash_code(u64 eth_addr)
>   return xor_val;
>  }
>  
> -static void setup_sgmii_internal_phy(struct fman_mac *memac,
> -  struct fixed_phy_status *fixed_link)
> +static void setup_sgmii_internal(struct fman_mac *memac,
> +  struct phylink_pcs *pcs,
> +  struct fixed_phy_status *fixed_link)
>  {
> - u16 tmp_reg16;
> -
> - if (WARN_ON(!memac->pcsphy))
> - return;
> -
> - /* SGMII mode */
> - tmp_reg16 = IF_MODE_SGMII_EN;
> - if (!fixed_link)
> - /* AN enable */
> - tmp_reg16 |= IF_MODE_USE_SGMII_AN;
> - 

Re: [PATCH] powerpc: udbg: Remove extern function prototypes

2022-10-04 Thread Michael Ellerman
On Tue, 23 Aug 2022 01:17:51 +0200, Pali Rohár wrote:
> 'extern' keyword is pointless and deprecated for function prototypes.
> 
> 

Applied to powerpc/next.

[1/1] powerpc: udbg: Remove extern function prototypes
  https://git.kernel.org/powerpc/c/6bd7ff497b4af13ea3d53781ffca7dc744dbb4da

cheers


Re: [PATCH v6 00/25] powerpc: Syscall wrapper and register clearing

2022-10-04 Thread Michael Ellerman
On Wed, 21 Sep 2022 16:55:40 +1000, Rohan McLure wrote:
> V5 available here:
> 
> Link: 
> https://lore.kernel.org/all/20220916053300.786330-2-rmcl...@linux.ibm.com/T/
> 
> Implement a syscall wrapper, causing arguments to handlers to be passed
> via a struct pt_regs on the stack. The syscall wrapper is implemented
> for all platforms other than the Cell processor, from which SPUs expect
> the ability to directly call syscall handler symbols with the regular
> in-register calling convention.
> 
> [...]

Patches 1-18 & 20-21 applied to powerpc/next.

[01/25] powerpc: Remove asmlinkage from syscall handler definitions

https://git.kernel.org/powerpc/c/5ba6c9a912fe4c60f84d6617ad10d2b8d7910990
[02/25] powerpc: Save caller r3 prior to system_call_exception

https://git.kernel.org/powerpc/c/2c27d4a419f627636b8c6038e55acb26df05c391
[03/25] powerpc: Add ZEROIZE_GPRS macros for register clears

https://git.kernel.org/powerpc/c/9d54a5ce3aa87810f13cd33b314097ac6d28c350
[04/25] powerpc/64s: Use {ZEROIZE,SAVE,REST}_GPRS macros in sc, scv 0 handlers

https://git.kernel.org/powerpc/c/2b1dac4b5f97ea88fb01dfcab7fc24500b5dea95
[05/25] powerpc/32: Clarify interrupt restores with REST_GPR macro in entry_32.S

https://git.kernel.org/powerpc/c/15ba74502ccfd0b34dad0ea022093ccc66b334d6
[06/25] powerpc/64e: Clarify register saves and clears with {SAVE,ZEROIZE}_GPRS

https://git.kernel.org/powerpc/c/53ecaa6778d613807e590c320ccfcf48a4114108
[07/25] powerpc/64s: Fix comment on interrupt handler prologue

https://git.kernel.org/powerpc/c/620f5c59c8617d623428c03414a022fca4e9eea2
[08/25] powerpc: Fix fallocate and fadvise64_64 compat parameter combination

https://git.kernel.org/powerpc/c/016ff72bd2090903715c0f9422a44afbb966f4ee
[09/25] asm-generic: compat: Support BE for long long args in 32-bit ABIs

https://git.kernel.org/powerpc/c/43d5de2b67d7f4a8478820005152f7f689608f2f
[10/25] powerpc: Use generic fallocate compatibility syscall

https://git.kernel.org/powerpc/c/c2e7a19827eec443a7cbe85e8d959052412d6dc3
[11/25] powerpc/32: Remove powerpc select specialisation

https://git.kernel.org/powerpc/c/b6b1334c9510e162bd8ca0ae58403cafad9572f1
[12/25] powerpc: Remove direct call to personality syscall handler

https://git.kernel.org/powerpc/c/4df0221f9ded8c39aecfb1a80cef346026671cb7
[13/25] powerpc: Remove direct call to mmap2 syscall handlers

https://git.kernel.org/powerpc/c/b7fa9ce86d32baf2a3a8bf8fdaa44870084edd85
[14/25] powerpc: Provide do_ppc64_personality helper

https://git.kernel.org/powerpc/c/ac17defbeb4e8285c5b9752164b1d68b13bf3e3b
[15/25] powerpc: Adopt SYSCALL_DEFINE for arch-specific syscall handlers

https://git.kernel.org/powerpc/c/dec20c50df79cadaff17e964ef7f622491a52134
[16/25] powerpc: Include all arch-specific syscall prototypes

https://git.kernel.org/powerpc/c/8cd1def4b8e4a592949509fac443e850da8428d0
[17/25] powerpc: Enable compile-time check for syscall handlers

https://git.kernel.org/powerpc/c/39859aea411b1696c6bc0c04bd2b5095ddba6196
[18/25] powerpc: Use common syscall handler type

https://git.kernel.org/powerpc/c/8640de0dee49cec50040d9845a2bc96fd15adc9e
[20/25] powerpc: Change system_call_exception calling convention

https://git.kernel.org/powerpc/c/f8971c627b14040e533768985a99f4fd6ffa420f
[21/25] powerpc: Provide syscall wrapper

https://git.kernel.org/powerpc/c/7e92e01b724526b98cbc7f03dd4afa0295780d56

cheers


Re: [PATCH v2] powerpc: Add support for early debugging via Serial 16550 console

2022-10-04 Thread Michael Ellerman
On Tue, 23 Aug 2022 01:15:01 +0200, Pali Rohár wrote:
> Currently powerpc early debugging contains a lot of platform specific
> options, but does not support standard UART / serial 16550 console.
> 
> Later, legacy_serial.c code supports registering a UART as the early debug
> console from the device tree, but this does not happen early during boot;
> it happens only after the machine description code finishes.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Add support for early debugging via Serial 16550 console
  https://git.kernel.org/powerpc/c/b19448fe846baad689ff51a991ebfc74b4b5e0a8

cheers


Re: [PATCH] powerpc: dts: turris1x.dts: Fix NOR partitions labels

2022-10-04 Thread Michael Ellerman
On Wed, 31 Aug 2022 00:55:00 +0200, Pali Rohár wrote:
> Partition partition@2 contains generic kernel image and it does not
> have to be used only for rescue purposes. Partition partition@1c
> contains bootable rescue system and partition partition@34 contains
> factory image/data for restoring to NAND. So change partition labels to
> better fit their purpose by removing possibly misleading substring "rootfs"
> from these labels.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: dts: turris1x.dts: Fix NOR partitions labels
  https://git.kernel.org/powerpc/c/c9986f0aefd1ae22fe9cf794d49699643f1e268b

cheers


Re: [PATCH v3 0/7] powerpc/64: interrupt soft-mask and context fixes

2022-10-04 Thread Michael Ellerman
On Mon, 26 Sep 2022 15:42:58 +1000, Nicholas Piggin wrote:
> No real changes since last posting, I just pulled fixes from several
> series together and rearranged them and updated changelogs slightly.
> 
> Thanks,
> Nick
> 
> Nicholas Piggin (7):
>   powerpc/64/interrupt: Fix false warning in context tracking due to
> idle state
>   powerpc/64: mark irqs hard disabled in boot paca
>   powerpc/64/interrupt: Fix return to masked context after hard-mask irq
> becomes pending
>   powerpc/64s: Fix irq state management in runlatch functions
>   powerpc/64s/interrupt: masked handler debug check for previous hard
> disable
>   powerpc/64/interrupt: avoid BUG/WARN recursion in interrupt entry
>   powerpc/64/irq: tidy soft-masked irq replay and improve documentation
> 
> [...]

Applied to powerpc/next.

[1/7] powerpc/64/interrupt: Fix false warning in context tracking due to idle 
state
  https://git.kernel.org/powerpc/c/56adbb7a8b6cc7fc9b940829c38494e53c9e57d1
[2/7] powerpc/64: mark irqs hard disabled in boot paca
  https://git.kernel.org/powerpc/c/799f7063c7645f9a751d17f5dfd73b952f962cd2
[3/7] powerpc/64/interrupt: Fix return to masked context after hard-mask irq 
becomes pending
  https://git.kernel.org/powerpc/c/e485f6c751e0a969327336c635ca602feea117f0
[4/7] powerpc/64s: Fix irq state management in runlatch functions
  https://git.kernel.org/powerpc/c/9524f2278f2e6925f147d9140c83f658e7a7c84f
[5/7] powerpc/64s/interrupt: masked handler debug check for previous hard 
disable
  https://git.kernel.org/powerpc/c/c39fb71a54f09977eba7584ef0eebb25047097c6
[6/7] powerpc/64/interrupt: avoid BUG/WARN recursion in interrupt entry
  https://git.kernel.org/powerpc/c/f7bff6e7759b1abb59334f6448f9ef3172c4c04a
[7/7] powerpc/64/irq: tidy soft-masked irq replay and improve documentation
  https://git.kernel.org/powerpc/c/1da5351f9eb9b72a7d25316b4d38bf10b6e671b1

cheers


Re: [PATCH] powerpc: dts: turris1x.dts: Fix labels in DSA cpu port nodes

2022-10-04 Thread Michael Ellerman
On Sat, 27 Aug 2022 15:15:38 +0200, Pali Rohár wrote:
> DSA cpu port node has to be marked with "cpu" label.
> So fix it for both cpu port nodes.
> 
> 

Applied to powerpc/next.

[1/1] powerpc: dts: turris1x.dts: Fix labels in DSA cpu port nodes
  https://git.kernel.org/powerpc/c/8bf056f57f1d16c561e43f9af37301f23990cd21

cheers


Re: [PATCH] powerpc/boot: Explicitly disable usage of SPE instructions

2022-10-04 Thread Michael Ellerman
On Sat, 27 Aug 2022 15:44:54 +0200, Pali Rohár wrote:
> uImage boot wrapper should not use SPE instructions, like kernel itself.
> Boot wrapper has already disabled Altivec and VSX instructions but not SPE.
> Options -mno-spe and -mspe=no are already set when compiling the kernel,
> but not yet when compiling the uImage wrapper. Fix it.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/boot: Explicitly disable usage of SPE instructions
  https://git.kernel.org/powerpc/c/110a58b9f91c66f743c01a2c217243d94c899c23

cheers


Re: [PATCH v2 0/5] powerpc/64s: improve boot debugging

2022-10-04 Thread Michael Ellerman
On Mon, 26 Sep 2022 15:56:15 +1000, Nicholas Piggin wrote:
> This series provides a machine check handler to catch out of
> bounds memory accesses in early boot before the MMU is enabled.
> 
> Since v1:
> - 64e compile fix
> 
> Nicholas Piggin (5):
>   powerpc/64s/interrupt: move early boot ILE fixup into a macro
>   powerpc/64s: early boot machine check handler
>   powerpc/64: avoid using r13 in relocate
>   powerpc/64: don't set boot CPU's r13 to paca until the structure is
> set up
>   powerpc/64s/interrupt: halt early boot interrupts if paca is not set
> up
> 
> [...]

Applied to powerpc/next.

[1/5] powerpc/64s/interrupt: move early boot ILE fixup into a macro
  https://git.kernel.org/powerpc/c/bf75a3258a40327b73c5b4458ae8102cfa921b40
[2/5] powerpc/64s: early boot machine check handler
  https://git.kernel.org/powerpc/c/2f5182cffa43f31c241131a2c10a4ecd8e90fb3e
[3/5] powerpc/64: avoid using r13 in relocate
  https://git.kernel.org/powerpc/c/b830c8754e046f96e84da9d3b3e028c4ceef2b18
[4/5] powerpc/64: don't set boot CPU's r13 to paca until the structure is set up
  https://git.kernel.org/powerpc/c/519b2e317e39ac99ce589a7c8480c47a17d62638
[5/5] powerpc/64s/interrupt: halt early boot interrupts if paca is not set up
  https://git.kernel.org/powerpc/c/e1100cee059ad0bea6a668177e835baa087a0c65

cheers


Re: [PATCH 1/2] powerpc: Fix SPE Power ISA properties for e500v1 platforms

2022-10-04 Thread Michael Ellerman
On Fri, 2 Sep 2022 23:21:02 +0200, Pali Rohár wrote:
> Commit 2eb28006431c ("powerpc/e500v2: Add Power ISA properties to comply
> with ePAPR 1.1") introduced new include file e500v2_power_isa.dtsi and
> should have used it for all e500v2 platforms. But apparently it was used
> also for e500v1 platforms mpc8540, mpc8541, mpc8555 and mpc8560.
> 
> e500v1 cores compared to e500v2 do not support double precision floating
> point SPE instructions. Hence power-isa-sp.fd should not be set on e500v1
> platforms, which is in e500v2_power_isa.dtsi include file.
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc: Fix SPE Power ISA properties for e500v1 platforms
  https://git.kernel.org/powerpc/c/37b9345ce7f4ab17538ea62def6f6d430f091355
[2/2] powerpc: Include e500v1_power_isa.dtsi for remaining e500v1 platforms
  https://git.kernel.org/powerpc/c/c102432005e8811b80b25641e12c4577970b5558

cheers


Re: [PATCH v4 1/2] powerpc: add ISA v3.0 / v3.1 wait opcode macro

2022-10-04 Thread Michael Ellerman
On Tue, 20 Sep 2022 22:22:58 +1000, Nicholas Piggin wrote:
> The wait instruction encoding changed between ISA v2.07 and ISA v3.0.
> In v3.1 the instruction gained a new field.
> 
> Update the PPC_WAIT macro to the current encoding. Rename the older
> incompatible one with a _v203 suffix as it was introduced in v2.03
> (the WC field was introduced in v2.07 but the kernel only uses WC=0).
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc: add ISA v3.0 / v3.1 wait opcode macro
  https://git.kernel.org/powerpc/c/dabeb572adf24bbd7cb21d1cc4d118bdf2c2ab74
[2/2] powerpc/64s: Make POWER10 and later use pause_short in cpu_relax loops
  https://git.kernel.org/powerpc/c/9c7bfc2dc21e737e8e4a753630bce675e1e7c0ad

cheers


Re: [PATCH v2 0/7] powerpc: build / linker improvements

2022-10-04 Thread Michael Ellerman
On Fri, 16 Sep 2022 14:07:48 +1000, Nicholas Piggin wrote:
> This series is mainly about moving more things out of writable and
> executable memory, and slightly moving the linker script in the
> direction of the binutils ld internal linker script as we do.
> 
> Thanks,
> Nick
> 
> [...]

Applied to powerpc/next.

[1/7] powerpc: move __end_rodata to cover arch read-only sections
  https://git.kernel.org/powerpc/c/7082f8e7d2276575a8806370007cbb4a7b9abdce
[2/7] powerpc/32/build: move got1/got2 sections out of text
  https://git.kernel.org/powerpc/c/1faa1235c1a00614bc4849a8dbd0790363c9a22f
[3/7] powerpc/build: move got, toc, plt, branch_lt sections to read-only
  https://git.kernel.org/powerpc/c/f21ba4499a15b76ad6013ca0a60873dbcf164c7b
[4/7] powerpc/build: move .data.rel.ro, .sdata2 to read-only
  https://git.kernel.org/powerpc/c/b6adc6d6d327229d75607a948cde2349d317f366
[5/7] powerpc/64/build: only include .opd with ELFv1
  https://git.kernel.org/powerpc/c/c787fed11890babda1e4882cd3b6efaf412e1bde
[6/7] powerpc/64/build: merge .got and .toc input sections
  https://git.kernel.org/powerpc/c/1e9eca485a840985a663080eb049c420272d4bdd
[7/7] powerpc/build: put sys_call_table in .data.rel.ro if RELOCATABLE
  https://git.kernel.org/powerpc/c/fdfdcfd504933ed06eb6b4c9df21eede0e213c3e

cheers


Re: [PATCH v2 0/5] powerpc/64: avoid GOT addressing, don't put data in TOC

2022-10-04 Thread Michael Ellerman
On Mon, 26 Sep 2022 13:40:52 +1000, Nicholas Piggin wrote:
> This is a cleaned up set of the initial prep patches from the pcrel
> series, dealing with regularising addressing variables from asm
> and using helper macros more consistently.
> 
> Changes since v1:
> - Use "REGS" stack frame marker for 64-bit, leaving 32-bit unchanged.
> - Fix PPC64 32-bit boot wrapper compile.
> 
> [...]

Applied to powerpc/next.

[1/5] powerpc/64: use 32-bit immediate for STACK_FRAME_REGS_MARKER
  https://git.kernel.org/powerpc/c/17773afdcd1589c5925a984f512330410cb2ba4f
[2/5] powerpc/64: asm use consistent global variable declaration and access
  https://git.kernel.org/powerpc/c/dab3b8f4fd09c22e8dbb2d9608194c7d52252f33
[3/5] powerpc/64: switch asm helpers from GOT to TOC relative addressing
  https://git.kernel.org/powerpc/c/754f611774e4b9357a944f5b703dd291c85161cf
[4/5] powerpc/64: provide a helper macro to load r2 with the kernel TOC
  https://git.kernel.org/powerpc/c/8e93fb33c84f68db20c0bc2821334a4c54c3e251
[5/5] powerpc/64e: provide an addressing macro for use with TOC in alternate 
register
  https://git.kernel.org/powerpc/c/3569d84bb26f6f07d426446da3d2c836180f1565

cheers


Re: [PATCH] powerpc/time: avoid programming DEC at the start of the timer interrupt

2022-10-04 Thread Michael Ellerman
On Sat, 10 Sep 2022 00:24:57 +1000, Nicholas Piggin wrote:
> Setting DEC to maximum at the start of the timer interrupt is not
> necessary and can be avoided for performance when MSR[EE] is not
> enabled during the handler as explained in commit 0faf20a1ad16
> ("powerpc/64s/interrupt: Don't enable MSR[EE] in irq handlers unless
> perf is in use"), where this change was first attempted.
> 
> The idea is that the timer interrupt runs with MSR[EE]=0, and at the end
> of the interrupt DEC is programmed to the next timer interval, so there
> is no need to clear the decrementer exception before then.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/time: avoid programming DEC at the start of the timer interrupt
  https://git.kernel.org/powerpc/c/c84550203b3173511e8cdbe94bc2e33175ba1d72

cheers


Re: [PATCH] powerpc: remove orphan systbl_chk.sh

2022-10-04 Thread Michael Ellerman
On Thu, 29 Sep 2022 13:21:20 +1000, Nicholas Piggin wrote:
> arch/powerpc/kernel/systbl_chk.sh has not been referenced since commit
> ab66dcc76d6a ("powerpc: generate uapi header and system call table
> files"). Remove it.
> 
> 

Applied to powerpc/next.

[1/1] powerpc: remove orphan systbl_chk.sh
  https://git.kernel.org/powerpc/c/a08661af4c52068972c552deb940b3b13635eb3e

cheers


Re: [PATCH v3 1/4] powerpc/64s: Add DEBUG_PAGEALLOC for radix

2022-10-04 Thread Michael Ellerman
On Mon, 26 Sep 2022 07:57:23 +0000, Nicholas Miehlbradt wrote:
> There is support for DEBUG_PAGEALLOC on hash but not on radix.
> Add support on radix.
> 
> 

Applied to powerpc/next.

[1/4] powerpc/64s: Add DEBUG_PAGEALLOC for radix
  https://git.kernel.org/powerpc/c/5e8b2c4dd3a0a4a2966e61d60dbeafab441cff28
[2/4] powerpc/64s: Remove unneeded #ifdef CONFIG_DEBUG_PAGEALLOC in hash_utils
  https://git.kernel.org/powerpc/c/3e791d0f32b10eff9437822c6099c7a158560151
[3/4] powerpc/64s: Allow double call of kernel_[un]map_linear_page()
  https://git.kernel.org/powerpc/c/d7902d31cbc3bf72722768831a684b0286ccd523
[4/4] powerpc/64s: Enable KFENCE on book3s64
  https://git.kernel.org/powerpc/c/a5edf9815dd739fce660b4c8658f61b7d2517042

cheers


Re: [PATCH] powerpc/pseries: move hcall_tracepoint_refcount out of .toc

2022-10-04 Thread Michael Ellerman
On Mon, 26 Sep 2022 15:38:23 +1000, Nicholas Piggin wrote:
> The .toc section is not really intended for arbitrary data. Writable
> data in particular prevents making the TOC read-only after relocation.
> Move hcall_tracepoint_refcount into the .data section.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/pseries: move hcall_tracepoint_refcount out of .toc
  https://git.kernel.org/powerpc/c/9a10ccb29c0a2befa5a9f691ed0ae37ee3e799a8

cheers


Re: [PATCH] powerpc/64s: POWER10 CPU Kconfig build option

2022-10-04 Thread Michael Ellerman
On Fri, 23 Sep 2022 13:30:04 +1000, Nicholas Piggin wrote:
> This adds basic POWER10_CPU option, which builds with -mcpu=power10.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/64s: POWER10 CPU Kconfig build option
  https://git.kernel.org/powerpc/c/4b2a9315f20d98576e25c9e4572e9a8e028d7aa2

cheers


Re: (subset) [PATCH 1/5] KVM: PPC: Book3S HV P9: Clear vcpu cpu fields before enabling host irqs

2022-10-04 Thread Michael Ellerman
On Thu, 8 Sep 2022 23:25:41 +1000, Nicholas Piggin wrote:
> On guest entry, vcpu->cpu and vcpu->arch.thread_cpu are set after
> disabling host irqs. On guest exit there is a window where tick time
> accounting briefly enables irqs before these fields are cleared.
> 
> Move them up to ensure they are cleared before host irqs are run.
> This is possibly not a problem, but is more symmetric and makes the
> fields less surprising.
> 
> [...]

Patch 5 applied to powerpc/next.

[5/5] KVM: PPC: Book3S HV: Implement scheduling wait interval counters in the 
VPA
  https://git.kernel.org/powerpc/c/e4335f53198fa0c0aefb2a38bb5518e94253412c

cheers


Re: [PATCH 1/2] powerpc/64s: Fix GENERIC_CPU build flags for PPC970 / G5

2022-10-04 Thread Michael Ellerman
On Wed, 21 Sep 2022 11:41:02 +1000, Nicholas Piggin wrote:
> Big-endian GENERIC_CPU supports 970, but builds with -mcpu=power5.
> POWER5 is ISA v2.02 whereas 970 is v2.01 plus Altivec. 2.02 added
> the popcntb instruction which a compiler might use.
> 
> Use -mcpu=power4.
> 
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc/64s: Fix GENERIC_CPU build flags for PPC970 / G5
  https://git.kernel.org/powerpc/c/58ec7f06b74e0d6e76c4110afce367c8b5f0837d
[2/2] powerpc/64s: update cpu selection options
  https://git.kernel.org/powerpc/c/7fd123e544886bf04fa853869efe55cb3f22d0c0

cheers


Re: [PATCH v3] powerpc/smp: poll cpu_callin_map more aggressively in __cpu_up()

2022-10-04 Thread Michael Ellerman
On Mon, 26 Sep 2022 17:02:50 -0500, Nathan Lynch wrote:
> At boot time, it is not necessary to delay between polls of
> cpu_callin_map when waiting for a kicked CPU to come up. Remove the
> delay intervals, but preserve the overall deadline (five seconds).
> 
> At run time, the first poll result is usually negative and we incur a
> sleeping wait. If we spin on the callin word for a short time first,
> we can reduce __cpu_up() from dozens of milliseconds to under 1ms in
> the common case on a P9 LPAR:
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/smp: poll cpu_callin_map more aggressively in __cpu_up()
  https://git.kernel.org/powerpc/c/b37ac1894ac3c014863986d6b8ed880195213e78

cheers


Re: [PATCH v2 0/2] powerpc/pseries: restrict error injection and DT changes when locked down

2022-10-04 Thread Michael Ellerman
On Mon, 26 Sep 2022 08:16:41 -0500, Nathan Lynch wrote:
> Add two new lockdown reasons for use in powerpc's pseries platform
> code.
> 
> The pseries platform allows hardware-level error injection via certain
> calls to the RTAS (Run Time Abstraction Services) firmware. ACPI-based
> error injection is already restricted in lockdown; this facility
> should be restricted for the same reasons.
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc/pseries: block untrusted device tree changes when locked down
  https://git.kernel.org/powerpc/c/99df7a2810b6d24651d4887ab61a142e042fb235
[2/2] powerpc/rtas: block error injection when locked down
  https://git.kernel.org/powerpc/c/b8f3e48834fe8c86b4f21739c6effd160e2c2c19

cheers


Re: [PATCH] powerpc: Always select HAVE_EFFICIENT_UNALIGNED_ACCESS

2022-10-04 Thread Michael Ellerman
On Fri, 16 Sep 2022 23:15:23 +1000, Michael Ellerman wrote:
> Currently powerpc selects HAVE_EFFICIENT_UNALIGNED_ACCESS in all cases
> but one. The exception is if the kernel is being built little endian and
> explicitly targeted for Power7.
> 
> The combination of Power7 and little endian was never commercially
> supported, or widely used. It was only ever possible on bare metal
> machines, using unofficial firmware, or in qemu guests hosted on those
> machines.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Always select HAVE_EFFICIENT_UNALIGNED_ACCESS
  https://git.kernel.org/powerpc/c/ecf8f36446f53866727d9670df1746f8d20130a8

cheers


Re: [PATCH v3 1/6] powerpc: Add hardware description string

2022-10-04 Thread Michael Ellerman
On Fri, 30 Sep 2022 18:27:04 +1000, Michael Ellerman wrote:
> Create a hardware description string, which we will use to record
> various details of the hardware platform we are running on.
> 
> Print the accumulated description at boot, and use it to set the generic
> description which is printed in oopses.
> 
> To begin with add ppc_md.name, aka the "machine description".
> 
> [...]

Applied to powerpc/next.

[1/6] powerpc: Add hardware description string
  https://git.kernel.org/powerpc/c/41dc056391b334fae646b55ee020bfa8f67b60c8
[2/6] powerpc: Add PVR & CPU name to hardware description
  https://git.kernel.org/powerpc/c/bd649d40e0f2ffa1e16b4dbb93dc627177410e78
[3/6] powerpc/64: Add logical PVR to the hardware description
  https://git.kernel.org/powerpc/c/48b7019b6abd029d3800620bb53f0ae3ca052441
[4/6] powerpc: Add device-tree model to the hardware description
  https://git.kernel.org/powerpc/c/541229707970ff2ad3f7705b1dbd025d7cc9bc48
[5/6] powerpc/powernv: Add opal details to the hardware description
  https://git.kernel.org/powerpc/c/37576cb0961fe9d3318c17e4e4bc5ecebf38e9bb
[6/6] powerpc/pseries: Add firmware details to the hardware description
  https://git.kernel.org/powerpc/c/8535a1afff0f4f568eb589f3795a930ef3d483b0

cheers


Re: [PATCH] powerpc/64s: Remove lost/old comment

2022-10-04 Thread Michael Ellerman
On Wed, 28 Sep 2022 23:09:41 +1000, Michael Ellerman wrote:
> The bulk of this was moved/reworded in:
>   57f266497d81 ("powerpc: Use gas sections for arranging exception vectors")
> 
> And now appears around line 700 in arch/powerpc/kernel/exceptions-64s.S.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/64s: Remove lost/old comment
  https://git.kernel.org/powerpc/c/0c360996425e36945c10479e2bc6ad5992c57794

cheers


Re: [PATCH] powerpc/microwatt: Remove unused early debug code

2022-10-04 Thread Michael Ellerman
On Mon, 19 Sep 2022 15:27:55 +1000, Michael Ellerman wrote:
> The original microwatt submission[1] included some early debug code for
> using the Microwatt "potato" UART.
> 
> The series that was eventually merged switched to using a standard UART,
> and so doesn't need any special early debug handling. But some of the
> original code was merged accidentally under the non-existent
> CONFIG_PPC_EARLY_DEBUG_MICROWATT.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/microwatt: Remove unused early debug code
  https://git.kernel.org/powerpc/c/456c3005102b18cce6662b1915c6efffe7744dcc

cheers


Re: [PATCH] powerpc: Drops STABS_DEBUG from linker scripts

2022-10-04 Thread Michael Ellerman
On Wed, 28 Sep 2022 23:09:51 +1000, Michael Ellerman wrote:
> No toolchain we support should be generating stabs debug information
> anymore. Drop the sections entirely from our linker scripts.
> 
> We removed all the manual stabs annotations in commit
> 12318163737c ("powerpc/32: Remove remaining .stabs annotations").
> 
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Drops STABS_DEBUG from linker scripts
  https://git.kernel.org/powerpc/c/7673335e2a0b8e68a2a238773a34e287a089a8fe

cheers


Re: [PATCH] powerpc/configs: Enable PPC_UV in powernv_defconfig

2022-10-04 Thread Michael Ellerman
On Thu, 29 Sep 2022 15:15:17 +1000, Michael Ellerman wrote:
> Make sure the ultravisor code at least gets some build testing by
> enabling it in powernv_defconfig.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/configs: Enable PPC_UV in powernv_defconfig
  https://git.kernel.org/powerpc/c/d91c3f15fcaf90723ebdcd1c9172f9bb8ea4f09b

cheers


Re: [PATCH] powerpc/64s: Remove old STAB comment

2022-10-04 Thread Michael Ellerman
On Wed, 28 Sep 2022 23:09:12 +1000, Michael Ellerman wrote:
> This used to be about the 0x4300 handler, but that was moved in commit
> da2bc4644c75 ("powerpc/64s: Add new exception vector macros").
> 
> Note that "STAB" here refers to "Segment Table" not the debug format.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/64s: Remove old STAB comment
  https://git.kernel.org/powerpc/c/57a8e4b26eaa8f30aa8bc737255d192915a53023

cheers


Re: [PATCH] powerpc/64: Remove unused SYS_CALL_TABLE symbol

2022-10-04 Thread Michael Ellerman
On Tue, 13 Sep 2022 22:45:45 +1000, Michael Ellerman wrote:
> In interrupt_64.S, formerly entry_64.S, there are two toc entries
> created for sys_call_table and compat_sys_call_table.
> 
> These are no longer used, since the system call entry was converted from
> asm to C, so remove them.
> 
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/64: Remove unused SYS_CALL_TABLE symbol
  https://git.kernel.org/powerpc/c/e74611aa91bb9939dfc4a41b045a1a19227cff98

cheers


Re: [PATCH 1/2] powerpc/vmlinux.lds: Ensure STRICT_ALIGN_SIZE is at least page aligned

2022-10-04 Thread Michael Ellerman
On Fri, 16 Sep 2022 23:14:21 +1000, Michael Ellerman wrote:
> Add a check that STRICT_ALIGN_SIZE is aligned to at least PAGE_SIZE.
> 
> That then makes the alignment to PAGE_SIZE immediately after the
> alignment to STRICT_ALIGN_SIZE redundant, so remove it.
> 
> 

Applied to powerpc/next.

[1/2] powerpc/vmlinux.lds: Ensure STRICT_ALIGN_SIZE is at least page aligned
  https://git.kernel.org/powerpc/c/331771e836e6a32c8632d8cf5e2cdd94471258ad
[2/2] powerpc/vmlinux.lds: Add an explicit symbol for the SRWX boundary
  https://git.kernel.org/powerpc/c/b150a4d12b919baf956b807aa305cf78df03d0fe

cheers


Re: [PATCH 1/2] powerpc/mm/64s: Drop pgd_huge()

2022-10-04 Thread Michael Ellerman
On Sat, 3 Sep 2022 22:36:39 +1000, Michael Ellerman wrote:
> On powerpc there are two ways for huge pages to be represented in the
> top level page table, aka PGD (Page Global Directory).
> 
> If the address space mapped by an individual PGD entry does not
> correspond to a given huge page size, then the PGD entry points to a
> non-standard page table, known as a "hugepd" (Huge Page Directory).
> The hugepd contains some number of huge page PTEs sufficient to map the
> address space with the given huge page size.
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc/mm/64s: Drop pgd_huge()
  https://git.kernel.org/powerpc/c/51da853e3708852f47cd95e6f5e1821c3d54c3ef
[2/2] powerpc/mm/64s: Drop p4d_leaf()
  https://git.kernel.org/powerpc/c/79c5640ab4460a03535ce0f120193174e7701b65

cheers


Re: [PATCH 1/2] powerpc: Make stack frame marker upper case

2022-10-04 Thread Michael Ellerman
On Wed, 28 Sep 2022 01:04:18 +1000, Michael Ellerman wrote:
> Now that the stack frame regs marker is only 32-bits it is not as
> obvious in memory dumps and easier to miss, eg:
> 
>   c4733e40    ||
>   c4733e50    ||
>   c4733e60    ||
>   c4733e70 73676572   |sger|
>   c4733e80 a700 708897f7ff7f  |p...|
>   c4733e90 0073428fff7f 208997f7ff7f  |.sB. ...|
>   c4733ea0 0100   ||
>   c4733eb0    ||
> 
> [...]

Applied to powerpc/next.

[1/2] powerpc: Make stack frame marker upper case
  https://git.kernel.org/powerpc/c/bbd71709087a9d486d1da42399eec14e106072f2
[2/2] powerpc: Reverse stack frame marker on little endian
  https://git.kernel.org/powerpc/c/19c95df1277c48e3ef8cc7d9f1d315dce949f203

cheers


Re: [PATCH] powerpc: update config files

2022-10-04 Thread Michael Ellerman
On Thu, 29 Sep 2022 12:15:02 +0200, Lukas Bulwahn wrote:
> Clean up config files by:
>   - removing configs that were deleted in the past
>   - removing configs not in tree and without recently pending patches
>   - adding new configs that are replacements for old configs in the file
> 
> For some detailed information, see Link.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: update config files
  https://git.kernel.org/powerpc/c/d210ee3fdfe8584f84f8fdd0ac4a9895d023325b

cheers


Re: [PATCH] powerpc/kprobes: Fix null pointer reference in arch_prepare_kprobe()

2022-10-04 Thread Michael Ellerman
On Fri, 23 Sep 2022 17:32:53 +0800, Li Huafei wrote:
> I found a null pointer reference in arch_prepare_kprobe():
> 
>   # echo 'p cmdline_proc_show' > kprobe_events
>   # echo 'p cmdline_proc_show+16' >> kprobe_events
>   [   67.278533][  T122] Kernel attempted to read user page (0) - exploit 
> attempt? (uid: 0)
>   [   67.279326][  T122] BUG: Kernel NULL pointer dereference on read at 
> 0x
>   [   67.279738][  T122] Faulting instruction address: 0xc0050bfc
>   [   67.280486][  T122] Oops: Kernel access of bad area, sig: 11 [#1]
>   [   67.280846][  T122] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA 
> PowerNV
>   [   67.281435][  T122] Modules linked in:
>   [   67.281903][  T122] CPU: 0 PID: 122 Comm: sh Not tainted 
> 6.0.0-rc3-7-gdcf8e5633e2e #10
>   [   67.282547][  T122] NIP:  c0050bfc LR: c0050bec CTR: 
> 5bdc
>   [   67.282920][  T122] REGS: c000348475b0 TRAP: 0300   Not tainted  
> (6.0.0-rc3-7-gdcf8e5633e2e)
>   [   67.283424][  T122] MSR:  90009033   
> CR: 88002444  XER: 20040006
>   [   67.284023][  T122] CFAR: c022d100 DAR:  DSISR: 
> 4000 IRQMASK: 0
>   [   67.284023][  T122] GPR00: c0050bec c00034847850 
> c13f6100 c1fb7718
>   [   67.284023][  T122] GPR04: c0515c10 c0e5fe08 
> c133da60 c4839300
>   [   67.284023][  T122] GPR08: c14ffb98  
> c0515c0c c0e18576
>   [   67.284023][  T122] GPR12: c0e60170 c15a 
> 0001155e0460 
>   [   67.284023][  T122] GPR16:  7fffe8eeb3c8 
> 000116320728 
>   [   67.284023][  T122] GPR20: 000116320720  
> c12fa918 0006
>   [   67.284023][  T122] GPR24: c14ffb98 c11ed360 
>  c1fb7928
>   [   67.284023][  T122] GPR28:   
> 7c0802a6 c1fb7918
>   [   67.287799][  T122] NIP [c0050bfc] 
> arch_prepare_kprobe+0x10c/0x2d0
>   [   67.288490][  T122] LR [c0050bec] arch_prepare_kprobe+0xfc/0x2d0
>   [   67.289025][  T122] Call Trace:
>   [   67.289268][  T122] [c00034847850] [c12f77a0] 
> 0xc12f77a0 (unreliable)
>   [   67.28][  T122] [c000348478d0] [c0231320] 
> register_kprobe+0x3c0/0x7a0
>   [   67.290439][  T122] [c00034847940] [c02938c0] 
> __register_trace_kprobe+0x140/0x1a0
>   [   67.290898][  T122] [c000348479b0] [c02944c4] 
> __trace_kprobe_create+0x794/0x1040
>   [   67.291330][  T122] [c00034847b60] [c02a1614] 
> trace_probe_create+0xc4/0xe0
>   [   67.291717][  T122] [c00034847bb0] [c029363c] 
> create_or_delete_trace_kprobe+0x2c/0x80
>   [   67.292158][  T122] [c00034847bd0] [c0264420] 
> trace_parse_run_command+0xf0/0x210
>   [   67.292611][  T122] [c00034847c70] [c02934a0] 
> probes_write+0x20/0x40
>   [   67.292996][  T122] [c00034847c90] [c045e98c] 
> vfs_write+0xfc/0x450
>   [   67.293356][  T122] [c00034847d50] [c045eec4] 
> ksys_write+0x84/0x140
>   [   67.293716][  T122] [c00034847da0] [c002e4fc] 
> system_call_exception+0x17c/0x3a0
>   [   67.294186][  T122] [c00034847e10] [c000c0e8] 
> system_call_vectored_common+0xe8/0x278
>   [   67.294680][  T122] --- interrupt: 3000 at 0x7fffa5682de0
>   [   67.294937][  T122] NIP:  7fffa5682de0 LR:  CTR: 
> 
>   [   67.295313][  T122] REGS: c00034847e80 TRAP: 3000   Not tainted  
> (6.0.0-rc3-7-gdcf8e5633e2e)
>   [   67.295725][  T122] MSR:  9280f033 
>   CR: 44002408  XER: 
>   [   67.296291][  T122] IRQMASK: 0
>   [   67.296291][  T122] GPR00: 0004 7fffe8eeaec0 
> 7fffa5757300 0001
>   [   67.296291][  T122] GPR04: 000116329c60 0017 
> 00116329 
>   [   67.296291][  T122] GPR08: 0006  
>  
>   [   67.296291][  T122] GPR12:  7fffa580ac60 
> 0001155e0460 
>   [   67.296291][  T122] GPR16:  7fffe8eeb3c8 
> 000116320728 
>   [   67.296291][  T122] GPR20: 000116320720  
>  0002
>   [   67.296291][  T122] GPR24: 0001163206f0 0020 
> 7fffe8eeafa0 0001
>   [   67.296291][  T122] GPR28:  0017 
> 000116329c60 0001
>   [   67.299570][  T122] NIP [7fffa5682de0] 0x7fffa5682de0
>   [   67.299837][  T122] LR [] 0x0
>   [   67.300072][  T122] --- interrupt: 3000
>   [   67.300447][  T122] Instruction dump:
>   [   67.300736][  T122] 386319d8 481342f5 6000 6000 6000 
> e87f0028 3863fffc 481dc4d1
>   [   67.301230][  T122] 6000 

Re: [PATCH v2] ppc64/kdump: Limit kdump base to 512MB

2022-10-04 Thread Michael Ellerman
On Mon, 12 Sep 2022 12:20:31 +0530, Hari Bathini wrote:
> Since commit e641eb03ab2b0 ("powerpc: Fix up the kdump base cap to
> 128M") memory for kdump kernel has been reserved at an offset of
> 128MB. This held up well for a long time before running into boot
> failure on LPARs having a lot of cores. Commit 7c5ed82b800d8
> ("powerpc: Set crashkernel offset to mid of RMA region") fixed this
> boot failure by moving the offset to mid of RMA region. This change
> meant the offset is either 256MB or 512MB on LPARs as ppc64_rma_size
> was 512MB or 1024MB owing to commit 103a8542cb35b ("powerpc/book3s64/
> radix: Fix boot failure with large amount of guest memory")
> 
> [...]

Applied to powerpc/next.

[1/1] ppc64/kdump: Limit kdump base to 512MB
  https://git.kernel.org/powerpc/c/bd7dc90e52e8db7ee0f38c51bc9047bafb54fe43

cheers


Re: [PATCH v3] powerpc: Ignore DSI error caused by the copy/paste instruction

2022-10-04 Thread Michael Ellerman
On Tue, 27 Sep 2022 18:29:27 -0700, Haren Myneni wrote:
> The data storage interrupt (DSI) error will be generated when the
> paste operation is issued on the suspended Nest Accelerator (NX)
> window due to NX state changes. The hypervisor expects the
> partition to ignore this error during page fault handling.
> To differentiate DSI caused by an actual HW configuration or by
> the NX window, a new “ibm,pi-features” type value is defined.
> Byte 0, bit 3 of pi-attribute-specifier-type is now defined to
> indicate this DSI error. If this error is not ignored, the user
> space can get SIGBUS when the NX request is issued.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc: Ignore DSI error caused by the copy/paste instruction
  https://git.kernel.org/powerpc/c/335e1a91042764629fbbcd8c7e40379fa3762d35

cheers


Re: [PATCH] powerpc/pseries/vas: Pass hw_cpu_id to node associativity HCALL

2022-10-04 Thread Michael Ellerman
On Wed, 28 Sep 2022 18:57:33 -0700, Haren Myneni wrote:
> Generally the hypervisor decides to allocate a window on different
> VAS instances. But if the user space wishes to allocate on the
> current VAS instance where the process is executing, the kernel has
> to pass associativity domain IDs to allocate VAS window HCALL. To
> determine the associativity domain IDs for the current CPU, passing
> smp_processor_id() to node associativity HCALL which may return
> H_P2 (-55) error during DLPAR CPU event.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/pseries/vas: Pass hw_cpu_id to node associativity HCALL
  https://git.kernel.org/powerpc/c/f3e5d9e53e74d77e711a2c90a91a8b0836a9e0b3

cheers


Re: [PATCH] powerpc/pseries: Move vas_migration_handler early during migration

2022-10-04 Thread Michael Ellerman
On Thu, 22 Sep 2022 01:27:07 -0700, Haren Myneni wrote:
> When the migration is initiated, the hypervisor changes VAS
> mappings as part of pre-migration event. Then the OS gets the
> migration event which closes all VAS windows before the migration
> starts. NX generates continuous faults until windows are closed
> and the user space can not differentiate these NX faults coming
> from the actual migration. So to reduce this time window, close
> VAS windows first in pseries_migrate_partition().
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/pseries: Move vas_migration_handler early during migration
  https://git.kernel.org/powerpc/c/465dda9d320d1cb9424f1015b0520ec4c4f0d279

cheers


Re: (subset) [PATCH v1 0/3] coding-style.rst: document BUG() and WARN() rules

2022-10-04 Thread Michael Ellerman
On Tue, 20 Sep 2022 14:22:59 +0200, David Hildenbrand wrote:
> As it seems to be rather unclear if/when to use BUG(), BUG_ON(),
> VM_BUG_ON(), WARN_ON_ONCE(), ... let's try to document the result of a
> recent discussion.
> 
> Details can be found in patch #1.
> 
> RFC -> v1:
> * "coding-style.rst: document BUG() and WARN() rules ("do not crash the
>kernel")"
>  -> Rephrase/extend according to John
>  -> Add some details regarding the use of panic()
> * powerpc/prom_init: drop PROM_BUG()
>  -> Added
> * "checkpatch: warn on usage of VM_BUG_ON() and other BUG variants"
>  -> Warn on more variants
> 
> [...]

Patch 2 applied to powerpc/next.

[2/3] powerpc/prom_init: drop PROM_BUG()
  https://git.kernel.org/powerpc/c/c4167aec98524fa4511b3222303a758b532b6009

cheers


Re: [PATCH v2 01/19] powerpc/Kconfig: Fix non existing CONFIG_PPC_FSL_BOOKE

2022-10-04 Thread Michael Ellerman
On Mon, 19 Sep 2022 19:01:25 +0200, Christophe Leroy wrote:
> CONFIG_PPC_FSL_BOOKE doesn't exist. Should be CONFIG_FSL_BOOKE.
> 
> 

Applied to powerpc/next.

[01/19] powerpc/Kconfig: Fix non existing CONFIG_PPC_FSL_BOOKE

https://git.kernel.org/powerpc/c/d1203f32d86987a3ccd7de9ba2448ba12d86d125
[02/19] powerpc/64e: Tie PPC_BOOK3E_64 to PPC_E500MC

https://git.kernel.org/powerpc/c/0069f3d14e7a656ba9d7dbaac72659687fdbf43c
[03/19] powerpc/64e: Remove unnecessary #ifdef CONFIG_PPC_FSL_BOOK3E

https://git.kernel.org/powerpc/c/b6100bedf1f9aea264757ac4a56eb1d8b04b9356
[04/19] powerpc/cputable: Remove __machine_check_early_realmode_p{7/8/9} 
prototypes

https://git.kernel.org/powerpc/c/afd2288a4c7d3400a53cb29616742f4395a809a1
[05/19] powerpc/cputable: Move __cpu_setup() prototypes out of cputable.h

https://git.kernel.org/powerpc/c/76b719881a26fec3b77652134f19cf1dfcc96318
[06/19] powerpc/cputable: Split cpu_specs[] out of cputable.h

https://git.kernel.org/powerpc/c/e320a76db4b02e1160eb4bfb17d8d1bc57979955
[07/19] powerpc: Remove CONFIG_FSL_BOOKE

https://git.kernel.org/powerpc/c/dfc3095cec27f402c183da920f4733785e4c873d
[08/19] powerpc/cputable: Split cpu_specs[] for mpc85xx and e500mc

https://git.kernel.org/powerpc/c/d7216567c65cbed655f9bf87ef906f9246d6f698
[09/19] powerpc: Remove CONFIG_PPC_BOOK3E

https://git.kernel.org/powerpc/c/e0d68273d7069537701bb91c51d90d1e12aacc33
[10/19] powerpc: Remove redundant selection of E500 and E500MC

https://git.kernel.org/powerpc/c/1df399012b6ab0b24466a0675710a53e3feb000f
[11/19] powerpc: Change CONFIG_E500 to CONFIG_PPC_E500

https://git.kernel.org/powerpc/c/688de017efaab8a7764ab2c05ce7128d0361023b
[12/19] Documentation: Rename PPC_FSL_BOOK3E to PPC_E500

https://git.kernel.org/powerpc/c/404a5e72f4dfd80dda6a3e9edd18012f79287bff
[13/19] watchdog: booke_wdt: Replace PPC_FSL_BOOK3E by PPC_E500

https://git.kernel.org/powerpc/c/ec65560ad84d9d2eb98cf864e3b530856cafd233
[14/19] powerpc: Remove CONFIG_PPC_FSL_BOOK3E

https://git.kernel.org/powerpc/c/3e7318584dfec11992f3ac45658c4bc1210b3778
[15/19] powerpc: Remove CONFIG_PPC_BOOK3E_MMU

https://git.kernel.org/powerpc/c/aa5f59df201dd350f7c291c845ac8b62c0d0edd5
[16/19] powerpc: Replace PPC_85xx || PPC_BOOKE_64 by PPC_E500

https://git.kernel.org/powerpc/c/772fd56deca62628c638d1a9bd2d34cbd371bb81
[17/19] powerpc: Simplify redundant Kconfig tests

https://git.kernel.org/powerpc/c/73d11498793f495d64230308afa50905f012f080
[18/19] powerpc: Cleanup idle for e500

https://git.kernel.org/powerpc/c/6556fd1a1e9fcd180348c4368d2387bdc6a17613
[19/19] powerpc: Remove impossible mmu_psize_defs[] on nohash

https://git.kernel.org/powerpc/c/605ba9ee8aaabc77178b369ec6f773616089020d

cheers


Re: [PATCH] powerpc: Reduce redundancy in pgtable.h

2022-10-04 Thread Michael Ellerman
On Wed, 7 Sep 2022 12:05:01 +0200, Christophe Leroy wrote:
> PAGE_KERNEL_TEXT, PAGE_KERNEL_EXEC and PAGE_AGP are the same
> for all powerpcs.
> 
> Remove duplicated definitions.
> 
> 

Applied to powerpc/next.

[1/1] powerpc: Reduce redundancy in pgtable.h
  https://git.kernel.org/powerpc/c/b997b2f57cae396448bb62c428efa4b112dd90ed

cheers


Re: [PATCH] powerpc: Make PAGE_KERNEL_xxx macros grep-friendly

2022-10-04 Thread Michael Ellerman
On Wed, 7 Sep 2022 12:05:21 +0200, Christophe Leroy wrote:
> Avoid multi-lines to help getting a complete view when using
> grep. They still remain under the 100 chars limit.
> 
> 

Applied to powerpc/next.

[1/1] powerpc: Make PAGE_KERNEL_xxx macros grep-friendly
  https://git.kernel.org/powerpc/c/6cc07821adce44e864c3752a3842936a6a7f6aef

cheers


Re: [PATCH] powerpc/irq: Refactor irq_soft_mask_{set,or}_return()

2022-10-04 Thread Michael Ellerman
On Tue, 20 Sep 2022 08:41:08 +0200, Christophe Leroy wrote:
> This partially reapplies commit ef5b570d3700 ("powerpc/irq: Don't
> open code irq_soft_mask helpers") which was reverted by
> commit 684c68d92e2e ("Revert "powerpc/irq: Don't open code
> irq_soft_mask helpers"")
> 
> irq_soft_mask_set_return() and irq_soft_mask_or_return()
> are supersets of irq_soft_mask_set().
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/irq: Refactor irq_soft_mask_{set,or}_return()
  https://git.kernel.org/powerpc/c/4af83545538a4fa80d14b9247ffc0db556e6a556

cheers


Re: [PATCH] powerpc/highmem: Properly handle fragmented memory

2022-10-04 Thread Michael Ellerman
On Tue, 20 Sep 2022 19:36:42 +0200, Christophe Leroy wrote:
> In addition to checking whether a page is reserved before allocating
> it to highmem, verify that it is valid memory.
> 
> Otherwise the kernel Oopses as below:
> 
> [0.00] mem auto-init: stack:off, heap alloc:off, heap free:off
> [0.00] Kernel attempted to read user page (7df58) - exploit attempt? 
> (uid: 0)
> [0.00] BUG: Unable to handle kernel data access on read at 0x0007df58
> [0.00] Faulting instruction address: 0xc01c8348
> [0.00] Oops: Kernel access of bad area, sig: 11 [#1]
> [0.00] BE PAGE_SIZE=4K SMP NR_CPUS=2 P2020RDB-PC
> [0.00] Modules linked in:
> [0.00] CPU: 0 PID: 0 Comm: swapper Not tainted 
> 6.0.0-rc2-0caacb197b677410bdac81bc34f05235+ #121
> [0.00] NIP:  c01c8348 LR: c01cb2bc CTR: 000a
> [0.00] REGS: c10d7e20 TRAP: 0300   Not tainted  
> (6.0.0-rc2-0caacb197b677410bdac81bc34f05235+)
> [0.00] MSR:  00021000   CR: 48044224  XER: 
> [0.00] DEAR: 0007df58 ESR: 
> [0.00] GPR00: c01cb294 c10d7f10 c1045340 0001 0004 c112bcc0 
> 0015 eedf1000
> [0.00] GPR08: 0003 0007df58  f000 28044228 0200 
>  
> [0.00] GPR16:    0275cb7a c000 0001 
> 075f 
> [0.00] GPR24: c1031004   0001 c10f eedf1000 
> 0008 0008
> [0.00] NIP [c01c8348] free_unref_page_prepare.part.93+0x48/0x60
> [0.00] LR [c01cb2bc] free_unref_page+0x84/0x4b8
> [0.00] Call Trace:
> [0.00] [c10d7f10] [eedf1000] 0xeedf1000 (unreliable)
> [0.00] [c10d7f20] [c01cb294] free_unref_page+0x5c/0x4b8
> [0.00] [c10d7f70] [c1007644] mem_init+0xd0/0x194
> [0.00] [c10d7fa0] [c1000e4c] start_kernel+0x4c0/0x6d0
> [0.00] [c10d7ff0] [c3e0] set_ivor+0x13c/0x178
> [0.00] Instruction dump:
> [0.00] 552817be 5509103a 7d294214 55293830 7d4a4a14 812a003c 814a0038 
> 5529002a
> [0.00] 7c892050 5484c23a 5489eafa 548406fe <7d2a482e> 7d242430 
> 5484077e 90870010
> [0.00] ---[ end trace  ]---
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/highmem: Properly handle fragmented memory
  https://git.kernel.org/powerpc/c/2fc1c63d2763ad7562ea7d241da79b42538a557b

cheers


Re: [PATCH] powerpc/book3s: Inline first level of update_mmu_cache()

2022-10-04 Thread Michael Ellerman
On Mon, 5 Sep 2022 11:38:25 +0200, Christophe Leroy wrote:
> update_mmu_cache() is a no-op when hash page tables are not used.
> On PPC32 that means when MMU_FTR_HPTE_TABLE is not defined.
> On PPC64 that means when RADIX is enabled.
> 
> Rename core part of update_mmu_cache() as __update_mmu_cache()
> and include the initial verification in an inlined caller.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/book3s: Inline first level of update_mmu_cache()
  https://git.kernel.org/powerpc/c/73ea68ad0d2f655815b6f1fbe1c5521d72f01b64

cheers


Re: [PATCH 1/2] powerpc/nohash: Remove pgd_huge() stub

2022-10-04 Thread Michael Ellerman
On Wed, 7 Sep 2022 11:34:44 +0200, Christophe Leroy wrote:
> linux/hugetlb.h has a fallback pgd_huge() macro for when
> pgd_huge is not defined.
> 
> Remove the powerpc redundant definitions.
> 
> 

Applied to powerpc/next.

[1/2] powerpc/nohash: Remove pgd_huge() stub
  https://git.kernel.org/powerpc/c/a26494cf4aeb8e9888428a43f55cc486f06f1334
[2/2] powerpc: Rely on generic definition of hugepd_t and is_hugepd when unused
  https://git.kernel.org/powerpc/c/691cdf016d3be6f66a3ea384809be229e0f9c590

cheers


Re: [PATCH linux-next] powerpc/pseries/vas: Remove the unneeded result variable

2022-10-04 Thread Michael Ellerman
On Thu, 25 Aug 2022 07:26:57 +0000, cgel@gmail.com wrote:
> From: ye xingchen 
> 
> Return the value vas_register_coproc_api() directly instead of storing it
> in another redundant variable.
> 
> 

Applied to powerpc/next.

[1/1] powerpc/pseries/vas: Remove the unneeded result variable
  https://git.kernel.org/powerpc/c/91986d7f0300c2c01722e0eac5119bb0946fe9b5

cheers


Re: [PATCH] powerpc/mm: Fix UBSAN warning reported on hugetlb

2022-10-04 Thread Michael Ellerman
On Thu, 8 Sep 2022 12:54:40 +0530, Aneesh Kumar K.V wrote:
> Powerpc architecture supports 16GB hugetlb pages with hash translation. For 4K
> page size, this is implemented as a hugepage directory entry at PGD level and
> for 64K it is implemented as a huge page pte at PUD level
> 
> With 16GB hugetlb size, offset within a page is greater than 32 bits. Hence
> switch to use unsigned long type when using hugepd_shift.
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/mm: Fix UBSAN warning reported on hugetlb
  https://git.kernel.org/powerpc/c/7dd3a7b90bca2c12e2146a47d63cf69a2f5d7e89

cheers


Re: [PATCH v2] powerpc/mm: Update max/min_low_pfn in the same function

2022-10-04 Thread Michael Ellerman
On Mon, 4 Jul 2022 12:08:51 +0530, Aneesh Kumar K.V wrote:
> For both CONFIG_NUMA enabled/disabled use mem_topology_setup to
> update max/min_low_pfn.
> 
> This also adds a min_low_pfn update to CONFIG_NUMA which was initialized
> to zero before.
> 
> 
> [...]

Applied to powerpc/next.

[1/1] powerpc/mm: Update max/min_low_pfn in the same function
  https://git.kernel.org/powerpc/c/7b31f7dadd7074fa70bb14a53bd286ffdfc98b04

cheers


Re: [PATCH linux-next] ocxl: Remove the unneeded result variable

2022-10-04 Thread Michael Ellerman
On Tue, 6 Sep 2022 07:20:06 +0000, cgel@gmail.com wrote:
> From: ye xingchen 
> 
> Return the value opal_npu_spa_clear_cache() directly instead of storing
> it in another redundant variable.
> 
> 

Applied to powerpc/next.

[1/1] ocxl: Remove the unneeded result variable
  https://git.kernel.org/powerpc/c/5e4952656bca1b5d8c2be36682dc66d844797ad2

cheers


Re: (subset) [PATCH V2 1/3] powerpc/perf: Fix branch_filter support for multiple filters in powerpc

2022-10-04 Thread Michael Ellerman
On Wed, 21 Sep 2022 20:22:53 +0530, Athira Rajeev wrote:
> For PERF_SAMPLE_BRANCH_STACK sample type, different branch_sample_type
> ie branch filters are supported. The branch filters are requested via
> event attribute "branch_sample_type". Multiple branch filters can be
> passed in event attribute.
> 
> Example:
> perf record -b -o- -B --branch-filter any,ind_call true
> 
> [...]

Patch 3 applied to powerpc/next.

[3/3] tools/testing/selftests/powerpc: Update the bhrb filter sampling test to 
test for multiple branch filters
  https://git.kernel.org/powerpc/c/18213532de7156af689cb0511d2f95bcbe3c98a0

cheers


Re: [PATCH] powerpc/mm/book3s/hash: Rename flush_tlb_pmd_range

2022-10-04 Thread Michael Ellerman
On Wed, 7 Sep 2022 13:49:41 +0530, Aneesh Kumar K.V wrote:
> This function does the hash page table update. Hence rename it to
> indicate this better to avoid confusion with flush_pmd_tlb_range()
> 
> 

Applied to powerpc/next.

[1/1] powerpc/mm/book3s/hash: Rename flush_tlb_pmd_range
  https://git.kernel.org/powerpc/c/d368e0c478a628f36680650f8d1d1634037b046e

cheers


[PATCH] powerpc/64s/interrupt: stack backtrace fix

2022-10-04 Thread Nicholas Piggin
The value of the stack frame regs marker that gets saved on the
stack in interrupt entry code does not match the regs marker value,
which breaks stack frame marker matching.

This stray instruction looks to have been introduced in a mismerge.

Fixes: bf75a3258a403 ("powerpc/64s/interrupt: move early boot ILE fixup into a 
macro")
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index fed983cc7ee0..ec5dfc7b5517 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -590,7 +590,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
std r9,_TRAP(r1)/* set trap number  */
li  r10,0
LOAD_REG_IMMEDIATE(r11, STACK_FRAME_REGS_MARKER)
-   rldimi  r11, r11, 32, 0
std r10,RESULT(r1)  /* clear regs->result   */
std r11,STACK_FRAME_OVERHEAD-16(r1) /* mark the frame   */
 .endm
-- 
2.37.2



Re: [PATCH] KVM: PPC: Book3S HV: Fix decrementer migration

2022-10-04 Thread Michael Ellerman
On Tue, 16 Aug 2022 19:25:17 -0300, Fabiano Rosas wrote:
> We used to have a workaround[1] for a hang during migration that was
> made ineffective when we converted the decrementer expiry to be
> relative to guest timebase.
> 
> The point of the workaround was that in the absence of an explicit
> decrementer expiry value provided by userspace during migration, KVM
> needs to initialize dec_expires to a value that will result in an
> expired decrementer after subtracting the current guest timebase. That
> stops the vcpu from hanging after migration due to a decrementer
> that's too large.
> 
> [...]

Applied to powerpc/topic/ppc-kvm.

[1/1] KVM: PPC: Book3S HV: Fix decrementer migration
  https://git.kernel.org/powerpc/c/0a5bfb824a6ea35e54b7e5ac6f881beea5e309d2

cheers


Re: [PATCH v2 12/44] cpuidle,dt: Push RCU-idle into driver

2022-10-04 Thread Ulf Hansson
On Tue, 4 Oct 2022 at 13:03, Ulf Hansson  wrote:
>
> On Mon, 19 Sept 2022 at 12:18, Peter Zijlstra  wrote:
> >
> > Doing RCU-idle outside the driver, only to then temporarily enable it
> > again before going idle is daft.
> >
> > Notably: this converts all dt_init_idle_driver() and
> > __CPU_PM_CPU_IDLE_ENTER() users for they are inextricably intertwined.
> >
> > Signed-off-by: Peter Zijlstra (Intel) 
>
> Reviewed-by: Ulf Hansson 

This was not (yet) my intention. Please have a look at the comments I
provided below.

Kind regards
Uffe

>
> > ---
> >  arch/arm/mach-omap2/cpuidle34xx.c|4 ++--
> >  drivers/acpi/processor_idle.c|2 ++
> >  drivers/cpuidle/cpuidle-arm.c|1 +
> >  drivers/cpuidle/cpuidle-big_little.c |8 ++--
> >  drivers/cpuidle/cpuidle-psci.c   |1 +
> >  drivers/cpuidle/cpuidle-qcom-spm.c   |1 +
> >  drivers/cpuidle/cpuidle-riscv-sbi.c  |1 +
> >  drivers/cpuidle/dt_idle_states.c |2 +-
> >  include/linux/cpuidle.h  |4 
> >  9 files changed, 19 insertions(+), 5 deletions(-)
> >
> > --- a/drivers/acpi/processor_idle.c
> > +++ b/drivers/acpi/processor_idle.c
> > @@ -1200,6 +1200,8 @@ static int acpi_processor_setup_lpi_stat
> > state->target_residency = lpi->min_residency;
> > if (lpi->arch_flags)
> > state->flags |= CPUIDLE_FLAG_TIMER_STOP;
> > +   if (lpi->entry_method == ACPI_CSTATE_FFH)
> > +   state->flags |= CPUIDLE_FLAG_RCU_IDLE;
>
> I assume the state index here will never be 0?
>
> If not, it may lead to that acpi_processor_ffh_lpi_enter() may trigger
> CPU_PM_CPU_IDLE_ENTER_PARAM() to call ct_cpuidle_enter|exit() for an
> idle-state that doesn't have the CPUIDLE_FLAG_RCU_IDLE bit set.
>
> > state->enter = acpi_idle_lpi_enter;
> > drv->safe_state_index = i;
> > }
> > --- a/drivers/cpuidle/cpuidle-arm.c
> > +++ b/drivers/cpuidle/cpuidle-arm.c
> > @@ -53,6 +53,7 @@ static struct cpuidle_driver arm_idle_dr
> >  * handler for idle state index 0.
> >  */
> > .states[0] = {
> > +   .flags  = CPUIDLE_FLAG_RCU_IDLE,
>
> Comparing arm64 and arm32 idle-states/idle-drivers, the $subject
> series ends up setting the CPUIDLE_FLAG_RCU_IDLE for the ARM WFI idle
> state (state zero), but only for the arm64 and psci cases (mostly
> arm64). For arm32 we would need to update the ARM_CPUIDLE_WFI_STATE
> too, as that is what most arm32 idle-drivers are using. My point is,
> the code becomes a bit inconsistent.
>
> Perhaps it's easier to avoid setting the CPUIDLE_FLAG_RCU_IDLE bit for
> all of the ARM WFI idle states, for both arm64 and arm32?
>
> > .enter  = arm_enter_idle_state,
> > .exit_latency   = 1,
> > .target_residency   = 1,
> > --- a/drivers/cpuidle/cpuidle-big_little.c
> > +++ b/drivers/cpuidle/cpuidle-big_little.c
> > @@ -64,7 +64,8 @@ static struct cpuidle_driver bl_idle_lit
> > .enter  = bl_enter_powerdown,
> > .exit_latency   = 700,
> > .target_residency   = 2500,
> > -   .flags  = CPUIDLE_FLAG_TIMER_STOP,
> > +   .flags  = CPUIDLE_FLAG_TIMER_STOP |
> > + CPUIDLE_FLAG_RCU_IDLE,
> > .name   = "C1",
> > .desc   = "ARM little-cluster power down",
> > },
> > @@ -85,7 +86,8 @@ static struct cpuidle_driver bl_idle_big
> > .enter  = bl_enter_powerdown,
> > .exit_latency   = 500,
> > .target_residency   = 2000,
> > -   .flags  = CPUIDLE_FLAG_TIMER_STOP,
> > +   .flags  = CPUIDLE_FLAG_TIMER_STOP |
> > + CPUIDLE_FLAG_RCU_IDLE,
> > .name   = "C1",
> > .desc   = "ARM big-cluster power down",
> > },
> > @@ -124,11 +126,13 @@ static int bl_enter_powerdown(struct cpu
> > struct cpuidle_driver *drv, int idx)
> >  {
> > cpu_pm_enter();
> > +   ct_idle_enter();
> >
> > cpu_suspend(0, bl_powerdown_finisher);
> >
> > /* signals the MCPM core that CPU is out of low power state */
> > mcpm_cpu_powered_up();
> > +   ct_idle_exit();
> >
> > cpu_pm_exit();
> >
> > --- a/drivers/cpuidle/cpuidle-psci.c
> > +++ b/drivers/cpuidle/cpuidle-psci.c
> > @@ -357,6 +357,7 @@ static int psci_idle_init_cpu(struct dev
> >  * PSCI idle states relies on architectural WFI to be represented as
> >  * state index 0.
> >  */
> > +   drv->states[0].flags = CPUIDLE_FLAG_RCU_IDLE;
> > drv->states[0].enter = 

Re: [PATCH v1 1/3] coding-style.rst: document BUG() and WARN() rules ("do not crash the kernel")

2022-10-04 Thread David Hildenbrand

On 26.09.22 09:44, Kalle Valo wrote:

David Hildenbrand  writes:


+Use WARN_ON_ONCE() rather than WARN() or WARN_ON()
+**************************************************
+
+WARN_ON_ONCE() is generally preferred over WARN() or WARN_ON(), because it
+is common for a given warning condition, if it occurs at all, to occur
+multiple times. This can fill up and wrap the kernel log, and can even slow
+the system enough that the excessive logging turns into its own, additional
+problem.


FWIW I have had cases where WARN() messages caused a reboot, maybe
mention that here? In my case the logging was so excessive that the
watchdog wasn't updated and in the end the device was forcefully
rebooted.



That should be covered by the last part, no? What would be your suggestion?


I was just thinking that maybe make it more obvious that even WARN_ON()
can crash the system, something along these lines:

"..., additional problem like stalling the system so much that it causes
a reboot."


Hi Kalle,

sorry for the late reply. Jonathan already queued v2 and sent it upstream.

I think that it is already covered by the statement and that the 
additional example isn't required -- most of us learned the hard way 
that "excessive logging turns into its own problem" includes all weird 
kinds of kernel crashes. A panic/reboot due to a watchdog not firing is 
one such possible outcome.


Thanks!

--
Thanks,

David / dhildenb



Re: [RFC PATCH 0/3] powerpc/32: nohz full support

2022-10-04 Thread Christophe Leroy


Le 04/10/2022 à 08:33, Nicholas Piggin a écrit :
> Doesn't seem to be much more involved in adding context tracking and
> generic virt cpu accounting support for 32-bit, which is all that's
> left to support NO_HZ_FULL.
> 
> I tested this with e5500 SMP kernel with isolated and nohz CPU, and
> it seems to be doing the right thing -- periodic tick is stopped on
> the nohz CPUs when they are running in userspace.
> 
> Context tracking warnings should catch quite quickly if we got
> something wrong there (with the force context tracking option). I
> don't have a 32-bit KVM environment to test so that might have some
> issues but it should be quite easy to fix if it can be tested.
> 
> I assume the virt cpu accounting gen option removal is okay, but not
> exactly sure what to look for in terms of possible problems, so we'll
> see what comments that gets back.

I'm having a hard time finding the link between patch 1 and patch 2/3.

Christophe

> 
> Thanks,
> Nick
> 
> Nicholas Piggin (3):
>powerpc/32: Implement HAVE_CONTEXT_TRACKING_USER support
>powerpc: remove the last remnants of cputime_t
>Remove HAVE_VIRT_CPU_ACCOUNTING_GEN option
> 
>   arch/Kconfig | 11 ---
>   arch/arm/Kconfig |  1 -
>   arch/csky/Kconfig|  1 -
>   arch/loongarch/Kconfig   |  1 -
>   arch/mips/Kconfig|  1 -
>   arch/powerpc/Kconfig |  2 +-
>   arch/powerpc/include/asm/cputime.h   | 17 +
>   arch/powerpc/include/asm/interrupt.h | 21 ++---
>   arch/powerpc/kernel/time.c   | 23 ++-
>   arch/xtensa/Kconfig  |  1 -
>   init/Kconfig |  1 -
>   kernel/time/Kconfig  |  2 --
>   12 files changed, 10 insertions(+), 72 deletions(-)
> 

Re: [PATCH v2 38/44] cpuidle,powerdomain: Remove trace_.*_rcuidle()

2022-10-04 Thread Ulf Hansson
On Mon, 19 Sept 2022 at 12:17, Peter Zijlstra  wrote:
>
> OMAP was the one and only user.
>
> Signed-off-by: Peter Zijlstra (Intel) 

There are changes to the runtime PM core as part of $subject patch.
Perhaps move those parts into a separate patch? In any case, the code
looks good to me.

Reviewed-by: Ulf Hansson 

Kind regards
Uffe

> ---
>  arch/arm/mach-omap2/powerdomain.c |   10 +-
>  drivers/base/power/runtime.c  |   24 
>  2 files changed, 17 insertions(+), 17 deletions(-)
>
> --- a/arch/arm/mach-omap2/powerdomain.c
> +++ b/arch/arm/mach-omap2/powerdomain.c
> @@ -187,9 +187,9 @@ static int _pwrdm_state_switch(struct po
> trace_state = (PWRDM_TRACE_STATES_FLAG |
>((next & OMAP_POWERSTATE_MASK) << 8) |
>((prev & OMAP_POWERSTATE_MASK) << 0));
> -   trace_power_domain_target_rcuidle(pwrdm->name,
> - trace_state,
> - 
> raw_smp_processor_id());
> +   trace_power_domain_target(pwrdm->name,
> + trace_state,
> + raw_smp_processor_id());
> }
> break;
> default:
> @@ -541,8 +541,8 @@ int pwrdm_set_next_pwrst(struct powerdom
>
> if (arch_pwrdm && arch_pwrdm->pwrdm_set_next_pwrst) {
> /* Trace the pwrdm desired target state */
> -   trace_power_domain_target_rcuidle(pwrdm->name, pwrst,
> - raw_smp_processor_id());
> +   trace_power_domain_target(pwrdm->name, pwrst,
> + raw_smp_processor_id());
> /* Program the pwrdm desired target state */
> ret = arch_pwrdm->pwrdm_set_next_pwrst(pwrdm, pwrst);
> }
> --- a/drivers/base/power/runtime.c
> +++ b/drivers/base/power/runtime.c
> @@ -442,7 +442,7 @@ static int rpm_idle(struct device *dev,
> int (*callback)(struct device *);
> int retval;
>
> -   trace_rpm_idle_rcuidle(dev, rpmflags);
> +   trace_rpm_idle(dev, rpmflags);
> retval = rpm_check_suspend_allowed(dev);
> if (retval < 0)
> ;   /* Conditions are wrong. */
> @@ -481,7 +481,7 @@ static int rpm_idle(struct device *dev,
> dev->power.request_pending = true;
> queue_work(pm_wq, &dev->power.work);
> }
> -   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, 0);
> +   trace_rpm_return_int(dev, _THIS_IP_, 0);
> return 0;
> }
>
> @@ -493,7 +493,7 @@ static int rpm_idle(struct device *dev,
> wake_up_all(&dev->power.wait_queue);
>
>   out:
> -   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
> +   trace_rpm_return_int(dev, _THIS_IP_, retval);
> return retval ? retval : rpm_suspend(dev, rpmflags | RPM_AUTO);
>  }
>
> @@ -557,7 +557,7 @@ static int rpm_suspend(struct device *de
> struct device *parent = NULL;
> int retval;
>
> -   trace_rpm_suspend_rcuidle(dev, rpmflags);
> +   trace_rpm_suspend(dev, rpmflags);
>
>   repeat:
> retval = rpm_check_suspend_allowed(dev);
> @@ -708,7 +708,7 @@ static int rpm_suspend(struct device *de
> }
>
>   out:
> -   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
> +   trace_rpm_return_int(dev, _THIS_IP_, retval);
>
> return retval;
>
> @@ -760,7 +760,7 @@ static int rpm_resume(struct device *dev
> struct device *parent = NULL;
> int retval = 0;
>
> -   trace_rpm_resume_rcuidle(dev, rpmflags);
> +   trace_rpm_resume(dev, rpmflags);
>
>   repeat:
> if (dev->power.runtime_error) {
> @@ -925,7 +925,7 @@ static int rpm_resume(struct device *dev
> spin_lock_irq(&dev->power.lock);
> }
>
> -   trace_rpm_return_int_rcuidle(dev, _THIS_IP_, retval);
> +   trace_rpm_return_int(dev, _THIS_IP_, retval);
>
> return retval;
>  }
> @@ -1081,7 +1081,7 @@ int __pm_runtime_idle(struct device *dev
> if (retval < 0) {
> return retval;
> } else if (retval > 0) {
> -   trace_rpm_usage_rcuidle(dev, rpmflags);
> +   trace_rpm_usage(dev, rpmflags);
> return 0;
> }
> }
> @@ -1119,7 +1119,7 @@ int __pm_runtime_suspend(struct device *
> if (retval < 0) {
> return retval;
> } else if (retval > 0) {
> -   trace_rpm_usage_rcuidle(dev, rpmflags);
> +   trace_rpm_usage(dev, rpmflags);
> return 0;
> }
> }
> @@ -1202,7 +1202,7 @@ int 

Re: [PATCH v2 39/44] cpuidle,clk: Remove trace_.*_rcuidle()

2022-10-04 Thread Ulf Hansson
On Mon, 19 Sept 2022 at 12:17, Peter Zijlstra  wrote:
>
> OMAP was the one and only user.

OMAP? :-)

>
> Signed-off-by: Peter Zijlstra (Intel) 

Reviewed-by: Ulf Hansson 

Kind regards
Uffe

> ---
>  drivers/clk/clk.c |8 
>  1 file changed, 4 insertions(+), 4 deletions(-)
>
> --- a/drivers/clk/clk.c
> +++ b/drivers/clk/clk.c
> @@ -978,12 +978,12 @@ static void clk_core_disable(struct clk_
> if (--core->enable_count > 0)
> return;
>
> -   trace_clk_disable_rcuidle(core);
> +   trace_clk_disable(core);
>
> if (core->ops->disable)
> core->ops->disable(core->hw);
>
> -   trace_clk_disable_complete_rcuidle(core);
> +   trace_clk_disable_complete(core);
>
> clk_core_disable(core->parent);
>  }
> @@ -1037,12 +1037,12 @@ static int clk_core_enable(struct clk_co
> if (ret)
> return ret;
>
> -   trace_clk_enable_rcuidle(core);
> +   trace_clk_enable(core);
>
> if (core->ops->enable)
> ret = core->ops->enable(core->hw);
>
> -   trace_clk_enable_complete_rcuidle(core);
> +   trace_clk_enable_complete(core);
>
> if (ret) {
> clk_core_disable(core->parent);
>
>
> ___
> Virtualization mailing list
> virtualizat...@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 23/44] arm,smp: Remove trace_.*_rcuidle() usage

2022-10-04 Thread Ulf Hansson
On Mon, 19 Sept 2022 at 12:18, Peter Zijlstra  wrote:
>
> None of these functions should ever be run with RCU disabled anymore.
>
> Specifically, do_handle_IPI() is only called from handle_IPI() which
> explicitly does irq_enter()/irq_exit() which ensures RCU is watching.
>
> The problem with smp_cross_call() was, per commit 7c64cc0531fa ("arm: Use
> _rcuidle for smp_cross_call() tracepoints"), that
> cpuidle_enter_state_coupled() already had RCU disabled, but that's
> long been fixed by commit 1098582a0f6c ("sched,idle,rcu: Push rcu_idle
> deeper into the idle path").
>
> Signed-off-by: Peter Zijlstra (Intel) 

FWIW:

Reviewed-by: Ulf Hansson 

Kind regards
Uffe

> ---
>  arch/arm/kernel/smp.c |6 +++---
>  1 file changed, 3 insertions(+), 3 deletions(-)
>
> --- a/arch/arm/kernel/smp.c
> +++ b/arch/arm/kernel/smp.c
> @@ -639,7 +639,7 @@ static void do_handle_IPI(int ipinr)
> unsigned int cpu = smp_processor_id();
>
> if ((unsigned)ipinr < NR_IPI)
> -   trace_ipi_entry_rcuidle(ipi_types[ipinr]);
> +   trace_ipi_entry(ipi_types[ipinr]);
>
> switch (ipinr) {
> case IPI_WAKEUP:
> @@ -686,7 +686,7 @@ static void do_handle_IPI(int ipinr)
> }
>
> if ((unsigned)ipinr < NR_IPI)
> -   trace_ipi_exit_rcuidle(ipi_types[ipinr]);
> +   trace_ipi_exit(ipi_types[ipinr]);
>  }
>
>  /* Legacy version, should go away once all irqchips have been converted */
> @@ -709,7 +709,7 @@ static irqreturn_t ipi_handler(int irq,
>
>  static void smp_cross_call(const struct cpumask *target, unsigned int ipinr)
>  {
> -   trace_ipi_raise_rcuidle(target, ipi_types[ipinr]);
> +   trace_ipi_raise(target, ipi_types[ipinr]);
> __ipi_send_mask(ipi_desc[ipinr], target);
>  }
>
>
>
> ___
> Virtualization mailing list
> virtualizat...@lists.linux-foundation.org
> https://lists.linuxfoundation.org/mailman/listinfo/virtualization


Re: [PATCH v2 14/44] cpuidle,cpu_pm: Remove RCU fiddling from cpu_pm_{enter,exit}()

2022-10-04 Thread Ulf Hansson
On Mon, 19 Sept 2022 at 12:17, Peter Zijlstra  wrote:
>
> All callers should still have RCU enabled.
>
> Signed-off-by: Peter Zijlstra (Intel) 
> Acked-by: Mark Rutland 

Reviewed-by: Ulf Hansson 

Kind regards
Uffe

> ---
>  kernel/cpu_pm.c |9 -
>  1 file changed, 9 deletions(-)
>
> --- a/kernel/cpu_pm.c
> +++ b/kernel/cpu_pm.c
> @@ -30,16 +30,9 @@ static int cpu_pm_notify(enum cpu_pm_eve
>  {
> int ret;
>
> -   /*
> -* This introduces a RCU read critical section, which could be
> -* disfunctional in cpu idle. Copy RCU_NONIDLE code to let RCU know
> -* this.
> -*/
> -   ct_irq_enter_irqson();
> rcu_read_lock();
> ret = raw_notifier_call_chain(&cpu_pm_notifier.chain, event, NULL);
> rcu_read_unlock();
> -   ct_irq_exit_irqson();
>
> return notifier_to_errno(ret);
>  }
> @@ -49,11 +42,9 @@ static int cpu_pm_notify_robust(enum cpu
> unsigned long flags;
> int ret;
>
> -   ct_irq_enter_irqson();
> raw_spin_lock_irqsave(&cpu_pm_notifier.lock, flags);
> ret = raw_notifier_call_chain_robust(&cpu_pm_notifier.chain,
> event_up, event_down, NULL);
> raw_spin_unlock_irqrestore(&cpu_pm_notifier.lock, flags);
> -   ct_irq_exit_irqson();
>
> return notifier_to_errno(ret);
>  }
>
>


Re: [PATCH v2 12/44] cpuidle,dt: Push RCU-idle into driver

2022-10-04 Thread Ulf Hansson
On Mon, 19 Sept 2022 at 12:18, Peter Zijlstra  wrote:
>
> Doing RCU-idle outside the driver, only to then temporarily enable it
> again before going idle is daft.
>
> Notably: this converts all dt_init_idle_driver() and
> __CPU_PM_CPU_IDLE_ENTER() users for they are inextricably intertwined.
>
> Signed-off-by: Peter Zijlstra (Intel) 

Reviewed-by: Ulf Hansson 

Kind regards
Uffe

> ---
>  arch/arm/mach-omap2/cpuidle34xx.c|4 ++--
>  drivers/acpi/processor_idle.c|2 ++
>  drivers/cpuidle/cpuidle-arm.c|1 +
>  drivers/cpuidle/cpuidle-big_little.c |8 ++--
>  drivers/cpuidle/cpuidle-psci.c   |1 +
>  drivers/cpuidle/cpuidle-qcom-spm.c   |1 +
>  drivers/cpuidle/cpuidle-riscv-sbi.c  |1 +
>  drivers/cpuidle/dt_idle_states.c |2 +-
>  include/linux/cpuidle.h  |4 
>  9 files changed, 19 insertions(+), 5 deletions(-)
>
> --- a/drivers/acpi/processor_idle.c
> +++ b/drivers/acpi/processor_idle.c
> @@ -1200,6 +1200,8 @@ static int acpi_processor_setup_lpi_stat
> state->target_residency = lpi->min_residency;
> if (lpi->arch_flags)
> state->flags |= CPUIDLE_FLAG_TIMER_STOP;
> +   if (lpi->entry_method == ACPI_CSTATE_FFH)
> +   state->flags |= CPUIDLE_FLAG_RCU_IDLE;

I assume the state index here will never be 0?

If not, it may lead to acpi_processor_ffh_lpi_enter() triggering
CPU_PM_CPU_IDLE_ENTER_PARAM() to call ct_cpuidle_enter|exit() for an
idle-state that doesn't have the CPUIDLE_FLAG_RCU_IDLE bit set.

> state->enter = acpi_idle_lpi_enter;
> drv->safe_state_index = i;
> }
> --- a/drivers/cpuidle/cpuidle-arm.c
> +++ b/drivers/cpuidle/cpuidle-arm.c
> @@ -53,6 +53,7 @@ static struct cpuidle_driver arm_idle_dr
>  * handler for idle state index 0.
>  */
> .states[0] = {
> +   .flags  = CPUIDLE_FLAG_RCU_IDLE,

Comparing arm64 and arm32 idle-states/idle-drivers, the $subject
series ends up setting the CPUIDLE_FLAG_RCU_IDLE for the ARM WFI idle
state (state zero), but only for the arm64 and psci cases (mostly
arm64). For arm32 we would need to update the ARM_CPUIDLE_WFI_STATE
too, as that is what most arm32 idle-drivers are using. My point is,
the code becomes a bit inconsistent.

Perhaps it's easier to avoid setting the CPUIDLE_FLAG_RCU_IDLE bit for
all of the ARM WFI idle states, for both arm64 and arm32?

> .enter  = arm_enter_idle_state,
> .exit_latency   = 1,
> .target_residency   = 1,
> --- a/drivers/cpuidle/cpuidle-big_little.c
> +++ b/drivers/cpuidle/cpuidle-big_little.c
> @@ -64,7 +64,8 @@ static struct cpuidle_driver bl_idle_lit
> .enter  = bl_enter_powerdown,
> .exit_latency   = 700,
> .target_residency   = 2500,
> -   .flags  = CPUIDLE_FLAG_TIMER_STOP,
> +   .flags  = CPUIDLE_FLAG_TIMER_STOP |
> + CPUIDLE_FLAG_RCU_IDLE,
> .name   = "C1",
> .desc   = "ARM little-cluster power down",
> },
> @@ -85,7 +86,8 @@ static struct cpuidle_driver bl_idle_big
> .enter  = bl_enter_powerdown,
> .exit_latency   = 500,
> .target_residency   = 2000,
> -   .flags  = CPUIDLE_FLAG_TIMER_STOP,
> +   .flags  = CPUIDLE_FLAG_TIMER_STOP |
> + CPUIDLE_FLAG_RCU_IDLE,
> .name   = "C1",
> .desc   = "ARM big-cluster power down",
> },
> @@ -124,11 +126,13 @@ static int bl_enter_powerdown(struct cpu
> struct cpuidle_driver *drv, int idx)
>  {
> cpu_pm_enter();
> +   ct_idle_enter();
>
> cpu_suspend(0, bl_powerdown_finisher);
>
> /* signals the MCPM core that CPU is out of low power state */
> mcpm_cpu_powered_up();
> +   ct_idle_exit();
>
> cpu_pm_exit();
>
> --- a/drivers/cpuidle/cpuidle-psci.c
> +++ b/drivers/cpuidle/cpuidle-psci.c
> @@ -357,6 +357,7 @@ static int psci_idle_init_cpu(struct dev
>  * PSCI idle states relies on architectural WFI to be represented as
>  * state index 0.
>  */
> +   drv->states[0].flags = CPUIDLE_FLAG_RCU_IDLE;
> drv->states[0].enter = psci_enter_idle_state;
> drv->states[0].exit_latency = 1;
> drv->states[0].target_residency = 1;
> --- a/drivers/cpuidle/cpuidle-qcom-spm.c
> +++ b/drivers/cpuidle/cpuidle-qcom-spm.c
> @@ -72,6 +72,7 @@ static struct cpuidle_driver qcom_spm_id
> .owner = THIS_MODULE,
> .states[0] = {
> .enter  

Re: [RFC PATCH 1/3] powerpc/32: Implement HAVE_CONTEXT_TRACKING_USER support

2022-10-04 Thread Christophe Leroy


Le 04/10/2022 à 08:33, Nicholas Piggin a écrit :
> Context tracking involves tracking user, kernel, guest switches. This
> enables existing context tracking code for interrupt entry on 32-bit.
> KVM and interrupt exit already have context tracking calls.
> 
> Signed-off-by: Nicholas Piggin 
> ---
>   arch/powerpc/Kconfig |  2 +-
>   arch/powerpc/include/asm/interrupt.h | 21 ++---
>   2 files changed, 7 insertions(+), 16 deletions(-)
> 
> diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
> index 81c9f895d690..f667279ec74c 100644
> --- a/arch/powerpc/Kconfig
> +++ b/arch/powerpc/Kconfig
> @@ -204,7 +204,7 @@ config PPC
>   select HAVE_ARCH_SECCOMP_FILTER
>   select HAVE_ARCH_TRACEHOOK
>   select HAVE_ASM_MODVERSIONS
> - select HAVE_CONTEXT_TRACKING_USER   if PPC64
> + select HAVE_CONTEXT_TRACKING_USER
>   select HAVE_C_RECORDMCOUNT
>   select HAVE_DEBUG_KMEMLEAK
>   select HAVE_DEBUG_STACKOVERFLOW
> diff --git a/arch/powerpc/include/asm/interrupt.h 
> b/arch/powerpc/include/asm/interrupt.h
> index 4745bb9998bd..8860a246d51a 100644
> --- a/arch/powerpc/include/asm/interrupt.h
> +++ b/arch/powerpc/include/asm/interrupt.h
> @@ -85,6 +85,8 @@ do {
> \
>   (user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM))) \
>   BUG_ON(cond);   \
>   } while (0)
> +#else
> +#define INT_SOFT_MASK_BUG_ON(regs, cond)

Here you can just drop the ifdef CONFIG_PPC64 I guess instead of adding 
an additional empty macro.

>   #endif
>   
>   #ifdef CONFIG_PPC_BOOK3S_64
> @@ -152,19 +154,8 @@ static inline void booke_restore_dbcr0(void)
>   static inline void interrupt_enter_prepare(struct pt_regs *regs)
>   {
>   #ifdef CONFIG_PPC32
> - if (!arch_irq_disabled_regs(regs))
> - trace_hardirqs_off();
> -
> - if (user_mode(regs))
> - kuap_lock();
> - else
> - kuap_save_and_lock(regs);
> -
> - if (user_mode(regs))
> - account_cpu_user_entry();
> -#endif
> -
> -#ifdef CONFIG_PPC64
> + bool trace_enable = !arch_irq_disabled_regs(regs);

nit: You could put this as an #else to the existing #ifdef CONFIG_PPC64

> +#else
>   bool trace_enable = false;
>   
>   if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) {
> @@ -188,8 +179,9 @@ static inline void interrupt_enter_prepare(struct pt_regs 
> *regs)
>   } else {
>   __hard_RI_enable();
>   }
> + /* Only call trace_hardirqs_off when RI=1, it can cause SLB faults */
> +#endif
>   
> - /* Do this when RI=1 because it can cause SLB faults */
>   if (trace_enable)
>   trace_hardirqs_off();
>   
> @@ -215,7 +207,6 @@ static inline void interrupt_enter_prepare(struct pt_regs 
> *regs)
>   }
>   INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) &&
>  !(regs->msr & MSR_EE));
> -#endif
>   
>   booke_restore_dbcr0();
>   }

Re: [PATCH 1/2] powerpc/64s/interrupt: Change must-hard-mask interrupt check from BUG to WARN

2022-10-04 Thread Sachin Sant



> On 04-Oct-2022, at 10:41 AM, Nicholas Piggin  wrote:
> 
> This new assertion added is generally harmless and gets fixed up
> naturally, but it does indicate a problem with MSR manipulation
> somewhere.
> 
> Fixes: c39fb71a54f0 ("powerpc/64s/interrupt: masked handler debug check for 
> previous hard disable")
> Reported-by: Sachin Sant 
> Signed-off-by: Nicholas Piggin 
> ---

Thanks Nick. Tested both the patches on Power9 and Power10. I no longer
see the reported problem.

Tested-by: Sachin Sant 

- Sachin


Re: [RFC PATCH 2/2] powerpc: nop trap instruction after WARN_ONCE fires

2022-10-04 Thread Christophe Leroy


Le 04/10/2022 à 06:31, Nicholas Piggin a écrit :
> On Sat Sep 24, 2022 at 2:47 AM AEST, Christophe Leroy wrote:
>>
>>
>> Le 23/09/2022 à 17:41, Nicholas Piggin a écrit :
>>> WARN_ONCE and similar are often used in frequently executed code, and
>>> should not crash the system. The program check interrupt caused by
>>> WARN_ON_ONCE can be a significant overhead even when nothing is being
>>> printed. This can cause performance to become unacceptable, having the
>>> same effective impact to the user as a BUG_ON().
>>>
>>> Avoid this overhead by patching the trap with a nop instruction after a
>>> "once" trap fires. Conditional warnings that return a result must have
>>> equivalent compare and branch instructions after the trap, so when it is
>>> nopped the statement will behave the same way. It's possible the asm
>>> goto should be removed entirely and this comparison just done in C now.
>>
>> You mean, just like PPC32 ? (Since db87a7199229 ("powerpc/bug: Remove
>> specific powerpc BUG_ON() and WARN_ON() on PPC32"))
>>
>> But I'm having a hard time with your change.
>>
>> You change only WARN_ON()
>> But WARN_ON_ONCE() calls __WARN_FLAGS()
>> And WARN_ONCE() calls WARN() via DO_ONCE_LITE_IF()
>>
>> So I don't see any ..._ONCE something going with WARN_ON().
>>
>> Am I missing something ?
> 
> Hmm, no I must have missed something. I guess it is the EMIT_WARN_ENTRY
> in asm which is the main problem I've seen. Although we could remove the
> DO_ONCE_LITE_IF code generation from our WARN_ON_ONCE as well if we did
> this patching.
> 

Yes, I guess having now the recovery address in the bug table instead of 
the extable is rather more efficient.

Maybe DO_ONCE_LITE could be replaced by DO_ONCE which uses jump_label ? 
Not sure it is worth a specific patching implementation, is it ?

Christophe

Re: [PATCH] tools/perf: Fix aggr_printout to display cpu field irrespective of core value

2022-10-04 Thread Athira Rajeev



> On 04-Oct-2022, at 12:21 AM, Ian Rogers  wrote:
> 
> On Mon, Oct 3, 2022 at 7:03 AM atrajeev  wrote:
>> 
>> On 2022-10-02 05:17, Ian Rogers wrote:
>>> On Thu, Sep 29, 2022 at 5:56 AM James Clark 
>>> wrote:
 
 
 
 On 29/09/2022 09:49, Athira Rajeev wrote:
> 
> 
>> On 28-Sep-2022, at 9:05 PM, James Clark  wrote:
>> 
>> 
>> 
> 
> Hi James,
> 
> Thanks for looking at the patch and sharing review comments.
> 
>> On 13/09/2022 12:57, Athira Rajeev wrote:
>>> perf stat includes option to specify aggr_mode to display
>>> per-socket, per-core, per-die, per-node counter details.
>>> Also there is option -A ( AGGR_NONE, -no-aggr ), where the
>>> counter values are displayed for each cpu along with "CPU"
>>> value in one field of the output.
>>> 
>>> Each of the aggregate mode uses the information fetched
>>> from "/sys/devices/system/cpu/cpuX/topology" like core_id,
>> 
>> I thought that this wouldn't apply to the cpu field because cpu is
>> basically interchangeable as an index in cpumap, rather than anything
>> being read from the topology file.
> 
> The cpu value is filled in this function:
> 
> Function : aggr_cpu_id__cpu
> Code: util/cpumap.c
> 
>> 
>>> physical_package_id. Utility functions in "cpumap.c" fetches
>>> this information and populates the socket id, core id, cpu etc.
>>> If the platform does not expose the topology information,
>>> these values will be set to -1. Example, in case of powerpc,
>>> details like physical_package_id is restricted to be exposed
>>> in pSeries platform. So id.socket, id.core, id.cpu all will
>>> be set as -1.
>>> 
>>> In case of displaying socket or die value, there is no check
>>> done in the "aggr_printout" function to see if it points to
>>> valid socket id or die. But for displaying "cpu" value, there
>>> is a check for "if (id.core > -1)". In case of powerpc pSeries
>>> where detail like physical_package_id is restricted to be
>>> exposed, id.core will be set to -1. Hence the column or field
>>> itself for CPU won't be displayed in the output.
>>> 
>>> Result for per-socket:
>>> 
>>> <<>>
>>> perf stat -e branches --per-socket -a true
>>> 
>>> Performance counter stats for 'system wide':
>>> 
>>> S-1  32416,851  branches
>>> <<>>
>>> 
>>> Here S has -1 in above result. But with -A option which also
>>> expects CPU in one column in the result, below is observed.
>>> 
>>> <<>>
>>> /bin/perf stat -e instructions -A -a true
>>> 
>>> Performance counter stats for 'system wide':
>>> 
>>>   47,146  instructions
>>>   45,226  instructions
>>>   43,354  instructions
>>>   45,184  instructions
>>> <<>>
>>> 
>>> If the cpu id value is pointing to -1 also, it makes sense
>>> to display the column in the output to replicate the behaviour
>>> or to be in precedence with other aggr options(like per-socket,
>>> per-core). Remove the check "id.core" so that CPU field gets
>>> displayed in the output.
>> 
>> Why would you want to print -1 out? Seems like the if statement was a
>> good one to me, otherwise the output looks a bit broken to users. Are
>> the other aggregation modes even working if -1 is set for socket and
>> die? Maybe we need to not print -1 in those cases or exit earlier with a
>> failure.
>> 
>> The -1 value has a specific internal meaning which is "to not
>> aggregate". It doesn't mean "not set".
> 
> Currently, this check is done only for printing cpu value.
> For socket/die/core values, this check is not done. Pasting an
> example snippet from a powerpc system ( specifically from pseries 
> platform where
> the value is set to -1 )
> 
> ./perf stat --per-core -a -C 1 true
> 
> Performance counter stats for 'system wide':
> 
> S-1-D-1-C-1  1   1.06 msec cpu-clock  
>   #1.018 CPUs utilized
> S-1-D-1-C-1  1  2  context-switches   
>   #1.879 K/sec
> S-1-D-1-C-1  1  0  cpu-migrations 
>   #0.000 /sec
> 
> Here, though the value is -1, we are displaying it, whereas in the case of
> cpu, the first column will be
> empty since we do a check before printing.
> 
> Example:
> 
> ./perf stat --per-core -A -C 1 true
> 
> Performance counter stats for 'CPU(s) 1':
> 
>  0.88 msec cpu-clock#1.022 CPUs 
> utilized
> 2  context-switches
> 0  cpu-migrations
> 
> 
> Not sure whether there are scripts out there,

[RFC PATCH 3/3] Remove HAVE_VIRT_CPU_ACCOUNTING_GEN option

2022-10-04 Thread Nicholas Piggin
This option was created in commit 554b0004d0ec4 ("vtime: Add
HAVE_VIRT_CPU_ACCOUNTING_GEN Kconfig") for architectures to indicate
they support the 64-bit cputime_t required for VIRT_CPU_ACCOUNTING_GEN.

The cputime_t type has since been removed, so this doesn't have any
meaning. Remove it.

Cc: linux-a...@vger.kernel.org
Cc: Kevin Hilman 
Cc: Frederic Weisbecker 
Signed-off-by: Nicholas Piggin 
---
 arch/Kconfig   | 11 ---
 arch/arm/Kconfig   |  1 -
 arch/csky/Kconfig  |  1 -
 arch/loongarch/Kconfig |  1 -
 arch/mips/Kconfig  |  1 -
 arch/xtensa/Kconfig|  1 -
 init/Kconfig   |  1 -
 kernel/time/Kconfig|  2 --
 8 files changed, 19 deletions(-)

diff --git a/arch/Kconfig b/arch/Kconfig
index 5dbf11a5ba4e..54c73e22c526 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -829,17 +829,6 @@ config HAVE_VIRT_CPU_ACCOUNTING_IDLE
 config ARCH_HAS_SCALED_CPUTIME
bool
 
-config HAVE_VIRT_CPU_ACCOUNTING_GEN
-   bool
-   default y if 64BIT
-   help
- With VIRT_CPU_ACCOUNTING_GEN, cputime_t becomes 64-bit.
- Before enabling this option, arch code must be audited
- to ensure there are no races in concurrent read/write of
- cputime_t. For example, reading/writing 64-bit cputime_t on
- some 32-bit arches may require multiple accesses, so proper
- locking is needed to protect against concurrent accesses.
-
 config HAVE_IRQ_TIME_ACCOUNTING
bool
help
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig
index 87badeae3181..47f3a23564e8 100644
--- a/arch/arm/Kconfig
+++ b/arch/arm/Kconfig
@@ -119,7 +119,6 @@ config ARM
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_UID16
-   select HAVE_VIRT_CPU_ACCOUNTING_GEN
select IRQ_FORCED_THREADING
select MODULES_USE_ELF_REL
select NEED_DMA_MAP_STATE
diff --git a/arch/csky/Kconfig b/arch/csky/Kconfig
index 3cbc2dc62baf..8102d0d3f3b3 100644
--- a/arch/csky/Kconfig
+++ b/arch/csky/Kconfig
@@ -72,7 +72,6 @@ config CSKY
select HAVE_ARCH_MMAP_RND_BITS
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_CONTEXT_TRACKING_USER
-   select HAVE_VIRT_CPU_ACCOUNTING_GEN
select HAVE_DEBUG_BUGVERBOSE
select HAVE_DEBUG_KMEMLEAK
select HAVE_DYNAMIC_FTRACE
diff --git a/arch/loongarch/Kconfig b/arch/loongarch/Kconfig
index 26aeb1408e56..201b5d4e5c25 100644
--- a/arch/loongarch/Kconfig
+++ b/arch/loongarch/Kconfig
@@ -97,7 +97,6 @@ config LOONGARCH
select HAVE_SETUP_PER_CPU_AREA if NUMA
select HAVE_SYSCALL_TRACEPOINTS
select HAVE_TIF_NOHZ
-   select HAVE_VIRT_CPU_ACCOUNTING_GEN if !SMP
select IRQ_FORCED_THREADING
select IRQ_LOONGARCH_CPU
select MMU_GATHER_MERGE_VMAS if MMU
diff --git a/arch/mips/Kconfig b/arch/mips/Kconfig
index ec21f8999249..f67291d8e09c 100644
--- a/arch/mips/Kconfig
+++ b/arch/mips/Kconfig
@@ -90,7 +90,6 @@ config MIPS
select HAVE_SPARSE_SYSCALL_NR
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
-   select HAVE_VIRT_CPU_ACCOUNTING_GEN if 64BIT || !SMP
select IRQ_FORCED_THREADING
select ISA if EISA
select MODULES_USE_ELF_REL if MODULES
diff --git a/arch/xtensa/Kconfig b/arch/xtensa/Kconfig
index 12ac277282ba..18053fe9ec38 100644
--- a/arch/xtensa/Kconfig
+++ b/arch/xtensa/Kconfig
@@ -47,7 +47,6 @@ config XTENSA
select HAVE_PERF_EVENTS
select HAVE_STACKPROTECTOR
select HAVE_SYSCALL_TRACEPOINTS
-   select HAVE_VIRT_CPU_ACCOUNTING_GEN
select IRQ_DOMAIN
select MODULES_USE_ELF_RELA
select PERF_USE_VMALLOC
diff --git a/init/Kconfig b/init/Kconfig
index 94ce5a46a802..bb6d7f0d80fe 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -490,7 +490,6 @@ config VIRT_CPU_ACCOUNTING_NATIVE
 config VIRT_CPU_ACCOUNTING_GEN
bool "Full dynticks CPU time accounting"
depends on HAVE_CONTEXT_TRACKING_USER
-   depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
depends on GENERIC_CLOCKEVENTS
select VIRT_CPU_ACCOUNTING
select CONTEXT_TRACKING_USER
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index a41753be1a2b..ed480ba6cf35 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -121,8 +121,6 @@ config NO_HZ_FULL
# We need at least one periodic CPU for timekeeping
depends on SMP
depends on HAVE_CONTEXT_TRACKING_USER
-   # VIRT_CPU_ACCOUNTING_GEN dependency
-   depends on HAVE_VIRT_CPU_ACCOUNTING_GEN
select NO_HZ_COMMON
select RCU_NOCB_CPU
select VIRT_CPU_ACCOUNTING_GEN
-- 
2.37.2



[RFC PATCH 2/3] powerpc: remove the last remnants of cputime_t

2022-10-04 Thread Nicholas Piggin
cputime_t no longer exists; it has been converted to u64.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/cputime.h | 17 +
 arch/powerpc/kernel/time.c | 23 ++-
 2 files changed, 3 insertions(+), 37 deletions(-)

diff --git a/arch/powerpc/include/asm/cputime.h 
b/arch/powerpc/include/asm/cputime.h
index 431ae2343022..4961fb38e438 100644
--- a/arch/powerpc/include/asm/cputime.h
+++ b/arch/powerpc/include/asm/cputime.h
@@ -21,23 +21,8 @@
 #include 
 #include 
 
-typedef u64 __nocast cputime_t;
-typedef u64 __nocast cputime64_t;
-
-#define cmpxchg_cputime(ptr, old, new) cmpxchg(ptr, old, new)
-
 #ifdef __KERNEL__
-/*
- * Convert cputime <-> microseconds
- */
-extern u64 __cputime_usec_factor;
-
-static inline unsigned long cputime_to_usecs(const cputime_t ct)
-{
-   return mulhdu((__force u64) ct, __cputime_usec_factor);
-}
-
-#define cputime_to_nsecs(cputime) tb_to_ns((__force u64)cputime)
+#define cputime_to_nsecs(cputime) tb_to_ns(cputime)
 
 /*
  * PPC64 uses PACA which is task independent for storing accounting data while
diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c
index a2ab397065c6..d68de3618741 100644
--- a/arch/powerpc/kernel/time.c
+++ b/arch/powerpc/kernel/time.c
@@ -130,7 +130,7 @@ unsigned long tb_ticks_per_jiffy;
 unsigned long tb_ticks_per_usec = 100; /* sane default */
 EXPORT_SYMBOL(tb_ticks_per_usec);
 unsigned long tb_ticks_per_sec;
-EXPORT_SYMBOL(tb_ticks_per_sec);   /* for cputime_t conversions */
+EXPORT_SYMBOL(tb_ticks_per_sec);   /* for cputime conversions */
 
 DEFINE_SPINLOCK(rtc_lock);
 EXPORT_SYMBOL_GPL(rtc_lock);
@@ -150,21 +150,6 @@ EXPORT_SYMBOL_GPL(ppc_tb_freq);
 bool tb_invalid;
 
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
-/*
- * Factor for converting from cputime_t (timebase ticks) to
- * microseconds. This is stored as 0.64 fixed-point binary fraction.
- */
-u64 __cputime_usec_factor;
-EXPORT_SYMBOL(__cputime_usec_factor);
-
-static void calc_cputime_factors(void)
-{
-   struct div_result res;
-
-   div128_by_32(1000000, 0, tb_ticks_per_sec, &res);
-   __cputime_usec_factor = res.result_low;
-}
-
 /*
  * Read the SPURR on systems that have it, otherwise the PURR,
  * or if that doesn't exist return the timebase value passed in.
@@ -369,10 +354,7 @@ void vtime_flush(struct task_struct *tsk)
acct->hardirq_time = 0;
acct->softirq_time = 0;
 }
-
-#else /* ! CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
-#define calc_cputime_factors()
-#endif
+#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */
 
 void __delay(unsigned long loops)
 {
@@ -914,7 +896,6 @@ void __init time_init(void)
tb_ticks_per_jiffy = ppc_tb_freq / HZ;
tb_ticks_per_sec = ppc_tb_freq;
tb_ticks_per_usec = ppc_tb_freq / 1000000;
-   calc_cputime_factors();
 
/*
 * Compute scale factor for sched_clock.
-- 
2.37.2



[RFC PATCH 1/3] powerpc/32: Implement HAVE_CONTEXT_TRACKING_USER support

2022-10-04 Thread Nicholas Piggin
Context tracking involves tracking user, kernel, guest switches. This
enables existing context tracking code for interrupt entry on 32-bit.
KVM and interrupt exit already have context tracking calls.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/Kconfig |  2 +-
 arch/powerpc/include/asm/interrupt.h | 21 ++---
 2 files changed, 7 insertions(+), 16 deletions(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 81c9f895d690..f667279ec74c 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -204,7 +204,7 @@ config PPC
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_TRACEHOOK
select HAVE_ASM_MODVERSIONS
-   select HAVE_CONTEXT_TRACKING_USER   if PPC64
+   select HAVE_CONTEXT_TRACKING_USER
select HAVE_C_RECORDMCOUNT
select HAVE_DEBUG_KMEMLEAK
select HAVE_DEBUG_STACKOVERFLOW
diff --git a/arch/powerpc/include/asm/interrupt.h 
b/arch/powerpc/include/asm/interrupt.h
index 4745bb9998bd..8860a246d51a 100644
--- a/arch/powerpc/include/asm/interrupt.h
+++ b/arch/powerpc/include/asm/interrupt.h
@@ -85,6 +85,8 @@ do {  
\
(user_mode(regs) || (TRAP(regs) != INTERRUPT_PROGRAM))) \
BUG_ON(cond);   \
 } while (0)
+#else
+#define INT_SOFT_MASK_BUG_ON(regs, cond)
 #endif
 
 #ifdef CONFIG_PPC_BOOK3S_64
@@ -152,19 +154,8 @@ static inline void booke_restore_dbcr0(void)
 static inline void interrupt_enter_prepare(struct pt_regs *regs)
 {
 #ifdef CONFIG_PPC32
-   if (!arch_irq_disabled_regs(regs))
-   trace_hardirqs_off();
-
-   if (user_mode(regs))
-   kuap_lock();
-   else
-   kuap_save_and_lock(regs);
-
-   if (user_mode(regs))
-   account_cpu_user_entry();
-#endif
-
-#ifdef CONFIG_PPC64
+   bool trace_enable = !arch_irq_disabled_regs(regs);
+#else
bool trace_enable = false;
 
if (IS_ENABLED(CONFIG_TRACE_IRQFLAGS)) {
@@ -188,8 +179,9 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs)
} else {
__hard_RI_enable();
}
+   /* Only call trace_hardirqs_off when RI=1, it can cause SLB faults */
+#endif
 
-   /* Do this when RI=1 because it can cause SLB faults */
if (trace_enable)
trace_hardirqs_off();
 
@@ -215,7 +207,6 @@ static inline void interrupt_enter_prepare(struct pt_regs 
*regs)
}
INT_SOFT_MASK_BUG_ON(regs, !arch_irq_disabled_regs(regs) &&
   !(regs->msr & MSR_EE));
-#endif
 
booke_restore_dbcr0();
 }
-- 
2.37.2



[RFC PATCH 0/3] powerpc/32: nohz full support

2022-10-04 Thread Nicholas Piggin
Doesn't seem to be much more involved in adding context tracking and
generic virt cpu accounting support for 32-bit, which is all that's
left to support NO_HZ_FULL.

I tested this with e5500 SMP kernel with isolated and nohz CPU, and
it seems to be doing the right thing -- periodic tick is stopped on
the nohz CPUs when they are running in userspace.

Context tracking warnings should catch quite quickly if we got
something wrong there (with the force context tracking option). I
don't have a 32-bit KVM environment to test so that might have some
issues but it should be quite easy to fix if it can be tested.

I assume the virt cpu accounting gen option removal is okay, but not
exactly sure what to look for in terms of possible problems, so we'll
see what comments that gets back.

Thanks,
Nick

Nicholas Piggin (3):
  powerpc/32: Implement HAVE_CONTEXT_TRACKING_USER support
  powerpc: remove the last remnants of cputime_t
  Remove HAVE_VIRT_CPU_ACCOUNTING_GEN option

 arch/Kconfig | 11 ---
 arch/arm/Kconfig |  1 -
 arch/csky/Kconfig|  1 -
 arch/loongarch/Kconfig   |  1 -
 arch/mips/Kconfig|  1 -
 arch/powerpc/Kconfig |  2 +-
 arch/powerpc/include/asm/cputime.h   | 17 +
 arch/powerpc/include/asm/interrupt.h | 21 ++---
 arch/powerpc/kernel/time.c   | 23 ++-
 arch/xtensa/Kconfig  |  1 -
 init/Kconfig |  1 -
 kernel/time/Kconfig  |  2 --
 12 files changed, 10 insertions(+), 72 deletions(-)

-- 
2.37.2