Re: [PATCH v2] arch/powerpc: Remove unused cede related functions

2024-05-14 Thread Naveen N Rao
On Tue, May 14, 2024 at 06:54:55PM GMT, Gautam Menghani wrote:
> Remove extended_cede_processor() and its helpers as
> extended_cede_processor() has no callers since
> commit 48f6e7f6d948("powerpc/pseries: remove cede offline state for CPUs")
> 
> Signed-off-by: Gautam Menghani 
> ---
> v1 -> v2:
> 1. Remove helpers of extended_cede_processor()

Acked-by: Naveen N Rao 

> 
>  arch/powerpc/include/asm/plpar_wrappers.h | 28 ---
>  1 file changed, 28 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/plpar_wrappers.h 
> b/arch/powerpc/include/asm/plpar_wrappers.h
> index b3ee44a40c2f..71648c126970 100644
> --- a/arch/powerpc/include/asm/plpar_wrappers.h
> +++ b/arch/powerpc/include/asm/plpar_wrappers.h
> @@ -18,16 +18,6 @@ static inline long poll_pending(void)
>   return plpar_hcall_norets(H_POLL_PENDING);
>  }
>  
> -static inline u8 get_cede_latency_hint(void)
> -{
> - return get_lppaca()->cede_latency_hint;
> -}
> -
> -static inline void set_cede_latency_hint(u8 latency_hint)
> -{
> - get_lppaca()->cede_latency_hint = latency_hint;
> -}
> -
>  static inline long cede_processor(void)
>  {
>   /*
> @@ -37,24 +27,6 @@ static inline long cede_processor(void)
>   return plpar_hcall_norets_notrace(H_CEDE);
>  }
>  
> -static inline long extended_cede_processor(unsigned long latency_hint)
> -{
> - long rc;
> - u8 old_latency_hint = get_cede_latency_hint();
> -
> - set_cede_latency_hint(latency_hint);
> -
> - rc = cede_processor();
> -
> - /* Ensure that H_CEDE returns with IRQs on */
> - if (WARN_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(mfmsr() & 
> MSR_EE)))
> - __hard_irq_enable();
> -
> - set_cede_latency_hint(old_latency_hint);
> -
> - return rc;
> -}
> -
>  static inline long vpa_call(unsigned long flags, unsigned long cpu,
>   unsigned long vpa)
>  {
> -- 
> 2.45.0
> 


[PATCH v4 0/5] Add generic data patching functions

2024-05-14 Thread Benjamin Gray
Currently patch_instruction() bases the write length on the value being
written. If the value looks like a prefixed instruction it writes 8 bytes,
otherwise it writes 4 bytes. This makes it potentially buggy to use for
writing arbitrary data, as if you want to write 4 bytes but it decides to
write 8 bytes it may clobber the following memory or be unaligned and
trigger an oops if it tries to cross a page boundary.

To solve this, this series pulls out the size parameter to the 'top' of
the memory patching logic, and propagates it through the various functions.

The two sizes supported are int and long; this allows for patching
instructions and pointers on both ppc32 and ppc64. On ppc32 these are the
same size, so care is taken to only use the size parameter on static
functions, so the compiler can optimise it out entirely. Unfortunately
GCC trips over its own feet here and won't optimise in a way that is
optimal for strict RWX (mpc85xx_smp_defconfig) and no RWX
(pmac32_defconfig). More details in the v2 cover letter.

Changes from v3:
  * Improved the boot test. Now that alignment is enforced,
it checks the word (but not doubleword) aligned patch_ulong().

Changes from v2:
  * Various changes noted on each patch
  * Data patching now enforced to be aligned
  * Restore page aligned flushing optimisation

Changes from v1:
  * Addressed the v1 review actions
  * Removed noinline (for now)

v3: 
https://patchwork.ozlabs.org/project/linuxppc-dev/cover/20240325055302.876434-1-bg...@linux.ibm.com/
v2: 
https://patchwork.ozlabs.org/project/linuxppc-dev/cover/20231016050147.115686-1-bg...@linux.ibm.com/
v1: 
https://patchwork.ozlabs.org/project/linuxppc-dev/cover/20230207015643.590684-1-bg...@linux.ibm.com/

Benjamin Gray (5):
  powerpc/code-patching: Add generic memory patching
  powerpc/code-patching: Add data patch alignment check
  powerpc/64: Convert patch_instruction() to patch_u32()
  powerpc/32: Convert patch_instruction() to patch_uint()
  powerpc/code-patching: Add boot selftest for data patching

 arch/powerpc/include/asm/code-patching.h | 37 +
 arch/powerpc/kernel/module_64.c  |  5 +-
 arch/powerpc/kernel/static_call.c|  2 +-
 arch/powerpc/lib/code-patching.c | 70 +++-
 arch/powerpc/lib/test-code-patching.c| 41 ++
 arch/powerpc/platforms/powermac/smp.c|  2 +-
 6 files changed, 137 insertions(+), 20 deletions(-)

--
2.45.0



[PATCH v4 5/5] powerpc/code-patching: Add boot selftest for data patching

2024-05-14 Thread Benjamin Gray
Extend the code patching selftests with some basic coverage of the new
data patching variants too.

Signed-off-by: Benjamin Gray 

---

v4: * Change store to a check
* Account for doubleword alignment
v3: * New in v3
---
 arch/powerpc/lib/test-code-patching.c | 41 +++
 1 file changed, 41 insertions(+)

diff --git a/arch/powerpc/lib/test-code-patching.c 
b/arch/powerpc/lib/test-code-patching.c
index f76030087f98..8cd3b32f805b 100644
--- a/arch/powerpc/lib/test-code-patching.c
+++ b/arch/powerpc/lib/test-code-patching.c
@@ -438,6 +438,46 @@ static void __init test_multi_instruction_patching(void)
vfree(buf);
 }
 
+static void __init test_data_patching(void)
+{
+   void *buf;
+   u32 *addr32;
+
+   buf = vzalloc(PAGE_SIZE);
+   check(buf);
+   if (!buf)
+   return;
+
+   addr32 = buf + 128;
+
+   addr32[1] = 0xA0A1A2A3;
+   addr32[2] = 0xB0B1B2B3;
+
+   check(!patch_uint([1], 0xC0C1C2C3));
+
+   check(addr32[0] == 0);
+   check(addr32[1] == 0xC0C1C2C3);
+   check(addr32[2] == 0xB0B1B2B3);
+   check(addr32[3] == 0);
+
+   /* Unaligned patch_ulong() should fail */
+   if (IS_ENABLED(CONFIG_PPC64))
+   check(patch_ulong([1], 0xD0D1D2D3) == -EINVAL);
+
+   check(!patch_ulong([2], 0xD0D1D2D3));
+
+   check(addr32[0] == 0);
+   check(addr32[1] == 0xC0C1C2C3);
+   check(*(unsigned long *)([2]) == 0xD0D1D2D3);
+
+   if (!IS_ENABLED(CONFIG_PPC64))
+   check(addr32[3] == 0);
+
+   check(addr32[4] == 0);
+
+   vfree(buf);
+}
+
 static int __init test_code_patching(void)
 {
pr_info("Running code patching self-tests ...\n");
@@ -448,6 +488,7 @@ static int __init test_code_patching(void)
test_translate_branch();
test_prefixed_patching();
test_multi_instruction_patching();
+   test_data_patching();
 
return 0;
 }
-- 
2.45.0



[PATCH v4 1/5] powerpc/code-patching: Add generic memory patching

2024-05-14 Thread Benjamin Gray
patch_instruction() is designed for patching instructions in otherwise
readonly memory. Other consumers also sometimes need to patch readonly
memory, so have abused patch_instruction() for arbitrary data patches.

This is a problem on ppc64 as patch_instruction() decides on the patch
width using the 'instruction' opcode to see if it's a prefixed
instruction. Data that triggers this can lead to larger writes, possibly
crossing a page boundary and failing the write altogether.

Introduce patch_uint(), and patch_ulong(), with aliases patch_u32(), and
patch_u64() (on ppc64) designed for aligned data patches. The patch
size is now determined by the called function, and is passed as an
additional parameter to generic internals.

While the instruction flushing is not required for data patches, it
remains unconditional in this patch. A followup series is possible if
benchmarking shows fewer flushes gives an improvement in some
data-patching workload.

ppc32 does not support prefixed instructions, so is unaffected by the
original issue. Care is taken in not exposing the size parameter in the
public (non-static) interface, so the compiler can const-propagate it
away.

Signed-off-by: Benjamin Gray 

---

v3: * Rename from *_memory to *_mem
* Change type of ppc32 patch_uint() address to void*
* Explain introduction of val32 for big endian
* Some formatting

v2: * Deduplicate patch_32() definition
* Use u32 for val32
* Remove noinline
---
 arch/powerpc/include/asm/code-patching.h | 31 
 arch/powerpc/lib/code-patching.c | 64 ++--
 2 files changed, 80 insertions(+), 15 deletions(-)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 0e29ccf903d0..21a36e2c4e26 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -76,6 +76,37 @@ int patch_instruction(u32 *addr, ppc_inst_t instr);
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr);
 int patch_instructions(u32 *addr, u32 *code, size_t len, bool repeat_instr);
 
+/*
+ * The data patching functions patch_uint() and patch_ulong(), etc., must be
+ * called on aligned addresses.
+ *
+ * The instruction patching functions patch_instruction() and similar must be
+ * called on addresses satisfying instruction alignment requirements.
+ */
+
+#ifdef CONFIG_PPC64
+
+int patch_uint(void *addr, unsigned int val);
+int patch_ulong(void *addr, unsigned long val);
+
+#define patch_u64 patch_ulong
+
+#else
+
+static inline int patch_uint(void *addr, unsigned int val)
+{
+   return patch_instruction(addr, ppc_inst(val));
+}
+
+static inline int patch_ulong(void *addr, unsigned long val)
+{
+   return patch_instruction(addr, ppc_inst(val));
+}
+
+#endif
+
+#define patch_u32 patch_uint
+
 static inline unsigned long patch_site_addr(s32 *site)
 {
return (unsigned long)site + *site;
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index df64343b9214..18f762616db9 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -20,15 +20,14 @@
 #include 
 #include 
 
-static int __patch_instruction(u32 *exec_addr, ppc_inst_t instr, u32 
*patch_addr)
+static int __patch_mem(void *exec_addr, unsigned long val, void *patch_addr, 
bool is_dword)
 {
-   if (!ppc_inst_prefixed(instr)) {
-   u32 val = ppc_inst_val(instr);
+   if (!IS_ENABLED(CONFIG_PPC64) || likely(!is_dword)) {
+   /* For big endian correctness: plain address would use the 
wrong half */
+   u32 val32 = val;
 
-   __put_kernel_nofault(patch_addr, , u32, failed);
+   __put_kernel_nofault(patch_addr, , u32, failed);
} else {
-   u64 val = ppc_inst_as_ulong(instr);
-
__put_kernel_nofault(patch_addr, , u64, failed);
}
 
@@ -44,7 +43,10 @@ static int __patch_instruction(u32 *exec_addr, ppc_inst_t 
instr, u32 *patch_addr
 
 int raw_patch_instruction(u32 *addr, ppc_inst_t instr)
 {
-   return __patch_instruction(addr, instr, addr);
+   if (ppc_inst_prefixed(instr))
+   return __patch_mem(addr, ppc_inst_as_ulong(instr), addr, true);
+   else
+   return __patch_mem(addr, ppc_inst_val(instr), addr, false);
 }
 
 struct patch_context {
@@ -276,7 +278,7 @@ static void unmap_patch_area(unsigned long addr)
flush_tlb_kernel_range(addr, addr + PAGE_SIZE);
 }
 
-static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t instr)
+static int __do_patch_mem_mm(void *addr, unsigned long val, bool is_dword)
 {
int err;
u32 *patch_addr;
@@ -305,7 +307,7 @@ static int __do_patch_instruction_mm(u32 *addr, ppc_inst_t 
instr)
 
orig_mm = start_using_temp_mm(patching_mm);
 
-   err = __patch_instruction(addr, instr, patch_addr);
+   err = __patch_mem(addr, val, patch_addr, is_dword);
 
/* context synchronisation performed by 

[PATCH v4 2/5] powerpc/code-patching: Add data patch alignment check

2024-05-14 Thread Benjamin Gray
The new data patching still needs to be aligned within a
cacheline too for the flushes to work correctly. To simplify
this requirement, we just say data patches must be aligned.

Detect when data patching is not aligned, returning an invalid
argument error.

Signed-off-by: Benjamin Gray 

---

v3: * New in v3
---
 arch/powerpc/include/asm/code-patching.h | 6 ++
 arch/powerpc/lib/code-patching.c | 6 ++
 2 files changed, 12 insertions(+)

diff --git a/arch/powerpc/include/asm/code-patching.h 
b/arch/powerpc/include/asm/code-patching.h
index 21a36e2c4e26..e7f14720f630 100644
--- a/arch/powerpc/include/asm/code-patching.h
+++ b/arch/powerpc/include/asm/code-patching.h
@@ -95,11 +95,17 @@ int patch_ulong(void *addr, unsigned long val);
 
 static inline int patch_uint(void *addr, unsigned int val)
 {
+   if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int)))
+   return -EINVAL;
+
return patch_instruction(addr, ppc_inst(val));
 }
 
 static inline int patch_ulong(void *addr, unsigned long val)
 {
+   if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long)))
+   return -EINVAL;
+
return patch_instruction(addr, ppc_inst(val));
 }
 
diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c
index 18f762616db9..384b275d1bc5 100644
--- a/arch/powerpc/lib/code-patching.c
+++ b/arch/powerpc/lib/code-patching.c
@@ -386,12 +386,18 @@ NOKPROBE_SYMBOL(patch_instruction);
 
 int patch_uint(void *addr, unsigned int val)
 {
+   if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned int)))
+   return -EINVAL;
+
return patch_mem(addr, val, false);
 }
 NOKPROBE_SYMBOL(patch_uint);
 
 int patch_ulong(void *addr, unsigned long val)
 {
+   if (!IS_ALIGNED((unsigned long)addr, sizeof(unsigned long)))
+   return -EINVAL;
+
return patch_mem(addr, val, true);
 }
 NOKPROBE_SYMBOL(patch_ulong);
-- 
2.45.0



[PATCH v4 4/5] powerpc/32: Convert patch_instruction() to patch_uint()

2024-05-14 Thread Benjamin Gray
These changes are for patch_instruction() uses on data. Unlike ppc64
these should not be incorrect as-is, but using the patch_uint() alias
better reflects what kind of data being patched and allows for
benchmarking the effect of different patch_* implementations (e.g.,
skipping instruction flushing when patching data).

Signed-off-by: Benjamin Gray 
---
 arch/powerpc/kernel/static_call.c | 2 +-
 arch/powerpc/platforms/powermac/smp.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/static_call.c 
b/arch/powerpc/kernel/static_call.c
index 863a7aa24650..1502b7e439ca 100644
--- a/arch/powerpc/kernel/static_call.c
+++ b/arch/powerpc/kernel/static_call.c
@@ -17,7 +17,7 @@ void arch_static_call_transform(void *site, void *tramp, void 
*func, bool tail)
mutex_lock(_mutex);
 
if (func && !is_short) {
-   err = patch_instruction(tramp + PPC_SCT_DATA, ppc_inst(target));
+   err = patch_ulong(tramp + PPC_SCT_DATA, target);
if (err)
goto out;
}
diff --git a/arch/powerpc/platforms/powermac/smp.c 
b/arch/powerpc/platforms/powermac/smp.c
index 15644be31990..d21b681f52fb 100644
--- a/arch/powerpc/platforms/powermac/smp.c
+++ b/arch/powerpc/platforms/powermac/smp.c
@@ -827,7 +827,7 @@ static int smp_core99_kick_cpu(int nr)
mdelay(1);
 
/* Restore our exception vector */
-   patch_instruction(vector, ppc_inst(save_vector));
+   patch_uint(vector, save_vector);
 
local_irq_restore(flags);
if (ppc_md.progress) ppc_md.progress("smp_core99_kick_cpu done", 0x347);
-- 
2.45.0



[PATCH v4 3/5] powerpc/64: Convert patch_instruction() to patch_u32()

2024-05-14 Thread Benjamin Gray
This use of patch_instruction() is working on 32 bit data, and can fail
if the data looks like a prefixed instruction and the extra write
crosses a page boundary. Use patch_u32() to fix the write size.

Fixes: 8734b41b3efe ("powerpc/module_64: Fix livepatching for RO modules")
Link: https://lore.kernel.org/all/20230203004649.1f59dbd4@yea/
Signed-off-by: Benjamin Gray 

---

v2: * Added the fixes tag, it seems appropriate even if the subject does
  mention a more robust solution being required.

patch_u64() should be more efficient, but judging from the bug report
it doesn't seem like the data is doubleword aligned.
---
 arch/powerpc/kernel/module_64.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 7112adc597a8..e9bab599d0c2 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -651,12 +651,11 @@ static inline int create_stub(const Elf64_Shdr *sechdrs,
// func_desc_t is 8 bytes if ABIv2, else 16 bytes
desc = func_desc(addr);
for (i = 0; i < sizeof(func_desc_t) / sizeof(u32); i++) {
-   if (patch_instruction(((u32 *)>funcdata) + i,
- ppc_inst(((u32 *)())[i])))
+   if (patch_u32(((u32 *)>funcdata) + i, ((u32 *))[i]))
return 0;
}
 
-   if (patch_instruction(>magic, ppc_inst(STUB_MAGIC)))
+   if (patch_u32(>magic, STUB_MAGIC))
return 0;
 
return 1;
-- 
2.45.0



Re: Administrivia: remove old list address

2024-05-14 Thread Stephen Rothwell
Hi all,

On Fri, 3 May 2024 13:32:13 +1000 Stephen Rothwell  
wrote:
>
> Many years ago, linuxppc-dev was moved from ozlabs.org to
> lists.ozlabs.org.  These days most of the mail sent to linuxppc-dev
> @ozlabs.org is spam (causing added burden to the list owner i.e. me),
> so I intend to drop that name.  Initially, it will bounce with a
> message pointing to the new name, but after some time it will just
> bounce.

Having heard no nays, I have set up the "bounce with a message pointing
to the new name" action.

-- 
Cheers,
Stephen Rothwell


pgpoDAFj0ojZD.pgp
Description: OpenPGP digital signature


Re: [PATCH v2 1/1] arch/fault: don't print logs for pte marker poison errors

2024-05-14 Thread Peter Xu
On Tue, May 14, 2024 at 10:26:49PM +0200, Oscar Salvador wrote:
> On Fri, May 10, 2024 at 03:29:48PM -0400, Peter Xu wrote:
> > IMHO we shouldn't mention that detail, but only state the effect which is
> > to not report the event to syslog.
> > 
> > There's no hard rule that a pte marker can't reflect a real page poison in
> > the future even MCE.  Actually I still remember most places don't care
> > about the pfn in the hwpoison swap entry so maybe we can even do it? But
> > that's another story regardless..
> 
> But we should not use pte markers for real hwpoisons events (aka MCE), right?

The question is whether we can't.

Now we reserved a swp entry just for hwpoison and it makes sense only
because we cached the poisoned pfn inside.  My long standing question is
why do we ever need that pfn after all.  If we don't need the pfn, we
simply need a bit in the pgtable entry saying that it's poisoned, if
accessed we should kill the process using sigbus.

I used to comment on this before, the only path that uses that pfn is
check_hwpoisoned_entry(), which was introduced in:

commit a3f5d80ea401ac857f2910e28b15f35b2cf902f4
Author: Naoya Horiguchi 
Date:   Mon Jun 28 19:43:14 2021 -0700

mm,hwpoison: send SIGBUS with error virutal address

Now an action required MCE in already hwpoisoned address surely sends a
SIGBUS to current process, but the SIGBUS doesn't convey error virtual
address.  That's not optimal for hwpoison-aware applications.

To fix the issue, make memory_failure() call kill_accessing_process(),
that does pagetable walk to find the error virtual address.  It could find
multiple virtual addresses for the same error page, and it seems hard to
tell which virtual address is correct one.  But that's rare and sending
incorrect virtual address could be better than no address.  So let's
report the first found virtual address for now.

So this time I read more on this and Naoya explained why - it's only used
so far to dump the VA of the poisoned entry.

However what confused me is, if an entry is poisoned already logically we
dump that message in the fault handler not memory_failure(), which is:

MCE: Killing uffd-unit-tests:650 due to hardware memory corruption fault at 
7f3589d7e000

So perhaps we're trying to also dump that when the MCEs (points to the same
pfn) are only generated concurrently?  I donno much on hwpoison so I cannot
tell, there's also implication where it's only triggered if
MF_ACTION_REQUIRED.  But I think it means hwpoison may work without pfn
encoded, but I don't know the implication to lose that dmesg line.

> I mean, we do have the means to mark a page as hwpoisoned when a real
> MCE gets triggered, why would we want a pte marker to also reflect that?
> Or is that something for userfaultd realm?

No it's not userfaultfd realm.. it's just that pte marker should be a
generic concept, so it logically can be used outside userfaultfd.  That's
also why it's used in swapin errors, in which case we don't use anything
else in this case but a bit to reflect "this page is bad".

> 
> > And also not report swapin error is, IMHO, only because arch errors said
> > "MCE" in the error logs which may not apply here.  Logically speaking
> > swapin error should also be reported so admin knows better on why a proc is
> > killed.  Now it can still confuse the admin if it really happens, iiuc.
> 
> I am bit confused by this.
> It seems we create poisoned pte markers on swap errors (e.g:
> unuse_pte()), which get passed down the chain with VM_FAULT_HWPOISON,
> which end up in sigbus (I guess?).
> 
> This all seems very subtle to me.
> 
> First of all, why not passing VM_FAULT_SIGBUS if that is what will end
> up happening?
> I mean, at the moment that is not possible because we convolute swaping
> errors and uffd poison in the same type of marker, so we do not have any
> means to differentiate between the two of them.
> 
> Would it make sense to create yet another pte marker type to split that
> up? Because when I look at VM_FAULT_HWPOISON, I get reminded of MCE
> stuff, and that does not hold here.

We used to not dump error for swapin error.  Note that here what I am
saying is not that Axel is doing things wrong, but it's just that logically
swapin error (as pte marker) can also be with !QUIET, so my final point is
we may want to avoid having the assumption that "pte marker should always
be QUITE", because I want to make it clear that pte marker can used in any
form, so itself shouldn't imply anything..

Thanks,

-- 
Peter Xu



Re: [PATCH v2 1/1] arch/fault: don't print logs for pte marker poison errors

2024-05-14 Thread Oscar Salvador
On Fri, May 10, 2024 at 03:29:48PM -0400, Peter Xu wrote:
> IMHO we shouldn't mention that detail, but only state the effect which is
> to not report the event to syslog.
> 
> There's no hard rule that a pte marker can't reflect a real page poison in
> the future even MCE.  Actually I still remember most places don't care
> about the pfn in the hwpoison swap entry so maybe we can even do it? But
> that's another story regardless..

But we should not use pte markers for real hwpoisons events (aka MCE), right?
I mean, we do have the means to mark a page as hwpoisoned when a real
MCE gets triggered, why would we want a pte marker to also reflect that?
Or is that something for userfaultd realm?

> And also not report swapin error is, IMHO, only because arch errors said
> "MCE" in the error logs which may not apply here.  Logically speaking
> swapin error should also be reported so admin knows better on why a proc is
> killed.  Now it can still confuse the admin if it really happens, iiuc.

I am bit confused by this.
It seems we create poisoned pte markers on swap errors (e.g:
unuse_pte()), which get passed down the chain with VM_FAULT_HWPOISON,
which end up in sigbus (I guess?).

This all seems very subtle to me.

First of all, why not passing VM_FAULT_SIGBUS if that is what will end
up happening?
I mean, at the moment that is not possible because we convolute swaping
errors and uffd poison in the same type of marker, so we do not have any
means to differentiate between the two of them.

Would it make sense to create yet another pte marker type to split that
up? Because when I look at VM_FAULT_HWPOISON, I get reminded of MCE
stuff, and that does not hold here.

 

-- 
Oscar Salvador
SUSE Labs


[PATCH 3/3] crypto: Update Kconfig and Makefile for ppc64le x25519.

2024-05-14 Thread Danny Tsen
Defined CRYPTO_CURVE25519_PPC64 to support X25519 for ppc64le.

Added new module curve25519-ppc64le for X25519.

Signed-off-by: Danny Tsen 
---
 arch/powerpc/crypto/Kconfig  | 11 +++
 arch/powerpc/crypto/Makefile |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/arch/powerpc/crypto/Kconfig b/arch/powerpc/crypto/Kconfig
index 1e201b7ae2fc..09ebcbdfb34f 100644
--- a/arch/powerpc/crypto/Kconfig
+++ b/arch/powerpc/crypto/Kconfig
@@ -2,6 +2,17 @@
 
 menu "Accelerated Cryptographic Algorithms for CPU (powerpc)"
 
+config CRYPTO_CURVE25519_PPC64
+   tristate "Public key crypto: Curve25519 (PowerPC64)"
+   depends on PPC64 && CPU_LITTLE_ENDIAN
+   select CRYPTO_LIB_CURVE25519_GENERIC
+   select CRYPTO_ARCH_HAVE_LIB_CURVE25519
+   help
+ Curve25519 algorithm
+
+ Architecture: PowerPC64
+ - Little-endian
+
 config CRYPTO_CRC32C_VPMSUM
tristate "CRC32c"
depends on PPC64 && ALTIVEC
diff --git a/arch/powerpc/crypto/Makefile b/arch/powerpc/crypto/Makefile
index fca0e9739869..59808592f0a1 100644
--- a/arch/powerpc/crypto/Makefile
+++ b/arch/powerpc/crypto/Makefile
@@ -17,6 +17,7 @@ obj-$(CONFIG_CRYPTO_AES_GCM_P10) += aes-gcm-p10-crypto.o
 obj-$(CONFIG_CRYPTO_CHACHA20_P10) += chacha-p10-crypto.o
 obj-$(CONFIG_CRYPTO_POLY1305_P10) += poly1305-p10-crypto.o
 obj-$(CONFIG_CRYPTO_DEV_VMX_ENCRYPT) += vmx-crypto.o
+obj-$(CONFIG_CRYPTO_CURVE25519_PPC64) += curve25519-ppc64le.o
 
 aes-ppc-spe-y := aes-spe-core.o aes-spe-keys.o aes-tab-4k.o aes-spe-modes.o 
aes-spe-glue.o
 md5-ppc-y := md5-asm.o md5-glue.o
@@ -29,6 +30,7 @@ aes-gcm-p10-crypto-y := aes-gcm-p10-glue.o aes-gcm-p10.o 
ghashp10-ppc.o aesp10-p
 chacha-p10-crypto-y := chacha-p10-glue.o chacha-p10le-8x.o
 poly1305-p10-crypto-y := poly1305-p10-glue.o poly1305-p10le_64.o
 vmx-crypto-objs := vmx.o aesp8-ppc.o ghashp8-ppc.o aes.o aes_cbc.o aes_ctr.o 
aes_xts.o ghash.o
+curve25519-ppc64le-y := curve25519-ppc64le-core.o curve25519-ppc64le_asm.o
 
 ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y)
 override flavour := linux-ppc64le
-- 
2.31.1



[PATCH 2/3] crypto: X25519 core functions for ppc64le

2024-05-14 Thread Danny Tsen
X25519 core functions to handle scalar multiplication for ppc64le.

Signed-off-by: Danny Tsen 
---
 arch/powerpc/crypto/curve25519-ppc64le-core.c | 299 ++
 1 file changed, 299 insertions(+)
 create mode 100644 arch/powerpc/crypto/curve25519-ppc64le-core.c

diff --git a/arch/powerpc/crypto/curve25519-ppc64le-core.c 
b/arch/powerpc/crypto/curve25519-ppc64le-core.c
new file mode 100644
index ..6a8b5efc40ce
--- /dev/null
+++ b/arch/powerpc/crypto/curve25519-ppc64le-core.c
@@ -0,0 +1,299 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * Copyright 2024- IBM Corp. All rights reserved.
+ *
+ * X25519 scalar multiplication with 51 bits limbs for PPC64le.
+ *   Based on RFC7748 and AArch64 optimized implementation for X25519
+ * - Algorithm 1 Scalar multiplication of a variable point
+ */
+
+#include 
+#include 
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include 
+#include 
+
+typedef uint64_t fe51[5];
+
+asmlinkage void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
+asmlinkage void x25519_fe51_sqr(fe51 h, const fe51 f);
+asmlinkage void x25519_fe51_mul121666(fe51 h, fe51 f);
+asmlinkage void x25519_fe51_sqr_times(fe51 h, const fe51 f, int n);
+asmlinkage void x25519_fe51_frombytes(fe51 h, const uint8_t *s);
+asmlinkage void x25519_fe51_tobytes(uint8_t *s, const fe51 h);
+
+#define fmul x25519_fe51_mul
+#define fsqr x25519_fe51_sqr
+#define fmul121666 x25519_fe51_mul121666
+#define fe51_tobytes x25519_fe51_tobytes
+#define fe51_frombytes x25519_fe51_frombytes
+
+static void cswap(fe51 p, fe51 q, unsigned int bit)
+{
+   u64 t, i;
+   u64 c = 0 - (u64) bit;
+
+   for (i = 0; i < 5; ++i) {
+   t = c & (p[i] ^ q[i]);
+   p[i] ^= t;
+   q[i] ^= t;
+   }
+}
+
+static void fadd(fe51 h, const fe51 f, const fe51 g)
+{
+   h[0] = f[0] + g[0];
+   h[1] = f[1] + g[1];
+   h[2] = f[2] + g[2];
+   h[3] = f[3] + g[3];
+   h[4] = f[4] + g[4];
+}
+
+/*
+ * Prime = 2 ** 255 - 19, 255 bits
+ *(0x7fff       
ffed)
+ *
+ * Prime in 5 51-bit limbs
+ */
+static fe51 prime51 = { 0x7ffed, 0x7, 0x7, 
0x7, 0x7};
+
+static void fsub(fe51 h, const fe51 f, const fe51 g)
+{
+   h[0] = (f[0] + ((prime51[0] * 2))) - g[0];
+   h[1] = (f[1] + ((prime51[1] * 2))) - g[1];
+   h[2] = (f[2] + ((prime51[2] * 2))) - g[2];
+   h[3] = (f[3] + ((prime51[3] * 2))) - g[3];
+   h[4] = (f[4] + ((prime51[4] * 2))) - g[4];
+}
+
+static void finv(fe51 o, const fe51 i)
+{
+   fe51 a0, b, c, t00;
+
+   fsqr(a0, i);
+   x25519_fe51_sqr_times(t00, a0, 2);
+
+   fmul(b, t00, i);
+   fmul(a0, b, a0);
+
+   fsqr(t00, a0);
+
+   fmul(b, t00, b);
+   x25519_fe51_sqr_times(t00, b, 5);
+
+   fmul(b, t00, b);
+   x25519_fe51_sqr_times(t00, b, 10);
+
+   fmul(c, t00, b);
+   x25519_fe51_sqr_times(t00, c, 20);
+
+   fmul(t00, t00, c);
+   x25519_fe51_sqr_times(t00, t00, 10);
+
+   fmul(b, t00, b);
+   x25519_fe51_sqr_times(t00, b, 50);
+
+   fmul(c, t00, b);
+   x25519_fe51_sqr_times(t00, c, 100);
+
+   fmul(t00, t00, c);
+   x25519_fe51_sqr_times(t00, t00, 50);
+
+   fmul(t00, t00, b);
+   x25519_fe51_sqr_times(t00, t00, 5);
+
+   fmul(o, t00, a0);
+}
+
+static void curve25519_fe51(uint8_t out[32], const uint8_t scalar[32],
+   const uint8_t point[32])
+{
+   fe51 x1, x2, z2, x3, z3;
+   uint8_t s[32];
+   unsigned int swap = 0;
+   int i;
+
+   memcpy(s, scalar, 32);
+   s[0]  &= 0xf8;
+   s[31] &= 0x7f;
+   s[31] |= 0x40;
+   fe51_frombytes(x1, point);
+
+   z2[0] = z2[1] = z2[2] = z2[3] = z2[4] = 0;
+   x3[0] = x1[0];
+   x3[1] = x1[1];
+   x3[2] = x1[2];
+   x3[3] = x1[3];
+   x3[4] = x1[4];
+
+   x2[0] = z3[0] = 1;
+   x2[1] = z3[1] = 0;
+   x2[2] = z3[2] = 0;
+   x2[3] = z3[3] = 0;
+   x2[4] = z3[4] = 0;
+
+   for (i = 254; i >= 0; --i) {
+   unsigned int k_t = 1 & (s[i / 8] >> (i & 7));
+   fe51 a, b, c, d, e;
+   fe51 da, cb, aa, bb;
+   fe51 dacb_p, dacb_m;
+
+   swap ^= k_t;
+   cswap(x2, x3, swap);
+   cswap(z2, z3, swap);
+   swap = k_t;
+
+   fsub(b, x2, z2);// B = x_2 - z_2
+   fadd(a, x2, z2);// A = x_2 + z_2
+   fsub(d, x3, z3);// D = x_3 - z_3
+   fadd(c, x3, z3);// C = x_3 + z_3
+
+   fsqr(bb, b);// BB = B^2
+   fsqr(aa, a);// AA = A^2
+   fmul(da, d, a); // DA = D * A
+   fmul(cb, c, b); // CB = C * B
+
+   fsub(e, aa, bb);// E = AA - BB
+

[PATCH 0/3] crypto: X25519 supports for ppc64le

2024-05-14 Thread Danny Tsen
This patch series provide X25519 support for ppc64le with a new module
curve25519-ppc64le.

The implementation is based on CRYPTOGAMs perl output from x25519-ppc64.pl.
Modified and added 3 supporting functions.

This patch has passed the selftest by running modprobe
curve25519-ppc64le.

Danny Tsen (3):
  X25519 low-level primitives for ppc64le.
  X25519 core functions to handle scalar multiplication for ppc64le.
  Update Kconfig and Makefile.

 arch/powerpc/crypto/Kconfig   |  11 +
 arch/powerpc/crypto/Makefile  |   2 +
 arch/powerpc/crypto/curve25519-ppc64le-core.c | 299 
 arch/powerpc/crypto/curve25519-ppc64le_asm.S  | 648 ++
 4 files changed, 960 insertions(+)
 create mode 100644 arch/powerpc/crypto/curve25519-ppc64le-core.c
 create mode 100644 arch/powerpc/crypto/curve25519-ppc64le_asm.S

-- 
2.31.1



[PATCH 1/3] crypto: X25519 low-level primitives for ppc64le.

2024-05-14 Thread Danny Tsen
Use the perl output of x25519-ppc64.pl from CRYPTOGAMs and added three
supporting functions, x25519_fe51_sqr_times, x25519_fe51_frombytes
and x25519_fe51_tobytes.

Signed-off-by: Danny Tsen 
---
 arch/powerpc/crypto/curve25519-ppc64le_asm.S | 648 +++
 1 file changed, 648 insertions(+)
 create mode 100644 arch/powerpc/crypto/curve25519-ppc64le_asm.S

diff --git a/arch/powerpc/crypto/curve25519-ppc64le_asm.S 
b/arch/powerpc/crypto/curve25519-ppc64le_asm.S
new file mode 100644
index ..8a018104838a
--- /dev/null
+++ b/arch/powerpc/crypto/curve25519-ppc64le_asm.S
@@ -0,0 +1,648 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+#
+# Copyright 2024- IBM Corp.  All Rights Reserved.
+#
+# This code is taken from CRYPTOGAMs[1] and is included here using the option
+# in the license to distribute the code under the GPL. Therefore this program
+# is free software; you can redistribute it and/or modify it under the terms of
+# the GNU General Public License version 2 as published by the Free Software
+# Foundation.
+#
+# [1] https://www.openssl.org/~appro/cryptogams/
+
+# Copyright (c) 2006-2017, CRYPTOGAMS by 
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+#   * Redistributions of source code must retain copyright notices,
+# this list of conditions and the following disclaimer.
+#
+#   * Redistributions in binary form must reproduce the above
+# copyright notice, this list of conditions and the following
+# disclaimer in the documentation and/or other materials
+# provided with the distribution.
+#
+#   * Neither the name of the CRYPTOGAMS nor the names of its
+# copyright holder and contributors may be used to endorse or
+# promote products derived from this software without specific
+# prior written permission.
+#
+# ALTERNATIVELY, provided that this notice is retained in full, this
+# product may be distributed under the terms of the GNU General Public
+# License (GPL), in which case the provisions of the GPL apply INSTEAD OF
+# those given above.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDER AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# 
+# Written by Andy Polyakov  for the OpenSSL
+# project. The module is, however, dual licensed under OpenSSL and
+# CRYPTOGAMS licenses depending on where you obtain it. For further
+# details see https://www.openssl.org/~appro/cryptogams/.
+# 
+
+#
+# 
+# Written and Modified by Danny Tsen 
+# - Added x25519_fe51_sqr_times, x25519_fe51_frombytes, x25519_fe51_tobytes
+#
+# X25519 lower-level primitives for PPC64.
+#
+
+#include 
+
+.machine "any"
+.abiversion2
+.text
+
+SYM_FUNC_START(x25519_fe51_mul)
+.align 5
+
+   stdu1,-144(1)
+   std 21,56(1)
+   std 22,64(1)
+   std 23,72(1)
+   std 24,80(1)
+   std 25,88(1)
+   std 26,96(1)
+   std 27,104(1)
+   std 28,112(1)
+   std 29,120(1)
+   std 30,128(1)
+   std 31,136(1)
+
+   ld  6,0(5)
+   ld  7,0(4)
+   ld  8,8(4)
+   ld  9,16(4)
+   ld  10,24(4)
+   ld  11,32(4)
+
+   mulld   22,7,6
+   mulhdu  23,7,6
+
+   mulld   24,8,6
+   mulhdu  25,8,6
+
+   mulld   30,11,6
+   mulhdu  31,11,6
+   ld  4,8(5)
+   mulli   11,11,19
+
+   mulld   26,9,6
+   mulhdu  27,9,6
+
+   mulld   28,10,6
+   mulhdu  29,10,6
+   mulld   12,11,4
+   mulhdu  21,11,4
+   addc22,22,12
+   adde23,23,21
+
+   mulld   12,7,4
+   mulhdu  21,7,4
+   addc24,24,12
+   adde25,25,21
+
+   mulld   12,10,4
+   mulhdu  21,10,4
+   ld  6,16(5)
+   mulli   10,10,19
+   addc30,30,12
+   adde31,31,21
+
+   mulld   12,8,4
+   mulhdu  21,8,4
+   addc26,26,12
+   adde27,27,21
+
+   mulld   12,9,4
+   mulhdu  21,9,4
+   addc28,28,12
+   

[PATCH v2 2/2] powerpc: hotplug driver bridge support

2024-05-14 Thread Krishna Kumar
There is an issue with the hotplug operation when it's done on the
bridge/switch slot. The bridge-port and devices behind the bridge, which
become offline by hot-unplug operation, don't get hot-plugged/enabled by
doing hot-plug operation on that slot. Only the first port of the bridge
gets enabled and the remaining port/devices remain unplugged. The hot
plug/unplug operation is done by the hotplug driver
(drivers/pci/hotplug/pnv_php.c).

Root Cause Analysis: This behavior is due to missing code for the DPC
switch/bridge. The existing driver depends on pci_hp_add_devices()
function for device enablement. This function calls pci_scan_slot() on
only one device-node/port of the bridge, not on all the siblings'
device-node/port.

The missing code needs to be added which will find all the sibling
device-nodes/bridge-ports and will run explicit pci_scan_slot() on
those.  A new function has been added for this purpose which gets
invoked from pci_hp_add_devices(). This new function
pci_traverse_sibling_nodes_and_scan_slot() gets all the sibling
bridge-ports by traversal and explicitly invokes pci_scan_slot on them.

Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: "Aneesh Kumar K.V" 
Cc: Bjorn Helgaas 
Cc: Gaurav Batra 
Cc: Nathan Lynch 
Cc: Brian King 

Signed-off-by: Krishna Kumar 
---
Command for reproducing the issue :

For hot unplug/disable - echo 0 > /sys/bus/pci/slots/C5/power
For hot plug/enable -echo 1 > /sys/bus/pci/slots/C5/power

where C5 is slot associated with bridge.

Scenario/Tests:
Output of lspci -nn before test is given below. This snippet contains
devices used for testing on Powernv machine.

0004:02:00.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4052]
0004:02:01.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4052]
0004:02:02.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4052]
0004:02:03.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4052]
0004:08:00.0 Serial Attached SCSI controller [0107]:
Broadcom / LSI SAS3216 PCI-Express Fusion-MPT SAS-3 [1000:00c9] (rev 01)
0004:09:00.0 Serial Attached SCSI controller [0107]:
Broadcom / LSI SAS3216 PCI-Express Fusion-MPT SAS-3 [1000:00c9] (rev 01)

Output of lspci -tv before test is as follows:

# lspci -tv
 +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--+-00.0-[03-07]--
 |   |   +-01.0-[08]00.0  Broadcom / 
LSI SAS3216 PCI-Express Fusion-MPT SAS-3
 |   |   +-02.0-[09]00.0  Broadcom / 
LSI SAS3216 PCI-Express Fusion-MPT SAS-3
 |   |   \-03.0-[0a-0e]--
 |   \-00.1  PMC-Sierra Inc. Device 4052

C5(bridge) and C6(End Point) slot address are as below:
# cat /sys/bus/pci/slots/C5/address
0004:02:00
# cat /sys/bus/pci/slots/C6/address
0004:09:00

Hot-unplug operation on slot associated with bridge:
# echo 0 > /sys/bus/pci/slots/C5/power
# lspci -tv
 +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--
 |   \-00.1  PMC-Sierra Inc. Device 4052

>From the above lspci -tv output, it can be observed that hot unplug
operation has removed all the PMC-Sierra bridge ports like:
00.0-[03-07], 01.0-[08], 02.0-[09], 03.0-[0a-0e] and the SAS devices
behind the bridge-port. Without the fix, when the hot plug operation is
done on the same slot, it adds only the first bridge port and doesn't
restore all the bridge-ports and devices that it unplugged earlier.
Below snippet shows this.

Hot-plug operation on the bridge slot without the fix:
# echo 1 > /sys/bus/pci/slots/C5/power
# lspci -tv
 +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--+-00.0-[03-07]--
 |   \-00.1  PMC-Sierra Inc. Device 4052

After the fix, it restores all the devices in the same manner how it
unplugged them earlier during the hot unplug operation. The below snippet
shows the same.
Hot-plug operation on bridge slot with the fix:
# echo 1 > /sys/bus/pci/slots/C5/power
# lspci -tv
 +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--+-00.0-[03-07]--
 |   |   +-01.0-[08]00.0  Broadcom / 
LSI SAS3216 PCI-Express Fusion-MPT SAS-3
 |   |   +-02.0-[09]00.0  Broadcom / 
LSI SAS3216 PCI-Express Fusion-MPT SAS-3
 |   |   \-03.0-[0a-0e]--
 |   \-00.1  PMC-Sierra Inc. Device 4052

Removal of End point device behind bridge are also intact and behaving
correctly.
Hot-unplug operation on Endpoint device C6:
# echo 0 > /sys/bus/pci/slots/C6/power
# lspci -tv
 +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--+-00.0-[03-07]--
 |   |   +-01.0-[08]00.0  Broadcom / 
LSI SAS3216 PCI-Express Fusion-MPT SAS-3
 |   |   +-02.0-[09]--
 |   |   \-03.0-[0a-0e]--
 |   \-00.1  PMC-Sierra Inc. Device 4052

Hot-plug operation on Endpoint device C6:
# echo 1 > 

[PATCH v2 1/2] pci/hotplug/pnv_php: Fix hotplug driver crash on Powernv

2024-05-14 Thread Krishna Kumar
Description of the problem: The hotplug driver for powerpc
(pci/hotplug/pnv_php.c) gives kernel crash when we try to
hot-unplug/disable the PCIe switch/bridge from the PHB.

Root Cause of Crash: The crash is due to the reason that, though the msi
data structure has been released during disable/hot-unplug path and it
has been assigned with NULL, still during unregistartion the code was
again trying to explicitly disable the msi which causes the Null pointer
dereference and kernel crash.

Proposed Fix : The fix is to correct the check during unregistration path
so that the code should not  try to invoke pci_disable_msi/msix() if its
data structure is already freed.

Cc: Michael Ellerman 
Cc: Nicholas Piggin 
Cc: Christophe Leroy 
Cc: "Aneesh Kumar K.V" 
Cc: Bjorn Helgaas 
Cc: Gaurav Batra 
Cc: Nathan Lynch 
Cc: Brian King 

Signed-off-by: Krishna Kumar 
---
Command used for reproducing the bug:

echo 0 > /sys/bus/pci/slots/C5/power

where C5 is slot associated with bridge.

Snippet of Crash:

 Kernel attempted to read user page (10) - exploit attempt? (uid: 0)
 BUG: Kernel NULL pointer dereference on read at 0x0010
 Faulting instruction address: 0xc0fad7d4
 Oops: Kernel access of bad area, sig: 11 [#1]
 Hardware name: 5105-22E POWER9 0x4e1203 opal:v7.0-39-g4660e63a PowerNV
 NIP [c0fad7d4] mutex_lock+0x34/0x88
 LR [c0fad7c8] mutex_lock+0x28/0x88
 Call Trace:
 [c0017075f940] [c0fad7c8] mutex_lock+0x28/0x88 (unreliable)
 [c0017075f970] [c0214464] msi_lock_descs+0x28/0x3c
 [c0017075f990] [c08e8be8] pci_disable_msi+0x68/0xa8
 [c0017075f9c0] [c090f0a4] pnv_php_disable_irq+0x2a0/0x2b0
 [c0017075fab0] [c090f128] pnv_php_free_slot+0x74/0xc4
 [c0017075fb30] [c0912184] pnv_php_disable_slot.part.0+0x1b8/0x24c
 [c0017075fc00] [c0902df0] power_write_file+0xf8/0x18c
 [c0017075fc80] [c08f84d8] pci_slot_attr_store+0x40/0x5c
 [c0017075fca0] [c06b4834] sysfs_kf_write+0x64/0x78
 [c0017075fcc0] [c06b3300] kernfs_fop_write_iter+0x1b8/0x2dc
 [c0017075fd10] [c05b3eb0] vfs_write+0x224/0x4e8
 [c0017075fdc0] [c05b44b0] ksys_write+0x88/0x150
 [c0017075fe10] [c0030864] system_call_exception+0x124/0x320
 [c0017075fe50] [c000cedc] system_call_vectored_common+0x15c/0x2ec
 --- interrupt: 3000 at 0x7fffb9748774


Root-Cause: Its safe to call pci_disable_msi() if its associated data structre 
are
not freed (during bailout path). But when the driver code disables the
bridge during hot-unplug operation, its msi data structure becomes NULL
(php_slot->pdev->dev.msi.data:).  This happens before
unregistration and during disable path in function
msi_device_data_release(). In this case, its not safe to explicitly call
pci_disable_msi/msix() due to NULL pointer dereference. But since the
current code does so, the crash is happening at the line
mutex_lock(>msi.data->mutex).


FIX: In the current code, there are two paths to invoke
pci_disable_msi/msix(). In the error/bailout path, first argument of the
check - if(disable_device || irq > 0), i.e. disable_device is true, so
it will always invoke pci_disable_msi/msix(), it will never depend on
second argument. In this path it's fine to call pci_disable_msi/msix().
During the slot releasing/disable/hot-unpug path the disable_device is
false, irq is having old value which is making the overall check true
and causing the crash. Of course, we should not choose the old/stale
value of irq but should choose php_slot->irq for check.  Also, since
php_slot->irq value is always 0 before the check, so it does not matter
if it will not be included into the check.  So, the check can be formed
with only one argument i.e. disable_device.  Based on its value
pci_disable_msi/msix() will be invoked and this is the fix for the
crash. During the bailout path its value will be true and during the
hot-unplug operation on the bridge slot, its value will be false.

 drivers/pci/hotplug/pnv_php.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 694349be9d0a..573a41869c15 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -40,7 +40,6 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot,
bool disable_device)
 {
struct pci_dev *pdev = php_slot->pdev;
-   int irq = php_slot->irq;
u16 ctrl;
 
if (php_slot->irq > 0) {
@@ -59,7 +58,7 @@ static void pnv_php_disable_irq(struct pnv_php_slot *php_slot,
php_slot->wq = NULL;
}
 
-   if (disable_device || irq > 0) {
+   if (disable_device) {
if (pdev->msix_enabled)
pci_disable_msix(pdev);
else if (pdev->msi_enabled)
-- 
2.45.0



[PATCH v2 0/2] PCI hotplug driver fixes

2024-05-14 Thread Krishna Kumar


The fix of Powerpc hotplug driver (drivers/pci/hotplug/pnv_php.c)
addresses below two issues.

1. Kernel Crash during hot unplug of bridge/switch slot.

2. DPC-Support Enablement - Previously, when we do a hot-unplug
operation on a bridge slot, all the ports and devices behind the
bridge-ports would be hot-unplugged/offline, but when we do a hot-plug
operation on the same bridge slot, all the ports and devices behind the
bridge would not get hot-plugged/online. In this case, Only the first
port of the bridge gets enabled and the remaining port/devices remain
unplugged/offline.  After the fix, The hot-unplug and hot-plug
operations on the slot associated with the bridge started behaving
correctly and became in sync. Now, after the hot plug operation on the
same slot, all the bridge ports and devices behind the bridge become
hot-plugged/online/restored in the same manner as it was before the
hot-unplug operation.

Krishna Kumar (2):
  pci/hotplug/pnv_php: Fix hotplug driver crash on Powernv
  powerpc: hotplug driver bridge support

 arch/powerpc/include/asm/ppc-pci.h |  4 
 arch/powerpc/kernel/pci-hotplug.c  |  5 ++---
 arch/powerpc/kernel/pci_dn.c   | 32 ++
 drivers/pci/hotplug/pnv_php.c  |  3 +--
 4 files changed, 39 insertions(+), 5 deletions(-)

Changelog:
==
v2: 14 May 2024
  - Used of_property_read_u32() in place of of_get_property() and
of_read_number(). [patch2]
  - Removed some unnecessary variable and changed the function return
type from void* to void. [patch2]
  - Removed the export declaration of
pci_traverse_sibling_nodes_and_scan_slot() as its not needed.
[patch2]
-- 
2.45.0



[powerpc:merge] BUILD SUCCESS 47279113c5d094a564909879dd1983f3c72c2ee5

2024-05-14 Thread kernel test robot
tree/branch: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
merge
branch HEAD: 47279113c5d094a564909879dd1983f3c72c2ee5  Automatic merge of 
'next' into merge (2024-05-13 23:14)

elapsed time: 1449m

configs tested: 195
configs skipped: 3

The following configs have been built successfully.
More configs may be tested in the coming days.

tested configs:
alpha allnoconfig   gcc  
alphaallyesconfig   gcc  
alpha   defconfig   gcc  
arc  allmodconfig   gcc  
arc   allnoconfig   gcc  
arc  allyesconfig   gcc  
arc defconfig   gcc  
arc   randconfig-001-20240514   gcc  
arc   randconfig-002-20240514   gcc  
arc   tb10x_defconfig   gcc  
arm  allmodconfig   gcc  
arm   allnoconfig   clang
arm  allyesconfig   gcc  
arm defconfig   clang
arm  pxa168_defconfig   clang
arm   randconfig-001-20240514   clang
arm   randconfig-002-20240514   gcc  
arm   randconfig-003-20240514   gcc  
arm   randconfig-004-20240514   gcc  
armrealview_defconfig   clang
arm rpc_defconfig   clang
arm64allmodconfig   clang
arm64 allnoconfig   gcc  
arm64   defconfig   gcc  
arm64 randconfig-001-20240514   clang
arm64 randconfig-002-20240514   clang
arm64 randconfig-003-20240514   gcc  
arm64 randconfig-004-20240514   clang
csky allmodconfig   gcc  
csky  allnoconfig   gcc  
csky allyesconfig   gcc  
cskydefconfig   gcc  
csky  randconfig-001-20240514   gcc  
csky  randconfig-002-20240514   gcc  
hexagon  allmodconfig   clang
hexagon   allnoconfig   clang
hexagon  allyesconfig   clang
hexagon defconfig   clang
hexagon   randconfig-001-20240514   clang
hexagon   randconfig-002-20240514   clang
i386 allmodconfig   gcc  
i386  allnoconfig   gcc  
i386 allyesconfig   gcc  
i386 buildonly-randconfig-001-20240513   clang
i386 buildonly-randconfig-001-20240514   clang
i386 buildonly-randconfig-002-20240513   clang
i386 buildonly-randconfig-002-20240514   clang
i386 buildonly-randconfig-003-20240513   gcc  
i386 buildonly-randconfig-003-20240514   gcc  
i386 buildonly-randconfig-004-20240513   clang
i386 buildonly-randconfig-004-20240514   clang
i386 buildonly-randconfig-005-20240513   gcc  
i386 buildonly-randconfig-005-20240514   gcc  
i386 buildonly-randconfig-006-20240513   gcc  
i386 buildonly-randconfig-006-20240514   clang
i386defconfig   clang
i386  randconfig-001-20240513   gcc  
i386  randconfig-001-20240514   gcc  
i386  randconfig-002-20240513   clang
i386  randconfig-002-20240514   clang
i386  randconfig-003-20240513   gcc  
i386  randconfig-003-20240514   gcc  
i386  randconfig-004-20240513   clang
i386  randconfig-004-20240514   clang
i386  randconfig-005-20240513   gcc  
i386  randconfig-005-20240514   clang
i386  randconfig-006-20240513   gcc  
i386  randconfig-006-20240514   clang
i386  randconfig-011-20240513   gcc  
i386  randconfig-011-20240514   clang
i386  randconfig-012-20240513   clang
i386  randconfig-012-20240514   gcc  
i386  randconfig-013-20240513   clang
i386  randconfig-013-20240514   gcc  
i386  randconfig-014-20240513   gcc  
i386  randconfig-014-20240514   clang
i386  randconfig-015-20240513   gcc  
i386  randconfig-015-20240514   gcc  
i386  randconfig-016-20240513   clang
i386  randconfig-016-20240514   clang
loongarchallmodconfig   gcc  
loongarch allnoconfig   gcc  
loongarch   defconfig   gcc  
loongarch randconfig-001-20240514   gcc  
loongarch randconfig-002-20240514   gcc  
m68k

[PATCH v2] arch/powerpc: Remove unused cede related functions

2024-05-14 Thread Gautam Menghani
Remove extended_cede_processor() and its helpers as
extended_cede_processor() has no callers since
commit 48f6e7f6d948("powerpc/pseries: remove cede offline state for CPUs")

Signed-off-by: Gautam Menghani 
---
v1 -> v2:
1. Remove helpers of extended_cede_processor()

 arch/powerpc/include/asm/plpar_wrappers.h | 28 ---
 1 file changed, 28 deletions(-)

diff --git a/arch/powerpc/include/asm/plpar_wrappers.h 
b/arch/powerpc/include/asm/plpar_wrappers.h
index b3ee44a40c2f..71648c126970 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -18,16 +18,6 @@ static inline long poll_pending(void)
return plpar_hcall_norets(H_POLL_PENDING);
 }
 
-static inline u8 get_cede_latency_hint(void)
-{
-   return get_lppaca()->cede_latency_hint;
-}
-
-static inline void set_cede_latency_hint(u8 latency_hint)
-{
-   get_lppaca()->cede_latency_hint = latency_hint;
-}
-
 static inline long cede_processor(void)
 {
/*
@@ -37,24 +27,6 @@ static inline long cede_processor(void)
return plpar_hcall_norets_notrace(H_CEDE);
 }
 
-static inline long extended_cede_processor(unsigned long latency_hint)
-{
-   long rc;
-   u8 old_latency_hint = get_cede_latency_hint();
-
-   set_cede_latency_hint(latency_hint);
-
-   rc = cede_processor();
-
-   /* Ensure that H_CEDE returns with IRQs on */
-   if (WARN_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(mfmsr() & 
MSR_EE)))
-   __hard_irq_enable();
-
-   set_cede_latency_hint(old_latency_hint);
-
-   return rc;
-}
-
 static inline long vpa_call(unsigned long flags, unsigned long cpu,
unsigned long vpa)
 {
-- 
2.45.0



Re: [PATCH 2/2] arch/powerpc: hotplug driver bridge support

2024-05-14 Thread krishna kumar
Thanks Sourabh for review. I have incorporated your comments and will be 
sending my patch soon.


Best Regards,

Krishna

On 5/13/24 15:15, Sourabh Jain wrote:

Hello Krishna,

Is "arch" in commit title really required?

On 09/05/24 17:35, Krishna Kumar wrote:

There is an issue with the hotplug operation when it's done on the
bridge/switch slot. The bridge-port and devices behind the bridge, which
become offline by hot-unplug operation, don't get hot-plugged/enabled by
doing hot-plug operation on that slot. Only the first port of the bridge
gets enabled and the remaining port/devices remain unplugged. The hot
plug/unplug operation is done by the hotplug driver
(drivers/pci/hotplug/pnv_php.c).

Root Cause Analysis: This behavior is due to missing code for the DPC
switch/bridge. The existing driver depends on pci_hp_add_devices()
function for device enablement. This function calls pci_scan_slot() on
only one device-node/port of the bridge, not on all the siblings'
device-node/port.

The missing code needs to be added which will find all the sibling
device-nodes/bridge-ports and will run explicit pci_scan_slot() on
those.  A new function has been added for this purpose which gets
invoked from pci_hp_add_devices(). This new function
pci_traverse_sibling_nodes_and_scan_slot() gets all the sibling
bridge-ports by traversal and explicitly invokes pci_scan_slot on them.


Signed-off-by: Krishna Kumar 
---

Command for reproducing the issue :

For hot unplug/disable - echo 0 > /sys/bus/pci/slots/C5/power
For hot plug/enable -    echo 1 > /sys/bus/pci/slots/C5/power

where C5 is slot associated with bridge.

Scenario/Tests:
Output of lspci -nn before test is given below. This snippet contains
devices used for testing on Powernv machine.

0004:02:00.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4052]
0004:02:01.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4052]
0004:02:02.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4052]
0004:02:03.0 PCI bridge [0604]: PMC-Sierra Inc. Device [11f8:4052]
0004:08:00.0 Serial Attached SCSI controller [0107]:
Broadcom / LSI SAS3216 PCI-Express Fusion-MPT SAS-3 [1000:00c9] (rev 01)
0004:09:00.0 Serial Attached SCSI controller [0107]:
Broadcom / LSI SAS3216 PCI-Express Fusion-MPT SAS-3 [1000:00c9] (rev 01)

Output of lspci -tv before test is as follows:

# lspci -tv
  +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--+-00.0-[03-07]--
  |   | +-01.0-[08]00.0  Broadcom / LSI 
SAS3216 PCI-Express Fusion-MPT SAS-3
  |   | +-02.0-[09]00.0  Broadcom / LSI 
SAS3216 PCI-Express Fusion-MPT SAS-3

  |   |   \-03.0-[0a-0e]--
  |   \-00.1  PMC-Sierra Inc. Device 4052

C5(bridge) and C6(End Point) slot address are as below:
# cat /sys/bus/pci/slots/C5/address
0004:02:00
# cat /sys/bus/pci/slots/C6/address
0004:09:00

Hot-unplug operation on slot associated with bridge:
# echo 0 > /sys/bus/pci/slots/C5/power
# lspci -tv
  +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--
  |   \-00.1  PMC-Sierra Inc. Device 4052

 From the above lspci -tv output, it can be observed that hot unplug
operation has removed all the PMC-Sierra bridge ports like:
00.0-[03-07], 01.0-[08], 02.0-[09], 03.0-[0a-0e] and the SAS devices
behind the bridge-port. Without the fix, when the hot plug operation is
done on the same slot, it adds only the first bridge port and doesn't
restore all the bridge-ports and devices that it unplugged earlier.
Below snippet shows this.

Hot-plug operation on the bridge slot without the fix:
# echo 1 > /sys/bus/pci/slots/C5/power
# lspci -tv
  +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--+-00.0-[03-07]--
  |   \-00.1  PMC-Sierra Inc. Device 4052

After the fix, it restores all the devices in the same manner how it
unplugged them earlier during the hot unplug operation. The below 
snippet

shows the same.
Hot-plug operation on bridge slot with the fix:
# echo 1 > /sys/bus/pci/slots/C5/power
# lspci -tv
  +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--+-00.0-[03-07]--
  |   | +-01.0-[08]00.0  Broadcom / LSI 
SAS3216 PCI-Express Fusion-MPT SAS-3
  |   | +-02.0-[09]00.0  Broadcom / LSI 
SAS3216 PCI-Express Fusion-MPT SAS-3

  |   |   \-03.0-[0a-0e]--
  |   \-00.1  PMC-Sierra Inc. Device 4052

Removal of End point device behind bridge are also intact and behaving
correctly.
Hot-unplug operation on Endpoint device C6:
# echo 0 > /sys/bus/pci/slots/C6/power
# lspci -tv
  +-[0004:00]---00.0-[01-0e]--+-00.0-[02-0e]--+-00.0-[03-07]--
  |   | +-01.0-[08]00.0  Broadcom / LSI 
SAS3216 PCI-Express Fusion-MPT SAS-3

  |   |   +-02.0-[09]--
  |   |   \-03.0-[0a-0e]--
  |   \-00.1  PMC-Sierra Inc. Device 4052

Hot-plug 

Re: [PATCH 2/2] arch/powerpc: hotplug driver bridge support

2024-05-14 Thread krishna kumar
Thanks Andy for review. I have incorporated your comments and will be 
sending the patch soon.


On 5/11/24 11:12, Andy Shevchenko wrote:

Thu, May 09, 2024 at 05:35:54PM +0530, Krishna Kumar kirjoitti:

There is an issue with the hotplug operation when it's done on the
bridge/switch slot. The bridge-port and devices behind the bridge, which
become offline by hot-unplug operation, don't get hot-plugged/enabled by
doing hot-plug operation on that slot. Only the first port of the bridge
gets enabled and the remaining port/devices remain unplugged. The hot
plug/unplug operation is done by the hotplug driver
(drivers/pci/hotplug/pnv_php.c).

Root Cause Analysis: This behavior is due to missing code for the DPC
switch/bridge. The existing driver depends on pci_hp_add_devices()
function for device enablement. This function calls pci_scan_slot() on
only one device-node/port of the bridge, not on all the siblings'
device-node/port.

The missing code needs to be added which will find all the sibling
device-nodes/bridge-ports and will run explicit pci_scan_slot() on
those.  A new function has been added for this purpose which gets
invoked from pci_hp_add_devices(). This new function
pci_traverse_sibling_nodes_and_scan_slot() gets all the sibling
bridge-ports by traversal and explicitly invokes pci_scan_slot on them.



One blank line is enough here.

I have incorporated this.



Signed-off-by: Krishna Kumar 

...


+void *pci_traverse_sibling_nodes_and_scan_slot(struct device_node *start, 
struct pci_bus *bus)
+{
+   struct device_node *dn;
+   struct device_node *parent;
+   int slotno;
+
+   const __be32 *classp1;
+   u32 class1 = 0;
+   classp1 = of_get_property(start->child, "class-code", NULL);
+   if (classp1)
+   class1 = of_read_number(classp1, 1);

What's wrong with of_property_read_u32()?

I have incorporated this.




+   /* Call of pci_scan_slot for non-bridge/EP case */
+   if (!((class1 >> 8) == PCI_CLASS_BRIDGE_PCI)) {
+   slotno = PCI_SLOT(PCI_DN(start->child)->devfn);
+   pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+   return NULL;
+   }
+
+   /* Iterate all siblings */
+   parent = start;
+   for_each_child_of_node(parent, dn) {
+   const __be32 *classp;
+   u32 class = 0;
+
+   classp = of_get_property(dn, "class-code", NULL);
+   if (classp)
+   class = of_read_number(classp, 1);

Ditto.


+   /* Call of pci_scan_slot on each sibling-nodes/bridge-ports */
+   if ((class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+   slotno = PCI_SLOT(PCI_DN(dn)->devfn);
+   pci_scan_slot(bus, PCI_DEVFN(slotno, 0));
+   }
+   }
+
+   return NULL;
+}


Best Regards,

Krishna



[PATCH v5 7/7] PCI: Create helper to print TLP Header and Prefix Log

2024-05-14 Thread Ilpo Järvinen
Add pcie_print_tlp_log() helper to print TLP Header and Prefix Log.
Print End-End Prefixes only if they are non-zero.

Consolidate the few places which currently print TLP using custom
formatting.

The first attempt used pr_cont() instead of building a string first but
it turns out pr_cont() is not compatible with pci_err() and prints on a
separate line. When I asked about this, Andy Shevchenko suggested
pr_cont() should not be used in the first place (to eventually get rid
of it) so pr_cont() is now replaced with building the string first.

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/pci.h  |  2 ++
 drivers/pci/pcie/aer.c | 10 ++
 drivers/pci/pcie/dpc.c |  5 +
 drivers/pci/pcie/tlp.c | 31 +++
 4 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 7afdd71f9026..45083e62892c 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -423,6 +423,8 @@ void aer_print_error(struct pci_dev *dev, struct 
aer_err_info *info);
 int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
  unsigned int tlp_len, struct pcie_tlp_log *log);
 unsigned int aer_tlp_log_len(struct pci_dev *dev);
+void pcie_print_tlp_log(const struct pci_dev *dev,
+   const struct pcie_tlp_log *log, const char *pfx);
 #endif /* CONFIG_PCIEAER */
 
 #ifdef CONFIG_PCIEPORTBUS
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index ecc1dea5a208..efb9e728fe94 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -664,12 +664,6 @@ static void pci_rootport_aer_stats_incr(struct pci_dev 
*pdev,
}
 }
 
-static void __print_tlp_header(struct pci_dev *dev, struct pcie_tlp_log *t)
-{
-   pci_err(dev, "  TLP Header: %08x %08x %08x %08x\n",
-   t->dw[0], t->dw[1], t->dw[2], t->dw[3]);
-}
-
 static void __aer_print_error(struct pci_dev *dev,
  struct aer_err_info *info)
 {
@@ -724,7 +718,7 @@ void aer_print_error(struct pci_dev *dev, struct 
aer_err_info *info)
__aer_print_error(dev, info);
 
if (info->tlp_header_valid)
-   __print_tlp_header(dev, >tlp);
+   pcie_print_tlp_log(dev, >tlp, "  ");
 
 out:
if (info->id && info->error_dev_num > 1 && info->id == id)
@@ -796,7 +790,7 @@ void pci_print_aer(struct pci_dev *dev, int aer_severity,
aer->uncor_severity);
 
if (tlp_header_valid)
-   __print_tlp_header(dev, >header_log);
+   pcie_print_tlp_log(dev, >header_log, "  ");
 
trace_aer_event(dev_name(>dev), (status & ~mask),
aer_severity, tlp_header_valid, >header_log);
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index 5056cc6961ec..598f74384471 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -220,10 +220,7 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev)
pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
  cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG,
  dpc_tlp_log_len(pdev), _log);
-   pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n",
-   tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]);
-   for (i = 0; i < pdev->dpc_rp_log_size - 5; i++)
-   pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, 
tlp_log.prefix[i]);
+   pcie_print_tlp_log(pdev, _log, "");
 
if (pdev->dpc_rp_log_size < 5)
goto clear_status;
diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c
index def9dd7b73e8..097ac8514e96 100644
--- a/drivers/pci/pcie/tlp.c
+++ b/drivers/pci/pcie/tlp.c
@@ -6,6 +6,7 @@
  */
 
 #include 
+#include 
 #include 
 #include 
 
@@ -76,3 +77,33 @@ int pcie_read_tlp_log(struct pci_dev *dev, int where, int 
where2,
 
return 0;
 }
+
+/**
+ * pcie_print_tlp_log - Print TLP Header / Prefix Log contents
+ * @dev: PCIe device
+ * @log: TLP Log structure
+ * @pfx: String prefix (for print out indentation)
+ *
+ * Prints TLP Header and Prefix Log information held by @log.
+ */
+void pcie_print_tlp_log(const struct pci_dev *dev,
+   const struct pcie_tlp_log *log, const char *pfx)
+{
+   char buf[(10 + 1) * (4 + ARRAY_SIZE(log->prefix)) + 14 + 1];
+   unsigned int i;
+   int len;
+
+   len = scnprintf(buf, sizeof(buf), "%#010x %#010x %#010x %#010x",
+   log->dw[0], log->dw[1], log->dw[2], log->dw[3]);
+
+   if (log->prefix[0])
+   len += scnprintf(buf + len, sizeof(buf) - len, " E-E 
Prefixes:");
+   for (i = 0; i < ARRAY_SIZE(log->prefix); i++) {
+   if (!log->prefix[i])
+   break;
+   len += scnprintf(buf + len, sizeof(buf) - len,
+" %#010x", log->prefix[i]);
+   }
+
+   pci_err(dev, "%sTLP Header: %s\n", pfx, buf);
+}
-- 
2.39.2



[PATCH v5 6/7] PCI: Add TLP Prefix reading into pcie_read_tlp_log()

2024-05-14 Thread Ilpo Järvinen
pcie_read_tlp_log() handles only 4 Header Log DWORDs but TLP Prefix Log
(PCIe r6.1 secs 7.8.4.12 & 7.9.14.13) may also be present.

Generalize pcie_read_tlp_log() and struct pcie_tlp_log to handle also
TLP Prefix Log. The relevant registers are formatted identically in AER
and DPC Capability, but has these variations:

a) The offsets of TLP Prefix Log registers vary.
b) DPC RP PIO TLP Prefix Log register can be < 4 DWORDs.

Therefore callers must pass the offset of the TLP Prefix Log register
and the entire length to pcie_read_tlp_log() to be able to read the
correct number of TLP Prefix DWORDs from the correct offset.

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/pci.h |  5 +++-
 drivers/pci/pcie/aer.c|  4 ++-
 drivers/pci/pcie/dpc.c| 13 +-
 drivers/pci/pcie/tlp.c| 49 +++
 include/linux/aer.h   |  1 +
 include/uapi/linux/pci_regs.h |  1 +
 6 files changed, 59 insertions(+), 14 deletions(-)

diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 0e9917f8bf3f..7afdd71f9026 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -420,7 +420,9 @@ struct aer_err_info {
 int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 
-int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log 
*log);
+int pcie_read_tlp_log(struct pci_dev *dev, int where, int where2,
+ unsigned int tlp_len, struct pcie_tlp_log *log);
+unsigned int aer_tlp_log_len(struct pci_dev *dev);
 #endif /* CONFIG_PCIEAER */
 
 #ifdef CONFIG_PCIEPORTBUS
@@ -439,6 +441,7 @@ void pci_dpc_init(struct pci_dev *pdev);
 void dpc_process_error(struct pci_dev *pdev);
 pci_ers_result_t dpc_reset_link(struct pci_dev *pdev);
 bool pci_dpc_recovered(struct pci_dev *pdev);
+unsigned int dpc_tlp_log_len(struct pci_dev *dev);
 #else
 static inline void pci_save_dpc_state(struct pci_dev *dev) { }
 static inline void pci_restore_dpc_state(struct pci_dev *dev) { }
diff --git a/drivers/pci/pcie/aer.c b/drivers/pci/pcie/aer.c
index ac6293c24976..ecc1dea5a208 100644
--- a/drivers/pci/pcie/aer.c
+++ b/drivers/pci/pcie/aer.c
@@ -1245,7 +1245,9 @@ int aer_get_device_error_info(struct pci_dev *dev, struct 
aer_err_info *info)
 
if (info->status & AER_LOG_TLP_MASKS) {
info->tlp_header_valid = 1;
-   pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG, 
>tlp);
+   pcie_read_tlp_log(dev, aer + PCI_ERR_HEADER_LOG,
+ aer + PCI_ERR_PREFIX_LOG,
+ aer_tlp_log_len(dev), >tlp);
}
}
 
diff --git a/drivers/pci/pcie/dpc.c b/drivers/pci/pcie/dpc.c
index a668820696dc..5056cc6961ec 100644
--- a/drivers/pci/pcie/dpc.c
+++ b/drivers/pci/pcie/dpc.c
@@ -190,7 +190,7 @@ pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
 static void dpc_process_rp_pio_error(struct pci_dev *pdev)
 {
u16 cap = pdev->dpc_cap, dpc_status, first_error;
-   u32 status, mask, sev, syserr, exc, log, prefix;
+   u32 status, mask, sev, syserr, exc, log;
struct pcie_tlp_log tlp_log;
int i;
 
@@ -217,20 +217,19 @@ static void dpc_process_rp_pio_error(struct pci_dev *pdev)
 
if (pdev->dpc_rp_log_size < 4)
goto clear_status;
-   pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG, _log);
+   pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
+ cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG,
+ dpc_tlp_log_len(pdev), _log);
pci_err(pdev, "TLP Header: %#010x %#010x %#010x %#010x\n",
tlp_log.dw[0], tlp_log.dw[1], tlp_log.dw[2], tlp_log.dw[3]);
+   for (i = 0; i < pdev->dpc_rp_log_size - 5; i++)
+   pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, 
tlp_log.prefix[i]);
 
if (pdev->dpc_rp_log_size < 5)
goto clear_status;
pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, );
pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log);
 
-   for (i = 0; i < pdev->dpc_rp_log_size - 5; i++) {
-   pci_read_config_dword(pdev,
-   cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG + i * 4, 
);
-   pci_err(pdev, "TLP Prefix Header: dw%d, %#010x\n", i, prefix);
-   }
  clear_status:
pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
 }
diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c
index 65ac7b5d8a87..def9dd7b73e8 100644
--- a/drivers/pci/pcie/tlp.c
+++ b/drivers/pci/pcie/tlp.c
@@ -11,26 +11,65 @@
 
 #include "../pci.h"
 
+/**
+ * aer_tlp_log_len - Calculates AER Capability TLP Header/Prefix Log length
+ * @dev: PCIe device
+ *
+ * Return: TLP Header/Prefix Log length
+ */
+unsigned int aer_tlp_log_len(struct pci_dev *dev)
+{
+   return 4 + 

[PATCH v5 5/7] PCI: Store # of supported End-End TLP Prefixes

2024-05-14 Thread Ilpo Järvinen
eetlp_prefix_path in the struct pci_dev tells if End-End TLP Prefixes
are supported by the path or not, the value is only calculated if
CONFIG_PCI_PASID is set.

The Max End-End TLP Prefixes field in the Device Capabilities Register
2 also tells how many (1-4) End-End TLP Prefixes are supported (PCIe r6
sec 7.5.3.15). The number of supported End-End Prefixes is useful for
reading correct number of DWORDs from TLP Prefix Log register in AER
capability (PCIe r6 sec 7.8.4.12).

Replace eetlp_prefix_path with eetlp_prefix_max and determine the
number of supported End-End Prefixes regardless of CONFIG_PCI_PASID so
that an upcoming commit generalizing TLP Prefix Log register reading
does not have to read extra DWORDs for End-End Prefixes that never will
be there.

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/ats.c |  2 +-
 drivers/pci/probe.c   | 14 +-
 include/linux/pci.h   |  2 +-
 include/uapi/linux/pci_regs.h |  1 +
 4 files changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/ats.c b/drivers/pci/ats.c
index c570892b2090..e13433dcfc82 100644
--- a/drivers/pci/ats.c
+++ b/drivers/pci/ats.c
@@ -377,7 +377,7 @@ int pci_enable_pasid(struct pci_dev *pdev, int features)
if (WARN_ON(pdev->pasid_enabled))
return -EBUSY;
 
-   if (!pdev->eetlp_prefix_path && !pdev->pasid_no_tlp)
+   if (!pdev->eetlp_prefix_max && !pdev->pasid_no_tlp)
return -EINVAL;
 
if (!pasid)
diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c
index 1325fbae2f28..02035b005a53 100644
--- a/drivers/pci/probe.c
+++ b/drivers/pci/probe.c
@@ -2211,8 +2211,8 @@ static void pci_configure_relaxed_ordering(struct pci_dev 
*dev)
 
 static void pci_configure_eetlp_prefix(struct pci_dev *dev)
 {
-#ifdef CONFIG_PCI_PASID
struct pci_dev *bridge;
+   unsigned int eetlp_max;
int pcie_type;
u32 cap;
 
@@ -2224,15 +2224,19 @@ static void pci_configure_eetlp_prefix(struct pci_dev 
*dev)
return;
 
pcie_type = pci_pcie_type(dev);
+
+   eetlp_max = FIELD_GET(PCI_EXP_DEVCAP2_EE_PREFIX_MAX, cap);
+   /* 00b means 4 */
+   eetlp_max = eetlp_max ?: 4;
+
if (pcie_type == PCI_EXP_TYPE_ROOT_PORT ||
pcie_type == PCI_EXP_TYPE_RC_END)
-   dev->eetlp_prefix_path = 1;
+   dev->eetlp_prefix_max = eetlp_max;
else {
bridge = pci_upstream_bridge(dev);
-   if (bridge && bridge->eetlp_prefix_path)
-   dev->eetlp_prefix_path = 1;
+   if (bridge && bridge->eetlp_prefix_max)
+   dev->eetlp_prefix_max = eetlp_max;
}
-#endif
 }
 
 static void pci_configure_serr(struct pci_dev *dev)
diff --git a/include/linux/pci.h b/include/linux/pci.h
index 16493426a04f..29c51325b1d9 100644
--- a/include/linux/pci.h
+++ b/include/linux/pci.h
@@ -397,7 +397,7 @@ struct pci_dev {
   supported from root to here */
 #endif
unsigned intpasid_no_tlp:1; /* PASID works without TLP 
Prefix */
-   unsigned inteetlp_prefix_path:1;/* End-to-End TLP Prefix */
+   unsigned inteetlp_prefix_max:3; /* Max # of End-End TLP 
Prefixes, 0=not supported */
 
pci_channel_state_t error_state;/* Current connectivity state */
struct device   dev;/* Generic device interface */
diff --git a/include/uapi/linux/pci_regs.h b/include/uapi/linux/pci_regs.h
index a39193213ff2..09e0c300c952 100644
--- a/include/uapi/linux/pci_regs.h
+++ b/include/uapi/linux/pci_regs.h
@@ -661,6 +661,7 @@
 #define  PCI_EXP_DEVCAP2_OBFF_MSG  0x0004 /* New message signaling */
 #define  PCI_EXP_DEVCAP2_OBFF_WAKE 0x0008 /* Re-use WAKE# for OBFF */
 #define  PCI_EXP_DEVCAP2_EE_PREFIX 0x0020 /* End-End TLP Prefix */
+#define  PCI_EXP_DEVCAP2_EE_PREFIX_MAX 0x00c0 /* Max End-End TLP Prefixes 
*/
 #define PCI_EXP_DEVCTL20x28/* Device Control 2 */
 #define  PCI_EXP_DEVCTL2_COMP_TIMEOUT  0x000f  /* Completion Timeout Value */
 #define  PCI_EXP_DEVCTL2_COMP_TMOUT_DIS0x0010  /* Completion Timeout 
Disable */
-- 
2.39.2



[PATCH v5 4/7] PCI: Use unsigned int i in pcie_read_tlp_log()

2024-05-14 Thread Ilpo Järvinen
Loop variable i counting from 0 upwards does not need to be signed so
make it unsigned int.

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/pcie/tlp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c
index 2bf15749cd31..65ac7b5d8a87 100644
--- a/drivers/pci/pcie/tlp.c
+++ b/drivers/pci/pcie/tlp.c
@@ -24,7 +24,8 @@
 int pcie_read_tlp_log(struct pci_dev *dev, int where,
  struct pcie_tlp_log *log)
 {
-   int i, ret;
+   unsigned int i;
+   int ret;
 
memset(log, 0, sizeof(*log));
 
-- 
2.39.2



[PATCH v5 3/7] PCI: Make pcie_read_tlp_log() signature same

2024-05-14 Thread Ilpo Järvinen
pcie_read_tlp_log()'s prototype and function signature diverged due to
changes made while applying.

Make the parameters of pcie_read_tlp_log() named identically.

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/pcie/tlp.c | 11 +--
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c
index 3f053cc62290..2bf15749cd31 100644
--- a/drivers/pci/pcie/tlp.c
+++ b/drivers/pci/pcie/tlp.c
@@ -15,22 +15,21 @@
  * pcie_read_tlp_log - read TLP Header Log
  * @dev: PCIe device
  * @where: PCI Config offset of TLP Header Log
- * @tlp_log: TLP Log structure to fill
+ * @log: TLP Log structure to fill
  *
- * Fill @tlp_log from TLP Header Log registers, e.g., AER or DPC.
+ * Fill @log from TLP Header Log registers, e.g., AER or DPC.
  *
  * Return: 0 on success and filled TLP Log structure, <0 on error.
  */
 int pcie_read_tlp_log(struct pci_dev *dev, int where,
- struct pcie_tlp_log *tlp_log)
+ struct pcie_tlp_log *log)
 {
int i, ret;
 
-   memset(tlp_log, 0, sizeof(*tlp_log));
+   memset(log, 0, sizeof(*log));
 
for (i = 0; i < 4; i++) {
-   ret = pci_read_config_dword(dev, where + i * 4,
-   _log->dw[i]);
+   ret = pci_read_config_dword(dev, where + i * 4, >dw[i]);
if (ret)
return pcibios_err_to_errno(ret);
}
-- 
2.39.2



Re: [PATCH] arch/powerpc: Remove the definition of unused cede function

2024-05-14 Thread Gautam Menghani
On Tue, May 14, 2024 at 04:20:04PM GMT, Naveen N Rao wrote:
> On Tue, May 14, 2024 at 03:35:03PM GMT, Gautam Menghani wrote:
> > Remove extended_cede_processor() definition as it has no callers since
> > commit 48f6e7f6d948("powerpc/pseries: remove cede offline state for CPUs")
> 
> extended_cede_processor() was added in commit 69ddb57cbea0 
> ("powerpc/pseries: Add extended_cede_processor() helper function."), 
> which also added [get|set]_cede_latency_hint(). Those can also be 
> removed if extended_cede_processor() is no longer needed.

Yes thanks for pointing it out, will remove them as well.

Thanks,
Gautam


[PATCH v5 2/7] PCI: Move TLP Log handling to own file

2024-05-14 Thread Ilpo Järvinen
TLP Log is PCIe feature and is processed only by AER and DPC.
Configwise, DPC depends AER being enabled. In lack of better place, the
TLP Log handling code was initially placed into pci.c but it can be
easily placed in a separate file.

Move TLP Log handling code to own file under pcie/ subdirectory and
include it only when AER is enabled.

Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/pci.c | 27 ---
 drivers/pci/pci.h |  2 +-
 drivers/pci/pcie/Makefile |  2 +-
 drivers/pci/pcie/tlp.c| 39 +++
 4 files changed, 41 insertions(+), 29 deletions(-)
 create mode 100644 drivers/pci/pcie/tlp.c

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index 54ab1d6b8e53..2cc875f60fef 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1066,33 +1066,6 @@ static void pci_enable_acs(struct pci_dev *dev)
pci_disable_acs_redir(dev);
 }
 
-/**
- * pcie_read_tlp_log - read TLP Header Log
- * @dev: PCIe device
- * @where: PCI Config offset of TLP Header Log
- * @tlp_log: TLP Log structure to fill
- *
- * Fill @tlp_log from TLP Header Log registers, e.g., AER or DPC.
- *
- * Return: 0 on success and filled TLP Log structure, <0 on error.
- */
-int pcie_read_tlp_log(struct pci_dev *dev, int where,
- struct pcie_tlp_log *tlp_log)
-{
-   int i, ret;
-
-   memset(tlp_log, 0, sizeof(*tlp_log));
-
-   for (i = 0; i < 4; i++) {
-   ret = pci_read_config_dword(dev, where + i * 4,
-   _log->dw[i]);
-   if (ret)
-   return pcibios_err_to_errno(ret);
-   }
-
-   return 0;
-}
-
 /**
  * pci_restore_bars - restore a device's BAR values (e.g. after wake-up)
  * @dev: PCI device to have its BARs restored
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 9c968df86a92..0e9917f8bf3f 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -419,9 +419,9 @@ struct aer_err_info {
 
 int aer_get_device_error_info(struct pci_dev *dev, struct aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
-#endif /* CONFIG_PCIEAER */
 
 int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log 
*log);
+#endif /* CONFIG_PCIEAER */
 
 #ifdef CONFIG_PCIEPORTBUS
 /* Cached RCEC Endpoint Association */
diff --git a/drivers/pci/pcie/Makefile b/drivers/pci/pcie/Makefile
index 6461aa93fe76..591ef317 100644
--- a/drivers/pci/pcie/Makefile
+++ b/drivers/pci/pcie/Makefile
@@ -7,7 +7,7 @@ pcieportdrv-y   := portdrv.o rcec.o
 obj-$(CONFIG_PCIEPORTBUS)  += pcieportdrv.o
 
 obj-y  += aspm.o
-obj-$(CONFIG_PCIEAER)  += aer.o err.o
+obj-$(CONFIG_PCIEAER)  += aer.o err.o tlp.o
 obj-$(CONFIG_PCIEAER_INJECT)   += aer_inject.o
 obj-$(CONFIG_PCIE_PME) += pme.o
 obj-$(CONFIG_PCIE_DPC) += dpc.o
diff --git a/drivers/pci/pcie/tlp.c b/drivers/pci/pcie/tlp.c
new file mode 100644
index ..3f053cc62290
--- /dev/null
+++ b/drivers/pci/pcie/tlp.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * PCIe TLP Log handling
+ *
+ * Copyright (C) 2024 Intel Corporation
+ */
+
+#include 
+#include 
+#include 
+
+#include "../pci.h"
+
+/**
+ * pcie_read_tlp_log - read TLP Header Log
+ * @dev: PCIe device
+ * @where: PCI Config offset of TLP Header Log
+ * @tlp_log: TLP Log structure to fill
+ *
+ * Fill @tlp_log from TLP Header Log registers, e.g., AER or DPC.
+ *
+ * Return: 0 on success and filled TLP Log structure, <0 on error.
+ */
+int pcie_read_tlp_log(struct pci_dev *dev, int where,
+ struct pcie_tlp_log *tlp_log)
+{
+   int i, ret;
+
+   memset(tlp_log, 0, sizeof(*tlp_log));
+
+   for (i = 0; i < 4; i++) {
+   ret = pci_read_config_dword(dev, where + i * 4,
+   _log->dw[i]);
+   if (ret)
+   return pcibios_err_to_errno(ret);
+   }
+
+   return 0;
+}
-- 
2.39.2



[PATCH v5 1/7] PCI: Don't expose pcie_read_tlp_log() outside of PCI subsystem

2024-05-14 Thread Ilpo Järvinen
pcie_read_tlp_log() was exposed by the commit 0a5a46a6a61b ("PCI/AER:
Generalize TLP Header Log reading") but this is now considered a
mistake. No drivers outside of PCI subsystem should build their own
diagnostic logging but should rely on PCI core doing it for them.

There's currently one driver (ixgbe) doing it independently which was
the initial reason why the export was added but it was decided by the
PCI maintainer that it's something that should be eliminated.

Remove the unwanted EXPORT of pcie_read_tlp_log() and remove it from
include/linux/aer.h.

Link: https://lore.kernel.org/all/20240322193011.GA701027@bhelgaas/
Signed-off-by: Ilpo Järvinen 
---
 drivers/pci/pci.c   | 1 -
 drivers/pci/pci.h   | 4 
 include/linux/aer.h | 2 --
 3 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/pci.c b/drivers/pci/pci.c
index e5f243dd4288..54ab1d6b8e53 100644
--- a/drivers/pci/pci.c
+++ b/drivers/pci/pci.c
@@ -1092,7 +1092,6 @@ int pcie_read_tlp_log(struct pci_dev *dev, int where,
 
return 0;
 }
-EXPORT_SYMBOL_GPL(pcie_read_tlp_log);
 
 /**
  * pci_restore_bars - restore a device's BAR values (e.g. after wake-up)
diff --git a/drivers/pci/pci.h b/drivers/pci/pci.h
index 17fed1846847..9c968df86a92 100644
--- a/drivers/pci/pci.h
+++ b/drivers/pci/pci.h
@@ -4,6 +4,8 @@
 
 #include 
 
+struct pcie_tlp_log;
+
 /* Number of possible devfns: 0.0 to 1f.7 inclusive */
 #define MAX_NR_DEVFNS 256
 
@@ -419,6 +421,8 @@ int aer_get_device_error_info(struct pci_dev *dev, struct 
aer_err_info *info);
 void aer_print_error(struct pci_dev *dev, struct aer_err_info *info);
 #endif /* CONFIG_PCIEAER */
 
+int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log 
*log);
+
 #ifdef CONFIG_PCIEPORTBUS
 /* Cached RCEC Endpoint Association */
 struct rcec_ea {
diff --git a/include/linux/aer.h b/include/linux/aer.h
index 4b97f38f3fcf..190a0a2061cd 100644
--- a/include/linux/aer.h
+++ b/include/linux/aer.h
@@ -37,8 +37,6 @@ struct aer_capability_regs {
u16 uncor_err_source;
 };
 
-int pcie_read_tlp_log(struct pci_dev *dev, int where, struct pcie_tlp_log 
*log);
-
 #if defined(CONFIG_PCIEAER)
 int pci_aer_clear_nonfatal_status(struct pci_dev *dev);
 int pcie_aer_is_native(struct pci_dev *dev);
-- 
2.39.2



[PATCH v5 0/7] PCI: Consolidate TLP Log reading and printing

2024-05-14 Thread Ilpo Järvinen
This series has the remaining patches of the AER & DPC TLP Log handling
consolidation and now includes a few minor improvements to the earlier
accepted TLP Logging code.

v5:
- Fix build with AER=y and DPC=n
- Match kerneldoc and function parameter name

v4:
- Added patches:
- Remove EXPORT of pcie_read_tlp_log()
- Moved code to pcie/tlp.c and build only with AER enabled
- Match variables in prototype and function
- int -> unsigned int conversion
- eetlp_prefix_max into own patch
- struct pcie_tlp_log param consistently called "log" within tlp.c
- Moved function prototypes into drivers/pci/pci.h
- Describe AER/DPC differences more clearly in one commit message

v3:
- Small rewording in a commit message

v2:
- Don't add EXPORT()s
- Don't include igxbe changes
- Don't use pr_cont() as it's incompatible with pci_err() and according
  to Andy Shevchenko should not be used in the first place

Ilpo Järvinen (7):
  PCI: Don't expose pcie_read_tlp_log() outside of PCI subsystem
  PCI: Move TLP Log handling to own file
  PCI: Make pcie_read_tlp_log() signature same
  PCI: Use unsigned int i in pcie_read_tlp_log()
  PCI: Store # of supported End-End TLP Prefixes
  PCI: Add TLP Prefix reading into pcie_read_tlp_log()
  PCI: Create helper to print TLP Header and Prefix Log

 drivers/pci/ats.c |   2 +-
 drivers/pci/pci.c |  28 -
 drivers/pci/pci.h |   9 +++
 drivers/pci/pcie/Makefile |   2 +-
 drivers/pci/pcie/aer.c|  14 ++---
 drivers/pci/pcie/dpc.c|  14 ++---
 drivers/pci/pcie/tlp.c| 109 ++
 drivers/pci/probe.c   |  14 +++--
 include/linux/aer.h   |   3 +-
 include/linux/pci.h   |   2 +-
 include/uapi/linux/pci_regs.h |   2 +
 11 files changed, 143 insertions(+), 56 deletions(-)
 create mode 100644 drivers/pci/pcie/tlp.c

-- 
2.39.2



Re: [PATCH] arch/powerpc: Remove the definition of unused cede function

2024-05-14 Thread Naveen N Rao
On Tue, May 14, 2024 at 03:35:03PM GMT, Gautam Menghani wrote:
> Remove extended_cede_processor() definition as it has no callers since
> commit 48f6e7f6d948("powerpc/pseries: remove cede offline state for CPUs")

extended_cede_processor() was added in commit 69ddb57cbea0 
("powerpc/pseries: Add extended_cede_processor() helper function."), 
which also added [get|set]_cede_latency_hint(). Those can also be 
removed if extended_cede_processor() is no longer needed.

- Naveen

> 
> Signed-off-by: Gautam Menghani 
> ---
>  arch/powerpc/include/asm/plpar_wrappers.h | 18 --
>  1 file changed, 18 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/plpar_wrappers.h 
> b/arch/powerpc/include/asm/plpar_wrappers.h
> index b3ee44a40c2f..6431fa1e1cb1 100644
> --- a/arch/powerpc/include/asm/plpar_wrappers.h
> +++ b/arch/powerpc/include/asm/plpar_wrappers.h
> @@ -37,24 +37,6 @@ static inline long cede_processor(void)
>   return plpar_hcall_norets_notrace(H_CEDE);
>  }
>  
> -static inline long extended_cede_processor(unsigned long latency_hint)
> -{
> - long rc;
> - u8 old_latency_hint = get_cede_latency_hint();
> -
> - set_cede_latency_hint(latency_hint);
> -
> - rc = cede_processor();
> -
> - /* Ensure that H_CEDE returns with IRQs on */
> - if (WARN_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(mfmsr() & 
> MSR_EE)))
> - __hard_irq_enable();
> -
> - set_cede_latency_hint(old_latency_hint);
> -
> - return rc;
> -}
> -
>  static inline long vpa_call(unsigned long flags, unsigned long cpu,
>   unsigned long vpa)
>  {
> -- 
> 2.45.0
> 


Re: [PATCH v3 3/5] powerpc/64: Convert patch_instruction() to patch_u32()

2024-05-14 Thread Naveen N Rao
On Tue, May 14, 2024 at 04:39:30AM GMT, Christophe Leroy wrote:
> 
> 
> Le 14/05/2024 à 04:59, Benjamin Gray a écrit :
> > On Tue, 2024-04-23 at 15:09 +0530, Naveen N Rao wrote:
> >> On Mon, Mar 25, 2024 at 04:53:00PM +1100, Benjamin Gray wrote:
> >>> This use of patch_instruction() is working on 32 bit data, and can
> >>> fail
> >>> if the data looks like a prefixed instruction and the extra write
> >>> crosses a page boundary. Use patch_u32() to fix the write size.
> >>>
> >>> Fixes: 8734b41b3efe ("powerpc/module_64: Fix livepatching for RO
> >>> modules")
> >>> Link: https://lore.kernel.org/all/20230203004649.1f59dbd4@yea/
> >>> Signed-off-by: Benjamin Gray 
> >>>
> >>> ---
> >>>
> >>> v2: * Added the fixes tag, it seems appropriate even if the subject
> >>> does
> >>>    mention a more robust solution being required.
> >>>
> >>> patch_u64() should be more efficient, but judging from the bug
> >>> report
> >>> it doesn't seem like the data is doubleword aligned.
> >>
> >> Asking again, is that still the case? It looks like at least the
> >> first
> >> fix below can be converted to patch_u64().
> >>
> >> - Naveen
> > 
> > Sorry, I think I forgot this question last time. Reading the commit
> > descriptions you linked, I don't see any mention of "entry->funcdata
> > will always be doubleword aligned because XYZ". If the patch makes it
> > doubleword aligned anyway, I wouldn't be confident asserting all
> > callers will always do this without looking into it a lot more.

No worries. I was asking primarily to check if you had noticed a 
specific issue with alignment.

As Christophe mentions, the structure is aligned. It is primarily 
allotted in a separate stubs section for modules. Looking at it closer 
though, I wonder if we need the below:

diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index cccb1f78e058..0226d73a0007 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -428,8 +428,11 @@ int module_frob_arch_sections(Elf64_Ehdr *hdr,
 
/* Find .toc and .stubs sections, symtab and strtab */
for (i = 1; i < hdr->e_shnum; i++) {
-   if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0)
+   if (strcmp(secstrings + sechdrs[i].sh_name, ".stubs") == 0) {
me->arch.stubs_section = i;
+   if (sechdrs[i].sh_addralign < 8)
+   sechdrs[i].sh_addralign = 8;
+   }
 #ifdef CONFIG_PPC_KERNEL_PCREL
else if (strcmp(secstrings + sechdrs[i].sh_name, 
".data..percpu") == 0)
me->arch.pcpu_section = i;

> > 
> > Perhaps a separate series could optimise it with appropriate
> > justification/assertions to catch bad alignment. But I think leaving it
> > out of this series is fine because the original works in words, so it's
> > not regressing anything.

That should be fine.

> 
> As far as I can see, the struct is 64 bits aligned by definition so 
> funcdata field is aligned too as there are just 8x u32 before it:
> 
> struct ppc64_stub_entry {
>   /*
>* 28 byte jump instruction sequence (7 instructions) that can
>* hold ppc64_stub_insns or stub_insns. Must be 8-byte aligned
>* with PCREL kernels that use prefix instructions in the stub.
>*/
>   u32 jump[7];
>   /* Used by ftrace to identify stubs */
>   u32 magic;
>   /* Data for the above code */
>   func_desc_t funcdata;
> } __aligned(8);
> 

Thanks,
Naveen



[PATCH] arch/powerpc: Remove the definition of unused cede function

2024-05-14 Thread Gautam Menghani
Remove extended_cede_processor() definition as it has no callers since
commit 48f6e7f6d948("powerpc/pseries: remove cede offline state for CPUs")

Signed-off-by: Gautam Menghani 
---
 arch/powerpc/include/asm/plpar_wrappers.h | 18 --
 1 file changed, 18 deletions(-)

diff --git a/arch/powerpc/include/asm/plpar_wrappers.h 
b/arch/powerpc/include/asm/plpar_wrappers.h
index b3ee44a40c2f..6431fa1e1cb1 100644
--- a/arch/powerpc/include/asm/plpar_wrappers.h
+++ b/arch/powerpc/include/asm/plpar_wrappers.h
@@ -37,24 +37,6 @@ static inline long cede_processor(void)
return plpar_hcall_norets_notrace(H_CEDE);
 }
 
-static inline long extended_cede_processor(unsigned long latency_hint)
-{
-   long rc;
-   u8 old_latency_hint = get_cede_latency_hint();
-
-   set_cede_latency_hint(latency_hint);
-
-   rc = cede_processor();
-
-   /* Ensure that H_CEDE returns with IRQs on */
-   if (WARN_ON(IS_ENABLED(CONFIG_PPC_IRQ_SOFT_MASK_DEBUG) && !(mfmsr() & 
MSR_EE)))
-   __hard_irq_enable();
-
-   set_cede_latency_hint(old_latency_hint);
-
-   return rc;
-}
-
 static inline long vpa_call(unsigned long flags, unsigned long cpu,
unsigned long vpa)
 {
-- 
2.45.0