[PATCH v2] powerpc/powermac: Fix low_sleep_handler with CONFIG_VMAP_STACK

2020-12-07 Thread Christophe Leroy
low_sleep_handler() can't restore the context from standard
stack because the stack can hardly be accessed with MMU OFF.

Store everything in a global storage area instead of storing
a pointer to the stack in that global storage area.

To avoid a complete churn of the function, still use r1 as
the pointer to the storage area during restore.

Reported-by: Giuseppe Sacco 
Fixes: cd08f109e262 ("powerpc/32s: Enable CONFIG_VMAP_STACK")
Signed-off-by: Christophe Leroy 
---
This is only build tested. Giuseppe can you test it ? Thanks.

v2: Changed an erroneous 'addis' to 'addi' ; Using bss instead of data section
Signed-off-by: Christophe Leroy 
---
 arch/powerpc/platforms/Kconfig.cputype  |   2 +-
 arch/powerpc/platforms/powermac/sleep.S | 132 +++-
 2 files changed, 60 insertions(+), 74 deletions(-)

diff --git a/arch/powerpc/platforms/Kconfig.cputype 
b/arch/powerpc/platforms/Kconfig.cputype
index c194c4ae8bc7..32a9c4c09b98 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -36,7 +36,7 @@ config PPC_BOOK3S_6xx
select PPC_HAVE_PMU_SUPPORT
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
-   select HAVE_ARCH_VMAP_STACK if !ADB_PMU
+   select HAVE_ARCH_VMAP_STACK
 
 config PPC_85xx
bool "Freescale 85xx"
diff --git a/arch/powerpc/platforms/powermac/sleep.S 
b/arch/powerpc/platforms/powermac/sleep.S
index 7e0f8ba6e54a..d497a60003d2 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -44,7 +44,8 @@
 #define SL_TB  0xa0
 #define SL_R2  0xa8
 #define SL_CR  0xac
-#define SL_R12 0xb0/* r12 to r31 */
+#define SL_LR  0xb0
+#define SL_R12 0xb4/* r12 to r31 */
 #define SL_SIZE(SL_R12 + 80)
 
.section .text
@@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler)
blr
 #else
mflrr0
-   stw r0,4(r1)
-   stwur1,-SL_SIZE(r1)
+   lis r11,sleep_storage@ha
+   addir11,r11,sleep_storage@l
+   stw r0,SL_LR(r11)
mfcrr0
-   stw r0,SL_CR(r1)
-   stw r2,SL_R2(r1)
-   stmwr12,SL_R12(r1)
+   stw r0,SL_CR(r11)
+   stw r1,SL_SP(r11)
+   stw r2,SL_R2(r11)
+   stmwr12,SL_R12(r11)
 
/* Save MSR & SDR1 */
mfmsr   r4
-   stw r4,SL_MSR(r1)
+   stw r4,SL_MSR(r11)
mfsdr1  r4
-   stw r4,SL_SDR1(r1)
+   stw r4,SL_SDR1(r11)
 
/* Get a stable timebase and save it */
 1: mftbu   r4
-   stw r4,SL_TB(r1)
+   stw r4,SL_TB(r11)
mftbr5
-   stw r5,SL_TB+4(r1)
+   stw r5,SL_TB+4(r11)
mftbu   r3
cmpwr3,r4
bne 1b
 
/* Save SPRGs */
mfsprg  r4,0
-   stw r4,SL_SPRG0(r1)
+   stw r4,SL_SPRG0(r11)
mfsprg  r4,1
-   stw r4,SL_SPRG0+4(r1)
+   stw r4,SL_SPRG0+4(r11)
mfsprg  r4,2
-   stw r4,SL_SPRG0+8(r1)
+   stw r4,SL_SPRG0+8(r11)
mfsprg  r4,3
-   stw r4,SL_SPRG0+12(r1)
+   stw r4,SL_SPRG0+12(r11)
 
/* Save BATs */
mfdbatu r4,0
-   stw r4,SL_DBAT0(r1)
+   stw r4,SL_DBAT0(r11)
mfdbatl r4,0
-   stw r4,SL_DBAT0+4(r1)
+   stw r4,SL_DBAT0+4(r11)
mfdbatu r4,1
-   stw r4,SL_DBAT1(r1)
+   stw r4,SL_DBAT1(r11)
mfdbatl r4,1
-   stw r4,SL_DBAT1+4(r1)
+   stw r4,SL_DBAT1+4(r11)
mfdbatu r4,2
-   stw r4,SL_DBAT2(r1)
+   stw r4,SL_DBAT2(r11)
mfdbatl r4,2
-   stw r4,SL_DBAT2+4(r1)
+   stw r4,SL_DBAT2+4(r11)
mfdbatu r4,3
-   stw r4,SL_DBAT3(r1)
+   stw r4,SL_DBAT3(r11)
mfdbatl r4,3
-   stw r4,SL_DBAT3+4(r1)
+   stw r4,SL_DBAT3+4(r11)
mfibatu r4,0
-   stw r4,SL_IBAT0(r1)
+   stw r4,SL_IBAT0(r11)
mfibatl r4,0
-   stw r4,SL_IBAT0+4(r1)
+   stw r4,SL_IBAT0+4(r11)
mfibatu r4,1
-   stw r4,SL_IBAT1(r1)
+   stw r4,SL_IBAT1(r11)
mfibatl r4,1
-   stw r4,SL_IBAT1+4(r1)
+   stw r4,SL_IBAT1+4(r11)
mfibatu r4,2
-   stw r4,SL_IBAT2(r1)
+   stw r4,SL_IBAT2(r11)
mfibatl r4,2
-   stw r4,SL_IBAT2+4(r1)
+   stw r4,SL_IBAT2+4(r11)
mfibatu r4,3
-   stw r4,SL_IBAT3(r1)
+   stw r4,SL_IBAT3(r11)
mfibatl r4,3
-   stw r4,SL_IBAT3+4(r1)
+   stw r4,SL_IBAT3+4(r11)
 
 BEGIN_MMU_FTR_SECTION
mfspr   r4,SPRN_DBAT4U
-   stw r4,SL_DBAT4(r1)
+   stw r4,SL_DBAT4(r11)
mfspr   r4,SPRN_DBAT4L
-   stw r4,SL_DBAT4+4(r1)
+   stw r4,SL_DBAT4+4(r11)
mfspr   r4,SPRN_DBAT5U
-   stw r4,SL_DBAT5(r1)
+   stw r4,SL_DBAT5(r11)
mfspr   r4,SPRN_DBAT5L
-   stw r4,SL_DBAT5+4(r1)
+

[PATCH] powerpc/book3s64/kuap: Improve error reporting with KUAP

2020-12-07 Thread Aneesh Kumar K.V
This partially reverts commit eb232b162446 ("powerpc/book3s64/kuap: Improve
error reporting with KUAP") and updates the fault handler to print

[   55.022514] Kernel attempted to access user page (7e6725b7) - exploit 
attempt? (uid: 0)
[   55.022528] BUG: Unable to handle kernel data access on read at 
0x7e6725b7
[   55.022533] Faulting instruction address: 0xc0e8b9bc
[   55.022540] Oops: Kernel access of bad area, sig: 11 [#1]


when the kernel accesses a userspace address without unlocking the AMR.

bad_kuap_fault() is added as part of commit 5e5be3aed230 ("powerpc/mm: Detect
bad KUAP faults") to catch userspace access incorrectly blocked by AMR. Hence
retain the full stack dump there even with hash translation. Also, add a comment
explaining the difference between hash and radix.

Signed-off-by: Aneesh Kumar K.V 
---
 arch/powerpc/include/asm/book3s/32/kup.h |  4 +--
 arch/powerpc/include/asm/book3s/64/kup.h | 34 ++--
 arch/powerpc/include/asm/kup.h   |  4 +--
 arch/powerpc/include/asm/nohash/32/kup-8xx.h |  4 +--
 arch/powerpc/mm/fault.c  |  4 +--
 5 files changed, 25 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/include/asm/book3s/32/kup.h 
b/arch/powerpc/include/asm/book3s/32/kup.h
index b18cd931e325..32fd4452e960 100644
--- a/arch/powerpc/include/asm/book3s/32/kup.h
+++ b/arch/powerpc/include/asm/book3s/32/kup.h
@@ -177,8 +177,8 @@ static inline void restore_user_access(unsigned long flags)
allow_user_access(to, to, end - addr, KUAP_READ_WRITE);
 }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
- bool is_write, unsigned long error_code)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
unsigned long begin = regs->kuap & 0xf000;
unsigned long end = regs->kuap << 28;
diff --git a/arch/powerpc/include/asm/book3s/64/kup.h 
b/arch/powerpc/include/asm/book3s/64/kup.h
index f2e6dd78d5e2..7075c92c320c 100644
--- a/arch/powerpc/include/asm/book3s/64/kup.h
+++ b/arch/powerpc/include/asm/book3s/64/kup.h
@@ -353,29 +353,29 @@ static inline void set_kuap(unsigned long value)
isync();
 }
 
-#define RADIX_KUAP_BLOCK_READ  UL(0x4000)
-#define RADIX_KUAP_BLOCK_WRITE UL(0x8000)
-
 static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
- bool is_write, unsigned long error_code)
+ bool is_write)
 {
if (!mmu_has_feature(MMU_FTR_BOOK3S_KUAP))
return false;
-
-   if (radix_enabled()) {
-   /*
-* Will be a storage protection fault.
-* Only check the details of AMR[0]
-*/
-   return WARN((regs->kuap & (is_write ? RADIX_KUAP_BLOCK_WRITE : 
RADIX_KUAP_BLOCK_READ)),
-   "Bug: %s fault blocked by AMR!", is_write ? "Write" 
: "Read");
-   }
/*
-* We don't want to WARN here because userspace can setup
-* keys such that a kernel access to user address can cause
-* fault
+* For radix this will be a storage protection fault (DSISR_PROTFAULT).
+* For hash this will be a key fault (DSISR_KEYFAULT)
 */
-   return !!(error_code & DSISR_KEYFAULT);
+   /*
+* We do have exception table entry, but accessing the
+* userspace results in fault.  This could be because we
+* didn't unlock the AMR or access is denied by userspace
+* using a key value that blocks access. We are only interested
+* in catching the use case of accessing without unlocking
+* the AMR. Hence check for BLOCK_WRITE/READ against AMR.
+*/
+   if (is_write) {
+   return WARN(((regs->amr & AMR_KUAP_BLOCK_WRITE) == 
AMR_KUAP_BLOCK_WRITE),
+   "Bug: Write fault blocked by AMR!");
+   }
+   return WARN(((regs->amr & AMR_KUAP_BLOCK_READ) == AMR_KUAP_BLOCK_READ),
+   "Bug: Read fault blocked by AMR!");
 }
 
 static __always_inline void allow_user_access(void __user *to, const void 
__user *from,
diff --git a/arch/powerpc/include/asm/kup.h b/arch/powerpc/include/asm/kup.h
index f8ec679bd2de..5a9820c54da9 100644
--- a/arch/powerpc/include/asm/kup.h
+++ b/arch/powerpc/include/asm/kup.h
@@ -62,8 +62,8 @@ void setup_kuap(bool disabled);
 #else
 static inline void setup_kuap(bool disabled) { }
 
-static inline bool bad_kuap_fault(struct pt_regs *regs, unsigned long address,
- bool is_write, unsigned long error_code)
+static inline bool
+bad_kuap_fault(struct pt_regs *regs, unsigned long address, bool is_write)
 {
return false;
 }
diff --git a/arch/powerpc/include/asm/nohash/32/kup-8xx.h 
b/arch/powerpc/include/asm/nohash/32/kup-8xx.h
index 7bdd9e5b63ed..567cdc557402 100644
--- a/arch/powerpc/inclu

RE: [PATCH 00/20] ethernet: ucc_geth: assorted fixes and simplifications

2020-12-07 Thread Qiang Zhao
On 06/12/2020 05:12, Rasmus Villemoes  wrote:


> -Original Message-
> From: Rasmus Villemoes 
> Sent: 2020年12月6日 5:12
> To: Jakub Kicinski 
> Cc: Leo Li ; David S. Miller ;
> Qiang Zhao ; net...@vger.kernel.org;
> linuxppc-dev@lists.ozlabs.org; linux-ker...@vger.kernel.org;
> linux-arm-ker...@lists.infradead.org; Vladimir Oltean
> 
> Subject: Re: [PATCH 00/20] ethernet: ucc_geth: assorted fixes and
> simplifications
> 
> On 05/12/2020 21.53, Jakub Kicinski wrote:
> > On Sat,  5 Dec 2020 20:17:23 +0100 Rasmus Villemoes wrote:
> >> While trying to figure out how to allow bumping the MTU with the
> >> ucc_geth driver, I fell into a rabbit hole and stumbled on a whole
> >> bunch of issues of varying importance - some are outright bug fixes,
> >> while most are a matter of simplifying the code to make it more
> >> accessible.
> >>
> >> At the end of digging around the code and data sheet to figure out
> >> how it all works, I think the MTU issue might be fixed by a
> >> one-liner, but I'm not sure it can be that simple. It does seem to
> >> work (ping -s X works for larger values of X, and wireshark confirms
> >> that the packets are not fragmented).
> >>
> >> Re patch 2, someone in NXP should check how the hardware actually
> >> works and make an updated reference manual available.
> >
> > Looks like a nice clean up on a quick look.
> >
> > Please separate patches 1 and 11 (which are the two bug fixes I see)
> 
> I think patch 2 is a bug fix as well, but I'd like someone from NXP to 
> comment.

It's ok for me.


Best Regards,
Qiang Zhao


Re: [PATCH] arch: fix 'unexpected IRQ trap at vector' warnings

2020-12-07 Thread Michael Ellerman
"Enrico Weigelt, metux IT consult"  writes:
> All archs, except Alpha, print out the irq number in hex, but the message
> looks like it was a decimal number, which is quite confusing. Fixing this
> by adding "0x" prefix.

Arguably decimal would be better, /proc/interrupts and /proc/irq/ both
use decimal.

The whole message is very dated IMO, these days the number it prints is
(possibly) virtualised via IRQ domains, i.e. it's not necessarily a
"vector" (if that even makes sense on all arches). Arguably "trap" is the
wrong term on some arches too.

So it would be better reworded entirely IMO, and also switched to
decimal to match other sources of information on interrupts.

Perhaps:
"Unexpected Linux IRQ %d."


If anyone else is having deja vu like me, yes this has come up before:
  
https://lore.kernel.org/lkml/20150712220211.7166.42035.st...@bhelgaas-glaptop2.roam.corp.google.com/

cheers



> diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h
> index cecc13214ef1..2749f19271d9 100644
> --- a/arch/arm/include/asm/hw_irq.h
> +++ b/arch/arm/include/asm/hw_irq.h
> @@ -9,7 +9,7 @@ static inline void ack_bad_irq(int irq)
>  {
>   extern unsigned long irq_err_count;
>   irq_err_count++;
> - pr_crit("unexpected IRQ trap at vector %02x\n", irq);
> + pr_crit("unexpected IRQ trap at vector 0x%02x\n", irq);
>  }
>  
>  #define ARCH_IRQ_INIT_FLAGS  (IRQ_NOREQUEST | IRQ_NOPROBE)
> diff --git a/arch/parisc/include/asm/hardirq.h 
> b/arch/parisc/include/asm/hardirq.h
> index 7f7039516e53..c3348af88d3f 100644
> --- a/arch/parisc/include/asm/hardirq.h
> +++ b/arch/parisc/include/asm/hardirq.h
> @@ -35,6 +35,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>  #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
>  #define inc_irq_stat(member) this_cpu_inc(irq_stat.member)
>  #define __inc_irq_stat(member)   __this_cpu_inc(irq_stat.member)
> -#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector %02x\n", irq)
> +#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector 0x%02x\n", 
> irq)
>  
>  #endif /* _PARISC_HARDIRQ_H */
> diff --git a/arch/powerpc/include/asm/hardirq.h 
> b/arch/powerpc/include/asm/hardirq.h
> index f133b5930ae1..ec8cf3cf6e49 100644
> --- a/arch/powerpc/include/asm/hardirq.h
> +++ b/arch/powerpc/include/asm/hardirq.h
> @@ -29,7 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
>  
>  static inline void ack_bad_irq(unsigned int irq)
>  {
> - printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
> + printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>  }
>  
>  extern u64 arch_irq_stat_cpu(unsigned int cpu);
> diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
> index dfbc3c6c0674..aaaec5cdd4fe 100644
> --- a/arch/s390/include/asm/hardirq.h
> +++ b/arch/s390/include/asm/hardirq.h
> @@ -23,7 +23,7 @@
>  
>  static inline void ack_bad_irq(unsigned int irq)
>  {
> - printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
> + printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>  }
>  
>  #endif /* __ASM_HARDIRQ_H */
> diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
> index b426796d26fd..2a2e6eae034b 100644
> --- a/arch/um/include/asm/hardirq.h
> +++ b/arch/um/include/asm/hardirq.h
> @@ -15,7 +15,7 @@ typedef struct {
>  #ifndef ack_bad_irq
>  static inline void ack_bad_irq(unsigned int irq)
>  {
> - printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
> + printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
>  }
>  #endif
>  
> diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
> index c5dd50369e2f..957c716f2df7 100644
> --- a/arch/x86/kernel/irq.c
> +++ b/arch/x86/kernel/irq.c
> @@ -37,7 +37,7 @@ atomic_t irq_err_count;
>  void ack_bad_irq(unsigned int irq)
>  {
>   if (printk_ratelimit())
> - pr_err("unexpected IRQ trap at vector %02x\n", irq);
> + pr_err("unexpected IRQ trap at vector 0x%02x\n", irq);
>  
>   /*
>* Currently unexpected vectors happen only on SMP and APIC.
> -- 
> 2.11.0


Re: [PATCH] powerpc/mm: Fix KUAP warning by providing copy_from_kernel_nofault_allowed()

2020-12-07 Thread Michael Ellerman
Christophe Leroy  writes:
> Le 07/12/2020 à 01:24, Michael Ellerman a écrit :
>> Christophe Leroy  writes:
>>> Since commit c33165253492 ("powerpc: use non-set_fs based maccess
>>> routines"), userspace access is not granted anymore when using
>>> copy_from_kernel_nofault()
>>>
>>> However, kthread_probe_data() uses copy_from_kernel_nofault()
>>> to check validity of pointers. When the pointer is NULL,
>>> it points to userspace, leading to a KUAP fault and triggering
>>> the following big hammer warning many times when you request
>>> a sysrq "show task":
>>>
>>> [ 1117.202054] [ cut here ]
>>> [ 1117.202102] Bug: fault blocked by AP register !
>>> [ 1117.202261] WARNING: CPU: 0 PID: 377 at 
>>> arch/powerpc/include/asm/nohash/32/kup-8xx.h:66 do_page_fault+0x4a8/0x5ec
>>> [ 1117.202310] Modules linked in:
>>> [ 1117.202428] CPU: 0 PID: 377 Comm: sh Tainted: GW 
>>> 5.10.0-rc5-01340-g83f53be2de31-dirty #4175
>>> [ 1117.202499] NIP:  c0012048 LR: c0012048 CTR: 
>>> [ 1117.202573] REGS: cacdbb88 TRAP: 0700   Tainted: GW  
>>> (5.10.0-rc5-01340-g83f53be2de31-dirty)
>>> [ 1117.202625] MSR:  00021032   CR: 2408  XER: 2000
>>> [ 1117.202899]
>>> [ 1117.202899] GPR00: c0012048 cacdbc40 c2929290 0023 c092e554 0001 
>>> c09865e8 c092e640
>>> [ 1117.202899] GPR08: 1032   00014efc 28082224 100d166a 
>>> 100a0920 
>>> [ 1117.202899] GPR16: 100cac0c 100b 1080c3fc 1080d685 100d 100d 
>>>  100a0900
>>> [ 1117.202899] GPR24: 100d c07892ec  c0921510 c21f4440 005c 
>>> c000 cacdbc80
>>> [ 1117.204362] NIP [c0012048] do_page_fault+0x4a8/0x5ec
>>> [ 1117.204461] LR [c0012048] do_page_fault+0x4a8/0x5ec
>>> [ 1117.204509] Call Trace:
>>> [ 1117.204609] [cacdbc40] [c0012048] do_page_fault+0x4a8/0x5ec (unreliable)
>>> [ 1117.204771] [cacdbc70] [c00112f0] handle_page_fault+0x8/0x34
>>> [ 1117.204911] --- interrupt: 301 at copy_from_kernel_nofault+0x70/0x1c0
>>> [ 1117.204979] NIP:  c010dbec LR: c010dbac CTR: 0001
>>> [ 1117.205053] REGS: cacdbc80 TRAP: 0301   Tainted: GW  
>>> (5.10.0-rc5-01340-g83f53be2de31-dirty)
>>> [ 1117.205104] MSR:  9032   CR: 28082224  XER: 
>>> [ 1117.205416] DAR: 005c DSISR: c000
>>> [ 1117.205416] GPR00: c0045948 cacdbd38 c2929290 0001 0017 0017 
>>> 0027 000f
>>> [ 1117.205416] GPR08: c09926ec   3000 24082224
>>> [ 1117.206106] NIP [c010dbec] copy_from_kernel_nofault+0x70/0x1c0
>>> [ 1117.206202] LR [c010dbac] copy_from_kernel_nofault+0x30/0x1c0
>>> [ 1117.206258] --- interrupt: 301
>>> [ 1117.206372] [cacdbd38] [c004bbb0] kthread_probe_data+0x44/0x70 
>>> (unreliable)
>>> [ 1117.206561] [cacdbd58] [c0045948] print_worker_info+0xe0/0x194
>>> [ 1117.206717] [cacdbdb8] [c00548ac] sched_show_task+0x134/0x168
>>> [ 1117.206851] [cacdbdd8] [c005a268] show_state_filter+0x70/0x100
>>> [ 1117.206989] [cacdbe08] [c039baa0] sysrq_handle_showstate+0x14/0x24
>>> [ 1117.207122] [cacdbe18] [c039bf18] __handle_sysrq+0xac/0x1d0
>>> [ 1117.207257] [cacdbe48] [c039c0c0] write_sysrq_trigger+0x4c/0x74
>>> [ 1117.207407] [cacdbe68] [c01fba48] proc_reg_write+0xb4/0x114
>>> [ 1117.207550] [cacdbe88] [c0179968] vfs_write+0x12c/0x478
>>> [ 1117.207686] [cacdbf08] [c0179e60] ksys_write+0x78/0x128
>>> [ 1117.207826] [cacdbf38] [c00110d0] ret_from_syscall+0x0/0x34
>>> [ 1117.207938] --- interrupt: c01 at 0xfd4e784
>>> [ 1117.208008] NIP:  0fd4e784 LR: 0fe0f244 CTR: 10048d38
>>> [ 1117.208083] REGS: cacdbf48 TRAP: 0c01   Tainted: GW  
>>> (5.10.0-rc5-01340-g83f53be2de31-dirty)
>>> [ 1117.208134] MSR:  d032   CR: 4400  XER: 
>>> 
>>> [ 1117.208470]
>>> [ 1117.208470] GPR00: 0004 7fc34090 77bfb4e0 0001 1080fa40 0002 
>>> 740f fefefeff
>>> [ 1117.208470] GPR08: 7f7f7f7f 10048d38 1080c414 7fc343c0 
>>> [ 1117.209104] NIP [0fd4e784] 0xfd4e784
>>> [ 1117.209180] LR [0fe0f244] 0xfe0f244
>>> [ 1117.209236] --- interrupt: c01
>>> [ 1117.209274] Instruction dump:
>>> [ 1117.209353] 714a4000 418200f0 73ca0001 40820084 73ca0032 408200f8 
>>> 73c90040 4082ff60
>>> [ 1117.209727] 0fe0 3c60c082 386399f4 48013b65 <0fe0> 80010034 
>>> 386b 7c0803a6
>>> [ 1117.210102] ---[ end trace 1927c0323393af3e ]---
>>>
>>> To avoid that, copy_from_kernel_nofault_allowed() is used to check
>>> whether the address is a valid kernel address. But the default
>>> version of it returns true for any address.
>>>
>>> Provide a powerpc version of copy_from_kernel_nofault_allowed()
>>> that returns false when the address is below TASK_USER_MAX,
>>> so that copy_from_kernel_nofault() will return -ERANGE.
>>>
>>> Reported-by: Qian Cai 
>>> Fixes: c33165253492 ("powerpc: use non-set_fs based maccess routines")
>>> Cc: Christoph Hellwig 
>>> Cc: Al Viro 
>>> Signed-off-by: Christophe Leroy 
>>> ---
>>> This issue was introduced in 5.10. I didn't mark it for stable, hoping it 
>>> w

Re: [PATCH v2 2/2] ASoC: fsl: Add imx-hdmi machine driver

2020-12-07 Thread Nicolin Chen
On Sun, Dec 06, 2020 at 06:41:59PM +0800, Shengjiu Wang wrote:
> The driver is initially designed for sound card using HDMI
> interface on i.MX platform. There is internal HDMI IP or
> external HDMI modules connect with SAI or AUD2HTX interface.
> It supports both transmitter and receiver devices.
> 
> Signed-off-by: Shengjiu Wang 

Acked-by: Nicolin Chen 


Re: [PATCH v6 1/5] PCI: Unify ECAM constants in native PCI Express drivers

2020-12-07 Thread Jim Quinlan
On Sun, Dec 6, 2020 at 10:25 PM Florian Fainelli  wrote:
>
> +JimQ,
>
> On 12/6/2020 12:16 PM, Krzysztof Wilczyński wrote:
> > Hello Nicolas, Florian and Florian,
> >
> > [...]
> >> -/* Configuration space read/write support */
> >> -static inline int brcm_pcie_cfg_index(int busnr, int devfn, int reg)
> >> -{
> >> -return ((PCI_SLOT(devfn) & 0x1f) << PCIE_EXT_SLOT_SHIFT)
> >> -| ((PCI_FUNC(devfn) & 0x07) << PCIE_EXT_FUNC_SHIFT)
> >> -| (busnr << PCIE_EXT_BUSNUM_SHIFT)
> >> -| (reg & ~3);
> >> -}
> >> -
> >>  static void __iomem *brcm_pcie_map_conf(struct pci_bus *bus, unsigned int 
> >> devfn,
> >>  int where)
> >>  {
> >> @@ -716,7 +704,7 @@ static void __iomem *brcm_pcie_map_conf(struct pci_bus 
> >> *bus, unsigned int devfn,
> >>  return PCI_SLOT(devfn) ? NULL : base + where;
> >>
> >>  /* For devices, write to the config space index register */
> >> -idx = brcm_pcie_cfg_index(bus->number, devfn, 0);
> >> +idx = PCIE_ECAM_OFFSET(bus->number, devfn, 0);
> >>  writel(idx, pcie->base + PCIE_EXT_CFG_INDEX);
> >>  return base + PCIE_EXT_CFG_DATA + where;
> >>  }
> > [...]
> >
> > Passing the hard-coded 0 as the "reg" argument here never actually did
> > anything, thus the 32 bit alignment was never correctly enforced.
> >
> > My question would be: should this be 32 bit aligned?  It seems like the
> > intention was to perhaps make the alignment?  I am sadly not intimately
> > familiar with this hardware, so I am not sure if there is something to
> > fix here or not.
Hello Krzysztof,

The value gets assigned to our config-space index register, which has
the lower two bits marked "unused".  We're making sure that we are
putting zeroes there but it is most likely not necessary.

> >
> > Also, I wonder whether it would be safe to pass the offset (the "where"
> > variable) rather than hard-coded 0?
The answer is "no" for this code but "maybe" in the future -- allow me
to explain.  We have two methods to access the config space:

(1) Set a designated index register to map to the base of a device's
config-space.  From then we can access a 4k register set.  This is the
method you see in the code and is why we set reg=0 for the index value
and then add "where" to the return address.

(2) Set our index register to the bus/slot/func/reg value, and then we
access a single data register.  In this case we do set the "reg" to
the register value to set the index and then only add "where & 0x3" to
the return address.

As it turns out, (1) is not compatible with some MIPS SoCs that we
still support as they do not have the 4k data register set.  So I may
be changing to (2) in a future pullreq, and if so, I will invoke
PCIE_ECAM_OFFSET(bus->number, devfn, where & ~3);

Regards,
Jim Quinlan
Broadcom STB


> >
> > Thank you for help in advance!
> >
> > Bjorn also asked the same question:
> >   
> > https://lore.kernel.org/linux-pci/20201120203428.GA272511@bjorn-Precision-5520/
> >
> > Krzysztof
> >
>
> --
> Florian

-- 
This electronic communication and the information and any files transmitted 
with it, or attached to it, are confidential and are intended solely for 
the use of the individual or entity to whom it is addressed and may contain 
information that is confidential, legally privileged, protected by privacy 
laws, or otherwise restricted from disclosure to anyone else. If you are 
not the intended recipient or the person responsible for delivering the 
e-mail to the intended recipient, you are hereby notified that any use, 
copying, distributing, dissemination, forwarding, printing, or copying of 
this e-mail is strictly prohibited. If you received this e-mail in error, 
please return the e-mail to the sender, delete it from your computer, and 
destroy any printed copy of it.


smime.p7s
Description: S/MIME Cryptographic Signature


[powerpc:next] BUILD SUCCESS 250ad7a45b1e58d580decfb935fc063c4cf56f91

2020-12-07 Thread kernel test robot
   allyesconfig
h8300allyesconfig
arc defconfig
sh   allmodconfig
parisc  defconfig
s390 allyesconfig
parisc   allyesconfig
s390defconfig
arc  allyesconfig
nds32 allnoconfig
c6x  allyesconfig
i386 allyesconfig
sparcallyesconfig
sparc   defconfig
i386   tinyconfig
i386defconfig
mips allyesconfig
mips allmodconfig
powerpc  allyesconfig
powerpc  allmodconfig
i386 randconfig-a005-20201207
i386 randconfig-a004-20201207
i386 randconfig-a001-20201207
i386 randconfig-a002-20201207
i386 randconfig-a006-20201207
i386 randconfig-a003-20201207
x86_64   randconfig-a016-20201207
x86_64   randconfig-a012-20201207
x86_64   randconfig-a014-20201207
x86_64   randconfig-a013-20201207
x86_64   randconfig-a015-20201207
x86_64   randconfig-a011-20201207
i386 randconfig-a014-20201207
i386 randconfig-a013-20201207
i386 randconfig-a011-20201207
i386 randconfig-a015-20201207
i386 randconfig-a012-20201207
i386 randconfig-a016-20201207
riscvnommu_k210_defconfig
riscvallyesconfig
riscvnommu_virt_defconfig
riscv allnoconfig
riscv   defconfig
riscv  rv32_defconfig
riscvallmodconfig
x86_64   rhel
x86_64   allyesconfig
x86_64rhel-7.6-kselftests
x86_64   rhel-8.3
x86_64  kexec

clang tested configs:
x86_64   randconfig-a006-20201207
x86_64   randconfig-a005-20201207
x86_64   randconfig-a004-20201207
x86_64   randconfig-a002-20201207
x86_64   randconfig-a001-20201207
x86_64   randconfig-a003-20201207

---
0-DAY CI Kernel Test Service, Intel Corporation
https://lists.01.org/hyperkitty/list/kbuild-...@lists.01.org


Re: [RFC][PATCH 1/2] libnvdimm: Introduce ND_CMD_GET_STAT to retrieve nvdimm statistics

2020-12-07 Thread Dan Williams
[ add perf maintainers ]

On Sun, Nov 8, 2020 at 1:16 PM Vaibhav Jain  wrote:
>
> Implement support for exposing generic nvdimm statistics via newly
> introduced dimm-command ND_CMD_GET_STAT that can be handled by nvdimm
> command handler function and provide values for these statistics back
> to libnvdimm. Following generic nvdimm statistics are defined as an
> enumeration in 'uapi/ndctl.h':
>
> * "media_reads" : Number of media reads that have occurred since reboot.
> * "media_writes" : Number of media writes that have occurred since reboot.
> * "read_requests" : Number of read requests that have occurred since reboot.
> * "write_requests" : Number of write requests that have occurred since reboot.

Perhaps document these as "since device reset"? As I can imagine some
devices might have a mechanism to reset the count outside of "reboot"
which is a bit ambiguous.

> * "total_media_reads" : Total number of media reads that have occurred.
> * "total_media_writes" : Total number of media writes that have occurred.
> * "total_read_requests" : Total number of read requests that have occurred.
> * "total_write_requests" : Total number of write requests that have occurred.
>
> Apart from ND_CMD_GET_STAT ioctl these nvdimm statistics are also
> exposed via sysfs '/stats' directory for easy user-space
> access like below:
>
> /sys/class/nd/ndctl0/device/nmem0/stats # tail -n +1 *
> ==> media_reads <==
> 252197707602
> ==> media_writes <==
> 20684685172
> ==> read_requests <==
> 658810924962
> ==> write_requests <==
> 404464081574

Hmm, I haven't looked but how hard would it be to plumb these to be
perf counter-events. So someone could combine these with other perf
counters?

> In case a specific nvdimm-statistic is not supported then nvdimm
> command handler function can simply return an error (e.g -ENOENT) for
> request to read that nvdimm-statistic.

Makes sense, but I expect the perf route also has a way to enumerate
which statistics / counters are supported. I'm not opposed to also
having them in sysfs, but I think perf support should be a first class
citizen.

>
> The value for a specific nvdimm-stat is exchanged via newly introduced
> 'struct nd_cmd_get_dimm_stat' that hold a single statistics and a
> union of possible values types. Presently only '__s64' type of generic
> attributes are supported. These attributes are defined in
> 'nvdimm/dimm_devs.c' via a helper macro 'NVDIMM_STAT_ATTR'.
>
> Signed-off-by: Vaibhav Jain 
> ---
>  drivers/nvdimm/bus.c   |   6 ++
>  drivers/nvdimm/dimm_devs.c | 109 +
>  drivers/nvdimm/nd.h|   5 ++
>  include/uapi/linux/ndctl.h |  27 +
>  4 files changed, 147 insertions(+)
>
> diff --git a/drivers/nvdimm/bus.c b/drivers/nvdimm/bus.c
> index 2304c6183822..d53564477437 100644
> --- a/drivers/nvdimm/bus.c
> +++ b/drivers/nvdimm/bus.c
> @@ -794,6 +794,12 @@ static const struct nd_cmd_desc __nd_cmd_dimm_descs[] = {
> .out_num = 1,
> .out_sizes = { UINT_MAX, },
> },
> +   [ND_CMD_GET_STAT] = {
> +   .in_num = 1,
> +   .in_sizes = { sizeof(struct nd_cmd_get_dimm_stat), },
> +   .out_num = 1,
> +   .out_sizes = { sizeof(struct nd_cmd_get_dimm_stat), },
> +   },
>  };
>
>  const struct nd_cmd_desc *nd_cmd_dimm_desc(int cmd)
> diff --git a/drivers/nvdimm/dimm_devs.c b/drivers/nvdimm/dimm_devs.c
> index b59032e0859b..68aaa294def7 100644
> --- a/drivers/nvdimm/dimm_devs.c
> +++ b/drivers/nvdimm/dimm_devs.c
> @@ -555,6 +555,114 @@ static umode_t nvdimm_firmware_visible(struct kobject 
> *kobj, struct attribute *a
> return a->mode;
>  }
>
> +/* Request a dimm stat from the bus driver */
> +static int __request_dimm_stat(struct nvdimm_bus *nvdimm_bus,
> +  struct nvdimm *dimm, u64 stat_id,
> +  s64 *stat_val)
> +{
> +   struct nvdimm_bus_descriptor *nd_desc = nvdimm_bus->nd_desc;
> +   struct nd_cmd_get_dimm_stat stat = { .stat_id = stat_id };
> +   int rc, cmd_rc;
> +
> +   if (!test_bit(ND_CMD_GET_STAT, &dimm->cmd_mask)) {
> +   pr_debug("CMD_GET_STAT not set for bus driver 0x%lx\n",
> +nd_desc->cmd_mask);
> +   return -ENOENT;
> +   }
> +
> +   /* Is stat requested is known & bus driver supports fetching stats */
> +   if (stat_id <= ND_DIMM_STAT_INVALID || stat_id > ND_DIMM_STAT_MAX) {
> +   WARN(1, "Unknown stat-id(%llu) requested", stat_id);
> +   return -ENOENT;
> +   }
> +
> +   /* Ask bus driver for its stat value */
> +   rc = nd_desc->ndctl(nd_desc, dimm, ND_CMD_GET_STAT,
> +   &stat, sizeof(stat), &cmd_rc);
> +   if (rc || cmd_rc) {
> +   pr_debug("Unable to request stat %lld. Error (%d,%d)\n",
> +stat_id, rc, cmd_rc);
> +   return rc ? rc : cmd_rc;
> +   }
> +
> +   /* Indi

Re: [PATCH v2] clk: renesas: r9a06g032: Drop __packed for portability

2020-12-07 Thread Stephen Boyd
Quoting Geert Uytterhoeven (2020-11-30 00:57:43)
> The R9A06G032 clock driver uses an array of packed structures to reduce
> kernel size.  However, this array contains pointers, which are no longer
> aligned naturally, and cannot be relocated on PPC64.  Hence when
> compile-testing this driver on PPC64 with CONFIG_RELOCATABLE=y (e.g.
> PowerPC allyesconfig), the following warnings are produced:
> 
> WARNING: 136 bad relocations
> c0616be3 R_PPC64_UADDR64   .rodata+0x000cf338
> c0616bfe R_PPC64_UADDR64   .rodata+0x000cf370
> ...
> 
> Fix this by dropping the __packed attribute from the r9a06g032_clkdesc
> definition, trading a small size increase for portability.
> 
> This increases the 156-entry clock table by 1 byte per entry, but due to
> the compiler generating more efficient code for unpacked accesses, the
> net size increase is only 76 bytes (gcc 9.3.0 on arm32).
> 
> Reported-by: Stephen Rothwell 
> Fixes: 4c3d88526eba2143 ("clk: renesas: Renesas R9A06G032 clock driver")
> Signed-off-by: Geert Uytterhoeven 
> ---

Applied to clk-fixes


[PATCH v2 22/28] powerpc/pseries/hibernation: switch to rtas_ibm_suspend_me()

2020-12-07 Thread Nathan Lynch
rtas_suspend_last_cpu() and related code perform a lot of work that
isn't relevant to the hibernation workflow. All other CPUs are offline
when called so there is no need to place them in H_JOIN or prod them
on resume, nor is there need for retries or operations on shared
state.

Call the rtas_ibm_suspend_me() wrapper function directly from
pseries_suspend_enter() instead of using rtas_suspend_last_cpu().

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/suspend.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 3315d698d5ab..703728cb95ec 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -76,11 +76,7 @@ static void pseries_suspend_enable_irqs(void)
  **/
 static int pseries_suspend_enter(suspend_state_t state)
 {
-   int rc = rtas_suspend_last_cpu(&suspend_data);
-
-   atomic_set(&suspending, 0);
-   atomic_set(&suspend_data.done, 1);
-   return rc;
+   return rtas_ibm_suspend_me(NULL);
 }
 
 /**
-- 
2.28.0



[PATCH v2 23/28] powerpc/rtas: remove unused rtas_suspend_last_cpu()

2020-12-07 Thread Nathan Lynch
rtas_suspend_last_cpu() is now unused; remove it and
__rtas_suspend_last_cpu(), which also becomes unused.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  1 -
 arch/powerpc/kernel/rtas.c  | 43 -
 2 files changed, 44 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 97ccb40fb09f..332e1000ca0f 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -256,7 +256,6 @@ extern bool rtas_indicator_present(int token, int 
*maxindex);
 extern int rtas_set_indicator(int indicator, int index, int new_value);
 extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
 extern void rtas_progress(char *s, unsigned short hex);
-extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
 int rtas_ibm_suspend_me(int *fw_status);
 
 struct rtc_time;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index aedd46967b99..9a7d1bba3ef7 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -830,49 +830,6 @@ void rtas_activate_firmware(void)
 
 static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
 #ifdef CONFIG_PPC_PSERIES
-static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int 
wake_when_done)
-{
-   u16 slb_size = mmu_slb_size;
-   int rc = H_MULTI_THREADS_ACTIVE;
-   int cpu;
-
-   slb_set_size(SLB_MIN_SIZE);
-   printk(KERN_DEBUG "calling ibm,suspend-me on cpu %i\n", 
smp_processor_id());
-
-   while (rc == H_MULTI_THREADS_ACTIVE && !atomic_read(&data->done) &&
-  !atomic_read(&data->error))
-   rc = rtas_call(data->token, 0, 1, NULL);
-
-   if (rc || atomic_read(&data->error)) {
-   printk(KERN_DEBUG "ibm,suspend-me returned %d\n", rc);
-   slb_set_size(slb_size);
-   }
-
-   if (atomic_read(&data->error))
-   rc = atomic_read(&data->error);
-
-   atomic_set(&data->error, rc);
-   pSeries_coalesce_init();
-
-   if (wake_when_done) {
-   atomic_set(&data->done, 1);
-
-   for_each_online_cpu(cpu)
-   plpar_hcall_norets(H_PROD, 
get_hard_smp_processor_id(cpu));
-   }
-
-   if (atomic_dec_return(&data->working) == 0)
-   complete(data->complete);
-
-   return rc;
-}
-
-int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data)
-{
-   atomic_inc(&data->working);
-   return __rtas_suspend_last_cpu(data, 0);
-}
-
 /**
  * rtas_call_reentrant() - Used for reentrant rtas calls
  * @token: Token for desired reentrant RTAS call
-- 
2.28.0



[PATCH v2 28/28] powerpc/pseries/mobility: refactor node lookup during DT update

2020-12-07 Thread Nathan Lynch
In pseries_devicetree_update(), with each call to ibm,update-nodes the
partition firmware communicates the node to be deleted or updated by
placing its phandle in the work buffer. Each of delete_dt_node(),
update_dt_node(), and add_dt_node() has a duplicate lookup using the
phandle value and corresponding refcount management.

Move the lookup and of_node_put() into pseries_devicetree_update(),
and emit a warning on any failed lookups.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 49 ---
 1 file changed, 17 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index e670180f311d..ea4d6a660e0d 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -61,18 +61,10 @@ static int mobility_rtas_call(int token, char *buf, s32 
scope)
return rc;
 }
 
-static int delete_dt_node(__be32 phandle)
+static int delete_dt_node(struct device_node *dn)
 {
-   struct device_node *dn;
-
-   dn = of_find_node_by_phandle(be32_to_cpu(phandle));
-   if (!dn)
-   return -ENOENT;
-
pr_debug("removing node %pOFfp\n", dn);
-
dlpar_detach_node(dn);
-   of_node_put(dn);
return 0;
 }
 
@@ -137,10 +129,9 @@ static int update_dt_property(struct device_node *dn, 
struct property **prop,
return 0;
 }
 
-static int update_dt_node(__be32 phandle, s32 scope)
+static int update_dt_node(struct device_node *dn, s32 scope)
 {
struct update_props_workarea *upwa;
-   struct device_node *dn;
struct property *prop = NULL;
int i, rc, rtas_rc;
char *prop_data;
@@ -157,14 +148,8 @@ static int update_dt_node(__be32 phandle, s32 scope)
if (!rtas_buf)
return -ENOMEM;
 
-   dn = of_find_node_by_phandle(be32_to_cpu(phandle));
-   if (!dn) {
-   kfree(rtas_buf);
-   return -ENOENT;
-   }
-
upwa = (struct update_props_workarea *)&rtas_buf[0];
-   upwa->phandle = phandle;
+   upwa->phandle = cpu_to_be32(dn->phandle);
 
do {
rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
@@ -224,26 +209,18 @@ static int update_dt_node(__be32 phandle, s32 scope)
cond_resched();
} while (rtas_rc == 1);
 
-   of_node_put(dn);
kfree(rtas_buf);
return 0;
 }
 
-static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
+static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
 {
struct device_node *dn;
-   struct device_node *parent_dn;
int rc;
 
-   parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
-   if (!parent_dn)
-   return -ENOENT;
-
dn = dlpar_configure_connector(drc_index, parent_dn);
-   if (!dn) {
-   of_node_put(parent_dn);
+   if (!dn)
return -ENOENT;
-   }
 
rc = dlpar_attach_node(dn, parent_dn);
if (rc)
@@ -251,7 +228,6 @@ static int add_dt_node(__be32 parent_phandle, __be32 
drc_index)
 
pr_debug("added node %pOFfp\n", dn);
 
-   of_node_put(parent_dn);
return rc;
 }
 
@@ -284,22 +260,31 @@ int pseries_devicetree_update(s32 scope)
data++;
 
for (i = 0; i < node_count; i++) {
+   struct device_node *np;
__be32 phandle = *data++;
__be32 drc_index;
 
+   np = 
of_find_node_by_phandle(be32_to_cpu(phandle));
+   if (!np) {
+   pr_warn("Failed lookup: phandle 0x%x 
for action 0x%x\n",
+   be32_to_cpu(phandle), action);
+   continue;
+   }
+
switch (action) {
case DELETE_DT_NODE:
-   delete_dt_node(phandle);
+   delete_dt_node(np);
break;
case UPDATE_DT_NODE:
-   update_dt_node(phandle, scope);
+   update_dt_node(np, scope);
break;
case ADD_DT_NODE:
drc_index = *data++;
-   add_dt_node(phandle, drc_index);
+   add_dt_node(np, drc_index);
break;
}
 
+   of_node_put(np);
cond_resched();
}
}
-- 
2.28.0



[PATCH v2 27/28] powerpc/rtas: remove unused rtas_suspend_me_data

2020-12-07 Thread Nathan Lynch
All code which used this type has been removed.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas-types.h | 8 
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas-types.h 
b/arch/powerpc/include/asm/rtas-types.h
index aa420561bc10..8df6235d64d1 100644
--- a/arch/powerpc/include/asm/rtas-types.h
+++ b/arch/powerpc/include/asm/rtas-types.h
@@ -23,14 +23,6 @@ struct rtas_t {
struct device_node *dev;/* virtual address pointer */
 };
 
-struct rtas_suspend_me_data {
-   atomic_t working; /* number of cpus accessing this struct */
-   atomic_t done;
-   int token; /* ibm,suspend-me */
-   atomic_t error;
-   struct completion *complete; /* wait on this until working == 0 */
-};
-
 struct rtas_error_log {
/* Byte 0 */
u8  byte0;  /* Architectural version */
-- 
2.28.0



[PATCH v2 25/28] powerpc/pseries/hibernation: perform post-suspend fixups later

2020-12-07 Thread Nathan Lynch
The pseries hibernate code calls post_mobility_fixup() which is sort
of a dumping ground of fixups that need to run after resuming from
suspend regardless of whether suspend was a hibernation or a
migration. Calling post_mobility_fixup() from
pseries_suspend_enable_irqs() runs this code early in resume with
devices suspended and only one CPU up, while the much more commonly
used migration case runs these fixups in a more typical process
context.

Call post_mobility_fixup() after the suspend core returns a success
status to the hibernate sysfs store method and remove
pseries_suspend_enable_irqs().

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/suspend.c | 21 -
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 6a94cc0deb88..589a91730db8 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -50,21 +50,6 @@ static int pseries_suspend_begin(u64 stream_id)
return 0;
 }
 
-/**
- * pseries_suspend_enable_irqs
- *
- * Post suspend configuration updates
- *
- **/
-static void pseries_suspend_enable_irqs(void)
-{
-   /*
-* Update configuration which can be modified based on device tree
-* changes during resume.
-*/
-   post_mobility_fixup();
-}
-
 /**
  * pseries_suspend_enter - Final phase of hibernation
  *
@@ -127,8 +112,11 @@ static ssize_t store_hibernate(struct device *dev,
if (!rc)
rc = pm_suspend(PM_SUSPEND_MEM);
 
-   if (!rc)
+   if (!rc) {
rc = count;
+   post_mobility_fixup();
+   }
+
 
return rc;
 }
@@ -214,7 +202,6 @@ static int __init pseries_suspend_init(void)
if ((rc = pseries_suspend_sysfs_register(&suspend_dev)))
return rc;
 
-   ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs;
suspend_set_ops(&pseries_suspend_ops);
return 0;
 }
-- 
2.28.0



[PATCH v2 26/28] powerpc/pseries/hibernation: remove prepare_late() callback

2020-12-07 Thread Nathan Lynch
The pseries hibernate code no longer calls into the original
join/suspend code in kernel/rtas.c, so pseries_prepare_late() and
related code don't accomplish anything now.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/suspend.c | 25 
 1 file changed, 25 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 589a91730db8..1b902cbf85c5 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -15,9 +15,6 @@
 #include 
 
 static struct device suspend_dev;
-static DECLARE_COMPLETION(suspend_work);
-static struct rtas_suspend_me_data suspend_data;
-static atomic_t suspending;
 
 /**
  * pseries_suspend_begin - First phase of hibernation
@@ -61,23 +58,6 @@ static int pseries_suspend_enter(suspend_state_t state)
return rtas_ibm_suspend_me(NULL);
 }
 
-/**
- * pseries_prepare_late - Prepare to suspend all other CPUs
- *
- * Return value:
- * 0 on success / other on failure
- **/
-static int pseries_prepare_late(void)
-{
-   atomic_set(&suspending, 1);
-   atomic_set(&suspend_data.working, 0);
-   atomic_set(&suspend_data.done, 0);
-   atomic_set(&suspend_data.error, 0);
-   suspend_data.complete = &suspend_work;
-   reinit_completion(&suspend_work);
-   return 0;
-}
-
 /**
  * store_hibernate - Initiate partition hibernation
  * @dev:   subsys root device
@@ -152,7 +132,6 @@ static struct bus_type suspend_subsys = {
 
 static const struct platform_suspend_ops pseries_suspend_ops = {
.valid  = suspend_valid_only_mem,
-   .prepare_late   = pseries_prepare_late,
.enter  = pseries_suspend_enter,
 };
 
@@ -195,10 +174,6 @@ static int __init pseries_suspend_init(void)
if (!firmware_has_feature(FW_FEATURE_LPAR))
return 0;
 
-   suspend_data.token = rtas_token("ibm,suspend-me");
-   if (suspend_data.token == RTAS_UNKNOWN_SERVICE)
-   return 0;
-
if ((rc = pseries_suspend_sysfs_register(&suspend_dev)))
return rc;
 
-- 
2.28.0



[PATCH v2 24/28] powerpc/pseries/hibernation: remove redundant cacheinfo update

2020-12-07 Thread Nathan Lynch
Partitions with cache nodes in the device tree can encounter the
following warning on resume:

CPU 0 already accounted in PowerPC,POWER9@0(Data)
WARNING: CPU: 0 PID: 3177 at arch/powerpc/kernel/cacheinfo.c:197 
cacheinfo_cpu_online+0x640/0x820

These calls to cacheinfo_cpu_offline/online have been redundant since
commit e610a466d16a ("powerpc/pseries/mobility: rebuild cacheinfo
hierarchy post-migration").

Fixes: e610a466d16a ("powerpc/pseries/mobility: rebuild cacheinfo hierarchy 
post-migration")
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/suspend.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 703728cb95ec..6a94cc0deb88 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -13,7 +13,6 @@
 #include 
 #include 
 #include 
-#include "../../kernel/cacheinfo.h"
 
 static struct device suspend_dev;
 static DECLARE_COMPLETION(suspend_work);
@@ -63,9 +62,7 @@ static void pseries_suspend_enable_irqs(void)
 * Update configuration which can be modified based on device tree
 * changes during resume.
 */
-   cacheinfo_cpu_offline(smp_processor_id());
post_mobility_fixup();
-   cacheinfo_cpu_online(smp_processor_id());
 }
 
 /**
-- 
2.28.0



[PATCH v2 21/28] powerpc/rtas: remove rtas_suspend_cpu()

2020-12-07 Thread Nathan Lynch
rtas_suspend_cpu() no longer has users; remove it and
__rtas_suspend_cpu() which now becomes unused as well.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  1 -
 arch/powerpc/kernel/rtas.c  | 52 -
 2 files changed, 53 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 9a6107ffe378..97ccb40fb09f 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -256,7 +256,6 @@ extern bool rtas_indicator_present(int token, int 
*maxindex);
 extern int rtas_set_indicator(int indicator, int index, int new_value);
 extern int rtas_set_indicator_fast(int indicator, int index, int new_value);
 extern void rtas_progress(char *s, unsigned short hex);
-extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
 int rtas_ibm_suspend_me(int *fw_status);
 
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 7e6024f570da..aedd46967b99 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -873,58 +873,6 @@ int rtas_suspend_last_cpu(struct rtas_suspend_me_data 
*data)
return __rtas_suspend_last_cpu(data, 0);
 }
 
-static int __rtas_suspend_cpu(struct rtas_suspend_me_data *data, int 
wake_when_done)
-{
-   long rc = H_SUCCESS;
-   unsigned long msr_save;
-   int cpu;
-
-   atomic_inc(&data->working);
-
-   /* really need to ensure MSR.EE is off for H_JOIN */
-   msr_save = mfmsr();
-   mtmsr(msr_save & ~(MSR_EE));
-
-   while (rc == H_SUCCESS && !atomic_read(&data->done) && 
!atomic_read(&data->error))
-   rc = plpar_hcall_norets(H_JOIN);
-
-   mtmsr(msr_save);
-
-   if (rc == H_SUCCESS) {
-   /* This cpu was prodded and the suspend is complete. */
-   goto out;
-   } else if (rc == H_CONTINUE) {
-   /* All other cpus are in H_JOIN, this cpu does
-* the suspend.
-*/
-   return __rtas_suspend_last_cpu(data, wake_when_done);
-   } else {
-   printk(KERN_ERR "H_JOIN on cpu %i failed with rc = %ld\n",
-  smp_processor_id(), rc);
-   atomic_set(&data->error, rc);
-   }
-
-   if (wake_when_done) {
-   atomic_set(&data->done, 1);
-
-   /* This cpu did the suspend or got an error; in either case,
-* we need to prod all other other cpus out of join state.
-* Extra prods are harmless.
-*/
-   for_each_online_cpu(cpu)
-   plpar_hcall_norets(H_PROD, 
get_hard_smp_processor_id(cpu));
-   }
-out:
-   if (atomic_dec_return(&data->working) == 0)
-   complete(data->complete);
-   return rc;
-}
-
-int rtas_suspend_cpu(struct rtas_suspend_me_data *data)
-{
-   return __rtas_suspend_cpu(data, 0);
-}
-
 /**
  * rtas_call_reentrant() - Used for reentrant rtas calls
  * @token: Token for desired reentrant RTAS call
-- 
2.28.0



[PATCH v2 20/28] powerpc/machdep: remove suspend_disable_cpu()

2020-12-07 Thread Nathan Lynch
There are no users left of the suspend_disable_cpu() callback; remove
it.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/machdep.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/include/asm/machdep.h 
b/arch/powerpc/include/asm/machdep.h
index 475687f24f4a..cf6ebbc16cb4 100644
--- a/arch/powerpc/include/asm/machdep.h
+++ b/arch/powerpc/include/asm/machdep.h
@@ -207,7 +207,6 @@ struct machdep_calls {
void (*suspend_disable_irqs)(void);
void (*suspend_enable_irqs)(void);
 #endif
-   int (*suspend_disable_cpu)(void);
 
 #ifdef CONFIG_ARCH_CPU_PROBE_RELEASE
ssize_t (*cpu_probe)(const char *, size_t);
-- 
2.28.0



[PATCH v2 19/28] powerpc/pseries/hibernation: remove pseries_suspend_cpu()

2020-12-07 Thread Nathan Lynch
Since commit 48f6e7f6d948 ("powerpc/pseries: remove cede offline state
for CPUs"), ppc_md.suspend_disable_cpu() is no longer used and all
CPUs (save one) are placed into true offline state as opposed to
H_JOIN. So pseries_suspend_cpu() is effectively unused; remove it.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/suspend.c | 15 ---
 1 file changed, 15 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 232621f33510..3315d698d5ab 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -48,20 +48,6 @@ static int pseries_suspend_begin(u64 stream_id)
   vasi_state);
return -EIO;
}
-
-   return 0;
-}
-
-/**
- * pseries_suspend_cpu - Suspend a single CPU
- *
- * Makes the H_JOIN call to suspend the CPU
- *
- **/
-static int pseries_suspend_cpu(void)
-{
-   if (atomic_read(&suspending))
-   return rtas_suspend_cpu(&suspend_data);
return 0;
 }
 
@@ -235,7 +221,6 @@ static int __init pseries_suspend_init(void)
if ((rc = pseries_suspend_sysfs_register(&suspend_dev)))
return rc;
 
-   ppc_md.suspend_disable_cpu = pseries_suspend_cpu;
ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs;
suspend_set_ops(&pseries_suspend_ops);
return 0;
-- 
2.28.0



[PATCH v2 17/28] powerpc/pseries/hibernation: drop pseries_suspend_begin() from suspend ops

2020-12-07 Thread Nathan Lynch
There are three ways pseries_suspend_begin() can be reached:

1. When "mem" is written to /sys/power/state:

kobj_attr_store()
-> state_store()
  -> pm_suspend()
-> suspend_devices_and_enter()
  -> pseries_suspend_begin()

This never works because there is no way to supply a valid stream id
using this interface, and H_VASI_STATE is called with a stream id of
zero. So this call path is useless at best.

2. When a stream id is written to /sys/devices/system/power/hibernate.
pseries_suspend_begin() is polled directly from store_hibernate()
until the stream is in the "Suspending" state (i.e. the platform is
ready for the OS to suspend execution):

dev_attr_store()
-> store_hibernate()
  -> pseries_suspend_begin()

3. When a stream id is written to /sys/devices/system/power/hibernate
(continued). After #2, pseries_suspend_begin() is called once again
from the pm core:

dev_attr_store()
-> store_hibernate()
  -> pm_suspend()
-> suspend_devices_and_enter()
  -> pseries_suspend_begin()

This is redundant because the VASI suspend state is already known to
be Suspending.

The begin() callback of platform_suspend_ops is optional, so we can
simply remove that assignment with no loss of function.

Fixes: 32d8ad4e621d ("powerpc/pseries: Partition hibernation support")
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/suspend.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 81e0ac58d620..3eaa9d59dc7a 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -187,7 +187,6 @@ static struct bus_type suspend_subsys = {
 
 static const struct platform_suspend_ops pseries_suspend_ops = {
.valid  = suspend_valid_only_mem,
-   .begin  = pseries_suspend_begin,
.prepare_late   = pseries_prepare_late,
.enter  = pseries_suspend_enter,
 };
-- 
2.28.0



[PATCH v2 18/28] powerpc/pseries/hibernation: pass stream id via function arguments

2020-12-07 Thread Nathan Lynch
There is no need for the stream id to be a file-global variable; pass
it from store_hibernate() to pseries_suspend_begin() for the
H_VASI_STATE call.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/suspend.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/suspend.c 
b/arch/powerpc/platforms/pseries/suspend.c
index 3eaa9d59dc7a..232621f33510 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -15,7 +15,6 @@
 #include 
 #include "../../kernel/cacheinfo.h"
 
-static u64 stream_id;
 static struct device suspend_dev;
 static DECLARE_COMPLETION(suspend_work);
 static struct rtas_suspend_me_data suspend_data;
@@ -29,7 +28,7 @@ static atomic_t suspending;
  * Return value:
  * 0 on success / other on failure
  **/
-static int pseries_suspend_begin(suspend_state_t state)
+static int pseries_suspend_begin(u64 stream_id)
 {
long vasi_state, rc;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -132,6 +131,7 @@ static ssize_t store_hibernate(struct device *dev,
   struct device_attribute *attr,
   const char *buf, size_t count)
 {
+   u64 stream_id;
int rc;
 
if (!capable(CAP_SYS_ADMIN))
@@ -140,7 +140,7 @@ static ssize_t store_hibernate(struct device *dev,
stream_id = simple_strtoul(buf, NULL, 16);
 
do {
-   rc = pseries_suspend_begin(PM_SUSPEND_MEM);
+   rc = pseries_suspend_begin(stream_id);
if (rc == -EAGAIN)
ssleep(1);
} while (rc == -EAGAIN);
@@ -148,8 +148,6 @@ static ssize_t store_hibernate(struct device *dev,
if (!rc)
rc = pm_suspend(PM_SUSPEND_MEM);
 
-   stream_id = 0;
-
if (!rc)
rc = count;
 
-- 
2.28.0



[PATCH v2 16/28] powerpc/rtas: remove rtas_ibm_suspend_me_unsafe()

2020-12-07 Thread Nathan Lynch
rtas_ibm_suspend_me_unsafe() is now unused; remove it and
rtas_percpu_suspend_me() which becomes unused as a result.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  1 -
 arch/powerpc/kernel/rtas.c  | 67 +
 2 files changed, 1 insertion(+), 67 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 3b52d8574fcc..9a6107ffe378 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -258,7 +258,6 @@ extern int rtas_set_indicator_fast(int indicator, int 
index, int new_value);
 extern void rtas_progress(char *s, unsigned short hex);
 extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
-int rtas_ibm_suspend_me_unsafe(u64 handle);
 int rtas_ibm_suspend_me(int *fw_status);
 
 struct rtc_time;
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index d4b048571728..7e6024f570da 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -925,66 +925,6 @@ int rtas_suspend_cpu(struct rtas_suspend_me_data *data)
return __rtas_suspend_cpu(data, 0);
 }
 
-static void rtas_percpu_suspend_me(void *info)
-{
-   __rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
-}
-
-int rtas_ibm_suspend_me_unsafe(u64 handle)
-{
-   long state;
-   long rc;
-   unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
-   struct rtas_suspend_me_data data;
-   DECLARE_COMPLETION_ONSTACK(done);
-
-   if (!rtas_service_present("ibm,suspend-me"))
-   return -ENOSYS;
-
-   /* Make sure the state is valid */
-   rc = plpar_hcall(H_VASI_STATE, retbuf, handle);
-
-   state = retbuf[0];
-
-   if (rc) {
-   printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned 
%ld\n",rc);
-   return rc;
-   } else if (state == H_VASI_ENABLED) {
-   return -EAGAIN;
-   } else if (state != H_VASI_SUSPENDING) {
-   printk(KERN_ERR "rtas_ibm_suspend_me: vasi_state returned state 
%ld\n",
-  state);
-   return -EIO;
-   }
-
-   atomic_set(&data.working, 0);
-   atomic_set(&data.done, 0);
-   atomic_set(&data.error, 0);
-   data.token = rtas_token("ibm,suspend-me");
-   data.complete = &done;
-
-   lock_device_hotplug();
-
-   cpu_hotplug_disable();
-
-   /* Call function on all CPUs.  One of us will make the
-* rtas call
-*/
-   on_each_cpu(rtas_percpu_suspend_me, &data, 0);
-
-   wait_for_completion(&done);
-
-   if (atomic_read(&data.error) != 0)
-   printk(KERN_ERR "Error doing global join\n");
-
-
-   cpu_hotplug_enable();
-
-   unlock_device_hotplug();
-
-   return atomic_read(&data.error);
-}
-
 /**
  * rtas_call_reentrant() - Used for reentrant rtas calls
  * @token: Token for desired reentrant RTAS call
@@ -1035,12 +975,7 @@ int rtas_call_reentrant(int token, int nargs, int nret, 
int *outputs, ...)
return ret;
 }
 
-#else /* CONFIG_PPC_PSERIES */
-int rtas_ibm_suspend_me_unsafe(u64 handle)
-{
-   return -ENOSYS;
-}
-#endif
+#endif /* CONFIG_PPC_PSERIES */
 
 /**
  * Find a specific pseries error log in an RTAS extended event log.
-- 
2.28.0



[PATCH v2 15/28] powerpc/rtas: dispatch partition migration requests to pseries

2020-12-07 Thread Nathan Lynch
sys_rtas() cannot call ibm,suspend-me directly in the same way it
handles other inputs. Instead it must dispatch the request to code
that can first perform the H_JOIN sequence before any call to
ibm,suspend-me can succeed. Over time kernel/rtas.c has accreted a fair
amount of platform-specific code to implement this.

Since a different, more robust implementation of the suspend sequence
is now in the pseries platform code, we want to dispatch the request
there.

Note that invoking ibm,suspend-me via the RTAS syscall is all but
deprecated; this change preserves ABI compatibility for old programs
while providing to them the benefit of the new partition suspend
implementation. This is a behavior change in that the kernel performs
the device tree update and firmware activation before returning, but
experimentation indicates this is tolerated fine by legacy user space.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h   | 5 +
 arch/powerpc/kernel/rtas.c| 2 +-
 arch/powerpc/platforms/pseries/mobility.c | 5 +
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index fdefe6a974eb..3b52d8574fcc 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -279,8 +279,13 @@ extern time64_t last_rtas_event;
 extern int clobbering_unread_rtas_event(void);
 extern int pseries_devicetree_update(s32 scope);
 extern void post_mobility_fixup(void);
+int rtas_syscall_dispatch_ibm_suspend_me(u64 handle);
 #else
 static inline int clobbering_unread_rtas_event(void) { return 0; }
+static inline int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
+{
+   return -EINVAL;
+}
 #endif
 
 #ifdef CONFIG_PPC_RTAS_DAEMON
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 3a740ae933f8..d4b048571728 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1272,7 +1272,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
int rc = 0;
u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32)
  | be32_to_cpu(args.args[1]);
-   rc = rtas_ibm_suspend_me_unsafe(handle);
+   rc = rtas_syscall_dispatch_ibm_suspend_me(handle);
if (rc == -EAGAIN)
args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE);
else if (rc == -EIO)
diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index fe7e35cdc9d5..e670180f311d 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -615,6 +615,11 @@ static int pseries_migrate_partition(u64 handle)
return ret;
 }
 
+int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
+{
+   return pseries_migrate_partition(handle);
+}
+
 static ssize_t migration_store(struct class *class,
   struct class_attribute *attr, const char *buf,
   size_t count)
-- 
2.28.0



[PATCH v2 14/28] powerpc/pseries/mobility: retry partition suspend after error

2020-12-07 Thread Nathan Lynch
This is a mitigation for the relatively rare occurrence where a
virtual IOA can be in a transient state that prevents the
suspend/migration from succeeding, resulting in an error from
ibm,suspend-me.

If the join/suspend sequence returns an error, it is acceptable to
retry as long as the VASI suspend session state is still
"Suspending" (i.e. the platform is still waiting for the OS to
suspend).

Retry a few times on suspend failure while this condition holds,
progressively increasing the delay between attempts. We don't want to
retry indefinitely because firmware emits an error log event on each
unsuccessful attempt.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 59 ++-
 1 file changed, 57 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index f234a7ed87aa..fe7e35cdc9d5 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -542,16 +542,71 @@ static void pseries_cancel_migration(u64 handle, int err)
pr_err("H_VASI_SIGNAL error: %ld\n", hvrc);
 }
 
+static int pseries_suspend(u64 handle)
+{
+   const unsigned int max_attempts = 5;
+   unsigned int retry_interval_ms = 1;
+   unsigned int attempt = 1;
+   int ret;
+
+   while (true) {
+   atomic_t counter = ATOMIC_INIT(0);
+   unsigned long vasi_state;
+   int vasi_err;
+
+   ret = stop_machine(do_join, &counter, cpu_online_mask);
+   if (ret == 0)
+   break;
+   /*
+* Encountered an error. If the VASI stream is still
+* in Suspending state, it's likely a transient
+* condition related to some device in the partition
+* and we can retry in the hope that the cause has
+* cleared after some delay.
+*
+* A better design would allow drivers etc to prepare
+* for the suspend and avoid conditions which prevent
+* the suspend from succeeding. For now, we have this
+* mitigation.
+*/
+   pr_notice("Partition suspend attempt %u of %u error: %d\n",
+ attempt, max_attempts, ret);
+
+   if (attempt == max_attempts)
+   break;
+
+   vasi_err = poll_vasi_state(handle, &vasi_state);
+   if (vasi_err == 0) {
+   if (vasi_state != H_VASI_SUSPENDING) {
+   pr_notice("VASI state %lu after failed 
suspend\n",
+ vasi_state);
+   break;
+   }
+   } else if (vasi_err != -EOPNOTSUPP) {
+   pr_err("VASI state poll error: %d", vasi_err);
+   break;
+   }
+
+   pr_notice("Will retry partition suspend after %u ms\n",
+ retry_interval_ms);
+
+   msleep(retry_interval_ms);
+   retry_interval_ms *= 10;
+   attempt++;
+   }
+
+   return ret;
+}
+
 static int pseries_migrate_partition(u64 handle)
 {
-   atomic_t counter = ATOMIC_INIT(0);
int ret;
 
ret = wait_for_vasi_session_suspending(handle);
if (ret)
return ret;
 
-   ret = stop_machine(do_join, &counter, cpu_online_mask);
+   ret = pseries_suspend(handle);
if (ret == 0)
post_mobility_fixup();
else
-- 
2.28.0



[PATCH v2 13/28] powerpc/pseries/mobility: signal suspend cancellation to platform

2020-12-07 Thread Nathan Lynch
If we're returning an error to user space, use H_VASI_SIGNAL to send a
cancellation request to the platform. This isn't strictly required but
it communicates that Linux will not attempt to complete the suspend,
which allows the various entities involved to promptly end the
operation in progress.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 31 +++
 1 file changed, 31 insertions(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 5a3951626a96..f234a7ed87aa 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -513,6 +513,35 @@ static int do_join(void *arg)
return ret;
 }
 
+/*
+ * Abort reason code byte 0. We use only the 'Migrating partition' value.
+ */
+enum vasi_aborting_entity {
+   ORCHESTRATOR= 1,
+   VSP_SOURCE  = 2,
+   PARTITION_FIRMWARE  = 3,
+   PLATFORM_FIRMWARE   = 4,
+   VSP_TARGET  = 5,
+   MIGRATING_PARTITION = 6,
+};
+
+static void pseries_cancel_migration(u64 handle, int err)
+{
+   u32 reason_code;
+   u32 detail;
+   u8 entity;
+   long hvrc;
+
+   entity = MIGRATING_PARTITION;
+   detail = abs(err) & 0xff;
+   reason_code = (entity << 24) | detail;
+
+   hvrc = plpar_hcall_norets(H_VASI_SIGNAL, handle,
+ H_VASI_SIGNAL_CANCEL, reason_code);
+   if (hvrc)
+   pr_err("H_VASI_SIGNAL error: %ld\n", hvrc);
+}
+
 static int pseries_migrate_partition(u64 handle)
 {
atomic_t counter = ATOMIC_INIT(0);
@@ -525,6 +554,8 @@ static int pseries_migrate_partition(u64 handle)
ret = stop_machine(do_join, &counter, cpu_online_mask);
if (ret == 0)
post_mobility_fixup();
+   else
+   pseries_cancel_migration(handle, ret);
 
return ret;
 }
-- 
2.28.0



[PATCH v2 12/28] powerpc/pseries/mobility: use stop_machine for join/suspend

2020-12-07 Thread Nathan Lynch
The partition suspend sequence as specified in the platform
architecture requires that all active processor threads call
H_JOIN, which:

- suspends the calling thread until it is the target of
  an H_PROD; or
- immediately returns H_CONTINUE, if the calling thread is the last to
  call H_JOIN. This thread is expected to call ibm,suspend-me to
  completely suspend the partition.

Upon returning from ibm,suspend-me the calling thread must wake all
others using H_PROD.

rtas_ibm_suspend_me_unsafe() uses on_each_cpu() to implement this
protocol, but because of its synchronizing nature this is susceptible
to deadlock versus users of stop_machine() or other callers of
on_each_cpu().

Not only is stop_machine() intended for use cases like this, it
handles error propagation and allows us to keep the data shared
between CPUs minimal: a single atomic counter which ensures exactly
one CPU will wake the others from their joined states.

Switch the migration code to use stop_machine() and a less complex
local implementation of the H_JOIN/ibm,suspend-me logic, which
carries additional benefits:

- more informative error reporting, appropriately ratelimited
- resets the lockup detector / watchdog on resume to prevent lockup
  warnings when the OS has been suspended for a time exceeding the
  threshold.

Fixes: 91dc182ca6e2 ("[PATCH] powerpc: special-case ibm,suspend-me RTAS call")
Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 132 --
 1 file changed, 125 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 573ed48b43d8..5a3951626a96 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -12,9 +12,11 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -405,6 +407,128 @@ static int wait_for_vasi_session_suspending(u64 handle)
return ret;
 }
 
+static void prod_single(unsigned int target_cpu)
+{
+   long hvrc;
+   int hwid;
+
+   hwid = get_hard_smp_processor_id(target_cpu);
+   hvrc = plpar_hcall_norets(H_PROD, hwid);
+   if (hvrc == H_SUCCESS)
+   return;
+   pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n",
+  target_cpu, hwid, hvrc);
+}
+
+static void prod_others(void)
+{
+   unsigned int cpu;
+
+   for_each_online_cpu(cpu) {
+   if (cpu != smp_processor_id())
+   prod_single(cpu);
+   }
+}
+
+static u16 clamp_slb_size(void)
+{
+   u16 prev = mmu_slb_size;
+
+   slb_set_size(SLB_MIN_SIZE);
+
+   return prev;
+}
+
+static int do_suspend(void)
+{
+   u16 saved_slb_size;
+   int status;
+   int ret;
+
+   pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id());
+
+   /*
+* The destination processor model may have fewer SLB entries
+* than the source. We reduce mmu_slb_size to a safe minimum
+* before suspending in order to minimize the possibility of
+* programming non-existent entries on the destination. If
+* suspend fails, we restore it before returning. On success
+* the OF reconfig path will update it from the new device
+* tree after resuming on the destination.
+*/
+   saved_slb_size = clamp_slb_size();
+
+   ret = rtas_ibm_suspend_me(&status);
+   if (ret != 0) {
+   pr_err("ibm,suspend-me error: %d\n", status);
+   slb_set_size(saved_slb_size);
+   }
+
+   return ret;
+}
+
+static int do_join(void *arg)
+{
+   atomic_t *counter = arg;
+   long hvrc;
+   int ret;
+
+   /* Must ensure MSR.EE off for H_JOIN. */
+   hard_irq_disable();
+   hvrc = plpar_hcall_norets(H_JOIN);
+
+   switch (hvrc) {
+   case H_CONTINUE:
+   /*
+* All other CPUs are offline or in H_JOIN. This CPU
+* attempts the suspend.
+*/
+   ret = do_suspend();
+   break;
+   case H_SUCCESS:
+   /*
+* The suspend is complete and this cpu has received a
+* prod.
+*/
+   ret = 0;
+   break;
+   case H_BAD_MODE:
+   case H_HARDWARE:
+   default:
+   ret = -EIO;
+   pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
+  hvrc, smp_processor_id());
+   break;
+   }
+
+   if (atomic_inc_return(counter) == 1) {
+   pr_info("CPU %u waking all threads\n", smp_processor_id());
+   prod_others();
+   }
+   /*
+* Execution may have been suspended for several seconds, so
+* reset the watchdog.
+*/
+   touch_nmi_watchdog();
+   return ret;
+}
+
+static int pseries_migrate_partition(u64 handle)

[PATCH v2 10/28] powerpc/pseries/mobility: use rtas_activate_firmware() on resume

2020-12-07 Thread Nathan Lynch
It's incorrect to abort post-suspend processing if
ibm,activate-firmware isn't available. Use rtas_activate_firmware(),
which logs this condition appropriately and allows us to proceed.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 15 +--
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 31d81b7da961..01ac7c03558e 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -312,21 +312,8 @@ int pseries_devicetree_update(s32 scope)
 void post_mobility_fixup(void)
 {
int rc;
-   int activate_fw_token;
 
-   activate_fw_token = rtas_token("ibm,activate-firmware");
-   if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
-   printk(KERN_ERR "Could not make post-mobility "
-  "activate-fw call.\n");
-   return;
-   }
-
-   do {
-   rc = rtas_call(activate_fw_token, 0, 1, NULL);
-   } while (rtas_busy_delay(rc));
-
-   if (rc)
-   printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc);
+   rtas_activate_firmware();
 
/*
 * We don't want CPUs to go online/offline while the device
-- 
2.28.0



[PATCH v2 11/28] powerpc/pseries/mobility: extract VASI session polling logic

2020-12-07 Thread Nathan Lynch
The behavior of rtas_ibm_suspend_me_unsafe() is to return -EAGAIN to
the caller until the specified VASI suspend session state makes the
transition from H_VASI_ENABLED to H_VASI_SUSPENDING. In the interest
of separating concerns to prepare for a new implementation of the
join/suspend sequence, extract VASI session polling logic into a
couple of local functions. Waiting for the session state to reach
H_VASI_SUSPENDING before calling rtas_ibm_suspend_me_unsafe() ensures
that we will never get an EAGAIN result necessitating a retry. No
user-visible change in behavior is intended.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 69 +--
 1 file changed, 64 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 01ac7c03558e..573ed48b43d8 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -345,6 +345,66 @@ void post_mobility_fixup(void)
return;
 }
 
+static int poll_vasi_state(u64 handle, unsigned long *res)
+{
+   unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
+   long hvrc;
+   int ret;
+
+   hvrc = plpar_hcall(H_VASI_STATE, retbuf, handle);
+   switch (hvrc) {
+   case H_SUCCESS:
+   ret = 0;
+   *res = retbuf[0];
+   break;
+   case H_PARAMETER:
+   ret = -EINVAL;
+   break;
+   case H_FUNCTION:
+   ret = -EOPNOTSUPP;
+   break;
+   case H_HARDWARE:
+   default:
+   pr_err("unexpected H_VASI_STATE result %ld\n", hvrc);
+   ret = -EIO;
+   break;
+   }
+   return ret;
+}
+
+static int wait_for_vasi_session_suspending(u64 handle)
+{
+   unsigned long state;
+   int ret;
+
+   /*
+* Wait for transition from H_VASI_ENABLED to
+* H_VASI_SUSPENDING. Treat anything else as an error.
+*/
+   while (true) {
+   ret = poll_vasi_state(handle, &state);
+
+   if (ret != 0 || state == H_VASI_SUSPENDING) {
+   break;
+   } else if (state == H_VASI_ENABLED) {
+   ssleep(1);
+   } else {
+   pr_err("unexpected H_VASI_STATE result %lu\n", state);
+   ret = -EIO;
+   break;
+   }
+   }
+
+   /*
+* Proceed even if H_VASI_STATE is unavailable. If H_JOIN or
+* ibm,suspend-me are also unimplemented, we'll recover then.
+*/
+   if (ret == -EOPNOTSUPP)
+   ret = 0;
+
+   return ret;
+}
+
 static ssize_t migration_store(struct class *class,
   struct class_attribute *attr, const char *buf,
   size_t count)
@@ -356,12 +416,11 @@ static ssize_t migration_store(struct class *class,
if (rc)
return rc;
 
-   do {
-   rc = rtas_ibm_suspend_me_unsafe(streamid);
-   if (rc == -EAGAIN)
-   ssleep(1);
-   } while (rc == -EAGAIN);
+   rc = wait_for_vasi_session_suspending(streamid);
+   if (rc)
+   return rc;
 
+   rc = rtas_ibm_suspend_me_unsafe(streamid);
if (rc)
return rc;
 
-- 
2.28.0



[PATCH v2 09/28] powerpc/pseries/mobility: error message improvements

2020-12-07 Thread Nathan Lynch
- Convert printk(KERN_ERR) to pr_err().
- Include errno in property update failure message.
- Remove reference to "Post-mobility" from device tree update message:
  with pr_err() it will have a "mobility:" prefix.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 527a64e2d89f..31d81b7da961 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -208,8 +208,8 @@ static int update_dt_node(__be32 phandle, s32 scope)
rc = update_dt_property(dn, &prop, prop_name,
vd, prop_data);
if (rc) {
-   printk(KERN_ERR "Could not update %s"
-  " property\n", prop_name);
+   pr_err("updating %s property failed: 
%d\n",
+  prop_name, rc);
}
 
prop_data += vd;
@@ -343,8 +343,7 @@ void post_mobility_fixup(void)
 
rc = pseries_devicetree_update(MIGRATION_SCOPE);
if (rc)
-   printk(KERN_ERR "Post-mobility device tree update "
-   "failed: %d\n", rc);
+   pr_err("device tree update failed: %d\n", rc);
 
cacheinfo_rebuild();
 
-- 
2.28.0



[PATCH v2 08/28] powerpc/pseries/mobility: add missing break to default case

2020-12-07 Thread Nathan Lynch
update_dt_node() has a switch statement where the default case lacks a
break statement.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index e66359b00297..527a64e2d89f 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -213,6 +213,7 @@ static int update_dt_node(__be32 phandle, s32 scope)
}
 
prop_data += vd;
+   break;
}
 
cond_resched();
-- 
2.28.0



[PATCH v2 06/28] powerpc/hvcall: add token and codes for H_VASI_SIGNAL

2020-12-07 Thread Nathan Lynch
H_VASI_SIGNAL can be used by a partition to request cancellation of
its migration. To be used in future changes.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/hvcall.h | 9 +
 1 file changed, 9 insertions(+)

diff --git a/arch/powerpc/include/asm/hvcall.h 
b/arch/powerpc/include/asm/hvcall.h
index c1fbccb04390..c98f5141e3fc 100644
--- a/arch/powerpc/include/asm/hvcall.h
+++ b/arch/powerpc/include/asm/hvcall.h
@@ -155,6 +155,14 @@
 #define H_VASI_RESUMED  5
 #define H_VASI_COMPLETED6
 
+/* VASI signal codes. Only the Cancel code is valid for H_VASI_SIGNAL. */
+#define H_VASI_SIGNAL_CANCEL1
+#define H_VASI_SIGNAL_ABORT 2
+#define H_VASI_SIGNAL_SUSPEND   3
+#define H_VASI_SIGNAL_COMPLETE  4
+#define H_VASI_SIGNAL_ENABLE5
+#define H_VASI_SIGNAL_FAILOVER  6
+
 /* Each control block has to be on a 4K boundary */
 #define H_CB_ALIGNMENT  4096
 
@@ -261,6 +269,7 @@
 #define H_ADD_CONN 0x284
 #define H_DEL_CONN 0x288
 #define H_JOIN 0x298
+#define H_VASI_SIGNAL   0x2A0
 #define H_VASI_STATE0x2A4
 #define H_VIOCTL   0x2A8
 #define H_ENABLE_CRQ   0x2B0
-- 
2.28.0



[PATCH v2 07/28] powerpc/pseries/mobility: don't error on absence of ibm,update-nodes

2020-12-07 Thread Nathan Lynch
Treat the absence of the ibm,update-nodes function as benign instead
of reporting an error. If the platform does not provide that facility,
it's not a problem for Linux.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/platforms/pseries/mobility.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 6ff642e84c6a..e66359b00297 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -261,7 +261,7 @@ int pseries_devicetree_update(s32 scope)
 
update_nodes_token = rtas_token("ibm,update-nodes");
if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
-   return -EINVAL;
+   return 0;
 
rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
if (!rtas_buf)
-- 
2.28.0



[PATCH v2 05/28] powerpc/rtas: add rtas_activate_firmware()

2020-12-07 Thread Nathan Lynch
Provide a documented wrapper function for the ibm,activate-firmware
service, which must be called after a partition migration or
hibernation.

If the function is absent or the call fails, the OS will continue to
run normally with the current firmware, so there is no need to perform
any recovery. Just log it and continue.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  1 +
 arch/powerpc/kernel/rtas.c  | 30 ++
 2 files changed, 31 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index b43165fc6c2a..fdefe6a974eb 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -247,6 +247,7 @@ extern void __noreturn rtas_restart(char *cmd);
 extern void rtas_power_off(void);
 extern void __noreturn rtas_halt(void);
 extern void rtas_os_term(char *str);
+void rtas_activate_firmware(void);
 extern int rtas_get_sensor(int sensor, int index, int *state);
 extern int rtas_get_sensor_fast(int sensor, int index, int *state);
 extern int rtas_get_power_level(int powerdomain, int *level);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 8a618a3c4beb..3a740ae933f8 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -798,6 +798,36 @@ void rtas_os_term(char *str)
printk(KERN_EMERG "ibm,os-term call failed %d\n", status);
 }
 
+/**
+ * rtas_activate_firmware() - Activate a new version of firmware.
+ *
+ * Activate a new version of partition firmware. The OS must call this
+ * after resuming from a partition hibernation or migration in order
+ * to maintain the ability to perform live firmware updates. It's not
+ * catastrophic for this method to be absent or to fail; just log the
+ * condition in that case.
+ *
+ * Context: This function may sleep.
+ */
+void rtas_activate_firmware(void)
+{
+   int token;
+   int fwrc;
+
+   token = rtas_token("ibm,activate-firmware");
+   if (token == RTAS_UNKNOWN_SERVICE) {
+   pr_notice("ibm,activate-firmware method unavailable\n");
+   return;
+   }
+
+   do {
+   fwrc = rtas_call(token, 0, 1, NULL);
+   } while (rtas_busy_delay(fwrc));
+
+   if (fwrc)
+   pr_err("ibm,activate-firmware failed (%i)\n", fwrc);
+}
+
 static int ibm_suspend_me_token = RTAS_UNKNOWN_SERVICE;
 #ifdef CONFIG_PPC_PSERIES
 static int __rtas_suspend_last_cpu(struct rtas_suspend_me_data *data, int 
wake_when_done)
-- 
2.28.0



[PATCH v2 02/28] powerpc/rtas: complete ibm,suspend-me status codes

2020-12-07 Thread Nathan Lynch
We don't completely account for the possible return codes for
ibm,suspend-me. Add definitions for these.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 55f9a154c95d..f060181a0d32 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -23,11 +23,16 @@
 #define RTAS_RMOBUF_MAX (64 * 1024)
 
 /* RTAS return status codes */
-#define RTAS_NOT_SUSPENDABLE   -9004
 #define RTAS_BUSY  -2/* RTAS Busy */
 #define RTAS_EXTENDED_DELAY_MIN9900
 #define RTAS_EXTENDED_DELAY_MAX9905
 
+/* statuses specific to ibm,suspend-me */
+#define RTAS_SUSPEND_ABORTED 9000 /* Suspension aborted */
+#define RTAS_NOT_SUSPENDABLE-9004 /* Partition not suspendable */
+#define RTAS_THREADS_ACTIVE -9005 /* Multiple processor threads active */
+#define RTAS_OUTSTANDING_COPROC -9006 /* Outstanding coprocessor operations */
+
 /*
  * In general to call RTAS use rtas_token("string") to lookup
  * an RTAS token for the given string (e.g. "event-scan").
-- 
2.28.0



[PATCH v2 04/28] powerpc/rtas: add rtas_ibm_suspend_me()

2020-12-07 Thread Nathan Lynch
Now that the name is available, provide a simple wrapper for
ibm,suspend-me which returns both a Linux errno and optionally the
actual RTAS status to the caller.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h |  1 +
 arch/powerpc/kernel/rtas.c  | 57 +
 2 files changed, 58 insertions(+)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index 8436ed01567b..b43165fc6c2a 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -258,6 +258,7 @@ extern void rtas_progress(char *s, unsigned short hex);
 extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
 int rtas_ibm_suspend_me_unsafe(u64 handle);
+int rtas_ibm_suspend_me(int *fw_status);
 
 struct rtc_time;
 extern time64_t rtas_get_boot_time(void);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 0a8e5dc2c108..8a618a3c4beb 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -684,6 +684,63 @@ int rtas_set_indicator_fast(int indicator, int index, int 
new_value)
return rc;
 }
 
+/**
+ * rtas_ibm_suspend_me() - Call ibm,suspend-me to suspend the LPAR.
+ *
+ * @fw_status: RTAS call status will be placed here if not NULL.
+ *
+ * rtas_ibm_suspend_me() should be called only on a CPU which has
+ * received H_CONTINUE from the H_JOIN hcall. All other active CPUs
+ * should be waiting to return from H_JOIN.
+ *
+ * rtas_ibm_suspend_me() may suspend execution of the OS
+ * indefinitely. Callers should take appropriate measures upon return, such as
+ * resetting watchdog facilities.
+ *
+ * Callers may choose to retry this call if @fw_status is
+ * %RTAS_THREADS_ACTIVE.
+ *
+ * Return:
+ * 0  - The partition has resumed from suspend, possibly after
+ *  migration to a different host.
+ * -ECANCELED - The operation was aborted.
+ * -EAGAIN- There were other CPUs not in H_JOIN at the time of the call.
+ * -EBUSY - Some other condition prevented the suspend from succeeding.
+ * -EIO   - Hardware/platform error.
+ */
+int rtas_ibm_suspend_me(int *fw_status)
+{
+   int fwrc;
+   int ret;
+
+   fwrc = rtas_call(rtas_token("ibm,suspend-me"), 0, 1, NULL);
+
+   switch (fwrc) {
+   case 0:
+   ret = 0;
+   break;
+   case RTAS_SUSPEND_ABORTED:
+   ret = -ECANCELED;
+   break;
+   case RTAS_THREADS_ACTIVE:
+   ret = -EAGAIN;
+   break;
+   case RTAS_NOT_SUSPENDABLE:
+   case RTAS_OUTSTANDING_COPROC:
+   ret = -EBUSY;
+   break;
+   case -1:
+   default:
+   ret = -EIO;
+   break;
+   }
+
+   if (fw_status)
+   *fw_status = fwrc;
+
+   return ret;
+}
+
 void __noreturn rtas_restart(char *cmd)
 {
if (rtas_flash_term_hook)
-- 
2.28.0



[PATCH v2 00/28] partition suspend updates

2020-12-07 Thread Nathan Lynch
This series aims to improve the pseries-specific partition migration
and hibernation implementation, part of which has been living in
kernel/rtas.c. Most of that code is eliminated or moved to
platforms/pseries, and the following major functional changes are
made:

- Use stop_machine() instead of on_each_cpu() to avoid deadlock in the
  join/suspend sequence.

- Retry the join/suspend sequence on errors that are likely to be
  transient. This is a mitigation for the fact that drivers currently
  have no way to prepare for an impending partition suspension,
  sometimes resulting in a virtual adapter being in a state which
  causes the platform to fail the suspend call.

- Request cancellation of the migration via H_VASI_SIGNAL if Linux is
  going to error out of the suspend attempt. This allows the
  management console and other entities to promptly clean up their
  operations instead of relying on long timeouts to fail the
  migration.

- Little-endian users of ibm,suspend-me, ibm,update-nodes and
  ibm,update-properties via sys_rtas are blocked when
  CONFIG_PPC_RTAS_FILTERS is enabled.

- Legacy user space code (drmgr) historically has driven the migration
  process by using sys_rtas to separately call ibm,suspend-me,
  ibm,activate-firmware, and ibm,update-nodes/properties, in that
  order. With these changes, when sys_rtas() dispatches
  ibm,suspend-me, the kernel performs the device tree update and
  firmware activation before returning. This is more reliable, and
  drmgr does not seem bothered by it.

- If the H_VASI_STATE hcall is absent, the implementation proceeds
  with the suspend instead of erroring out. This allows us to exercise
  these code paths in QEMU.

Changes since v1:
- Drop "powerpc/rtas: move rtas_call_reentrant() out of pseries
  guards". rtas_call_reentrant() actually is pseries-specific and this
  broke builds without CONFIG_PPC_PSERIES set.
- Simplify polling logic in wait_for_vasi_session_suspending().
  ("powerpc/pseries/mobility: extract VASI session polling logic")
- Use direct return instead of goto in pseries_migrate_partition().
  ("powerpc/pseries/mobility: use stop_machine for join/suspend")
- Change dispatch of ibm,suspend-me in rtas syscall path to use
  conventional config symbol guards instead of a weak function.
  ("powerpc/rtas: dispatch partition migration requests to pseries")
- Fix refcount imbalance in add_dt_node() error path.
  ("powerpc/pseries/mobility: refactor node lookup during DT update")

Nathan Lynch (28):
  powerpc/rtas: prevent suspend-related sys_rtas use on LE
  powerpc/rtas: complete ibm,suspend-me status codes
  powerpc/rtas: rtas_ibm_suspend_me -> rtas_ibm_suspend_me_unsafe
  powerpc/rtas: add rtas_ibm_suspend_me()
  powerpc/rtas: add rtas_activate_firmware()
  powerpc/hvcall: add token and codes for H_VASI_SIGNAL
  powerpc/pseries/mobility: don't error on absence of ibm,update-nodes
  powerpc/pseries/mobility: add missing break to default case
  powerpc/pseries/mobility: error message improvements
  powerpc/pseries/mobility: use rtas_activate_firmware() on resume
  powerpc/pseries/mobility: extract VASI session polling logic
  powerpc/pseries/mobility: use stop_machine for join/suspend
  powerpc/pseries/mobility: signal suspend cancellation to platform
  powerpc/pseries/mobility: retry partition suspend after error
  powerpc/rtas: dispatch partition migration requests to pseries
  powerpc/rtas: remove rtas_ibm_suspend_me_unsafe()
  powerpc/pseries/hibernation: drop pseries_suspend_begin() from suspend ops
  powerpc/pseries/hibernation: pass stream id via function arguments
  powerpc/pseries/hibernation: remove pseries_suspend_cpu()
  powerpc/machdep: remove suspend_disable_cpu()
  powerpc/rtas: remove rtas_suspend_cpu()
  powerpc/pseries/hibernation: switch to rtas_ibm_suspend_me()
  powerpc/rtas: remove unused rtas_suspend_last_cpu()
  powerpc/pseries/hibernation: remove redundant cacheinfo update
  powerpc/pseries/hibernation: perform post-suspend fixups later
  powerpc/pseries/hibernation: remove prepare_late() callback
  powerpc/rtas: remove unused rtas_suspend_me_data
  powerpc/pseries/mobility: refactor node lookup during DT update

 arch/powerpc/include/asm/hvcall.h |   9 +
 arch/powerpc/include/asm/machdep.h|   1 -
 arch/powerpc/include/asm/rtas-types.h |   8 -
 arch/powerpc/include/asm/rtas.h   |  17 +-
 arch/powerpc/kernel/rtas.c| 243 ++-
 arch/powerpc/platforms/pseries/mobility.c | 358 ++
 arch/powerpc/platforms/pseries/suspend.c  |  79 +
 7 files changed, 415 insertions(+), 300 deletions(-)

-- 
2.28.0



[PATCH v2 03/28] powerpc/rtas: rtas_ibm_suspend_me -> rtas_ibm_suspend_me_unsafe

2020-12-07 Thread Nathan Lynch
The pseries partition suspend sequence requires that all active CPUs
call H_JOIN, which suspends all but one of them with interrupts
disabled. The "chosen" CPU is then to call ibm,suspend-me to complete
the suspend. Upon returning from ibm,suspend-me, the chosen CPU is to
use H_PROD to wake the joined CPUs.

Using on_each_cpu() for this, as rtas_ibm_suspend_me() does to
implement partition migration, is susceptible to deadlock with other
users of on_each_cpu() and with users of stop_machine APIs. The
callback passed to on_each_cpu() is not allowed to synchronize with
other CPUs in the way it is used here.

Complicating the fix is the fact that rtas_ibm_suspend_me() also
occupies the function name that should be used to provide a more
conventional wrapper for ibm,suspend-me. Rename rtas_ibm_suspend_me()
to rtas_ibm_suspend_me_unsafe() to free up the name and indicate that
it should not gain users.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/include/asm/rtas.h   | 2 +-
 arch/powerpc/kernel/rtas.c| 6 +++---
 arch/powerpc/platforms/pseries/mobility.c | 2 +-
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h
index f060181a0d32..8436ed01567b 100644
--- a/arch/powerpc/include/asm/rtas.h
+++ b/arch/powerpc/include/asm/rtas.h
@@ -257,7 +257,7 @@ extern int rtas_set_indicator_fast(int indicator, int 
index, int new_value);
 extern void rtas_progress(char *s, unsigned short hex);
 extern int rtas_suspend_cpu(struct rtas_suspend_me_data *data);
 extern int rtas_suspend_last_cpu(struct rtas_suspend_me_data *data);
-extern int rtas_ibm_suspend_me(u64 handle);
+int rtas_ibm_suspend_me_unsafe(u64 handle);
 
 struct rtc_time;
 extern time64_t rtas_get_boot_time(void);
diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 4ed64aba37d6..0a8e5dc2c108 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -843,7 +843,7 @@ static void rtas_percpu_suspend_me(void *info)
__rtas_suspend_cpu((struct rtas_suspend_me_data *)info, 1);
 }
 
-int rtas_ibm_suspend_me(u64 handle)
+int rtas_ibm_suspend_me_unsafe(u64 handle)
 {
long state;
long rc;
@@ -949,7 +949,7 @@ int rtas_call_reentrant(int token, int nargs, int nret, int 
*outputs, ...)
 }
 
 #else /* CONFIG_PPC_PSERIES */
-int rtas_ibm_suspend_me(u64 handle)
+int rtas_ibm_suspend_me_unsafe(u64 handle)
 {
return -ENOSYS;
 }
@@ -1185,7 +1185,7 @@ SYSCALL_DEFINE1(rtas, struct rtas_args __user *, uargs)
int rc = 0;
u64 handle = ((u64)be32_to_cpu(args.args[0]) << 32)
  | be32_to_cpu(args.args[1]);
-   rc = rtas_ibm_suspend_me(handle);
+   rc = rtas_ibm_suspend_me_unsafe(handle);
if (rc == -EAGAIN)
args.rets[0] = cpu_to_be32(RTAS_NOT_SUSPENDABLE);
else if (rc == -EIO)
diff --git a/arch/powerpc/platforms/pseries/mobility.c 
b/arch/powerpc/platforms/pseries/mobility.c
index 2f73cb5bf12d..6ff642e84c6a 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -370,7 +370,7 @@ static ssize_t migration_store(struct class *class,
return rc;
 
do {
-   rc = rtas_ibm_suspend_me(streamid);
+   rc = rtas_ibm_suspend_me_unsafe(streamid);
if (rc == -EAGAIN)
ssleep(1);
} while (rc == -EAGAIN);
-- 
2.28.0



[PATCH v2 01/28] powerpc/rtas: prevent suspend-related sys_rtas use on LE

2020-12-07 Thread Nathan Lynch
While drmgr has had work in some areas to make its RTAS syscall
interactions endian-neutral, its code for performing partition
migration via the syscall has never worked on LE. While it is able to
complete ibm,suspend-me successfully, it crashes when attempting the
subsequent ibm,update-nodes call.

drmgr is the only known (or plausible) user of ibm,suspend-me,
ibm,update-nodes, and ibm,update-properties, so allow them only in
big-endian configurations.

Signed-off-by: Nathan Lynch 
---
 arch/powerpc/kernel/rtas.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kernel/rtas.c b/arch/powerpc/kernel/rtas.c
index 954f41676f69..4ed64aba37d6 100644
--- a/arch/powerpc/kernel/rtas.c
+++ b/arch/powerpc/kernel/rtas.c
@@ -1050,9 +1050,11 @@ static struct rtas_filter rtas_filters[] __ro_after_init 
= {
{ "set-time-for-power-on", -1, -1, -1, -1, -1 },
{ "ibm,set-system-parameter", -1, 1, -1, -1, -1 },
{ "set-time-of-day", -1, -1, -1, -1, -1 },
+#ifdef CONFIG_CPU_BIG_ENDIAN
{ "ibm,suspend-me", -1, -1, -1, -1, -1 },
{ "ibm,update-nodes", -1, 0, -1, -1, -1, 4096 },
{ "ibm,update-properties", -1, 0, -1, -1, -1, 4096 },
+#endif
{ "ibm,physical-attestation", -1, 0, 1, -1, -1 },
 };
 
-- 
2.28.0



Re: [powerpc:next-test 54/220] arch/powerpc/kernel/vdso32/vgettimeofday.c:13:5: warning: no previous prototype for function '__c_kernel_clock_gettime64'

2020-12-07 Thread Segher Boessenkool
On Mon, Dec 07, 2020 at 09:56:56AM -0800, Nick Desaulniers wrote:
> On Mon, Dec 7, 2020 at 4:23 AM Michael Ellerman  wrote:
> > So is clang defining __powerpc64__ even for 32-bit code?
> >
> > And the answer appears to be yes:
> >
> >   $ clang --version
> >   Ubuntu clang version 11.0.0-2
> >   Target: powerpc64le-unknown-linux-gnu
> >
> >   $ clang -m32 -dM -E - < /dev/null | grep powerpc
> >   #define __powerpc64__ 1
> >   #define __powerpc__ 1
> >
> > Compare to gcc:
> >
> >   $ gcc --version
> >   gcc (Ubuntu 10.2.0-13ubuntu1) 10.2.0
> >
> >   $ gcc -m32 -dM -E - < /dev/null | grep powerpc
> >   #define __powerpc__ 1
> >   #define powerpc 1
> >   #define __powerpc 1
> >
> >
> > Which is fairly problematic, because we use the presence/absence of
> > __powerpc64__ to determine if we're building 64-bit/32-bit code in
> > several places.
> >
> > Not sure what the best approach for fixing that is.
> 
> Thanks for the triage; we should fix our preprocessor:
> https://bugs.llvm.org/show_bug.cgi?id=48427

Not only is that a compatibility problem (as the bug report says): it is
a straight up violation of the ABI!  (For ELFv2, which you have here;
older ABIs did not mention the preprocessor predefines, but this was
exactly the same on all compilers afaik.)


Segher


Re: [PATCH] powerpc/mm: Fix KUAP warning by providing copy_from_kernel_nofault_allowed()

2020-12-07 Thread Christophe Leroy




Le 07/12/2020 à 01:24, Michael Ellerman a écrit :

Christophe Leroy  writes:

Since commit c33165253492 ("powerpc: use non-set_fs based maccess
routines"), userspace access is not granted anymore when using
copy_from_kernel_nofault().

However, kthread_probe_data() uses copy_from_kernel_nofault()
to check validity of pointers. When the pointer is NULL,
it points to userspace, leading to a KUAP fault and triggering
the following big hammer warning many times when you request
a sysrq "show task":

[ 1117.202054] [ cut here ]
[ 1117.202102] Bug: fault blocked by AP register !
[ 1117.202261] WARNING: CPU: 0 PID: 377 at 
arch/powerpc/include/asm/nohash/32/kup-8xx.h:66 do_page_fault+0x4a8/0x5ec
[ 1117.202310] Modules linked in:
[ 1117.202428] CPU: 0 PID: 377 Comm: sh Tainted: GW 
5.10.0-rc5-01340-g83f53be2de31-dirty #4175
[ 1117.202499] NIP:  c0012048 LR: c0012048 CTR: 
[ 1117.202573] REGS: cacdbb88 TRAP: 0700   Tainted: GW  
(5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.202625] MSR:  00021032   CR: 2408  XER: 2000
[ 1117.202899]
[ 1117.202899] GPR00: c0012048 cacdbc40 c2929290 0023 c092e554 0001 
c09865e8 c092e640
[ 1117.202899] GPR08: 1032   00014efc 28082224 100d166a 
100a0920 
[ 1117.202899] GPR16: 100cac0c 100b 1080c3fc 1080d685 100d 100d 
 100a0900
[ 1117.202899] GPR24: 100d c07892ec  c0921510 c21f4440 005c 
c000 cacdbc80
[ 1117.204362] NIP [c0012048] do_page_fault+0x4a8/0x5ec
[ 1117.204461] LR [c0012048] do_page_fault+0x4a8/0x5ec
[ 1117.204509] Call Trace:
[ 1117.204609] [cacdbc40] [c0012048] do_page_fault+0x4a8/0x5ec (unreliable)
[ 1117.204771] [cacdbc70] [c00112f0] handle_page_fault+0x8/0x34
[ 1117.204911] --- interrupt: 301 at copy_from_kernel_nofault+0x70/0x1c0
[ 1117.204979] NIP:  c010dbec LR: c010dbac CTR: 0001
[ 1117.205053] REGS: cacdbc80 TRAP: 0301   Tainted: GW  
(5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.205104] MSR:  9032   CR: 28082224  XER: 
[ 1117.205416] DAR: 005c DSISR: c000
[ 1117.205416] GPR00: c0045948 cacdbd38 c2929290 0001 0017 0017 
0027 000f
[ 1117.205416] GPR08: c09926ec   3000 24082224
[ 1117.206106] NIP [c010dbec] copy_from_kernel_nofault+0x70/0x1c0
[ 1117.206202] LR [c010dbac] copy_from_kernel_nofault+0x30/0x1c0
[ 1117.206258] --- interrupt: 301
[ 1117.206372] [cacdbd38] [c004bbb0] kthread_probe_data+0x44/0x70 (unreliable)
[ 1117.206561] [cacdbd58] [c0045948] print_worker_info+0xe0/0x194
[ 1117.206717] [cacdbdb8] [c00548ac] sched_show_task+0x134/0x168
[ 1117.206851] [cacdbdd8] [c005a268] show_state_filter+0x70/0x100
[ 1117.206989] [cacdbe08] [c039baa0] sysrq_handle_showstate+0x14/0x24
[ 1117.207122] [cacdbe18] [c039bf18] __handle_sysrq+0xac/0x1d0
[ 1117.207257] [cacdbe48] [c039c0c0] write_sysrq_trigger+0x4c/0x74
[ 1117.207407] [cacdbe68] [c01fba48] proc_reg_write+0xb4/0x114
[ 1117.207550] [cacdbe88] [c0179968] vfs_write+0x12c/0x478
[ 1117.207686] [cacdbf08] [c0179e60] ksys_write+0x78/0x128
[ 1117.207826] [cacdbf38] [c00110d0] ret_from_syscall+0x0/0x34
[ 1117.207938] --- interrupt: c01 at 0xfd4e784
[ 1117.208008] NIP:  0fd4e784 LR: 0fe0f244 CTR: 10048d38
[ 1117.208083] REGS: cacdbf48 TRAP: 0c01   Tainted: GW  
(5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.208134] MSR:  d032   CR: 4400  XER: 
[ 1117.208470]
[ 1117.208470] GPR00: 0004 7fc34090 77bfb4e0 0001 1080fa40 0002 
740f fefefeff
[ 1117.208470] GPR08: 7f7f7f7f 10048d38 1080c414 7fc343c0 
[ 1117.209104] NIP [0fd4e784] 0xfd4e784
[ 1117.209180] LR [0fe0f244] 0xfe0f244
[ 1117.209236] --- interrupt: c01
[ 1117.209274] Instruction dump:
[ 1117.209353] 714a4000 418200f0 73ca0001 40820084 73ca0032 408200f8 73c90040 
4082ff60
[ 1117.209727] 0fe0 3c60c082 386399f4 48013b65 <0fe0> 80010034 386b 
7c0803a6
[ 1117.210102] ---[ end trace 1927c0323393af3e ]---

To avoid that, copy_from_kernel_nofault_allowed() is used to check
whether the address is a valid kernel address. But the default
version of it returns true for any address.

Provide a powerpc version of copy_from_kernel_nofault_allowed()
that returns false when the address is below TASK_USER_MAX,
so that copy_from_kernel_nofault() will return -ERANGE.

Reported-by: Qian Cai 
Fixes: c33165253492 ("powerpc: use non-set_fs based maccess routines")
Cc: Christoph Hellwig 
Cc: Al Viro 
Signed-off-by: Christophe Leroy 
---
This issue was introduced in 5.10. I didn't mark it for stable, hoping it will 
go into 5.10-rc7.
---
  arch/powerpc/mm/Makefile  | 2 +-
  arch/powerpc/mm/maccess.c | 9 +
  2 files changed, 10 insertions(+), 1 deletion(-)
  create mode 100644 arch/powerpc/mm/maccess.c

diff --git a/arch/powerpc/mm/maccess.c b/arch/powerpc/mm/maccess.c
new file mode 100644
index ..56e97c0fb233
--- /dev/null
+++ b/arch/powerpc/mm/maccess.c
@@ -0,0 +1,9 @@
+// SPDX-License-I

[PATCH v2] powerpc/mm: Fix KUAP warning by providing copy_from_kernel_nofault_allowed()

2020-12-07 Thread Christophe Leroy
Since commit c33165253492 ("powerpc: use non-set_fs based maccess
routines"), userspace access is not granted anymore when using
copy_from_kernel_nofault()

However, kthread_probe_data() uses copy_from_kernel_nofault()
to check validity of pointers. When the pointer is NULL,
it points to userspace, leading to a KUAP fault and triggering
the following big hammer warning many times when you request
a sysrq "show task":

[ 1117.202054] [ cut here ]
[ 1117.202102] Bug: fault blocked by AP register !
[ 1117.202261] WARNING: CPU: 0 PID: 377 at 
arch/powerpc/include/asm/nohash/32/kup-8xx.h:66 do_page_fault+0x4a8/0x5ec
[ 1117.202310] Modules linked in:
[ 1117.202428] CPU: 0 PID: 377 Comm: sh Tainted: GW 
5.10.0-rc5-01340-g83f53be2de31-dirty #4175
[ 1117.202499] NIP:  c0012048 LR: c0012048 CTR: 
[ 1117.202573] REGS: cacdbb88 TRAP: 0700   Tainted: GW  
(5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.202625] MSR:  00021032   CR: 2408  XER: 2000
[ 1117.202899]
[ 1117.202899] GPR00: c0012048 cacdbc40 c2929290 0023 c092e554 0001 
c09865e8 c092e640
[ 1117.202899] GPR08: 1032   00014efc 28082224 100d166a 
100a0920 
[ 1117.202899] GPR16: 100cac0c 100b 1080c3fc 1080d685 100d 100d 
 100a0900
[ 1117.202899] GPR24: 100d c07892ec  c0921510 c21f4440 005c 
c000 cacdbc80
[ 1117.204362] NIP [c0012048] do_page_fault+0x4a8/0x5ec
[ 1117.204461] LR [c0012048] do_page_fault+0x4a8/0x5ec
[ 1117.204509] Call Trace:
[ 1117.204609] [cacdbc40] [c0012048] do_page_fault+0x4a8/0x5ec (unreliable)
[ 1117.204771] [cacdbc70] [c00112f0] handle_page_fault+0x8/0x34
[ 1117.204911] --- interrupt: 301 at copy_from_kernel_nofault+0x70/0x1c0
[ 1117.204979] NIP:  c010dbec LR: c010dbac CTR: 0001
[ 1117.205053] REGS: cacdbc80 TRAP: 0301   Tainted: GW  
(5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.205104] MSR:  9032   CR: 28082224  XER: 
[ 1117.205416] DAR: 005c DSISR: c000
[ 1117.205416] GPR00: c0045948 cacdbd38 c2929290 0001 0017 0017 
0027 000f
[ 1117.205416] GPR08: c09926ec   3000 24082224
[ 1117.206106] NIP [c010dbec] copy_from_kernel_nofault+0x70/0x1c0
[ 1117.206202] LR [c010dbac] copy_from_kernel_nofault+0x30/0x1c0
[ 1117.206258] --- interrupt: 301
[ 1117.206372] [cacdbd38] [c004bbb0] kthread_probe_data+0x44/0x70 (unreliable)
[ 1117.206561] [cacdbd58] [c0045948] print_worker_info+0xe0/0x194
[ 1117.206717] [cacdbdb8] [c00548ac] sched_show_task+0x134/0x168
[ 1117.206851] [cacdbdd8] [c005a268] show_state_filter+0x70/0x100
[ 1117.206989] [cacdbe08] [c039baa0] sysrq_handle_showstate+0x14/0x24
[ 1117.207122] [cacdbe18] [c039bf18] __handle_sysrq+0xac/0x1d0
[ 1117.207257] [cacdbe48] [c039c0c0] write_sysrq_trigger+0x4c/0x74
[ 1117.207407] [cacdbe68] [c01fba48] proc_reg_write+0xb4/0x114
[ 1117.207550] [cacdbe88] [c0179968] vfs_write+0x12c/0x478
[ 1117.207686] [cacdbf08] [c0179e60] ksys_write+0x78/0x128
[ 1117.207826] [cacdbf38] [c00110d0] ret_from_syscall+0x0/0x34
[ 1117.207938] --- interrupt: c01 at 0xfd4e784
[ 1117.208008] NIP:  0fd4e784 LR: 0fe0f244 CTR: 10048d38
[ 1117.208083] REGS: cacdbf48 TRAP: 0c01   Tainted: GW  
(5.10.0-rc5-01340-g83f53be2de31-dirty)
[ 1117.208134] MSR:  d032   CR: 4400  XER: 
[ 1117.208470]
[ 1117.208470] GPR00: 0004 7fc34090 77bfb4e0 0001 1080fa40 0002 
740f fefefeff
[ 1117.208470] GPR08: 7f7f7f7f 10048d38 1080c414 7fc343c0 
[ 1117.209104] NIP [0fd4e784] 0xfd4e784
[ 1117.209180] LR [0fe0f244] 0xfe0f244
[ 1117.209236] --- interrupt: c01
[ 1117.209274] Instruction dump:
[ 1117.209353] 714a4000 418200f0 73ca0001 40820084 73ca0032 408200f8 73c90040 
4082ff60
[ 1117.209727] 0fe0 3c60c082 386399f4 48013b65 <0fe0> 80010034 386b 
7c0803a6
[ 1117.210102] ---[ end trace 1927c0323393af3e ]---

To avoid that, copy_from_kernel_nofault_allowed() is used to check
whether the address is a valid kernel address. But the default
version of it returns true for any address.

Provide a powerpc version of copy_from_kernel_nofault_allowed()
that returns false when the address is not a kernel address
(as determined by is_kernel_addr()), so that
copy_from_kernel_nofault() will return -ERANGE.

Reported-by: Qian Cai 
Fixes: c33165253492 ("powerpc: use non-set_fs based maccess routines")
Cc: Christoph Hellwig 
Cc: Al Viro 
Signed-off-by: Christophe Leroy 
---
This issue was introduced in 5.10-rc1. I didn't mark it for stable, hoping it 
will go into 5.10

v2: Using is_kernel_addr() instead of comparison to TASK_SIZE_MAX.
---
 arch/powerpc/mm/Makefile  | 2 +-
 arch/powerpc/mm/maccess.c | 9 +
 2 files changed, 10 insertions(+), 1 deletion(-)
 create mode 100644 arch/powerpc/mm/maccess.c

diff --git a/arch/powerpc/mm/Makefile b/arch/powerpc/mm/Makefile
index 5e147986400d..55b4a8bd408a 100644
--- a/arch/powerpc/mm/Makefile
+++ b/arch/powerpc/mm/Makefile
@@ -5,7 +5,7 @@
 
 ccflags-$(CONFIG_PPC64):=

Re: [PATCH v5 19/19] dt-bindings: usb: intel, keembay-dwc3: Validate DWC3 sub-node

2020-12-07 Thread Rob Herring
On Sat, 05 Dec 2020 18:24:26 +0300, Serge Semin wrote:
> Intel Keem Bay DWC3 compatible DT nodes are supposed to have a DWC USB3
> compatible sub-node to describe a fully functioning USB interface. Let's
> use the available DWC USB3 DT schema to validate the Intel Keem Bay DWC3
> sub-nodes.
> 
> Note that since the generic DWC USB3 DT node is supposed to be named as a
> generic USB HCD one ("^usb(@.*)?"), we have to accordingly fix the sub-nodes
> name regexp and fix the DT node example.
> 
> Signed-off-by: Serge Semin 
> 
> ---
> 
> Changelog v5:
> - This is a new patch created for the new Intel Keem Bay bindings file,
>   which has been added just recently.
> ---
>  .../devicetree/bindings/usb/intel,keembay-dwc3.yaml  | 9 +++--
>  1 file changed, 3 insertions(+), 6 deletions(-)
> 

Reviewed-by: Rob Herring 


Re: [PATCH v5 01/19] dt-bindings: usb: usb-hcd: Detach generic USB controller properties

2020-12-07 Thread Rob Herring
On Sat, 05 Dec 2020 18:24:08 +0300, Serge Semin wrote:
> There can be three distinctive types of the USB controllers: USB hosts,
> USB peripherals/gadgets and USB OTG, which can switch from one role to
> another. In order to have that hierarchy handled in the DT binding files,
> we need to collect common properties in a common DT schema and specific
> properties in dedicated schemas. Seeing the usb-hcd.yaml DT schema is
> dedicated for the USB host controllers only, let's move some common
> properties from there into the usb.yaml schema. So the latter would be
> available to evaluate all currently supported types of the USB
> controllers.
> 
> While at it, add an explicit "additionalProperties: true" into the
> usb-hcd.yaml, as setting the additionalProperties/unevaluatedProperties
> properties is going to become mandatory soon.
> 
> Signed-off-by: Serge Semin 
> 
> ---
> 
> Changelog v4:
> - This is a new patch created as a result of the comment left
>   by Chunfeng Yun in v3
> 
> Changelog v5:
> - Discard duplicated additionalProperties property definition.
> ---
>  .../devicetree/bindings/usb/usb-hcd.yaml  | 14 ++---
>  .../devicetree/bindings/usb/usb.yaml  | 29 +++
>  2 files changed, 31 insertions(+), 12 deletions(-)
>  create mode 100644 Documentation/devicetree/bindings/usb/usb.yaml
> 


My bot found errors running 'make dt_binding_check' on your patch:

yamllint warnings/errors:

dtschema/dtc warnings/errors:
schemas/usb/usb-hcd.yaml: ignoring, error in schema: 
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/usb/usb-hcd.yaml:
 'anyOf' conditional failed, one must be fixed:
'properties' is a required property
'patternProperties' is a required property
schemas/usb/usb-hcd.yaml: ignoring, error in schema: 
/builds/robherring/linux-dt-review/Documentation/devicetree/bindings/usb/usb-hcd.yaml:
 ignoring, error in schema: 
warning: no schema found in file: 
./Documentation/devicetree/bindings/usb/usb-hcd.yaml
schemas/usb/usb-hcd.yaml: ignoring, error in schema: 
dt-validate: recursion error: Check for prior errors in a referenced schema
schemas/usb/usb-hcd.yaml: ignoring, error in schema: 
dt-validate: recursion error: Check for prior errors in a referenced schema
schemas/usb/usb-hcd.yaml: ignoring, error in schema: 
dt-validate: recursion error: Check for prior errors in a referenced schema
schemas/usb/usb-hcd.yaml: ignoring, error in schema: 
dt-validate: recursion error: Check for prior errors in a referenced schema
schemas/usb/usb-hcd.yaml: ignoring, error in schema: 
dt-validate: recursion error: Check for prior errors in a referenced schema
dt-validate: recursion error: Check for prior errors in a referenced schema


See https://patchwork.ozlabs.org/patch/1411574

The base for the patch is generally the last rc1. Any dependencies
should be noted.

If you already ran 'make dt_binding_check' and didn't see the above
error(s), then make sure 'yamllint' is installed and dt-schema is up to
date:

pip3 install dtschema --upgrade

Please check and re-submit.



Re: [PATCH v5 10/19] dt-bindings: usb: Convert DWC USB3 bindings to DT schema

2020-12-07 Thread Rob Herring
On Sat, 05 Dec 2020 18:24:17 +0300, Serge Semin wrote:
> DWC USB3 DT node is supposed to be compliant with the Generic xHCI
> Controller schema, but with additional vendor-specific properties, the
> controller-specific reference clocks and PHYs. So let's convert the
> currently available legacy text-based DWC USB3 bindings to the DT schema
> and make sure the DWC USB3 nodes are also validated against the
> usb-xhci.yaml schema.
> 
> Note 1. we have to discard the nodename restriction of being prefixed with
> "dwc3@" string, since in accordance with the usb-hcd.yaml schema USB nodes
> are supposed to be named as "^usb(@.*)".
> 
> Note 2. The clock-related properties are marked as optional to match the
> DWC USB3 driver expectation and to improve the bindings maintainability,
> so that if there is a glue-node, it would be responsible for the
> clocks initialization.
> 
> Signed-off-by: Serge Semin 
> 
> ---
> 
> Changelog v2:
> - Discard '|' from the descriptions, since we don't need to preserve
>   the text formatting in any of them.
> - Drop quotes from around the string constants.
> - Fix the "clock-names" prop description to be referring the enumerated
>   clock-names instead of the ones from the Databook.
> 
> Changelog v3:
> - Apply usb-xhci.yaml# schema only if the controller is supposed to work
>   as either host or otg.
> 
> Changelog v4:
> - Apply usb-drd.yaml schema first. If the controller is configured
>   to work in a gadget mode only, then apply the usb.yaml schema too,
>   otherwise apply the usb-xhci.yaml schema.
> - Discard Rob's Reviewed-by tag. Please review the patch one more
>   time.
> 
> Changelog v5:
> - Add "snps,dis-split-quirk" property to the DWC USB3 DT schema.
> - Add a commit log text about the clock-related property changes.
> - Make sure dr_mode exist to apply the USB-gadget-only schema.
> ---
>  .../devicetree/bindings/usb/dwc3.txt  | 128 ---
>  .../devicetree/bindings/usb/snps,dwc3.yaml| 312 ++
>  2 files changed, 312 insertions(+), 128 deletions(-)
>  delete mode 100644 Documentation/devicetree/bindings/usb/dwc3.txt
>  create mode 100644 Documentation/devicetree/bindings/usb/snps,dwc3.yaml
> 


My bot found errors running 'make dt_binding_check' on your patch:

yamllint warnings/errors:
./Documentation/devicetree/bindings/usb/snps,dwc3.yaml:55:4: [warning] wrong 
indentation: expected 4 but found 3 (indentation)

dtschema/dtc warnings/errors:
Unknown file referenced: [Errno 2] No such file or directory: 
'/usr/local/lib/python3.8/dist-packages/dtschema/schemas/usb/usb-drd.yaml'
xargs: dt-doc-validate: exited with status 255; aborting
make[1]: *** [Documentation/devicetree/bindings/Makefile:59: 
Documentation/devicetree/bindings/processed-schema-examples.json] Error 124
make: *** [Makefile:1364: dt_binding_check] Error 2


See https://patchwork.ozlabs.org/patch/1411582

The base for the patch is generally the last rc1. Any dependencies
should be noted.

If you already ran 'make dt_binding_check' and didn't see the above
error(s), then make sure 'yamllint' is installed and dt-schema is up to
date:

pip3 install dtschema --upgrade

Please check and re-submit.



[PATCH] powerpc: fix spelling mistake in Kconfig "seleted" -> "selected"

2020-12-07 Thread Colin King
From: Colin Ian King 

There is a spelling mistake in the help text of the Kconfig. Fix it.

Signed-off-by: Colin Ian King 
---
 arch/powerpc/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 8fb61a285c76..4010bae52351 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -87,7 +87,7 @@ config PPC_WATCHDOG
help
  This is a placeholder when the powerpc hardlockup detector
  watchdog is selected (arch/powerpc/kernel/watchdog.c). It is
- seleted via the generic lockup detector menu which is why we
+ selected via the generic lockup detector menu which is why we
  have no standalone config option for it here.
 
 config STACKTRACE_SUPPORT
-- 
2.29.2



[PATCH] arch: fix 'unexpected IRQ trap at vector' warnings

2020-12-07 Thread Enrico Weigelt, metux IT consult
All archs, except Alpha, print out the irq number in hex, but the message
looks like it was a decimal number, which is quite confusing. Fix this
by adding a "0x" prefix.

Signed-off-by: Enrico Weigelt, metux IT consult 
---
 arch/arm/include/asm/hw_irq.h  | 2 +-
 arch/parisc/include/asm/hardirq.h  | 2 +-
 arch/powerpc/include/asm/hardirq.h | 2 +-
 arch/s390/include/asm/hardirq.h| 2 +-
 arch/um/include/asm/hardirq.h  | 2 +-
 arch/x86/kernel/irq.c  | 2 +-
 6 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/arm/include/asm/hw_irq.h b/arch/arm/include/asm/hw_irq.h
index cecc13214ef1..2749f19271d9 100644
--- a/arch/arm/include/asm/hw_irq.h
+++ b/arch/arm/include/asm/hw_irq.h
@@ -9,7 +9,7 @@ static inline void ack_bad_irq(int irq)
 {
extern unsigned long irq_err_count;
irq_err_count++;
-   pr_crit("unexpected IRQ trap at vector %02x\n", irq);
+   pr_crit("unexpected IRQ trap at vector 0x%02x\n", irq);
 }
 
 #define ARCH_IRQ_INIT_FLAGS(IRQ_NOREQUEST | IRQ_NOPROBE)
diff --git a/arch/parisc/include/asm/hardirq.h 
b/arch/parisc/include/asm/hardirq.h
index 7f7039516e53..c3348af88d3f 100644
--- a/arch/parisc/include/asm/hardirq.h
+++ b/arch/parisc/include/asm/hardirq.h
@@ -35,6 +35,6 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 #define __IRQ_STAT(cpu, member) (irq_stat[cpu].member)
 #define inc_irq_stat(member)   this_cpu_inc(irq_stat.member)
 #define __inc_irq_stat(member) __this_cpu_inc(irq_stat.member)
-#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector %02x\n", irq)
+#define ack_bad_irq(irq) WARN(1, "unexpected IRQ trap at vector 0x%02x\n", irq)
 
 #endif /* _PARISC_HARDIRQ_H */
diff --git a/arch/powerpc/include/asm/hardirq.h 
b/arch/powerpc/include/asm/hardirq.h
index f133b5930ae1..ec8cf3cf6e49 100644
--- a/arch/powerpc/include/asm/hardirq.h
+++ b/arch/powerpc/include/asm/hardirq.h
@@ -29,7 +29,7 @@ DECLARE_PER_CPU_SHARED_ALIGNED(irq_cpustat_t, irq_stat);
 
 static inline void ack_bad_irq(unsigned int irq)
 {
-   printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+   printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
 }
 
 extern u64 arch_irq_stat_cpu(unsigned int cpu);
diff --git a/arch/s390/include/asm/hardirq.h b/arch/s390/include/asm/hardirq.h
index dfbc3c6c0674..aaaec5cdd4fe 100644
--- a/arch/s390/include/asm/hardirq.h
+++ b/arch/s390/include/asm/hardirq.h
@@ -23,7 +23,7 @@
 
 static inline void ack_bad_irq(unsigned int irq)
 {
-   printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+   printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
 }
 
 #endif /* __ASM_HARDIRQ_H */
diff --git a/arch/um/include/asm/hardirq.h b/arch/um/include/asm/hardirq.h
index b426796d26fd..2a2e6eae034b 100644
--- a/arch/um/include/asm/hardirq.h
+++ b/arch/um/include/asm/hardirq.h
@@ -15,7 +15,7 @@ typedef struct {
 #ifndef ack_bad_irq
 static inline void ack_bad_irq(unsigned int irq)
 {
-   printk(KERN_CRIT "unexpected IRQ trap at vector %02x\n", irq);
+   printk(KERN_CRIT "unexpected IRQ trap at vector 0x%02x\n", irq);
 }
 #endif
 
diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c
index c5dd50369e2f..957c716f2df7 100644
--- a/arch/x86/kernel/irq.c
+++ b/arch/x86/kernel/irq.c
@@ -37,7 +37,7 @@ atomic_t irq_err_count;
 void ack_bad_irq(unsigned int irq)
 {
if (printk_ratelimit())
-   pr_err("unexpected IRQ trap at vector %02x\n", irq);
+   pr_err("unexpected IRQ trap at vector 0x%02x\n", irq);
 
/*
 * Currently unexpected vectors happen only on SMP and APIC.
-- 
2.11.0



Re: [PATCH 3/3] powerpc/cacheinfo: Print correct cache-sibling map/list for L2 cache

2020-12-07 Thread Srikar Dronamraju
* Gautham R. Shenoy  [2020-12-04 10:18:47]:

> From: "Gautham R. Shenoy" 
> 
> 
> Signed-off-by: Gautham R. Shenoy 
> ---
> 
> +extern bool thread_group_shares_l2;
>  /*
>   * On big-core systems, each core has two groups of CPUs each of which
>   * has its own L1-cache. The thread-siblings which share l1-cache with
>   * @cpu can be obtained via cpu_smallcore_mask().
> + *
> + * On some big-core systems, the L2 cache is shared only between some
> + * groups of siblings. This is already parsed and encoded in
> + * cpu_l2_cache_mask().
>   */
>  static const struct cpumask *get_big_core_shared_cpu_map(int cpu, struct 
> cache *cache)
>  {
>   if (cache->level == 1)
>   return cpu_smallcore_mask(cpu);
> + if (cache->level == 2 && thread_group_shares_l2)
> + return cpu_l2_cache_mask(cpu);
> 
>   return &cache->shared_cpu_map;

As pointed out by l...@intel.org, we need to do this only with #CONFIG_SMP,
even for cache->level = 1 too.

I agree that we are displaying shared_cpu_map correctly. Should we also
update/clear shared_cpu_map in the first place? For example: for a P9
core with CPUs 0-7, the cache->shared_cpu_map for L1 would have 0-7 but
would display 0,2,4,6.

The drawback of this is even if cpus 0,2,4,6 are released L1 cache will not
be released. Is this as expected?


-- 
Thanks and Regards
Srikar Dronamraju


Re: [PATCH 2/3] powerpc/smp: Add support detecting thread-groups sharing L2 cache

2020-12-07 Thread Srikar Dronamraju
* Gautham R. Shenoy  [2020-12-04 10:18:46]:

> From: "Gautham R. Shenoy" 
> 
> On POWER systems, groups of threads within a core sharing the L2-cache
> can be indicated by the "ibm,thread-groups" property array with the
> identifier "2".
> 
> This patch adds support for detecting this and, when present, populating
> the cpu_l2_cache_mask of every CPU with the core-siblings
> which share L2 with the CPU, as specified by the
> "ibm,thread-groups" property array.
> 
> On a platform with the following "ibm,thread-group" configuration
>0001 0002 0004 
>0002 0004 0006 0001
>0003 0005 0007 0002
>0002 0004  0002
>0004 0006 0001 0003
>0005 0007
> 
> Without this patch, the sched-domain hierarchy for CPUs 0,1 would be
>   CPU0 attaching sched-domain(s):
>   domain-0: span=0,2,4,6 level=SMT
>   domain-1: span=0-7 level=CACHE
>   domain-2: span=0-15,24-39,48-55 level=MC
>   domain-3: span=0-55 level=DIE
> 
>   CPU1 attaching sched-domain(s):
>   domain-0: span=1,3,5,7 level=SMT
>   domain-1: span=0-7 level=CACHE
>   domain-2: span=0-15,24-39,48-55 level=MC
>   domain-3: span=0-55 level=DIE
> 
> The CACHE domain at 0-7 is incorrect since the ibm,thread-groups
> sub-array
> [0002 0002 0004
>   0002 0004 0006
>  0001 0003 0005 0007]
> indicates that L2 (Property "2") is shared only between the threads of a 
> single
> group. There are "2" groups of threads where each group contains "4"
> threads each. The groups being {0,2,4,6} and {1,3,5,7}.
> 
> With this patch, the sched-domain hierarchy for CPUs 0,1 would be
>   CPU0 attaching sched-domain(s):
>   domain-0: span=0,2,4,6 level=SMT
>   domain-1: span=0-15,24-39,48-55 level=MC
>   domain-2: span=0-55 level=DIE
> 
>   CPU1 attaching sched-domain(s):
>   domain-0: span=1,3,5,7 level=SMT
>   domain-1: span=0-15,24-39,48-55 level=MC
>   domain-2: span=0-55 level=DIE
> 
> The CACHE domain with span=0,2,4,6 for CPU 0 (span=1,3,5,7 for CPU 1
> resp.) gets degenerated into the SMT domain. Furthermore, the
> last-level-cache domain gets correctly set to the SMT sched-domain.
> 
> Signed-off-by: Gautham R. Shenoy 
> ---
>  arch/powerpc/kernel/smp.c | 66 
> +--
>  1 file changed, 58 insertions(+), 8 deletions(-)
> 
> diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
> index 6a242a3..a116d2d 100644
> --- a/arch/powerpc/kernel/smp.c
> +++ b/arch/powerpc/kernel/smp.c
> @@ -76,6 +76,7 @@
>  struct task_struct *secondary_current;
>  bool has_big_cores;
>  bool coregroup_enabled;
> +bool thread_group_shares_l2;

Either keep this as static in this patch or add its declaration

> 
>  DEFINE_PER_CPU(cpumask_var_t, cpu_sibling_map);
>  DEFINE_PER_CPU(cpumask_var_t, cpu_smallcore_map);
> @@ -99,6 +100,7 @@ enum {
> 
>  #define MAX_THREAD_LIST_SIZE 8
>  #define THREAD_GROUP_SHARE_L1   1
> +#define THREAD_GROUP_SHARE_L2   2
>  struct thread_groups {
>   unsigned int property;
>   unsigned int nr_groups;
> @@ -107,7 +109,7 @@ struct thread_groups {
>  };
> 
>  /* Maximum number of properties that groups of threads within a core can 
> share */
> -#define MAX_THREAD_GROUP_PROPERTIES 1
> +#define MAX_THREAD_GROUP_PROPERTIES 2
> 
>  struct thread_groups_list {
>   unsigned int nr_properties;
> @@ -121,6 +123,13 @@ struct thread_groups_list {
>   */
>  DEFINE_PER_CPU(cpumask_var_t, cpu_l1_cache_map);
> 
> +/*
> + * On some big-cores system, thread_group_l2_cache_map for each CPU
> + * corresponds to the set its siblings within the core that share the
> + * L2-cache.
> + */
> +DEFINE_PER_CPU(cpumask_var_t, thread_group_l2_cache_map);
> +

NIT:
We are trying to confuse ourselves with the names.
For L1 we have cpu_l1_cache_map to store the tasks from the thread group,
but cpu_smallcore_map for keeping track of tasks.

For L2 we have thread_group_l2_cache_map to store the tasks from the thread
group, but cpu_l2_cache_map for keeping track of tasks.

I think we should do some renaming to keep the names consistent.
I would probably say rename the current cpu_l2_cache_map to
cpu_llc_cache_map and rename the new one, aka thread_group_l2_cache_map, to
cpu_l2_cache_map to be somewhat consistent.

>  /* SMP operations for this machine */
>  struct smp_ops_t *smp_ops;
> 
> @@ -840,7 +851,8 @@ static int init_cpu_cache_map(int cpu, unsigned int 
> cache_property)
>   if (!dn)
>   return -ENODATA;
> 
> - if (!(cache_property == THREAD_GROUP_SHARE_L1))
> + if (!(cache_property == THREAD_GROUP_SHARE_L1 ||
> +   cache_property == THREAD_GROUP_SHARE_L2))
>   return -EINVAL;
> 
>   if (!cpu_tgl->nr_properties) {
> @@ -867,7 +879,10 @@ static int init_cpu_cache_map(int cpu, unsign

Re: [powerpc:next-test 54/220] arch/powerpc/kernel/vdso32/vgettimeofday.c:13:5: warning: no previous prototype for function '__c_kernel_clock_gettime64'

2020-12-07 Thread Michael Ellerman
kernel test robot  writes:
> tree:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git 
> next-test
> head:   4e4ed87981c764498942c52004c620bb8f104eac
> commit: d0e3fc69d00d1f50d22d6b6acfc555ccda80ad1e [54/220] powerpc/vdso: 
> Provide __kernel_clock_gettime64() on vdso32
> config: powerpc64-randconfig-r011-20201204 (attached as .config)
> compiler: clang version 12.0.0 (https://github.com/llvm/llvm-project 
> 32c501dd88b62787d3a5ffda7aabcf4650dbe3cd)
> reproduce (this is a W=1 build):
> wget 
> https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
> ~/bin/make.cross
> chmod +x ~/bin/make.cross
> # install powerpc64 cross compiling tool for clang build
> # apt-get install binutils-powerpc64-linux-gnu
> # 
> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git/commit/?id=d0e3fc69d00d1f50d22d6b6acfc555ccda80ad1e
> git remote add powerpc 
> https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git
> git fetch --no-tags powerpc next-test
> git checkout d0e3fc69d00d1f50d22d6b6acfc555ccda80ad1e
> # save the attached .config to linux build tree
> COMPILER_INSTALL_PATH=$HOME/0day COMPILER=clang make.cross 
> ARCH=powerpc64 
>
> If you fix the issue, kindly add following tag as appropriate
> Reported-by: kernel test robot 
>
> All warnings (new ones prefixed by >>):
>
>arch/powerpc/kernel/vdso32/vgettimeofday.c:7:5: error: conflicting types 
> for '__c_kernel_clock_gettime'
>int __c_kernel_clock_gettime(clockid_t clock, struct old_timespec32 *ts,
>^

We're building vdso32, which is 32-bit code, we pass -m32:

  clang -Wp,-MMD,arch/powerpc/kernel/vdso32/.vgettimeofday.o.d -nostdinc 
-isystem /usr/lib/llvm-11/lib/clang/11.0.0/include 
-I/linux/arch/powerpc/include -I./arch/powerpc/include/generated 
-I/linux/include -I./include -I/linux/arch/powerpc/include/uapi 
-I./arch/powerpc/include/generated/uapi -I/linux/include/uapi 
-I./include/generated/uapi -include /linux/include/linux/kconfig.h -include 
/linux/include/linux/compiler_types.h -D__KERNEL__ -I /linux/arch/powerpc 
-DHAVE_AS_ATHIGH=1 -Qunused-arguments -Wall -Wundef -Werror=strict-prototypes 
-Wno-trigraphs -fno-strict-aliasing -fno-common -fshort-wchar -fno-PIE 
-Werror=implicit-function-declaration -Werror=implicit-int -Werror=return-type 
-Wno-format-security -std=gnu89 --target=powerpc64le-linux-gnu 
--prefix=/usr/bin/powerpc64le-linux-gnu- --gcc-toolchain=/usr -no-integrated-as 
-Werror=unknown-warning-option -mlittle-endian -m64 -msoft-float -pipe 
-mcpu=power8 -mtune=power9 -mno-altivec -mno-vsx -mno-spe 
-fno-asynchronous-unwind-tables -Wa,-mpower4 -Wa,-many -mlittle-endian 
-fno-delete-null-pointer-checks -Wno-frame-address 
-Wno-address-of-packed-member -Os -Wframe-larger-than=2048 -fno-stack-protector 
-Wno-format-invalid-specifier -Wno-gnu -mno-global-merge 
-Wno-unused-const-variable -fomit-frame-pointer -Wdeclaration-after-statement 
-Wvla -Wno-pointer-sign -Wno-array-bounds -fno-strict-overflow -fno-stack-check 
-Werror=date-time -Werror=incompatible-pointer-types 
-fmacro-prefix-map=/linux/= -Wno-initializer-overrides -Wno-format 
-Wno-sign-compare -Wno-format-zero-length -Wno-pointer-to-enum-cast 
-Wno-tautological-constant-out-of-range-compare -D_TASK_CPU=304 -shared 
-fno-common -fno-builtin -nostdlib -Wl,-soname=linux-vdso32.so.1 
-Wl,--hash-style=both -include /linux/lib/vdso/gettimeofday.c 
-fno-stack-protector -DDISABLE_BRANCH_PROFILING -ffreestanding 
-fasynchronous-unwind-tables   -I /linux/arch/powerpc/kernel/vdso32 -I 
./arch/powerpc/kernel/vdso32
-DKBUILD_MODFILE='"arch/powerpc/kernel/vdso32/vgettimeofday"' 
-DKBUILD_BASENAME='"vgettimeofday"' -DKBUILD_MODNAME='"vgettimeofday"' -m32 -c 
-o arch/powerpc/kernel/vdso32/vgettimeofday.o 
/linux/arch/powerpc/kernel/vdso32/vgettimeofday.c


>arch/powerpc/include/asm/vdso/gettimeofday.h:183:5: note: previous 
> declaration is here
>int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
>^

But this is inside an #ifdef __powerpc64__ block:

182 #ifdef __powerpc64__
183 int __c_kernel_clock_gettime(clockid_t clock, struct __kernel_timespec *ts,
184  const struct vdso_data *vd);


So is clang defining __powerpc64__ even for 32-bit code?

And the answer appears to be yes:

  $ clang --version
  Ubuntu clang version 11.0.0-2
  Target: powerpc64le-unknown-linux-gnu

  $ clang -m32 -dM -E - < /dev/null | grep powerpc
  #define __powerpc64__ 1
  #define __powerpc__ 1

Compare to gcc:

  $ gcc --version
  gcc (Ubuntu 10.2.0-13ubuntu1) 10.2.0
  
  $ gcc -m32 -dM -E - < /dev/null | grep powerpc
  #define __powerpc__ 1
  #define powerpc 1
  #define __powerpc 1


Which is fairly problematic, because we use the presence/absence of
__powerpc64__ to determine if we're building 64-bit/32-bit code in
several places.

Not sure what the best approach for fixing that is.

cheers


Re: [PATCH 1/3] powerpc/smp: Parse ibm,thread-groups with multiple properties

2020-12-07 Thread Srikar Dronamraju
* Gautham R. Shenoy  [2020-12-04 10:18:45]:

> From: "Gautham R. Shenoy" 



> 
>  static int parse_thread_groups(struct device_node *dn,
> -struct thread_groups *tg,
> -unsigned int property)
> +struct thread_groups_list *tglp)
>  {
> - int i;
> - u32 thread_group_array[3 + MAX_THREAD_LIST_SIZE];
> + int i = 0;
> + u32 *thread_group_array;
>   u32 *thread_list;
>   size_t total_threads;
> - int ret;
> + int ret = 0, count;
> + unsigned int property_idx = 0;

NIT:
tglx mentions in one of his recent comments to try to keep a reverse fir tree
ordering of variables where possible.

> 
> + count = of_property_count_u32_elems(dn, "ibm,thread-groups");
> + thread_group_array = kcalloc(count, sizeof(u32), GFP_KERNEL);
>   ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> -  thread_group_array, 3);
> +  thread_group_array, count);
>   if (ret)
> - return ret;
> -
> - tg->property = thread_group_array[0];
> - tg->nr_groups = thread_group_array[1];
> - tg->threads_per_group = thread_group_array[2];
> - if (tg->property != property ||
> - tg->nr_groups < 1 ||
> - tg->threads_per_group < 1)
> - return -ENODATA;
> + goto out_free;
> 
> - total_threads = tg->nr_groups * tg->threads_per_group;
> + while (i < count && property_idx < MAX_THREAD_GROUP_PROPERTIES) {
> + int j;
> + struct thread_groups *tg = &tglp->property_tgs[property_idx++];

NIT: same as above.

> 
> - ret = of_property_read_u32_array(dn, "ibm,thread-groups",
> -  thread_group_array,
> -  3 + total_threads);
> - if (ret)
> - return ret;
> + tg->property = thread_group_array[i];
> + tg->nr_groups = thread_group_array[i + 1];
> + tg->threads_per_group = thread_group_array[i + 2];
> + total_threads = tg->nr_groups * tg->threads_per_group;
> +
> + thread_list = &thread_group_array[i + 3];
> 
> - thread_list = &thread_group_array[3];
> + for (j = 0; j < total_threads; j++)
> + tg->thread_list[j] = thread_list[j];
> + i = i + 3 + total_threads;

Can't we simply use memcpy instead?

> + }
> 
> - for (i = 0 ; i < total_threads; i++)
> - tg->thread_list[i] = thread_list[i];
> + tglp->nr_properties = property_idx;
> 
> - return 0;
> +out_free:
> + kfree(thread_group_array);
> + return ret;
>  }
> 
>  /*
> @@ -805,24 +827,39 @@ static int get_cpu_thread_group_start(int cpu, struct 
> thread_groups *tg)
>   return -1;
>  }
> 
> -static int init_cpu_l1_cache_map(int cpu)
> +static int init_cpu_cache_map(int cpu, unsigned int cache_property)
> 
>  {
>   struct device_node *dn = of_get_cpu_node(cpu, NULL);
> - struct thread_groups tg = {.property = 0,
> -.nr_groups = 0,
> -.threads_per_group = 0};
> + struct thread_groups *tg = NULL;
>   int first_thread = cpu_first_thread_sibling(cpu);
>   int i, cpu_group_start = -1, err = 0;
> + cpumask_var_t *mask;
> + struct thread_groups_list *cpu_tgl = &tgl[cpu];

NIT: same as 1st comment.

> 
>   if (!dn)
>   return -ENODATA;
> 
> - err = parse_thread_groups(dn, &tg, THREAD_GROUP_SHARE_L1);
> - if (err)
> - goto out;
> + if (!(cache_property == THREAD_GROUP_SHARE_L1))
> + return -EINVAL;
> 
> - cpu_group_start = get_cpu_thread_group_start(cpu, &tg);
> + if (!cpu_tgl->nr_properties) {
> + err = parse_thread_groups(dn, cpu_tgl);
> + if (err)
> + goto out;
> + }
> +
> + for (i = 0; i < cpu_tgl->nr_properties; i++) {
> + if (cpu_tgl->property_tgs[i].property == cache_property) {
> + tg = &cpu_tgl->property_tgs[i];
> + break;
> + }
> + }
> +
> + if (!tg)
> + return -EINVAL;
> +
> + cpu_group_start = get_cpu_thread_group_start(cpu, tg);

This whole hunk should be moved to a new function and called before
init_cpu_cache_map. It will simplify the logic to a great extent.

> 
>   if (unlikely(cpu_group_start == -1)) {
>   WARN_ON_ONCE(1);
> @@ -830,11 +867,12 @@ static int init_cpu_l1_cache_map(int cpu)
>   goto out;
>   }
> 
> - zalloc_cpumask_var_node(&per_cpu(cpu_l1_cache_map, cpu),
> - GFP_KERNEL, cpu_to_node(cpu));
> + mask = &per_cpu(cpu_l1_cache_map, cpu);
> +
> + zalloc_cpumask_var_node(mask, GFP_KERNEL, cpu_to_node(cpu));
> 

This hunk (and the next hunk) should be moved to next patch.

>   for (i = first_thread; i < 

Re: Build regressions/improvements in v5.10-rc7

2020-12-07 Thread Geert Uytterhoeven
On Mon, Dec 7, 2020 at 1:08 PM Geert Uytterhoeven  wrote:
> JFYI, when comparing v5.10-rc7[1] to v5.10-rc6[3], the summaries are:
>   - build errors: +1/-0

  + /kisskb/src/arch/powerpc/platforms/powermac/smp.c: error: implicit
declaration of function 'cleanup_cpu_mmu_context'
[-Werror=implicit-function-declaration]:  => 914:2

v5.10-rc7/powerpc-gcc4.9/pmac32_defconfig+SMP

> [1] 
> http://kisskb.ellerman.id.au/kisskb/branch/linus/head/0477e92881850d44910a7e94fc2c46f96faa131f/
>  (all 192 configs)
> [3] 
> http://kisskb.ellerman.id.au/kisskb/branch/linus/head/b65054597872ce3aefbc6a666385eabdf9e288da/
>  (all 192 configs)

Gr{oetje,eeting}s,

Geert

-- 
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- ge...@linux-m68k.org

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say "programmer" or something like that.
-- Linus Torvalds


Re: [PATCH v2 01/17] ibmvfc: add vhost fields and defaults for MQ enablement

2020-12-07 Thread Hannes Reinecke

On 12/4/20 3:26 PM, Brian King wrote:

On 12/2/20 11:27 AM, Tyrel Datwyler wrote:

On 12/2/20 7:14 AM, Brian King wrote:

On 12/1/20 6:53 PM, Tyrel Datwyler wrote:

Introduce several new vhost fields for managing MQ state of the adapter
as well as initial defaults for MQ enablement.

Signed-off-by: Tyrel Datwyler 
---
  drivers/scsi/ibmvscsi/ibmvfc.c |  9 -
  drivers/scsi/ibmvscsi/ibmvfc.h | 13 +++--
  2 files changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/ibmvscsi/ibmvfc.c b/drivers/scsi/ibmvscsi/ibmvfc.c
index 42e4d35e0d35..f1d677a7423d 100644
--- a/drivers/scsi/ibmvscsi/ibmvfc.c
+++ b/drivers/scsi/ibmvscsi/ibmvfc.c
@@ -5161,12 +5161,13 @@ static int ibmvfc_probe(struct vio_dev *vdev, const 
struct vio_device_id *id)
}
  
  	shost->transportt = ibmvfc_transport_template;

-   shost->can_queue = max_requests;
+   shost->can_queue = (max_requests / IBMVFC_SCSI_HW_QUEUES);


This doesn't look right. can_queue is the SCSI host queue depth, not the MQ 
queue depth.


Our max_requests is the total number of commands allowed across all queues. From
what I understand, can_queue is the total number of commands in flight allowed
for each hw queue.

 /*
  * In scsi-mq mode, the number of hardware queues supported by the LLD.
  *
  * Note: it is assumed that each hardware queue has a queue depth of
  * can_queue. In other words, the total queue depth per host
  * is nr_hw_queues * can_queue. However, for when host_tagset is set,
  * the total queue depth is can_queue.
  */

We currently don't use the host wide shared tagset.


Ok. I missed that bit... In that case, since we allocate by default only 100
event structs, if we slice that across IBMVFC_SCSI_HW_QUEUES (16) queues, then
we end up with only about 6 commands that can be outstanding per queue,
which is going to really hurt performance... I'd suggest bumping up
IBMVFC_MAX_REQUESTS_DEFAULT from 100 to 1000 as a starting point.


Before doing that I'd rather use the host-wide shared tagset.
Increasing the number of requests will increase the memory footprint of 
the driver (as each request will be statically allocated).


Cheers,

Hannes
--
Dr. Hannes Reinecke                Kernel Storage Architect
h...@suse.de  +49 911 74053 688
SUSE Software Solutions GmbH, Maxfeldstr. 5, 90409 Nürnberg
HRB 36809 (AG Nürnberg), Geschäftsführer: Felix Imendörffer


[Bug 209277] powerpc: obsolete driver: Marvell MV64X60 MPSC

2020-12-07 Thread bugzilla-daemon
https://bugzilla.kernel.org/show_bug.cgi?id=209277

--- Comment #2 from Borislav Petkov (b...@alien8.de) ---
https://git.kernel.org/pub/scm/linux/kernel/git/ras/ras.git/commit/?h=edac-drivers&id=0385979a30dc4abdef2dcebbccef818947c80cb7

-- 
You are receiving this mail because:
You are watching the assignee of the bug.

Re: [PATCH] EDAC/mv64x60: Remove orphan mv64x60 driver

2020-12-07 Thread Borislav Petkov
On Mon, Dec 07, 2020 at 03:02:53PM +1100, Michael Ellerman wrote:
> The mv64x60 EDAC driver depends on CONFIG_MV64X60. But that symbol is
> not user-selectable, and the last code that selected it was removed
> with the C2K board support in 2018, see:
> 
>   92c8c16f3457 ("powerpc/embedded6xx: Remove C2K board support")
> 
> That means the driver is now dead code, so remove it.
> 
> Suggested-by: Borislav Petkov 
> Signed-off-by: Michael Ellerman 
> ---
>  drivers/edac/Kconfig|   7 -
>  drivers/edac/Makefile   |   1 -
>  drivers/edac/mv64x60_edac.c | 883 
>  drivers/edac/mv64x60_edac.h | 114 -
>  4 files changed, 1005 deletions(-)
>  delete mode 100644 drivers/edac/mv64x60_edac.c
>  delete mode 100644 drivers/edac/mv64x60_edac.h

Gladly taken and applied, thanks!

-- 
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette


Re: [PATCH v5 10/19] dt-bindings: usb: Convert DWC USB3 bindings to DT schema

2020-12-07 Thread Chunfeng Yun
On Sat, 2020-12-05 at 18:24 +0300, Serge Semin wrote:
> DWC USB3 DT node is supposed to be compliant with the Generic xHCI
> Controller schema, but with additional vendor-specific properties, the
> controller-specific reference clocks and PHYs. So let's convert the
> currently available legacy text-based DWC USB3 bindings to the DT schema
> and make sure the DWC USB3 nodes are also validated against the
> usb-xhci.yaml schema.
> 
> Note 1. we have to discard the nodename restriction of being prefixed with
> "dwc3@" string, since in accordance with the usb-hcd.yaml schema USB nodes
> are supposed to be named as "^usb(@.*)".
> 
> Note 2. The clock-related properties are marked as optional to match the
> DWC USB3 driver expectation and to improve the bindings maintainability,
> so that in case there is a glue-node, it would be responsible for the
> clocks initialization.
> 
> Signed-off-by: Serge Semin 
> 
> ---
> 
> Changelog v2:
> - Discard '|' from the descriptions, since we don't need to preserve
>   the text formatting in any of them.
> - Drop quotes from around the string constants.
> - Fix the "clock-names" prop description to be referring the enumerated
>   clock-names instead of the ones from the Databook.
> 
> Changelog v3:
> - Apply usb-xhci.yaml# schema only if the controller is supposed to work
>   as either host or otg.
> 
> Changelog v4:
> - Apply usb-drd.yaml schema first. If the controller is configured
>   to work in a gadget mode only, then apply the usb.yaml schema too,
>   otherwise apply the usb-xhci.yaml schema.
> - Discard Rob's Reviewed-by tag. Please review the patch one more
>   time.
> 
> Changelog v5:
> - Add "snps,dis-split-quirk" property to the DWC USB3 DT schema.
> - Add a commit log text about the clock-related property changes.
> - Make sure dr_mode exist to apply the USB-gadget-only schema.
> ---
>  .../devicetree/bindings/usb/dwc3.txt  | 128 ---
>  .../devicetree/bindings/usb/snps,dwc3.yaml| 312 ++
>  2 files changed, 312 insertions(+), 128 deletions(-)
>  delete mode 100644 Documentation/devicetree/bindings/usb/dwc3.txt
>  create mode 100644 Documentation/devicetree/bindings/usb/snps,dwc3.yaml
> 
> diff --git a/Documentation/devicetree/bindings/usb/dwc3.txt 
> b/Documentation/devicetree/bindings/usb/dwc3.txt
> deleted file mode 100644
> index 1aae2b6160c1..
> --- a/Documentation/devicetree/bindings/usb/dwc3.txt
> +++ /dev/null
> @@ -1,128 +0,0 @@
> -synopsys DWC3 CORE
> -
[...]
> +
> +  clock-names:
> +contains:
> +  anyOf:
> +- enum: [bus_early, ref, suspend]
> +- true
> +
> +  usb-phy:
> +   minItems: 1
indentation: expected 4
> +   items:
> + - description: USB2/HS PHY
> + - description: USB3/SS PHY
> +
> +  phys:
> +minItems: 1
> +items:
> +  - description: USB2/HS PHY
> +  - description: USB3/SS PHY
> +
> +  phy-names:
> +minItems: 1
> +items:
> +  - const: usb2-phy
> +  - const: usb3-phy
[...]