[PATCH v2 49/52] powerpc/64s/exceptions: machine check move unrecoverable handling out of line

2019-06-19 Thread Nicholas Piggin
Similarly to the previous patch, move unrecoverable handling out of
line, which makes the regular path less cluttered and easier to
follow.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 83 +---
 1 file changed, 39 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index be83a4e71814..e8f644d6f310 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1016,9 +1016,9 @@ EXC_COMMON_BEGIN(machine_check_early_common)
bne 1f
/* First machine check entry */
ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
-1: subir1,r1,INT_FRAME_SIZE/* alloc stack frame */
-   /* Limit nested MCE to level 4 to avoid stack overflow */
-   bge cr1,2f  /* Check if we hit limit of 4 */
+1: /* Limit nested MCE to level 4 to avoid stack overflow */
+   bgt cr1,unrecoverable_mce   /* Check if we hit limit of 4 */
+   subir1,r1,INT_FRAME_SIZE/* alloc stack frame */
 
EXCEPTION_PROLOG_COMMON_1()
/* We don't touch AMR here, we never go to virtual mode */
@@ -1032,25 +1032,9 @@ EXC_COMMON_BEGIN(machine_check_early_common)
 
li  r10,MSR_RI
mtmsrd  r10,1
-
bl  enable_machine_check
b   machine_check_handle_early
 
-2:
-   /* Stack overflow. Stay on emergency stack and panic.
-* Keep the ME bit off while panic-ing, so that if we hit
-* another machine check we checkstop.
-*/
-   addir1,r1,INT_FRAME_SIZE/* go back to previous stack frame */
-   ld  r11,PACAKMSR(r13)
-   LOAD_HANDLER(r12, unrecover_mce)
-   li  r10,MSR_ME
-   andcr11,r11,r10 /* Turn off MSR_ME */
-   mtspr   SPRN_SRR0,r12
-   mtspr   SPRN_SRR1,r11
-   RFI_TO_KERNEL
-   b   .   /* prevent speculative execution */
-
 EXC_COMMON_BEGIN(machine_check_common)
/*
 * Machine check is different because we use a different
@@ -1166,32 +1150,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 * If yes, then stay on emergency stack and panic.
 */
andi.   r11,r12,MSR_RI
-   bne 2f
-1: mfspr   r11,SPRN_SRR0
-   LOAD_HANDLER(r10,unrecover_mce)
-   mtspr   SPRN_SRR0,r10
-   ld  r10,PACAKMSR(r13)
-   /*
-* We are going down. But there are chances that we might get hit by
-* another MCE during panic path and we may run into unstable state
-* with no way out. Hence, turn ME bit off while going down, so that
-* when another MCE is hit during panic path, system will checkstop
-* and hypervisor will get restarted cleanly by SP.
-*/
-   li  r3,MSR_ME
-   andcr10,r10,r3  /* Turn off MSR_ME */
-   mtspr   SPRN_SRR1,r10
-   RFI_TO_KERNEL
-   b   .
-2:
+   beq unrecoverable_mce
+
/*
 * Check if we have successfully handled/recovered from error, if not
 * then stay on emergency stack and panic.
 */
ld  r3,RESULT(r1)   /* Load result */
cmpdi   r3,0/* see if we handled MCE successfully */
-
-   beq 1b  /* if !handled then panic */
+   beq unrecoverable_mce /* if !handled then panic */
 
/*
 * Return from MC interrupt.
@@ -1213,17 +1180,31 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0
 
-EXC_COMMON_BEGIN(unrecover_mce)
+EXC_COMMON_BEGIN(unrecoverable_mce)
+   /*
+* We are going down. But there are chances that we might get hit by
+* another MCE during panic path and we may run into unstable state
+* with no way out. Hence, turn ME bit off while going down, so that
+* when another MCE is hit during panic path, system will checkstop
+* and hypervisor will get restarted cleanly by SP.
+*/
+   bl  disable_machine_check
+   ld  r10,PACAKMSR(r13)
+   li  r3,MSR_ME
+   andcr10,r10,r3
+   mtmsrd  r10
+
/* Invoke machine_check_exception to print MCE event and panic. */
addir3,r1,STACK_FRAME_OVERHEAD
bl  machine_check_exception
+
/*
-* We will not reach here. Even if we did, there is no way out. Call
-* unrecoverable_exception and die.
+* We will not reach here. Even if we did, there is no way out.
+* Call unrecoverable_exception and die.
 */
-1: addir3,r1,STACK_FRAME_OVERHEAD
+   addir3,r1,STACK_FRAME_OVERHEAD
bl  unrecoverable_exception
-   b   1b
+   b   .
 
 
 EXC_REAL_BEGIN(data_access, 0x300, 0x80)
@@ -2297,6 +2278,20 @@ enable_machine_check:
 1: mtlrr0
blr
 
+disable_mach
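
The nesting limit that the reshuffled entry code enforces can be summarised in C. This is an illustrative sketch only (not kernel code), mirroring the patched ordering in which the depth compare in cr1 is tested before the stack frame is allocated:

	#define MAX_MCE_DEPTH	4	/* from asm/exception-64s.h */

	struct paca { unsigned short in_mce; };

	/* Sketch of machine_check_early_common's entry accounting. */
	static int mce_enter(struct paca *paca)
	{
		unsigned short depth = paca->in_mce;	/* lhz PACA_IN_MCE */

		paca->in_mce = depth + 1;	/* addi; sth */
		if (depth > MAX_MCE_DEPTH)	/* cmpwi cr1; bgt cr1 */
			return -1;		/* -> unrecoverable_mce */
		return 0;			/* subi r1: alloc frame */
	}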

[PATCH v2 48/52] powerpc/64s/exception: simplify machine check early path

2019-06-19 Thread Nicholas Piggin
machine_check_early_common can reach machine_check_handle_early
directly now that it runs at the relocated address. The only remaining
reason to do the rfi sequence is to enable MSR[ME]. Move that into a
helper function to make the normal code path a bit easier to read.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 30 
 1 file changed, 22 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 384f591ef078..be83a4e71814 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1030,13 +1030,12 @@ EXC_COMMON_BEGIN(machine_check_early_common)
std r3,_DAR(r1)
std r4,_DSISR(r1)
 
-   mfmsr   r11 /* get MSR value */
-   ori r11,r11,MSR_ME|MSR_RI   /* turn on ME, RI */
-   LOAD_HANDLER(r12, machine_check_handle_early)
-1: mtspr   SPRN_SRR0,r12
-   mtspr   SPRN_SRR1,r11
-   RFI_TO_KERNEL
-   b   .   /* prevent speculative execution */
+   li  r10,MSR_RI
+   mtmsrd  r10,1
+
+   bl  enable_machine_check
+   b   machine_check_handle_early
+
 2:
/* Stack overflow. Stay on emergency stack and panic.
 * Keep the ME bit off while panic-ing, so that if we hit
@@ -1047,7 +1046,9 @@ EXC_COMMON_BEGIN(machine_check_early_common)
LOAD_HANDLER(r12, unrecover_mce)
li  r10,MSR_ME
andcr11,r11,r10 /* Turn off MSR_ME */
-   b   1b
+   mtspr   SPRN_SRR0,r12
+   mtspr   SPRN_SRR1,r11
+   RFI_TO_KERNEL
b   .   /* prevent speculative execution */
 
 EXC_COMMON_BEGIN(machine_check_common)
@@ -2283,6 +2284,19 @@ CLOSE_FIXED_SECTION(virt_trampolines);
 
 USE_TEXT_SECTION()
 
+enable_machine_check:
+   mflrr0
+   bcl 20,31,$+4
+0: mflrr3
+   addir3,r3,(1f - 0b)
+   mtspr   SPRN_SRR0,r3
+   mfmsr   r3
+   ori r3,r3,MSR_ME
+   mtspr   SPRN_SRR1,r3
+   RFI_TO_KERNEL
+1: mtlrr0
+   blr
+
 /*
  * Hash table stuff
  */
-- 
2.20.1
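
A note on the enable_machine_check helper added above: MSR[ME] cannot be reliably changed with mtmsrd; it is applied through an rfid-class instruction (or an interrupt), which is why the helper fakes up an interrupt return to itself. Below is an annotated restatement; the code is as in the patch, the comments are added here:

	enable_machine_check:
		mflr	r0		/* preserve LR; bcl clobbers it */
		bcl	20,31,$+4	/* branch-always-and-link to the next
					 * instruction, in the form CPUs
					 * special-case so it does not
					 * unbalance the link stack predictor */
	0:	mflr	r3		/* r3 = runtime address of 0: */
		addi	r3,r3,(1f - 0b)	/* r3 = runtime address of 1: */
		mtspr	SPRN_SRR0,r3	/* rfid return point */
		mfmsr	r3
		ori	r3,r3,MSR_ME	/* set ME in the MSR image ... */
		mtspr	SPRN_SRR1,r3
		RFI_TO_KERNEL		/* ... and apply it, landing at 1: */
	1:	mtlr	r0		/* restore LR and return */
		blr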



[PATCH v2 47/52] powerpc/64s/exception: machine check restructure handler to be more regular

2019-06-19 Thread Nicholas Piggin
Follow the pattern of the sreset and HMI handlers more closely by
using EXCEPTION_PROLOG_COMMON_1 rather than open-coding it. Run the
handler at the relocated location.

This will help with simplification and code sharing.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 86 ++--
 1 file changed, 42 insertions(+), 44 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 8ed787dc579c..384f591ef078 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -958,17 +958,34 @@ BEGIN_FTR_SECTION
b   machine_check_pseries
 END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 #endif
-   b   machine_check_common_early
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 0, 0x200, 1, 1, 0
+   mfctr   r10 /* save ctr */
+   BRANCH_TO_C000(r11, machine_check_early_common)
+   /*
+* MSR_RI is not enabled, because PACA_EXMC is being used, so a
+* nested machine check corrupts it. machine_check_common enables
+* MSR_RI.
+*/
 EXC_REAL_END(machine_check, 0x200, 0x100)
 EXC_VIRT_NONE(0x4200, 0x100)
-TRAMP_REAL_BEGIN(machine_check_common_early)
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 0, 0x200, 0, 0, 0
+
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
+   /* See comment at machine_check exception, don't turn on RI */
+   EXCEPTION_PROLOG_0 PACA_EXMC
+machine_check_pseries:
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
+   EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0
+#endif
+
+TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
+
+EXC_COMMON_BEGIN(machine_check_early_common)
+   mtctr   r10 /* Restore ctr */
+   mfspr   r11,SPRN_SRR0
+   mfspr   r12,SPRN_SRR1
+
/*
-* Register contents:
-* R13  = PACA
-* R9   = CR
-* Original R9 to R13 is saved on PACA_EXMC
-*
 * Switch to mc_emergency stack and handle re-entrancy (we limit
 * the nested MCE upto level 4 to avoid stack overflow).
 * Save MCE registers srr1, srr0, dar and dsisr and then set ME=1
@@ -989,32 +1006,30 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
 * the machine check is handled then the idle wakeup code is called
 * to restore state.
 */
-   mr  r11,r1  /* Save r1 */
lhz r10,PACA_IN_MCE(r13)
cmpwi   r10,0   /* Are we in nested machine check */
-   bne 0f  /* Yes, we are. */
-   /* First machine check entry */
-   ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
-0: subir1,r1,INT_FRAME_SIZE/* alloc stack frame */
+   cmpwi   cr1,r10,MAX_MCE_DEPTH   /* Are we at maximum nesting */
addir10,r10,1   /* increment paca->in_mce */
sth r10,PACA_IN_MCE(r13)
+
+   mr  r10,r1  /* Save r1 */
+   bne 1f
+   /* First machine check entry */
+   ld  r1,PACAMCEMERGSP(r13)   /* Use MC emergency stack */
+1: subir1,r1,INT_FRAME_SIZE/* alloc stack frame */
/* Limit nested MCE to level 4 to avoid stack overflow */
-   cmpwi   r10,MAX_MCE_DEPTH
-   bgt 2f  /* Check if we hit limit of 4 */
-   std r11,GPR1(r1)/* Save r1 on the stack. */
-   std r11,0(r1)   /* make stack chain pointer */
-   mfspr   r11,SPRN_SRR0   /* Save SRR0 */
-   std r11,_NIP(r1)
-   mfspr   r11,SPRN_SRR1   /* Save SRR1 */
-   std r11,_MSR(r1)
-   mfspr   r11,SPRN_DAR/* Save DAR */
-   std r11,_DAR(r1)
-   mfspr   r11,SPRN_DSISR  /* Save DSISR */
-   std r11,_DSISR(r1)
-   std r9,_CCR(r1) /* Save CR in stackframe */
+   bge cr1,2f  /* Check if we hit limit of 4 */
+
+   EXCEPTION_PROLOG_COMMON_1()
/* We don't touch AMR here, we never go to virtual mode */
-   /* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
+   EXCEPTION_PROLOG_COMMON_3(0x200)
+
+   ld  r3,PACA_EXMC+EX_DAR(r13)
+   lwz r4,PACA_EXMC+EX_DSISR(r13)
+   std r3,_DAR(r1)
+   std r4,_DSISR(r1)
+
mfmsr   r11 /* get MSR value */
ori r11,r11,MSR_ME|MSR_RI   /* turn on ME, RI */
LOAD_HANDLER(r12, machine_check_handle_early)
@@ -1035,21 +1050,6 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
b   1b
b   .   /* prevent speculative execution */
 
-#ifdef CONFIG_PPC_PSERIES
-TRAMP_REAL_BEGIN(machine_check_fwnmi)
-   EXCEPTION_PROLOG_0 PACA_EXMC
-machine_check_pseries:
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
-   EXCEPTION_PROLOG_2_REAL machine_check_c
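
For reference, the three-part common prolog this patch adopts, as used in the hunk above for machine check. The trailing comments summarising what each part saves are added here, based on the macro definitions elsewhere in this file:

	EXCEPTION_PROLOG_COMMON_1()		/* stack frame, r0/r1 */
	EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)	/* r9-r13 from save area */
	EXCEPTION_PROLOG_COMMON_3(0x200)	/* r2-r8, XER, trap number */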

[PATCH v2 46/52] powerpc/64s/exception: fix machine check early should not set AMR

2019-06-19 Thread Nicholas Piggin
The early machine check runs in real mode, so locking is unnecessary.
Worse, the windup does not restore AMR, so this can result in a false
KUAP fault after a recoverable machine check hits inside a user copy
operation.

Fix this similarly to HMI by just avoiding the kuap lock in the
early machine check handler (it will be set by the late handler that
runs in virtual mode if that runs).

Fixes: 890274c2dc4c0 ("powerpc/64s: Implement KUAP for Radix MMU")
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index ab22af2509d8..8ed787dc579c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1012,7 +1012,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
mfspr   r11,SPRN_DSISR  /* Save DSISR */
std r11,_DSISR(r1)
std r9,_CCR(r1) /* Save CR in stackframe */
-   kuap_save_amr_and_lock r9, r10, cr1
+   /* We don't touch AMR here, we never go to virtual mode */
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr   r11 /* get MSR value */
-- 
2.20.1



[PATCH v2 45/52] powerpc/64s/exception: machine check windup restore cfar for host delivery

2019-06-19 Thread Nicholas Piggin
Bare metal machine checks run an "early" handler in real mode which
potentially flushes faulting translation structures, among other
things, before running the main handler which reports the event.

The main handler runs as a normal interrupt handler, after a "windup"
that sets registers back as they were at interrupt entry. CFAR does
not get restored by the windup code, so add that. The current handler
does not appear to use CFAR anywhere, because the main handler is not
run if the MCE happens in kernel-mode and the user-mode message is not
a register trace. However it may be useful in some cases or future
changes (xmon, panic on mce, etc).

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 4 
 1 file changed, 4 insertions(+)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 61c96502d2a8..ab22af2509d8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1205,6 +1205,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 
 9:
/* Deliver the machine check to host kernel in V mode. */
+BEGIN_FTR_SECTION
+   ld  r10,ORIG_GPR3(r1)
+   mtspr   SPRN_CFAR,r10
+END_FTR_SECTION_IFSET(CPU_FTR_CFAR)
MACHINE_CHECK_HANDLER_WINDUP
EXCEPTION_PROLOG_0 PACA_EXMC
EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
-- 
2.20.1



[PATCH v2 44/52] powerpc/64s/exception: separate pseries and powernv mce delivery paths

2019-06-19 Thread Nicholas Piggin
This will allow standardised interrupt entry macros to be used in
future. These paths may be de-duplicated again after that, if the
code allows.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f3362adc99e6..61c96502d2a8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -952,11 +952,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
 * vector
 */
EXCEPTION_PROLOG_0 PACA_EXMC
+#ifdef CONFIG_PPC_PSERIES
 BEGIN_FTR_SECTION
+   /* Some hypervisors inject directly to 0x200 if FWNMI is not enabled */
+   b   machine_check_pseries
+END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
+#endif
b   machine_check_common_early
-FTR_SECTION_ELSE
-   b   machine_check_pSeries_0
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
 EXC_REAL_END(machine_check, 0x200, 0x100)
 EXC_VIRT_NONE(0x4200, 0x100)
 TRAMP_REAL_BEGIN(machine_check_common_early)
@@ -1033,18 +1035,18 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
b   1b
b   .   /* prevent speculative execution */
 
-TRAMP_REAL_BEGIN(machine_check_pSeries)
-   .globl machine_check_fwnmi
-machine_check_fwnmi:
+#ifdef CONFIG_PPC_PSERIES
+TRAMP_REAL_BEGIN(machine_check_fwnmi)
EXCEPTION_PROLOG_0 PACA_EXMC
-machine_check_pSeries_0:
+machine_check_pseries:
EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
+   EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0
/*
 * MSR_RI is not enabled, because PACA_EXMC is being used, so a
 * nested machine check corrupts it. machine_check_common enables
 * MSR_RI.
 */
-   EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0
+#endif
 
 TRAMP_KVM_SKIP(PACA_EXMC, 0x200)
 
@@ -1205,7 +1207,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
EXCEPTION_PROLOG_0 PACA_EXMC
-   b   machine_check_pSeries_0
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
+   EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0
 
 EXC_COMMON_BEGIN(unrecover_mce)
/* Invoke machine_check_exception to print MCE event and panic. */
-- 
2.20.1



[PATCH v2 43/52] powerpc/64s/exception: machine check early only runs in HV mode

2019-06-19 Thread Nicholas Piggin
machine_check_common_early and machine_check_handle_early only run in
HVMODE. Remove dead code.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 38 +---
 1 file changed, 6 insertions(+), 32 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index b12755a4f884..f3362adc99e6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1014,10 +1014,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early)
/* Save r9 through r13 from EXMC save area to stack frame. */
EXCEPTION_PROLOG_COMMON_2(PACA_EXMC)
mfmsr   r11 /* get MSR value */
-BEGIN_FTR_SECTION
-   ori r11,r11,MSR_ME  /* turn on ME bit */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
-   ori r11,r11,MSR_RI  /* turn on RI bit */
+   ori r11,r11,MSR_ME|MSR_RI   /* turn on ME, RI */
LOAD_HANDLER(r12, machine_check_handle_early)
 1: mtspr   SPRN_SRR0,r12
mtspr   SPRN_SRR1,r11
@@ -1124,11 +1121,8 @@ EXC_COMMON_BEGIN(machine_check_handle_early)
bl  machine_check_early
std r3,RESULT(r1)   /* Save result */
ld  r12,_MSR(r1)
-BEGIN_FTR_SECTION
-   b   4f
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 
-#ifdef CONFIG_PPC_P7_NAP
+#ifdef CONFIG_PPC_P7_NAP
/*
 * Check if thread was in power saving mode. We come here when any
 * of the following is true:
@@ -1141,7 +1135,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 BEGIN_FTR_SECTION
rlwinm. r11,r12,47-31,30,31
bne machine_check_idle_common
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206)
 #endif
 
/*
@@ -1150,12 +1144,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 */
rldicl. r11,r12,4,63/* See if MC hit while in HV mode. */
beq 5f
-4: andi.   r11,r12,MSR_PR  /* See if coming from user. */
+   andi.   r11,r12,MSR_PR  /* See if coming from user. */
bne 9f  /* continue in V mode if we are. */
 
 5:
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-BEGIN_FTR_SECTION
/*
 * We are coming from kernel context. Check if we are coming from
 * guest. if yes, then we can continue. We will fall through
@@ -1164,7 +1157,6 @@ BEGIN_FTR_SECTION
lbz r11,HSTATE_IN_GUEST(r13)
cmpwi   r11,0   /* Check if coming from guest */
bne 9f  /* continue if we are. */
-END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
 #endif
/*
 * At this point we are not sure about what context we come from.
@@ -1199,7 +1191,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE)
cmpdi   r3,0/* see if we handled MCE successfully */
 
beq 1b  /* if !handled then panic */
-BEGIN_FTR_SECTION
+
/*
 * Return from MC interrupt.
 * Queue up the MCE event so that we can log it later, while
@@ -1208,18 +1200,7 @@ BEGIN_FTR_SECTION
bl  machine_check_queue_event
MACHINE_CHECK_HANDLER_WINDUP
RFI_TO_USER_OR_KERNEL
-FTR_SECTION_ELSE
-   /*
-* pSeries: Return from MC interrupt. Before that stay on emergency
-* stack and call machine_check_exception to log the MCE event.
-*/
-   LOAD_HANDLER(r10,mce_return)
-   mtspr   SPRN_SRR0,r10
-   ld  r10,PACAKMSR(r13)
-   mtspr   SPRN_SRR1,r10
-   RFI_TO_KERNEL
-   b   .
-ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
+
 9:
/* Deliver the machine check to host kernel in V mode. */
MACHINE_CHECK_HANDLER_WINDUP
@@ -1238,13 +1219,6 @@ EXC_COMMON_BEGIN(unrecover_mce)
bl  unrecoverable_exception
b   1b
 
-EXC_COMMON_BEGIN(mce_return)
-   /* Invoke machine_check_exception to print MCE event and return. */
-   addir3,r1,STACK_FRAME_OVERHEAD
-   bl  machine_check_exception
-   MACHINE_CHECK_HANDLER_WINDUP
-   RFI_TO_KERNEL
-   b   .
 
 EXC_REAL_BEGIN(data_access, 0x300, 0x80)
EXCEPTION_PROLOG_0 PACA_EXGEN
-- 
2.20.1



[PATCH v2 42/52] powerpc/64s/exception: machine check fwnmi does not trigger when in HV mode

2019-06-19 Thread Nicholas Piggin
Remove dead code.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 286bd5670d60..b12755a4f884 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1040,9 +1040,6 @@ TRAMP_REAL_BEGIN(machine_check_pSeries)
.globl machine_check_fwnmi
 machine_check_fwnmi:
EXCEPTION_PROLOG_0 PACA_EXMC
-BEGIN_FTR_SECTION
-   b   machine_check_common_early
-END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 machine_check_pSeries_0:
EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
/*
-- 
2.20.1



[PATCH v2 41/52] powerpc/tm: update comment about interrupt re-entrancy

2019-06-19 Thread Nicholas Piggin
Since the system reset interrupt began to use its own stack, and
machine check interrupts have done so for some time, r1 can be
changed without clearing MSR[RI], provided no other interrupts
(including SLB misses) are taken.

MSR[RI] does have to be cleared when using SCRATCH0, however.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/tm.S | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S
index 9fabdce255cd..6ba0fdd1e7f8 100644
--- a/arch/powerpc/kernel/tm.S
+++ b/arch/powerpc/kernel/tm.S
@@ -148,7 +148,7 @@ _GLOBAL(tm_reclaim)
/* Stash the stack pointer away for use after reclaim */
std r1, PACAR1(r13)
 
-   /* Clear MSR RI since we are about to change r1, EE is already off. */
+   /* Clear MSR RI since we are about to use SCRATCH0, EE is already off */
li  r5, 0
mtmsrd  r5, 1
 
@@ -474,7 +474,7 @@ restore_gprs:
 
REST_GPR(7, r7)
 
-   /* Clear MSR RI since we are about to change r1. EE is already off */
+   /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */
li  r5, 0
mtmsrd  r5, 1
 
-- 
2.20.1
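
The rule the updated comments capture can be sketched as follows. This is a schematic only (assumption: not the actual tm.S sequence): MSR[RI] must be 0 for exactly the window in which SCRATCH0 holds live data, because any interrupt entry stores its own r13 to SCRATCH0 and would clobber it, while r1 changes alone no longer need RI cleared:

	li	r5, 0
	mtmsrd	r5, 1		/* RI=0: unrecoverable window begins */
	SET_SCRATCH0(r1)	/* SCRATCH0 now live; an interrupt here
				 * would overwrite it with its own r13 */
	/* ... use SCRATCH0 ... */
	GET_SCRATCH0(r1)	/* SCRATCH0 dead again */
	li	r5, MSR_RI
	mtmsrd	r5, 1		/* safe to turn RI back on */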



[PATCH v2 40/52] powerpc/64s/exception: move SET_SCRATCH0 into EXCEPTION_PROLOG_0

2019-06-19 Thread Nicholas Piggin
No generated code change. The only file change is in the bug table line numbers.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 25 +
 1 file changed, 1 insertion(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 99de397a1cd9..286bd5670d60 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -128,6 +128,7 @@ BEGIN_FTR_SECTION_NESTED(943)				\
 END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 .macro EXCEPTION_PROLOG_0 area
+   SET_SCRATCH0(r13)   /* save r13 */
GET_PACA(r13)
std r9,\area\()+EX_R9(r13)  /* save r9 */
OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
@@ -540,7 +541,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define __EXC_REAL(name, start, size, area)\
EXC_REAL_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0 area ;   \
EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0, 0, 0 ;   \
EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
@@ -551,7 +551,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define __EXC_VIRT(name, start, size, realvec, area)   \
EXC_VIRT_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);/* save r13 */\
EXCEPTION_PROLOG_0 area ;   \
EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0, 0, 0;  \
EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\
@@ -562,7 +561,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define EXC_REAL_MASKABLE(name, start, size, bitmask)  \
EXC_REAL_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);/* save r13 */\
EXCEPTION_PROLOG_0 PACA_EXGEN ; \
EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, 0, 0, bitmask ; \
EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
@@ -570,7 +568,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \
EXC_VIRT_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);/* save r13 */\
EXCEPTION_PROLOG_0 PACA_EXGEN ; \
EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \
EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\
@@ -578,7 +575,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define EXC_REAL_HV(name, start, size) \
EXC_REAL_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0 PACA_EXGEN;  \
EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0, 0, 0 ;  \
EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 ;  \
@@ -586,7 +582,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define EXC_VIRT_HV(name, start, size, realvec)
\
EXC_VIRT_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0 PACA_EXGEN;  \
EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ;\
EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV ; \
@@ -594,7 +589,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define __EXC_REAL_OOL(name, start, size)  \
EXC_REAL_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);  \
EXCEPTION_PROLOG_0 PACA_EXGEN ; \
b   tramp_real_##name ; \
EXC_REAL_END(name, start, size)
@@ -622,7 +616,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler)   \
EXC_REAL_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);  \
EXCEPTION_PROLOG_0 PACA_EXGEN ; \
b   handler;\
EXC_REAL_END(name, start, size)
@@ -653,7 +646,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
 
 #define __EXC_VIRT_OOL(name, start, size)  \
EXC_VIRT_BEGIN(name, start, size);

[PATCH v2 39/52] powerpc/64s/exception: denorm handler use standard scratch save macro

2019-06-19 Thread Nicholas Piggin
Although the 0x1500 interrupt only applies to bare metal, it is better
to just use the standard macro for scratch save.

Runtime code path remains unchanged (due to instruction patching).

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 437f91179537..99de397a1cd9 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1920,7 +1920,7 @@ EXC_REAL_NONE(0x1400, 0x100)
 EXC_VIRT_NONE(0x5400, 0x100)
 
 EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100)
-   mtspr   SPRN_SPRG_HSCRATCH0,r13
+   SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0 PACA_EXGEN
EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 0, 0x1500, 0, 0, 0
 
-- 
2.20.1



[PATCH v2 38/52] powerpc/64s/exception: machine check use standard macros to save dar/dsisr

2019-06-19 Thread Nicholas Piggin
Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 16d5ea1c86bb..437f91179537 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1056,7 +1056,7 @@ BEGIN_FTR_SECTION
b   machine_check_common_early
 END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 machine_check_pSeries_0:
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 0, 0, 0
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
/*
 * MSR_RI is not enabled, because PACA_EXMC is being used, so a
 * nested machine check corrupts it. machine_check_common enables
@@ -1071,10 +1071,6 @@ EXC_COMMON_BEGIN(machine_check_common)
 * Machine check is different because we use a different
 * save area: PACA_EXMC instead of PACA_EXGEN.
 */
-   mfspr   r10,SPRN_DAR
-   std r10,PACA_EXMC+EX_DAR(r13)
-   mfspr   r10,SPRN_DSISR
-   stw r10,PACA_EXMC+EX_DSISR(r13)
EXCEPTION_COMMON(PACA_EXMC, 0x200)
FINISH_NAP
RECONCILE_IRQ_STATE(r10, r11)
-- 
2.20.1
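
Spelling out the new arguments in the EXCEPTION_PROLOG_1 line above, per the macro signature introduced in patch 37 below (hsrr, area, kvm, vec, dar, dsisr, bitmask); the comment is added here as a summary:

	EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0
	/* dar=1, dsisr=1: SPRN_DAR and SPRN_DSISR are captured into
	 * PACA_EXMC+EX_DAR / +EX_DSISR during the prolog, before a
	 * nested d-side machine check could clobber the live SPRs. */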



[PATCH v2 37/52] powerpc/64s/exception: add dar and dsisr options to exception macro

2019-06-19 Thread Nicholas Piggin
Some exception entries require DAR and/or DSISR to be saved into the
paca exception save area. Add options to the standard exception
macros for these.

Generated code changes slightly due to code structure.

- 554:  a6 02 72 7d mfdsisr r11
- 558:  a8 00 4d f9 std r10,168(r13)
- 55c:  b0 00 6d 91 stw r11,176(r13)
+ 554:  a8 00 4d f9 std r10,168(r13)
+ 558:  a6 02 52 7d mfdsisr r10
+ 55c:  b0 00 4d 91 stw r10,176(r13)

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 103 ---
 1 file changed, 46 insertions(+), 57 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 2412b5269e25..16d5ea1c86bb 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -136,7 +136,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
 .endm
 
-.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask
+.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, dar, dsisr, bitmask
OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
INTERRUPT_TO_KERNEL
@@ -172,8 +172,22 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
std r11,\area\()+EX_R11(r13)
std r12,\area\()+EX_R12(r13)
+
+   /*
+* DAR/DSISR, SCRATCH0 must be read before setting MSR[RI],
+* because a d-side MCE will clobber those registers so is
+* not recoverable if they are live.
+*/
GET_SCRATCH0(r10)
std r10,\area\()+EX_R13(r13)
+   .if \dar
+   mfspr   r10,SPRN_DAR
+   std r10,\area\()+EX_DAR(r13)
+   .endif
+   .if \dsisr
+   mfspr   r10,SPRN_DSISR
+   stw r10,\area\()+EX_DSISR(r13)
+   .endif
 .endm
 
 .macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri
@@ -528,7 +542,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
EXC_REAL_BEGIN(name, start, size);  \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0 area ;   \
-   EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0 ; \
+   EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0, 0, 0 ;   \
EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
EXC_REAL_END(name, start, size)
 
@@ -539,7 +553,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
EXC_VIRT_BEGIN(name, start, size);  \
SET_SCRATCH0(r13);/* save r13 */\
EXCEPTION_PROLOG_0 area ;   \
-   EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0;\
+   EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0, 0, 0;  \
EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\
EXC_VIRT_END(name, start, size)
 
@@ -550,7 +564,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
EXC_REAL_BEGIN(name, start, size);  \
SET_SCRATCH0(r13);/* save r13 */\
EXCEPTION_PROLOG_0 PACA_EXGEN ; \
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, bitmask ; \
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, 0, 0, bitmask ; \
EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
EXC_REAL_END(name, start, size)
 
@@ -558,7 +572,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
EXC_VIRT_BEGIN(name, start, size);  \
SET_SCRATCH0(r13);/* save r13 */\
EXCEPTION_PROLOG_0 PACA_EXGEN ; \
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, bitmask ;   \
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \
EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\
EXC_VIRT_END(name, start, size)
 
@@ -566,7 +580,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
EXC_REAL_BEGIN(name, start, size);  \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0 PACA_EXGEN;  \
-   EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0 ;\
+   EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0, 0, 0 ;  \
EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 ;  \
EXC_REAL_END(name, start, size)
 
@@ -574,7 +588,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP)
EXC_VIRT_BEGIN(name, start, size);  \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0 PACA_EXGEN;  \
-   EXCEPTION

[PATCH v2 36/52] powerpc/64s/exception: use common macro for windup

2019-06-19 Thread Nicholas Piggin
No generated code change. The only file change is in the bug table line numbers.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 112 +--
 1 file changed, 36 insertions(+), 76 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index cce75adf2095..2412b5269e25 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -417,6 +417,38 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);	\
EXCEPTION_PROLOG_COMMON_2(area);\
EXCEPTION_PROLOG_COMMON_3(trap)
 
+/*
+ * Restore all registers including H/SRR0/1 saved in a stack frame of a
+ * standard exception.
+ */
+.macro EXCEPTION_RESTORE_REGS hsrr
+   /* Move original SRR0 and SRR1 into the respective regs */
+   ld  r9,_MSR(r1)
+   .if \hsrr
+   mtspr   SPRN_HSRR1,r9
+   .else
+   mtspr   SPRN_SRR1,r9
+   .endif
+   ld  r9,_NIP(r1)
+   .if \hsrr
+   mtspr   SPRN_HSRR0,r9
+   .else
+   mtspr   SPRN_SRR0,r9
+   .endif
+   ld  r9,_CTR(r1)
+   mtctr   r9
+   ld  r9,_XER(r1)
+   mtxer   r9
+   ld  r9,_LINK(r1)
+   mtlrr9
+   ld  r9,_CCR(r1)
+   mtcrr9
+   REST_8GPRS(2, r1)
+   REST_4GPRS(10, r1)
+   REST_GPR(0, r1)
+   /* restore original r1. */
+   ld  r1,GPR1(r1)
+.endm
 
 #define RUNLATCH_ON\
 BEGIN_FTR_SECTION  \
@@ -906,29 +938,7 @@ EXC_COMMON_BEGIN(system_reset_common)
ld  r10,SOFTE(r1)
stb r10,PACAIRQSOFTMASK(r13)
 
-   /*
-* Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP.
-* Should share common bits...
-*/
-
-   /* Move original SRR0 and SRR1 into the respective regs */
-   ld  r9,_MSR(r1)
-   mtspr   SPRN_SRR1,r9
-   ld  r9,_NIP(r1)
-   mtspr   SPRN_SRR0,r9
-   ld  r9,_CTR(r1)
-   mtctr   r9
-   ld  r9,_XER(r1)
-   mtxer   r9
-   ld  r9,_LINK(r1)
-   mtlrr9
-   ld  r9,_CCR(r1)
-   mtcrr9
-   REST_8GPRS(2, r1)
-   REST_4GPRS(10, r1)
-   REST_GPR(0, r1)
-   /* restore original r1. */
-   ld  r1,GPR1(r1)
+   EXCEPTION_RESTORE_REGS EXC_STD
RFI_TO_USER_OR_KERNEL
 
 
@@ -1074,24 +1084,7 @@ EXC_COMMON_BEGIN(machine_check_common)
lhz r12,PACA_IN_MCE(r13);   \
subir12,r12,1;  \
sth r12,PACA_IN_MCE(r13);   \
-   /* Move original SRR0 and SRR1 into the respective regs */  \
-   ld  r9,_MSR(r1);\
-   mtspr   SPRN_SRR1,r9;   \
-   ld  r9,_NIP(r1);\
-   mtspr   SPRN_SRR0,r9;   \
-   ld  r9,_CTR(r1);\
-   mtctr   r9; \
-   ld  r9,_XER(r1);\
-   mtxer   r9; \
-   ld  r9,_LINK(r1);   \
-   mtlrr9; \
-   ld  r9,_CCR(r1);\
-   mtcrr9; \
-   REST_8GPRS(2, r1);  \
-   REST_4GPRS(10, r1); \
-   REST_GPR(0, r1);\
-   /* restore original r1. */  \
-   ld  r1,GPR1(r1)
+   EXCEPTION_RESTORE_REGS EXC_STD
 
 #ifdef CONFIG_PPC_P7_NAP
 /*
@@ -1774,48 +1767,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
cmpdi   cr0,r3,0
bne 1f
 
-   /* Windup the stack. */
-   /* Move original HSRR0 and HSRR1 into the respective regs */
-   ld  r9,_MSR(r1)
-   mtspr   SPRN_HSRR1,r9
-   ld  r9,_NIP(r1)
-   mtspr   SPRN_HSRR0,r9
-   ld  r9,_CTR(r1)
-   mtctr   r9
-   ld  r9,_XER(r1)
-   mtxer   r9
-   ld  r9,_LINK(r1)
-   mtlrr9
-   ld  r9,_CCR(r1)
-   mtcrr9
-   REST_8GPRS(2, r1)
-   REST_4GPRS(10, r1)
-   REST_GPR(0, r1)
-   ld  r1,GPR1(r1)
+   EXCEPTION_RESTORE_REGS EXC_HV
HRFI_TO_USER_OR_KERNEL
 
 1:
-   ld  r9,_MSR(r1)
-   mtspr   SPRN_HSRR1,r9
-   ld  r9,_NIP(r1)
-   mtspr   SPRN_HSRR0,r9
-   ld  r9,_CTR(r1)
-   mtctr   r9
-   ld  r9,_XER(r1)
-   mtxer   r9
-   ld  r9,_LINK(r1)
-   mtlrr9
-   ld  r9,_CCR(r1)
-   mtcrr9
-   REST_8GPRS(2, r1)
-   REST_4GPRS(10, r1)
-   REST_GPR(0, r1)
-   ld  r1,GPR1(r1)
-
/*
 * Go to virtual mode and pull the HMI event information from
 * firmware.
 */
+   EXCEPTION_R

[PATCH v2 35/52] powerpc/64s/exception: shuffle windup code around

2019-06-19 Thread Nicholas Piggin
Restore all SPRs and CR up-front; these are longer-latency
instructions. Move the register restores around to maximise pairs of
adjacent loads (e.g., restore r0 next to r1).

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 40 +++-
 1 file changed, 16 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 3476cffa21b8..cce75adf2095 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -922,13 +922,11 @@ EXC_COMMON_BEGIN(system_reset_common)
mtxer   r9
ld  r9,_LINK(r1)
mtlrr9
-   REST_GPR(0, r1)
+   ld  r9,_CCR(r1)
+   mtcrr9
REST_8GPRS(2, r1)
-   REST_GPR(10, r1)
-   ld  r11,_CCR(r1)
-   mtcrr11
-   REST_GPR(11, r1)
-   REST_2GPRS(12, r1)
+   REST_4GPRS(10, r1)
+   REST_GPR(0, r1)
/* restore original r1. */
ld  r1,GPR1(r1)
RFI_TO_USER_OR_KERNEL
@@ -1087,13 +1085,11 @@ EXC_COMMON_BEGIN(machine_check_common)
mtxer   r9; \
ld  r9,_LINK(r1);   \
mtlrr9; \
-   REST_GPR(0, r1);\
+   ld  r9,_CCR(r1);\
+   mtcrr9; \
REST_8GPRS(2, r1);  \
-   REST_GPR(10, r1);   \
-   ld  r11,_CCR(r1);   \
-   mtcrr11;\
-   REST_GPR(11, r1);   \
-   REST_2GPRS(12, r1); \
+   REST_4GPRS(10, r1); \
+   REST_GPR(0, r1);\
/* restore original r1. */  \
ld  r1,GPR1(r1)
 
@@ -1790,13 +1786,11 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
mtxer   r9
ld  r9,_LINK(r1)
mtlrr9
-   REST_GPR(0, r1)
+   ld  r9,_CCR(r1)
+   mtcrr9
REST_8GPRS(2, r1)
-   REST_GPR(10, r1)
-   ld  r11,_CCR(r1)
-   REST_2GPRS(12, r1)
-   mtcrr11
-   REST_GPR(11, r1)
+   REST_4GPRS(10, r1)
+   REST_GPR(0, r1)
ld  r1,GPR1(r1)
HRFI_TO_USER_OR_KERNEL
 
@@ -1811,13 +1805,11 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
mtxer   r9
ld  r9,_LINK(r1)
mtlrr9
-   REST_GPR(0, r1)
+   ld  r9,_CCR(r1)
+   mtcrr9
REST_8GPRS(2, r1)
-   REST_GPR(10, r1)
-   ld  r11,_CCR(r1)
-   REST_2GPRS(12, r1)
-   mtcrr11
-   REST_GPR(11, r1)
+   REST_4GPRS(10, r1)
+   REST_GPR(0, r1)
ld  r1,GPR1(r1)
 
/*
-- 
2.20.1



[PATCH v2 34/52] powerpc/64s/exception: simplify hmi windup code

2019-06-19 Thread Nicholas Piggin
Duplicate the hmi windup code for both cases, rather than putting a
special-case branch in the middle of it. Remove the now-unused label.
This helps with later code consolidation.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 22 ++
 1 file changed, 18 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index cf89d728720a..3476cffa21b8 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1776,6 +1776,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
addir3,r1,STACK_FRAME_OVERHEAD
	BRANCH_LINK_TO_FAR(DOTSYM(hmi_exception_realmode)) /* Function call ABI */
cmpdi   cr0,r3,0
+   bne 1f
 
/* Windup the stack. */
/* Move original HSRR0 and HSRR1 into the respective regs */
@@ -1794,13 +1795,28 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
REST_GPR(10, r1)
ld  r11,_CCR(r1)
REST_2GPRS(12, r1)
-   bne 1f
mtcrr11
REST_GPR(11, r1)
ld  r1,GPR1(r1)
HRFI_TO_USER_OR_KERNEL
 
-1: mtcrr11
+1:
+   ld  r9,_MSR(r1)
+   mtspr   SPRN_HSRR1,r9
+   ld  r9,_NIP(r1)
+   mtspr   SPRN_HSRR0,r9
+   ld  r9,_CTR(r1)
+   mtctr   r9
+   ld  r9,_XER(r1)
+   mtxer   r9
+   ld  r9,_LINK(r1)
+   mtlrr9
+   REST_GPR(0, r1)
+   REST_8GPRS(2, r1)
+   REST_GPR(10, r1)
+   ld  r11,_CCR(r1)
+   REST_2GPRS(12, r1)
+   mtcrr11
REST_GPR(11, r1)
ld  r1,GPR1(r1)
 
@@ -1808,8 +1824,6 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
 * Go to virtual mode and pull the HMI event information from
 * firmware.
 */
-   .globl hmi_exception_after_realmode
-hmi_exception_after_realmode:
SET_SCRATCH0(r13)
EXCEPTION_PROLOG_0 PACA_EXGEN
b   tramp_real_hmi_exception
-- 
2.20.1



[PATCH v2 33/52] powerpc/64s/exception: move machine check windup in_mce handling

2019-06-19 Thread Nicholas Piggin
Move the in_mce decrement earlier, before registers are restored (but
still after RI=0). This helps with later consolidation.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 804438669454..cf89d728720a 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1072,6 +1072,10 @@ EXC_COMMON_BEGIN(machine_check_common)
/* Clear MSR_RI before setting SRR0 and SRR1. */\
li  r9,0;   \
mtmsrd  r9,1;   /* Clear MSR_RI */  \
+   /* Decrement paca->in_mce now RI is clear. */   \
+   lhz r12,PACA_IN_MCE(r13);   \
+   subir12,r12,1;  \
+   sth r12,PACA_IN_MCE(r13);   \
/* Move original SRR0 and SRR1 into the respective regs */  \
ld  r9,_MSR(r1);\
mtspr   SPRN_SRR1,r9;   \
@@ -1088,10 +1092,6 @@ EXC_COMMON_BEGIN(machine_check_common)
REST_GPR(10, r1);   \
ld  r11,_CCR(r1);   \
mtcrr11;\
-   /* Decrement paca->in_mce. */   \
-   lhz r12,PACA_IN_MCE(r13);   \
-   subir12,r12,1;  \
-   sth r12,PACA_IN_MCE(r13);   \
REST_GPR(11, r1);   \
REST_2GPRS(12, r1); \
/* restore original r1. */  \
-- 
2.20.1



[PATCH v2 32/52] powerpc/64s/exception: windup use r9 consistently to restore SPRs

2019-06-19 Thread Nicholas Piggin
Trivial code change, r3->r9.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 539bb1b83d90..804438669454 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -914,8 +914,8 @@ EXC_COMMON_BEGIN(system_reset_common)
/* Move original SRR0 and SRR1 into the respective regs */
ld  r9,_MSR(r1)
mtspr   SPRN_SRR1,r9
-   ld  r3,_NIP(r1)
-   mtspr   SPRN_SRR0,r3
+   ld  r9,_NIP(r1)
+   mtspr   SPRN_SRR0,r9
ld  r9,_CTR(r1)
mtctr   r9
ld  r9,_XER(r1)
@@ -1075,8 +1075,8 @@ EXC_COMMON_BEGIN(machine_check_common)
/* Move original SRR0 and SRR1 into the respective regs */  \
ld  r9,_MSR(r1);\
mtspr   SPRN_SRR1,r9;   \
-   ld  r3,_NIP(r1);\
-   mtspr   SPRN_SRR0,r3;   \
+   ld  r9,_NIP(r1);\
+   mtspr   SPRN_SRR0,r9;   \
ld  r9,_CTR(r1);\
mtctr   r9; \
ld  r9,_XER(r1);\
@@ -1781,8 +1781,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early)
/* Move original HSRR0 and HSRR1 into the respective regs */
ld  r9,_MSR(r1)
mtspr   SPRN_HSRR1,r9
-   ld  r3,_NIP(r1)
-   mtspr   SPRN_HSRR0,r3
+   ld  r9,_NIP(r1)
+   mtspr   SPRN_HSRR0,r9
ld  r9,_CTR(r1)
mtctr   r9
ld  r9,_XER(r1)
-- 
2.20.1



[PATCH v2 31/52] powerpc/64s/exception: mtmsrd L=1 cleanup

2019-06-19 Thread Nicholas Piggin
All supported 64s CPUs support the mtmsrd L=1 instruction, so the
sreset and mce handlers can be cleaned up to use it.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 9 ++---
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index f582ae30f3f7..539bb1b83d90 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -887,11 +887,8 @@ EXC_COMMON_BEGIN(system_reset_common)
addir3,r1,STACK_FRAME_OVERHEAD
bl  system_reset_exception
 
-   /* This (and MCE) can be simplified with mtmsrd L=1 */
/* Clear MSR_RI before setting SRR0 and SRR1. */
-   li  r0,MSR_RI
-   mfmsr   r9
-   andcr9,r9,r0
+   li  r9,0
mtmsrd  r9,1
 
/*
@@ -1073,9 +1070,7 @@ EXC_COMMON_BEGIN(machine_check_common)
 
 #define MACHINE_CHECK_HANDLER_WINDUP   \
/* Clear MSR_RI before setting SRR0 and SRR1. */\
-   li  r0,MSR_RI;  \
-   mfmsr   r9; /* get MSR value */ \
-   andcr9,r9,r0;   \
+   li  r9,0;   \
mtmsrd  r9,1;   /* Clear MSR_RI */  \
/* Move original SRR0 and SRR1 into the respective regs */  \
ld  r9,_MSR(r1);\
-- 
2.20.1
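
The win comes from mtmsrd's L=1 form updating only MSR[EE] and MSR[RI] from the source register, so no read-modify-write of the MSR image is needed. Side by side, assembled from the hunks above (comments added here):

	/* before: preserve the other MSR bits by hand */
	li	r0,MSR_RI
	mfmsr	r9
	andc	r9,r9,r0	/* clear RI in the image */
	mtmsrd	r9,1

	/* after: L=1 writes only EE and RI, so zero is enough */
	li	r9,0
	mtmsrd	r9,1		/* RI=0 (EE already 0), rest untouched */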



[PATCH v2 30/52] powerpc/64s/exception: optimise system_reset for idle, clean up non-idle case

2019-06-19 Thread Nicholas Piggin
The idle wake up code in the system reset interrupt is not very
optimal. There are two requirements: perform idle wake up quickly;
and save everything including CFAR for non-idle interrupts, with
no performance requirement.

The problem with placing the idle test in the middle of the handler
and using the normal handler code to save CFAR is that it's quite
costly (e.g., mfcfar is serialising, speculative workarounds get
applied, SRR1 has to be reloaded, etc). It also prevents the standard
interrupt handler boilerplate being used.

This pain can be avoided by using a dedicated idle interrupt handler
at the start of the interrupt handler, which restores all registers
back to the way they were in case it was not an idle wake up. CFAR
is preserved without saving it before the non-idle case by making that
the fall-through, and idle is a taken branch.

Performance seems to be in the noise, but possibly around 0.5% faster;
the executed instructions certainly look better. The bigger benefit is
being able to drop in standard interrupt handlers after the idle code,
which helps with subsequent cleanup and consolidation.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 89 ++--
 1 file changed, 44 insertions(+), 45 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index e0492912ea79..f582ae30f3f7 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -241,7 +241,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
  * load KBASE for a slight optimisation.
  */
 #define BRANCH_TO_C000(reg, label) \
-   __LOAD_HANDLER(reg, label); \
+   __LOAD_FAR_HANDLER(reg, label); \
mtctr   reg;\
bctr
 
@@ -784,16 +784,6 @@ EXC_VIRT_NONE(0x4000, 0x100)
 
 
 EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
-   SET_SCRATCH0(r13)
-   EXCEPTION_PROLOG_0 PACA_EXNMI
-
-   /* This is EXCEPTION_PROLOG_1 with the idle feature section added */
-   OPT_SAVE_REG_TO_PACA(PACA_EXNMI+EX_PPR, r9, CPU_FTR_HAS_PPR)
-   OPT_SAVE_REG_TO_PACA(PACA_EXNMI+EX_CFAR, r10, CPU_FTR_CFAR)
-   INTERRUPT_TO_KERNEL
-   SAVE_CTR(r10, PACA_EXNMI)
-   mfcrr9
-
 #ifdef CONFIG_PPC_P7_NAP
/*
 * If running native on arch 2.06 or later, check if we are waking up
@@ -801,45 +791,67 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
 * bits 46:47. A non-0 value indicates that we are coming from a power
 * saving state. The idle wakeup handler initially runs in real mode,
 * but we branch to the 0xc000... address so we can turn on relocation
-* with mtmsr.
+* with mtmsrd later, after SPRs are restored.
+*
+* Careful to minimise cost for the fast path (idle wakeup) while
+* also avoiding clobbering CFAR for the non-idle case. Once we know
+* it is an idle wake, volatiles don't matter, which is why we use
+* those here, and then re-do the entry in case of non-idle (without
+* branching for the non-idle case, to keep CFAR).
 */
 BEGIN_FTR_SECTION
-   mfspr   r10,SPRN_SRR1
-   rlwinm. r10,r10,47-31,30,31
-   beq-1f
-   cmpwi   cr1,r10,2
+   SET_SCRATCH0(r13)
+   GET_PACA(r13)
+   std r3,PACA_EXNMI+0*8(r13)
+   std r4,PACA_EXNMI+1*8(r13)
+   std r5,PACA_EXNMI+2*8(r13)
mfspr   r3,SPRN_SRR1
-   bltlr   cr1 /* no state loss, return to idle caller */
-   BRANCH_TO_C000(r10, system_reset_idle_common)
-1:
+   mfocrf  r4,0x80
+   rlwinm. r5,r3,47-31,30,31
+   bne+system_reset_idle_wake
+   /* Not powersave wakeup. Restore regs for regular interrupt handler. */
+   mtocrf  0x80,r4
+   ld  r12,PACA_EXNMI+0*8(r13)
+   ld  r4,PACA_EXNMI+1*8(r13)
+   ld  r5,PACA_EXNMI+2*8(r13)
+   GET_SCRATCH0(r13)
 END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
 
-   KVMTEST EXC_STD 0x100
-   std r11,PACA_EXNMI+EX_R11(r13)
-   std r12,PACA_EXNMI+EX_R12(r13)
-   GET_SCRATCH0(r10)
-   std r10,PACA_EXNMI+EX_R13(r13)
-
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0 PACA_EXNMI
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 1, 0x100, 0
EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0
/*
 * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is
 * being used, so a nested NMI exception would corrupt it.
 */
-
 EXC_REAL_END(system_reset, 0x100, 0x100)
+
 EXC_VIRT_NONE(0x4100, 0x100)
 TRAMP_KVM(PACA_EXNMI, 0x100)
 
 #ifdef CONFIG_PPC_P7_NAP
-EXC_COMMON_BEGIN(system_reset_idle_common)
-   /*
-* This must be a direct branch (without linker branch stub) because
-* we can not use TOC at this point as r
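
The reworked entry flow, in rough C-style pseudocode (illustrative only; the helper names here are invented):

	/* 0x100 system reset entry, per the hunks above */
	if (cpu_has_hvmode_arch206()) {		/* FTR section */
		stash_r3_r4_r5_in_exnmi();	/* volatiles only */
		if (srr1_wake_bits_set())	/* SRR1[46:47] != 0 */
			goto system_reset_idle_wake;	/* taken: idle */
		unstash_r3_r4_r5();		/* non-idle: undo, with
						 * CFAR still intact */
	}
	/* fall through: full standard prolog, saves CFAR et al. */
	standard_prolog_0x100();		/* -> system_reset_common */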

[PATCH v2 29/52] powerpc/64s/exception: avoid SPR RAW scoreboard stall in real mode entry

2019-06-19 Thread Nicholas Piggin
Move SPR reads ahead of writes. Real mode entry that is not a KVM
guest is rare these days, but bad practice propagates.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 14 +++---
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 62f7e9ad23c6..e0492912ea79 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -183,19 +183,19 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
.endif
.if \hsrr
mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
+   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
+   mtspr   SPRN_HSRR1,r10
.else
mfspr   r11,SPRN_SRR0   /* save SRR0 */
+   mfspr   r12,SPRN_SRR1   /* and SRR1 */
+   mtspr   SPRN_SRR1,r10
.endif
-   LOAD_HANDLER(r12, \label\())
+   LOAD_HANDLER(r10, \label\())
.if \hsrr
-   mtspr   SPRN_HSRR0,r12
-   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
-   mtspr   SPRN_HSRR1,r10
+   mtspr   SPRN_HSRR0,r10
HRFI_TO_KERNEL
.else
-   mtspr   SPRN_SRR0,r12
-   mfspr   r12,SPRN_SRR1   /* and SRR1 */
-   mtspr   SPRN_SRR1,r10
+   mtspr   SPRN_SRR0,r10
RFI_TO_KERNEL
.endif
b   .   /* prevent speculative execution */
-- 
2.20.1



[PATCH v2 28/52] powerpc/64s/exception: clean up system call entry

2019-06-19 Thread Nicholas Piggin
syscall / hcall entry unnecessarily differs between KVM and non-KVM
builds. Move the SMT priority instruction to the same location
(after INTERRUPT_TO_KERNEL).

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 25 +++--
 1 file changed, 7 insertions(+), 18 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index d6de0ce1f0f2..62f7e9ad23c6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1635,10 +1635,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
std r10,PACA_EXGEN+EX_R10(r13)
INTERRUPT_TO_KERNEL
KVMTEST EXC_STD 0xc00 /* uses r10, branch to do_kvm_0xc00_system_call */
-   HMT_MEDIUM
mfctr   r9
 #else
-   HMT_MEDIUM
mr  r9,r13
GET_PACA(r13)
INTERRUPT_TO_KERNEL
@@ -1650,11 +1648,13 @@ BEGIN_FTR_SECTION
beq-1f
 END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
 #endif
-   /* We reach here with PACA in r13, r13 in r9, and HMT_MEDIUM. */
-
-   .if \real
+   /* We reach here with PACA in r13, r13 in r9. */
mfspr   r11,SPRN_SRR0
mfspr   r12,SPRN_SRR1
+
+   HMT_MEDIUM
+
+   .if \real
__LOAD_HANDLER(r10, system_call_common)
mtspr   SPRN_SRR0,r10
ld  r10,PACAKMSR(r13)
@@ -1662,24 +1662,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
RFI_TO_KERNEL
b   .   /* prevent speculative execution */
.else
+   li  r10,MSR_RI
+   mtmsrd  r10,1   /* Set RI (EE=0) */
 #ifdef CONFIG_RELOCATABLE
-   /*
-* We can't branch directly so we do it via the CTR which
-* is volatile across system calls.
-*/
__LOAD_HANDLER(r10, system_call_common)
mtctr   r10
-   mfspr   r11,SPRN_SRR0
-   mfspr   r12,SPRN_SRR1
-   li  r10,MSR_RI
-   mtmsrd  r10,1
bctr
 #else
-   /* We can branch directly */
-   mfspr   r11,SPRN_SRR0
-   mfspr   r12,SPRN_SRR1
-   li  r10,MSR_RI
-   mtmsrd  r10,1   /* Set RI (EE=0) */
b   system_call_common
 #endif
.endif
-- 
2.20.1



[PATCH v2 27/52] powerpc/64s/exception: move paca save area offsets into exception-64s.S

2019-06-19 Thread Nicholas Piggin
No generated code change. The only file change is in the bug table line numbers.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 17 +++--
 arch/powerpc/kernel/exceptions-64s.S | 22 ++
 2 files changed, 25 insertions(+), 14 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h
index 79e5ac87c029..33f4f72eb035 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -32,22 +32,11 @@
  */
 #include 
 
-/* PACA save area offsets (exgen, exmc, etc) */
-#define EX_R9  0
-#define EX_R10 8
-#define EX_R11 16
-#define EX_R12 24
-#define EX_R13 32
-#define EX_DAR 40
-#define EX_DSISR   48
-#define EX_CCR 52
-#define EX_CFAR56
-#define EX_PPR 64
+/* PACA save area size in u64 units (exgen, exmc, etc) */
 #if defined(CONFIG_RELOCATABLE)
-#define EX_CTR 72
-#define EX_SIZE10  /* size in u64 units */
+#define EX_SIZE10
 #else
-#define EX_SIZE9   /* size in u64 units */
+#define EX_SIZE9
 #endif
 
 /*
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 4b4bb8f43f55..d6de0ce1f0f2 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -21,6 +21,28 @@
 #include 
 #include 
 
+/* PACA save area offsets (exgen, exmc, etc) */
+#define EX_R9  0
+#define EX_R10 8
+#define EX_R11 16
+#define EX_R12 24
+#define EX_R13 32
+#define EX_DAR 40
+#define EX_DSISR   48
+#define EX_CCR 52
+#define EX_CFAR56
+#define EX_PPR 64
+#if defined(CONFIG_RELOCATABLE)
+#define EX_CTR 72
+.if EX_SIZE != 10
+   .error "EX_SIZE is wrong"
+.endif
+#else
+.if EX_SIZE != 9
+   .error "EX_SIZE is wrong"
+.endif
+#endif
+
 /*
  * We're short on space and time in the exception prolog, so we can't
  * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
-- 
2.20.1



[PATCH v2 26/52] powerpc/64s/exception: remove pointless EXCEPTION_PROLOG macro indirection

2019-06-19 Thread Nicholas Piggin
No generated code change. The only file change is in the bug table line numbers.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 97 +---
 1 file changed, 45 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 6c0321e128da..4b4bb8f43f55 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -326,34 +326,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
std r0,GPR0(r1);/* save r0 in stackframe*/ \
std r10,GPR1(r1);   /* save r1 in stackframe*/ \
 
-
-/*
- * The common exception prolog is used for all except a few exceptions
- * such as a segment miss on a kernel address.  We have to be prepared
- * to take another exception from the point where we first touch the
- * kernel stack onwards.
- *
- * On entry r13 points to the paca, r9-r13 are saved in the paca,
- * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
- * SRR1, and relocation is on.
- */
-#define EXCEPTION_PROLOG_COMMON(n, area)  \
-   andi.   r10,r12,MSR_PR; /* See if coming from user  */ \
-   mr  r10,r1; /* Save r1  */ \
-   subir1,r1,INT_FRAME_SIZE;   /* alloc frame on kernel stack  */ \
-   beq-1f;\
-   ld  r1,PACAKSAVE(r13);  /* kernel stack to use  */ \
-1: tdgei   r1,-INT_FRAME_SIZE; /* trap if r1 is in userspace   */ \
-   EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; \
-3: EXCEPTION_PROLOG_COMMON_1();   \
-   kuap_save_amr_and_lock r9, r10, cr1, cr0;  \
-   beq 4f; /* if from kernel mode  */ \
-   ACCOUNT_CPU_USER_ENTRY(r13, r9, r10);  \
-   SAVE_PPR(area, r9);\
-4: EXCEPTION_PROLOG_COMMON_2(area)\
-   EXCEPTION_PROLOG_COMMON_3(n)   \
-   ACCOUNT_STOLEN_TIME
-
 /* Save original regs values from save area to stack frame. */
#define EXCEPTION_PROLOG_COMMON_2(area)					\
ld  r9,area+EX_R9(r13); /* move r9, r10 to stackframe   */ \
@@ -373,7 +345,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);	\
GET_CTR(r10, area);\
std r10,_CTR(r1);
 
-#define EXCEPTION_PROLOG_COMMON_3(n)  \
+#define EXCEPTION_PROLOG_COMMON_3(trap)					\
std r2,GPR2(r1);/* save r2 in stackframe*/ \
SAVE_4GPRS(3, r1);  /* save r3 - r6 in stackframe   */ \
SAVE_2GPRS(7, r1);  /* save r7, r8 in stackframe*/ \
@@ -384,26 +356,38 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);	\
mfspr   r11,SPRN_XER;   /* save XER in stackframe   */ \
std r10,SOFTE(r1); \
std r11,_XER(r1);  \
-   li  r9,(n)+1;  \
+   li  r9,(trap)+1;   \
std r9,_TRAP(r1);   /* set trap number  */ \
li  r10,0; \
ld  r11,exception_marker@toc(r2);  \
std r10,RESULT(r1); /* clear regs->result   */ \
std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame  */
 
-#define RUNLATCH_ON\
-BEGIN_FTR_SECTION  \
-   ld  r3, PACA_THREAD_INFO(r13);  \
-   ld  r4,TI_LOCAL_FLAGS(r3);  \
-   andi.   r0,r4,_TLF_RUNLATCH;\
-   beqlppc64_runlatch_on_trampoline;   \
-END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
-
-#define EXCEPTION_COMMON(area, trap)   \
-   EXCEPTION_PROLOG_COMMON(trap, area);\
+/*
+ * On entry r13 points to the paca, r9-r13 are saved in the paca,
+ * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and
+ * SRR1, and relocation is on.
+ */
+#define EXCEPTION_COMMON(area, trap)  \
+   andi.   r10,r12,MSR_PR; /* See if coming from user  */ \
+   mr  r10,r1; /* Save r1  */ \
+   subir1,r1,INT_FRAME_SIZE;   /* alloc frame on kernel stack  */ \
+   beq-1f;   

[PATCH v2 25/52] powerpc/64s/exception: remove bad stack branch

2019-06-19 Thread Nicholas Piggin
The bad stack test in interrupt handlers has a few problems. For
performance, the test is executed in the common case, which costs a
fetch bubble and wastes i-cache.

For code development and maintenance, it requires yet another stack
frame setup routine, and that constrains all exception handlers to
follow the same register save pattern, which inhibits future
optimisation.

Remove the test/branch and replace it with a trap. Teach the program
check handler to use the emergency stack for this case.

This does not result in quite so nice a message; however, the SRR0 and
SRR1 of the crashed interrupt can be seen in r11 and r12, as can the
original r1 (adjusted by INT_FRAME_SIZE). These are the most important
parts for debugging the issue.

The original r9-12 and cr0 are lost, which is the main downside.

  kernel BUG at linux/arch/powerpc/kernel/exceptions-64s.S:847!
  Oops: Exception in kernel mode, sig: 5 [#1]
  BE SMP NR_CPUS=2048 NUMA PowerNV
  Modules linked in:
  CPU: 0 PID: 1 Comm: swapper/0 Not tainted
  NIP:  c0009108 LR: c0cadbcc CTR: c00090f0
  REGS: c000fffcbd70 TRAP: 0700   Not tainted
  MSR:  90021032   CR: 28222448  XER: 2004
  CFAR: c0009100 IRQMASK: 0
  GPR00: 003d fd00 c18cfb00 c000f02b3166
  GPR04: fffd 0007 fffb 0030
  GPR08: 0037 28222448  c0ca8de0
  GPR12: 92009032 c1ae c0010a00 
  GPR16:    
  GPR20: c000f00322c0 c0f85200 0004 
  GPR24: fffe   000a
  GPR28:   c000f02b391c c000f02b3167
  NIP [c0009108] decrementer_common+0x18/0x160
  LR [c0cadbcc] .vsnprintf+0x3ec/0x4f0
  Call Trace:
  Instruction dump:
  996d098a 994d098b 38610070 480246ed 48005518 6000 3820 718a4000
  7c2a0b78 3821fd00 41c20008 e82d0970 <0981fd00> f92101a0 f9610170 f9810178

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h |  7 --
 arch/powerpc/include/asm/paca.h  |  2 +
 arch/powerpc/kernel/asm-offsets.c|  2 +
 arch/powerpc/kernel/exceptions-64s.S | 95 
 arch/powerpc/xmon/xmon.c |  2 +
 5 files changed, 22 insertions(+), 86 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index dc6a5ccac965..79e5ac87c029 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -55,13 +55,6 @@
  */
 #define MAX_MCE_DEPTH  4
 
-/*
- * EX_R3 is only used by the bad_stack handler. bad_stack reloads and
- * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap
- * with EX_DAR.
- */
-#define EX_R3  EX_DAR
-
 #ifdef __ASSEMBLY__
 
 #define STF_ENTRY_BARRIER_SLOT \
diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h
index 9bd2326bef6f..e3cc9eb9204d 100644
--- a/arch/powerpc/include/asm/paca.h
+++ b/arch/powerpc/include/asm/paca.h
@@ -166,7 +166,9 @@ struct paca_struct {
u64 kstack; /* Saved Kernel stack addr */
u64 saved_r1;   /* r1 save for RTAS calls or PM or EE=0 
*/
u64 saved_msr;  /* MSR saved here by enter_rtas */
+#ifdef CONFIG_PPC_BOOK3E
u16 trap_save;  /* Used when bad stack is encountered */
+#endif
u8 irq_soft_mask;   /* mask for irq soft masking */
u8 irq_happened;/* irq happened while soft-disabled */
u8 irq_work_pending;/* IRQ_WORK interrupt while 
soft-disable */
diff --git a/arch/powerpc/kernel/asm-offsets.c 
b/arch/powerpc/kernel/asm-offsets.c
index 31dc7e64cbfc..4ccb6b3a7fbd 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -266,7 +266,9 @@ int main(void)
OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user);
OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime);
OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime);
+#ifdef CONFIG_PPC_BOOK3E
OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save);
+#endif
OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso);
 #else /* CONFIG_PPC64 */
 #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 0a2b4e8b02b0..6c0321e128da 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -343,14 +343,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
subir1,r1,INT_FRAME_SIZE;   /* alloc frame on kernel stack  */ \
beq-1f;\

[PATCH v2 24/52] powerpc/64s/exception: generate regs clear instructions using .rept

2019-06-19 Thread Nicholas Piggin
No generated code change.
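
For reference, the gas .rept directive repeats its block at assembly
time, re-evaluating the counter symbol on each copy; a minimal sketch
of the idiom used here:

	.Lreg=0
	.rept 4		/* assembles as: fmr 0,0 ; fmr 1,1 ; fmr 2,2 ; fmr 3,3 */
	fmr	.Lreg,.Lreg
	.Lreg=.Lreg+1
	.endr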

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 29 +++-
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 02b4722b7c64..0a2b4e8b02b0 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -2010,12 +2010,11 @@ BEGIN_FTR_SECTION
mtmsrd  r10
sync
 
-#define FMR2(n)  fmr (n), (n) ; fmr n+1, n+1
-#define FMR4(n)  FMR2(n) ; FMR2(n+2)
-#define FMR8(n)  FMR4(n) ; FMR4(n+4)
-#define FMR16(n) FMR8(n) ; FMR8(n+8)
-#define FMR32(n) FMR16(n) ; FMR16(n+16)
-   FMR32(0)
+   .Lreg=0
+   .rept 32
+   fmr .Lreg,.Lreg
+   .Lreg=.Lreg+1
+   .endr
 
 FTR_SECTION_ELSE
 /*
@@ -2027,12 +2026,11 @@ FTR_SECTION_ELSE
mtmsrd  r10
sync
 
-#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1)
-#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2)
-#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4)
-#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8)
-#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16)
-   XVCPSGNDP32(0)
+   .Lreg=0
+   .rept 32
+   XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+   .Lreg=.Lreg+1
+   .endr
 
 ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206)
 
@@ -2043,7 +2041,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S)
  * To denormalise we need to move a copy of the register to itself.
  * For POWER8 we need to do that for all 64 VSX registers
  */
-   XVCPSGNDP32(32)
+   .Lreg=32
+   .rept 32
+   XVCPSGNDP(.Lreg,.Lreg,.Lreg)
+   .Lreg=.Lreg+1
+   .endr
+
 denorm_done:
mfspr   r11,SPRN_HSRR0
subir11,r11,4
-- 
2.20.1



[PATCH v2 23/52] powerpc/64s/exception: fix indenting irregularities

2019-06-19 Thread Nicholas Piggin
Generally, macros that result in instructions being expanded are
indented by a tab, and those that don't are left unindented. Fix the
obvious cases that go contrary to this style.
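
For example, feature section markers expand to no instructions and so
lose their indent, while the instructions inside them keep theirs, as
in this hunk from below:

BEGIN_FTR_SECTION
	mfspr	r10,SPRN_SRR1
	rlwinm.	r10,r10,47-31,30,31
END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)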

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 92 ++--
 1 file changed, 46 insertions(+), 46 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index f7b6634bcc75..02b4722b7c64 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -261,16 +261,16 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
cmpwi   r10,KVM_GUEST_MODE_SKIP
beq 89f
.else
-   BEGIN_FTR_SECTION_NESTED(947)
+BEGIN_FTR_SECTION_NESTED(947)
ld  r10,\area+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
-   END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
.endif
 
-   BEGIN_FTR_SECTION_NESTED(948)
+BEGIN_FTR_SECTION_NESTED(948)
ld  r10,\area+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
-   END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
+END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
ld  r10,\area+EX_R10(r13)
std r12,HSTATE_SCRATCH0(r13)
sldir12,r9,32
@@ -372,10 +372,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r9,GPR11(r1);  \
std r10,GPR12(r1); \
std r11,GPR13(r1); \
-   BEGIN_FTR_SECTION_NESTED(66);  \
+BEGIN_FTR_SECTION_NESTED(66); \
ld  r10,area+EX_CFAR(r13); \
std r10,ORIG_GPR3(r1); \
-   END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);\
+END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);
   \
GET_CTR(r10, area);\
std r10,_CTR(r1);
 
@@ -794,7 +794,7 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
 * but we branch to the 0xc000... address so we can turn on relocation
 * with mtmsr.
 */
-   BEGIN_FTR_SECTION
+BEGIN_FTR_SECTION
mfspr   r10,SPRN_SRR1
rlwinm. r10,r10,47-31,30,31
beq-1f
@@ -803,7 +803,7 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
bltlr   cr1 /* no state loss, return to idle caller */
BRANCH_TO_C000(r10, system_reset_idle_common)
 1:
-   END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
 
KVMTEST EXC_STD 0x100
@@ -1151,10 +1151,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE)
 *
 * Go back to nap/sleep/winkle mode again if (b) is true.
 */
-   BEGIN_FTR_SECTION
+BEGIN_FTR_SECTION
rlwinm. r11,r12,47-31,30,31
bne machine_check_idle_common
-   END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 #endif
 
/*
@@ -1261,13 +1261,13 @@ EXC_COMMON_BEGIN(mce_return)
b   .
 
 EXC_REAL_BEGIN(data_access, 0x300, 0x80)
-SET_SCRATCH0(r13)  /* save r13 */
-EXCEPTION_PROLOG_0 PACA_EXGEN
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0 PACA_EXGEN
b   tramp_real_data_access
 EXC_REAL_END(data_access, 0x300, 0x80)
 
 TRAMP_REAL_BEGIN(tramp_real_data_access)
-EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 0
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 0
/*
 * DAR/DSISR must be read before setting MSR[RI], because
 * a d-side MCE will clobber those registers so is not
@@ -1280,9 +1280,9 @@ EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 0
 EXCEPTION_PROLOG_2_REAL data_access_common, EXC_STD, 1
 
 EXC_VIRT_BEGIN(data_access, 0x4300, 0x80)
-SET_SCRATCH0(r13)  /* save r13 */
-EXCEPTION_PROLOG_0 PACA_EXGEN
-EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x300, 0
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0 PACA_EXGEN
+   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x300, 0
mfspr   r10,SPRN_DAR
mfspr   r11,SPRN_DSISR
std r10,PACA_EXGEN+EX_DAR(r13)
@@ -1315,24 +1315,24 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 
 EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80)
-SET_SCRATCH0(r13)  /* save r13 */
-EXCEPTION_PROLOG_0 PACA_EXSLB
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0 PACA_EXSLB
b   tramp_real_data_access_slb
 EXC_REAL_END(data_access_slb, 0x380, 0x80)
 
 TRAMP_REAL_BEGIN(tramp_real_data_access_slb)
-EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSL

[PATCH v2 22/52] powerpc/64s/exception: use a gas macro for system call handler code

2019-06-19 Thread Nicholas Piggin
No generated code change.
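
A minimal sketch of the technique, separate from the real handler: a
single gas macro with a flag parameter replaces the parallel cpp
macros, expanding only the variant that is asked for (the actual
SYSTEM_CALL macro below carries the full entry sequences):

.macro SYSTEM_CALL real
	.if \real
	RFI_TO_KERNEL		/* real-mode entry returns via rfid */
	.else
	bctr			/* virt-mode entry can branch to the handler */
	.endif
.endm

	SYSTEM_CALL 1		/* expands only the real-mode variant */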

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 127 ---
 1 file changed, 55 insertions(+), 72 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index f1f1278cb131..f7b6634bcc75 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1607,6 +1607,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
  * without saving, though xer is not a good idea to use, as hardware may
  * interpret some bits so it may be costly to change them.
  */
+.macro SYSTEM_CALL real
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
/*
 * There is a little bit of juggling to get syscall and hcall
@@ -1616,95 +1617,77 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)
 * Userspace syscalls have already saved the PPR, hcalls must save
 * it before setting HMT_MEDIUM.
 */
-#define SYSCALL_KVMTEST
\
-   mtctr   r13;\
-   GET_PACA(r13);  \
-   std r10,PACA_EXGEN+EX_R10(r13); \
-   INTERRUPT_TO_KERNEL;\
-   KVMTEST EXC_STD 0xc00 ; /* uses r10, branch to do_kvm_0xc00_system_call 
*/ \
-   HMT_MEDIUM; \
-   mfctr   r9;
-
+   mtctr   r13
+   GET_PACA(r13)
+   std r10,PACA_EXGEN+EX_R10(r13)
+   INTERRUPT_TO_KERNEL
+   KVMTEST EXC_STD 0xc00 /* uses r10, branch to do_kvm_0xc00_system_call */
+   HMT_MEDIUM
+   mfctr   r9
 #else
-#define SYSCALL_KVMTEST
\
-   HMT_MEDIUM; \
-   mr  r9,r13; \
-   GET_PACA(r13);  \
-   INTERRUPT_TO_KERNEL;
+   HMT_MEDIUM
+   mr  r9,r13
+   GET_PACA(r13)
+   INTERRUPT_TO_KERNEL
 #endif
-   
-#define LOAD_SYSCALL_HANDLER(reg)  \
-   __LOAD_HANDLER(reg, system_call_common)
-
-/*
- * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9,
- * and HMT_MEDIUM.
- */
-#define SYSCALL_REAL   \
-   mfspr   r11,SPRN_SRR0 ; \
-   mfspr   r12,SPRN_SRR1 ; \
-   LOAD_SYSCALL_HANDLER(r10) ; \
-   mtspr   SPRN_SRR0,r10 ; \
-   ld  r10,PACAKMSR(r13) ; \
-   mtspr   SPRN_SRR1,r10 ; \
-   RFI_TO_KERNEL ; \
-   b   . ; /* prevent speculative execution */
 
 #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH
-#define SYSCALL_FASTENDIAN_TEST\
-BEGIN_FTR_SECTION  \
-   cmpdi   r0,0x1ebe ; \
-   beq-1f ;\
-END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \
-
-#define SYSCALL_FASTENDIAN \
-   /* Fast LE/BE switch system call */ \
-1: mfspr   r12,SPRN_SRR1 ; \
-   xorir12,r12,MSR_LE ;\
-   mtspr   SPRN_SRR1,r12 ; \
-   mr  r13,r9 ;\
-   RFI_TO_USER ;   /* return to userspace */   \
-   b   . ; /* prevent speculative execution */
-#else
-#define SYSCALL_FASTENDIAN_TEST
-#define SYSCALL_FASTENDIAN
-#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */
+BEGIN_FTR_SECTION
+   cmpdi   r0,0x1ebe
+   beq-1f
+END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE)
+#endif
+   /* We reach here with PACA in r13, r13 in r9, and HMT_MEDIUM. */
 
-#if defined(CONFIG_RELOCATABLE)
+   .if \real
+   mfspr   r11,SPRN_SRR0
+   mfspr   r12,SPRN_SRR1
+   __LOAD_HANDLER(r10, system_call_common)
+   mtspr   SPRN_SRR0,r10
+   ld  r10,PACAKMSR(r13)
+   mtspr   SPRN_SRR1,r10
+   RFI_TO_KERNEL
+   b   .   /* prevent speculative execution */
+   .else
+#ifdef CONFIG_RELOCATABLE
/*
 * We can't branch directly so we do it via the CTR which
 * is volatile across system calls.
 */
-#define SYSCALL_VIRT   \
-   LOAD_SYSCALL_HANDLER(r10) ; \
-   mtctr   r10 ;   \
-   mfspr   r11,SPRN_SRR0 ; \
- 

[PATCH v2 21/52] powerpc/64s/exception: remove unused BRANCH_TO_COMMON

2019-06-19 Thread Nicholas Piggin
---
 arch/powerpc/kernel/exceptions-64s.S | 8 
 1 file changed, 8 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 87db0f5a67c4..f1f1278cb131 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -224,20 +224,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
bctr
 
 #ifdef CONFIG_RELOCATABLE
-#define BRANCH_TO_COMMON(reg, label)   \
-   __LOAD_HANDLER(reg, label); \
-   mtctr   reg;\
-   bctr
-
 #define BRANCH_LINK_TO_FAR(label)  \
__LOAD_FAR_HANDLER(r12, label); \
mtctr   r12;\
bctrl
 
 #else
-#define BRANCH_TO_COMMON(reg, label)   \
-   b   label
-
 #define BRANCH_LINK_TO_FAR(label)  \
bl  label
 #endif
-- 
2.20.1



[PATCH v2 20/52] powerpc/64s/exception: remove __BRANCH_TO_KVM

2019-06-19 Thread Nicholas Piggin
No generated code change.
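
For context, the __LOAD_FAR_HANDLER sequence inlined here builds a
full 32-bit offset from the kernel base, because kvmppc_interrupt
lives outside the head section; roughly:

	ld	r9,PACAKBASE(r13)
	ori	r9,r9,(ABS_ADDR(kvmppc_interrupt))@l
	addis	r9,r9,(ABS_ADDR(kvmppc_interrupt))@h
	mtctr	r9
	bctr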

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 43 
 1 file changed, 18 insertions(+), 25 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 0a5a2d9dde90..87db0f5a67c4 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -243,29 +243,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #endif
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
-
-#ifdef CONFIG_RELOCATABLE
-/*
- * KVM requires __LOAD_FAR_HANDLER.
- *
- * __BRANCH_TO_KVM_EXIT branches are also a special case because they
- * explicitly use r9 then reload it from PACA before branching. Hence
- * the double-underscore.
- */
-#define __BRANCH_TO_KVM_EXIT(area, label)  \
-   mfctr   r9; \
-   std r9,HSTATE_SCRATCH1(r13);\
-   __LOAD_FAR_HANDLER(r9, label);  \
-   mtctr   r9; \
-   ld  r9,area+EX_R9(r13); \
-   bctr
-
-#else
-#define __BRANCH_TO_KVM_EXIT(area, label)  \
-   ld  r9,area+EX_R9(r13); \
-   b   label
-#endif
-
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * If hv is possible, interrupts come in to the hv version
@@ -311,8 +288,24 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
.else
ori r12,r12,(\n)
.endif
-   /* This reloads r9 before branching to kvmppc_interrupt */
-   __BRANCH_TO_KVM_EXIT(\area, kvmppc_interrupt)
+
+#ifdef CONFIG_RELOCATABLE
+   /*
+* KVM requires __LOAD_FAR_HANDLER because kvmppc_interrupt lives
+* outside the head section. CONFIG_RELOCATABLE KVM expects CTR
+* to be saved in HSTATE_SCRATCH1.
+*/
+   mfctr   r9
+   std r9,HSTATE_SCRATCH1(r13)
+   __LOAD_FAR_HANDLER(r9, kvmppc_interrupt)
+   mtctr   r9
+   ld  r9,\area+EX_R9(r13)
+   bctr
+#else
+   ld  r9,\area+EX_R9(r13)
+   b   kvmppc_interrupt
+#endif
+
 
.if \skip
 89:mtocrf  0x80,r9
-- 
2.20.1



[PATCH v2 19/52] powerpc/64s/exception: move head-64.h code to exception-64s.S where it is used

2019-06-19 Thread Nicholas Piggin
No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h |   1 -
 arch/powerpc/include/asm/head-64.h   | 252 ---
 arch/powerpc/kernel/exceptions-64s.S | 251 ++
 3 files changed, 251 insertions(+), 253 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 9e6712099f7a..dc6a5ccac965 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -30,7 +30,6 @@
  * exception handlers (including pSeries LPAR) and iSeries LPAR
  * implementations as possible.
  */
-#include 
 #include 
 
 /* PACA save area offsets (exgen, exmc, etc) */
diff --git a/arch/powerpc/include/asm/head-64.h 
b/arch/powerpc/include/asm/head-64.h
index dc1940c94a86..a466765709a9 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -169,53 +169,6 @@ end_##sname:
 
 #define ABS_ADDR(label) (label - fs_label + fs_start)
 
-/*
- * Following are the BOOK3S exception handler helper macros.
- * Handlers come in a number of types, and each type has a number of varieties.
- *
- * EXC_REAL_* - real, unrelocated exception vectors
- * EXC_VIRT_* - virt (AIL), unrelocated exception vectors
- * TRAMP_REAL_*   - real, unrelocated helpers (virt can call these)
- * TRAMP_VIRT_*   - virt, unreloc helpers (in practice, real can use)
- * TRAMP_KVM  - KVM handlers that get put into real, unrelocated
- * EXC_COMMON - virt, relocated common handlers
- *
- * The EXC handlers are given a name, and branch to name_common, or the
- * appropriate KVM or masking function. Vector handler varieties are as
- * follows:
- *
- * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception
- *
- * EXC_{REAL|VIRT}  - standard exception
- *
- * EXC_{REAL|VIRT}_suffix
- * where _suffix is:
- *   - _MASKABLE   - maskable exception
- *   - _OOL- out of line with trampoline to common handler
- *   - _HV - HV exception
- *
- * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV
- *
- * The one unusual case is __EXC_REAL_OOL_HV_DIRECT, which is
- * an OOL vector that branches to a specified handler rather than the usual
- * trampoline that goes to common. It, and other underscore macros, should
- * be used with care.
- *
- * KVM handlers come in the following varieties:
- * TRAMP_KVM
- * TRAMP_KVM_SKIP
- * TRAMP_KVM_HV
- * TRAMP_KVM_HV_SKIP
- *
- * COMMON handlers come in the following varieties:
- * EXC_COMMON_BEGIN/END - used to open-code the handler
- * EXC_COMMON
- * EXC_COMMON_ASYNC
- *
- * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM
- * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers.
- */
-
 #define EXC_REAL_BEGIN(name, start, size)  \
FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, 
exc_real_##start##_##name, start, size)
 
@@ -257,211 +210,6 @@ end_##sname:
FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, 
exc_virt_##start##_##unused, start, size); \
FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, 
exc_virt_##start##_##unused, start, size)
 
-
-#define __EXC_REAL(name, start, size, area)\
-   EXC_REAL_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);  /* save r13 */  \
-   EXCEPTION_PROLOG_0 area ;   \
-   EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0 ; \
-   EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
-   EXC_REAL_END(name, start, size)
-
-#define EXC_REAL(name, start, size)\
-   __EXC_REAL(name, start, size, PACA_EXGEN)
-
-#define __EXC_VIRT(name, start, size, realvec, area)   \
-   EXC_VIRT_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);/* save r13 */\
-   EXCEPTION_PROLOG_0 area ;   \
-   EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0;\
-   EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\
-   EXC_VIRT_END(name, start, size)
-
-#define EXC_VIRT(name, start, size, realvec)   \
-   __EXC_VIRT(name, start, size, realvec, PACA_EXGEN)
-
-#define EXC_REAL_MASKABLE(name, start, size, bitmask)  \
-   EXC_REAL_BEGIN(name, start, size);  \
-   SET_SCRATCH0(r13);/* save r13 */\
-   EXCEPTION_PROLOG_0 PACA_EXGEN ; \
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, bitmask ; \
-   EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \
-   EXC_REAL_END(name, start, size)
-
-#define EXC_VIRT_MASKABLE(name, start, size, real

[PATCH v2 18/52] powerpc/64s/exception: move exception-64s.h code to exception-64s.S where it is used

2019-06-19 Thread Nicholas Piggin
No generated code change.
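
Among the moved macros, LOAD_HANDLER is the cheap two-instruction
variant; it only works because the label is within 64KB of a
64K-aligned kernelbase:

	ld	reg,PACAKBASE(r13)	/* get high part of &label */
	ori	reg,reg,FIXED_SYMBOL_ABS_ADDR(label)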

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 430 --
 arch/powerpc/kernel/exceptions-64s.S | 431 +++
 2 files changed, 431 insertions(+), 430 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index e996ffe68cf3..9e6712099f7a 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -146,436 +146,6 @@
hrfid;  \
b   hrfi_flush_fallback
 
-/*
- * We're short on space and time in the exception prolog, so we can't
- * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
- * Instead we get the base of the kernel from paca->kernelbase and or in the 
low
- * part of label. This requires that the label be within 64KB of kernelbase, 
and
- * that kernelbase be 64K aligned.
- */
-#define LOAD_HANDLER(reg, label)   \
-   ld  reg,PACAKBASE(r13); /* get high part of &label */   \
-   ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
-
-#define __LOAD_HANDLER(reg, label) \
-   ld  reg,PACAKBASE(r13); \
-   ori reg,reg,(ABS_ADDR(label))@l
-
-/*
- * Branches from unrelocated code (e.g., interrupts) to labels outside
- * head-y require >64K offsets.
- */
-#define __LOAD_FAR_HANDLER(reg, label) \
-   ld  reg,PACAKBASE(r13); \
-   ori reg,reg,(ABS_ADDR(label))@l;\
-   addis   reg,reg,(ABS_ADDR(label))@h
-
-/* Exception register prefixes */
-#define EXC_HV 1
-#define EXC_STD0
-
-#if defined(CONFIG_RELOCATABLE)
-/*
- * If we support interrupts with relocation on AND we're a relocatable kernel,
- * we need to use CTR to get to the 2nd level handler.  So, save/restore it
- * when required.
- */
-#define SAVE_CTR(reg, area)mfctr   reg ;   std reg,area+EX_CTR(r13)
-#define GET_CTR(reg, area) ld  reg,area+EX_CTR(r13)
-#define RESTORE_CTR(reg, area) ld  reg,area+EX_CTR(r13) ; mtctr reg
-#else
-/* ...else CTR is unused and in register. */
-#define SAVE_CTR(reg, area)
-#define GET_CTR(reg, area) mfctr   reg
-#define RESTORE_CTR(reg, area)
-#endif
-
-/*
- * PPR save/restore macros used in exceptions_64s.S  
- * Used for P7 or later processors
- */
-#define SAVE_PPR(area, ra) \
-BEGIN_FTR_SECTION_NESTED(940)  \
-   ld  ra,area+EX_PPR(r13);/* Read PPR from paca */\
-   std ra,_PPR(r1);\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940)
-
-#define RESTORE_PPR_PACA(area, ra) \
-BEGIN_FTR_SECTION_NESTED(941)  \
-   ld  ra,area+EX_PPR(r13);\
-   mtspr   SPRN_PPR,ra;\
-END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941)
-
-/*
- * Get an SPR into a register if the CPU has the given feature
- */
-#define OPT_GET_SPR(ra, spr, ftr)  \
-BEGIN_FTR_SECTION_NESTED(943)  \
-   mfspr   ra,spr; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Set an SPR from a register if the CPU has the given feature
- */
-#define OPT_SET_SPR(ra, spr, ftr)  \
-BEGIN_FTR_SECTION_NESTED(943)  \
-   mtspr   spr,ra; \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-/*
- * Save a register to the PACA if the CPU has the given feature
- */
-#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr)  \
-BEGIN_FTR_SECTION_NESTED(943)  \
-   std ra,offset(r13); \
-END_FTR_SECTION_NESTED(ftr,ftr,943)
-
-.macro EXCEPTION_PROLOG_0 area
-   GET_PACA(r13)
-   std r9,\area\()+EX_R9(r13)  /* save r9 */
-   OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
-   HMT_MEDIUM
-   std r10,\area\()+EX_R10(r13)/* save r10 - r12 */
-   OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
-.endm
-
-.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask
-   OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
-   OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
-   INTERRUPT_TO_KERNEL
-   SAVE_CTR(r10, \area\())
-   mfcrr9
-   .if \kvm
-   KVMTEST \hsrr \vec
-   .endif
-   .if \bitmask
-   lbz r10,PACAIRQ

[PATCH v2 17/52] powerpc/64s/exception: move KVM related code together

2019-06-19 Thread Nicholas Piggin
No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 40 +---
 1 file changed, 21 insertions(+), 19 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 73705421f423..e996ffe68cf3 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -335,18 +335,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #endif
 .endm
 
-
-#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
-/*
- * If hv is possible, interrupts come in to the hv version
- * of the kvmppc_interrupt code, which then jumps to the PR handler,
- * kvmppc_interrupt_pr, if the guest is a PR guest.
- */
-#define kvmppc_interrupt kvmppc_interrupt_hv
-#else
-#define kvmppc_interrupt kvmppc_interrupt_pr
-#endif
-
 /*
  * Branch to label using its 0xC000 address. This results in instruction
  * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned
@@ -371,6 +359,17 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
mtctr   r12;\
bctrl
 
+#else
+#define BRANCH_TO_COMMON(reg, label)   \
+   b   label
+
+#define BRANCH_LINK_TO_FAR(label)  \
+   bl  label
+#endif
+
+#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
+
+#ifdef CONFIG_RELOCATABLE
 /*
  * KVM requires __LOAD_FAR_HANDLER.
  *
@@ -387,19 +386,22 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
bctr
 
 #else
-#define BRANCH_TO_COMMON(reg, label)   \
-   b   label
-
-#define BRANCH_LINK_TO_FAR(label)  \
-   bl  label
-
 #define __BRANCH_TO_KVM_EXIT(area, label)  \
ld  r9,area+EX_R9(r13); \
b   label
+#endif
 
+#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
+/*
+ * If hv is possible, interrupts come in to the hv version
+ * of the kvmppc_interrupt code, which then jumps to the PR handler,
+ * kvmppc_interrupt_pr, if the guest is a PR guest.
+ */
+#define kvmppc_interrupt kvmppc_interrupt_hv
+#else
+#define kvmppc_interrupt kvmppc_interrupt_pr
 #endif
 
-#ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 .macro KVMTEST hsrr, n
lbz r10,HSTATE_IN_GUEST(r13)
cmpwi   r10,0
-- 
2.20.1



[PATCH v2 16/52] powerpc/64s/exception: remove STD_EXCEPTION_COMMON variants

2019-06-19 Thread Nicholas Piggin
These are only called in one place each.

No generated code change.
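
Their single call sites are the EXC_COMMON and EXC_COMMON_ASYNC
wrappers, used for example as:

	EXC_COMMON(trap_0b_common, 0xb00, unknown_exception)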

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 22 --
 arch/powerpc/include/asm/head-64.h   | 19 +--
 2 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 6de3c393ddf7..73705421f423 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -555,28 +555,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
EXCEPTION_PROLOG_COMMON_2(area);\
EXCEPTION_PROLOG_COMMON_3(trap)
 
-#define STD_EXCEPTION_COMMON(trap, hdlr)   \
-   EXCEPTION_COMMON(PACA_EXGEN, trap); \
-   bl  save_nvgprs;\
-   RECONCILE_IRQ_STATE(r10, r11);  \
-   addir3,r1,STACK_FRAME_OVERHEAD; \
-   bl  hdlr;   \
-   b   ret_from_except
-
-/*
- * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
- * in the idle task and therefore need the special idle handling
- * (finish nap and runlatch)
- */
-#define STD_EXCEPTION_COMMON_ASYNC(trap, hdlr) \
-   EXCEPTION_COMMON(PACA_EXGEN, trap); \
-   FINISH_NAP; \
-   RECONCILE_IRQ_STATE(r10, r11);  \
-   RUNLATCH_ON;\
-   addir3,r1,STACK_FRAME_OVERHEAD; \
-   bl  hdlr;   \
-   b   ret_from_except_lite
-
 /*
  * When the idle code in power4_idle puts the CPU into NAP mode,
  * it has to do so in a loop, and relies on the external interrupt
diff --git a/arch/powerpc/include/asm/head-64.h 
b/arch/powerpc/include/asm/head-64.h
index 54db05afb80f..dc1940c94a86 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -441,11 +441,26 @@ end_##sname:
 
 #define EXC_COMMON(name, realvec, hdlr)
\
EXC_COMMON_BEGIN(name); \
-   STD_EXCEPTION_COMMON(realvec, hdlr)
+   EXCEPTION_COMMON(PACA_EXGEN, realvec);  \
+   bl  save_nvgprs;\
+   RECONCILE_IRQ_STATE(r10, r11);  \
+   addir3,r1,STACK_FRAME_OVERHEAD; \
+   bl  hdlr;   \
+   b   ret_from_except
 
+/*
+ * Like EXC_COMMON, but for exceptions that can occur in the idle task and
+ * therefore need the special idle handling (finish nap and runlatch)
+ */
 #define EXC_COMMON_ASYNC(name, realvec, hdlr)  \
EXC_COMMON_BEGIN(name); \
-   STD_EXCEPTION_COMMON_ASYNC(realvec, hdlr)
+   EXCEPTION_COMMON(PACA_EXGEN, realvec);  \
+   FINISH_NAP; \
+   RECONCILE_IRQ_STATE(r10, r11);  \
+   RUNLATCH_ON;\
+   addir3,r1,STACK_FRAME_OVERHEAD; \
+   bl  hdlr;   \
+   b   ret_from_except_lite
 
 #endif /* __ASSEMBLY__ */
 
-- 
2.20.1



[PATCH v2 15/52] powerpc/64s/exception: move EXCEPTION_PROLOG_2* to a more logical place

2019-06-19 Thread Nicholas Piggin
No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 113 ---
 1 file changed, 57 insertions(+), 56 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 0bb0310b794f..6de3c393ddf7 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -170,62 +170,6 @@
ori reg,reg,(ABS_ADDR(label))@l;\
addis   reg,reg,(ABS_ADDR(label))@h
 
-.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri
-   ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
-   .if ! \set_ri
-   xorir10,r10,MSR_RI  /* Clear MSR_RI */
-   .endif
-   .if \hsrr
-   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
-   .else
-   mfspr   r11,SPRN_SRR0   /* save SRR0 */
-   .endif
-   LOAD_HANDLER(r12, \label\())
-   .if \hsrr
-   mtspr   SPRN_HSRR0,r12
-   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
-   mtspr   SPRN_HSRR1,r10
-   HRFI_TO_KERNEL
-   .else
-   mtspr   SPRN_SRR0,r12
-   mfspr   r12,SPRN_SRR1   /* and SRR1 */
-   mtspr   SPRN_SRR1,r10
-   RFI_TO_KERNEL
-   .endif
-   b   .   /* prevent speculative execution */
-.endm
-
-.macro EXCEPTION_PROLOG_2_VIRT label, hsrr
-#ifdef CONFIG_RELOCATABLE
-   .if \hsrr
-   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
-   .else
-   mfspr   r11,SPRN_SRR0   /* save SRR0 */
-   .endif
-   LOAD_HANDLER(r12, \label\())
-   mtctr   r12
-   .if \hsrr
-   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
-   .else
-   mfspr   r12,SPRN_SRR1   /* and HSRR1 */
-   .endif
-   li  r10,MSR_RI
-   mtmsrd  r10,1   /* Set RI (EE=0) */
-   bctr
-#else
-   .if \hsrr
-   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
-   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
-   .else
-   mfspr   r11,SPRN_SRR0   /* save SRR0 */
-   mfspr   r12,SPRN_SRR1   /* and SRR1 */
-   .endif
-   li  r10,MSR_RI
-   mtmsrd  r10,1   /* Set RI (EE=0) */
-   b   \label
-#endif
-.endm
-
 /* Exception register prefixes */
 #define EXC_HV 1
 #define EXC_STD0
@@ -335,6 +279,63 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,\area\()+EX_R13(r13)
 .endm
 
+.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri
+   ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
+   .if ! \set_ri
+   xorir10,r10,MSR_RI  /* Clear MSR_RI */
+   .endif
+   .if \hsrr
+   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
+   .else
+   mfspr   r11,SPRN_SRR0   /* save SRR0 */
+   .endif
+   LOAD_HANDLER(r12, \label\())
+   .if \hsrr
+   mtspr   SPRN_HSRR0,r12
+   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
+   mtspr   SPRN_HSRR1,r10
+   HRFI_TO_KERNEL
+   .else
+   mtspr   SPRN_SRR0,r12
+   mfspr   r12,SPRN_SRR1   /* and SRR1 */
+   mtspr   SPRN_SRR1,r10
+   RFI_TO_KERNEL
+   .endif
+   b   .   /* prevent speculative execution */
+.endm
+
+.macro EXCEPTION_PROLOG_2_VIRT label, hsrr
+#ifdef CONFIG_RELOCATABLE
+   .if \hsrr
+   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
+   .else
+   mfspr   r11,SPRN_SRR0   /* save SRR0 */
+   .endif
+   LOAD_HANDLER(r12, \label\())
+   mtctr   r12
+   .if \hsrr
+   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
+   .else
+   mfspr   r12,SPRN_SRR1   /* and HSRR1 */
+   .endif
+   li  r10,MSR_RI
+   mtmsrd  r10,1   /* Set RI (EE=0) */
+   bctr
+#else
+   .if \hsrr
+   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
+   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
+   .else
+   mfspr   r11,SPRN_SRR0   /* save SRR0 */
+   mfspr   r12,SPRN_SRR1   /* and SRR1 */
+   .endif
+   li  r10,MSR_RI
+   mtmsrd  r10,1   /* Set RI (EE=0) */
+   b   \label
+#endif
+.endm
+
+
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * If hv is possible, interrupts come in to the hv version
-- 
2.20.1



[PATCH v2 14/52] powerpc/64s/exception: improve 0x500 handler code

2019-06-19 Thread Nicholas Piggin
After the previous cleanup, it becomes possible to consolidate some
common code outside the runtime alternate patching. Also remove
unused labels.

This results in some code change, but the runtime instruction
sequence is unchanged.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 16 
 1 file changed, 4 insertions(+), 12 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 5828d440da49..d35a9fa4651e 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -746,32 +746,24 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX)
 
 
 EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100)
-   .globl hardware_interrupt_hv
-hardware_interrupt_hv:
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0 PACA_EXGEN
BEGIN_FTR_SECTION
-   SET_SCRATCH0(r13)   /* save r13 */
-   EXCEPTION_PROLOG_0 PACA_EXGEN
EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, IRQS_DISABLED
EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_HV, 1
FTR_SECTION_ELSE
-   SET_SCRATCH0(r13)   /* save r13 */
-   EXCEPTION_PROLOG_0 PACA_EXGEN
EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, IRQS_DISABLED
EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_STD, 1
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
 EXC_REAL_END(hardware_interrupt, 0x500, 0x100)
 
 EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100)
-   .globl hardware_interrupt_relon_hv
-hardware_interrupt_relon_hv:
+   SET_SCRATCH0(r13)   /* save r13 */
+   EXCEPTION_PROLOG_0 PACA_EXGEN
BEGIN_FTR_SECTION
-   SET_SCRATCH0(r13)   /* save r13 */
-   EXCEPTION_PROLOG_0 PACA_EXGEN
EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, IRQS_DISABLED
EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_HV
FTR_SECTION_ELSE
-   SET_SCRATCH0(r13)   /* save r13 */
-   EXCEPTION_PROLOG_0 PACA_EXGEN
EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, IRQS_DISABLED
EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_STD
ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
-- 
2.20.1



[PATCH v2 13/52] powerpc/64s/exception: unwind exception-64s.h macros

2019-06-19 Thread Nicholas Piggin
Many of these macros just specify 1-4 lines and are only called a
few times each at most, often just once. Remove this indirection.
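
As an example of the indirection being removed, a vector written as

	STD_EXCEPTION(0x300, data_access_common)

now open-codes its prolog directly (area, vector and handler as used
by the real 0x300 handler):

	SET_SCRATCH0(r13)	/* save r13 */
	EXCEPTION_PROLOG_0 PACA_EXGEN
	EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 0
	EXCEPTION_PROLOG_2_REAL data_access_common, EXC_STD, 1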

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 101 ---
 arch/powerpc/include/asm/head-64.h   |  76 -
 arch/powerpc/kernel/exceptions-64s.S |  44 +-
 3 files changed, 82 insertions(+), 139 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 24fc0104c9d3..0bb0310b794f 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -226,17 +226,6 @@
 #endif
 .endm
 
-/*
- * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to
- * rfid. Save CTR in case we're CONFIG_RELOCATABLE, in which case
- * EXCEPTION_PROLOG_2_VIRT will be using CTR.
- */
-#define EXCEPTION_RELON_PROLOG(area, label, hsrr, kvm, vec)\
-   SET_SCRATCH0(r13);  /* save r13 */  \
-   EXCEPTION_PROLOG_0 area ;   \
-   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\
-   EXCEPTION_PROLOG_2_VIRT label, hsrr
-
 /* Exception register prefixes */
 #define EXC_HV 1
 #define EXC_STD0
@@ -346,12 +335,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,\area\()+EX_R13(r13)
 .endm
 
-#define EXCEPTION_PROLOG(area, label, hsrr, kvm, vec)  \
-   SET_SCRATCH0(r13);  /* save r13 */  \
-   EXCEPTION_PROLOG_0 area ;   \
-   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\
-   EXCEPTION_PROLOG_2_REAL label, hsrr, 1
-
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
  * If hv is possible, interrupts come in to the hv version
@@ -415,12 +398,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #endif
 
-/* Do not enable RI */
-#define EXCEPTION_PROLOG_NORI(area, label, hsrr, kvm, vec) \
-   EXCEPTION_PROLOG_0 area ;   \
-   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\
-   EXCEPTION_PROLOG_2_REAL label, hsrr, 0
-
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 .macro KVMTEST hsrr, n
lbz r10,HSTATE_IN_GUEST(r13)
@@ -557,84 +534,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,RESULT(r1); /* clear regs->result   */ \
std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame  */
 
-/*
- * Exception vectors.
- */
-#define STD_EXCEPTION(vec, label)  \
-   EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_STD, 1, vec);
-
-/* Version of above for when we have to branch out-of-line */
-#define __OOL_EXCEPTION(vec, label, hdlr)  \
-   SET_SCRATCH0(r13);  \
-   EXCEPTION_PROLOG_0 PACA_EXGEN ; \
-   b hdlr
-
-#define STD_EXCEPTION_OOL(vec, label)  \
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0 ; \
-   EXCEPTION_PROLOG_2_REAL label, EXC_STD, 1
-
-#define STD_EXCEPTION_HV(loc, vec, label)  \
-   EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_HV, 1, vec)
-
-#define STD_EXCEPTION_HV_OOL(vec, label)   \
-   EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0 ;  \
-   EXCEPTION_PROLOG_2_REAL label, EXC_HV, 1
-
-#define STD_RELON_EXCEPTION(loc, vec, label)   \
-   /* No guest interrupts come through here */ \
-   EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_STD, 0, vec)
-
-#define STD_RELON_EXCEPTION_OOL(vec, label)\
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, vec, 0 ; \
-   EXCEPTION_PROLOG_2_VIRT label, EXC_STD
-
-#define STD_RELON_EXCEPTION_HV(loc, vec, label)\
-   EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_HV, 1, vec)
-
-#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \
-   EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0 ;  \
-   EXCEPTION_PROLOG_2_VIRT label, EXC_HV
-
-#define __MASKABLE_EXCEPTION(vec, label, hsrr, kvm, bitmask)   \
-   SET_SCRATCH0(r13);/* save r13 */\
-   EXCEPTION_PROLOG_0 PACA_EXGEN ; \
-   EXCEPTION_PROLOG_1 hsrr, PACA_EXGEN, kvm, vec, bitmask ;\
-   EXCEPTION_PROLOG_2_REAL label, hsrr, 1
-
-#define MASKABLE_EXCEPTION(vec, label, bitmask)
\
-   __MASKABLE_EXCEPTION(vec, label, EXC_STD, 1, bitmask)
-
-#define MASKABLE_EXCEPTION_OOL(vec, label, bitmask)\
-   EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, bitmask ;   \
-   EXCEPTION_PROLOG_2_REAL label, EXC_STD, 1
-
-#define MASKABLE_EXCEPTION_HV(vec, label, bitmask) \
-   __MASK

[PATCH v2 12/52] powerpc/64s/exception: Move EXCEPTION_COMMON additions into callers

2019-06-19 Thread Nicholas Piggin
More cases of code insertion via macros that do not add a great
deal. All the additions have to be specified in the macro arguments,
so they can just as well go after the macro.
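
For instance, the FINISH_NAP/RECONCILE_IRQ_STATE/RUNLATCH_ON
"additions" of an async handler now simply follow the macro call (the
0x900 trap number is just for illustration):

	EXCEPTION_COMMON(PACA_EXGEN, 0x900)
	FINISH_NAP
	RECONCILE_IRQ_STATE(r10, r11)
	RUNLATCH_ON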

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 42 +++---
 arch/powerpc/include/asm/head-64.h   |  4 +--
 arch/powerpc/kernel/exceptions-64s.S | 45 +---
 3 files changed, 39 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index cc65e87cff2f..24fc0104c9d3 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -635,21 +635,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, bitmask ;\
EXCEPTION_PROLOG_2_VIRT label, EXC_HV
 
-/*
- * Our exception common code can be passed various "additions"
- * to specify the behaviour of interrupts, whether to kick the
- * runlatch, etc...
- */
-
-/*
- * This addition reconciles our actual IRQ state with the various software
- * flags that track it. This may call C code.
- */
-#define ADD_RECONCILE  RECONCILE_IRQ_STATE(r10,r11)
-
-#define ADD_NVGPRS \
-   bl  save_nvgprs
-
 #define RUNLATCH_ON\
 BEGIN_FTR_SECTION  \
ld  r3, PACA_THREAD_INFO(r13);  \
@@ -658,25 +643,22 @@ BEGIN_FTR_SECTION \
beqlppc64_runlatch_on_trampoline;   \
 END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
 
-#define EXCEPTION_COMMON(area, trap, label, additions) \
+#define EXCEPTION_COMMON(area, trap)   \
EXCEPTION_PROLOG_COMMON(trap, area);\
-   /* Volatile regs are potentially clobbered here */  \
-   additions
 
 /*
- * Exception where stack is already set in r1, r1 is saved in r10, and it
- * continues rather than returns.
+ * Exception where stack is already set in r1, r1 is saved in r10
  */
-#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, additions) \
+#define EXCEPTION_COMMON_STACK(area, trap) \
EXCEPTION_PROLOG_COMMON_1();\
kuap_save_amr_and_lock r9, r10, cr1;\
EXCEPTION_PROLOG_COMMON_2(area);\
-   EXCEPTION_PROLOG_COMMON_3(trap);\
-   /* Volatile regs are potentially clobbered here */  \
-   additions
+   EXCEPTION_PROLOG_COMMON_3(trap)
 
-#define STD_EXCEPTION_COMMON(trap, label, hdlr)\
-   EXCEPTION_COMMON(PACA_EXGEN, trap, label, ADD_NVGPRS;ADD_RECONCILE); \
+#define STD_EXCEPTION_COMMON(trap, hdlr)   \
+   EXCEPTION_COMMON(PACA_EXGEN, trap); \
+   bl  save_nvgprs;\
+   RECONCILE_IRQ_STATE(r10, r11);  \
addir3,r1,STACK_FRAME_OVERHEAD; \
bl  hdlr;   \
b   ret_from_except
@@ -686,9 +668,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
  * in the idle task and therefore need the special idle handling
  * (finish nap and runlatch)
  */
-#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr)  \
-   EXCEPTION_COMMON(PACA_EXGEN, trap, label,   \
-   FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON);  \
+#define STD_EXCEPTION_COMMON_ASYNC(trap, hdlr) \
+   EXCEPTION_COMMON(PACA_EXGEN, trap); \
+   FINISH_NAP; \
+   RECONCILE_IRQ_STATE(r10, r11);  \
+   RUNLATCH_ON;\
addir3,r1,STACK_FRAME_OVERHEAD; \
bl  hdlr;   \
b   ret_from_except_lite
diff --git a/arch/powerpc/include/asm/head-64.h 
b/arch/powerpc/include/asm/head-64.h
index bdd67a26e959..acd94fcf9f40 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -403,11 +403,11 @@ end_##sname:
 
 #define EXC_COMMON(name, realvec, hdlr)
\
EXC_COMMON_BEGIN(name); \
-   STD_EXCEPTION_COMMON(realvec, name, hdlr)
+   STD_EXCEPTION_COMMON(realvec, hdlr)
 
 #define EXC_COMMON_ASYNC(name, realvec, hdlr)  \
EXC_COMMON_BEGIN(name); \
-   STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr)
+   STD_EXCEPTION_COMMON_ASYNC(realvec, hdlr)
 
 #endif /* __ASSEMBLY__ */
 
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 87c4ebeb950c..72c178feaa98 100644
--- a/arch/powerpc/ke

[PATCH v2 11/52] powerpc/64s/exception: Move EXCEPTION_COMMON handler and return branches into callers

2019-06-19 Thread Nicholas Piggin
The aim is to reduce the amount of indirection it takes to get through
the exception handler macros, particularly where it provides little
code sharing.

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 26 
 arch/powerpc/kernel/exceptions-64s.S | 21 +++
 2 files changed, 26 insertions(+), 21 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index f19c2391cc36..cc65e87cff2f 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -658,31 +658,28 @@ BEGIN_FTR_SECTION \
beqlppc64_runlatch_on_trampoline;   \
 END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
 
-#define EXCEPTION_COMMON(area, trap, label, hdlr, ret, additions) \
+#define EXCEPTION_COMMON(area, trap, label, additions) \
EXCEPTION_PROLOG_COMMON(trap, area);\
/* Volatile regs are potentially clobbered here */  \
-   additions;  \
-   addir3,r1,STACK_FRAME_OVERHEAD; \
-   bl  hdlr;   \
-   b   ret
+   additions
 
 /*
  * Exception where stack is already set in r1, r1 is saved in r10, and it
  * continues rather than returns.
  */
-#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \
+#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, additions) \
EXCEPTION_PROLOG_COMMON_1();\
kuap_save_amr_and_lock r9, r10, cr1;\
EXCEPTION_PROLOG_COMMON_2(area);\
EXCEPTION_PROLOG_COMMON_3(trap);\
/* Volatile regs are potentially clobbered here */  \
-   additions;  \
-   addir3,r1,STACK_FRAME_OVERHEAD; \
-   bl  hdlr
+   additions
 
 #define STD_EXCEPTION_COMMON(trap, label, hdlr)\
-   EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \
-   ret_from_except, ADD_NVGPRS;ADD_RECONCILE)
+   EXCEPTION_COMMON(PACA_EXGEN, trap, label, ADD_NVGPRS;ADD_RECONCILE); \
+   addir3,r1,STACK_FRAME_OVERHEAD; \
+   bl  hdlr;   \
+   b   ret_from_except
 
 /*
  * Like STD_EXCEPTION_COMMON, but for exceptions that can occur
@@ -690,8 +687,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL)
  * (finish nap and runlatch)
  */
 #define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr)  \
-   EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \
-   ret_from_except_lite, FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON)
+   EXCEPTION_COMMON(PACA_EXGEN, trap, label,   \
+   FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON);  \
+   addir3,r1,STACK_FRAME_OVERHEAD; \
+   bl  hdlr;   \
+   b   ret_from_except_lite
 
 /*
  * When the idle code in power4_idle puts the CPU into NAP mode,
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 02d974b71f44..87c4ebeb950c 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -195,9 +195,10 @@ EXC_COMMON_BEGIN(system_reset_common)
mr  r10,r1
ld  r1,PACA_NMI_EMERG_SP(r13)
subir1,r1,INT_FRAME_SIZE
-   EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100,
-   system_reset, system_reset_exception,
-   ADD_NVGPRS;ADD_RECONCILE_NMI)
+   EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, system_reset,
+   ADD_NVGPRS;ADD_RECONCILE_NMI)
+   addir3,r1,STACK_FRAME_OVERHEAD
+   bl  system_reset_exception
 
/* This (and MCE) can be simplified with mtmsrd L=1 */
/* Clear MSR_RI before setting SRR0 and SRR1. */
@@ -1171,8 +1172,11 @@ hmi_exception_after_realmode:
b   tramp_real_hmi_exception
 
 EXC_COMMON_BEGIN(hmi_exception_common)
-EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception,
-ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
+EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common,
+   FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON)
+   addir3,r1,STACK_FRAME_OVERHEAD
+   bl  handle_hmi_exception
+   b   ret_from_except
 
 EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED)
 EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED)
@@ -1467,9 +1471,10 @@ EXC_COMMON_BEGIN(soft_nmi_common)
mr  r10,r1
ld  r1,PACAEMERGSP(r13)
subir1,r1,INT_FRAME_SIZE
-   EXCEPTION_CO

[PATCH v2 10/52] powerpc/64s/exception: Make EXCEPTION_PROLOG_0 a gas macro for consistency with others

2019-06-19 Thread Nicholas Piggin
No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 25 
 arch/powerpc/kernel/exceptions-64s.S | 24 +++
 2 files changed, 25 insertions(+), 24 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 1d8fc085e845..f19c2391cc36 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -233,7 +233,7 @@
  */
 #define EXCEPTION_RELON_PROLOG(area, label, hsrr, kvm, vec)\
SET_SCRATCH0(r13);  /* save r13 */  \
-   EXCEPTION_PROLOG_0(area);   \
+   EXCEPTION_PROLOG_0 area ;   \
EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\
EXCEPTION_PROLOG_2_VIRT label, hsrr
 
@@ -297,13 +297,14 @@ BEGIN_FTR_SECTION_NESTED(943) 
\
std ra,offset(r13); \
 END_FTR_SECTION_NESTED(ftr,ftr,943)
 
-#define EXCEPTION_PROLOG_0(area)   \
-   GET_PACA(r13);  \
-   std r9,area+EX_R9(r13); /* save r9 */   \
-   OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \
-   HMT_MEDIUM; \
-   std r10,area+EX_R10(r13);   /* save r10 - r12 */\
+.macro EXCEPTION_PROLOG_0 area
+   GET_PACA(r13)
+   std r9,\area\()+EX_R9(r13)  /* save r9 */
+   OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR)
+   HMT_MEDIUM
+   std r10,\area\()+EX_R10(r13)/* save r10 - r12 */
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
+.endm
 
 .macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask
OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
@@ -347,7 +348,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #define EXCEPTION_PROLOG(area, label, hsrr, kvm, vec)  \
SET_SCRATCH0(r13);  /* save r13 */  \
-   EXCEPTION_PROLOG_0(area);   \
+   EXCEPTION_PROLOG_0 area ;   \
EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\
EXCEPTION_PROLOG_2_REAL label, hsrr, 1
 
@@ -416,7 +417,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 /* Do not enable RI */
 #define EXCEPTION_PROLOG_NORI(area, label, hsrr, kvm, vec) \
-   EXCEPTION_PROLOG_0(area);   \
+   EXCEPTION_PROLOG_0 area ;   \
EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\
EXCEPTION_PROLOG_2_REAL label, hsrr, 0
 
@@ -565,7 +566,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 /* Version of above for when we have to branch out-of-line */
 #define __OOL_EXCEPTION(vec, label, hdlr)  \
SET_SCRATCH0(r13);  \
-   EXCEPTION_PROLOG_0(PACA_EXGEN); \
+   EXCEPTION_PROLOG_0 PACA_EXGEN ; \
b hdlr
 
 #define STD_EXCEPTION_OOL(vec, label)  \
@@ -596,7 +597,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #define __MASKABLE_EXCEPTION(vec, label, hsrr, kvm, bitmask)   \
SET_SCRATCH0(r13);/* save r13 */\
-   EXCEPTION_PROLOG_0(PACA_EXGEN); \
+   EXCEPTION_PROLOG_0 PACA_EXGEN ; \
EXCEPTION_PROLOG_1 hsrr, PACA_EXGEN, kvm, vec, bitmask ;\
EXCEPTION_PROLOG_2_REAL label, hsrr, 1
 
@@ -616,7 +617,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #define __MASKABLE_RELON_EXCEPTION(vec, label, hsrr, kvm, bitmask) \
SET_SCRATCH0(r13);/* save r13 */\
-   EXCEPTION_PROLOG_0(PACA_EXGEN); \
+   EXCEPTION_PROLOG_0 PACA_EXGEN ; \
EXCEPTION_PROLOG_1 hsrr, PACA_EXGEN, kvm, vec, bitmask ;\
EXCEPTION_PROLOG_2_VIRT label, hsrr
 
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 484d0710ca08..02d974b71f44 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -109,7 +109,7 @@ EXC_VIRT_NONE(0x4000, 0x100)
 
 EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
SET_SCRATCH0(r13)
-   EXCEPTION_PROLOG_0(PACA_EXNMI)
+   EXCEPTION_PROLOG_0 PACA_EXNMI
 
/* This is EXCEPTION_PROLOG_1 with the idle feature section added */
OPT_SAVE_REG_TO_PACA(PACA_EXNMI+EX_PPR, r9, CPU_FTR_HAS_PPR)
@@ -266,7 +266,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100)
 * vector

[PATCH v2 09/52] powerpc/64s/exception: KVM handler can set the HSRR trap bit

2019-06-19 Thread Nicholas Piggin
Move the KVM trap HSRR bit into the KVM handler, where it can be
conditionally applied when the hsrr parameter is set (HSRR variants
have the 0x2 bit added to their trap number).

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 5 +
 arch/powerpc/include/asm/head-64.h   | 7 ++-
 2 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 737c37d1df4b..1d8fc085e845 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -449,7 +449,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
ld  r10,\area+EX_R10(r13)
std r12,HSTATE_SCRATCH0(r13)
sldir12,r9,32
+   /* HSRR variants have the 0x2 bit added to their trap number */
+   .if \hsrr
+   ori r12,r12,(\n + 0x2)
+   .else
ori r12,r12,(\n)
+   .endif
/* This reloads r9 before branching to kvmppc_interrupt */
__BRANCH_TO_KVM_EXIT(\area, kvmppc_interrupt)
 
diff --git a/arch/powerpc/include/asm/head-64.h 
b/arch/powerpc/include/asm/head-64.h
index 518d9758b41e..bdd67a26e959 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -393,16 +393,13 @@ end_##sname:
TRAMP_KVM_BEGIN(do_kvm_##n);\
KVM_HANDLER area, EXC_STD, n, 1
 
-/*
- * HV variant exceptions get the 0x2 bit added to their trap number.
- */
 #define TRAMP_KVM_HV(area, n)  \
TRAMP_KVM_BEGIN(do_kvm_H##n);   \
-   KVM_HANDLER area, EXC_HV, n + 0x2, 0
+   KVM_HANDLER area, EXC_HV, n, 0
 
 #define TRAMP_KVM_HV_SKIP(area, n) \
TRAMP_KVM_BEGIN(do_kvm_H##n);   \
-   KVM_HANDLER area, EXC_HV, n + 0x2, 1
+   KVM_HANDLER area, EXC_HV, n, 1
 
 #define EXC_COMMON(name, realvec, hdlr)
\
EXC_COMMON_BEGIN(name); \
-- 
2.20.1



[PATCH v2 08/52] powerpc/64s/exception: merge KVM handler and skip variants

2019-06-19 Thread Nicholas Piggin
Conditionally expand the skip case if it is specified.

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 28 +---
 arch/powerpc/include/asm/head-64.h   |  8 +++
 arch/powerpc/kernel/exceptions-64s.S |  2 +-
 3 files changed, 15 insertions(+), 23 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 74ddcb37156c..737c37d1df4b 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -431,26 +431,17 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
.endif
 .endm
 
-.macro KVM_HANDLER area, hsrr, n
+.macro KVM_HANDLER area, hsrr, n, skip
+   .if \skip
+   cmpwi   r10,KVM_GUEST_MODE_SKIP
+   beq 89f
+   .else
BEGIN_FTR_SECTION_NESTED(947)
ld  r10,\area+EX_CFAR(r13)
std r10,HSTATE_CFAR(r13)
END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947)
-   BEGIN_FTR_SECTION_NESTED(948)
-   ld  r10,\area+EX_PPR(r13)
-   std r10,HSTATE_PPR(r13)
-   END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948)
-   ld  r10,\area+EX_R10(r13)
-   std r12,HSTATE_SCRATCH0(r13)
-   sldir12,r9,32
-   ori r12,r12,(\n)
-   /* This reloads r9 before branching to kvmppc_interrupt */
-   __BRANCH_TO_KVM_EXIT(\area, kvmppc_interrupt)
-.endm
+   .endif
 
-.macro KVM_HANDLER_SKIP area, hsrr, n
-   cmpwi   r10,KVM_GUEST_MODE_SKIP
-   beq 89f
BEGIN_FTR_SECTION_NESTED(948)
ld  r10,\area+EX_PPR(r13)
std r10,HSTATE_PPR(r13)
@@ -461,6 +452,8 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
ori r12,r12,(\n)
/* This reloads r9 before branching to kvmppc_interrupt */
__BRANCH_TO_KVM_EXIT(\area, kvmppc_interrupt)
+
+   .if \skip
 89:mtocrf  0x80,r9
ld  r9,\area+EX_R9(r13)
ld  r10,\area+EX_R10(r13)
@@ -469,14 +462,13 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
.else
b   kvmppc_skip_interrupt
.endif
+   .endif
 .endm
 
 #else
 .macro KVMTEST hsrr, n
 .endm
-.macro KVM_HANDLER area, hsrr, n
-.endm
-.macro KVM_HANDLER_SKIP area, hsrr, n
+.macro KVM_HANDLER area, hsrr, n, skip
 .endm
 #endif
 
diff --git a/arch/powerpc/include/asm/head-64.h 
b/arch/powerpc/include/asm/head-64.h
index 4767d6c7b8fa..518d9758b41e 100644
--- a/arch/powerpc/include/asm/head-64.h
+++ b/arch/powerpc/include/asm/head-64.h
@@ -387,22 +387,22 @@ end_##sname:
 
 #define TRAMP_KVM(area, n) \
TRAMP_KVM_BEGIN(do_kvm_##n);\
-   KVM_HANDLER area, EXC_STD, n
+   KVM_HANDLER area, EXC_STD, n, 0
 
 #define TRAMP_KVM_SKIP(area, n)
\
TRAMP_KVM_BEGIN(do_kvm_##n);\
-   KVM_HANDLER_SKIP area, EXC_STD, n
+   KVM_HANDLER area, EXC_STD, n, 1
 
 /*
  * HV variant exceptions get the 0x2 bit added to their trap number.
  */
 #define TRAMP_KVM_HV(area, n)  \
TRAMP_KVM_BEGIN(do_kvm_H##n);   \
-   KVM_HANDLER area, EXC_HV, n + 0x2
+   KVM_HANDLER area, EXC_HV, n + 0x2, 0
 
 #define TRAMP_KVM_HV_SKIP(area, n) \
TRAMP_KVM_BEGIN(do_kvm_H##n);   \
-   KVM_HANDLER_SKIP area, EXC_HV, n + 0x2
+   KVM_HANDLER area, EXC_HV, n + 0x2, 1
 
 #define EXC_COMMON(name, realvec, hdlr)
\
EXC_COMMON_BEGIN(name); \
diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index 01f7bfe0653c..484d0710ca08 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1063,7 +1063,7 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00)
SET_SCRATCH0(r10)
std r9,PACA_EXGEN+EX_R9(r13)
mfcrr9
-   KVM_HANDLER PACA_EXGEN, EXC_STD, 0xc00
+   KVM_HANDLER PACA_EXGEN, EXC_STD, 0xc00, 0
 #endif
 
 
-- 
2.20.1



[PATCH v2 07/52] powerpc/64s/exception: consolidate maskable and non-maskable prologs

2019-06-19 Thread Nicholas Piggin
Conditionally expand the soft-masking test if a mask is passed in.

No generated code change.
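
The consolidated prolog is then invoked the same way for both kinds of
vector; a zero bitmask elides the soft-mask test entirely (vector
numbers for illustration):

	/* non-maskable: no soft-mask test emitted */
	EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 0
	/* maskable: tests PACAIRQSOFTMASK, may branch to masked_interrupt */
	EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x900, IRQS_DISABLED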

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 113 +--
 arch/powerpc/kernel/exceptions-64s.S |  20 ++--
 2 files changed, 55 insertions(+), 78 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index e1b449e2c9ea..74ddcb37156c 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -234,7 +234,7 @@
 #define EXCEPTION_RELON_PROLOG(area, label, hsrr, kvm, vec)\
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0(area);   \
-   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ;   \
+   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\
EXCEPTION_PROLOG_2_VIRT label, hsrr
 
 /* Exception register prefixes */
@@ -305,73 +305,50 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
std r10,area+EX_R10(r13);   /* save r10 - r12 */\
OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR)
 
-#define __EXCEPTION_PROLOG_1_PRE(area) \
-   OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \
-   OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);  \
-   INTERRUPT_TO_KERNEL;\
-   SAVE_CTR(r10, area);\
+.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask
+   OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR)
+   OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR)
+   INTERRUPT_TO_KERNEL
+   SAVE_CTR(r10, \area\())
mfcrr9
-
-#define __EXCEPTION_PROLOG_1_POST(area)
\
-   std r11,area+EX_R11(r13);   \
-   std r12,area+EX_R12(r13);   \
-   GET_SCRATCH0(r10);  \
-   std r10,area+EX_R13(r13)
-
-/*
- * This version of the EXCEPTION_PROLOG_1 will carry
- * addition parameter called "bitmask" to support
- * checking of the interrupt maskable level.
- * Intended to be used in MASKABLE_EXCPETION_* macros.
- */
-.macro MASKABLE_EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask
-   __EXCEPTION_PROLOG_1_PRE(\area\())
.if \kvm
KVMTEST \hsrr \vec
.endif
-
-   lbz r10,PACAIRQSOFTMASK(r13)
-   andi.   r10,r10,\bitmask
-   /* This associates vector numbers with bits in paca->irq_happened */
-   .if \vec == 0x500 || \vec == 0xea0
-   li  r10,PACA_IRQ_EE
-   .elseif \vec == 0x900 || \vec == 0xea0
-   li  r10,PACA_IRQ_DEC
-   .elseif \vec == 0xa00 || \vec == 0xe80
-   li  r10,PACA_IRQ_DBELL
-   .elseif \vec == 0xe60
-   li  r10,PACA_IRQ_HMI
-   .elseif \vec == 0xf00
-   li  r10,PACA_IRQ_PMI
-   .else
-   .abort "Bad maskable vector"
+   .if \bitmask
+   lbz r10,PACAIRQSOFTMASK(r13)
+   andi.   r10,r10,\bitmask
+   /* Associate vector numbers with bits in paca->irq_happened */
+   .if \vec == 0x500 || \vec == 0xea0
+   li  r10,PACA_IRQ_EE
+   .elseif \vec == 0x900 || \vec == 0xea0
+   li  r10,PACA_IRQ_DEC
+   .elseif \vec == 0xa00 || \vec == 0xe80
+   li  r10,PACA_IRQ_DBELL
+   .elseif \vec == 0xe60
+   li  r10,PACA_IRQ_HMI
+   .elseif \vec == 0xf00
+   li  r10,PACA_IRQ_PMI
+   .else
+   .abort "Bad maskable vector"
+   .endif
+
+   .if \hsrr
+   bne masked_Hinterrupt
+   .else
+   bne masked_interrupt
+   .endif
.endif
 
-   .if \hsrr
-   bne masked_Hinterrupt
-   .else
-   bne masked_interrupt
-   .endif
-
-   __EXCEPTION_PROLOG_1_POST(\area\())
-.endm
-
-/*
- * This version of the EXCEPTION_PROLOG_1 is intended
- * to be used in STD_EXCEPTION* macros
- */
-.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec
-   __EXCEPTION_PROLOG_1_PRE(\area\())
-   .if \kvm
-   KVMTEST \hsrr \vec
-   .endif
-   __EXCEPTION_PROLOG_1_POST(\area\())
+   std r11,\area\()+EX_R11(r13)
+   std r12,\area\()+EX_R12(r13)
+   GET_SCRATCH0(r10)
+   std r10,\area\()+EX_R13(r13)
 .endm
 
 #define EXCEPTION_PROLOG(area, label, hsrr, kvm, vec)  \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0(area);   \
-   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ;   \
+   EXCEPTION_PROLOG_1 hsrr, a

[PATCH v2 06/52] powerpc/64s/exception: remove the "extra" macro parameter

2019-06-19 Thread Nicholas Piggin
Rather than passing in the soft-masking and KVM tests via a macro that
is passed to another macro to expand, switch to using gas macros and
conditionally expand the soft-masking and KVM tests.

The system reset with its idle test is open coded as it is a one-off.

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 158 ++-
 arch/powerpc/kernel/exceptions-64s.S |  78 ++-
 2 files changed, 114 insertions(+), 122 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 4aef70defcdd..e1b449e2c9ea 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -231,10 +231,10 @@
  * rfid. Save CTR in case we're CONFIG_RELOCATABLE, in which case
  * EXCEPTION_PROLOG_2_VIRT will be using CTR.
  */
-#define EXCEPTION_RELON_PROLOG(area, label, hsrr, extra, vec)  \
+#define EXCEPTION_RELON_PROLOG(area, label, hsrr, kvm, vec)\
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0(area);   \
-   EXCEPTION_PROLOG_1(area, extra, vec);   \
+   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ;   \
EXCEPTION_PROLOG_2_VIRT label, hsrr
 
 /* Exception register prefixes */
@@ -321,31 +321,58 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 /*
  * This version of the EXCEPTION_PROLOG_1 will carry
  * addition parameter called "bitmask" to support
- * checking of the interrupt maskable level in the SOFTEN_TEST.
+ * checking of the interrupt maskable level.
  * Intended to be used in MASKABLE_EXCPETION_* macros.
  */
-#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) 
\
-   __EXCEPTION_PROLOG_1_PRE(area); \
-   extra(vec, bitmask);\
-   __EXCEPTION_PROLOG_1_POST(area)
+.macro MASKABLE_EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask
+   __EXCEPTION_PROLOG_1_PRE(\area\())
+   .if \kvm
+   KVMTEST \hsrr \vec
+   .endif
+
+   lbz r10,PACAIRQSOFTMASK(r13)
+   andi.   r10,r10,\bitmask
+   /* This associates vector numbers with bits in paca->irq_happened */
+   .if \vec == 0x500 || \vec == 0xea0
+   li  r10,PACA_IRQ_EE
+   .elseif \vec == 0x900 || \vec == 0xea0
+   li  r10,PACA_IRQ_DEC
+   .elseif \vec == 0xa00 || \vec == 0xe80
+   li  r10,PACA_IRQ_DBELL
+   .elseif \vec == 0xe60
+   li  r10,PACA_IRQ_HMI
+   .elseif \vec == 0xf00
+   li  r10,PACA_IRQ_PMI
+   .else
+   .abort "Bad maskable vector"
+   .endif
+
+   .if \hsrr
+   bne masked_Hinterrupt
+   .else
+   bne masked_interrupt
+   .endif
+
+   __EXCEPTION_PROLOG_1_POST(\area\())
+.endm
 
 /*
  * This version of the EXCEPTION_PROLOG_1 is intended
  * to be used in STD_EXCEPTION* macros
  */
-#define _EXCEPTION_PROLOG_1(area, extra, vec)  \
-   __EXCEPTION_PROLOG_1_PRE(area); \
-   extra(vec); \
-   __EXCEPTION_PROLOG_1_POST(area)
-
-#define EXCEPTION_PROLOG_1(area, extra, vec)   \
-   _EXCEPTION_PROLOG_1(area, extra, vec)
+.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec
+   __EXCEPTION_PROLOG_1_PRE(\area\())
+   .if \kvm
+   KVMTEST \hsrr \vec
+   .endif
+   __EXCEPTION_PROLOG_1_POST(\area\())
+.endm
 
-#define EXCEPTION_PROLOG(area, label, h, extra, vec)   \
+#define EXCEPTION_PROLOG(area, label, hsrr, kvm, vec)  \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0(area);   \
-   EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2_REAL label, h, 1
+   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ;   \
+   EXCEPTION_PROLOG_2_REAL label, hsrr, 1
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
@@ -411,10 +438,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #endif
 
 /* Do not enable RI */
-#define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec)  \
+#define EXCEPTION_PROLOG_NORI(area, label, hsrr, kvm, vec) \
EXCEPTION_PROLOG_0(area);   \
-   EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2_REAL label, h, 0
+   EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ;   \
+   EXCEPTION_PROLOG_2_REAL label, hsrr, 0
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 .macro KVMTEST hsrr, n
@@ -476,8 +503,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 .endm
 #endif
 
-#define NOTEST(n)
-
 #define EXCEPTION_PROLOG_COMMON_1()  

[PATCH v2 05/52] powerpc/64s/exception: fix sreset KVM test code

2019-06-19 Thread Nicholas Piggin
The sreset handler KVM test theoretically should not depend on P7.
In practice KVM now only supports P7 and up, so this is not a real bug
fix, but the change is made now so the quirk is not propagated through
the cleanup patches.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/kernel/exceptions-64s.S | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/kernel/exceptions-64s.S 
b/arch/powerpc/kernel/exceptions-64s.S
index bb286f7e1aee..b34d7a9acae6 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -126,10 +126,10 @@ EXC_VIRT_NONE(0x4000, 0x100)
bltlr   cr1 ;   /* no state loss, return to idle caller */  \
BRANCH_TO_C000(r10, system_reset_idle_common) ; \
 1: \
-   KVMTEST_PR(n) ; \
-   END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206)
+   END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) ;  \
+   KVMTEST_PR(n)
 #else
-#define IDLETEST NOTEST
+#define IDLETEST KVMTEST_PR
 #endif
 
 EXC_REAL_BEGIN(system_reset, 0x100, 0x100)
-- 
2.20.1



[PATCH v2 04/52] powerpc/64s/exception: move and tidy EXCEPTION_PROLOG_2 variants

2019-06-19 Thread Nicholas Piggin
- Rename the macros to use _REAL and _VIRT suffixes rather than no
  suffix and a _RELON suffix.

- Move the macro definitions together in the file.

- Move RELOCATABLE ifdef inside the _VIRT macro.

Further consolidation between variants does not buy much here.

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 87 
 arch/powerpc/kernel/exceptions-64s.S | 18 ++---
 2 files changed, 51 insertions(+), 54 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 94c4992188a7..4aef70defcdd 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -170,8 +170,33 @@
ori reg,reg,(ABS_ADDR(label))@l;\
addis   reg,reg,(ABS_ADDR(label))@h
 
+.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri
+   ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
+   .if ! \set_ri
+   xorir10,r10,MSR_RI  /* Clear MSR_RI */
+   .endif
+   .if \hsrr
+   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
+   .else
+   mfspr   r11,SPRN_SRR0   /* save SRR0 */
+   .endif
+   LOAD_HANDLER(r12, \label\())
+   .if \hsrr
+   mtspr   SPRN_HSRR0,r12
+   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
+   mtspr   SPRN_HSRR1,r10
+   HRFI_TO_KERNEL
+   .else
+   mtspr   SPRN_SRR0,r12
+   mfspr   r12,SPRN_SRR1   /* and SRR1 */
+   mtspr   SPRN_SRR1,r10
+   RFI_TO_KERNEL
+   .endif
+   b   .   /* prevent speculative execution */
+.endm
+
+.macro EXCEPTION_PROLOG_2_VIRT label, hsrr
 #ifdef CONFIG_RELOCATABLE
-.macro EXCEPTION_PROLOG_2_RELON label, hsrr
.if \hsrr
mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
.else
@@ -187,10 +212,7 @@
li  r10,MSR_RI
mtmsrd  r10,1   /* Set RI (EE=0) */
bctr
-.endm
 #else
-/* If not relocatable, we can jump directly -- and save messing with LR */
-.macro EXCEPTION_PROLOG_2_RELON label, hsrr
.if \hsrr
mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
@@ -201,19 +223,19 @@
li  r10,MSR_RI
mtmsrd  r10,1   /* Set RI (EE=0) */
b   \label
-.endm
 #endif
+.endm
 
 /*
  * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to
- * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case
- * EXCEPTION_PROLOG_2_RELON will be using LR.
+ * rfid. Save CTR in case we're CONFIG_RELOCATABLE, in which case
+ * EXCEPTION_PROLOG_2_VIRT will be using CTR.
  */
 #define EXCEPTION_RELON_PROLOG(area, label, hsrr, extra, vec)  \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0(area);   \
EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2_RELON label, hsrr
+   EXCEPTION_PROLOG_2_VIRT label, hsrr
 
 /* Exception register prefixes */
 #define EXC_HV 1
@@ -319,36 +341,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define EXCEPTION_PROLOG_1(area, extra, vec)   \
_EXCEPTION_PROLOG_1(area, extra, vec)
 
-.macro EXCEPTION_PROLOG_2 label, hsrr, set_ri
-   ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
-   .if ! \set_ri
-   xorir10,r10,MSR_RI  /* Clear MSR_RI */
-   .endif
-   .if \hsrr
-   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
-   .else
-   mfspr   r11,SPRN_SRR0   /* save SRR0 */
-   .endif
-   LOAD_HANDLER(r12,\label\())
-   .if \hsrr
-   mtspr   SPRN_HSRR0,r12
-   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
-   mtspr   SPRN_HSRR1,r10
-   HRFI_TO_KERNEL
-   .else
-   mtspr   SPRN_SRR0,r12
-   mfspr   r12,SPRN_SRR1   /* and SRR1 */
-   mtspr   SPRN_SRR1,r10
-   RFI_TO_KERNEL
-   .endif
-   b   .   /* prevent speculative execution */
-.endm
-
 #define EXCEPTION_PROLOG(area, label, h, extra, vec)   \
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0(area);   \
EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2 label, h, 1
+   EXCEPTION_PROLOG_2_REAL label, h, 1
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
@@ -417,7 +414,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec)  \
EXCEPTION_PROLOG_0(area);   \
EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2 label, h, 0
+   EXCEPTION_PROLOG_2_REAL label, h, 0
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 .macro KVMTEST hsrr

[PATCH v2 03/52] powerpc/64s/exception: consolidate EXCEPTION_PROLOG_2 with _NORI variant

2019-06-19 Thread Nicholas Piggin
Switch to a gas macro that conditionally expands the RI clearing
instruction.

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 43 ++--
 arch/powerpc/kernel/exceptions-64s.S | 12 +++
 2 files changed, 17 insertions(+), 38 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 1496e4089cee..94c4992188a7 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -319,32 +319,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define EXCEPTION_PROLOG_1(area, extra, vec)   \
_EXCEPTION_PROLOG_1(area, extra, vec)
 
-.macro EXCEPTION_PROLOG_2 label, hsrr
-   ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
-   .if \hsrr
-   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
-   .else
-   mfspr   r11,SPRN_SRR0   /* save SRR0 */
-   .endif
-   LOAD_HANDLER(r12,\label\())
-   .if \hsrr
-   mtspr   SPRN_HSRR0,r12
-   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
-   mtspr   SPRN_HSRR1,r10
-   HRFI_TO_KERNEL
-   .else
-   mtspr   SPRN_SRR0,r12
-   mfspr   r12,SPRN_SRR1   /* and SRR1 */
-   mtspr   SPRN_SRR1,r10
-   RFI_TO_KERNEL
-   .endif
-   b   .   /* prevent speculative execution */
-.endm
-
-/* _NORI variant keeps MSR_RI clear */
-.macro EXCEPTION_PROLOG_2_NORI label, hsrr
+.macro EXCEPTION_PROLOG_2 label, hsrr, set_ri
ld  r10,PACAKMSR(r13)   /* get MSR value for kernel */
+   .if ! \set_ri
xorir10,r10,MSR_RI  /* Clear MSR_RI */
+   .endif
.if \hsrr
mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
.else
@@ -369,7 +348,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0(area);   \
EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2 label, h
+   EXCEPTION_PROLOG_2 label, h, 1
 
 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE
 /*
@@ -438,7 +417,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec)  \
EXCEPTION_PROLOG_0(area);   \
EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2_NORI label, h
+   EXCEPTION_PROLOG_2 label, h, 0
 
 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER
 .macro KVMTEST hsrr, n
@@ -595,14 +574,14 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #define STD_EXCEPTION_OOL(vec, label)  \
EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec);\
-   EXCEPTION_PROLOG_2 label, EXC_STD
+   EXCEPTION_PROLOG_2 label, EXC_STD, 1
 
 #define STD_EXCEPTION_HV(loc, vec, label)  \
EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_HV, KVMTEST_HV, vec)
 
 #define STD_EXCEPTION_HV_OOL(vec, label)   \
EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec);\
-   EXCEPTION_PROLOG_2 label, EXC_HV
+   EXCEPTION_PROLOG_2 label, EXC_HV, 1
 
 #define STD_RELON_EXCEPTION(loc, vec, label)   \
/* No guest interrupts come through here */ \
@@ -666,21 +645,21 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
SET_SCRATCH0(r13);/* save r13 */\
EXCEPTION_PROLOG_0(PACA_EXGEN); \
MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask);   \
-   EXCEPTION_PROLOG_2 label, h
+   EXCEPTION_PROLOG_2 label, h, 1
 
 #define MASKABLE_EXCEPTION(vec, label, bitmask)
\
__MASKABLE_EXCEPTION(vec, label, EXC_STD, SOFTEN_TEST_PR, bitmask)
 
 #define MASKABLE_EXCEPTION_OOL(vec, label, bitmask)\
MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\
-   EXCEPTION_PROLOG_2 label, EXC_STD
+   EXCEPTION_PROLOG_2 label, EXC_STD, 1
 
 #define MASKABLE_EXCEPTION_HV(vec, label, bitmask) \
__MASKABLE_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask)
 
 #define MASKABLE_EXCEPTION_HV_OOL(vec, label, bitmask) \
MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\
-   EXCEPTION_PROLOG_2 label, EXC_HV
+   EXCEPTION_PROLOG_2 label, EXC_HV, 1
 
 #define __MASKABLE_RELON_EXCEPTION(vec, label, h, extra, bitmask)  \
SET_SCRATCH0(r13);/* save r13 */\
@@ -693,7 +672,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 
 #define MASKABLE_RELON_EXCEPTION_OOL(vec, label, bitmask)  \
MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, 
bitmask);\
-   EXCEPTION_PROLOG_2 label, EXC_STD
+   EXCEPTION_PROLOG_2 label, EXC_STD, 1
 
 #de

[PATCH v2 02/52] powerpc/64s/exception: remove H concatenation for EXC_HV variants

2019-06-19 Thread Nicholas Piggin
Replace all instances of this with gas macros that test the hsrr
parameter and use the appropriate register names / labels.

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 333 +--
 arch/powerpc/include/asm/head-64.h   |   8 +-
 arch/powerpc/kernel/exceptions-64s.S |  97 ---
 3 files changed, 253 insertions(+), 185 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index d3987ce65857..1496e4089cee 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -63,6 +63,8 @@
  */
 #define EX_R3  EX_DAR
 
+#ifdef __ASSEMBLY__
+
 #define STF_ENTRY_BARRIER_SLOT \
STF_ENTRY_BARRIER_FIXUP_SECTION;\
nop;\
@@ -144,38 +146,6 @@
hrfid;  \
b   hrfi_flush_fallback
 
-#ifdef CONFIG_RELOCATABLE
-#define __EXCEPTION_PROLOG_2_RELON(label, h)   \
-   mfspr   r11,SPRN_##h##SRR0; /* save SRR0 */ \
-   LOAD_HANDLER(r12,label);\
-   mtctr   r12;\
-   mfspr   r12,SPRN_##h##SRR1; /* and SRR1 */  \
-   li  r10,MSR_RI; \
-   mtmsrd  r10,1;  /* Set RI (EE=0) */ \
-   bctr;
-#else
-/* If not relocatable, we can jump directly -- and save messing with LR */
-#define __EXCEPTION_PROLOG_2_RELON(label, h)   \
-   mfspr   r11,SPRN_##h##SRR0; /* save SRR0 */ \
-   mfspr   r12,SPRN_##h##SRR1; /* and SRR1 */  \
-   li  r10,MSR_RI; \
-   mtmsrd  r10,1;  /* Set RI (EE=0) */ \
-   b   label;
-#endif
-#define EXCEPTION_PROLOG_2_RELON(label, h) \
-   __EXCEPTION_PROLOG_2_RELON(label, h)
-
-/*
- * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to
- * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case
- * EXCEPTION_PROLOG_2_RELON will be using LR.
- */
-#define EXCEPTION_RELON_PROLOG(area, label, h, extra, vec) \
-   SET_SCRATCH0(r13);  /* save r13 */  \
-   EXCEPTION_PROLOG_0(area);   \
-   EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2_RELON(label, h)
-
 /*
  * We're short on space and time in the exception prolog, so we can't
  * use the normal LOAD_REG_IMMEDIATE macro to load the address of label.
@@ -200,9 +170,54 @@
ori reg,reg,(ABS_ADDR(label))@l;\
addis   reg,reg,(ABS_ADDR(label))@h
 
+#ifdef CONFIG_RELOCATABLE
+.macro EXCEPTION_PROLOG_2_RELON label, hsrr
+   .if \hsrr
+   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
+   .else
+   mfspr   r11,SPRN_SRR0   /* save SRR0 */
+   .endif
+   LOAD_HANDLER(r12, \label\())
+   mtctr   r12
+   .if \hsrr
+   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
+   .else
+   mfspr   r12,SPRN_SRR1   /* and SRR1 */
+   .endif
+   li  r10,MSR_RI
+   mtmsrd  r10,1   /* Set RI (EE=0) */
+   bctr
+.endm
+#else
+/* If not relocatable, we can jump directly -- and save messing with LR */
+.macro EXCEPTION_PROLOG_2_RELON label, hsrr
+   .if \hsrr
+   mfspr   r11,SPRN_HSRR0  /* save HSRR0 */
+   mfspr   r12,SPRN_HSRR1  /* and HSRR1 */
+   .else
+   mfspr   r11,SPRN_SRR0   /* save SRR0 */
+   mfspr   r12,SPRN_SRR1   /* and SRR1 */
+   .endif
+   li  r10,MSR_RI
+   mtmsrd  r10,1   /* Set RI (EE=0) */
+   b   \label
+.endm
+#endif
+
+/*
+ * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to
+ * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case
+ * EXCEPTION_PROLOG_2_RELON will be using LR.
+ */
+#define EXCEPTION_RELON_PROLOG(area, label, hsrr, extra, vec)  \
+   SET_SCRATCH0(r13);  /* save r13 */  \
+   EXCEPTION_PROLOG_0(area);   \
+   EXCEPTION_PROLOG_1(area, extra, vec);   \
+   EXCEPTION_PROLOG_2_RELON label, hsrr
+
 /* Exception register prefixes */
-#define EXC_HV H
-#define EXC_STD
+#define EXC_HV 1
+#define EXC_STD0
 
 #if defined(CONFIG_RELOCATABLE)
 /*
@@ -304,43 +319,57 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define EXCEPTION_PROLOG_1(area, extra, vec)   \
_EXCEPTION_PROLOG_1

[PATCH v2 00/52] powerpc/64s interrupt handler cleanups, gasification

2019-06-19 Thread Nicholas Piggin
This contains the previous 28-patch series in front, with one small fix
mentioned in the last thread, plus one extra patch to remove the unused
BRANCH_TO_COMMON macro, so the first 29 patches, up to the SPR RAW
scoreboard change, all involve quite minimal generated code change.

The next patches start to get a bit more into code change, mainly
attacking the "odd" handlers which deviate significantly from the
norm (sreset and machine check; hmi still has pending work to do).
The aim is to simplify them and make them more regular. That makes
maintenance easier, and also reduces the need for a lot of special
cases and splits in macros, which helps further cleanup in future.

After patch 19, I should add, it's so much more pleasant to hack on
this code: you don't have to rebuild practically the whole kernel
whenever you change anything.

Oh I also got a KUAP fix in there we should backport.

Thanks,
Nick

Nicholas Piggin (52):
  powerpc/64s/exception: fix line wrap and semicolon inconsistencies in
macros
  powerpc/64s/exception: remove H concatenation for EXC_HV variants
  powerpc/64s/exception: consolidate EXCEPTION_PROLOG_2 with _NORI
variant
  powerpc/64s/exception: move and tidy EXCEPTION_PROLOG_2 variants
  powerpc/64s/exception: fix sreset KVM test code
  powerpc/64s/exception: remove the "extra" macro parameter
  powerpc/64s/exception: consolidate maskable and non-maskable prologs
  powerpc/64s/exception: merge KVM handler and skip variants
  powerpc/64s/exception: KVM handler can set the HSRR trap bit
  powerpc/64s/exception: Make EXCEPTION_PROLOG_0 a gas macro for
consistency with others
  powerpc/64s/exception: Move EXCEPTION_COMMON handler and return
branches into callers
  powerpc/64s/exception: Move EXCEPTION_COMMON additions into callers
  powerpc/64s/exception: unwind exception-64s.h macros
  powerpc/64s/exception: improve 0x500 handler code
  powerpc/64s/exception: move EXCEPTION_PROLOG_2* to a more logical
place
  powerpc/64s/exception: remove STD_EXCEPTION_COMMON variants
  powerpc/64s/exception: move KVM related code together
  powerpc/64s/exception: move exception-64s.h code to exception-64s.S
where it is used
  powerpc/64s/exception: move head-64.h code to exception-64s.S where it
is used
  powerpc/64s/exception: remove __BRANCH_TO_KVM
  powerpc/64s/exception: remove unused BRANCH_TO_COMMON
  powerpc/64s/exception: use a gas macro for system call handler code
  powerpc/64s/exception: fix indenting irregularities
  powerpc/64s/exception: generate regs clear instructions using .rept
  powerpc/64s/exception: remove bad stack branch
  powerpc/64s/exception: remove pointless EXCEPTION_PROLOG macro
indirection
  powerpc/64s/exception: move paca save area offsets into
exception-64s.S
  powerpc/64s/exception: clean up system call entry
  powerpc/64s/exception: avoid SPR RAW scoreboard stall in real mode
entry
  powerpc/64s/exception: optimise system_reset for idle, clean up
non-idle case
  powerpc/64s/exception: mtmsrd L=1 cleanup
  powerpc/64s/exception: windup use r9 consistently to restore SPRs
  powerpc/64s/exception: move machine check windup in_mce handling
  powerpc/64s/exception: simplify hmi windup code
  powerpc/64s/exception: shuffle windup code around
  powerpc/64s/exception: use common macro for windup
  powerpc/64s/exception: add dar and dsisr options to exception macro
  powerpc/64s/exception: machine check use standard macros to save
dar/dsisr
  powerpc/64s/exception: denorm handler use standard scratch save macro
  powerpc/64s/exception: move SET_SCRATCH0 into EXCEPTION_PROLOG_0
  powerpc/tm: update comment about interrupt re-entrancy
  powerpc/64s/exception: machine check fwnmi does not trigger when in HV
mode
  powerpc/64s/exception: machine check early only runs in HV mode
  powerpc/64s/exception: separate pseries and powernv mce delivery paths
  powerpc/64s/exception: machine check windup restore cfar for host
delivery
  powerpc/64s/exception: fix machine check early should not set AMR
  powerpc/64s/exception: machine check restructure handler to be more
regular
  powerpc/64s/exception: simplify machine check early path
  powerpc/64s/exceptions: machine check move unrecoverable handling out
of line
  powerpc/64s/exception: untangle early machine check handler
  powerpc/64s/exception: machine check improve branch labels
  powerpc/64s/exception: add missing branch to self after RFI

 arch/powerpc/include/asm/exception-64s.h |  609 +---
 arch/powerpc/include/asm/head-64.h   |  204 +--
 arch/powerpc/include/asm/paca.h  |2 +
 arch/powerpc/kernel/asm-offsets.c|2 +
 arch/powerpc/kernel/exceptions-64s.S | 1764 ++
 arch/powerpc/kernel/tm.S |4 +-
 arch/powerpc/xmon/xmon.c |2 +
 7 files changed, 1161 insertions(+), 1426 deletions(-)

-- 
2.20.1



[PATCH v2 01/52] powerpc/64s/exception: fix line wrap and semicolon inconsistencies in macros

2019-06-19 Thread Nicholas Piggin
By convention, all lines should be separated by semicolons. The last line
should have neither a semicolon nor a line wrap.
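
For example, a hypothetical macro following the convention (semicolon
and line wrap on every line except the last):

	#define EXAMPLE_PROLOG(reg)				\
		ld	reg,PACAKBASE(r13);			\
		mtctr	reg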

No generated code change.

Signed-off-by: Nicholas Piggin 
---
 arch/powerpc/include/asm/exception-64s.h | 36 ++---
 arch/powerpc/include/asm/head-64.h   | 68 
 2 files changed, 52 insertions(+), 52 deletions(-)

diff --git a/arch/powerpc/include/asm/exception-64s.h 
b/arch/powerpc/include/asm/exception-64s.h
index 841a0be6c1b2..d3987ce65857 100644
--- a/arch/powerpc/include/asm/exception-64s.h
+++ b/arch/powerpc/include/asm/exception-64s.h
@@ -185,11 +185,11 @@
  */
 #define LOAD_HANDLER(reg, label)   \
ld  reg,PACAKBASE(r13); /* get high part of &label */   \
-   ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label);
+   ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label)
 
 #define __LOAD_HANDLER(reg, label) \
ld  reg,PACAKBASE(r13); \
-   ori reg,reg,(ABS_ADDR(label))@l;
+   ori reg,reg,(ABS_ADDR(label))@l
 
 /*
  * Branches from unrelocated code (e.g., interrupts) to labels outside
@@ -198,7 +198,7 @@
 #define __LOAD_FAR_HANDLER(reg, label) \
ld  reg,PACAKBASE(r13); \
ori reg,reg,(ABS_ADDR(label))@l;\
-   addis   reg,reg,(ABS_ADDR(label))@h;
+   addis   reg,reg,(ABS_ADDR(label))@h
 
 /* Exception register prefixes */
 #define EXC_HV H
@@ -273,7 +273,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR);  \
INTERRUPT_TO_KERNEL;\
SAVE_CTR(r10, area);\
-   mfcrr9;
+   mfcrr9
 
 #define __EXCEPTION_PROLOG_1_POST(area)
\
std r11,area+EX_R11(r13);   \
@@ -290,7 +290,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) 
\
__EXCEPTION_PROLOG_1_PRE(area); \
extra(vec, bitmask);\
-   __EXCEPTION_PROLOG_1_POST(area);
+   __EXCEPTION_PROLOG_1_POST(area)
 
 /*
  * This version of the EXCEPTION_PROLOG_1 is intended
@@ -299,7 +299,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define _EXCEPTION_PROLOG_1(area, extra, vec)  \
__EXCEPTION_PROLOG_1_PRE(area); \
extra(vec); \
-   __EXCEPTION_PROLOG_1_POST(area);
+   __EXCEPTION_PROLOG_1_POST(area)
 
 #define EXCEPTION_PROLOG_1(area, extra, vec)   \
_EXCEPTION_PROLOG_1(area, extra, vec)
@@ -307,7 +307,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define __EXCEPTION_PROLOG_2(label, h) \
ld  r10,PACAKMSR(r13);  /* get MSR value for kernel */  \
mfspr   r11,SPRN_##h##SRR0; /* save SRR0 */ \
-   LOAD_HANDLER(r12,label) \
+   LOAD_HANDLER(r12,label);\
mtspr   SPRN_##h##SRR0,r12; \
mfspr   r12,SPRN_##h##SRR1; /* and SRR1 */  \
mtspr   SPRN_##h##SRR1,r10; \
@@ -321,7 +321,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
ld  r10,PACAKMSR(r13);  /* get MSR value for kernel */  \
xorir10,r10,MSR_RI; /* Clear MSR_RI */  \
mfspr   r11,SPRN_##h##SRR0; /* save SRR0 */ \
-   LOAD_HANDLER(r12,label) \
+   LOAD_HANDLER(r12,label);\
mtspr   SPRN_##h##SRR0,r12; \
mfspr   r12,SPRN_##h##SRR1; /* and SRR1 */  \
mtspr   SPRN_##h##SRR1,r10; \
@@ -335,7 +335,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
SET_SCRATCH0(r13);  /* save r13 */  \
EXCEPTION_PROLOG_0(area);   \
EXCEPTION_PROLOG_1(area, extra, vec);   \
-   EXCEPTION_PROLOG_2(label, h);
+   EXCEPTION_PROLOG_2(label, h)
 
 #define __KVMTEST(h, n)
\
lbz r10,HSTATE_IN_GUEST(r13);   \
@@ -409,7 +409,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943)
 #define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec)  \
EXCEPTION_PROLOG_0(area);   \
EXCEPTION_

[PATCH v2] ocxl: Allow contexts to be attached with a NULL mm

2019-06-19 Thread Alastair D'Silva
From: Alastair D'Silva 

If an OpenCAPI context is to be used directly by a kernel driver, there
may not be a suitable mm to use.

Make the mm parameter to ocxl_context_attach() optional.
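
For example, a kernel driver could then attach a context like this
(hypothetical call site, error handling elided):

	/* kernel context: no user mm, so a PIDR of 0 is used for the PE */
	rc = ocxl_context_attach(ctx, 0 /* amr */, NULL /* mm */);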

Signed-off-by: Alastair D'Silva 
---
 arch/powerpc/mm/book3s64/radix_tlb.c |  5 +
 drivers/misc/ocxl/context.c  |  9 ++---
 drivers/misc/ocxl/link.c | 28 
 3 files changed, 35 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c 
b/arch/powerpc/mm/book3s64/radix_tlb.c
index bb9835681315..ce8a77fae6a7 100644
--- a/arch/powerpc/mm/book3s64/radix_tlb.c
+++ b/arch/powerpc/mm/book3s64/radix_tlb.c
@@ -666,6 +666,11 @@ EXPORT_SYMBOL(radix__flush_tlb_page);
 #define radix__flush_all_mm radix__local_flush_all_mm
 #endif /* CONFIG_SMP */
 
+/*
+ * If kernel TLBIs ever become local rather than global, then
+ * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it
+ * assumes kernel TLBIs are global.
+ */
 void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end)
 {
_tlbie_pid(0, RIC_FLUSH_ALL);
diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index bab9c9364184..994563a078eb 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -69,6 +69,7 @@ static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
 int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct 
*mm)
 {
int rc;
+   unsigned long pidr = 0;
 
// Locks both status & tidr
mutex_lock(&ctx->status_mutex);
@@ -77,9 +78,11 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, 
struct mm_struct *mm)
goto out;
}
 
-   rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
-   mm->context.id, ctx->tidr, amr, mm,
-   xsl_fault_error, ctx);
+   if (mm)
+   pidr = mm->context.id;
+
+   rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr,
+ amr, mm, xsl_fault_error, ctx);
if (rc)
goto out;
 
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index cce5b0d64505..58d111afd9f6 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -224,6 +224,17 @@ static irqreturn_t xsl_fault_handler(int irq, void *data)
ack_irq(spa, ADDRESS_ERROR);
return IRQ_HANDLED;
}
+
+   if (!pe_data->mm) {
+   /*
+* translation fault from a kernel context - an OpenCAPI
+* device tried to access a bad kernel address
+*/
+   rcu_read_unlock();
+   pr_warn("Unresolved OpenCAPI xsl fault in kernel context\n");
+   ack_irq(spa, ADDRESS_ERROR);
+   return IRQ_HANDLED;
+   }
WARN_ON(pe_data->mm->context.id != pid);
 
if (mmget_not_zero(pe_data->mm)) {
@@ -523,7 +534,13 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 
pidr, u32 tidr,
pe->amr = cpu_to_be64(amr);
pe->software_state = cpu_to_be32(SPA_PE_VALID);
 
-   mm_context_add_copro(mm);
+   /*
+* For user contexts, register a copro so that TLBIs are seen
+* by the nest MMU. If we have a kernel context, TLBIs are
+* already global.
+*/
+   if (mm)
+   mm_context_add_copro(mm);
/*
 * Barrier is to make sure PE is visible in the SPA before it
 * is used by the device. It also helps with the global TLBI
@@ -546,7 +563,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 
pidr, u32 tidr,
 * have a reference on mm_users. Incrementing mm_count solves
 * the problem.
 */
-   mmgrab(mm);
+   if (mm)
+   mmgrab(mm);
trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr);
 unlock:
mutex_unlock(&spa->spa_lock);
@@ -652,8 +670,10 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
if (!pe_data) {
WARN(1, "Couldn't find pe data when removing PE\n");
} else {
-   mm_context_remove_copro(pe_data->mm);
-   mmdrop(pe_data->mm);
+   if (pe_data->mm) {
+   mm_context_remove_copro(pe_data->mm);
+   mmdrop(pe_data->mm);
+   }
kfree_rcu(pe_data, rcu);
}
 unlock:
-- 
2.21.0



Re: [PATCH v2 3/6] powerpc/eeh: Improve debug messages around device addition

2019-06-19 Thread Oliver O'Halloran
On Thu, Jun 20, 2019 at 12:40 PM Alexey Kardashevskiy  wrote:
>
> On 19/06/2019 14:27, Sam Bobroff wrote:
> > On Tue, Jun 11, 2019 at 03:47:58PM +1000, Alexey Kardashevskiy wrote:
> >>
> >> On 07/05/2019 14:30, Sam Bobroff wrote:
> >>> Also remove useless comment.
> >>>
> >>> Signed-off-by: Sam Bobroff 
> >>> Reviewed-by: Alexey Kardashevskiy 
> >>> ---
> *snip*
> >
> > I can see that edev will be non-NULL here, but that pr_debug() pattern
> > (using the PDN information to form the PCI address) is quite common
> > across the EEH code, so I think rather than changing a couple of
> > specific cases, I should do a separate cleanup patch and introduce
> > something like pdn_debug(pdn, ""). What do you think?
>
> I'd switch them all to already existing dev_dbg/pci_debug rather than
> adding pdn_debug as imho it should not have been used in the first place
> really...
>
> > (I don't know exactly when edev->pdev can be NULL.)
>
> ... and if you switch to dev_dbg/pci_debug, I think quite soon you'll
> know if it can or cannot be NULL :)

As far as I can tell edev->pdev is NULL in two cases:

1. Before eeh_device_add_late() has been called on the pdev. The late
part of the add maps the pdev to an edev, sets the pdev's edev
pointer, and vice versa.
2. While recovering EEH-unaware devices. Unaware devices are
destroyed and rescanned, and the edev->pdev pointer is cleared by
pcibios_device_release().

In most of these cases it should be safe to use the pci_*() functions
rather than making up a new one for printing pdns. In the cases where
we might not have a PCI dev, I'd make a new set of prints that take an
EEH dev rather than a pci_dn, since I'd like pci_dn to die sooner
rather than later.
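
Something along these lines, as a sketch (the name is hypothetical, the
fields are the ones already used in the prints in this series):

	/* print against the eeh_dev instead of the pci_dn */
	#define eeh_edev_dbg(edev, fmt, ...) \
		pr_debug("EEH: PHB#%x-PE#%x: " fmt, \
			 (edev)->pe->phb->global_number, \
			 (edev)->pe->addr, ##__VA_ARGS__)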

Oliver


Re: [PATCH v2 3/6] powerpc/eeh: Improve debug messages around device addition

2019-06-19 Thread Alexey Kardashevskiy



On 19/06/2019 14:27, Sam Bobroff wrote:
> On Tue, Jun 11, 2019 at 03:47:58PM +1000, Alexey Kardashevskiy wrote:
>>
>>
>> On 07/05/2019 14:30, Sam Bobroff wrote:
>>> Also remove useless comment.
>>>
>>> Signed-off-by: Sam Bobroff 
>>> Reviewed-by: Alexey Kardashevskiy 
>>> ---
>>>  arch/powerpc/kernel/eeh.c|  2 +-
>>>  arch/powerpc/platforms/powernv/eeh-powernv.c | 14 
>>>  arch/powerpc/platforms/pseries/eeh_pseries.c | 23 +++-
>>>  3 files changed, 28 insertions(+), 11 deletions(-)
>>>
>>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
>>> index 8d3c36a1f194..b14d89547895 100644
>>> --- a/arch/powerpc/kernel/eeh.c
>>> +++ b/arch/powerpc/kernel/eeh.c
>>> @@ -1291,7 +1291,7 @@ void eeh_add_device_late(struct pci_dev *dev)
>>> pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
>>> edev = pdn_to_eeh_dev(pdn);
>>> if (edev->pdev == dev) {
>>> -   pr_debug("EEH: Already referenced !\n");
>>> +   pr_debug("EEH: Device %s already referenced!\n", pci_name(dev));
>>> return;
>>> }
>>>  
>>> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c 
>>> b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> index 6fc1a463b796..0e374cdba961 100644
>>> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c
>>> @@ -50,10 +50,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev)
>>> if (!pdev->is_virtfn)
>>> return;
>>>  
>>> -   /*
>>> -* The following operations will fail if VF's sysfs files
>>> -* aren't created or its resources aren't finalized.
>>> -*/
>>> +   pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
>>
>>
>> dev_dbg() seems more appropriate.
> 
> Oh! It does, or even pci_debug() :-)
> 
> I'll change it if I need to do another version, otherwise I'll clean it
> up later.
> 
>>> eeh_add_device_early(pdn);
>>> eeh_add_device_late(pdev);
>>> eeh_sysfs_add_device(pdev);
>>> @@ -397,6 +394,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void 
>>> *data)
>>> int ret;
>>> int config_addr = (pdn->busno << 8) | (pdn->devfn);
>>>  
>>> +   pr_debug("%s: probing %04x:%02x:%02x.%01x\n",
>>> +   __func__, hose->global_number, pdn->busno,
>>> +   PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
>>> +
>>> /*
>>>  * When probing the root bridge, which doesn't have any
>>>  * subordinate PCI devices. We don't have OF node for
>>> @@ -491,6 +492,11 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void 
>>> *data)
>>> /* Save memory bars */
>>> eeh_save_bars(edev);
>>>  
>>> +   pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n",
>>> +   __func__, pdn->busno, PCI_SLOT(pdn->devfn),
>>> +   PCI_FUNC(pdn->devfn), edev->pe->phb->global_number,
>>> +   edev->pe->addr);
>>> +
>>> return NULL;
>>>  }
>>>  
>>> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c 
>>> b/arch/powerpc/platforms/pseries/eeh_pseries.c
>>> index 7aa50258dd42..ae06878fbdea 100644
>>> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c
>>> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c
>>> @@ -65,6 +65,8 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev)
>>> if (!pdev->is_virtfn)
>>> return;
>>>  
>>> +   pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev));
>>> +
>>> pdn->device_id  =  pdev->device;
>>> pdn->vendor_id  =  pdev->vendor;
>>> pdn->class_code =  pdev->class;
>>> @@ -251,6 +253,10 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, 
>>> void *data)
>>> int enable = 0;
>>> int ret;
>>>  
>>> +   pr_debug("%s: probing %04x:%02x:%02x.%01x\n",
>>> +   __func__, pdn->phb->global_number, pdn->busno,
>>> +   PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
>>> +
>>> /* Retrieve OF node and eeh device */
>>> edev = pdn_to_eeh_dev(pdn);
>>> if (!edev || edev->pe)
>>> @@ -294,7 +300,12 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, 
>>> void *data)
>>>  
>>> /* Enable EEH on the device */
>>> ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE);
>>> -   if (!ret) {
>>> +   if (ret) {
>>> +   pr_debug("%s: EEH failed to enable on %02x:%02x.%01x 
>>> PHB#%x-PE#%x (code %d)\n",
>>> +   __func__, pdn->busno, PCI_SLOT(pdn->devfn),
>>> +   PCI_FUNC(pdn->devfn), pe.phb->global_number,
>>> +   pe.addr, ret);
>>> +   } else {
>>
>>
>> edev!=NULL here so you could do dev_dbg(&edev->pdev->dev,...) and skip
>> PCI_SLOT/PCI_FUNC. Or is (edev!=NULL && edev->pdev==NULL) possible (it
>> could be, just asking)?
> 
> I can see that edev will be non-NULL here, but that pr_debug() pattern
> (using the PDN information to form the PCI address) is quite common
> across the EEH code, so I think rather than changing a couple of
> specific cases, I should do a separate cleanup patch and introduce
> something like p

Re: [PATCH v2 01/10] powerpc/8xx: move CPM1 related files from sysdev/ to platforms/8xx

2019-06-19 Thread kbuild test robot
Hi Christophe,

Thank you for the patch! Yet something to improve:

[auto build test ERROR on powerpc/next]
[also build test ERROR on v5.2-rc5 next-20190619]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-8xx-move-CPM1-related-files-from-sysdev-to-platforms-8xx/20190613-184514
base:   https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next
config: powerpc-tqm8555_defconfig (attached as .config)
compiler: powerpc-linux-gcc (GCC) 7.4.0
reproduce:
wget 
https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O 
~/bin/make.cross
chmod +x ~/bin/make.cross
# save the attached .config to linux build tree
GCC_VERSION=7.4.0 make.cross ARCH=powerpc 

If you fix the issue, kindly add following tag
Reported-by: kbuild test robot 

All errors (new ones prefixed by >>):

>> make[3]: *** No rule to make target 'arch/powerpc/sysdev/cpm_gpio.o', needed 
>> by 'arch/powerpc/sysdev/built-in.a'.
   make[3]: Target '__build' not remade because of errors.

---
0-DAY kernel test infrastructureOpen Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation




[PATCH 2/3] KVM: PPC: Book3S HV: Sign extend decrementer value if not using large decr

2019-06-19 Thread Suraj Jitindar Singh
On POWER9 the decrementer can operate in large decrementer mode, where
the decrementer is 56 bits and sign extended to 64 bits. When not
operating in this mode the decrementer behaves as a 32 bit decrementer,
which is NOT sign extended (as on POWER8).

Currently when reading a guest decrementer value we don't take into
account whether the large decrementer is enabled or not, which means
the value will be incorrect when the guest is not using the large
decrementer. Fix this by sign extending the value read when the guest
isn't using the large decrementer.
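
As an illustration (not from the patch), casting through s32 is what
performs the sign extension:

	/* hypothetical value: a 32-bit decrementer that has gone negative */
	u64 raw = 0xfffffff6;	/* -10 read as an unsigned 32-bit value */
	s64 dec = (s32) raw;	/* sign extended: 0xfffffffffffffff6 == -10 */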

Fixes: 95a6432ce903 "KVM: PPC: Book3S HV: Streamlined guest entry/exit path on 
P9 for radix guests"

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/kvm/book3s_hv.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index d3684509da35..719fd2529eec 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -3607,6 +3607,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 
time_limit,
 
vcpu->arch.slb_max = 0;
dec = mfspr(SPRN_DEC);
+   if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */
+   dec = (s32) dec;
tb = mftb();
vcpu->arch.dec_expires = dec + tb;
vcpu->cpu = -1;
-- 
2.13.6



[PATCH 3/3] KVM: PPC: Book3S HV: Clear pending decr exceptions on nested guest entry

2019-06-19 Thread Suraj Jitindar Singh
If we enter an L1 guest with a pending decrementer exception, this
is cleared on guest exit if the guest has written a positive value into
the decrementer (indicating that it handled the decrementer exception),
since there is no other way to detect that the guest has handled the
pending exception and that it should be dequeued. If the L1 guest then
tries to run a nested (L2) guest immediately after this and the
L2 guest decrementer is negative (which is loaded by L1 before making
the H_ENTER_NESTED hcall), then the pending decrementer exception
isn't cleared and the L2 entry is blocked since L1 has a pending
exception, even though L1 may have already handled the exception and
written a positive value to its decrementer. This results in a loop of
L1 trying to enter the L2 guest and L0 blocking the entry since L1 has
an interrupt pending, with the outcome that L2 never gets to run
and hangs.

Fix this by clearing any pending decrementer exceptions when L1 makes
the H_ENTER_NESTED hcall, since it won't do this itself if its
decrementer has gone negative. In any case its decrementer has been
communicated to L0 in the hdec_expires field, and L0 will return control
to L1 by delivering an H_DECREMENTER exception when this goes negative.

Fixes: 95a6432ce903 "KVM: PPC: Book3S HV: Streamlined guest entry/exit path on 
P9 for radix guests"

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/kvm/book3s_hv.c | 11 +--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c
index 719fd2529eec..4a5eb29b952f 100644
--- a/arch/powerpc/kvm/book3s_hv.c
+++ b/arch/powerpc/kvm/book3s_hv.c
@@ -4128,8 +4128,15 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run,
 
preempt_enable();
 
-   /* cancel pending decrementer exception if DEC is now positive */
-   if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu))
+   /*
+* cancel pending decrementer exception if DEC is now positive, or if
+* entering a nested guest in which case the decrementer is now owned
+* by L2 and the L1 decrementer is provided in hdec_expires
+*/
+   if (kvmppc_core_pending_dec(vcpu) &&
+   ((get_tb() < vcpu->arch.dec_expires) ||
+(trap == BOOK3S_INTERRUPT_SYSCALL &&
+ kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED)))
kvmppc_core_dequeue_dec(vcpu);
 
trace_kvm_guest_exit(vcpu);
-- 
2.13.6



[PATCH 1/3] KVM: PPC: Book3S HV: Invalidate ERAT when flushing guest TLB entries

2019-06-19 Thread Suraj Jitindar Singh
When a guest vcpu moves from one physical thread to another it is
necessary for the host to perform a tlb flush on the previous core if
another vcpu from the same guest is going to run there. This is because the
guest may use the local form of the tlb invalidation instruction, meaning
stale tlb entries would persist where it previously ran. This is handled
on guest entry in kvmppc_check_need_tlb_flush(), which calls
flush_guest_tlb() to perform the tlb flush.

Previously the generic radix__local_flush_tlb_lpid_guest() function was
used; however, the functionality was reimplemented in flush_guest_tlb()
to avoid the trace_tlbie() call, as the flushing may be done in real
mode. The reimplementation in flush_guest_tlb() was missing an erat
invalidation after flushing the tlb.

This led to observable memory corruption in the guest due to the
caching of stale translations. Fix this by adding the erat invalidation.
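
In outline, the tail of the flush then becomes (a sketch that mirrors
the one-line fix below):

	/* all tlbiel invalidations for each set have been issued ... */
	asm volatile("ptesync" : : : "memory");
	/* the missing step: also discard the ERAT's cached translations */
	asm volatile(PPC_INVALIDATE_ERAT : : : "memory");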

Fixes: 70ea13f6e609 "KVM: PPC: Book3S HV: Flush TLB on secondary radix threads"

Signed-off-by: Suraj Jitindar Singh 
---
 arch/powerpc/kvm/book3s_hv_builtin.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c 
b/arch/powerpc/kvm/book3s_hv_builtin.c
index 6035d24f1d1d..a46286f73eec 100644
--- a/arch/powerpc/kvm/book3s_hv_builtin.c
+++ b/arch/powerpc/kvm/book3s_hv_builtin.c
@@ -833,6 +833,7 @@ static void flush_guest_tlb(struct kvm *kvm)
}
}
asm volatile("ptesync": : :"memory");
+   asm volatile(PPC_INVALIDATE_ERAT : : :"memory");
 }
 
 void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu,
-- 
2.13.6



Re: [PATCH 3/4] powerpc/powernv: remove dead NPU DMA code

2019-06-19 Thread Alexey Kardashevskiy



On 19/06/2019 17:28, Christoph Hellwig wrote:
> On Wed, Jun 19, 2019 at 10:34:54AM +1000, Alexey Kardashevskiy wrote:
>>
>>
>> On 23/05/2019 17:49, Christoph Hellwig wrote:
>>> None of these routines were ever used since they were added to the
>>> kernel.
>>
>>
>> It is still being used exactly in the way as it was explained before in
>> previous respins. Thanks.
> 
> Please point to the in-kernel user, because that is the only relevant
> one.  This is not just my opinion but we had a clear discussion on that
> at least years kernel summit.


There is no in-kernel user, which still does not mean that the code is
dead. If it is irrelevant, put that in the commit log instead of saying
it is dead; also, if there was a clear outcome from that discussion, then
please point me to it, as I do not get to attend these discussions. Thanks,


-- 
Alexey


Re: [PATCH 0/2] Fix handling of h_set_dawr

2019-06-19 Thread Suraj Jitindar Singh
On Mon, 2019-06-17 at 11:06 +0200, Cédric Le Goater wrote:
> On 17/06/2019 09:16, Suraj Jitindar Singh wrote:
> > Series contains 2 patches to fix the host in kernel handling of the
> > hcall
> > h_set_dawr.
> > 
> > First patch from Michael Neuling is just a resend added here for
> > clarity.
> > 
> > Michael Neuling (1):
> >   KVM: PPC: Book3S HV: Fix r3 corruption in h_set_dabr()
> > 
> > Suraj Jitindar Singh (1):
> >   KVM: PPC: Book3S HV: Only write DAWR[X] when handling h_set_dawr
> > in
> > real mode
> 
> 
> 
> Reviewed-by: Cédric Le Goater 
> 
> and 
> 
> Tested-by: Cédric Le Goater 
> 
> 
> but I see slowdowns in nested as if the IPIs were not delivered. Have
> we touched this part in 5.2?

Hi,

I've seen the same and tracked it down to decrementer exceptions not
being delivered when the guest is using the large decrementer. I've got
a patch I'm about to send, so I'll CC you.

Another option is to disable the large decrementer with:
-machine pseries,cap-large-decr=false

Thanks,
Suraj

> 
> Thanks,
> 
> C.
> 


Re: [PATCH v5 2/2] powerpc: Fix compile issue with force DAWR

2019-06-19 Thread Christophe Leroy




Le 19/06/2019 à 03:11, Michael Neuling a écrit :

On Tue, 2019-06-18 at 18:28 +0200, Christophe Leroy wrote:


Le 04/06/2019 à 05:00, Michael Neuling a écrit :

If you compile with KVM but without CONFIG_HAVE_HW_BREAKPOINT you fail
at linking with:
arch/powerpc/kvm/book3s_hv_rmhandlers.o:(.text+0x708): undefined
reference to `dawr_force_enable'

This was caused by commit c1fe190c0672 ("powerpc: Add force enable of
DAWR on P9 option").

This moves a bunch of code around to fix this. It moves a lot of the
DAWR code in a new file and creates a new CONFIG_PPC_DAWR to enable
compiling it.


After looking at all this once more, I'm just wondering: why are we
creating stuff specific to DAWR ?

In the old days, we only had DABR, and everything was named after DABR.
When DAWR was introduced some years ago we renamed stuff like do_dabr()
to do_break() so that we could regroup things together. And now we are
taking dawr() out of the rest. Why not keep the dabr() stuff and dawr()
stuff all together in something dedicated to breakpoints, and try to
regroup all breakpoint stuff in a single place? I see some
breakpointing stuff done in kernel/process.c and other things done in
hw_breakpoint.c, with common functions called from one file to the other,
preventing GCC from fully optimising, etc ...

Also, behind this thinking, I have the idea that we could easily
implement 512-byte breakpoints on the 8xx too. The 8xx has neither
DABR nor DAWR, but uses a set of comparators. And as you can see in
the 8xx version of __set_dabr() in kernel/process.c, we emulate the DABR
behaviour by setting two comparators. By using the same comparators with
a different setup, we should be able to implement breakpoints on larger
address ranges.


Christophe

I agree that there are opportunities to refactor this code and I appreciate your
efforts in making this code better, but...

We have a problem here of not being able to compile an oddball case that almost
no one ever hits (it was just an odd mpe CI case). We're up to v5 of a simple
fix, which is just silly.

So let's get this fix in and move on to the whole bunch of refactoring we can do
in this code, which is already documented in the github issue tracker.



Agreed.

I've filed the following issue to keep that in mind: 
https://github.com/linuxppc/issues/issues/251


Thanks
Christophe


Re: [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-19 Thread Naveen N. Rao

Nicholas Piggin wrote:

Naveen N. Rao's on June 19, 2019 7:53 pm:

Nicholas Piggin wrote:

Michael Ellerman's on June 19, 2019 3:14 pm:


I'm also not convinced the ordering between the two patches is
guaranteed by the ISA, given that there's possibly no isync on the other
CPU.


Will they go through a context synchronizing event?

synchronize_rcu_tasks() should ensure a thread is scheduled away, but
I'm not actually sure it guarantees CSI if it's kernel->kernel. Could
do a smp_call_function to do the isync on each CPU to be sure.


Good point. Per 
Documentation/RCU/Design/Requirements/Requirements.html#Tasks RCU:
"The solution, in the form of Tasks RCU, is to have implicit read-side 
critical sections that are delimited by voluntary context switches, that 
is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In 
addition, transitions to and from userspace execution also delimit 
tasks-RCU read-side critical sections."


I suppose transitions to/from userspace, as well as calls to schedule(), 
result in a context synchronizing instruction being executed. But, if some 
tasks call cond_resched() and synchronize_rcu_tasks(), we probably won't 
have a CSI executed.


Also:
"In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, so these 
APIs map to call_rcu(), synchronize_rcu(), and rcu_barrier(), 
respectively."


In this scenario as well, I think we won't have a CSI executed in case 
of cond_resched().


Should we enhance patch_instruction() to handle that?


Well, not sure. Do we have many post-boot callers of it? Should
they take care of their own synchronization requirements?


Kprobes and ftrace are the two users (along with anything else that may 
use jump labels).


Looking at this from the CMODX perspective: the main example quoted of 
an erratic behavior is when any variant of the patched instruction 
causes an exception.


With ftrace, I think we are ok since we only ever patch a 'nop' or a 
'bl' (and the 'mflr' now), none of which should cause an exception. As 
such, the existing patch_instruction() should suffice.


However, with kprobes, we patch a 'trap' (or a branch in case of 
optprobes) on most instructions. I wonder if we should be issuing an 
'isync' on all cpus in this case. Or, even if that is sufficient or 
necessary.
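
One possible shape for that, as a sketch only (assuming a full-system
context synchronisation is wanted after patching the trap):

	static void do_isync(void *info)
	{
		isync();
	}

	/* after patch_instruction(), run isync on all other cpus ... */
	smp_call_function(do_isync, NULL, 1);
	/* ... and on the calling cpu */
	isync();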



Thanks,
Naveen




Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range

2019-06-19 Thread Christophe Leroy




Le 19/06/2019 à 05:59, Nicholas Piggin a écrit :

Christophe Leroy's on June 11, 2019 4:46 pm:



Le 10/06/2019 à 05:08, Nicholas Piggin a écrit :

I would like to remove the early ioremap or make it into its own
function. Re-implement map_kernel_page with ioremap_page_range,
allow page tables that don't use slab to avoid the early check,
unbolt the hptes mapped in early boot, etc.


Getting early ioremap out of the picture is a very good idea; it will 
help make things more common between all platform types. Today we face 
the fact that PPC32 allocates early io from the top of memory while 
PPC64 allocates it from the bottom of memory.


Any idea on how to proceed ?

Christophe


[PATCH v3] KVM: PPC: Report single stepping capability

2019-06-19 Thread Fabiano Rosas
When calling the KVM_SET_GUEST_DEBUG ioctl, userspace might request
the next instruction to be single stepped via the
KVM_GUESTDBG_SINGLESTEP control bit of the kvm_guest_debug structure.

This patch adds the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability in order
to inform userspace about the state of single stepping support.

We currently don't have support for guest single stepping implemented
in Book3S HV so the capability is only present for Book3S PR and
BookE.
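
From userspace the capability would be queried in the usual way, e.g.
(sketch, error handling elided):

	if (ioctl(vm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PPC_GUEST_DEBUG_SSTEP) > 0) {
		struct kvm_guest_debug dbg = {
			.control = KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP,
		};
		ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
	}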

Signed-off-by: Fabiano Rosas 
---

v1 -> v2:
 - add capability description to Documentation/virtual/kvm/api.txt

v2 -> v3:
 - be explicit in the commit message about when the capability is
   present
 - remove unnecessary check for CONFIG_BOOKE

 Documentation/virtual/kvm/api.txt | 3 +++
 arch/powerpc/kvm/powerpc.c| 2 ++
 include/uapi/linux/kvm.h  | 1 +
 3 files changed, 6 insertions(+)

diff --git a/Documentation/virtual/kvm/api.txt 
b/Documentation/virtual/kvm/api.txt
index ba6c42c576dd..a77643bfa917 100644
--- a/Documentation/virtual/kvm/api.txt
+++ b/Documentation/virtual/kvm/api.txt
@@ -2969,6 +2969,9 @@ can be determined by querying the 
KVM_CAP_GUEST_DEBUG_HW_BPS and
 KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number
 indicating the number of supported registers.

+For ppc, the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability indicates whether
+the single-step debug event (KVM_GUESTDBG_SINGLESTEP) is supported.
+
 When debug events exit the main run loop with the reason
 KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
 structure containing architecture specific debug information.
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c
index 6d704ad2472b..bd0a73eaf7ba 100644
--- a/arch/powerpc/kvm/powerpc.c
+++ b/arch/powerpc/kvm/powerpc.c
@@ -527,6 +527,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext)
case KVM_CAP_IMMEDIATE_EXIT:
r = 1;
break;
+   case KVM_CAP_PPC_GUEST_DEBUG_SSTEP:
+   /* fall through */
case KVM_CAP_PPC_PAIRED_SINGLES:
case KVM_CAP_PPC_OSI:
case KVM_CAP_PPC_GET_PVINFO:
diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h
index 2fe12b40d503..cad9fcd90f39 100644
--- a/include/uapi/linux/kvm.h
+++ b/include/uapi/linux/kvm.h
@@ -993,6 +993,7 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_ARM_SVE 170
 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171
 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172
+#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 173

 #ifdef KVM_CAP_IRQ_ROUTING

--
2.20.1



Re: [RFC PATCH v0] powerpc: Fix BUG_ON during memory unplug on radix

2019-06-19 Thread Bharata B Rao
On Wed, Jun 19, 2019 at 02:36:54PM +0530, Aneesh Kumar K.V wrote:
> Bharata B Rao  writes:
> 
> > We hit the following BUG_ON when memory hotplugged before reboot
> > is unplugged after reboot:
> >
> > kernel BUG at arch/powerpc/mm/pgtable-frag.c:113!
> >
> >  remove_pagetable+0x594/0x6a0
> >  (unreliable)
> >  remove_pagetable+0x94/0x6a0
> >  vmemmap_free+0x394/0x410
> >  sparse_remove_one_section+0x26c/0x2e8
> >  __remove_pages+0x428/0x540
> >  arch_remove_memory+0xd0/0x170
> >  __remove_memory+0xd4/0x1a0
> >  dlpar_remove_lmb+0xbc/0x110
> >  dlpar_memory+0xa80/0xd20
> >  handle_dlpar_errorlog+0xa8/0x160
> >  pseries_hp_work_fn+0x2c/0x60
> >  process_one_work+0x46c/0x860
> >  worker_thread+0x364/0x5e0
> >  kthread+0x1b0/0x1c0
> >  ret_from_kernel_thread+0x5c/0x68
> >
> > This occurs because, during reboot-after-hotplug, the hotplugged
> > memory range gets initialized as regular memory and page
> > tables are set up using the memblock allocator. This means that we
> > wouldn't have initialized the PMD or PTE fragment count for
> > those PMD or PTE pages.
> >
> > Fixing this includes 3 aspects:
> >
> > - Walk the init_mm page tables from mem_init() and initialize
> >   the PMD and PTE fragment counts appropriately.
> > - When we do early allocation of PMD (and PGD as well) pages,
> >   allocate in page size PAGE_SIZE granularity so that we are
> >   sure that the complete page is available for us to set the
> >   fragment count which is part of struct page.
> 
> 
> That is an important change now. For early page table we now allocate
> PAGE_SIZE tables and hencec we consider then as pages with fragment
> count 1. You also may want to explain here why.

Sure will make this clear in my next version.

> I guess the challenge is
> due to the fact that we can't clearly control how the rest of the page
> will get used and we are not sure they all will be allocated for backing
> page table pages.
> 
> > - When PMD or PTE page is freed, check if it comes from memblock
> >   allocator and free it appropriately.
> >
> > Reported-by: Srikanth Aithal 
> > Signed-off-by: Bharata B Rao 
> > ---
> >  arch/powerpc/include/asm/book3s/64/radix.h |  1 +
> >  arch/powerpc/include/asm/sparsemem.h   |  1 +
> >  arch/powerpc/mm/book3s64/pgtable.c | 12 +++-
> >  arch/powerpc/mm/book3s64/radix_pgtable.c   | 67 +-
> >  arch/powerpc/mm/mem.c  |  5 ++
> >  arch/powerpc/mm/pgtable-frag.c |  5 +-
> >  6 files changed, 87 insertions(+), 4 deletions(-)
> >
> > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
> > b/arch/powerpc/include/asm/book3s/64/radix.h
> > index 574eca33f893..4320f2790e8d 100644
> > --- a/arch/powerpc/include/asm/book3s/64/radix.h
> > +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> > @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void)
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> >  int radix__create_section_mapping(unsigned long start, unsigned long end, 
> > int nid);
> >  int radix__remove_section_mapping(unsigned long start, unsigned long end);
> > +void radix__fixup_pgtable_fragments(void);
> >  #endif /* CONFIG_MEMORY_HOTPLUG */
> >  #endif /* __ASSEMBLY__ */
> >  #endif
> > diff --git a/arch/powerpc/include/asm/sparsemem.h 
> > b/arch/powerpc/include/asm/sparsemem.h
> > index 3192d454a733..e662f9232d35 100644
> > --- a/arch/powerpc/include/asm/sparsemem.h
> > +++ b/arch/powerpc/include/asm/sparsemem.h
> > @@ -15,6 +15,7 @@
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> >  extern int create_section_mapping(unsigned long start, unsigned long end, 
> > int nid);
> >  extern int remove_section_mapping(unsigned long start, unsigned long end);
> > +void fixup_pgtable_fragments(void);
> >
> >  #ifdef CONFIG_PPC_BOOK3S_64
> >  extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
> > diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
> > b/arch/powerpc/mm/book3s64/pgtable.c
> > index 01bc9663360d..7efe9cc16b39 100644
> > --- a/arch/powerpc/mm/book3s64/pgtable.c
> > +++ b/arch/powerpc/mm/book3s64/pgtable.c
> > @@ -186,6 +186,13 @@ int __meminit remove_section_mapping(unsigned long 
> > start, unsigned long end)
> >
> > return hash__remove_section_mapping(start, end);
> >  }
> > +
> > +void fixup_pgtable_fragments(void)
> > +{
> > +   if (radix_enabled())
> > +   radix__fixup_pgtable_fragments();
> > +}
> > +
> >  #endif /* CONFIG_MEMORY_HOTPLUG */
> >
> >  void __init mmu_partition_table_init(void)
> > @@ -320,7 +327,10 @@ void pmd_fragment_free(unsigned long *pmd)
> > BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
> > if (atomic_dec_and_test(&page->pt_frag_refcount)) {
> > pgtable_pmd_page_dtor(page);
> > -   __free_page(page);
> > +   if (PageReserved(page))
> > +   free_reserved_page(page);
> > +   else
> > +   __free_page(page);
> > }
> >  }
> >
> > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
> > b/arch/powerpc/mm/book3s64/radix_pg

Re: [RFC PATCH v0] powerpc: Fix BUG_ON during memory unplug on radix

2019-06-19 Thread Bharata B Rao
On Wed, Jun 19, 2019 at 08:17:01PM +1000, Nicholas Piggin wrote:
> Bharata B Rao's on June 19, 2019 5:45 pm:
> > We hit the following BUG_ON when memory hotplugged before reboot
> > is unplugged after reboot:
> > 
> > kernel BUG at arch/powerpc/mm/pgtable-frag.c:113!
> > 
> >  remove_pagetable+0x594/0x6a0
> >  (unreliable)
> >  remove_pagetable+0x94/0x6a0
> >  vmemmap_free+0x394/0x410
> >  sparse_remove_one_section+0x26c/0x2e8
> >  __remove_pages+0x428/0x540
> >  arch_remove_memory+0xd0/0x170
> >  __remove_memory+0xd4/0x1a0
> >  dlpar_remove_lmb+0xbc/0x110
> >  dlpar_memory+0xa80/0xd20
> >  handle_dlpar_errorlog+0xa8/0x160
> >  pseries_hp_work_fn+0x2c/0x60
> >  process_one_work+0x46c/0x860
> >  worker_thread+0x364/0x5e0
> >  kthread+0x1b0/0x1c0
> >  ret_from_kernel_thread+0x5c/0x68
> > 
> > This occurs because, during reboot-after-hotplug, the hotplugged
> > memory range gets initialized as regular memory and page
> > tables are set up using the memblock allocator. This means that we
> > wouldn't have initialized the PMD or PTE fragment count for
> > those PMD or PTE pages.
> > 
> > Fixing this includes 3 aspects:
> > 
> > - Walk the init_mm page tables from mem_init() and initialize
> >   the PMD and PTE fragment counts appropriately.
> > - When we do early allocation of PMD (and PGD as well) pages,
> >   allocate in page size PAGE_SIZE granularity so that we are
> >   sure that the complete page is available for us to set the
> >   fragment count which is part of struct page.
> > - When PMD or PTE page is freed, check if it comes from memblock
> >   allocator and free it appropriately.
> > 
> > Reported-by: Srikanth Aithal 
> > Signed-off-by: Bharata B Rao 
> > ---
> >  arch/powerpc/include/asm/book3s/64/radix.h |  1 +
> >  arch/powerpc/include/asm/sparsemem.h   |  1 +
> >  arch/powerpc/mm/book3s64/pgtable.c | 12 +++-
> >  arch/powerpc/mm/book3s64/radix_pgtable.c   | 67 +-
> >  arch/powerpc/mm/mem.c  |  5 ++
> >  arch/powerpc/mm/pgtable-frag.c |  5 +-
> >  6 files changed, 87 insertions(+), 4 deletions(-)
> > 
> > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
> > b/arch/powerpc/include/asm/book3s/64/radix.h
> > index 574eca33f893..4320f2790e8d 100644
> > --- a/arch/powerpc/include/asm/book3s/64/radix.h
> > +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> > @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void)
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> >  int radix__create_section_mapping(unsigned long start, unsigned long end, 
> > int nid);
> >  int radix__remove_section_mapping(unsigned long start, unsigned long end);
> > +void radix__fixup_pgtable_fragments(void);
> >  #endif /* CONFIG_MEMORY_HOTPLUG */
> >  #endif /* __ASSEMBLY__ */
> >  #endif
> > diff --git a/arch/powerpc/include/asm/sparsemem.h 
> > b/arch/powerpc/include/asm/sparsemem.h
> > index 3192d454a733..e662f9232d35 100644
> > --- a/arch/powerpc/include/asm/sparsemem.h
> > +++ b/arch/powerpc/include/asm/sparsemem.h
> > @@ -15,6 +15,7 @@
> >  #ifdef CONFIG_MEMORY_HOTPLUG
> >  extern int create_section_mapping(unsigned long start, unsigned long end, 
> > int nid);
> >  extern int remove_section_mapping(unsigned long start, unsigned long end);
> > +void fixup_pgtable_fragments(void);
> >  
> >  #ifdef CONFIG_PPC_BOOK3S_64
> >  extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
> > diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
> > b/arch/powerpc/mm/book3s64/pgtable.c
> > index 01bc9663360d..7efe9cc16b39 100644
> > --- a/arch/powerpc/mm/book3s64/pgtable.c
> > +++ b/arch/powerpc/mm/book3s64/pgtable.c
> > @@ -186,6 +186,13 @@ int __meminit remove_section_mapping(unsigned long 
> > start, unsigned long end)
> >  
> > return hash__remove_section_mapping(start, end);
> >  }
> > +
> > +void fixup_pgtable_fragments(void)
> > +{
> > +   if (radix_enabled())
> > +   radix__fixup_pgtable_fragments();
> > +}
> > +
> >  #endif /* CONFIG_MEMORY_HOTPLUG */
> >  
> >  void __init mmu_partition_table_init(void)
> > @@ -320,7 +327,10 @@ void pmd_fragment_free(unsigned long *pmd)
> > BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
> > if (atomic_dec_and_test(&page->pt_frag_refcount)) {
> > pgtable_pmd_page_dtor(page);
> > -   __free_page(page);
> > +   if (PageReserved(page))
> > +   free_reserved_page(page);
> 
> Hmm. Rather than adding this special case here, I wonder if you can
> just go along in your fixup walk and convert all these pages to
> non-reserved pages?
> 
> ClearPageReserved ; init_page_count ; adjust_managed_page_count ; 
> should do the trick, right?

Yes, that should. We are anyway fixing the frag count during
the walk, might as well do all the above too and avoid the special
case in the free path.
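
Something along those lines in the fixup walk, then. A rough sketch of
the idea, untested, assuming `page` is the struct page of a
memblock-allocated page table page and the walk visits each such page
exactly once:

    /* Turn the memblock-allocated page table page into a normal page */
    ClearPageReserved(page);
    init_page_count(page);
    adjust_managed_page_count(page, 1);
    /* ... and set the fragment count, as the walk already does */
    atomic_set(&page->pt_frag_refcount, 1);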

Regards,
Bharata.



Re: [PATCH] ocxl: Update for AFU descriptor template version 1.1

2019-06-19 Thread christophe lombard

On 05/06/2019 13:15, Frederic Barrat wrote:

From: Alastair D'Silva 

The OpenCAPI discovery and configuration specification has been
updated and introduces version 1.1 of the AFU descriptor template,
with new fields to better define the memory layout of an OpenCAPI
adapter.

The ocxl driver doesn't do much yet to support LPC memory but as we
start seeing (non-LPC) AFU images using the new template, this patch
updates the config space parsing code to avoid spitting a warning.

Signed-off-by: Alastair D'Silva 
Signed-off-by: Frederic Barrat 
---



The content of the patch sounds good. Thanks.

Reviewed-by: Christophe Lombard 



Re: [PATCH] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac

2019-06-19 Thread Mathieu Malaterre
On Wed, Jun 19, 2019 at 4:18 PM Benjamin Herrenschmidt
 wrote:
>
> On Wed, 2019-06-19 at 22:32 +1000, Michael Ellerman wrote:
> > Christoph Hellwig  writes:
> > > Any chance this could get picked up to fix the regression?
> >
> > Was hoping Ben would Ack it. He's still powermac maintainer :)
> >
> > I guess he OK'ed it in the other thread, will add it to my queue.
>
> Yeah ack. If I had written it myself, I would have made the DMA bits a
> variable and only set it down to 30 if I see that device in the DT
> early on, but I can't be bothered now, if it works, ship it :-)
>
> Note: The patch affects all ppc32, though I don't think it will cause
> any significant issue on those who don't need it.

Thanks, that answers my earlier question.

> Cheers,
> Ben.
>
> > cheers
> >
> > > On Thu, Jun 13, 2019 at 10:24:46AM +0200, Christoph Hellwig wrote:
> > > > With the strict dma mask checking introduced with the switch to
> > > > the generic DMA direct code common wifi chips on 32-bit
> > > > powerbooks
> > > > stopped working.  Add a 30-bit ZONE_DMA to the 32-bit pmac builds
> > > > to allow them to reliably allocate dma coherent memory.
> > > >
> > > > Fixes: 65a21b71f948 ("powerpc/dma: remove
> > > > dma_nommu_dma_supported")
> > > > Reported-by: Aaro Koskinen 
> > > > Signed-off-by: Christoph Hellwig 
> > > > ---
> > > >  arch/powerpc/include/asm/page.h | 7 +++
> > > >  arch/powerpc/mm/mem.c   | 3 ++-
> > > >  arch/powerpc/platforms/powermac/Kconfig | 1 +
> > > >  3 files changed, 10 insertions(+), 1 deletion(-)
> > > >
> > > > diff --git a/arch/powerpc/include/asm/page.h
> > > > b/arch/powerpc/include/asm/page.h
> > > > index b8286a2013b4..0d52f57fca04 100644
> > > > --- a/arch/powerpc/include/asm/page.h
> > > > +++ b/arch/powerpc/include/asm/page.h
> > > > @@ -319,6 +319,13 @@ struct vm_area_struct;
> > > >  #endif /* __ASSEMBLY__ */
> > > >  #include 
> > > >
> > > > +/*
> > > > + * Allow 30-bit DMA for very limited Broadcom wifi chips on many
> > > > powerbooks.
> > > > + */
> > > > +#ifdef CONFIG_PPC32
> > > > +#define ARCH_ZONE_DMA_BITS 30
> > > > +#else
> > > >  #define ARCH_ZONE_DMA_BITS 31
> > > > +#endif
> > > >
> > > >  #endif /* _ASM_POWERPC_PAGE_H */
> > > > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > > > index cba29131bccc..2540d3b2588c 100644
> > > > --- a/arch/powerpc/mm/mem.c
> > > > +++ b/arch/powerpc/mm/mem.c
> > > > @@ -248,7 +248,8 @@ void __init paging_init(void)
> > > >  (long int)((top_of_ram - total_ram) >> 20));
> > > >
> > > >  #ifdef CONFIG_ZONE_DMA
> > > > - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL
> > > > >> PAGE_SHIFT);
> > > > + max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
> > > > + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >>
> > > > PAGE_SHIFT);
> > > >  #endif
> > > >   max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
> > > >  #ifdef CONFIG_HIGHMEM
> > > > diff --git a/arch/powerpc/platforms/powermac/Kconfig
> > > > b/arch/powerpc/platforms/powermac/Kconfig
> > > > index f834a19ed772..c02d8c503b29 100644
> > > > --- a/arch/powerpc/platforms/powermac/Kconfig
> > > > +++ b/arch/powerpc/platforms/powermac/Kconfig
> > > > @@ -7,6 +7,7 @@ config PPC_PMAC
> > > >   select PPC_INDIRECT_PCI if PPC32
> > > >   select PPC_MPC106 if PPC32
> > > >   select PPC_NATIVE
> > > > + select ZONE_DMA if PPC32
> > > >   default y
> > > >
> > > >  config PPC_PMAC64
> > > > --
> > > > 2.20.1
> > >
> > > ---end quoted text---
>


Re: [PATCH] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac

2019-06-19 Thread Benjamin Herrenschmidt
On Wed, 2019-06-19 at 22:32 +1000, Michael Ellerman wrote:
> Christoph Hellwig  writes:
> > Any chance this could get picked up to fix the regression?
> 
> Was hoping Ben would Ack it. He's still powermac maintainer :)
> 
> I guess he OK'ed it in the other thread, will add it to my queue.

Yeah ack. If I had written it myself, I would have made the DMA bits a
variable and only set it down to 30 if I see that device in the DT
early on, but I can't be bothered now, if it works, ship it :-)

Note: The patch affects all ppc32, though I don't think it will cause
any significant issue on those who don't need it.
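
For reference, a quick sanity check of the resulting ZONE_DMA ceilings
(my arithmetic, assuming 4K pages, i.e. PAGE_SHIFT == 12):

    /* ppc32 (ARCH_ZONE_DMA_BITS == 30) */
    ((1UL << 30) - 1) >> 12  ==  0x3ffff   /* pfn limit, just under 1 GiB */

    /* elsewhere (ARCH_ZONE_DMA_BITS == 31) */
    ((1UL << 31) - 1) >> 12  ==  0x7ffff   /* pfn limit, just under 2 GiB */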

Cheers,
Ben.

> cheers
> 
> > On Thu, Jun 13, 2019 at 10:24:46AM +0200, Christoph Hellwig wrote:
> > > With the strict dma mask checking introduced with the switch to
> > > the generic DMA direct code common wifi chips on 32-bit
> > > powerbooks
> > > stopped working.  Add a 30-bit ZONE_DMA to the 32-bit pmac builds
> > > to allow them to reliably allocate dma coherent memory.
> > > 
> > > Fixes: 65a21b71f948 ("powerpc/dma: remove
> > > dma_nommu_dma_supported")
> > > Reported-by: Aaro Koskinen 
> > > Signed-off-by: Christoph Hellwig 
> > > ---
> > >  arch/powerpc/include/asm/page.h | 7 +++
> > >  arch/powerpc/mm/mem.c   | 3 ++-
> > >  arch/powerpc/platforms/powermac/Kconfig | 1 +
> > >  3 files changed, 10 insertions(+), 1 deletion(-)
> > > 
> > > diff --git a/arch/powerpc/include/asm/page.h
> > > b/arch/powerpc/include/asm/page.h
> > > index b8286a2013b4..0d52f57fca04 100644
> > > --- a/arch/powerpc/include/asm/page.h
> > > +++ b/arch/powerpc/include/asm/page.h
> > > @@ -319,6 +319,13 @@ struct vm_area_struct;
> > >  #endif /* __ASSEMBLY__ */
> > >  #include 
> > >  
> > > +/*
> > > + * Allow 30-bit DMA for very limited Broadcom wifi chips on many
> > > powerbooks.
> > > + */
> > > +#ifdef CONFIG_PPC32
> > > +#define ARCH_ZONE_DMA_BITS 30
> > > +#else
> > >  #define ARCH_ZONE_DMA_BITS 31
> > > +#endif
> > >  
> > >  #endif /* _ASM_POWERPC_PAGE_H */
> > > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> > > index cba29131bccc..2540d3b2588c 100644
> > > --- a/arch/powerpc/mm/mem.c
> > > +++ b/arch/powerpc/mm/mem.c
> > > @@ -248,7 +248,8 @@ void __init paging_init(void)
> > >  (long int)((top_of_ram - total_ram) >> 20));
> > >  
> > >  #ifdef CONFIG_ZONE_DMA
> > > - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL
> > > >> PAGE_SHIFT);
> > > + max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
> > > + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >>
> > > PAGE_SHIFT);
> > >  #endif
> > >   max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
> > >  #ifdef CONFIG_HIGHMEM
> > > diff --git a/arch/powerpc/platforms/powermac/Kconfig
> > > b/arch/powerpc/platforms/powermac/Kconfig
> > > index f834a19ed772..c02d8c503b29 100644
> > > --- a/arch/powerpc/platforms/powermac/Kconfig
> > > +++ b/arch/powerpc/platforms/powermac/Kconfig
> > > @@ -7,6 +7,7 @@ config PPC_PMAC
> > >   select PPC_INDIRECT_PCI if PPC32
> > >   select PPC_MPC106 if PPC32
> > >   select PPC_NATIVE
> > > + select ZONE_DMA if PPC32
> > >   default y
> > >  
> > >  config PPC_PMAC64
> > > -- 
> > > 2.20.1
> > 
> > ---end quoted text---



[RFC 00/11] opencapi: enable card reset and link retraining

2019-06-19 Thread Frederic Barrat
This is the linux part of the work to use the PCI hotplug framework to
control an opencapi card so that it can be reset and re-read after
flashing a new FPGA image.

It needs support in skiboot:
http://patchwork.ozlabs.org/project/skiboot/list/?series=114803
On an old skiboot, it will do nothing.

A virtual PCI slot is created for the opencapi adapter, and its state
can be controlled through the pnv-php hotplug driver:

  echo 0|1 > /sys/bus/pci/slots/OPENCAPI-<...>/power

Note that the power to the card is not really turned off, as the card
needs to stay on to be flashed with a new image. Instead the card is
placed in reset.

The first part of the series mostly deals with the pci/ioda state, as
the devices can now go away and the state needs to be cleaned up.

The second part is modifications to the hotplug driver on powernv, so
that a virtual slot is created for the opencapi adapters found in the
device tree



Frederic Barrat (11):
  powerpc/powernv/ioda: Fix ref count for devices with their own PE
  powerpc/powernv/ioda: Protect PE list
  powerpc/powernv/ioda: set up PE on opencapi device when enabling
  powerpc/powernv/ioda: Release opencapi device
  powerpc/powernv/ioda: Find opencapi slot for a device node
  pci/hotplug/pnv-php: Remove erroneous warning
  pci/hotplug/pnv-php: Improve error msg on power state change failure
  pci/hotplug/pnv-php: Register opencapi slots
  pci/hotplug/pnv-php: Relax check when disabling slot
  pci/hotplug/pnv-php: Wrap warnings in macro
  ocxl: Add PCI hotplug dependency to Kconfig

 arch/powerpc/include/asm/pnv-pci.h|   1 +
 arch/powerpc/platforms/powernv/pci-ioda.c | 106 ++
 arch/powerpc/platforms/powernv/pci.c  |  10 +-
 drivers/misc/ocxl/Kconfig |   1 +
 drivers/pci/hotplug/pnv_php.c |  66 --
 5 files changed, 115 insertions(+), 69 deletions(-)

-- 
2.21.0



[RFC 07/11] pci/hotplug/pnv-php: Improve error msg on power state change failure

2019-06-19 Thread Frederic Barrat
When changing the slot state, if opal hits an error and says so in the
asynchronous reply, the warning "Wrong msg" is logged, which is rather
confusing. Instead, we can reuse the clearer message which is already
used when we couldn't submit the asynchronous opal request in the
first place.

Signed-off-by: Frederic Barrat 
---
 drivers/pci/hotplug/pnv_php.c | 16 +++-
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 5b5cbf1e636d..5cdd2a3a4dd9 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -336,18 +336,19 @@ int pnv_php_set_slot_power_state(struct hotplug_slot 
*slot,
ret = pnv_pci_set_power_state(php_slot->id, state, &msg);
if (ret > 0) {
if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle ||
-   be64_to_cpu(msg.params[2]) != state ||
-   be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) {
+   be64_to_cpu(msg.params[2]) != state) {
pci_warn(php_slot->pdev, "Wrong msg (%lld, %lld, 
%lld)\n",
 be64_to_cpu(msg.params[1]),
 be64_to_cpu(msg.params[2]),
 be64_to_cpu(msg.params[3]));
return -ENOMSG;
}
+   if (be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) {
+   ret = -ENODEV;
+   goto error;
+   }
} else if (ret < 0) {
-   pci_warn(php_slot->pdev, "Error %d powering %s\n",
-ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
-   return ret;
+   goto error;
}
 
if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE)
@@ -356,6 +357,11 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
ret = pnv_php_add_devtree(php_slot);
 
return ret;
+
+error:
+   pci_warn(php_slot->pdev, "Error %d powering %s\n",
+   ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
+   return ret;
 }
 EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state);
 
-- 
2.21.0



[RFC 11/11] ocxl: Add PCI hotplug dependency to Kconfig

2019-06-19 Thread Frederic Barrat
The PCI hotplug framework is used to update the devices when a new
image is written to the FPGA.

Signed-off-by: Frederic Barrat 
---
 drivers/misc/ocxl/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig
index 7fb6d39d4c5a..13a5d9f30369 100644
--- a/drivers/misc/ocxl/Kconfig
+++ b/drivers/misc/ocxl/Kconfig
@@ -12,6 +12,7 @@ config OCXL
tristate "OpenCAPI coherent accelerator support"
depends on PPC_POWERNV && PCI && EEH
select OCXL_BASE
+   select HOTPLUG_PCI_POWERNV
default m
help
  Select this option to enable the ocxl driver for Open
-- 
2.21.0



[RFC 06/11] pci/hotplug/pnv-php: Remove erroneous warning

2019-06-19 Thread Frederic Barrat
On powernv, when removing a device through hotplug, the following
warning is logged:

 Invalid refcount <.> on <...>

The warning may be incorrect: the refcount may be set to a value higher
than 1 and still be valid, since of_detach_node() can drop more than one
reference. As it doesn't seem trivial to assert the correct value, let's
remove the warning.

Signed-off-by: Frederic Barrat 
---
 drivers/pci/hotplug/pnv_php.c | 6 --
 1 file changed, 6 deletions(-)

diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 6758fd7c382e..5b5cbf1e636d 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -151,17 +151,11 @@ static void pnv_php_rmv_pdns(struct device_node *dn)
 static void pnv_php_detach_device_nodes(struct device_node *parent)
 {
struct device_node *dn;
-   int refcount;
 
for_each_child_of_node(parent, dn) {
pnv_php_detach_device_nodes(dn);
 
of_node_put(dn);
-   refcount = kref_read(&dn->kobj.kref);
-   if (refcount != 1)
-   pr_warn("Invalid refcount %d on <%pOF>\n",
-   refcount, dn);
-
of_detach_node(dn);
}
 }
-- 
2.21.0



[RFC 04/11] powerpc/powernv/ioda: Release opencapi device

2019-06-19 Thread Frederic Barrat
With hotplug, an opencapi device can now go away. It needs to be
released, mostly to clean up its PE state. We were previously not
defining any device callback. We can reuse the standard PCI release
callback; it does a bit too much for an opencapi device, but it's
harmless and only needs minor tuning.

Also move the undo of the PELT-V code to a separate function; it is
not needed for NPU devices and it improves the readability of the
code a bit.

Signed-off-by: Frederic Barrat 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 58 +++
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2cf06fb98978..33054d00b2c5 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -186,7 +186,7 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe)
unsigned int pe_num = pe->pe_number;
 
WARN_ON(pe->pdev);
-   WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */
+   WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */
kfree(pe->npucomp);
memset(pe, 0, sizeof(struct pnv_ioda_pe));
clear_bit(pe_num, phb->ioda.pe_alloc);
@@ -775,6 +775,33 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb,
return 0;
 }
 
+static void pnv_ioda_unset_peltv(struct pnv_phb *phb,
+struct pnv_ioda_pe *pe,
+struct pci_dev *parent)
+{
+   int64_t rc;
+
+   while (parent) {
+   struct pci_dn *pdn = pci_get_pdn(parent);
+   if (pdn && pdn->pe_number != IODA_INVALID_PE) {
+   rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
+   pe->pe_number, 
OPAL_REMOVE_PE_FROM_DOMAIN);
+   /* XXX What to do in case of error ? */
+   }
+   parent = parent->bus->self;
+   }
+
+   opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
+   OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+
+   /* Disassociate PE in PELT */
+   rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
+   pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
+   if (rc)
+   pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
+
+}
+
 static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe)
 {
struct pci_dev *parent;
@@ -825,25 +852,13 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, 
struct pnv_ioda_pe *pe)
for (rid = pe->rid; rid < rid_end; rid++)
phb->ioda.pe_rmap[rid] = IODA_INVALID_PE;
 
-   /* Release from all parents PELT-V */
-   while (parent) {
-   struct pci_dn *pdn = pci_get_pdn(parent);
-   if (pdn && pdn->pe_number != IODA_INVALID_PE) {
-   rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number,
-   pe->pe_number, 
OPAL_REMOVE_PE_FROM_DOMAIN);
-   /* XXX What to do in case of error ? */
-   }
-   parent = parent->bus->self;
-   }
-
-   opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number,
- OPAL_EEH_ACTION_CLEAR_FREEZE_ALL);
+   /*
+* Release from all parents PELT-V. NPUs don't have a PELTV
+* table
+*/
+   if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI)
+   pnv_ioda_unset_peltv(phb, pe, parent);
 
-   /* Disassociate PE in PELT */
-   rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number,
-   pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN);
-   if (rc)
-   pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc);
rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid,
 bcomp, dcomp, fcomp, OPAL_UNMAP_PE);
if (rc)
@@ -3528,6 +3543,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
case PNV_PHB_IODA2:
pnv_pci_ioda2_release_pe_dma(pe);
break;
+   case PNV_PHB_NPU_OCAPI:
+   break;
default:
WARN_ON(1);
}
@@ -3580,7 +3597,7 @@ static void pnv_pci_release_device(struct pci_dev *pdev)
pe = &phb->ioda.pe_array[pdn->pe_number];
pdn->pe_number = IODA_INVALID_PE;
 
-   WARN_ON(--pe->device_count < 0);
+   WARN_ON((pe->flags != PNV_IODA_PE_DEV) && (--pe->device_count < 0));
if (pe->device_count == 0)
pnv_ioda_release_pe(pe);
 }
@@ -3629,6 +3646,7 @@ static const struct pci_controller_ops 
pnv_npu_ioda_controller_ops = {
 
 static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
.enable_device_hook = pnv_ocapi_enable_device_hook,
+   .release_device = pnv_pci_release_

[RFC 08/11] pci/hotplug/pnv-php: Register opencapi slots

2019-06-19 Thread Frederic Barrat
Add the opencapi PHBs to the list of PHBs being scanned to look for
slots.

Signed-off-by: Frederic Barrat 
---
 drivers/pci/hotplug/pnv_php.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 5cdd2a3a4dd9..f9c624334ef7 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -954,7 +954,8 @@ static int __init pnv_php_init(void)
pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
pnv_php_register(dn);
-
+   for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
+   pnv_php_register_one(dn);
return 0;
 }
 
@@ -964,6 +965,8 @@ static void __exit pnv_php_exit(void)
 
for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
pnv_php_unregister(dn);
+   for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
+   pnv_php_unregister(dn);
 }
 
 module_init(pnv_php_init);
-- 
2.21.0



[RFC 10/11] pci/hotplug/pnv-php: Wrap warnings in macro

2019-06-19 Thread Frederic Barrat
An opencapi slot doesn't have an associated bridge device. It's not
needed for operation, but any warning is displayed through pci_warn(),
which uses the pci_dev struct of the associated bridge device. So wrap
those warnings so that a different trace mechanism can be used if it's
an opencapi slot.

Signed-off-by: Frederic Barrat 
---
 drivers/pci/hotplug/pnv_php.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index 74b62a8e11e7..08ac8f0df06c 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -18,6 +18,9 @@
 #define DRIVER_AUTHOR  "Gavin Shan, IBM Corporation"
 #define DRIVER_DESC"PowerPC PowerNV PCI Hotplug Driver"
 
+#define SLOT_WARN(slot, x...) \
+   (slot->pdev ? pci_warn(slot->pdev, x) : dev_warn(&slot->bus->dev, x))
+
 struct pnv_php_event {
booladded;
struct pnv_php_slot *php_slot;
@@ -265,7 +268,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot 
*php_slot)
 
ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x1);
if (ret) {
-   pci_warn(php_slot->pdev, "Error %d getting FDT blob\n", ret);
+   SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret);
goto free_fdt1;
}
 
@@ -279,7 +282,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot 
*php_slot)
dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL);
if (!dt) {
ret = -EINVAL;
-   pci_warn(php_slot->pdev, "Cannot unflatten FDT\n");
+   SLOT_WARN(php_slot, "Cannot unflatten FDT\n");
goto free_fdt;
}
 
@@ -289,7 +292,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot 
*php_slot)
ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn);
if (ret) {
pnv_php_reverse_nodes(php_slot->dn);
-   pci_warn(php_slot->pdev, "Error %d populating changeset\n",
+   SLOT_WARN(php_slot, "Error %d populating changeset\n",
 ret);
goto free_dt;
}
@@ -297,7 +300,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot 
*php_slot)
php_slot->dn->child = NULL;
ret = of_changeset_apply(&php_slot->ocs);
if (ret) {
-   pci_warn(php_slot->pdev, "Error %d applying changeset\n", ret);
+   SLOT_WARN(php_slot, "Error %d applying changeset\n", ret);
goto destroy_changeset;
}
 
@@ -337,7 +340,7 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
if (ret > 0) {
if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle ||
be64_to_cpu(msg.params[2]) != state) {
-   pci_warn(php_slot->pdev, "Wrong msg (%lld, %lld, 
%lld)\n",
+   SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n",
 be64_to_cpu(msg.params[1]),
 be64_to_cpu(msg.params[2]),
 be64_to_cpu(msg.params[3]));
@@ -359,7 +362,7 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
return ret;
 
 error:
-   pci_warn(php_slot->pdev, "Error %d powering %s\n",
+   SLOT_WARN(php_slot, "Error %d powering %s\n",
ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
return ret;
 }
@@ -378,7 +381,7 @@ static int pnv_php_get_power_state(struct hotplug_slot 
*slot, u8 *state)
 */
ret = pnv_pci_get_power_state(php_slot->id, &power_state);
if (ret) {
-   pci_warn(php_slot->pdev, "Error %d getting power status\n",
+   SLOT_WARN(php_slot, "Error %d getting power status\n",
 ret);
} else {
*state = power_state;
@@ -402,7 +405,7 @@ static int pnv_php_get_adapter_state(struct hotplug_slot 
*slot, u8 *state)
*state = presence;
ret = 0;
} else {
-   pci_warn(php_slot->pdev, "Error %d getting presence\n", ret);
+   SLOT_WARN(php_slot, "Error %d getting presence\n", ret);
}
 
return ret;
@@ -637,7 +640,7 @@ static int pnv_php_register_slot(struct pnv_php_slot 
*php_slot)
ret = pci_hp_register(&php_slot->slot, php_slot->bus,
  php_slot->slot_no, php_slot->name);
if (ret) {
-   pci_warn(php_slot->pdev, "Error %d registering slot\n", ret);
+   SLOT_WARN(php_slot, "Error %d registering slot\n", ret);
return ret;
}
 
@@ -690,7 +693,7 @@ static int pnv_php_enable_msix(struct pnv_php_slot 
*php_slot)
/* Enable MSIx */
ret = pci_enable_msix_exact(pdev, &entry, 1);
if (ret) {
-   pci_warn(pdev, "Error %d enabling MSIx\n", ret);
+   SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret

[RFC 09/11] pci/hotplug/pnv-php: Relax check when disabling slot

2019-06-19 Thread Frederic Barrat
The driver only allows disabling a slot in the POPULATED
state. However, if an error occurs while enabling the slot, say
because the link couldn't be trained, then the POPULATED state may not
be reached, yet the power state of the slot is on. So allow disabling
a slot in the REGISTERED state as well. Removing the devices will do
nothing since the slot is not populated, and we'll set the power state
of the slot back to off.

Signed-off-by: Frederic Barrat 
---
 drivers/pci/hotplug/pnv_php.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c
index f9c624334ef7..74b62a8e11e7 100644
--- a/drivers/pci/hotplug/pnv_php.c
+++ b/drivers/pci/hotplug/pnv_php.c
@@ -523,7 +523,13 @@ static int pnv_php_disable_slot(struct hotplug_slot *slot)
struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
int ret;
 
-   if (php_slot->state != PNV_PHP_STATE_POPULATED)
+   /*
+* Allow to disable a slot already in the registered state to
+* cover cases where the slot couldn't be enabled and never
+* reached the populated state
+*/
+   if (php_slot->state != PNV_PHP_STATE_POPULATED &&
+   php_slot->state != PNV_PHP_STATE_REGISTERED)
return 0;
 
/* Remove all devices behind the slot */
-- 
2.21.0



[RFC 05/11] powerpc/powernv/ioda: Find opencapi slot for a device node

2019-06-19 Thread Frederic Barrat
Unlike real PCI slots, opencapi slots are directly associated with
the (virtual) opencapi PHB; there's no intermediate bridge. So when
looking for a slot ID, we must start the search from the device node
itself and not its parent.

Also, the slot ID is not attached to a specific bdfn, so let's build
it from the PHB ID, like skiboot.
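
As a made-up example of the resulting IDs (based on the macros in the
diff below; the phb_id and bdfn values are arbitrary):

    /* regular PCI slot, phb_id = 3, bdfn = 0x0800 */
    PCI_SLOT_ID(3, 0x0800)  /* (1UL << 63) | (0x0800UL << 16) | 3
                               == 0x8000000008000003 */

    /* opencapi slot: no bdfn, the ID is just the PHB id */
    PCI_PHB_SLOT_ID(3)      /* == 0x3 */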

Signed-off-by: Frederic Barrat 
---
 arch/powerpc/include/asm/pnv-pci.h   |  1 +
 arch/powerpc/platforms/powernv/pci.c | 10 +++---
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/arch/powerpc/include/asm/pnv-pci.h 
b/arch/powerpc/include/asm/pnv-pci.h
index b5a85f1bb305..4b4dfa6bfdd3 100644
--- a/arch/powerpc/include/asm/pnv-pci.h
+++ b/arch/powerpc/include/asm/pnv-pci.h
@@ -15,6 +15,7 @@
 #define PCI_SLOT_ID_PREFIX (1UL << 63)
 #define PCI_SLOT_ID(phb_id, bdfn)  \
(PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id))
+#define PCI_PHB_SLOT_ID(phb_id)(phb_id)
 
 extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id);
 extern int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len);
diff --git a/arch/powerpc/platforms/powernv/pci.c 
b/arch/powerpc/platforms/powernv/pci.c
index ff1a33fee8e6..3e4e75a883e1 100644
--- a/arch/powerpc/platforms/powernv/pci.c
+++ b/arch/powerpc/platforms/powernv/pci.c
@@ -49,13 +49,14 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t 
*id)
return -ENXIO;
 
bdfn = ((bdfn & 0x00ffff00) >> 8);
-   while ((parent = of_get_parent(parent))) {
+   for (parent = np; parent; parent = of_get_parent(parent)) {
if (!PCI_DN(parent)) {
of_node_put(parent);
break;
}
 
-   if (!of_device_is_compatible(parent, "ibm,ioda2-phb")) {
+   if (!of_device_is_compatible(parent, "ibm,ioda2-phb") &&
+   !of_device_is_compatible(parent, 
"ibm,ioda2-npu2-opencapi-phb")) {
of_node_put(parent);
continue;
}
@@ -66,7 +67,10 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id)
return -ENXIO;
}
 
-   *id = PCI_SLOT_ID(phbid, bdfn);
+   if (of_device_is_compatible(parent, 
"ibm,ioda2-npu2-opencapi-phb"))
+   *id = PCI_PHB_SLOT_ID(phbid);
+   else
+   *id = PCI_SLOT_ID(phbid, bdfn);
return 0;
}
 
-- 
2.21.0



[RFC 02/11] powerpc/powernv/ioda: Protect PE list

2019-06-19 Thread Frederic Barrat
Protect the PHB's list of PEs. This is probably not needed as long as
the list is only populated during PHB creation, but it feels right and
will become required once we can add/remove opencapi devices on hotplug.

Signed-off-by: Frederic Barrat 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 3082912e2600..2c063b05bb64 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1078,8 +1078,9 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct 
pci_dev *dev)
}
 
/* Put PE to the list */
+   mutex_lock(&phb->ioda.pe_list_mutex);
list_add_tail(&pe->list, &phb->ioda.pe_list);
-
+   mutex_unlock(&phb->ioda.pe_list_mutex);
return pe;
 }
 
@@ -3501,7 +3502,10 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe)
struct pnv_phb *phb = pe->phb;
struct pnv_ioda_pe *slave, *tmp;
 
+   mutex_lock(&phb->ioda.pe_list_mutex);
list_del(&pe->list);
+   mutex_unlock(&phb->ioda.pe_list_mutex);
+
switch (phb->type) {
case PNV_PHB_IODA1:
pnv_pci_ioda1_release_pe_dma(pe);
-- 
2.21.0



[RFC 03/11] powerpc/powernv/ioda: set up PE on opencapi device when enabling

2019-06-19 Thread Frederic Barrat
The PE for an opencapi device was set up as part of a late PHB fixup
operation, when creating the PHB. To use the PCI hotplug framework,
this is not going to work, as the PHB stays the same; it's only the
devices underneath which are updated. For regular PCI devices, this is
done as part of the reconfiguration of the bridge, but for opencapi
PHBs, we don't have an intermediate bridge. So let's define the PE
when the device is enabled. PEs are meaningless for opencapi: the NPU
doesn't define them and opal doesn't do anything with them.

Signed-off-by: Frederic Barrat 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 31 +--
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2c063b05bb64..2cf06fb98978 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1258,8 +1258,6 @@ static void pnv_pci_ioda_setup_PEs(void)
 {
struct pci_controller *hose;
struct pnv_phb *phb;
-   struct pci_bus *bus;
-   struct pci_dev *pdev;
struct pnv_ioda_pe *pe;
 
list_for_each_entry(hose, &hose_list, list_node) {
@@ -1271,11 +1269,6 @@ static void pnv_pci_ioda_setup_PEs(void)
if (phb->model == PNV_PHB_MODEL_NPU2)
WARN_ON_ONCE(pnv_npu2_init(hose));
}
-   if (phb->type == PNV_PHB_NPU_OCAPI) {
-   bus = hose->bus;
-   list_for_each_entry(pdev, &bus->devices, bus_list)
-   pnv_ioda_setup_dev_PE(pdev);
-   }
}
list_for_each_entry(hose, &hose_list, list_node) {
phb = hose->private_data;
@@ -3373,6 +3366,28 @@ static bool pnv_pci_enable_device_hook(struct pci_dev 
*dev)
return true;
 }
 
+static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev)
+{
+   struct pci_controller *hose = pci_bus_to_host(dev->bus);
+   struct pnv_phb *phb = hose->private_data;
+   struct pci_dn *pdn;
+   struct pnv_ioda_pe *pe;
+
+   if (!phb->initialized)
+   return true;
+
+   pdn = pci_get_pdn(dev);
+   if (!pdn)
+   return false;
+
+   if (pdn->pe_number == IODA_INVALID_PE) {
+   pe = pnv_ioda_setup_dev_PE(dev);
+   if (!pe)
+   return false;
+   }
+   return true;
+}
+
 static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group,
   int num)
 {
@@ -3613,7 +3628,7 @@ static const struct pci_controller_ops 
pnv_npu_ioda_controller_ops = {
 };
 
 static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = {
-   .enable_device_hook = pnv_pci_enable_device_hook,
+   .enable_device_hook = pnv_ocapi_enable_device_hook,
.window_alignment   = pnv_pci_window_alignment,
.reset_secondary_bus= pnv_pci_reset_secondary_bus,
.shutdown   = pnv_pci_ioda_shutdown,
-- 
2.21.0



[RFC 01/11] powerpc/powernv/ioda: Fix ref count for devices with their own PE

2019-06-19 Thread Frederic Barrat
Taking a reference on the pci_dev structure was required with initial
commit 184cd4a3b962 ("powerpc/powernv: PCI support for p7IOC under
OPAL v2"), where we were storing the pci dev in the pci_dn structure.

However, the pci_dev was later removed from the pci_dn structure, but
the reference was kept. See 902bdc57451c ("powerpc/powernv/idoa:
Remove unnecessary pcidev from pci_dn").

The pnv_ioda_pe structure's life cycle is the same as the pci_dev
structure's: the PE is freed when the device is released. So we don't
need a reference for the pci_dev stored in the PE; otherwise the
pci_dev will never be released. Which is not really a surprise, as the
comment (removed here as no longer needed) was stating as much.
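
In other words, a simplified sketch of the cycle as I read it (the
exact call chain on the release side is from memory, not verified):

    /* pnv_ioda_setup_dev_PE():  pci_dev_get(dev)  => the PE holds a ref */
    /*
     * pci_dev teardown: the last pci_dev_put() triggers the controller's
     * release_device hook => pnv_pci_release_device() => the PE is freed.
     * But the ref held by the PE keeps the pci_dev refcount from ever
     * reaching zero, so neither the device nor the PE is ever released.
     */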

Fixes: 902bdc57451c ("powerpc/powernv/idoa: Remove unnecessary pcidev from 
pci_dn")
Signed-off-by: Frederic Barrat 
---
 arch/powerpc/platforms/powernv/pci-ioda.c | 11 +--
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c 
b/arch/powerpc/platforms/powernv/pci-ioda.c
index 10cc42b9e541..3082912e2600 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -1060,14 +1060,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct 
pci_dev *dev)
return NULL;
}
 
-   /* NOTE: We get only one ref to the pci_dev for the pdn, not for the
-* pointer in the PE data structure, both should be destroyed at the
-* same time. However, this needs to be looked at more closely again
-* once we actually start removing things (Hotplug, SR-IOV, ...)
-*
-* At some point we want to remove the PDN completely anyways
-*/
-   pci_dev_get(dev);
pdn->pe_number = pe->pe_number;
pe->flags = PNV_IODA_PE_DEV;
pe->pdev = dev;
@@ -1082,7 +1074,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct 
pci_dev *dev)
pnv_ioda_free_pe(pe);
pdn->pe_number = IODA_INVALID_PE;
pe->pdev = NULL;
-   pci_dev_put(dev);
return NULL;
}
 
@@ -1226,7 +1217,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct 
pci_dev *npu_pdev)
 */
dev_info(&npu_pdev->dev,
"Associating to existing PE %x\n", pe_num);
-   pci_dev_get(npu_pdev);
+   pci_dev_get(npu_pdev); // still needed after 
902bdc57451c2c64aa139bbe24067f70a186db0a ?
npu_pdn = pci_get_pdn(npu_pdev);
rid = npu_pdev->bus->number << 8 | npu_pdn->devfn;
npu_pdn->pe_number = pe_num;
-- 
2.21.0



Re: [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case

2019-06-19 Thread Christophe Leroy




On 19/06/2019 at 06:04, Nicholas Piggin wrote:

Christophe Leroy's on June 11, 2019 4:28 pm:



On 10/06/2019 at 05:08, Nicholas Piggin wrote:

__ioremap_at error handling is wonky: it requires the caller to clean up
after it. Implement a helper that does the map and error cleanup and
remove the requirement from the caller.

Signed-off-by: Nicholas Piggin 
---

This series is a different approach to the problem, using the generic
ioremap_page_range directly which reduces added code, and moves
the radix specific code into radix files. Thanks to Christophe for
pointing out various problems with the previous patch.

   arch/powerpc/mm/pgtable_64.c | 27 ---
   1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c
index d2d976ff8a0e..6bd3660388aa 100644
--- a/arch/powerpc/mm/pgtable_64.c
+++ b/arch/powerpc/mm/pgtable_64.c
@@ -108,14 +108,30 @@ unsigned long ioremap_bot;
   unsigned long ioremap_bot = IOREMAP_BASE;
   #endif
   
+static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid)

+{
+   unsigned long i;
+
+   for (i = 0; i < size; i += PAGE_SIZE) {
+   int err = map_kernel_page(ea + i, pa + i, prot);


Missing a blank line


+   if (err) {


I'd have done the following to reduce indentation depth

if (!err)
continue


I'll consider it, line lengths were not too bad.


+   if (slab_is_available())
+   unmap_kernel_range(ea, size);


Shouldn't it be unmap_kernel_range(ea, i) ?


I guess (i - PAGE_SIZE really), although the old code effectively did
the full range. As a "clean up" it may be better to avoid subtle
change in behaviour and do that in another patch?


Not sure we have to do it in another patch.
The previous code was doing the full range because it was done at an
upper level, so it didn't know the boundaries. You are creating a nice
brand new function that has all the necessary information, so why not
make it right from the start?


Christophe



Thanks,
Nick



Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range

2019-06-19 Thread Christophe Leroy




On 19/06/2019 at 05:59, Nicholas Piggin wrote:

Christophe Leroy's on June 11, 2019 4:46 pm:



On 10/06/2019 at 05:08, Nicholas Piggin wrote:


[snip]


diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
b/arch/powerpc/mm/book3s64/radix_pgtable.c
index c9bcf428dd2b..db993bc1aef3 100644
--- a/arch/powerpc/mm/book3s64/radix_pgtable.c
+++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
@@ -11,6 +11,7 @@
   
   #define pr_fmt(fmt) "radix-mmu: " fmt
   
+#include 

   #include 
   #include 
   #include 
@@ -1122,3 +1123,23 @@ void radix__ptep_modify_prot_commit(struct 
vm_area_struct *vma,
   
   	set_pte_at(mm, addr, ptep, pte);

   }
+
+int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size,
+   pgprot_t prot, int nid)
+{
+   if (likely(slab_is_available())) {
+   int err = ioremap_page_range(ea, ea + size, pa, prot);
+   if (err)
+   unmap_kernel_range(ea, size);
+   return err;
+   } else {
+   unsigned long i;
+
+   for (i = 0; i < size; i += PAGE_SIZE) {
+   int err = map_kernel_page(ea + i, pa + i, prot);
+   if (WARN_ON_ONCE(err)) /* Should clean up */
+   return err;
+   }


Same loop again.

What about not doing a radix specific function and just putting
something like below in the core ioremap_range() function ?

if (likely(slab_is_available()) && radix_enabled()) {
int err = ioremap_page_range(ea, ea + size, pa, prot);

if (err)
unmap_kernel_range(ea, size);
return err;
}

Because I'm pretty sure we will use ioremap_page_range() more and more.


Well I agree the duplication is not so nice, but it's convenient
to see what is going on for each MMU type.

There is a significant amount of churn that needs to be done in
this layer so I prefer to make it a bit simpler despite duplication.

I would like to remove the early ioremap or make it into its own
function. Re-implement map_kernel_page with ioremap_page_range,
allow page tables that don't use slab to avoid the early check,
unbolt the hptes mapped in early boot, etc.

I just wanted to escape out the 64s and hash/radix implementations
completely until that settles.


I can understand the benefit in some situations but here I just can't. 
And code duplication should be avoided as much as possible as it makes 
code maintenance more difficult.


Here you have:

+static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned 
long size, pgprot_t prot, int nid)

+{
+   unsigned long i;
+
+   for (i = 0; i < size; i += PAGE_SIZE) {
+   int err = map_kernel_page(ea + i, pa + i, prot);
+   if (err) {
+   if (slab_is_available())
+   unmap_kernel_range(ea, size);
+   else
+   WARN_ON_ONCE(1); /* Should clean up */
+   return err;
+   }
+   }
+
+   return 0;
+}

You now create a new one in another file, that is almost identical:

+int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, 
pgprot_t prot, int nid)

+{
+   unsigned long i;
+
+   if (radix_enabled())
+   return radix__ioremap_range(ea, pa, size, prot, nid);
+
+   for (i = 0; i < size; i += PAGE_SIZE) {
+   int err = map_kernel_page(ea + i, pa + i, prot);
+   if (err) {
+   if (slab_is_available())
+   unmap_kernel_range(ea, size);
+   else
+   WARN_ON_ONCE(1); /* Should clean up */
+   return err;
+   }
+   }
+
+   return 0;
+}

Then you have to make the original one __weak.

Sorry I'm still having difficulties understanding what the benefit is.

radix_enabled() is defined for every platform, so you could just add the 
following on top of the existing ioremap_range() and voila.


+   if (radix_enabled())
+   return radix__ioremap_range(ea, pa, size, prot, nid);


And with that you wouldn't have the __weak stuff to handle.




-static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, 
pgprot_t prot, int nid)
+int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, 
pgprot_t prot, int nid)


Hum. Weak functions remain in vmlinux even when unused, unless
CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is selected.

Also, they are somehow dangerous because people might change them
without seeing that they are overridden for some particular configuration.


Well you shouldn't assume that when you see a weak function, but
what's the preferred alternative? A config option?


Yes you are right, nobody should assume that, but ...

But I think if the functions were really different, the preferred 
alternative w

Re: [PATCH 1/4] mm: Move ioremap page table mapping function to mm/

2019-06-19 Thread Christophe Leroy




On 19/06/2019 at 05:43, Nicholas Piggin wrote:

Christophe Leroy's on June 11, 2019 3:24 pm:



On 10/06/2019 at 06:38, Nicholas Piggin wrote:


[snip]


diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 51e131245379..812bea5866d6 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -147,6 +147,9 @@ extern struct vm_struct *find_vm_area(const void *addr);
   extern int map_vm_area(struct vm_struct *area, pgprot_t prot,
struct page **pages);
   #ifdef CONFIG_MMU
+extern int vmap_range(unsigned long addr,
+  unsigned long end, phys_addr_t phys_addr, pgprot_t prot,
+  unsigned int max_page_shift);


Drop extern keyword here.


I don't know if I was going crazy but at one point I was getting
duplicate symbol errors that were fixed by adding extern somewhere.


probably not on a function name ...


Maybe sleep deprivation. However...


As checkpatch tells you, 'CHECK:AVOID_EXTERNS: extern prototypes should
be avoided in .h files'


I prefer to follow existing style in surrounding code at the expense
of some checkpatch warnings. If somebody later wants to "fix" it
that's fine.


I don't think that's fine to 'fix' later things that could be done right 
from the beginning. 'Cosmetic only' fixes never happen because they are a 
nightmare for backports, and a shame for 'git blame'.


In some patches, you add cleanups to make the code look nicer, and here 
you have the opportunity to make the code nice from the beginning and you 
prefer repeating the errors done in the past? You're surprising me.


Christophe



Thanks,
Nick



Re: [PATCH 0/1] PPC32: fix ptrace() access to FPU registers

2019-06-19 Thread Radu Rendec
On Wed, 2019-06-19 at 10:36 +1000, Daniel Axtens wrote:
> Andreas Schwab <sch...@linux-m68k.org> writes:
> 
> > On Jun 18 2019, Radu Rendec <radu.ren...@gmail.com> wrote:
> > 
> > > Since you already have a working setup, it would be nice if you could
> > > add a printk to arch_ptrace() to print the address and confirm what I
> > > believe happens (by reading the gdb source code).
> > 
> > A ppc32 ptrace syscall goes through compat_arch_ptrace.

Right. I completely overlooked that part.

> Ah right, and that (in ptrace32.c) contains code that will work:
> 
> 
>   /*
>* the user space code considers the floating point
>* to be an array of unsigned int (32 bits) - the
>* index passed in is based on this assumption.
>*/
>   tmp = ((unsigned int *)child->thread.fp_state.fpr)
>   [FPRINDEX(index)];
> 
> FPRINDEX is defined above to deal with the various manipulations you
> need to do.

Correct. Basically it does the same thing I did in my patch: it divides
the index again by 2 (it's already divided by 4 in compat_arch_ptrace(),
so it ends up divided by 8), then takes the least significant bit and
adds it to the index. I take bit 2 of the original address, which is the
same thing (because in FPRHALF() the address is already divided by 4).

So we have this in ptrace32.c:

#define FPRNUMBER(i) (((i) - PT_FPR0) >> 1)
#define FPRHALF(i) (((i) - PT_FPR0) & 1)
#define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i)

index = (unsigned long) addr >> 2;
(unsigned int *)child->thread.fp_state.fpr)[FPRINDEX(index)]


And we have this in my patch:

fpidx = (addr - PT_FPR0 * sizeof(long)) / 8;
(void *)&child->thread.TS_FPR(fpidx) + (addr & 4)

> Radu: I think we want to copy that working code back into ptrace.c. 

I'm not sure that would work. There's a subtle difference: the code in
ptrace32.c is always compiled on a 64-bit kernel and the user space
calling it is always 32-bit; on the other hand, the code in ptrace.c can
be compiled on either a 64-bit kernel or a 32-bit kernel and the user
space calling it always has the same "bitness" as the kernel.

One difference is the size of the CPU registers. On 64-bit they are 8
byte long and user space knows that and generates 8-byte aligned
addresses. So you have to divide the address by 8 to calculate the CPU
register index correctly, which compat_arch_ptrace() currently doesn't.

Another difference is that on 64-bit `long` is 8 bytes, so user space
can read a whole FPU register in a single ptrace call. 
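
For instance, something like this on the 64-bit side (a sketch,
assuming PT_FPR0 == 48 and the glibc ptrace() wrapper, with pid being
the tracee):

    #include <sys/ptrace.h>

    /* One call fetches all 8 bytes of FPR3, since the user-visible
     * addresses are in sizeof(long) == 8 units on 64-bit */
    long fpr3 = ptrace(PTRACE_PEEKUSER, pid, (void *)((48 + 3) * 8), NULL);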

Now that we are all aware of compat_arch_ptrace() (which handles the
special case of a 32-bit process running on a 64-bit kernel) I would say
the patch is correct and does the right thing for both 32-bit and 64-bit 
kernels and processes.

> The challenge will be unpicking the awful mess of ifdefs in ptrace.c
> and making it somewhat more comprehensible.

I'm not sure what ifdefs you're thinking about. The only ones that are
used inside arch_ptrace() are PT_FPR0, PT_FPSCR and TS_FPR, which seem
to be correct.

But perhaps it would be useful to change my patch and add a comment just
before arch_ptrace() that explains how the math is done and that the
code must work on both 32-bit and 64-bit, the user space address
assumptions, etc.

By the way, I'm not sure the code in compat_arch_ptrace() handles
PT_FPSCR correctly. It might (just because fpscr is right next to fpr[]
in memory - and that's a hack), but I can't figure out if it accesses
the right half.

Radu




Re: [PATCH 1/2] KVM: PPC: Book3S HV: Fix r3 corruption in h_set_dabr()

2019-06-19 Thread Michael Ellerman
On Mon, 2019-06-17 at 07:16:18 UTC, Suraj Jitindar Singh wrote:
> From: Michael Neuling 
> 
> Commit c1fe190c0672 ("powerpc: Add force enable of DAWR on P9
> option") screwed up some assembler and corrupted a pointer in
> r3. This resulted in crashes like the below:
> 
>   [   44.374746] BUG: Kernel NULL pointer dereference at 0x13bf
>   [   44.374848] Faulting instruction address: 0xc010b044
>   [   44.374906] Oops: Kernel access of bad area, sig: 11 [#1]
>   [   44.374951] LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA 
> pSeries
>   [   44.375018] Modules linked in: vhost_net vhost tap xt_CHECKSUM 
> iptable_mangle xt_MASQUERADE iptable_nat nf_nat xt_conntrack nf_conntrack 
> nf_defrag_ipv6 libcrc32c nf_defrag_ipv4 ipt_REJECT nf_reject_ipv4 xt_tcpudp 
> bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables 
> iptable_filter bpfilter vmx_crypto crct10dif_vpmsum crc32c_vpmsum kvm_hv kvm 
> sch_fq_codel ip_tables x_tables autofs4 virtio_net net_failover virtio_scsi 
> failover
>   [   44.375401] CPU: 8 PID: 1771 Comm: qemu-system-ppc Kdump: loaded Not 
> tainted 5.2.0-rc4+ #3
>   [   44.375500] NIP:  c010b044 LR: c008089dacf4 CTR: 
> c010aff4
>   [   44.375604] REGS: c0179b397710 TRAP: 0300   Not tainted  (5.2.0-rc4+)
>   [   44.375691] MSR:  8280b033   
> CR: 42244842  XER: 
>   [   44.375815] CFAR: c010aff8 DAR: 13bf DSISR: 4200 
> IRQMASK: 0
>   [   44.375815] GPR00: c008089dd6bc c0179b3979a0 c00808a04300 
> 
>   [   44.375815] GPR04:  0003 2444b05d 
> c017f11c45d0
>   [   44.375815] GPR08: 07803e018dfe 0028 0001 
> 0075
>   [   44.375815] GPR12: c010aff4 c7ff6300  
> 
>   [   44.375815] GPR16:  c017f11d  
> c017f11ca7a8
>   [   44.375815] GPR20: c017f11c42ec   
> 000a
>   [   44.375815] GPR24: fffc  c017f11c 
> c1a77ed8
>   [   44.375815] GPR28: c0179af7 fffc c008089ff170 
> c0179ae88540
>   [   44.376673] NIP [c010b044] kvmppc_h_set_dabr+0x50/0x68
>   [   44.376754] LR [c008089dacf4] kvmppc_pseries_do_hcall+0xa3c/0xeb0 
> [kvm_hv]
>   [   44.376849] Call Trace:
>   [   44.376886] [c0179b3979a0] [c017f11c] 0xc017f11c 
> (unreliable)
>   [   44.376982] [c0179b397a10] [c008089dd6bc] 
> kvmppc_vcpu_run_hv+0x694/0xec0 [kvm_hv]
>   [   44.377084] [c0179b397ae0] [c008093f8bcc] 
> kvmppc_vcpu_run+0x34/0x48 [kvm]
>   [   44.377185] [c0179b397b00] [c008093f522c] 
> kvm_arch_vcpu_ioctl_run+0x2f4/0x400 [kvm]
>   [   44.377286] [c0179b397b90] [c008093e3618] 
> kvm_vcpu_ioctl+0x460/0x850 [kvm]
>   [   44.377384] [c0179b397d00] [c04ba6c4] do_vfs_ioctl+0xe4/0xb40
>   [   44.377464] [c0179b397db0] [c04bb1e4] ksys_ioctl+0xc4/0x110
>   [   44.377547] [c0179b397e00] [c04bb258] sys_ioctl+0x28/0x80
>   [   44.377628] [c0179b397e20] [c000b888] system_call+0x5c/0x70
>   [   44.377712] Instruction dump:
>   [   44.377765] 4082fff4 4c00012c 3860 4e800020 e96280c0 896b 
> 2c2b 3860
>   [   44.377862] 4d820020 50852e74 508516f6 78840724  f8a313c8 
> 7c942ba6 7cbc2ba6
> 
> Fix the bug by only changing r3 when we are returning immediately.
> 
> Fixes: c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option")
> Signed-off-by: Michael Neuling 
> Reported-by: Cédric Le Goater 

Series applied to powerpc fixes, thanks.

https://git.kernel.org/powerpc/c/fabb2efcf0846e28b4910fc20bdc203d3d0170af

cheers


Re: [PATCH] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac

2019-06-19 Thread Michael Ellerman
Christoph Hellwig  writes:
> Any chance this could get picked up to fix the regression?

Was hoping Ben would Ack it. He's still powermac maintainer :)

I guess he OK'ed it in the other thread, will add it to my queue.

cheers

> On Thu, Jun 13, 2019 at 10:24:46AM +0200, Christoph Hellwig wrote:
>> With the strict dma mask checking introduced with the switch to
>> the generic DMA direct code common wifi chips on 32-bit powerbooks
>> stopped working.  Add a 30-bit ZONE_DMA to the 32-bit pmac builds
>> to allow them to reliably allocate dma coherent memory.
>> 
>> Fixes: 65a21b71f948 ("powerpc/dma: remove dma_nommu_dma_supported")
>> Reported-by: Aaro Koskinen 
>> Signed-off-by: Christoph Hellwig 
>> ---
>>  arch/powerpc/include/asm/page.h | 7 +++
>>  arch/powerpc/mm/mem.c   | 3 ++-
>>  arch/powerpc/platforms/powermac/Kconfig | 1 +
>>  3 files changed, 10 insertions(+), 1 deletion(-)
>> 
>> diff --git a/arch/powerpc/include/asm/page.h 
>> b/arch/powerpc/include/asm/page.h
>> index b8286a2013b4..0d52f57fca04 100644
>> --- a/arch/powerpc/include/asm/page.h
>> +++ b/arch/powerpc/include/asm/page.h
>> @@ -319,6 +319,13 @@ struct vm_area_struct;
>>  #endif /* __ASSEMBLY__ */
>>  #include 
>>  
>> +/*
>> + * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
>> + */
>> +#ifdef CONFIG_PPC32
>> +#define ARCH_ZONE_DMA_BITS 30
>> +#else
>>  #define ARCH_ZONE_DMA_BITS 31
>> +#endif
>>  
>>  #endif /* _ASM_POWERPC_PAGE_H */
>> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
>> index cba29131bccc..2540d3b2588c 100644
>> --- a/arch/powerpc/mm/mem.c
>> +++ b/arch/powerpc/mm/mem.c
>> @@ -248,7 +248,8 @@ void __init paging_init(void)
>> (long int)((top_of_ram - total_ram) >> 20));
>>  
>>  #ifdef CONFIG_ZONE_DMA
>> -max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL >> PAGE_SHIFT);
>> +max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
>> +((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
>>  #endif
>>  max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
>>  #ifdef CONFIG_HIGHMEM
>> diff --git a/arch/powerpc/platforms/powermac/Kconfig 
>> b/arch/powerpc/platforms/powermac/Kconfig
>> index f834a19ed772..c02d8c503b29 100644
>> --- a/arch/powerpc/platforms/powermac/Kconfig
>> +++ b/arch/powerpc/platforms/powermac/Kconfig
>> @@ -7,6 +7,7 @@ config PPC_PMAC
>>  select PPC_INDIRECT_PCI if PPC32
>>  select PPC_MPC106 if PPC32
>>  select PPC_NATIVE
>> +select ZONE_DMA if PPC32
>>  default y
>>  
>>  config PPC_PMAC64
>> -- 
>> 2.20.1
> ---end quoted text---


Re: [PATCH] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac

2019-06-19 Thread Christoph Hellwig
Any chance this could get picked up to fix the regression?

On Thu, Jun 13, 2019 at 10:24:46AM +0200, Christoph Hellwig wrote:
> With the strict dma mask checking introduced with the switch to
> the generic DMA direct code common wifi chips on 32-bit powerbooks
> stopped working.  Add a 30-bit ZONE_DMA to the 32-bit pmac builds
> to allow them to reliably allocate dma coherent memory.
> 
> Fixes: 65a21b71f948 ("powerpc/dma: remove dma_nommu_dma_supported")
> Reported-by: Aaro Koskinen 
> Signed-off-by: Christoph Hellwig 
> ---
>  arch/powerpc/include/asm/page.h | 7 +++
>  arch/powerpc/mm/mem.c   | 3 ++-
>  arch/powerpc/platforms/powermac/Kconfig | 1 +
>  3 files changed, 10 insertions(+), 1 deletion(-)
> 
> diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h
> index b8286a2013b4..0d52f57fca04 100644
> --- a/arch/powerpc/include/asm/page.h
> +++ b/arch/powerpc/include/asm/page.h
> @@ -319,6 +319,13 @@ struct vm_area_struct;
>  #endif /* __ASSEMBLY__ */
>  #include 
>  
> +/*
> + * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks.
> + */
> +#ifdef CONFIG_PPC32
> +#define ARCH_ZONE_DMA_BITS 30
> +#else
>  #define ARCH_ZONE_DMA_BITS 31
> +#endif
>  
>  #endif /* _ASM_POWERPC_PAGE_H */
> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c
> index cba29131bccc..2540d3b2588c 100644
> --- a/arch/powerpc/mm/mem.c
> +++ b/arch/powerpc/mm/mem.c
> @@ -248,7 +248,8 @@ void __init paging_init(void)
>  (long int)((top_of_ram - total_ram) >> 20));
>  
>  #ifdef CONFIG_ZONE_DMA
> - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL >> PAGE_SHIFT);
> + max_zone_pfns[ZONE_DMA] = min(max_low_pfn,
> + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT);
>  #endif
>   max_zone_pfns[ZONE_NORMAL] = max_low_pfn;
>  #ifdef CONFIG_HIGHMEM
> diff --git a/arch/powerpc/platforms/powermac/Kconfig 
> b/arch/powerpc/platforms/powermac/Kconfig
> index f834a19ed772..c02d8c503b29 100644
> --- a/arch/powerpc/platforms/powermac/Kconfig
> +++ b/arch/powerpc/platforms/powermac/Kconfig
> @@ -7,6 +7,7 @@ config PPC_PMAC
>   select PPC_INDIRECT_PCI if PPC32
>   select PPC_MPC106 if PPC32
>   select PPC_NATIVE
> + select ZONE_DMA if PPC32
>   default y
>  
>  config PPC_PMAC64
> -- 
> 2.20.1
---end quoted text---


Re: [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-19 Thread Nicholas Piggin
Naveen N. Rao's on June 19, 2019 7:53 pm:
> Nicholas Piggin wrote:
>> Michael Ellerman's on June 19, 2019 3:14 pm:
>>> Hi Naveen,
>>> 
>>> Sorry I meant to reply to this earlier .. :/
> 
> No problem. Thanks for the questions.
> 
>>> 
>>> "Naveen N. Rao"  writes:
 With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
 enable function tracing and profiling. So far, with dynamic ftrace, we
 used to only patch out the branch to _mcount(). However, mflr is
 executed by the branch unit that can only execute one per cycle on
 POWER9 and shared with branches, so it would be nice to avoid it where
 possible.

 We cannot simply nop out the mflr either. When enabling function
 tracing, there can be a race if tracing is enabled when some thread was
 interrupted after executing a nop'ed out mflr. In this case, the thread
 would execute the now-patched-in branch to _mcount() without having
 executed the preceding mflr.

 To solve this, we now enable function tracing in 2 steps: patch in the
 mflr instruction, use synchronize_rcu_tasks() to ensure all existing
 threads make progress, and then patch in the branch to _mcount(). We
 override ftrace_replace_code() with a powerpc64 variant for this
 purpose.
>>> 
>>> According to the ISA we're not allowed to patch mflr at runtime. See the
>>> section on "CMODX".
>> 
>> According to "quasi patch class" engineering note, we can patch
>> anything with a preferred nop. But that's written as an optional
>> facility, which we don't have a feature to test for.
>> 
> 
> Hmm... I wonder what the implications are. We've been patching in a 
> 'trap' for kprobes for a long time now, along with having to patch back 
> the original instruction (which can be anything), when the probe is 
> removed.

Will have to check what implementations support "quasi patch class"
instructions. IIRC recent POWER processors are okay. May have to add
a feature test though.
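
A minimal sketch of such a gate (CPU_FTR_QUASI_PATCH is made up here
purely for illustration; no such feature bit exists today):

	/* Hypothetical feature bit advertising "quasi patch class"
	 * support; otherwise leave the mflr alone and patch only the
	 * bl _mcount site as before. */
	if (cpu_has_feature(CPU_FTR_QUASI_PATCH))
		err = patch_instruction((unsigned int *)ip, PPC_INST_NOP);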

>>> 
>>> I'm also not convinced the ordering between the two patches is
>>> guaranteed by the ISA, given that there's possibly no isync on the other
>>> CPU.
>> 
>> Will they go through a context synchronizing event?
>> 
>> synchronize_rcu_tasks() should ensure a thread is scheduled away, but
>> I'm not actually sure it guarantees CSI if it's kernel->kernel. Could
>> do a smp_call_function to do the isync on each CPU to be sure.
> 
> Good point. Per 
> Documentation/RCU/Design/Requirements/Requirements.html#Tasks RCU:
> "The solution, in the form of Tasks RCU, is to have implicit read-side 
> critical sections that are delimited by voluntary context switches, that 
> is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In 
> addition, transitions to and from userspace execution also delimit 
> tasks-RCU read-side critical sections."
> 
> I suppose transitions to/from userspace, as well as calls to schedule() 
> result in context synchronizing instruction being executed. But, if some 
> tasks call cond_resched() and synchronize_rcu_tasks(), we probably won't 
> have a CSI executed.
> 
> Also:
> "In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, so these 
> APIs map to call_rcu(), synchronize_rcu(), and rcu_barrier(), 
> respectively."
> 
> In this scenario as well, I think we won't have a CSI executed in case 
> of cond_resched().
> 
> Should we enhance patch_instruction() to handle that?

Well, not sure. Do we have many post-boot callers of it? Should
they take care of their own synchronization requirements?
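
For reference, the smp_call_function() idea mentioned above could be as
simple as this sketch, run between the two patching steps:

static void do_isync(void *info)
{
	isync();
}

	/* After patching in the mflr, before patching in bl _mcount: */
	smp_call_function(do_isync, NULL, 1);
	isync();	/* context synchronise the patching CPU too */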

Thanks,
Nick


Re: [PATCH] ocxl: Allow contexts to be attached with a NULL mm

2019-06-19 Thread Frederic Barrat




On 18/06/2019 03:50, Andrew Donnellan wrote:

On 17/6/19 2:41 pm, Alastair D'Silva wrote:

From: Alastair D'Silva 

If an OpenCAPI context is to be used directly by a kernel driver, there
may not be a suitable mm to use.

The patch makes the mm parameter to ocxl_context_attach optional.

Signed-off-by: Alastair D'Silva 


The one issue I can see here is that using mm == NULL bypasses our 
method of enabling/disabling global TLBIs in mm_context_add_copro().


Discussing this privately with Alastair and Fred - this should be fine, 
but perhaps we should document that.



So indeed we should be fine. I confirmed with Nick that kernel space 
invalidations are already global today.
Nick mentioned that we should still be fine tomorrow, but that in the distant 
future we could imagine local usage of some parts of the kernel space. 
It will require some work, and it would be best to add a comment in one 
of the kernel invalidation functions (for example 
radix__flush_tlb_kernel_range()) noting that if a kernel invalidation ever 
becomes local, then clients of the nest MMU may need some work.
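
Something along these lines in radix__flush_tlb_kernel_range(), say
(the wording is only a sketch):

	/*
	 * NOTE: kernel-space invalidations are currently always
	 * global. If a kernel invalidation ever becomes local,
	 * clients of the nest MMU (e.g. ocxl contexts attached with
	 * a NULL mm) will need rework.
	 */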


A few more comments below.



---
  drivers/misc/ocxl/context.c |  9 ++---
  drivers/misc/ocxl/link.c    | 12 
  2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c
index bab9c9364184..994563a078eb 100644
--- a/drivers/misc/ocxl/context.c
+++ b/drivers/misc/ocxl/context.c
@@ -69,6 +69,7 @@ static void xsl_fault_error(void *data, u64 addr, u64 dsisr)
  int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm)
  {
  int rc;
+    unsigned long pidr = 0;
  // Locks both status & tidr
  mutex_lock(&ctx->status_mutex);
@@ -77,9 +78,11 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm)
  goto out;
  }
-    rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid,
-    mm->context.id, ctx->tidr, amr, mm,
-    xsl_fault_error, ctx);
+    if (mm)
+    pidr = mm->context.id;
+
+    rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr,
+  amr, mm, xsl_fault_error, ctx);
  if (rc)
  goto out;
diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c
index cce5b0d64505..43542f124807 100644
--- a/drivers/misc/ocxl/link.c
+++ b/drivers/misc/ocxl/link.c
@@ -523,7 +523,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
  pe->amr = cpu_to_be64(amr);
  pe->software_state = cpu_to_be32(SPA_PE_VALID);
-    mm_context_add_copro(mm);
+    if (mm)
+    mm_context_add_copro(mm);



Same as above, we should add a comment here in the driver code that a 
kernel context is ok because invalidations are global.



We also need a new check in xsl_fault_handler(). A valid kernel address 
shouldn't fault, but it's still possible for the FPGA to try accessing a 
bogus kernel address. In that case, xsl_fault_handler() would be 
entered with a valid fault context. We'll find pe_data in the tree 
based on the valid pe_handle, but pe_data->mm will be NULL. In that case, we 
can return early, acknowledging the interrupt with the ADDRESS_ERROR value 
(like we do if pe_data is not found in the tree).
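
A sketch of that check, assuming the driver's existing ack_irq() /
ADDRESS_ERROR convention (the unlock/return details are taken from the
pe_data-not-found path described above):

	if (!pe_data->mm) {
		/*
		 * A kernel context faulted: valid kernel addresses
		 * never fault, so the FPGA must have accessed a bogus
		 * address. Ack with an address error, as when pe_data
		 * is not found in the tree.
		 */
		rcu_read_unlock();
		ack_irq(spa, ADDRESS_ERROR);
		return IRQ_HANDLED;
	}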


  Fred



  /*
   * Barrier is to make sure PE is visible in the SPA before it
   * is used by the device. It also helps with the global TLBI
@@ -546,7 +547,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr,
   * have a reference on mm_users. Incrementing mm_count solves
   * the problem.
   */
-    mmgrab(mm);
+    if (mm)
+    mmgrab(mm);
  trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, 
tidr);

  unlock:
  mutex_unlock(&spa->spa_lock);
@@ -652,8 +654,10 @@ int ocxl_link_remove_pe(void *link_handle, int pasid)
  if (!pe_data) {
  WARN(1, "Couldn't find pe data when removing PE\n");
  } else {
-    mm_context_remove_copro(pe_data->mm);
-    mmdrop(pe_data->mm);
+    if (pe_data->mm) {
+    mm_context_remove_copro(pe_data->mm);
+    mmdrop(pe_data->mm);
+    }
  kfree_rcu(pe_data, rcu);
  }
  unlock:







Re: [RFC PATCH v0] powerpc: Fix BUG_ON during memory unplug on radix

2019-06-19 Thread Nicholas Piggin
Bharata B Rao's on June 19, 2019 5:45 pm:
> We hit the following BUG_ON when memory hotplugged before reboot
> is unplugged after reboot:
> 
> kernel BUG at arch/powerpc/mm/pgtable-frag.c:113!
> 
>  remove_pagetable+0x594/0x6a0
>  (unreliable)
>  remove_pagetable+0x94/0x6a0
>  vmemmap_free+0x394/0x410
>  sparse_remove_one_section+0x26c/0x2e8
>  __remove_pages+0x428/0x540
>  arch_remove_memory+0xd0/0x170
>  __remove_memory+0xd4/0x1a0
>  dlpar_remove_lmb+0xbc/0x110
>  dlpar_memory+0xa80/0xd20
>  handle_dlpar_errorlog+0xa8/0x160
>  pseries_hp_work_fn+0x2c/0x60
>  process_one_work+0x46c/0x860
>  worker_thread+0x364/0x5e0
>  kthread+0x1b0/0x1c0
>  ret_from_kernel_thread+0x5c/0x68
> 
> This occurs because, during reboot-after-hotplug, the hotplugged
> memory range gets initialized as regular memory and page
> tables are setup using memblock allocator. This means that we
> wouldn't have initialized the PMD or PTE fragment count for
> those PMD or PTE pages.
> 
> Fixing this includes 3 aspects:
> 
> - Walk the init_mm page tables from mem_init() and initialize
>   the PMD and PTE fragment counts appropriately.
> - When we do early allocation of PMD (and PGD as well) pages,
>   allocate in page size PAGE_SIZE granularity so that we are
>   sure that the complete page is available for us to set the
>   fragment count which is part of struct page.
> - When PMD or PTE page is freed, check if it comes from memblock
>   allocator and free it appropriately.
> 
> Reported-by: Srikanth Aithal 
> Signed-off-by: Bharata B Rao 
> ---
>  arch/powerpc/include/asm/book3s/64/radix.h |  1 +
>  arch/powerpc/include/asm/sparsemem.h   |  1 +
>  arch/powerpc/mm/book3s64/pgtable.c | 12 +++-
>  arch/powerpc/mm/book3s64/radix_pgtable.c   | 67 +-
>  arch/powerpc/mm/mem.c  |  5 ++
>  arch/powerpc/mm/pgtable-frag.c |  5 +-
>  6 files changed, 87 insertions(+), 4 deletions(-)
> 
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
> b/arch/powerpc/include/asm/book3s/64/radix.h
> index 574eca33f893..4320f2790e8d 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void)
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  int radix__create_section_mapping(unsigned long start, unsigned long end, 
> int nid);
>  int radix__remove_section_mapping(unsigned long start, unsigned long end);
> +void radix__fixup_pgtable_fragments(void);
>  #endif /* CONFIG_MEMORY_HOTPLUG */
>  #endif /* __ASSEMBLY__ */
>  #endif
> diff --git a/arch/powerpc/include/asm/sparsemem.h 
> b/arch/powerpc/include/asm/sparsemem.h
> index 3192d454a733..e662f9232d35 100644
> --- a/arch/powerpc/include/asm/sparsemem.h
> +++ b/arch/powerpc/include/asm/sparsemem.h
> @@ -15,6 +15,7 @@
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  extern int create_section_mapping(unsigned long start, unsigned long end, 
> int nid);
>  extern int remove_section_mapping(unsigned long start, unsigned long end);
> +void fixup_pgtable_fragments(void);
>  
>  #ifdef CONFIG_PPC_BOOK3S_64
>  extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
> diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
> b/arch/powerpc/mm/book3s64/pgtable.c
> index 01bc9663360d..7efe9cc16b39 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -186,6 +186,13 @@ int __meminit remove_section_mapping(unsigned long 
> start, unsigned long end)
>  
>   return hash__remove_section_mapping(start, end);
>  }
> +
> +void fixup_pgtable_fragments(void)
> +{
> + if (radix_enabled())
> + radix__fixup_pgtable_fragments();
> +}
> +
>  #endif /* CONFIG_MEMORY_HOTPLUG */
>  
>  void __init mmu_partition_table_init(void)
> @@ -320,7 +327,10 @@ void pmd_fragment_free(unsigned long *pmd)
>   BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
>   if (atomic_dec_and_test(&page->pt_frag_refcount)) {
>   pgtable_pmd_page_dtor(page);
> - __free_page(page);
> + if (PageReserved(page))
> + free_reserved_page(page);

Hmm. Rather than adding this special case here, I wonder if you can
just go along in your fixup walk and convert all these pages to
non-reserved pages?

ClearPageReserved ; init_page_count ; adjust_managed_page_count ; 
should do the trick, right?
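
I.e. something like this in the fixup walk (a sketch of the above):

static void fixup_pgtable_page(struct page *page)
{
	if (PageReserved(page)) {
		ClearPageReserved(page);
		init_page_count(page);
		adjust_managed_page_count(page, 1);
	}
}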


> + else
> + __free_page(page);

Thanks,
Nick


Re: [PATCH v2 1/1] cpuidle-powernv : forced wakeup for stop states

2019-06-19 Thread Nicholas Piggin
Abhishek's on June 19, 2019 7:08 pm:
> Hi Nick,
> 
> Thanks for the review. Some replies below.
> 
> On 06/19/2019 09:53 AM, Nicholas Piggin wrote:
>> Abhishek Goel's on June 17, 2019 7:56 pm:
>>> Currently, the cpuidle governors determine what idle state an idling CPU
>>> should enter into based on heuristics that depend on the idle history on
>>> that CPU. Given that no predictive heuristic is perfect, there are cases
>>> where the governor predicts a shallow idle state, hoping that the CPU will
>>> be busy soon. However, if no new workload is scheduled on that CPU in the
>>> near future, the CPU may end up in the shallow state.
>>>
>>> This is problematic when the predicted state in the aforementioned
>>> scenario is a shallow stop state on a tickless system, as we might get
>>> stuck in shallow states for hours, in the absence of ticks or interrupts.
>>>
>>> To address this, we forcefully wake up the cpu by setting the
>>> decrementer. The decrementer is set to a value that corresponds with the
>>> residency of the next available state, thus firing up a timer that will
>>> forcefully wake up the cpu. A few such iterations will essentially train the
>>> governor to select a deeper state for that cpu, as the timer here
>>> corresponds to the next available cpuidle state residency. Thus, cpu will
>>> eventually end up in the deepest possible state.
>>>
>>> Signed-off-by: Abhishek Goel 
>>> ---
>>>
>>> Auto-promotion
>>>   v1 : started as auto promotion logic for cpuidle states in generic
>>> driver
>>>   v2 : Removed timeout_needed and rebased the code to upstream kernel
>>> Forced-wakeup
>>>   v1 : New patch with name of forced wakeup started
>>>   v2 : Extending the forced wakeup logic for all states. Setting the
>>> decrementer instead of queuing up a hrtimer to implement the logic.
>>>
>>>   drivers/cpuidle/cpuidle-powernv.c | 38 +++
>>>   1 file changed, 38 insertions(+)
>>>
>>> diff --git a/drivers/cpuidle/cpuidle-powernv.c 
>>> b/drivers/cpuidle/cpuidle-powernv.c
>>> index 84b1ebe212b3..bc9ca18ae7e3 100644
>>> --- a/drivers/cpuidle/cpuidle-powernv.c
>>> +++ b/drivers/cpuidle/cpuidle-powernv.c
>>> @@ -46,6 +46,26 @@ static struct stop_psscr_table 
>>> stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly
>>>   static u64 default_snooze_timeout __read_mostly;
>>>   static bool snooze_timeout_en __read_mostly;
>>>   
>>> +static u64 forced_wakeup_timeout(struct cpuidle_device *dev,
>>> +struct cpuidle_driver *drv,
>>> +int index)
>>> +{
>>> +   int i;
>>> +
>>> +   for (i = index + 1; i < drv->state_count; i++) {
>>> +   struct cpuidle_state *s = &drv->states[i];
>>> +   struct cpuidle_state_usage *su = &dev->states_usage[i];
>>> +
>>> +   if (s->disabled || su->disable)
>>> +   continue;
>>> +
>>> +   return (s->target_residency + 2 * s->exit_latency) *
>>> +   tb_ticks_per_usec;
>>> +   }
>>> +
>>> +   return 0;
>>> +}
>> It would be nice to not have this kind of loop iteration in the
>> idle fast path. Can we add a flag or something to the idle state?
> Currently, we do not have any callback notification or some feedback that
> notifies the driver every time some state is enabled/disabled. So we have
> to parse every time to get the next enabled state.

Ahh, that's why you're doing that.

> Are you suggesting to
> add something like next_enabled_state in cpuidle state structure itself
> which will be updated when a state is enabled or disabled?

Hmm, I guess it normally should not iterate over more than one state
unless some idle states are disabled.

What would have been nice is each state just having its own timeout
field with the ticks already calculated, updated whenever a state is
enabled or disabled. How hard is that to add to the
cpuidle core?
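
One possible shape for that, recomputed only when a state is enabled or
disabled rather than in the idle fast path (forced_wakeup_tb[] is a
hypothetical per-state cache, sketched here for illustration):

static u64 forced_wakeup_tb[CPUIDLE_STATE_MAX];	/* hypothetical */

static void update_forced_wakeup_timeouts(struct cpuidle_device *dev,
					  struct cpuidle_driver *drv)
{
	int i, j;

	for (i = 0; i < drv->state_count; i++) {
		u64 timeout = 0;

		/* Find the next enabled (deeper) state, if any. */
		for (j = i + 1; j < drv->state_count; j++) {
			struct cpuidle_state *s = &drv->states[j];

			if (s->disabled || dev->states_usage[j].disable)
				continue;
			timeout = (s->target_residency +
				   2 * s->exit_latency) * tb_ticks_per_usec;
			break;
		}
		forced_wakeup_tb[i] = timeout;
	}
}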

>>> +
>>>   static u64 get_snooze_timeout(struct cpuidle_device *dev,
>>>   struct cpuidle_driver *drv,
>>>   int index)
>>> @@ -144,8 +164,26 @@ static int stop_loop(struct cpuidle_device *dev,
>>>  struct cpuidle_driver *drv,
>>>  int index)
>>>   {
>>> +   u64 dec_expiry_tb, dec, timeout_tb, forced_wakeup;
>>> +
>>> +   dec = mfspr(SPRN_DEC);
>>> +   timeout_tb = forced_wakeup_timeout(dev, drv, index);
>>> +   forced_wakeup = 0;
>>> +
>>> +   if (timeout_tb && timeout_tb < dec) {
>>> +   forced_wakeup = 1;
>>> +   dec_expiry_tb = mftb() + dec;
>>> +   }
>> The compiler probably can't optimise away the SPR manipulations so try
>> to avoid them if possible.
> Are you suggesting something like set_dec_before_idle()? (in line with
> what you have suggested to do after idle, reset_dec_after_idle)

I should have been clear, I meant don't mfspr(SPRN_DEC) until you
have tested timeout_tb.
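
I.e. roughly (sketch):

	timeout_tb = forced_wakeup_timeout(dev, drv, index);
	if (timeout_tb) {
		dec = mfspr(SPRN_DEC);	/* only read DEC when needed */
		if (timeout_tb < dec) {
			forced_wakeup = 1;
			dec_expiry_tb = mftb() + dec;
			mtspr(SPRN_DEC, timeout_tb);
		}
	}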

>>> +
>>> +   if (forced_wakeup)
>>> +   mtspr(SPRN_DEC, timeout_tb);
>> This should just be put in the above 'if'.
> Fair point.

Re: [PATCH 06/28] powerpc/64s/exception: remove the "extra" macro parameter

2019-06-19 Thread Michael Ellerman
Nicholas Piggin  writes:
> Nicholas Piggin's on June 12, 2019 12:30 am:
>> @@ -265,7 +275,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
>>  EXC_REAL_END(machine_check, 0x200, 0x100)
>>  EXC_VIRT_NONE(0x4200, 0x100)
>>  TRAMP_REAL_BEGIN(machine_check_common_early)
>> -EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
>> +EXCEPTION_PROLOG_1 EXC_HV, PACA_EXMC, 0, 0x200
>>  /*
>>   * Register contents:
>>   * R13  = PACA
>
> This is a little bug here, machine check is an EXC_STD exception. It
> does not show up as generated code problem because EXCEPTION_PROLOG_1
> does not actually do anything with this parameter if KVM is false,
> which it is here.
>
> Still, it's wrong. I may just resend the series, because it caused a
> few conflicts in subsequent patches, and I have a few more to add to
> the end.

OK. I'll pull the series out for now.

cheers


Re: [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel

2019-06-19 Thread Naveen N. Rao

Nicholas Piggin wrote:

Michael Ellerman's on June 19, 2019 3:14 pm:

Hi Naveen,

Sorry I meant to reply to this earlier .. :/


No problem. Thanks for the questions.



"Naveen N. Rao"  writes:

With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to
enable function tracing and profiling. So far, with dynamic ftrace, we
used to only patch out the branch to _mcount(). However, mflr is
executed by the branch unit that can only execute one per cycle on
POWER9 and shared with branches, so it would be nice to avoid it where
possible.

We cannot simply nop out the mflr either. When enabling function
tracing, there can be a race if tracing is enabled when some thread was
interrupted after executing a nop'ed out mflr. In this case, the thread
would execute the now-patched-in branch to _mcount() without having
executed the preceding mflr.

To solve this, we now enable function tracing in 2 steps: patch in the
mflr instruction, use synchronize_rcu_tasks() to ensure all existing
threads make progress, and then patch in the branch to _mcount(). We
override ftrace_replace_code() with a powerpc64 variant for this
purpose.


According to the ISA we're not allowed to patch mflr at runtime. See the
section on "CMODX".


According to "quasi patch class" engineering note, we can patch
anything with a preferred nop. But that's written as an optional
facility, which we don't have a feature to test for.



Hmm... I wonder what the implications are. We've been patching in a 
'trap' for kprobes for a long time now, along with having to patch back 
the original instruction (which can be anything), when the probe is 
removed.




I'm also not convinced the ordering between the two patches is
guaranteed by the ISA, given that there's possibly no isync on the other
CPU.


Will they go through a context synchronizing event?

synchronize_rcu_tasks() should ensure a thread is scheduled away, but
I'm not actually sure it guarantees CSI if it's kernel->kernel. Could
do a smp_call_function to do the isync on each CPU to be sure.


Good point. Per 
Documentation/RCU/Design/Requirements/Requirements.html#Tasks RCU:
"The solution, in the form of Tasks RCU, is to have implicit read-side 
critical sections that are delimited by voluntary context switches, that 
is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In 
addition, transitions to and from userspace execution also delimit 
tasks-RCU read-side critical sections."


I suppose transitions to/from userspace, as well as calls to schedule() 
result in context synchronizing instruction being executed. But, if some 
tasks call cond_resched() and synchronize_rcu_tasks(), we probably won't 
have a CSI executed.


Also:
"In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, so these 
APIs map to call_rcu(), synchronize_rcu(), and rcu_barrier(), 
respectively."


In this scenario as well, I think we won't have a CSI executed in case 
of cond_resched().


Should we enhance patch_instruction() to handle that?


- Naveen



RE: [PATCH v4 1/3] PM: wakeup: Add routine to help fetch wakeup source object.

2019-06-19 Thread Ran Wang
Hi Rafael,

On Wednesday, June 19, 2019 06:45, Rafael J. Wysocki wrote:
> 
> On Monday, May 20, 2019 11:52:36 AM CEST Ran Wang wrote:
> > Some user might want to go through all registered wakeup sources and
> > doing things accordingly. For example, SoC PM driver might need to do
> > HW programming to prevent powering down specific IP which wakeup
> > source depending on. And is user's responsibility to identify if this
> > wakeup source he is interested in.
> 
> I guess the idea here is that you need to walk wakeup devices and you noticed
> that there was a wakeup source object for each of them and those wakeup
> source objects were on a list, so you could walk wakeup devices by walking the
> list of wakeup source objects.
> 
> That is fair enough, but the changelog above doesn't even talk about that.

How about this:
"Provide an API to help walk through all registered wakeup devices on the
list. This will be useful for a SoC PMU driver to know which devices can work
as wakeup sources, so it can do specific HW programming for them."

> > Signed-off-by: Ran Wang 
> > ---
> > Change in v4:
> > - None.
> >
> > Change in v3:
> > - Adjust indentation of *attached_dev;.
> >
> > Change in v2:
> > - None.
> >
> >  drivers/base/power/wakeup.c |   18 ++
> >  include/linux/pm_wakeup.h   |3 +++
> >  2 files changed, 21 insertions(+), 0 deletions(-)
> >
> > diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c
> > index 5b2b6a0..6904485 100644
> > --- a/drivers/base/power/wakeup.c
> > +++ b/drivers/base/power/wakeup.c
> > @@ -14,6 +14,7 @@
> >  #include 
> >  #include 
> >  #include 
> > +#include 
> >  #include 
> >  #include 
> >
> > @@ -226,6 +227,22 @@ void wakeup_source_unregister(struct
> wakeup_source *ws)
> > }
> >  }
> >  EXPORT_SYMBOL_GPL(wakeup_source_unregister);
> > +/**
> > + * wakeup_source_get_next - Get next wakeup source from the list
> > + * @ws: Previous wakeup source object, null means caller want first one.
> > + */
> > +struct wakeup_source *wakeup_source_get_next(struct wakeup_source
> > +*ws) {
> > +   struct list_head *ws_head = &wakeup_sources;
> > +
> > +   if (ws)
> > +   return list_next_or_null_rcu(ws_head, &ws->entry,
> > +   struct wakeup_source, entry);
> > +   else
> > +   return list_entry_rcu(ws_head->next,
> > +   struct wakeup_source, entry);
> > +}
> > +EXPORT_SYMBOL_GPL(wakeup_source_get_next);
> 
> This needs to be arranged along the lines of
> wakeup_sources_stats_seq_start/next/stop()
> because of the SRCU protection of the list.

Got it, how about this:

/**
 * wakeup_source_get_next - Get next wakeup source from the list
 * @ws: Previous wakeup source object, null means caller wants first one.
 */
struct wakeup_source *wakeup_source_get_next(struct wakeup_source *ws)
{
	struct list_head *ws_head = &wakeup_sources;
	struct wakeup_source *next_ws = NULL;
	int idx;

	idx = srcu_read_lock(&wakeup_srcu);
	if (ws)
		next_ws = list_next_or_null_rcu(ws_head, &ws->entry,
				struct wakeup_source, entry);
	else
		next_ws = list_entry_rcu(ws_head->next,
				struct wakeup_source, entry);
	srcu_read_unlock(&wakeup_srcu, idx);

	return next_ws;
}
EXPORT_SYMBOL_GPL(wakeup_source_get_next);
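
A caller walk could then look like this (soc_pmu_program_wakeup() is a
hypothetical SoC hook; note each call opens and closes its own SRCU
read section, so entries may come and go during the walk):

	struct wakeup_source *ws = NULL;

	while ((ws = wakeup_source_get_next(ws)) != NULL) {
		if (ws->attached_dev)
			soc_pmu_program_wakeup(ws->attached_dev);
	}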

> >
> >  /**
> >   * device_wakeup_attach - Attach a wakeup source object to a device object.
> > @@ -242,6 +259,7 @@ static int device_wakeup_attach(struct device *dev,
> struct wakeup_source *ws)
> > return -EEXIST;
> > }
> > dev->power.wakeup = ws;
> > +   ws->attached_dev = dev;
> > if (dev->power.wakeirq)
> > device_wakeup_attach_irq(dev, dev->power.wakeirq);
> >  

Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel

2019-06-19 Thread Steven Rostedt
On Wed, 19 Jun 2019 13:26:37 +0530
"Naveen N. Rao"  wrote:

> > In include/ftrace.h:
> > 
> > #ifndef FTRACE_IP_EXTENSION
> > # define FTRACE_IP_EXTENSION0
> > #endif
> > 
> > 
> > In arch/powerpc/include/asm/ftrace.h
> > 
> > #define FTRACE_IP_EXTENSION MCOUNT_INSN_SIZE
> > 
> > 
> > Then we can just have:
> > 
> > unsigned long ftrace_location(unsigned long ip)
> > {
> > return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION);
> > }  
> 
> Thanks, that's indeed nice. I hope you don't mind me adding your SOB for 
> that.

Actually, it's best not to put a SOB by anyone other than yourself. It
actually has legal meaning.

In this case, please add:

Suggested-by: Steven Rostedt (VMware) 

Thanks!

-- Steve


Re: [PATCH v2 1/1] cpuidle-powernv : forced wakeup for stop states

2019-06-19 Thread Abhishek

Hi Nick,

Thanks for the review. Some replies below.

On 06/19/2019 09:53 AM, Nicholas Piggin wrote:

Abhishek Goel's on June 17, 2019 7:56 pm:

Currently, the cpuidle governors determine what idle state an idling CPU
should enter into based on heuristics that depend on the idle history on
that CPU. Given that no predictive heuristic is perfect, there are cases
where the governor predicts a shallow idle state, hoping that the CPU will
be busy soon. However, if no new workload is scheduled on that CPU in the
near future, the CPU may end up in the shallow state.

This is problematic when the predicted state in the aforementioned
scenario is a shallow stop state on a tickless system, as we might get
stuck in shallow states for hours, in the absence of ticks or interrupts.

To address this, we forcefully wake up the cpu by setting the
decrementer. The decrementer is set to a value that corresponds with the
residency of the next available state, thus firing up a timer that will
forcefully wake up the cpu. A few such iterations will essentially train the
governor to select a deeper state for that cpu, as the timer here
corresponds to the next available cpuidle state residency. Thus, cpu will
eventually end up in the deepest possible state.

Signed-off-by: Abhishek Goel 
---

Auto-promotion
  v1 : started as auto promotion logic for cpuidle states in generic
driver
  v2 : Removed timeout_needed and rebased the code to upstream kernel
Forced-wakeup
  v1 : New patch with name of forced wakeup started
  v2 : Extending the forced wakeup logic for all states. Setting the
decrementer instead of queuing up a hrtimer to implement the logic.

  drivers/cpuidle/cpuidle-powernv.c | 38 +++
  1 file changed, 38 insertions(+)

diff --git a/drivers/cpuidle/cpuidle-powernv.c 
b/drivers/cpuidle/cpuidle-powernv.c
index 84b1ebe212b3..bc9ca18ae7e3 100644
--- a/drivers/cpuidle/cpuidle-powernv.c
+++ b/drivers/cpuidle/cpuidle-powernv.c
@@ -46,6 +46,26 @@ static struct stop_psscr_table 
stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly
  static u64 default_snooze_timeout __read_mostly;
  static bool snooze_timeout_en __read_mostly;
  
+static u64 forced_wakeup_timeout(struct cpuidle_device *dev,
+struct cpuidle_driver *drv,
+int index)
+{
+   int i;
+
+   for (i = index + 1; i < drv->state_count; i++) {
+   struct cpuidle_state *s = &drv->states[i];
+   struct cpuidle_state_usage *su = &dev->states_usage[i];
+
+   if (s->disabled || su->disable)
+   continue;
+
+   return (s->target_residency + 2 * s->exit_latency) *
+   tb_ticks_per_usec;
+   }
+
+   return 0;
+}

It would be nice to not have this kind of loop iteration in the
idle fast path. Can we add a flag or something to the idle state?

Currently, we do not have any callback notification or some feedback that
notifies the driver every time some state is enabled/disabled. So we have
to parse every time to get the next enabled state. Are you suggesting to
add something like next_enabled_state in the cpuidle state structure itself,
which will be updated when a state is enabled or disabled?

+
  static u64 get_snooze_timeout(struct cpuidle_device *dev,
  struct cpuidle_driver *drv,
  int index)
@@ -144,8 +164,26 @@ static int stop_loop(struct cpuidle_device *dev,
 struct cpuidle_driver *drv,
 int index)
  {
+   u64 dec_expiry_tb, dec, timeout_tb, forced_wakeup;
+
+   dec = mfspr(SPRN_DEC);
+   timeout_tb = forced_wakeup_timeout(dev, drv, index);
+   forced_wakeup = 0;
+
+   if (timeout_tb && timeout_tb < dec) {
+   forced_wakeup = 1;
+   dec_expiry_tb = mftb() + dec;
+   }

The compiler probably can't optimise away the SPR manipulations so try
to avoid them if possible.

Are you suggesting something like set_dec_before_idle()? (in line with
what you have suggested to do after idle, reset_dec_after_idle)



+
+   if (forced_wakeup)
+   mtspr(SPRN_DEC, timeout_tb);

This should just be put in the above 'if'.

Fair point.



+
power9_idle_type(stop_psscr_table[index].val,
 stop_psscr_table[index].mask);
+
+   if (forced_wakeup)
+   mtspr(SPRN_DEC, dec_expiry_tb - mftb());

This will sometimes go negative and result in another timer interrupt.

It also breaks irq work (which can be set here by machine check, I
believe).

May need to implement some timer code to do this for you.

static void reset_dec_after_idle(void)
{
	u64 now;
	u64 *next_tb;

	if (test_irq_work_pending())
		return;
	now = mftb();
	next_tb = this_cpu_ptr(&decrementers_next_tb);

	if (now >= *next_tb)
		return;
	set_dec(*next_tb - now);
	/* We may have raced with new irq work (as in timer_interrupt()) */
	if (test_irq_work_pending())
		set_dec(1);
}

Re: [RFC PATCH v0] powerpc: Fix BUG_ON during memory unplug on radix

2019-06-19 Thread Aneesh Kumar K.V
Bharata B Rao  writes:

> We hit the following BUG_ON when memory hotplugged before reboot
> is unplugged after reboot:
>
> kernel BUG at arch/powerpc/mm/pgtable-frag.c:113!
>
>  remove_pagetable+0x594/0x6a0
>  (unreliable)
>  remove_pagetable+0x94/0x6a0
>  vmemmap_free+0x394/0x410
>  sparse_remove_one_section+0x26c/0x2e8
>  __remove_pages+0x428/0x540
>  arch_remove_memory+0xd0/0x170
>  __remove_memory+0xd4/0x1a0
>  dlpar_remove_lmb+0xbc/0x110
>  dlpar_memory+0xa80/0xd20
>  handle_dlpar_errorlog+0xa8/0x160
>  pseries_hp_work_fn+0x2c/0x60
>  process_one_work+0x46c/0x860
>  worker_thread+0x364/0x5e0
>  kthread+0x1b0/0x1c0
>  ret_from_kernel_thread+0x5c/0x68
>
> This occurs because, during reboot-after-hotplug, the hotplugged
> memory range gets initialized as regular memory and page
> tables are setup using memblock allocator. This means that we
> wouldn't have initialized the PMD or PTE fragment count for
> those PMD or PTE pages.
>
> Fixing this includes 3 aspects:
>
> - Walk the init_mm page tables from mem_init() and initialize
>   the PMD and PTE fragment counts appropriately.
> - When we do early allocation of PMD (and PGD as well) pages,
>   allocate in page size PAGE_SIZE granularity so that we are
>   sure that the complete page is available for us to set the
>   fragment count which is part of struct page.


That is an important change now. For early page tables we now allocate
PAGE_SIZE tables and hence we consider them as pages with fragment
count 1. You may also want to explain here why. I guess the challenge is
due to the fact that we can't clearly control how the rest of the page
will get used, and we are not sure it will all be allocated for backing
page table pages.

> - When PMD or PTE page is freed, check if it comes from memblock
>   allocator and free it appropriately.
>
> Reported-by: Srikanth Aithal 
> Signed-off-by: Bharata B Rao 
> ---
>  arch/powerpc/include/asm/book3s/64/radix.h |  1 +
>  arch/powerpc/include/asm/sparsemem.h   |  1 +
>  arch/powerpc/mm/book3s64/pgtable.c | 12 +++-
>  arch/powerpc/mm/book3s64/radix_pgtable.c   | 67 +-
>  arch/powerpc/mm/mem.c  |  5 ++
>  arch/powerpc/mm/pgtable-frag.c |  5 +-
>  6 files changed, 87 insertions(+), 4 deletions(-)
>
> diff --git a/arch/powerpc/include/asm/book3s/64/radix.h 
> b/arch/powerpc/include/asm/book3s/64/radix.h
> index 574eca33f893..4320f2790e8d 100644
> --- a/arch/powerpc/include/asm/book3s/64/radix.h
> +++ b/arch/powerpc/include/asm/book3s/64/radix.h
> @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void)
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  int radix__create_section_mapping(unsigned long start, unsigned long end, 
> int nid);
>  int radix__remove_section_mapping(unsigned long start, unsigned long end);
> +void radix__fixup_pgtable_fragments(void);
>  #endif /* CONFIG_MEMORY_HOTPLUG */
>  #endif /* __ASSEMBLY__ */
>  #endif
> diff --git a/arch/powerpc/include/asm/sparsemem.h 
> b/arch/powerpc/include/asm/sparsemem.h
> index 3192d454a733..e662f9232d35 100644
> --- a/arch/powerpc/include/asm/sparsemem.h
> +++ b/arch/powerpc/include/asm/sparsemem.h
> @@ -15,6 +15,7 @@
>  #ifdef CONFIG_MEMORY_HOTPLUG
>  extern int create_section_mapping(unsigned long start, unsigned long end, 
> int nid);
>  extern int remove_section_mapping(unsigned long start, unsigned long end);
> +void fixup_pgtable_fragments(void);
>
>  #ifdef CONFIG_PPC_BOOK3S_64
>  extern int resize_hpt_for_hotplug(unsigned long new_mem_size);
> diff --git a/arch/powerpc/mm/book3s64/pgtable.c 
> b/arch/powerpc/mm/book3s64/pgtable.c
> index 01bc9663360d..7efe9cc16b39 100644
> --- a/arch/powerpc/mm/book3s64/pgtable.c
> +++ b/arch/powerpc/mm/book3s64/pgtable.c
> @@ -186,6 +186,13 @@ int __meminit remove_section_mapping(unsigned long 
> start, unsigned long end)
>
>   return hash__remove_section_mapping(start, end);
>  }
> +
> +void fixup_pgtable_fragments(void)
> +{
> + if (radix_enabled())
> + radix__fixup_pgtable_fragments();
> +}
> +
>  #endif /* CONFIG_MEMORY_HOTPLUG */
>
>  void __init mmu_partition_table_init(void)
> @@ -320,7 +327,10 @@ void pmd_fragment_free(unsigned long *pmd)
>   BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0);
>   if (atomic_dec_and_test(&page->pt_frag_refcount)) {
>   pgtable_pmd_page_dtor(page);
> - __free_page(page);
> + if (PageReserved(page))
> + free_reserved_page(page);
> + else
> + __free_page(page);
>   }
>  }
>
> diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c 
> b/arch/powerpc/mm/book3s64/radix_pgtable.c
> index 273ae66a9a45..402e8da28cab 100644
> --- a/arch/powerpc/mm/book3s64/radix_pgtable.c
> +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c
> @@ -32,6 +32,69 @@
>  unsigned int mmu_pid_bits;
>  unsigned int mmu_base_pid;
>
> +static void fixup_pmd_fragments(pmd_t *pmd)
> +{
> + int i;
> +
> + for (i = 0; i < PTRS_PER_PMD

Re: [PATCH 06/28] powerpc/64s/exception: remove the "extra" macro parameter

2019-06-19 Thread Nicholas Piggin
Nicholas Piggin's on June 12, 2019 12:30 am:
> @@ -265,7 +275,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE)
>  EXC_REAL_END(machine_check, 0x200, 0x100)
>  EXC_VIRT_NONE(0x4200, 0x100)
>  TRAMP_REAL_BEGIN(machine_check_common_early)
> - EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200)
> + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXMC, 0, 0x200
>   /*
>* Register contents:
>* R13  = PACA

This is a little bug here, machine check is an EXC_STD exception. It
does not show up as generated code problem because EXCEPTION_PROLOG_1
does not actually do anything with this parameter if KVM is false,
which it is here.

Still, it's wrong. I may just resend the series, because it caused a
few conflicts in subsequent patches, and I have a few more to add to
the end.

Thanks,
Nick



Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel

2019-06-19 Thread Naveen N. Rao

Steven Rostedt wrote:

On Tue, 18 Jun 2019 23:53:11 +0530
"Naveen N. Rao"  wrote:


Naveen N. Rao wrote:
> Steven Rostedt wrote:  
>> On Tue, 18 Jun 2019 20:17:04 +0530

>> "Naveen N. Rao"  wrote:
>>   
>>> @@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end)

>>>key.flags = end;/* overload flags, as it is unsigned long */
>>>  
>>>  	for (pg = ftrace_pages_start; pg; pg = pg->next) {

>>> -  if (end < pg->records[0].ip ||
>>> +		if (end <= pg->records[0].ip ||  
>> 
>> This breaks the algorithm. "end" is inclusive. That is, if you look for

>> a single byte, where "start" and "end" are the same, and it happens to
>> be the first ip on the pg page, it will be skipped, and not found.  
> 
> Thanks. It looks like I should be over-riding ftrace_location() instead.  
> I will update this patch.  

I think I will have ftrace own the two-instruction range, regardless of 
whether the preceding instruction is a 'mflr r0' or not. This simplifies 
things and I don't see an issue with it as of now. I will do more 
testing to confirm.


- Naveen


--- a/arch/powerpc/kernel/trace/ftrace.c
+++ b/arch/powerpc/kernel/trace/ftrace.c
@@ -951,6 +951,16 @@ void arch_ftrace_update_code(int command)
 }
 
 #ifdef CONFIG_MPROFILE_KERNEL

+/*
+ * We consider two instructions -- 'mflr r0', 'bl _mcount' -- to be part
+ * of ftrace. When checking for the first instruction, we want to include
+ * the next instruction in the range check.
+ */
+unsigned long ftrace_location(unsigned long ip)
+{
+   return ftrace_location_range(ip, ip + MCOUNT_INSN_SIZE);
+}
+
 /* Returns 1 if we patched in the mflr */
 static int __ftrace_make_call_prep(struct dyn_ftrace *rec)
 {
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index 21d8e201ee80..122e2bb4a739 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -1573,7 +1573,7 @@ unsigned long ftrace_location_range(unsigned long start, 
unsigned long end)
  * the function tracer. It checks the ftrace internal tables to
  * determine if the address belongs or not.
  */
-unsigned long ftrace_location(unsigned long ip)
+unsigned long __weak ftrace_location(unsigned long ip)
 {
return ftrace_location_range(ip, ip);
 }


Actually, instead of making this a weak function, let's do this:


In include/ftrace.h:

#ifndef FTRACE_IP_EXTENSION
# define FTRACE_IP_EXTENSION0
#endif


In arch/powerpc/include/asm/ftrace.h

#define FTRACE_IP_EXTENSION MCOUNT_INSN_SIZE


Then we can just have:

unsigned long ftrace_location(unsigned long ip)
{
return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION);
}


Thanks, that's indeed nice. I hope you don't mind me adding your SOB for 
that.


- Naveen




Re: [PATCH 0/5] Powerpc/hw-breakpoint: Fixes plus Code refactor

2019-06-19 Thread Ravi Bangoria



On 6/18/19 11:47 AM, Michael Neuling wrote:
> On Tue, 2019-06-18 at 08:01 +0200, Christophe Leroy wrote:
>>
>> On 18/06/2019 06:27, Ravi Bangoria wrote:
>>> patch 1-3: Code refactor
>>> patch 4: Speedup disabling breakpoint
>>> patch 5: Fix length calculation for unaligned targets
>>
>> While you are playing with hw breakpoints, did you have a look at 
>> https://github.com/linuxppc/issues/issues/38 ?
> 
> Agreed and also: 
> 
> https://github.com/linuxppc/issues/issues/170
> 
> https://github.com/linuxppc/issues/issues/128 
> 

Yes, I'm aware of those. Will have a look at them.


