[PATCH v2 49/52] powerpc/64s/exceptions: machine check move unrecoverable handling out of line
Similarly to the previous patch, move unrecoverable handling out of line, which makes the regular path less cluttered and easier to follow. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 83 +--- 1 file changed, 39 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index be83a4e71814..e8f644d6f310 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1016,9 +1016,9 @@ EXC_COMMON_BEGIN(machine_check_early_common) bne 1f /* First machine check entry */ ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */ -1: subir1,r1,INT_FRAME_SIZE/* alloc stack frame */ - /* Limit nested MCE to level 4 to avoid stack overflow */ - bge cr1,2f /* Check if we hit limit of 4 */ +1: /* Limit nested MCE to level 4 to avoid stack overflow */ + bgt cr1,unrecoverable_mce /* Check if we hit limit of 4 */ + subir1,r1,INT_FRAME_SIZE/* alloc stack frame */ EXCEPTION_PROLOG_COMMON_1() /* We don't touch AMR here, we never go to virtual mode */ @@ -1032,25 +1032,9 @@ EXC_COMMON_BEGIN(machine_check_early_common) li r10,MSR_RI mtmsrd r10,1 - bl enable_machine_check b machine_check_handle_early -2: - /* Stack overflow. Stay on emergency stack and panic. -* Keep the ME bit off while panic-ing, so that if we hit -* another machine check we checkstop. -*/ - addir1,r1,INT_FRAME_SIZE/* go back to previous stack frame */ - ld r11,PACAKMSR(r13) - LOAD_HANDLER(r12, unrecover_mce) - li r10,MSR_ME - andcr11,r11,r10 /* Turn off MSR_ME */ - mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r11 - RFI_TO_KERNEL - b . /* prevent speculative execution */ - EXC_COMMON_BEGIN(machine_check_common) /* * Machine check is different because we use a different @@ -1166,32 +1150,15 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) * If yes, then stay on emergency stack and panic. */ andi. 
r11,r12,MSR_RI - bne 2f -1: mfspr r11,SPRN_SRR0 - LOAD_HANDLER(r10,unrecover_mce) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - /* -* We are going down. But there are chances that we might get hit by -* another MCE during panic path and we may run into unstable state -* with no way out. Hence, turn ME bit off while going down, so that -* when another MCE is hit during panic path, system will checkstop -* and hypervisor will get restarted cleanly by SP. -*/ - li r3,MSR_ME - andcr10,r10,r3 /* Turn off MSR_ME */ - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - b . -2: + beq unrecoverable_mce + /* * Check if we have successfully handled/recovered from error, if not * then stay on emergency stack and panic. */ ld r3,RESULT(r1) /* Load result */ cmpdi r3,0/* see if we handled MCE successfully */ - - beq 1b /* if !handled then panic */ + beq unrecoverable_mce /* if !handled then panic */ /* * Return from MC interrupt. @@ -1213,17 +1180,31 @@ END_FTR_SECTION_IFSET(CPU_FTR_CFAR) EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0 -EXC_COMMON_BEGIN(unrecover_mce) +EXC_COMMON_BEGIN(unrecoverable_mce) + /* +* We are going down. But there are chances that we might get hit by +* another MCE during panic path and we may run into unstable state +* with no way out. Hence, turn ME bit off while going down, so that +* when another MCE is hit during panic path, system will checkstop +* and hypervisor will get restarted cleanly by SP. +*/ + bl disable_machine_check + ld r10,PACAKMSR(r13) + li r3,MSR_ME + andcr10,r10,r3 + mtmsrd r10 + /* Invoke machine_check_exception to print MCE event and panic. */ addir3,r1,STACK_FRAME_OVERHEAD bl machine_check_exception + /* -* We will not reach here. Even if we did, there is no way out. Call -* unrecoverable_exception and die. +* We will not reach here. Even if we did, there is no way out. +* Call unrecoverable_exception and die. 
*/ -1: addir3,r1,STACK_FRAME_OVERHEAD + addir3,r1,STACK_FRAME_OVERHEAD bl unrecoverable_exception - b 1b + b . EXC_REAL_BEGIN(data_access, 0x300, 0x80) @@ -2297,6 +2278,20 @@ enable_machine_check: 1: mtlrr0 blr +disable_mach
[PATCH v2 48/52] powerpc/64s/exception: simplify machine check early path
machine_check_handle_early_common can reach machine_check_handle_early directly now that it runs at the relocated address. The only reason to do the rfi sequence is to enable MSR[ME]. Move that into a helper function to make the normal code path a bit easier to read. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 30 1 file changed, 22 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 384f591ef078..be83a4e71814 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1030,13 +1030,12 @@ EXC_COMMON_BEGIN(machine_check_early_common) std r3,_DAR(r1) std r4,_DSISR(r1) - mfmsr r11 /* get MSR value */ - ori r11,r11,MSR_ME|MSR_RI /* turn on ME, RI */ - LOAD_HANDLER(r12, machine_check_handle_early) -1: mtspr SPRN_SRR0,r12 - mtspr SPRN_SRR1,r11 - RFI_TO_KERNEL - b . /* prevent speculative execution */ + li r10,MSR_RI + mtmsrd r10,1 + + bl enable_machine_check + b machine_check_handle_early + 2: /* Stack overflow. Stay on emergency stack and panic. * Keep the ME bit off while panic-ing, so that if we hit @@ -1047,7 +1046,9 @@ EXC_COMMON_BEGIN(machine_check_early_common) LOAD_HANDLER(r12, unrecover_mce) li r10,MSR_ME andcr11,r11,r10 /* Turn off MSR_ME */ - b 1b + mtspr SPRN_SRR0,r12 + mtspr SPRN_SRR1,r11 + RFI_TO_KERNEL b . /* prevent speculative execution */ EXC_COMMON_BEGIN(machine_check_common) @@ -2283,6 +2284,19 @@ CLOSE_FIXED_SECTION(virt_trampolines); USE_TEXT_SECTION() +enable_machine_check: + mflrr0 + bcl 20,31,$+4 +0: mflrr3 + addir3,r3,(1f - 0b) + mtspr SPRN_SRR0,r3 + mfmsr r3 + ori r3,r3,MSR_ME + mtspr SPRN_SRR1,r3 + RFI_TO_KERNEL +1: mtlrr0 + blr + /* * Hash table stuff */ -- 2.20.1
[PATCH v2 47/52] powerpc/64s/exception: machine check restructure handler to be more regular
Follow the pattern of sreset and HMI handlers more closely, in using EXCEPTION_PROLOG_COMMON_1 rather than open-coding it. Run the handler at the relocated location. This will help with simplification and code sharing. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 86 ++-- 1 file changed, 42 insertions(+), 44 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 8ed787dc579c..384f591ef078 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -958,17 +958,34 @@ BEGIN_FTR_SECTION b machine_check_pseries END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) #endif - b machine_check_common_early + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 0, 0x200, 1, 1, 0 + mfctr r10 /* save ctr */ + BRANCH_TO_C000(r11, machine_check_early_common) + /* +* MSR_RI is not enabled, because PACA_EXMC is being used, so a +* nested machine check corrupts it. machine_check_common enables +* MSR_RI. +*/ EXC_REAL_END(machine_check, 0x200, 0x100) EXC_VIRT_NONE(0x4200, 0x100) -TRAMP_REAL_BEGIN(machine_check_common_early) - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 0, 0x200, 0, 0, 0 + +#ifdef CONFIG_PPC_PSERIES +TRAMP_REAL_BEGIN(machine_check_fwnmi) + /* See comment at machine_check exception, don't turn on RI */ + EXCEPTION_PROLOG_0 PACA_EXMC +machine_check_pseries: + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 + EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0 +#endif + +TRAMP_KVM_SKIP(PACA_EXMC, 0x200) + +EXC_COMMON_BEGIN(machine_check_early_common) + mtctr r10 /* Restore ctr */ + mfspr r11,SPRN_SRR0 + mfspr r12,SPRN_SRR1 + /* -* Register contents: -* R13 = PACA -* R9 = CR -* Original R9 to R13 is saved on PACA_EXMC -* * Switch to mc_emergency stack and handle re-entrancy (we limit * the nested MCE upto level 4 to avoid stack overflow). 
* Save MCE registers srr1, srr0, dar and dsisr and then set ME=1 @@ -989,32 +1006,30 @@ TRAMP_REAL_BEGIN(machine_check_common_early) * the machine check is handled then the idle wakeup code is called * to restore state. */ - mr r11,r1 /* Save r1 */ lhz r10,PACA_IN_MCE(r13) cmpwi r10,0 /* Are we in nested machine check */ - bne 0f /* Yes, we are. */ - /* First machine check entry */ - ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */ -0: subir1,r1,INT_FRAME_SIZE/* alloc stack frame */ + cmpwi cr1,r10,MAX_MCE_DEPTH /* Are we at maximum nesting */ addir10,r10,1 /* increment paca->in_mce */ sth r10,PACA_IN_MCE(r13) + + mr r10,r1 /* Save r1 */ + bne 1f + /* First machine check entry */ + ld r1,PACAMCEMERGSP(r13) /* Use MC emergency stack */ +1: subir1,r1,INT_FRAME_SIZE/* alloc stack frame */ /* Limit nested MCE to level 4 to avoid stack overflow */ - cmpwi r10,MAX_MCE_DEPTH - bgt 2f /* Check if we hit limit of 4 */ - std r11,GPR1(r1)/* Save r1 on the stack. */ - std r11,0(r1) /* make stack chain pointer */ - mfspr r11,SPRN_SRR0 /* Save SRR0 */ - std r11,_NIP(r1) - mfspr r11,SPRN_SRR1 /* Save SRR1 */ - std r11,_MSR(r1) - mfspr r11,SPRN_DAR/* Save DAR */ - std r11,_DAR(r1) - mfspr r11,SPRN_DSISR /* Save DSISR */ - std r11,_DSISR(r1) - std r9,_CCR(r1) /* Save CR in stackframe */ + bge cr1,2f /* Check if we hit limit of 4 */ + + EXCEPTION_PROLOG_COMMON_1() /* We don't touch AMR here, we never go to virtual mode */ - /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) + EXCEPTION_PROLOG_COMMON_3(0x200) + + ld r3,PACA_EXMC+EX_DAR(r13) + lwz r4,PACA_EXMC+EX_DSISR(r13) + std r3,_DAR(r1) + std r4,_DSISR(r1) + mfmsr r11 /* get MSR value */ ori r11,r11,MSR_ME|MSR_RI /* turn on ME, RI */ LOAD_HANDLER(r12, machine_check_handle_early) @@ -1035,21 +1050,6 @@ TRAMP_REAL_BEGIN(machine_check_common_early) b 1b b . 
/* prevent speculative execution */ -#ifdef CONFIG_PPC_PSERIES -TRAMP_REAL_BEGIN(machine_check_fwnmi) - EXCEPTION_PROLOG_0 PACA_EXMC -machine_check_pseries: - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 - EXCEPTION_PROLOG_2_REAL machine_check_c
[PATCH v2 46/52] powerpc/64s/exception: fix machine check early should not set AMR
The early machine check runs in real mode, so locking is unnecessary. Worse, the windup does not restore AMR, so this can result in a false KUAP fault after a recoverable machine check hits inside a user copy operation. Fix this similarly to HMI by just avoiding the kuap lock in the early machine check handler (it will be set by the late handler that runs in virtual mode if that runs). Fixes: 890274c2dc4c0 ("powerpc/64s: Implement KUAP for Radix MMU") Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ab22af2509d8..8ed787dc579c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1012,7 +1012,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) mfspr r11,SPRN_DSISR /* Save DSISR */ std r11,_DSISR(r1) std r9,_CCR(r1) /* Save CR in stackframe */ - kuap_save_amr_and_lock r9, r10, cr1 + /* We don't touch AMR here, we never go to virtual mode */ /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ -- 2.20.1
[PATCH v2 45/52] powerpc/64s/exception: machine check windup restore cfar for host delivery
Bare metal machine checks run an "early" handler in real mode which potentially flushes faulting translation structures, among other things, before running the main handler which reports the event. The main handler runs as a normal interrupt handler, after a "windup" that sets registers back as they were at interrupt entry. CFAR does not get restored by the windup code, so add that. The current handler does not appear to use CFAR anywhere, because the main handler is not run if the MCE happens in kernel-mode and the user-mode message is not a register trace. However it may be useful in some cases or future changes (xmon, panic on mce, etc). Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 4 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 61c96502d2a8..ab22af2509d8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1205,6 +1205,10 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) 9: /* Deliver the machine check to host kernel in V mode. */ +BEGIN_FTR_SECTION + ld r10,ORIG_GPR3(r1) + mtspr SPRN_CFAR,r10 +END_FTR_SECTION_IFSET(CPU_FTR_CFAR) MACHINE_CHECK_HANDLER_WINDUP EXCEPTION_PROLOG_0 PACA_EXMC EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 -- 2.20.1
[PATCH v2 44/52] powerpc/64s/exception: separate pseries and powernv mce delivery paths
This will allow standardised interrupt entry macros to be used in future. These paths may be de-duplicated again after that if code allows. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 21 - 1 file changed, 12 insertions(+), 9 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f3362adc99e6..61c96502d2a8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -952,11 +952,13 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100) * vector */ EXCEPTION_PROLOG_0 PACA_EXMC +#ifdef CONFIG_PPC_PSERIES BEGIN_FTR_SECTION + /* Some hypervisors inject directly to 0x200 if FWNMI is not enabled */ + b machine_check_pseries +END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) +#endif b machine_check_common_early -FTR_SECTION_ELSE - b machine_check_pSeries_0 -ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) EXC_REAL_END(machine_check, 0x200, 0x100) EXC_VIRT_NONE(0x4200, 0x100) TRAMP_REAL_BEGIN(machine_check_common_early) @@ -1033,18 +1035,18 @@ TRAMP_REAL_BEGIN(machine_check_common_early) b 1b b . /* prevent speculative execution */ -TRAMP_REAL_BEGIN(machine_check_pSeries) - .globl machine_check_fwnmi -machine_check_fwnmi: +#ifdef CONFIG_PPC_PSERIES +TRAMP_REAL_BEGIN(machine_check_fwnmi) EXCEPTION_PROLOG_0 PACA_EXMC -machine_check_pSeries_0: +machine_check_pseries: EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 + EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0 /* * MSR_RI is not enabled, because PACA_EXMC is being used, so a * nested machine check corrupts it. machine_check_common enables * MSR_RI. */ - EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0 +#endif TRAMP_KVM_SKIP(PACA_EXMC, 0x200) @@ -1205,7 +1207,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) /* Deliver the machine check to host kernel in V mode. 
*/ MACHINE_CHECK_HANDLER_WINDUP EXCEPTION_PROLOG_0 PACA_EXMC - b machine_check_pSeries_0 + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 + EXCEPTION_PROLOG_2_REAL machine_check_common, EXC_STD, 0 EXC_COMMON_BEGIN(unrecover_mce) /* Invoke machine_check_exception to print MCE event and panic. */ -- 2.20.1
[PATCH v2 43/52] powerpc/64s/exception: machine check early only runs in HV mode
machine_check_common_early and machine_check_handle_early only run in HVMODE. Remove dead code. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 38 +--- 1 file changed, 6 insertions(+), 32 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index b12755a4f884..f3362adc99e6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1014,10 +1014,7 @@ TRAMP_REAL_BEGIN(machine_check_common_early) /* Save r9 through r13 from EXMC save area to stack frame. */ EXCEPTION_PROLOG_COMMON_2(PACA_EXMC) mfmsr r11 /* get MSR value */ -BEGIN_FTR_SECTION - ori r11,r11,MSR_ME /* turn on ME bit */ -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) - ori r11,r11,MSR_RI /* turn on RI bit */ + ori r11,r11,MSR_ME|MSR_RI /* turn on ME, RI */ LOAD_HANDLER(r12, machine_check_handle_early) 1: mtspr SPRN_SRR0,r12 mtspr SPRN_SRR1,r11 @@ -1124,11 +1121,8 @@ EXC_COMMON_BEGIN(machine_check_handle_early) bl machine_check_early std r3,RESULT(r1) /* Save result */ ld r12,_MSR(r1) -BEGIN_FTR_SECTION - b 4f -END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) -#ifdef CONFIG_PPC_P7_NAP +#ifdef CONFIG_PPC_P7_NAP /* * Check if thread was in power saving mode. We come here when any * of the following is true: @@ -1141,7 +1135,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) BEGIN_FTR_SECTION rlwinm. r11,r12,47-31,30,31 bne machine_check_idle_common -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_206) #endif /* @@ -1150,12 +1144,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) */ rldicl. r11,r12,4,63/* See if MC hit while in HV mode. */ beq 5f -4: andi. r11,r12,MSR_PR /* See if coming from user. */ + andi. r11,r12,MSR_PR /* See if coming from user. */ bne 9f /* continue in V mode if we are. */ 5: #ifdef CONFIG_KVM_BOOK3S_64_HANDLER -BEGIN_FTR_SECTION /* * We are coming from kernel context. Check if we are coming from * guest. if yes, then we can continue. 
We will fall through @@ -1164,7 +1157,6 @@ BEGIN_FTR_SECTION lbz r11,HSTATE_IN_GUEST(r13) cmpwi r11,0 /* Check if coming from guest */ bne 9f /* continue if we are. */ -END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) #endif /* * At this point we are not sure about what context we come from. @@ -1199,7 +1191,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_HVMODE) cmpdi r3,0/* see if we handled MCE successfully */ beq 1b /* if !handled then panic */ -BEGIN_FTR_SECTION + /* * Return from MC interrupt. * Queue up the MCE event so that we can log it later, while @@ -1208,18 +1200,7 @@ BEGIN_FTR_SECTION bl machine_check_queue_event MACHINE_CHECK_HANDLER_WINDUP RFI_TO_USER_OR_KERNEL -FTR_SECTION_ELSE - /* -* pSeries: Return from MC interrupt. Before that stay on emergency -* stack and call machine_check_exception to log the MCE event. -*/ - LOAD_HANDLER(r10,mce_return) - mtspr SPRN_SRR0,r10 - ld r10,PACAKMSR(r13) - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - b . -ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) + 9: /* Deliver the machine check to host kernel in V mode. */ MACHINE_CHECK_HANDLER_WINDUP @@ -1238,13 +1219,6 @@ EXC_COMMON_BEGIN(unrecover_mce) bl unrecoverable_exception b 1b -EXC_COMMON_BEGIN(mce_return) - /* Invoke machine_check_exception to print MCE event and return. */ - addir3,r1,STACK_FRAME_OVERHEAD - bl machine_check_exception - MACHINE_CHECK_HANDLER_WINDUP - RFI_TO_KERNEL - b . EXC_REAL_BEGIN(data_access, 0x300, 0x80) EXCEPTION_PROLOG_0 PACA_EXGEN -- 2.20.1
[PATCH v2 42/52] powerpc/64s/exception: machine check fwnmi does not trigger when in HV mode
Remove dead code. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 3 --- 1 file changed, 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 286bd5670d60..b12755a4f884 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1040,9 +1040,6 @@ TRAMP_REAL_BEGIN(machine_check_pSeries) .globl machine_check_fwnmi machine_check_fwnmi: EXCEPTION_PROLOG_0 PACA_EXMC -BEGIN_FTR_SECTION - b machine_check_common_early -END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) machine_check_pSeries_0: EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 /* -- 2.20.1
[PATCH v2 41/52] powerpc/tm: update comment about interrupt re-entrancy
Since the system reset interrupt began to use its own stack, and machine check interrupts have done so for some time, r1 can be changed without clearing MSR[RI], provided no other interrupts (including SLB misses) are taken. MSR[RI] does have to be cleared when using SCRATCH0, however. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/tm.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kernel/tm.S b/arch/powerpc/kernel/tm.S index 9fabdce255cd..6ba0fdd1e7f8 100644 --- a/arch/powerpc/kernel/tm.S +++ b/arch/powerpc/kernel/tm.S @@ -148,7 +148,7 @@ _GLOBAL(tm_reclaim) /* Stash the stack pointer away for use after reclaim */ std r1, PACAR1(r13) - /* Clear MSR RI since we are about to change r1, EE is already off. */ + /* Clear MSR RI since we are about to use SCRATCH0, EE is already off */ li r5, 0 mtmsrd r5, 1 @@ -474,7 +474,7 @@ restore_gprs: REST_GPR(7, r7) - /* Clear MSR RI since we are about to change r1. EE is already off */ + /* Clear MSR RI since we are about to use SCRATCH0. EE is already off */ li r5, 0 mtmsrd r5, 1 -- 2.20.1
[PATCH v2 40/52] powerpc/64s/exception: move SET_SCRATCH0 into EXCEPTION_PROLOG_0
No generated code change. The only file change is in bug table line numbers. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 25 + 1 file changed, 1 insertion(+), 24 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 99de397a1cd9..286bd5670d60 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -128,6 +128,7 @@ BEGIN_FTR_SECTION_NESTED(943) \ END_FTR_SECTION_NESTED(ftr,ftr,943) .macro EXCEPTION_PROLOG_0 area + SET_SCRATCH0(r13) /* save r13 */ GET_PACA(r13) std r9,\area\()+EX_R9(r13) /* save r9 */ OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR) @@ -540,7 +541,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define __EXC_REAL(name, start, size, area)\ EXC_REAL_BEGIN(name, start, size); \ - SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0 area ; \ EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0, 0, 0 ; \ EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \ @@ -551,7 +551,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define __EXC_VIRT(name, start, size, realvec, area) \ EXC_VIRT_BEGIN(name, start, size); \ - SET_SCRATCH0(r13);/* save r13 */\ EXCEPTION_PROLOG_0 area ; \ EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0, 0, 0; \ EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\ @@ -562,7 +561,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define EXC_REAL_MASKABLE(name, start, size, bitmask) \ EXC_REAL_BEGIN(name, start, size); \ - SET_SCRATCH0(r13);/* save r13 */\ EXCEPTION_PROLOG_0 PACA_EXGEN ; \ EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, 0, 0, bitmask ; \ EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \ @@ -570,7+568,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define EXC_VIRT_MASKABLE(name, start, size, realvec, bitmask) \ EXC_VIRT_BEGIN(name, start, size); \ - SET_SCRATCH0(r13);/* save r13 */\ EXCEPTION_PROLOG_0 PACA_EXGEN ; \ EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \ EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\ 
@@ -578,7 +575,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define EXC_REAL_HV(name, start, size) \ EXC_REAL_BEGIN(name, start, size); \ - SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0 PACA_EXGEN; \ EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0, 0, 0 ; \ EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 ; \ @@ -586,7 +582,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define EXC_VIRT_HV(name, start, size, realvec) \ EXC_VIRT_BEGIN(name, start, size); \ - SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0 PACA_EXGEN; \ EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, realvec, 0, 0, 0 ;\ EXCEPTION_PROLOG_2_VIRT name##_common, EXC_HV ; \ @@ -594,7 +589,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define __EXC_REAL_OOL(name, start, size) \ EXC_REAL_BEGIN(name, start, size); \ - SET_SCRATCH0(r13); \ EXCEPTION_PROLOG_0 PACA_EXGEN ; \ b tramp_real_##name ; \ EXC_REAL_END(name, start, size) @@ -622,7 +616,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define __EXC_REAL_OOL_HV_DIRECT(name, start, size, handler) \ EXC_REAL_BEGIN(name, start, size); \ - SET_SCRATCH0(r13); \ EXCEPTION_PROLOG_0 PACA_EXGEN ; \ b handler;\ EXC_REAL_END(name, start, size) @@ -653,7 +646,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) #define __EXC_VIRT_OOL(name, start, size) \ EXC_VIRT_BEGIN(name, start, size);
[PATCH v2 39/52] powerpc/64s/exception: denorm handler use standard scratch save macro
Although the 0x1500 interrupt only applies to bare metal, it is better to just use the standard macro for scratch save. Runtime code path remains unchanged (due to instruction patching). Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 437f91179537..99de397a1cd9 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1920,7 +1920,7 @@ EXC_REAL_NONE(0x1400, 0x100) EXC_VIRT_NONE(0x5400, 0x100) EXC_REAL_BEGIN(denorm_exception_hv, 0x1500, 0x100) - mtspr SPRN_SPRG_HSCRATCH0,r13 + SET_SCRATCH0(r13) EXCEPTION_PROLOG_0 PACA_EXGEN EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 0, 0x1500, 0, 0, 0 -- 2.20.1
[PATCH v2 38/52] powerpc/64s/exception: machine check use standard macros to save dar/dsisr
Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 16d5ea1c86bb..437f91179537 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1056,7 +1056,7 @@ BEGIN_FTR_SECTION b machine_check_common_early END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) machine_check_pSeries_0: - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 0, 0, 0 + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXMC, 1, 0x200, 1, 1, 0 /* * MSR_RI is not enabled, because PACA_EXMC is being used, so a * nested machine check corrupts it. machine_check_common enables @@ -1071,10 +1071,6 @@ EXC_COMMON_BEGIN(machine_check_common) * Machine check is different because we use a different * save area: PACA_EXMC instead of PACA_EXGEN. */ - mfspr r10,SPRN_DAR - std r10,PACA_EXMC+EX_DAR(r13) - mfspr r10,SPRN_DSISR - stw r10,PACA_EXMC+EX_DSISR(r13) EXCEPTION_COMMON(PACA_EXMC, 0x200) FINISH_NAP RECONCILE_IRQ_STATE(r10, r11) -- 2.20.1
[PATCH v2 37/52] powerpc/64s/exception: add dar and dsisr options to exception macro
Some exception entry requires DAR and/or DSISR to be saved into the paca exception save area. Add options to the standard exception macros for these. Generated code changes slightly due to code structure. - 554: a6 02 72 7d mfdsisr r11 - 558: a8 00 4d f9 std r10,168(r13) - 55c: b0 00 6d 91 stw r11,176(r13) + 554: a8 00 4d f9 std r10,168(r13) + 558: a6 02 52 7d mfdsisr r10 + 55c: b0 00 4d 91 stw r10,176(r13) Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 103 --- 1 file changed, 46 insertions(+), 57 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 2412b5269e25..16d5ea1c86bb 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -136,7 +136,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) .endm -.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask +.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, dar, dsisr, bitmask OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR) OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR) INTERRUPT_TO_KERNEL @@ -172,8 +172,22 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r11,\area\()+EX_R11(r13) std r12,\area\()+EX_R12(r13) + + /* +* DAR/DSISR, SCRATCH0 must be read before setting MSR[RI], +* because a d-side MCE will clobber those registers so is +* not recoverable if they are live. 
+*/ GET_SCRATCH0(r10) std r10,\area\()+EX_R13(r13) + .if \dar + mfspr r10,SPRN_DAR + std r10,\area\()+EX_DAR(r13) + .endif + .if \dsisr + mfspr r10,SPRN_DSISR + stw r10,\area\()+EX_DSISR(r13) + .endif .endm .macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri @@ -528,7 +542,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) EXC_REAL_BEGIN(name, start, size); \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0 area ; \ - EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0 ; \ + EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0, 0, 0 ; \ EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \ EXC_REAL_END(name, start, size) @@ -539,7 +553,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) EXC_VIRT_BEGIN(name, start, size); \ SET_SCRATCH0(r13);/* save r13 */\ EXCEPTION_PROLOG_0 area ; \ - EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0;\ + EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0, 0, 0; \ EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\ EXC_VIRT_END(name, start, size) @@ -550,7 +564,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) EXC_REAL_BEGIN(name, start, size); \ SET_SCRATCH0(r13);/* save r13 */\ EXCEPTION_PROLOG_0 PACA_EXGEN ; \ - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, bitmask ; \ + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, 0, 0, bitmask ; \ EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \ EXC_REAL_END(name, start, size) @@ -558,7 +572,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) EXC_VIRT_BEGIN(name, start, size); \ SET_SCRATCH0(r13);/* save r13 */\ EXCEPTION_PROLOG_0 PACA_EXGEN ; \ - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, bitmask ; \ + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, realvec, 0, 0, bitmask ; \ EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\ EXC_VIRT_END(name, start, size) @@ -566,7 +580,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) EXC_REAL_BEGIN(name, start, size); \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0 PACA_EXGEN; \ - EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, start, 0 ;\ + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 
start, 0, 0, 0 ; \ EXCEPTION_PROLOG_2_REAL name##_common, EXC_HV, 1 ; \ EXC_REAL_END(name, start, size) @@ -574,7 +588,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_CAN_NAP) EXC_VIRT_BEGIN(name, start, size); \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0 PACA_EXGEN; \ - EXCEPTION
[PATCH v2 36/52] powerpc/64s/exception: use common macro for windup
No generated code change. The only file change is in bug table line numbers. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 112 +-- 1 file changed, 36 insertions(+), 76 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cce75adf2095..2412b5269e25 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -417,6 +417,38 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ EXCEPTION_PROLOG_COMMON_2(area);\ EXCEPTION_PROLOG_COMMON_3(trap) +/* + * Restore all registers including H/SRR0/1 saved in a stack frame of a + * standard exception. + */ +.macro EXCEPTION_RESTORE_REGS hsrr + /* Move original SRR0 and SRR1 into the respective regs */ + ld r9,_MSR(r1) + .if \hsrr + mtspr SPRN_HSRR1,r9 + .else + mtspr SPRN_SRR1,r9 + .endif + ld r9,_NIP(r1) + .if \hsrr + mtspr SPRN_HSRR0,r9 + .else + mtspr SPRN_SRR0,r9 + .endif + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlrr9 + ld r9,_CCR(r1) + mtcrr9 + REST_8GPRS(2, r1) + REST_4GPRS(10, r1) + REST_GPR(0, r1) + /* restore original r1. */ + ld r1,GPR1(r1) +.endm #define RUNLATCH_ON\ BEGIN_FTR_SECTION \ @@ -906,29 +938,7 @@ EXC_COMMON_BEGIN(system_reset_common) ld r10,SOFTE(r1) stb r10,PACAIRQSOFTMASK(r13) - /* -* Keep below code in synch with MACHINE_CHECK_HANDLER_WINDUP. -* Should share common bits... -*/ - - /* Move original SRR0 and SRR1 into the respective regs */ - ld r9,_MSR(r1) - mtspr SPRN_SRR1,r9 - ld r9,_NIP(r1) - mtspr SPRN_SRR0,r9 - ld r9,_CTR(r1) - mtctr r9 - ld r9,_XER(r1) - mtxer r9 - ld r9,_LINK(r1) - mtlrr9 - ld r9,_CCR(r1) - mtcrr9 - REST_8GPRS(2, r1) - REST_4GPRS(10, r1) - REST_GPR(0, r1) - /* restore original r1. 
*/ - ld r1,GPR1(r1) + EXCEPTION_RESTORE_REGS EXC_STD RFI_TO_USER_OR_KERNEL @@ -1074,24 +1084,7 @@ EXC_COMMON_BEGIN(machine_check_common) lhz r12,PACA_IN_MCE(r13); \ subir12,r12,1; \ sth r12,PACA_IN_MCE(r13); \ - /* Move original SRR0 and SRR1 into the respective regs */ \ - ld r9,_MSR(r1);\ - mtspr SPRN_SRR1,r9; \ - ld r9,_NIP(r1);\ - mtspr SPRN_SRR0,r9; \ - ld r9,_CTR(r1);\ - mtctr r9; \ - ld r9,_XER(r1);\ - mtxer r9; \ - ld r9,_LINK(r1); \ - mtlrr9; \ - ld r9,_CCR(r1);\ - mtcrr9; \ - REST_8GPRS(2, r1); \ - REST_4GPRS(10, r1); \ - REST_GPR(0, r1);\ - /* restore original r1. */ \ - ld r1,GPR1(r1) + EXCEPTION_RESTORE_REGS EXC_STD #ifdef CONFIG_PPC_P7_NAP /* @@ -1774,48 +1767,15 @@ TRAMP_REAL_BEGIN(hmi_exception_early) cmpdi cr0,r3,0 bne 1f - /* Windup the stack. */ - /* Move original HSRR0 and HSRR1 into the respective regs */ - ld r9,_MSR(r1) - mtspr SPRN_HSRR1,r9 - ld r9,_NIP(r1) - mtspr SPRN_HSRR0,r9 - ld r9,_CTR(r1) - mtctr r9 - ld r9,_XER(r1) - mtxer r9 - ld r9,_LINK(r1) - mtlrr9 - ld r9,_CCR(r1) - mtcrr9 - REST_8GPRS(2, r1) - REST_4GPRS(10, r1) - REST_GPR(0, r1) - ld r1,GPR1(r1) + EXCEPTION_RESTORE_REGS EXC_HV HRFI_TO_USER_OR_KERNEL 1: - ld r9,_MSR(r1) - mtspr SPRN_HSRR1,r9 - ld r9,_NIP(r1) - mtspr SPRN_HSRR0,r9 - ld r9,_CTR(r1) - mtctr r9 - ld r9,_XER(r1) - mtxer r9 - ld r9,_LINK(r1) - mtlrr9 - ld r9,_CCR(r1) - mtcrr9 - REST_8GPRS(2, r1) - REST_4GPRS(10, r1) - REST_GPR(0, r1) - ld r1,GPR1(r1) - /* * Go to virtual mode and pull the HMI event information from * firmware. */ + EXCEPTION_R
[PATCH v2 35/52] powerpc/64s/exception: shuffle windup code around
Restore all SPRs and CR up-front, these are longer latency instructions. Move register restore around to maximise pairs of adjacent loads (e.g., restore r0 next to r1). Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 40 +++- 1 file changed, 16 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 3476cffa21b8..cce75adf2095 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -922,13 +922,11 @@ EXC_COMMON_BEGIN(system_reset_common) mtxer r9 ld r9,_LINK(r1) mtlrr9 - REST_GPR(0, r1) + ld r9,_CCR(r1) + mtcrr9 REST_8GPRS(2, r1) - REST_GPR(10, r1) - ld r11,_CCR(r1) - mtcrr11 - REST_GPR(11, r1) - REST_2GPRS(12, r1) + REST_4GPRS(10, r1) + REST_GPR(0, r1) /* restore original r1. */ ld r1,GPR1(r1) RFI_TO_USER_OR_KERNEL @@ -1087,13 +1085,11 @@ EXC_COMMON_BEGIN(machine_check_common) mtxer r9; \ ld r9,_LINK(r1); \ mtlrr9; \ - REST_GPR(0, r1);\ + ld r9,_CCR(r1);\ + mtcrr9; \ REST_8GPRS(2, r1); \ - REST_GPR(10, r1); \ - ld r11,_CCR(r1); \ - mtcrr11;\ - REST_GPR(11, r1); \ - REST_2GPRS(12, r1); \ + REST_4GPRS(10, r1); \ + REST_GPR(0, r1);\ /* restore original r1. */ \ ld r1,GPR1(r1) @@ -1790,13 +1786,11 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mtxer r9 ld r9,_LINK(r1) mtlrr9 - REST_GPR(0, r1) + ld r9,_CCR(r1) + mtcrr9 REST_8GPRS(2, r1) - REST_GPR(10, r1) - ld r11,_CCR(r1) - REST_2GPRS(12, r1) - mtcrr11 - REST_GPR(11, r1) + REST_4GPRS(10, r1) + REST_GPR(0, r1) ld r1,GPR1(r1) HRFI_TO_USER_OR_KERNEL @@ -1811,13 +1805,11 @@ TRAMP_REAL_BEGIN(hmi_exception_early) mtxer r9 ld r9,_LINK(r1) mtlrr9 - REST_GPR(0, r1) + ld r9,_CCR(r1) + mtcrr9 REST_8GPRS(2, r1) - REST_GPR(10, r1) - ld r11,_CCR(r1) - REST_2GPRS(12, r1) - mtcrr11 - REST_GPR(11, r1) + REST_4GPRS(10, r1) + REST_GPR(0, r1) ld r1,GPR1(r1) /* -- 2.20.1
[PATCH v2 34/52] powerpc/64s/exception: simplify hmi windup code
Duplicate the hmi windup code for both cases, rather than to put a special case branch in the middle of it. Remove unused label. This helps with later code consolidation. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 22 ++ 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index cf89d728720a..3476cffa21b8 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1776,6 +1776,7 @@ TRAMP_REAL_BEGIN(hmi_exception_early) addir3,r1,STACK_FRAME_OVERHEAD BRANCH_LINK_TO_FAR(DOTSYM(hmi_exception_realmode)) /* Function call ABI */ cmpdi cr0,r3,0 + bne 1f /* Windup the stack. */ /* Move original HSRR0 and HSRR1 into the respective regs */ @@ -1794,13 +1795,28 @@ TRAMP_REAL_BEGIN(hmi_exception_early) REST_GPR(10, r1) ld r11,_CCR(r1) REST_2GPRS(12, r1) - bne 1f mtcrr11 REST_GPR(11, r1) ld r1,GPR1(r1) HRFI_TO_USER_OR_KERNEL -1: mtcrr11 +1: + ld r9,_MSR(r1) + mtspr SPRN_HSRR1,r9 + ld r9,_NIP(r1) + mtspr SPRN_HSRR0,r9 + ld r9,_CTR(r1) + mtctr r9 + ld r9,_XER(r1) + mtxer r9 + ld r9,_LINK(r1) + mtlrr9 + REST_GPR(0, r1) + REST_8GPRS(2, r1) + REST_GPR(10, r1) + ld r11,_CCR(r1) + REST_2GPRS(12, r1) + mtcrr11 REST_GPR(11, r1) ld r1,GPR1(r1) @@ -1808,8 +1824,6 @@ TRAMP_REAL_BEGIN(hmi_exception_early) * Go to virtual mode and pull the HMI event information from * firmware. */ - .globl hmi_exception_after_realmode -hmi_exception_after_realmode: SET_SCRATCH0(r13) EXCEPTION_PROLOG_0 PACA_EXGEN b tramp_real_hmi_exception -- 2.20.1
[PATCH v2 33/52] powerpc/64s/exception: move machine check windup in_mce handling
Move in_mce decrement earlier before registers are restored (but still after RI=0). This helps with later consolidation. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 8 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 804438669454..cf89d728720a 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1072,6 +1072,10 @@ EXC_COMMON_BEGIN(machine_check_common) /* Clear MSR_RI before setting SRR0 and SRR1. */\ li r9,0; \ mtmsrd r9,1; /* Clear MSR_RI */ \ + /* Decrement paca->in_mce now RI is clear. */ \ + lhz r12,PACA_IN_MCE(r13); \ + subir12,r12,1; \ + sth r12,PACA_IN_MCE(r13); \ /* Move original SRR0 and SRR1 into the respective regs */ \ ld r9,_MSR(r1);\ mtspr SPRN_SRR1,r9; \ @@ -1088,10 +1092,6 @@ EXC_COMMON_BEGIN(machine_check_common) REST_GPR(10, r1); \ ld r11,_CCR(r1); \ mtcrr11;\ - /* Decrement paca->in_mce. */ \ - lhz r12,PACA_IN_MCE(r13); \ - subir12,r12,1; \ - sth r12,PACA_IN_MCE(r13); \ REST_GPR(11, r1); \ REST_2GPRS(12, r1); \ /* restore original r1. */ \ -- 2.20.1
[PATCH v2 32/52] powerpc/64s/exception: windup use r9 consistently to restore SPRs
Trivial code change, r3->r9. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 12 ++-- 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 539bb1b83d90..804438669454 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -914,8 +914,8 @@ EXC_COMMON_BEGIN(system_reset_common) /* Move original SRR0 and SRR1 into the respective regs */ ld r9,_MSR(r1) mtspr SPRN_SRR1,r9 - ld r3,_NIP(r1) - mtspr SPRN_SRR0,r3 + ld r9,_NIP(r1) + mtspr SPRN_SRR0,r9 ld r9,_CTR(r1) mtctr r9 ld r9,_XER(r1) @@ -1075,8 +1075,8 @@ EXC_COMMON_BEGIN(machine_check_common) /* Move original SRR0 and SRR1 into the respective regs */ \ ld r9,_MSR(r1);\ mtspr SPRN_SRR1,r9; \ - ld r3,_NIP(r1);\ - mtspr SPRN_SRR0,r3; \ + ld r9,_NIP(r1);\ + mtspr SPRN_SRR0,r9; \ ld r9,_CTR(r1);\ mtctr r9; \ ld r9,_XER(r1);\ @@ -1781,8 +1781,8 @@ TRAMP_REAL_BEGIN(hmi_exception_early) /* Move original HSRR0 and HSRR1 into the respective regs */ ld r9,_MSR(r1) mtspr SPRN_HSRR1,r9 - ld r3,_NIP(r1) - mtspr SPRN_HSRR0,r3 + ld r9,_NIP(r1) + mtspr SPRN_HSRR0,r9 ld r9,_CTR(r1) mtctr r9 ld r9,_XER(r1) -- 2.20.1
[PATCH v2 31/52] powerpc/64s/exception: mtmsrd L=1 cleanup
All supported 64s CPUs support mtmsrd L=1 instruction, so a cleanup can be made in sreset and mce handlers. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 9 ++--- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f582ae30f3f7..539bb1b83d90 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -887,11 +887,8 @@ EXC_COMMON_BEGIN(system_reset_common) addir3,r1,STACK_FRAME_OVERHEAD bl system_reset_exception - /* This (and MCE) can be simplified with mtmsrd L=1 */ /* Clear MSR_RI before setting SRR0 and SRR1. */ - li r0,MSR_RI - mfmsr r9 - andcr9,r9,r0 + li r9,0 mtmsrd r9,1 /* @@ -1073,9 +1070,7 @@ EXC_COMMON_BEGIN(machine_check_common) #define MACHINE_CHECK_HANDLER_WINDUP \ /* Clear MSR_RI before setting SRR0 and SRR1. */\ - li r0,MSR_RI; \ - mfmsr r9; /* get MSR value */ \ - andcr9,r9,r0; \ + li r9,0; \ mtmsrd r9,1; /* Clear MSR_RI */ \ /* Move original SRR0 and SRR1 into the respective regs */ \ ld r9,_MSR(r1);\ -- 2.20.1
[PATCH v2 30/52] powerpc/64s/exception: optimise system_reset for idle, clean up non-idle case
The idle wake up code in the system reset interrupt is not very optimal. There are two requirements: perform idle wake up quickly; and save everything including CFAR for non-idle interrupts, with no performance requirement. The problem with placing the idle test in the middle of the handler and using the normal handler code to save CFAR, is that it's quite costly (e.g., mfcfar is serialising, speculative workarounds get applied, SRR1 has to be reloaded, etc). It also prevents the standard interrupt handler boilerplate being used. This pain can be avoided by using a dedicated idle interrupt handler at the start of the interrupt handler, which restores all registers back to the way they were in case it was not an idle wake up. CFAR is preserved without saving it before the non-idle case by making that the fall-through, and idle is a taken branch. Performance seems to be in the noise, but possibly around 0.5% faster, the executed instructions certainly look better. The bigger benefit is being able to drop in standard interrupt handlers after the idle code, which helps with subsequent cleanup and consolidation. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 89 ++-- 1 file changed, 44 insertions(+), 45 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index e0492912ea79..f582ae30f3f7 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -241,7 +241,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) * load KBASE for a slight optimisation. 
*/ #define BRANCH_TO_C000(reg, label) \ - __LOAD_HANDLER(reg, label); \ + __LOAD_FAR_HANDLER(reg, label); \ mtctr reg;\ bctr @@ -784,16 +784,6 @@ EXC_VIRT_NONE(0x4000, 0x100) EXC_REAL_BEGIN(system_reset, 0x100, 0x100) - SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0 PACA_EXNMI - - /* This is EXCEPTION_PROLOG_1 with the idle feature section added */ - OPT_SAVE_REG_TO_PACA(PACA_EXNMI+EX_PPR, r9, CPU_FTR_HAS_PPR) - OPT_SAVE_REG_TO_PACA(PACA_EXNMI+EX_CFAR, r10, CPU_FTR_CFAR) - INTERRUPT_TO_KERNEL - SAVE_CTR(r10, PACA_EXNMI) - mfcrr9 - #ifdef CONFIG_PPC_P7_NAP /* * If running native on arch 2.06 or later, check if we are waking up @@ -801,45 +791,67 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100) * bits 46:47. A non-0 value indicates that we are coming from a power * saving state. The idle wakeup handler initially runs in real mode, * but we branch to the 0xc000... address so we can turn on relocation -* with mtmsr. +* with mtmsrd later, after SPRs are restored. +* +* Careful to minimise cost for the fast path (idle wakeup) while +* also avoiding clobbering CFAR for the non-idle case. Once we know +* it is an idle wake, volatiles don't matter, which is why we use +* those here, and then re-do the entry in case of non-idle (without +* branching for the non-idle case, to keep CFAR). */ BEGIN_FTR_SECTION - mfspr r10,SPRN_SRR1 - rlwinm. r10,r10,47-31,30,31 - beq-1f - cmpwi cr1,r10,2 + SET_SCRATCH0(r13) + GET_PACA(r13) + std r3,PACA_EXNMI+0*8(r13) + std r4,PACA_EXNMI+1*8(r13) + std r5,PACA_EXNMI+2*8(r13) mfspr r3,SPRN_SRR1 - bltlr cr1 /* no state loss, return to idle caller */ - BRANCH_TO_C000(r10, system_reset_idle_common) -1: + mfocrf r4,0x80 + rlwinm. r5,r3,47-31,30,31 + bne+system_reset_idle_wake + /* Not powersave wakeup. Restore regs for regular interrupt handler. 
*/ + mtocrf 0x80,r4 + ld r12,PACA_EXNMI+0*8(r13) + ld r4,PACA_EXNMI+1*8(r13) + ld r5,PACA_EXNMI+2*8(r13) + GET_SCRATCH0(r13) END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif - KVMTEST EXC_STD 0x100 - std r11,PACA_EXNMI+EX_R11(r13) - std r12,PACA_EXNMI+EX_R12(r13) - GET_SCRATCH0(r10) - std r10,PACA_EXNMI+EX_R13(r13) - + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0 PACA_EXNMI + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXNMI, 1, 0x100, 0 EXCEPTION_PROLOG_2_REAL system_reset_common, EXC_STD, 0 /* * MSR_RI is not enabled, because PACA_EXNMI and nmi stack is * being used, so a nested NMI exception would corrupt it. */ - EXC_REAL_END(system_reset, 0x100, 0x100) + EXC_VIRT_NONE(0x4100, 0x100) TRAMP_KVM(PACA_EXNMI, 0x100) #ifdef CONFIG_PPC_P7_NAP -EXC_COMMON_BEGIN(system_reset_idle_common) - /* -* This must be a direct branch (without linker branch stub) because -* we can not use TOC at this point as r
[PATCH v2 29/52] powerpc/64s/exception: avoid SPR RAW scoreboard stall in real mode entry
Move SPR reads ahead of writes. Real mode entry that is not a KVM guest is rare these days, but bad practice propagates. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 14 +++--- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 62f7e9ad23c6..e0492912ea79 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -183,19 +183,19 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) .endif .if \hsrr mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + mtspr SPRN_HSRR1,r10 .else mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 */ + mtspr SPRN_SRR1,r10 .endif - LOAD_HANDLER(r12, \label\()) + LOAD_HANDLER(r10, \label\()) .if \hsrr - mtspr SPRN_HSRR0,r12 - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - mtspr SPRN_HSRR1,r10 + mtspr SPRN_HSRR0,r10 HRFI_TO_KERNEL .else - mtspr SPRN_SRR0,r12 - mfspr r12,SPRN_SRR1 /* and SRR1 */ - mtspr SPRN_SRR1,r10 + mtspr SPRN_SRR0,r10 RFI_TO_KERNEL .endif b . /* prevent speculative execution */ -- 2.20.1
[PATCH v2 28/52] powerpc/64s/exception: clean up system call entry
syscall / hcall entry unnecessarily differs between KVM and non-KVM builds. Move the SMT priority instruction to the same location (after INTERRUPT_TO_KERNEL). Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 25 +++-- 1 file changed, 7 insertions(+), 18 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index d6de0ce1f0f2..62f7e9ad23c6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1635,10 +1635,8 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) std r10,PACA_EXGEN+EX_R10(r13) INTERRUPT_TO_KERNEL KVMTEST EXC_STD 0xc00 /* uses r10, branch to do_kvm_0xc00_system_call */ - HMT_MEDIUM mfctr r9 #else - HMT_MEDIUM mr r9,r13 GET_PACA(r13) INTERRUPT_TO_KERNEL @@ -1650,11 +1648,13 @@ BEGIN_FTR_SECTION beq-1f END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) #endif - /* We reach here with PACA in r13, r13 in r9, and HMT_MEDIUM. */ - - .if \real + /* We reach here with PACA in r13, r13 in r9. */ mfspr r11,SPRN_SRR0 mfspr r12,SPRN_SRR1 + + HMT_MEDIUM + + .if \real __LOAD_HANDLER(r10, system_call_common) mtspr SPRN_SRR0,r10 ld r10,PACAKMSR(r13) @@ -1662,24 +1662,13 @@ END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) RFI_TO_KERNEL b . /* prevent speculative execution */ .else + li r10,MSR_RI + mtmsrd r10,1 /* Set RI (EE=0) */ #ifdef CONFIG_RELOCATABLE - /* -* We can't branch directly so we do it via the CTR which -* is volatile across system calls. -*/ __LOAD_HANDLER(r10, system_call_common) mtctr r10 - mfspr r11,SPRN_SRR0 - mfspr r12,SPRN_SRR1 - li r10,MSR_RI - mtmsrd r10,1 bctr #else - /* We can branch directly */ - mfspr r11,SPRN_SRR0 - mfspr r12,SPRN_SRR1 - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ b system_call_common #endif .endif -- 2.20.1
[PATCH v2 27/52] powerpc/64s/exception: move paca save area offsets into exception-64s.S
No generated code change. File is change is in bug table line numbers. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 17 +++-- arch/powerpc/kernel/exceptions-64s.S | 22 ++ 2 files changed, 25 insertions(+), 14 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 79e5ac87c029..33f4f72eb035 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -32,22 +32,11 @@ */ #include -/* PACA save area offsets (exgen, exmc, etc) */ -#define EX_R9 0 -#define EX_R10 8 -#define EX_R11 16 -#define EX_R12 24 -#define EX_R13 32 -#define EX_DAR 40 -#define EX_DSISR 48 -#define EX_CCR 52 -#define EX_CFAR56 -#define EX_PPR 64 +/* PACA save area size in u64 units (exgen, exmc, etc) */ #if defined(CONFIG_RELOCATABLE) -#define EX_CTR 72 -#define EX_SIZE10 /* size in u64 units */ +#define EX_SIZE10 #else -#define EX_SIZE9 /* size in u64 units */ +#define EX_SIZE9 #endif /* diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 4b4bb8f43f55..d6de0ce1f0f2 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -21,6 +21,28 @@ #include #include +/* PACA save area offsets (exgen, exmc, etc) */ +#define EX_R9 0 +#define EX_R10 8 +#define EX_R11 16 +#define EX_R12 24 +#define EX_R13 32 +#define EX_DAR 40 +#define EX_DSISR 48 +#define EX_CCR 52 +#define EX_CFAR56 +#define EX_PPR 64 +#if defined(CONFIG_RELOCATABLE) +#define EX_CTR 72 +.if EX_SIZE != 10 + .error "EX_SIZE is wrong" +.endif +#else +.if EX_SIZE != 9 + .error "EX_SIZE is wrong" +.endif +#endif + /* * We're short on space and time in the exception prolog, so we can't * use the normal LOAD_REG_IMMEDIATE macro to load the address of label. -- 2.20.1
[PATCH v2 26/52] powerpc/64s/exception: remove pointless EXCEPTION_PROLOG macro indirection
No generated code change. File is change is in bug table line numbers. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 97 +--- 1 file changed, 45 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 6c0321e128da..4b4bb8f43f55 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -326,34 +326,6 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) std r0,GPR0(r1);/* save r0 in stackframe*/ \ std r10,GPR1(r1); /* save r1 in stackframe*/ \ - -/* - * The common exception prolog is used for all except a few exceptions - * such as a segment miss on a kernel address. We have to be prepared - * to take another exception from the point where we first touch the - * kernel stack onwards. - * - * On entry r13 points to the paca, r9-r13 are saved in the paca, - * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and - * SRR1, and relocation is on. - */ -#define EXCEPTION_PROLOG_COMMON(n, area) \ - andi. r10,r12,MSR_PR; /* See if coming from user */ \ - mr r10,r1; /* Save r1 */ \ - subir1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ - beq-1f;\ - ld r1,PACAKSAVE(r13); /* kernel stack to use */ \ -1: tdgei r1,-INT_FRAME_SIZE; /* trap if r1 is in userspace */ \ - EMIT_BUG_ENTRY 1b,__FILE__,__LINE__,0; \ -3: EXCEPTION_PROLOG_COMMON_1(); \ - kuap_save_amr_and_lock r9, r10, cr1, cr0; \ - beq 4f; /* if from kernel mode */ \ - ACCOUNT_CPU_USER_ENTRY(r13, r9, r10); \ - SAVE_PPR(area, r9);\ -4: EXCEPTION_PROLOG_COMMON_2(area)\ - EXCEPTION_PROLOG_COMMON_3(n) \ - ACCOUNT_STOLEN_TIME - /* Save original regs values from save area to stack frame. 
*/ #define EXCEPTION_PROLOG_COMMON_2(area) \ ld r9,area+EX_R9(r13); /* move r9, r10 to stackframe */ \ @@ -373,7 +345,7 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ GET_CTR(r10, area);\ std r10,_CTR(r1); -#define EXCEPTION_PROLOG_COMMON_3(n) \ +#define EXCEPTION_PROLOG_COMMON_3(trap) \ std r2,GPR2(r1);/* save r2 in stackframe*/ \ SAVE_4GPRS(3, r1); /* save r3 - r6 in stackframe */ \ SAVE_2GPRS(7, r1); /* save r7, r8 in stackframe*/ \ @@ -384,26 +356,38 @@ END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ mfspr r11,SPRN_XER; /* save XER in stackframe */ \ std r10,SOFTE(r1); \ std r11,_XER(r1); \ - li r9,(n)+1; \ + li r9,(trap)+1; \ std r9,_TRAP(r1); /* set trap number */ \ li r10,0; \ ld r11,exception_marker@toc(r2); \ std r10,RESULT(r1); /* clear regs->result */ \ std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ -#define RUNLATCH_ON\ -BEGIN_FTR_SECTION \ - ld r3, PACA_THREAD_INFO(r13); \ - ld r4,TI_LOCAL_FLAGS(r3); \ - andi. r0,r4,_TLF_RUNLATCH;\ - beqlppc64_runlatch_on_trampoline; \ -END_FTR_SECTION_IFSET(CPU_FTR_CTRL) - -#define EXCEPTION_COMMON(area, trap) \ - EXCEPTION_PROLOG_COMMON(trap, area);\ +/* + * On entry r13 points to the paca, r9-r13 are saved in the paca, + * r9 contains the saved CR, r11 and r12 contain the saved SRR0 and + * SRR1, and relocation is on. + */ +#define EXCEPTION_COMMON(area, trap) \ + andi. r10,r12,MSR_PR; /* See if coming from user */ \ + mr r10,r1; /* Save r1 */ \ + subir1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ + beq-1f;
[PATCH v2 25/52] powerpc/64s/exception: remove bad stack branch
The bad stack test in interrupt handlers has a few problems. For performance it is taken in the common case, which is a fetch bubble and a waste of i-cache. For code development and maintainence, it requires yet another stack frame setup routine, and that constrains all exception handlers to follow the same register save pattern which inhibits future optimisation. Remove the test/branch and replace it with a trap. Teach the program check handler to use the emergency stack for this case. This does not result in quite so nice a message, however the SRR0 and SRR1 of the crashed interrupt can be seen in r11 and r12, as is the original r1 (adjusted by INT_FRAME_SIZE). These are the most important parts to debugging the issue. The original r9-12 and cr0 is lost, which is the main downside. kernel BUG at linux/arch/powerpc/kernel/exceptions-64s.S:847! Oops: Exception in kernel mode, sig: 5 [#1] BE SMP NR_CPUS=2048 NUMA PowerNV Modules linked in: CPU: 0 PID: 1 Comm: swapper/0 Not tainted NIP: c0009108 LR: c0cadbcc CTR: c00090f0 REGS: c000fffcbd70 TRAP: 0700 Not tainted MSR: 90021032 CR: 28222448 XER: 2004 CFAR: c0009100 IRQMASK: 0 GPR00: 003d fd00 c18cfb00 c000f02b3166 GPR04: fffd 0007 fffb 0030 GPR08: 0037 28222448 c0ca8de0 GPR12: 92009032 c1ae c0010a00 GPR16: GPR20: c000f00322c0 c0f85200 0004 GPR24: fffe 000a GPR28: c000f02b391c c000f02b3167 NIP [c0009108] decrementer_common+0x18/0x160 LR [c0cadbcc] .vsnprintf+0x3ec/0x4f0 Call Trace: Instruction dump: 996d098a 994d098b 38610070 480246ed 48005518 6000 3820 718a4000 7c2a0b78 3821fd00 41c20008 e82d0970 <0981fd00> f92101a0 f9610170 f9810178 Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 7 -- arch/powerpc/include/asm/paca.h | 2 + arch/powerpc/kernel/asm-offsets.c| 2 + arch/powerpc/kernel/exceptions-64s.S | 95 arch/powerpc/xmon/xmon.c | 2 + 5 files changed, 22 insertions(+), 86 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 
dc6a5ccac965..79e5ac87c029 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -55,13 +55,6 @@ */ #define MAX_MCE_DEPTH 4 -/* - * EX_R3 is only used by the bad_stack handler. bad_stack reloads and - * saves DAR from SPRN_DAR, and EX_DAR is not used. So EX_R3 can overlap - * with EX_DAR. - */ -#define EX_R3 EX_DAR - #ifdef __ASSEMBLY__ #define STF_ENTRY_BARRIER_SLOT \ diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 9bd2326bef6f..e3cc9eb9204d 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -166,7 +166,9 @@ struct paca_struct { u64 kstack; /* Saved Kernel stack addr */ u64 saved_r1; /* r1 save for RTAS calls or PM or EE=0 */ u64 saved_msr; /* MSR saved here by enter_rtas */ +#ifdef CONFIG_PPC_BOOK3E u16 trap_save; /* Used when bad stack is encountered */ +#endif u8 irq_soft_mask; /* mask for irq soft masking */ u8 irq_happened;/* irq happened while soft-disabled */ u8 irq_work_pending;/* IRQ_WORK interrupt while soft-disable */ diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index 31dc7e64cbfc..4ccb6b3a7fbd 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -266,7 +266,9 @@ int main(void) OFFSET(ACCOUNT_STARTTIME_USER, paca_struct, accounting.starttime_user); OFFSET(ACCOUNT_USER_TIME, paca_struct, accounting.utime); OFFSET(ACCOUNT_SYSTEM_TIME, paca_struct, accounting.stime); +#ifdef CONFIG_PPC_BOOK3E OFFSET(PACA_TRAP_SAVE, paca_struct, trap_save); +#endif OFFSET(PACA_SPRG_VDSO, paca_struct, sprg_vdso); #else /* CONFIG_PPC64 */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0a2b4e8b02b0..6c0321e128da 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -343,14 +343,8 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) 
subi r1,r1,INT_FRAME_SIZE; /* alloc frame on kernel stack */ \ beq- 1f;\
[PATCH v2 24/52] powerpc/64s/exception: generate regs clear instructions using .rept
No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 29 +++- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 02b4722b7c64..0a2b4e8b02b0 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -2010,12 +2010,11 @@ BEGIN_FTR_SECTION mtmsrd r10 sync -#define FMR2(n) fmr (n), (n) ; fmr n+1, n+1 -#define FMR4(n) FMR2(n) ; FMR2(n+2) -#define FMR8(n) FMR4(n) ; FMR4(n+4) -#define FMR16(n) FMR8(n) ; FMR8(n+8) -#define FMR32(n) FMR16(n) ; FMR16(n+16) - FMR32(0) + .Lreg=0 + .rept 32 + fmr .Lreg,.Lreg + .Lreg=.Lreg+1 + .endr FTR_SECTION_ELSE /* @@ -2027,12 +2026,11 @@ FTR_SECTION_ELSE mtmsrd r10 sync -#define XVCPSGNDP2(n) XVCPSGNDP(n,n,n) ; XVCPSGNDP(n+1,n+1,n+1) -#define XVCPSGNDP4(n) XVCPSGNDP2(n) ; XVCPSGNDP2(n+2) -#define XVCPSGNDP8(n) XVCPSGNDP4(n) ; XVCPSGNDP4(n+4) -#define XVCPSGNDP16(n) XVCPSGNDP8(n) ; XVCPSGNDP8(n+8) -#define XVCPSGNDP32(n) XVCPSGNDP16(n) ; XVCPSGNDP16(n+16) - XVCPSGNDP32(0) + .Lreg=0 + .rept 32 + XVCPSGNDP(.Lreg,.Lreg,.Lreg) + .Lreg=.Lreg+1 + .endr ALT_FTR_SECTION_END_IFCLR(CPU_FTR_ARCH_206) @@ -2043,7 +2041,12 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) * To denormalise we need to move a copy of the register to itself. * For POWER8 we need to do that for all 64 VSX registers */ - XVCPSGNDP32(32) + .Lreg=32 + .rept 32 + XVCPSGNDP(.Lreg,.Lreg,.Lreg) + .Lreg=.Lreg+1 + .endr + denorm_done: mfspr r11,SPRN_HSRR0 subir11,r11,4 -- 2.20.1
[PATCH v2 23/52] powerpc/64s/exception: fix indenting irregularities
Generally, macros that result in instructions being expanded are indented by a tab, and those that don't have no indent. Fix the obvious cases that go contrary to style. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 92 ++-- 1 file changed, 46 insertions(+), 46 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f7b6634bcc75..02b4722b7c64 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -261,16 +261,16 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) cmpwi r10,KVM_GUEST_MODE_SKIP beq 89f .else - BEGIN_FTR_SECTION_NESTED(947) +BEGIN_FTR_SECTION_NESTED(947) ld r10,\area+EX_CFAR(r13) std r10,HSTATE_CFAR(r13) - END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) +END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) .endif - BEGIN_FTR_SECTION_NESTED(948) +BEGIN_FTR_SECTION_NESTED(948) ld r10,\area+EX_PPR(r13) std r10,HSTATE_PPR(r13) - END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) +END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) ld r10,\area+EX_R10(r13) std r12,HSTATE_SCRATCH0(r13) sldir12,r9,32 @@ -372,10 +372,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r9,GPR11(r1); \ std r10,GPR12(r1); \ std r11,GPR13(r1); \ - BEGIN_FTR_SECTION_NESTED(66); \ +BEGIN_FTR_SECTION_NESTED(66); \ ld r10,area+EX_CFAR(r13); \ std r10,ORIG_GPR3(r1); \ - END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66);\ +END_FTR_SECTION_NESTED(CPU_FTR_CFAR, CPU_FTR_CFAR, 66); \ GET_CTR(r10, area);\ std r10,_CTR(r1); @@ -794,7 +794,7 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100) * but we branch to the 0xc000... address so we can turn on relocation * with mtmsr. */ - BEGIN_FTR_SECTION +BEGIN_FTR_SECTION mfspr r10,SPRN_SRR1 rlwinm. 
r10,r10,47-31,30,31 beq-1f @@ -803,7 +803,7 @@ EXC_REAL_BEGIN(system_reset, 0x100, 0x100) bltlr cr1 /* no state loss, return to idle caller */ BRANCH_TO_C000(r10, system_reset_idle_common) 1: - END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif KVMTEST EXC_STD 0x100 @@ -1151,10 +1151,10 @@ END_FTR_SECTION_IFCLR(CPU_FTR_HVMODE) * * Go back to nap/sleep/winkle mode again if (b) is true. */ - BEGIN_FTR_SECTION +BEGIN_FTR_SECTION rlwinm. r11,r12,47-31,30,31 bne machine_check_idle_common - END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) +END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* @@ -1261,13 +1261,13 @@ EXC_COMMON_BEGIN(mce_return) b . EXC_REAL_BEGIN(data_access, 0x300, 0x80) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0 PACA_EXGEN + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0 PACA_EXGEN b tramp_real_data_access EXC_REAL_END(data_access, 0x300, 0x80) TRAMP_REAL_BEGIN(tramp_real_data_access) -EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 0 + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 0 /* * DAR/DSISR must be read before setting MSR[RI], because * a d-side MCE will clobber those registers so is not @@ -1280,9 +1280,9 @@ EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x300, 0 EXCEPTION_PROLOG_2_REAL data_access_common, EXC_STD, 1 EXC_VIRT_BEGIN(data_access, 0x4300, 0x80) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0 PACA_EXGEN -EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x300, 0 + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0 PACA_EXGEN + EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, 0x300, 0 mfspr r10,SPRN_DAR mfspr r11,SPRN_DSISR std r10,PACA_EXGEN+EX_DAR(r13) @@ -1315,24 +1315,24 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(data_access_slb, 0x380, 0x80) -SET_SCRATCH0(r13) /* save r13 */ -EXCEPTION_PROLOG_0 PACA_EXSLB + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0 PACA_EXSLB b 
tramp_real_data_access_slb EXC_REAL_END(data_access_slb, 0x380, 0x80) TRAMP_REAL_BEGIN(tramp_real_data_access_slb) -EXCEPTION_PROLOG_1 EXC_STD, PACA_EXSL
[PATCH v2 22/52] powerpc/64s/exception: use a gas macro for system call handler code
No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 127 --- 1 file changed, 55 insertions(+), 72 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index f1f1278cb131..f7b6634bcc75 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1607,6 +1607,7 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) * without saving, though xer is not a good idea to use, as hardware may * interpret some bits so it may be costly to change them. */ +.macro SYSTEM_CALL real #ifdef CONFIG_KVM_BOOK3S_64_HANDLER /* * There is a little bit of juggling to get syscall and hcall @@ -1616,95 +1617,77 @@ EXC_COMMON(trap_0b_common, 0xb00, unknown_exception) * Userspace syscalls have already saved the PPR, hcalls must save * it before setting HMT_MEDIUM. */ -#define SYSCALL_KVMTEST \ - mtctr r13;\ - GET_PACA(r13); \ - std r10,PACA_EXGEN+EX_R10(r13); \ - INTERRUPT_TO_KERNEL;\ - KVMTEST EXC_STD 0xc00 ; /* uses r10, branch to do_kvm_0xc00_system_call */ \ - HMT_MEDIUM; \ - mfctr r9; - + mtctr r13 + GET_PACA(r13) + std r10,PACA_EXGEN+EX_R10(r13) + INTERRUPT_TO_KERNEL + KVMTEST EXC_STD 0xc00 /* uses r10, branch to do_kvm_0xc00_system_call */ + HMT_MEDIUM + mfctr r9 #else -#define SYSCALL_KVMTEST \ - HMT_MEDIUM; \ - mr r9,r13; \ - GET_PACA(r13); \ - INTERRUPT_TO_KERNEL; + HMT_MEDIUM + mr r9,r13 + GET_PACA(r13) + INTERRUPT_TO_KERNEL #endif - -#define LOAD_SYSCALL_HANDLER(reg) \ - __LOAD_HANDLER(reg, system_call_common) - -/* - * After SYSCALL_KVMTEST, we reach here with PACA in r13, r13 in r9, - * and HMT_MEDIUM. - */ -#define SYSCALL_REAL \ - mfspr r11,SPRN_SRR0 ; \ - mfspr r12,SPRN_SRR1 ; \ - LOAD_SYSCALL_HANDLER(r10) ; \ - mtspr SPRN_SRR0,r10 ; \ - ld r10,PACAKMSR(r13) ; \ - mtspr SPRN_SRR1,r10 ; \ - RFI_TO_KERNEL ; \ - b . 
; /* prevent speculative execution */ #ifdef CONFIG_PPC_FAST_ENDIAN_SWITCH -#define SYSCALL_FASTENDIAN_TEST\ -BEGIN_FTR_SECTION \ - cmpdi r0,0x1ebe ; \ - beq-1f ;\ -END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) \ - -#define SYSCALL_FASTENDIAN \ - /* Fast LE/BE switch system call */ \ -1: mfspr r12,SPRN_SRR1 ; \ - xorir12,r12,MSR_LE ;\ - mtspr SPRN_SRR1,r12 ; \ - mr r13,r9 ;\ - RFI_TO_USER ; /* return to userspace */ \ - b . ; /* prevent speculative execution */ -#else -#define SYSCALL_FASTENDIAN_TEST -#define SYSCALL_FASTENDIAN -#endif /* CONFIG_PPC_FAST_ENDIAN_SWITCH */ +BEGIN_FTR_SECTION + cmpdi r0,0x1ebe + beq-1f +END_FTR_SECTION_IFSET(CPU_FTR_REAL_LE) +#endif + /* We reach here with PACA in r13, r13 in r9, and HMT_MEDIUM. */ -#if defined(CONFIG_RELOCATABLE) + .if \real + mfspr r11,SPRN_SRR0 + mfspr r12,SPRN_SRR1 + __LOAD_HANDLER(r10, system_call_common) + mtspr SPRN_SRR0,r10 + ld r10,PACAKMSR(r13) + mtspr SPRN_SRR1,r10 + RFI_TO_KERNEL + b . /* prevent speculative execution */ + .else +#ifdef CONFIG_RELOCATABLE /* * We can't branch directly so we do it via the CTR which * is volatile across system calls. */ -#define SYSCALL_VIRT \ - LOAD_SYSCALL_HANDLER(r10) ; \ - mtctr r10 ; \ - mfspr r11,SPRN_SRR0 ; \ -
[PATCH v2 21/52] powerpc/64s/exception: remove unused BRANCH_TO_COMMON
--- arch/powerpc/kernel/exceptions-64s.S | 8 1 file changed, 8 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 87db0f5a67c4..f1f1278cb131 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -224,20 +224,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) bctr #ifdef CONFIG_RELOCATABLE -#define BRANCH_TO_COMMON(reg, label) \ - __LOAD_HANDLER(reg, label); \ - mtctr reg;\ - bctr - #define BRANCH_LINK_TO_FAR(label) \ __LOAD_FAR_HANDLER(r12, label); \ mtctr r12;\ bctrl #else -#define BRANCH_TO_COMMON(reg, label) \ - b label - #define BRANCH_LINK_TO_FAR(label) \ bl label #endif -- 2.20.1
[PATCH v2 20/52] powerpc/64s/exception: remove __BRANCH_TO_KVM
No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 43 1 file changed, 18 insertions(+), 25 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 0a5a2d9dde90..87db0f5a67c4 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -243,29 +243,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #endif #ifdef CONFIG_KVM_BOOK3S_64_HANDLER - -#ifdef CONFIG_RELOCATABLE -/* - * KVM requires __LOAD_FAR_HANDLER. - * - * __BRANCH_TO_KVM_EXIT branches are also a special case because they - * explicitly use r9 then reload it from PACA before branching. Hence - * the double-underscore. - */ -#define __BRANCH_TO_KVM_EXIT(area, label) \ - mfctr r9; \ - std r9,HSTATE_SCRATCH1(r13);\ - __LOAD_FAR_HANDLER(r9, label); \ - mtctr r9; \ - ld r9,area+EX_R9(r13); \ - bctr - -#else -#define __BRANCH_TO_KVM_EXIT(area, label) \ - ld r9,area+EX_R9(r13); \ - b label -#endif - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* * If hv is possible, interrupts come into to the hv version @@ -311,8 +288,24 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) .else ori r12,r12,(\n) .endif - /* This reloads r9 before branching to kvmppc_interrupt */ - __BRANCH_TO_KVM_EXIT(\area, kvmppc_interrupt) + +#ifdef CONFIG_RELOCATABLE + /* +* KVM requires __LOAD_FAR_HANDLER because kvmppc_interrupt lives +* outside the head section. CONFIG_RELOCATABLE KVM expects CTR +* to be saved in HSTATE_SCRATCH1. +*/ + mfctr r9 + std r9,HSTATE_SCRATCH1(r13) + __LOAD_FAR_HANDLER(r9, kvmppc_interrupt) + mtctr r9 + ld r9,\area+EX_R9(r13) + bctr +#else + ld r9,\area+EX_R9(r13) + b kvmppc_interrupt +#endif + .if \skip 89:mtocrf 0x80,r9 -- 2.20.1
[PATCH v2 19/52] powerpc/64s/exception: move head-64.h code to exception-64s.S where it is used
No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 1 - arch/powerpc/include/asm/head-64.h | 252 --- arch/powerpc/kernel/exceptions-64s.S | 251 ++ 3 files changed, 251 insertions(+), 253 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 9e6712099f7a..dc6a5ccac965 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -30,7 +30,6 @@ * exception handlers (including pSeries LPAR) and iSeries LPAR * implementations as possible. */ -#include #include /* PACA save area offsets (exgen, exmc, etc) */ diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index dc1940c94a86..a466765709a9 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -169,53 +169,6 @@ end_##sname: #define ABS_ADDR(label) (label - fs_label + fs_start) -/* - * Following are the BOOK3S exception handler helper macros. - * Handlers come in a number of types, and each type has a number of varieties. - * - * EXC_REAL_* - real, unrelocated exception vectors - * EXC_VIRT_* - virt (AIL), unrelocated exception vectors - * TRAMP_REAL_* - real, unrelocated helpers (virt can call these) - * TRAMP_VIRT_* - virt, unreloc helpers (in practice, real can use) - * TRAMP_KVM - KVM handlers that get put into real, unrelocated - * EXC_COMMON - virt, relocated common handlers - * - * The EXC handlers are given a name, and branch to name_common, or the - * appropriate KVM or masking function. 
Vector handler verieties are as - * follows: - * - * EXC_{REAL|VIRT}_BEGIN/END - used to open-code the exception - * - * EXC_{REAL|VIRT} - standard exception - * - * EXC_{REAL|VIRT}_suffix - * where _suffix is: - * - _MASKABLE - maskable exception - * - _OOL- out of line with trampoline to common handler - * - _HV - HV exception - * - * There can be combinations, e.g., EXC_VIRT_OOL_MASKABLE_HV - * - * The one unusual case is __EXC_REAL_OOL_HV_DIRECT, which is - * an OOL vector that branches to a specified handler rather than the usual - * trampoline that goes to common. It, and other underscore macros, should - * be used with care. - * - * KVM handlers come in the following verieties: - * TRAMP_KVM - * TRAMP_KVM_SKIP - * TRAMP_KVM_HV - * TRAMP_KVM_HV_SKIP - * - * COMMON handlers come in the following verieties: - * EXC_COMMON_BEGIN/END - used to open-code the handler - * EXC_COMMON - * EXC_COMMON_ASYNC - * - * TRAMP_REAL and TRAMP_VIRT can be used with BEGIN/END. KVM - * and OOL handlers are implemented as types of TRAMP and TRAMP_VIRT handlers. 
- */ - #define EXC_REAL_BEGIN(name, start, size) \ FIXED_SECTION_ENTRY_BEGIN_LOCATION(real_vectors, exc_real_##start##_##name, start, size) @@ -257,211 +210,6 @@ end_##sname: FIXED_SECTION_ENTRY_BEGIN_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size); \ FIXED_SECTION_ENTRY_END_LOCATION(virt_vectors, exc_virt_##start##_##unused, start, size) - -#define __EXC_REAL(name, start, size, area)\ - EXC_REAL_BEGIN(name, start, size); \ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0 area ; \ - EXCEPTION_PROLOG_1 EXC_STD, area, 1, start, 0 ; \ - EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \ - EXC_REAL_END(name, start, size) - -#define EXC_REAL(name, start, size)\ - __EXC_REAL(name, start, size, PACA_EXGEN) - -#define __EXC_VIRT(name, start, size, realvec, area) \ - EXC_VIRT_BEGIN(name, start, size); \ - SET_SCRATCH0(r13);/* save r13 */\ - EXCEPTION_PROLOG_0 area ; \ - EXCEPTION_PROLOG_1 EXC_STD, area, 0, realvec, 0;\ - EXCEPTION_PROLOG_2_VIRT name##_common, EXC_STD ;\ - EXC_VIRT_END(name, start, size) - -#define EXC_VIRT(name, start, size, realvec) \ - __EXC_VIRT(name, start, size, realvec, PACA_EXGEN) - -#define EXC_REAL_MASKABLE(name, start, size, bitmask) \ - EXC_REAL_BEGIN(name, start, size); \ - SET_SCRATCH0(r13);/* save r13 */\ - EXCEPTION_PROLOG_0 PACA_EXGEN ; \ - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, start, bitmask ; \ - EXCEPTION_PROLOG_2_REAL name##_common, EXC_STD, 1 ; \ - EXC_REAL_END(name, start, size) - -#define EXC_VIRT_MASKABLE(name, start, size, real
[PATCH v2 18/52] powerpc/64s/exception: move exception-64s.h code to exception-64s.S where it is used
No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 430 -- arch/powerpc/kernel/exceptions-64s.S | 431 +++ 2 files changed, 431 insertions(+), 430 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index e996ffe68cf3..9e6712099f7a 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -146,436 +146,6 @@ hrfid; \ b hrfi_flush_fallback -/* - * We're short on space and time in the exception prolog, so we can't - * use the normal LOAD_REG_IMMEDIATE macro to load the address of label. - * Instead we get the base of the kernel from paca->kernelbase and or in the low - * part of label. This requires that the label be within 64KB of kernelbase, and - * that kernelbase be 64K aligned. - */ -#define LOAD_HANDLER(reg, label) \ - ld reg,PACAKBASE(r13); /* get high part of &label */ \ - ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label) - -#define __LOAD_HANDLER(reg, label) \ - ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l - -/* - * Branches from unrelocated code (e.g., interrupts) to labels outside - * head-y require >64K offsets. - */ -#define __LOAD_FAR_HANDLER(reg, label) \ - ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l;\ - addis reg,reg,(ABS_ADDR(label))@h - -/* Exception register prefixes */ -#define EXC_HV 1 -#define EXC_STD0 - -#if defined(CONFIG_RELOCATABLE) -/* - * If we support interrupts with relocation on AND we're a relocatable kernel, - * we need to use CTR to get to the 2nd level handler. So, save/restore it - * when required. - */ -#define SAVE_CTR(reg, area)mfctr reg ; std reg,area+EX_CTR(r13) -#define GET_CTR(reg, area) ld reg,area+EX_CTR(r13) -#define RESTORE_CTR(reg, area) ld reg,area+EX_CTR(r13) ; mtctr reg -#else -/* ...else CTR is unused and in register. 
*/ -#define SAVE_CTR(reg, area) -#define GET_CTR(reg, area) mfctr reg -#define RESTORE_CTR(reg, area) -#endif - -/* - * PPR save/restore macros used in exceptions_64s.S - * Used for P7 or later processors - */ -#define SAVE_PPR(area, ra) \ -BEGIN_FTR_SECTION_NESTED(940) \ - ld ra,area+EX_PPR(r13);/* Read PPR from paca */\ - std ra,_PPR(r1);\ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,940) - -#define RESTORE_PPR_PACA(area, ra) \ -BEGIN_FTR_SECTION_NESTED(941) \ - ld ra,area+EX_PPR(r13);\ - mtspr SPRN_PPR,ra;\ -END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,941) - -/* - * Get an SPR into a register if the CPU has the given feature - */ -#define OPT_GET_SPR(ra, spr, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - mfspr ra,spr; \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -/* - * Set an SPR from a register if the CPU has the given feature - */ -#define OPT_SET_SPR(ra, spr, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - mtspr spr,ra; \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -/* - * Save a register to the PACA if the CPU has the given feature - */ -#define OPT_SAVE_REG_TO_PACA(offset, ra, ftr) \ -BEGIN_FTR_SECTION_NESTED(943) \ - std ra,offset(r13); \ -END_FTR_SECTION_NESTED(ftr,ftr,943) - -.macro EXCEPTION_PROLOG_0 area - GET_PACA(r13) - std r9,\area\()+EX_R9(r13) /* save r9 */ - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR) - HMT_MEDIUM - std r10,\area\()+EX_R10(r13)/* save r10 - r12 */ - OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) -.endm - -.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask - OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR) - OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR) - INTERRUPT_TO_KERNEL - SAVE_CTR(r10, \area\()) - mfcrr9 - .if \kvm - KVMTEST \hsrr \vec - .endif - .if \bitmask - lbz r10,PACAIRQ
[PATCH v2 17/52] powerpc/64s/exception: move KVM related code together
No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 40 +--- 1 file changed, 21 insertions(+), 19 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 73705421f423..e996ffe68cf3 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -335,18 +335,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #endif .endm - -#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -/* - * If hv is possible, interrupts come into to the hv version - * of the kvmppc_interrupt code, which then jumps to the PR handler, - * kvmppc_interrupt_pr, if the guest is a PR guest. - */ -#define kvmppc_interrupt kvmppc_interrupt_hv -#else -#define kvmppc_interrupt kvmppc_interrupt_pr -#endif - /* * Branch to label using its 0xC000 address. This results in instruction * address suitable for MSR[IR]=0 or 1, which allows relocation to be turned @@ -371,6 +359,17 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) mtctr r12;\ bctrl +#else +#define BRANCH_TO_COMMON(reg, label) \ + b label + +#define BRANCH_LINK_TO_FAR(label) \ + bl label +#endif + +#ifdef CONFIG_KVM_BOOK3S_64_HANDLER + +#ifdef CONFIG_RELOCATABLE /* * KVM requires __LOAD_FAR_HANDLER. * @@ -387,19 +386,22 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) bctr #else -#define BRANCH_TO_COMMON(reg, label) \ - b label - -#define BRANCH_LINK_TO_FAR(label) \ - bl label - #define __BRANCH_TO_KVM_EXIT(area, label) \ ld r9,area+EX_R9(r13); \ b label +#endif +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE +/* + * If hv is possible, interrupts come into to the hv version + * of the kvmppc_interrupt code, which then jumps to the PR handler, + * kvmppc_interrupt_pr, if the guest is a PR guest. + */ +#define kvmppc_interrupt kvmppc_interrupt_hv +#else +#define kvmppc_interrupt kvmppc_interrupt_pr #endif -#ifdef CONFIG_KVM_BOOK3S_64_HANDLER .macro KVMTEST hsrr, n lbz r10,HSTATE_IN_GUEST(r13) cmpwi r10,0 -- 2.20.1
[PATCH v2 16/52] powerpc/64s/exception: remove STD_EXCEPTION_COMMON variants
These are only called in one place each. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 22 -- arch/powerpc/include/asm/head-64.h | 19 +-- 2 files changed, 17 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 6de3c393ddf7..73705421f423 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -555,28 +555,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) EXCEPTION_PROLOG_COMMON_2(area);\ EXCEPTION_PROLOG_COMMON_3(trap) -#define STD_EXCEPTION_COMMON(trap, hdlr) \ - EXCEPTION_COMMON(PACA_EXGEN, trap); \ - bl save_nvgprs;\ - RECONCILE_IRQ_STATE(r10, r11); \ - addir3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret_from_except - -/* - * Like STD_EXCEPTION_COMMON, but for exceptions that can occur - * in the idle task and therefore need the special idle handling - * (finish nap and runlatch) - */ -#define STD_EXCEPTION_COMMON_ASYNC(trap, hdlr) \ - EXCEPTION_COMMON(PACA_EXGEN, trap); \ - FINISH_NAP; \ - RECONCILE_IRQ_STATE(r10, r11); \ - RUNLATCH_ON;\ - addir3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret_from_except_lite - /* * When the idle code in power4_idle puts the CPU into NAP mode, * it has to do so in a loop, and relies on the external interrupt diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 54db05afb80f..dc1940c94a86 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -441,11 +441,26 @@ end_##sname: #define EXC_COMMON(name, realvec, hdlr) \ EXC_COMMON_BEGIN(name); \ - STD_EXCEPTION_COMMON(realvec, hdlr) + EXCEPTION_COMMON(PACA_EXGEN, realvec); \ + bl save_nvgprs;\ + RECONCILE_IRQ_STATE(r10, r11); \ + addir3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b ret_from_except +/* + * Like EXC_COMMON, but for exceptions that can occur in the idle task and + * therefore need the special idle handling (finish nap and 
runlatch) + */ #define EXC_COMMON_ASYNC(name, realvec, hdlr) \ EXC_COMMON_BEGIN(name); \ - STD_EXCEPTION_COMMON_ASYNC(realvec, hdlr) + EXCEPTION_COMMON(PACA_EXGEN, realvec); \ + FINISH_NAP; \ + RECONCILE_IRQ_STATE(r10, r11); \ + RUNLATCH_ON;\ + addir3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b ret_from_except_lite #endif /* __ASSEMBLY__ */ -- 2.20.1
[PATCH v2 15/52] powerpc/64s/exception: move EXCEPTION_PROLOG_2* to a more logical place
No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 113 --- 1 file changed, 57 insertions(+), 56 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 0bb0310b794f..6de3c393ddf7 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -170,62 +170,6 @@ ori reg,reg,(ABS_ADDR(label))@l;\ addis reg,reg,(ABS_ADDR(label))@h -.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri - ld r10,PACAKMSR(r13) /* get MSR value for kernel */ - .if ! \set_ri - xorir10,r10,MSR_RI /* Clear MSR_RI */ - .endif - .if \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - .endif - LOAD_HANDLER(r12, \label\()) - .if \hsrr - mtspr SPRN_HSRR0,r12 - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - mtspr SPRN_HSRR1,r10 - HRFI_TO_KERNEL - .else - mtspr SPRN_SRR0,r12 - mfspr r12,SPRN_SRR1 /* and SRR1 */ - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - .endif - b . /* prevent speculative execution */ -.endm - -.macro EXCEPTION_PROLOG_2_VIRT label, hsrr -#ifdef CONFIG_RELOCATABLE - .if \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - .endif - LOAD_HANDLER(r12, \label\()) - mtctr r12 - .if \hsrr - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - .else - mfspr r12,SPRN_SRR1 /* and HSRR1 */ - .endif - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ - bctr -#else - .if \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - mfspr r12,SPRN_SRR1 /* and SRR1 */ - .endif - li r10,MSR_RI - mtmsrd r10,1 /* Set RI (EE=0) */ - b \label -#endif -.endm - /* Exception register prefixes */ #define EXC_HV 1 #define EXC_STD0 @@ -335,6 +279,63 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,\area\()+EX_R13(r13) .endm +.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + .if ! 
\set_ri + xorir10,r10,MSR_RI /* Clear MSR_RI */ + .endif + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + .endif + LOAD_HANDLER(r12, \label\()) + .if \hsrr + mtspr SPRN_HSRR0,r12 + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + mtspr SPRN_HSRR1,r10 + HRFI_TO_KERNEL + .else + mtspr SPRN_SRR0,r12 + mfspr r12,SPRN_SRR1 /* and SRR1 */ + mtspr SPRN_SRR1,r10 + RFI_TO_KERNEL + .endif + b . /* prevent speculative execution */ +.endm + +.macro EXCEPTION_PROLOG_2_VIRT label, hsrr +#ifdef CONFIG_RELOCATABLE + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + .endif + LOAD_HANDLER(r12, \label\()) + mtctr r12 + .if \hsrr + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + .else + mfspr r12,SPRN_SRR1 /* and HSRR1 */ + .endif + li r10,MSR_RI + mtmsrd r10,1 /* Set RI (EE=0) */ + bctr +#else + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 */ + .endif + li r10,MSR_RI + mtmsrd r10,1 /* Set RI (EE=0) */ + b \label +#endif +.endm + + #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* * If hv is possible, interrupts come into to the hv version -- 2.20.1
[PATCH v2 14/52] powerpc/64s/exception: improve 0x500 handler code
After the previous cleanup, it becomes possible to consolidate some common code outside the runtime alternate patching. Also remove unused labels. This results in some code change, but unchanged runtime instruction sequence. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 16 1 file changed, 4 insertions(+), 12 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 5828d440da49..d35a9fa4651e 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -746,32 +746,24 @@ ALT_MMU_FTR_SECTION_END_IFCLR(MMU_FTR_TYPE_RADIX) EXC_REAL_BEGIN(hardware_interrupt, 0x500, 0x100) - .globl hardware_interrupt_hv -hardware_interrupt_hv: + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0 PACA_EXGEN BEGIN_FTR_SECTION - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_0 PACA_EXGEN EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, IRQS_DISABLED EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_HV, 1 FTR_SECTION_ELSE - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_0 PACA_EXGEN EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, IRQS_DISABLED EXCEPTION_PROLOG_2_REAL hardware_interrupt_common, EXC_STD, 1 ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) EXC_REAL_END(hardware_interrupt, 0x500, 0x100) EXC_VIRT_BEGIN(hardware_interrupt, 0x4500, 0x100) - .globl hardware_interrupt_relon_hv -hardware_interrupt_relon_hv: + SET_SCRATCH0(r13) /* save r13 */ + EXCEPTION_PROLOG_0 PACA_EXGEN BEGIN_FTR_SECTION - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_0 PACA_EXGEN EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, 0x500, IRQS_DISABLED EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_HV FTR_SECTION_ELSE - SET_SCRATCH0(r13) /* save r13 */ - EXCEPTION_PROLOG_0 PACA_EXGEN EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, 0x500, IRQS_DISABLED EXCEPTION_PROLOG_2_VIRT hardware_interrupt_common, EXC_STD ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) -- 2.20.1
[PATCH v2 13/52] powerpc/64s/exception: unwind exception-64s.h macros
Many of these macros just specify 1-4 lines which are only called a few times each at most, and often just once. Remove this indirection. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 101 --- arch/powerpc/include/asm/head-64.h | 76 - arch/powerpc/kernel/exceptions-64s.S | 44 +- 3 files changed, 82 insertions(+), 139 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 24fc0104c9d3..0bb0310b794f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -226,17 +226,6 @@ #endif .endm -/* - * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to - * rfid. Save CTR in case we're CONFIG_RELOCATABLE, in which case - * EXCEPTION_PROLOG_2_VIRT will be using CTR. - */ -#define EXCEPTION_RELON_PROLOG(area, label, hsrr, kvm, vec)\ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0 area ; \ - EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\ - EXCEPTION_PROLOG_2_VIRT label, hsrr - /* Exception register prefixes */ #define EXC_HV 1 #define EXC_STD0 @@ -346,12 +335,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,\area\()+EX_R13(r13) .endm -#define EXCEPTION_PROLOG(area, label, hsrr, kvm, vec) \ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0 area ; \ - EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\ - EXCEPTION_PROLOG_2_REAL label, hsrr, 1 - #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* * If hv is possible, interrupts come into to the hv version @@ -415,12 +398,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #endif -/* Do not enable RI */ -#define EXCEPTION_PROLOG_NORI(area, label, hsrr, kvm, vec) \ - EXCEPTION_PROLOG_0 area ; \ - EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\ - EXCEPTION_PROLOG_2_REAL label, hsrr, 0 - #ifdef CONFIG_KVM_BOOK3S_64_HANDLER .macro KVMTEST hsrr, n lbz r10,HSTATE_IN_GUEST(r13) @@ -557,84 +534,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,RESULT(r1); /* clear 
regs->result */ \ std r11,STACK_FRAME_OVERHEAD-16(r1); /* mark the frame */ -/* - * Exception vectors. - */ -#define STD_EXCEPTION(vec, label) \ - EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_STD, 1, vec); - -/* Version of above for when we have to branch out-of-line */ -#define __OOL_EXCEPTION(vec, label, hdlr) \ - SET_SCRATCH0(r13); \ - EXCEPTION_PROLOG_0 PACA_EXGEN ; \ - b hdlr - -#define STD_EXCEPTION_OOL(vec, label) \ - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, 0 ; \ - EXCEPTION_PROLOG_2_REAL label, EXC_STD, 1 - -#define STD_EXCEPTION_HV(loc, vec, label) \ - EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_HV, 1, vec) - -#define STD_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0 ; \ - EXCEPTION_PROLOG_2_REAL label, EXC_HV, 1 - -#define STD_RELON_EXCEPTION(loc, vec, label) \ - /* No guest interrupts come through here */ \ - EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_STD, 0, vec) - -#define STD_RELON_EXCEPTION_OOL(vec, label)\ - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 0, vec, 0 ; \ - EXCEPTION_PROLOG_2_VIRT label, EXC_STD - -#define STD_RELON_EXCEPTION_HV(loc, vec, label)\ - EXCEPTION_RELON_PROLOG(PACA_EXGEN, label, EXC_HV, 1, vec) - -#define STD_RELON_EXCEPTION_HV_OOL(vec, label) \ - EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, 0 ; \ - EXCEPTION_PROLOG_2_VIRT label, EXC_HV - -#define __MASKABLE_EXCEPTION(vec, label, hsrr, kvm, bitmask) \ - SET_SCRATCH0(r13);/* save r13 */\ - EXCEPTION_PROLOG_0 PACA_EXGEN ; \ - EXCEPTION_PROLOG_1 hsrr, PACA_EXGEN, kvm, vec, bitmask ;\ - EXCEPTION_PROLOG_2_REAL label, hsrr, 1 - -#define MASKABLE_EXCEPTION(vec, label, bitmask) \ - __MASKABLE_EXCEPTION(vec, label, EXC_STD, 1, bitmask) - -#define MASKABLE_EXCEPTION_OOL(vec, label, bitmask)\ - EXCEPTION_PROLOG_1 EXC_STD, PACA_EXGEN, 1, vec, bitmask ; \ - EXCEPTION_PROLOG_2_REAL label, EXC_STD, 1 - -#define MASKABLE_EXCEPTION_HV(vec, label, bitmask) \ - __MASK
[PATCH v2 12/52] powerpc/64s/exception: Move EXCEPTION_COMMON additions into callers
More cases of code insertion via macros that does not add a great deal. All the additions have to be specified in the macro arguments, so they can just as well go after the macro. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 42 +++--- arch/powerpc/include/asm/head-64.h | 4 +-- arch/powerpc/kernel/exceptions-64s.S | 45 +--- 3 files changed, 39 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index cc65e87cff2f..24fc0104c9d3 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -635,21 +635,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) EXCEPTION_PROLOG_1 EXC_HV, PACA_EXGEN, 1, vec, bitmask ;\ EXCEPTION_PROLOG_2_VIRT label, EXC_HV -/* - * Our exception common code can be passed various "additions" - * to specify the behaviour of interrupts, whether to kick the - * runlatch, etc... - */ - -/* - * This addition reconciles our actual IRQ state with the various software - * flags that track it. This may call C code. - */ -#define ADD_RECONCILE RECONCILE_IRQ_STATE(r10,r11) - -#define ADD_NVGPRS \ - bl save_nvgprs - #define RUNLATCH_ON\ BEGIN_FTR_SECTION \ ld r3, PACA_THREAD_INFO(r13); \ @@ -658,25 +643,22 @@ BEGIN_FTR_SECTION \ beqlppc64_runlatch_on_trampoline; \ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) -#define EXCEPTION_COMMON(area, trap, label, additions) \ +#define EXCEPTION_COMMON(area, trap) \ EXCEPTION_PROLOG_COMMON(trap, area);\ - /* Volatile regs are potentially clobbered here */ \ - additions /* - * Exception where stack is already set in r1, r1 is saved in r10, and it - * continues rather than returns. 
+ * Exception where stack is already set in r1, r1 is saved in r10 */ -#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, additions) \ +#define EXCEPTION_COMMON_STACK(area, trap) \ EXCEPTION_PROLOG_COMMON_1();\ kuap_save_amr_and_lock r9, r10, cr1;\ EXCEPTION_PROLOG_COMMON_2(area);\ - EXCEPTION_PROLOG_COMMON_3(trap);\ - /* Volatile regs are potentially clobbered here */ \ - additions + EXCEPTION_PROLOG_COMMON_3(trap) -#define STD_EXCEPTION_COMMON(trap, label, hdlr)\ - EXCEPTION_COMMON(PACA_EXGEN, trap, label, ADD_NVGPRS;ADD_RECONCILE); \ +#define STD_EXCEPTION_COMMON(trap, hdlr) \ + EXCEPTION_COMMON(PACA_EXGEN, trap); \ + bl save_nvgprs;\ + RECONCILE_IRQ_STATE(r10, r11); \ addir3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ b ret_from_except @@ -686,9 +668,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) * in the idle task and therefore need the special idle handling * (finish nap and runlatch) */ -#define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ - EXCEPTION_COMMON(PACA_EXGEN, trap, label, \ - FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON); \ +#define STD_EXCEPTION_COMMON_ASYNC(trap, hdlr) \ + EXCEPTION_COMMON(PACA_EXGEN, trap); \ + FINISH_NAP; \ + RECONCILE_IRQ_STATE(r10, r11); \ + RUNLATCH_ON;\ addir3,r1,STACK_FRAME_OVERHEAD; \ bl hdlr; \ b ret_from_except_lite diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index bdd67a26e959..acd94fcf9f40 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -403,11 +403,11 @@ end_##sname: #define EXC_COMMON(name, realvec, hdlr) \ EXC_COMMON_BEGIN(name); \ - STD_EXCEPTION_COMMON(realvec, name, hdlr) + STD_EXCEPTION_COMMON(realvec, hdlr) #define EXC_COMMON_ASYNC(name, realvec, hdlr) \ EXC_COMMON_BEGIN(name); \ - STD_EXCEPTION_COMMON_ASYNC(realvec, name, hdlr) + STD_EXCEPTION_COMMON_ASYNC(realvec, hdlr) #endif /* __ASSEMBLY__ */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 87c4ebeb950c..72c178feaa98 100644 --- 
a/arch/powerpc/ke
[PATCH v2 11/52] powerpc/64s/exception: Move EXCEPTION_COMMON handler and return branches into callers
The aim is to reduce the amount of indirection it takes to get through the exception handler macros, particularly where it provides little code sharing. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 26 arch/powerpc/kernel/exceptions-64s.S | 21 +++ 2 files changed, 26 insertions(+), 21 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index f19c2391cc36..cc65e87cff2f 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -658,31 +658,28 @@ BEGIN_FTR_SECTION \ beqlppc64_runlatch_on_trampoline; \ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) -#define EXCEPTION_COMMON(area, trap, label, hdlr, ret, additions) \ +#define EXCEPTION_COMMON(area, trap, label, additions) \ EXCEPTION_PROLOG_COMMON(trap, area);\ /* Volatile regs are potentially clobbered here */ \ - additions; \ - addir3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr; \ - b ret + additions /* * Exception where stack is already set in r1, r1 is saved in r10, and it * continues rather than returns. 
*/ -#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, hdlr, additions) \ +#define EXCEPTION_COMMON_NORET_STACK(area, trap, label, additions) \ EXCEPTION_PROLOG_COMMON_1();\ kuap_save_amr_and_lock r9, r10, cr1;\ EXCEPTION_PROLOG_COMMON_2(area);\ EXCEPTION_PROLOG_COMMON_3(trap);\ /* Volatile regs are potentially clobbered here */ \ - additions; \ - addir3,r1,STACK_FRAME_OVERHEAD; \ - bl hdlr + additions #define STD_EXCEPTION_COMMON(trap, label, hdlr)\ - EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \ - ret_from_except, ADD_NVGPRS;ADD_RECONCILE) + EXCEPTION_COMMON(PACA_EXGEN, trap, label, ADD_NVGPRS;ADD_RECONCILE); \ + addir3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b ret_from_except /* * Like STD_EXCEPTION_COMMON, but for exceptions that can occur @@ -690,8 +687,11 @@ END_FTR_SECTION_IFSET(CPU_FTR_CTRL) * (finish nap and runlatch) */ #define STD_EXCEPTION_COMMON_ASYNC(trap, label, hdlr) \ - EXCEPTION_COMMON(PACA_EXGEN, trap, label, hdlr, \ - ret_from_except_lite, FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON) + EXCEPTION_COMMON(PACA_EXGEN, trap, label, \ + FINISH_NAP;ADD_RECONCILE;RUNLATCH_ON); \ + addir3,r1,STACK_FRAME_OVERHEAD; \ + bl hdlr; \ + b ret_from_except_lite /* * When the idle code in power4_idle puts the CPU into NAP mode, diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 02d974b71f44..87c4ebeb950c 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -195,9 +195,10 @@ EXC_COMMON_BEGIN(system_reset_common) mr r10,r1 ld r1,PACA_NMI_EMERG_SP(r13) subir1,r1,INT_FRAME_SIZE - EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, - system_reset, system_reset_exception, - ADD_NVGPRS;ADD_RECONCILE_NMI) + EXCEPTION_COMMON_NORET_STACK(PACA_EXNMI, 0x100, system_reset, + ADD_NVGPRS;ADD_RECONCILE_NMI) + addir3,r1,STACK_FRAME_OVERHEAD + bl system_reset_exception /* This (and MCE) can be simplified with mtmsrd L=1 */ /* Clear MSR_RI before setting SRR0 and SRR1. 
*/ @@ -1171,8 +1172,11 @@ hmi_exception_after_realmode: b tramp_real_hmi_exception EXC_COMMON_BEGIN(hmi_exception_common) -EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, handle_hmi_exception, -ret_from_except, FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON) +EXCEPTION_COMMON(PACA_EXGEN, 0xe60, hmi_exception_common, + FINISH_NAP;ADD_NVGPRS;ADD_RECONCILE;RUNLATCH_ON) + addir3,r1,STACK_FRAME_OVERHEAD + bl handle_hmi_exception + b ret_from_except EXC_REAL_OOL_MASKABLE_HV(h_doorbell, 0xe80, 0x20, IRQS_DISABLED) EXC_VIRT_OOL_MASKABLE_HV(h_doorbell, 0x4e80, 0x20, 0xe80, IRQS_DISABLED) @@ -1467,9 +1471,10 @@ EXC_COMMON_BEGIN(soft_nmi_common) mr r10,r1 ld r1,PACAEMERGSP(r13) subir1,r1,INT_FRAME_SIZE - EXCEPTION_CO
[PATCH v2 10/52] powerpc/64s/exception: Make EXCEPTION_PROLOG_0 a gas macro for consistency with others
No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 25 arch/powerpc/kernel/exceptions-64s.S | 24 +++ 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 1d8fc085e845..f19c2391cc36 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -233,7 +233,7 @@ */ #define EXCEPTION_RELON_PROLOG(area, label, hsrr, kvm, vec)\ SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0(area); \ + EXCEPTION_PROLOG_0 area ; \ EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\ EXCEPTION_PROLOG_2_VIRT label, hsrr @@ -297,13 +297,14 @@ BEGIN_FTR_SECTION_NESTED(943) \ std ra,offset(r13); \ END_FTR_SECTION_NESTED(ftr,ftr,943) -#define EXCEPTION_PROLOG_0(area) \ - GET_PACA(r13); \ - std r9,area+EX_R9(r13); /* save r9 */ \ - OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR); \ - HMT_MEDIUM; \ - std r10,area+EX_R10(r13); /* save r10 - r12 */\ +.macro EXCEPTION_PROLOG_0 area + GET_PACA(r13) + std r9,\area\()+EX_R9(r13) /* save r9 */ + OPT_GET_SPR(r9, SPRN_PPR, CPU_FTR_HAS_PPR) + HMT_MEDIUM + std r10,\area\()+EX_R10(r13)/* save r10 - r12 */ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) +.endm .macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR) @@ -347,7 +348,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG(area, label, hsrr, kvm, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0(area); \ + EXCEPTION_PROLOG_0 area ; \ EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\ EXCEPTION_PROLOG_2_REAL label, hsrr, 1 @@ -416,7 +417,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) /* Do not enable RI */ #define EXCEPTION_PROLOG_NORI(area, label, hsrr, kvm, vec) \ - EXCEPTION_PROLOG_0(area); \ + EXCEPTION_PROLOG_0 area ; \ EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\ EXCEPTION_PROLOG_2_REAL label, hsrr, 0 @@ -565,7 +566,7 @@ 
END_FTR_SECTION_NESTED(ftr,ftr,943) /* Version of above for when we have to branch out-of-line */ #define __OOL_EXCEPTION(vec, label, hdlr) \ SET_SCRATCH0(r13); \ - EXCEPTION_PROLOG_0(PACA_EXGEN); \ + EXCEPTION_PROLOG_0 PACA_EXGEN ; \ b hdlr #define STD_EXCEPTION_OOL(vec, label) \ @@ -596,7 +597,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __MASKABLE_EXCEPTION(vec, label, hsrr, kvm, bitmask) \ SET_SCRATCH0(r13);/* save r13 */\ - EXCEPTION_PROLOG_0(PACA_EXGEN); \ + EXCEPTION_PROLOG_0 PACA_EXGEN ; \ EXCEPTION_PROLOG_1 hsrr, PACA_EXGEN, kvm, vec, bitmask ;\ EXCEPTION_PROLOG_2_REAL label, hsrr, 1 @@ -616,7 +617,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __MASKABLE_RELON_EXCEPTION(vec, label, hsrr, kvm, bitmask) \ SET_SCRATCH0(r13);/* save r13 */\ - EXCEPTION_PROLOG_0(PACA_EXGEN); \ + EXCEPTION_PROLOG_0 PACA_EXGEN ; \ EXCEPTION_PROLOG_1 hsrr, PACA_EXGEN, kvm, vec, bitmask ;\ EXCEPTION_PROLOG_2_VIRT label, hsrr diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 484d0710ca08..02d974b71f44 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -109,7 +109,7 @@ EXC_VIRT_NONE(0x4000, 0x100) EXC_REAL_BEGIN(system_reset, 0x100, 0x100) SET_SCRATCH0(r13) - EXCEPTION_PROLOG_0(PACA_EXNMI) + EXCEPTION_PROLOG_0 PACA_EXNMI /* This is EXCEPTION_PROLOG_1 with the idle feature section added */ OPT_SAVE_REG_TO_PACA(PACA_EXNMI+EX_PPR, r9, CPU_FTR_HAS_PPR) @@ -266,7 +266,7 @@ EXC_REAL_BEGIN(machine_check, 0x200, 0x100) * vector
[PATCH v2 09/52] powerpc/64s/exception: KVM handler can set the HSRR trap bit
Move the KVM trap HSRR bit into the KVM handler, which can be conditionally applied when hsrr parameter is set. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 5 + arch/powerpc/include/asm/head-64.h | 7 ++- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 737c37d1df4b..1d8fc085e845 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -449,7 +449,12 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) ld r10,\area+EX_R10(r13) std r12,HSTATE_SCRATCH0(r13) sldir12,r9,32 + /* HSRR variants have the 0x2 bit added to their trap number */ + .if \hsrr + ori r12,r12,(\n + 0x2) + .else ori r12,r12,(\n) + .endif /* This reloads r9 before branching to kvmppc_interrupt */ __BRANCH_TO_KVM_EXIT(\area, kvmppc_interrupt) diff --git a/arch/powerpc/include/asm/head-64.h b/arch/powerpc/include/asm/head-64.h index 518d9758b41e..bdd67a26e959 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -393,16 +393,13 @@ end_##sname: TRAMP_KVM_BEGIN(do_kvm_##n);\ KVM_HANDLER area, EXC_STD, n, 1 -/* - * HV variant exceptions get the 0x2 bit added to their trap number. - */ #define TRAMP_KVM_HV(area, n) \ TRAMP_KVM_BEGIN(do_kvm_H##n); \ - KVM_HANDLER area, EXC_HV, n + 0x2, 0 + KVM_HANDLER area, EXC_HV, n, 0 #define TRAMP_KVM_HV_SKIP(area, n) \ TRAMP_KVM_BEGIN(do_kvm_H##n); \ - KVM_HANDLER area, EXC_HV, n + 0x2, 1 + KVM_HANDLER area, EXC_HV, n, 1 #define EXC_COMMON(name, realvec, hdlr) \ EXC_COMMON_BEGIN(name); \ -- 2.20.1
[PATCH v2 08/52] powerpc/64s/exception: merge KVM handler and skip variants
Conditionally expand the skip case if it is specified. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 28 +--- arch/powerpc/include/asm/head-64.h | 8 +++ arch/powerpc/kernel/exceptions-64s.S | 2 +- 3 files changed, 15 insertions(+), 23 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 74ddcb37156c..737c37d1df4b 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -431,26 +431,17 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) .endif .endm -.macro KVM_HANDLER area, hsrr, n +.macro KVM_HANDLER area, hsrr, n, skip + .if \skip + cmpwi r10,KVM_GUEST_MODE_SKIP + beq 89f + .else BEGIN_FTR_SECTION_NESTED(947) ld r10,\area+EX_CFAR(r13) std r10,HSTATE_CFAR(r13) END_FTR_SECTION_NESTED(CPU_FTR_CFAR,CPU_FTR_CFAR,947) - BEGIN_FTR_SECTION_NESTED(948) - ld r10,\area+EX_PPR(r13) - std r10,HSTATE_PPR(r13) - END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,948) - ld r10,\area+EX_R10(r13) - std r12,HSTATE_SCRATCH0(r13) - sldir12,r9,32 - ori r12,r12,(\n) - /* This reloads r9 before branching to kvmppc_interrupt */ - __BRANCH_TO_KVM_EXIT(\area, kvmppc_interrupt) -.endm + .endif -.macro KVM_HANDLER_SKIP area, hsrr, n - cmpwi r10,KVM_GUEST_MODE_SKIP - beq 89f BEGIN_FTR_SECTION_NESTED(948) ld r10,\area+EX_PPR(r13) std r10,HSTATE_PPR(r13) @@ -461,6 +452,8 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) ori r12,r12,(\n) /* This reloads r9 before branching to kvmppc_interrupt */ __BRANCH_TO_KVM_EXIT(\area, kvmppc_interrupt) + + .if \skip 89:mtocrf 0x80,r9 ld r9,\area+EX_R9(r13) ld r10,\area+EX_R10(r13) @@ -469,14 +462,13 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) .else b kvmppc_skip_interrupt .endif + .endif .endm #else .macro KVMTEST hsrr, n .endm -.macro KVM_HANDLER area, hsrr, n -.endm -.macro KVM_HANDLER_SKIP area, hsrr, n +.macro KVM_HANDLER area, hsrr, n, skip .endm #endif diff --git a/arch/powerpc/include/asm/head-64.h 
b/arch/powerpc/include/asm/head-64.h index 4767d6c7b8fa..518d9758b41e 100644 --- a/arch/powerpc/include/asm/head-64.h +++ b/arch/powerpc/include/asm/head-64.h @@ -387,22 +387,22 @@ end_##sname: #define TRAMP_KVM(area, n) \ TRAMP_KVM_BEGIN(do_kvm_##n);\ - KVM_HANDLER area, EXC_STD, n + KVM_HANDLER area, EXC_STD, n, 0 #define TRAMP_KVM_SKIP(area, n) \ TRAMP_KVM_BEGIN(do_kvm_##n);\ - KVM_HANDLER_SKIP area, EXC_STD, n + KVM_HANDLER area, EXC_STD, n, 1 /* * HV variant exceptions get the 0x2 bit added to their trap number. */ #define TRAMP_KVM_HV(area, n) \ TRAMP_KVM_BEGIN(do_kvm_H##n); \ - KVM_HANDLER area, EXC_HV, n + 0x2 + KVM_HANDLER area, EXC_HV, n + 0x2, 0 #define TRAMP_KVM_HV_SKIP(area, n) \ TRAMP_KVM_BEGIN(do_kvm_H##n); \ - KVM_HANDLER_SKIP area, EXC_HV, n + 0x2 + KVM_HANDLER area, EXC_HV, n + 0x2, 1 #define EXC_COMMON(name, realvec, hdlr) \ EXC_COMMON_BEGIN(name); \ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 01f7bfe0653c..484d0710ca08 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1063,7 +1063,7 @@ TRAMP_KVM_BEGIN(do_kvm_0xc00) SET_SCRATCH0(r10) std r9,PACA_EXGEN+EX_R9(r13) mfcrr9 - KVM_HANDLER PACA_EXGEN, EXC_STD, 0xc00 + KVM_HANDLER PACA_EXGEN, EXC_STD, 0xc00, 0 #endif -- 2.20.1
[PATCH v2 07/52] powerpc/64s/exception: consolidate maskable and non-maskable prologs
Conditionally expand the soft-masking test if a mask is passed in. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 113 +-- arch/powerpc/kernel/exceptions-64s.S | 20 ++-- 2 files changed, 55 insertions(+), 78 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index e1b449e2c9ea..74ddcb37156c 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -234,7 +234,7 @@ #define EXCEPTION_RELON_PROLOG(area, label, hsrr, kvm, vec)\ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ; \ + EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, 0 ;\ EXCEPTION_PROLOG_2_VIRT label, hsrr /* Exception register prefixes */ @@ -305,73 +305,50 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) std r10,area+EX_R10(r13); /* save r10 - r12 */\ OPT_GET_SPR(r10, SPRN_CFAR, CPU_FTR_CFAR) -#define __EXCEPTION_PROLOG_1_PRE(area) \ - OPT_SAVE_REG_TO_PACA(area+EX_PPR, r9, CPU_FTR_HAS_PPR); \ - OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ - INTERRUPT_TO_KERNEL;\ - SAVE_CTR(r10, area);\ +.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask + OPT_SAVE_REG_TO_PACA(\area\()+EX_PPR, r9, CPU_FTR_HAS_PPR) + OPT_SAVE_REG_TO_PACA(\area\()+EX_CFAR, r10, CPU_FTR_CFAR) + INTERRUPT_TO_KERNEL + SAVE_CTR(r10, \area\()) mfcrr9 - -#define __EXCEPTION_PROLOG_1_POST(area) \ - std r11,area+EX_R11(r13); \ - std r12,area+EX_R12(r13); \ - GET_SCRATCH0(r10); \ - std r10,area+EX_R13(r13) - -/* - * This version of the EXCEPTION_PROLOG_1 will carry - * addition parameter called "bitmask" to support - * checking of the interrupt maskable level. - * Intended to be used in MASKABLE_EXCPETION_* macros. - */ -.macro MASKABLE_EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask - __EXCEPTION_PROLOG_1_PRE(\area\()) .if \kvm KVMTEST \hsrr \vec .endif - - lbz r10,PACAIRQSOFTMASK(r13) - andi. 
r10,r10,\bitmask - /* This associates vector numbers with bits in paca->irq_happened */ - .if \vec == 0x500 || \vec == 0xea0 - li r10,PACA_IRQ_EE - .elseif \vec == 0x900 || \vec == 0xea0 - li r10,PACA_IRQ_DEC - .elseif \vec == 0xa00 || \vec == 0xe80 - li r10,PACA_IRQ_DBELL - .elseif \vec == 0xe60 - li r10,PACA_IRQ_HMI - .elseif \vec == 0xf00 - li r10,PACA_IRQ_PMI - .else - .abort "Bad maskable vector" + .if \bitmask + lbz r10,PACAIRQSOFTMASK(r13) + andi. r10,r10,\bitmask + /* Associate vector numbers with bits in paca->irq_happened */ + .if \vec == 0x500 || \vec == 0xea0 + li r10,PACA_IRQ_EE + .elseif \vec == 0x900 || \vec == 0xea0 + li r10,PACA_IRQ_DEC + .elseif \vec == 0xa00 || \vec == 0xe80 + li r10,PACA_IRQ_DBELL + .elseif \vec == 0xe60 + li r10,PACA_IRQ_HMI + .elseif \vec == 0xf00 + li r10,PACA_IRQ_PMI + .else + .abort "Bad maskable vector" + .endif + + .if \hsrr + bne masked_Hinterrupt + .else + bne masked_interrupt + .endif .endif - .if \hsrr - bne masked_Hinterrupt - .else - bne masked_interrupt - .endif - - __EXCEPTION_PROLOG_1_POST(\area\()) -.endm - -/* - * This version of the EXCEPTION_PROLOG_1 is intended - * to be used in STD_EXCEPTION* macros - */ -.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec - __EXCEPTION_PROLOG_1_PRE(\area\()) - .if \kvm - KVMTEST \hsrr \vec - .endif - __EXCEPTION_PROLOG_1_POST(\area\()) + std r11,\area\()+EX_R11(r13) + std r12,\area\()+EX_R12(r13) + GET_SCRATCH0(r10) + std r10,\area\()+EX_R13(r13) .endm #define EXCEPTION_PROLOG(area, label, hsrr, kvm, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ; \ + EXCEPTION_PROLOG_1 hsrr, a
[PATCH v2 06/52] powerpc/64s/exception: remove the "extra" macro parameter
Rather than pass in the soft-masking and KVM tests via macro that is passed to another macro to expand it, switch to usig gas macros and conditionally expand the soft-masking and KVM tests. The system reset with its idle test is open coded as it is a one-off. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 158 ++- arch/powerpc/kernel/exceptions-64s.S | 78 ++- 2 files changed, 114 insertions(+), 122 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 4aef70defcdd..e1b449e2c9ea 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -231,10 +231,10 @@ * rfid. Save CTR in case we're CONFIG_RELOCATABLE, in which case * EXCEPTION_PROLOG_2_VIRT will be using CTR. */ -#define EXCEPTION_RELON_PROLOG(area, label, hsrr, extra, vec) \ +#define EXCEPTION_RELON_PROLOG(area, label, hsrr, kvm, vec)\ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1(area, extra, vec); \ + EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ; \ EXCEPTION_PROLOG_2_VIRT label, hsrr /* Exception register prefixes */ @@ -321,31 +321,58 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) /* * This version of the EXCEPTION_PROLOG_1 will carry * addition parameter called "bitmask" to support - * checking of the interrupt maskable level in the SOFTEN_TEST. + * checking of the interrupt maskable level. * Intended to be used in MASKABLE_EXCPETION_* macros. */ -#define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \ - __EXCEPTION_PROLOG_1_PRE(area); \ - extra(vec, bitmask);\ - __EXCEPTION_PROLOG_1_POST(area) +.macro MASKABLE_EXCEPTION_PROLOG_1 hsrr, area, kvm, vec, bitmask + __EXCEPTION_PROLOG_1_PRE(\area\()) + .if \kvm + KVMTEST \hsrr \vec + .endif + + lbz r10,PACAIRQSOFTMASK(r13) + andi. 
r10,r10,\bitmask + /* This associates vector numbers with bits in paca->irq_happened */ + .if \vec == 0x500 || \vec == 0xea0 + li r10,PACA_IRQ_EE + .elseif \vec == 0x900 || \vec == 0xea0 + li r10,PACA_IRQ_DEC + .elseif \vec == 0xa00 || \vec == 0xe80 + li r10,PACA_IRQ_DBELL + .elseif \vec == 0xe60 + li r10,PACA_IRQ_HMI + .elseif \vec == 0xf00 + li r10,PACA_IRQ_PMI + .else + .abort "Bad maskable vector" + .endif + + .if \hsrr + bne masked_Hinterrupt + .else + bne masked_interrupt + .endif + + __EXCEPTION_PROLOG_1_POST(\area\()) +.endm /* * This version of the EXCEPTION_PROLOG_1 is intended * to be used in STD_EXCEPTION* macros */ -#define _EXCEPTION_PROLOG_1(area, extra, vec) \ - __EXCEPTION_PROLOG_1_PRE(area); \ - extra(vec); \ - __EXCEPTION_PROLOG_1_POST(area) - -#define EXCEPTION_PROLOG_1(area, extra, vec) \ - _EXCEPTION_PROLOG_1(area, extra, vec) +.macro EXCEPTION_PROLOG_1 hsrr, area, kvm, vec + __EXCEPTION_PROLOG_1_PRE(\area\()) + .if \kvm + KVMTEST \hsrr \vec + .endif + __EXCEPTION_PROLOG_1_POST(\area\()) +.endm -#define EXCEPTION_PROLOG(area, label, h, extra, vec) \ +#define EXCEPTION_PROLOG(area, label, hsrr, kvm, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2_REAL label, h, 1 + EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ; \ + EXCEPTION_PROLOG_2_REAL label, hsrr, 1 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* @@ -411,10 +438,10 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #endif /* Do not enable RI */ -#define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec) \ +#define EXCEPTION_PROLOG_NORI(area, label, hsrr, kvm, vec) \ EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2_REAL label, h, 0 + EXCEPTION_PROLOG_1 hsrr, area, kvm, vec ; \ + EXCEPTION_PROLOG_2_REAL label, hsrr, 0 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER .macro KVMTEST hsrr, n @@ -476,8 +503,6 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) .endm #endif -#define NOTEST(n) - #define 
EXCEPTION_PROLOG_COMMON_1()
[PATCH v2 05/52] powerpc/64s/exception: fix sreset KVM test code
The sreset handler KVM test theoretically should not depend on P7. In practice KVM now only supports P7 and up so no real bug fix, but this change is made now so the quirk is not propagated through cleanup patches. Signed-off-by: Nicholas Piggin --- arch/powerpc/kernel/exceptions-64s.S | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index bb286f7e1aee..b34d7a9acae6 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -126,10 +126,10 @@ EXC_VIRT_NONE(0x4000, 0x100) bltlr cr1 ; /* no state loss, return to idle caller */ \ BRANCH_TO_C000(r10, system_reset_idle_common) ; \ 1: \ - KVMTEST_PR(n) ; \ - END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) + END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) ; \ + KVMTEST_PR(n) #else -#define IDLETEST NOTEST +#define IDLETEST KVMTEST_PR #endif EXC_REAL_BEGIN(system_reset, 0x100, 0x100) -- 2.20.1
[PATCH v2 04/52] powerpc/64s/exception: move and tidy EXCEPTION_PROLOG_2 variants
- Re-name the macros to _REAL and _VIRT suffixes rather than no and _RELON suffix. - Move the macro definitions together in the file. - Move RELOCATABLE ifdef inside the _VIRT macro. Further consolidation between variants does not buy much here. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 87 arch/powerpc/kernel/exceptions-64s.S | 18 ++--- 2 files changed, 51 insertions(+), 54 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 94c4992188a7..4aef70defcdd 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -170,8 +170,33 @@ ori reg,reg,(ABS_ADDR(label))@l;\ addis reg,reg,(ABS_ADDR(label))@h +.macro EXCEPTION_PROLOG_2_REAL label, hsrr, set_ri + ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + .if ! \set_ri + xorir10,r10,MSR_RI /* Clear MSR_RI */ + .endif + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + .endif + LOAD_HANDLER(r12, \label\()) + .if \hsrr + mtspr SPRN_HSRR0,r12 + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + mtspr SPRN_HSRR1,r10 + HRFI_TO_KERNEL + .else + mtspr SPRN_SRR0,r12 + mfspr r12,SPRN_SRR1 /* and SRR1 */ + mtspr SPRN_SRR1,r10 + RFI_TO_KERNEL + .endif + b . 
/* prevent speculative execution */ +.endm + +.macro EXCEPTION_PROLOG_2_VIRT label, hsrr #ifdef CONFIG_RELOCATABLE -.macro EXCEPTION_PROLOG_2_RELON label, hsrr .if \hsrr mfspr r11,SPRN_HSRR0 /* save HSRR0 */ .else @@ -187,10 +212,7 @@ li r10,MSR_RI mtmsrd r10,1 /* Set RI (EE=0) */ bctr -.endm #else -/* If not relocatable, we can jump directly -- and save messing with LR */ -.macro EXCEPTION_PROLOG_2_RELON label, hsrr .if \hsrr mfspr r11,SPRN_HSRR0 /* save HSRR0 */ mfspr r12,SPRN_HSRR1 /* and HSRR1 */ @@ -201,19 +223,19 @@ li r10,MSR_RI mtmsrd r10,1 /* Set RI (EE=0) */ b \label -.endm #endif +.endm /* * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to - * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case - * EXCEPTION_PROLOG_2_RELON will be using LR. + * rfid. Save CTR in case we're CONFIG_RELOCATABLE, in which case + * EXCEPTION_PROLOG_2_VIRT will be using CTR. */ #define EXCEPTION_RELON_PROLOG(area, label, hsrr, extra, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2_RELON label, hsrr + EXCEPTION_PROLOG_2_VIRT label, hsrr /* Exception register prefixes */ #define EXC_HV 1 @@ -319,36 +341,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_1(area, extra, vec) \ _EXCEPTION_PROLOG_1(area, extra, vec) -.macro EXCEPTION_PROLOG_2 label, hsrr, set_ri - ld r10,PACAKMSR(r13) /* get MSR value for kernel */ - .if ! \set_ri - xorir10,r10,MSR_RI /* Clear MSR_RI */ - .endif - .if \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - .endif - LOAD_HANDLER(r12,\label\()) - .if \hsrr - mtspr SPRN_HSRR0,r12 - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - mtspr SPRN_HSRR1,r10 - HRFI_TO_KERNEL - .else - mtspr SPRN_SRR0,r12 - mfspr r12,SPRN_SRR1 /* and SRR1 */ - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - .endif - b . 
/* prevent speculative execution */ -.endm - #define EXCEPTION_PROLOG(area, label, h, extra, vec) \ SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2 label, h, 1 + EXCEPTION_PROLOG_2_REAL label, h, 1 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* @@ -417,7 +414,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec) \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2 label, h, 0 + EXCEPTION_PROLOG_2_REAL label, h, 0 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER .macro KVMTEST hsrr
[PATCH v2 03/52] powerpc/64s/exception: consolidate EXCEPTION_PROLOG_2 with _NORI variant
Switch to a gas macro that conditionally expands the RI clearing instruction. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 43 ++-- arch/powerpc/kernel/exceptions-64s.S | 12 +++ 2 files changed, 17 insertions(+), 38 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 1496e4089cee..94c4992188a7 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -319,32 +319,11 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_1(area, extra, vec) \ _EXCEPTION_PROLOG_1(area, extra, vec) -.macro EXCEPTION_PROLOG_2 label, hsrr - ld r10,PACAKMSR(r13) /* get MSR value for kernel */ - .if \hsrr - mfspr r11,SPRN_HSRR0 /* save HSRR0 */ - .else - mfspr r11,SPRN_SRR0 /* save SRR0 */ - .endif - LOAD_HANDLER(r12,\label\()) - .if \hsrr - mtspr SPRN_HSRR0,r12 - mfspr r12,SPRN_HSRR1 /* and HSRR1 */ - mtspr SPRN_HSRR1,r10 - HRFI_TO_KERNEL - .else - mtspr SPRN_SRR0,r12 - mfspr r12,SPRN_SRR1 /* and SRR1 */ - mtspr SPRN_SRR1,r10 - RFI_TO_KERNEL - .endif - b . /* prevent speculative execution */ -.endm - -/* _NORI variant keeps MSR_RI clear */ -.macro EXCEPTION_PROLOG_2_NORI label, hsrr +.macro EXCEPTION_PROLOG_2 label, hsrr, set_ri ld r10,PACAKMSR(r13) /* get MSR value for kernel */ + .if ! 
\set_ri xorir10,r10,MSR_RI /* Clear MSR_RI */ + .endif .if \hsrr mfspr r11,SPRN_HSRR0 /* save HSRR0 */ .else @@ -369,7 +348,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2 label, h + EXCEPTION_PROLOG_2 label, h, 1 #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE /* @@ -438,7 +417,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec) \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2_NORI label, h + EXCEPTION_PROLOG_2 label, h, 0 #ifdef CONFIG_KVM_BOOK3S_64_HANDLER .macro KVMTEST hsrr, n @@ -595,14 +574,14 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define STD_EXCEPTION_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_PR, vec);\ - EXCEPTION_PROLOG_2 label, EXC_STD + EXCEPTION_PROLOG_2 label, EXC_STD, 1 #define STD_EXCEPTION_HV(loc, vec, label) \ EXCEPTION_PROLOG(PACA_EXGEN, label, EXC_HV, KVMTEST_HV, vec) #define STD_EXCEPTION_HV_OOL(vec, label) \ EXCEPTION_PROLOG_1(PACA_EXGEN, KVMTEST_HV, vec);\ - EXCEPTION_PROLOG_2 label, EXC_HV + EXCEPTION_PROLOG_2 label, EXC_HV, 1 #define STD_RELON_EXCEPTION(loc, vec, label) \ /* No guest interrupts come through here */ \ @@ -666,21 +645,21 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) SET_SCRATCH0(r13);/* save r13 */\ EXCEPTION_PROLOG_0(PACA_EXGEN); \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, extra, vec, bitmask); \ - EXCEPTION_PROLOG_2 label, h + EXCEPTION_PROLOG_2 label, h, 1 #define MASKABLE_EXCEPTION(vec, label, bitmask) \ __MASKABLE_EXCEPTION(vec, label, EXC_STD, SOFTEN_TEST_PR, bitmask) #define MASKABLE_EXCEPTION_OOL(vec, label, bitmask)\ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_PR, vec, bitmask);\ - EXCEPTION_PROLOG_2 label, EXC_STD + EXCEPTION_PROLOG_2 label, EXC_STD, 1 #define MASKABLE_EXCEPTION_HV(vec, label, bitmask) \ __MASKABLE_EXCEPTION(vec, label, EXC_HV, SOFTEN_TEST_HV, bitmask) #define MASKABLE_EXCEPTION_HV_OOL(vec, 
label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_TEST_HV, vec, bitmask);\ - EXCEPTION_PROLOG_2 label, EXC_HV + EXCEPTION_PROLOG_2 label, EXC_HV, 1 #define __MASKABLE_RELON_EXCEPTION(vec, label, h, extra, bitmask) \ SET_SCRATCH0(r13);/* save r13 */\ @@ -693,7 +672,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define MASKABLE_RELON_EXCEPTION_OOL(vec, label, bitmask) \ MASKABLE_EXCEPTION_PROLOG_1(PACA_EXGEN, SOFTEN_NOTEST_PR, vec, bitmask);\ - EXCEPTION_PROLOG_2 label, EXC_STD + EXCEPTION_PROLOG_2 label, EXC_STD, 1 #de
[PATCH v2 02/52] powerpc/64s/exception: remove H concatenation for EXC_HV variants
Replace all instances of this with gas macros that test the hsrr parameter and use the appropriate register names / labels. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 333 +-- arch/powerpc/include/asm/head-64.h | 8 +- arch/powerpc/kernel/exceptions-64s.S | 97 --- 3 files changed, 253 insertions(+), 185 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index d3987ce65857..1496e4089cee 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -63,6 +63,8 @@ */ #define EX_R3 EX_DAR +#ifdef __ASSEMBLY__ + #define STF_ENTRY_BARRIER_SLOT \ STF_ENTRY_BARRIER_FIXUP_SECTION;\ nop;\ @@ -144,38 +146,6 @@ hrfid; \ b hrfi_flush_fallback -#ifdef CONFIG_RELOCATABLE -#define __EXCEPTION_PROLOG_2_RELON(label, h) \ - mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - LOAD_HANDLER(r12,label);\ - mtctr r12;\ - mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ - li r10,MSR_RI; \ - mtmsrd r10,1; /* Set RI (EE=0) */ \ - bctr; -#else -/* If not relocatable, we can jump directly -- and save messing with LR */ -#define __EXCEPTION_PROLOG_2_RELON(label, h) \ - mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ - li r10,MSR_RI; \ - mtmsrd r10,1; /* Set RI (EE=0) */ \ - b label; -#endif -#define EXCEPTION_PROLOG_2_RELON(label, h) \ - __EXCEPTION_PROLOG_2_RELON(label, h) - -/* - * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to - * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case - * EXCEPTION_PROLOG_2_RELON will be using LR. 
- */ -#define EXCEPTION_RELON_PROLOG(area, label, h, extra, vec) \ - SET_SCRATCH0(r13); /* save r13 */ \ - EXCEPTION_PROLOG_0(area); \ - EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2_RELON(label, h) - /* * We're short on space and time in the exception prolog, so we can't * use the normal LOAD_REG_IMMEDIATE macro to load the address of label. @@ -200,9 +170,54 @@ ori reg,reg,(ABS_ADDR(label))@l;\ addis reg,reg,(ABS_ADDR(label))@h +#ifdef CONFIG_RELOCATABLE +.macro EXCEPTION_PROLOG_2_RELON label, hsrr + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + .endif + LOAD_HANDLER(r12, \label\()) + mtctr r12 + .if \hsrr + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + .else + mfspr r12,SPRN_SRR1 /* and HSRR1 */ + .endif + li r10,MSR_RI + mtmsrd r10,1 /* Set RI (EE=0) */ + bctr +.endm +#else +/* If not relocatable, we can jump directly -- and save messing with LR */ +.macro EXCEPTION_PROLOG_2_RELON label, hsrr + .if \hsrr + mfspr r11,SPRN_HSRR0 /* save HSRR0 */ + mfspr r12,SPRN_HSRR1 /* and HSRR1 */ + .else + mfspr r11,SPRN_SRR0 /* save SRR0 */ + mfspr r12,SPRN_SRR1 /* and SRR1 */ + .endif + li r10,MSR_RI + mtmsrd r10,1 /* Set RI (EE=0) */ + b \label +.endm +#endif + +/* + * As EXCEPTION_PROLOG(), except we've already got relocation on so no need to + * rfid. Save LR in case we're CONFIG_RELOCATABLE, in which case + * EXCEPTION_PROLOG_2_RELON will be using LR. + */ +#define EXCEPTION_RELON_PROLOG(area, label, hsrr, extra, vec) \ + SET_SCRATCH0(r13); /* save r13 */ \ + EXCEPTION_PROLOG_0(area); \ + EXCEPTION_PROLOG_1(area, extra, vec); \ + EXCEPTION_PROLOG_2_RELON label, hsrr + /* Exception register prefixes */ -#define EXC_HV H -#define EXC_STD +#define EXC_HV 1 +#define EXC_STD0 #if defined(CONFIG_RELOCATABLE) /* @@ -304,43 +319,57 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_1(area, extra, vec) \ _EXCEPTION_PROLOG_1
[PATCH v2 00/52] powerpc/64s interrupt handler cleanups, gasification
This contains the previous 28 series in front, with one small fix mentioned in the last thread, plus one extra patch to remove unused BRANCH_TO_COMMON macro, so first 29 up to SPR RAW scoreboard are all quite minimal generated code change. The next patches start to get a bit more into code change, starting to mainly attack the "odd" handlers which deviate significantly from the norm (sreset, machine check, still have pending work to do on hmi). The aim is to simplify them and make them more regular. That makes maintaining easier, and also reduces the need to have a lot of special cases and splits in macros, which helps further cleanup in future. After patch 19 I should add it's so much more pleasant to hack on this code, you don't have to rebuild practically the whole kernel whenever you change anything. Oh I also got a KUAP fix in there we should backport. Thanks, Nick Nicholas Piggin (52): powerpc/64s/exception: fix line wrap and semicolon inconsistencies in macros powerpc/64s/exception: remove H concatenation for EXC_HV variants powerpc/64s/exception: consolidate EXCEPTION_PROLOG_2 with _NORI variant powerpc/64s/exception: move and tidy EXCEPTION_PROLOG_2 variants powerpc/64s/exception: fix sreset KVM test code powerpc/64s/exception: remove the "extra" macro parameter powerpc/64s/exception: consolidate maskable and non-maskable prologs powerpc/64s/exception: merge KVM handler and skip variants powerpc/64s/exception: KVM handler can set the HSRR trap bit powerpc/64s/exception: Make EXCEPTION_PROLOG_0 a gas macro for consistency with others powerpc/64s/exception: Move EXCEPTION_COMMON handler and return branches into callers powerpc/64s/exception: Move EXCEPTION_COMMON additions into callers powerpc/64s/exception: unwind exception-64s.h macros powerpc/64s/exception: improve 0x500 handler code powerpc/64s/exception: move EXCEPTION_PROLOG_2* to a more logical place powerpc/64s/exception: remove STD_EXCEPTION_COMMON variants powerpc/64s/exception: move KVM related 
code together powerpc/64s/exception: move exception-64s.h code to exception-64s.S where it is used powerpc/64s/exception: move head-64.h code to exception-64s.S where it is used powerpc/64s/exception: remove __BRANCH_TO_KVM powerpc/64s/exception: remove unused BRANCH_TO_COMMON powerpc/64s/exception: use a gas macro for system call handler code powerpc/64s/exception: fix indenting irregularities powerpc/64s/exception: generate regs clear instructions using .rept powerpc/64s/exception: remove bad stack branch powerpc/64s/exception: remove pointless EXCEPTION_PROLOG macro indirection powerpc/64s/exception: move paca save area offsets into exception-64s.S powerpc/64s/exception: clean up system call entry powerpc/64s/exception: avoid SPR RAW scoreboard stall in real mode entry powerpc/64s/exception: optimise system_reset for idle, clean up non-idle case powerpc/64s/exception: mtmsrd L=1 cleanup powerpc/64s/exception: windup use r9 consistently to restore SPRs powerpc/64s/exception: move machine check windup in_mce handling powerpc/64s/exception: simplify hmi windup code powerpc/64s/exception: shuffle windup code around powerpc/64s/exception: use common macro for windup powerpc/64s/exception: add dar and dsisr options to exception macro powerpc/64s/exception: machine check use standard macros to save dar/dsisr powerpc/64s/exception: denorm handler use standard scratch save macro powerpc/64s/exception: move SET_SCRATCH0 into EXCEPTION_PROLOG_0 powerpc/tm: update comment about interrupt re-entrancy powerpc/64s/exception: machine check fwnmi does not trigger when in HV mode powerpc/64s/exception: machine check early only runs in HV mode powerpc/64s/exception: separate pseries and powernv mce delivery paths powerpc/64s/exception: machine check windup restore cfar for host delivery powerpc/64s/exception: fix machine check early should not set AMR powerpc/64s/exception: machine check restructure handler to be more regular powerpc/64s/exception: simplify machine check early 
path powerpc/64s/exceptions: machine check move unrecoverable handling out of line powerpc/64s/exception: untangle early machine check handler powerpc/64s/exception: machine check improve branch labels powerpc/64s/exception: add missing branch to self after RFI arch/powerpc/include/asm/exception-64s.h | 609 +--- arch/powerpc/include/asm/head-64.h | 204 +-- arch/powerpc/include/asm/paca.h |2 + arch/powerpc/kernel/asm-offsets.c|2 + arch/powerpc/kernel/exceptions-64s.S | 1764 ++ arch/powerpc/kernel/tm.S |4 +- arch/powerpc/xmon/xmon.c |2 + 7 files changed, 1161 insertions(+), 1426 deletions(-) -- 2.20.1
[PATCH v2 01/52] powerpc/64s/exception: fix line wrap and semicolon inconsistencies in macros
By convention, all lines should be separated by a semicolons. Last line should have neither semicolon or line wrap. No generated code change. Signed-off-by: Nicholas Piggin --- arch/powerpc/include/asm/exception-64s.h | 36 ++--- arch/powerpc/include/asm/head-64.h | 68 2 files changed, 52 insertions(+), 52 deletions(-) diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 841a0be6c1b2..d3987ce65857 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -185,11 +185,11 @@ */ #define LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); /* get high part of &label */ \ - ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label); + ori reg,reg,FIXED_SYMBOL_ABS_ADDR(label) #define __LOAD_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); \ - ori reg,reg,(ABS_ADDR(label))@l; + ori reg,reg,(ABS_ADDR(label))@l /* * Branches from unrelocated code (e.g., interrupts) to labels outside @@ -198,7 +198,7 @@ #define __LOAD_FAR_HANDLER(reg, label) \ ld reg,PACAKBASE(r13); \ ori reg,reg,(ABS_ADDR(label))@l;\ - addis reg,reg,(ABS_ADDR(label))@h; + addis reg,reg,(ABS_ADDR(label))@h /* Exception register prefixes */ #define EXC_HV H @@ -273,7 +273,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) OPT_SAVE_REG_TO_PACA(area+EX_CFAR, r10, CPU_FTR_CFAR); \ INTERRUPT_TO_KERNEL;\ SAVE_CTR(r10, area);\ - mfcrr9; + mfcrr9 #define __EXCEPTION_PROLOG_1_POST(area) \ std r11,area+EX_R11(r13); \ @@ -290,7 +290,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define MASKABLE_EXCEPTION_PROLOG_1(area, extra, vec, bitmask) \ __EXCEPTION_PROLOG_1_PRE(area); \ extra(vec, bitmask);\ - __EXCEPTION_PROLOG_1_POST(area); + __EXCEPTION_PROLOG_1_POST(area) /* * This version of the EXCEPTION_PROLOG_1 is intended @@ -299,7 +299,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define _EXCEPTION_PROLOG_1(area, extra, vec) \ __EXCEPTION_PROLOG_1_PRE(area); \ extra(vec); \ - __EXCEPTION_PROLOG_1_POST(area); + __EXCEPTION_PROLOG_1_POST(area) #define 
EXCEPTION_PROLOG_1(area, extra, vec) \ _EXCEPTION_PROLOG_1(area, extra, vec) @@ -307,7 +307,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define __EXCEPTION_PROLOG_2(label, h) \ ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - LOAD_HANDLER(r12,label) \ + LOAD_HANDLER(r12,label);\ mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ @@ -321,7 +321,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) ld r10,PACAKMSR(r13); /* get MSR value for kernel */ \ xorir10,r10,MSR_RI; /* Clear MSR_RI */ \ mfspr r11,SPRN_##h##SRR0; /* save SRR0 */ \ - LOAD_HANDLER(r12,label) \ + LOAD_HANDLER(r12,label);\ mtspr SPRN_##h##SRR0,r12; \ mfspr r12,SPRN_##h##SRR1; /* and SRR1 */ \ mtspr SPRN_##h##SRR1,r10; \ @@ -335,7 +335,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) SET_SCRATCH0(r13); /* save r13 */ \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_PROLOG_1(area, extra, vec); \ - EXCEPTION_PROLOG_2(label, h); + EXCEPTION_PROLOG_2(label, h) #define __KVMTEST(h, n) \ lbz r10,HSTATE_IN_GUEST(r13); \ @@ -409,7 +409,7 @@ END_FTR_SECTION_NESTED(ftr,ftr,943) #define EXCEPTION_PROLOG_NORI(area, label, h, extra, vec) \ EXCEPTION_PROLOG_0(area); \ EXCEPTION_
[PATCH v2] ocxl: Allow contexts to be attached with a NULL mm
From: Alastair D'Silva If an OpenCAPI context is to be used directly by a kernel driver, there may not be a suitable mm to use. The patch makes the mm parameter to ocxl_context_attach optional. Signed-off-by: Alastair D'Silva --- arch/powerpc/mm/book3s64/radix_tlb.c | 5 + drivers/misc/ocxl/context.c | 9 ++--- drivers/misc/ocxl/link.c | 28 3 files changed, 35 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/book3s64/radix_tlb.c b/arch/powerpc/mm/book3s64/radix_tlb.c index bb9835681315..ce8a77fae6a7 100644 --- a/arch/powerpc/mm/book3s64/radix_tlb.c +++ b/arch/powerpc/mm/book3s64/radix_tlb.c @@ -666,6 +666,11 @@ EXPORT_SYMBOL(radix__flush_tlb_page); #define radix__flush_all_mm radix__local_flush_all_mm #endif /* CONFIG_SMP */ +/* + * If kernel TLBIs ever become local rather than global, then + * drivers/misc/ocxl/link.c:ocxl_link_add_pe will need some work, as it + * assumes kernel TLBIs are global. + */ void radix__flush_tlb_kernel_range(unsigned long start, unsigned long end) { _tlbie_pid(0, RIC_FLUSH_ALL); diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index bab9c9364184..994563a078eb 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -69,6 +69,7 @@ static void xsl_fault_error(void *data, u64 addr, u64 dsisr) int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) { int rc; + unsigned long pidr = 0; // Locks both status & tidr mutex_lock(&ctx->status_mutex); @@ -77,9 +78,11 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) goto out; } - rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, - mm->context.id, ctx->tidr, amr, mm, - xsl_fault_error, ctx); + if (mm) + pidr = mm->context.id; + + rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr, + amr, mm, xsl_fault_error, ctx); if (rc) goto out; diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index cce5b0d64505..58d111afd9f6 100644 --- a/drivers/misc/ocxl/link.c +++ 
b/drivers/misc/ocxl/link.c @@ -224,6 +224,17 @@ static irqreturn_t xsl_fault_handler(int irq, void *data) ack_irq(spa, ADDRESS_ERROR); return IRQ_HANDLED; } + + if (!pe_data->mm) { + /* +* translation fault from a kernel context - an OpenCAPI +* device tried to access a bad kernel address +*/ + rcu_read_unlock(); + pr_warn("Unresolved OpenCAPI xsl fault in kernel context\n"); + ack_irq(spa, ADDRESS_ERROR); + return IRQ_HANDLED; + } WARN_ON(pe_data->mm->context.id != pid); if (mmget_not_zero(pe_data->mm)) { @@ -523,7 +534,13 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, pe->amr = cpu_to_be64(amr); pe->software_state = cpu_to_be32(SPA_PE_VALID); - mm_context_add_copro(mm); + /* +* For user contexts, register a copro so that TLBIs are seen +* by the nest MMU. If we have a kernel context, TLBIs are +* already global. +*/ + if (mm) + mm_context_add_copro(mm); /* * Barrier is to make sure PE is visible in the SPA before it * is used by the device. It also helps with the global TLBI @@ -546,7 +563,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, * have a reference on mm_users. Incrementing mm_count solves * the problem. */ - mmgrab(mm); + if (mm) + mmgrab(mm); trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr); unlock: mutex_unlock(&spa->spa_lock); @@ -652,8 +670,10 @@ int ocxl_link_remove_pe(void *link_handle, int pasid) if (!pe_data) { WARN(1, "Couldn't find pe data when removing PE\n"); } else { - mm_context_remove_copro(pe_data->mm); - mmdrop(pe_data->mm); + if (pe_data->mm) { + mm_context_remove_copro(pe_data->mm); + mmdrop(pe_data->mm); + } kfree_rcu(pe_data, rcu); } unlock: -- 2.21.0
Re: [PATCH v2 3/6] powerpc/eeh: Improve debug messages around device addition
On Thu, Jun 20, 2019 at 12:40 PM Alexey Kardashevskiy wrote: > > On 19/06/2019 14:27, Sam Bobroff wrote: > > On Tue, Jun 11, 2019 at 03:47:58PM +1000, Alexey Kardashevskiy wrote: > >> > >> On 07/05/2019 14:30, Sam Bobroff wrote: > >>> Also remove useless comment. > >>> > >>> Signed-off-by: Sam Bobroff > >>> Reviewed-by: Alexey Kardashevskiy > >>> --- > *snip* > > > > I can see that edev will be non-NULL here, but that pr_debug() pattern > > (using the PDN information to form the PCI address) is quite common > > across the EEH code, so I think rather than changing a couple of > > specific cases, I should do a separate cleanup patch and introduce > > something like pdn_debug(pdn, ""). What do you think? > > I'd switch them all to already existing dev_dbg/pci_debug rather than > adding pdn_debug as imho it should not have been used in the first place > really... > > > (I don't know exactly when edev->pdev can be NULL.) > > ... and if you switch to dev_dbg/pci_debug, I think quite soon you'll > know if it can or cannot be NULL :) As far as I can tell edev->pdev is NULL in two cases: 1. Before eeh_device_add_late() has been called on the pdev. The late part of the add maps the pdev to an edev and sets the pdev's edev pointer and vice versa. 2. While recovering EEH unaware devices. Unaware devices are destroyed and rescanned and the edev->pdev pointer is cleared by pcibios_device_release() In most of these cases it should be safe to use the pci_*() functions rather than making a new one up for printing pdns. In the cases where we might not have a PCI dev I'd make a new set of prints that take an EEH dev rather than a pci_dn since I'd like pci_dn to die sooner rather than later. Oliver
Re: [PATCH v2 3/6] powerpc/eeh: Improve debug messages around device addition
On 19/06/2019 14:27, Sam Bobroff wrote: > On Tue, Jun 11, 2019 at 03:47:58PM +1000, Alexey Kardashevskiy wrote: >> >> >> On 07/05/2019 14:30, Sam Bobroff wrote: >>> Also remove useless comment. >>> >>> Signed-off-by: Sam Bobroff >>> Reviewed-by: Alexey Kardashevskiy >>> --- >>> arch/powerpc/kernel/eeh.c| 2 +- >>> arch/powerpc/platforms/powernv/eeh-powernv.c | 14 >>> arch/powerpc/platforms/pseries/eeh_pseries.c | 23 +++- >>> 3 files changed, 28 insertions(+), 11 deletions(-) >>> >>> diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c >>> index 8d3c36a1f194..b14d89547895 100644 >>> --- a/arch/powerpc/kernel/eeh.c >>> +++ b/arch/powerpc/kernel/eeh.c >>> @@ -1291,7 +1291,7 @@ void eeh_add_device_late(struct pci_dev *dev) >>> pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); >>> edev = pdn_to_eeh_dev(pdn); >>> if (edev->pdev == dev) { >>> - pr_debug("EEH: Already referenced !\n"); >>> + pr_debug("EEH: Device %s already referenced!\n", pci_name(dev)); >>> return; >>> } >>> >>> diff --git a/arch/powerpc/platforms/powernv/eeh-powernv.c >>> b/arch/powerpc/platforms/powernv/eeh-powernv.c >>> index 6fc1a463b796..0e374cdba961 100644 >>> --- a/arch/powerpc/platforms/powernv/eeh-powernv.c >>> +++ b/arch/powerpc/platforms/powernv/eeh-powernv.c >>> @@ -50,10 +50,7 @@ void pnv_pcibios_bus_add_device(struct pci_dev *pdev) >>> if (!pdev->is_virtfn) >>> return; >>> >>> - /* >>> -* The following operations will fail if VF's sysfs files >>> -* aren't created or its resources aren't finalized. >>> -*/ >>> + pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev)); >> >> >> dev_dbg() seems more appropriate. > > Oh! It does, or even pci_debug() :-) > > I'll change it if I need to do another version, otherwise I'll clean it > up later. 
> >>> eeh_add_device_early(pdn); >>> eeh_add_device_late(pdev); >>> eeh_sysfs_add_device(pdev); >>> @@ -397,6 +394,10 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void >>> *data) >>> int ret; >>> int config_addr = (pdn->busno << 8) | (pdn->devfn); >>> >>> + pr_debug("%s: probing %04x:%02x:%02x.%01x\n", >>> + __func__, hose->global_number, pdn->busno, >>> + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); >>> + >>> /* >>> * When probing the root bridge, which doesn't have any >>> * subordinate PCI devices. We don't have OF node for >>> @@ -491,6 +492,11 @@ static void *pnv_eeh_probe(struct pci_dn *pdn, void >>> *data) >>> /* Save memory bars */ >>> eeh_save_bars(edev); >>> >>> + pr_debug("%s: EEH enabled on %02x:%02x.%01x PHB#%x-PE#%x\n", >>> + __func__, pdn->busno, PCI_SLOT(pdn->devfn), >>> + PCI_FUNC(pdn->devfn), edev->pe->phb->global_number, >>> + edev->pe->addr); >>> + >>> return NULL; >>> } >>> >>> diff --git a/arch/powerpc/platforms/pseries/eeh_pseries.c >>> b/arch/powerpc/platforms/pseries/eeh_pseries.c >>> index 7aa50258dd42..ae06878fbdea 100644 >>> --- a/arch/powerpc/platforms/pseries/eeh_pseries.c >>> +++ b/arch/powerpc/platforms/pseries/eeh_pseries.c >>> @@ -65,6 +65,8 @@ void pseries_pcibios_bus_add_device(struct pci_dev *pdev) >>> if (!pdev->is_virtfn) >>> return; >>> >>> + pr_debug("%s: EEH: Setting up device %s.\n", __func__, pci_name(pdev)); >>> + >>> pdn->device_id = pdev->device; >>> pdn->vendor_id = pdev->vendor; >>> pdn->class_code = pdev->class; >>> @@ -251,6 +253,10 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, >>> void *data) >>> int enable = 0; >>> int ret; >>> >>> + pr_debug("%s: probing %04x:%02x:%02x.%01x\n", >>> + __func__, pdn->phb->global_number, pdn->busno, >>> + PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); >>> + >>> /* Retrieve OF node and eeh device */ >>> edev = pdn_to_eeh_dev(pdn); >>> if (!edev || edev->pe) >>> @@ -294,7 +300,12 @@ static void *pseries_eeh_probe(struct pci_dn *pdn, >>> void *data) >>> >>> /* Enable EEH on 
the device */ >>> ret = eeh_ops->set_option(&pe, EEH_OPT_ENABLE); >>> - if (!ret) { >>> + if (ret) { >>> + pr_debug("%s: EEH failed to enable on %02x:%02x.%01x >>> PHB#%x-PE#%x (code %d)\n", >>> + __func__, pdn->busno, PCI_SLOT(pdn->devfn), >>> + PCI_FUNC(pdn->devfn), pe.phb->global_number, >>> + pe.addr, ret); >>> + } else { >> >> >> edev!=NULL here so you could do dev_dbg(&edev->pdev->dev,...) and skip >> PCI_SLOT/PCI_FUNC. Or is (edev!=NULL && edev->pdev==NULL) possible (it >> could be, just asking)? > > I can see that edev will be non-NULL here, but that pr_debug() pattern > (using the PDN information to form the PCI address) is quite common > across the EEH code, so I think rather than changing a couple of > specific cases, I should do a separate cleanup patch and introduce > something like p
Re: [PATCH v2 01/10] powerpc/8xx: move CPM1 related files from sysdev/ to platforms/8xx
Hi Christophe, Thank you for the patch! Yet something to improve: [auto build test ERROR on powerpc/next] [also build test ERROR on v5.2-rc5 next-20190619] [if your patch is applied to the wrong git tree, please drop us a note to help improve the system] url: https://github.com/0day-ci/linux/commits/Christophe-Leroy/powerpc-8xx-move-CPM1-related-files-from-sysdev-to-platforms-8xx/20190613-184514 base: https://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux.git next config: powerpc-tqm8555_defconfig (attached as .config) compiler: powerpc-linux-gcc (GCC) 7.4.0 reproduce: wget https://raw.githubusercontent.com/intel/lkp-tests/master/sbin/make.cross -O ~/bin/make.cross chmod +x ~/bin/make.cross # save the attached .config to linux build tree GCC_VERSION=7.4.0 make.cross ARCH=powerpc If you fix the issue, kindly add following tag Reported-by: kbuild test robot All errors (new ones prefixed by >>): >> make[3]: *** No rule to make target 'arch/powerpc/sysdev/cpm_gpio.o', needed >> by 'arch/powerpc/sysdev/built-in.a'. make[3]: Target '__build' not remade because of errors. --- 0-DAY kernel test infrastructureOpen Source Technology Center https://lists.01.org/pipermail/kbuild-all Intel Corporation .config.gz Description: application/gzip
[PATCH 2/3] KVM: PPC: Book3S HV: Sign extend decrementer value if not using large decr
On POWER9 the decrementer can operate in large decrementer mode where the decrementer is 56 bits and sign extended to 64 bits. When not operating in this mode the decrementer behaves as a 32 bit decrementer which is NOT sign extended (as on POWER8). Currently when reading a guest decrementer value we don't take into account whether the large decrementer is enabled or not, and this means the value will be incorrect when the guest is not using the large decrementer. Fix this by sign extending the value read when the guest isn't using the large decrementer. Fixes: 95a6432ce903 "KVM: PPC: Book3S HV: Streamlined guest entry/exit path on P9 for radix guests" Signed-off-by: Suraj Jitindar Singh --- arch/powerpc/kvm/book3s_hv.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index d3684509da35..719fd2529eec 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -3607,6 +3607,8 @@ int kvmhv_p9_guest_entry(struct kvm_vcpu *vcpu, u64 time_limit, vcpu->arch.slb_max = 0; dec = mfspr(SPRN_DEC); + if (!(lpcr & LPCR_LD)) /* Sign extend if not using large decrementer */ + dec = (s32) dec; tb = mftb(); vcpu->arch.dec_expires = dec + tb; vcpu->cpu = -1; -- 2.13.6
[PATCH 3/3] KVM: PPC: Book3S HV: Clear pending decr exceptions on nested guest entry
If we enter an L1 guest with a pending decrementer exception then this is cleared on guest exit if the guest has written a positive value into the decrementer (indicating that it handled the decrementer exception) since there is no other way to detect that the guest has handled the pending exception and that it should be dequeued. In the event that the L1 guest tries to run a nested (L2) guest immediately after this and the L2 guest decrementer is negative (which is loaded by L1 before making the H_ENTER_NESTED hcall), then the pending decrementer exception isn't cleared and the L2 entry is blocked since L1 has a pending exception, even though L1 may have already handled the exception and written a positive value for its decrementer. This results in a loop of L1 trying to enter the L2 guest and L0 blocking the entry since L1 has an interrupt pending with the outcome being that L2 never gets to run and hangs. Fix this by clearing any pending decrementer exceptions when L1 makes the H_ENTER_NESTED hcall since it won't do this if its decrementer has gone negative, and anyway its decrementer has been communicated to L0 in the hdec_expires field and L0 will return control to L1 when this goes negative by delivering an H_DECREMENTER exception.
Fixes: 95a6432ce903 "KVM: PPC: Book3S HV: Streamlined guest entry/exit path on P9 for radix guests" Signed-off-by: Suraj Jitindar Singh --- arch/powerpc/kvm/book3s_hv.c | 11 +-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 719fd2529eec..4a5eb29b952f 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -4128,8 +4128,15 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, preempt_enable(); - /* cancel pending decrementer exception if DEC is now positive */ - if (get_tb() < vcpu->arch.dec_expires && kvmppc_core_pending_dec(vcpu)) + /* +* cancel pending decrementer exception if DEC is now positive, or if +* entering a nested guest in which case the decrementer is now owned +* by L2 and the L1 decrementer is provided in hdec_expires +*/ + if (kvmppc_core_pending_dec(vcpu) && + ((get_tb() < vcpu->arch.dec_expires) || +(trap == BOOK3S_INTERRUPT_SYSCALL && + kvmppc_get_gpr(vcpu, 3) == H_ENTER_NESTED))) kvmppc_core_dequeue_dec(vcpu); trace_kvm_guest_exit(vcpu); -- 2.13.6
[PATCH 1/3] KVM: PPC: Book3S HV: Invalidate ERAT when flushing guest TLB entries
When a guest vcpu moves from one physical thread to another it is necessary for the host to perform a tlb flush on the previous core if another vcpu from the same guest is going to run there. This is because the guest may use the local form of the tlb invalidation instruction meaning stale tlb entries would persist where it previously ran. This is handled on guest entry in kvmppc_check_need_tlb_flush() which calls flush_guest_tlb() to perform the tlb flush. Previously the generic radix__local_flush_tlb_lpid_guest() function was used, however the functionality was reimplemented in flush_guest_tlb() to avoid the trace_tlbie() call as the flushing may be done in real mode. The reimplementation in flush_guest_tlb() was missing an erat invalidation after flushing the tlb. This led to observable memory corruption in the guest due to the caching of stale translations. Fix this by adding the erat invalidation. Fixes: 70ea13f6e609 "KVM: PPC: Book3S HV: Flush TLB on secondary radix threads" Signed-off-by: Suraj Jitindar Singh --- arch/powerpc/kvm/book3s_hv_builtin.c | 1 + 1 file changed, 1 insertion(+) diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index 6035d24f1d1d..a46286f73eec 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -833,6 +833,7 @@ static void flush_guest_tlb(struct kvm *kvm) } } asm volatile("ptesync": : :"memory"); + asm volatile(PPC_INVALIDATE_ERAT : : :"memory"); } void kvmppc_check_need_tlb_flush(struct kvm *kvm, int pcpu, -- 2.13.6
Re: [PATCH 3/4] powerpc/powernv: remove dead NPU DMA code
On 19/06/2019 17:28, Christoph Hellwig wrote: > On Wed, Jun 19, 2019 at 10:34:54AM +1000, Alexey Kardashevskiy wrote: >> >> >> On 23/05/2019 17:49, Christoph Hellwig wrote: >>> None of these routines were ever used since they were added to the >>> kernel. >> >> >> It is still being used exactly in the way as it was explained before in >> previous respins. Thanks. > > Please point to the in-kernel user, because that is the only relevant > one. This is not just my opinion but we had a clear discussion on that > at least years kernel summit. There is no in-kernel user which still does not mean that the code is dead. If it is irrelevant - put this to the commit log instead of saying it is dead; also if there was a clear outcome from that discussion, then please point me to that, I do not get to attend these discussions. Thanks, -- Alexey
Re: [PATCH 0/2] Fix handling of h_set_dawr
On Mon, 2019-06-17 at 11:06 +0200, Cédric Le Goater wrote: > On 17/06/2019 09:16, Suraj Jitindar Singh wrote: > > Series contains 2 patches to fix the host in kernel handling of the > > hcall > > h_set_dawr. > > > > First patch from Michael Neuling is just a resend added here for > > clarity. > > > > Michael Neuling (1): > > KVM: PPC: Book3S HV: Fix r3 corruption in h_set_dabr() > > > > Suraj Jitindar Singh (1): > > KVM: PPC: Book3S HV: Only write DAWR[X] when handling h_set_dawr > > in > > real mode > > > > Reviewed-by: Cédric Le Goater > > and > > Tested-by: Cédric Le Goater > > > but I see slowdowns in nested as if the IPIs were not delivered. Have > we > touch this part in 5.2 ? Hi, I've seen the same and tracked it down to decrementer exceptions not being delivered when the guest is using large decrementer. I've got a patch I'm about to send so I'll CC you. Another option is to disable the large decrementer with: -machine pseries,cap-large-decr=false Thanks, Suraj > > Thanks, > > C. >
Re: [PATCH v5 2/2] powerpc: Fix compile issue with force DAWR
Le 19/06/2019 à 03:11, Michael Neuling a écrit : On Tue, 2019-06-18 at 18:28 +0200, Christophe Leroy wrote: Le 04/06/2019 à 05:00, Michael Neuling a écrit : If you compile with KVM but without CONFIG_HAVE_HW_BREAKPOINT you fail at linking with: arch/powerpc/kvm/book3s_hv_rmhandlers.o:(.text+0x708): undefined reference to `dawr_force_enable' This was caused by commit c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option"). This moves a bunch of code around to fix this. It moves a lot of the DAWR code in a new file and creates a new CONFIG_PPC_DAWR to enable compiling it. After looking at all this once more, I'm just wondering: why are we creating stuff specific to DAWR ? In the old days, we only add DABR, and everything was named on DABR. When DAWR was introduced some years ago we renamed stuff like do_dabr() to do_break() so that we could regroup things together. And now we are taking dawr() out of the rest. Why not keep dabr() stuff and dawr() stuff all together in something dedicated to breakpoints, and try to regroup all breakpoint stuff in a single place ? I see some breakpointing stuff done in kernel/process.c and other things done in hw_breakpoint.c, to common functions call from one file to the other, preventing GCC to fully optimise, etc ... Also, behing this thinking, I have the idea that we could easily implement 512 bytes breakpoints on the 8xx too. The 8xx have neither DABR nor DAWR, but is using a set of comparators. And as you can see in the 8xx version of __set_dabr() in kernel/process.c, we emulate the DABR behaviour by setting two comparators. By using the same comparators with a different setup, we should be able to implement breakpoints on larger ranges of address. Christophe I agree that their are opportunities to refactor this code and I appreciate your efforts in making this code better but... We have a problem here of not being able to compile an odd ball case that almost no one ever hits (it was just an odd mpe CI case). 
We're up to v5 of a simple fix which is just silly. So let's get this fix in and move on to the whole bunch of refactoring we can do in this code which is already documented in the github issue tracking. Agreed. I've filed the following issue to keep that in mind: https://github.com/linuxppc/issues/issues/251 Thanks Christophe
Re: [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
Nicholas Piggin wrote: Naveen N. Rao's on June 19, 2019 7:53 pm: Nicholas Piggin wrote: Michael Ellerman's on June 19, 2019 3:14 pm: I'm also not convinced the ordering between the two patches is guaranteed by the ISA, given that there's possibly no isync on the other CPU. Will they go through a context synchronizing event? synchronize_rcu_tasks() should ensure a thread is scheduled away, but I'm not actually sure it guarantees CSI if it's kernel->kernel. Could do a smp_call_function to do the isync on each CPU to be sure. Good point. Per Documentation/RCU/Design/Requirements/Requirements.html#Tasks RCU: "The solution, in the form of Tasks RCU, is to have implicit read-side critical sections that are delimited by voluntary context switches, that is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In addition, transitions to and from userspace execution also delimit tasks-RCU read-side critical sections." I suppose transitions to/from userspace, as well as calls to schedule() result in context synchronizing instruction being executed. But, if some tasks call cond_resched() and synchronize_rcu_tasks(), we probably won't have a CSI executed. Also: "In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, so these APIs map to call_rcu(), synchronize_rcu(), and rcu_barrier(), respectively." In this scenario as well, I think we won't have a CSI executed in case of cond_resched(). Should we enhance patch_instruction() to handle that? Well, not sure. Do we have many post-boot callers of it? Should they take care of their own synchronization requirements? Kprobes and ftrace are the two users (along with anything else that may use jump labels). Looking at this from the CMODX perspective: the main example quoted of an erratic behavior is when any variant of the patched instruction causes an exception. With ftrace, I think we are ok since we only ever patch a 'nop' or a 'bl' (and the 'mflr' now), none of which should cause an exception. 
As such, the existing patch_instruction() should suffice. However, with kprobes, we patch a 'trap' (or a branch in case of optprobes) on most instructions. I wonder if we should be issuing an 'isync' on all cpus in this case. Or, even if that is sufficient or necessary. Thanks, Naveen
Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
Le 19/06/2019 à 05:59, Nicholas Piggin a écrit : Christophe Leroy's on June 11, 2019 4:46 pm: Le 10/06/2019 à 05:08, Nicholas Piggin a écrit : I would like to remove the early ioremap or make it into its own function. Re-implement map_kernel_page with ioremap_page_range, allow page tables that don't use slab to avoid the early check, unbolt the hptes mapped in early boot, etc. Getting early ioremap out of the picture is a very good idea, it will help making things more common between all platform types. Today we face the fact that PPC32 allocates early io from the top of memory while PPC64 allocates it from the bottom of memory. Any idea on how to proceed ? Christophe
[PATCH v3] KVM: PPC: Report single stepping capability
When calling the KVM_SET_GUEST_DEBUG ioctl, userspace might request the next instruction to be single stepped via the KVM_GUESTDBG_SINGLESTEP control bit of the kvm_guest_debug structure. This patch adds the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability in order to inform userspace about the state of single stepping support. We currently don't have support for guest single stepping implemented in Book3S HV so the capability is only present for Book3S PR and BookE. Signed-off-by: Fabiano Rosas --- v1 -> v2: - add capability description to Documentation/virtual/kvm/api.txt v2 -> v3: - be explicit in the commit message about when the capability is present - remove unnecessary check for CONFIG_BOOKE Documentation/virtual/kvm/api.txt | 3 +++ arch/powerpc/kvm/powerpc.c| 2 ++ include/uapi/linux/kvm.h | 1 + 3 files changed, 6 insertions(+) diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index ba6c42c576dd..a77643bfa917 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -2969,6 +2969,9 @@ can be determined by querying the KVM_CAP_GUEST_DEBUG_HW_BPS and KVM_CAP_GUEST_DEBUG_HW_WPS capabilities which return a positive number indicating the number of supported registers. +For ppc, the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability indicates whether +the single-step debug event (KVM_GUESTDBG_SINGLESTEP) is supported. + When debug events exit the main run loop with the reason KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run structure containing architecture specific debug information. 
diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 6d704ad2472b..bd0a73eaf7ba 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -527,6 +527,8 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; + case KVM_CAP_PPC_GUEST_DEBUG_SSTEP: + /* fall through */ case KVM_CAP_PPC_PAIRED_SINGLES: case KVM_CAP_PPC_OSI: case KVM_CAP_PPC_GET_PVINFO: diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 2fe12b40d503..cad9fcd90f39 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -993,6 +993,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_ARM_SVE 170 #define KVM_CAP_ARM_PTRAUTH_ADDRESS 171 #define KVM_CAP_ARM_PTRAUTH_GENERIC 172 +#define KVM_CAP_PPC_GUEST_DEBUG_SSTEP 173 #ifdef KVM_CAP_IRQ_ROUTING -- 2.20.1
Re: [RFC PATCH v0] powerpc: Fix BUG_ON during memory unplug on radix
On Wed, Jun 19, 2019 at 02:36:54PM +0530, Aneesh Kumar K.V wrote: > Bharata B Rao writes: > > > We hit the following BUG_ON when memory hotplugged before reboot > > is unplugged after reboot: > > > > kernel BUG at arch/powerpc/mm/pgtable-frag.c:113! > > > > remove_pagetable+0x594/0x6a0 > > (unreliable) > > remove_pagetable+0x94/0x6a0 > > vmemmap_free+0x394/0x410 > > sparse_remove_one_section+0x26c/0x2e8 > > __remove_pages+0x428/0x540 > > arch_remove_memory+0xd0/0x170 > > __remove_memory+0xd4/0x1a0 > > dlpar_remove_lmb+0xbc/0x110 > > dlpar_memory+0xa80/0xd20 > > handle_dlpar_errorlog+0xa8/0x160 > > pseries_hp_work_fn+0x2c/0x60 > > process_one_work+0x46c/0x860 > > worker_thread+0x364/0x5e0 > > kthread+0x1b0/0x1c0 > > ret_from_kernel_thread+0x5c/0x68 > > > > This occurs because, during reboot-after-hotplug, the hotplugged > > memory range gets initialized as regular memory and page > > tables are setup using memblock allocator. This means that we > > wouldn't have initialized the PMD or PTE fragment count for > > those PMD or PTE pages. > > > > Fixing this includes 3 aspects: > > > > - Walk the init_mm page tables from mem_init() and initialize > > the PMD and PTE fragment counts appropriately. > > - When we do early allocation of PMD (and PGD as well) pages, > > allocate in page size PAGE_SIZE granularity so that we are > > sure that the complete page is available for us to set the > > fragment count which is part of struct page. > > > That is an important change now. For early page table we now allocate > PAGE_SIZE tables and hencec we consider then as pages with fragment > count 1. You also may want to explain here why. Sure will make this clear in my next version. > I guess the challenge is > due to the fact that we can't clearly control how the rest of the page > will get used and we are not sure they all will be allocated for backing > page table pages. 
> > > - When PMD or PTE page is freed, check if it comes from memblock > > allocator and free it appropriately. > > > > Reported-by: Srikanth Aithal > > Signed-off-by: Bharata B Rao > > --- > > arch/powerpc/include/asm/book3s/64/radix.h | 1 + > > arch/powerpc/include/asm/sparsemem.h | 1 + > > arch/powerpc/mm/book3s64/pgtable.c | 12 +++- > > arch/powerpc/mm/book3s64/radix_pgtable.c | 67 +- > > arch/powerpc/mm/mem.c | 5 ++ > > arch/powerpc/mm/pgtable-frag.c | 5 +- > > 6 files changed, 87 insertions(+), 4 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h > > b/arch/powerpc/include/asm/book3s/64/radix.h > > index 574eca33f893..4320f2790e8d 100644 > > --- a/arch/powerpc/include/asm/book3s/64/radix.h > > +++ b/arch/powerpc/include/asm/book3s/64/radix.h > > @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void) > > #ifdef CONFIG_MEMORY_HOTPLUG > > int radix__create_section_mapping(unsigned long start, unsigned long end, > > int nid); > > int radix__remove_section_mapping(unsigned long start, unsigned long end); > > +void radix__fixup_pgtable_fragments(void); > > #endif /* CONFIG_MEMORY_HOTPLUG */ > > #endif /* __ASSEMBLY__ */ > > #endif > > diff --git a/arch/powerpc/include/asm/sparsemem.h > > b/arch/powerpc/include/asm/sparsemem.h > > index 3192d454a733..e662f9232d35 100644 > > --- a/arch/powerpc/include/asm/sparsemem.h > > +++ b/arch/powerpc/include/asm/sparsemem.h > > @@ -15,6 +15,7 @@ > > #ifdef CONFIG_MEMORY_HOTPLUG > > extern int create_section_mapping(unsigned long start, unsigned long end, > > int nid); > > extern int remove_section_mapping(unsigned long start, unsigned long end); > > +void fixup_pgtable_fragments(void); > > > > #ifdef CONFIG_PPC_BOOK3S_64 > > extern int resize_hpt_for_hotplug(unsigned long new_mem_size); > > diff --git a/arch/powerpc/mm/book3s64/pgtable.c > > b/arch/powerpc/mm/book3s64/pgtable.c > > index 01bc9663360d..7efe9cc16b39 100644 > > --- a/arch/powerpc/mm/book3s64/pgtable.c > > +++ 
b/arch/powerpc/mm/book3s64/pgtable.c > > @@ -186,6 +186,13 @@ int __meminit remove_section_mapping(unsigned long > > start, unsigned long end) > > > > return hash__remove_section_mapping(start, end); > > } > > + > > +void fixup_pgtable_fragments(void) > > +{ > > + if (radix_enabled()) > > + radix__fixup_pgtable_fragments(); > > +} > > + > > #endif /* CONFIG_MEMORY_HOTPLUG */ > > > > void __init mmu_partition_table_init(void) > > @@ -320,7 +327,10 @@ void pmd_fragment_free(unsigned long *pmd) > > BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); > > if (atomic_dec_and_test(&page->pt_frag_refcount)) { > > pgtable_pmd_page_dtor(page); > > - __free_page(page); > > + if (PageReserved(page)) > > + free_reserved_page(page); > > + else > > + __free_page(page); > > } > > } > > > > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c > > b/arch/powerpc/mm/book3s64/radix_pg
Re: [RFC PATCH v0] powerpc: Fix BUG_ON during memory unplug on radix
On Wed, Jun 19, 2019 at 08:17:01PM +1000, Nicholas Piggin wrote: > Bharata B Rao's on June 19, 2019 5:45 pm: > > We hit the following BUG_ON when memory hotplugged before reboot > > is unplugged after reboot: > > > > kernel BUG at arch/powerpc/mm/pgtable-frag.c:113! > > > > remove_pagetable+0x594/0x6a0 > > (unreliable) > > remove_pagetable+0x94/0x6a0 > > vmemmap_free+0x394/0x410 > > sparse_remove_one_section+0x26c/0x2e8 > > __remove_pages+0x428/0x540 > > arch_remove_memory+0xd0/0x170 > > __remove_memory+0xd4/0x1a0 > > dlpar_remove_lmb+0xbc/0x110 > > dlpar_memory+0xa80/0xd20 > > handle_dlpar_errorlog+0xa8/0x160 > > pseries_hp_work_fn+0x2c/0x60 > > process_one_work+0x46c/0x860 > > worker_thread+0x364/0x5e0 > > kthread+0x1b0/0x1c0 > > ret_from_kernel_thread+0x5c/0x68 > > > > This occurs because, during reboot-after-hotplug, the hotplugged > > memory range gets initialized as regular memory and page > > tables are setup using memblock allocator. This means that we > > wouldn't have initialized the PMD or PTE fragment count for > > those PMD or PTE pages. > > > > Fixing this includes 3 aspects: > > > > - Walk the init_mm page tables from mem_init() and initialize > > the PMD and PTE fragment counts appropriately. > > - When we do early allocation of PMD (and PGD as well) pages, > > allocate in page size PAGE_SIZE granularity so that we are > > sure that the complete page is available for us to set the > > fragment count which is part of struct page. > > - When PMD or PTE page is freed, check if it comes from memblock > > allocator and free it appropriately. 
> > > > Reported-by: Srikanth Aithal > > Signed-off-by: Bharata B Rao > > --- > > arch/powerpc/include/asm/book3s/64/radix.h | 1 + > > arch/powerpc/include/asm/sparsemem.h | 1 + > > arch/powerpc/mm/book3s64/pgtable.c | 12 +++- > > arch/powerpc/mm/book3s64/radix_pgtable.c | 67 +- > > arch/powerpc/mm/mem.c | 5 ++ > > arch/powerpc/mm/pgtable-frag.c | 5 +- > > 6 files changed, 87 insertions(+), 4 deletions(-) > > > > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h > > b/arch/powerpc/include/asm/book3s/64/radix.h > > index 574eca33f893..4320f2790e8d 100644 > > --- a/arch/powerpc/include/asm/book3s/64/radix.h > > +++ b/arch/powerpc/include/asm/book3s/64/radix.h > > @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void) > > #ifdef CONFIG_MEMORY_HOTPLUG > > int radix__create_section_mapping(unsigned long start, unsigned long end, > > int nid); > > int radix__remove_section_mapping(unsigned long start, unsigned long end); > > +void radix__fixup_pgtable_fragments(void); > > #endif /* CONFIG_MEMORY_HOTPLUG */ > > #endif /* __ASSEMBLY__ */ > > #endif > > diff --git a/arch/powerpc/include/asm/sparsemem.h > > b/arch/powerpc/include/asm/sparsemem.h > > index 3192d454a733..e662f9232d35 100644 > > --- a/arch/powerpc/include/asm/sparsemem.h > > +++ b/arch/powerpc/include/asm/sparsemem.h > > @@ -15,6 +15,7 @@ > > #ifdef CONFIG_MEMORY_HOTPLUG > > extern int create_section_mapping(unsigned long start, unsigned long end, > > int nid); > > extern int remove_section_mapping(unsigned long start, unsigned long end); > > +void fixup_pgtable_fragments(void); > > > > #ifdef CONFIG_PPC_BOOK3S_64 > > extern int resize_hpt_for_hotplug(unsigned long new_mem_size); > > diff --git a/arch/powerpc/mm/book3s64/pgtable.c > > b/arch/powerpc/mm/book3s64/pgtable.c > > index 01bc9663360d..7efe9cc16b39 100644 > > --- a/arch/powerpc/mm/book3s64/pgtable.c > > +++ b/arch/powerpc/mm/book3s64/pgtable.c > > @@ -186,6 +186,13 @@ int __meminit remove_section_mapping(unsigned long > > 
start, unsigned long end) > > > > return hash__remove_section_mapping(start, end); > > } > > + > > +void fixup_pgtable_fragments(void) > > +{ > > + if (radix_enabled()) > > + radix__fixup_pgtable_fragments(); > > +} > > + > > #endif /* CONFIG_MEMORY_HOTPLUG */ > > > > void __init mmu_partition_table_init(void) > > @@ -320,7 +327,10 @@ void pmd_fragment_free(unsigned long *pmd) > > BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); > > if (atomic_dec_and_test(&page->pt_frag_refcount)) { > > pgtable_pmd_page_dtor(page); > > - __free_page(page); > > + if (PageReserved(page)) > > + free_reserved_page(page); > > Hmm. Rather than adding this special case here, I wonder if you can > just go along in your fixup walk and convert all these pages to > non-reserved pages? > > ClearPageReserved ; init_page_count ; adjust_managed_page_count ; > should do the trick, right? Yes, that should. We are anyway fixing the frag count during the walk, might as well do all the above too and avoid the special case in the free path. Regards, Bharata.
Re: [PATCH] ocxl: Update for AFU descriptor template version 1.1
On 05/06/2019 13:15, Frederic Barrat wrote: From: Alastair D'Silva The OpenCAPI discovery and configuration specification has been updated and introduces version 1.1 of the AFU descriptor template, with new fields to better define the memory layout of an OpenCAPI adapter. The ocxl driver doesn't do much yet to support LPC memory but as we start seeing (non-LPC) AFU images using the new template, this patch updates the config space parsing code to avoid spitting out a warning. Signed-off-by: Alastair D'Silva Signed-off-by: Frederic Barrat --- The content of the patch sounds good. Thanks. Reviewed-by: Christophe Lombard
Re: [PATCH] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac
On Wed, Jun 19, 2019 at 4:18 PM Benjamin Herrenschmidt wrote: > > On Wed, 2019-06-19 at 22:32 +1000, Michael Ellerman wrote: > > Christoph Hellwig writes: > > > Any chance this could get picked up to fix the regression? > > > > Was hoping Ben would Ack it. He's still powermac maintainer :) > > > > I guess he OK'ed it in the other thread, will add it to my queue. > > Yeah ack. If I had written it myself, I would have made the DMA bits a > variable and only set it down to 30 if I see that device in the DT > early on, but I can't be bothered now, if it works, ship it :-) > > Note: The patch affects all ppc32, though I don't think it will cause > any significant issue on those who don't need it. Thanks, that answer my earlier question. > Cheers, > Ben. > > > cheers > > > > > On Thu, Jun 13, 2019 at 10:24:46AM +0200, Christoph Hellwig wrote: > > > > With the strict dma mask checking introduced with the switch to > > > > the generic DMA direct code common wifi chips on 32-bit > > > > powerbooks > > > > stopped working. Add a 30-bit ZONE_DMA to the 32-bit pmac builds > > > > to allow them to reliably allocate dma coherent memory. > > > > > > > > Fixes: 65a21b71f948 ("powerpc/dma: remove > > > > dma_nommu_dma_supported") > > > > Reported-by: Aaro Koskinen > > > > Signed-off-by: Christoph Hellwig > > > > --- > > > > arch/powerpc/include/asm/page.h | 7 +++ > > > > arch/powerpc/mm/mem.c | 3 ++- > > > > arch/powerpc/platforms/powermac/Kconfig | 1 + > > > > 3 files changed, 10 insertions(+), 1 deletion(-) > > > > > > > > diff --git a/arch/powerpc/include/asm/page.h > > > > b/arch/powerpc/include/asm/page.h > > > > index b8286a2013b4..0d52f57fca04 100644 > > > > --- a/arch/powerpc/include/asm/page.h > > > > +++ b/arch/powerpc/include/asm/page.h > > > > @@ -319,6 +319,13 @@ struct vm_area_struct; > > > > #endif /* __ASSEMBLY__ */ > > > > #include > > > > > > > > +/* > > > > + * Allow 30-bit DMA for very limited Broadcom wifi chips on many > > > > powerbooks. 
> > > > + */ > > > > +#ifdef CONFIG_PPC32 > > > > +#define ARCH_ZONE_DMA_BITS 30 > > > > +#else > > > > #define ARCH_ZONE_DMA_BITS 31 > > > > +#endif > > > > > > > > #endif /* _ASM_POWERPC_PAGE_H */ > > > > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > > > > index cba29131bccc..2540d3b2588c 100644 > > > > --- a/arch/powerpc/mm/mem.c > > > > +++ b/arch/powerpc/mm/mem.c > > > > @@ -248,7 +248,8 @@ void __init paging_init(void) > > > > (long int)((top_of_ram - total_ram) >> 20)); > > > > > > > > #ifdef CONFIG_ZONE_DMA > > > > - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL > > > > >> PAGE_SHIFT); > > > > + max_zone_pfns[ZONE_DMA] = min(max_low_pfn, > > > > + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> > > > > PAGE_SHIFT); > > > > #endif > > > > max_zone_pfns[ZONE_NORMAL] = max_low_pfn; > > > > #ifdef CONFIG_HIGHMEM > > > > diff --git a/arch/powerpc/platforms/powermac/Kconfig > > > > b/arch/powerpc/platforms/powermac/Kconfig > > > > index f834a19ed772..c02d8c503b29 100644 > > > > --- a/arch/powerpc/platforms/powermac/Kconfig > > > > +++ b/arch/powerpc/platforms/powermac/Kconfig > > > > @@ -7,6 +7,7 @@ config PPC_PMAC > > > > select PPC_INDIRECT_PCI if PPC32 > > > > select PPC_MPC106 if PPC32 > > > > select PPC_NATIVE > > > > + select ZONE_DMA if PPC32 > > > > default y > > > > > > > > config PPC_PMAC64 > > > > -- > > > > 2.20.1 > > > > > > ---end quoted text--- >
Re: [PATCH] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac
On Wed, 2019-06-19 at 22:32 +1000, Michael Ellerman wrote: > Christoph Hellwig writes: > > Any chance this could get picked up to fix the regression? > > Was hoping Ben would Ack it. He's still powermac maintainer :) > > I guess he OK'ed it in the other thread, will add it to my queue. Yeah ack. If I had written it myself, I would have made the DMA bits a variable and only set it down to 30 if I see that device in the DT early on, but I can't be bothered now, if it works, ship it :-) Note: The patch affects all ppc32, though I don't think it will cause any significant issue on those who don't need it. Cheers, Ben. > cheers > > > On Thu, Jun 13, 2019 at 10:24:46AM +0200, Christoph Hellwig wrote: > > > With the strict dma mask checking introduced with the switch to > > > the generic DMA direct code common wifi chips on 32-bit > > > powerbooks > > > stopped working. Add a 30-bit ZONE_DMA to the 32-bit pmac builds > > > to allow them to reliably allocate dma coherent memory. > > > > > > Fixes: 65a21b71f948 ("powerpc/dma: remove > > > dma_nommu_dma_supported") > > > Reported-by: Aaro Koskinen > > > Signed-off-by: Christoph Hellwig > > > --- > > > arch/powerpc/include/asm/page.h | 7 +++ > > > arch/powerpc/mm/mem.c | 3 ++- > > > arch/powerpc/platforms/powermac/Kconfig | 1 + > > > 3 files changed, 10 insertions(+), 1 deletion(-) > > > > > > diff --git a/arch/powerpc/include/asm/page.h > > > b/arch/powerpc/include/asm/page.h > > > index b8286a2013b4..0d52f57fca04 100644 > > > --- a/arch/powerpc/include/asm/page.h > > > +++ b/arch/powerpc/include/asm/page.h > > > @@ -319,6 +319,13 @@ struct vm_area_struct; > > > #endif /* __ASSEMBLY__ */ > > > #include > > > > > > +/* > > > + * Allow 30-bit DMA for very limited Broadcom wifi chips on many > > > powerbooks. 
> > > + */ > > > +#ifdef CONFIG_PPC32 > > > +#define ARCH_ZONE_DMA_BITS 30 > > > +#else > > > #define ARCH_ZONE_DMA_BITS 31 > > > +#endif > > > > > > #endif /* _ASM_POWERPC_PAGE_H */ > > > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > > > index cba29131bccc..2540d3b2588c 100644 > > > --- a/arch/powerpc/mm/mem.c > > > +++ b/arch/powerpc/mm/mem.c > > > @@ -248,7 +248,8 @@ void __init paging_init(void) > > > (long int)((top_of_ram - total_ram) >> 20)); > > > > > > #ifdef CONFIG_ZONE_DMA > > > - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL > > > >> PAGE_SHIFT); > > > + max_zone_pfns[ZONE_DMA] = min(max_low_pfn, > > > + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> > > > PAGE_SHIFT); > > > #endif > > > max_zone_pfns[ZONE_NORMAL] = max_low_pfn; > > > #ifdef CONFIG_HIGHMEM > > > diff --git a/arch/powerpc/platforms/powermac/Kconfig > > > b/arch/powerpc/platforms/powermac/Kconfig > > > index f834a19ed772..c02d8c503b29 100644 > > > --- a/arch/powerpc/platforms/powermac/Kconfig > > > +++ b/arch/powerpc/platforms/powermac/Kconfig > > > @@ -7,6 +7,7 @@ config PPC_PMAC > > > select PPC_INDIRECT_PCI if PPC32 > > > select PPC_MPC106 if PPC32 > > > select PPC_NATIVE > > > + select ZONE_DMA if PPC32 > > > default y > > > > > > config PPC_PMAC64 > > > -- > > > 2.20.1 > > > > ---end quoted text---
[RFC 00/11] opencapi: enable card reset and link retraining
This is the linux part of the work to use the PCI hotplug framework to control an opencapi card so that it can be reset and re-read after flashing a new FPGA image. It needs support in skiboot: http://patchwork.ozlabs.org/project/skiboot/list/?series=114803 On an old skiboot, it will do nothing. A virtual PCI slot is created for the opencapi adapter, and its state can be controlled through the pnv-php hotplug driver: echo 0|1 > /sys/bus/pci/slots/OPENCAPI-<...>/power Note that the power to the card is not really turned off, as the card needs to stay on to be flashed with a new image. Instead the card is placed in reset. The first part of the series mostly deals with the pci/ioda state, as the devices can now go away and the state needs to be cleaned up. The second part is modifications to the hotplug driver on powernv, so that a virtual slot is created for the opencapi adapters found in the device tree Frederic Barrat (11): powerpc/powernv/ioda: Fix ref count for devices with their own PE powerpc/powernv/ioda: Protect PE list powerpc/powernv/ioda: set up PE on opencapi device when enabling powerpc/powernv/ioda: Release opencapi device powerpc/powernv/ioda: Find opencapi slot for a device node pci/hotplug/pnv-php: Remove erroneous warning pci/hotplug/pnv-php: Improve error msg on power state change failure pci/hotplug/pnv-php: Register opencapi slots pci/hotplug/pnv-php: Relax check when disabling slot pci/hotplug/pnv-php: Wrap warnings in macro ocxl: Add PCI hotplug dependency to Kconfig arch/powerpc/include/asm/pnv-pci.h| 1 + arch/powerpc/platforms/powernv/pci-ioda.c | 106 ++ arch/powerpc/platforms/powernv/pci.c | 10 +- drivers/misc/ocxl/Kconfig | 1 + drivers/pci/hotplug/pnv_php.c | 66 -- 5 files changed, 115 insertions(+), 69 deletions(-) -- 2.21.0
[RFC 07/11] pci/hotplug/pnv-php: Improve error msg on power state change failure
When changing the slot state, if opal hits an error and tells as such in the asynchronous reply, the warning "Wrong msg" is logged, which is rather confusing. Instead we can reuse the better message which is already used when we couldn't submit the asynchronous opal request initially. Signed-off-by: Frederic Barrat --- drivers/pci/hotplug/pnv_php.c | 16 +++- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 5b5cbf1e636d..5cdd2a3a4dd9 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -336,18 +336,19 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot, ret = pnv_pci_set_power_state(php_slot->id, state, &msg); if (ret > 0) { if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle || - be64_to_cpu(msg.params[2]) != state || - be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) { + be64_to_cpu(msg.params[2]) != state) { pci_warn(php_slot->pdev, "Wrong msg (%lld, %lld, %lld)\n", be64_to_cpu(msg.params[1]), be64_to_cpu(msg.params[2]), be64_to_cpu(msg.params[3])); return -ENOMSG; } + if (be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) { + ret = -ENODEV; + goto error; + } } else if (ret < 0) { - pci_warn(php_slot->pdev, "Error %d powering %s\n", -ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off"); - return ret; + goto error; } if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE) @@ -356,6 +357,11 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot, ret = pnv_php_add_devtree(php_slot); return ret; + +error: + pci_warn(php_slot->pdev, "Error %d powering %s\n", + ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off"); + return ret; } EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state); -- 2.21.0
[RFC 11/11] ocxl: Add PCI hotplug dependency to Kconfig
The PCI hotplug framework is used to update the devices when a new image is written to the FPGA. Signed-off-by: Frederic Barrat --- drivers/misc/ocxl/Kconfig | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/misc/ocxl/Kconfig b/drivers/misc/ocxl/Kconfig index 7fb6d39d4c5a..13a5d9f30369 100644 --- a/drivers/misc/ocxl/Kconfig +++ b/drivers/misc/ocxl/Kconfig @@ -12,6 +12,7 @@ config OCXL tristate "OpenCAPI coherent accelerator support" depends on PPC_POWERNV && PCI && EEH select OCXL_BASE + select HOTPLUG_PCI_POWERNV default m help Select this option to enable the ocxl driver for Open -- 2.21.0
[RFC 06/11] pci/hotplug/pnv-php: Remove erroneous warning
On powernv, when removing a device through hotplug, the following warning is logged: Invalid refcount <.> on <...> It may be incorrect: the refcount may be set to a higher value than 1 and be valid. of_detach_node() can drop more than one reference. As it doesn't seem trivial to assert the correct value, let's remove the warning. Signed-off-by: Frederic Barrat --- drivers/pci/hotplug/pnv_php.c | 6 -- 1 file changed, 6 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 6758fd7c382e..5b5cbf1e636d 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -151,17 +151,11 @@ static void pnv_php_rmv_pdns(struct device_node *dn) static void pnv_php_detach_device_nodes(struct device_node *parent) { struct device_node *dn; - int refcount; for_each_child_of_node(parent, dn) { pnv_php_detach_device_nodes(dn); of_node_put(dn); - refcount = kref_read(&dn->kobj.kref); - if (refcount != 1) - pr_warn("Invalid refcount %d on <%pOF>\n", - refcount, dn); - of_detach_node(dn); } } -- 2.21.0
[RFC 04/11] powerpc/powernv/ioda: Release opencapi device
With hotplug, an opencapi device can now go away. It needs to be released, mostly to clean up its PE state. We were previously not defining any device callback. We can reuse the standard PCI release callback, it does a bit too much for an opencapi device, but it's harmless, and only needs minor tuning. Also separate the undo of the PELT-V code in a separate function, it is not needed for NPU devices and it improves a bit the readability of the code. Signed-off-by: Frederic Barrat --- arch/powerpc/platforms/powernv/pci-ioda.c | 58 +++ 1 file changed, 38 insertions(+), 20 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2cf06fb98978..33054d00b2c5 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -186,7 +186,7 @@ static void pnv_ioda_free_pe(struct pnv_ioda_pe *pe) unsigned int pe_num = pe->pe_number; WARN_ON(pe->pdev); - WARN_ON(pe->npucomp); /* NPUs are not supposed to be freed */ + WARN_ON(pe->npucomp); /* NPUs for nvlink are not supposed to be freed */ kfree(pe->npucomp); memset(pe, 0, sizeof(struct pnv_ioda_pe)); clear_bit(pe_num, phb->ioda.pe_alloc); @@ -775,6 +775,33 @@ static int pnv_ioda_set_peltv(struct pnv_phb *phb, return 0; } +static void pnv_ioda_unset_peltv(struct pnv_phb *phb, +struct pnv_ioda_pe *pe, +struct pci_dev *parent) +{ + int64_t rc; + + while (parent) { + struct pci_dn *pdn = pci_get_pdn(parent); + if (pdn && pdn->pe_number != IODA_INVALID_PE) { + rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + /* XXX What to do in case of error ? 
*/ + } + parent = parent->bus->self; + } + + opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, + OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + + /* Disassociate PE in PELT */ + rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, + pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); + if (rc) + pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); + +} + static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) { struct pci_dev *parent; @@ -825,25 +852,13 @@ static int pnv_ioda_deconfigure_pe(struct pnv_phb *phb, struct pnv_ioda_pe *pe) for (rid = pe->rid; rid < rid_end; rid++) phb->ioda.pe_rmap[rid] = IODA_INVALID_PE; - /* Release from all parents PELT-V */ - while (parent) { - struct pci_dn *pdn = pci_get_pdn(parent); - if (pdn && pdn->pe_number != IODA_INVALID_PE) { - rc = opal_pci_set_peltv(phb->opal_id, pdn->pe_number, - pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); - /* XXX What to do in case of error ? */ - } - parent = parent->bus->self; - } - - opal_pci_eeh_freeze_clear(phb->opal_id, pe->pe_number, - OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); + /* +* Release from all parents PELT-V. 
NPUs don't have a PELTV +* table +*/ + if (phb->type != PNV_PHB_NPU_NVLINK && phb->type != PNV_PHB_NPU_OCAPI) + pnv_ioda_unset_peltv(phb, pe, parent); - /* Disassociate PE in PELT */ - rc = opal_pci_set_peltv(phb->opal_id, pe->pe_number, - pe->pe_number, OPAL_REMOVE_PE_FROM_DOMAIN); - if (rc) - pe_warn(pe, "OPAL error %lld remove self from PELTV\n", rc); rc = opal_pci_set_pe(phb->opal_id, pe->pe_number, pe->rid, bcomp, dcomp, fcomp, OPAL_UNMAP_PE); if (rc) @@ -3528,6 +3543,8 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) case PNV_PHB_IODA2: pnv_pci_ioda2_release_pe_dma(pe); break; + case PNV_PHB_NPU_OCAPI: + break; default: WARN_ON(1); } @@ -3580,7 +3597,7 @@ static void pnv_pci_release_device(struct pci_dev *pdev) pe = &phb->ioda.pe_array[pdn->pe_number]; pdn->pe_number = IODA_INVALID_PE; - WARN_ON(--pe->device_count < 0); + WARN_ON((pe->flags != PNV_IODA_PE_DEV) && (--pe->device_count < 0)); if (pe->device_count == 0) pnv_ioda_release_pe(pe); } @@ -3629,6 +3646,7 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { .enable_device_hook = pnv_ocapi_enable_device_hook, + .release_device = pnv_pci_release_
[RFC 08/11] pci/hotplug/pnv-php: Register opencapi slots
Add the opencapi PHBs to the list of PHBs being scanned to look for slots. Signed-off-by: Frederic Barrat --- drivers/pci/hotplug/pnv_php.c | 5 - 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 5cdd2a3a4dd9..f9c624334ef7 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -954,7 +954,8 @@ static int __init pnv_php_init(void) pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); for_each_compatible_node(dn, NULL, "ibm,ioda2-phb") pnv_php_register(dn); - + for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb") + pnv_php_register_one(dn); return 0; } @@ -964,6 +965,8 @@ static void __exit pnv_php_exit(void) for_each_compatible_node(dn, NULL, "ibm,ioda2-phb") pnv_php_unregister(dn); + for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb") + pnv_php_unregister(dn); } module_init(pnv_php_init); -- 2.21.0
[RFC 10/11] pci/hotplug/pnv-php: Wrap warnings in macro
An opencapi slot doesn't have an associated bridge device. It's not needed for operation, but any warning is displayed through pci_warn() which uses the pci_dev struct of the assocated bridge device. So wrap those warning so that a different trace mechanism can be used if it's an opencapi slot. Signed-off-by: Frederic Barrat --- drivers/pci/hotplug/pnv_php.c | 33 ++--- 1 file changed, 18 insertions(+), 15 deletions(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index 74b62a8e11e7..08ac8f0df06c 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -18,6 +18,9 @@ #define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" #define DRIVER_DESC"PowerPC PowerNV PCI Hotplug Driver" +#define SLOT_WARN(slot, x...) \ + (slot->pdev ? pci_warn(slot->pdev, x) : dev_warn(&slot->bus->dev, x)) + struct pnv_php_event { booladded; struct pnv_php_slot *php_slot; @@ -265,7 +268,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x1); if (ret) { - pci_warn(php_slot->pdev, "Error %d getting FDT blob\n", ret); + SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret); goto free_fdt1; } @@ -279,7 +282,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL); if (!dt) { ret = -EINVAL; - pci_warn(php_slot->pdev, "Cannot unflatten FDT\n"); + SLOT_WARN(php_slot, "Cannot unflatten FDT\n"); goto free_fdt; } @@ -289,7 +292,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn); if (ret) { pnv_php_reverse_nodes(php_slot->dn); - pci_warn(php_slot->pdev, "Error %d populating changeset\n", + SLOT_WARN(php_slot, "Error %d populating changeset\n", ret); goto free_dt; } @@ -297,7 +300,7 @@ static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) php_slot->dn->child = NULL; ret = of_changeset_apply(&php_slot->ocs); if (ret) { - 
pci_warn(php_slot->pdev, "Error %d applying changeset\n", ret); + SLOT_WARN(php_slot, "Error %d applying changeset\n", ret); goto destroy_changeset; } @@ -337,7 +340,7 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot, if (ret > 0) { if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle || be64_to_cpu(msg.params[2]) != state) { - pci_warn(php_slot->pdev, "Wrong msg (%lld, %lld, %lld)\n", + SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n", be64_to_cpu(msg.params[1]), be64_to_cpu(msg.params[2]), be64_to_cpu(msg.params[3])); @@ -359,7 +362,7 @@ int pnv_php_set_slot_power_state(struct hotplug_slot *slot, return ret; error: - pci_warn(php_slot->pdev, "Error %d powering %s\n", + SLOT_WARN(php_slot, "Error %d powering %s\n", ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off"); return ret; } @@ -378,7 +381,7 @@ static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state) */ ret = pnv_pci_get_power_state(php_slot->id, &power_state); if (ret) { - pci_warn(php_slot->pdev, "Error %d getting power status\n", + SLOT_WARN(php_slot, "Error %d getting power status\n", ret); } else { *state = power_state; @@ -402,7 +405,7 @@ static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) *state = presence; ret = 0; } else { - pci_warn(php_slot->pdev, "Error %d getting presence\n", ret); + SLOT_WARN(php_slot, "Error %d getting presence\n", ret); } return ret; @@ -637,7 +640,7 @@ static int pnv_php_register_slot(struct pnv_php_slot *php_slot) ret = pci_hp_register(&php_slot->slot, php_slot->bus, php_slot->slot_no, php_slot->name); if (ret) { - pci_warn(php_slot->pdev, "Error %d registering slot\n", ret); + SLOT_WARN(php_slot, "Error %d registering slot\n", ret); return ret; } @@ -690,7 +693,7 @@ static int pnv_php_enable_msix(struct pnv_php_slot *php_slot) /* Enable MSIx */ ret = pci_enable_msix_exact(pdev, &entry, 1); if (ret) { - pci_warn(pdev, "Error %d enabling MSIx\n", ret); + SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret
[RFC 09/11] pci/hotplug/pnv-php: Relax check when disabling slot
The driver only allows to disable a slot in the POPULATED state. However, if an error occurs while enabling the slot, say because the link couldn't be trained, then the POPULATED state may not be reached, yet the power state of the slot is on. So allow to disable a slot in the REGISTERED state. Removing the devices will do nothing since it's not populated, and we'll set the power state of the slot back to off. Signed-off-by: Frederic Barrat --- drivers/pci/hotplug/pnv_php.c | 8 +++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/drivers/pci/hotplug/pnv_php.c b/drivers/pci/hotplug/pnv_php.c index f9c624334ef7..74b62a8e11e7 100644 --- a/drivers/pci/hotplug/pnv_php.c +++ b/drivers/pci/hotplug/pnv_php.c @@ -523,7 +523,13 @@ static int pnv_php_disable_slot(struct hotplug_slot *slot) struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); int ret; - if (php_slot->state != PNV_PHP_STATE_POPULATED) + /* +* Allow to disable a slot already in the registered state to +* cover cases where the slot couldn't be enabled and never +* reached the populated state +*/ + if (php_slot->state != PNV_PHP_STATE_POPULATED && + php_slot->state != PNV_PHP_STATE_REGISTERED) return 0; /* Remove all devices behind the slot */ -- 2.21.0
[RFC 05/11] powerpc/powernv/ioda: Find opencapi slot for a device node
Unlike real PCI slots, opencapi slots are directly associated to the (virtual) opencapi PHB, there's no intermediate bridge. So when looking for a slot ID, we must start the search from the device node itself and not its parent. Also, the slot ID is not attached to a specific bdfn, so let's build it from the PHB ID, like skiboot. Signed-off-by: Frederic Barrat --- arch/powerpc/include/asm/pnv-pci.h | 1 + arch/powerpc/platforms/powernv/pci.c | 10 +++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/include/asm/pnv-pci.h b/arch/powerpc/include/asm/pnv-pci.h index b5a85f1bb305..4b4dfa6bfdd3 100644 --- a/arch/powerpc/include/asm/pnv-pci.h +++ b/arch/powerpc/include/asm/pnv-pci.h @@ -15,6 +15,7 @@ #define PCI_SLOT_ID_PREFIX (1UL << 63) #define PCI_SLOT_ID(phb_id, bdfn) \ (PCI_SLOT_ID_PREFIX | ((uint64_t)(bdfn) << 16) | (phb_id)) +#define PCI_PHB_SLOT_ID(phb_id)(phb_id) extern int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id); extern int pnv_pci_get_device_tree(uint32_t phandle, void *buf, uint64_t len); diff --git a/arch/powerpc/platforms/powernv/pci.c b/arch/powerpc/platforms/powernv/pci.c index ff1a33fee8e6..3e4e75a883e1 100644 --- a/arch/powerpc/platforms/powernv/pci.c +++ b/arch/powerpc/platforms/powernv/pci.c @@ -49,13 +49,14 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) return -ENXIO; bdfn = ((bdfn & 0x0000) >> 8); - while ((parent = of_get_parent(parent))) { + for (parent = np; parent; parent = of_get_parent(parent)) { if (!PCI_DN(parent)) { of_node_put(parent); break; } - if (!of_device_is_compatible(parent, "ibm,ioda2-phb")) { + if (!of_device_is_compatible(parent, "ibm,ioda2-phb") && + !of_device_is_compatible(parent, "ibm,ioda2-npu2-opencapi-phb")) { of_node_put(parent); continue; } @@ -66,7 +67,10 @@ int pnv_pci_get_slot_id(struct device_node *np, uint64_t *id) return -ENXIO; } - *id = PCI_SLOT_ID(phbid, bdfn); + if (of_device_is_compatible(parent, "ibm,ioda2-npu2-opencapi-phb")) + *id = 
PCI_PHB_SLOT_ID(phbid); + else + *id = PCI_SLOT_ID(phbid, bdfn); return 0; } -- 2.21.0
[RFC 02/11] powerpc/powernv/ioda: Protect PE list
Protect the PHB's list of PE. Probably not needed as long as it was populated during PHB creation, but it feels right and will become required once we can add/remove opencapi devices on hotplug. Signed-off-by: Frederic Barrat --- arch/powerpc/platforms/powernv/pci-ioda.c | 6 +- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 3082912e2600..2c063b05bb64 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1078,8 +1078,9 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) } /* Put PE to the list */ + mutex_lock(&phb->ioda.pe_list_mutex); list_add_tail(&pe->list, &phb->ioda.pe_list); - + mutex_unlock(&phb->ioda.pe_list_mutex); return pe; } @@ -3501,7 +3502,10 @@ static void pnv_ioda_release_pe(struct pnv_ioda_pe *pe) struct pnv_phb *phb = pe->phb; struct pnv_ioda_pe *slave, *tmp; + mutex_lock(&phb->ioda.pe_list_mutex); list_del(&pe->list); + mutex_unlock(&phb->ioda.pe_list_mutex); + switch (phb->type) { case PNV_PHB_IODA1: pnv_pci_ioda1_release_pe_dma(pe); -- 2.21.0
[RFC 03/11] powerpc/powernv/ioda: set up PE on opencapi device when enabling
The PE for an opencapi device was set as part of a late PHB fixup operation, when creating the PHB. To use the PCI hotplug framework, this is not going to work, as the PHB stays the same, it's only the devices underneath which are updated. For regular PCI devices, it is done as part of the reconfiguration of the bridge, but for opencapi PHBs, we don't have an intermediate bridge. So let's define the PE when the device is enabled. PEs are meaningless for opencapi, the NPU doesn't define them and opal is not doing anything with them. Signed-off-by: Frederic Barrat --- arch/powerpc/platforms/powernv/pci-ioda.c | 31 +-- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 2c063b05bb64..2cf06fb98978 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1258,8 +1258,6 @@ static void pnv_pci_ioda_setup_PEs(void) { struct pci_controller *hose; struct pnv_phb *phb; - struct pci_bus *bus; - struct pci_dev *pdev; struct pnv_ioda_pe *pe; list_for_each_entry(hose, &hose_list, list_node) { @@ -1271,11 +1269,6 @@ static void pnv_pci_ioda_setup_PEs(void) if (phb->model == PNV_PHB_MODEL_NPU2) WARN_ON_ONCE(pnv_npu2_init(hose)); } - if (phb->type == PNV_PHB_NPU_OCAPI) { - bus = hose->bus; - list_for_each_entry(pdev, &bus->devices, bus_list) - pnv_ioda_setup_dev_PE(pdev); - } } list_for_each_entry(hose, &hose_list, list_node) { phb = hose->private_data; @@ -3373,6 +3366,28 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev) return true; } +static bool pnv_ocapi_enable_device_hook(struct pci_dev *dev) +{ + struct pci_controller *hose = pci_bus_to_host(dev->bus); + struct pnv_phb *phb = hose->private_data; + struct pci_dn *pdn; + struct pnv_ioda_pe *pe; + + if (!phb->initialized) + return true; + + pdn = pci_get_pdn(dev); + if (!pdn) + return false; + + if (pdn->pe_number == IODA_INVALID_PE) { + pe = 
pnv_ioda_setup_dev_PE(dev); + if (!pe) + return false; + } + return true; +} + static long pnv_pci_ioda1_unset_window(struct iommu_table_group *table_group, int num) { @@ -3613,7 +3628,7 @@ static const struct pci_controller_ops pnv_npu_ioda_controller_ops = { }; static const struct pci_controller_ops pnv_npu_ocapi_ioda_controller_ops = { - .enable_device_hook = pnv_pci_enable_device_hook, + .enable_device_hook = pnv_ocapi_enable_device_hook, .window_alignment = pnv_pci_window_alignment, .reset_secondary_bus= pnv_pci_reset_secondary_bus, .shutdown = pnv_pci_ioda_shutdown, -- 2.21.0
[RFC 01/11] powerpc/powernv/ioda: Fix ref count for devices with their own PE
Taking a reference on the pci_dev structure was required with initial commit 184cd4a3b962 ("powerpc/powernv: PCI support for p7IOC under OPAL v2"), where we were storing the pci dev in the pci_dn structure. However, the pci_dev was later removed from the pci_dn structure, but the reference was kept. See 902bdc57451c ("powerpc/powernv/idoa: Remove unnecessary pcidev from pci_dn"). The pnv_ioda_pe structure life cycle is the same as the pci_dev structure, the PE is freed when the device is released. So we don't need a reference for the pci_dev stored in the PE, otherwise the pci_dev will never be released. Which is not really a surprise as the comment (removed here as no longer needed) was stating as much. Fixes: 902bdc57451c ("powerpc/powernv/idoa: Remove unnecessary pcidev from pci_dn") Signed-off-by: Frederic Barrat --- arch/powerpc/platforms/powernv/pci-ioda.c | 11 +-- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c index 10cc42b9e541..3082912e2600 100644 --- a/arch/powerpc/platforms/powernv/pci-ioda.c +++ b/arch/powerpc/platforms/powernv/pci-ioda.c @@ -1060,14 +1060,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) return NULL; } - /* NOTE: We get only one ref to the pci_dev for the pdn, not for the -* pointer in the PE data structure, both should be destroyed at the -* same time. However, this needs to be looked at more closely again -* once we actually start removing things (Hotplug, SR-IOV, ...) 
-* -* At some point we want to remove the PDN completely anyways -*/ - pci_dev_get(dev); pdn->pe_number = pe->pe_number; pe->flags = PNV_IODA_PE_DEV; pe->pdev = dev; @@ -1082,7 +1074,6 @@ static struct pnv_ioda_pe *pnv_ioda_setup_dev_PE(struct pci_dev *dev) pnv_ioda_free_pe(pe); pdn->pe_number = IODA_INVALID_PE; pe->pdev = NULL; - pci_dev_put(dev); return NULL; } @@ -1226,7 +1217,7 @@ static struct pnv_ioda_pe *pnv_ioda_setup_npu_PE(struct pci_dev *npu_pdev) */ dev_info(&npu_pdev->dev, "Associating to existing PE %x\n", pe_num); - pci_dev_get(npu_pdev); + pci_dev_get(npu_pdev); // still needed after 902bdc57451c2c64aa139bbe24067f70a186db0a ? npu_pdn = pci_get_pdn(npu_pdev); rid = npu_pdev->bus->number << 8 | npu_pdn->devfn; npu_pdn->pe_number = pe_num; -- 2.21.0
Re: [PATCH 1/3] powerpc/64: __ioremap_at clean up in the error case
Le 19/06/2019 à 06:04, Nicholas Piggin a écrit : Christophe Leroy's on June 11, 2019 4:28 pm: Le 10/06/2019 à 05:08, Nicholas Piggin a écrit : __ioremap_at error handling is wonky, it requires caller to clean up after it. Implement a helper that does the map and error cleanup and remove the requirement from the caller. Signed-off-by: Nicholas Piggin --- This series is a different approach to the problem, using the generic ioremap_page_range directly which reduces added code, and moves the radix specific code into radix files. Thanks to Christophe for pointing out various problems with the previous patch. arch/powerpc/mm/pgtable_64.c | 27 --- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index d2d976ff8a0e..6bd3660388aa 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -108,14 +108,30 @@ unsigned long ioremap_bot; unsigned long ioremap_bot = IOREMAP_BASE; #endif +static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) +{ + unsigned long i; + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); Missing a blank line + if (err) { I'd have done the following to reduce indentation depth if (!err) continue I'll consider it, line lengths were not too bad. + if (slab_is_available()) + unmap_kernel_range(ea, size); Shouldn't it be unmap_kernel_range(ea, i) ? I guess (i - PAGE_SIZE really), although the old code effectively did the full range. As a "clean up" it may be better to avoid subtle change in behaviour and do that in another patch? Not sure we have to do it in another patch. Previous code was doing full range because it was done at upper level so it didn't know the boundaries. You are creating a nice brand new function that have all necessary information, so why not make it right from the start ? Christophe Thanks, Nick
Re: [PATCH 2/3] powerpc/64s/radix: ioremap use ioremap_page_range
Le 19/06/2019 à 05:59, Nicholas Piggin a écrit : Christophe Leroy's on June 11, 2019 4:46 pm: Le 10/06/2019 à 05:08, Nicholas Piggin a écrit : [snip] diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c b/arch/powerpc/mm/book3s64/radix_pgtable.c index c9bcf428dd2b..db993bc1aef3 100644 --- a/arch/powerpc/mm/book3s64/radix_pgtable.c +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c @@ -11,6 +11,7 @@ #define pr_fmt(fmt) "radix-mmu: " fmt +#include #include #include #include @@ -1122,3 +1123,23 @@ void radix__ptep_modify_prot_commit(struct vm_area_struct *vma, set_pte_at(mm, addr, ptep, pte); } + +int radix__ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, + pgprot_t prot, int nid) +{ + if (likely(slab_is_available())) { + int err = ioremap_page_range(ea, ea + size, pa, prot); + if (err) + unmap_kernel_range(ea, size); + return err; + } else { + unsigned long i; + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); + if (WARN_ON_ONCE(err)) /* Should clean up */ + return err; + } Same loop again. What about not doing a radix specific function and just putting something like below in the core ioremap_range() function ? if (likely(slab_is_available()) && radix_enabled()) { int err = ioremap_page_range(ea, ea + size, pa, prot); if (err) unmap_kernel_range(ea, size); return err; } Because I'm pretty sure will more and more use ioremap_page_range(). Well I agree the duplication is not so nice, but it's convenient to see what is going on for each MMU type. There is a significant amount of churn that needs to be done in this layer so I prefer to make it a bit simpler despite duplication. I would like to remove the early ioremap or make it into its own function. Re-implement map_kernel_page with ioremap_page_range, allow page tables that don't use slab to avoid the early check, unbolt the hptes mapped in early boot, etc. I just wanted to escape out the 64s and hash/radix implementations completely until that settles. 
I can understand the benefit in some situations but here I just can't. And code duplication should be avoided as much as possible as it makes code maintenance more difficult. Here you have: +static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) +{ + unsigned long i; + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); + if (err) { + if (slab_is_available()) + unmap_kernel_range(ea, size); + else + WARN_ON_ONCE(1); /* Should clean up */ + return err; + } + } + + return 0; +} You now create a new one in another file, that is almost identical: +int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) +{ + unsigned long i; + + if (radix_enabled()) + return radix__ioremap_range(ea, pa, size, prot, nid); + + for (i = 0; i < size; i += PAGE_SIZE) { + int err = map_kernel_page(ea + i, pa + i, prot); + if (err) { + if (slab_is_available()) + unmap_kernel_range(ea, size); + else + WARN_ON_ONCE(1); /* Should clean up */ + return err; + } + } + + return 0; +} Then you have to make the original one __weak. Sorry I'm still having difficulties understanding what the benefit is. radix_enabled() is defined for every platforms so could just add the following on top of the existing ioremap_range() and voila. + if (radix_enabled()) + return radix__ioremap_range(ea, pa, size, prot, nid); And with that you wouldn't have the __weak stuff to handle. -static int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) +int __weak ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) Hum. Weak functions remain in unused in vmlinux unless CONFIG_LD_DEAD_CODE_DATA_ELIMINATION is selected. Also, they are some how dangerous because people might change them without seeing that it is overridden for some particular configuration. 
Well you shouldn't assume that when you see a weak function, but what's the preferred alternative? A config option? Yes you are right, nobody should assume that, but ... But I think if the fonctions were really different, the preferred alternative w
Re: [PATCH 1/4] mm: Move ioremap page table mapping function to mm/
Le 19/06/2019 à 05:43, Nicholas Piggin a écrit : Christophe Leroy's on June 11, 2019 3:24 pm: Le 10/06/2019 à 06:38, Nicholas Piggin a écrit : [snip] diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 51e131245379..812bea5866d6 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -147,6 +147,9 @@ extern struct vm_struct *find_vm_area(const void *addr); extern int map_vm_area(struct vm_struct *area, pgprot_t prot, struct page **pages); #ifdef CONFIG_MMU +extern int vmap_range(unsigned long addr, + unsigned long end, phys_addr_t phys_addr, pgprot_t prot, + unsigned int max_page_shift); Drop extern keyword here. I don't know if I was going crazy but at one point I was getting duplicate symbol errors that were fixed by adding extern somewhere. probably not on a function name ... Maybe sleep deprivation. However... As checkpatch tells you, 'CHECK:AVOID_EXTERNS: extern prototypes should be avoided in .h files' I prefer to follow existing style in surrounding code at the expense of some checkpatch warnings. If somebody later wants to "fix" it that's fine. I don't think that's fine to 'fix' later things that could be done right from the beginning. 'Cosmetic only' fixes never happen because they are a nightmare for backports, and a shame for 'git blame'. In some patches, you add cleanups to make the code look nicer, and here you have the opportunity to make the code nice from the beginning and you prefer repeating the errors done in the past ? You're surprising me. Christophe Thanks, Nick
Re: [PATCH 0/1] PPC32: fix ptrace() access to FPU registers
On Wed, 2019-06-19 at 10:36 +1000, Daniel Axtens wrote: > Andreas Schwab < > sch...@linux-m68k.org > > writes: > > > On Jun 18 2019, Radu Rendec < > > radu.ren...@gmail.com > > > wrote: > > > > > Since you already have a working setup, it would be nice if you could > > > add a printk to arch_ptrace() to print the address and confirm what I > > > believe happens (by reading the gdb source code). > > > > A ppc32 ptrace syscall goes through compat_arch_ptrace. Right. I completely overlooked that part. > Ah right, and that (in ptrace32.c) contains code that will work: > > > /* >* the user space code considers the floating point >* to be an array of unsigned int (32 bits) - the >* index passed in is based on this assumption. >*/ > tmp = ((unsigned int *)child->thread.fp_state.fpr) > [FPRINDEX(index)]; > > FPRINDEX is defined above to deal with the various manipulations you > need to do. Correct. Basically it does the same that I did in my patch: it divides the index again by 2 (it's already divided by 4 in compat_arch_ptrace() so it ends up divided by 8), then takes the least significant bit and adds it to the index. I take bit 2 of the original address, which is the same thing (because in FPRHALF() the address is already divided by 4). So we have this in ptrace32.c: #define FPRNUMBER(i) (((i) - PT_FPR0) >> 1) #define FPRHALF(i) (((i) - PT_FPR0) & 1) #define FPRINDEX(i) TS_FPRWIDTH * FPRNUMBER(i) * 2 + FPRHALF(i) index = (unsigned long) addr >> 2; (unsigned int *)child->thread.fp_state.fpr)[FPRINDEX(index)] And we have this in my patch: fpidx = (addr - PT_FPR0 * sizeof(long)) / 8; (void *)&child->thread.TS_FPR(fpidx) + (addr & 4) > Radu: I think we want to copy that working code back into ptrace.c. I'm not sure that would work. 
There's a subtle difference: the code in ptrace32.c is always compiled on a 64-bit kernel and the user space calling it is always 32-bit; on the other hand, the code in ptrace.c can be compiled on either a 64-bit kernel or a 32-bit kernel and the user space calling it always has the same "bitness" as the kernel. One difference is the size of the CPU registers. On 64-bit they are 8 byte long and user space knows that and generates 8-byte aligned addresses. So you have to divide the address by 8 to calculate the CPU register index correctly, which compat_arch_ptrace() currently doesn't. Another difference is that on 64-bit `long` is 8 bytes, so user space can read a whole FPU register in a single ptrace call. Now that we are all aware of compat_arch_ptrace() (which handles the special case of a 32-bit process running on a 64-bit kernel) I would say the patch is correct and does the right thing for both 32-bit and 64-bit kernels and processes. > The challenge will be unpicking the awful mess of ifdefs in ptrace.c > and making it somewhat more comprehensible. I'm not sure what ifdefs you're thinking about. The only that are used inside arch_ptrace() are PT_FPR0, PT_FPSCR and TS_FPR, which seem to be correct. But perhaps it would be useful to change my patch and add a comment just before arch_ptrace() that explains how the math is done and that the code must work on both 32-bit and 64-bit, the user space address assumptions, etc. By the way, I'm not sure the code in compat_arch_ptrace() handles PT_FPSCR correctly. It might (just because fpscr is right next to fpr[] in memory - and that's a hack), but I can't figure out if it accesses the right half. Radu
Re: [PATCH 1/2] KVM: PPC: Book3S HV: Fix r3 corruption in h_set_dabr()
On Mon, 2019-06-17 at 07:16:18 UTC, Suraj Jitindar Singh wrote: > From: Michael Neuling > > Commit c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 > option") screwed up some assembler and corrupted a pointer in > r3. This resulted in crashes like the below: > > [ 44.374746] BUG: Kernel NULL pointer dereference at 0x13bf > [ 44.374848] Faulting instruction address: 0xc010b044 > [ 44.374906] Oops: Kernel access of bad area, sig: 11 [#1] > [ 44.374951] LE PAGE_SIZE=64K MMU=Radix MMU=Hash SMP NR_CPUS=2048 NUMA > pSeries > [ 44.375018] Modules linked in: vhost_net vhost tap xt_CHECKSUM > iptable_mangle xt_MASQUERADE iptable_nat nf_nat xt_conntrack nf_conntrack > nf_defrag_ipv6 libcrc32c nf_defrag_ipv4 ipt_REJECT nf_reject_ipv4 xt_tcpudp > bridge stp llc ebtable_filter ebtables ip6table_filter ip6_tables > iptable_filter bpfilter vmx_crypto crct10dif_vpmsum crc32c_vpmsum kvm_hv kvm > sch_fq_codel ip_tables x_tables autofs4 virtio_net net_failover virtio_scsi > failover > [ 44.375401] CPU: 8 PID: 1771 Comm: qemu-system-ppc Kdump: loaded Not > tainted 5.2.0-rc4+ #3 > [ 44.375500] NIP: c010b044 LR: c008089dacf4 CTR: > c010aff4 > [ 44.375604] REGS: c0179b397710 TRAP: 0300 Not tainted (5.2.0-rc4+) > [ 44.375691] MSR: 8280b033 > CR: 42244842 XER: > [ 44.375815] CFAR: c010aff8 DAR: 13bf DSISR: 4200 > IRQMASK: 0 > [ 44.375815] GPR00: c008089dd6bc c0179b3979a0 c00808a04300 > > [ 44.375815] GPR04: 0003 2444b05d > c017f11c45d0 > [ 44.375815] GPR08: 07803e018dfe 0028 0001 > 0075 > [ 44.375815] GPR12: c010aff4 c7ff6300 > > [ 44.375815] GPR16: c017f11d > c017f11ca7a8 > [ 44.375815] GPR20: c017f11c42ec > 000a > [ 44.375815] GPR24: fffc c017f11c > c1a77ed8 > [ 44.375815] GPR28: c0179af7 fffc c008089ff170 > c0179ae88540 > [ 44.376673] NIP [c010b044] kvmppc_h_set_dabr+0x50/0x68 > [ 44.376754] LR [c008089dacf4] kvmppc_pseries_do_hcall+0xa3c/0xeb0 > [kvm_hv] > [ 44.376849] Call Trace: > [ 44.376886] [c0179b3979a0] [c017f11c] 0xc017f11c > (unreliable) > [ 44.376982] [c0179b397a10] 
[c008089dd6bc] > kvmppc_vcpu_run_hv+0x694/0xec0 [kvm_hv] > [ 44.377084] [c0179b397ae0] [c008093f8bcc] > kvmppc_vcpu_run+0x34/0x48 [kvm] > [ 44.377185] [c0179b397b00] [c008093f522c] > kvm_arch_vcpu_ioctl_run+0x2f4/0x400 [kvm] > [ 44.377286] [c0179b397b90] [c008093e3618] > kvm_vcpu_ioctl+0x460/0x850 [kvm] > [ 44.377384] [c0179b397d00] [c04ba6c4] do_vfs_ioctl+0xe4/0xb40 > [ 44.377464] [c0179b397db0] [c04bb1e4] ksys_ioctl+0xc4/0x110 > [ 44.377547] [c0179b397e00] [c04bb258] sys_ioctl+0x28/0x80 > [ 44.377628] [c0179b397e20] [c000b888] system_call+0x5c/0x70 > [ 44.377712] Instruction dump: > [ 44.377765] 4082fff4 4c00012c 3860 4e800020 e96280c0 896b > 2c2b 3860 > [ 44.377862] 4d820020 50852e74 508516f6 78840724 f8a313c8 > 7c942ba6 7cbc2ba6 > > Fix the bug by only changing r3 when we are returning immediately. > > Fixes: c1fe190c0672 ("powerpc: Add force enable of DAWR on P9 option") > Signed-off-by: Michael Neuling > Reported-by: Cédric Le Goater Series applied to powerpc fixes, thanks. https://git.kernel.org/powerpc/c/fabb2efcf0846e28b4910fc20bdc203d3d0170af cheers
Re: [PATCH] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac
Christoph Hellwig writes: > Any chance this could get picked up to fix the regression? Was hoping Ben would Ack it. He's still powermac maintainer :) I guess he OK'ed it in the other thread, will add it to my queue. cheers > On Thu, Jun 13, 2019 at 10:24:46AM +0200, Christoph Hellwig wrote: >> With the strict dma mask checking introduced with the switch to >> the generic DMA direct code common wifi chips on 32-bit powerbooks >> stopped working. Add a 30-bit ZONE_DMA to the 32-bit pmac builds >> to allow them to reliably allocate dma coherent memory. >> >> Fixes: 65a21b71f948 ("powerpc/dma: remove dma_nommu_dma_supported") >> Reported-by: Aaro Koskinen >> Signed-off-by: Christoph Hellwig >> --- >> arch/powerpc/include/asm/page.h | 7 +++ >> arch/powerpc/mm/mem.c | 3 ++- >> arch/powerpc/platforms/powermac/Kconfig | 1 + >> 3 files changed, 10 insertions(+), 1 deletion(-) >> >> diff --git a/arch/powerpc/include/asm/page.h >> b/arch/powerpc/include/asm/page.h >> index b8286a2013b4..0d52f57fca04 100644 >> --- a/arch/powerpc/include/asm/page.h >> +++ b/arch/powerpc/include/asm/page.h >> @@ -319,6 +319,13 @@ struct vm_area_struct; >> #endif /* __ASSEMBLY__ */ >> #include >> >> +/* >> + * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. 
>> + */ >> +#ifdef CONFIG_PPC32 >> +#define ARCH_ZONE_DMA_BITS 30 >> +#else >> #define ARCH_ZONE_DMA_BITS 31 >> +#endif >> >> #endif /* _ASM_POWERPC_PAGE_H */ >> diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c >> index cba29131bccc..2540d3b2588c 100644 >> --- a/arch/powerpc/mm/mem.c >> +++ b/arch/powerpc/mm/mem.c >> @@ -248,7 +248,8 @@ void __init paging_init(void) >> (long int)((top_of_ram - total_ram) >> 20)); >> >> #ifdef CONFIG_ZONE_DMA >> -max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL >> PAGE_SHIFT); >> +max_zone_pfns[ZONE_DMA] = min(max_low_pfn, >> +((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT); >> #endif >> max_zone_pfns[ZONE_NORMAL] = max_low_pfn; >> #ifdef CONFIG_HIGHMEM >> diff --git a/arch/powerpc/platforms/powermac/Kconfig >> b/arch/powerpc/platforms/powermac/Kconfig >> index f834a19ed772..c02d8c503b29 100644 >> --- a/arch/powerpc/platforms/powermac/Kconfig >> +++ b/arch/powerpc/platforms/powermac/Kconfig >> @@ -7,6 +7,7 @@ config PPC_PMAC >> select PPC_INDIRECT_PCI if PPC32 >> select PPC_MPC106 if PPC32 >> select PPC_NATIVE >> +select ZONE_DMA if PPC32 >> default y >> >> config PPC_PMAC64 >> -- >> 2.20.1 > ---end quoted text---
Re: [PATCH] powerpc: enable a 30-bit ZONE_DMA for 32-bit pmac
Any chance this could get picked up to fix the regression? On Thu, Jun 13, 2019 at 10:24:46AM +0200, Christoph Hellwig wrote: > With the strict dma mask checking introduced with the switch to > the generic DMA direct code common wifi chips on 32-bit powerbooks > stopped working. Add a 30-bit ZONE_DMA to the 32-bit pmac builds > to allow them to reliably allocate dma coherent memory. > > Fixes: 65a21b71f948 ("powerpc/dma: remove dma_nommu_dma_supported") > Reported-by: Aaro Koskinen > Signed-off-by: Christoph Hellwig > --- > arch/powerpc/include/asm/page.h | 7 +++ > arch/powerpc/mm/mem.c | 3 ++- > arch/powerpc/platforms/powermac/Kconfig | 1 + > 3 files changed, 10 insertions(+), 1 deletion(-) > > diff --git a/arch/powerpc/include/asm/page.h b/arch/powerpc/include/asm/page.h > index b8286a2013b4..0d52f57fca04 100644 > --- a/arch/powerpc/include/asm/page.h > +++ b/arch/powerpc/include/asm/page.h > @@ -319,6 +319,13 @@ struct vm_area_struct; > #endif /* __ASSEMBLY__ */ > #include > > +/* > + * Allow 30-bit DMA for very limited Broadcom wifi chips on many powerbooks. 
> + */ > +#ifdef CONFIG_PPC32 > +#define ARCH_ZONE_DMA_BITS 30 > +#else > #define ARCH_ZONE_DMA_BITS 31 > +#endif > > #endif /* _ASM_POWERPC_PAGE_H */ > diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c > index cba29131bccc..2540d3b2588c 100644 > --- a/arch/powerpc/mm/mem.c > +++ b/arch/powerpc/mm/mem.c > @@ -248,7 +248,8 @@ void __init paging_init(void) > (long int)((top_of_ram - total_ram) >> 20)); > > #ifdef CONFIG_ZONE_DMA > - max_zone_pfns[ZONE_DMA] = min(max_low_pfn, 0x7fffUL >> PAGE_SHIFT); > + max_zone_pfns[ZONE_DMA] = min(max_low_pfn, > + ((1UL << ARCH_ZONE_DMA_BITS) - 1) >> PAGE_SHIFT); > #endif > max_zone_pfns[ZONE_NORMAL] = max_low_pfn; > #ifdef CONFIG_HIGHMEM > diff --git a/arch/powerpc/platforms/powermac/Kconfig > b/arch/powerpc/platforms/powermac/Kconfig > index f834a19ed772..c02d8c503b29 100644 > --- a/arch/powerpc/platforms/powermac/Kconfig > +++ b/arch/powerpc/platforms/powermac/Kconfig > @@ -7,6 +7,7 @@ config PPC_PMAC > select PPC_INDIRECT_PCI if PPC32 > select PPC_MPC106 if PPC32 > select PPC_NATIVE > + select ZONE_DMA if PPC32 > default y > > config PPC_PMAC64 > -- > 2.20.1 ---end quoted text---
Re: [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
Naveen N. Rao's on June 19, 2019 7:53 pm: > Nicholas Piggin wrote: >> Michael Ellerman's on June 19, 2019 3:14 pm: >>> Hi Naveen, >>> >>> Sorry I meant to reply to this earlier .. :/ > > No problem. Thanks for the questions. > >>> >>> "Naveen N. Rao" writes: With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to enable function tracing and profiling. So far, with dynamic ftrace, we used to only patch out the branch to _mcount(). However, mflr is executed by the branch unit that can only execute one per cycle on POWER9 and shared with branches, so it would be nice to avoid it where possible. We cannot simply nop out the mflr either. When enabling function tracing, there can be a race if tracing is enabled when some thread was interrupted after executing a nop'ed out mflr. In this case, the thread would execute the now-patched-in branch to _mcount() without having executed the preceding mflr. To solve this, we now enable function tracing in 2 steps: patch in the mflr instruction, use synchronize_rcu_tasks() to ensure all existing threads make progress, and then patch in the branch to _mcount(). We override ftrace_replace_code() with a powerpc64 variant for this purpose. >>> >>> According to the ISA we're not allowed to patch mflr at runtime. See the >>> section on "CMODX". >> >> According to "quasi patch class" engineering note, we can patch >> anything with a preferred nop. But that's written as an optional >> facility, which we don't have a feature to test for. >> > > Hmm... I wonder what the implications are. We've been patching in a > 'trap' for kprobes for a long time now, along with having to patch back > the original instruction (which can be anything), when the probe is > removed. Will have to check what implementations support "quasi patch class" instructions. IIRC recent POWER processors are okay. May have to add a feature test though. 
>>> >>> I'm also not convinced the ordering between the two patches is >>> guaranteed by the ISA, given that there's possibly no isync on the other >>> CPU. >> >> Will they go through a context synchronizing event? >> >> synchronize_rcu_tasks() should ensure a thread is scheduled away, but >> I'm not actually sure it guarantees CSI if it's kernel->kernel. Could >> do a smp_call_function to do the isync on each CPU to be sure. > > Good point. Per > Documentation/RCU/Design/Requirements/Requirements.html#Tasks RCU: > "The solution, in the form of Tasks RCU, is to have implicit read-side > critical sections that are delimited by voluntary context switches, that > is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In > addition, transitions to and from userspace execution also delimit > tasks-RCU read-side critical sections." > > I suppose transitions to/from userspace, as well as calls to schedule() > result in context synchronizing instruction being executed. But, if some > tasks call cond_resched() and synchronize_rcu_tasks(), we probably won't > have a CSI executed. > > Also: > "In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, so these > APIs map to call_rcu(), synchronize_rcu(), and rcu_barrier(), > respectively." > > In this scenario as well, I think we won't have a CSI executed in case > of cond_resched(). > > Should we enhance patch_instruction() to handle that? Well, not sure. Do we have many post-boot callers of it? Should they take care of their own synchronization requirements? Thanks, Nick
Re: [PATCH] ocxl: Allow contexts to be attached with a NULL mm
Le 18/06/2019 à 03:50, Andrew Donnellan a écrit : On 17/6/19 2:41 pm, Alastair D'Silva wrote: From: Alastair D'Silva If an OpenCAPI context is to be used directly by a kernel driver, there may not be a suitable mm to use. The patch makes the mm parameter to ocxl_context_attach optional. Signed-off-by: Alastair D'Silva The one issue I can see here is that using mm == NULL bypasses our method of enabling/disabling global TLBIs in mm_context_add_copro(). Discussing this privately with Alastair and Fred - this should be fine, but perhaps we should document that. So indeed we should be fine. I confirmed with Nick that kernel space invalidations are already global today. Nick mentioned that we should still be fine tomorrow, but in the distant future, we could imagine local usage of some part of the kernel space. It will require some work, but it would be best to add a comment in one of the kernel invalidation function (for example radix__flush_tlb_kernel_range()) that if a kernel invalidation ever becomes local, then clients of the nest MMU may need some work. A few more comments below. 
--- drivers/misc/ocxl/context.c | 9 ++--- drivers/misc/ocxl/link.c | 12 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/drivers/misc/ocxl/context.c b/drivers/misc/ocxl/context.c index bab9c9364184..994563a078eb 100644 --- a/drivers/misc/ocxl/context.c +++ b/drivers/misc/ocxl/context.c @@ -69,6 +69,7 @@ static void xsl_fault_error(void *data, u64 addr, u64 dsisr) int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) { int rc; + unsigned long pidr = 0; // Locks both status & tidr mutex_lock(&ctx->status_mutex); @@ -77,9 +78,11 @@ int ocxl_context_attach(struct ocxl_context *ctx, u64 amr, struct mm_struct *mm) goto out; } - rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, - mm->context.id, ctx->tidr, amr, mm, - xsl_fault_error, ctx); + if (mm) + pidr = mm->context.id; + + rc = ocxl_link_add_pe(ctx->afu->fn->link, ctx->pasid, pidr, ctx->tidr, + amr, mm, xsl_fault_error, ctx); if (rc) goto out; diff --git a/drivers/misc/ocxl/link.c b/drivers/misc/ocxl/link.c index cce5b0d64505..43542f124807 100644 --- a/drivers/misc/ocxl/link.c +++ b/drivers/misc/ocxl/link.c @@ -523,7 +523,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, pe->amr = cpu_to_be64(amr); pe->software_state = cpu_to_be32(SPA_PE_VALID); - mm_context_add_copro(mm); + if (mm) + mm_context_add_copro(mm); Same as above, we should add a comment here in the driver code that a kernel context is ok because invalidations are global. We also need a new check in xsl_fault_handler(). A valid kernel address shouldn't fault, but it's still possible for the FPGA to try accessing a bogus kernel address. In which case, xsl_fault_handler() would be entered, with a valid fault context. We'll find pe_data in the tree based on the valid pe_handle, but pe_data->mm will be NULL. In that, we can return early, acknowledging the interrupt with ADDRESS_ERROR value (like we do if pe_data is not found in the tree). 
Fred /* * Barrier is to make sure PE is visible in the SPA before it * is used by the device. It also helps with the global TLBI @@ -546,7 +547,8 @@ int ocxl_link_add_pe(void *link_handle, int pasid, u32 pidr, u32 tidr, * have a reference on mm_users. Incrementing mm_count solves * the problem. */ - mmgrab(mm); + if (mm) + mmgrab(mm); trace_ocxl_context_add(current->pid, spa->spa_mem, pasid, pidr, tidr); unlock: mutex_unlock(&spa->spa_lock); @@ -652,8 +654,10 @@ int ocxl_link_remove_pe(void *link_handle, int pasid) if (!pe_data) { WARN(1, "Couldn't find pe data when removing PE\n"); } else { - mm_context_remove_copro(pe_data->mm); - mmdrop(pe_data->mm); + if (pe_data->mm) { + mm_context_remove_copro(pe_data->mm); + mmdrop(pe_data->mm); + } kfree_rcu(pe_data, rcu); } unlock:
Re: [RFC PATCH v0] powerpc: Fix BUG_ON during memory unplug on radix
Bharata B Rao's on June 19, 2019 5:45 pm: > We hit the following BUG_ON when memory hotplugged before reboot > is unplugged after reboot: > > kernel BUG at arch/powerpc/mm/pgtable-frag.c:113! > > remove_pagetable+0x594/0x6a0 > (unreliable) > remove_pagetable+0x94/0x6a0 > vmemmap_free+0x394/0x410 > sparse_remove_one_section+0x26c/0x2e8 > __remove_pages+0x428/0x540 > arch_remove_memory+0xd0/0x170 > __remove_memory+0xd4/0x1a0 > dlpar_remove_lmb+0xbc/0x110 > dlpar_memory+0xa80/0xd20 > handle_dlpar_errorlog+0xa8/0x160 > pseries_hp_work_fn+0x2c/0x60 > process_one_work+0x46c/0x860 > worker_thread+0x364/0x5e0 > kthread+0x1b0/0x1c0 > ret_from_kernel_thread+0x5c/0x68 > > This occurs because, during reboot-after-hotplug, the hotplugged > memory range gets initialized as regular memory and page > tables are setup using memblock allocator. This means that we > wouldn't have initialized the PMD or PTE fragment count for > those PMD or PTE pages. > > Fixing this includes 3 aspects: > > - Walk the init_mm page tables from mem_init() and initialize > the PMD and PTE fragment counts appropriately. > - When we do early allocation of PMD (and PGD as well) pages, > allocate in page size PAGE_SIZE granularity so that we are > sure that the complete page is available for us to set the > fragment count which is part of struct page. > - When PMD or PTE page is freed, check if it comes from memblock > allocator and free it appropriately. 
> > Reported-by: Srikanth Aithal > Signed-off-by: Bharata B Rao > --- > arch/powerpc/include/asm/book3s/64/radix.h | 1 + > arch/powerpc/include/asm/sparsemem.h | 1 + > arch/powerpc/mm/book3s64/pgtable.c | 12 +++- > arch/powerpc/mm/book3s64/radix_pgtable.c | 67 +- > arch/powerpc/mm/mem.c | 5 ++ > arch/powerpc/mm/pgtable-frag.c | 5 +- > 6 files changed, 87 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h > b/arch/powerpc/include/asm/book3s/64/radix.h > index 574eca33f893..4320f2790e8d 100644 > --- a/arch/powerpc/include/asm/book3s/64/radix.h > +++ b/arch/powerpc/include/asm/book3s/64/radix.h > @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void) > #ifdef CONFIG_MEMORY_HOTPLUG > int radix__create_section_mapping(unsigned long start, unsigned long end, > int nid); > int radix__remove_section_mapping(unsigned long start, unsigned long end); > +void radix__fixup_pgtable_fragments(void); > #endif /* CONFIG_MEMORY_HOTPLUG */ > #endif /* __ASSEMBLY__ */ > #endif > diff --git a/arch/powerpc/include/asm/sparsemem.h > b/arch/powerpc/include/asm/sparsemem.h > index 3192d454a733..e662f9232d35 100644 > --- a/arch/powerpc/include/asm/sparsemem.h > +++ b/arch/powerpc/include/asm/sparsemem.h > @@ -15,6 +15,7 @@ > #ifdef CONFIG_MEMORY_HOTPLUG > extern int create_section_mapping(unsigned long start, unsigned long end, > int nid); > extern int remove_section_mapping(unsigned long start, unsigned long end); > +void fixup_pgtable_fragments(void); > > #ifdef CONFIG_PPC_BOOK3S_64 > extern int resize_hpt_for_hotplug(unsigned long new_mem_size); > diff --git a/arch/powerpc/mm/book3s64/pgtable.c > b/arch/powerpc/mm/book3s64/pgtable.c > index 01bc9663360d..7efe9cc16b39 100644 > --- a/arch/powerpc/mm/book3s64/pgtable.c > +++ b/arch/powerpc/mm/book3s64/pgtable.c > @@ -186,6 +186,13 @@ int __meminit remove_section_mapping(unsigned long > start, unsigned long end) > > return hash__remove_section_mapping(start, end); > } > + > +void 
fixup_pgtable_fragments(void) > +{ > + if (radix_enabled()) > + radix__fixup_pgtable_fragments(); > +} > + > #endif /* CONFIG_MEMORY_HOTPLUG */ > > void __init mmu_partition_table_init(void) > @@ -320,7 +327,10 @@ void pmd_fragment_free(unsigned long *pmd) > BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); > if (atomic_dec_and_test(&page->pt_frag_refcount)) { > pgtable_pmd_page_dtor(page); > - __free_page(page); > + if (PageReserved(page)) > + free_reserved_page(page); Hmm. Rather than adding this special case here, I wonder if you can just go along in your fixup walk and convert all these pages to non-reserved pages? ClearPageReserved ; init_page_count ; adjust_managed_page_count ; should do the trick, right? > + else > + __free_page(page); Thanks, Nick
Re: [PATCH v2 1/1] cpuidle-powernv : forced wakeup for stop states
Abhishek's on June 19, 2019 7:08 pm: > Hi Nick, > > Thanks for the review. Some replies below. > > On 06/19/2019 09:53 AM, Nicholas Piggin wrote: >> Abhishek Goel's on June 17, 2019 7:56 pm: >>> Currently, the cpuidle governors determine what idle state a idling CPU >>> should enter into based on heuristics that depend on the idle history on >>> that CPU. Given that no predictive heuristic is perfect, there are cases >>> where the governor predicts a shallow idle state, hoping that the CPU will >>> be busy soon. However, if no new workload is scheduled on that CPU in the >>> near future, the CPU may end up in the shallow state. >>> >>> This is problematic, when the predicted state in the aforementioned >>> scenario is a shallow stop state on a tickless system. As we might get >>> stuck into shallow states for hours, in absence of ticks or interrupts. >>> >>> To address this, We forcefully wakeup the cpu by setting the >>> decrementer. The decrementer is set to a value that corresponds with the >>> residency of the next available state. Thus firing up a timer that will >>> forcefully wakeup the cpu. Few such iterations will essentially train the >>> governor to select a deeper state for that cpu, as the timer here >>> corresponds to the next available cpuidle state residency. Thus, cpu will >>> eventually end up in the deepest possible state. >>> >>> Signed-off-by: Abhishek Goel >>> --- >>> >>> Auto-promotion >>> v1 : started as auto promotion logic for cpuidle states in generic >>> driver >>> v2 : Removed timeout_needed and rebased the code to upstream kernel >>> Forced-wakeup >>> v1 : New patch with name of forced wakeup started >>> v2 : Extending the forced wakeup logic for all states. Setting the >>> decrementer instead of queuing up a hrtimer to implement the logic. 
>>> >>> drivers/cpuidle/cpuidle-powernv.c | 38 +++ >>> 1 file changed, 38 insertions(+) >>> >>> diff --git a/drivers/cpuidle/cpuidle-powernv.c >>> b/drivers/cpuidle/cpuidle-powernv.c >>> index 84b1ebe212b3..bc9ca18ae7e3 100644 >>> --- a/drivers/cpuidle/cpuidle-powernv.c >>> +++ b/drivers/cpuidle/cpuidle-powernv.c >>> @@ -46,6 +46,26 @@ static struct stop_psscr_table >>> stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly >>> static u64 default_snooze_timeout __read_mostly; >>> static bool snooze_timeout_en __read_mostly; >>> >>> +static u64 forced_wakeup_timeout(struct cpuidle_device *dev, >>> +struct cpuidle_driver *drv, >>> +int index) >>> +{ >>> + int i; >>> + >>> + for (i = index + 1; i < drv->state_count; i++) { >>> + struct cpuidle_state *s = &drv->states[i]; >>> + struct cpuidle_state_usage *su = &dev->states_usage[i]; >>> + >>> + if (s->disabled || su->disable) >>> + continue; >>> + >>> + return (s->target_residency + 2 * s->exit_latency) * >>> + tb_ticks_per_usec; >>> + } >>> + >>> + return 0; >>> +} >> It would be nice to not have this kind of loop iteration in the >> idle fast path. Can we add a flag or something to the idle state? > Currently, we do not have any callback notification or some feedback that > notifies the driver everytime some state is enabled/disabled. So we have > to parse everytime to get the next enabled state. Ahh, that's why you're doing that. > Are you suggesting to > add something like next_enabled_state in cpuidle state structure itself > which will be updated when a state is enabled or disabled? Hmm, I guess it normally should not iterate over more than one state unless some idle states are disabled. What would have been nice is each state just have its own timeout field with ticks already calculated, if that could be updated when a state is enabled or disabled. How hard is that to add to the cpuidle core? 
>>> + >>> static u64 get_snooze_timeout(struct cpuidle_device *dev, >>> struct cpuidle_driver *drv, >>> int index) >>> @@ -144,8 +164,26 @@ static int stop_loop(struct cpuidle_device *dev, >>> struct cpuidle_driver *drv, >>> int index) >>> { >>> + u64 dec_expiry_tb, dec, timeout_tb, forced_wakeup; >>> + >>> + dec = mfspr(SPRN_DEC); >>> + timeout_tb = forced_wakeup_timeout(dev, drv, index); >>> + forced_wakeup = 0; >>> + >>> + if (timeout_tb && timeout_tb < dec) { >>> + forced_wakeup = 1; >>> + dec_expiry_tb = mftb() + dec; >>> + } >> The compiler probably can't optimise away the SPR manipulations so try >> to avoid them if possible. > Are you suggesting something like set_dec_before_idle?(in line with > what you have suggested to do after idle, reset_dec_after_idle) I should have been clear, I meant don't mfspr(SPRN_DEC) until you have tested timeout_tb. >>> + >>> + if (forced_wakeup) >>> + mtspr(SPRN_DEC, timeout_tb); >> This should just be put in the above 'if'. > Fair point.
Re: [PATCH 06/28] powerpc/64s/exception: remove the "extra" macro parameter
Nicholas Piggin writes: > Nicholas Piggin's on June 12, 2019 12:30 am: >> @@ -265,7 +275,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) >> EXC_REAL_END(machine_check, 0x200, 0x100) >> EXC_VIRT_NONE(0x4200, 0x100) >> TRAMP_REAL_BEGIN(machine_check_common_early) >> -EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) >> +EXCEPTION_PROLOG_1 EXC_HV, PACA_EXMC, 0, 0x200 >> /* >> * Register contents: >> * R13 = PACA > > There is a little bug here, machine check is an EXC_STD exception. It > does not show up as a generated code problem because EXCEPTION_PROLOG_1 > does not actually do anything with this parameter if KVM is false, > which it is here. > > Still, it's wrong. I may just resend the series, because it caused a > few conflicts in subsequent patches, and I have a few more to add to > the end. OK. I'll pull the series out for now. cheers
Re: [PATCH 4/7] powerpc/ftrace: Additionally nop out the preceding mflr with -mprofile-kernel
Nicholas Piggin wrote: Michael Ellerman's on June 19, 2019 3:14 pm: Hi Naveen, Sorry I meant to reply to this earlier .. :/ No problem. Thanks for the questions. "Naveen N. Rao" writes: With -mprofile-kernel, gcc emits 'mflr r0', followed by 'bl _mcount' to enable function tracing and profiling. So far, with dynamic ftrace, we used to only patch out the branch to _mcount(). However, mflr is executed by the branch unit that can only execute one per cycle on POWER9 and shared with branches, so it would be nice to avoid it where possible. We cannot simply nop out the mflr either. When enabling function tracing, there can be a race if tracing is enabled when some thread was interrupted after executing a nop'ed out mflr. In this case, the thread would execute the now-patched-in branch to _mcount() without having executed the preceding mflr. To solve this, we now enable function tracing in 2 steps: patch in the mflr instruction, use synchronize_rcu_tasks() to ensure all existing threads make progress, and then patch in the branch to _mcount(). We override ftrace_replace_code() with a powerpc64 variant for this purpose. According to the ISA we're not allowed to patch mflr at runtime. See the section on "CMODX". According to "quasi patch class" engineering note, we can patch anything with a preferred nop. But that's written as an optional facility, which we don't have a feature to test for. Hmm... I wonder what the implications are. We've been patching in a 'trap' for kprobes for a long time now, along with having to patch back the original instruction (which can be anything), when the probe is removed. I'm also not convinced the ordering between the two patches is guaranteed by the ISA, given that there's possibly no isync on the other CPU. Will they go through a context synchronizing event? synchronize_rcu_tasks() should ensure a thread is scheduled away, but I'm not actually sure it guarantees CSI if it's kernel->kernel. 
Could do a smp_call_function to do the isync on each CPU to be sure. Good point. Per Documentation/RCU/Design/Requirements/Requirements.html#Tasks RCU: "The solution, in the form of Tasks RCU, is to have implicit read-side critical sections that are delimited by voluntary context switches, that is, calls to schedule(), cond_resched(), and synchronize_rcu_tasks(). In addition, transitions to and from userspace execution also delimit tasks-RCU read-side critical sections." I suppose transitions to/from userspace, as well as calls to schedule(), result in a context synchronizing instruction being executed. But, if some tasks call cond_resched() and synchronize_rcu_tasks(), we probably won't have a CSI executed. Also: "In CONFIG_PREEMPT=n kernels, trampolines cannot be preempted, so these APIs map to call_rcu(), synchronize_rcu(), and rcu_barrier(), respectively." In this scenario as well, I think we won't have a CSI executed in case of cond_resched(). Should we enhance patch_instruction() to handle that? - Naveen
RE: [PATCH v4 1/3] PM: wakeup: Add routine to help fetch wakeup source object.
Hi Rafael, On Wednesday, June 19, 2019 06:45, Rafael J. Wysocki wrote: > > On Monday, May 20, 2019 11:52:36 AM CEST Ran Wang wrote: > > Some user might want to go through all registered wakeup sources and > > doing things accordingly. For example, SoC PM driver might need to do > > HW programming to prevent powering down specific IP which wakeup > > source depending on. And is user's responsibility to identify if this > > wakeup source he is interested in. > > I guess the idea here is that you need to walk wakeup devices and you noticed > that there was a wakeup source object for each of them and those wakeup > source objects were on a list, so you could walk wakeup devices by walking the > list of wakeup source objects. > > That is fair enough, but the changelog above doesn't even talk about that. How about this: "Providing a API for helping walk through all registered wakeup devices on the list. It will be useful for SoC PMU driver to know which device will work as a wakeup source then do specific HW programming for them." > > Signed-off-by: Ran Wang > > --- > > Change in v4: > > - None. > > > > Change in v3: > > - Adjust indentation of *attached_dev;. > > > > Change in v2: > > - None. > > > > drivers/base/power/wakeup.c | 18 ++ > > include/linux/pm_wakeup.h |3 +++ > > 2 files changed, 21 insertions(+), 0 deletions(-) > > > > diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c > > index 5b2b6a0..6904485 100644 > > --- a/drivers/base/power/wakeup.c > > +++ b/drivers/base/power/wakeup.c > > @@ -14,6 +14,7 @@ > > #include > > #include > > #include > > +#include > > #include > > #include > > > > @@ -226,6 +227,22 @@ void wakeup_source_unregister(struct > wakeup_source *ws) > > } > > } > > EXPORT_SYMBOL_GPL(wakeup_source_unregister); > > +/** > > + * wakeup_source_get_next - Get next wakeup source from the list > > + * @ws: Previous wakeup source object, null means caller want first one. 
> > + */ > > +struct wakeup_source *wakeup_source_get_next(struct wakeup_source > > +*ws) { > > + struct list_head *ws_head = &wakeup_sources; > > + > > + if (ws) > > + return list_next_or_null_rcu(ws_head, &ws->entry, > > + struct wakeup_source, entry); > > + else > > + return list_entry_rcu(ws_head->next, > > + struct wakeup_source, entry); > > +} > > +EXPORT_SYMBOL_GPL(wakeup_source_get_next); > > This needs to be arranged along the lines of > wakeup_sources_stats_seq_start/next/stop() > because of the SRCU protection of the list. Got it, how about this: 230 /** 231 * wakeup_source_get_next - Get next wakeup source from the list 232 * @ws: Previous wakeup source object, null means caller want first one. 233 */ 234 struct wakeup_source *wakeup_source_get_next(struct wakeup_source *ws) 235 { 236 struct list_head *ws_head = &wakeup_sources; 237 struct wakeup_source *next_ws = NULL; 238 int idx; 239 240 idx = srcu_read_lock(&wakeup_srcu); 241 if (ws) 242 next_ws = list_next_or_null_rcu(ws_head, &ws->entry, 243 struct wakeup_source, entry); 244 else 245 next_ws = list_entry_rcu(ws_head->next, 246 struct wakeup_source, entry); 247 srcu_read_unlock(&wakeup_srcu, idx); 248 249 return next_ws; 250 } 251 EXPORT_SYMBOL_GPL(wakeup_source_get_next); > > > > /** > > * device_wakeup_attach - Attach a wakeup source object to a device object. > > @@ -242,6 +259,7 @@ static int device_wakeup_attach(struct device *dev, > struct wakeup_source *ws) > > return -EEXIST; > > } > > dev->power.wakeup = ws; > > + ws->attached_dev = dev; > > if (dev->power.wakeirq) > > device_wakeup_attach_irq(dev, dev->power.wakeirq); > >
Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
On Wed, 19 Jun 2019 13:26:37 +0530 "Naveen N. Rao" wrote: > > In include/ftrace.h: > > > > #ifndef FTRACE_IP_EXTENSION > > # define FTRACE_IP_EXTENSION0 > > #endif > > > > > > In arch/powerpc/include/asm/ftrace.h > > > > #define FTRACE_IP_EXTENSION MCOUNT_INSN_SIZE > > > > > > Then we can just have: > > > > unsigned long ftrace_location(unsigned long ip) > > { > > return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION); > > } > > Thanks, that's indeed nice. I hope you don't mind me adding your SOB for > that. Actually, it's best not to put a SOB by anyone other than yourself. It actually has legal meaning. In this case, please add: Suggested-by: Steven Rostedt (VMware) Thanks! -- Steve
Re: [PATCH v2 1/1] cpuidle-powernv : forced wakeup for stop states
Hi Nick, Thanks for the review. Some replies below. On 06/19/2019 09:53 AM, Nicholas Piggin wrote: Abhishek Goel's on June 17, 2019 7:56 pm: Currently, the cpuidle governors determine what idle state a idling CPU should enter into based on heuristics that depend on the idle history on that CPU. Given that no predictive heuristic is perfect, there are cases where the governor predicts a shallow idle state, hoping that the CPU will be busy soon. However, if no new workload is scheduled on that CPU in the near future, the CPU may end up in the shallow state. This is problematic, when the predicted state in the aforementioned scenario is a shallow stop state on a tickless system. As we might get stuck into shallow states for hours, in absence of ticks or interrupts. To address this, We forcefully wakeup the cpu by setting the decrementer. The decrementer is set to a value that corresponds with the residency of the next available state. Thus firing up a timer that will forcefully wakeup the cpu. Few such iterations will essentially train the governor to select a deeper state for that cpu, as the timer here corresponds to the next available cpuidle state residency. Thus, cpu will eventually end up in the deepest possible state. Signed-off-by: Abhishek Goel --- Auto-promotion v1 : started as auto promotion logic for cpuidle states in generic driver v2 : Removed timeout_needed and rebased the code to upstream kernel Forced-wakeup v1 : New patch with name of forced wakeup started v2 : Extending the forced wakeup logic for all states. Setting the decrementer instead of queuing up a hrtimer to implement the logic. 
drivers/cpuidle/cpuidle-powernv.c | 38 +++ 1 file changed, 38 insertions(+) diff --git a/drivers/cpuidle/cpuidle-powernv.c b/drivers/cpuidle/cpuidle-powernv.c index 84b1ebe212b3..bc9ca18ae7e3 100644 --- a/drivers/cpuidle/cpuidle-powernv.c +++ b/drivers/cpuidle/cpuidle-powernv.c @@ -46,6 +46,26 @@ static struct stop_psscr_table stop_psscr_table[CPUIDLE_STATE_MAX] __read_mostly static u64 default_snooze_timeout __read_mostly; static bool snooze_timeout_en __read_mostly; +static u64 forced_wakeup_timeout(struct cpuidle_device *dev, +struct cpuidle_driver *drv, +int index) +{ + int i; + + for (i = index + 1; i < drv->state_count; i++) { + struct cpuidle_state *s = &drv->states[i]; + struct cpuidle_state_usage *su = &dev->states_usage[i]; + + if (s->disabled || su->disable) + continue; + + return (s->target_residency + 2 * s->exit_latency) * + tb_ticks_per_usec; + } + + return 0; +} It would be nice to not have this kind of loop iteration in the idle fast path. Can we add a flag or something to the idle state? Currently, we do not have any callback notification or some feedback that notifies the driver everytime some state is enabled/disabled. So we have to parse everytime to get the next enabled state. Are you suggesting to add something like next_enabled_state in cpuidle state structure itself which will be updated when a state is enabled or disabled? + static u64 get_snooze_timeout(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) @@ -144,8 +164,26 @@ static int stop_loop(struct cpuidle_device *dev, struct cpuidle_driver *drv, int index) { + u64 dec_expiry_tb, dec, timeout_tb, forced_wakeup; + + dec = mfspr(SPRN_DEC); + timeout_tb = forced_wakeup_timeout(dev, drv, index); + forced_wakeup = 0; + + if (timeout_tb && timeout_tb < dec) { + forced_wakeup = 1; + dec_expiry_tb = mftb() + dec; + } The compiler probably can't optimise away the SPR manipulations so try to avoid them if possible. 
Are you suggesting something like set_dec_before_idle?(in line with what you have suggested to do after idle, reset_dec_after_idle) + + if (forced_wakeup) + mtspr(SPRN_DEC, timeout_tb); This should just be put in the above 'if'. Fair point. + power9_idle_type(stop_psscr_table[index].val, stop_psscr_table[index].mask); + + if (forced_wakeup) + mtspr(SPRN_DEC, dec_expiry_tb - mftb()); This will sometimes go negative and result in another timer interrupt. It also breaks irq work (which can be set here by machine check, I believe). May need to implement some timer code to do this for you. static void reset_dec_after_idle(void) { u64 now; u64 *next_tb; if (test_irq_work_pending()) return; now = mftb; next_tb = this_cpu_ptr(&decrementers_next_tb); if (now >= *next_tb) return; set_dec(*next_tb - now); if (test_irq_work_pen
Re: [RFC PATCH v0] powerpc: Fix BUG_ON during memory unplug on radix
Bharata B Rao writes: > We hit the following BUG_ON when memory hotplugged before reboot > is unplugged after reboot: > > kernel BUG at arch/powerpc/mm/pgtable-frag.c:113! > > remove_pagetable+0x594/0x6a0 > (unreliable) > remove_pagetable+0x94/0x6a0 > vmemmap_free+0x394/0x410 > sparse_remove_one_section+0x26c/0x2e8 > __remove_pages+0x428/0x540 > arch_remove_memory+0xd0/0x170 > __remove_memory+0xd4/0x1a0 > dlpar_remove_lmb+0xbc/0x110 > dlpar_memory+0xa80/0xd20 > handle_dlpar_errorlog+0xa8/0x160 > pseries_hp_work_fn+0x2c/0x60 > process_one_work+0x46c/0x860 > worker_thread+0x364/0x5e0 > kthread+0x1b0/0x1c0 > ret_from_kernel_thread+0x5c/0x68 > > This occurs because, during reboot-after-hotplug, the hotplugged > memory range gets initialized as regular memory and page > tables are setup using memblock allocator. This means that we > wouldn't have initialized the PMD or PTE fragment count for > those PMD or PTE pages. > > Fixing this includes 3 aspects: > > - Walk the init_mm page tables from mem_init() and initialize > the PMD and PTE fragment counts appropriately. > - When we do early allocation of PMD (and PGD as well) pages, > allocate in page size PAGE_SIZE granularity so that we are > sure that the complete page is available for us to set the > fragment count which is part of struct page. That is an important change now. For early page table we now allocate PAGE_SIZE tables and hencec we consider then as pages with fragment count 1. You also may want to explain here why. I guess the challenge is due to the fact that we can't clearly control how the rest of the page will get used and we are not sure they all will be allocated for backing page table pages. > - When PMD or PTE page is freed, check if it comes from memblock > allocator and free it appropriately. 
> > Reported-by: Srikanth Aithal > Signed-off-by: Bharata B Rao > --- > arch/powerpc/include/asm/book3s/64/radix.h | 1 + > arch/powerpc/include/asm/sparsemem.h | 1 + > arch/powerpc/mm/book3s64/pgtable.c | 12 +++- > arch/powerpc/mm/book3s64/radix_pgtable.c | 67 +- > arch/powerpc/mm/mem.c | 5 ++ > arch/powerpc/mm/pgtable-frag.c | 5 +- > 6 files changed, 87 insertions(+), 4 deletions(-) > > diff --git a/arch/powerpc/include/asm/book3s/64/radix.h > b/arch/powerpc/include/asm/book3s/64/radix.h > index 574eca33f893..4320f2790e8d 100644 > --- a/arch/powerpc/include/asm/book3s/64/radix.h > +++ b/arch/powerpc/include/asm/book3s/64/radix.h > @@ -285,6 +285,7 @@ static inline unsigned long radix__get_tree_size(void) > #ifdef CONFIG_MEMORY_HOTPLUG > int radix__create_section_mapping(unsigned long start, unsigned long end, > int nid); > int radix__remove_section_mapping(unsigned long start, unsigned long end); > +void radix__fixup_pgtable_fragments(void); > #endif /* CONFIG_MEMORY_HOTPLUG */ > #endif /* __ASSEMBLY__ */ > #endif > diff --git a/arch/powerpc/include/asm/sparsemem.h > b/arch/powerpc/include/asm/sparsemem.h > index 3192d454a733..e662f9232d35 100644 > --- a/arch/powerpc/include/asm/sparsemem.h > +++ b/arch/powerpc/include/asm/sparsemem.h > @@ -15,6 +15,7 @@ > #ifdef CONFIG_MEMORY_HOTPLUG > extern int create_section_mapping(unsigned long start, unsigned long end, > int nid); > extern int remove_section_mapping(unsigned long start, unsigned long end); > +void fixup_pgtable_fragments(void); > > #ifdef CONFIG_PPC_BOOK3S_64 > extern int resize_hpt_for_hotplug(unsigned long new_mem_size); > diff --git a/arch/powerpc/mm/book3s64/pgtable.c > b/arch/powerpc/mm/book3s64/pgtable.c > index 01bc9663360d..7efe9cc16b39 100644 > --- a/arch/powerpc/mm/book3s64/pgtable.c > +++ b/arch/powerpc/mm/book3s64/pgtable.c > @@ -186,6 +186,13 @@ int __meminit remove_section_mapping(unsigned long > start, unsigned long end) > > return hash__remove_section_mapping(start, end); > } > + > +void 
fixup_pgtable_fragments(void) > +{ > + if (radix_enabled()) > + radix__fixup_pgtable_fragments(); > +} > + > #endif /* CONFIG_MEMORY_HOTPLUG */ > > void __init mmu_partition_table_init(void) > @@ -320,7 +327,10 @@ void pmd_fragment_free(unsigned long *pmd) > BUG_ON(atomic_read(&page->pt_frag_refcount) <= 0); > if (atomic_dec_and_test(&page->pt_frag_refcount)) { > pgtable_pmd_page_dtor(page); > - __free_page(page); > + if (PageReserved(page)) > + free_reserved_page(page); > + else > + __free_page(page); > } > } > > diff --git a/arch/powerpc/mm/book3s64/radix_pgtable.c > b/arch/powerpc/mm/book3s64/radix_pgtable.c > index 273ae66a9a45..402e8da28cab 100644 > --- a/arch/powerpc/mm/book3s64/radix_pgtable.c > +++ b/arch/powerpc/mm/book3s64/radix_pgtable.c > @@ -32,6 +32,69 @@ > unsigned int mmu_pid_bits; > unsigned int mmu_base_pid; > > +static void fixup_pmd_fragments(pmd_t *pmd) > +{ > + int i; > + > + for (i = 0; i < PTRS_PER_PMD
Re: [PATCH 06/28] powerpc/64s/exception: remove the "extra" macro parameter
Nicholas Piggin's on June 12, 2019 12:30 am: > @@ -265,7 +275,7 @@ ALT_FTR_SECTION_END_IFSET(CPU_FTR_HVMODE) > EXC_REAL_END(machine_check, 0x200, 0x100) > EXC_VIRT_NONE(0x4200, 0x100) > TRAMP_REAL_BEGIN(machine_check_common_early) > - EXCEPTION_PROLOG_1(PACA_EXMC, NOTEST, 0x200) > + EXCEPTION_PROLOG_1 EXC_HV, PACA_EXMC, 0, 0x200 > /* >* Register contents: >* R13 = PACA There is a little bug here, machine check is an EXC_STD exception. It does not show up as a generated code problem because EXCEPTION_PROLOG_1 does not actually do anything with this parameter if KVM is false, which it is here. Still, it's wrong. I may just resend the series, because it caused a few conflicts in subsequent patches, and I have a few more to add to the end. Thanks, Nick
Re: [PATCH 5/7] powerpc/ftrace: Update ftrace_location() for powerpc -mprofile-kernel
Steven Rostedt wrote: On Tue, 18 Jun 2019 23:53:11 +0530 "Naveen N. Rao" wrote: Naveen N. Rao wrote: > Steven Rostedt wrote: >> On Tue, 18 Jun 2019 20:17:04 +0530 >> "Naveen N. Rao" wrote: >> >>> @@ -1551,7 +1551,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) >>>key.flags = end;/* overload flags, as it is unsigned long */ >>> >>> for (pg = ftrace_pages_start; pg; pg = pg->next) { >>> - if (end < pg->records[0].ip || >>> + if (end <= pg->records[0].ip || >> >> This breaks the algorithm. "end" is inclusive. That is, if you look for >> a single byte, where "start" and "end" are the same, and it happens to >> be the first ip on the pg page, it will be skipped, and not found. > > Thanks. It looks like I should be over-riding ftrace_location() instead. > I will update this patch. I think I will have ftrace own the two instruction range, regardless of whether the preceding instruction is a 'mflr r0' or not. This simplifies things and I don't see an issue with it as of now. I will do more testing to confirm. - Naveen --- a/arch/powerpc/kernel/trace/ftrace.c +++ b/arch/powerpc/kernel/trace/ftrace.c @@ -951,6 +951,16 @@ void arch_ftrace_update_code(int command) } #ifdef CONFIG_MPROFILE_KERNEL +/* + * We consider two instructions -- 'mflr r0', 'bl _mcount' -- to be part + * of ftrace. When checking for the first instruction, we want to include + * the next instruction in the range check. + */ +unsigned long ftrace_location(unsigned long ip) +{ + return ftrace_location_range(ip, ip + MCOUNT_INSN_SIZE); +} + /* Returns 1 if we patched in the mflr */ static int __ftrace_make_call_prep(struct dyn_ftrace *rec) { diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 21d8e201ee80..122e2bb4a739 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1573,7 +1573,7 @@ unsigned long ftrace_location_range(unsigned long start, unsigned long end) * the function tracer. 
It checks the ftrace internal tables to * determine if the address belongs or not. */ -unsigned long ftrace_location(unsigned long ip) +unsigned long __weak ftrace_location(unsigned long ip) { return ftrace_location_range(ip, ip); } Actually, instead of making this a weak function, let's do this: In include/ftrace.h: #ifndef FTRACE_IP_EXTENSION # define FTRACE_IP_EXTENSION0 #endif In arch/powerpc/include/asm/ftrace.h #define FTRACE_IP_EXTENSION MCOUNT_INSN_SIZE Then we can just have: unsigned long ftrace_location(unsigned long ip) { return ftrace_location_range(ip, ip + FTRACE_IP_EXTENSION); } Thanks, that's indeed nice. I hope you don't mind me adding your SOB for that. - Naveen
Re: [PATCH 0/5] Powerpc/hw-breakpoint: Fixes plus Code refactor
On 6/18/19 11:47 AM, Michael Neuling wrote: > On Tue, 2019-06-18 at 08:01 +0200, Christophe Leroy wrote: >> >> Le 18/06/2019 à 06:27, Ravi Bangoria a écrit : >>> patch 1-3: Code refactor >>> patch 4: Speedup disabling breakpoint >>> patch 5: Fix length calculation for unaligned targets >> >> While you are playing with hw breakpoints, did you have a look at >> https://github.com/linuxppc/issues/issues/38 ? > > Agreed and also: > > https://github.com/linuxppc/issues/issues/170 > > https://github.com/linuxppc/issues/issues/128 > Yes, I'm aware of those. Will have a look at them.