When a synchronous interrupt[1] is taken in a local_irq_disable() region
which has MSR[EE]=1, the interrupt handler will enable MSR[EE] as part of
enabling MSR[RI], for performance and profiling reasons.

[1] Typically a hash fault, but in error cases this could be a page fault
    or facility unavailable as well.
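
As a reading aid only, here is a minimal user-space C model of the window
described above, assuming simplified stand-in names and values (MSR_EE and
the *_model helpers are invented for this sketch, not kernel code):
local_irq_disable() only soft-masks interrupts and leaves the hardware
MSR[EE] bit set, so a synchronous interrupt taken in that window sees
MSR[EE]=1 in the interrupted MSR and its handler may hard-enable.

/* Illustration only: simplified stand-in state, not the kernel's. */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define MSR_EE 0x8000ull		/* stand-in for the MSR external-interrupt enable bit */

static uint64_t cpu_msr = MSR_EE;	/* hardware interrupt-enable state */
static bool irq_soft_masked;		/* software mask, cf. powerpc's irq soft-mask */

static void local_irq_disable_model(void)
{
	/* Soft-disable only: the hardware MSR[EE] bit is deliberately left set. */
	irq_soft_masked = true;
}

static void synchronous_interrupt_model(void)
{
	uint64_t interrupted_msr = cpu_msr;	/* what would land in regs->msr */

	/*
	 * The interrupted context had MSR[EE]=1, so the handler may
	 * hard-enable while it runs (for performance and profiling).
	 */
	if (interrupted_msr & MSR_EE)
		cpu_msr |= MSR_EE;

	printf("soft-masked=%d, interrupted MSR[EE]=%d, handler MSR[EE]=%d\n",
	       irq_soft_masked, !!(interrupted_msr & MSR_EE), !!(cpu_msr & MSR_EE));
}

int main(void)
{
	local_irq_disable_model();	/* enter a soft-masked region */
	synchronous_interrupt_model();	/* e.g. a hash fault taken in that region */
	return 0;
}

The model only shows why regs->msr can have MSR[EE]=1 even though the
interrupted code was running with interrupts soft-disabled, which is the
precondition for the problem described next.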
If an asynchronous interrupt hits here and its masked handler requires
MSR[EE] to be cleared (it is a PACA_IRQ_MUST_HARD_MASK interrupt), then
MSR[EE] must remain disabled until that pending interrupt is replayed.
The problem is that the MSR of the original context has MSR[EE]=1, so
returning directly to that causes MSR[EE] to be enabled while the
interrupt is still pending.

This issue was hacked around in the interrupt return code by just
clearing the hard mask to avoid a warning, and taking the masked
interrupt again immediately in the return context, which would disable
MSR[EE]. However, in the case of a pending PMI, it is possible that it is
not masked in the calling context, so the full handler will be run while
there is a PMI pending, and this confuses the perf code and causes
warnings with its PMI pending management.

Fix this by removing the hack, and adjusting the return MSR if it has
MSR[EE]=1 and there is a PACA_IRQ_MUST_HARD_MASK interrupt pending.

Fixes: 4423eb5ae32e ("powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible")
Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
(An illustrative C sketch of the new return-path check follows the
patch.)

 arch/powerpc/kernel/interrupt.c    | 10 ----------
 arch/powerpc/kernel/interrupt_64.S | 34 +++++++++++++++++++++++++++---
 2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 7cd6ce3ec423..819d42c0ce9f 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -593,16 +593,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 		if (unlikely(stack_store))
 			__hard_EE_RI_disable();
-		/*
-		 * Returning to a kernel context with local irqs disabled.
-		 * Here, if EE was enabled in the interrupted context, enable
-		 * it on return as well. A problem exists here where a soft
-		 * masked interrupt may have cleared MSR[EE] and set HARD_DIS
-		 * here, and it will still exist on return to the caller. This
-		 * will be resolved by the masked interrupt firing again.
-		 */
-		if (regs->msr & MSR_EE)
-			local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
 #endif /* CONFIG_PPC64 */
 	}
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 7bab2d7de372..2641bbcb6e49 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -569,15 +569,43 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
 	ld	r11,SOFTE(r1)
 	cmpwi	r11,IRQS_ENABLED
 	stb	r11,PACAIRQSOFTMASK(r13)
-	bne	1f
+	beq	.Linterrupt_return_\srr\()_soft_enabled
+
+	/*
+	 * Returning to soft-disabled context.
+	 * Check if a MUST_HARD_MASK interrupt has become pending, in which
+	 * case we need to disable MSR[EE] in the return context.
+	 */
+	ld	r12,_MSR(r1)
+	andi.	r10,r12,MSR_EE
+	beq	.Lfast_kernel_interrupt_return_\srr\() // EE already disabled
+	lbz	r11,PACAIRQHAPPENED(r13)
+	andi.	r10,r11,PACA_IRQ_MUST_HARD_MASK
+	beq	1f // No HARD_MASK pending
+
+	/* Must clear MSR_EE from _MSR */
+#ifdef CONFIG_PPC_BOOK3S
+	li	r10,0
+	/* Clear valid before changing _MSR */
+	.ifc \srr,srr
+	stb	r10,PACASRR_VALID(r13)
+	.else
+	stb	r10,PACAHSRR_VALID(r13)
+	.endif
+#endif
+	xori	r12,r12,MSR_EE
+	std	r12,_MSR(r1)
+	b	.Lfast_kernel_interrupt_return_\srr\()
+
+.Linterrupt_return_\srr\()_soft_enabled:
 #ifdef CONFIG_PPC_BOOK3S
 	lbz	r11,PACAIRQHAPPENED(r13)
 	andi.	r11,r11,(~PACA_IRQ_HARD_DIS)@l
 	bne-	interrupt_return_\srr\()_kernel_restart
 #endif
-	li	r11,0
-	stb	r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
 1:
+	li	r11,0
+	stb	r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
 
 .Lfast_kernel_interrupt_return_\srr\():
 	cmpdi	cr1,r3,0
-- 
2.23.0
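
For illustration of the behaviour change only, here is a rough,
self-contained C model of the decision the patched interrupt_64.S code
makes when returning to a kernel context. exit_to_kernel_msr(),
fake_regs, fake_paca and all constant values are invented stand-ins for
this sketch, not the kernel's definitions.

/* Illustration only: a user-space model of the new return-path decision. */
#include <stdint.h>
#include <stdio.h>

#define MSR_EE                  0x8000u	/* stand-in value */
#define PACA_IRQ_HARD_DIS       0x01u	/* stand-in value */
#define PACA_IRQ_PMI            0x20u	/* stand-in value */
#define PACA_IRQ_MUST_HARD_MASK 0x24u	/* stand-in: interrupts that need MSR[EE]=0 */

#define IRQS_ENABLED  0
#define IRQS_DISABLED 1

struct fake_regs {
	uint64_t msr;	/* MSR of the context being returned to, cf. regs->msr */
	int softe;	/* soft-mask state to restore, cf. SOFTE(r1) */
};

struct fake_paca {
	uint8_t irq_happened;	/* pending-replay bits, cf. PACAIRQHAPPENED */
};

/* Mirrors the decision made by the new assembly on kernel interrupt exit. */
static uint64_t exit_to_kernel_msr(struct fake_regs *regs, struct fake_paca *paca)
{
	uint64_t msr = regs->msr;

	if (regs->softe == IRQS_ENABLED) {
		paca->irq_happened = 0;	/* clear the possible HARD_DIS */
		return msr;
	}

	/* Returning to a soft-disabled context. */
	if (!(msr & MSR_EE))
		return msr;		/* EE already disabled, nothing to do */

	if (paca->irq_happened & PACA_IRQ_MUST_HARD_MASK)
		return msr & ~(uint64_t)MSR_EE;	/* stay hard-disabled until replay */

	paca->irq_happened = 0;		/* no hard-mask interrupt pending */
	return msr;
}

int main(void)
{
	struct fake_paca paca = { .irq_happened = PACA_IRQ_HARD_DIS | PACA_IRQ_PMI };
	struct fake_regs regs = { .msr = MSR_EE, .softe = IRQS_DISABLED };

	/* A PMI became pending: the MSR we return with must have EE cleared. */
	printf("return MSR[EE]=%d\n", !!(exit_to_kernel_msr(&regs, &paca) & MSR_EE));
	return 0;
}

With a MUST_HARD_MASK interrupt such as a PMI recorded as pending, the
model hands back an MSR with EE cleared, so the interrupt stays
hard-masked until it is replayed; otherwise the saved MSR is returned
unchanged and the possible HARD_DIS bookkeeping is cleared, matching the
assembly above.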