When a synchronous interrupt[1] is taken in a local_irq_disable() region
which has MSR[EE]=1, the interrupt handler will enable MSR[EE] as part
of enabling MSR[RI], for performance and profiling reasons.

[1] Typically a hash fault, but in error cases this could be a page
    fault or facility unavailable as well.

If an asynchronous interrupt hits here and its masked handler requires
MSR[EE] to be cleared (it is a PACA_IRQ_MUST_HARD_MASK interrupt), then
MSR[EE] must remain disabled until that pending interrupt is replayed.
The problem is that the MSR of the original context has MSR[EE]=1, so
returning directly to that causes MSR[EE] to be enabled while the
interrupt is still pending.

This issue was hacked around in the interrupt return code by just
clearing the hard mask to avoid a warning, and taking the masked
interrupt again immediately in the return context, which would disable
MSR[EE]. However in the case of a pending PMI, it is possible that it is
not masked in the calling context so the full handler will be run while
there is a PMI pending, and this confuses the perf code and causes
warnings with its PMI pending management.

Fix this by removing the hack, and adjusting the return MSR if it has
MSR[EE]=1 and there is a PACA_IRQ_MUST_HARD_MASK interrupt pending.

Fixes: 4423eb5ae32e ("powerpc/64/interrupt: make normal synchronous interrupts enable MSR[EE] if possible")
Signed-off-by: Nicholas Piggin <npig...@gmail.com>
---
 arch/powerpc/kernel/interrupt.c    | 10 ---------
 arch/powerpc/kernel/interrupt_64.S | 34 +++++++++++++++++++++++++++---
 2 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/arch/powerpc/kernel/interrupt.c b/arch/powerpc/kernel/interrupt.c
index 7cd6ce3ec423..819d42c0ce9f 100644
--- a/arch/powerpc/kernel/interrupt.c
+++ b/arch/powerpc/kernel/interrupt.c
@@ -593,16 +593,6 @@ notrace unsigned long interrupt_exit_kernel_prepare(struct pt_regs *regs)
 
                if (unlikely(stack_store))
                        __hard_EE_RI_disable();
-               /*
-                * Returning to a kernel context with local irqs disabled.
-                * Here, if EE was enabled in the interrupted context, enable
-                * it on return as well. A problem exists here where a soft
-                * masked interrupt may have cleared MSR[EE] and set HARD_DIS
-                * here, and it will still exist on return to the caller. This
-                * will be resolved by the masked interrupt firing again.
-                */
-               if (regs->msr & MSR_EE)
-                       local_paca->irq_happened &= ~PACA_IRQ_HARD_DIS;
 #endif /* CONFIG_PPC64 */
        }
 
diff --git a/arch/powerpc/kernel/interrupt_64.S b/arch/powerpc/kernel/interrupt_64.S
index 7bab2d7de372..2641bbcb6e49 100644
--- a/arch/powerpc/kernel/interrupt_64.S
+++ b/arch/powerpc/kernel/interrupt_64.S
@@ -569,15 +569,43 @@ _ASM_NOKPROBE_SYMBOL(interrupt_return_\srr\()_kernel)
        ld      r11,SOFTE(r1)
        cmpwi   r11,IRQS_ENABLED
        stb     r11,PACAIRQSOFTMASK(r13)
-       bne     1f
+       beq     .Linterrupt_return_\srr\()_soft_enabled
+
+       /*
+        * Returning to soft-disabled context.
+        * Check if a MUST_HARD_MASK interrupt has become pending, in which
+        * case we need to disable MSR[EE] in the return context.
+        */
+       ld      r12,_MSR(r1)
+       andi.   r10,r12,MSR_EE
+       beq     .Lfast_kernel_interrupt_return_\srr\() // EE already disabled
+       lbz     r11,PACAIRQHAPPENED(r13)
+       andi.   r10,r11,PACA_IRQ_MUST_HARD_MASK
+       beq     1f // No HARD_MASK pending
+
+       /* Must clear MSR_EE from _MSR */
+#ifdef CONFIG_PPC_BOOK3S
+       li      r10,0
+       /* Clear valid before changing _MSR */
+       .ifc \srr,srr
+       stb     r10,PACASRR_VALID(r13)
+       .else
+       stb     r10,PACAHSRR_VALID(r13)
+       .endif
+#endif
+       xori    r12,r12,MSR_EE
+       std     r12,_MSR(r1)
+       b       .Lfast_kernel_interrupt_return_\srr\()
+
+.Linterrupt_return_\srr\()_soft_enabled:
 #ifdef CONFIG_PPC_BOOK3S
        lbz     r11,PACAIRQHAPPENED(r13)
        andi.   r11,r11,(~PACA_IRQ_HARD_DIS)@l
        bne-    interrupt_return_\srr\()_kernel_restart
 #endif
-       li      r11,0
-       stb     r11,PACAIRQHAPPENED(r13) # clear out possible HARD_DIS
 1:
+       li      r11,0
+       stb     r11,PACAIRQHAPPENED(r13) // clear the possible HARD_DIS
 
 .Lfast_kernel_interrupt_return_\srr\():
        cmpdi   cr1,r3,0
-- 
2.23.0

Reply via email to