For an SEA exception, the kernel needs to take some action to recover from
the memory error, such as isolating the poisoned page and killing the
failing thread, which is done in memory_failure().

During our tests, the failing thread could not be killed due to issue [1].
Here, I temporarily work around this issue by sending signals to user
processes in do_sea(). After [1] is merged, this patch should be rolled
back; otherwise the SIGBUS will be sent repeatedly.

[1]https://lore.kernel.org/lkml/20240204080144.7977-1-xuesh...@linux.alibaba.com/

Signed-off-by: Tong Tiangen <tongtian...@huawei.com>
---
 arch/arm64/mm/fault.c | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index 2dc65f99d389..37d7e74d9aee 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -730,9 +730,6 @@ static int do_sea(unsigned long far, unsigned long esr, 
struct pt_regs *regs)
        const struct fault_info *inf;
        unsigned long siaddr;
 
-       if (do_apei_claim_sea(regs))
-               return 0;
-
        inf = esr_to_fault_info(esr);
        if (esr & ESR_ELx_FnV) {
                siaddr = 0;
@@ -744,6 +741,17 @@ static int do_sea(unsigned long far, unsigned long esr, 
struct pt_regs *regs)
                 */
                siaddr  = untagged_addr(far);
        }
+
+       if (do_apei_claim_sea(regs)) {
+               if (current->mm) {
+                       set_thread_esr(0, esr);
+                       arm64_force_sig_fault(inf->sig, inf->code, siaddr,
+                               "Uncorrected memory error on access \
+                                to poison memory\n");
+               }
+               return 0;
+       }
+
        arm64_notify_die(inf->name, regs, inf->sig, inf->code, siaddr, esr);
 
        return 0;
-- 
2.25.1

Reply via email to