The commit is pushed to "branch-rh7-3.10.0-1127.18.2.vz7.163.x-ovz" and will 
appear at https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-1127.18.2.vz7.163.27
------>
commit 32457eef8a6680864624049df7ebdbcf53676a93
Author: Andrey Ryabinin <aryabi...@virtuozzo.com>
Date:   Tue Sep 22 10:32:22 2020 +0300

    x86_64: fix crashes due to bogus iret traps handling #PSBM-107794
    
    Our handling of bad irets seems to be broken since the Meltdown fix.
    When an interrupt return to userspace fails, we are running with the
    user CR3, thus faulting in error_sti on access to the 'kernel_stack'
    variable. This continues with a series of faults in the page fault
    handler until we run out of stack and end up with:
    
    PANIC: double fault, error_code: 0x0
    RIP: 0010:[<ffffffff9f1c278d>]  [<ffffffff9f1c278d>] async_page_fault+0xd/0x30
    Call Trace:
    <IRQ>
     ? smp_apic_timer_interrupt+0x48/0x60
     ? apic_timer_interrupt+0x16a/0x170
    <EOI>
     ? bad_area+0x49/0x50
     ? __do_page_fault+0x477/0x500
     ? trace_do_page_fault+0x56/0x150
     ? do_async_page_fault+0x22/0xf0
     ? async_page_fault+0x28/0x30
     ? .E_write_words+0x5c/0x641
     ? putname+0x3d/0x60
     ? timerqueue_add+0x60/0xb0
     ? enqueue_hrtimer+0x25/0x80
     ? hrtimer_start_range_ns+0x1fd/0x3c0
     ? recalc_sigpending+0x1b/0x70
     ? __set_task_blocked+0x41/0xa0
     ? restore_altstack+0x18/0x30
     ? sys_rt_sigreturn+0xe8/0x100
     ? stub_rt_sigreturn+0x48/0x90
    
    Backport the fix for this from RHEL 7.9 beta
    
    https://jira.sw.ru/browse/PSBM-107794
    Signed-off-by: Andrey Ryabinin <aryabi...@virtuozzo.com>
---
 arch/x86/kernel/entry_64.S | 49 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index 3e67d18..91e5503 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -949,12 +949,42 @@ irq_return:
         * when returning from IPI handler.
         */
        INTERRUPT_RETURN
+       _ASM_EXTABLE(irq_return, bad_iret)
 
 #ifdef CONFIG_PARAVIRT
 ENTRY(native_iret)
        iretq
+       _ASM_EXTABLE(native_iret, bad_iret)
 #endif
 
+       .section .fixup,"ax"
+bad_iret:
+       /*
+        * The iret traps when the %cs or %ss being restored is bogus.
+        * We've lost the original trap vector and error code.
+        * #GPF is the most likely one to get for an invalid selector.
+        * So pretend we completed the iret and took the #GPF in user mode.
+        *
+        * We are now running with the kernel GS after exception recovery.
+        * But error_entry expects us to have user GS to match the user %cs,
+        * so swap back.
+        */
+       pushq $0
+
+       /*
+        * If a kernel bug clears user CS bit and in turn we'll skip SWAPGS in
+        * general_protection, skip the SWAPGS here as well so we won't hard reboot.
+        * This increases robustness of bad_iret to kernel bugs as well.
+        */
+       testl $3, 8*2(%rsp)
+       je 1f
+       SWAPGS
+1:
+
+       jmp general_protection
+
+       .previous
+
        /* edi: workmask, edx: work */
 retint_careful:
        CFI_RESTORE_STATE
@@ -1550,15 +1580,16 @@ error_sti:
 
 /*
  * There are two places in the kernel that can potentially fault with
- * usergs. Handle them here.  B stepping K8s sometimes report a
- * truncated RIP for IRET exceptions returning to compat mode. Check
- * for these here too.
+ * usergs. Handle them here. The exception handlers after iret run with
+ * kernel gs again, so don't set the user space flag. B stepping K8s
+ * sometimes report a truncated RIP for IRET exceptions returning to
+ * compat mode. Check for these here too.
  */
 error_kernelspace:
        incl %ebx
        leaq irq_return(%rip),%rcx
        cmpq %rcx,RIP+8(%rsp)
-       je error_bad_iret
+       je error_swapgs
        movl %ecx,%eax  /* zero extend */
        cmpq %rax,RIP+8(%rsp)
        je bstep_iret
@@ -1570,15 +1601,7 @@ error_kernelspace:
 bstep_iret:
        /* Fix truncated RIP */
        movq %rcx,RIP+8(%rsp)
-       /* fall through */
-
-error_bad_iret:
-       SWAPGS
-       mov %rsp,%rdi
-       call fixup_bad_iret
-       mov %rax,%rsp
-       decl %ebx       /* Return to usergs */
-       jmp error_sti
+       jmp error_swapgs
        CFI_ENDPROC
 END(error_entry)
 
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to