Signed-off-by: Brian Gerst <brge...@gmail.com>
---
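Notes (illustration only, not part of the commit): the C checks added
below reuse the sign-extend-and-compare trick the deleted assembly used
to test whether RIP is canonical.  A minimal user-space sketch of that
trick, assuming 4-level paging (__VIRTUAL_MASK_SHIFT == 47); the names
here are illustrative, not kernel API.  It relies on arithmetic right
shift of signed values, as gcc and clang implement it:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define VIRTUAL_MASK_SHIFT	47	/* bit 47 is the sign bit with 4-level paging */

/*
 * An address is canonical when bits 63..48 all equal bit 47.  Shifting
 * left and then arithmetic-shifting right by the same amount
 * sign-extends from bit 47, so a canonical address comes back unchanged.
 */
static bool is_canonical(uint64_t addr)
{
	unsigned int shift = 64 - (VIRTUAL_MASK_SHIFT + 1);	/* 16 */

	return (uint64_t)((int64_t)(addr << shift) >> shift) == addr;
}

int main(void)
{
	printf("%d\n", is_canonical(0x00007fffffffe000ULL));	/* 1: user address */
	printf("%d\n", is_canonical(0xffff800000000000ULL));	/* 1: kernel address */
	printf("%d\n", is_canonical(0x0000800000000000ULL));	/* 0: non-canonical */
	return 0;
}

Note the shift count is 64 - (SHIFT + 1), i.e. 16 for 4-level paging,
matching the shl/sar pair removed from entry_64.S.
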
 arch/x86/entry/calling.h       | 10 +----
 arch/x86/entry/common.c        | 56 ++++++++++++++++++++++++++-
 arch/x86/entry/entry_64.S      | 71 ++--------------------------------
 arch/x86/include/asm/syscall.h |  2 +-
 4 files changed, 60 insertions(+), 79 deletions(-)
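
Also illustration only: do_syscall_64() now returns bool, which the
x86-64 SysV ABI places in %al; that is why entry_SYSCALL_64 can branch
with "testb %al, %al" on the return value.  A trivial sketch of the same
pattern (hypothetical names, not kernel code):

#include <stdbool.h>
#include <stdio.h>

/* Stand-in with the same return shape as the new do_syscall_64(). */
static bool can_sysret(int ok)
{
	return ok != 0;
}

int main(void)
{
	/*
	 * The bool comes back in %al, so this branch typically compiles
	 * to the same "testb %al, %al; jz ..." pair that entry_SYSCALL_64
	 * now uses after "call do_syscall_64".
	 */
	if (!can_sysret(1))
		puts("IRET slow path");
	else
		puts("SYSRET fast path");
	return 0;
}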

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index 98e4d8886f11c..904477d3e388f 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -147,27 +147,19 @@ For 32-bit we have the following conventions - kernel is built with
 
 .endm
 
-.macro POP_REGS pop_rdi=1 skip_r11rcx=0
+.macro POP_REGS pop_rdi=1
        popq %r15
        popq %r14
        popq %r13
        popq %r12
        popq %rbp
        popq %rbx
-       .if \skip_r11rcx
-       popq %rsi
-       .else
        popq %r11
-       .endif
        popq %r10
        popq %r9
        popq %r8
        popq %rax
-       .if \skip_r11rcx
-       popq %rsi
-       .else
        popq %rcx
-       .endif
        popq %rdx
        popq %rsi
        .if \pop_rdi
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 54ad1890aefca..9e01445f6679c 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -365,9 +365,11 @@ __visible noinstr void syscall_return_slowpath(struct pt_regs *regs)
 }
 
 #ifdef CONFIG_X86_64
-__visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
+__visible noinstr bool do_syscall_64(unsigned long nr, struct pt_regs *regs)
 {
        struct thread_info *ti;
+       long rip;
+       unsigned int shift_rip;
 
        check_user_regs(regs);
 
@@ -394,6 +396,58 @@ __visible noinstr void do_syscall_64(unsigned long nr, struct pt_regs *regs)
 
        instrumentation_end();
        exit_to_user_mode();
+
+       /*
+        * Check that the register state is valid for using SYSRET to exit
+        * to userspace.  Otherwise use the slower IRET exit path.
+        */
+
+       /* SYSRET requires RCX == RIP and R11 == EFLAGS */
+       if (unlikely(regs->cx != regs->ip || regs->r11 != regs->flags))
+               return false;
+
+       /* CS and SS must match values set in MSR_STAR */
+       if (unlikely(regs->cs != __USER_CS || regs->ss != __USER_DS))
+               return false;
+
+       /*
+        * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
+        * in kernel space.  This essentially lets the user take over
+        * the kernel, since userspace controls RSP.
+        *
+        * Change top bits to match most significant bit (47th or 56th bit
+        * depending on paging mode) in the address.
+        */
+       shift_rip = 64 - (__VIRTUAL_MASK_SHIFT + 1);
+       rip = (long) regs->ip;
+       rip <<= shift_rip;
+       rip >>= shift_rip;
+       if (unlikely((unsigned long) rip != regs->ip))
+               return false;
+
+       /*
+        * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
+        * restore RF properly. If the slowpath sets it for whatever reason, we
+        * need to restore it correctly.
+        *
+        * SYSRET can restore TF, but unlike IRET, restoring TF results in a
+        * trap from userspace immediately after SYSRET.  This would cause an
+        * infinite loop whenever #DB happens with register state that satisfies
+        * the opportunistic SYSRET conditions.  For example, single-stepping
+        * this user code:
+        *
+        *           movq       $stuck_here, %rcx
+        *           pushfq
+        *           popq %r11
+        *   stuck_here:
+        *
+        * would never get past 'stuck_here'.
+        */
+       if (unlikely(regs->flags & (X86_EFLAGS_RF|X86_EFLAGS_TF)))
+               return false;
+
+       /* Use SYSRET to exit to userspace */
+       return true;
 }
 #endif
 
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index fb729f4c4fbc2..b8025a62ac5e8 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -117,80 +117,15 @@ SYM_INNER_LABEL(entry_SYSCALL_64_after_hwframe, SYM_L_GLOBAL)
        movq    %rsp, %rsi
        call    do_syscall_64           /* returns with IRQs disabled */
 
-       /*
-        * Try to use SYSRET instead of IRET if we're returning to
-        * a completely clean 64-bit userspace context.  If we're not,
-        * go to the slow exit path.
-        */
-       movq    RCX(%rsp), %rcx
-       movq    RIP(%rsp), %r11
-
-       cmpq    %rcx, %r11      /* SYSRET requires RCX == RIP */
-       jne     swapgs_restore_regs_and_return_to_usermode
-
-       /*
-        * On Intel CPUs, SYSRET with non-canonical RCX/RIP will #GP
-        * in kernel space.  This essentially lets the user take over
-        * the kernel, since userspace controls RSP.
-        *
-        * If width of "canonical tail" ever becomes variable, this will need
-        * to be updated to remain correct on both old and new CPUs.
-        *
-        * Change top bits to match most significant bit (47th or 56th bit
-        * depending on paging mode) in the address.
-        */
-#ifdef CONFIG_X86_5LEVEL
-       ALTERNATIVE "shl $(64 - 48), %rcx; sar $(64 - 48), %rcx", \
-               "shl $(64 - 57), %rcx; sar $(64 - 57), %rcx", X86_FEATURE_LA57
-#else
-       shl     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-       sar     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
-#endif
-
-       /* If this changed %rcx, it was not canonical */
-       cmpq    %rcx, %r11
-       jne     swapgs_restore_regs_and_return_to_usermode
-
-       cmpq    $__USER_CS, CS(%rsp)            /* CS must match SYSRET */
-       jne     swapgs_restore_regs_and_return_to_usermode
-
-       movq    R11(%rsp), %r11
-       cmpq    %r11, EFLAGS(%rsp)              /* R11 == RFLAGS */
-       jne     swapgs_restore_regs_and_return_to_usermode
-
-       /*
-        * SYSCALL clears RF when it saves RFLAGS in R11 and SYSRET cannot
-        * restore RF properly. If the slowpath sets it for whatever reason, we
-        * need to restore it correctly.
-        *
-        * SYSRET can restore TF, but unlike IRET, restoring TF results in a
-        * trap from userspace immediately after SYSRET.  This would cause an
-        * infinite loop whenever #DB happens with register state that satisfies
-        * the opportunistic SYSRET conditions.  For example, single-stepping
-        * this user code:
-        *
-        *           movq       $stuck_here, %rcx
-        *           pushfq
-        *           popq %r11
-        *   stuck_here:
-        *
-        * would never get past 'stuck_here'.
-        */
-       testq   $(X86_EFLAGS_RF|X86_EFLAGS_TF), %r11
-       jnz     swapgs_restore_regs_and_return_to_usermode
-
-       /* nothing to check for RSP */
-
-       cmpq    $__USER_DS, SS(%rsp)            /* SS must match SYSRET */
-       jne     swapgs_restore_regs_and_return_to_usermode
+       testb   %al, %al                /* Is SYSRET allowed? */
+       jz      swapgs_restore_regs_and_return_to_usermode
 
        /*
         * We win! This label is here just for ease of understanding
         * perf profiles. Nothing jumps here.
         */
 syscall_return_via_sysret:
-       /* rcx and r11 are already restored (see code above) */
-       POP_REGS pop_rdi=0 skip_r11rcx=1
+       POP_REGS pop_rdi=0
 
        /*
         * Now all regs are restored except RSP and RDI.
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 7cbf733d11afd..766f9b9736185 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -160,7 +160,7 @@ static inline int syscall_get_arch(struct task_struct *task)
                ? AUDIT_ARCH_I386 : AUDIT_ARCH_X86_64;
 }
 
-void do_syscall_64(unsigned long nr, struct pt_regs *regs);
+bool do_syscall_64(unsigned long nr, struct pt_regs *regs);
 void do_int80_syscall_32(struct pt_regs *regs);
 long do_fast_syscall_32(struct pt_regs *regs);
 
-- 
2.26.2
