This change makes the check exact (no more false positives
on kernel addresses).

It isn't really important to be fully correct here -
almost all addresses we'll ever see will be userspace ones,
but OTOH it looks to be cheap enough:
the new code uses two more ALU ops but preserves %rcx,
so it no longer needs to be reloaded from pt_regs->cx.
At the disassembly level, the changes are:

cmp %rcx,0x80(%rsp) -> mov 0x80(%rsp),%r11; cmp %rcx,%r11
shr $0x2f,%rcx      -> shl $0x10,%rcx; sar $0x10,%rcx; cmp %rcx,%r11
mov 0x58(%rsp),%rcx -> (eliminated)

Signed-off-by: Denys Vlasenko <dvlas...@redhat.com>
CC: Borislav Petkov <b...@alien8.de>
CC: x...@kernel.org
CC: linux-kernel@vger.kernel.org
---

Andy, I'm undecided myself on the merits of doing this.
If you like it, feel free to take it in your tree.
I trimmed the CC list so as not to bother too many people with this trivial
and quite possibly "useless churn"-class change.

 arch/x86/kernel/entry_64.S | 23 ++++++++++++-----------
 1 file changed, 12 insertions(+), 11 deletions(-)

diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S
index bf9afad..a36d04d 100644
--- a/arch/x86/kernel/entry_64.S
+++ b/arch/x86/kernel/entry_64.S
@@ -688,26 +688,27 @@ retint_swapgs:            /* return to user-space */
         * a completely clean 64-bit userspace context.
         */
        movq RCX(%rsp),%rcx
-       cmpq %rcx,RIP(%rsp)             /* RCX == RIP */
+       movq RIP(%rsp),%r11
+       cmpq %rcx,%r11                  /* RCX == RIP */
        jne opportunistic_sysret_failed
 
        /*
         * On Intel CPUs, sysret with non-canonical RCX/RIP will #GP
         * in kernel space.  This essentially lets the user take over
-        * the kernel, since userspace controls RSP.  It's not worth
-        * testing for canonicalness exactly -- this check detects any
-        * of the 17 high bits set, which is true for non-canonical
-        * or kernel addresses.  (This will pessimize vsyscall=native.
-        * Big deal.)
+        * the kernel, since userspace controls RSP.
         *
-        * If virtual addresses ever become wider, this will need
+        * If the width of the "canonical tail" ever becomes variable, this will need
         * to be updated to remain correct on both old and new CPUs.
         */
        .ifne __VIRTUAL_MASK_SHIFT - 47
        .error "virtual address width changed -- sysret checks need update"
        .endif
-       shr $__VIRTUAL_MASK_SHIFT, %rcx
-       jnz opportunistic_sysret_failed
+       /* Change top 16 bits to be a sign-extension of the rest */
+       shl     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
+       sar     $(64 - (__VIRTUAL_MASK_SHIFT+1)), %rcx
+       /* If this changed %rcx, it was not canonical */
+       cmpq    %rcx, %r11
+       jne     opportunistic_sysret_failed
 
        cmpq $__USER_CS,CS(%rsp)        /* CS must match SYSRET */
        jne opportunistic_sysret_failed
@@ -730,8 +731,8 @@ retint_swapgs:              /* return to user-space */
         */
 irq_return_via_sysret:
        CFI_REMEMBER_STATE
-       /* r11 is already restored (see code above) */
-       RESTORE_C_REGS_EXCEPT_R11
+       /* rcx and r11 are already restored (see code above) */
+       RESTORE_C_REGS_EXCEPT_RCX_R11
        movq RSP(%rsp),%rsp
        USERGS_SYSRET64
        CFI_RESTORE_STATE
-- 
1.8.1.4

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to