Reorganize the tests for SYSEXIT/SYSRETL, cleaning up comments and merging native and compat code.
Signed-off-by: Brian Gerst <brge...@gmail.com> --- arch/x86/entry/common.c | 85 ++++++++++++++------------------ arch/x86/entry/entry_32.S | 6 +-- arch/x86/entry/entry_64_compat.S | 13 ++--- arch/x86/include/asm/segment.h | 1 + arch/x86/include/asm/syscall.h | 2 +- 5 files changed, 48 insertions(+), 59 deletions(-) diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index bdb4c15b8610e..df1497fa554b8 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -500,10 +500,24 @@ __visible noinstr void do_int80_syscall_32(struct pt_regs *regs) exit_to_user_mode(); } +/* Returns true to return using SYSEXIT/SYSRETL, or false to return using IRET */ static bool __do_fast_syscall_32(struct pt_regs *regs) { + /* + * Called using the internal vDSO SYSENTER/SYSCALL32 calling + * convention. Adjust regs so it looks like we entered using int80. + */ + unsigned long landing_pad = (unsigned long)current->mm->context.vdso + + vdso_image_32.sym_int80_landing_pad; int res; + /* + * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward + * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. + * Fix it up. + */ + regs->ip = landing_pad; + /* Fetch EBP from where the vDSO stashed it. */ if (IS_ENABLED(CONFIG_X86_64)) { /* @@ -522,34 +536,39 @@ static bool __do_fast_syscall_32(struct pt_regs *regs) regs->ax = -EFAULT; local_irq_disable(); __prepare_exit_to_usermode(regs); + /* Keep it simple: use IRET. */ return false; } /* Now this is just like a normal syscall. 
*/ do_syscall_32_irqs_on(regs); + + /* XEN PV guests always use IRET path */ + if (static_cpu_has(X86_FEATURE_XENPV)) + return false; + + /* CS and SS must match values set in MSR_STAR */ + if (unlikely(regs->cs != __USER32_CS || regs->ss != __USER_DS)) + return false; + + /* EIP must point to the VDSO landing pad */ + if (unlikely(regs->ip != landing_pad)) + return false; + + /* The TF, RF, and VM flags must be restored with IRET */ + if (unlikely(regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM))) + return false; + + /* Return with SYSEXIT/SYSRETL */ return true; } -/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ -__visible noinstr long do_fast_syscall_32(struct pt_regs *regs) +__visible noinstr bool do_fast_syscall_32(struct pt_regs *regs) { - /* - * Called using the internal vDSO SYSENTER/SYSCALL32 calling - * convention. Adjust regs so it looks like we entered using int80. - */ - unsigned long landing_pad = (unsigned long)current->mm->context.vdso + - vdso_image_32.sym_int80_landing_pad; bool success; check_user_regs(regs); - /* - * SYSENTER loses EIP, and even SYSCALL32 needs us to skip forward - * so that 'regs->ip -= 2' lands back on an int $0x80 instruction. - * Fix it up. - */ - regs->ip = landing_pad; - enter_from_user_mode(); instrumentation_begin(); @@ -559,42 +578,10 @@ __visible noinstr long do_fast_syscall_32(struct pt_regs *regs) instrumentation_end(); exit_to_user_mode(); - /* If it failed, keep it simple: use IRET. */ - if (!success) - return 0; - -#ifdef CONFIG_X86_64 - /* - * Opportunistic SYSRETL: if possible, try to return using SYSRETL. - * SYSRETL is available on all 64-bit CPUs, so we don't need to - * bother with SYSEXIT. - * - * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, - * because the ECX fixup above will ensure that this is essentially - * never the case. 
- */ - return regs->cs == __USER32_CS && regs->ss == __USER_DS && - regs->ip == landing_pad && - (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF)) == 0; -#else - /* - * Opportunistic SYSEXIT: if possible, try to return using SYSEXIT. - * - * Unlike 64-bit opportunistic SYSRET, we can't check that CX == IP, - * because the ECX fixup above will ensure that this is essentially - * never the case. - * - * We don't allow syscalls at all from VM86 mode, but we still - * need to check VM, because we might be returning from sys_vm86. - */ - return regs->cs == __USER_CS && regs->ss == __USER_DS && - regs->ip == landing_pad && - (regs->flags & (X86_EFLAGS_RF | X86_EFLAGS_TF | X86_EFLAGS_VM)) == 0; -#endif + return success; } -/* Returns 0 to return using IRET or 1 to return using SYSEXIT/SYSRETL. */ -__visible noinstr long do_SYSENTER_32(struct pt_regs *regs) +__visible noinstr bool do_SYSENTER_32(struct pt_regs *regs) { /* SYSENTER loses RSP, but the vDSO saved it in RBP. */ regs->sp = regs->bp; diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S index 2d0bd5d5f0328..24979fc747df0 100644 --- a/arch/x86/entry/entry_32.S +++ b/arch/x86/entry/entry_32.S @@ -965,9 +965,9 @@ SYM_FUNC_START(entry_SYSENTER_32) movl %esp, %eax call do_SYSENTER_32 - /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz .Lsyscall_32_done", \ - "jmp .Lsyscall_32_done", X86_FEATURE_XENPV + + testb %al, %al + jz .Lsyscall_32_done STACKLEAK_ERASE diff --git a/arch/x86/entry/entry_64_compat.S b/arch/x86/entry/entry_64_compat.S index 541fdaf640453..d8ac70ebb5f9e 100644 --- a/arch/x86/entry/entry_64_compat.S +++ b/arch/x86/entry/entry_64_compat.S @@ -137,9 +137,10 @@ SYM_INNER_LABEL(entry_SYSENTER_compat_after_hwframe, SYM_L_GLOBAL) movq %rsp, %rdi call do_SYSENTER_32 - /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \ - "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV + + testb 
%al, %al + jz swapgs_restore_regs_and_return_to_usermode + jmp sysret32_from_system_call .Lsysenter_fix_flags: @@ -252,9 +253,9 @@ SYM_INNER_LABEL(entry_SYSCALL_compat_after_hwframe, SYM_L_GLOBAL) movq %rsp, %rdi call do_fast_syscall_32 - /* XEN PV guests always use IRET path */ - ALTERNATIVE "testl %eax, %eax; jz swapgs_restore_regs_and_return_to_usermode", \ - "jmp swapgs_restore_regs_and_return_to_usermode", X86_FEATURE_XENPV + + testb %al, %al + jz swapgs_restore_regs_and_return_to_usermode /* Opportunistic SYSRET */ sysret32_from_system_call: diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index 6669164abadcb..da29d2cfeee83 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -135,6 +135,7 @@ #define __KERNEL_DS (GDT_ENTRY_KERNEL_DS*8) #define __USER_DS (GDT_ENTRY_DEFAULT_USER_DS*8 + 3) #define __USER_CS (GDT_ENTRY_DEFAULT_USER_CS*8 + 3) +#define __USER32_CS __USER_CS #define __ESPFIX_SS (GDT_ENTRY_ESPFIX_SS*8) /* segment for calling fn: */ diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h index 766f9b9736185..e3e2f255bdb67 100644 --- a/arch/x86/include/asm/syscall.h +++ b/arch/x86/include/asm/syscall.h @@ -162,7 +162,7 @@ static inline int syscall_get_arch(struct task_struct *task) bool do_syscall_64(unsigned long nr, struct pt_regs *regs); void do_int80_syscall_32(struct pt_regs *regs); -long do_fast_syscall_32(struct pt_regs *regs); +bool do_fast_syscall_32(struct pt_regs *regs); #endif /* CONFIG_X86_32 */ -- 2.26.2