This is a second attempt to make the improvements from c6f2062935c8 ("x86/signal/64: Fix SS handling for signals delivered to 64-bit programs"), which was reverted by 51adbfbba5c6 ("x86/signal/64: Add support for SS in the 64-bit signal context").
This adds two new uc_flags flags. UC_SIGCONTEXT_SS will be set for all 64-bit signals (including x32). It indicates that the saved SS field is valid and that the kernel supports the new behavior. The goal is to fix a problems with signal handling in 64-bit tasks: SS wasn't saved in the 64-bit signal context, making it awkward to determine what SS was at the time of signal delivery and making it impossible to return to a non-flat SS (as calling sigreturn clobbers SS). This also made it extremely difficult for 64-bit tasks to return to fully-defined 16-bit contexts, because only the kernel can easily do espfix64, but sigreturn was unable to set a non-flag SS:ESP. (DOSEMU has a monstrous hack to partially work around this limitation.) If we could go back in time, the correct fix would be to make 64-bit signals work just like 32-bit signals with respect to SS: save it in signal context, reset it when delivering a signal, and restore it in sigreturn. Unfortunately, doing that (as I tried originally) breaks DOSEMU: DOSEMU wouldn't reset the signal context's SS when clearing the LDT and changing the saved CS to 64-bit mode, since it predates the SS context field existing in the first place. This patch is a bit more complicated, and it tries to balance a bunch of goals. It makes most cases of changing ucontext->ss during signal handling work as expected. I do this by special-casing the interesting case. On sigreturn, ucontext->ss will be honored by default, unless the ucontext was created from scratch by an old program and had a 64-bit CS (unfortunately, CRIU can do this) or was the result of changing a 32-bit signal context to 64-bit without resetting SS (as DOSEMU does). For the benefit of new 64-bit software that uses segmentation (new versions of DOSEMU might), the new behavior can be detected with a new ucontext flag UC_SIGCONTEXT_SS. To avoid compilation issues, __pad0 is left as an alias for ss in ucontext. The nitty-gritty details are documented in the header file. This patch also re-enables the sigreturn_64 and ldt_gdt_64 selftests, as the kernel change allows both of them to pass. Cc: Stas Sergeev <s...@list.ru> Cc: Linus Torvalds <torva...@linux-foundation.org> Cc: Cyrill Gorcunov <gorcu...@gmail.com> Cc: Pavel Emelyanov <xe...@parallels.com> Tested-by: Stas Sergeev <s...@list.ru> Signed-off-by: Andy Lutomirski <l...@kernel.org> --- arch/x86/include/asm/sighandling.h | 1 - arch/x86/include/uapi/asm/sigcontext.h | 7 ++-- arch/x86/include/uapi/asm/ucontext.h | 54 ++++++++++++++++++++++++++--- arch/x86/kernel/signal.c | 63 ++++++++++++++++++++++++---------- tools/testing/selftests/x86/Makefile | 7 ++-- 5 files changed, 102 insertions(+), 30 deletions(-) diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h index 89db46752a8f..452c88b8ad06 100644 --- a/arch/x86/include/asm/sighandling.h +++ b/arch/x86/include/asm/sighandling.h @@ -13,7 +13,6 @@ X86_EFLAGS_CF | X86_EFLAGS_RF) void signal_fault(struct pt_regs *regs, void __user *frame, char *where); -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc); int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, struct pt_regs *regs, unsigned long mask); diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h index 47dae8150520..bb0dde737b59 100644 --- a/arch/x86/include/uapi/asm/sigcontext.h +++ b/arch/x86/include/uapi/asm/sigcontext.h @@ -256,7 +256,7 @@ struct sigcontext_64 { __u16 cs; __u16 gs; __u16 fs; - __u16 __pad0; + __u16 ss; __u64 err; __u64 trapno; __u64 oldmask; @@ -362,7 +362,10 @@ struct sigcontext { */ __u16 gs; __u16 fs; - __u16 __pad0; + union { + __u16 ss; /* If UC_SAVED_SS */ + __u16 __pad0; /* If !UC_SAVED_SS */ + }; __u64 err; __u64 trapno; __u64 oldmask; diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h index b7c29c8017f2..9f1b1c21e541 100644 --- a/arch/x86/include/uapi/asm/ucontext.h +++ b/arch/x86/include/uapi/asm/ucontext.h @@ -1,11 +1,55 @@ #ifndef _ASM_X86_UCONTEXT_H #define _ASM_X86_UCONTEXT_H -#define UC_FP_XSTATE 0x1 /* indicates the presence of extended state - * information in the memory layout pointed - * by the fpstate pointer in the ucontext's - * sigcontext struct (uc_mcontext). - */ +/* + * Indicates the presence of extended state information in the memory + * layout pointed by the fpstate pointer in the ucontext's sigcontext + * struct (uc_mcontext). + */ +#define UC_FP_XSTATE 0x1 + +#ifdef __x86_64__ +/* + * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on + * kernels that save SS in the sigcontext. All kernels that set + * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp + * regardless of SS (i.e. they implement espfix). + * + * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS + * when delivering a signal that came from 64-bit code. + * + * Sigreturn restores SS as follows: + * + * if (saved SS is valid || UC_STRICT_RESTORE_SS is set || + * saved CS is not 64-bit) + * new SS = saved SS (will fail IRET and signal if invalid) + * else + * new SS = a flat 32-bit data segment + * + * + * This behavior serves three purposes: + * + * - Legacy programs that construct a 64-bit sigcontext from scratch + * with zero or garbage in the SS slot (e.g. old CRIU) and call + * sigreturn will still work. + * + * - Old DOSEMU versions sometimes catch a signal from a segmented + * context, delete the old SS segment (with modify_ldt), and change + * the saved CS to a 64-bit segment. These DOSEMU versions expect + * sigreturn to send them back to 64-bit mode without killing them, + * despite the fact that the SS selector when the signal was raised is + * no longer valid. UC_STRICT_RESTORE_SS will be clear, so the kernel + * will fix up SS for these DOSEMU versions. + * + * - Old and new programs that catch a signal and return without + * modifying the saved context will end up in exactly the state they + * started in, even if they were running in a segmented context when + * the signal was raised.. Old kernels would lose track of the + * previous SS value. + */ +#define UC_SIGCONTEXT_SS 0x2 +#define UC_STRICT_RESTORE_SS 0x4 +#endif #include <asm-generic/ucontext.h> diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index bb3e4208d90d..32725f6a2932 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -90,7 +90,9 @@ static void force_valid_ss(struct pt_regs *regs) } #endif -int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) +static int restore_sigcontext(struct pt_regs *regs, + struct sigcontext __user *sc, + unsigned long uc_flags) { unsigned long buf_val; void __user *buf; @@ -123,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc) COPY(r15); #endif /* CONFIG_X86_64 */ -#ifdef CONFIG_X86_32 COPY_SEG_CPL3(cs); COPY_SEG_CPL3(ss); -#else /* !CONFIG_X86_32 */ - /* Kernel saves and restores only the CS segment register on signals, - * which is the bare minimum needed to allow mixed 32/64-bit code. - * App's signal handler can save/restore other segments if needed. */ - COPY_SEG_CPL3(cs); -#endif /* CONFIG_X86_32 */ + +#ifdef CONFIG_X86_64 + /* + * Fix up SS if needed for the benefit of old DOSEMU and + * CRIU. + */ + if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) && + user_64bit_mode(regs))) + force_valid_ss(regs); +#endif get_user_ex(tmpflags, &sc->flags); regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS); @@ -194,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate, put_user_ex(regs->cs, &sc->cs); put_user_ex(0, &sc->gs); put_user_ex(0, &sc->fs); + put_user_ex(regs->ss, &sc->ss); #endif /* CONFIG_X86_32 */ put_user_ex(fpstate, &sc->fpstate); @@ -432,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, return 0; } #else /* !CONFIG_X86_32 */ +static unsigned long frame_uc_flags(struct pt_regs *regs) +{ + unsigned long flags; + + if (cpu_has_xsave) + flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS; + else + flags = UC_SIGCONTEXT_SS; + + if (likely(user_64bit_mode(regs))) + flags |= UC_STRICT_RESTORE_SS; + + return flags; +} + static int __setup_rt_frame(int sig, struct ksignal *ksig, sigset_t *set, struct pt_regs *regs) { @@ -451,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig, put_user_try { /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); save_altstack_ex(&frame->uc.uc_stack, regs->sp); @@ -536,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig, put_user_try { /* Create the ucontext. */ - if (cpu_has_xsave) - put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags); - else - put_user_ex(0, &frame->uc.uc_flags); + put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags); put_user_ex(0, &frame->uc.uc_link); compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp); put_user_ex(0, &frame->uc.uc__pad0); @@ -601,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void) set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->sc)) + /* + * x86_32 has no uc_flags bits relevant to restore_sigcontext. + * Save a few cycles by skipping the __get_user. + */ + if (restore_sigcontext(regs, &frame->sc, 0)) goto badframe; return regs->ax; @@ -617,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe __user *frame; sigset_t set; + unsigned long uc_flags; frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long)); if (!access_ok(VERIFY_READ, frame, sizeof(*frame))) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; + if (__get_user(uc_flags, &frame->uc.uc_flags)) + goto badframe; set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (restore_altstack(&frame->uc.uc_stack)) @@ -810,6 +832,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void) struct pt_regs *regs = current_pt_regs(); struct rt_sigframe_x32 __user *frame; sigset_t set; + unsigned long uc_flags; frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8); @@ -817,10 +840,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void) goto badframe; if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set))) goto badframe; + if (__get_user(uc_flags, &frame->uc.uc_flags)) + goto badframe; set_current_blocked(&set); - if (restore_sigcontext(regs, &frame->uc.uc_mcontext)) + if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags)) goto badframe; if (compat_restore_altstack(&frame->uc.uc_stack)) diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile index d0c473f65850..f5a02f19546c 100644 --- a/tools/testing/selftests/x86/Makefile +++ b/tools/testing/selftests/x86/Makefile @@ -4,10 +4,11 @@ include ../lib.mk .PHONY: all all_32 all_64 warn_32bit_failure clean -TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall -TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \ +TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \ + sigreturn \ + ldt_gdt +TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \ test_FCMOV test_FCOMI test_FISTTP \ - ldt_gdt \ vdso_restorer TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) -- 2.5.0