This is a second attempt to make the improvements from c6f2062935c8
("x86/signal/64: Fix SS handling for signals delivered to 64-bit
programs"), which was reverted by 51adbfbba5c6 ("x86/signal/64: Add
support for SS in the 64-bit signal context").

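This adds two new uc_flags flags.  UC_SIGCONTEXT_SS will be set for
all 64-bit signals (including x32).  It indicates that the saved SS
field is valid and that the kernel supports the new behavior.
UC_STRICT_RESTORE_SS will be set when the signal came from 64-bit
code.  It tells sigreturn to restore the saved SS exactly as given
rather than fixing it up.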

The goal is to fix a problem with signal handling in 64-bit tasks:
SS wasn't saved in the 64-bit signal context, making it awkward to
determine what SS was at the time of signal delivery and making it
impossible to return to a non-flat SS (as calling sigreturn clobbers
SS).

This also made it extremely difficult for 64-bit tasks to return to
fully-defined 16-bit contexts, because only the kernel can easily do
espfix64, but sigreturn was unable to set a non-flat SS:ESP.
(DOSEMU has a monstrous hack to partially work around this
limitation.)

If we could go back in time, the correct fix would be to make 64-bit
signals work just like 32-bit signals with respect to SS: save it
in signal context, reset it when delivering a signal, and restore
it in sigreturn.

Unfortunately, doing that (as I tried originally) breaks DOSEMU:
DOSEMU wouldn't reset the signal context's SS when clearing the LDT
and changing the saved CS to 64-bit mode, since it predates the SS
context field existing in the first place.

This patch is a bit more complicated, and it tries to balance a
bunch of goals.  It makes most cases of changing ucontext->ss during
signal handling work as expected.

I do this by special-casing the interesting case.  On sigreturn,
ucontext->ss will be honored by default, unless the ucontext was
created from scratch by an old program and had a 64-bit CS
(unfortunately, CRIU can do this) or was the result of changing a
32-bit signal context to 64-bit without resetting SS (as DOSEMU
does).

For the benefit of new 64-bit software that uses segmentation (new
versions of DOSEMU might), the new behavior can be detected with a
new ucontext flag UC_SIGCONTEXT_SS.

To avoid compilation issues, __pad0 is left as an alias for ss in
ucontext.

The nitty-gritty details are documented in the header file.

This patch also re-enables the sigreturn_64 and ldt_gdt_64 selftests,
as the kernel change allows both of them to pass.

Cc: Stas Sergeev <s...@list.ru>
Cc: Linus Torvalds <torva...@linux-foundation.org>
Cc: Cyrill Gorcunov <gorcu...@gmail.com>
Cc: Pavel Emelyanov <xe...@parallels.com>
Tested-by: Stas Sergeev <s...@list.ru>
Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 arch/x86/include/asm/sighandling.h     |  1 -
 arch/x86/include/uapi/asm/sigcontext.h |  7 ++--
 arch/x86/include/uapi/asm/ucontext.h   | 54 ++++++++++++++++++++++++++---
 arch/x86/kernel/signal.c               | 63 ++++++++++++++++++++++++----------
 tools/testing/selftests/x86/Makefile   |  7 ++--
 5 files changed, 102 insertions(+), 30 deletions(-)

diff --git a/arch/x86/include/asm/sighandling.h b/arch/x86/include/asm/sighandling.h
index 89db46752a8f..452c88b8ad06 100644
--- a/arch/x86/include/asm/sighandling.h
+++ b/arch/x86/include/asm/sighandling.h
@@ -13,7 +13,6 @@
                         X86_EFLAGS_CF | X86_EFLAGS_RF)
 
 void signal_fault(struct pt_regs *regs, void __user *frame, char *where);
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc);
 int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
                     struct pt_regs *regs, unsigned long mask);
 
diff --git a/arch/x86/include/uapi/asm/sigcontext.h b/arch/x86/include/uapi/asm/sigcontext.h
index 47dae8150520..bb0dde737b59 100644
--- a/arch/x86/include/uapi/asm/sigcontext.h
+++ b/arch/x86/include/uapi/asm/sigcontext.h
@@ -256,7 +256,7 @@ struct sigcontext_64 {
        __u16                           cs;
        __u16                           gs;
        __u16                           fs;
-       __u16                           __pad0;
+       __u16                           ss;
        __u64                           err;
        __u64                           trapno;
        __u64                           oldmask;
@@ -362,7 +362,10 @@ struct sigcontext {
         */
        __u16                           gs;
        __u16                           fs;
-       __u16                           __pad0;
+       union {
+               __u16                   ss;     /* If UC_SIGCONTEXT_SS */
+               __u16                   __pad0; /* If !UC_SIGCONTEXT_SS */
+       };
        __u64                           err;
        __u64                           trapno;
        __u64                           oldmask;
diff --git a/arch/x86/include/uapi/asm/ucontext.h b/arch/x86/include/uapi/asm/ucontext.h
index b7c29c8017f2..9f1b1c21e541 100644
--- a/arch/x86/include/uapi/asm/ucontext.h
+++ b/arch/x86/include/uapi/asm/ucontext.h
@@ -1,11 +1,55 @@
 #ifndef _ASM_X86_UCONTEXT_H
 #define _ASM_X86_UCONTEXT_H
 
-#define UC_FP_XSTATE   0x1     /* indicates the presence of extended state
-                                * information in the memory layout pointed
-                                * by the fpstate pointer in the ucontext's
-                                * sigcontext struct (uc_mcontext).
-                                */
+/*
+ * Indicates the presence of extended state information in the memory
+ * layout pointed by the fpstate pointer in the ucontext's sigcontext
+ * struct (uc_mcontext).
+ */
+#define UC_FP_XSTATE   0x1
+
+#ifdef __x86_64__
+/*
+ * UC_SIGCONTEXT_SS will be set when delivering 64-bit or x32 signals on
+ * kernels that save SS in the sigcontext.  All kernels that set
+ * UC_SIGCONTEXT_SS will correctly restore at least the low 32 bits of esp
+ * regardless of SS (i.e. they implement espfix).
+ *
+ * Kernels that set UC_SIGCONTEXT_SS will also set UC_STRICT_RESTORE_SS
+ * when delivering a signal that came from 64-bit code.
+ *
+ * Sigreturn restores SS as follows:
+ *
+ * if (saved SS is valid || UC_STRICT_RESTORE_SS is set ||
+ *     saved CS is not 64-bit)
+ *         new SS = saved SS  (will fail IRET and signal if invalid)
+ * else
+ *         new SS = a flat 32-bit data segment
+ *
+ *
+ * This behavior serves three purposes:
+ *
+ * - Legacy programs that construct a 64-bit sigcontext from scratch
+ *   with zero or garbage in the SS slot (e.g. old CRIU) and call
+ *   sigreturn will still work.
+ *
+ * - Old DOSEMU versions sometimes catch a signal from a segmented
+ *   context, delete the old SS segment (with modify_ldt), and change
+ *   the saved CS to a 64-bit segment.  These DOSEMU versions expect
+ *   sigreturn to send them back to 64-bit mode without killing them,
+ *   despite the fact that the SS selector when the signal was raised is
+ *   no longer valid.  UC_STRICT_RESTORE_SS will be clear, so the kernel
+ *   will fix up SS for these DOSEMU versions.
+ *
+ * - Old and new programs that catch a signal and return without
+ *   modifying the saved context will end up in exactly the state they
+ *   started in, even if they were running in a segmented context when
+ *   the signal was raised.  Old kernels would lose track of the
+ *   previous SS value.
+ */
+#define UC_SIGCONTEXT_SS       0x2
+#define UC_STRICT_RESTORE_SS   0x4
+#endif
 
 #include <asm-generic/ucontext.h>
 
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index bb3e4208d90d..32725f6a2932 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -90,7 +90,9 @@ static void force_valid_ss(struct pt_regs *regs)
 }
 #endif
 
-int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
+static int restore_sigcontext(struct pt_regs *regs,
+                             struct sigcontext __user *sc,
+                             unsigned long uc_flags)
 {
        unsigned long buf_val;
        void __user *buf;
@@ -123,15 +125,18 @@ int restore_sigcontext(struct pt_regs *regs, struct sigcontext __user *sc)
                COPY(r15);
 #endif /* CONFIG_X86_64 */
 
-#ifdef CONFIG_X86_32
                COPY_SEG_CPL3(cs);
                COPY_SEG_CPL3(ss);
-#else /* !CONFIG_X86_32 */
-               /* Kernel saves and restores only the CS segment register on signals,
-                * which is the bare minimum needed to allow mixed 32/64-bit code.
-                * App's signal handler can save/restore other segments if needed. */
-               COPY_SEG_CPL3(cs);
-#endif /* CONFIG_X86_32 */
+
+#ifdef CONFIG_X86_64
+               /*
+                * Fix up SS if needed for the benefit of old DOSEMU and
+                * CRIU.
+                */
+               if (unlikely(!(uc_flags & UC_STRICT_RESTORE_SS) &&
+                            user_64bit_mode(regs)))
+                       force_valid_ss(regs);
+#endif
 
                get_user_ex(tmpflags, &sc->flags);
                regs->flags = (regs->flags & ~FIX_EFLAGS) | (tmpflags & FIX_EFLAGS);
@@ -194,6 +199,7 @@ int setup_sigcontext(struct sigcontext __user *sc, void __user *fpstate,
                put_user_ex(regs->cs, &sc->cs);
                put_user_ex(0, &sc->gs);
                put_user_ex(0, &sc->fs);
+               put_user_ex(regs->ss, &sc->ss);
 #endif /* CONFIG_X86_32 */
 
                put_user_ex(fpstate, &sc->fpstate);
@@ -432,6 +438,21 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
        return 0;
 }
 #else /* !CONFIG_X86_32 */
+static unsigned long frame_uc_flags(struct pt_regs *regs)
+{
+       unsigned long flags;
+
+       if (cpu_has_xsave)
+               flags = UC_FP_XSTATE | UC_SIGCONTEXT_SS;
+       else
+               flags = UC_SIGCONTEXT_SS;
+
+       if (likely(user_64bit_mode(regs)))
+               flags |= UC_STRICT_RESTORE_SS;
+
+       return flags;
+}
+
 static int __setup_rt_frame(int sig, struct ksignal *ksig,
                            sigset_t *set, struct pt_regs *regs)
 {
@@ -451,10 +472,7 @@ static int __setup_rt_frame(int sig, struct ksignal *ksig,
 
        put_user_try {
                /* Create the ucontext.  */
-               if (cpu_has_xsave)
-                       put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-               else
-                       put_user_ex(0, &frame->uc.uc_flags);
+               put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
                put_user_ex(0, &frame->uc.uc_link);
                save_altstack_ex(&frame->uc.uc_stack, regs->sp);
 
@@ -536,10 +554,7 @@ static int x32_setup_rt_frame(struct ksignal *ksig,
 
        put_user_try {
                /* Create the ucontext.  */
-               if (cpu_has_xsave)
-                       put_user_ex(UC_FP_XSTATE, &frame->uc.uc_flags);
-               else
-                       put_user_ex(0, &frame->uc.uc_flags);
+               put_user_ex(frame_uc_flags(regs), &frame->uc.uc_flags);
                put_user_ex(0, &frame->uc.uc_link);
                compat_save_altstack_ex(&frame->uc.uc_stack, regs->sp);
                put_user_ex(0, &frame->uc.uc__pad0);
@@ -601,7 +616,11 @@ asmlinkage unsigned long sys_sigreturn(void)
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->sc))
+       /*
+        * x86_32 has no uc_flags bits relevant to restore_sigcontext.
+        * Save a few cycles by skipping the __get_user.
+        */
+       if (restore_sigcontext(regs, &frame->sc, 0))
                goto badframe;
        return regs->ax;
 
@@ -617,16 +636,19 @@ asmlinkage long sys_rt_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe __user *frame;
        sigset_t set;
+       unsigned long uc_flags;
 
        frame = (struct rt_sigframe __user *)(regs->sp - sizeof(long));
        if (!access_ok(VERIFY_READ, frame, sizeof(*frame)))
                goto badframe;
        if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
                goto badframe;
+       if (__get_user(uc_flags, &frame->uc.uc_flags))
+               goto badframe;
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
                goto badframe;
 
        if (restore_altstack(&frame->uc.uc_stack))
@@ -810,6 +832,7 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
        struct pt_regs *regs = current_pt_regs();
        struct rt_sigframe_x32 __user *frame;
        sigset_t set;
+       unsigned long uc_flags;
 
        frame = (struct rt_sigframe_x32 __user *)(regs->sp - 8);
 
@@ -817,10 +840,12 @@ asmlinkage long sys32_x32_rt_sigreturn(void)
                goto badframe;
        if (__copy_from_user(&set, &frame->uc.uc_sigmask, sizeof(set)))
                goto badframe;
+       if (__get_user(uc_flags, &frame->uc.uc_flags))
+               goto badframe;
 
        set_current_blocked(&set);
 
-       if (restore_sigcontext(regs, &frame->uc.uc_mcontext))
+       if (restore_sigcontext(regs, &frame->uc.uc_mcontext, uc_flags))
                goto badframe;
 
        if (compat_restore_altstack(&frame->uc.uc_stack))
diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile
index d0c473f65850..f5a02f19546c 100644
--- a/tools/testing/selftests/x86/Makefile
+++ b/tools/testing/selftests/x86/Makefile
@@ -4,10 +4,11 @@ include ../lib.mk
 
 .PHONY: all all_32 all_64 warn_32bit_failure clean
 
-TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall
-TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault sigreturn test_syscall_vdso unwind_vdso \
+TARGETS_C_BOTHBITS := single_step_syscall sysret_ss_attrs syscall_nt ptrace_syscall \
+       sigreturn \
+       ldt_gdt
+TARGETS_C_32BIT_ONLY := entry_from_vm86 syscall_arg_fault test_syscall_vdso unwind_vdso \
                        test_FCMOV test_FCOMI test_FISTTP \
-                       ldt_gdt \
                        vdso_restorer
 
 TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY)
-- 
2.5.0

Reply via email to