Setting TS_COMPAT in ptrace is wrong: if we happen to do it during
syscall entry, then we'll confuse seccomp and audit.  (The former
isn't a security problem: seccomp is currently entirely insecure if a
malicious ptracer is attached.)  As a minimal fix, this patch adds a
new flag TS_I386_REGS_POKED that handles the ptrace special case.

Cc: Pedro Alves <pal...@redhat.com>
Cc: Kees Cook <keesc...@chromium.org>
Acked-by: Oleg Nesterov <o...@redhat.com>
Signed-off-by: Andy Lutomirski <l...@kernel.org>
---

This is identical to v3 except that I added Oleg's ack and I split
it out from the rest of the series.  The other patches need some work
before I'll be fully comfortable with them.

This will have a trivial conflict with -mm.

 arch/x86/entry/common.c            |  6 +++++-
 arch/x86/include/asm/syscall.h     |  5 +----
 arch/x86/include/asm/thread_info.h |  3 +++
 arch/x86/kernel/ptrace.c           | 15 +++++++++------
 arch/x86/kernel/signal.c           | 26 ++++++++++++++++++++++++--
 5 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index ec138e538c44..0db497a8ff19 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -270,8 +270,12 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
         * handling, because syscall restart has a fixup for compat
         * syscalls.  The fixup is exercised by the ptrace_syscall_32
         * selftest.
+        *
+        * We also need to clear TS_I386_REGS_POKED: the 32-bit tracer
+        * special case only applies after poking regs and before the
+        * very next return to user mode.
         */
-       ti->status &= ~TS_COMPAT;
+       ti->status &= ~(TS_COMPAT|TS_I386_REGS_POKED);
 #endif
 
        user_enter();
diff --git a/arch/x86/include/asm/syscall.h b/arch/x86/include/asm/syscall.h
index 999b7cd2e78c..4e23dd15c661 100644
--- a/arch/x86/include/asm/syscall.h
+++ b/arch/x86/include/asm/syscall.h
@@ -60,7 +60,7 @@ static inline long syscall_get_error(struct task_struct *task,
         * TS_COMPAT is set for 32-bit syscall entries and then
         * remains set until we return to user mode.
         */
-       if (task_thread_info(task)->status & TS_COMPAT)
+       if (task_thread_info(task)->status & (TS_COMPAT|TS_I386_REGS_POKED))
                /*
                 * Sign-extend the value so (int)-EFOO becomes (long)-EFOO
                 * and will match correctly in comparisons.
@@ -239,9 +239,6 @@ static inline int syscall_get_arch(void)
         * TS_COMPAT is set for 32-bit syscall entry and then
         * remains set until we return to user mode.
         *
-        * TIF_IA32 tasks should always have TS_COMPAT set at
-        * system call time.
-        *
         * x32 tasks should be considered AUDIT_ARCH_X86_64.
         */
        if (task_thread_info(current)->status & TS_COMPAT)
diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h
index 30c133ac05cd..4bca518d11f4 100644
--- a/arch/x86/include/asm/thread_info.h
+++ b/arch/x86/include/asm/thread_info.h
@@ -228,6 +228,9 @@ static inline unsigned long current_stack_pointer(void)
  * have to worry about atomic accesses.
  */
 #define TS_COMPAT              0x0002  /* 32bit syscall active (64BIT)*/
+#ifdef CONFIG_COMPAT
+#define TS_I386_REGS_POKED     0x0004  /* regs poked by 32-bit ptracer */
+#endif
 #define TS_RESTORE_SIGMASK     0x0008  /* restore signal mask in do_signal() */
 
 #ifndef __ASSEMBLY__
diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c
index 600edd225e81..f79576a541ff 100644
--- a/arch/x86/kernel/ptrace.c
+++ b/arch/x86/kernel/ptrace.c
@@ -923,15 +923,18 @@ static int putreg32(struct task_struct *child, unsigned regno, u32 value)
 
        case offsetof(struct user32, regs.orig_eax):
                /*
-                * A 32-bit debugger setting orig_eax means to restore
-                * the state of the task restarting a 32-bit syscall.
-                * Make sure we interpret the -ERESTART* codes correctly
-                * in case the task is not actually still sitting at the
-                * exit from a 32-bit syscall with TS_COMPAT still set.
+                * Warning: bizarre corner case fixup here.  A 32-bit
+                * debugger setting orig_eax to -1 wants to disable
+                * syscall restart.  Make sure that the syscall
+                * restart code sign-extends orig_ax.  Also make sure
+                * we interpret the -ERESTART* codes correctly if
+                * loaded into regs->ax in case the task is not
+                * actually still sitting at the exit from a 32-bit
+                * syscall with TS_COMPAT still set.
                 */
                regs->orig_ax = value;
                if (syscall_get_nr(child, regs) >= 0)
-                       task_thread_info(child)->status |= TS_COMPAT;
+                       task_thread_info(child)->status |= TS_I386_REGS_POKED;
                break;
 
        case offsetof(struct user32, regs.eflags):
diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c
index 22cc2f9f8aec..6b952e1d8db8 100644
--- a/arch/x86/kernel/signal.c
+++ b/arch/x86/kernel/signal.c
@@ -760,8 +760,30 @@ handle_signal(struct ksignal *ksig, struct pt_regs *regs)
 
 static inline unsigned long get_nr_restart_syscall(const struct pt_regs *regs)
 {
-#ifdef CONFIG_X86_64
-       if (in_ia32_syscall())
+       /*
+        * This function is fundamentally broken as currently
+        * implemented.
+        *
+        * The idea is that we want to trigger a call to the
+        * restart_block() syscall and that we want in_ia32_syscall(),
+        * in_x32_syscall(), etc.  to match whatever they were in the
+        * syscall being restarted.  We assume that the syscall
+        * instruction at (regs->ip - 2) matches whatever syscall
+        * instruction we used to enter in the first place.
+        *
+        * The problem is that we can get here when ptrace pokes
+        * syscall-like values into regs even if we're not in a syscall
+        * at all.
+        *
+        * For now, we maintain historical behavior and guess based on
+        * stored state.  We could do better by saving the actual
+        * syscall arch in restart_block or (with caveats on x32) by
+        * checking if regs->ip points to 'int $0x80'.  The current
+        * behavior is incorrect if a tracer has a different bitness
+        * than the tracee.
+        */
+#ifdef CONFIG_IA32_EMULATION
+       if (current_thread_info()->status & (TS_COMPAT|TS_I386_REGS_POKED))
                return __NR_ia32_restart_syscall;
 #endif
 #ifdef CONFIG_X86_X32_ABI
-- 
2.7.4

Reply via email to