If arch code calls the wrong kernel entry helpers, syscall entries and
exits can get out of sync.  Add a new field to task_struct to track the
syscall state and validate that it transitions correctly.

Signed-off-by: Andy Lutomirski <l...@kernel.org>
---

I'm not in love with this patch.  I'm imagining moving TS_COMPAT and such
into the new kentry_syscall_state field.  What do you all think?

 include/linux/entry-common.h | 11 +++++++++++
 include/linux/sched.h        |  1 +
 init/init_task.c             |  9 +++++++++
 kernel/entry/common.c        | 25 ++++++++++++++++++++++++-
 4 files changed, 45 insertions(+), 1 deletion(-)

diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index ca86a00abe86..c2463b6b71fe 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -60,6 +60,17 @@
         _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL |  \
         ARCH_EXIT_TO_USER_MODE_WORK)
 
+/*
+ * task_struct::kentry_syscall_state
+ *
+ * kentry_syscall_state is reset to zero on syscall exit.  For efficiency
+ * reasons, if CONFIG_PROVE_LOCKING=n, kentry_syscall_state is permitted
+ * to remain 0 even inside a syscall.  The flag itself is defined
+ * unconditionally: its users sit in IS_ENABLED(CONFIG_PROVE_LOCKING)
+ * branches, which are still compiled when the option is off.
+ */
+#define KENTRY_SYSCALL_STATE_IN_SYSCALL                1
+
 /**
  * arch_check_user_regs - Architecture specific sanity check for user mode regs
  * @regs:      Pointer to currents pt_regs
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 6e3a5eeec509..691057a3b48d 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1002,6 +1002,7 @@ struct task_struct {
 #endif
        struct seccomp                  seccomp;
        struct syscall_user_dispatch    syscall_dispatch;
+       u32                             kentry_syscall_state;
 
        /* Thread group tracking: */
        u64                             parent_exec_id;
diff --git a/init/init_task.c b/init/init_task.c
index 8a992d73e6fb..91050c4f0bb3 100644
--- a/init/init_task.c
+++ b/init/init_task.c
@@ -12,6 +12,7 @@
 #include <linux/audit.h>
 #include <linux/numa.h>
 #include <linux/scs.h>
+#include <linux/entry-common.h>
 
 #include <linux/uaccess.h>
 
@@ -212,6 +213,14 @@ struct task_struct init_task
 #ifdef CONFIG_SECCOMP
        .seccomp        = { .filter_count = ATOMIC_INIT(0) },
 #endif
+#ifdef CONFIG_PROVE_LOCKING
+       /*
+        * The init task, and kernel threads in general, are considered
+        * to be "in a syscall".  This way they can execve() and then exit
+        * the supposed syscall that they were in to go to user mode.
+        */
+       .kentry_syscall_state = KENTRY_SYSCALL_STATE_IN_SYSCALL,
+#endif
 };
 EXPORT_SYMBOL(init_task);
 
diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 378341642f94..7e971b866ad2 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -83,7 +83,16 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall,
 static __always_inline long
 __syscall_enter_from_user_work(struct pt_regs *regs, long syscall)
 {
-       unsigned long work = READ_ONCE(current_thread_info()->syscall_work);
+       unsigned long work;
+
+       if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+               WARN_ONCE(current->kentry_syscall_state,
+                         "entering syscall %ld while already in a syscall",
+                         syscall);
+               current->kentry_syscall_state = KENTRY_SYSCALL_STATE_IN_SYSCALL;
+       }
+
+       work = READ_ONCE(current_thread_info()->syscall_work);
 
        if (work & SYSCALL_WORK_ENTER)
                syscall = syscall_trace_enter(regs, syscall, work);
@@ -195,6 +204,12 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs)
 {
        unsigned long ti_work = READ_ONCE(current_thread_info()->flags);
 
+       if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+               WARN_ONCE(current->kentry_syscall_state &
+                         KENTRY_SYSCALL_STATE_IN_SYSCALL,
+                         "exiting to user mode while in syscall context");
+       }
+
        lockdep_assert_irqs_disabled();
 
        if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK))
@@ -282,6 +297,14 @@ static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs)
         */
        if (unlikely(work & SYSCALL_WORK_EXIT))
                syscall_exit_work(regs, work);
+
+       if (IS_ENABLED(CONFIG_PROVE_LOCKING)) {
+               WARN_ONCE(!(current->kentry_syscall_state &
+                           KENTRY_SYSCALL_STATE_IN_SYSCALL),
+                         "exiting syscall %lu without entering first", nr);
+       }
+
+       current->kentry_syscall_state = 0;
 }
 
 static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs)
-- 
2.29.2

Reply via email to