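If arch code calls the wrong kernel entry helpers, syscall entries and exits can get out of sync. Add a new field, kentry_syscall_state, to task_struct to track the syscall state and, when CONFIG_PROVE_LOCKING is enabled, warn if it does not transition correctly on syscall entry, on syscall exit, or on exit to user mode.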
Signed-off-by: Andy Lutomirski <l...@kernel.org> --- I'm not in love with this patch. I'm imagining moving TS_COMPAT and such into the new kentry_syscall_state field. What do you all think? include/linux/entry-common.h | 11 +++++++++++ include/linux/sched.h | 1 + init/init_task.c | 9 +++++++++ kernel/entry/common.c | 25 ++++++++++++++++++++++++- 4 files changed, 45 insertions(+), 1 deletion(-) diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index ca86a00abe86..c2463b6b71fe 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -60,6 +60,17 @@ _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \ ARCH_EXIT_TO_USER_MODE_WORK) +/* + * task_struct::kentry_syscall_state + * + * kentry_syscall_state is reset to zero on syscall exit. For efficiency + * reasons, if CONFIG_PROVE_LOCKING=n, kentry_syscall_state is permitted + * to remain 0 even inside a syscall. + */ +#ifdef CONFIG_PROVE_LOCKING +# define KENTRY_SYSCALL_STATE_IN_SYSCALL 1 +#endif + /** * arch_check_user_regs - Architecture specific sanity check for user mode regs * @regs: Pointer to currents pt_regs diff --git a/include/linux/sched.h b/include/linux/sched.h index 6e3a5eeec509..691057a3b48d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1002,6 +1002,7 @@ struct task_struct { #endif struct seccomp seccomp; struct syscall_user_dispatch syscall_dispatch; + u32 kentry_syscall_state; /* Thread group tracking: */ u64 parent_exec_id; diff --git a/init/init_task.c b/init/init_task.c index 8a992d73e6fb..91050c4f0bb3 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -12,6 +12,7 @@ #include <linux/audit.h> #include <linux/numa.h> #include <linux/scs.h> +#include <linux/entry-common.h> #include <linux/uaccess.h> @@ -212,6 +213,14 @@ struct task_struct init_task #ifdef CONFIG_SECCOMP .seccomp = { .filter_count = ATOMIC_INIT(0) }, #endif +#ifdef CONFIG_PROVE_LOCKING + /* + * The init task, and kernel threads in general, are considered + * 
to be "in a syscall". This way they can execve() and then exit + * the supposed syscall that they were in to go to user mode. + */ + .kentry_syscall_state = KENTRY_SYSCALL_STATE_IN_SYSCALL, +#endif }; EXPORT_SYMBOL(init_task); diff --git a/kernel/entry/common.c b/kernel/entry/common.c index 378341642f94..7e971b866ad2 100644 --- a/kernel/entry/common.c +++ b/kernel/entry/common.c @@ -83,7 +83,16 @@ static long syscall_trace_enter(struct pt_regs *regs, long syscall, static __always_inline long __syscall_enter_from_user_work(struct pt_regs *regs, long syscall) { - unsigned long work = READ_ONCE(current_thread_info()->syscall_work); + unsigned long work; + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + WARN_ONCE(current->kentry_syscall_state, + "entering syscall %ld while already in a syscall", + syscall); + current->kentry_syscall_state = KENTRY_SYSCALL_STATE_IN_SYSCALL; + } + + work = READ_ONCE(current_thread_info()->syscall_work); if (work & SYSCALL_WORK_ENTER) syscall = syscall_trace_enter(regs, syscall, work); @@ -195,6 +204,12 @@ static void exit_to_user_mode_prepare(struct pt_regs *regs) { unsigned long ti_work = READ_ONCE(current_thread_info()->flags); + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + WARN_ONCE(current->kentry_syscall_state & + KENTRY_SYSCALL_STATE_IN_SYSCALL, + "exiting to user mode while in syscall context"); + } + lockdep_assert_irqs_disabled(); if (unlikely(ti_work & EXIT_TO_USER_MODE_WORK)) @@ -282,6 +297,14 @@ static void syscall_exit_to_user_mode_prepare(struct pt_regs *regs) */ if (unlikely(work & SYSCALL_WORK_EXIT)) syscall_exit_work(regs, work); + + if (IS_ENABLED(CONFIG_PROVE_LOCKING)) { + WARN_ONCE(!(current->kentry_syscall_state & + KENTRY_SYSCALL_STATE_IN_SYSCALL), + "exiting syscall %lu without entering first", nr); + } + + current->kentry_syscall_state = 0; } static __always_inline void __syscall_exit_to_user_mode_work(struct pt_regs *regs) -- 2.29.2