It's quite easy to mess up kentry calls.  Add debugging checks that
kentry transitions to and from user mode match up and that
kentry_nmi_enter() and kentry_nmi_exit() calls match up.

Checking that each kentry_enter() fully matches its corresponding
kentry_exit() would need per-task state.
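
As a rough sketch of the intended accounting (using only the constants
and helpers added below), the per-CPU depth evolves as:

  user mode                     depth == 0
  __enter_from_user_mode()      depth == 1
  kentry_nmi_enter()            depth == 1 + KENTRY_CPU_DEPTH_NMI
  nested kentry_nmi_enter()     depth == 1 + 2 * KENTRY_CPU_DEPTH_NMI
  matching kentry_nmi_exit()s   depth == 1
  __exit_to_user_mode()         depth == 0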

Signed-off-by: Andy Lutomirski <l...@kernel.org>
---
 kernel/entry/common.c | 81 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 80 insertions(+), 1 deletion(-)

diff --git a/kernel/entry/common.c b/kernel/entry/common.c
index 4ba82c684189..f62934d761e3 100644
--- a/kernel/entry/common.c
+++ b/kernel/entry/common.c
@@ -11,9 +11,65 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/syscalls.h>
 
+/*
+ * kentry_cpu_depth is 0 in user mode, 1 in normal kernel mode, and
+ * 1 + n * KENTRY_CPU_DEPTH_NMI in kentry_nmi_enter() mode.  We can't
+ * use a percpu variable to match up kentry_enter() from kernel mode
+ * with the corresponding kentry_exit() because tasks may schedule inside
+ * nested kentry_enter() regions.
+ */
+#define KENTRY_CPU_DEPTH_NMI   1024UL
+
+#ifdef CONFIG_DEBUG_ENTRY
+
+/*
+ * Extra safe WARN_ONCE.  Per-arch optimized WARN_ONCE() implementations
+ * might go through the low-level entry and kentry code even before noticing
+ * that the warning already fired, which could result in recursive warnings.
+ * This carefully avoids any funny business once a given warning has fired.
+ */
+#define DEBUG_ENTRY_WARN_ONCE(condition, format...) ({         \
+       static bool __section(".data.once") __warned;           \
+       int __ret_warn_once = !!(condition);                    \
+                                                               \
+       if (unlikely(__ret_warn_once && !READ_ONCE(__warned))) {\
+               WRITE_ONCE(__warned, true);                     \
+               WARN(1, format);                                \
+       }                                                       \
+       unlikely(__ret_warn_once);                              \
+})
+
+
+static DEFINE_PER_CPU(unsigned int, kentry_cpu_depth) = 1UL;
+
+static __always_inline void kentry_cpu_depth_add(unsigned int n)
+{
+       this_cpu_add(kentry_cpu_depth, n);
+}
+
+static void kentry_cpu_depth_check(unsigned int n)
+{
+       DEBUG_ENTRY_WARN_ONCE(this_cpu_read(kentry_cpu_depth) < n, "kentry depth underflow");
+}
+
+static __always_inline void kentry_cpu_depth_sub(unsigned int n)
+{
+       this_cpu_sub(kentry_cpu_depth, n);
+}
+#else
+
+#define DEBUG_ENTRY_WARN_ONCE(condition, format...) do {} while (0)
+
+static __always_inline void kentry_cpu_depth_add(unsigned int n) {}
+static void kentry_cpu_depth_check(unsigned int n) {}
+static __always_inline void kentry_cpu_depth_sub(unsigned int n) {}
+
+#endif
+
 /* See comment for enter_from_user_mode() in entry-common.h */
 static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
 {
+       kentry_cpu_depth_add(1);
        arch_check_user_regs(regs);
        lockdep_hardirqs_off(CALLER_ADDR0);
 
@@ -22,6 +78,14 @@ static __always_inline void __enter_from_user_mode(struct pt_regs *regs)
 
        instrumentation_begin();
        trace_hardirqs_off_finish();
+
+#ifdef CONFIG_DEBUG_ENTRY
+       DEBUG_ENTRY_WARN_ONCE(
+               this_cpu_read(kentry_cpu_depth) != 1,
+               "kentry: __enter_from_user_mode() called while kentry thought the CPU was in the kernel (%u)",
+               this_cpu_read(kentry_cpu_depth));
+#endif
+
        instrumentation_end();
 }
 
@@ -99,6 +163,11 @@ long kentry_syscall_begin(struct pt_regs *regs, long syscall)
 static __always_inline void __exit_to_user_mode(void)
 {
        instrumentation_begin();
+#ifdef CONFIG_DEBUG_ENTRY
+       DEBUG_ENTRY_WARN_ONCE(this_cpu_read(kentry_cpu_depth) != 1,
+                             "__exit_to_user_mode called at wrong kentry cpu depth (%u)",
+                             this_cpu_read(kentry_cpu_depth));
+#endif
        trace_hardirqs_on_prepare();
        lockdep_hardirqs_on_prepare(CALLER_ADDR0);
        instrumentation_end();
@@ -106,6 +175,7 @@ static __always_inline void __exit_to_user_mode(void)
        user_enter_irqoff();
        arch_exit_to_user_mode();
        lockdep_hardirqs_on(CALLER_ADDR0);
+       kentry_cpu_depth_sub(1);
 }
 
 #ifdef CONFIG_ARM64
@@ -360,7 +430,12 @@ noinstr void kentry_exit(struct pt_regs *regs, kentry_state_t state)
        /* Check whether this returns to user mode */
        if (user_mode(regs)) {
                kentry_exit_to_user_mode(regs);
-       } else if (!regs_irqs_disabled(regs)) {
+               return;
+       }
+
+       kentry_cpu_depth_check(1);
+
+       if (!regs_irqs_disabled(regs)) {
                /*
                 * If RCU was not watching on entry this needs to be done
                 * carefully and needs the same ordering of lockdep/tracing
@@ -399,6 +474,8 @@ kentry_state_t noinstr kentry_nmi_enter(struct pt_regs *regs)
 
        irq_state.lockdep = lockdep_hardirqs_enabled();
 
+       kentry_cpu_depth_add(KENTRY_CPU_DEPTH_NMI);
+
        __nmi_enter();
        lockdep_hardirqs_off(CALLER_ADDR0);
        lockdep_hardirq_enter();
@@ -415,6 +492,7 @@ kentry_state_t noinstr kentry_nmi_enter(struct pt_regs *regs)
 void noinstr kentry_nmi_exit(struct pt_regs *regs, kentry_state_t irq_state)
 {
        instrumentation_begin();
+       kentry_cpu_depth_check(KENTRY_CPU_DEPTH_NMI);
        ftrace_nmi_exit();
        if (irq_state.lockdep) {
                trace_hardirqs_on_prepare();
@@ -427,4 +505,5 @@ void noinstr kentry_nmi_exit(struct pt_regs *regs, kentry_state_t irq_state)
        if (irq_state.lockdep)
                lockdep_hardirqs_on(CALLER_ADDR0);
        __nmi_exit();
+       kentry_cpu_depth_sub(KENTRY_CPU_DEPTH_NMI);
 }
-- 
2.29.2
