While running a user_access regions, it is not supported to reschedule.
Add an overridable primitive to indicate whether a user_access region is
active and check that this is not the case when calling rescheduling
functions.

These checks are only performed when DEBUG_UACCESS_SLEEP is selected.

Also, add a comment clarifying the behaviour of user_access regions.

Signed-off-by: Julien Thierry <[email protected]>
Cc: Ingo Molnar <[email protected]>
Cc: Peter Zijlstra <[email protected]>

---
 include/linux/kernel.h  | 11 +++++++++--
 include/linux/uaccess.h | 13 +++++++++++++
 kernel/sched/core.c     | 22 ++++++++++++++++++++++
 lib/Kconfig.debug       |  8 ++++++++
 4 files changed, 52 insertions(+), 2 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 8f0e68e..73f1f82 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -237,11 +237,18 @@
 struct pt_regs;
 struct user;

+#ifdef CONFIG_DEBUG_UACCESS_SLEEP
+extern void __might_resched(const char *file, int line);
+#else
+#define __might_resched(file, line) do { } while (0)
+#endif
+
 #ifdef CONFIG_PREEMPT_VOLUNTARY
 extern int _cond_resched(void);
-# define might_resched() _cond_resched()
+# define might_resched() \
+       do { __might_resched(__FILE__, __LINE__); _cond_resched(); } while (0)
 #else
-# define might_resched() do { } while (0)
+# define might_resched() __might_resched(__FILE__, __LINE__)
 #endif

 #ifdef CONFIG_DEBUG_ATOMIC_SLEEP
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 37b226e..2c0c39e 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -263,6 +263,15 @@ static inline unsigned long 
__copy_from_user_inatomic_nocache(void *to,
 #define probe_kernel_address(addr, retval)             \
        probe_kernel_read(&retval, addr, sizeof(retval))

+/*
+ * user_access_begin() and user_access_end() define a region where
+ * unsafe user accessors can be used. Exceptions and interrupt shall exit the
+ * user_access region and re-enter it when returning to the interrupted 
context.
+ *
+ * No sleeping function should get called during a user_access region - we rely
+ * on exception handling to take care of the user_access status for us, but 
that
+ * doesn't happen when directly calling schedule().
+ */
 #ifndef user_access_begin
 #define user_access_begin(ptr,len) access_ok(ptr, len)
 #define user_access_end() do { } while (0)
@@ -270,6 +279,10 @@ static inline unsigned long 
__copy_from_user_inatomic_nocache(void *to,
 #define unsafe_put_user(x, ptr, err) do { if (unlikely(__put_user(x, ptr))) 
goto err; } while (0)
 #endif

+#ifndef unsafe_user_region_active
+#define unsafe_user_region_active()    false
+#endif
+
 #ifdef CONFIG_HARDENED_USERCOPY
 void usercopy_warn(const char *name, const char *detail, bool to_user,
                   unsigned long offset, unsigned long len);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a674c7db..b1bb7e9 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3289,6 +3289,14 @@ static inline void schedule_debug(struct task_struct 
*prev)
                __schedule_bug(prev);
                preempt_count_set(PREEMPT_DISABLED);
        }
+
+       if (IS_ENABLED(CONFIG_DEBUG_UACCESS_SLEEP) &&
+           unlikely(unsafe_user_region_active())) {
+               printk(KERN_ERR "BUG: scheduling while user_access enabled: 
%s/%d/0x%08x\n",
+                      prev->comm, prev->pid, preempt_count());
+               dump_stack();
+       }
+
        rcu_sleep_check();

        profile_hit(SCHED_PROFILING, __builtin_return_address(0));
@@ -6151,6 +6159,20 @@ void ___might_sleep(const char *file, int line, int 
preempt_offset)
 EXPORT_SYMBOL(___might_sleep);
 #endif

+#ifdef CONFIG_DEBUG_UACCESS_SLEEP
+void __might_resched(const char *file, int line)
+{
+       if (!unsafe_user_region_active())
+               return;
+
+       printk(KERN_ERR
+               "BUG: rescheduling function called from user access context at 
%s:%d\n",
+                       file, line);
+       dump_stack();
+}
+EXPORT_SYMBOL(__might_resched);
+#endif
+
 #ifdef CONFIG_MAGIC_SYSRQ
 void normalize_rt_tasks(void)
 {
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index d4df5b2..d030e31 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2069,6 +2069,14 @@ config IO_STRICT_DEVMEM

          If in doubt, say Y.

+config DEBUG_UACCESS_SLEEP
+       bool "Check sleep inside a user access region"
+       depends on DEBUG_KERNEL
+       help
+         If you say Y here, various routines which may sleep will become very
+         noisy if they are called inside a user access region (i.e. between
+         a user_access_begin() and a user_access_end())
+
 source "arch/$(SRCARCH)/Kconfig.debug"

 endmenu # Kernel hacking
--
1.9.1

Reply via email to