On Tue, Sep 13, 2016 at 06:14:27PM +0200, Oleg Nesterov wrote:

> Me too, and I failed to find anything which could be broken... So
> perhaps we should make it a nop and investigate any new bug reports
> after that.

Works for me :-)

> 
> Hmm. And the preempt_enable_no_resched_notrace() under TASK_DEAD in
> __schedule() should be removed, it seems; do_exit() can call
> __schedule() directly.


Something like so?

---

 include/linux/kernel.h |  2 +-
 include/linux/sched.h  |  2 ++
 kernel/exit.c          | 11 ++---------
 kernel/sched/core.c    | 23 ++++++++++++-----------
 4 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index d96a6118d26a..e5bd9cdd2e24 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -266,7 +266,7 @@ extern void oops_enter(void);
 extern void oops_exit(void);
 void print_oops_end_marker(void);
 extern int oops_may_print(void);
-void do_exit(long error_code)
+void __noreturn do_exit(long error_code)
        __noreturn;
 void complete_and_exit(struct completion *, long)
        __noreturn;
diff --git a/include/linux/sched.h b/include/linux/sched.h
index eb64fcd89e68..b0c818a05b2e 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -448,6 +448,8 @@ static inline void io_schedule(void)
        io_schedule_timeout(MAX_SCHEDULE_TIMEOUT);
 }
 
+void __noreturn do_task_dead(void);
+
 struct nsproxy;
 struct user_namespace;
 
diff --git a/kernel/exit.c b/kernel/exit.c
index 091a78be3b09..d4c12692f766 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -725,7 +725,7 @@ static void check_stack_usage(void)
 static inline void check_stack_usage(void) {}
 #endif
 
-void do_exit(long code)
+void __noreturn do_exit(long code)
 {
        struct task_struct *tsk = current;
        int group_dead;
@@ -897,14 +897,7 @@ void do_exit(long code)
        smp_mb();
        raw_spin_unlock_wait(&tsk->pi_lock);
 
-       /* causes final put_task_struct in finish_task_switch(). */
-       tsk->state = TASK_DEAD;
-       tsk->flags |= PF_NOFREEZE;      /* tell freezer to ignore us */
-       schedule();
-       BUG();
-       /* Avoid "noreturn function does return".  */
-       for (;;)
-               cpu_relax();    /* For when BUG is null */
+       do_task_dead();
 }
 EXPORT_SYMBOL_GPL(do_exit);
 
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index a0086a5fc008..6034f269000f 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -3327,17 +3327,6 @@ static void __sched notrace __schedule(bool preempt)
        rq = cpu_rq(cpu);
        prev = rq->curr;
 
-       /*
-        * do_exit() calls schedule() with preemption disabled as an exception;
-        * however we must fix that up, otherwise the next task will see an
-        * inconsistent (higher) preempt count.
-        *
-        * It also avoids the below schedule_debug() test from complaining
-        * about this.
-        */
-       if (unlikely(prev->state == TASK_DEAD))
-               preempt_enable_no_resched_notrace();
-
        schedule_debug(prev);
 
        if (sched_feat(HRTICK))
@@ -3404,6 +3393,18 @@ static void __sched notrace __schedule(bool preempt)
        balance_callback(rq);
 }
 
+void __noreturn do_task_dead(void)
+{
+       /* causes final put_task_struct in finish_task_switch(). */
+       __set_current_state(TASK_DEAD);
+       current->flags |= PF_NOFREEZE;  /* tell freezer to ignore us */
+       __schedule(false);
+       BUG();
+       /* Avoid "noreturn function does return".  */
+       for (;;)
+               cpu_relax();    /* For when BUG is null */
+}
+
 static inline void sched_submit_work(struct task_struct *tsk)
 {
        if (!tsk->state || tsk_is_pi_blocked(tsk))

Reply via email to