Linus,

Please pull the latest sched-urgent-for-linus git tree from:

   git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git sched-urgent-for-linus

   # HEAD: 8e5bfa8c1f8471aa4a2d30be631ef2b50e10abaf sched/autogroup: Do not use autogroup->tg in zombie threads

Two fixes for autogroup scheduling, for races when turning the feature on/off
via /proc/sys/kernel/sched_autogroup_enabled.

 Thanks,

        Ingo

------------------>
Oleg Nesterov (2):
      sched/autogroup: Fix autogroup_move_group() to never skip sched_move_task()
      sched/autogroup: Do not use autogroup->tg in zombie threads


 include/linux/sched.h     |  2 ++
 kernel/exit.c             |  1 +
 kernel/sched/auto_group.c | 36 ++++++++++++++++++++++++++++--------
 3 files changed, 31 insertions(+), 8 deletions(-)

diff --git a/include/linux/sched.h b/include/linux/sched.h
index 348f51b0ec92..e9c009dc3a4a 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -2567,6 +2567,7 @@ extern void sched_autogroup_create_attach(struct 
task_struct *p);
 extern void sched_autogroup_detach(struct task_struct *p);
 extern void sched_autogroup_fork(struct signal_struct *sig);
 extern void sched_autogroup_exit(struct signal_struct *sig);
+extern void sched_autogroup_exit_task(struct task_struct *p);
 #ifdef CONFIG_PROC_FS
 extern void proc_sched_autogroup_show_task(struct task_struct *p, struct 
seq_file *m);
 extern int proc_sched_autogroup_set_nice(struct task_struct *p, int nice);
@@ -2576,6 +2577,7 @@ static inline void sched_autogroup_create_attach(struct 
task_struct *p) { }
 static inline void sched_autogroup_detach(struct task_struct *p) { }
 static inline void sched_autogroup_fork(struct signal_struct *sig) { }
 static inline void sched_autogroup_exit(struct signal_struct *sig) { }
+static inline void sched_autogroup_exit_task(struct task_struct *p) { }
 #endif
 
 extern int yield_to(struct task_struct *p, bool preempt);
diff --git a/kernel/exit.c b/kernel/exit.c
index 9d68c45ebbe3..3076f3089919 100644
--- a/kernel/exit.c
+++ b/kernel/exit.c
@@ -836,6 +836,7 @@ void __noreturn do_exit(long code)
         */
        perf_event_exit_task(tsk);
 
+       sched_autogroup_exit_task(tsk);
        cgroup_exit(tsk);
 
        /*
diff --git a/kernel/sched/auto_group.c b/kernel/sched/auto_group.c
index a5d966cb8891..f1c8fd566246 100644
--- a/kernel/sched/auto_group.c
+++ b/kernel/sched/auto_group.c
@@ -111,10 +111,13 @@ bool task_wants_autogroup(struct task_struct *p, struct 
task_group *tg)
 {
        if (tg != &root_task_group)
                return false;
-
        /*
-        * We can only assume the task group can't go away on us if
-        * autogroup_move_group() can see us on ->thread_group list.
+        * If we race with autogroup_move_group() the caller can use the old
+        * value of signal->autogroup but in this case sched_move_task() will
+        * be called again before autogroup_kref_put().
+        *
+        * However, there is no way sched_autogroup_exit_task() could tell us
+        * to avoid autogroup->tg, so we abuse PF_EXITING flag for this case.
         */
        if (p->flags & PF_EXITING)
                return false;
@@ -122,6 +125,16 @@ bool task_wants_autogroup(struct task_struct *p, struct 
task_group *tg)
        return true;
 }
 
+void sched_autogroup_exit_task(struct task_struct *p)
+{
+       /*
+        * We are going to call exit_notify() and autogroup_move_group() can't
+        * see this thread after that: we can no longer use signal->autogroup.
+        * See the PF_EXITING check in task_wants_autogroup().
+        */
+       sched_move_task(p);
+}
+
 static void
 autogroup_move_group(struct task_struct *p, struct autogroup *ag)
 {
@@ -138,13 +151,20 @@ autogroup_move_group(struct task_struct *p, struct 
autogroup *ag)
        }
 
        p->signal->autogroup = autogroup_kref_get(ag);
-
-       if (!READ_ONCE(sysctl_sched_autogroup_enabled))
-               goto out;
-
+       /*
+        * We can't avoid sched_move_task() after we changed signal->autogroup,
+        * this process can already run with task_group() == prev->tg or we can
+        * race with cgroup code which can read autogroup = prev under rq->lock.
+        * In the latter case for_each_thread() can not miss a migrating thread,
+        * cpu_cgroup_attach() must not be possible after cgroup_exit() and it
+        * can't be removed from thread list, we hold ->siglock.
+        *
+        * If an exiting thread was already removed from thread list we rely on
+        * sched_autogroup_exit_task().
+        */
        for_each_thread(p, t)
                sched_move_task(t);
-out:
+
        unlock_task_sighand(p, &flags);
        autogroup_kref_put(prev);
 }

Reply via email to