Greg,

It looks like the patch below missed 4.5, and I'm starting to get bug
reports that look very much like this issue. Could we get this patch
lifted into 4.5-stable?

On Mon, Mar 21, 2016 at 04:15:41AM -0700, tip-bot for Peter Zijlstra wrote:
> Commit-ID:  2f5177f0fd7e531b26d54633be62d1d4cb94621c
> Gitweb:     http://git.kernel.org/tip/2f5177f0fd7e531b26d54633be62d1d4cb94621c
> Author:     Peter Zijlstra <pet...@infradead.org>
> AuthorDate: Wed, 16 Mar 2016 16:22:45 +0100
> Committer:  Ingo Molnar <mi...@kernel.org>
> CommitDate: Mon, 21 Mar 2016 10:49:23 +0100
> 
> sched/cgroup: Fix/cleanup cgroup teardown/init
> 
> The CPU controller hasn't kept up with the various changes in the whole
> cgroup initialization / destruction sequence, and commit:
> 
>   2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups")
> 
> caused it to explode.
> 
> The reason for this is that zombies do not inhibit css_offline() from
> being called, but do stall css_released(). Now we tear down the cfs_rq
> structures on css_offline() but zombies can run after that, leading to
> use-after-free issues.
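> 
> As a minimal, purely illustrative userspace sketch of that ordering
> (hypothetical names, not kernel code; zombie_tick() stands in for a
> zombie task being scheduled in the group):
> 
>     #include <stdlib.h>
> 
>     struct cfs_rq { int nr_running; };
>     struct task_group { struct cfs_rq *cfs_rq; };
> 
>     /* A zombie still dereferences the group's cfs_rq when it runs. */
>     static void zombie_tick(struct task_group *tg)
>     {
>             tg->cfs_rq->nr_running++;
>     }
> 
>     int main(void)
>     {
>             struct task_group tg = { .cfs_rq = calloc(1, sizeof(*tg.cfs_rq)) };
> 
>             /* css_offline(): the old code freed tg.cfs_rq here, but... */
>             zombie_tick(&tg);   /* ...zombies can still run -> use-after-free */
> 
>             /* css_released(): no user (zombies included) remains, so this
>              * is the earliest point where teardown is safe. */
>             free(tg.cfs_rq);
>             return 0;
>     }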
> 
> The solution is to move the tear-down to css_released(), which
> guarantees nobody (including no zombies) is still using our cgroup.
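> 
> With that, the callback wiring (see the final hunk below) becomes:
> 
>     struct cgroup_subsys cpu_cgrp_subsys = {
>             .css_alloc      = cpu_cgroup_css_alloc,    /* now also onlines   */
>             .css_released   = cpu_cgroup_css_released, /* offlines the group */
>             .css_free       = cpu_cgroup_css_free,     /* frees after RCU GP */
>             /* .fork, .can_attach, .attach, ... unchanged */
>     };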
> 
> Furthermore, a few simple cleanups are possible too. There doesn't
> appear to be any point to us using css_online() (anymore?), so fold
> that into css_alloc().
> 
> And since cgroup code guarantees an RCU grace period between
> css_released() and css_free() we can forgo using call_rcu() and free the
> stuff immediately.
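> 
> Concretely, in the hunks below the scheduler's own deferral:
> 
>     call_rcu(&tg->rcu, sched_free_group_rcu);
> 
> drops out of the cgroup destroy path: cpu_cgroup_css_free() now calls
> sched_free_group(tg) directly, relying on the grace period that cgroup
> core inserts between css_released() and css_free().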
> 
> Suggested-by: Tejun Heo <t...@kernel.org>
> Reported-by: Kazuki Yamaguchi <k...@rhe.jp>
> Reported-by: Niklas Cassel <niklas.cas...@axis.com>
> Tested-by: Niklas Cassel <niklas.cas...@axis.com>
> Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org>
> Acked-by: Tejun Heo <t...@kernel.org>
> Cc: Linus Torvalds <torva...@linux-foundation.org>
> Cc: Peter Zijlstra <pet...@infradead.org>
> Cc: Thomas Gleixner <t...@linutronix.de>
> Fixes: 2e91fa7f6d45 ("cgroup: keep zombies associated with their original cgroups")
> Link: http://lkml.kernel.org/r/20160316152245.gy6...@twins.programming.kicks-ass.net
> Signed-off-by: Ingo Molnar <mi...@kernel.org>
> ---
>  kernel/sched/core.c | 35 ++++++++++++++---------------------
>  1 file changed, 14 insertions(+), 21 deletions(-)
> 
> diff --git a/kernel/sched/core.c b/kernel/sched/core.c
> index 4d872e1..2a87bdd 100644
> --- a/kernel/sched/core.c
> +++ b/kernel/sched/core.c
> @@ -7536,7 +7536,7 @@ void set_curr_task(int cpu, struct task_struct *p)
>  /* task_group_lock serializes the addition/removal of task groups */
>  static DEFINE_SPINLOCK(task_group_lock);
>  
> -static void free_sched_group(struct task_group *tg)
> +static void sched_free_group(struct task_group *tg)
>  {
>       free_fair_sched_group(tg);
>       free_rt_sched_group(tg);
> @@ -7562,7 +7562,7 @@ struct task_group *sched_create_group(struct task_group *parent)
>       return tg;
>  
>  err:
> -     free_sched_group(tg);
> +     sched_free_group(tg);
>       return ERR_PTR(-ENOMEM);
>  }
>  
> @@ -7582,17 +7582,16 @@ void sched_online_group(struct task_group *tg, struct task_group *parent)
>  }
>  
>  /* rcu callback to free various structures associated with a task group */
> -static void free_sched_group_rcu(struct rcu_head *rhp)
> +static void sched_free_group_rcu(struct rcu_head *rhp)
>  {
>       /* now it should be safe to free those cfs_rqs */
> -     free_sched_group(container_of(rhp, struct task_group, rcu));
> +     sched_free_group(container_of(rhp, struct task_group, rcu));
>  }
>  
> -/* Destroy runqueue etc associated with a task group */
>  void sched_destroy_group(struct task_group *tg)
>  {
>       /* wait for possible concurrent references to cfs_rqs complete */
> -     call_rcu(&tg->rcu, free_sched_group_rcu);
> +     call_rcu(&tg->rcu, sched_free_group_rcu);
>  }
>  
>  void sched_offline_group(struct task_group *tg)
> @@ -8051,31 +8050,26 @@ cpu_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
>       if (IS_ERR(tg))
>               return ERR_PTR(-ENOMEM);
>  
> +     sched_online_group(tg, parent);
> +
>       return &tg->css;
>  }
>  
> -static int cpu_cgroup_css_online(struct cgroup_subsys_state *css)
> +static void cpu_cgroup_css_released(struct cgroup_subsys_state *css)
>  {
>       struct task_group *tg = css_tg(css);
> -     struct task_group *parent = css_tg(css->parent);
>  
> -     if (parent)
> -             sched_online_group(tg, parent);
> -     return 0;
> +     sched_offline_group(tg);
>  }
>  
>  static void cpu_cgroup_css_free(struct cgroup_subsys_state *css)
>  {
>       struct task_group *tg = css_tg(css);
>  
> -     sched_destroy_group(tg);
> -}
> -
> -static void cpu_cgroup_css_offline(struct cgroup_subsys_state *css)
> -{
> -     struct task_group *tg = css_tg(css);
> -
> -     sched_offline_group(tg);
> +     /*
> +      * Relies on the RCU grace period between css_released() and this.
> +      */
> +     sched_free_group(tg);
>  }
>  
>  static void cpu_cgroup_fork(struct task_struct *task)
> @@ -8435,9 +8429,8 @@ static struct cftype cpu_files[] = {
>  
>  struct cgroup_subsys cpu_cgrp_subsys = {
>       .css_alloc      = cpu_cgroup_css_alloc,
> +     .css_released   = cpu_cgroup_css_released,
>       .css_free       = cpu_cgroup_css_free,
> -     .css_online     = cpu_cgroup_css_online,
> -     .css_offline    = cpu_cgroup_css_offline,
>       .fork           = cpu_cgroup_fork,
>       .can_attach     = cpu_cgroup_can_attach,
>       .attach         = cpu_cgroup_attach,
