On Mon, Apr 17, 2017 at 06:41:38PM +0800, Zefan Li wrote:
> On 2017/4/15 7:32, Andrei Vagin wrote:
> > On Fri, Apr 14, 2017 at 04:27:37PM -0700, Andrei Vagin wrote:
> >> Hello,
> >>
> >> One of our CRIU tests hangs with this patch.
> >>
> >> Steps to reproduce:
> >> curl -o cgroupns.c 
> >> https://gist.githubusercontent.com/avagin/f87c8a8bd2a0de9afcc74976327786bc/raw/5843701ef3679f50dd2427cf57a80871082eb28c/gistfile1.txt
> >> gcc cgroupns.c -o cgroupns
> >> ./cgroupns
> >> ./cgroupns
> > 
> > I've found a trivial reproducer:
> > mkdir /tmp/xxx
> > mount -t cgroup -o none,name=zdtmtst xxx /tmp/xxx
> > mkdir /tmp/xxx/xxx
> > umount /tmp/xxx
> > mount -t cgroup -o none,name=zdtmtst xxx /tmp/xxx
> > 
> 
> Now I remember why it didn't check for a NULL pointer... Could you try the
> following fix?
> It also reverts my previous patch. I would appreciate it if you could run
> the full test suite, to make sure it won't break anything.

It works for me. Thanks.

Tested-by: Andrei Vagin <ava...@virtuozzo.com>

> 
> PS: Tejun, I found recently I can no longer receive your emails. Don't know 
> why...
> 
> =======
> 
> [PATCH] cgroup: avoid attaching a cgroup root to two different superblocks, 
> take 2
> 
> Commit bfb0b80db5f9 is broken. Now we try to fix the race by delaying
> the initialization of cgroup root refcnt until a superblock has been
> allocated.
> 
> Cc: sta...@vger.kernel.org # 3.16+
> Reported-by: Dmitry Vyukov <dvyu...@google.com>
> Reported-by: Andrei Vagin <ava...@virtuozzo.com>
> Signed-off-by: Zefan Li <lize...@huawei.com>
> ---
>  kernel/cgroup/cgroup-internal.h |  2 +-
>  kernel/cgroup/cgroup-v1.c       | 18 ++++++++++++++++--
>  kernel/cgroup/cgroup.c          |  8 ++++----
>  3 files changed, 21 insertions(+), 7 deletions(-)
> 
> diff --git a/kernel/cgroup/cgroup-internal.h b/kernel/cgroup/cgroup-internal.h
> index 9203bfb..e470268 100644
> --- a/kernel/cgroup/cgroup-internal.h
> +++ b/kernel/cgroup/cgroup-internal.h
> @@ -163,7 +163,7 @@ int cgroup_path_ns_locked(struct cgroup *cgrp, char *buf, 
> size_t buflen,
>  
>  void cgroup_free_root(struct cgroup_root *root);
>  void init_cgroup_root(struct cgroup_root *root, struct cgroup_sb_opts *opts);
> -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask);
> +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags);
>  int rebind_subsystems(struct cgroup_root *dst_root, u16 ss_mask);
>  struct dentry *cgroup_do_mount(struct file_system_type *fs_type, int flags,
>                              struct cgroup_root *root, unsigned long magic,
> diff --git a/kernel/cgroup/cgroup-v1.c b/kernel/cgroup/cgroup-v1.c
> index 12e19f0..6ca9b12 100644
> --- a/kernel/cgroup/cgroup-v1.c
> +++ b/kernel/cgroup/cgroup-v1.c
> @@ -1072,6 +1072,7 @@ struct dentry *cgroup1_mount(struct file_system_type 
> *fs_type, int flags,
>       struct cgroup_subsys *ss;
>       struct dentry *dentry;
>       int i, ret;
> +     bool new_root = false;
>  
>       cgroup_lock_and_drain_offline(&cgrp_dfl_root.cgrp);
>  
> @@ -1146,7 +1147,7 @@ struct dentry *cgroup1_mount(struct file_system_type 
> *fs_type, int flags,
>                * path is super cold.  Let's just sleep a bit and retry.
>                */
>               pinned_sb = kernfs_pin_sb(root->kf_root, NULL);
> -             if (IS_ERR_OR_NULL(pinned_sb) ||
> +             if (IS_ERR(pinned_sb) ||
>                   !percpu_ref_tryget_live(&root->cgrp.self.refcnt)) {
>                       mutex_unlock(&cgroup_mutex);
>                       if (!IS_ERR_OR_NULL(pinned_sb))
> @@ -1181,10 +1182,11 @@ struct dentry *cgroup1_mount(struct file_system_type 
> *fs_type, int flags,
>               ret = -ENOMEM;
>               goto out_unlock;
>       }
> +     new_root = true;
>  
>       init_cgroup_root(root, &opts);
>  
> -     ret = cgroup_setup_root(root, opts.subsys_mask);
> +     ret = cgroup_setup_root(root, opts.subsys_mask, PERCPU_REF_INIT_DEAD);
>       if (ret)
>               cgroup_free_root(root);
>  
> @@ -1201,6 +1203,18 @@ struct dentry *cgroup1_mount(struct file_system_type 
> *fs_type, int flags,
>                                CGROUP_SUPER_MAGIC, ns);
>  
>       /*
> +      * There's a race window after we release cgroup_mutex and before
> +      * allocating a superblock. Make sure a concurrent process won't
> +      * be able to re-use the root during this window by delaying the
> +      * initialization of root refcnt.
> +      */
> +     if (new_root) {
> +             mutex_lock(&cgroup_mutex);
> +             percpu_ref_reinit(&root->cgrp.self.refcnt);
> +             mutex_unlock(&cgroup_mutex);
> +     }
> +
> +     /*
>        * If @pinned_sb, we're reusing an existing root and holding an
>        * extra ref on its sb.  Mount is complete.  Put the extra ref.
>        */
> diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
> index 4885132..0f98010 100644
> --- a/kernel/cgroup/cgroup.c
> +++ b/kernel/cgroup/cgroup.c
> @@ -1640,7 +1640,7 @@ void init_cgroup_root(struct cgroup_root *root, struct 
> cgroup_sb_opts *opts)
>               set_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->cgrp.flags);
>  }
>  
> -int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask)
> +int cgroup_setup_root(struct cgroup_root *root, u16 ss_mask, int ref_flags)
>  {
>       LIST_HEAD(tmp_links);
>       struct cgroup *root_cgrp = &root->cgrp;
> @@ -1656,8 +1656,8 @@ int cgroup_setup_root(struct cgroup_root *root, u16 
> ss_mask)
>       root_cgrp->id = ret;
>       root_cgrp->ancestor_ids[0] = ret;
>  
> -     ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, 0,
> -                           GFP_KERNEL);
> +     ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release,
> +                           ref_flags, GFP_KERNEL);
>       if (ret)
>               goto out;
>  
> @@ -4512,7 +4512,7 @@ int __init cgroup_init(void)
>       hash_add(css_set_table, &init_css_set.hlist,
>                css_set_hash(init_css_set.subsys));
>  
> -     BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0));
> +     BUG_ON(cgroup_setup_root(&cgrp_dfl_root, 0, 0));
>  
>       mutex_unlock(&cgroup_mutex);
>  
> -- 
> 1.8.3.1
> 

Reply via email to