On 28.07.2020 20:53, Valeriy Vdovin wrote: > This is done so that each container could set it's own release agent. > Release agent information is now stored in per-cgroup-root data > structure in ve. > > https://jira.sw.ru/browse/PSBM-83887 > > Signed-off-by: Valeriy Vdovin <valeriy.vdo...@virtuozzo.com> > --- > include/linux/cgroup.h | 3 -- > include/linux/ve.h | 6 +++ > kernel/cgroup.c | 100 > ++++++++++++++++++++++++++++++++++++++++--------- > kernel/ve/ve.c | 72 +++++++++++++++++++++++++++++++++++ > 4 files changed, 161 insertions(+), 20 deletions(-) > > diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h > index 5f1460d..fc138c0 100644 > --- a/include/linux/cgroup.h > +++ b/include/linux/cgroup.h > @@ -429,9 +429,6 @@ struct cgroupfs_root { > /* IDs for cgroups in this hierarchy */ > struct ida cgroup_ida; > > - /* The path to use for release notifications. */ > - char release_agent_path[PATH_MAX]; > - > /* The name for this hierarchy - may be empty */ > char name[MAX_CGROUP_ROOT_NAMELEN]; > }; > diff --git a/include/linux/ve.h b/include/linux/ve.h > index 65413d5..b6662637 100644 > --- a/include/linux/ve.h > +++ b/include/linux/ve.h > @@ -214,6 +214,12 @@ void do_update_load_avg_ve(void); > > void ve_add_to_release_list(struct cgroup *cgrp); > void ve_rm_from_release_list(struct cgroup *cgrp); > + > +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot, > + const char *release_agent); > + > +const char *ve_get_release_agent_path(struct cgroup *cgrp_root); > + > extern struct ve_struct *get_ve(struct ve_struct *ve); > extern void put_ve(struct ve_struct *ve); > > diff --git a/kernel/cgroup.c b/kernel/cgroup.c > index aa93cf2..1d9c889 100644 > --- a/kernel/cgroup.c > +++ b/kernel/cgroup.c > @@ -1092,9 +1092,12 @@ static int rebind_subsystems(struct cgroupfs_root > *root, > > static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) > { > + const char *release_agent; > struct cgroupfs_root *root = dentry->d_sb->s_fs_info; > struct cgroup_subsys *ss; > + struct cgroup *root_cgrp = &root->top_cgroup; > > + mutex_lock(&cgroup_mutex); > mutex_lock(&cgroup_root_mutex); > for_each_subsys(root, ss) > seq_printf(seq, ",%s", ss->name); > @@ -1106,14 +1109,37 @@ static int cgroup_show_options(struct seq_file *seq, > struct dentry *dentry) > seq_puts(seq, ",xattr"); > if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) > seq_puts(seq, ",cpuset_v2_mode"); > - if (strlen(root->release_agent_path)) > - seq_show_option(seq, "release_agent", > - root->release_agent_path); > +#ifdef CONFIG_VE > + { > + struct ve_struct *ve = get_exec_env(); > + > + if (!ve_is_super(ve)) { > + /* > + * ve->init_task is NULL in case when cgroup is accessed > + * before ve_start_container has been called. > + * > + * ve->init_task is synchronized via ve->ve_ns rcu, see > + * ve_grab_context/drop_context. > + */ > + rcu_read_lock(); > + if (ve->ve_ns) > + root_cgrp = task_cgroup_from_root(ve->init_task, > + root); > + rcu_read_unlock(); > + } > + } > +#endif > + rcu_read_lock(); > + release_agent = ve_get_release_agent_path(root_cgrp); > + if (release_agent && release_agent[0]) > + seq_show_option(seq, "release_agent", release_agent); > + rcu_read_unlock(); > if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags)) > seq_puts(seq, ",clone_children"); > if (strlen(root->name)) > seq_show_option(seq, "name", root->name); > mutex_unlock(&cgroup_root_mutex); > + mutex_unlock(&cgroup_mutex); > return 0; > } > > @@ -1386,8 +1412,13 @@ static int cgroup_remount(struct super_block *sb, int > *flags, char *data) > /* re-populate subsystem files */ > cgroup_populate_dir(cgrp, false, added_mask); > > - if (opts.release_agent) > - strcpy(root->release_agent_path, opts.release_agent); > + if (opts.release_agent) { > + struct cgroup *root_cgrp; > + root_cgrp = cgroup_get_local_root(cgrp); > + if (root_cgrp->ve_owner) > + ret = ve_set_release_agent_path(root_cgrp, > + opts.release_agent); > + } > out_unlock: > kfree(opts.release_agent); > kfree(opts.name); > @@ -1549,8 +1580,6 @@ static struct cgroupfs_root > *cgroup_root_from_opts(struct cgroup_sb_opts *opts) > root->subsys_mask = opts->subsys_mask; > root->flags = opts->flags; > ida_init(&root->cgroup_ida); > - if (opts->release_agent) > - strcpy(root->release_agent_path, opts->release_agent); > if (opts->name) > strcpy(root->name, opts->name); > if (opts->cpuset_clone_children) > @@ -1748,6 +1777,11 @@ static struct dentry *cgroup_mount(struct > file_system_type *fs_type, > > cred = override_creds(&init_cred); > cgroup_populate_dir(root_cgrp, true, root->subsys_mask); > + if (opts.release_agent) { > + ret = ve_set_release_agent_path(root_cgrp, > + opts.release_agent); > + } > + > revert_creds(cred); > mutex_unlock(&cgroup_root_mutex); > mutex_unlock(&cgroup_mutex); > @@ -2317,7 +2351,8 @@ static int cgroup_procs_write(struct cgroup *cgrp, > struct cftype *cft, u64 tgid) > static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype > *cft, > const char *buffer) > { > - BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); > + int ret = 0; > + struct cgroup *root_cgrp; > > if (strlen(buffer) >= PATH_MAX) > return -EINVAL; > @@ -2325,19 +2360,35 @@ static int cgroup_release_agent_write(struct cgroup > *cgrp, struct cftype *cft, > if (!cgroup_lock_live_group(cgrp)) > return -ENODEV; > > - mutex_lock(&cgroup_root_mutex); > - strcpy(cgrp->root->release_agent_path, buffer); > - mutex_unlock(&cgroup_root_mutex); > + root_cgrp = cgroup_get_local_root(cgrp); > + BUG_ON(!root_cgrp); > + if (root_cgrp->ve_owner) > + ret = ve_set_release_agent_path(root_cgrp, buffer); > + else > + return -ENODEV;
Missed mutex_unlock() > + > mutex_unlock(&cgroup_mutex); > - return 0; > + return ret; > } > > static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, > struct seq_file *seq) > { > + const char *release_agent; > + struct cgroup *root_cgrp; > + > if (!cgroup_lock_live_group(cgrp)) > return -ENODEV; > - seq_puts(seq, cgrp->root->release_agent_path); > + > + root_cgrp = cgroup_get_local_root(cgrp); > + if (root_cgrp->ve_owner) { > + rcu_read_lock(); > + release_agent = ve_get_release_agent_path(root_cgrp); > + > + if (release_agent) > + seq_puts(seq, release_agent); > + rcu_read_unlock(); > + } > seq_putc(seq, '\n'); > mutex_unlock(&cgroup_mutex); > return 0; > @@ -5532,15 +5583,24 @@ static void check_for_release(struct cgroup *cgrp) > void cgroup_release_agent(struct work_struct *work) > { > struct ve_struct *ve; > + char *agentbuf; > + > + agentbuf = kzalloc(PATH_MAX, GFP_KERNEL); > + if (!agentbuf) { > + pr_warn("failed to allocate agentbuf\n"); > + return; > + } > + > ve = container_of(work, struct ve_struct, release_agent_work); > mutex_lock(&cgroup_mutex); > raw_spin_lock(&ve->release_list_lock); > while (!list_empty(&ve->release_list)) { > char *argv[3], *envp[3]; > int i, err; > - char *pathbuf = NULL, *agentbuf = NULL; > + char *pathbuf = NULL; > struct cgroup *cgrp, *root_cgrp; > struct task_struct *ve_task; > + const char *release_agent; > > cgrp = list_entry(ve->release_list.next, > struct cgroup, > @@ -5568,9 +5628,15 @@ void cgroup_release_agent(struct work_struct *work) > rcu_read_unlock(); > goto continue_free; > } > + > + release_agent = ve_get_release_agent_path(root_cgrp); > + > + *agentbuf = 0; > + if (release_agent) > + strncpy(agentbuf, release_agent, PATH_MAX); > rcu_read_unlock(); > - agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); > - if (!agentbuf) > + > + if (!*agentbuf) > goto continue_free; > > i = 0; > @@ -5601,11 +5667,11 @@ void cgroup_release_agent(struct work_struct *work) > mutex_lock(&cgroup_mutex); > continue_free: > kfree(pathbuf); > - kfree(agentbuf); > raw_spin_lock(&ve->release_list_lock); > } > raw_spin_unlock(&ve->release_list_lock); > mutex_unlock(&cgroup_mutex); > + kfree(agentbuf); > } > > static int __init cgroup_disable(char *str) > diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c > index f564dca..f03f665 100644 > --- a/kernel/ve/ve.c > +++ b/kernel/ve/ve.c > @@ -51,6 +51,11 @@ struct per_cgroot_data { > * data is related to this cgroup > */ > struct cgroup *cgroot; > + /* > + * path to release agent binaray, that should > + * be spawned for all cgroups under this cgroup root > + */ > + struct cgroup_rcu_string __rcu *release_agent_path; > }; > > extern struct kmapset_set sysfs_ve_perms_set; > @@ -175,6 +180,68 @@ static inline struct per_cgroot_data > *per_cgroot_get_or_create( > return data; > } > > +int ve_set_release_agent_path(struct cgroup *cgroot, > + const char *release_agent) > +{ > + struct ve_struct *ve; > + struct per_cgroot_data *data; > + struct cgroup_rcu_string *new_path, *old_path; > + int err = 0; > + > + /* > + * caller should grab cgroup_mutex to safely use > + * ve_owner field > + */ > + ve = cgroot->ve_owner; > + BUG_ON(!ve); > + > + new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent)); > + if (IS_ERR(new_path)) > + return PTR_ERR(new_path); > + > + data = per_cgroot_get_or_create(ve, cgroot); > + if (IS_ERR(data)) { > + kfree(new_path); > + return PTR_ERR(data); > + } > + > + raw_spin_lock(&ve->per_cgroot_list_lock); > + > + old_path = rcu_dereference_protected(data->release_agent_path, > + lockdep_is_held(&ve->per_cgroot_list_lock)); > + > + rcu_assign_pointer(data->release_agent_path, new_path); > + raw_spin_unlock(&ve->per_cgroot_list_lock); > + > + if (old_path) > + kfree_rcu(old_path, rcu_head); > + > + return err; > +} > + > +const char *ve_get_release_agent_path(struct cgroup *cgroot) > +{ > + /* caller must grab rcu_read_lock */ > + const char *result = NULL; > + struct per_cgroot_data *data; > + struct cgroup_rcu_string *str; > + struct ve_struct *ve; > + ve = rcu_dereference(cgroot->ve_owner); > + if (!ve) > + return NULL; > + > + raw_spin_lock(&ve->per_cgroot_list_lock); > + > + data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot); > + if (data) { > + str = rcu_dereference(data->release_agent_path); > + if (str) > + result = str->val; > + } > + raw_spin_unlock(&ve->per_cgroot_list_lock); > + return result; > +} > + > struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int > subsys_id) > { > struct cgroup_subsys_state *css, *tmp; > @@ -677,9 +744,14 @@ err_list: > static void ve_per_cgroot_free(struct ve_struct *ve) > { > struct per_cgroot_data *data, *saved; > + struct cgroup_rcu_string *release_agent; > > raw_spin_lock(&ve->per_cgroot_list_lock); > list_for_each_entry_safe(data, saved, &ve->per_cgroot_list, list) { > + release_agent = data->release_agent_path; > + RCU_INIT_POINTER(data->release_agent_path, NULL); > + if (release_agent) > + kfree_rcu(release_agent, rcu_head); > list_del_init(&data->list); > kfree(data); > } > _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel