This is done so that each container can set its own release agent. Release agent information is now stored in a per-cgroup-root data structure in ve.
https://jira.sw.ru/browse/PSBM-83887 Signed-off-by: Valeriy Vdovin <[email protected]> Reviewed-by: Kirill Tkhai <[email protected]> --- include/linux/cgroup.h | 5 +- include/linux/ve.h | 7 +++ kernel/cgroup.c | 133 ++++++++++++++++++++++++++++++++++++++++--------- kernel/ve/ve.c | 69 ++++++++++++++++++++++++- 4 files changed, 186 insertions(+), 28 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d0ce3cc..911dd48 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -394,9 +394,6 @@ struct cgroupfs_root { /* IDs for cgroups in this hierarchy */ struct ida cgroup_ida; - /* The path to use for release notifications. */ - char release_agent_path[PATH_MAX]; - /* The name for this hierarchy - may be empty */ char name[MAX_CGROUP_ROOT_NAMELEN]; }; @@ -639,7 +636,7 @@ int cgroup_task_count(const struct cgroup *cgrp); void cgroup_release_agent(struct work_struct *work); #ifdef CONFIG_VE -void cgroup_mark_ve_roots(struct ve_struct *ve); +int cgroup_mark_ve_roots(struct ve_struct *ve); void cgroup_unmark_ve_roots(struct ve_struct *ve); struct ve_struct *cgroup_get_ve_owner(struct cgroup *cgrp); #endif diff --git a/include/linux/ve.h b/include/linux/ve.h index 94a07df..da719b4 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -214,6 +214,13 @@ void do_update_load_avg_ve(void); void ve_add_to_release_list(struct cgroup *cgrp); void ve_rm_from_release_list(struct cgroup *cgrp); + +int ve_set_release_agent_path(struct ve_struct *ve, struct cgroup *cgroot, + const char *release_agent); + +const char *ve_get_release_agent_path(struct ve_struct *ve, + struct cgroup *cgrp_root); + extern struct ve_struct *get_ve(struct ve_struct *ve); extern void put_ve(struct ve_struct *ve); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index fa1a881..9b7c8f3 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1094,10 +1094,21 @@ static int rebind_subsystems(struct cgroupfs_root *root, static int cgroup_show_options(struct seq_file *seq, 
struct dentry *dentry) { + const char *release_agent; struct cgroupfs_root *root = dentry->d_sb->s_fs_info; struct cgroup_subsys *ss; + struct cgroup *root_cgrp = &root->top_cgroup; +#ifdef CONFIG_VE + struct ve_struct *ve = get_exec_env(); + if (!ve_is_super(ve)) { + mutex_lock(&cgroup_mutex); + root_cgrp = task_cgroup_from_root(ve->init_task, root); + mutex_unlock(&cgroup_mutex); + } +#endif mutex_lock(&cgroup_root_mutex); + for_each_subsys(root, ss) seq_printf(seq, ",%s", ss->name); if (root->flags & CGRP_ROOT_SANE_BEHAVIOR) @@ -1108,9 +1119,12 @@ static int cgroup_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ",xattr"); if (root->flags & CGRP_ROOT_CPUSET_V2_MODE) seq_puts(seq, ",cpuset_v2_mode"); - if (strlen(root->release_agent_path)) - seq_show_option(seq, "release_agent", - root->release_agent_path); + rcu_read_lock(); + release_agent = ve_get_release_agent_path(root_cgrp->ve_owner, + root_cgrp); + if (release_agent && release_agent[0]) + seq_show_option(seq, "release_agent", release_agent); + rcu_read_unlock(); if (test_bit(CGRP_CPUSET_CLONE_CHILDREN, &root->top_cgroup.flags)) seq_puts(seq, ",clone_children"); if (strlen(root->name)) @@ -1388,8 +1402,13 @@ static int cgroup_remount(struct super_block *sb, int *flags, char *data) /* re-populate subsystem files */ cgroup_populate_dir(cgrp, false, added_mask); - if (opts.release_agent) - strcpy(root->release_agent_path, opts.release_agent); + if (opts.release_agent) { + struct cgroup *root_cgrp; + root_cgrp = cgroup_get_local_root(cgrp); + if (root_cgrp->ve_owner) + ret = ve_set_release_agent_path(root_cgrp->ve_owner, + root_cgrp, opts.release_agent); + } out_unlock: kfree(opts.release_agent); kfree(opts.name); @@ -1550,8 +1569,6 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct cgroup_sb_opts *opts) root->subsys_mask = opts->subsys_mask; root->flags = opts->flags; ida_init(&root->cgroup_ida); - if (opts->release_agent) - strcpy(root->release_agent_path, 
opts->release_agent); if (opts->name) strcpy(root->name, opts->name); if (opts->cpuset_clone_children) @@ -2317,27 +2334,44 @@ static int cgroup_procs_write(struct cgroup *cgrp, struct cftype *cft, u64 tgid) static int cgroup_release_agent_write(struct cgroup *cgrp, struct cftype *cft, const char *buffer) { - BUILD_BUG_ON(sizeof(cgrp->root->release_agent_path) < PATH_MAX); - + int ret = 0; + struct cgroup *root_cgrp; if (strlen(buffer) >= PATH_MAX) return -EINVAL; if (!cgroup_lock_live_group(cgrp)) return -ENODEV; - mutex_lock(&cgroup_root_mutex); - strcpy(cgrp->root->release_agent_path, buffer); - mutex_unlock(&cgroup_root_mutex); + root_cgrp = cgroup_get_local_root(cgrp); + BUG_ON(!root_cgrp); + if (root_cgrp->ve_owner) { + ret = ve_set_release_agent_path(root_cgrp->ve_owner, + root_cgrp, buffer); + } + mutex_unlock(&cgroup_mutex); - return 0; + return ret; } static int cgroup_release_agent_show(struct cgroup *cgrp, struct cftype *cft, struct seq_file *seq) { + const char *release_agent; + struct cgroup *root_cgrp; + if (!cgroup_lock_live_group(cgrp)) return -ENODEV; - seq_puts(seq, cgrp->root->release_agent_path); + + root_cgrp = cgroup_get_local_root(cgrp); + if (root_cgrp->ve_owner) { + rcu_read_lock(); + release_agent = ve_get_release_agent_path( + root_cgrp->ve_owner, root_cgrp); + + if (release_agent) + seq_puts(seq, release_agent); + rcu_read_unlock(); + } seq_putc(seq, '\n'); mutex_unlock(&cgroup_mutex); return 0; @@ -4153,7 +4187,7 @@ static struct cftype files[] = { }, { .name = "release_agent", - .flags = CFTYPE_ONLY_ON_ROOT, + .flags = CFTYPE_ONLY_ON_ROOT | CFTYPE_VE_WRITABLE, .read_seq_string = cgroup_release_agent_show, .write_string = cgroup_release_agent_write, .max_write_len = PATH_MAX, @@ -4282,22 +4316,60 @@ static int subgroups_count(struct cgroup *cgroup) return cgrps_count; } +static struct cftype *get_cftype_by_name(const char *name) +{ + struct cftype *cft; + for (cft = files; cft->name[0] != '\0'; cft++) { + if (!strcmp(cft->name, name)) 
+ return cft; + } + return NULL; +} + #ifdef CONFIG_VE -void cgroup_mark_ve_roots(struct ve_struct *ve) +int cgroup_mark_ve_roots(struct ve_struct *ve) { - struct cgroup *cgrp; + struct cgroup *cgrp, *tmp; struct cgroupfs_root *root; + int err = 0; + struct cftype *cft; + LIST_HEAD(pending); + + cft = get_cftype_by_name("release_agent"); + BUG_ON(!cft); + mutex_lock(&cgroup_cft_mutex); mutex_lock(&cgroup_mutex); for_each_active_root(root) { cgrp = task_cgroup_from_root(ve->init_task, root); - cgrp->ve_owner = ve; + rcu_assign_pointer(cgrp->ve_owner, ve); set_bit(CGRP_VE_ROOT, &cgrp->flags); - + dget(cgrp->dentry); + list_add_tail(&cgrp->cft_q_node, &pending); if (test_bit(cpu_cgroup_subsys_id, &root->subsys_mask)) link_ve_root_cpu_cgroup(cgrp); } mutex_unlock(&cgroup_mutex); + list_for_each_entry_safe(cgrp, tmp, &pending, cft_q_node) { + struct inode *inode = cgrp->dentry->d_inode; + + if (err) { + dput(cgrp->dentry); + continue; + } + + mutex_lock(&inode->i_mutex); + mutex_lock(&cgroup_mutex); + if (!cgroup_is_removed(cgrp)) + err = cgroup_add_file(cgrp, NULL, cft); + mutex_unlock(&cgroup_mutex); + mutex_unlock(&inode->i_mutex); + + list_del_init(&cgrp->cft_q_node); + dput(cgrp->dentry); + } + mutex_unlock(&cgroup_cft_mutex); + return err; } void cgroup_unmark_ve_roots(struct ve_struct *ve) @@ -5432,13 +5504,22 @@ static void check_for_release(struct cgroup *cgrp) void cgroup_release_agent(struct work_struct *work) { struct ve_struct *ve; + char *agentbuf; + + agentbuf = kzalloc(PATH_MAX, GFP_KERNEL); + if (!agentbuf) { + pr_warn("failed to allocate agentbuf\n"); + return; + } + ve = container_of(work, struct ve_struct, release_agent_work); mutex_lock(&cgroup_mutex); raw_spin_lock(&ve->release_list_lock); while (!list_empty(&ve->release_list)) { char *argv[3], *envp[3]; int i, err; - char *pathbuf = NULL, *agentbuf = NULL; + const char *release_agent; + char *pathbuf = NULL; struct cgroup *cgrp, *root_cgrp; cgrp = list_entry(ve->release_list.next, @@ -5458,9 
+5539,15 @@ void cgroup_release_agent(struct work_struct *work) rcu_read_unlock(); goto continue_free; } + + release_agent = ve_get_release_agent_path(ve, root_cgrp); + + *agentbuf = 0; + if (release_agent) + strcpy(agentbuf, release_agent); rcu_read_unlock(); - agentbuf = kstrdup(cgrp->root->release_agent_path, GFP_KERNEL); - if (!agentbuf) + + if (!*agentbuf) goto continue_free; i = 0; @@ -5489,11 +5576,11 @@ void cgroup_release_agent(struct work_struct *work) mutex_lock(&cgroup_mutex); continue_free: kfree(pathbuf); - kfree(agentbuf); raw_spin_lock(&ve->release_list_lock); } raw_spin_unlock(&ve->release_list_lock); mutex_unlock(&cgroup_mutex); + kfree(agentbuf); } static int __init cgroup_disable(char *str) diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index bee3696..12f7497 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -51,6 +51,11 @@ struct per_cgroot_data { * data is related to this cgroup */ struct cgroup *cgroot; + /* + * path to release agent binaray, that should + * be spawned for all cgroups under this cgroup root + */ + struct cgroup_rcu_string __rcu *release_agent_path; }; extern struct kmapset_set sysfs_ve_perms_set; @@ -175,6 +180,59 @@ static inline struct per_cgroot_data *per_cgroot_get_or_create( return data; } +int ve_set_release_agent_path(struct ve_struct *ve, + struct cgroup *cgroot, const char *release_agent) +{ + struct per_cgroot_data *data; + struct cgroup_rcu_string *new_path, *old_path; + int err = 0; + + new_path = cgroup_rcu_strdup(release_agent, strlen(release_agent)); + if (IS_ERR(new_path)) + return PTR_ERR(new_path); + + data = per_cgroot_get_or_create(ve, cgroot); + if (IS_ERR(data)) { + kfree(new_path); + return PTR_ERR(data); + } + + raw_spin_lock(&ve->per_cgroot_list_lock); + + old_path = rcu_dereference_protected(data->release_agent_path, + lockdep_is_held(&ve->per_cgroot_list_lock)); + + rcu_assign_pointer(data->release_agent_path, new_path); + raw_spin_unlock(&ve->per_cgroot_list_lock); + + if (old_path) + 
kfree_rcu(old_path, rcu_head); + + return err; +} + +const char *ve_get_release_agent_path(struct ve_struct *ve, + struct cgroup *cgroot) +{ + /* caller must grab rcu_read_lock */ + const char *result = NULL; + struct per_cgroot_data *data; + struct cgroup_rcu_string *str; + + raw_spin_lock(&ve->per_cgroot_list_lock); + + data = per_cgroot_data_find_locked(&ve->per_cgroot_list, cgroot); + raw_spin_unlock(&ve->per_cgroot_list_lock); + + if (!data) + return NULL; + + str = rcu_dereference(data->release_agent_path); + if (str) + result = str->val; + return result; +} + struct cgroup_subsys_state *ve_get_init_css(struct ve_struct *ve, int subsys_id) { struct cgroup_subsys_state *css, *tmp; @@ -646,7 +704,9 @@ static int ve_start_container(struct ve_struct *ve) if (err < 0) goto err_iterate; - cgroup_mark_ve_roots(ve); + err = cgroup_mark_ve_roots(ve); + if (err) + goto err_mark_ve; ve->is_running = 1; @@ -656,6 +716,8 @@ static int ve_start_container(struct ve_struct *ve) return 0; +err_mark_ve: + ve_hook_iterate_fini(VE_SS_CHAIN, ve); err_iterate: ve_workqueue_stop(ve); err_workqueue: @@ -672,7 +734,12 @@ err_list: static void per_cgroot_free_all_locked(struct list_head *per_cgroot_list) { struct per_cgroot_data *data, *saved; + struct cgroup_rcu_string *release_agent; list_for_each_entry_safe(data, saved, per_cgroot_list, list) { + release_agent = data->release_agent_path; + RCU_INIT_POINTER(data->release_agent_path, NULL); + if (release_agent) + kfree_rcu(release_agent, rcu_head); list_del_init(&data->list); kfree(data); } -- 1.8.3.1 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
