When cgroup_path() is called from inside a container, its output is "virtualized", i.e. cgroup /CTID/A/B is reported as /A/B. This was done so that userspace tools would not get confused by the output of some proc files (namely, /proc/PID/{cgroup,cpuset}). However, it is wrong to virtualize the output of cgroup_path() every time it is called from inside a container. For instance, it is also called from inside a container on OOM in order to dump memcg info to the system log, in which case mangling its output would be incorrect.
Therefore this patch makes cgroup_path() always return an absolute path. To get a container-relative path, one should now use cgroup_path_ve(). Currently, cgroup_path_ve() is only used for /proc files output (it seems to be enough for now). https://jira.sw.ru/browse/PSBM-34852 Signed-off-by: Vladimir Davydov <vdavy...@parallels.com> --- include/linux/cgroup.h | 1 + kernel/cgroup.c | 20 ++++++++++++++++---- kernel/cpuset.c | 2 +- 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index b7eb28ffd0d6..146a924664cf 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -561,6 +561,7 @@ int cgroup_is_removed(const struct cgroup *cgrp); bool cgroup_is_descendant(struct cgroup *cgrp, struct cgroup *ancestor); int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen); +int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen); int cgroup_task_count(const struct cgroup *cgrp); diff --git a/kernel/cgroup.c b/kernel/cgroup.c index aa3546d93f88..0ff3b5254b5f 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1804,11 +1804,13 @@ static struct kobject *cgroup_kobj; * inode's i_mutex, while on the other hand cgroup_path() can be called * with some irq-safe spinlocks held. 
*/ -int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) +int __cgroup_path(const struct cgroup *cgrp, char *buf, int buflen, bool virt) { int ret = -ENAMETOOLONG; char *start; - struct ve_struct *ve = get_exec_env(); + + if (ve_is_super(get_exec_env())) + virt = false; if (!cgrp->parent) { if (strlcpy(buf, "/", buflen) >= buflen) @@ -1825,7 +1827,7 @@ int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) int len; #ifdef CONFIG_VE - if (!ve_is_super(ve) && cgrp->parent && !cgrp->parent->parent) { + if (virt && cgrp->parent && !cgrp->parent->parent) { /* * Containers cgroups are bind-mounted from node * so they are like '/' from inside, thus we have @@ -1860,8 +1862,18 @@ out: rcu_read_unlock(); return ret; } + +int cgroup_path(const struct cgroup *cgrp, char *buf, int buflen) +{ + return __cgroup_path(cgrp, buf, buflen, false); +} EXPORT_SYMBOL_GPL(cgroup_path); +int cgroup_path_ve(const struct cgroup *cgrp, char *buf, int buflen) +{ + return __cgroup_path(cgrp, buf, buflen, true); +} + /* * Control Group taskset */ @@ -4927,7 +4939,7 @@ int proc_cgroup_show(struct seq_file *m, void *v) root->name); seq_putc(m, ':'); cgrp = task_cgroup_from_root(tsk, root); - retval = cgroup_path(cgrp, buf, PAGE_SIZE); + retval = cgroup_path_ve(cgrp, buf, PAGE_SIZE); if (retval < 0) goto out_unlock; seq_puts(m, buf); diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 2400c4e1b002..81030b340dbd 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -2697,7 +2697,7 @@ int proc_cpuset_show(struct seq_file *m, void *unused_v) rcu_read_lock(); css = task_subsys_state(tsk, cpuset_subsys_id); - retval = cgroup_path(css->cgroup, buf, PAGE_SIZE); + retval = cgroup_path_ve(css->cgroup, buf, PAGE_SIZE); rcu_read_unlock(); if (retval < 0) goto out_put_task; -- 2.1.4 _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel