The commit is pushed to "branch-rh7-3.10.0-123.1.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-123.1.2.vz7.5.9 ------> commit 66ae81139f560aa67e1c14e10acb3d22f301e01b Author: Vladimir Davydov <vdavy...@parallels.com> Date: Thu Jun 4 17:14:09 2015 +0400
sched: Port diff-fairsched-cpuset-add-fake-cpuset-for-containers Author: Pavel Tikhomirov Email: ptikhomi...@parallels.com Subject: cpuset: add fake cpuset for containers Date: Tue, 27 Jan 2015 15:40:12 +0300 If a container wants to write/read the cpumask or nodemask of a cpuset through cgroupfs for an in-container cgroup, fake it - add special ve_* fields to the cpuset structure and operate on them. We don't want to validate the change as it is just fake, so allow any value. For flags, relax_domain_level and mem_migration_pending, do not allow access from the container. For the Docker integration-cli test: TestRunWithCpuset https://jira.sw.ru/browse/PSBM-30878 v2: add for mems, cpus_allowed, mems_allowed; simplify checks in update_cpumask/update_nodemask, no excessive code in alloc_trial_cpuset and change naming for masks v3: do not take the callback_mutex for printing ve_cpus_allowed, do not permit r/w to cpuset_cpus_allowed, cpuset_mems_allowed, add ve_flags and ve_relax_domain_level. v4: leave only ve_cpus/mems_allowed, others are not faked, block access to others from CT v5: clean up code Signed-off-by: Pavel Tikhomirov <ptikhomi...@parallels.com> Reviewed-by: Vladimir Davydov <vdavy...@parallels.com> ============================================================================= Related to https://jira.sw.ru/browse/PSBM-33642 Signed-off-by: Vladimir Davydov <vdavy...@parallels.com> --- kernel/cpuset.c | 55 +++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) diff --git a/kernel/cpuset.c b/kernel/cpuset.c index 3c4355e..ef08c19 100644 --- a/kernel/cpuset.c +++ b/kernel/cpuset.c @@ -87,6 +87,9 @@ struct cpuset { cpumask_var_t cpus_allowed; /* CPUs allowed to tasks in cpuset */ nodemask_t mems_allowed; /* Memory Nodes allowed to tasks */ + cpumask_var_t ve_cpus_allowed; + nodemask_t ve_mems_allowed; + struct fmeter fmeter; /* memory_pressure filter */ /* @@ -866,6 +869,15 @@ static int __update_cpumask(struct cpuset *cs, if (cs == &top_cpuset) return -EACCES; + /* + 
* If we are in CT use fake cpu mask + * can set and read, but no effect + */ + if (!ve_is_super(get_exec_env())) { + cpumask_copy(cs->ve_cpus_allowed, cpus_allowed); + return 0; + } + if (!cpumask_subset(cpus_allowed, cpu_active_mask)) return -EINVAL; @@ -1127,6 +1139,16 @@ static int __update_nodemask(struct cpuset *cs, goto done; } + /* + * If we are in CT use fake node mask + * can set and read, but no effect + */ + if (!ve_is_super(get_exec_env())) { + cs->ve_mems_allowed = *mems_allowed; + retval = 0; + goto done; + } + if (!nodes_subset(*mems_allowed, node_states[N_MEMORY])) { retval = -EINVAL; goto done; @@ -1563,6 +1585,9 @@ static int cpuset_write_u64(struct cgroup *cgrp, struct cftype *cft, u64 val) cpuset_filetype_t type = cft->private; int retval = 0; + if (!ve_is_super(get_exec_env())) + return -EACCES; + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) { retval = -ENODEV; @@ -1612,6 +1637,9 @@ static int cpuset_write_s64(struct cgroup *cgrp, struct cftype *cft, s64 val) cpuset_filetype_t type = cft->private; int retval = -ENODEV; + if (!ve_is_super(get_exec_env())) + return -EACCES; + mutex_lock(&cpuset_mutex); if (!is_cpuset_online(cs)) goto out_unlock; @@ -1693,6 +1721,9 @@ static size_t cpuset_sprintf_cpulist(char *page, struct cpuset *cs) { size_t count; + if (!ve_is_super(get_exec_env())) + return cpulist_scnprintf(page, PAGE_SIZE, cs->ve_cpus_allowed); + mutex_lock(&callback_mutex); count = cpulist_scnprintf(page, PAGE_SIZE, cs->cpus_allowed); mutex_unlock(&callback_mutex); @@ -1704,6 +1735,9 @@ static size_t cpuset_sprintf_memlist(char *page, struct cpuset *cs) { size_t count; + if (!ve_is_super(get_exec_env())) + return nodelist_scnprintf(page, PAGE_SIZE, cs->ve_mems_allowed); + mutex_lock(&callback_mutex); count = nodelist_scnprintf(page, PAGE_SIZE, cs->mems_allowed); mutex_unlock(&callback_mutex); @@ -1751,6 +1785,10 @@ static u64 cpuset_read_u64(struct cgroup *cont, struct cftype *cft) { struct cpuset *cs = cgroup_cs(cont); 
cpuset_filetype_t type = cft->private; + + if (!ve_is_super(get_exec_env())) + return 0; + switch (type) { case FILE_CPU_EXCLUSIVE: return is_cpu_exclusive(cs); @@ -1782,6 +1820,10 @@ static s64 cpuset_read_s64(struct cgroup *cont, struct cftype *cft) { struct cpuset *cs = cgroup_cs(cont); cpuset_filetype_t type = cft->private; + + if (!ve_is_super(get_exec_env())) + return 0; + switch (type) { case FILE_SCHED_RELAX_DOMAIN_LEVEL: return cs->relax_domain_level; @@ -1909,10 +1951,17 @@ static struct cgroup_subsys_state *cpuset_css_alloc(struct cgroup *cont) kfree(cs); return ERR_PTR(-ENOMEM); } + if (!alloc_cpumask_var(&cs->ve_cpus_allowed, GFP_KERNEL)) { + free_cpumask_var(cs->cpus_allowed); + kfree(cs); + return ERR_PTR(-ENOMEM); + } set_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); cpumask_clear(cs->cpus_allowed); nodes_clear(cs->mems_allowed); + cpumask_clear(cs->ve_cpus_allowed); + nodes_clear(cs->ve_mems_allowed); fmeter_init(&cs->fmeter); INIT_WORK(&cs->hotplug_work, cpuset_propagate_hotplug_workfn); cs->relax_domain_level = -1; @@ -2000,6 +2049,7 @@ static void cpuset_css_free(struct cgroup *cont) struct cpuset *cs = cgroup_cs(cont); free_cpumask_var(cs->cpus_allowed); + free_cpumask_var(cs->ve_cpus_allowed); kfree(cs); } @@ -2029,10 +2079,15 @@ int __init cpuset_init(void) if (!alloc_cpumask_var(&top_cpuset.cpus_allowed, GFP_KERNEL)) BUG(); + if (!alloc_cpumask_var(&top_cpuset.ve_cpus_allowed, GFP_KERNEL)) + BUG(); cpumask_setall(top_cpuset.cpus_allowed); nodes_setall(top_cpuset.mems_allowed); + cpumask_clear(top_cpuset.ve_cpus_allowed); + nodes_clear(top_cpuset.ve_mems_allowed); + fmeter_init(&top_cpuset.fmeter); set_bit(CS_SCHED_LOAD_BALANCE, &top_cpuset.flags); top_cpuset.relax_domain_level = -1; _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel