[Devel] [PATCH 2/3] cgroup : make the mount options parsing more accurate

2010-09-04 Thread Daniel Lezcano
The current code does not detect 'all' being combined with a subsystem
name, although the two are IMHO mutually exclusive. In addition, as soon
as any option is specified, even one that is not a subsystem name, the
'all' option has to be given explicitly along with it.
eg:
 not detected : mount -t cgroup -o all,freezer cgroup /cgroup
 not flexible : mount -t cgroup -o noprefix,all cgroup /cgroup

This patch fixes both issues and makes the code a bit clearer by
replacing the 'else if' chain with 'continue' blocks in the loop.

Signed-off-by: Daniel Lezcano 
Signed-off-by: Serge E. Hallyn 
Cc: Eric W. Biederman 
Cc: Paul Menage 
Reviewed-by: Li Zefan 
---
 kernel/cgroup.c |   91 +--
 1 files changed, 61 insertions(+), 30 deletions(-)

diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index 0473a9a..ca2314f 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -1074,7 +1074,8 @@ struct cgroup_sb_opts {
  */
 static int parse_cgroupfs_options(char *data, struct cgroup_sb_opts *opts)
 {
-   char *token, *o = data ?: "all";
+   char *token, *o = data;
+   bool all_ss = false, one_ss = false;
unsigned long mask = (unsigned long)-1;
int i;
bool module_pin_failed = false;
@@ -1088,26 +1089,30 @@ static int parse_cgroupfs_options(char *data, struct 
cgroup_sb_opts *opts)
memset(opts, 0, sizeof(*opts));
 
while ((token = strsep(&o, ",")) != NULL) {
+
if (!*token)
return -EINVAL;
-   if (!strcmp(token, "all")) {
-   /* Add all non-disabled subsystems */
-   opts->subsys_bits = 0;
-   for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-   struct cgroup_subsys *ss = subsys[i];
-   if (ss == NULL)
-   continue;
-   if (!ss->disabled)
-   opts->subsys_bits |= 1ul << i;
-   }
-   } else if (!strcmp(token, "none")) {
+   if (!strcmp(token, "none")) {
/* Explicitly have no subsystems */
opts->none = true;
-   } else if (!strcmp(token, "noprefix")) {
+   continue;
+   }
+   if (!strcmp(token, "all")) {
+   /* Mutually exclusive option 'all' + subsystem name */
+   if (one_ss)
+   return -EINVAL;
+   all_ss = true;
+   continue;
+   }
+   if (!strcmp(token, "noprefix")) {
set_bit(ROOT_NOPREFIX, &opts->flags);
-   } else if (!strcmp(token, "clone_children")) {
+   continue;
+   }
+   if (!strcmp(token, "clone_children")) {
set_bit(ROOT_CLONE_CHILDREN, &opts->flags);
-   } else if (!strncmp(token, "release_agent=", 14)) {
+   continue;
+   }
+   if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
return -EINVAL;
@@ -1115,7 +1120,9 @@ static int parse_cgroupfs_options(char *data, struct 
cgroup_sb_opts *opts)
kstrndup(token + 14, PATH_MAX - 1, GFP_KERNEL);
if (!opts->release_agent)
return -ENOMEM;
-   } else if (!strncmp(token, "name=", 5)) {
+   continue;
+   }
+   if (!strncmp(token, "name=", 5)) {
const char *name = token + 5;
/* Can't specify an empty name */
if (!strlen(name))
@@ -1137,20 +1144,44 @@ static int parse_cgroupfs_options(char *data, struct 
cgroup_sb_opts *opts)
  GFP_KERNEL);
if (!opts->name)
return -ENOMEM;
-   } else {
-   struct cgroup_subsys *ss;
-   for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
-   ss = subsys[i];
-   if (ss == NULL)
-   continue;
-   if (!strcmp(token, ss->name)) {
-   if (!ss->disabled)
-   set_bit(i, &opts->subsys_bits);
-   break;
-   }
-   }
-   if (i == CGROUP_SUBSYS_COUNT)
-   return -ENOENT;
+
+   continue;
+   }
+
+   for (i = 0; i < CGROUP_SUBSYS_COUNT; i++) {
+

[Devel] [PATCH 3/3] cgroup : remove the ns_cgroup

2010-09-04 Thread Daniel Lezcano
The ns_cgroup is an annoying cgroup at the namespace / cgroup frontier.

For example, a single process cannot handle a large number of namespaces
without interacting with this cgroup and running into an exponential creation
time due to the nested cgroup directory depth (eg. /cgroup//...//...).

This was spotted when creating a single process using multiple network
namespaces. The objective was 4096 network namespaces, but at 820 netns
creation became dramatically slow: the creation time for a namespace
increased from 10msec to 10sec. After five hours, the expected number of
netns had still not been reached. Without the ns_cgroup interaction, 4K
netns are created within 2 minutes.

In order to avoid that, we have to mount the cgroup with all the subsystems
except the ns_cgroup. This is a little weird and hard to manage from an
administration point of view, because we have to know which cgroup
subsystems are available on the system and we can't do a simple
'mount -t cgroup cgroup /cgroup'.

With the previous patch, which adds a 'clone_children' parameter to a cgroup,
we should be able to remove the ns_cgroup and manually manage creating a
cgroup and adding a task to it, consistently with the rest of the subsystems.

This patch removes the ns_cgroup as suggested in the following thread:

https://lists.linux-foundation.org/pipermail/containers/2009-June/018616.html

The 'cgroup_clone' function is removed because it is no longer used.

Changelog:
* Sep 1 (dle): refreshed CONFIG_CGROUP_NS references
* Jul 29 (seh): remove references to ns_cgroup_clone(), fix up
   some documentation, and remove CONFIG_CGROUP_NS references.

Signed-off-by: Daniel Lezcano 
Signed-off-by: Serge E. Hallyn 
Cc: Eric W. Biederman 
Cc: Jamal Hadi Salim 
Reviewed-by: Li Zefan 
Acked-by: Paul Menage 
Acked-by: Matt Helsley 
---
 Documentation/cgroups/cgroups.txt  |2 +-
 arch/mips/configs/bcm47xx_defconfig|1 -
 arch/powerpc/configs/ppc6xx_defconfig  |1 -
 arch/powerpc/configs/pseries_defconfig |1 -
 arch/s390/defconfig|1 -
 arch/sh/configs/sdk7786_defconfig  |1 -
 arch/sh/configs/se7206_defconfig   |1 -
 arch/sh/configs/shx3_defconfig |1 -
 arch/sh/configs/urquell_defconfig  |1 -
 arch/x86/configs/i386_defconfig|1 -
 arch/x86/configs/x86_64_defconfig  |1 -
 include/linux/cgroup.h |3 -
 include/linux/cgroup_subsys.h  |6 --
 include/linux/nsproxy.h|9 ---
 init/Kconfig   |9 ---
 kernel/Makefile|1 -
 kernel/cgroup.c|  116 
 kernel/cpuset.c|7 +-
 kernel/fork.c  |6 --
 kernel/ns_cgroup.c |  110 --
 kernel/nsproxy.c   |4 -
 21 files changed, 4 insertions(+), 279 deletions(-)
 delete mode 100644 kernel/ns_cgroup.c

diff --git a/Documentation/cgroups/cgroups.txt 
b/Documentation/cgroups/cgroups.txt
index 190018b..6a5ba63 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -618,7 +618,7 @@ always handled well.
 void post_clone(struct cgroup_subsys *ss, struct cgroup *cgrp)
 (cgroup_mutex held by caller)
 
-Called at the end of cgroup_clone() to do any parameter
+Called during cgroup_create() to do any parameter
 initialization which might be required before a task could attach.  For
 example in cpusets, no task may attach before 'cpus' and 'mems' are set
 up.
diff --git a/arch/mips/configs/bcm47xx_defconfig 
b/arch/mips/configs/bcm47xx_defconfig
index 927d58b..c4338e0 100644
--- a/arch/mips/configs/bcm47xx_defconfig
+++ b/arch/mips/configs/bcm47xx_defconfig
@@ -16,7 +16,6 @@ CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
 CONFIG_TINY_RCU=y
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_NS=y
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_RELAY=y
 CONFIG_BLK_DEV_INITRD=y
diff --git a/arch/powerpc/configs/ppc6xx_defconfig 
b/arch/powerpc/configs/ppc6xx_defconfig
index 9d64a68..9b253f6 100644
--- a/arch/powerpc/configs/ppc6xx_defconfig
+++ b/arch/powerpc/configs/ppc6xx_defconfig
@@ -10,7 +10,6 @@ CONFIG_TASK_XACCT=y
 CONFIG_TASK_IO_ACCOUNTING=y
 CONFIG_AUDIT=y
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_NS=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CGROUP_CPUACCT=y
 CONFIG_RESOURCE_COUNTERS=y
diff --git a/arch/powerpc/configs/pseries_defconfig 
b/arch/powerpc/configs/pseries_defconfig
index f87f0e1..972587f 100644
--- a/arch/powerpc/configs/pseries_defconfig
+++ b/arch/powerpc/configs/pseries_defconfig
@@ -15,7 +15,6 @@ CONFIG_AUDITSYSCALL=y
 CONFIG_IKCONFIG=y
 CONFIG_IKCONFIG_PROC=y
 CONFIG_CGROUPS=y
-CONFIG_CGROUP_NS=y
 CONFIG_CGROUP_FREEZER=y
 CONFIG_CGROUP_DEVICE=y
 CONFIG_CPUSETS=y
diff --git a/arch/s390/defconfig b/arch/s390/defconfig
index e40ac6e..4b6d1a1 100644
--- a/arch/s390/defconfig
+++ b/arch/s390/defconfig
@@ -5,7 +5,6 @@ CONFIG_AUDIT=y
 CONFIG_IKCONFIG=y
 CONFIG

[Devel] [PATCH 1/3] cgroup : add clone_children control file

2010-09-04 Thread Daniel Lezcano
This patch is sent as an answer to a previous thread around the ns_cgroup.

https://lists.linux-foundation.org/pipermail/containers/2009-June/018627.html

It adds a control file 'clone_children' for a cgroup.
This control file is a boolean specifying if the child cgroup should
be a clone of the parent cgroup or not. The default value is 'false'.

This flag makes the child cgroup call the post_clone callback of every
subsystem that provides one.

At present, cpuset is the only subsystem that implements the post_clone
callback.

The option can be set at mount time by specifying the 'clone_children' mount
option.

Signed-off-by: Daniel Lezcano 
Signed-off-by: Serge E. Hallyn 
Cc: Eric W. Biederman 
Cc: Paul Menage 
Reviewed-by: Li Zefan 
---
 Documentation/cgroups/cgroups.txt |   14 +++-
 include/linux/cgroup.h|4 +++
 kernel/cgroup.c   |   39 +
 3 files changed, 55 insertions(+), 2 deletions(-)

diff --git a/Documentation/cgroups/cgroups.txt 
b/Documentation/cgroups/cgroups.txt
index b34823f..190018b 100644
--- a/Documentation/cgroups/cgroups.txt
+++ b/Documentation/cgroups/cgroups.txt
@@ -18,7 +18,8 @@ CONTENTS:
   1.2 Why are cgroups needed ?
   1.3 How are cgroups implemented ?
   1.4 What does notify_on_release do ?
-  1.5 How do I use cgroups ?
+  1.5 What does clone_children do ?
+  1.6 How do I use cgroups ?
 2. Usage Examples and Syntax
   2.1 Basic Usage
   2.2 Attaching processes
@@ -293,7 +294,16 @@ notify_on_release in the root cgroup at system boot is 
disabled
 value of their parents notify_on_release setting. The default value of
 a cgroup hierarchy's release_agent path is empty.
 
-1.5 How do I use cgroups ?
+1.5 What does clone_children do ?
+-
+
+If the clone_children flag is enabled (1) in a cgroup, then all
+cgroups created beneath will call the post_clone callbacks for each
+subsystem of the newly created cgroup. Usually when this callback is
+implemented for a subsystem, it copies the values of the parent
+subsystem, this is the case for the cpuset.
+
+1.6 How do I use cgroups ?
 --
 
 To start a new job that is to be contained within a cgroup, using
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 3cb7d04..d01543b 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -154,6 +154,10 @@ enum {
 * A thread in rmdir() is wating for this cgroup.
 */
CGRP_WAIT_ON_RMDIR,
+   /*
+* Clone cgroup values when creating a new child cgroup
+*/
+   CGRP_CLONE_CHILDREN,
 };
 
 /* which pidlist file are we talking about? */
diff --git a/kernel/cgroup.c b/kernel/cgroup.c
index e5c5497..0473a9a 100644
--- a/kernel/cgroup.c
+++ b/kernel/cgroup.c
@@ -229,6 +229,7 @@ inline int cgroup_is_removed(const struct cgroup *cgrp)
 /* bits in struct cgroupfs_root flags field */
 enum {
ROOT_NOPREFIX, /* mounted subsystems have no named prefix */
+   ROOT_CLONE_CHILDREN, /* mounted subsystems will inherit from parent */
 };
 
 static int cgroup_is_releasable(const struct cgroup *cgrp)
@@ -244,6 +245,11 @@ static int notify_on_release(const struct cgroup *cgrp)
return test_bit(CGRP_NOTIFY_ON_RELEASE, &cgrp->flags);
 }
 
+static int clone_children(const struct cgroup *cgrp)
+{
+   return test_bit(CGRP_CLONE_CHILDREN, &cgrp->flags);
+}
+
 /*
  * for_each_subsys() allows you to iterate on each subsystem attached to
  * an active hierarchy
@@ -1038,6 +1044,8 @@ static int cgroup_show_options(struct seq_file *seq, 
struct vfsmount *vfs)
seq_printf(seq, ",%s", ss->name);
if (test_bit(ROOT_NOPREFIX, &root->flags))
seq_puts(seq, ",noprefix");
+   if (test_bit(ROOT_CLONE_CHILDREN, &root->flags))
+   seq_puts(seq, ",clone_children");
if (strlen(root->release_agent_path))
seq_printf(seq, ",release_agent=%s", root->release_agent_path);
if (strlen(root->name))
@@ -1097,6 +1105,8 @@ static int parse_cgroupfs_options(char *data, struct 
cgroup_sb_opts *opts)
opts->none = true;
} else if (!strcmp(token, "noprefix")) {
set_bit(ROOT_NOPREFIX, &opts->flags);
+   } else if (!strcmp(token, "clone_children")) {
+   set_bit(ROOT_CLONE_CHILDREN, &opts->flags);
} else if (!strncmp(token, "release_agent=", 14)) {
/* Specifying two release agents is forbidden */
if (opts->release_agent)
@@ -1357,6 +1367,8 @@ static struct cgroupfs_root *cgroup_root_from_opts(struct 
cgroup_sb_opts *opts)
strcpy(root->release_agent_path, opts->release_agent);
if (opts->name)
strcpy(root->name, opts->name);
+   if (test_bit(ROOT_CLONE_CHILDREN, &opts->flags))
+   set_bit(CGRP_CLONE_CHILDREN, &root->top_cgroup.flags);