Re: [linux-yocto] [PATCH 1/2] [RFC]cgroups: Resource controller for open files.

Bruce Ashfield Wed, 30 Jul 2014 06:35:47 -0700

On 14-07-30 04:58 AM, zhe...@windriver.com wrote:

From: Binder Makin <meri...@google.com>


lkml https://lkml.org/lkml/2014/7/2/640 upstream

Add a resource controller for limiting the number of open
file handles.  This allows us to catch misbehaving processes
and return EMFILE instead of ENOMEM for kernel memory limits.


It looks like we got this series twice, with the Sign off and
lkml link being the big differences.

When sending a series multiple times, please put [v<x>] in the
patch subject, so we can easily tell them apart.

You can explain the delta between the versions below the ---
line under the sign offs, since it will be dropped when the
change is applied.

No need to resend this series, but keep that in mind for future
submissions.

Bruce


Signed-off-by: Binder Makin <meri...@google.com>
Signed-off-by: He Zhe <zhe...@windriver.com>
---
  fs/Makefile                   |    1 +
  fs/file.c                     |   44 ++++++++
  fs/filescontrol.c             |  247 +++++++++++++++++++++++++++++++++++++++++
  include/linux/cgroup_subsys.h |    5 +
  include/linux/fdtable.h       |    3 +
  include/linux/filescontrol.h  |   32 ++++++
  init/Kconfig                  |    7 ++
  7 files changed, 339 insertions(+)
  create mode 100644 fs/filescontrol.c
  create mode 100644 include/linux/filescontrol.h

diff --git a/fs/Makefile b/fs/Makefile
index ebfe2ee..18eaee0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_COREDUMP)                += coredump.o
  obj-$(CONFIG_SYSCTL)          += drop_caches.o

  obj-$(CONFIG_FHANDLE)         += fhandle.o
+obj-$(CONFIG_CGROUP_FILES)     += filescontrol.o

  obj-y                         += quota/

diff --git a/fs/file.c b/fs/file.c
index eb56a13..4b84070 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -22,6 +22,7 @@
  #include <linux/spinlock.h>
  #include <linux/rcupdate.h>
  #include <linux/workqueue.h>
+#include <linux/filescontrol.h>

  int sysctl_nr_open __read_mostly = 1024*1024;
  int sysctl_nr_open_min = BITS_PER_LONG;
@@ -264,6 +265,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int 
*errorp)
        new_fdt->close_on_exec = newf->close_on_exec_init;
        new_fdt->open_fds = newf->open_fds_init;
        new_fdt->fd = &newf->fd_array[0];
+#ifdef CONFIG_CGROUP_FILES
+       files_cgroup_assign(newf);
+#endif

        spin_lock(&oldf->file_lock);
        old_fdt = files_fdtable(oldf);
@@ -340,9 +344,28 @@ struct files_struct *dup_fd(struct files_struct *oldf, int 
*errorp)

        rcu_assign_pointer(newf->fdt, new_fdt);

+#ifdef CONFIG_CGROUP_FILES
+       if (!files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf)))
+               return newf;
+
+/* could not get enough FD resources.  Need to clean up. */
+       new_fds = new_fdt->fd;
+       for (i = open_files; i != 0; i--) {
+               struct file *f = *new_fds++;
+               if (f)
+                       fput(f);
+       }
+       if (new_fdt != &newf->fdtab)
+               __free_fdtable(new_fdt);
+       *errorp = -EMFILE;
+#else
        return newf;
+#endif

  out_release:
+#ifdef CONFIG_CGROUP_FILES
+       files_cgroup_remove(newf);
+#endif
        kmem_cache_free(files_cachep, newf);
  out:
        return NULL;
@@ -368,6 +391,9 @@ static struct fdtable *close_files(struct files_struct * 
files)
                        if (set & 1) {
                                struct file * file = xchg(&fdt->fd[i], NULL);
                                if (file) {
+#ifdef CONFIG_CGROUP_FILES
+                                       files_cgroup_unalloc_fd(files, 1);
+#endif
                                        filp_close(file, files);
                                        cond_resched();
                                }
@@ -486,6 +512,13 @@ repeat:
        if (error)
                goto repeat;

+#ifdef CONFIG_CGROUP_FILES
+       if (files_cgroup_alloc_fd(files, 1)) {
+               error = -EMFILE;
+               goto out;
+       }
+#endif
+
        if (start <= files->next_fd)
                files->next_fd = fd + 1;

@@ -522,6 +555,10 @@ EXPORT_SYMBOL(get_unused_fd_flags);
  static void __put_unused_fd(struct files_struct *files, unsigned int fd)
  {
        struct fdtable *fdt = files_fdtable(files);
+#ifdef CONFIG_CGROUP_FILES
+       if (test_bit(fd, fdt->open_fds))
+               files_cgroup_unalloc_fd(files, 1);
+#endif
        __clear_open_fd(fd, fdt);
        if (fd < files->next_fd)
                files->next_fd = fd;
@@ -780,6 +817,13 @@ static int do_dup2(struct files_struct *files,
        tofree = fdt->fd[fd];
        if (!tofree && fd_is_open(fd, fdt))
                goto Ebusy;
+
+#ifdef CONFIG_CGROUP_FILES
+       /*
+        * Only a slot that was not already occupied needs a new charge.
+        * files_cgroup_alloc_fd() returns 0 on success, so bail out only
+        * when it reports a failure (non-zero).
+        */
+       if (!tofree && files_cgroup_alloc_fd(files, 1))
+               goto Ebusy;
+#endif
+
        get_file(file);
        rcu_assign_pointer(fdt->fd[fd], file);
        __set_open_fd(fd, fdt);
diff --git a/fs/filescontrol.c b/fs/filescontrol.c
new file mode 100644
index 0000000..c532d02
--- /dev/null
+++ b/fs/filescontrol.c
@@ -0,0 +1,247 @@
+/* filescontrol.c - Cgroup controller for open file handles.
+ *
+ * Copyright 2014 Google Inc.
+ * Author: Brian Makin <meri...@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/res_counter.h>
+#include <linux/filescontrol.h>
+#include <linux/cgroup.h>
+#include <linux/export.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/fdtable.h>
+
+/*
+ * Tentative definition of the controller's subsystem descriptor; the
+ * initialized definition is at the end of this file.
+ */
+struct cgroup_subsys files_cgrp_subsys __read_mostly;
+EXPORT_SYMBOL(files_cgrp_subsys);
+
+/* Per-cgroup state of the files controller. */
+struct files_cgroup {
+       /* Embedded css; css_fcg() maps a css pointer back to this struct. */
+       struct cgroup_subsys_state css;
+       /* Counter charged and uncharged as file descriptors come and go. */
+       struct res_counter open_handles;
+};
+
+/* Map a css pointer to the files_cgroup that embeds it; NULL maps to NULL. */
+static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css)
+{
+       if (!css)
+               return NULL;
+
+       return container_of(css, struct files_cgroup, css);
+}
+
+/* Return the open-handles counter of the files_cgroup behind @css. */
+static inline struct res_counter *
+css_res_open_handles(struct cgroup_subsys_state *css)
+{
+       struct files_cgroup *fcg = css_fcg(css);
+
+       return &fcg->open_handles;
+}
+
+/* Return the files_cgroup currently recorded in @files. */
+static inline struct files_cgroup *
+files_cgroup_from_files(struct files_struct *files)
+{
+       struct files_cgroup *fcg = files->files_cgroup;
+
+       return fcg;
+}
+
+/*
+ * Allocate the state for a new files cgroup.
+ *
+ * Without a parent the counter is standalone and its limit starts at
+ * get_max_files(); with a parent the counter is chained to the parent's
+ * counter and starts with the parent's current limit.
+ *
+ * Returns the embedded css, or ERR_PTR(-ENOMEM) if the allocation fails.
+ */
+static struct cgroup_subsys_state *
+files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+       struct res_counter *parent_res = NULL;
+       struct files_cgroup *fcg;
+       u64 limit;
+
+       fcg = kzalloc(sizeof(*fcg), GFP_KERNEL);
+       if (!fcg)
+               return ERR_PTR(-ENOMEM);
+
+       if (parent_css) {
+               parent_res = &css_fcg(parent_css)->open_handles;
+               limit = res_counter_read_u64(parent_res, RES_LIMIT);
+       } else {
+               limit = get_max_files();
+       }
+
+       res_counter_init(&fcg->open_handles, parent_res);
+       res_counter_set_limit(&fcg->open_handles, limit);
+
+       return &fcg->css;
+}
+
+/* Free the files_cgroup that embeds @css. */
+static void files_cgroup_css_free(struct cgroup_subsys_state *css)
+{
+       struct files_cgroup *fcg = css_fcg(css);
+
+       kfree(fcg);
+}
+
+/*
+ * Count the bits set in the open_fds bitmap of @files' fd table,
+ * one word at a time.
+ */
+u64 files_cgroup_count_fds(struct files_struct *files)
+{
+       struct fdtable *fdt = files_fdtable(files);
+       unsigned int nr_words = DIV_ROUND_UP(fdt->max_fds, BITS_PER_LONG);
+       unsigned int word;
+       int nr_open = 0;
+
+       for (word = 0; word < nr_words; word++)
+               nr_open += hweight64((__u64)fdt->open_fds[word]);
+
+       return nr_open;
+}
+
+/*
+ * Sum the open fd counts of every thread group leader in @tset.
+ *
+ * Tasks that are not thread group leaders are skipped, and a leader
+ * whose ->files is NULL contributes nothing.
+ */
+static u64 files_in_taskset(struct cgroup_taskset *tset)
+{
+       struct task_struct *task;
+       u64 files = 0;
+
+       cgroup_taskset_for_each(task, tset) {
+               if (!thread_group_leader(task))
+                       continue;
+
+               task_lock(task);
+               /*
+                * ->files can be NULL (files_cgroup_attach() checks for
+                * it as well) and files_cgroup_count_fds() dereferences
+                * its argument, so skip such tasks.
+                */
+               if (task->files)
+                       files += files_cgroup_count_fds(task->files);
+               task_unlock(task);
+       }
+       return files;
+}
+
+/*
+ * Refuse the attach with -ENOMEM when the files held by the incoming
+ * tasks exceed the margin left on the destination's counter; otherwise
+ * return 0.
+ */
+static int files_cgroup_can_attach(struct cgroup_subsys_state *css,
+                               struct cgroup_taskset *tset)
+{
+       struct res_counter *res = css_res_open_handles(css);
+       u64 needed = files_in_taskset(tset);
+
+       return res_counter_margin(res) < needed ? -ENOMEM : 0;
+}
+
+/*
+ * Move the open-file charge of the first task in @tset to @to_css.
+ *
+ * If resource counts have gone up between can_attach and attach then
+ * this may overcommit resources.  In that case just deny further allocation
+ * until the resource usage drops.
+ */
+static void files_cgroup_attach(struct cgroup_subsys_state *to_css,
+                               struct cgroup_taskset *tset)
+{
+       u64 num_files;
+       struct task_struct *task = cgroup_taskset_first(tset);
+       struct cgroup_subsys_state *from_css;
+       struct res_counter *from_res;
+       struct res_counter *to_res = css_res_open_handles(to_css);
+       struct res_counter *fail_res;
+       struct files_struct *files;
+
+       task_lock(task);
+       files = task->files;
+       if (!files) {
+               /* No files_struct, so there is no charge to move. */
+               task_unlock(task);
+               return;
+       }
+
+       from_css = &files_cgroup_from_files(files)->css;
+       from_res = css_res_open_handles(from_css);
+
+       /* Hold file_lock across the count and the ownership switch. */
+       spin_lock(&files->file_lock);
+       num_files = files_cgroup_count_fds(files);
+       res_counter_uncharge(from_res, num_files);
+       css_put(from_css);
+
+       /*
+        * The files move regardless, so the destination must be charged
+        * even when that exceeds its limit.  A plain res_counter_charge()
+        * would leave the counter untouched on failure, and the later
+        * per-fd uncharges would then drive the usage below what was
+        * actually charged.
+        */
+       if (res_counter_charge_nofail(to_res, num_files, &fail_res))
+               pr_err("Open files limit overcommitted\n");
+       css_get(to_css);
+
+       files->files_cgroup = css_fcg(to_css);
+       spin_unlock(&files->file_lock);
+       task_unlock(task);
+}
+
+/*
+ * Charge @n open handles to the cgroup recorded in @files.
+ * Returns 0 on success or -ENOMEM when the charge is refused.
+ */
+int files_cgroup_alloc_fd(struct files_struct *files, u64 n)
+{
+       struct files_cgroup *fcg = files_cgroup_from_files(files);
+       struct res_counter *limit_hit;
+       int err;
+
+       err = res_counter_charge(&fcg->open_handles, n, &limit_hit);
+       return err ? -ENOMEM : 0;
+}
+EXPORT_SYMBOL(files_cgroup_alloc_fd);
+
+/* Uncharge @n open handles from the cgroup recorded in @files. */
+void files_cgroup_unalloc_fd(struct files_struct *files, u64 n)
+{
+       struct files_cgroup *fcg = files_cgroup_from_files(files);
+       struct res_counter *res = &fcg->open_handles;
+
+       res_counter_uncharge(res, n);
+}
+EXPORT_SYMBOL(files_cgroup_unalloc_fd);
+
+/* read_u64 handler of the "limit" file: the counter's RES_LIMIT value. */
+static u64 files_limit_read(struct cgroup_subsys_state *css,
+                       struct cftype *cft)
+{
+       return res_counter_read_u64(css_res_open_handles(css), RES_LIMIT);
+}
+
+/*
+ * write_u64 handler of the "limit" file: set the counter's limit to
+ * @value and pass back res_counter_set_limit()'s result.
+ */
+static int files_limit_write(struct cgroup_subsys_state *css,
+                       struct cftype *cft, u64 value)
+{
+       struct res_counter *res = css_res_open_handles(css);
+
+       return res_counter_set_limit(res, value);
+}
+
+/* read_u64 handler of the "usage" file: the counter's RES_USAGE value. */
+static u64 files_usage_read(struct cgroup_subsys_state *css,
+                       struct cftype *cft)
+{
+       return res_counter_read_u64(css_res_open_handles(css), RES_USAGE);
+}
+
+/*
+ * Control files of the controller.  The trailing empty entry presumably
+ * terminates the table (usual cftype convention; confirm against the
+ * cgroup core).
+ */
+static struct cftype files[] = {
+       {
+               /* Readable and writable limit of the open_handles counter. */
+               .name = "limit",
+               .read_u64 = files_limit_read,
+               .write_u64 = files_limit_write,
+       },
+       {
+               /* Read-only current usage of the open_handles counter. */
+               .name = "usage",
+               .read_u64 = files_usage_read,
+       },
+       { }
+};
+
+/*
+ * Subsystem descriptor: wires up the css alloc/free and attach callbacks
+ * and the control-file table defined in this file.
+ */
+struct cgroup_subsys files_cgrp_subsys = {
+       .name = "files",
+       .css_alloc = files_cgroup_css_alloc,
+       .css_free = files_cgroup_css_free,
+       .can_attach = files_cgroup_can_attach,
+       .attach = files_cgroup_attach,
+       .base_cftypes = files,
+};
+
+/*
+ * Record the current task's files cgroup in @files and take a css
+ * reference on it, all under the current task's task_lock.
+ */
+void files_cgroup_assign(struct files_struct *files)
+{
+       struct cgroup_subsys_state *css;
+
+       task_lock(current);
+       css = task_css(current, files_cgrp_id);
+       css_get(css);
+       files->files_cgroup = container_of(css, struct files_cgroup, css);
+       task_unlock(current);
+}
+
+/*
+ * Drop the css reference held through @files->files_cgroup.  The put is
+ * done with the current task's task_lock and @files->file_lock held.
+ */
+void files_cgroup_remove(struct files_struct *files)
+{
+       task_lock(current);
+       spin_lock(&files->file_lock);
+       css_put(&files_cgroup_from_files(files)->css);
+       spin_unlock(&files->file_lock);
+       task_unlock(current);
+}
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 7b99d71..e6c9a40 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -50,6 +50,11 @@ SUBSYS(net_prio)
  #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
  SUBSYS(hugetlb)
  #endif
+
+#if IS_ENABLED(CONFIG_CGROUP_FILES)
+SUBSYS(files)
+#endif
+
  /*
   * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
   */
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 70e8e21..fee8e45 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,6 +57,9 @@ struct files_struct {
        unsigned long close_on_exec_init[1];
        unsigned long open_fds_init[1];
        struct file __rcu * fd_array[NR_OPEN_DEFAULT];
+#ifdef CONFIG_CGROUP_FILES
+       struct files_cgroup *files_cgroup;
+#endif
  };

  struct file_operations;
diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h
new file mode 100644
index 0000000..e39ed2a
--- /dev/null
+++ b/include/linux/filescontrol.h
@@ -0,0 +1,32 @@
+/* filescontrol.h - Files Controller
+ *
+ * Copyright 2014 Google Inc.
+ * Author: Brian Makin <meri...@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_FILESCONTROL_H
+#define _LINUX_FILESCONTROL_H
+
+#include <linux/fdtable.h>
+
+#ifdef CONFIG_CGROUP_FILES
+
+/* Charge @n open handles for @files; 0 on success, -ENOMEM if refused. */
+extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n);
+/* Uncharge @n open handles previously charged for @files. */
+extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n);
+/* Number of bits set in the open_fds bitmap of @files' fd table. */
+extern u64 files_cgroup_count_fds(struct files_struct *files);
+
+/* Bind @files to the current task's files cgroup and take a css reference. */
+void files_cgroup_assign(struct files_struct *files);
+/* Drop the css reference held through @files. */
+void files_cgroup_remove(struct files_struct *files);
+
+#endif /* CONFIG_CGROUP_FILES */
+#endif /* _LINUX_FILESCONTROL_H */
diff --git a/init/Kconfig b/init/Kconfig
index a4b4209..a8c8392 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1075,6 +1075,13 @@ config DEBUG_BLK_CGROUP
        Enable some debugging help. Currently it exports additional stat
        files in a cgroup which can be useful for debugging.

+config CGROUP_FILES
+       bool "Files Resource Controller for Control Groups"
+       default n
+       help
+         Provides a cgroup resource controller that limits the number of
+         open file handles within a cgroup.
+
  endif # CGROUPS

  config CHECKPOINT_RESTORE


--
_______________________________________________
linux-yocto mailing list
linux-yocto@yoctoproject.org
https://lists.yoctoproject.org/listinfo/linux-yocto

Re: [linux-yocto] [PATCH 1/2] [RFC]cgroups: Resource controller for open files.

Reply via email to