On 07/30/2014 11:06 AM, Bruce Ashfield wrote:
On 2014-07-29, 1:58 AM, zhe...@windriver.com wrote:
From: He Zhe <zhe...@windriver.com>

How did you extract this patch from the mailing list? The
From: field should not change, since you aren't the
original author of the patch.


I've corrected this.


Add a resource controller for limiting the number of open
file handles.  This allows us to catch misbehaving processes
and return EMFILE instead of ENOMEM for kernel memory limits.

Signed-off-by: Binder Makin <meri...@google.com>

Port from lkml: https://lkml.org/lkml/2014/7/2/640
Correct wrong handling in fs/file.c:do_dup2

This should be in a separate patch.

So let's see a new send of this, with patch 1/2 being the upstream
change (with the author information intact) and the 2/2 being any
fixes you did to the original.


I added the following line and my Signed-off-by (SOB) to patch 1/2; is that acceptable?
lkml https://lkml.org/lkml/2014/7/2/640 upstream

Please check out my latest submission.

Zhe

Bruce


Signed-off-by: He Zhe <zhe...@windriver.com>
---
  fs/Makefile                   |    1 +
  fs/file.c                     |   46 ++++++++
  fs/filescontrol.c             |  249 +++++++++++++++++++++++++++++++++++++++++
  include/linux/cgroup_subsys.h |    5 +
  include/linux/fdtable.h       |    3 +
  include/linux/filescontrol.h  |   32 ++++++
  init/Kconfig                  |    7 ++
  7 files changed, 343 insertions(+)
  create mode 100644 fs/filescontrol.c
  create mode 100644 include/linux/filescontrol.h

diff --git a/fs/Makefile b/fs/Makefile
index ebfe2ee..18eaee0 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_COREDUMP)        += coredump.o
  obj-$(CONFIG_SYSCTL)        += drop_caches.o

  obj-$(CONFIG_FHANDLE)        += fhandle.o
+obj-$(CONFIG_CGROUP_FILES)    += filescontrol.o

  obj-y                += quota/

diff --git a/fs/file.c b/fs/file.c
index eb56a13..e615dc9 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -22,6 +22,7 @@
  #include <linux/spinlock.h>
  #include <linux/rcupdate.h>
  #include <linux/workqueue.h>
+#include <linux/filescontrol.h>

  int sysctl_nr_open __read_mostly = 1024*1024;
  int sysctl_nr_open_min = BITS_PER_LONG;
@@ -264,6 +265,9 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
      new_fdt->close_on_exec = newf->close_on_exec_init;
      new_fdt->open_fds = newf->open_fds_init;
      new_fdt->fd = &newf->fd_array[0];
+#ifdef CONFIG_CGROUP_FILES
+    files_cgroup_assign(newf);
+#endif

      spin_lock(&oldf->file_lock);
      old_fdt = files_fdtable(oldf);
@@ -340,9 +344,28 @@ struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)

      rcu_assign_pointer(newf->fdt, new_fdt);

+#ifdef CONFIG_CGROUP_FILES
+    if (!files_cgroup_alloc_fd(newf, files_cgroup_count_fds(newf)))
+        return newf;
+
+/* could not get enough FD resources.  Need to clean up. */
+    new_fds = new_fdt->fd;
+    for (i = open_files; i != 0; i--) {
+        struct file *f = *new_fds++;
+        if (f)
+            fput(f);
+    }
+    if (new_fdt != &newf->fdtab)
+        __free_fdtable(new_fdt);
+    *errorp = -EMFILE;
+#else
      return newf;
+#endif

  out_release:
+#ifdef CONFIG_CGROUP_FILES
+    files_cgroup_remove(newf);
+#endif
      kmem_cache_free(files_cachep, newf);
  out:
      return NULL;
@@ -368,6 +391,9 @@ static struct fdtable *close_files(struct files_struct * files)
              if (set & 1) {
                  struct file * file = xchg(&fdt->fd[i], NULL);
                  if (file) {
+#ifdef CONFIG_CGROUP_FILES
+                    files_cgroup_unalloc_fd(files, 1);
+#endif
                      filp_close(file, files);
                      cond_resched();
                  }
@@ -486,6 +512,13 @@ repeat:
      if (error)
          goto repeat;

+#ifdef CONFIG_CGROUP_FILES
+    if (files_cgroup_alloc_fd(files, 1)) {
+        error = -EMFILE;
+        goto out;
+    }
+#endif
+
      if (start <= files->next_fd)
          files->next_fd = fd + 1;

@@ -522,6 +555,10 @@ EXPORT_SYMBOL(get_unused_fd_flags);
static void __put_unused_fd(struct files_struct *files, unsigned int fd)
  {
      struct fdtable *fdt = files_fdtable(files);
+#ifdef CONFIG_CGROUP_FILES
+    if (test_bit(fd, fdt->open_fds))
+        files_cgroup_unalloc_fd(files, 1);
+#endif
      __clear_open_fd(fd, fdt);
      if (fd < files->next_fd)
          files->next_fd = fd;
@@ -780,6 +817,15 @@ static int do_dup2(struct files_struct *files,
      tofree = fdt->fd[fd];
      if (!tofree && fd_is_open(fd, fdt))
          goto Ebusy;
+
+#ifdef CONFIG_CGROUP_FILES
+    if (!tofree)
+        if (files_cgroup_alloc_fd(files, 1)) {
+            spin_unlock(&files->file_lock);
+            return -EMFILE;
+        }
+#endif
+
      get_file(file);
      rcu_assign_pointer(fdt->fd[fd], file);
      __set_open_fd(fd, fdt);
diff --git a/fs/filescontrol.c b/fs/filescontrol.c
new file mode 100644
index 0000000..0ba8ffa
--- /dev/null
+++ b/fs/filescontrol.c
@@ -0,0 +1,249 @@
+/* filescontrol.c - Cgroup controller for open file handles.
+ *
+ * Copyright 2014 Google Inc.
+ * Author: Brian Makin <meri...@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/res_counter.h>
+#include <linux/filescontrol.h>
+#include <linux/cgroup.h>
+#include <linux/export.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/fs.h>
+#include <linux/fdtable.h>
+
+struct cgroup_subsys files_subsys __read_mostly;
+EXPORT_SYMBOL(files_subsys);
+
+struct files_cgroup {
+    struct cgroup_subsys_state css;
+    struct res_counter open_handles;
+};
+
+static inline struct files_cgroup *css_fcg(struct cgroup_subsys_state *css)
+{
+    return css ? container_of(css, struct files_cgroup, css) : NULL;
+}
+
+static inline struct res_counter *
+css_res_open_handles(struct cgroup_subsys_state *css)
+{
+    return &css_fcg(css)->open_handles;
+}
+
+static inline struct files_cgroup *
+files_cgroup_from_files(struct files_struct *files)
+{
+    return files->files_cgroup;
+}
+
+static struct cgroup_subsys_state *
+files_cgroup_css_alloc(struct cgroup_subsys_state *parent_css)
+{
+    struct files_cgroup *fcg;
+
+    fcg = kzalloc(sizeof(*fcg), GFP_KERNEL);
+    if (!fcg)
+        goto out;
+
+    if (!parent_css) {
+        res_counter_init(&fcg->open_handles, NULL);
+        res_counter_set_limit(&fcg->open_handles, get_max_files());
+    } else {
+        struct files_cgroup *parent_fcg = css_fcg(parent_css);
+ res_counter_init(&fcg->open_handles, &parent_fcg->open_handles);
+        res_counter_set_limit(&fcg->open_handles,
+ res_counter_read_u64(&parent_fcg->open_handles,
+                        RES_LIMIT));
+    }
+    return &fcg->css;
+
+out:
+    return ERR_PTR(-ENOMEM);
+}
+
+static void files_cgroup_css_free(struct cgroup_subsys_state *css)
+{
+    kfree(css_fcg(css));
+}
+
+u64 files_cgroup_count_fds(struct files_struct *files)
+{
+    int i;
+    struct fdtable *fdt;
+    int retval = 0;
+
+    fdt = files_fdtable(files);
+    for (i = 0; i < DIV_ROUND_UP(fdt->max_fds, BITS_PER_LONG); i++)
+        retval += hweight64((__u64)fdt->open_fds[i]);
+    return retval;
+}
+
+static u64 files_in_taskset(struct cgroup_taskset *tset)
+{
+    struct cgroup_subsys_state *css = NULL;
+    struct task_struct *task;
+    u64 files = 0;
+    cgroup_taskset_for_each(task, css, tset) {
+        if (!thread_group_leader(task))
+            continue;
+
+        task_lock(task);
+        files += files_cgroup_count_fds(task->files);
+        task_unlock(task);
+    }
+    return files;
+}
+
+/*
+ * If attaching this cgroup would overcommit the resource then deny
+ * the attach.
+ */
+static int files_cgroup_can_attach(struct cgroup_subsys_state *css,
+                struct cgroup_taskset *tset)
+{
+    u64 files = files_in_taskset(tset);
+    if (res_counter_margin(css_res_open_handles(css)) < files)
+        return -ENOMEM;
+    return 0;
+}
+
+/*
+ * If resource counts have gone up between can_attach and attach then
+ * this may overcommit resources. In that case just deny further allocation
+ * until the resource usage drops.
+ */
+static void files_cgroup_attach(struct cgroup_subsys_state *to_css,
+                struct cgroup_taskset *tset)
+{
+    u64 num_files;
+    struct task_struct *task = cgroup_taskset_first(tset);
+    struct cgroup_subsys_state *from_css;
+    struct res_counter *from_res;
+    struct res_counter *to_res = css_res_open_handles(to_css);
+    struct res_counter *fail_res;
+    struct files_struct *files;
+
+    task_lock(task);
+    files = task->files;
+    if (!files) {
+        task_unlock(task);
+        return;
+    }
+
+    from_css = &files_cgroup_from_files(files)->css;
+    from_res = css_res_open_handles(from_css);
+
+    spin_lock(&files->file_lock);
+    num_files = files_cgroup_count_fds(files);
+    res_counter_uncharge(from_res, num_files);
+    css_put(from_css);
+
+    if (res_counter_charge(to_res, num_files, &fail_res))
+        pr_err("Open files limit overcommited\n");
+    css_get(to_css);
+
+    task->files->files_cgroup = css_fcg(to_css);
+    spin_unlock(&files->file_lock);
+    task_unlock(task);
+}
+
+int files_cgroup_alloc_fd(struct files_struct *files, u64 n)
+{
+    struct res_counter *fail_res;
+    struct files_cgroup *files_cgroup = files_cgroup_from_files(files);
+
+    if (res_counter_charge(&files_cgroup->open_handles, n, &fail_res))
+        return -ENOMEM;
+
+    return 0;
+}
+EXPORT_SYMBOL(files_cgroup_alloc_fd);
+
+void files_cgroup_unalloc_fd(struct files_struct *files, u64 n)
+{
+    struct files_cgroup *files_cgroup = files_cgroup_from_files(files);
+
+    res_counter_uncharge(&files_cgroup->open_handles, n);
+}
+EXPORT_SYMBOL(files_cgroup_unalloc_fd);
+
+static u64 files_limit_read(struct cgroup_subsys_state *css,
+            struct cftype *cft)
+{
+    struct files_cgroup *fcg = css_fcg(css);
+    return res_counter_read_u64(&fcg->open_handles, RES_LIMIT);
+}
+
+static int files_limit_write(struct cgroup_subsys_state *css,
+            struct cftype *cft, u64 value)
+{
+    struct files_cgroup *fcg = css_fcg(css);
+    return res_counter_set_limit(&fcg->open_handles, value);
+}
+
+static u64 files_usage_read(struct cgroup_subsys_state *css,
+            struct cftype *cft)
+{
+    struct files_cgroup *fcg = css_fcg(css);
+    return res_counter_read_u64(&fcg->open_handles, RES_USAGE);
+}
+
+static struct cftype files[] = {
+    {
+        .name = "limit",
+        .read_u64 = files_limit_read,
+        .write_u64 = files_limit_write,
+    },
+    {
+        .name = "usage",
+        .read_u64 = files_usage_read,
+    },
+    { }
+};
+
+struct cgroup_subsys files_subsys = {
+    .name = "files",
+    .css_alloc = files_cgroup_css_alloc,
+    .css_free = files_cgroup_css_free,
+    .can_attach = files_cgroup_can_attach,
+    .attach = files_cgroup_attach,
+    .base_cftypes = files,
+    .subsys_id = files_subsys_id,
+};
+
+void files_cgroup_assign(struct files_struct *files)
+{
+    struct task_struct *tsk = current;
+    struct cgroup_subsys_state *css;
+
+    task_lock(tsk);
+    css = task_css(tsk, files_subsys_id);
+    css_get(css);
+    files->files_cgroup = container_of(css, struct files_cgroup, css);
+    task_unlock(tsk);
+}
+
+void files_cgroup_remove(struct files_struct *files)
+{
+    struct task_struct *tsk = current;
+    struct files_cgroup *fcg;
+
+    task_lock(tsk);
+    spin_lock(&files->file_lock);
+    fcg = files_cgroup_from_files(files);
+    css_put(&fcg->css);
+    spin_unlock(&files->file_lock);
+    task_unlock(tsk);
+}
diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h
index 7b99d71..defadc0 100644
--- a/include/linux/cgroup_subsys.h
+++ b/include/linux/cgroup_subsys.h
@@ -50,6 +50,11 @@ SUBSYS(net_prio)
  #if IS_SUBSYS_ENABLED(CONFIG_CGROUP_HUGETLB)
  SUBSYS(hugetlb)
  #endif
+
+#if IS_SUBSYS_ENABLED(CONFIG_CGROUP_FILES)
+SUBSYS(files)
+#endif
+
  /*
   * DO NOT ADD ANY SUBSYSTEM WITHOUT EXPLICIT ACKS FROM CGROUP MAINTAINERS.
   */
diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h
index 70e8e21..fee8e45 100644
--- a/include/linux/fdtable.h
+++ b/include/linux/fdtable.h
@@ -57,6 +57,9 @@ struct files_struct {
      unsigned long close_on_exec_init[1];
      unsigned long open_fds_init[1];
      struct file __rcu * fd_array[NR_OPEN_DEFAULT];
+#ifdef CONFIG_CGROUP_FILES
+    struct files_cgroup *files_cgroup;
+#endif
  };

  struct file_operations;
diff --git a/include/linux/filescontrol.h b/include/linux/filescontrol.h
new file mode 100644
index 0000000..e39ed2a
--- /dev/null
+++ b/include/linux/filescontrol.h
@@ -0,0 +1,32 @@
+/* filescontrol.h - Files Controller
+ *
+ * Copyright 2014 Google Inc.
+ * Author: Brian Makin <meri...@google.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ */
+
+#ifndef _LINUX_FILESCONTROL_H
+#define _LINUX_FILESCONTROL_H
+
+#include <linux/fdtable.h>
+
+#ifdef CONFIG_CGROUP_FILES
+
+extern int files_cgroup_alloc_fd(struct files_struct *files, u64 n);
+extern void files_cgroup_unalloc_fd(struct files_struct *files, u64 n);
+extern u64 files_cgroup_count_fds(struct files_struct *files);
+
+void files_cgroup_assign(struct files_struct *files);
+void files_cgroup_remove(struct files_struct *files);
+
+#endif /* CONFIG_CGROUP_FILES */
+#endif /* _LINUX_FILESCONTROL_H */
diff --git a/init/Kconfig b/init/Kconfig
index a4b4209..a8c8392 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1075,6 +1075,13 @@ config DEBUG_BLK_CGROUP
      Enable some debugging help. Currently it exports additional stat
      files in a cgroup which can be useful for debugging.

+config CGROUP_FILES
+    bool "Files Resource Controller for Control Groups"
+    default n
+    help
+      Provides a cgroup resource controller that limits number of open
+      file handles within a cgroup.
+
  endif # CGROUPS

  config CHECKPOINT_RESTORE



--
_______________________________________________
linux-yocto mailing list
linux-yocto@yoctoproject.org
https://lists.yoctoproject.org/listinfo/linux-yocto

Reply via email to