11.11.2015 15:01, Stanislav Kinsburskiy пишет:
Thanks. That's much better.
But maybe we could simplify it even more?
Please, take a look at my comments inlined. They allow to reduce impact
to generic code even further and simplify future rebasing (which is
usually a lot of pain).

Good point. I will address this in v.3 of the patch. Thanks!


11.11.2015 10:33, Evgenii Shatokhin пишет:
https://jira.sw.ru/browse/PSBM-34438

(This fix was adapted from PCS6.)

It is possible for a container to create lots of mount points, which may
make operations with them slower. As some of these operations take
global locks (namespace_sem, vfsmount_lock), it might affect other
containers as well.

Let us limit the maximum number of mount points a VE may create. The
limit can be customized via /proc/sys/fs/ve-mount-nr knob.

Changes in v.2:

* The situations where VE0 mounts something and another VE unmounts it
   seem to be of no concern. If so, it is OK not to alter struct mount:
   the mount counter for a VE may become unbalanced but this is
   acceptable here.

* The sysctl knob is now defined alongside other VE sysctls.

Signed-off-by: Evgenii Shatokhin <eshatok...@odin.com>
---
  fs/namespace.c      | 22 ++++++++++++++++++++--
  include/linux/ve.h  |  6 ++++++
  kernel/ve/ve.c      |  2 ++
  kernel/ve/veowner.c | 15 +++++++++++++++
  4 files changed, 43 insertions(+), 2 deletions(-)

diff --git a/fs/namespace.c b/fs/namespace.c
index 8909c13..264c0b5 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -165,7 +165,15 @@ unsigned int mnt_get_count(struct mount *mnt)

/* Somewhere in our headers, or whatever other homebrew files */
/*
 * ve_mount_allowed - check whether the current VE may create another mount.
 *
 * Returns nonzero while the VE's mount counter is still below the
 * /proc/sys/fs/ve-mount-nr limit (sysctl_ve_mount_nr).
 *
 * NOTE(review): unlike the patched alloc_vfsmnt(), this check has no
 * ve_is_super() exemption for VE0 — confirm that is intended.
 * NOTE(review): assumes get_exec_env() never returns NULL here — verify.
 */
static int ve_mount_allowed(void)
{
     return get_exec_env()->mnt_nr < sysctl_ve_mount_nr;
}

  static struct mount *alloc_vfsmnt(const char *name)
  {
-    struct mount *mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
+    struct mount *mnt;
+    struct ve_struct *ve = get_exec_env();
+
+    if (ve &&
+        atomic_add_return(1, &ve->mnt_nr) > sysctl_ve_mount_nr &&
+        !ve_is_super(ve))
+        goto out_mnt_nr_dec;
+

  +        if (!ve_mount_allowed())
  +               return NULL;

+    mnt = kmem_cache_zalloc(mnt_cache, GFP_KERNEL);
      if (mnt) {
          int err;
@@ -201,7 +209,9 @@ static struct mount *alloc_vfsmnt(const char *name)
  #ifdef CONFIG_FSNOTIFY
          INIT_HLIST_HEAD(&mnt->mnt_fsnotify_marks);
  #endif
-    }
+    } else
+        goto out_mnt_nr_dec;
+

+    ve->mnt_nr++;

With this we can grow above 4096, yes. But that number was taken out of
nowhere anyway, so I don't think we should really care about exceeding it.

      return mnt;
  #ifdef CONFIG_SMP
@@ -212,6 +222,9 @@ out_free_id:
      mnt_free_id(mnt);
  out_free_cache:
      kmem_cache_free(mnt_cache, mnt);
+out_mnt_nr_dec:
+    if (ve)
+        atomic_dec(&ve->mnt_nr);
      return NULL;
  }
@@ -542,6 +555,11 @@ int sb_prepare_remount_readonly(struct super_block *sb)
  static void free_vfsmnt(struct mount *mnt)
  {
+    struct ve_struct *ve = get_exec_env();
+
+    if (ve)
+        atomic_dec(&ve->mnt_nr);
+
      kfree(mnt->mnt_devname);
      mnt_free_id(mnt);
  #ifdef CONFIG_SMP
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 86b95c3..02600ac 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -128,6 +128,10 @@ struct ve_struct {
      unsigned long        aio_nr;
      unsigned long        aio_max_nr;
  #endif
+    /* Number of mounts. May become unbalanced if VE0 mounts something
+     * and the VE unmounts it. This is acceptable.
+     */
+    atomic_t        mnt_nr;

Since we are exposing this counter to the user, and it's compared with some
dummy "4096" value, we don't really care about the exact result.
IOW, I doubt that we require an atomic variable here.
A simple integer will be enough (especially if we take into account that
word-sized operations are atomic on x86 anyway).

+            int                                mnt_nr;

  };
  struct ve_devmnt {
@@ -145,6 +149,8 @@ extern int nr_ve;
  extern struct proc_dir_entry *proc_vz_dir;
  extern struct cgroup_subsys ve_subsys;
+extern unsigned int sysctl_ve_mount_nr;
+
  #ifdef CONFIG_VE_IPTABLES
  extern __u64 ve_setup_iptables_mask(__u64 init_mask);
  #endif
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index e9219e6..216608c 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -82,6 +82,7 @@ struct ve_struct ve0 = {
  #endif
      .sched_lat_ve.cur    = &ve0_lat_stats,
      .init_cred        = &init_cred,
+    .mnt_nr            = ATOMIC_INIT(0),
  };
  EXPORT_SYMBOL(ve0);
@@ -653,6 +654,7 @@ do_init:
      ve->aio_nr = 0;
      ve->aio_max_nr = AIO_MAX_NR_DEFAULT;
  #endif
+    atomic_set(&ve->mnt_nr, 0);
   +              ve->mnt_nr = 0;
      return &ve->css;
diff --git a/kernel/ve/veowner.c b/kernel/ve/veowner.c
index 316e4d0..1a7e735 100644
--- a/kernel/ve/veowner.c
+++ b/kernel/ve/veowner.c
@@ -55,6 +55,14 @@ static void prepare_proc(void)
  int ve_xattr_policy = VE_XATTR_POLICY_ACCEPT;
  static int ve_area_access_check;
+/*
+ * Operations with a big amount of mount points can require a lot of time.
+ * These operations take the global lock namespace_sem, so they can affect
+ * other containers. Let us allow no more than sysctl_ve_mount_nr mount
+ * points for a VE.
+ */
+unsigned int sysctl_ve_mount_nr = 4096;
+
  static struct ctl_table vz_fs_table[] = {
      {
          .procname    = "ve-area-access-check",
@@ -77,6 +85,13 @@ static struct ctl_table vz_fs_table[] = {
          .mode        = 0644 | S_ISVTX,
          .proc_handler    = &proc_dointvec_virtual,
      },
+    {
+        .procname       = "ve-mount-nr",
+        .data           = &sysctl_ve_mount_nr,
+        .maxlen         = sizeof(sysctl_ve_mount_nr),
+        .mode           = 0644,
+        .proc_handler   = proc_dointvec,
+    },
      { 0 }
  };

.


_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to