The commit is pushed to "branch-rh7-3.10.0-229.7.2-ovz" and will appear at 
https://src.openvz.org/scm/ovz/vzkernel.git
after rh7-3.10.0-229.7.2.vz7.6.4
------>
commit d5a0970d86642a4150439d8a599f2f359e75fbf4
Author: Andrey Ryabinin <aryabi...@odin.com>
Date:   Mon Aug 31 19:38:05 2015 +0400

    ve/fs/aio: aio_nr & aio_max_nr variables virtualization
    
    Virtualization of kernel global aio_nr & aio_max_nr variables is required
    to isolate containers and ve0 when allocating aio request/events resources.
    
    Each ve and ve0 has its own aio_nr and aio_max_nr values. The ioctx_alloc
    function tries to charge the appropriate aio_nr value, selected by ve context.
    
    It's not possible to exhaust aio events resources of one ve from another ve.
    
    Default per-CT aio_max_nr value == 0x10000, including CT0.
    
    https://jira.sw.ru/browse/PSBM-29017
    
    Signed-off-by: Andrey Ryabinin <aryabi...@odin.com>
    Reviewed-by: Vladimir Davydov <vdavy...@parallels.com>
---
 fs/aio.c            | 38 +++++++++++++++++++++-----------------
 include/linux/aio.h |  6 ++----
 include/linux/ve.h  |  6 ++++++
 kernel/sysctl.c     | 16 ++++++++--------
 kernel/ve/ve.c      |  7 +++++++
 5 files changed, 44 insertions(+), 29 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 70a6599..9d700b0 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -29,6 +29,7 @@
 #include <linux/slab.h>
 #include <linux/timer.h>
 #include <linux/aio.h>
+#include <linux/ve.h>
 #include <linux/highmem.h>
 #include <linux/workqueue.h>
 #include <linux/security.h>
@@ -122,14 +123,9 @@ struct kioctx {
 
        struct page             *internal_pages[AIO_RING_PAGES];
        struct file             *aio_ring_file;
+       struct ve_struct        *ve;
 };
 
-/*------ sysctl variables----*/
-static DEFINE_SPINLOCK(aio_nr_lock);
-unsigned long aio_nr;          /* current system wide number of aio requests */
-unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */
-/*----end sysctl variables---*/
-
 static struct kmem_cache       *kiocb_cachep;
 static struct kmem_cache       *kioctx_cachep;
 
@@ -495,6 +491,9 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
 static void free_ioctx_rcu(struct rcu_head *head)
 {
        struct kioctx *ctx = container_of(head, struct kioctx, rcu_head);
+       struct ve_struct *ve = ctx->ve;
+
+       put_ve(ve);
        kmem_cache_free(kioctx_cachep, ctx);
 }
 
@@ -571,6 +570,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
 {
        struct mm_struct *mm = current->mm;
        struct kioctx *ctx;
+       struct ve_struct *ve = get_exec_env();
        int err = -ENOMEM;
 
        /* Prevent overflows */
@@ -580,7 +580,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
                return ERR_PTR(-EINVAL);
        }
 
-       if (!nr_events || (unsigned long)nr_events > aio_max_nr)
+       if (!nr_events || (unsigned long)nr_events > ve->aio_max_nr)
                return ERR_PTR(-EAGAIN);
 
        ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL);
@@ -588,6 +588,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
                return ERR_PTR(-ENOMEM);
 
        ctx->max_reqs = nr_events;
+       ctx->ve = get_ve(ve);
 
        spin_lock_init(&ctx->ctx_lock);
        spin_lock_init(&ctx->completion_lock);
@@ -608,14 +609,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)
                goto out_freectx;
 
        /* limit the number of system wide aios */
-       spin_lock(&aio_nr_lock);
-       if (aio_nr + nr_events > aio_max_nr ||
-           aio_nr + nr_events < aio_nr) {
-               spin_unlock(&aio_nr_lock);
+       spin_lock(&ve->aio_nr_lock);
+       if (ve->aio_nr + nr_events > ve->aio_max_nr ||
+           ve->aio_nr + nr_events < ve->aio_nr) {
+               spin_unlock(&ve->aio_nr_lock);
                goto out_cleanup;
        }
-       aio_nr += ctx->max_reqs;
-       spin_unlock(&aio_nr_lock);
+       ve->aio_nr += ctx->max_reqs;
+       spin_unlock(&ve->aio_nr_lock);
 
        /* now link into global list. */
        spin_lock(&mm->ioctx_lock);
@@ -633,6 +634,7 @@ out_cleanup:
        err = -EAGAIN;
        aio_free_ring(ctx);
 out_freectx:
+       put_ve(ctx->ve);
        mutex_unlock(&ctx->ring_lock);
        put_aio_ring_file(ctx);
        kmem_cache_free(kioctx_cachep, ctx);
@@ -665,6 +667,8 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
                struct completion *requests_done)
 {
        if (!atomic_xchg(&ctx->dead, 1)) {
+               struct ve_struct *ve = ctx->ve;
+
                spin_lock(&mm->ioctx_lock);
                hlist_del_rcu(&ctx->list);
                spin_unlock(&mm->ioctx_lock);
@@ -676,10 +680,10 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx,
                 * -EAGAIN with no ioctxs actually in use (as far as userspace
                 *  could tell).
                 */
-               spin_lock(&aio_nr_lock);
-               BUG_ON(aio_nr - ctx->max_reqs > aio_nr);
-               aio_nr -= ctx->max_reqs;
-               spin_unlock(&aio_nr_lock);
+               spin_lock(&ve->aio_nr_lock);
+               BUG_ON(ve->aio_nr - ctx->max_reqs > ve->aio_nr);
+               ve->aio_nr -= ctx->max_reqs;
+               spin_unlock(&ve->aio_nr_lock);
 
                if (ctx->mmap_size)
                        vm_munmap(ctx->mmap_base, ctx->mmap_size);
diff --git a/include/linux/aio.h b/include/linux/aio.h
index a2f6172..0aa7dd3 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -14,6 +14,8 @@ struct kiocb;
 
 #define KIOCB_KEY              0
 
+#define AIO_MAX_NR_DEFAULT     0x10000
+
 /*
  * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either
  * cancelled or completed (this makes a certain amount of sense because
@@ -124,8 +126,4 @@ static inline struct kiocb *list_kiocb(struct list_head *h)
        return list_entry(h, struct kiocb, ki_list);
 }
 
-/* for sysctl: */
-extern unsigned long aio_nr;
-extern unsigned long aio_max_nr;
-
 #endif /* __LINUX__AIO_H */
diff --git a/include/linux/ve.h b/include/linux/ve.h
index 3009901..1758d51 100644
--- a/include/linux/ve.h
+++ b/include/linux/ve.h
@@ -129,6 +129,12 @@ struct ve_struct {
        struct mutex            devmnt_mutex;
 
        struct kmapset_key      ve_sysfs_perms;
+
+#ifdef CONFIG_AIO
+       spinlock_t              aio_nr_lock;
+       unsigned long           aio_nr;
+       unsigned long           aio_max_nr;
+#endif
 };
 
 struct ve_devmnt {
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 1a568e7..976f48c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1656,17 +1656,17 @@ static struct ctl_table fs_table[] = {
 #ifdef CONFIG_AIO
        {
                .procname       = "aio-nr",
-               .data           = &aio_nr,
-               .maxlen         = sizeof(aio_nr),
-               .mode           = 0444,
-               .proc_handler   = proc_doulongvec_minmax,
+               .data           = &ve0.aio_nr,
+               .maxlen         = sizeof(unsigned long),
+               .mode           = 0444 | S_ISVTX,
+               .proc_handler   = proc_doulongvec_minmax_virtual,
        },
        {
                .procname       = "aio-max-nr",
-               .data           = &aio_max_nr,
-               .maxlen         = sizeof(aio_max_nr),
-               .mode           = 0644,
-               .proc_handler   = proc_doulongvec_minmax,
+               .data           = &ve0.aio_max_nr,
+               .maxlen         = sizeof(unsigned long),
+               .mode           = 0644 | S_ISVTX,
+               .proc_handler   = proc_doulongvec_minmax_virtual,
        },
 #endif /* CONFIG_AIO */
 #ifdef CONFIG_INOTIFY_USER
diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c
index 5025149..c67ff3f 100644
--- a/kernel/ve/ve.c
+++ b/kernel/ve/ve.c
@@ -17,6 +17,7 @@
 #include <linux/ve.h>
 #include <linux/init.h>
 
+#include <linux/aio.h>
 #include <linux/errno.h>
 #include <linux/unistd.h>
 #include <linux/slab.h>
@@ -625,6 +626,12 @@ do_init:
        mutex_init(&ve->devmnt_mutex);
        kmapset_init_key(&ve->ve_sysfs_perms);
 
+#ifdef CONFIG_AIO
+       spin_lock_init(&ve->aio_nr_lock);
+       ve->aio_nr = 0;
+       ve->aio_max_nr = AIO_MAX_NR_DEFAULT;
+#endif
+
        return &ve->css;
 
 err_log:
_______________________________________________
Devel mailing list
Devel@openvz.org
https://lists.openvz.org/mailman/listinfo/devel

Reply via email to