The commit is pushed to "branch-rh7-3.10.0-229.7.2-ovz" and will appear at https://src.openvz.org/scm/ovz/vzkernel.git after rh7-3.10.0-229.7.2.vz7.6.4 ------> commit d5a0970d86642a4150439d8a599f2f359e75fbf4 Author: Andrey Ryabinin <aryabi...@odin.com> Date: Mon Aug 31 19:38:05 2015 +0400
ve/fs/aio: aio_nr & aio_max_nr variables virtualization Virtualization of the kernel global aio_nr & aio_max_nr variables is required to isolate containers and ve0 from each other when allocating aio request/events resources. Each ve, including ve0, has its own aio_nr and aio_max_nr values. The function ioctx_alloc tries to charge the appropriate aio_nr value, selected by the ve context. It is therefore not possible to exhaust the aio events resources of one ve from another ve. The default per-CT aio_max_nr value is 0x10000, including for CT0. https://jira.sw.ru/browse/PSBM-29017 Signed-off-by: Andrey Ryabinin <aryabi...@odin.com> Reviewed-by: Vladimir Davydov <vdavy...@parallels.com> --- fs/aio.c | 38 +++++++++++++++++++++----------------- include/linux/aio.h | 6 ++---- include/linux/ve.h | 6 ++++++ kernel/sysctl.c | 16 ++++++++-------- kernel/ve/ve.c | 7 +++++++ 5 files changed, 44 insertions(+), 29 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 70a6599..9d700b0 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -29,6 +29,7 @@ #include <linux/slab.h> #include <linux/timer.h> #include <linux/aio.h> +#include <linux/ve.h> #include <linux/highmem.h> #include <linux/workqueue.h> #include <linux/security.h> @@ -122,14 +123,9 @@ struct kioctx { struct page *internal_pages[AIO_RING_PAGES]; struct file *aio_ring_file; + struct ve_struct *ve; }; -/*------ sysctl variables----*/ -static DEFINE_SPINLOCK(aio_nr_lock); -unsigned long aio_nr; /* current system wide number of aio requests */ -unsigned long aio_max_nr = 0x10000; /* system wide maximum number of aio requests */ -/*----end sysctl variables---*/ - static struct kmem_cache *kiocb_cachep; static struct kmem_cache *kioctx_cachep; @@ -495,6 +491,9 @@ static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb, static void free_ioctx_rcu(struct rcu_head *head) { struct kioctx *ctx = container_of(head, struct kioctx, rcu_head); + struct ve_struct *ve = ctx->ve; + + put_ve(ve); kmem_cache_free(kioctx_cachep, ctx); } @@ -571,6 +570,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) { 
struct mm_struct *mm = current->mm; struct kioctx *ctx; + struct ve_struct *ve = get_exec_env(); int err = -ENOMEM; /* Prevent overflows */ @@ -580,7 +580,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) return ERR_PTR(-EINVAL); } - if (!nr_events || (unsigned long)nr_events > aio_max_nr) + if (!nr_events || (unsigned long)nr_events > ve->aio_max_nr) return ERR_PTR(-EAGAIN); ctx = kmem_cache_zalloc(kioctx_cachep, GFP_KERNEL); @@ -588,6 +588,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) return ERR_PTR(-ENOMEM); ctx->max_reqs = nr_events; + ctx->ve = get_ve(ve); spin_lock_init(&ctx->ctx_lock); spin_lock_init(&ctx->completion_lock); @@ -608,14 +609,14 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) goto out_freectx; /* limit the number of system wide aios */ - spin_lock(&aio_nr_lock); - if (aio_nr + nr_events > aio_max_nr || - aio_nr + nr_events < aio_nr) { - spin_unlock(&aio_nr_lock); + spin_lock(&ve->aio_nr_lock); + if (ve->aio_nr + nr_events > ve->aio_max_nr || + ve->aio_nr + nr_events < ve->aio_nr) { + spin_unlock(&ve->aio_nr_lock); goto out_cleanup; } - aio_nr += ctx->max_reqs; - spin_unlock(&aio_nr_lock); + ve->aio_nr += ctx->max_reqs; + spin_unlock(&ve->aio_nr_lock); /* now link into global list. */ spin_lock(&mm->ioctx_lock); @@ -633,6 +634,7 @@ out_cleanup: err = -EAGAIN; aio_free_ring(ctx); out_freectx: + put_ve(ctx->ve); mutex_unlock(&ctx->ring_lock); put_aio_ring_file(ctx); kmem_cache_free(kioctx_cachep, ctx); @@ -665,6 +667,8 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, struct completion *requests_done) { if (!atomic_xchg(&ctx->dead, 1)) { + struct ve_struct *ve = ctx->ve; + spin_lock(&mm->ioctx_lock); hlist_del_rcu(&ctx->list); spin_unlock(&mm->ioctx_lock); @@ -676,10 +680,10 @@ static int kill_ioctx(struct mm_struct *mm, struct kioctx *ctx, * -EAGAIN with no ioctxs actually in use (as far as userspace * could tell). 
*/ - spin_lock(&aio_nr_lock); - BUG_ON(aio_nr - ctx->max_reqs > aio_nr); - aio_nr -= ctx->max_reqs; - spin_unlock(&aio_nr_lock); + spin_lock(&ve->aio_nr_lock); + BUG_ON(ve->aio_nr - ctx->max_reqs > ve->aio_nr); + ve->aio_nr -= ctx->max_reqs; + spin_unlock(&ve->aio_nr_lock); if (ctx->mmap_size) vm_munmap(ctx->mmap_base, ctx->mmap_size); diff --git a/include/linux/aio.h b/include/linux/aio.h index a2f6172..0aa7dd3 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -14,6 +14,8 @@ struct kiocb; #define KIOCB_KEY 0 +#define AIO_MAX_NR_DEFAULT 0x10000 + /* * We use ki_cancel == KIOCB_CANCELLED to indicate that a kiocb has been either * cancelled or completed (this makes a certain amount of sense because @@ -124,8 +126,4 @@ static inline struct kiocb *list_kiocb(struct list_head *h) return list_entry(h, struct kiocb, ki_list); } -/* for sysctl: */ -extern unsigned long aio_nr; -extern unsigned long aio_max_nr; - #endif /* __LINUX__AIO_H */ diff --git a/include/linux/ve.h b/include/linux/ve.h index 3009901..1758d51 100644 --- a/include/linux/ve.h +++ b/include/linux/ve.h @@ -129,6 +129,12 @@ struct ve_struct { struct mutex devmnt_mutex; struct kmapset_key ve_sysfs_perms; + +#ifdef CONFIG_AIO + spinlock_t aio_nr_lock; + unsigned long aio_nr; + unsigned long aio_max_nr; +#endif }; struct ve_devmnt { diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 1a568e7..976f48c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1656,17 +1656,17 @@ static struct ctl_table fs_table[] = { #ifdef CONFIG_AIO { .procname = "aio-nr", - .data = &aio_nr, - .maxlen = sizeof(aio_nr), - .mode = 0444, - .proc_handler = proc_doulongvec_minmax, + .data = &ve0.aio_nr, + .maxlen = sizeof(unsigned long), + .mode = 0444 | S_ISVTX, + .proc_handler = proc_doulongvec_minmax_virtual, }, { .procname = "aio-max-nr", - .data = &aio_max_nr, - .maxlen = sizeof(aio_max_nr), - .mode = 0644, - .proc_handler = proc_doulongvec_minmax, + .data = &ve0.aio_max_nr, + .maxlen = sizeof(unsigned long), + 
.mode = 0644 | S_ISVTX, + .proc_handler = proc_doulongvec_minmax_virtual, }, #endif /* CONFIG_AIO */ #ifdef CONFIG_INOTIFY_USER diff --git a/kernel/ve/ve.c b/kernel/ve/ve.c index 5025149..c67ff3f 100644 --- a/kernel/ve/ve.c +++ b/kernel/ve/ve.c @@ -17,6 +17,7 @@ #include <linux/ve.h> #include <linux/init.h> +#include <linux/aio.h> #include <linux/errno.h> #include <linux/unistd.h> #include <linux/slab.h> @@ -625,6 +626,12 @@ do_init: mutex_init(&ve->devmnt_mutex); kmapset_init_key(&ve->ve_sysfs_perms); +#ifdef CONFIG_AIO + spin_lock_init(&ve->aio_nr_lock); + ve->aio_nr = 0; + ve->aio_max_nr = AIO_MAX_NR_DEFAULT; +#endif + return &ve->css; err_log: _______________________________________________ Devel mailing list Devel@openvz.org https://lists.openvz.org/mailman/listinfo/devel