Cancelling kiocbs requires adding them to a per-kioctx linked list, which is one of the few things we need to take the kioctx lock for in the fast path. But most kiocbs can't be cancelled - so if we do this lazily, only adding a kiocb to the list once a cancel callback is actually set, we can avoid quite a bit of locking overhead.
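To make the idea concrete, here is a user-space sketch of the same pattern (editorial illustration, not part of the patch: struct list_node, struct request, set_cancel_fn() and complete_req() are made-up names, and a pthread mutex stands in for the kioctx lock). Only requests that actually install a cancel callback ever touch the shared list - and hence the lock:

	#include <pthread.h>
	#include <stddef.h>
	#include <stdio.h>

	struct list_node { struct list_node *next, *prev; };

	struct request {
		/* zeroed at allocation: next == NULL means "never linked" */
		struct list_node ki_list;
		int (*ki_cancel)(struct request *);
	};

	static struct list_node active_reqs = { &active_reqs, &active_reqs };
	static pthread_mutex_t ctx_lock = PTHREAD_MUTEX_INITIALIZER;

	/* Analogue of kiocb_set_cancel_fn(): take ctx_lock only the first
	 * time a cancel callback is installed on this request. */
	static void set_cancel_fn(struct request *req,
				  int (*cancel)(struct request *))
	{
		if (!req->ki_list.next) {
			pthread_mutex_lock(&ctx_lock);
			req->ki_list.next = active_reqs.next;
			req->ki_list.prev = &active_reqs;
			active_reqs.next->prev = &req->ki_list;
			active_reqs.next = &req->ki_list;
			pthread_mutex_unlock(&ctx_lock);
		}
		req->ki_cancel = cancel;
	}

	/* Analogue of the aio_complete() change: most requests were never
	 * linked, so the unlink - and the lock - is skipped entirely. */
	static void complete_req(struct request *req)
	{
		if (req->ki_list.next) {
			pthread_mutex_lock(&ctx_lock);
			req->ki_list.prev->next = req->ki_list.next;
			req->ki_list.next->prev = req->ki_list.prev;
			pthread_mutex_unlock(&ctx_lock);
		}
		/* ...deliver the completion event here... */
	}

	static int dummy_cancel(struct request *req) { (void)req; return 0; }

	int main(void)
	{
		struct request fast = {{ NULL, NULL }, NULL};
		struct request cancellable = {{ NULL, NULL }, NULL};

		set_cancel_fn(&cancellable, dummy_cancel); /* links onto active_reqs */
		complete_req(&fast);		/* lock never taken */
		complete_req(&cancellable);	/* one lock round-trip to unlink */
		printf("done\n");
		return 0;
	}

The same "has this ever been linked?" test - ki_list.next being NULL - is what the patch below uses in aio_complete() to skip the lock in the common case.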
Signed-off-by: Kent Overstreet <koverstr...@google.com>
---
 drivers/usb/gadget/inode.c |  3 +-
 fs/aio.c                   | 88 +++++++++++++++++++++++++++-------------------
 include/linux/aio.h        | 10 ++++--
 3 files changed, 59 insertions(+), 42 deletions(-)

diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c
index 7640e01..3bf0c35 100644
--- a/drivers/usb/gadget/inode.c
+++ b/drivers/usb/gadget/inode.c
@@ -534,7 +534,6 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e)
 	local_irq_disable();
 	epdata = priv->epdata;
 	// spin_lock(&epdata->dev->lock);
-	kiocbSetCancelled(iocb);
 	if (likely(epdata && epdata->ep && priv->req))
 		value = usb_ep_dequeue (epdata->ep, priv->req);
 	else
@@ -664,7 +663,7 @@ fail:
 		goto fail;
 	}

-	iocb->ki_cancel = ep_aio_cancel;
+	kiocb_set_cancel_fn(iocb, ep_aio_cancel);
 	get_ep(epdata);
 	priv->epdata = epdata;
 	priv->actual = 0;
diff --git a/fs/aio.c b/fs/aio.c
index de255d8..f1f2345 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -96,6 +96,8 @@ struct kioctx {
 	unsigned		max_reqs;

 	struct aio_ring_info	ring_info;
+
+	spinlock_t		completion_lock;
 };

 /*------ sysctl variables----*/
@@ -232,25 +234,43 @@ static int aio_setup_ring(struct kioctx *ctx)
 	kunmap_atomic((void *)((unsigned long)__event & PAGE_MASK)); \
 } while(0)

+void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel)
+{
+	if (!req->ki_list.next) {
+		struct kioctx *ctx = req->ki_ctx;
+		unsigned long flags;
+
+		spin_lock_irqsave(&ctx->ctx_lock, flags);
+		list_add(&req->ki_list, &ctx->active_reqs);
+		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+	}
+
+	req->ki_cancel = cancel;
+}
+EXPORT_SYMBOL(kiocb_set_cancel_fn);
+
 static int kiocb_cancel(struct kioctx *ctx, struct kiocb *kiocb,
 			struct io_event *res)
 {
-	int (*cancel)(struct kiocb *, struct io_event *);
+	kiocb_cancel_fn *cancel;
 	int ret = -EINVAL;

 	cancel = kiocb->ki_cancel;
-	kiocbSetCancelled(kiocb);
-	if (cancel) {
-		atomic_inc(&kiocb->ki_users);
-		spin_unlock_irq(&ctx->ctx_lock);
+	if (!cancel)
+		return ret;

-		memset(res, 0, sizeof(*res));
-		res->obj = (u64) kiocb->ki_obj.user;
-		res->data = kiocb->ki_user_data;
-		ret = cancel(kiocb, res);
+	if (test_and_set_bit(KIF_CANCELLED, &kiocb->ki_flags))
+		return ret;

-		spin_lock_irq(&ctx->ctx_lock);
-	}
+	atomic_inc(&kiocb->ki_users);
+	spin_unlock_irq(&ctx->ctx_lock);
+
+	memset(res, 0, sizeof(*res));
+	res->obj = (u64) kiocb->ki_obj.user;
+	res->data = kiocb->ki_user_data;
+	ret = cancel(kiocb, res);
+
+	spin_lock_irq(&ctx->ctx_lock);

 	return ret;
 }
@@ -324,6 +344,7 @@ static struct kioctx *ioctx_alloc(unsigned nr_events)

 	atomic_set(&ctx->users, 2);
 	spin_lock_init(&ctx->ctx_lock);
+	spin_lock_init(&ctx->completion_lock);
 	mutex_init(&ctx->ring_info.ring_lock);
 	init_waitqueue_head(&ctx->wait);

@@ -440,20 +461,12 @@ static struct kiocb *__aio_get_req(struct kioctx *ctx)
 {
 	struct kiocb *req = NULL;

-	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL);
+	req = kmem_cache_alloc(kiocb_cachep, GFP_KERNEL|__GFP_ZERO);
 	if (unlikely(!req))
 		return NULL;

-	req->ki_flags = 0;
 	atomic_set(&req->ki_users, 2);
-	req->ki_key = 0;
 	req->ki_ctx = ctx;
-	req->ki_cancel = NULL;
-	req->ki_retry = NULL;
-	req->ki_dtor = NULL;
-	req->private = NULL;
-	req->ki_iovec = NULL;
-	req->ki_eventfd = NULL;

 	return req;
 }
@@ -530,10 +543,7 @@ static int kiocb_batch_refill(struct kioctx *ctx, struct kiocb_batch *batch)
 	}

 	batch->count -= allocated;
-	list_for_each_entry(req, &batch->head, ki_batch) {
-		list_add(&req->ki_list, &ctx->active_reqs);
-		atomic_inc(&ctx->reqs_active);
-	}
+	atomic_add(allocated, &ctx->reqs_active);
 	kunmap_atomic(ring);
 	spin_unlock_irq(&ctx->ctx_lock);
@@ -627,25 +637,33 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	info = &ctx->ring_info;

 	/*
-	 * Add a completion event to the ring buffer. Must be done holding
-	 * ctx->ctx_lock to prevent other code from messing with the tail
-	 * pointer since we might be called from irq context.
-	 *
 	 * Take rcu_read_lock() in case the kioctx is being destroyed, as we
 	 * need to issue a wakeup after decrementing reqs_active.
 	 */
 	rcu_read_lock();

-	spin_lock_irqsave(&ctx->ctx_lock, flags);
-	list_del(&iocb->ki_list); /* remove from active_reqs */
+	if (iocb->ki_list.next) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&ctx->ctx_lock, flags);
+		list_del(&iocb->ki_list);
+		spin_unlock_irqrestore(&ctx->ctx_lock, flags);
+	}

 	/*
 	 * cancelled requests don't get events, userland was given one
 	 * when the event got cancelled.
 	 */
-	if (kiocbIsCancelled(iocb))
+	if (test_and_set_bit(KIF_CANCELLED, &iocb->ki_flags))
 		goto put_rq;

+	/*
+	 * Add a completion event to the ring buffer. Must be done holding
+	 * ctx->ctx_lock to prevent other code from messing with the tail
+	 * pointer since we might be called from irq context.
+	 */
+	spin_lock_irqsave(&ctx->completion_lock, flags);
+
 	ring = kmap_atomic(info->ring_pages[0]);

 	tail = info->tail;
@@ -673,6 +691,8 @@ void aio_complete(struct kiocb *iocb, long res, long res2)
 	put_aio_ring_event(event);
 	kunmap_atomic(ring);

+	spin_unlock_irqrestore(&ctx->completion_lock, flags);
+
 	pr_debug("added to ring %p at [%lu]\n", iocb, tail);

 	/*
@@ -699,7 +719,6 @@ put_rq:
 	if (waitqueue_active(&ctx->wait))
 		wake_up(&ctx->wait);

-	spin_unlock_irqrestore(&ctx->ctx_lock, flags);
 	rcu_read_unlock();
 }
 EXPORT_SYMBOL(aio_complete);
@@ -1190,7 +1209,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	req->ki_opcode = iocb->aio_lio_opcode;

 	ret = aio_setup_iocb(req, compat);
-
 	if (ret)
 		goto out_put_req;

@@ -1214,10 +1232,6 @@ static int io_submit_one(struct kioctx *ctx, struct iocb __user *user_iocb,
 	return 0;

 out_put_req:
-	spin_lock_irq(&ctx->ctx_lock);
-	list_del(&req->ki_list);
-	spin_unlock_irq(&ctx->ctx_lock);
-
 	atomic_dec(&ctx->reqs_active);
 	aio_put_req(req);	/* drop extra ref to req */
 	aio_put_req(req);	/* drop i/o ref to req */
diff --git a/include/linux/aio.h b/include/linux/aio.h
index 294b659..ea048ee 100644
--- a/include/linux/aio.h
+++ b/include/linux/aio.h
@@ -10,18 +10,19 @@
 #include <linux/atomic.h>

 struct kioctx;
+struct kiocb;

 #define KIOCB_SYNC_KEY		(~0U)

 /* ki_flags bits */
 #define KIF_CANCELLED		2

-#define kiocbSetCancelled(iocb)	set_bit(KIF_CANCELLED, &(iocb)->ki_flags)
-
 #define kiocbClearCancelled(iocb)	clear_bit(KIF_CANCELLED, &(iocb)->ki_flags)
 #define kiocbIsCancelled(iocb)	test_bit(KIF_CANCELLED, &(iocb)->ki_flags)

+typedef int (kiocb_cancel_fn)(struct kiocb *, struct io_event *);
+
 /* is there a better place to document function pointer methods?
  */
 /**
  * ki_retry	-	iocb forward progress callback
@@ -55,7 +56,7 @@ struct kiocb {
 	struct file		*ki_filp;
 	struct kioctx		*ki_ctx;	/* may be NULL for sync ops */
-	int			(*ki_cancel)(struct kiocb *, struct io_event *);
+	kiocb_cancel_fn		*ki_cancel;
 	ssize_t			(*ki_retry)(struct kiocb *);
 	void			(*ki_dtor)(struct kiocb *);
@@ -113,6 +114,7 @@ struct mm_struct;
 extern void exit_aio(struct mm_struct *mm);
 extern long do_io_submit(aio_context_t ctx_id, long nr,
 			 struct iocb __user *__user *iocbpp, bool compat);
+void kiocb_set_cancel_fn(struct kiocb *req, kiocb_cancel_fn *cancel);
 #else
 static inline ssize_t wait_on_sync_kiocb(struct kiocb *iocb) { return 0; }
 static inline void aio_put_req(struct kiocb *iocb) { }
@@ -122,6 +124,8 @@ static inline void exit_aio(struct mm_struct *mm) { }
 static inline long do_io_submit(aio_context_t ctx_id, long nr,
 				struct iocb __user * __user *iocbpp,
 				bool compat) { return 0; }
+static inline void kiocb_set_cancel_fn(struct kiocb *req,
+				       kiocb_cancel_fn *cancel) { }
 #endif /* CONFIG_AIO */

 static inline struct kiocb *list_kiocb(struct list_head *h)
-- 
1.7.12
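A note on the resulting driver-facing API (illustrative only; my_cancel is a hypothetical callback, not taken from the patch). Where a driver previously assigned iocb->ki_cancel directly - and every request was linked onto active_reqs at allocation time regardless - cancellation is now opt-in:

	static int my_cancel(struct kiocb *iocb, struct io_event *ev)
	{
		/*
		 * Tear down the in-flight request. kiocb_cancel() has already
		 * zeroed *ev and filled in ev->obj and ev->data before calling
		 * us, and it holds a reference (ki_users was bumped).
		 */
		return 0;
	}

	/* at submission time, instead of "iocb->ki_cancel = my_cancel;": */
	kiocb_set_cancel_fn(iocb, my_cancel);

Only at the kiocb_set_cancel_fn() call does the kiocb go onto ctx->active_reqs - the lazy step the gadget conversion above relies on.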