On Mon, Mar 05, 2018 at 01:27:14PM -0800, Christoph Hellwig wrote:
> The upcoming aio poll support would like to be able to complete the
> iocb inline from the cancellation context, but that would cause
> a lock order reversal.  Add support for optionally moving the cancellation
> outside the context lock to avoid this reversal.

I started to wonder: which lock order reversal does the commit message
refer to?

I think the reason for adding delayed cancellation is that we want to
be able to call io_cancel -> kiocb_cancel -> aio_poll_cancel ->
aio_complete without taking ctx_lock recursively?
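
If so, here is a minimal sketch of the recursion I have in mind,
assuming aio_poll_cancel() ends up calling aio_complete() inline (that
part is from the upcoming poll support and isn't shown in this patch);
the locking is the io_cancel path plus the ctx_lock that aio_complete
already takes to pull the iocb off ki_list:

	io_cancel()
	    spin_lock_irq(&ctx->ctx_lock);
	    kiocb_cancel(kiocb);
	        /* req->ki_cancel() == aio_poll_cancel()
	         *   -> aio_complete()
	         *      -> spin_lock_irqsave(&ctx->ctx_lock, flags);
	         * i.e. ctx_lock taken recursively -> deadlock
	         */
	    spin_unlock_irq(&ctx->ctx_lock);

That would also explain why this patch defers kiocb_cancel() until
after ctx_lock is dropped, in both io_cancel and free_ioctx_users, for
iocbs marked AIO_IOCB_DELAYED_CANCEL.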

--D

> Signed-off-by: Christoph Hellwig <h...@lst.de>
> Acked-by: Jeff Moyer <jmo...@redhat.com>
> ---
>  fs/aio.c | 49 ++++++++++++++++++++++++++++++++++++++-----------
>  1 file changed, 38 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/aio.c b/fs/aio.c
> index 0b6394b4e528..9d7d6e4cde87 100644
> --- a/fs/aio.c
> +++ b/fs/aio.c
> @@ -170,6 +170,10 @@ struct aio_kiocb {
>       struct list_head        ki_list;        /* the aio core uses this
>                                                * for cancellation */
>  
> +     unsigned int            flags;          /* protected by ctx->ctx_lock */
> +#define AIO_IOCB_DELAYED_CANCEL      (1 << 0)
> +#define AIO_IOCB_CANCELLED   (1 << 1)
> +
>       /*
>        * If the aio_resfd field of the userspace iocb is not zero,
>        * this is the underlying eventfd context to deliver events to.
> @@ -536,9 +540,9 @@ static int aio_setup_ring(struct kioctx *ctx, unsigned int nr_events)
>  #define AIO_EVENTS_FIRST_PAGE        ((PAGE_SIZE - sizeof(struct aio_ring)) / sizeof(struct io_event))
>  #define AIO_EVENTS_OFFSET    (AIO_EVENTS_PER_PAGE - AIO_EVENTS_FIRST_PAGE)
>  
> -void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
> +static void __kiocb_set_cancel_fn(struct aio_kiocb *req,
> +             kiocb_cancel_fn *cancel, unsigned int iocb_flags)
>  {
> -     struct aio_kiocb *req = container_of(iocb, struct aio_kiocb, rw);
>       struct kioctx *ctx = req->ki_ctx;
>       unsigned long flags;
>  
> @@ -548,8 +552,15 @@ void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
>       spin_lock_irqsave(&ctx->ctx_lock, flags);
>       list_add_tail(&req->ki_list, &ctx->active_reqs);
>       req->ki_cancel = cancel;
> +     req->flags |= iocb_flags;
>       spin_unlock_irqrestore(&ctx->ctx_lock, flags);
>  }
> +
> +void kiocb_set_cancel_fn(struct kiocb *iocb, kiocb_cancel_fn *cancel)
> +{
> +     return __kiocb_set_cancel_fn(container_of(iocb, struct aio_kiocb, rw),
> +                     cancel, 0);
> +}
>  EXPORT_SYMBOL(kiocb_set_cancel_fn);
>  
>  /*
> @@ -603,17 +614,27 @@ static void free_ioctx_users(struct percpu_ref *ref)
>  {
>       struct kioctx *ctx = container_of(ref, struct kioctx, users);
>       struct aio_kiocb *req;
> +     LIST_HEAD(list);
>  
>       spin_lock_irq(&ctx->ctx_lock);
> -
>       while (!list_empty(&ctx->active_reqs)) {
>               req = list_first_entry(&ctx->active_reqs,
>                                      struct aio_kiocb, ki_list);
> -             kiocb_cancel(req);
> -     }
>  
> +             if (req->flags & AIO_IOCB_DELAYED_CANCEL) {
> +                     req->flags |= AIO_IOCB_CANCELLED;
> +                     list_move_tail(&req->ki_list, &list);
> +             } else {
> +                     kiocb_cancel(req);
> +             }
> +     }
>       spin_unlock_irq(&ctx->ctx_lock);
>  
> +     while (!list_empty(&list)) {
> +             req = list_first_entry(&list, struct aio_kiocb, ki_list);
> +             kiocb_cancel(req);
> +     }
> +
>       percpu_ref_kill(&ctx->reqs);
>       percpu_ref_put(&ctx->reqs);
>  }
> @@ -1785,15 +1806,22 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
>       if (unlikely(!ctx))
>               return -EINVAL;
>  
> -     spin_lock_irq(&ctx->ctx_lock);
> +     ret = -EINVAL;
>  
> +     spin_lock_irq(&ctx->ctx_lock);
>       kiocb = lookup_kiocb(ctx, iocb, key);
> +     if (kiocb) {
> +             if (kiocb->flags & AIO_IOCB_DELAYED_CANCEL) {
> +                     kiocb->flags |= AIO_IOCB_CANCELLED;
> +             } else {
> +                     ret = kiocb_cancel(kiocb);
> +                     kiocb = NULL;
> +             }
> +     }
> +     spin_unlock_irq(&ctx->ctx_lock);
> +
>       if (kiocb)
>               ret = kiocb_cancel(kiocb);
> -     else
> -             ret = -EINVAL;
> -
> -     spin_unlock_irq(&ctx->ctx_lock);
>  
>       if (!ret) {
>               /*
> @@ -1805,7 +1833,6 @@ SYSCALL_DEFINE3(io_cancel, aio_context_t, ctx_id, struct iocb __user *, iocb,
>       }
>  
>       percpu_ref_put(&ctx->users);
> -
>       return ret;
>  }
>  
> -- 
> 2.14.2
> 
