[PATCH 2/4] aio: add a iocb refcount
This is needed to prevent races caused by the way the ->poll API works. To avoid introducing overhead for other users of the iocbs we initialize it to zero and only do refcount operations if it is non-zero in the completion path. Signed-off-by: Christoph Hellwig Tested-by: Avi Kivity --- fs/aio.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 27454594e37a..fe2018ada32c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -178,6 +179,7 @@ struct aio_kiocb { struct list_headki_list;/* the aio core uses this * for cancellation */ + refcount_t ki_refcnt; /* * If the aio_resfd field of the userspace iocb is not zero, @@ -1015,6 +1017,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) percpu_ref_get(&ctx->reqs); INIT_LIST_HEAD(&req->ki_list); + refcount_set(&req->ki_refcnt, 0); req->ki_ctx = ctx; return req; out_put: @@ -1049,6 +1052,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) return ret; } +static inline void iocb_put(struct aio_kiocb *iocb) +{ + if (refcount_read(&iocb->ki_refcnt) == 0 || + refcount_dec_and_test(&iocb->ki_refcnt)) { + percpu_ref_put(&iocb->ki_ctx->reqs); + kmem_cache_free(kiocb_cachep, iocb); + } +} + /* aio_complete * Called when the io request on the given iocb is complete. */ @@ -1118,8 +1130,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) eventfd_ctx_put(iocb->ki_eventfd); } - kmem_cache_free(kiocb_cachep, iocb); - /* * We have to order our ring_info tail store above and test * of the wait list below outside the wait lock. This is @@ -1130,8 +1140,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - - percpu_ref_put(&ctx->reqs); + iocb_put(iocb); } /* aio_read_events_ring -- 2.18.0
Re: [PATCH 2/4] aio: add a iocb refcount
On Thu, Aug 02, 2018 at 12:19:56AM +0100, Al Viro wrote: > On Mon, Jul 30, 2018 at 09:15:42AM +0200, Christoph Hellwig wrote: > > This is needed to prevent races caused by the way the ->poll API works. > > To avoid introducing overhead for other users of the iocbs we initialize > > it to zero and only do refcount operations if it is non-zero in the > > completion path. > > refcount_t looks like a bad match - you, AFAICS, have count 0 for everything > except poll, while for poll you start with 2. That looks like > if (iocb->shared && test_and_clear_bit(0, &iocb->shared)) > return; > kill the sucker > in your iocb_put() and initializing it to 1 in poll. No? For non-poll we don't need the recount, so we can ignore that. For poll we have two reference - submitting and wakee context. We could replace the refcount_t with an atomic bitops - but it would a) make the code harder to read than plain old refcounting b) make the iocb bigger - at least for now as there is nothing else we can share that unsigned long with. Your call - I can respin it either way.
Re: [PATCH 2/4] aio: add a iocb refcount
On Mon, Jul 30, 2018 at 09:15:42AM +0200, Christoph Hellwig wrote: > This is needed to prevent races caused by the way the ->poll API works. > To avoid introducing overhead for other users of the iocbs we initialize > it to zero and only do refcount operations if it is non-zero in the > completion path. refcount_t looks like a bad match - you, AFAICS, have count 0 for everything except poll, while for poll you start with 2. That looks like if (iocb->shared && test_and_clear_bit(0, &iocb->shared)) return; kill the sucker in your iocb_put() and initializing it to 1 in poll. No?
[PATCH 2/4] aio: add a iocb refcount
This is needed to prevent races caused by the way the ->poll API works. To avoid introducing overhead for other users of the iocbs we initialize it to zero and only do refcount operations if it is non-zero in the completion path. Signed-off-by: Christoph Hellwig --- fs/aio.c | 17 + 1 file changed, 13 insertions(+), 4 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 27454594e37a..fe2018ada32c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include @@ -178,6 +179,7 @@ struct aio_kiocb { struct list_headki_list;/* the aio core uses this * for cancellation */ + refcount_t ki_refcnt; /* * If the aio_resfd field of the userspace iocb is not zero, @@ -1015,6 +1017,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) percpu_ref_get(&ctx->reqs); INIT_LIST_HEAD(&req->ki_list); + refcount_set(&req->ki_refcnt, 0); req->ki_ctx = ctx; return req; out_put: @@ -1049,6 +1052,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) return ret; } +static inline void iocb_put(struct aio_kiocb *iocb) +{ + if (refcount_read(&iocb->ki_refcnt) == 0 || + refcount_dec_and_test(&iocb->ki_refcnt)) { + percpu_ref_put(&iocb->ki_ctx->reqs); + kmem_cache_free(kiocb_cachep, iocb); + } +} + /* aio_complete * Called when the io request on the given iocb is complete. */ @@ -1118,8 +1130,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) eventfd_ctx_put(iocb->ki_eventfd); } - kmem_cache_free(kiocb_cachep, iocb); - /* * We have to order our ring_info tail store above and test * of the wait list below outside the wait lock. This is @@ -1130,8 +1140,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - - percpu_ref_put(&ctx->reqs); + iocb_put(iocb); } /* aio_read_events_ring -- 2.18.0
Re: [PATCH 2/4] aio: add a iocb refcount
On Thu, Jul 26, 2018 at 01:57:05PM +0200, Christoph Hellwig wrote: > On Thu, Jul 26, 2018 at 04:22:27AM -0700, Matthew Wilcox wrote: > > On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote: > > > + atomic_tki_refcnt; > > > > Should this be a refcount_t instead? At first glance your usage seems > > compatible with refcount_t. > > I though the magic 0 meaning would be incompatible with a refcnt_t. > I'll investigate and respin if it ends up being ok. Seems like a recount_t works fine, even with CONFIG_REFCOUNT_FULL, so I'll switch it over for the next version.
Re: [PATCH 2/4] aio: add a iocb refcount
On Thu, Jul 26, 2018 at 04:22:27AM -0700, Matthew Wilcox wrote: > On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote: > > + atomic_tki_refcnt; > > Should this be a refcount_t instead? At first glance your usage seems > compatible with refcount_t. I though the magic 0 meaning would be incompatible with a refcnt_t. I'll investigate and respin if it ends up being ok.
Re: [PATCH 2/4] aio: add a iocb refcount
On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote: > + atomic_tki_refcnt; Should this be a refcount_t instead? At first glance your usage seems compatible with refcount_t.
[PATCH 2/4] aio: add a iocb refcount
This is needed to prevent races caused by the way the ->poll API works. To avoid introducing overhead for other users of the iocbs we initialize it to zero and only do refcount operations if it is non-zero in the completion path. Signed-off-by: Christoph Hellwig --- fs/aio.c | 16 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/fs/aio.c b/fs/aio.c index 27454594e37a..7f3c159b3e2e 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -178,6 +178,7 @@ struct aio_kiocb { struct list_headki_list;/* the aio core uses this * for cancellation */ + atomic_tki_refcnt; /* * If the aio_resfd field of the userspace iocb is not zero, @@ -1015,6 +1016,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx *ctx) percpu_ref_get(&ctx->reqs); INIT_LIST_HEAD(&req->ki_list); + atomic_set(&req->ki_refcnt, 0); req->ki_ctx = ctx; return req; out_put: @@ -1049,6 +1051,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id) return ret; } +static inline void iocb_put(struct aio_kiocb *iocb) +{ + if (atomic_read(&iocb->ki_refcnt) == 0 || + atomic_dec_and_test(&iocb->ki_refcnt)) { + percpu_ref_put(&iocb->ki_ctx->reqs); + kmem_cache_free(kiocb_cachep, iocb); + } +} + /* aio_complete * Called when the io request on the given iocb is complete. */ @@ -1118,8 +1129,6 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) eventfd_ctx_put(iocb->ki_eventfd); } - kmem_cache_free(kiocb_cachep, iocb); - /* * We have to order our ring_info tail store above and test * of the wait list below outside the wait lock. This is @@ -1130,8 +1139,7 @@ static void aio_complete(struct aio_kiocb *iocb, long res, long res2) if (waitqueue_active(&ctx->wait)) wake_up(&ctx->wait); - - percpu_ref_put(&ctx->reqs); + iocb_put(iocb); } /* aio_read_events_ring -- 2.18.0