[PATCH 2/4] aio: add a iocb refcount

2018-08-06 Thread Christoph Hellwig
This is needed to prevent races caused by the way the ->poll API works.
To avoid introducing overhead for other users of the iocbs we initialize
it to zero and only do refcount operations if it is non-zero in the
completion path.

Signed-off-by: Christoph Hellwig 
Tested-by: Avi Kivity 
---
 fs/aio.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 27454594e37a..fe2018ada32c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -178,6 +179,7 @@ struct aio_kiocb {
 
struct list_headki_list;/* the aio core uses this
 * for cancellation */
+   refcount_t  ki_refcnt;
 
/*
 * If the aio_resfd field of the userspace iocb is not zero,
@@ -1015,6 +1017,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx 
*ctx)
 
percpu_ref_get(>reqs);
INIT_LIST_HEAD(>ki_list);
+   refcount_set(>ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
 out_put:
@@ -1049,6 +1052,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ret;
 }
 
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+   if (refcount_read(>ki_refcnt) == 0 ||
+   refcount_dec_and_test(>ki_refcnt)) {
+   percpu_ref_put(>ki_ctx->reqs);
+   kmem_cache_free(kiocb_cachep, iocb);
+   }
+}
+
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1118,8 +1130,6 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
eventfd_ctx_put(iocb->ki_eventfd);
}
 
-   kmem_cache_free(kiocb_cachep, iocb);
-
/*
 * We have to order our ring_info tail store above and test
 * of the wait list below outside the wait lock.  This is
@@ -1130,8 +1140,7 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
 
if (waitqueue_active(>wait))
wake_up(>wait);
-
-   percpu_ref_put(>reqs);
+   iocb_put(iocb);
 }
 
 /* aio_read_events_ring
-- 
2.18.0



[PATCH 2/4] aio: add a iocb refcount

2018-08-06 Thread Christoph Hellwig
This is needed to prevent races caused by the way the ->poll API works.
To avoid introducing overhead for other users of the iocbs we initialize
it to zero and only do refcount operations if it is non-zero in the
completion path.

Signed-off-by: Christoph Hellwig 
Tested-by: Avi Kivity 
---
 fs/aio.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 27454594e37a..fe2018ada32c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -178,6 +179,7 @@ struct aio_kiocb {
 
struct list_headki_list;/* the aio core uses this
 * for cancellation */
+   refcount_t  ki_refcnt;
 
/*
 * If the aio_resfd field of the userspace iocb is not zero,
@@ -1015,6 +1017,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx 
*ctx)
 
percpu_ref_get(>reqs);
INIT_LIST_HEAD(>ki_list);
+   refcount_set(>ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
 out_put:
@@ -1049,6 +1052,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ret;
 }
 
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+   if (refcount_read(>ki_refcnt) == 0 ||
+   refcount_dec_and_test(>ki_refcnt)) {
+   percpu_ref_put(>ki_ctx->reqs);
+   kmem_cache_free(kiocb_cachep, iocb);
+   }
+}
+
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1118,8 +1130,6 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
eventfd_ctx_put(iocb->ki_eventfd);
}
 
-   kmem_cache_free(kiocb_cachep, iocb);
-
/*
 * We have to order our ring_info tail store above and test
 * of the wait list below outside the wait lock.  This is
@@ -1130,8 +1140,7 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
 
if (waitqueue_active(>wait))
wake_up(>wait);
-
-   percpu_ref_put(>reqs);
+   iocb_put(iocb);
 }
 
 /* aio_read_events_ring
-- 
2.18.0



Re: [PATCH 2/4] aio: add a iocb refcount

2018-08-02 Thread Christoph Hellwig
On Thu, Aug 02, 2018 at 12:19:56AM +0100, Al Viro wrote:
> On Mon, Jul 30, 2018 at 09:15:42AM +0200, Christoph Hellwig wrote:
> > This is needed to prevent races caused by the way the ->poll API works.
> > To avoid introducing overhead for other users of the iocbs we initialize
> > it to zero and only do refcount operations if it is non-zero in the
> > completion path.
> 
> refcount_t looks like a bad match - you, AFAICS, have count 0 for everything
> except poll, while for poll you start with 2.  That looks like
>   if (iocb->shared && test_and_clear_bit(0, >shared))
>   return;
>   kill the sucker
> in your iocb_put() and initializing it to 1 in poll.  No?

For non-poll we don't need the recount, so we can ignore that.

For poll we have two reference - submitting and wakee context.  We
could replace the refcount_t with an atomic bitops - but it would

 a) make the code harder to read than plain old refcounting
 b) make the iocb bigger - at least for now as there is nothing
else we can share that unsigned long with.

Your call - I can respin it either way.


Re: [PATCH 2/4] aio: add a iocb refcount

2018-08-02 Thread Christoph Hellwig
On Thu, Aug 02, 2018 at 12:19:56AM +0100, Al Viro wrote:
> On Mon, Jul 30, 2018 at 09:15:42AM +0200, Christoph Hellwig wrote:
> > This is needed to prevent races caused by the way the ->poll API works.
> > To avoid introducing overhead for other users of the iocbs we initialize
> > it to zero and only do refcount operations if it is non-zero in the
> > completion path.
> 
> refcount_t looks like a bad match - you, AFAICS, have count 0 for everything
> except poll, while for poll you start with 2.  That looks like
>   if (iocb->shared && test_and_clear_bit(0, >shared))
>   return;
>   kill the sucker
> in your iocb_put() and initializing it to 1 in poll.  No?

For non-poll we don't need the recount, so we can ignore that.

For poll we have two reference - submitting and wakee context.  We
could replace the refcount_t with an atomic bitops - but it would

 a) make the code harder to read than plain old refcounting
 b) make the iocb bigger - at least for now as there is nothing
else we can share that unsigned long with.

Your call - I can respin it either way.


Re: [PATCH 2/4] aio: add a iocb refcount

2018-08-01 Thread Al Viro
On Mon, Jul 30, 2018 at 09:15:42AM +0200, Christoph Hellwig wrote:
> This is needed to prevent races caused by the way the ->poll API works.
> To avoid introducing overhead for other users of the iocbs we initialize
> it to zero and only do refcount operations if it is non-zero in the
> completion path.

refcount_t looks like a bad match - you, AFAICS, have count 0 for everything
except poll, while for poll you start with 2.  That looks like
if (iocb->shared && test_and_clear_bit(0, >shared))
return;
kill the sucker
in your iocb_put() and initializing it to 1 in poll.  No?


Re: [PATCH 2/4] aio: add a iocb refcount

2018-08-01 Thread Al Viro
On Mon, Jul 30, 2018 at 09:15:42AM +0200, Christoph Hellwig wrote:
> This is needed to prevent races caused by the way the ->poll API works.
> To avoid introducing overhead for other users of the iocbs we initialize
> it to zero and only do refcount operations if it is non-zero in the
> completion path.

refcount_t looks like a bad match - you, AFAICS, have count 0 for everything
except poll, while for poll you start with 2.  That looks like
if (iocb->shared && test_and_clear_bit(0, >shared))
return;
kill the sucker
in your iocb_put() and initializing it to 1 in poll.  No?


[PATCH 2/4] aio: add a iocb refcount

2018-07-30 Thread Christoph Hellwig
This is needed to prevent races caused by the way the ->poll API works.
To avoid introducing overhead for other users of the iocbs we initialize
it to zero and only do refcount operations if it is non-zero in the
completion path.

Signed-off-by: Christoph Hellwig 
---
 fs/aio.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 27454594e37a..fe2018ada32c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -178,6 +179,7 @@ struct aio_kiocb {
 
struct list_headki_list;/* the aio core uses this
 * for cancellation */
+   refcount_t  ki_refcnt;
 
/*
 * If the aio_resfd field of the userspace iocb is not zero,
@@ -1015,6 +1017,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx 
*ctx)
 
percpu_ref_get(>reqs);
INIT_LIST_HEAD(>ki_list);
+   refcount_set(>ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
 out_put:
@@ -1049,6 +1052,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ret;
 }
 
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+   if (refcount_read(>ki_refcnt) == 0 ||
+   refcount_dec_and_test(>ki_refcnt)) {
+   percpu_ref_put(>ki_ctx->reqs);
+   kmem_cache_free(kiocb_cachep, iocb);
+   }
+}
+
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1118,8 +1130,6 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
eventfd_ctx_put(iocb->ki_eventfd);
}
 
-   kmem_cache_free(kiocb_cachep, iocb);
-
/*
 * We have to order our ring_info tail store above and test
 * of the wait list below outside the wait lock.  This is
@@ -1130,8 +1140,7 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
 
if (waitqueue_active(>wait))
wake_up(>wait);
-
-   percpu_ref_put(>reqs);
+   iocb_put(iocb);
 }
 
 /* aio_read_events_ring
-- 
2.18.0



[PATCH 2/4] aio: add a iocb refcount

2018-07-30 Thread Christoph Hellwig
This is needed to prevent races caused by the way the ->poll API works.
To avoid introducing overhead for other users of the iocbs we initialize
it to zero and only do refcount operations if it is non-zero in the
completion path.

Signed-off-by: Christoph Hellwig 
---
 fs/aio.c | 17 +
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 27454594e37a..fe2018ada32c 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -18,6 +18,7 @@
 #include 
 #include 
 #include 
+#include 
 #include 
 
 #include 
@@ -178,6 +179,7 @@ struct aio_kiocb {
 
struct list_headki_list;/* the aio core uses this
 * for cancellation */
+   refcount_t  ki_refcnt;
 
/*
 * If the aio_resfd field of the userspace iocb is not zero,
@@ -1015,6 +1017,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx 
*ctx)
 
percpu_ref_get(>reqs);
INIT_LIST_HEAD(>ki_list);
+   refcount_set(>ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
 out_put:
@@ -1049,6 +1052,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ret;
 }
 
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+   if (refcount_read(>ki_refcnt) == 0 ||
+   refcount_dec_and_test(>ki_refcnt)) {
+   percpu_ref_put(>ki_ctx->reqs);
+   kmem_cache_free(kiocb_cachep, iocb);
+   }
+}
+
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1118,8 +1130,6 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
eventfd_ctx_put(iocb->ki_eventfd);
}
 
-   kmem_cache_free(kiocb_cachep, iocb);
-
/*
 * We have to order our ring_info tail store above and test
 * of the wait list below outside the wait lock.  This is
@@ -1130,8 +1140,7 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
 
if (waitqueue_active(>wait))
wake_up(>wait);
-
-   percpu_ref_put(>reqs);
+   iocb_put(iocb);
 }
 
 /* aio_read_events_ring
-- 
2.18.0



Re: [PATCH 2/4] aio: add a iocb refcount

2018-07-27 Thread Christoph Hellwig
On Thu, Jul 26, 2018 at 01:57:05PM +0200, Christoph Hellwig wrote:
> On Thu, Jul 26, 2018 at 04:22:27AM -0700, Matthew Wilcox wrote:
> > On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote:
> > > + atomic_tki_refcnt;
> > 
> > Should this be a refcount_t instead?  At first glance your usage seems
> > compatible with refcount_t.
> 
> I though the magic 0 meaning would be incompatible with a refcnt_t.
> I'll investigate and respin if it ends up being ok.

Seems like a recount_t works fine, even with CONFIG_REFCOUNT_FULL,
so I'll switch it over for the next version.


Re: [PATCH 2/4] aio: add a iocb refcount

2018-07-27 Thread Christoph Hellwig
On Thu, Jul 26, 2018 at 01:57:05PM +0200, Christoph Hellwig wrote:
> On Thu, Jul 26, 2018 at 04:22:27AM -0700, Matthew Wilcox wrote:
> > On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote:
> > > + atomic_tki_refcnt;
> > 
> > Should this be a refcount_t instead?  At first glance your usage seems
> > compatible with refcount_t.
> 
> I though the magic 0 meaning would be incompatible with a refcnt_t.
> I'll investigate and respin if it ends up being ok.

Seems like a recount_t works fine, even with CONFIG_REFCOUNT_FULL,
so I'll switch it over for the next version.


Re: [PATCH 2/4] aio: add a iocb refcount

2018-07-26 Thread Christoph Hellwig
On Thu, Jul 26, 2018 at 04:22:27AM -0700, Matthew Wilcox wrote:
> On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote:
> > +   atomic_tki_refcnt;
> 
> Should this be a refcount_t instead?  At first glance your usage seems
> compatible with refcount_t.

I though the magic 0 meaning would be incompatible with a refcnt_t.
I'll investigate and respin if it ends up being ok.


Re: [PATCH 2/4] aio: add a iocb refcount

2018-07-26 Thread Christoph Hellwig
On Thu, Jul 26, 2018 at 04:22:27AM -0700, Matthew Wilcox wrote:
> On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote:
> > +   atomic_tki_refcnt;
> 
> Should this be a refcount_t instead?  At first glance your usage seems
> compatible with refcount_t.

I though the magic 0 meaning would be incompatible with a refcnt_t.
I'll investigate and respin if it ends up being ok.


Re: [PATCH 2/4] aio: add a iocb refcount

2018-07-26 Thread Matthew Wilcox
On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote:
> + atomic_tki_refcnt;

Should this be a refcount_t instead?  At first glance your usage seems
compatible with refcount_t.



Re: [PATCH 2/4] aio: add a iocb refcount

2018-07-26 Thread Matthew Wilcox
On Thu, Jul 26, 2018 at 10:29:01AM +0200, Christoph Hellwig wrote:
> + atomic_tki_refcnt;

Should this be a refcount_t instead?  At first glance your usage seems
compatible with refcount_t.



[PATCH 2/4] aio: add a iocb refcount

2018-07-26 Thread Christoph Hellwig
This is needed to prevent races caused by the way the ->poll API works.
To avoid introducing overhead for other users of the iocbs we initialize
it to zero and only do refcount operations if it is non-zero in the
completion path.

Signed-off-by: Christoph Hellwig 
---
 fs/aio.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 27454594e37a..7f3c159b3e2e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -178,6 +178,7 @@ struct aio_kiocb {
 
struct list_headki_list;/* the aio core uses this
 * for cancellation */
+   atomic_tki_refcnt;
 
/*
 * If the aio_resfd field of the userspace iocb is not zero,
@@ -1015,6 +1016,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx 
*ctx)
 
percpu_ref_get(>reqs);
INIT_LIST_HEAD(>ki_list);
+   atomic_set(>ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
 out_put:
@@ -1049,6 +1051,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ret;
 }
 
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+   if (atomic_read(>ki_refcnt) == 0 ||
+   atomic_dec_and_test(>ki_refcnt)) {
+   percpu_ref_put(>ki_ctx->reqs);
+   kmem_cache_free(kiocb_cachep, iocb);
+   }
+}
+
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1118,8 +1129,6 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
eventfd_ctx_put(iocb->ki_eventfd);
}
 
-   kmem_cache_free(kiocb_cachep, iocb);
-
/*
 * We have to order our ring_info tail store above and test
 * of the wait list below outside the wait lock.  This is
@@ -1130,8 +1139,7 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
 
if (waitqueue_active(>wait))
wake_up(>wait);
-
-   percpu_ref_put(>reqs);
+   iocb_put(iocb);
 }
 
 /* aio_read_events_ring
-- 
2.18.0



[PATCH 2/4] aio: add a iocb refcount

2018-07-26 Thread Christoph Hellwig
This is needed to prevent races caused by the way the ->poll API works.
To avoid introducing overhead for other users of the iocbs we initialize
it to zero and only do refcount operations if it is non-zero in the
completion path.

Signed-off-by: Christoph Hellwig 
---
 fs/aio.c | 16 
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/fs/aio.c b/fs/aio.c
index 27454594e37a..7f3c159b3e2e 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -178,6 +178,7 @@ struct aio_kiocb {
 
struct list_headki_list;/* the aio core uses this
 * for cancellation */
+   atomic_tki_refcnt;
 
/*
 * If the aio_resfd field of the userspace iocb is not zero,
@@ -1015,6 +1016,7 @@ static inline struct aio_kiocb *aio_get_req(struct kioctx 
*ctx)
 
percpu_ref_get(>reqs);
INIT_LIST_HEAD(>ki_list);
+   atomic_set(>ki_refcnt, 0);
req->ki_ctx = ctx;
return req;
 out_put:
@@ -1049,6 +1051,15 @@ static struct kioctx *lookup_ioctx(unsigned long ctx_id)
return ret;
 }
 
+static inline void iocb_put(struct aio_kiocb *iocb)
+{
+   if (atomic_read(>ki_refcnt) == 0 ||
+   atomic_dec_and_test(>ki_refcnt)) {
+   percpu_ref_put(>ki_ctx->reqs);
+   kmem_cache_free(kiocb_cachep, iocb);
+   }
+}
+
 /* aio_complete
  * Called when the io request on the given iocb is complete.
  */
@@ -1118,8 +1129,6 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
eventfd_ctx_put(iocb->ki_eventfd);
}
 
-   kmem_cache_free(kiocb_cachep, iocb);
-
/*
 * We have to order our ring_info tail store above and test
 * of the wait list below outside the wait lock.  This is
@@ -1130,8 +1139,7 @@ static void aio_complete(struct aio_kiocb *iocb, long 
res, long res2)
 
if (waitqueue_active(>wait))
wake_up(>wait);
-
-   percpu_ref_put(>reqs);
+   iocb_put(iocb);
 }
 
 /* aio_read_events_ring
-- 
2.18.0