From: Xiaoguang Wang <xiaoguang.w...@linux.alibaba.com>

[ Upstream commit 65a6543da386838f935d2f03f452c5c0acff2a68 ]

While testing io_uring on arm, we found that io_sq_thread() sometimes keeps
polling for io requests even though there are no inflight io requests in
the block layer. After some investigation, we found a possible race on
io_kiocb.flags; see the racing code below:
  1) in the end of io_write() or io_read()
    req->flags &= ~REQ_F_NEED_CLEANUP;
    kfree(iovec);
    return ret;

  2) in io_complete_rw_iopoll()
    if (res != -EAGAIN)
        req->flags |= REQ_F_IOPOLL_COMPLETED;

In IOPOLL mode, io requests may still be completed by interrupt, so the
above code is not safe: these are concurrent modifications to req->flags,
which is neither protected by a lock nor modified atomically. I also
disassembled io_complete_rw_iopoll() on arm:
   req->flags |= REQ_F_IOPOLL_COMPLETED;
   0xffff000008387b18 <+76>:    ldr     w0, [x19,#104]
   0xffff000008387b1c <+80>:    orr     w0, w0, #0x1000
   0xffff000008387b20 <+84>:    str     w0, [x19,#104]

It seems that "req->flags |= REQ_F_IOPOLL_COMPLETED;" is compiled into
separate load, modify and store instructions, which is obviously not atomic.

To fix this issue, add a new iopoll_completed field to io_kiocb to indicate
whether the io request has completed.

Signed-off-by: Xiaoguang Wang <xiaoguang.w...@linux.alibaba.com>
Signed-off-by: Jens Axboe <ax...@kernel.dk>
Signed-off-by: Greg Kroah-Hartman <gre...@linuxfoundation.org>
---
 fs/io_uring.c |   12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -513,7 +513,6 @@ enum {
        REQ_F_INFLIGHT_BIT,
        REQ_F_CUR_POS_BIT,
        REQ_F_NOWAIT_BIT,
-       REQ_F_IOPOLL_COMPLETED_BIT,
        REQ_F_LINK_TIMEOUT_BIT,
        REQ_F_TIMEOUT_BIT,
        REQ_F_ISREG_BIT,
@@ -556,8 +555,6 @@ enum {
        REQ_F_CUR_POS           = BIT(REQ_F_CUR_POS_BIT),
        /* must not punt to workers */
        REQ_F_NOWAIT            = BIT(REQ_F_NOWAIT_BIT),
-       /* polled IO has completed */
-       REQ_F_IOPOLL_COMPLETED  = BIT(REQ_F_IOPOLL_COMPLETED_BIT),
        /* has linked timeout */
        REQ_F_LINK_TIMEOUT      = BIT(REQ_F_LINK_TIMEOUT_BIT),
        /* timeout request */
@@ -618,6 +615,8 @@ struct io_kiocb {
        int                             cflags;
        bool                            needs_fixed_file;
        u8                              opcode;
+       /* polled IO has completed */
+       u8                              iopoll_completed;
 
        u16                             buf_index;
 
@@ -1760,7 +1759,7 @@ static int io_do_iopoll(struct io_ring_c
                 * If we find a request that requires polling, break out
                 * and complete those lists first, if we have entries there.
                 */
-               if (req->flags & REQ_F_IOPOLL_COMPLETED) {
+               if (READ_ONCE(req->iopoll_completed)) {
                        list_move_tail(&req->list, &done);
                        continue;
                }
@@ -1941,7 +1940,7 @@ static void io_complete_rw_iopoll(struct
                req_set_fail_links(req);
        req->result = res;
        if (res != -EAGAIN)
-               req->flags |= REQ_F_IOPOLL_COMPLETED;
+               WRITE_ONCE(req->iopoll_completed, 1);
 }
 
 /*
@@ -1974,7 +1973,7 @@ static void io_iopoll_req_issued(struct
         * For fast devices, IO may have already completed. If it has, add
         * it to the front so we find it first.
         */
-       if (req->flags & REQ_F_IOPOLL_COMPLETED)
+       if (READ_ONCE(req->iopoll_completed))
                list_add(&req->list, &ctx->poll_list);
        else
                list_add_tail(&req->list, &ctx->poll_list);
@@ -2098,6 +2097,7 @@ static int io_prep_rw(struct io_kiocb *r
                kiocb->ki_flags |= IOCB_HIPRI;
                kiocb->ki_complete = io_complete_rw_iopoll;
                req->result = 0;
+               req->iopoll_completed = 0;
        } else {
                if (kiocb->ki_flags & IOCB_HIPRI)
                        return -EINVAL;


Reply via email to