Adding the request to sq_overflow isn't enough: 1. luringcb->sqeq is uninitialized if there was space in the sq ring at submission time. 2. Not all code paths invoke ioq_submit() after processing completions, so the request could hang.
Additional bugs include checking for EINTR instead of -EINTR and forgetting to skip the completion callback when a request is resubmitted. Fix this by always initializing luringcb->sqeq and ensuring that all code paths invoke ioq_submit() after appending to sq_overflow. Ensure that luring_process_completions() marks the cqe seen and decrements in_flight before resubmitting the request. Signed-off-by: Stefan Hajnoczi <stefa...@redhat.com> --- block/io_uring.c | 64 ++++++++++++++++++++++++++++-------------------- 1 file changed, 37 insertions(+), 27 deletions(-) diff --git a/block/io_uring.c b/block/io_uring.c index 19919da4c9..97e4f876d7 100644 --- a/block/io_uring.c +++ b/block/io_uring.c @@ -87,6 +87,18 @@ int luring_register_fd(LuringState *s, unsigned int fd) s->fd.head, s->fd.size); } +/** + * luring_resubmit: + * + * Resubmit a request by appending it to sq_overflow. The caller must ensure + * that ioq_submit() is called later so that sq_overflow requests are started. + */ +static void luring_resubmit(LuringState *s, LuringAIOCB *luringcb) +{ + QSIMPLEQ_INSERT_TAIL(&s->io_q.sq_overflow, luringcb, next); + s->io_q.in_queue++; +} + /** * luring_process_completions: * @s: AIO state @@ -102,7 +114,6 @@ int luring_register_fd(LuringState *s, unsigned int fd) static void luring_process_completions(LuringState *s) { struct io_uring_cqe *cqes; - int ret; /* * Request completion callbacks can run the nested event loop. @@ -122,11 +133,20 @@ static void luring_process_completions(LuringState *s) qemu_bh_schedule(s->completion_bh); while (io_uring_peek_cqe(&s->ring, &cqes) == 0) { + LuringAIOCB *luringcb; + int ret; + if (!cqes) { break; } - LuringAIOCB *luringcb = io_uring_cqe_get_data(cqes); + + luringcb = io_uring_cqe_get_data(cqes); ret = cqes->res; + io_uring_cqe_seen(&s->ring, cqes); + cqes = NULL; + + /* Change counters one-by-one because we can be nested. */ + s->io_q.in_flight--; trace_luring_process_completion(s, luringcb, ret); @@ -143,17 +163,12 @@ static void luring_process_completions(LuringState *s) ret = -ENOSPC;; } /* Add to overflow queue to be resubmitted later */ - } else if (ret == EINTR) { - QSIMPLEQ_INSERT_TAIL(&s->io_q.sq_overflow, luringcb, next); + } else if (ret == -EINTR) { + luring_resubmit(s, luringcb); + continue; } luringcb->ret = ret; - - io_uring_cqe_seen(&s->ring, cqes); - cqes = NULL; - /* Change counters one-by-one because we can be nested. */ - s->io_q.in_flight--; - /* * If the coroutine is already entered it must be in ioq_submit() * and will notice luringcb->ret has been filled in when it @@ -245,16 +260,16 @@ static int ioq_submit(LuringState *s) } s->io_q.in_flight += ret; s->io_q.in_queue -= ret; + + if (s->io_q.in_flight) { + /* + * We can try to complete something just right away if there are + * still requests in-flight. + */ + luring_process_completions(s); + } } s->io_q.blocked = (s->io_q.in_queue > 0); - - if (s->io_q.in_flight) { - /* - * We can try to complete something just right away if there are - * still requests in-flight. - */ - luring_process_completions(s); - } return ret; } @@ -290,15 +305,7 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, uint64_t offset, int type) { int ret; - struct io_uring_sqe *sqes = io_uring_get_sqe(&s->ring); - /* - *If the ring is full and cannot fetch new sqes, add the request to - * to an overflow queue to be submitted later - */ - if (!sqes) { - sqes = &luringcb->sqeq; - QSIMPLEQ_INSERT_TAIL(&s->io_q.sq_overflow, luringcb, next); - } + struct io_uring_sqe *sqes = &luringcb->sqeq; switch (type) { case QEMU_AIO_WRITE: @@ -318,7 +325,10 @@ static int luring_do_submit(int fd, LuringAIOCB *luringcb, LuringState *s, abort(); } io_uring_sqe_set_data(sqes, luringcb); + + QSIMPLEQ_INSERT_TAIL(&s->io_q.sq_overflow, luringcb, next); s->io_q.in_queue++; + trace_luring_do_submit(s, s->io_q.blocked, s->io_q.plugged, s->io_q.in_queue, s->io_q.in_flight); if (!s->io_q.blocked && -- 2.21.0