From: Jens Axboe <ax...@kernel.dk>

[ Upstream commit b7db41c9e03b5189bc94993bd50e4506ac9e34c1 ]

When switching to TWA_SIGNAL for task_work notifications, we also made
any signal-based condition in io_cqring_wait() return -ERESTARTSYS.
This breaks applications that rely on using signals to abort a task
that is waiting for events.
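
For context, a minimal liburing-based userspace sketch of the pattern
that regressed (illustrative only, not part of the patch; error
handling elided). As with poll(2) or epoll_wait(2), the wait is
expected to fail with -EINTR when a signal arrives, rather than being
silently restarted:

	#include <errno.h>
	#include <liburing.h>
	#include <signal.h>
	#include <stdio.h>

	static void on_sigusr1(int sig) { (void)sig; }

	int main(void)
	{
		struct io_uring ring;
		struct io_uring_cqe *cqe;
		struct sigaction sa = { .sa_handler = on_sigusr1 };
		int ret;

		sigaction(SIGUSR1, &sa, NULL);
		io_uring_queue_init(8, &ring, 0);

		/* Another thread/process sends SIGUSR1 to abort the wait. */
		ret = io_uring_wait_cqe(&ring, &cqe);
		if (ret == -EINTR)
			printf("wait aborted by signal\n");

		io_uring_queue_exit(&ring);
		return 0;
	}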

Check if we have a signal pending because of queued task_work, and
repeat the signal check once we've run the task_work. This provides a
reliable way of telling the two apart.
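
The reason signal_pending() can be true without a real signal: with
TWA_SIGNAL, queueing task_work marks the task as if it had been
signaled. A simplified sketch of that notification path (assumed from
kernel/task_work.c of this era, shown for context only):

	/* Hedged sketch: how TWA_SIGNAL queueing makes signal_pending() true. */
	static void notify_twa_signal(struct task_struct *task)
	{
		unsigned long flags;

		if (lock_task_sighand(task, &flags)) {
			/*
			 * Flag pending task_work and wake the task as if
			 * signaled; no real signal is queued. This is why
			 * io_cqring_wait() must clear JOBCTL_TASK_WORK and
			 * recalculate the pending state before deciding a
			 * real signal arrived.
			 */
			task->jobctl |= JOBCTL_TASK_WORK;
			signal_wake_up(task, 0);
			unlock_task_sighand(task, &flags);
		}
	}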

Additionally, only use TWA_SIGNAL if we are using an eventfd. If not,
we don't have the dependency situation described in the original commit,
and we can get by with just using TWA_RESUME like we previously did.
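
To see the dependency the eventfd case creates (again an illustrative
liburing-based sketch, not part of the patch): the task may be blocked
in an unrelated kernel wait such as epoll_wait() on the ring's
eventfd, so only a signal-style notification can interrupt it to run
the task_work that posts the completion and signals the eventfd:

	#include <liburing.h>
	#include <sys/epoll.h>
	#include <sys/eventfd.h>

	/* Hypothetical helper: consume completions indirectly via an eventfd. */
	static void wait_via_eventfd(struct io_uring *ring)
	{
		int evfd = eventfd(0, 0);
		int epfd = epoll_create1(0);
		struct epoll_event ev = { .events = EPOLLIN, .data.fd = evfd };

		io_uring_register_eventfd(ring, evfd);
		epoll_ctl(epfd, EPOLL_CTL_ADD, evfd, &ev);

		/*
		 * Blocked here, not in io_cqring_wait(): with plain
		 * TWA_RESUME, the task_work that completes the request
		 * (and so signals evfd) would only run on return to
		 * userspace, which this wait never reaches.
		 */
		epoll_wait(epfd, &ev, 1, -1);
	}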

Fixes: ce593a6c480a ("io_uring: use signal based task_work running")
Cc: sta...@vger.kernel.org # v5.7
Reported-by: Andres Freund <and...@anarazel.de>
Tested-by: Andres Freund <and...@anarazel.de>
Signed-off-by: Jens Axboe <ax...@kernel.dk>
Signed-off-by: Sasha Levin <sas...@kernel.org>
---
 fs/io_uring.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/fs/io_uring.c b/fs/io_uring.c
index 51362a619fd50..2be6ea0103405 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -4136,14 +4136,22 @@ struct io_poll_table {
        int error;
 };
 
-static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb,
-                               int notify)
+static int io_req_task_work_add(struct io_kiocb *req, struct callback_head *cb)
 {
        struct task_struct *tsk = req->task;
-       int ret;
+       struct io_ring_ctx *ctx = req->ctx;
+       int ret, notify = TWA_RESUME;
 
-       if (req->ctx->flags & IORING_SETUP_SQPOLL)
+       /*
+        * SQPOLL kernel thread doesn't need notification, just a wakeup.
+        * If we're not using an eventfd, then TWA_RESUME is always fine,
+        * as we won't have dependencies between request completions for
+        * other kernel wait conditions.
+        */
+       if (ctx->flags & IORING_SETUP_SQPOLL)
                notify = 0;
+       else if (ctx->cq_ev_fd)
+               notify = TWA_SIGNAL;
 
        ret = task_work_add(tsk, cb, notify);
        if (!ret)
@@ -4174,7 +4182,7 @@ static int __io_async_wake(struct io_kiocb *req, struct io_poll_iocb *poll,
         * of executing it. We can't safely execute it anyway, as we may not
         * have the needed state needed for it anyway.
         */
-       ret = io_req_task_work_add(req, &req->task_work, TWA_SIGNAL);
+       ret = io_req_task_work_add(req, &req->task_work);
        if (unlikely(ret)) {
                WRITE_ONCE(poll->canceled, true);
                tsk = io_wq_get_task(req->ctx->io_wq);
@@ -6279,7 +6287,14 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
                if (current->task_works)
                        task_work_run();
                if (signal_pending(current)) {
-                       ret = -ERESTARTSYS;
+                       if (current->jobctl & JOBCTL_TASK_WORK) {
+                               spin_lock_irq(&current->sighand->siglock);
+                               current->jobctl &= ~JOBCTL_TASK_WORK;
+                               recalc_sigpending();
+                               spin_unlock_irq(&current->sighand->siglock);
+                               continue;
+                       }
+                       ret = -EINTR;
                        break;
                }
                if (io_should_wake(&iowq, false))
@@ -6288,7 +6303,7 @@ static int io_cqring_wait(struct io_ring_ctx *ctx, int min_events,
        } while (1);
        finish_wait(&ctx->wait, &iowq.wq);
 
-       restore_saved_sigmask_unless(ret == -ERESTARTSYS);
+       restore_saved_sigmask_unless(ret == -EINTR);
 
        return READ_ONCE(rings->cq.head) == READ_ONCE(rings->cq.tail) ? ret : 0;
 }
-- 
2.25.1


