On 29.10.25 17:57, Kevin Wolf wrote:
Am 28.10.2025 um 17:33 hat Hanna Czenczek geschrieben:
If we wake a coroutine from a different context, we must ensure that it
will yield exactly once (now or later), awaiting that wake.

curl’s current .ret == -EINPROGRESS loop may lead to the coroutine not
yielding if the request finishes before the loop gets run.  To fix it,
drop the loop and just yield exactly once, unless the request is served
from the cache or failed before it was submitted – that last part makes
it a bit complicated, as the result of curl_find_buf() now needs to be a
tristate.

(Can be reproduced with multiqueue by adding a usleep(100000) before the
`while (acb.ret == -EINPROGRESS)` loop.)

Also, add a comment why aio_co_wake() is safe regardless of whether the
coroutine and curl_multi_check_completion() run in the same context.

Cc: [email protected]
Signed-off-by: Hanna Czenczek <[email protected]>
---
  block/curl.c | 55 +++++++++++++++++++++++++++++++++++-----------------
  1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/block/curl.c b/block/curl.c
index 68cf83ce55..65996a8866 100644
--- a/block/curl.c
+++ b/block/curl.c
@@ -124,6 +124,16 @@ typedef struct BDRVCURLState {
      char *proxypassword;
  } BDRVCURLState;
+/** Possible result states of curl_find_buf() */
+typedef enum {
+    /* No buffer found, need to create new request */
+    CURL_NO_BUF_FOUND,
+    /* Buffer found, request filled and done */
+    CURL_REQUEST_FILLED,
+    /* Ongoing request found, need to yield */
+    CURL_REQUEST_ONGOING,
+} CURLFindBufResult;
+
  static void curl_clean_state(CURLState *s);
  static void curl_multi_do(void *arg);
@@ -258,8 +268,8 @@ read_end:
  }
/* Called with s->mutex held. */
-static bool curl_find_buf(BDRVCURLState *s, uint64_t start, uint64_t len,
-                          CURLAIOCB *acb)
+static CURLFindBufResult curl_find_buf(BDRVCURLState *s, uint64_t start,
+                                       uint64_t len, CURLAIOCB *acb)
  {
      int i;
      uint64_t end = start + len;
@@ -289,7 +299,7 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t start, 
uint64_t len,
                  qemu_iovec_memset(acb->qiov, clamped_len, 0, len - 
clamped_len);
              }
              acb->ret = 0;
-            return true;
+            return CURL_REQUEST_FILLED;
          }
// Wait for unfinished chunks
@@ -307,13 +317,13 @@ static bool curl_find_buf(BDRVCURLState *s, uint64_t 
start, uint64_t len,
              for (j=0; j<CURL_NUM_ACB; j++) {
                  if (!state->acb[j]) {
                      state->acb[j] = acb;
-                    return true;
+                    return CURL_REQUEST_ONGOING;
                  }
              }
          }
      }
- return false;
+    return CURL_NO_BUF_FOUND;
  }
/* Called with s->mutex held. */
@@ -378,6 +388,16 @@ static void curl_multi_check_completion(BDRVCURLState *s)
                  acb->ret = error ? -EIO : 0;
                  state->acb[i] = NULL;
                  qemu_mutex_unlock(&s->mutex);
+                /*
+                 * Current AioContext is the BDS context, which may or may not
+                 * be the request (coroutine) context.
+                 * - If it is, the coroutine must have yielded or the FD 
handler
+                 *   (curl_multi_do()/curl_multi_timeout_do()) could not have
+                 *   been called and we would not be here
+                 * - If it is not, it doesn't matter whether it has already
+                 *   yielded or not; it will be scheduled once it does yield
+                 * So aio_co_wake() is safe to call.
+                 */
                  aio_co_wake(acb->co);
                  qemu_mutex_lock(&s->mutex);
              }
@@ -868,7 +888,8 @@ out_noclean:
      return -EINVAL;
  }
-static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
+/* Return whether a request was submitted that requires yielding */
+static bool coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB 
*acb)
  {
      CURLState *state;
      int running;
@@ -877,13 +898,15 @@ static void coroutine_fn 
curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
uint64_t start = acb->offset;
      uint64_t end;
+    CURLFindBufResult find_buf_res;
- qemu_mutex_lock(&s->mutex);
+    QEMU_LOCK_GUARD(&s->mutex);
// In case we have the requested data already (e.g. read-ahead),
      // we can just call the callback and be done.
-    if (curl_find_buf(s, start, acb->bytes, acb)) {
-        goto out;
+    find_buf_res = curl_find_buf(s, start, acb->bytes, acb);
+    if (find_buf_res != CURL_NO_BUF_FOUND) {
+        return find_buf_res == CURL_REQUEST_ONGOING;
      }
// No cache found, so let's start a new request
@@ -898,7 +921,7 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState 
*bs, CURLAIOCB *acb)
      if (curl_init_state(s, state) < 0) {
          curl_clean_state(state);
          acb->ret = -EIO;
-        goto out;
+        return false;
      }
acb->start = 0;
@@ -913,7 +936,7 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState 
*bs, CURLAIOCB *acb)
      if (state->buf_len && state->orig_buf == NULL) {
          curl_clean_state(state);
          acb->ret = -ENOMEM;
-        goto out;
+        return false;
      }
      state->acb[0] = acb;
@@ -925,14 +948,12 @@ static void coroutine_fn curl_setup_preadv(BlockDriverState *bs, CURLAIOCB *acb)
          acb->ret = -EIO;
curl_clean_state(state);
-        goto out;
+        return false;
      }
/* Tell curl it needs to kick things off */
      curl_multi_socket_action(s->multi, CURL_SOCKET_TIMEOUT, 0, &running);
-
-out:
-    qemu_mutex_unlock(&s->mutex);
+    return true;
  }
static int coroutine_fn curl_co_preadv(BlockDriverState *bs,
@@ -941,14 +962,12 @@ static int coroutine_fn curl_co_preadv(BlockDriverState 
*bs,
  {
      CURLAIOCB acb = {
          .co = qemu_coroutine_self(),
-        .ret = -EINPROGRESS,
          .qiov = qiov,
          .offset = offset,
          .bytes = bytes
      };
Let's leave -EINPROGRESS here even if no other code checks for this
value any more. It can be helpful for debugging when you can distinguish
"completed successfully" from "still running".

Does that mean you want me to keep the `complete` field on rbd and nfs, too?

- curl_setup_preadv(bs, &acb);
-    while (acb.ret == -EINPROGRESS) {
+    if (curl_setup_preadv(bs, &acb)) {
          qemu_coroutine_yield();
      }
      return acb.ret;
That whole pattern of returning true and false or even a new enum
everywhere to tell if we are waiting for something felt strange to me.
Took me a while, but I think now I know what I expected instead: Why
don't these places just yield immediately instead of requiring the outer
layer to understand what happened in the functions it called?

I was considering the same.  My result was, if they yielded immediately, we might as well fully inline curl_setup_preadv() into this function.  I didn’t want to do that at the time, but if you prefer, no problem.

Hanna


Reply via email to