Hi Paolo,

new patch attached. Desciption is still wrong.


> I think this is all unneeded.  Just store rcb->ret into
> rcb->acb->status, and your version of qemu_rbd_aio_cancel should just
> work.
>
> Also, I think the acb->cancelled field is not necessary anymore after
> these changes.


1.) It removes cancelled
2.) It adds status variable
3.) aio cancel now just waits for io completetion

This should fix the write race you mentioned. But it still does not help with discard the kernel starts to cancel as the reply takes too long. See:

[   49.183366] sd 2:0:0:1: [sdb]
[   49.183366] Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
[   49.183366] sd 2:0:0:1: [sdb]
[   49.183366] Sense Key : Aborted Command [current]
[   49.183366] sd 2:0:0:1: [sdb]
[   49.183366] Add. Sense: I/O process terminated
[   49.183366] sd 2:0:0:1: [sdb] CDB:
[ 49.183366] Write same(16): 93 08 00 00 00 00 03 ff ff f8 00 7f ff ff 00 00
[   49.183366] end_request: I/O error, dev sdb, sector 67108856
[   49.183366] sd 2:0:0:1: [sdb]
[   49.183366] Result: hostbyte=DID_OK driverbyte=DRIVER_SENSE
[   49.183366] sd 2:0:0:1: [sdb]
[   49.183366] Sense Key : Aborted Command [current]
[   49.183366] sd 2:0:0:1: [sdb]
[   49.183366] Add. Sense: I/O process terminated
[   49.183366] sd 2:0:0:1: [sdb] CDB:
[ 49.183366] Write same(16): 93 08 00 00 00 00 04 7f ff f7 00 62 00 09 00 00
[   49.183366] end_request: I/O error, dev sdb, sector 75497463

Greets,
Stefan

>From d65f2c2ba8c81842992953dd772355898e702968 Mon Sep 17 00:00:00 2001
From: Stefan Priebe <s.pri...@profhost.ag>
Date: Mon, 19 Nov 2012 15:54:05 +0100
Subject: [PATCH] fix cancel rbd race


Signed-off-by: Stefan Priebe <s.pri...@profhost.ag>
---
 block/rbd.c |   19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/block/rbd.c b/block/rbd.c
index 5a0f79f..7b3bcbb 100644
--- a/block/rbd.c
+++ b/block/rbd.c
@@ -76,7 +76,7 @@ typedef struct RBDAIOCB {
     int64_t sector_num;
     int error;
     struct BDRVRBDState *s;
-    int cancelled;
+    int status;
 } RBDAIOCB;
 
 typedef struct RADOSCB {
@@ -376,9 +376,7 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
     RBDAIOCB *acb = rcb->acb;
     int64_t r;
 
-    if (acb->cancelled) {
-        qemu_vfree(acb->bounce);
-        qemu_aio_release(acb);
+    if (acb->bh) {
         goto done;
     }
 
@@ -406,9 +404,12 @@ static void qemu_rbd_complete_aio(RADOSCB *rcb)
             acb->ret = r;
         }
     }
+    acb->status = acb->ret;
+
     /* Note that acb->bh can be NULL in case where the aio was cancelled */
     acb->bh = qemu_bh_new(rbd_aio_bh_cb, acb);
     qemu_bh_schedule(acb->bh);
+
 done:
     g_free(rcb);
 }
@@ -573,7 +574,10 @@ static void qemu_rbd_close(BlockDriverState *bs)
 static void qemu_rbd_aio_cancel(BlockDriverAIOCB *blockacb)
 {
     RBDAIOCB *acb = (RBDAIOCB *) blockacb;
-    acb->cancelled = 1;
+
+    while (acb->status == -EINPROGRESS) {
+        qemu_aio_wait();
+    }
 }
 
 static AIOPool rbd_aio_pool = {
@@ -642,10 +646,11 @@ static void rbd_aio_bh_cb(void *opaque)
         qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
     }
     qemu_vfree(acb->bounce);
-    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
     qemu_bh_delete(acb->bh);
     acb->bh = NULL;
 
+    acb->common.cb(acb->common.opaque, (acb->ret > 0 ? 0 : acb->ret));
+
     qemu_aio_release(acb);
 }
 
@@ -689,8 +694,8 @@ static BlockDriverAIOCB *rbd_start_aio(BlockDriverState *bs,
     acb->ret = 0;
     acb->error = 0;
     acb->s = s;
-    acb->cancelled = 0;
     acb->bh = NULL;
+    acb->status = -EINPROGRESS;
 
     if (cmd == RBD_AIO_WRITE) {
         qemu_iovec_to_buf(acb->qiov, 0, acb->bounce, qiov->size);
-- 
1.7.10.4

Reply via email to