Re: [Qemu-block] [PATCH 06/16] block: add BDS field to count in-flight requests

2016-03-09 Thread Fam Zheng
On Wed, 03/09 09:22, Paolo Bonzini wrote:
> 
> 
> On 09/03/2016 09:00, Fam Zheng wrote:
> >> > On 09/03/2016 04:35, Fam Zheng wrote:
> > > >> >  enum BdrvTrackedRequestType {
> > > >> >  BDRV_TRACKED_READ,
> > > >> >  BDRV_TRACKED_WRITE,
> > > >> > -BDRV_TRACKED_FLUSH,
> > > >> > -BDRV_TRACKED_IOCTL,
> > > >> >  BDRV_TRACKED_DISCARD,
> >>> > > Okay, so flush and ioctl are not needed, but why is discard different?
> >> > 
> >> > Discard can modify the contents of the device, so I think it's safer to
> >> > serialize it against RMW and copy-on-read operations.
> > Okay, that makes sense, but ioctl like SG_IO can also modify content, no?
> 
> If you use SG_IO you shouldn't use RMW (because scsi_read_complete traps
> READ CAPACITY and sets the host block size as the guest block size) or
> copy-on-read (because raw has no backing file).
> 
> Besides, BDRV_TRACKED_IOCTL didn't include sector_num/nr_sectors
> operations so it didn't provide serialization.
> 

Yes, that's right. Thanks.

Fam



Re: [Qemu-block] [PATCH 06/16] block: add BDS field to count in-flight requests

2016-03-09 Thread Paolo Bonzini


On 09/03/2016 09:00, Fam Zheng wrote:
>> > On 09/03/2016 04:35, Fam Zheng wrote:
> > >> >  enum BdrvTrackedRequestType {
> > >> >  BDRV_TRACKED_READ,
> > >> >  BDRV_TRACKED_WRITE,
> > >> > -BDRV_TRACKED_FLUSH,
> > >> > -BDRV_TRACKED_IOCTL,
> > >> >  BDRV_TRACKED_DISCARD,
>>> > > Okay, so flush and ioctl are not needed, but why is discard different?
>> > 
>> > Discard can modify the contents of the device, so I think it's safer to
>> > serialize it against RMW and copy-on-read operations.
> Okay, that makes sense, but ioctl like SG_IO can also modify content, no?

If you use SG_IO you shouldn't use RMW (because scsi_read_complete traps
READ CAPACITY and sets the host block size as the guest block size) or
copy-on-read (because raw has no backing file).

Besides, BDRV_TRACKED_IOCTL didn't include sector_num/nr_sectors
operations so it didn't provide serialization.

Paolo



[Qemu-block] [PATCH 06/16] block: add BDS field to count in-flight requests

2016-02-16 Thread Paolo Bonzini
Unlike tracked_requests, this field also counts throttled requests,
and remains non-zero if an AIO operation needs a BH to be "really"
completed.

With this change, it is no longer necessary to have a dummy
BdrvTrackedRequest for requests that are never serialising, and
it is no longer necessary to poll the AioContext once after
bdrv_requests_pending(bs) returns false.

Signed-off-by: Paolo Bonzini 
---
 block/io.c| 71 +++
 block/mirror.c|  2 ++
 include/block/block_int.h |  8 --
 3 files changed, 54 insertions(+), 27 deletions(-)

diff --git a/block/io.c b/block/io.c
index d8d50b7..a9a23a6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -224,13 +224,7 @@ bool bdrv_requests_pending(BlockDriverState *bs)
 {
 BdrvChild *child;
 
-if (!QLIST_EMPTY(>tracked_requests)) {
-return true;
-}
-if (!qemu_co_queue_empty(>throttled_reqs[0])) {
-return true;
-}
-if (!qemu_co_queue_empty(>throttled_reqs[1])) {
+if (atomic_read(>in_flight)) {
 return true;
 }
 
@@ -268,15 +262,12 @@ static void bdrv_drain_recurse(BlockDriverState *bs)
  */
 void bdrv_drain(BlockDriverState *bs)
 {
-bool busy = true;
-
 bdrv_no_throttling_begin(bs);
 bdrv_io_unplugged_begin(bs);
 bdrv_drain_recurse(bs);
-while (busy) {
+while (bdrv_requests_pending(bs)) {
 /* Keep iterating */
- busy = bdrv_requests_pending(bs);
- busy |= aio_poll(bdrv_get_aio_context(bs), busy);
+ aio_poll(bdrv_get_aio_context(bs), true);
 }
 bdrv_io_unplugged_end(bs);
 bdrv_no_throttling_end(bs);
@@ -291,7 +282,7 @@ void bdrv_drain(BlockDriverState *bs)
 void bdrv_drain_all(void)
 {
 /* Always run first iteration so any pending completion BHs run */
-bool busy = true;
+bool waited = true;
 BlockDriverState *bs = NULL;
 GSList *aio_ctxs = NULL, *ctx;
 
@@ -318,8 +309,8 @@ void bdrv_drain_all(void)
  * request completion.  Therefore we must keep looping until there was no
  * more activity rather than simply draining each device independently.
  */
-while (busy) {
-busy = false;
+while (waited) {
+waited = false;
 
 for (ctx = aio_ctxs; ctx != NULL; ctx = ctx->next) {
 AioContext *aio_context = ctx->data;
@@ -329,12 +320,11 @@ void bdrv_drain_all(void)
 while ((bs = bdrv_next(bs))) {
 if (aio_context == bdrv_get_aio_context(bs)) {
 if (bdrv_requests_pending(bs)) {
-busy = true;
-aio_poll(aio_context, busy);
+aio_poll(aio_context, true);
+waited = true;
 }
 }
 }
-busy |= aio_poll(aio_context, false);
 aio_context_release(aio_context);
 }
 }
@@ -457,6 +447,16 @@ static bool tracked_request_overlaps(BdrvTrackedRequest 
*req,
 return true;
 }
 
+void bdrv_inc_in_flight(BlockDriverState *bs)
+{
+atomic_inc(>in_flight);
+}
+
+void bdrv_dec_in_flight(BlockDriverState *bs)
+{
+atomic_dec(>in_flight);
+}
+
 static bool coroutine_fn wait_serialising_requests(BdrvTrackedRequest *self)
 {
 BlockDriverState *bs = self->bs;
@@ -963,6 +963,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState 
*bs,
 return ret;
 }
 
+bdrv_inc_in_flight(bs);
+
 /* Don't do copy-on-read if we read data before write operation */
 if (bs->copy_on_read && !(flags & BDRV_REQ_NO_SERIALISING)) {
 flags |= BDRV_REQ_COPY_ON_READ;
@@ -1003,6 +1005,7 @@ static int coroutine_fn 
bdrv_co_do_preadv(BlockDriverState *bs,
   use_local_qiov ? _qiov : qiov,
   flags);
 tracked_request_end();
+bdrv_dec_in_flight(bs);
 
 if (use_local_qiov) {
 qemu_iovec_destroy(_qiov);
@@ -1310,6 +1313,8 @@ static int coroutine_fn 
bdrv_co_do_pwritev(BlockDriverState *bs,
 return ret;
 }
 
+bdrv_inc_in_flight(bs);
+
 /* throttling disk I/O */
 if (bs->throttle_state) {
 throttle_group_co_io_limits_intercept(bs, bytes, true);
@@ -1408,6 +1413,7 @@ fail:
 qemu_vfree(tail_buf);
 out:
 tracked_request_end();
+bdrv_dec_in_flight(bs);
 return ret;
 }
 
@@ -2065,6 +2071,7 @@ static const AIOCBInfo bdrv_em_aiocb_info = {
 static void bdrv_aio_bh_cb(void *opaque)
 {
 BlockAIOCBSync *acb = opaque;
+BlockDriverState *bs = acb->common.bs;
 
 if (!acb->is_write && acb->ret >= 0) {
 qemu_iovec_from_buf(acb->qiov, 0, acb->bounce, acb->qiov->size);
@@ -2072,6 +2079,7 @@ static void bdrv_aio_bh_cb(void *opaque)
 qemu_vfree(acb->bounce);
 acb->common.cb(acb->common.opaque, acb->ret);
 qemu_bh_delete(acb->bh);
+bdrv_dec_in_flight(bs);
 acb->bh = NULL;
 qemu_aio_unref(acb);
 }
@@ -2087,6 +2095,7 @@ static BlockAIOCB