On 08.07.2016 21:44, John Snow wrote:

On 07/04/2016 10:38 AM, Denis V. Lunev wrote:
From: Evgeny Yakovlev <eyakov...@virtuozzo.com>

Some guests (win2008 server for example) do a lot of unnecessary
flushing when underlying media has not changed. This adds additional
overhead on host when calling fsync/fdatasync.

This change introduces a write generation scheme in BlockDriverState.
Current write generation is checked against last flushed generation to
avoid unnecessary flushes.

The problem with excessive flushing was found by a performance test
which does parallel directory tree creation (from 2 processes).
Results improved from 0.424 loops/sec to 0.432 loops/sec.
Each loop creates 10^3 directories with 10 files in each.

Signed-off-by: Evgeny Yakovlev <eyakov...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
CC: Kevin Wolf <kw...@redhat.com>
CC: Max Reitz <mre...@redhat.com>
CC: Stefan Hajnoczi <stefa...@redhat.com>
CC: Fam Zheng <f...@redhat.com>
CC: John Snow <js...@redhat.com>
---
  block.c                   |  3 +++
  block/io.c                | 18 ++++++++++++++++++
  include/block/block_int.h |  5 +++++
  3 files changed, 26 insertions(+)

diff --git a/block.c b/block.c
index f4648e9..366fad6 100644
--- a/block.c
+++ b/block.c
@@ -234,6 +234,8 @@ BlockDriverState *bdrv_new(void)
      bs->refcnt = 1;
      bs->aio_context = qemu_get_aio_context();
+ qemu_co_queue_init(&bs->flush_queue);
+
      QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
return bs;
@@ -2582,6 +2584,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
          ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
          bdrv_dirty_bitmap_truncate(bs);
          bdrv_parent_cb_resize(bs);
+        ++bs->write_gen;
      }
      return ret;
  }
diff --git a/block/io.c b/block/io.c
index 7cf3645..a5451b6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1294,6 +1294,7 @@ static int coroutine_fn 
bdrv_aligned_pwritev(BlockDriverState *bs,
      }
      bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
+ ++bs->write_gen;
      bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
if (bs->wr_highest_offset < offset + bytes) {
@@ -2211,6 +2212,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
  {
      int ret;
      BdrvTrackedRequest req;
+    int current_gen = bs->write_gen;
if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
          bdrv_is_sg(bs)) {
@@ -2219,6 +2221,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
      tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+    /* Wait until any previous flushes are completed */
+    while (bs->flush_started_gen != bs->flushed_gen) {
+        qemu_co_queue_wait(&bs->flush_queue);
+    }
+    bs->flush_started_gen = current_gen;
+
      /* Write back all layers by calling one driver function */
      if (bs->drv->bdrv_co_flush) {
          ret = bs->drv->bdrv_co_flush(bs);
@@ -2239,6 +2247,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
          goto flush_parent;
      }
+ /* Check if we really need to flush anything */
+    if (bs->flushed_gen == current_gen) {
+        goto flush_parent;
+    }
+
      BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
      if (bs->drv->bdrv_co_flush_to_disk) {
          ret = bs->drv->bdrv_co_flush_to_disk(bs);
@@ -2279,6 +2292,10 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
  flush_parent:
      ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
  out:
+    /* Notify any pending flushes that we have completed */
+    bs->flushed_gen = current_gen;
+    qemu_co_queue_restart_all(&bs->flush_queue);
+
      tracked_request_end(&req);
      return ret;
  }
@@ -2402,6 +2419,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, 
int64_t sector_num,
      }
      ret = 0;
  out:
+    ++bs->write_gen;
      bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
                     req.bytes >> BDRV_SECTOR_BITS);
      tracked_request_end(&req);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2057156..8543daf 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -420,6 +420,11 @@ struct BlockDriverState {
                           note this is a reference count */
      bool probed;
+ CoQueue flush_queue; /* Serializing flush queue */
+    unsigned int write_gen;         /* Current data generation */
+    unsigned int flush_started_gen; /* Generation for which flush has started */
+    unsigned int flushed_gen;       /* Flushed write generation */
+
      BlockDriver *drv; /* NULL means no media */
      void *opaque;
Breaks qcow2 iotests 026 089 141 144

Sorry, I didn't know those tests existed; I only ran make check previously.
Looking at 026, it looks like the same problem as in IDE and AHCI. The test case injects blkdebug write errors that should be triggered by flushes and expects to see them in the output. However, those flushes are now skipped, so no events are generated. Otherwise the resulting image looks consistent; all data was flushed. I expect the other tests to have the same problem, but maybe the test cases themselves are incorrect now?


--js


Reply via email to