From: Evgeny Yakovlev <eyakov...@virtuozzo.com>
Some guests (win2008 server for example) do a lot of unnecessary
flushing when underlying media has not changed. This adds additional
overhead on host when calling fsync/fdatasync.
This change introduces a write generation scheme in BlockDriverState.
Current write generation is checked against last flushed generation to
avoid unnessesary flushes.
The problem with excessive flushing was found by a performance test
which does parallel directory tree creation (from 2 processes).
Results improved from 0.424 loops/sec to 0.432 loops/sec.
Each loop creates 10^3 directories with 10 files in each.
Signed-off-by: Evgeny Yakovlev <eyakov...@virtuozzo.com>
Signed-off-by: Denis V. Lunev <d...@openvz.org>
CC: Kevin Wolf <kw...@redhat.com>
CC: Max Reitz <mre...@redhat.com>
CC: Stefan Hajnoczi <stefa...@redhat.com>
CC: Fam Zheng <f...@redhat.com>
CC: John Snow <js...@redhat.com>
---
block.c | 3 +++
block/io.c | 18 ++++++++++++++++++
include/block/block_int.h | 5 +++++
3 files changed, 26 insertions(+)
diff --git a/block.c b/block.c
index f4648e9..366fad6 100644
--- a/block.c
+++ b/block.c
@@ -234,6 +234,8 @@ BlockDriverState *bdrv_new(void)
bs->refcnt = 1;
bs->aio_context = qemu_get_aio_context();
+ qemu_co_queue_init(&bs->flush_queue);
+
QTAILQ_INSERT_TAIL(&all_bdrv_states, bs, bs_list);
return bs;
@@ -2582,6 +2584,7 @@ int bdrv_truncate(BlockDriverState *bs, int64_t offset)
ret = refresh_total_sectors(bs, offset >> BDRV_SECTOR_BITS);
bdrv_dirty_bitmap_truncate(bs);
bdrv_parent_cb_resize(bs);
+ ++bs->write_gen;
}
return ret;
}
diff --git a/block/io.c b/block/io.c
index 7cf3645..a5451b6 100644
--- a/block/io.c
+++ b/block/io.c
@@ -1294,6 +1294,7 @@ static int coroutine_fn
bdrv_aligned_pwritev(BlockDriverState *bs,
}
bdrv_debug_event(bs, BLKDBG_PWRITEV_DONE);
+ ++bs->write_gen;
bdrv_set_dirty(bs, start_sector, end_sector - start_sector);
if (bs->wr_highest_offset < offset + bytes) {
@@ -2211,6 +2212,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{
int ret;
BdrvTrackedRequest req;
+ int current_gen = bs->write_gen;
if (!bs || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs) ||
bdrv_is_sg(bs)) {
@@ -2219,6 +2221,12 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
tracked_request_begin(&req, bs, 0, 0, BDRV_TRACKED_FLUSH);
+ /* Wait until any previous flushes are completed */
+ while (bs->flush_started_gen != bs->flushed_gen) {
+ qemu_co_queue_wait(&bs->flush_queue);
+ }
+ bs->flush_started_gen = current_gen;
+
/* Write back all layers by calling one driver function */
if (bs->drv->bdrv_co_flush) {
ret = bs->drv->bdrv_co_flush(bs);
@@ -2239,6 +2247,11 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
goto flush_parent;
}
+ /* Check if we really need to flush anything */
+ if (bs->flushed_gen == current_gen) {
+ goto flush_parent;
+ }
+
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK);
if (bs->drv->bdrv_co_flush_to_disk) {
ret = bs->drv->bdrv_co_flush_to_disk(bs);
@@ -2279,6 +2292,10 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
flush_parent:
ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0;
out:
+ /* Notify any pending flushes that we have completed */
+ bs->flushed_gen = current_gen;
+ qemu_co_queue_restart_all(&bs->flush_queue);
+
tracked_request_end(&req);
return ret;
}
@@ -2402,6 +2419,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs,
int64_t sector_num,
}
ret = 0;
out:
+ ++bs->write_gen;
bdrv_set_dirty(bs, req.offset >> BDRV_SECTOR_BITS,
req.bytes >> BDRV_SECTOR_BITS);
tracked_request_end(&req);
diff --git a/include/block/block_int.h b/include/block/block_int.h
index 2057156..8543daf 100644
--- a/include/block/block_int.h
+++ b/include/block/block_int.h
@@ -420,6 +420,11 @@ struct BlockDriverState {
note this is a reference count */
bool probed;
+ CoQueue flush_queue; /* Serializing flush queue */
+ unsigned int write_gen; /* Current data generation */
+ unsigned int flush_started_gen; /* Generation for which flush has started
*/
+ unsigned int flushed_gen; /* Flushed write generation */
+
BlockDriver *drv; /* NULL means no media */
void *opaque;