Re: [PATCH 6/6] block/io: improve loadvm performance

2020-07-10 Thread Vladimir Sementsov-Ogievskiy

09.07.2020 16:26, Denis V. Lunev wrote:

This patch creates an intermediate buffer for reading from the block driver
state and performs read-ahead into this buffer. The snapshot code reads
sequentially, so we know in advance which offsets will be required and when
they are no longer needed.

The results are fantastic. Switch-to-snapshot times for a 2 GB Fedora 31 VM
on NVMe storage are as follows:

               original    fixed
   cached:       1.84s     1.16s
   non-cached:  12.74s     1.27s

The difference over HDD would be even more significant. :)

Signed-off-by: Denis V. Lunev
CC: Vladimir Sementsov-Ogievskiy
CC: Kevin Wolf
CC: Max Reitz
CC: Stefan Hajnoczi
CC: Fam Zheng
CC: Juan Quintela
CC: Denis Plotnikov


Reviewed-by: Vladimir Sementsov-Ogievskiy 

--
Best regards,
Vladimir



[PATCH 6/6] block/io: improve loadvm performance

2020-07-09 Thread Denis V. Lunev
This patch creates an intermediate buffer for reading from the block driver
state and performs read-ahead into this buffer. The snapshot code reads
sequentially, so we know in advance which offsets will be required and when
they are no longer needed.

The results are fantastic. Switch-to-snapshot times for a 2 GB Fedora 31 VM
on NVMe storage are as follows:

               original    fixed
   cached:       1.84s     1.16s
   non-cached:  12.74s     1.27s

The difference over HDD would be even more significant. :)
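
To illustrate the idea outside of QEMU (a minimal standalone sketch only; the
file-backed helper, the names, and the 1 MiB chunk size below are made up for
the example and are not the code in this patch):

/*
 * Toy model of the read-ahead idea: one prefetched chunk serves strictly
 * sequential reads and is refilled whenever the reader leaves it behind.
 * (Illustration only; the real patch keeps several chunks in flight.)
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define CHUNK_SIZE (1024 * 1024)        /* made-up read-ahead granularity */

typedef struct ReadAhead {
    FILE *backing;                      /* stand-in for the vmstate area   */
    uint8_t buf[CHUNK_SIZE];            /* the intermediate buffer         */
    long offset;                        /* file offset buf[] starts at     */
    size_t filled;                      /* valid bytes currently in buf[]  */
} ReadAhead;

/* Prefetch the chunk the sequential reader will need next. */
static void readahead_refill(ReadAhead *ra, long offset)
{
    fseek(ra->backing, offset, SEEK_SET);
    ra->offset = offset;
    ra->filled = fread(ra->buf, 1, CHUNK_SIZE, ra->backing);
}

/*
 * Serve a read request.  Because the caller reads sequentially, the data
 * is almost always already in buf[], so most calls issue no I/O at all.
 */
static size_t readahead_read(ReadAhead *ra, void *dst, long pos, size_t len)
{
    size_t done = 0;

    while (done < len) {
        long end = ra->offset + (long)ra->filled;
        size_t in_buf;

        if (pos < ra->offset || pos >= end) {
            readahead_refill(ra, pos);  /* miss: fetch the containing chunk */
            if (ra->filled == 0) {
                break;                  /* nothing left to read */
            }
            end = ra->offset + (long)ra->filled;
        }
        in_buf = (size_t)(end - pos);
        if (in_buf > len - done) {
            in_buf = len - done;
        }
        memcpy((uint8_t *)dst + done, ra->buf + (pos - ra->offset), in_buf);
        pos += (long)in_buf;
        done += in_buf;
    }
    return done;
}

The patch below goes further than this sketch: several chunks are kept in
flight at once via an AioTaskPool, so device latency is overlapped with the
consumer processing the previously loaded chunk.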

Signed-off-by: Denis V. Lunev 
CC: Vladimir Sementsov-Ogievskiy 
CC: Kevin Wolf 
CC: Max Reitz 
CC: Stefan Hajnoczi 
CC: Fam Zheng 
CC: Juan Quintela 
CC: Denis Plotnikov 
---
 block/block-backend.c     |  12 +-
 block/io.c                | 239 ++++++++++++++++++++++++++++++++++++++++++++-
 include/block/block_int.h |   3 +
 3 files changed, 250 insertions(+), 4 deletions(-)

diff --git a/block/block-backend.c b/block/block-backend.c
index 5bb11c8e01..09773b3e37 100644
--- a/block/block-backend.c
+++ b/block/block-backend.c
@@ -2201,11 +2201,21 @@ int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
 
 int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size)
 {
+    int ret, ret2;
+
     if (!blk_is_available(blk)) {
         return -ENOMEDIUM;
     }
 
-    return bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
+    ret = bdrv_load_vmstate(blk_bs(blk), buf, pos, size);
+    ret2 = bdrv_finalize_vmstate(blk_bs(blk));
+    if (ret < 0) {
+        return ret;
+    }
+    if (ret2 < 0) {
+        return ret2;
+    }
+    return ret;
 }
 
 int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz)
diff --git a/block/io.c b/block/io.c
index 061d3239b9..510122fbc4 100644
--- a/block/io.c
+++ b/block/io.c
@@ -2739,6 +2739,194 @@ static int bdrv_co_do_save_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
     }
 }
 
+
+typedef struct BdrvLoadVMChunk {
+    void *buf;
+    uint64_t offset;
+    ssize_t bytes;
+
+    QLIST_ENTRY(BdrvLoadVMChunk) list;
+} BdrvLoadVMChunk;
+
+typedef struct BdrvLoadVMState {
+    AioTaskPool *pool;
+
+    int64_t offset;
+    int64_t last_loaded;
+
+    int chunk_count;
+    QLIST_HEAD(, BdrvLoadVMChunk) chunks;
+    QLIST_HEAD(, BdrvLoadVMChunk) loading;
+    CoQueue waiters;
+} BdrvLoadVMState;
+
+typedef struct BdrvLoadVMStateTask {
+    AioTask task;
+
+    BlockDriverState *bs;
+    BdrvLoadVMChunk *chunk;
+} BdrvLoadVMStateTask;
+
+static BdrvLoadVMChunk *bdrv_co_find_loadvmstate_chunk(int64_t pos,
+                                                       BdrvLoadVMChunk *c)
+{
+    for (; c != NULL; c = QLIST_NEXT(c, list)) {
+        if (c->offset <= pos && c->offset + c->bytes > pos) {
+            return c;
+        }
+    }
+
+    return NULL;
+}
+
+static void bdrv_free_loadvm_chunk(BdrvLoadVMChunk *c)
+{
+    qemu_vfree(c->buf);
+    g_free(c);
+}
+
+static coroutine_fn int bdrv_co_vmstate_load_task_entry(AioTask *task)
+{
+    int err = 0;
+    BdrvLoadVMStateTask *t = container_of(task, BdrvLoadVMStateTask, task);
+    BdrvLoadVMChunk *c = t->chunk;
+    BdrvLoadVMState *state = t->bs->loadvm_state;
+    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, c->buf, c->bytes);
+
+    bdrv_inc_in_flight(t->bs);
+    err = t->bs->drv->bdrv_load_vmstate(t->bs, &qiov, c->offset);
+    bdrv_dec_in_flight(t->bs);
+
+    QLIST_REMOVE(c, list);
+    if (err == 0) {
+        QLIST_INSERT_HEAD(&state->chunks, c, list);
+    } else {
+        bdrv_free_loadvm_chunk(c);
+    }
+    qemu_co_queue_restart_all(&state->waiters);
+
+    return err;
+}
+
+
+static void bdrv_co_loadvmstate_next(BlockDriverState *bs, BdrvLoadVMChunk *c)
+{
+    BdrvLoadVMStateTask *t = g_new(BdrvLoadVMStateTask, 1);
+    BdrvLoadVMState *state = bs->loadvm_state;
+
+    c->offset = state->last_loaded;
+
+    *t = (BdrvLoadVMStateTask) {
+        .task.func = bdrv_co_vmstate_load_task_entry,
+        .bs = bs,
+        .chunk = c,
+    };
+
+    QLIST_INSERT_HEAD(&state->loading, t->chunk, list);
+    state->chunk_count++;
+    state->last_loaded += c->bytes;
+
+    aio_task_pool_start_task(state->pool, &t->task);
+}
+
+
+static void bdrv_co_loadvmstate_start(BlockDriverState *bs)
+{
+    int i;
+    size_t buf_size = MAX(bdrv_get_cluster_size(bs), 1 * MiB);
+
+    for (i = 0; i < BDRV_VMSTATE_WORKERS_MAX; i++) {
+        BdrvLoadVMChunk *c = g_new0(BdrvLoadVMChunk, 1);
+
+        c->buf = qemu_blockalign(bs, buf_size);
+        c->bytes = buf_size;
+
+        bdrv_co_loadvmstate_next(bs, c);
+    }
+}
+
+static int bdrv_co_do_load_vmstate(BlockDriverState *bs, QEMUIOVector *qiov,
+                                   int64_t pos)
+{
+    BdrvLoadVMState *state = bs->loadvm_state;
+    BdrvLoadVMChunk *c;
+    size_t off;
+    int64_t start_pos = pos;
+
+    if (state == NULL) {
+        if (pos != 0) {
+            goto slow_path;
+        }
+
+        state = g_new(BdrvLoadVMState, 1);
+        *state = (BdrvLoadVMState) {
+            .pool = aio_task_pool_new(BDRV_VMSTATE_WORKERS_MAX),
+