Mirrored writes are used by live block copy.

Signed-off-by: Marcelo Tosatti <mtosa...@redhat.com>
Signed-off-by: Federico Simoncelli <fsimo...@redhat.com>
Signed-off-by: Paolo Bonzini <pbonz...@redhat.com>
---
 Makefile.objs      |    2 +-
 block/blkmirror.c  |  305 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 cutils.c           |   70 ++++++++++++
 docs/blkmirror.txt |   15 +++
 qemu-common.h      |    2 +
 5 files changed, 393 insertions(+), 1 deletions(-)
 create mode 100644 block/blkmirror.c
 create mode 100644 docs/blkmirror.txt

diff --git a/Makefile.objs b/Makefile.objs
index 808de6a..2302c96 100644
--- a/Makefile.objs
+++ b/Makefile.objs
@@ -34,7 +34,7 @@ block-nested-y += raw.o cow.o qcow.o vdi.o vmdk.o cloop.o dmg.o bochs.o vpc.o vv
 block-nested-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-cache.o
 block-nested-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o
 block-nested-y += qed-check.o
-block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o
+block-nested-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o blkmirror.o
 block-nested-y += stream.o
 block-nested-$(CONFIG_WIN32) += raw-win32.o
 block-nested-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/blkmirror.c b/block/blkmirror.c
new file mode 100644
index 0000000..d894ca8
--- /dev/null
+++ b/block/blkmirror.c
@@ -0,0 +1,305 @@
+/*
+ * Block driver for mirrored writes.
+ *
+ * Copyright (C) 2011-2012 Red Hat, Inc.
+ *
+ * This work is licensed under the terms of the GNU GPL, version 2 or later.
+ * See the COPYING file in the top-level directory.
+ */
+
+#include <stdarg.h>
+#include "block_int.h"
+
+/* Driver state: bs[0] is the source image, bs[1] the mirror destination. */
+typedef struct {
+    BlockDriverState *bs[2];
+} BdrvMirrorState;
+
+typedef struct DupAIOCB DupAIOCB;
+
+/* One of the two child write requests that make up a mirrored write. */
+typedef struct SingleAIOCB {
+    BlockDriverAIOCB *aiocb;
+    int finished;
+    DupAIOCB *parent;
+} SingleAIOCB;
+
+/* A mirrored write: completes once both child requests have completed. */
+struct DupAIOCB {
+    BlockDriverAIOCB common;
+    int count;              /* child requests still in flight */
+
+    BlockDriverCompletionFunc *cb;
+    SingleAIOCB aios[2];
+    int ret;                /* last error reported by a child, or 0 */
+};
+
+/* Valid blkmirror filenames look like
+ * blkmirror:fmt1:path/to/image1:fmt2:path/to/image2
+ *
+ * Opens both images.  Returns 0 on success or a negative errno value, in
+ * which case neither child BlockDriverState is left allocated. */
+static int blkmirror_open(BlockDriverState *bs, const char *filename, int flags)
+{
+    int ret = 0, i;
+    char *tmpbuf, *tok[4], *next;
+    BlockDriver *drv1, *drv2;
+    BdrvMirrorState *m = bs->opaque;
+    BlockDriverState *bk;
+
+    m->bs[0] = bdrv_new("");
+    if (m->bs[0] == NULL) {
+        return -ENOMEM;
+    }
+
+    m->bs[1] = bdrv_new("");
+    if (m->bs[1] == NULL) {
+        bdrv_delete(m->bs[0]);
+        return -ENOMEM;
+    }
+
+    /* estrtok_r() tokenizes in place, so work on a private copy */
+    tmpbuf = g_strdup(filename);
+
+    /* Parse the blkmirror: prefix */
+    if (!strstart(tmpbuf, "blkmirror:", (const char **) &next)) {
+        next = tmpbuf;
+    }
+
+    for (i = 0; i < 4; i++) {
+        if (!next) {
+            ret = -EINVAL;
+            goto out;
+        }
+        tok[i] = estrtok_r(NULL, ":", '\\', &next);
+    }
+
+    drv1 = bdrv_find_whitelisted_format(tok[0]);
+    drv2 = bdrv_find_whitelisted_format(tok[2]);
+    if (!drv1 || !drv2) {
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = bdrv_open(m->bs[0], tok[1], flags, drv1);
+    if (ret < 0) {
+        goto out;
+    }
+
+    /* If we crash, we cannot assume that the destination is a
+     * valid mirror and we have to start over.  So speed up things
+     * by effectively operating on the destination in cache=unsafe
+     * mode.
+     */
+    ret = bdrv_open(m->bs[1], tok[3], flags | BDRV_O_NO_BACKING
+                    | BDRV_O_NO_FLUSH | BDRV_O_CACHE_WB, drv2);
+    if (ret < 0) {
+        goto out;
+    }
+
+    if (m->bs[0]->backing_hd) {
+        bk = m->bs[0]->backing_hd;
+
+        m->bs[1]->backing_hd = bdrv_new("");
+        if (!m->bs[1]->backing_hd) {
+            ret = -ENOMEM;
+            goto out;
+        }
+
+        /* opening the same backing file of the source */
+        ret = bdrv_open(m->bs[1]->backing_hd,
+                        bk->filename, bk->open_flags, bk->drv);
+        if (ret < 0) {
+            goto out;
+        }
+    }
+
+out:
+    g_free(tmpbuf);
+
+    if (ret < 0) {
+        for (i = 0; i < 2; i++) {
+            bdrv_delete(m->bs[i]);
+            m->bs[i] = NULL;
+        }
+    }
+
+    return ret;
+}
+
+/* Release both images. */
+static void blkmirror_close(BlockDriverState *bs)
+{
+    BdrvMirrorState *m = bs->opaque;
+    int i;
+
+    for (i = 0; i < 2; i++) {
+        bdrv_delete(m->bs[i]);
+        m->bs[i] = NULL;
+    }
+}
+
+/* Flush the source, then the destination; stop at the first failure. */
+static coroutine_fn int blkmirror_co_flush(BlockDriverState *bs)
+{
+    BdrvMirrorState *m = bs->opaque;
+    int ret;
+
+    ret = bdrv_co_flush(m->bs[0]);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_co_flush(m->bs[1]);
+}
+
+/* The length of the mirror is the length of the source image. */
+static int64_t blkmirror_getlength(BlockDriverState *bs)
+{
+    BdrvMirrorState *m = bs->opaque;
+
+    return bdrv_getlength(m->bs[0]);
+}
+
+/* Reads are served from the source image only. */
+static BlockDriverAIOCB *blkmirror_aio_readv(BlockDriverState *bs,
+                                             int64_t sector_num,
+                                             QEMUIOVector *qiov,
+                                             int nb_sectors,
+                                             BlockDriverCompletionFunc *cb,
+                                             void *opaque)
+{
+    BdrvMirrorState *m = bs->opaque;
+    return bdrv_aio_readv(m->bs[0], sector_num, qiov, nb_sectors, cb, opaque);
+}
+
+/* Cancel whichever child requests have not completed yet, then release
+ * the parent request.
+ * NOTE(review): this assumes bdrv_aio_cancel() never runs
+ * blkmirror_aio_cb(), which would release the request a second time --
+ * confirm against the cancel implementations of the child drivers. */
+static void dup_aio_cancel(BlockDriverAIOCB *blockacb)
+{
+    DupAIOCB *acb = container_of(blockacb, DupAIOCB, common);
+    int i;
+
+    for (i = 0 ; i < 2; i++) {
+        if (!acb->aios[i].finished) {
+            bdrv_aio_cancel(acb->aios[i].aiocb);
+        }
+    }
+    qemu_aio_release(acb);
+}
+
+static AIOPool dup_aio_pool = {
+    .aiocb_size         = sizeof(DupAIOCB),
+    .cancel             = dup_aio_cancel,
+};
+
+/* Completion callback for one child write; the caller's callback runs
+ * once, after both children have completed, with the last error seen. */
+static void blkmirror_aio_cb(void *opaque, int ret)
+{
+    SingleAIOCB *scb = opaque;
+    DupAIOCB *dcb = scb->parent;
+
+    scb->finished = 1;
+    dcb->count--;
+    assert(dcb->count >= 0);
+    if (ret < 0) {
+        dcb->ret = ret;
+    }
+    if (dcb->count == 0) {
+        dcb->common.cb(dcb->common.opaque, dcb->ret);
+        qemu_aio_release(dcb);
+    }
+}
+
+/* Allocate a DupAIOCB with both child slots marked as in flight.
+ * Returns NULL if the request could not be allocated. */
+static DupAIOCB *dup_aio_get(BlockDriverState *bs,
+                             BlockDriverCompletionFunc *cb,
+                             void *opaque)
+{
+    DupAIOCB *dcb;
+    int i;
+
+    dcb = qemu_aio_get(&dup_aio_pool, bs, cb, opaque);
+    if (!dcb) {
+        return NULL;
+    }
+    dcb->count = 2;
+    for (i = 0; i < 2; i++) {
+        dcb->aios[i].parent = dcb;
+        dcb->aios[i].finished = 0;
+    }
+    dcb->ret = 0;
+
+    return dcb;
+}
+
+/* Submit the same write to both images.  Returns NULL if the request
+ * could not be allocated. */
+static BlockDriverAIOCB *blkmirror_aio_writev(BlockDriverState *bs,
+                                              int64_t sector_num,
+                                              QEMUIOVector *qiov,
+                                              int nb_sectors,
+                                              BlockDriverCompletionFunc *cb,
+                                              void *opaque)
+{
+    BdrvMirrorState *m = bs->opaque;
+    DupAIOCB *dcb = dup_aio_get(bs, cb, opaque);
+    int i;
+
+    /* dup_aio_get() reports failure with NULL; do not dereference it */
+    if (!dcb) {
+        return NULL;
+    }
+
+    for (i = 0; i < 2; i++) {
+        dcb->aios[i].aiocb = bdrv_aio_writev(m->bs[i], sector_num, qiov,
+                                             nb_sectors, &blkmirror_aio_cb,
+                                             &dcb->aios[i]);
+    }
+
+    return &dcb->common;
+}
+
+/* Discard on the source, then on the destination; stop at first failure. */
+static coroutine_fn int blkmirror_co_discard(BlockDriverState *bs,
+                                             int64_t sector_num, int nb_sectors)
+{
+    BdrvMirrorState *m = bs->opaque;
+    int ret;
+
+    ret = bdrv_co_discard(m->bs[0], sector_num, nb_sectors);
+    if (ret < 0) {
+        return ret;
+    }
+
+    return bdrv_co_discard(m->bs[1], sector_num, nb_sectors);
+}
+
+
+static BlockDriver bdrv_blkmirror = {
+    .format_name        = "blkmirror",
+    .protocol_name      = "blkmirror",
+    .instance_size      = sizeof(BdrvMirrorState),
+
+    .bdrv_getlength     = blkmirror_getlength,
+
+    .bdrv_file_open     = blkmirror_open,
+    .bdrv_close         = blkmirror_close,
+    .bdrv_co_flush_to_disk = blkmirror_co_flush,
+    .bdrv_co_discard    = blkmirror_co_discard,
+
+    .bdrv_aio_readv     = blkmirror_aio_readv,
+    .bdrv_aio_writev    = blkmirror_aio_writev,
+};
+
+static void bdrv_blkmirror_init(void)
+{
+    bdrv_register(&bdrv_blkmirror);
+}
+
+block_init(bdrv_blkmirror_init);
diff --git a/cutils.c b/cutils.c
index af308cd..ae8ddfb 100644
--- a/cutils.c
+++ b/cutils.c
@@ -54,6 +54,76 @@ char *pstrcat(char *buf, int buf_size, const char *s)
     return buf;
 }
 
+/* strtok_r with escaping support.
+ *
+ * Returns the next token of str, or of *p when str is NULL, splitting on any
+ * character of delim that is not preceded by esc.  The token is unescaped in
+ * place.  *p is set to the rest of the string, or to NULL once the last token
+ * has been returned; like strtok_r, a later call then returns NULL. */
+char *estrtok_r(char *str, const char *delim, char esc, char **p)
+{
+    int n = 0, escape = 0;
+
+    if (str == NULL) {
+        str = *p;
+    }
+    if (str == NULL) {
+        return NULL;
+    }
+
+    for (*p = str; **p != '\0'; (*p)++) {
+        if (!escape && strchr(delim, **p)) {
+            str[n] = '\0';
+            (*p)++;
+            return str;
+        }
+
+        if (!escape && **p == esc) {
+            escape = 1;
+        } else {
+            escape = 0;
+        }
+
+        if (!escape) {
+            str[n++] = **p;
+        }
+    }
+
+    str[n] = '\0';
+    *p = NULL;
+    return str;
+}
+
+/* strdup with escaping support.
+ *
+ * Returns a newly allocated copy of str in which every character of delim,
+ * and esc itself, is prefixed with esc.  The caller frees it with g_free(). */
+char *estrdup(const char *str, const char *delim, char esc)
+{
+    int i, j;
+    const char *p;
+    char *ret;
+
+    for (p = str, j = 0, i = 0; *p != '\0'; p++, i++) {
+        if (strchr(delim, *p) || *p == esc) {
+            j++;
+        }
+    }
+
+    /* i source characters, j added escapes, and the terminating NUL */
+    ret = g_malloc(i + j + 1);
+
+    for (p = str, i = 0; *p != '\0'; p++, i++) {
+        if (strchr(delim, *p) || *p == esc) {
+            ret[i++] = esc;
+        }
+        ret[i] = *p;
+    }
+
+    ret[i] = '\0';
+    return ret;
+}
+
 int strstart(const char *str, const char *val, const char **ptr)
 {
     const char *p, *q;
diff --git a/docs/blkmirror.txt b/docs/blkmirror.txt
new file mode 100644
index 0000000..c9967eb
--- /dev/null
+++ b/docs/blkmirror.txt
@@ -0,0 +1,15 @@
+Block mirror driver
+-------------------
+
+This driver will mirror writes to two distinct images.
+It's used internally by live block copy.
+
+Format
+------
+
+blkmirror:fmt1:/image1.img:fmt2:/image2.img
+
+A backslash ('\') in front of a colon keeps the colon from being
+treated as a separator.  A backslash itself is written as a double
+backslash ('\\').
+
diff --git a/qemu-common.h b/qemu-common.h
index c5e9cad..af9621f 100644
--- a/qemu-common.h
+++ b/qemu-common.h
@@ -122,6 +122,8 @@ int qemu_timedate_diff(struct tm *tm);
 /* cutils.c */
 void pstrcpy(char *buf, int buf_size, const char *str);
 char *pstrcat(char *buf, int buf_size, const char *s);
+char *estrtok_r(char *str, const char *delim, char esc, char **p);
+char *estrdup(const char *str, const char *delim, char esc);
 int strstart(const char *str, const char *val, const char **ptr);
 int stristart(const char *str, const char *val, const char **ptr);
 int qemu_strnlen(const char *s, int max_len);
-- 
1.7.1