Signed-off-by: Fam Zheng <f...@redhat.com>
---
 block/Makefile.objs |    1 +
 block/qbm.c         | 1315 +++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 1316 insertions(+)
 create mode 100644 block/qbm.c

diff --git a/block/Makefile.objs b/block/Makefile.objs
index cdd8655..1111ba7 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -5,6 +5,7 @@ block-obj-y += qed-check.o
 block-obj-$(CONFIG_VHDX) += vhdx.o vhdx-endian.o vhdx-log.o
 block-obj-y += quorum.o
 block-obj-y += parallels.o blkdebug.o blkverify.o
+block-obj-y += qbm.o
 block-obj-y += block-backend.o snapshot.o qapi.o
 block-obj-$(CONFIG_WIN32) += raw-win32.o win32-aio.o
 block-obj-$(CONFIG_POSIX) += raw-posix.o
diff --git a/block/qbm.c b/block/qbm.c
new file mode 100644
index 0000000..91e129f
--- /dev/null
+++ b/block/qbm.c
@@ -0,0 +1,1315 @@
+/*
+ * Block driver for the QBM format
+ *
+ * Copyright (c) 2016 Red Hat Inc.
+ *
+ * Authors:
+ *     Fam Zheng <f...@redhat.com>
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to 
deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 
FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+
+#include "qemu-common.h"
+#include "block/block_int.h"
+#include "qapi/qmp/qerror.h"
+#include "qemu/error-report.h"
+#include "qemu/module.h"
+#include "migration/migration.h"
+#include "qapi/qmp/qint.h"
+#include "qapi/qmp/qjson.h"
+
+#define QBM_BUF_SIZE_MAX (32 << 20)
+
+typedef enum QBMBitmapType {
+    QBM_TYPE_DIRTY,
+    QBM_TYPE_ALLOC,
+} QBMBitmapType;
+
+typedef struct QBMBitmap {
+    BdrvDirtyBitmap *bitmap;
+    BdrvChild *file;
+    char *name;
+    QBMBitmapType type;
+} QBMBitmap;
+
+typedef struct BDRVQBMState {
+    BdrvChild *image;
+    BdrvDirtyBitmap *alloc_bitmap;
+    QDict *desc;
+    QDict *backing_dict;
+    QBMBitmap *bitmaps;
+    int num_bitmaps;
+} BDRVQBMState;
+
+static const char *qbm_token_consume(const char *p, const char *token)
+{
+    size_t len = strlen(token);
+
+    if (!p) {
+        return NULL;
+    }
+    while (*p && (*p == ' ' ||
+                  *p == '\t' ||
+                  *p == '\n' ||
+                  *p == '\r')) {
+        p++;
+    }
+    if (strncmp(p, token, len)) {
+        return p + len;
+    }
+    return NULL;
+}
+
+static int qbm_probe(const uint8_t *buf, int buf_size, const char *filename)
+{
+    const char *p;
+    p = strstr((const char *)buf, "\"QBM\"");
+    if (!p) {
+        p = strstr((const char *)buf, "'QBM'");
+    }
+    if (!p) {
+        return 0;
+    }
+    p = qbm_token_consume(p, ":");
+    p = qbm_token_consume(p, "{");
+    if (p && *p) {
+        return 100;
+    }
+    return 0;
+}
+
+static void qbm_load_bitmap(BlockDriverState *bs, QBMBitmap *bm, Error **errp)
+{
+    int r;
+    BDRVQBMState *s = bs->opaque;
+    int64_t bitmap_file_size;
+    int64_t bitmap_size;
+    uint8_t *buf = NULL;
+    BlockDriverState *file = bm->file->bs;
+    int64_t image_size = bdrv_getlength(s->image->bs);
+
+    if (image_size < 0) {
+        error_setg(errp, "Cannot get image size: %s", s->image->bs->filename);
+        return;
+    }
+    bitmap_size = bdrv_dirty_bitmap_serialization_size(bm->bitmap, 0,
+                        bdrv_dirty_bitmap_size(bm->bitmap));
+    if (bitmap_size > QBM_BUF_SIZE_MAX) {
+        error_setg(errp, "Bitmap too big");
+        return;
+    }
+    bitmap_file_size = bdrv_getlength(file);
+    if (bitmap_file_size < bitmap_size) {
+        error_setg(errp,
+                   "Bitmap \"%s\" file too small "
+                   "(expecting at least %ld bytes but got %ld bytes): %s",
+                   bm->name, bitmap_size, bitmap_file_size, file->filename);
+        goto out;
+    }
+    buf = qemu_blockalign(file, bitmap_size);
+    r = bdrv_pread(file, 0, buf, bitmap_size);
+    if (r < 0) {
+        error_setg(errp, "Failed to read bitmap file \"%s\"",
+                   file->filename);
+        goto out;
+    }
+    bdrv_dirty_bitmap_deserialize_part(bm->bitmap, buf, 0, bs->total_sectors,
+                                       true);
+
+out:
+    g_free(buf);
+}
+
+static int qbm_reopen_prepare(BDRVReopenState *state,
+                              BlockReopenQueue *queue, Error **errp)
+{
+    return 0;
+}
+
+static void qbm_get_fullname(BlockDriverState *bs, char *dest, size_t sz,
+                             const char *filename)
+{
+    const char *base, *p;
+
+    base = bs->exact_filename[0] ? bs->exact_filename : bs->filename;
+
+    if (strstart(base, "json:", NULL)) {
+        /* There is not much we can do with a json: file name, try bs->file and
+         * cross our fingers. */
+        if (bs->file) {
+            qbm_get_fullname(bs->file->bs, dest, sz, filename);
+        } else {
+            pstrcpy(dest, sz, filename);
+        }
+        return;
+    }
+
+    p = strrchr(base, '/');
+
+    assert(sz > 0);
+    if (path_has_protocol(filename) || path_is_absolute(filename)) {
+        pstrcpy(dest, sz, filename);
+        return;
+    }
+
+    if (p) {
+        pstrcpy(dest, MIN(sz, p - base + 2), base);
+    } else {
+        dest[0] = '\0';
+    }
+    pstrcat(dest, sz, filename);
+}
+
+static BdrvChild *qbm_open_image(BlockDriverState *bs,
+                                 QDict *image, QDict *options,
+                                 Error **errp)
+{
+    BdrvChild *child;
+    const char *filename = qdict_get_try_str(image, "file");
+    const char *fmt = qdict_get_try_str(image, "format");
+    const char *checksum = qdict_get_try_str(image, "checksum");
+    char fullname[PATH_MAX];
+
+    if (!filename) {
+        error_setg(errp, "Image missing 'file' field");
+        return NULL;
+    }
+    if (!fmt) {
+        error_setg(errp, "Image missing 'format' field");
+        return NULL;
+    }
+    qbm_get_fullname(bs, fullname, sizeof(fullname), filename);
+    qdict_put(options, "image.driver", qstring_from_str(fmt));
+    child = bdrv_open_child(fullname, options, "image", bs, &child_file, false,
+                            errp);
+    if (!child) {
+        goto out;
+    }
+    if (checksum) {
+        /* TODO: compare checksum when we support this */
+        error_setg(errp, "Checksum not supported");
+    }
+out:
+    return child;
+}
+
+/* Open and load the persistent bitmap and return the created QBMBitmap object.
+ * If reuse_bitmap is not NULL, we skip bdrv_create_dirty_bitmap and reuse it.
+ **/
+static QBMBitmap *qbm_open_bitmap(BlockDriverState *bs,
+                                  const char *name,
+                                  const char *filename, int granularity,
+                                  QBMBitmapType type,
+                                  BdrvDirtyBitmap *reuse_bitmap,
+                                  Error **errp)
+{
+    BDRVQBMState *s = bs->opaque;
+    QBMBitmap *bm;
+    BdrvChild *file;
+    BdrvDirtyBitmap *bdrv_bitmap;
+    char *key;
+    QDict *options;
+    char fullname[PATH_MAX];
+    Error *local_err = NULL;
+
+    qbm_get_fullname(bs, fullname, sizeof(fullname), filename);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+    s->bitmaps = g_realloc_n(s->bitmaps, s->num_bitmaps + 1,
+                             sizeof(QBMBitmap));
+
+    /* Create options for the bitmap child BDS */
+    options = qdict_new();
+    key = g_strdup_printf("bitmap-%s.driver", name);
+    qdict_put(options, key, qstring_from_str("raw"));
+    g_free(key);
+
+    /* Open the child as plain "file" */
+    key = g_strdup_printf("bitmap-%s", name);
+    file = bdrv_open_child(fullname, options, key, bs, &child_file, false,
+                           errp);
+    g_free(key);
+    QDECREF(options);
+    if (!file) {
+        return NULL;
+    }
+
+    if (reuse_bitmap) {
+        bdrv_bitmap = reuse_bitmap;
+    } else {
+        bdrv_bitmap = bdrv_create_dirty_bitmap(bs, granularity, name, errp);
+        if (!bdrv_bitmap) {
+            bdrv_unref_child(bs, file);
+            return NULL;
+        }
+        bdrv_dirty_bitmap_set_persistent(bs, bdrv_bitmap, true, true, NULL);
+    }
+    bdrv_create_meta_dirty_bitmap(bdrv_bitmap, BDRV_SECTOR_SIZE);
+
+    bm = &s->bitmaps[s->num_bitmaps++];
+    bm->file = file;
+    bm->name = g_strdup(name);
+    bm->type = type;
+    bm->bitmap = bdrv_bitmap;
+    if (type == QBM_TYPE_ALLOC) {
+        assert(!s->alloc_bitmap);
+        s->alloc_bitmap = bdrv_bitmap;
+        /* Align the request to granularity so the block layer will take care
+         * of RMW for partial writes. */
+        bs->request_alignment = granularity;
+    }
+    return bm;
+}
+
+typedef struct QBMIterState {
+    QDict *options;
+    BlockDriverState *bs;
+    Error *err;
+    bool has_backing;
+} QBMIterState;
+
+static void qbm_bitmap_iter(const char *key, QObject *obj, void *opaque)
+{
+    QDict *dict;
+    const char *filename, *typename;
+    QBMBitmapType type;
+    int granularity;
+    QBMIterState *state = opaque;
+    BDRVQBMState *s = state->bs->opaque;
+    QBMBitmap *bm;
+
+    if (state->err) {
+        return;
+    }
+    dict = qobject_to_qdict(obj);
+    if (!dict) {
+        error_setg(&state->err, "'%s' is not a dicionary", key);
+        return;
+    }
+    filename = qdict_get_try_str(dict, "file");
+    if (!filename) {
+        error_setg(&state->err, "\"file\" is missing in bitmap \"%s\"", key);
+        return;
+    }
+    typename = qdict_get_try_str(dict, "type");
+    if (!typename) {
+        error_setg(&state->err, "\"value\" is missing in bitmap \"%s\"", key);
+        return;
+    } else if (!strcmp(typename, "dirty")) {
+        type = QBM_TYPE_DIRTY;
+    } else if (!strcmp(typename, "allocation")) {
+        QDict *backing_dict = qdict_get_qdict(dict, "backing");
+        type = QBM_TYPE_ALLOC;
+        if (backing_dict) {
+            if (state->has_backing) {
+                error_setg(&state->err, "Multiple backing is not supported");
+                return;
+            }
+            state->has_backing = true;
+            pstrcpy(state->bs->backing_file, PATH_MAX,
+                    qdict_get_try_str(backing_dict, "file"));
+            if (qdict_haskey(backing_dict, "format")) {
+                pstrcpy(state->bs->backing_format,
+                        sizeof(state->bs->backing_format),
+                        qdict_get_try_str(backing_dict, "format"));
+                }
+            s->backing_dict = backing_dict;
+            if (!strlen(state->bs->backing_file)) {
+                error_setg(&state->err, "Backing file name not specified");
+                return;
+            }
+        }
+    } else {
+        error_setg(&state->err, "\"value\" is missing in bitmap \"%s\"", key);
+        return;
+    }
+    granularity = qdict_get_try_int(dict, "granularity-bytes", -1);
+    if (granularity == -1) {
+        error_setg(&state->err, "\"granularity\" is missing in bitmap \"%s\"",
+                   key);
+        return;
+    } else if (granularity & (granularity - 1)) {
+        error_setg(&state->err, "\"granularity\" must be power of two");
+        return;
+    } else if (granularity < 512) {
+        error_setg(&state->err, "\"granularity\" too small");
+        return;
+    }
+
+    bm = qbm_open_bitmap(state->bs, key, filename, granularity,
+                         type, NULL, &state->err);
+    if (!bm) {
+        return;
+    }
+    qbm_load_bitmap(state->bs, bm, &state->err);
+}
+
+static void qbm_release_bitmap(BlockDriverState *bs, QBMBitmap *bm)
+{
+    bdrv_release_meta_dirty_bitmap(bm->bitmap);
+    bdrv_release_dirty_bitmap(bs, bm->bitmap);
+    bdrv_unref_child(bs, bm->file);
+}
+
+static void qbm_release_bitmaps(BlockDriverState *bs)
+{
+    int i;
+    BDRVQBMState *s = bs->opaque;
+
+    for (i = 0; i < s->num_bitmaps; i++) {
+        QBMBitmap *bm = &s->bitmaps[i];
+        bdrv_flush(bm->file->bs);
+        qbm_release_bitmap(bs, bm);
+        g_free(bm->name);
+    }
+}
+
+static int qbm_open_bitmaps(BlockDriverState *bs, QDict *bitmaps,
+                            QDict *options, Error **errp)
+{
+    QBMIterState state = (QBMIterState) {
+        .bs = bs,
+        .options = options,
+    };
+    qdict_iter(bitmaps, qbm_bitmap_iter, &state);
+    if (state.err) {
+        qbm_release_bitmaps(bs);
+        error_propagate(errp, state.err);
+        return -EINVAL;
+    }
+    return 0;
+}
+
+static int qbm_open(BlockDriverState *bs, QDict *options, int flags,
+                    Error **errp)
+{
+    BDRVQBMState *s = bs->opaque;
+    int ret;
+    int64_t len;
+    char *desc;
+    QDict *dict, *image_dict, *bitmaps;
+
+    len = bdrv_getlength(bs->file->bs);
+    if (len > QBM_BUF_SIZE_MAX) {
+        error_setg(errp, "QBM description file too big.");
+        return -ENOMEM;
+    } else if (len < 0) {
+        error_setg(errp, "Failed to get descriptor file size");
+        return len;
+    } else if (!len) {
+        error_setg(errp, "Empty file");
+        return -EINVAL;
+    }
+
+    desc = qemu_blockalign(bs->file->bs, len);
+    ret = bdrv_pread(bs->file->bs, 0, desc, len);
+    if (ret < 0) {
+        goto out;
+    }
+    dict = qobject_to_qdict(qobject_from_json(desc));
+    if (!dict || !qdict_haskey(dict, "QBM")) {
+        error_setg(errp, "Failed to parse json from file");
+        ret = -EINVAL;
+        goto out;
+    }
+    s->desc = qdict_get_qdict(dict, "QBM");
+    if (!s->desc) {
+        error_setg(errp, "Json doesn't have key \"QBM\"");
+        ret = -EINVAL;
+        goto out;
+    }
+    if (qdict_get_try_int(s->desc, "version", -1) != 1) {
+        error_setg(errp, "Invalid version of json file");
+        ret = -EINVAL;
+        goto out;
+    }
+    if (!qdict_haskey(s->desc, "image")) {
+        error_setg(errp, "Key \"image\" not found in json file");
+        ret = -EINVAL;
+        goto out;
+    }
+    image_dict = qdict_get_qdict(s->desc, "image");
+    if (!image_dict) {
+        error_setg(errp, "\"image\" information invalid");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    s->image = qbm_open_image(bs, image_dict, options, errp);
+    if (!s->image) {
+        ret = -EIO;
+        goto out;
+    }
+    bs->total_sectors = bdrv_nb_sectors(s->image->bs);
+    if (bs->total_sectors < 0) {
+        error_setg(errp, "Failed to get image size");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    bitmaps = qdict_get_qdict(s->desc, "bitmaps");
+    if (!bitmaps) {
+        error_setg(errp, "\"bitmaps\" not found");
+        ret = -EINVAL;
+        goto out;
+    }
+
+    ret = qbm_open_bitmaps(bs, bitmaps, options, errp);
+
+out:
+    g_free(desc);
+    return ret;
+}
+
+
+static void qbm_refresh_limits(BlockDriverState *bs, Error **errp)
+{
+    BDRVQBMState *s = bs->opaque;
+    Error *local_err = NULL;
+
+    bdrv_refresh_limits(s->image->bs, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return;
+    }
+    bs->bl.min_mem_alignment = s->image->bs->bl.min_mem_alignment;
+    bs->bl.opt_mem_alignment = s->image->bs->bl.opt_mem_alignment;
+    bs->bl.write_zeroes_alignment = bdrv_get_cluster_size(bs);
+}
+
+static int64_t coroutine_fn qbm_co_get_block_status(BlockDriverState *bs,
+                                                    int64_t sector_num,
+                                                    int nb_sectors,
+                                                    int *pnum,
+                                                    BlockDriverState **file)
+{
+    bool alloc = true;
+    int64_t next;
+    int cluster_sectors;
+    BDRVQBMState *s = bs->opaque;
+    int64_t ret = BDRV_BLOCK_OFFSET_VALID;
+
+    if (!s->alloc_bitmap) {
+        return bdrv_get_block_status(s->image->bs, sector_num, nb_sectors,
+                                     pnum, file);
+    }
+
+    ret |= BDRV_BLOCK_OFFSET_MASK & (sector_num << BDRV_SECTOR_BITS);
+    next = sector_num;
+    cluster_sectors = bdrv_dirty_bitmap_granularity(s->alloc_bitmap)
+                            >> BDRV_SECTOR_BITS;
+    while (next < sector_num + nb_sectors) {
+        if (next == sector_num) {
+            alloc = bdrv_get_dirty(bs, s->alloc_bitmap, next);
+        } else if (bdrv_get_dirty(bs, s->alloc_bitmap, next) != alloc) {
+            break;
+        }
+        next += cluster_sectors - next % cluster_sectors;
+    }
+    *pnum = MIN(next - sector_num, nb_sectors);
+
+    ret |= alloc ? BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED : 0;
+    *file = alloc ? s->image->bs : NULL;
+
+    return ret;
+}
+
+static int qbm_save_desc(BlockDriverState *desc_file, QDict *desc)
+{
+    int ret;
+    const char *str;
+    size_t len;
+    QString *json_str = NULL;
+    QDict *td;
+
+    ret = bdrv_truncate(desc_file, 0);
+    if (ret) {
+        return ret;
+    }
+
+    td = qdict_new();
+    /* Grab an extra reference so it doesn't get freed with td */
+    QINCREF(desc);
+    qdict_put(td, "QBM", desc);
+
+    json_str = qobject_to_json_pretty(QOBJECT(td));
+    str = qstring_get_str(json_str);
+    len = strlen(str);
+    ret = bdrv_pwrite(desc_file, 0, str, len);
+    /* End the json file with a new line, doesn't hurt if it fails. */
+    bdrv_pwrite(desc_file, len, "\n", 1);
+    /* bdrv_pwrite write padding zeros to align to sector, we don't need that
+     * for a text file */
+    bdrv_truncate(desc_file, len + 1);
+    QDECREF(json_str);
+    QDECREF(td);
+    return ret == len ? 0 : -EIO;
+}
+
+static coroutine_fn int qbm_co_readv(BlockDriverState *bs, int64_t sector_num,
+                                     int nb_sectors, QEMUIOVector *qiov)
+{
+    QEMUIOVector local_qiov;
+    BDRVQBMState *s = bs->opaque;
+    BdrvDirtyBitmapIter *iter;
+    int done_sectors = 0;
+    int ret;
+    int64_t next_allocated;
+    int64_t cur_sector = sector_num;
+    int granularity_sectors;
+
+    if (!s->alloc_bitmap) {
+        return bdrv_co_readv(s->image->bs, sector_num, nb_sectors, qiov);
+    }
+    granularity_sectors = bdrv_dirty_bitmap_granularity(s->alloc_bitmap)
+                            >> BDRV_SECTOR_BITS;
+    iter = bdrv_dirty_iter_new(s->alloc_bitmap, sector_num);
+    qemu_iovec_init(&local_qiov, qiov->niov);
+    do {
+        int64_t n;
+        int64_t consective_end;
+        next_allocated = bdrv_dirty_iter_next(iter);
+        if (next_allocated < 0) {
+            next_allocated = sector_num + nb_sectors;
+        } else {
+            next_allocated = MIN(next_allocated, sector_num + nb_sectors);
+        }
+        if (next_allocated > cur_sector) {
+            /* Read [cur_sector, next_allocated) from backing */
+            n = next_allocated - cur_sector;
+            qemu_iovec_reset(&local_qiov);
+            qemu_iovec_concat(&local_qiov, qiov,
+                              done_sectors << BDRV_SECTOR_BITS,
+                              n << BDRV_SECTOR_BITS);
+            ret = bdrv_co_readv(bs->backing->bs, cur_sector, n, &local_qiov);
+            if (ret) {
+                goto out;
+            }
+            done_sectors += n;
+            cur_sector += n;
+            if (done_sectors == nb_sectors) {
+                break;
+            }
+        }
+        consective_end = next_allocated;
+        /* Find consective allocated sectors */
+        while (consective_end < sector_num + nb_sectors) {
+            int64_t next = bdrv_dirty_iter_next(iter);
+            if (next < 0 || next - consective_end > granularity_sectors) {
+                /* No more consective sectors */
+                consective_end += granularity_sectors
+                                  - consective_end % granularity_sectors;
+                break;
+            }
+            consective_end = next;
+        }
+        consective_end = MIN(consective_end, sector_num + nb_sectors);
+        n = consective_end - cur_sector;
+        assert(n > 0);
+        /* Read [cur_sector, consective_end] from image */
+        qemu_iovec_reset(&local_qiov);
+        qemu_iovec_concat(&local_qiov, qiov,
+                          done_sectors << BDRV_SECTOR_BITS,
+                          n << BDRV_SECTOR_BITS);
+        ret = bdrv_co_readv(s->image->bs, cur_sector, n, &local_qiov);
+        if (ret) {
+            goto out;
+        }
+        done_sectors += n;
+        cur_sector += n;
+    } while (done_sectors < nb_sectors);
+out:
+    qemu_iovec_destroy(&local_qiov);
+    bdrv_dirty_iter_free(iter);
+    return ret;
+}
+
+static inline void qbm_check_alignment(BDRVQBMState *s, int64_t sector_num,
+                                       int nb_sectors)
+{
+    if (s->alloc_bitmap) {
+        int cluster_sectors = bdrv_dirty_bitmap_granularity(s->alloc_bitmap)
+                                >> BDRV_SECTOR_BITS;
+        assert(sector_num % cluster_sectors == 0);
+        assert(nb_sectors % cluster_sectors == 0);
+    }
+}
+
+typedef struct {
+    int inflight;
+    Coroutine *co;
+    int ret;
+} QBMBitmapWriteTracker;
+
+typedef struct {
+    QEMUIOVector qiov;
+    uint8_t *buf;
+    QBMBitmapWriteTracker *tracker;
+    BlockDriverState *bs;
+    QBMBitmap *bitmap;
+    int64_t sector_num;
+    int nb_sectors;
+} QBMBitmapWriteData;
+
+static void qbm_write_bitmap_cb(void *opaque, int ret)
+{
+    QBMBitmapWriteData *data = opaque;
+    QBMBitmapWriteTracker *tracker = data->tracker;
+
+    qemu_iovec_destroy(&data->qiov);
+    qemu_vfree(data->buf);
+    if (!ret) {
+        bdrv_dirty_bitmap_reset_meta(data->bs,
+                                     data->bitmap->bitmap,
+                                     data->sector_num, data->nb_sectors);
+    }
+    g_free(data);
+    tracker->ret = tracker->ret ? : ret;
+    if (!--tracker->inflight) {
+        qemu_coroutine_enter(tracker->co, NULL);
+    }
+}
+
+static int qbm_write_bitmap(BlockDriverState *bs, QBMBitmap *bm,
+                            int64_t sector_num, int nb_sectors,
+                            QBMBitmapWriteTracker *tracker)
+{
+    QBMBitmapWriteData *data;
+    int64_t start, end;
+    int64_t file_sector_num;
+    int file_nb_sectors;
+    size_t buf_size;
+    /* Each bit in the bitmap tracks bdrv_dirty_bitmap_granularity(bm->bitmap)
+     * bytes of guest data, so each sector in the bitmap tracks
+     * (bdrv_dirty_bitmap_granularity(bm->bitmap) * BDRV_SECTOR_SIZE *
+     * BITS_PER_BYTE) bytes of guest data, so in sector unit is: */
+    int64_t sectors_per_bitmap_sector =
+        BITS_PER_BYTE * bdrv_dirty_bitmap_granularity(bm->bitmap);
+    int align = MAX(bdrv_dirty_bitmap_serialization_align(bm->bitmap),
+                    sectors_per_bitmap_sector);
+
+    /* The start sector that is being marked dirty. */
+    start = QEMU_ALIGN_DOWN(sector_num, align);
+    /* The end sector that is being marked dirty. */
+    end = MIN(QEMU_ALIGN_UP(sector_num + nb_sectors, align),
+              bs->total_sectors);
+
+    if (!bdrv_dirty_bitmap_get_meta(bs, bm->bitmap, sector_num, nb_sectors)) {
+        return 0;
+    }
+
+    file_sector_num = start / sectors_per_bitmap_sector;
+    buf_size = bdrv_dirty_bitmap_serialization_size(bm->bitmap, start,
+                                                    end - start);
+    buf_size = QEMU_ALIGN_UP(buf_size, BDRV_SECTOR_SIZE);
+    file_nb_sectors = buf_size >> BDRV_SECTOR_BITS;
+
+    data = g_new(QBMBitmapWriteData, 1);
+    data->buf = qemu_blockalign0(bm->file->bs, buf_size);
+    bdrv_dirty_bitmap_serialize_part(bm->bitmap, data->buf, start,
+                                     end - start);
+    qemu_iovec_init(&data->qiov, 1);
+    qemu_iovec_add(&data->qiov, data->buf, buf_size);
+    data->tracker = tracker;
+    data->sector_num = start;
+    data->nb_sectors = end - start;
+    data->bs = bm->file->bs;
+    data->bitmap = bm;
+    bdrv_aio_writev(bm->file->bs, file_sector_num, &data->qiov,
+                    file_nb_sectors, qbm_write_bitmap_cb,
+                    data);
+    return -EINPROGRESS;
+}
+
+static int qbm_write_bitmaps(BlockDriverState *bs, int64_t sector_num,
+                             int nb_sectors)
+{
+    int i;
+    BDRVQBMState *s = bs->opaque;
+    QBMBitmapWriteTracker tracker = (QBMBitmapWriteTracker) {
+        .inflight = 1, /* So that no aio completion will call
+                          qemu_coroutine_enter before we yield. */
+        .co = qemu_coroutine_self(),
+    };
+
+    for (i = 0; i < s->num_bitmaps; i++) {
+        int ret = qbm_write_bitmap(bs, &s->bitmaps[i],
+                                   sector_num, nb_sectors, &tracker);
+        if (ret == -EINPROGRESS) {
+            tracker.inflight++;
+        } else if (ret < 0) {
+            tracker.ret = ret;
+            break;
+        }
+    }
+    tracker.inflight--;
+    if (tracker.inflight) {
+        /* At least one aio in submitted, wait. */
+        qemu_coroutine_yield();
+    }
+    return tracker.ret;
+}
+
+static coroutine_fn int qbm_co_writev(BlockDriverState *bs, int64_t sector_num,
+                                      int nb_sectors, QEMUIOVector *qiov)
+{
+    int ret;
+    BDRVQBMState *s = bs->opaque;
+
+    qbm_check_alignment(s, sector_num, nb_sectors);
+    ret = bdrv_co_writev(s->image->bs, sector_num, nb_sectors, qiov);
+    if (ret) {
+        return ret;
+    }
+    return qbm_write_bitmaps(bs, sector_num, nb_sectors);
+}
+
+static int coroutine_fn qbm_co_write_zeroes(BlockDriverState *bs,
+                                            int64_t sector_num,
+                                            int nb_sectors,
+                                            BdrvRequestFlags flags)
+{
+    int ret;
+    BDRVQBMState *s = bs->opaque;
+
+    qbm_check_alignment(s, sector_num, nb_sectors);
+    ret = bdrv_co_write_zeroes(s->image->bs, sector_num, nb_sectors, flags);
+    if (ret) {
+        return ret;
+    }
+    return qbm_write_bitmaps(bs, sector_num, nb_sectors);
+}
+
+static coroutine_fn int qbm_co_discard(BlockDriverState *bs,
+                                       int64_t sector_num,
+                                       int nb_sectors)
+{
+    int ret;
+    BDRVQBMState *s = bs->opaque;
+
+    ret = bdrv_co_discard(s->image->bs, sector_num, nb_sectors);
+    if (ret) {
+        return ret;
+    }
+    return qbm_write_bitmaps(bs, sector_num, nb_sectors);
+}
+
+static int qbm_make_empty(BlockDriverState *bs)
+{
+    BDRVQBMState *s = bs->opaque;
+    BlockDriverState *image_bs = s->image->bs;
+    int ret = 0;
+
+    if (image_bs->drv->bdrv_make_empty) {
+        ret = image_bs->drv->bdrv_make_empty(s->image->bs);
+        if (ret) {
+            return ret;
+        }
+    } else if (!s->alloc_bitmap) {
+        return -ENOTSUP;
+    }
+    if (s->alloc_bitmap) {
+        int i;
+        bdrv_clear_dirty_bitmap(s->alloc_bitmap, NULL);
+        for (i = 0; i < s->num_bitmaps; i++) {
+            QBMBitmap *bm = &s->bitmaps[i];
+            if (bm->bitmap != s->alloc_bitmap) {
+                continue;
+            }
+            ret = bdrv_write_zeroes(bm->file->bs, 0,
+                                    DIV_ROUND_UP(bdrv_getlength(bm->file->bs),
+                                                 BDRV_SECTOR_SIZE),
+                                    BDRV_REQ_MAY_UNMAP);
+        }
+    }
+    return ret;
+}
+
+/* Create a file with given size, and return the relative path. */
+static char *qbm_create_file(BlockDriverState *bs, const char *name,
+                             const char *ext,
+                             int64_t size, Error **errp)
+{
+    char *filename = NULL;
+    Error *local_err = NULL;
+    char fullname[PATH_MAX];
+    char path[PATH_MAX];
+    char prefix[PATH_MAX];
+    char postfix[PATH_MAX];
+
+    filename_decompose(bs->filename, path, prefix,
+                       postfix, PATH_MAX, &local_err);
+    if (local_err) {
+        error_propagate(errp, local_err);
+        return NULL;
+    }
+    filename = g_strdup_printf("%s-%s.%s%s", prefix, name, ext, postfix);
+    qbm_get_fullname(bs, fullname, sizeof(fullname), filename);
+
+    bdrv_img_create(fullname, "raw", NULL, NULL, NULL, size, 0,
+                    &local_err, true);
+    if (local_err) {
+        g_free(filename);
+        filename = NULL;
+        error_propagate(errp, local_err);
+    }
+    return filename;
+}
+
+static QDict *qbm_create_image_dict(BlockDriverState *bs,
+                                    const char *image_name,
+                                    const char *format,
+                                    Error **errp)
+{
+    QDict *dict;
+    char fullname[PATH_MAX];
+
+    qbm_get_fullname(bs, fullname, sizeof(fullname), image_name);
+    dict = qdict_new();
+    qdict_put(dict, "file", qstring_from_str(image_name));
+    qdict_put(dict, "format", qstring_from_str(format ? : ""));
+    /* TODO: Set checksum when we support it. */
+
+    return dict;
+}
+
+static inline QDict *qbm_make_bitmap_dict(const char *filename,
+                                          int granularity,
+                                          QBMBitmapType type)
+{
+    QDict *d = qdict_new();
+    qdict_put(d, "file", qstring_from_str(filename));
+    qdict_put(d, "granularity-bytes", qint_from_int(granularity));
+    switch (type) {
+    case QBM_TYPE_DIRTY:
+        qdict_put(d, "type", qstring_from_str("dirty"));
+        break;
+    case QBM_TYPE_ALLOC:
+        qdict_put(d, "type", qstring_from_str("allocation"));
+        break;
+    default:
+        abort();
+    }
+    return d;
+}
+
+static QDict *qbm_create_dirty_bitmaps(BlockDriverState *bs,
+                                       uint64_t image_size,
+                                       int granularity,
+                                       int n, Error **errp)
+{
+    int i;
+    QDict *dict = qdict_new();
+    int64_t bitmap_size = DIV_ROUND_UP(image_size, granularity * 
BITS_PER_BYTE);
+
+    for (i = 0; i < n; i++) {
+        char *bitmap_filename;
+        char *key = g_strdup_printf("dirty.%d", i);
+
+        bitmap_filename = qbm_create_file(bs, key, "bitmap", bitmap_size,
+                                          errp);
+        if (!bitmap_filename) {
+            g_free(key);
+            QDECREF(dict);
+            dict = NULL;
+            goto out;
+        }
+        qdict_put(dict, key,
+                  qbm_make_bitmap_dict(bitmap_filename, granularity,
+                                       QBM_TYPE_DIRTY));
+        g_free(key);
+    }
+out:
+    return dict;
+}
+
+static QDict *qbm_create_allocation(BlockDriverState *bs,
+                                    uint64_t image_size,
+                                    int granularity,
+                                    const char *backing_file,
+                                    const char *format,
+                                    Error **errp)
+{
+    char *bitmap_filename;
+    QDict *ret, *backing;
+    int64_t bitmap_size = DIV_ROUND_UP(image_size, granularity * 
BITS_PER_BYTE);
+
+    bitmap_filename = qbm_create_file(bs, "allocation", "bitmap",
+                                      bitmap_size,
+                                      errp);
+    if (!bitmap_filename) {
+        return NULL;
+    }
+
+    ret = qdict_new();
+
+    qdict_put(ret, "file", qstring_from_str(bitmap_filename));
+    if (format) {
+        qdict_put(ret, "format", qstring_from_str(format));
+    }
+    qdict_put(ret, "type", qstring_from_str("allocation"));
+    qdict_put(ret, "granularity-bytes", qint_from_int(granularity));
+
+    backing = qbm_create_image_dict(bs, backing_file, format, errp);
+    if (!backing) {
+        QDECREF(ret);
+        ret = NULL;
+        goto out;
+    }
+    qdict_put(ret, "backing", backing);
+
+out:
+    g_free(bitmap_filename);
+    return ret;
+}
+
+static int qbm_create(const char *filename, QemuOpts *opts, Error **errp)
+{
+    char *backing_file;
+    const char *image_filename;
+    int granularity, dirty_bitmaps;
+    int64_t image_size;
+    int ret;
+    QDict *dict = NULL, *bitmaps, *image;
+    BlockDriverState *bs = NULL, *image_bs = NULL;
+    char fullname[PATH_MAX];
+
+    ret = bdrv_create_file(filename, NULL, errp);
+    if (ret) {
+        return ret;
+    }
+    ret = bdrv_open(&bs, filename, NULL, NULL,
+                    BDRV_O_RDWR | BDRV_O_PROTOCOL, errp);
+    if (ret) {
+        return ret;
+    }
+
+    image_filename = qemu_opt_get_del(opts, "image");
+    if (!image_filename) {
+        /* Try to create one */
+        int64_t size = qemu_opt_get_size_del(opts, "size", -1);
+        if (size == -1) {
+            error_setg(errp, "Invalid size specified for data image");
+            ret = -EINVAL;
+            goto out;
+        }
+        image_filename = qbm_create_file(bs, "data", "img", size, errp);
+        if (!image_filename) {
+            ret = -EIO;
+            goto out;
+        }
+    }
+
+    granularity = qemu_opt_get_number(opts, "granularity", 65536);
+    dirty_bitmaps = qemu_opt_get_number(opts, "dirty-bitmaps", 0);
+
+    qbm_get_fullname(bs, fullname, sizeof(fullname), image_filename);
+    ret = bdrv_open(&image_bs, fullname, NULL, NULL, 0, errp);
+    if (ret) {
+        goto out;
+    }
+    image_size = bdrv_getlength(image_bs);
+
+    dict = qdict_new();
+    bitmaps = qbm_create_dirty_bitmaps(bs, image_size, granularity,
+                                       dirty_bitmaps, errp);
+    image = qbm_create_image_dict(bs, image_filename,
+                                  bdrv_get_format_name(image_bs), errp);
+    if (!image) {
+        goto out;
+    }
+
+    qdict_put(dict, "version", qint_from_int(1));
+    qdict_put(dict, "creator", qstring_from_str("QEMU"));
+    qdict_put(dict, "bitmaps", bitmaps);
+    qdict_put(dict, "image", image);
+
+    backing_file = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FILE);
+    if (backing_file) {
+        char *backing_fmt = qemu_opt_get_del(opts, BLOCK_OPT_BACKING_FMT);
+        QDict *alloc = qbm_create_allocation(bs, image_size,
+                                             granularity, backing_file,
+                                             backing_fmt, errp);
+        if (!alloc) {
+            ret = -EIO;
+            goto out;
+        }
+        /* Create "allocation" bitmap. */
+        qdict_put(bitmaps, "allocation", alloc);
+        g_free(backing_file);
+        backing_file = NULL;
+        g_free(backing_fmt);
+    }
+
+    ret = qbm_save_desc(bs, dict);
+
+out:
+    bdrv_unref(image_bs);
+    bdrv_unref(bs);
+    QDECREF(dict);
+    return ret;
+}
+
+static int64_t qbm_getlength(BlockDriverState *bs)
+{
+    BDRVQBMState *s = bs->opaque;
+    return bdrv_getlength(s->image->bs);
+}
+
+static void qbm_close(BlockDriverState *bs)
+{
+    BDRVQBMState *s = bs->opaque;
+
+    qbm_release_bitmaps(bs);
+    bdrv_unref(s->image->bs);
+    g_free(s->bitmaps);
+    QDECREF(s->desc);
+}
+
+static int qbm_truncate(BlockDriverState *bs, int64_t offset)
+{
+    BDRVQBMState *s = bs->opaque;
+    /* Truncate the image only, the bitmaps's sizes will be made correct when
+     * saving. */
+    return bdrv_truncate(s->image->bs, offset);
+}
+
+static coroutine_fn int qbm_co_flush(BlockDriverState *bs)
+{
+    int ret;
+    int i;
+    BDRVQBMState *s = bs->opaque;
+
+    ret = bdrv_flush(s->image->bs);
+    for (i = 0; ret >= 0 && i < s->num_bitmaps; i++) {
+        ret = bdrv_flush(s->bitmaps[i].file->bs);
+    }
+    return ret;
+}
+
+static int qbm_change_backing_file(BlockDriverState *bs,
+                                   const char *backing_file,
+                                   const char *backing_fmt)
+{
+    BDRVQBMState *s = bs->opaque;
+    if (!s->backing_dict) {
+        return -ENOTSUP;
+    }
+    if (backing_file) {
+        qdict_put(s->backing_dict, "file", qstring_from_str(backing_file));
+        qdict_put(s->backing_dict, "format",
+                  qstring_from_str(backing_fmt ? : ""));
+    } else {
+        int i;
+        QDict *bitmaps = qdict_get_qdict(s->desc, "bitmaps");
+
+        assert(bitmaps);
+        if (!qdict_haskey(bitmaps, "allocation")) {
+            return 0;
+        }
+        qdict_del(bitmaps, "allocation");
+        for (i = 0; i < s->num_bitmaps; i++) {
+            if (s->bitmaps[i].type == QBM_TYPE_ALLOC) {
+                qbm_release_bitmap(bs, &s->bitmaps[i]);
+                s->bitmaps[i] = s->bitmaps[--s->num_bitmaps];
+                break;
+            }
+        }
+        s->alloc_bitmap = NULL;
+        s->backing_dict = NULL;
+    }
+    return qbm_save_desc(bs->file->bs, s->desc);
+}
+
+static int64_t qbm_get_allocated_file_size(BlockDriverState *bs)
+{
+    BDRVQBMState *s = bs->opaque;
+    /* Take the file sizes of descriptor and bitmap files into account? */
+    return bdrv_get_allocated_file_size(s->image->bs);
+}
+
+static int qbm_has_zero_init(BlockDriverState *bs)
+{
+    return 1;
+}
+
+static int qbm_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
+{
+    BDRVQBMState *s = bs->opaque;
+
+    bdi->unallocated_blocks_are_zero = true;
+    bdi->can_write_zeroes_with_unmap = true;
+    if (s->alloc_bitmap) {
+        bdi->cluster_size = bdrv_dirty_bitmap_granularity(s->alloc_bitmap);
+    } else {
+        bdi->cluster_size = bdrv_get_cluster_size(s->image->bs);
+    }
+    return 0;
+}
+
+static int qbm_check(BlockDriverState *bs, BdrvCheckResult *result,
+                     BdrvCheckMode fix)
+{
+    /* TODO: checksum verification and bitmap size checks? */
+    return 0;
+}
+
+static void qbm_detach_aio_context(BlockDriverState *bs)
+{
+    int i;
+    BDRVQBMState *s = bs->opaque;
+
+    bdrv_detach_aio_context(s->image->bs);
+    for (i = 0; i < s->num_bitmaps; i++) {
+        bdrv_detach_aio_context(s->bitmaps[i].file->bs);
+    }
+}
+
+static void qbm_attach_aio_context(BlockDriverState *bs,
+                                   AioContext *new_context)
+{
+    int i;
+    BDRVQBMState *s = bs->opaque;
+
+    bdrv_attach_aio_context(s->image->bs, new_context);
+    for (i = 0; i < s->num_bitmaps; i++) {
+        bdrv_attach_aio_context(s->bitmaps[i].file->bs, new_context);
+    }
+}
+
+static int qbm_bitmap_set_persistent(BlockDriverState *bs,
+                                     BdrvDirtyBitmap *bitmap,
+                                     bool persistent, Error **errp)
+{
+    BDRVQBMState *s = bs->opaque;
+    int ret = 0;
+    QBMBitmap *bm;
+    char *filename;
+    const char *name = bdrv_dirty_bitmap_name(bitmap);
+    int granularity = bdrv_dirty_bitmap_granularity(bitmap);
+    QDict *bitmaps = qdict_get_qdict(s->desc, "bitmaps");
+
+    if (persistent) {
+        filename = qbm_create_file(bs, name, "bin",
+                                   bdrv_dirty_bitmap_size(bitmap), errp);
+        if (!filename) {
+            return -EIO;
+        }
+
+        bm = qbm_open_bitmap(bs, name, filename, granularity,
+                             QBM_TYPE_DIRTY, bitmap, errp);
+        if (!bm) {
+            ret = -EIO;
+        }
+        qdict_put(bitmaps, name, qbm_make_bitmap_dict(filename, granularity,
+                                                      QBM_TYPE_DIRTY));
+        g_free(filename);
+    } else {
+        if (!qdict_haskey(bitmaps, name)) {
+            error_setg(errp, "No persistent bitmap with name '%s'", name);
+            return -ENOENT;
+        }
+        qdict_del(bitmaps, name);
+    }
+    ret = qbm_save_desc(bs->file->bs, s->desc);
+    if (ret) {
+        error_setg(errp, "Failed to save json description to file");
+    }
+    return ret;
+}
+
+static QemuOptsList qbm_create_opts = {
+    .name = "qbm-create-opts",
+    .head = QTAILQ_HEAD_INITIALIZER(qbm_create_opts.head),
+    .desc = {
+        {
+            .name = BLOCK_OPT_SIZE,
+            .type = QEMU_OPT_SIZE,
+            .help = "Virtual disk size"
+        },
+        {
+            .name = "image",
+            .type = QEMU_OPT_STRING,
+            .help = "The file name of the referenced image, if not specified, "
+                    "one will be created automatically",
+        },
+        {
+            .name = BLOCK_OPT_BACKING_FILE,
+            .type = QEMU_OPT_STRING,
+            .help = "File name of a base image"
+        },
+        {
+            .name = BLOCK_OPT_BACKING_FMT,
+            .type = QEMU_OPT_STRING,
+            .help = "Image format of the base image"
+        },
+        {
+            .name = "granularity",
+            .type = QEMU_OPT_NUMBER,
+            .help = "Bitmap granularity in bytes"
+        },
+        {
+            .name = "dirty-bitmaps",
+            .type = QEMU_OPT_NUMBER,
+            .help = "The number of dirty bitmaps to create"
+        },
+        { /* end of list */ }
+    }
+};
+
+static BlockDriver bdrv_qbm = {
+    .format_name                  = "qbm",
+    .protocol_name                = "qbm",
+    .instance_size                = sizeof(BDRVQBMState),
+    .bdrv_probe                   = qbm_probe,
+    .bdrv_open                    = qbm_open,
+    .bdrv_reopen_prepare          = qbm_reopen_prepare,
+    .bdrv_co_readv                = qbm_co_readv,
+    .bdrv_co_writev               = qbm_co_writev,
+    .bdrv_co_write_zeroes         = qbm_co_write_zeroes,
+    .bdrv_co_discard              = qbm_co_discard,
+    .bdrv_make_empty              = qbm_make_empty,
+    .bdrv_close                   = qbm_close,
+    .bdrv_getlength               = qbm_getlength,
+    .bdrv_create                  = qbm_create,
+    .bdrv_co_flush_to_disk        = qbm_co_flush,
+    .bdrv_truncate                = qbm_truncate,
+    .bdrv_co_get_block_status     = qbm_co_get_block_status,
+    .bdrv_get_allocated_file_size = qbm_get_allocated_file_size,
+    .bdrv_has_zero_init           = qbm_has_zero_init,
+    .bdrv_refresh_limits          = qbm_refresh_limits,
+    .bdrv_get_info                = qbm_get_info,
+    .bdrv_check                   = qbm_check,
+    .bdrv_detach_aio_context      = qbm_detach_aio_context,
+    .bdrv_attach_aio_context      = qbm_attach_aio_context,
+    .bdrv_dirty_bitmap_set_persistent
+                                  = qbm_bitmap_set_persistent,
+    .bdrv_change_backing_file     = qbm_change_backing_file,
+    .supports_backing             = true,
+    .create_opts                  = &qbm_create_opts,
+};
+
+static void bdrv_qbm_init(void)
+{
+    bdrv_register(&bdrv_qbm);
+}
+
+block_init(bdrv_qbm_init);
-- 
2.4.3


Reply via email to