The fleecing-hook filter performs copy-before-write (CBW) operations. It
should be inserted above the active disk and has a target node for CBW, as
in the following diagram:

    +-------+
    | Guest |
    +---+---+
        |r,w
        v
    +---+-----------+  target   +---------------+
    | Fleecing hook |---------->| target(qcow2) |
    +---+-----------+   CBW     +---+-----------+
        |                           |
backing |r,w                        |
        v                           |
    +---+---------+      backing    |
    | Active disk |<----------------+
    +-------------+        r

The target's backing may point to the active disk (this should be set up
separately), which gives the fleecing scheme.

Signed-off-by: Vladimir Sementsov-Ogievskiy <vsement...@virtuozzo.com>
---
 qapi/block-core.json  |  22 +++-
 block/fleecing-hook.c | 298 ++++++++++++++++++++++++++++++++++++++++++
 block/Makefile.objs   |   2 +
 3 files changed, 320 insertions(+), 2 deletions(-)
 create mode 100644 block/fleecing-hook.c

diff --git a/qapi/block-core.json b/qapi/block-core.json
index c4774af18e..13cf90eab6 100644
--- a/qapi/block-core.json
+++ b/qapi/block-core.json
@@ -2628,7 +2628,8 @@
             'host_cdrom', 'host_device', 'http', 'https', 'iscsi', 'luks',
             'nbd', 'nfs', 'null-aio', 'null-co', 'nvme', 'parallels', 'qcow',
             'qcow2', 'qed', 'quorum', 'raw', 'rbd', 'replication', 'sheepdog',
-            'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs' ] }
+            'ssh', 'throttle', 'vdi', 'vhdx', 'vmdk', 'vpc', 'vvfat', 'vxhs',
+            'fleecing-hook'] }
 
 ##
 # @BlockdevOptionsFile:
@@ -2719,6 +2720,22 @@
 { 'struct': 'BlockdevOptionsGenericFormat',
   'data': { 'file': 'BlockdevRef' } }
 
+##
+# @BlockdevOptionsFleecingHook:
+#
+# Driver specific block device options for the fleecing-hook filter, which
+# copies old data to @target before each write (copy-before-write, CBW).
+#
+# @append-to:     name of the device or node above which the filter is inserted
+# @target:        reference to or definition of the CBW target block device
+# @copy-bitmap:   name for the copy-bitmap of the process. May be shared TODO: normal description here
+#
+# Since: 2.9
+##
+  { 'struct': 'BlockdevOptionsFleecingHook',
+    'data': { 'append-to': 'str', 'target': 'BlockdevRef',
+              '*copy-bitmap': 'str'} }
+
 ##
 # @BlockdevOptionsLUKS:
 #
@@ -3718,7 +3735,8 @@
       'vmdk':       'BlockdevOptionsGenericCOWFormat',
       'vpc':        'BlockdevOptionsGenericFormat',
       'vvfat':      'BlockdevOptionsVVFAT',
-      'vxhs':       'BlockdevOptionsVxHS'
+      'vxhs':       'BlockdevOptionsVxHS',
+      'fleecing-hook': 'BlockdevOptionsFleecingHook'
   } }
 
 ##
diff --git a/block/fleecing-hook.c b/block/fleecing-hook.c
new file mode 100644
index 0000000000..f4e2f3ce83
--- /dev/null
+++ b/block/fleecing-hook.c
@@ -0,0 +1,298 @@
+/*
+ * Fleecing Hook filter driver
+ *
+ * The driver performs Copy-Before-Write (CBW) operation: it is injected above
+ * some node, and before each write it copies _old_ data to the target node.
+ *
+ * Copyright (c) 2018 Virtuozzo International GmbH. All rights reserved.
+ *
+ * Author:
+ *  Sementsov-Ogievskiy Vladimir <vsement...@virtuozzo.com>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "qemu/osdep.h"
+#include "qemu/cutils.h"
+#include "qapi/error.h"
+#include "block/block_int.h"
+#include "block/qdict.h"
+
+/* Per-node state of the fleecing-hook filter. */
+typedef struct BDRVFleecingHookState {
+    BdrvDirtyBitmap *cbw_bitmap; /* what should be copied to @target
+                                    on guest write. */
+    /* Child node that receives the old data copied before each write. */
+    BdrvChild *target;
+    /* True if @cbw_bitmap was created in fleecing_hook_open() rather than
+     * found on the node the filter is appended to; only then is it released
+     * in fleecing_hook_close(). */
+    bool cbw_bitmap_created;
+} BDRVFleecingHookState;
+
+/*
+ * Read handler: forwards the request unchanged to the backing child.
+ * No copy-before-write work is done on the read path.
+ */
+static coroutine_fn int fleecing_hook_co_preadv(
+        BlockDriverState *bs, uint64_t offset, uint64_t bytes,
+        QEMUIOVector *qiov, int flags)
+{
+    /* Features to be implemented:
+     * F1. COR. save read data to fleecing target for fast access
+     *     (to reduce reads). This possibly may be done with use of
+     *     copy-on-read filter, but we need an ability to make COR requests
+     *     optional: for example, if target is a ram-cache, and if it is full
+     *     now, we should skip doing COR request, as it is actually not
+     *     necessary.
+     *
+     * F2. Feature for guest: read from fleecing target if data is in ram-cache
+     *     and is unchanged
+     */
+
+    return bdrv_co_preadv(bs->backing, offset, bytes, qiov, flags);
+}
+
+/*
+ * Copy-before-write for the byte range [@offset, @offset + @bytes).
+ *
+ * The range is widened to the granularity of the CBW bitmap. Every area in
+ * it that is still dirty in the bitmap is read from the backing child and
+ * written to the target child. The bits are cleared before the I/O is
+ * issued and set again if the read or the write fails.
+ *
+ * Returns a non-negative value on success (0 if nothing had to be copied),
+ * or the negative error code of the failed read or write.
+ */
+static coroutine_fn int fleecing_hook_cbw(BlockDriverState *bs, uint64_t offset,
+                                          uint64_t bytes)
+{
+    int ret = 0;
+    BDRVFleecingHookState *s = bs->opaque;
+    uint64_t gran = bdrv_dirty_bitmap_granularity(s->cbw_bitmap);
+    uint64_t end = QEMU_ALIGN_UP(offset + bytes, gran);
+    uint64_t off = QEMU_ALIGN_DOWN(offset, gran), len;
+    /* The same buffer is used for reading from the backing node and for
+     * writing to the target, so honour the larger of both alignments. */
+    size_t align = MAX(bdrv_opt_mem_align(bs->backing->bs),
+                       bdrv_opt_mem_align(s->target->bs));
+    /* One bounce buffer big enough for the whole aligned range. */
+    struct iovec iov = {
+        .iov_base = qemu_memalign(align, end - off),
+        .iov_len = end - off
+    };
+    QEMUIOVector qiov;
+
+    qemu_iovec_init_external(&qiov, &iov, 1);
+
+    /* Features to be implemented:
+     * F3. parallelize copying loop
+     * F4. detect zeros
+     * F5. use block_status ?
+     * F6. don't copy clusters which are already cached by COR [see F1]
+     */
+
+    len = end - off;
+    /* @off/@len are in-out: on each iteration they describe the next dirty
+     * area found inside the remaining part of the range. */
+    while (bdrv_dirty_bitmap_next_dirty_area(s->cbw_bitmap, &off, &len)) {
+        /* Shrink the I/O vector to the area copied in this iteration. */
+        iov.iov_len = qiov.size = len;
+
+        /* Clear the bits before issuing the I/O; they are restored below if
+         * the copy fails. */
+        bdrv_reset_dirty_bitmap(s->cbw_bitmap, off, len);
+
+        ret = bdrv_co_preadv(bs->backing, off, len, &qiov,
+                             BDRV_REQ_NO_SERIALISING);
+        if (ret < 0) {
+            bdrv_set_dirty_bitmap(s->cbw_bitmap, off, len);
+            goto finish;
+        }
+
+        ret = bdrv_co_pwritev(s->target, off, len, &qiov, BDRV_REQ_SERIALISING);
+        if (ret < 0) {
+            bdrv_set_dirty_bitmap(s->cbw_bitmap, off, len);
+            goto finish;
+        }
+
+        off += len;
+        if (off >= end) {
+            break;
+        }
+        len = end - off;
+    }
+
+finish:
+    qemu_vfree(iov.iov_base);
+
+    return ret;
+}
+
+/*
+ * Discard handler: first copy the not-yet-copied old data of the range to
+ * the target, then pass the discard on to the backing child. A failed copy
+ * fails the discard with the same error code.
+ */
+static int coroutine_fn fleecing_hook_co_pdiscard(BlockDriverState *bs,
+                                                  int64_t offset, int bytes)
+{
+    int rc = fleecing_hook_cbw(bs, offset, bytes);
+
+    /* Features to be implemented:
+     * F7. possibility of lazy discard: just defer the discard after fleecing
+     *     completion. If write (or new discard) occurs to the same area, just
+     *     drop deferred discard.
+     */
+
+    if (rc >= 0) {
+        rc = bdrv_co_pdiscard(bs->backing, offset, bytes);
+    }
+
+    return rc;
+}
+
+/*
+ * Write-zeroes handler: first copy the not-yet-copied old data of the range
+ * to the target, then forward the request to the backing child. A failed
+ * copy fails the guest request with the same error code.
+ */
+static int coroutine_fn fleecing_hook_co_pwrite_zeroes(BlockDriverState *bs,
+        int64_t offset, int bytes, BdrvRequestFlags flags)
+{
+    int rc = fleecing_hook_cbw(bs, offset, bytes);
+
+    /* F8. Additional option to break fleecing instead of breaking guest
+     * write when the copy fails */
+    if (rc >= 0) {
+        rc = bdrv_co_pwrite_zeroes(bs->backing, offset, bytes, flags);
+    }
+
+    return rc;
+}
+
+/*
+ * Write handler: first copy the not-yet-copied old data of the range to the
+ * target, then forward the write to the backing child. A failed copy fails
+ * the guest write with the same error code.
+ */
+static coroutine_fn int fleecing_hook_co_pwritev(BlockDriverState *bs,
+                                                 uint64_t offset,
+                                                 uint64_t bytes,
+                                                 QEMUIOVector *qiov, int flags)
+{
+    int rc = fleecing_hook_cbw(bs, offset, bytes);
+
+    if (rc >= 0) {
+        rc = bdrv_co_pwritev(bs->backing, offset, bytes, qiov, flags);
+    }
+
+    return rc;
+}
+
+/*
+ * Flush handler: flushes the backing node, or succeeds trivially when the
+ * filter has no backing child.
+ */
+static int coroutine_fn fleecing_hook_co_flush(BlockDriverState *bs)
+{
+    BdrvChild *backing = bs->backing;
+
+    return backing ? bdrv_co_flush(backing->bs) : 0;
+}
+
+/*
+ * Report the (refreshed) filename of the backing node as the exact filename
+ * of the filter. @opts is not used.
+ */
+static void fleecing_hook_refresh_filename(BlockDriverState *bs, QDict *opts)
+{
+    BlockDriverState *backing_bs;
+
+    if (!bs->backing) {
+        /* we can be here after failed bdrv_attach_child in
+         * bdrv_set_backing_hd */
+        return;
+    }
+
+    backing_bs = bs->backing->bs;
+    bdrv_refresh_filename(backing_bs);
+    pstrcpy(bs->exact_filename, sizeof(bs->exact_filename),
+            backing_bs->filename);
+}
+
+/*
+ * Child permissions: the filter defaults for every child, plus WRITE added
+ * to the shared permissions of children attached with the child_file role
+ * (the role used for the target child).
+ */
+static void fleecing_hook_child_perm(BlockDriverState *bs, BdrvChild *c,
+                                       const BdrvChildRole *role,
+                                       BlockReopenQueue *reopen_queue,
+                                       uint64_t perm, uint64_t shared,
+                                       uint64_t *nperm, uint64_t *nshared)
+{
+    bdrv_filter_default_perms(bs, c, role, reopen_queue, perm, shared, nperm,
+                              nshared);
+
+    if (role != &child_file) {
+        return;
+    }
+
+    /* share write to target, to not interfere guest writes to it's disk
+     * which will be in target backing chain */
+    *nshared |= BLK_PERM_WRITE;
+}
+
+/*
+ * Open the fleecing-hook filter and insert it above the node named by the
+ * "append-to" option.
+ *
+ * Options consumed from @options:
+ *   "append-to"   - name of the device or node to insert the filter above
+ *   "copy-bitmap" - optional name of the CBW bitmap; a bitmap with this name
+ *                   found on the append-to node is reused, otherwise a new
+ *                   bitmap is created on the filter node
+ *   "target"      - child node that receives the copied data
+ *
+ * The CBW bitmap is disabled and fully set, so that every area is copied on
+ * its first guest write. @flags is not used.
+ *
+ * Returns 0 on success, a negative errno value on failure (with @errp set).
+ */
+static int fleecing_hook_open(BlockDriverState *bs, QDict *options, int flags,
+                              Error **errp)
+{
+    BDRVFleecingHookState *s = bs->opaque;
+    Error *local_err = NULL;
+    const char *append_to, *copy_bitmap_name;
+    BlockDriverState *backing_bs;
+    int64_t backing_len;
+    int ret;
+
+    /* Use the "try" variant so that a missing or non-string option is
+     * reported through @errp. */
+    append_to = qdict_get_try_str(options, "append-to");
+    if (!append_to) {
+        error_setg(errp, "Parameter 'append-to' is required");
+        return -EINVAL;
+    }
+
+    /* @append_to points into @options: the entry must not be deleted before
+     * the last use of the string. */
+    backing_bs = bdrv_lookup_bs(append_to, append_to, errp);
+    qdict_del(options, "append-to");
+    append_to = NULL;
+    if (!backing_bs) {
+        return -EINVAL;
+    }
+
+    /* Query the length before any state is set up, so a failure here needs
+     * no cleanup. */
+    backing_len = bdrv_getlength(backing_bs);
+    if (backing_len < 0) {
+        error_setg_errno(errp, -backing_len,
+                         "Could not get length of the append-to node");
+        return backing_len;
+    }
+
+    bs->total_sectors = backing_bs->total_sectors;
+
+    copy_bitmap_name = qdict_get_try_str(options, "copy-bitmap");
+    if (copy_bitmap_name) {
+        s->cbw_bitmap = bdrv_find_dirty_bitmap(backing_bs, copy_bitmap_name);
+    }
+
+    if (!s->cbw_bitmap) {
+        /* 65536: bitmap granularity in bytes (64 KiB) */
+        s->cbw_bitmap = bdrv_create_dirty_bitmap(bs, 65536, copy_bitmap_name,
+                                                 errp);
+        if (!s->cbw_bitmap) {
+            return -EINVAL;
+        }
+        s->cbw_bitmap_created = true;
+    }
+
+    /* As above: the name string is owned by @options, so drop the entry only
+     * now that it is no longer needed. */
+    if (copy_bitmap_name) {
+        qdict_del(options, "copy-bitmap");
+        copy_bitmap_name = NULL;
+    }
+
+    bdrv_disable_dirty_bitmap(s->cbw_bitmap);
+    bdrv_set_dirty_bitmap(s->cbw_bitmap, 0, backing_len);
+
+    s->target = bdrv_open_child(NULL, options, "target", bs, &child_file,
+                               false, errp);
+    if (!s->target) {
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    bdrv_set_aio_context(bs, bdrv_get_aio_context(backing_bs));
+    bdrv_set_aio_context(s->target->bs, bdrv_get_aio_context(backing_bs));
+
+    bdrv_drained_begin(backing_bs);
+
+    /* NOTE(review): reference handling around bdrv_append() is kept exactly
+     * as in the original patch; confirm it against bdrv_append()'s contract. */
+    bdrv_ref(bs);
+    bdrv_append(bs, backing_bs, &local_err);
+
+    if (local_err) {
+        bdrv_unref(bs);
+    }
+
+    bdrv_drained_end(backing_bs);
+
+    if (local_err) {
+        error_propagate(errp, local_err);
+        ret = -EINVAL;
+        goto fail;
+    }
+
+    return 0;
+
+fail:
+    /* Undo what was set up above and clear the state pointers, so nothing
+     * is released twice should fleecing_hook_close() run afterwards. */
+    if (s->target) {
+        bdrv_unref_child(bs, s->target);
+        s->target = NULL;
+    }
+    if (s->cbw_bitmap_created) {
+        bdrv_release_dirty_bitmap(bs, s->cbw_bitmap);
+        s->cbw_bitmap_created = false;
+    }
+    s->cbw_bitmap = NULL;
+
+    return ret;
+}
+
+/*
+ * Close handler: releases the CBW bitmap if this filter created it, and
+ * drops the target child if one is attached.
+ */
+static void fleecing_hook_close(BlockDriverState *bs)
+{
+    BDRVFleecingHookState *state = bs->opaque;
+    bool owns_bitmap = state->cbw_bitmap && state->cbw_bitmap_created;
+
+    if (owns_bitmap) {
+        bdrv_release_dirty_bitmap(bs, state->cbw_bitmap);
+    }
+
+    if (state->target != NULL) {
+        bdrv_unref_child(bs, state->target);
+    }
+}
+
+/* Driver table for the "fleecing-hook" filter. */
+BlockDriver bdrv_fleecing_hook_filter = {
+    .format_name = "fleecing-hook",
+    .instance_size = sizeof(BDRVFleecingHookState),
+
+    /* I/O: reads and flushes go straight to the backing child; writes,
+     * write-zeroes and discards run copy-before-write first. */
+    .bdrv_co_preadv             = fleecing_hook_co_preadv,
+    .bdrv_co_pwritev            = fleecing_hook_co_pwritev,
+    .bdrv_co_pwrite_zeroes      = fleecing_hook_co_pwrite_zeroes,
+    .bdrv_co_pdiscard           = fleecing_hook_co_pdiscard,
+    .bdrv_co_flush              = fleecing_hook_co_flush,
+
+    .bdrv_co_block_status       = bdrv_co_block_status_from_backing,
+
+    .bdrv_refresh_filename      = fleecing_hook_refresh_filename,
+
+    .bdrv_open                  = fleecing_hook_open,
+    .bdrv_close                 = fleecing_hook_close,
+
+    .bdrv_child_perm            = fleecing_hook_child_perm,
+
+    .is_filter = true,
+};
+
+/* Registers the fleecing-hook driver with the block layer. */
+static void bdrv_fleecing_hook_init(void)
+{
+    bdrv_register(&bdrv_fleecing_hook_filter);
+}
+
+/* Hook the registration function into block-layer initialisation. */
+block_init(bdrv_fleecing_hook_init);
diff --git a/block/Makefile.objs b/block/Makefile.objs
index c8337bf186..081720b14f 100644
--- a/block/Makefile.objs
+++ b/block/Makefile.objs
@@ -31,6 +31,8 @@ block-obj-y += throttle.o copy-on-read.o
 
 block-obj-y += crypto.o
 
+block-obj-y += fleecing-hook.o
+
 common-obj-y += stream.o
 
 nfs.o-libs         := $(LIBNFS_LIBS)
-- 
2.18.0


Reply via email to